{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100.0, "global_step": 21405, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001401541695865452, "grad_norm": null, "learning_rate": 0.0, "loss": 8.4692, "step": 1 }, { "epoch": 0.0002803083391730904, "grad_norm": null, "learning_rate": 0.0, "loss": 11.0279, "step": 2 }, { "epoch": 0.0004204625087596356, "grad_norm": 7.121354103088379, "learning_rate": 0.0, "loss": 8.7646, "step": 3 }, { "epoch": 0.0005606166783461808, "grad_norm": 7.236179828643799, "learning_rate": 6e-07, "loss": 8.7277, "step": 4 }, { "epoch": 0.000700770847932726, "grad_norm": 6.580241680145264, "learning_rate": 1.2e-06, "loss": 7.9166, "step": 5 }, { "epoch": 0.0008409250175192712, "grad_norm": 6.403110027313232, "learning_rate": 1.8e-06, "loss": 7.7798, "step": 6 }, { "epoch": 0.0009810791871058165, "grad_norm": 7.469630241394043, "learning_rate": 2.4e-06, "loss": 8.9914, "step": 7 }, { "epoch": 0.0011212333566923615, "grad_norm": 7.176066875457764, "learning_rate": 2.9999999999999997e-06, "loss": 8.8701, "step": 8 }, { "epoch": 0.0012613875262789068, "grad_norm": 7.3517656326293945, "learning_rate": 3.6e-06, "loss": 8.9468, "step": 9 }, { "epoch": 0.001401541695865452, "grad_norm": 7.987253189086914, "learning_rate": 4.2e-06, "loss": 9.5461, "step": 10 }, { "epoch": 0.0015416958654519972, "grad_norm": 7.5793657302856445, "learning_rate": 4.8e-06, "loss": 9.1411, "step": 11 }, { "epoch": 0.0016818500350385423, "grad_norm": 7.367099761962891, "learning_rate": 5.399999999999999e-06, "loss": 8.8992, "step": 12 }, { "epoch": 0.0018220042046250876, "grad_norm": 6.14939546585083, "learning_rate": 5.999999999999999e-06, "loss": 7.4343, "step": 13 }, { "epoch": 0.001962158374211633, "grad_norm": 7.571694374084473, "learning_rate": 6.599999999999999e-06, "loss": 9.3066, "step": 14 }, { "epoch": 
0.002102312543798178, "grad_norm": 6.577233791351318, "learning_rate": 7.2e-06, "loss": 7.9043, "step": 15 }, { "epoch": 0.002242466713384723, "grad_norm": 8.27171516418457, "learning_rate": 7.799999999999998e-06, "loss": 10.096, "step": 16 }, { "epoch": 0.0023826208829712684, "grad_norm": 6.9634599685668945, "learning_rate": 8.4e-06, "loss": 8.2555, "step": 17 }, { "epoch": 0.0025227750525578137, "grad_norm": 6.922667026519775, "learning_rate": 8.999999999999999e-06, "loss": 8.4747, "step": 18 }, { "epoch": 0.002662929222144359, "grad_norm": 7.340466022491455, "learning_rate": 9.6e-06, "loss": 8.8149, "step": 19 }, { "epoch": 0.002803083391730904, "grad_norm": 6.512653350830078, "learning_rate": 1.02e-05, "loss": 7.8311, "step": 20 }, { "epoch": 0.002943237561317449, "grad_norm": 8.32393741607666, "learning_rate": 1.0799999999999998e-05, "loss": 9.9733, "step": 21 }, { "epoch": 0.0030833917309039944, "grad_norm": 7.370021820068359, "learning_rate": 1.14e-05, "loss": 9.0241, "step": 22 }, { "epoch": 0.0032235459004905397, "grad_norm": 7.775641441345215, "learning_rate": 1.1999999999999999e-05, "loss": 9.5082, "step": 23 }, { "epoch": 0.0033637000700770846, "grad_norm": 7.187759876251221, "learning_rate": 1.26e-05, "loss": 8.6962, "step": 24 }, { "epoch": 0.00350385423966363, "grad_norm": 9.324918746948242, "learning_rate": 1.3199999999999997e-05, "loss": 11.2191, "step": 25 }, { "epoch": 0.003644008409250175, "grad_norm": 7.992179870605469, "learning_rate": 1.3799999999999998e-05, "loss": 9.4027, "step": 26 }, { "epoch": 0.0037841625788367205, "grad_norm": 8.400440216064453, "learning_rate": 1.44e-05, "loss": 10.1472, "step": 27 }, { "epoch": 0.003924316748423266, "grad_norm": 7.050783634185791, "learning_rate": 1.4999999999999999e-05, "loss": 8.353, "step": 28 }, { "epoch": 0.004064470918009811, "grad_norm": 7.598066806793213, "learning_rate": 1.5599999999999996e-05, "loss": 9.0813, "step": 29 }, { "epoch": 0.004204625087596356, "grad_norm": 7.8444600105285645, 
"learning_rate": 1.6199999999999997e-05, "loss": 9.2958, "step": 30 }, { "epoch": 0.004344779257182901, "grad_norm": 7.099885940551758, "learning_rate": 1.68e-05, "loss": 8.4595, "step": 31 }, { "epoch": 0.004484933426769446, "grad_norm": 7.714644908905029, "learning_rate": 1.74e-05, "loss": 9.2685, "step": 32 }, { "epoch": 0.0046250875963559914, "grad_norm": 7.360316276550293, "learning_rate": 1.7999999999999997e-05, "loss": 8.7488, "step": 33 }, { "epoch": 0.004765241765942537, "grad_norm": 7.624013900756836, "learning_rate": 1.8599999999999998e-05, "loss": 9.0688, "step": 34 }, { "epoch": 0.004905395935529082, "grad_norm": 8.676628112792969, "learning_rate": 1.92e-05, "loss": 10.4329, "step": 35 }, { "epoch": 0.005045550105115627, "grad_norm": 7.6324005126953125, "learning_rate": 1.98e-05, "loss": 8.8521, "step": 36 }, { "epoch": 0.005185704274702173, "grad_norm": 7.269668102264404, "learning_rate": 2.04e-05, "loss": 8.5564, "step": 37 }, { "epoch": 0.005325858444288718, "grad_norm": 7.439398288726807, "learning_rate": 2.1e-05, "loss": 8.7445, "step": 38 }, { "epoch": 0.005466012613875262, "grad_norm": 7.443110942840576, "learning_rate": 2.1599999999999996e-05, "loss": 8.5582, "step": 39 }, { "epoch": 0.005606166783461808, "grad_norm": 8.377077102661133, "learning_rate": 2.2199999999999998e-05, "loss": 9.6897, "step": 40 }, { "epoch": 0.005746320953048353, "grad_norm": 7.658986568450928, "learning_rate": 2.28e-05, "loss": 8.7662, "step": 41 }, { "epoch": 0.005886475122634898, "grad_norm": 11.107864379882812, "learning_rate": 2.34e-05, "loss": 11.7328, "step": 42 }, { "epoch": 0.006026629292221444, "grad_norm": 9.172978401184082, "learning_rate": 2.3999999999999997e-05, "loss": 10.4654, "step": 43 }, { "epoch": 0.006166783461807989, "grad_norm": 7.016298294067383, "learning_rate": 2.4599999999999998e-05, "loss": 7.8736, "step": 44 }, { "epoch": 0.006306937631394534, "grad_norm": 9.631633758544922, "learning_rate": 2.52e-05, "loss": 10.5877, "step": 45 }, { 
"epoch": 0.0064470918009810795, "grad_norm": 7.38787317276001, "learning_rate": 2.5799999999999997e-05, "loss": 8.5266, "step": 46 }, { "epoch": 0.006587245970567625, "grad_norm": null, "learning_rate": 2.6399999999999995e-05, "loss": 16.7878, "step": 47 }, { "epoch": 0.006727400140154169, "grad_norm": 7.5063323974609375, "learning_rate": 2.6399999999999995e-05, "loss": 8.6264, "step": 48 }, { "epoch": 0.0068675543097407145, "grad_norm": 6.67501974105835, "learning_rate": 2.6999999999999996e-05, "loss": 7.7156, "step": 49 }, { "epoch": 0.00700770847932726, "grad_norm": 8.071187019348145, "learning_rate": 2.7599999999999997e-05, "loss": 10.1454, "step": 50 }, { "epoch": 0.007147862648913805, "grad_norm": 7.581518173217773, "learning_rate": 2.8199999999999998e-05, "loss": 8.4984, "step": 51 }, { "epoch": 0.00728801681850035, "grad_norm": 6.454829692840576, "learning_rate": 2.88e-05, "loss": 7.3631, "step": 52 }, { "epoch": 0.007428170988086896, "grad_norm": 8.451108932495117, "learning_rate": 2.94e-05, "loss": 9.2767, "step": 53 }, { "epoch": 0.007568325157673441, "grad_norm": 7.960541248321533, "learning_rate": 2.9999999999999997e-05, "loss": 8.7883, "step": 54 }, { "epoch": 0.007708479327259986, "grad_norm": 8.233039855957031, "learning_rate": 3.06e-05, "loss": 9.1653, "step": 55 }, { "epoch": 0.007848633496846532, "grad_norm": 10.144512176513672, "learning_rate": 3.119999999999999e-05, "loss": 10.5917, "step": 56 }, { "epoch": 0.007988787666433076, "grad_norm": 8.107776641845703, "learning_rate": 3.1799999999999994e-05, "loss": 8.7712, "step": 57 }, { "epoch": 0.008128941836019622, "grad_norm": 11.103604316711426, "learning_rate": 3.2399999999999995e-05, "loss": 11.2103, "step": 58 }, { "epoch": 0.008269096005606167, "grad_norm": 8.37151050567627, "learning_rate": 3.2999999999999996e-05, "loss": 9.1048, "step": 59 }, { "epoch": 0.008409250175192713, "grad_norm": 8.462974548339844, "learning_rate": 3.36e-05, "loss": 9.0559, "step": 60 }, { "epoch": 
0.008549404344779257, "grad_norm": 7.523645877838135, "learning_rate": 3.42e-05, "loss": 8.2492, "step": 61 }, { "epoch": 0.008689558514365802, "grad_norm": 7.640683650970459, "learning_rate": 3.48e-05, "loss": 8.2912, "step": 62 }, { "epoch": 0.008829712683952348, "grad_norm": 7.68369197845459, "learning_rate": 3.539999999999999e-05, "loss": 8.1744, "step": 63 }, { "epoch": 0.008969866853538892, "grad_norm": 9.849550247192383, "learning_rate": 3.5999999999999994e-05, "loss": 10.102, "step": 64 }, { "epoch": 0.009110021023125438, "grad_norm": 7.607925891876221, "learning_rate": 3.6599999999999995e-05, "loss": 8.1789, "step": 65 }, { "epoch": 0.009250175192711983, "grad_norm": 7.810117244720459, "learning_rate": 3.7199999999999996e-05, "loss": 8.4259, "step": 66 }, { "epoch": 0.009390329362298529, "grad_norm": 7.368885517120361, "learning_rate": 3.78e-05, "loss": 7.7666, "step": 67 }, { "epoch": 0.009530483531885073, "grad_norm": 8.448766708374023, "learning_rate": 3.84e-05, "loss": 8.9383, "step": 68 }, { "epoch": 0.009670637701471618, "grad_norm": 9.512053489685059, "learning_rate": 3.9e-05, "loss": 9.8389, "step": 69 }, { "epoch": 0.009810791871058164, "grad_norm": 9.501230239868164, "learning_rate": 3.96e-05, "loss": 9.6005, "step": 70 }, { "epoch": 0.009950946040644709, "grad_norm": 10.725296020507812, "learning_rate": 4.02e-05, "loss": 9.7433, "step": 71 }, { "epoch": 0.010091100210231255, "grad_norm": 7.951533317565918, "learning_rate": 4.08e-05, "loss": 8.1849, "step": 72 }, { "epoch": 0.010231254379817799, "grad_norm": 6.88252592086792, "learning_rate": 4.14e-05, "loss": 7.277, "step": 73 }, { "epoch": 0.010371408549404345, "grad_norm": 8.40587329864502, "learning_rate": 4.2e-05, "loss": 8.5527, "step": 74 }, { "epoch": 0.01051156271899089, "grad_norm": 9.101225852966309, "learning_rate": 4.259999999999999e-05, "loss": 9.049, "step": 75 }, { "epoch": 0.010651716888577436, "grad_norm": 9.011225700378418, "learning_rate": 4.319999999999999e-05, "loss": 
8.7113, "step": 76 }, { "epoch": 0.01079187105816398, "grad_norm": 8.225573539733887, "learning_rate": 4.3799999999999994e-05, "loss": 8.441, "step": 77 }, { "epoch": 0.010932025227750525, "grad_norm": 8.690326690673828, "learning_rate": 4.4399999999999995e-05, "loss": 8.4745, "step": 78 }, { "epoch": 0.011072179397337071, "grad_norm": 9.590045928955078, "learning_rate": 4.4999999999999996e-05, "loss": 9.2402, "step": 79 }, { "epoch": 0.011212333566923615, "grad_norm": 10.65174388885498, "learning_rate": 4.56e-05, "loss": 9.9752, "step": 80 }, { "epoch": 0.011352487736510162, "grad_norm": 9.207296371459961, "learning_rate": 4.62e-05, "loss": 8.864, "step": 81 }, { "epoch": 0.011492641906096706, "grad_norm": 7.806180477142334, "learning_rate": 4.68e-05, "loss": 7.7845, "step": 82 }, { "epoch": 0.011632796075683252, "grad_norm": 8.521522521972656, "learning_rate": 4.7399999999999993e-05, "loss": 8.3196, "step": 83 }, { "epoch": 0.011772950245269797, "grad_norm": 8.61214542388916, "learning_rate": 4.7999999999999994e-05, "loss": 8.3665, "step": 84 }, { "epoch": 0.011913104414856343, "grad_norm": 10.06464958190918, "learning_rate": 4.8599999999999995e-05, "loss": 8.9888, "step": 85 }, { "epoch": 0.012053258584442887, "grad_norm": 8.386164665222168, "learning_rate": 4.9199999999999997e-05, "loss": 8.0003, "step": 86 }, { "epoch": 0.012193412754029432, "grad_norm": 8.232111930847168, "learning_rate": 4.98e-05, "loss": 7.7958, "step": 87 }, { "epoch": 0.012333566923615978, "grad_norm": 8.352997779846191, "learning_rate": 5.04e-05, "loss": 8.1255, "step": 88 }, { "epoch": 0.012473721093202522, "grad_norm": 10.184182167053223, "learning_rate": 5.1e-05, "loss": 9.0337, "step": 89 }, { "epoch": 0.012613875262789068, "grad_norm": 8.01484203338623, "learning_rate": 5.1599999999999994e-05, "loss": 7.4799, "step": 90 }, { "epoch": 0.012754029432375613, "grad_norm": 8.27849292755127, "learning_rate": 5.2199999999999995e-05, "loss": 7.7335, "step": 91 }, { "epoch": 
0.012894183601962159, "grad_norm": 9.302130699157715, "learning_rate": 5.279999999999999e-05, "loss": 8.2463, "step": 92 }, { "epoch": 0.013034337771548703, "grad_norm": 7.507938385009766, "learning_rate": 5.339999999999999e-05, "loss": 6.9302, "step": 93 }, { "epoch": 0.01317449194113525, "grad_norm": 8.125649452209473, "learning_rate": 5.399999999999999e-05, "loss": 7.3503, "step": 94 }, { "epoch": 0.013314646110721794, "grad_norm": 8.156755447387695, "learning_rate": 5.459999999999999e-05, "loss": 7.396, "step": 95 }, { "epoch": 0.013454800280308338, "grad_norm": 8.820056915283203, "learning_rate": 5.519999999999999e-05, "loss": 7.7505, "step": 96 }, { "epoch": 0.013594954449894885, "grad_norm": 7.855623722076416, "learning_rate": 5.5799999999999994e-05, "loss": 7.1811, "step": 97 }, { "epoch": 0.013735108619481429, "grad_norm": 9.308487892150879, "learning_rate": 5.6399999999999995e-05, "loss": 7.7698, "step": 98 }, { "epoch": 0.013875262789067975, "grad_norm": 8.365824699401855, "learning_rate": 5.6999999999999996e-05, "loss": 7.3467, "step": 99 }, { "epoch": 0.01401541695865452, "grad_norm": 9.018302917480469, "learning_rate": 5.76e-05, "loss": 7.5294, "step": 100 }, { "epoch": 0.014155571128241066, "grad_norm": 8.283246994018555, "learning_rate": 5.82e-05, "loss": 7.1433, "step": 101 }, { "epoch": 0.01429572529782761, "grad_norm": 10.114821434020996, "learning_rate": 5.88e-05, "loss": 8.0797, "step": 102 }, { "epoch": 0.014435879467414156, "grad_norm": 10.132929801940918, "learning_rate": 5.94e-05, "loss": 8.2213, "step": 103 }, { "epoch": 0.0145760336370007, "grad_norm": 9.059672355651855, "learning_rate": 5.9999999999999995e-05, "loss": 7.5188, "step": 104 }, { "epoch": 0.014716187806587245, "grad_norm": 11.894686698913574, "learning_rate": 6.0599999999999996e-05, "loss": 8.9093, "step": 105 }, { "epoch": 0.014856341976173791, "grad_norm": 8.074777603149414, "learning_rate": 6.12e-05, "loss": 6.8733, "step": 106 }, { "epoch": 0.014996496145760336, 
"grad_norm": 10.521833419799805, "learning_rate": 6.18e-05, "loss": 7.8667, "step": 107 }, { "epoch": 0.015136650315346882, "grad_norm": 10.695642471313477, "learning_rate": 6.239999999999999e-05, "loss": 8.2731, "step": 108 }, { "epoch": 0.015276804484933426, "grad_norm": 9.828344345092773, "learning_rate": 6.299999999999999e-05, "loss": 7.5958, "step": 109 }, { "epoch": 0.015416958654519973, "grad_norm": 9.938995361328125, "learning_rate": 6.359999999999999e-05, "loss": 7.6425, "step": 110 }, { "epoch": 0.015557112824106517, "grad_norm": 8.546896934509277, "learning_rate": 6.419999999999999e-05, "loss": 6.9451, "step": 111 }, { "epoch": 0.015697266993693063, "grad_norm": 9.221542358398438, "learning_rate": 6.479999999999999e-05, "loss": 7.2392, "step": 112 }, { "epoch": 0.015837421163279606, "grad_norm": 9.153562545776367, "learning_rate": 6.539999999999999e-05, "loss": 7.0752, "step": 113 }, { "epoch": 0.015977575332866152, "grad_norm": 12.386898040771484, "learning_rate": 6.599999999999999e-05, "loss": 6.8654, "step": 114 }, { "epoch": 0.0161177295024527, "grad_norm": 8.489028930664062, "learning_rate": 6.659999999999999e-05, "loss": 6.547, "step": 115 }, { "epoch": 0.016257883672039244, "grad_norm": 8.682656288146973, "learning_rate": 6.72e-05, "loss": 6.7826, "step": 116 }, { "epoch": 0.016398037841625787, "grad_norm": 7.733676433563232, "learning_rate": 6.78e-05, "loss": 6.0851, "step": 117 }, { "epoch": 0.016538192011212333, "grad_norm": 9.6995849609375, "learning_rate": 6.84e-05, "loss": 7.3635, "step": 118 }, { "epoch": 0.01667834618079888, "grad_norm": 12.979042053222656, "learning_rate": 6.9e-05, "loss": 6.9633, "step": 119 }, { "epoch": 0.016818500350385426, "grad_norm": null, "learning_rate": 6.96e-05, "loss": 7.0443, "step": 120 }, { "epoch": 0.01695865451997197, "grad_norm": 13.420258522033691, "learning_rate": 6.96e-05, "loss": 5.9981, "step": 121 }, { "epoch": 0.017098808689558515, "grad_norm": 12.565690994262695, "learning_rate": 7.02e-05, "loss": 
4.2515, "step": 122 }, { "epoch": 0.01723896285914506, "grad_norm": 19.81829261779785, "learning_rate": 7.079999999999999e-05, "loss": 4.9932, "step": 123 }, { "epoch": 0.017379117028731603, "grad_norm": 12.81844711303711, "learning_rate": 7.139999999999999e-05, "loss": 4.2595, "step": 124 }, { "epoch": 0.01751927119831815, "grad_norm": 11.58743667602539, "learning_rate": 7.199999999999999e-05, "loss": 3.9826, "step": 125 }, { "epoch": 0.017659425367904696, "grad_norm": 13.66759204864502, "learning_rate": 7.259999999999999e-05, "loss": 4.0842, "step": 126 }, { "epoch": 0.017799579537491242, "grad_norm": 10.878744125366211, "learning_rate": 7.319999999999999e-05, "loss": 3.7609, "step": 127 }, { "epoch": 0.017939733707077785, "grad_norm": 10.014312744140625, "learning_rate": 7.379999999999999e-05, "loss": 3.6731, "step": 128 }, { "epoch": 0.01807988787666433, "grad_norm": 8.86495590209961, "learning_rate": 7.439999999999999e-05, "loss": 3.5619, "step": 129 }, { "epoch": 0.018220042046250877, "grad_norm": 10.835160255432129, "learning_rate": 7.5e-05, "loss": 3.6498, "step": 130 }, { "epoch": 0.01836019621583742, "grad_norm": 8.720636367797852, "learning_rate": 7.56e-05, "loss": 3.5168, "step": 131 }, { "epoch": 0.018500350385423966, "grad_norm": 7.215508937835693, "learning_rate": 7.62e-05, "loss": 3.394, "step": 132 }, { "epoch": 0.018640504555010512, "grad_norm": 7.7803168296813965, "learning_rate": 7.68e-05, "loss": 3.4178, "step": 133 }, { "epoch": 0.018780658724597058, "grad_norm": 6.6492085456848145, "learning_rate": 7.74e-05, "loss": 3.3398, "step": 134 }, { "epoch": 0.0189208128941836, "grad_norm": 9.923211097717285, "learning_rate": 7.8e-05, "loss": 3.5073, "step": 135 }, { "epoch": 0.019060967063770147, "grad_norm": 8.024308204650879, "learning_rate": 7.86e-05, "loss": 3.3961, "step": 136 }, { "epoch": 0.019201121233356693, "grad_norm": 7.969802379608154, "learning_rate": 7.92e-05, "loss": 3.368, "step": 137 }, { "epoch": 0.019341275402943236, "grad_norm": 
5.528815746307373, "learning_rate": 7.98e-05, "loss": 3.2395, "step": 138 }, { "epoch": 0.019481429572529782, "grad_norm": 6.242894172668457, "learning_rate": 8.04e-05, "loss": 3.2522, "step": 139 }, { "epoch": 0.019621583742116328, "grad_norm": 6.63530158996582, "learning_rate": 8.1e-05, "loss": 3.2979, "step": 140 }, { "epoch": 0.019761737911702874, "grad_norm": 4.684387683868408, "learning_rate": 8.16e-05, "loss": 3.1842, "step": 141 }, { "epoch": 0.019901892081289417, "grad_norm": 4.867077350616455, "learning_rate": 8.22e-05, "loss": 3.1842, "step": 142 }, { "epoch": 0.020042046250875963, "grad_norm": 4.582318305969238, "learning_rate": 8.28e-05, "loss": 3.1722, "step": 143 }, { "epoch": 0.02018220042046251, "grad_norm": 5.831948757171631, "learning_rate": 8.34e-05, "loss": 3.2393, "step": 144 }, { "epoch": 0.020322354590049056, "grad_norm": 2.2314395904541016, "learning_rate": 8.4e-05, "loss": 3.0924, "step": 145 }, { "epoch": 0.020462508759635598, "grad_norm": 4.964643955230713, "learning_rate": 8.459999999999998e-05, "loss": 3.1685, "step": 146 }, { "epoch": 0.020602662929222144, "grad_norm": 4.167355060577393, "learning_rate": 8.519999999999998e-05, "loss": 3.1267, "step": 147 }, { "epoch": 0.02074281709880869, "grad_norm": 3.614588975906372, "learning_rate": 8.579999999999998e-05, "loss": 3.1363, "step": 148 }, { "epoch": 0.020882971268395233, "grad_norm": 2.30169677734375, "learning_rate": 8.639999999999999e-05, "loss": 3.0837, "step": 149 }, { "epoch": 0.02102312543798178, "grad_norm": 4.596937656402588, "learning_rate": 8.699999999999999e-05, "loss": 3.1314, "step": 150 }, { "epoch": 0.021163279607568326, "grad_norm": 3.200518846511841, "learning_rate": 8.759999999999999e-05, "loss": 3.0718, "step": 151 }, { "epoch": 0.021303433777154872, "grad_norm": 2.7224369049072266, "learning_rate": 8.819999999999999e-05, "loss": 3.0051, "step": 152 }, { "epoch": 0.021443587946741414, "grad_norm": 2.585502862930298, "learning_rate": 8.879999999999999e-05, "loss": 
3.0545, "step": 153 }, { "epoch": 0.02158374211632796, "grad_norm": 4.326929092407227, "learning_rate": 8.939999999999999e-05, "loss": 3.0337, "step": 154 }, { "epoch": 0.021723896285914507, "grad_norm": 2.0838465690612793, "learning_rate": 8.999999999999999e-05, "loss": 2.9955, "step": 155 }, { "epoch": 0.02186405045550105, "grad_norm": 1.980881929397583, "learning_rate": 9.059999999999999e-05, "loss": 2.9981, "step": 156 }, { "epoch": 0.022004204625087596, "grad_norm": 2.443854808807373, "learning_rate": 9.12e-05, "loss": 2.9851, "step": 157 }, { "epoch": 0.022144358794674142, "grad_norm": 6.009809970855713, "learning_rate": 9.18e-05, "loss": 3.0649, "step": 158 }, { "epoch": 0.022284512964260688, "grad_norm": 1.1571614742279053, "learning_rate": 9.24e-05, "loss": 2.969, "step": 159 }, { "epoch": 0.02242466713384723, "grad_norm": 1.4274930953979492, "learning_rate": 9.3e-05, "loss": 2.9455, "step": 160 }, { "epoch": 0.022564821303433777, "grad_norm": 2.8754162788391113, "learning_rate": 9.36e-05, "loss": 2.9669, "step": 161 }, { "epoch": 0.022704975473020323, "grad_norm": 1.7089864015579224, "learning_rate": 9.419999999999999e-05, "loss": 2.9549, "step": 162 }, { "epoch": 0.02284512964260687, "grad_norm": 2.965763807296753, "learning_rate": 9.479999999999999e-05, "loss": 2.9707, "step": 163 }, { "epoch": 0.022985283812193412, "grad_norm": 1.996239185333252, "learning_rate": 9.539999999999999e-05, "loss": 2.9744, "step": 164 }, { "epoch": 0.023125437981779958, "grad_norm": 0.8861143589019775, "learning_rate": 9.599999999999999e-05, "loss": 2.921, "step": 165 }, { "epoch": 0.023265592151366504, "grad_norm": 1.0977675914764404, "learning_rate": 9.659999999999999e-05, "loss": 2.9073, "step": 166 }, { "epoch": 0.023405746320953047, "grad_norm": 0.6487427949905396, "learning_rate": 9.719999999999999e-05, "loss": 2.9416, "step": 167 }, { "epoch": 0.023545900490539593, "grad_norm": 1.6304970979690552, "learning_rate": 9.779999999999999e-05, "loss": 2.9264, "step": 168 }, 
{ "epoch": 0.02368605466012614, "grad_norm": 1.029008150100708, "learning_rate": 9.839999999999999e-05, "loss": 2.9081, "step": 169 }, { "epoch": 0.023826208829712685, "grad_norm": 1.8654495477676392, "learning_rate": 9.9e-05, "loss": 2.9003, "step": 170 }, { "epoch": 0.023966362999299228, "grad_norm": 1.41410493850708, "learning_rate": 9.96e-05, "loss": 2.91, "step": 171 }, { "epoch": 0.024106517168885774, "grad_norm": 1.3176707029342651, "learning_rate": 0.0001002, "loss": 2.9348, "step": 172 }, { "epoch": 0.02424667133847232, "grad_norm": 1.9054174423217773, "learning_rate": 0.0001008, "loss": 2.9161, "step": 173 }, { "epoch": 0.024386825508058863, "grad_norm": 1.1568753719329834, "learning_rate": 0.0001014, "loss": 2.9115, "step": 174 }, { "epoch": 0.02452697967764541, "grad_norm": 3.0676801204681396, "learning_rate": 0.000102, "loss": 2.9599, "step": 175 }, { "epoch": 0.024667133847231956, "grad_norm": 0.8326209783554077, "learning_rate": 0.0001026, "loss": 2.8861, "step": 176 }, { "epoch": 0.0248072880168185, "grad_norm": 0.7381007075309753, "learning_rate": 0.00010319999999999999, "loss": 2.8958, "step": 177 }, { "epoch": 0.024947442186405044, "grad_norm": 0.5153982639312744, "learning_rate": 0.00010379999999999999, "loss": 2.9183, "step": 178 }, { "epoch": 0.02508759635599159, "grad_norm": 1.976210117340088, "learning_rate": 0.00010439999999999999, "loss": 2.8974, "step": 179 }, { "epoch": 0.025227750525578137, "grad_norm": 1.870713233947754, "learning_rate": 0.00010499999999999999, "loss": 2.9263, "step": 180 }, { "epoch": 0.02536790469516468, "grad_norm": 2.3896396160125732, "learning_rate": 0.00010559999999999998, "loss": 2.8912, "step": 181 }, { "epoch": 0.025508058864751226, "grad_norm": 1.2706975936889648, "learning_rate": 0.00010619999999999998, "loss": 2.9116, "step": 182 }, { "epoch": 0.025648213034337772, "grad_norm": 0.9402898550033569, "learning_rate": 0.00010679999999999998, "loss": 2.9286, "step": 183 }, { "epoch": 0.025788367203924318, 
"grad_norm": 0.7575398087501526, "learning_rate": 0.00010739999999999998, "loss": 2.9138, "step": 184 }, { "epoch": 0.02592852137351086, "grad_norm": 3.0395188331604004, "learning_rate": 0.00010799999999999998, "loss": 3.023, "step": 185 }, { "epoch": 0.026068675543097407, "grad_norm": 0.7392745018005371, "learning_rate": 0.00010859999999999998, "loss": 2.956, "step": 186 }, { "epoch": 0.026208829712683953, "grad_norm": 1.332564353942871, "learning_rate": 0.00010919999999999998, "loss": 2.9403, "step": 187 }, { "epoch": 0.0263489838822705, "grad_norm": 0.6688811182975769, "learning_rate": 0.00010979999999999999, "loss": 2.8767, "step": 188 }, { "epoch": 0.026489138051857042, "grad_norm": 0.882061243057251, "learning_rate": 0.00011039999999999999, "loss": 2.9085, "step": 189 }, { "epoch": 0.026629292221443588, "grad_norm": 0.7065404653549194, "learning_rate": 0.00011099999999999999, "loss": 2.9152, "step": 190 }, { "epoch": 0.026769446391030134, "grad_norm": 1.5446754693984985, "learning_rate": 0.00011159999999999999, "loss": 2.8788, "step": 191 }, { "epoch": 0.026909600560616677, "grad_norm": 0.5920804738998413, "learning_rate": 0.00011219999999999999, "loss": 2.9091, "step": 192 }, { "epoch": 0.027049754730203223, "grad_norm": 0.573976993560791, "learning_rate": 0.00011279999999999999, "loss": 2.9037, "step": 193 }, { "epoch": 0.02718990889978977, "grad_norm": 0.7928887009620667, "learning_rate": 0.00011339999999999999, "loss": 2.9208, "step": 194 }, { "epoch": 0.027330063069376315, "grad_norm": 1.1057881116867065, "learning_rate": 0.00011399999999999999, "loss": 2.855, "step": 195 }, { "epoch": 0.027470217238962858, "grad_norm": 1.963627815246582, "learning_rate": 0.0001146, "loss": 2.8812, "step": 196 }, { "epoch": 0.027610371408549404, "grad_norm": 1.3813062906265259, "learning_rate": 0.0001152, "loss": 2.8663, "step": 197 }, { "epoch": 0.02775052557813595, "grad_norm": 3.114259719848633, "learning_rate": 0.0001158, "loss": 2.9357, "step": 198 }, { "epoch": 
0.027890679747722493, "grad_norm": 1.8745334148406982, "learning_rate": 0.0001164, "loss": 2.8679, "step": 199 }, { "epoch": 0.02803083391730904, "grad_norm": 4.405505180358887, "learning_rate": 0.000117, "loss": 3.0878, "step": 200 }, { "epoch": 0.028170988086895585, "grad_norm": 1.3865611553192139, "learning_rate": 0.0001176, "loss": 2.8596, "step": 201 }, { "epoch": 0.02831114225648213, "grad_norm": 0.9126668572425842, "learning_rate": 0.0001182, "loss": 2.9487, "step": 202 }, { "epoch": 0.028451296426068674, "grad_norm": 0.6223608255386353, "learning_rate": 0.0001188, "loss": 2.8767, "step": 203 }, { "epoch": 0.02859145059565522, "grad_norm": 2.065746545791626, "learning_rate": 0.0001194, "loss": 2.8937, "step": 204 }, { "epoch": 0.028731604765241767, "grad_norm": 1.4100656509399414, "learning_rate": 0.00011999999999999999, "loss": 2.9177, "step": 205 }, { "epoch": 0.028871758934828313, "grad_norm": 0.936177134513855, "learning_rate": 0.00012059999999999999, "loss": 2.8652, "step": 206 }, { "epoch": 0.029011913104414856, "grad_norm": 0.4871051609516144, "learning_rate": 0.00012119999999999999, "loss": 2.8553, "step": 207 }, { "epoch": 0.0291520672740014, "grad_norm": 3.69014573097229, "learning_rate": 0.00012179999999999999, "loss": 2.9089, "step": 208 }, { "epoch": 0.029292221443587948, "grad_norm": 0.4774470925331116, "learning_rate": 0.0001224, "loss": 2.8655, "step": 209 }, { "epoch": 0.02943237561317449, "grad_norm": 0.7280520796775818, "learning_rate": 0.00012299999999999998, "loss": 2.8594, "step": 210 }, { "epoch": 0.029572529782761037, "grad_norm": 1.935536503791809, "learning_rate": 0.0001236, "loss": 2.8728, "step": 211 }, { "epoch": 0.029712683952347583, "grad_norm": 0.8123719692230225, "learning_rate": 0.00012419999999999998, "loss": 2.9089, "step": 212 }, { "epoch": 0.02985283812193413, "grad_norm": 0.9120416641235352, "learning_rate": 0.00012479999999999997, "loss": 2.8723, "step": 213 }, { "epoch": 0.029992992291520672, "grad_norm": 
0.31208303570747375, "learning_rate": 0.00012539999999999999, "loss": 2.8848, "step": 214 }, { "epoch": 0.030133146461107218, "grad_norm": 1.156059741973877, "learning_rate": 0.00012599999999999997, "loss": 2.862, "step": 215 }, { "epoch": 0.030273300630693764, "grad_norm": 0.5333488583564758, "learning_rate": 0.0001266, "loss": 2.8534, "step": 216 }, { "epoch": 0.030413454800280307, "grad_norm": 1.356495976448059, "learning_rate": 0.00012719999999999997, "loss": 2.8656, "step": 217 }, { "epoch": 0.030553608969866853, "grad_norm": 0.7143720388412476, "learning_rate": 0.0001278, "loss": 2.8471, "step": 218 }, { "epoch": 0.0306937631394534, "grad_norm": 1.2398115396499634, "learning_rate": 0.00012839999999999998, "loss": 2.8298, "step": 219 }, { "epoch": 0.030833917309039945, "grad_norm": 1.4916234016418457, "learning_rate": 0.000129, "loss": 2.8283, "step": 220 }, { "epoch": 0.030974071478626488, "grad_norm": 1.075265884399414, "learning_rate": 0.00012959999999999998, "loss": 2.8709, "step": 221 }, { "epoch": 0.031114225648213034, "grad_norm": 0.7169482707977295, "learning_rate": 0.0001302, "loss": 2.8371, "step": 222 }, { "epoch": 0.03125437981779958, "grad_norm": 1.6476532220840454, "learning_rate": 0.00013079999999999998, "loss": 2.8555, "step": 223 }, { "epoch": 0.031394533987386126, "grad_norm": 1.5766204595565796, "learning_rate": 0.0001314, "loss": 2.8857, "step": 224 }, { "epoch": 0.03153468815697267, "grad_norm": 1.301588535308838, "learning_rate": 0.00013199999999999998, "loss": 2.8482, "step": 225 }, { "epoch": 0.03167484232655921, "grad_norm": 0.4020247459411621, "learning_rate": 0.0001326, "loss": 2.8864, "step": 226 }, { "epoch": 0.03181499649614576, "grad_norm": 1.7609339952468872, "learning_rate": 0.00013319999999999999, "loss": 2.8646, "step": 227 }, { "epoch": 0.031955150665732304, "grad_norm": 0.6501938104629517, "learning_rate": 0.0001338, "loss": 2.8688, "step": 228 }, { "epoch": 0.032095304835318854, "grad_norm": 0.5484358072280884, 
"learning_rate": 0.0001344, "loss": 2.8752, "step": 229 }, { "epoch": 0.0322354590049054, "grad_norm": 0.7517778873443604, "learning_rate": 0.000135, "loss": 2.8287, "step": 230 }, { "epoch": 0.03237561317449194, "grad_norm": 0.590181827545166, "learning_rate": 0.0001356, "loss": 2.8778, "step": 231 }, { "epoch": 0.03251576734407849, "grad_norm": 1.5680372714996338, "learning_rate": 0.0001362, "loss": 2.8207, "step": 232 }, { "epoch": 0.03265592151366503, "grad_norm": 1.1434855461120605, "learning_rate": 0.0001368, "loss": 2.8577, "step": 233 }, { "epoch": 0.032796075683251574, "grad_norm": 1.7322049140930176, "learning_rate": 0.0001374, "loss": 2.8589, "step": 234 }, { "epoch": 0.032936229852838124, "grad_norm": 0.3845532238483429, "learning_rate": 0.000138, "loss": 2.8265, "step": 235 }, { "epoch": 0.03307638402242467, "grad_norm": 1.3557482957839966, "learning_rate": 0.0001386, "loss": 2.8262, "step": 236 }, { "epoch": 0.03321653819201121, "grad_norm": 1.2092598676681519, "learning_rate": 0.0001392, "loss": 2.8347, "step": 237 }, { "epoch": 0.03335669236159776, "grad_norm": 3.0183780193328857, "learning_rate": 0.00013979999999999998, "loss": 2.8297, "step": 238 }, { "epoch": 0.0334968465311843, "grad_norm": 1.6880500316619873, "learning_rate": 0.0001404, "loss": 2.9211, "step": 239 }, { "epoch": 0.03363700070077085, "grad_norm": 0.8898597359657288, "learning_rate": 0.00014099999999999998, "loss": 2.8295, "step": 240 }, { "epoch": 0.033777154870357394, "grad_norm": 0.5321073532104492, "learning_rate": 0.00014159999999999997, "loss": 2.8406, "step": 241 }, { "epoch": 0.03391730903994394, "grad_norm": 0.7906889915466309, "learning_rate": 0.0001422, "loss": 2.8915, "step": 242 }, { "epoch": 0.034057463209530486, "grad_norm": 0.5971015095710754, "learning_rate": 0.00014279999999999997, "loss": 2.8852, "step": 243 }, { "epoch": 0.03419761737911703, "grad_norm": 2.295685291290283, "learning_rate": 0.0001434, "loss": 2.8779, "step": 244 }, { "epoch": 
0.03433777154870357, "grad_norm": 0.9585694670677185, "learning_rate": 0.00014399999999999998, "loss": 2.9366, "step": 245 }, { "epoch": 0.03447792571829012, "grad_norm": 1.4037121534347534, "learning_rate": 0.0001446, "loss": 2.8852, "step": 246 }, { "epoch": 0.034618079887876664, "grad_norm": 0.8975903987884521, "learning_rate": 0.00014519999999999998, "loss": 2.8585, "step": 247 }, { "epoch": 0.03475823405746321, "grad_norm": 2.904129981994629, "learning_rate": 0.0001458, "loss": 2.9175, "step": 248 }, { "epoch": 0.034898388227049756, "grad_norm": 0.9668857455253601, "learning_rate": 0.00014639999999999998, "loss": 2.9313, "step": 249 }, { "epoch": 0.0350385423966363, "grad_norm": 0.9312328100204468, "learning_rate": 0.000147, "loss": 3.0427, "step": 250 }, { "epoch": 0.03517869656622284, "grad_norm": 0.9613952040672302, "learning_rate": 0.00014759999999999998, "loss": 2.888, "step": 251 }, { "epoch": 0.03531885073580939, "grad_norm": 0.5064305067062378, "learning_rate": 0.0001482, "loss": 2.882, "step": 252 }, { "epoch": 0.035459004905395934, "grad_norm": 0.9140638113021851, "learning_rate": 0.00014879999999999998, "loss": 2.8515, "step": 253 }, { "epoch": 0.035599159074982484, "grad_norm": 0.742790162563324, "learning_rate": 0.0001494, "loss": 2.8343, "step": 254 }, { "epoch": 0.035739313244569026, "grad_norm": 0.44202330708503723, "learning_rate": 0.00015, "loss": 2.8345, "step": 255 }, { "epoch": 0.03587946741415557, "grad_norm": 0.3355579674243927, "learning_rate": 0.00015059999999999997, "loss": 2.8469, "step": 256 }, { "epoch": 0.03601962158374212, "grad_norm": 1.9614343643188477, "learning_rate": 0.0001512, "loss": 2.841, "step": 257 }, { "epoch": 0.03615977575332866, "grad_norm": 2.2115304470062256, "learning_rate": 0.00015179999999999998, "loss": 2.852, "step": 258 }, { "epoch": 0.036299929922915204, "grad_norm": 1.1922167539596558, "learning_rate": 0.0001524, "loss": 2.864, "step": 259 }, { "epoch": 0.036440084092501754, "grad_norm": 
0.9516031742095947, "learning_rate": 0.00015299999999999998, "loss": 2.8438, "step": 260 }, { "epoch": 0.0365802382620883, "grad_norm": 0.5913712978363037, "learning_rate": 0.0001536, "loss": 2.8534, "step": 261 }, { "epoch": 0.03672039243167484, "grad_norm": 1.1199309825897217, "learning_rate": 0.00015419999999999998, "loss": 2.8423, "step": 262 }, { "epoch": 0.03686054660126139, "grad_norm": 0.5776010751724243, "learning_rate": 0.0001548, "loss": 2.8878, "step": 263 }, { "epoch": 0.03700070077084793, "grad_norm": 0.5159766674041748, "learning_rate": 0.00015539999999999998, "loss": 2.8519, "step": 264 }, { "epoch": 0.03714085494043448, "grad_norm": 0.5820059180259705, "learning_rate": 0.000156, "loss": 2.8866, "step": 265 }, { "epoch": 0.037281009110021024, "grad_norm": 1.8129342794418335, "learning_rate": 0.00015659999999999998, "loss": 2.8437, "step": 266 }, { "epoch": 0.03742116327960757, "grad_norm": 0.5767428278923035, "learning_rate": 0.0001572, "loss": 2.8326, "step": 267 }, { "epoch": 0.037561317449194116, "grad_norm": 0.392638623714447, "learning_rate": 0.0001578, "loss": 2.8714, "step": 268 }, { "epoch": 0.03770147161878066, "grad_norm": 0.7401202917098999, "learning_rate": 0.0001584, "loss": 2.8517, "step": 269 }, { "epoch": 0.0378416257883672, "grad_norm": 0.4700906574726105, "learning_rate": 0.000159, "loss": 2.8904, "step": 270 }, { "epoch": 0.03798177995795375, "grad_norm": 2.120917320251465, "learning_rate": 0.0001596, "loss": 2.8843, "step": 271 }, { "epoch": 0.038121934127540294, "grad_norm": 0.46234700083732605, "learning_rate": 0.0001602, "loss": 2.815, "step": 272 }, { "epoch": 0.03826208829712684, "grad_norm": 1.6301603317260742, "learning_rate": 0.0001608, "loss": 2.8317, "step": 273 }, { "epoch": 0.038402242466713386, "grad_norm": 0.7664803862571716, "learning_rate": 0.0001614, "loss": 2.8285, "step": 274 }, { "epoch": 0.03854239663629993, "grad_norm": 0.5692226886749268, "learning_rate": 0.000162, "loss": 2.8247, "step": 275 }, { "epoch": 
0.03868255080588647, "grad_norm": 1.199317216873169, "learning_rate": 0.0001626, "loss": 2.8341, "step": 276 }, { "epoch": 0.03882270497547302, "grad_norm": 1.7699816226959229, "learning_rate": 0.0001632, "loss": 2.888, "step": 277 }, { "epoch": 0.038962859145059564, "grad_norm": 1.5419954061508179, "learning_rate": 0.0001638, "loss": 2.8286, "step": 278 }, { "epoch": 0.039103013314646114, "grad_norm": 2.050661087036133, "learning_rate": 0.0001644, "loss": 2.8947, "step": 279 }, { "epoch": 0.039243167484232656, "grad_norm": 0.802770733833313, "learning_rate": 0.000165, "loss": 2.8268, "step": 280 }, { "epoch": 0.0393833216538192, "grad_norm": 1.0551692247390747, "learning_rate": 0.0001656, "loss": 2.8512, "step": 281 }, { "epoch": 0.03952347582340575, "grad_norm": 0.5748604536056519, "learning_rate": 0.0001662, "loss": 2.8443, "step": 282 }, { "epoch": 0.03966362999299229, "grad_norm": 1.8012722730636597, "learning_rate": 0.0001668, "loss": 2.8339, "step": 283 }, { "epoch": 0.039803784162578834, "grad_norm": 2.066579818725586, "learning_rate": 0.0001674, "loss": 2.8402, "step": 284 }, { "epoch": 0.039943938332165384, "grad_norm": 0.491862416267395, "learning_rate": 0.000168, "loss": 2.8373, "step": 285 }, { "epoch": 0.040084092501751926, "grad_norm": 2.1423556804656982, "learning_rate": 0.0001686, "loss": 2.8744, "step": 286 }, { "epoch": 0.04022424667133847, "grad_norm": 18.57097053527832, "learning_rate": 0.00016919999999999997, "loss": 3.6457, "step": 287 }, { "epoch": 0.04036440084092502, "grad_norm": 1.6656326055526733, "learning_rate": 0.00016979999999999998, "loss": 2.8279, "step": 288 }, { "epoch": 0.04050455501051156, "grad_norm": 2.3413543701171875, "learning_rate": 0.00017039999999999997, "loss": 2.8795, "step": 289 }, { "epoch": 0.04064470918009811, "grad_norm": 2.042393207550049, "learning_rate": 0.00017099999999999998, "loss": 2.8585, "step": 290 }, { "epoch": 0.040784863349684654, "grad_norm": 0.9782095551490784, "learning_rate": 
0.00017159999999999997, "loss": 2.8575, "step": 291 }, { "epoch": 0.040925017519271197, "grad_norm": 0.4077852666378021, "learning_rate": 0.00017219999999999998, "loss": 2.8529, "step": 292 }, { "epoch": 0.041065171688857746, "grad_norm": 1.9978406429290771, "learning_rate": 0.00017279999999999997, "loss": 2.8674, "step": 293 }, { "epoch": 0.04120532585844429, "grad_norm": 1.3952635526657104, "learning_rate": 0.00017339999999999996, "loss": 2.8605, "step": 294 }, { "epoch": 0.04134548002803083, "grad_norm": 0.8728106021881104, "learning_rate": 0.00017399999999999997, "loss": 2.8661, "step": 295 }, { "epoch": 0.04148563419761738, "grad_norm": 0.7876748442649841, "learning_rate": 0.00017459999999999996, "loss": 2.818, "step": 296 }, { "epoch": 0.041625788367203924, "grad_norm": 1.246867299079895, "learning_rate": 0.00017519999999999998, "loss": 2.8833, "step": 297 }, { "epoch": 0.04176594253679047, "grad_norm": 0.8078440427780151, "learning_rate": 0.00017579999999999996, "loss": 2.9058, "step": 298 }, { "epoch": 0.041906096706377016, "grad_norm": 1.0067757368087769, "learning_rate": 0.00017639999999999998, "loss": 2.7852, "step": 299 }, { "epoch": 0.04204625087596356, "grad_norm": 1.1474734544754028, "learning_rate": 0.00017699999999999997, "loss": 2.9193, "step": 300 }, { "epoch": 0.04218640504555011, "grad_norm": 2.7537102699279785, "learning_rate": 0.00017759999999999998, "loss": 2.8676, "step": 301 }, { "epoch": 0.04232655921513665, "grad_norm": 0.8940155506134033, "learning_rate": 0.00017819999999999997, "loss": 2.872, "step": 302 }, { "epoch": 0.042466713384723194, "grad_norm": 0.7787176966667175, "learning_rate": 0.00017879999999999998, "loss": 2.8232, "step": 303 }, { "epoch": 0.042606867554309744, "grad_norm": 1.5186501741409302, "learning_rate": 0.00017939999999999997, "loss": 2.8408, "step": 304 }, { "epoch": 0.042747021723896286, "grad_norm": 1.1387662887573242, "learning_rate": 0.00017999999999999998, "loss": 2.8433, "step": 305 }, { "epoch": 
0.04288717589348283, "grad_norm": 2.0307185649871826, "learning_rate": 0.00018059999999999997, "loss": 2.9168, "step": 306 }, { "epoch": 0.04302733006306938, "grad_norm": 0.6528588533401489, "learning_rate": 0.00018119999999999999, "loss": 2.8807, "step": 307 }, { "epoch": 0.04316748423265592, "grad_norm": 0.45445582270622253, "learning_rate": 0.00018179999999999997, "loss": 2.8789, "step": 308 }, { "epoch": 0.043307638402242464, "grad_norm": 0.5610876083374023, "learning_rate": 0.0001824, "loss": 2.828, "step": 309 }, { "epoch": 0.043447792571829014, "grad_norm": 2.383838653564453, "learning_rate": 0.00018299999999999998, "loss": 2.8649, "step": 310 }, { "epoch": 0.043587946741415556, "grad_norm": 1.3355058431625366, "learning_rate": 0.0001836, "loss": 2.8304, "step": 311 }, { "epoch": 0.0437281009110021, "grad_norm": 0.5329657196998596, "learning_rate": 0.00018419999999999998, "loss": 2.8369, "step": 312 }, { "epoch": 0.04386825508058865, "grad_norm": 0.422691285610199, "learning_rate": 0.0001848, "loss": 2.8273, "step": 313 }, { "epoch": 0.04400840925017519, "grad_norm": 0.4409281313419342, "learning_rate": 0.00018539999999999998, "loss": 2.8523, "step": 314 }, { "epoch": 0.04414856341976174, "grad_norm": 2.9379351139068604, "learning_rate": 0.000186, "loss": 2.8821, "step": 315 }, { "epoch": 0.044288717589348284, "grad_norm": 0.3748665153980255, "learning_rate": 0.00018659999999999998, "loss": 2.8506, "step": 316 }, { "epoch": 0.044428871758934826, "grad_norm": 0.529017448425293, "learning_rate": 0.0001872, "loss": 2.852, "step": 317 }, { "epoch": 0.044569025928521376, "grad_norm": 1.6215025186538696, "learning_rate": 0.00018779999999999998, "loss": 2.8704, "step": 318 }, { "epoch": 0.04470918009810792, "grad_norm": 1.9423863887786865, "learning_rate": 0.00018839999999999997, "loss": 2.8542, "step": 319 }, { "epoch": 0.04484933426769446, "grad_norm": 0.6593060493469238, "learning_rate": 0.00018899999999999999, "loss": 2.8426, "step": 320 }, { "epoch": 
0.04498948843728101, "grad_norm": 0.34480753540992737, "learning_rate": 0.00018959999999999997, "loss": 2.8502, "step": 321 }, { "epoch": 0.045129642606867554, "grad_norm": 1.324047327041626, "learning_rate": 0.0001902, "loss": 2.8123, "step": 322 }, { "epoch": 0.045269796776454096, "grad_norm": 0.7530344724655151, "learning_rate": 0.00019079999999999998, "loss": 2.8417, "step": 323 }, { "epoch": 0.045409950946040646, "grad_norm": 0.4720246195793152, "learning_rate": 0.0001914, "loss": 2.841, "step": 324 }, { "epoch": 0.04555010511562719, "grad_norm": 1.2542595863342285, "learning_rate": 0.00019199999999999998, "loss": 2.8635, "step": 325 }, { "epoch": 0.04569025928521374, "grad_norm": 1.5343085527420044, "learning_rate": 0.0001926, "loss": 2.8487, "step": 326 }, { "epoch": 0.04583041345480028, "grad_norm": 0.613078236579895, "learning_rate": 0.00019319999999999998, "loss": 2.8425, "step": 327 }, { "epoch": 0.045970567624386824, "grad_norm": 0.5128901600837708, "learning_rate": 0.0001938, "loss": 2.8412, "step": 328 }, { "epoch": 0.046110721793973374, "grad_norm": 1.5029518604278564, "learning_rate": 0.00019439999999999998, "loss": 2.8883, "step": 329 }, { "epoch": 0.046250875963559916, "grad_norm": 1.5685569047927856, "learning_rate": 0.000195, "loss": 2.8264, "step": 330 }, { "epoch": 0.04639103013314646, "grad_norm": 0.4594239890575409, "learning_rate": 0.00019559999999999998, "loss": 2.8357, "step": 331 }, { "epoch": 0.04653118430273301, "grad_norm": 1.7477678060531616, "learning_rate": 0.0001962, "loss": 2.8293, "step": 332 }, { "epoch": 0.04667133847231955, "grad_norm": 1.4861485958099365, "learning_rate": 0.00019679999999999999, "loss": 2.8524, "step": 333 }, { "epoch": 0.046811492641906094, "grad_norm": 0.48972010612487793, "learning_rate": 0.0001974, "loss": 2.8838, "step": 334 }, { "epoch": 0.046951646811492644, "grad_norm": 0.6660546660423279, "learning_rate": 0.000198, "loss": 2.8777, "step": 335 }, { "epoch": 0.047091800981079186, "grad_norm": 
2.614840030670166, "learning_rate": 0.0001986, "loss": 2.8734, "step": 336 }, { "epoch": 0.04723195515066573, "grad_norm": 2.2148666381835938, "learning_rate": 0.0001992, "loss": 2.8422, "step": 337 }, { "epoch": 0.04737210932025228, "grad_norm": 1.6337977647781372, "learning_rate": 0.0001998, "loss": 2.8671, "step": 338 }, { "epoch": 0.04751226348983882, "grad_norm": 2.3236374855041504, "learning_rate": 0.0002004, "loss": 2.8773, "step": 339 }, { "epoch": 0.04765241765942537, "grad_norm": 0.4809541702270508, "learning_rate": 0.000201, "loss": 2.8244, "step": 340 }, { "epoch": 0.047792571829011914, "grad_norm": 1.523079752922058, "learning_rate": 0.0002016, "loss": 2.8679, "step": 341 }, { "epoch": 0.047932725998598456, "grad_norm": 1.1739789247512817, "learning_rate": 0.0002022, "loss": 2.8072, "step": 342 }, { "epoch": 0.048072880168185006, "grad_norm": 0.6344507932662964, "learning_rate": 0.0002028, "loss": 2.8647, "step": 343 }, { "epoch": 0.04821303433777155, "grad_norm": 1.3769617080688477, "learning_rate": 0.00020339999999999998, "loss": 2.8539, "step": 344 }, { "epoch": 0.04835318850735809, "grad_norm": 0.4988667964935303, "learning_rate": 0.000204, "loss": 2.8529, "step": 345 }, { "epoch": 0.04849334267694464, "grad_norm": 0.8496665358543396, "learning_rate": 0.00020459999999999999, "loss": 2.8972, "step": 346 }, { "epoch": 0.048633496846531184, "grad_norm": 1.0026979446411133, "learning_rate": 0.0002052, "loss": 2.8639, "step": 347 }, { "epoch": 0.048773651016117726, "grad_norm": 1.7278631925582886, "learning_rate": 0.0002058, "loss": 2.8485, "step": 348 }, { "epoch": 0.048913805185704276, "grad_norm": 0.6872090101242065, "learning_rate": 0.00020639999999999998, "loss": 2.899, "step": 349 }, { "epoch": 0.04905395935529082, "grad_norm": 1.0069712400436401, "learning_rate": 0.00020699999999999996, "loss": 2.8375, "step": 350 }, { "epoch": 0.04919411352487737, "grad_norm": 0.4872021973133087, "learning_rate": 0.00020759999999999998, "loss": 2.8516, "step": 
351 }, { "epoch": 0.04933426769446391, "grad_norm": 1.8567527532577515, "learning_rate": 0.00020819999999999996, "loss": 2.8876, "step": 352 }, { "epoch": 0.049474421864050454, "grad_norm": 1.2505429983139038, "learning_rate": 0.00020879999999999998, "loss": 2.8486, "step": 353 }, { "epoch": 0.049614576033637, "grad_norm": 0.7958128452301025, "learning_rate": 0.00020939999999999997, "loss": 2.8298, "step": 354 }, { "epoch": 0.049754730203223546, "grad_norm": 0.5082005858421326, "learning_rate": 0.00020999999999999998, "loss": 2.8309, "step": 355 }, { "epoch": 0.04989488437281009, "grad_norm": 1.5224041938781738, "learning_rate": 0.00021059999999999997, "loss": 2.8561, "step": 356 }, { "epoch": 0.05003503854239664, "grad_norm": 0.5568313598632812, "learning_rate": 0.00021119999999999996, "loss": 2.8423, "step": 357 }, { "epoch": 0.05017519271198318, "grad_norm": 1.4406063556671143, "learning_rate": 0.00021179999999999997, "loss": 2.7996, "step": 358 }, { "epoch": 0.050315346881569724, "grad_norm": 2.8648271560668945, "learning_rate": 0.00021239999999999996, "loss": 2.8842, "step": 359 }, { "epoch": 0.050455501051156273, "grad_norm": 1.8214137554168701, "learning_rate": 0.00021299999999999997, "loss": 2.8375, "step": 360 }, { "epoch": 0.050595655220742816, "grad_norm": 1.0080620050430298, "learning_rate": 0.00021359999999999996, "loss": 2.8281, "step": 361 }, { "epoch": 0.05073580939032936, "grad_norm": 0.5720990896224976, "learning_rate": 0.00021419999999999998, "loss": 2.827, "step": 362 }, { "epoch": 0.05087596355991591, "grad_norm": 1.4787616729736328, "learning_rate": 0.00021479999999999996, "loss": 2.897, "step": 363 }, { "epoch": 0.05101611772950245, "grad_norm": 0.944041907787323, "learning_rate": 0.00021539999999999998, "loss": 2.8256, "step": 364 }, { "epoch": 0.051156271899089, "grad_norm": 0.9127120971679688, "learning_rate": 0.00021599999999999996, "loss": 2.8517, "step": 365 }, { "epoch": 0.051296426068675544, "grad_norm": 0.46851101517677307, 
"learning_rate": 0.00021659999999999998, "loss": 2.8267, "step": 366 }, { "epoch": 0.051436580238262086, "grad_norm": 0.5274301171302795, "learning_rate": 0.00021719999999999997, "loss": 2.8684, "step": 367 }, { "epoch": 0.051576734407848636, "grad_norm": 0.6189049482345581, "learning_rate": 0.00021779999999999998, "loss": 2.8831, "step": 368 }, { "epoch": 0.05171688857743518, "grad_norm": 1.1459282636642456, "learning_rate": 0.00021839999999999997, "loss": 2.8211, "step": 369 }, { "epoch": 0.05185704274702172, "grad_norm": 1.3347804546356201, "learning_rate": 0.00021899999999999998, "loss": 2.8871, "step": 370 }, { "epoch": 0.05199719691660827, "grad_norm": 0.47682449221611023, "learning_rate": 0.00021959999999999997, "loss": 2.8804, "step": 371 }, { "epoch": 0.052137351086194814, "grad_norm": 2.0062739849090576, "learning_rate": 0.00022019999999999999, "loss": 2.8677, "step": 372 }, { "epoch": 0.052277505255781356, "grad_norm": 0.9368986487388611, "learning_rate": 0.00022079999999999997, "loss": 2.8389, "step": 373 }, { "epoch": 0.052417659425367906, "grad_norm": 0.8859605193138123, "learning_rate": 0.0002214, "loss": 2.8442, "step": 374 }, { "epoch": 0.05255781359495445, "grad_norm": 0.8973392844200134, "learning_rate": 0.00022199999999999998, "loss": 2.8508, "step": 375 }, { "epoch": 0.052697967764541, "grad_norm": 0.552753210067749, "learning_rate": 0.0002226, "loss": 2.8554, "step": 376 }, { "epoch": 0.05283812193412754, "grad_norm": 1.781485915184021, "learning_rate": 0.00022319999999999998, "loss": 2.8305, "step": 377 }, { "epoch": 0.052978276103714084, "grad_norm": 1.4769580364227295, "learning_rate": 0.0002238, "loss": 2.8233, "step": 378 }, { "epoch": 0.05311843027330063, "grad_norm": 0.480735182762146, "learning_rate": 0.00022439999999999998, "loss": 2.7994, "step": 379 }, { "epoch": 0.053258584442887176, "grad_norm": 0.44265174865722656, "learning_rate": 0.000225, "loss": 2.8467, "step": 380 }, { "epoch": 0.05339873861247372, "grad_norm": 
2.6101057529449463, "learning_rate": 0.00022559999999999998, "loss": 2.8585, "step": 381 }, { "epoch": 0.05353889278206027, "grad_norm": 0.7626937031745911, "learning_rate": 0.00022619999999999997, "loss": 2.8747, "step": 382 }, { "epoch": 0.05367904695164681, "grad_norm": 2.9585444927215576, "learning_rate": 0.00022679999999999998, "loss": 2.8791, "step": 383 }, { "epoch": 0.053819201121233354, "grad_norm": 0.573103666305542, "learning_rate": 0.00022739999999999997, "loss": 2.8262, "step": 384 }, { "epoch": 0.0539593552908199, "grad_norm": 1.5471686124801636, "learning_rate": 0.00022799999999999999, "loss": 2.8254, "step": 385 }, { "epoch": 0.054099509460406446, "grad_norm": 0.5622177124023438, "learning_rate": 0.00022859999999999997, "loss": 2.8457, "step": 386 }, { "epoch": 0.054239663629992996, "grad_norm": 0.8726809024810791, "learning_rate": 0.0002292, "loss": 2.823, "step": 387 }, { "epoch": 0.05437981779957954, "grad_norm": 0.6712299585342407, "learning_rate": 0.00022979999999999997, "loss": 2.8658, "step": 388 }, { "epoch": 0.05451997196916608, "grad_norm": 1.3056963682174683, "learning_rate": 0.0002304, "loss": 2.8463, "step": 389 }, { "epoch": 0.05466012613875263, "grad_norm": 2.0756328105926514, "learning_rate": 0.00023099999999999998, "loss": 2.8616, "step": 390 }, { "epoch": 0.05480028030833917, "grad_norm": 0.638837993144989, "learning_rate": 0.0002316, "loss": 2.8496, "step": 391 }, { "epoch": 0.054940434477925716, "grad_norm": 0.5293677449226379, "learning_rate": 0.00023219999999999998, "loss": 2.8431, "step": 392 }, { "epoch": 0.055080588647512266, "grad_norm": 1.3139581680297852, "learning_rate": 0.0002328, "loss": 2.9194, "step": 393 }, { "epoch": 0.05522074281709881, "grad_norm": 1.280122995376587, "learning_rate": 0.00023339999999999998, "loss": 2.8804, "step": 394 }, { "epoch": 0.05536089698668535, "grad_norm": 1.2172834873199463, "learning_rate": 0.000234, "loss": 2.8266, "step": 395 }, { "epoch": 0.0555010511562719, "grad_norm": 
1.5580065250396729, "learning_rate": 0.00023459999999999998, "loss": 2.9107, "step": 396 }, { "epoch": 0.055641205325858444, "grad_norm": 0.5468613505363464, "learning_rate": 0.0002352, "loss": 2.8887, "step": 397 }, { "epoch": 0.055781359495444986, "grad_norm": 1.5642807483673096, "learning_rate": 0.00023579999999999999, "loss": 2.8515, "step": 398 }, { "epoch": 0.055921513665031536, "grad_norm": 1.492555022239685, "learning_rate": 0.0002364, "loss": 2.8246, "step": 399 }, { "epoch": 0.05606166783461808, "grad_norm": 2.943617343902588, "learning_rate": 0.000237, "loss": 2.8775, "step": 400 }, { "epoch": 0.05620182200420463, "grad_norm": 0.8271451592445374, "learning_rate": 0.0002376, "loss": 2.8883, "step": 401 }, { "epoch": 0.05634197617379117, "grad_norm": 1.8347681760787964, "learning_rate": 0.0002382, "loss": 2.8397, "step": 402 }, { "epoch": 0.056482130343377714, "grad_norm": 1.0476518869400024, "learning_rate": 0.0002388, "loss": 2.854, "step": 403 }, { "epoch": 0.05662228451296426, "grad_norm": 0.5847808122634888, "learning_rate": 0.0002394, "loss": 2.8279, "step": 404 }, { "epoch": 0.056762438682550806, "grad_norm": 1.2081626653671265, "learning_rate": 0.00023999999999999998, "loss": 2.8142, "step": 405 }, { "epoch": 0.05690259285213735, "grad_norm": 1.6852529048919678, "learning_rate": 0.0002406, "loss": 2.8055, "step": 406 }, { "epoch": 0.0570427470217239, "grad_norm": 1.3509167432785034, "learning_rate": 0.00024119999999999998, "loss": 2.8701, "step": 407 }, { "epoch": 0.05718290119131044, "grad_norm": 2.65307354927063, "learning_rate": 0.0002418, "loss": 2.8621, "step": 408 }, { "epoch": 0.057323055360896984, "grad_norm": 0.5879318118095398, "learning_rate": 0.00024239999999999998, "loss": 2.8647, "step": 409 }, { "epoch": 0.05746320953048353, "grad_norm": 2.9156978130340576, "learning_rate": 0.000243, "loss": 2.8549, "step": 410 }, { "epoch": 0.057603363700070076, "grad_norm": 1.1443407535552979, "learning_rate": 0.00024359999999999999, "loss": 
2.8621, "step": 411 }, { "epoch": 0.057743517869656626, "grad_norm": 1.2485133409500122, "learning_rate": 0.00024419999999999997, "loss": 2.8343, "step": 412 }, { "epoch": 0.05788367203924317, "grad_norm": 1.3243160247802734, "learning_rate": 0.0002448, "loss": 2.8165, "step": 413 }, { "epoch": 0.05802382620882971, "grad_norm": 0.671999990940094, "learning_rate": 0.00024539999999999995, "loss": 2.8125, "step": 414 }, { "epoch": 0.05816398037841626, "grad_norm": 2.0717403888702393, "learning_rate": 0.00024599999999999996, "loss": 2.87, "step": 415 }, { "epoch": 0.0583041345480028, "grad_norm": 1.918505072593689, "learning_rate": 0.0002466, "loss": 2.8406, "step": 416 }, { "epoch": 0.058444288717589346, "grad_norm": 0.8821729421615601, "learning_rate": 0.0002472, "loss": 2.8408, "step": 417 }, { "epoch": 0.058584442887175896, "grad_norm": 1.0364701747894287, "learning_rate": 0.00024779999999999995, "loss": 2.8325, "step": 418 }, { "epoch": 0.05872459705676244, "grad_norm": 1.1036708354949951, "learning_rate": 0.00024839999999999997, "loss": 2.8547, "step": 419 }, { "epoch": 0.05886475122634898, "grad_norm": 0.47669097781181335, "learning_rate": 0.000249, "loss": 2.7921, "step": 420 }, { "epoch": 0.05900490539593553, "grad_norm": 0.5236237645149231, "learning_rate": 0.00024959999999999994, "loss": 2.8615, "step": 421 }, { "epoch": 0.05914505956552207, "grad_norm": 0.7237966060638428, "learning_rate": 0.00025019999999999996, "loss": 2.8656, "step": 422 }, { "epoch": 0.059285213735108616, "grad_norm": 0.8609464168548584, "learning_rate": 0.00025079999999999997, "loss": 2.8718, "step": 423 }, { "epoch": 0.059425367904695166, "grad_norm": 0.683222234249115, "learning_rate": 0.0002514, "loss": 2.8529, "step": 424 }, { "epoch": 0.05956552207428171, "grad_norm": 1.165490746498108, "learning_rate": 0.00025199999999999995, "loss": 2.8316, "step": 425 }, { "epoch": 0.05970567624386826, "grad_norm": 0.49919211864471436, "learning_rate": 0.00025259999999999996, "loss": 2.8195, 
"step": 426 }, { "epoch": 0.0598458304134548, "grad_norm": 0.2870229184627533, "learning_rate": 0.0002532, "loss": 2.8689, "step": 427 }, { "epoch": 0.059985984583041344, "grad_norm": 0.8433550596237183, "learning_rate": 0.0002538, "loss": 2.8503, "step": 428 }, { "epoch": 0.06012613875262789, "grad_norm": 2.0954675674438477, "learning_rate": 0.00025439999999999995, "loss": 2.8753, "step": 429 }, { "epoch": 0.060266292922214436, "grad_norm": 1.3331342935562134, "learning_rate": 0.00025499999999999996, "loss": 2.8756, "step": 430 }, { "epoch": 0.06040644709180098, "grad_norm": 0.755756139755249, "learning_rate": 0.0002556, "loss": 2.8152, "step": 431 }, { "epoch": 0.06054660126138753, "grad_norm": 0.48160722851753235, "learning_rate": 0.0002562, "loss": 2.8827, "step": 432 }, { "epoch": 0.06068675543097407, "grad_norm": 1.1558563709259033, "learning_rate": 0.00025679999999999995, "loss": 2.842, "step": 433 }, { "epoch": 0.060826909600560614, "grad_norm": 0.4860924482345581, "learning_rate": 0.00025739999999999997, "loss": 2.8391, "step": 434 }, { "epoch": 0.06096706377014716, "grad_norm": 2.3018696308135986, "learning_rate": 0.000258, "loss": 2.8771, "step": 435 }, { "epoch": 0.061107217939733706, "grad_norm": 0.5955384373664856, "learning_rate": 0.0002586, "loss": 2.8905, "step": 436 }, { "epoch": 0.061247372109320256, "grad_norm": 1.4681602716445923, "learning_rate": 0.00025919999999999996, "loss": 2.8492, "step": 437 }, { "epoch": 0.0613875262789068, "grad_norm": 1.8079183101654053, "learning_rate": 0.00025979999999999997, "loss": 2.8385, "step": 438 }, { "epoch": 0.06152768044849334, "grad_norm": 0.6035788655281067, "learning_rate": 0.0002604, "loss": 2.8479, "step": 439 }, { "epoch": 0.06166783461807989, "grad_norm": 0.7603915333747864, "learning_rate": 0.000261, "loss": 2.8522, "step": 440 }, { "epoch": 0.06180798878766643, "grad_norm": 1.7382203340530396, "learning_rate": 0.00026159999999999996, "loss": 2.8522, "step": 441 }, { "epoch": 0.061948142957252976, 
"grad_norm": 0.7512733340263367, "learning_rate": 0.0002622, "loss": 2.8483, "step": 442 }, { "epoch": 0.062088297126839526, "grad_norm": 2.5912866592407227, "learning_rate": 0.0002628, "loss": 2.8408, "step": 443 }, { "epoch": 0.06222845129642607, "grad_norm": 0.9884958267211914, "learning_rate": 0.00026339999999999995, "loss": 2.8384, "step": 444 }, { "epoch": 0.06236860546601261, "grad_norm": 2.7016091346740723, "learning_rate": 0.00026399999999999997, "loss": 2.9177, "step": 445 }, { "epoch": 0.06250875963559915, "grad_norm": 1.9022507667541504, "learning_rate": 0.0002646, "loss": 2.9078, "step": 446 }, { "epoch": 0.06264891380518571, "grad_norm": 0.9832592010498047, "learning_rate": 0.0002652, "loss": 2.8912, "step": 447 }, { "epoch": 0.06278906797477225, "grad_norm": 0.9287002682685852, "learning_rate": 0.00026579999999999996, "loss": 2.8694, "step": 448 }, { "epoch": 0.0629292221443588, "grad_norm": 1.1300129890441895, "learning_rate": 0.00026639999999999997, "loss": 2.9087, "step": 449 }, { "epoch": 0.06306937631394534, "grad_norm": 2.3987514972686768, "learning_rate": 0.000267, "loss": 2.9048, "step": 450 }, { "epoch": 0.06320953048353188, "grad_norm": 1.8747663497924805, "learning_rate": 0.0002676, "loss": 2.8403, "step": 451 }, { "epoch": 0.06334968465311842, "grad_norm": 1.1864196062088013, "learning_rate": 0.00026819999999999996, "loss": 2.8734, "step": 452 }, { "epoch": 0.06348983882270498, "grad_norm": 0.40319573879241943, "learning_rate": 0.0002688, "loss": 2.8677, "step": 453 }, { "epoch": 0.06362999299229152, "grad_norm": 3.450862169265747, "learning_rate": 0.0002694, "loss": 2.8852, "step": 454 }, { "epoch": 0.06377014716187807, "grad_norm": 0.3499014675617218, "learning_rate": 0.00027, "loss": 2.8179, "step": 455 }, { "epoch": 0.06391030133146461, "grad_norm": 2.264272689819336, "learning_rate": 0.00027059999999999996, "loss": 2.9051, "step": 456 }, { "epoch": 0.06405045550105115, "grad_norm": 2.137887954711914, "learning_rate": 0.0002712, 
"loss": 2.8614, "step": 457 }, { "epoch": 0.06419060967063771, "grad_norm": 0.527977705001831, "learning_rate": 0.0002718, "loss": 2.8431, "step": 458 }, { "epoch": 0.06433076384022425, "grad_norm": 1.5349394083023071, "learning_rate": 0.0002724, "loss": 2.8297, "step": 459 }, { "epoch": 0.0644709180098108, "grad_norm": 0.48932817578315735, "learning_rate": 0.00027299999999999997, "loss": 2.8337, "step": 460 }, { "epoch": 0.06461107217939734, "grad_norm": 0.44389232993125916, "learning_rate": 0.0002736, "loss": 2.8659, "step": 461 }, { "epoch": 0.06475122634898388, "grad_norm": 0.4781602621078491, "learning_rate": 0.0002742, "loss": 2.8562, "step": 462 }, { "epoch": 0.06489138051857042, "grad_norm": 1.7393834590911865, "learning_rate": 0.0002748, "loss": 2.822, "step": 463 }, { "epoch": 0.06503153468815698, "grad_norm": 1.2854914665222168, "learning_rate": 0.00027539999999999997, "loss": 2.8046, "step": 464 }, { "epoch": 0.06517168885774352, "grad_norm": 0.983287513256073, "learning_rate": 0.000276, "loss": 2.8248, "step": 465 }, { "epoch": 0.06531184302733006, "grad_norm": 1.7185975313186646, "learning_rate": 0.0002766, "loss": 2.8546, "step": 466 }, { "epoch": 0.0654519971969166, "grad_norm": 2.0991129875183105, "learning_rate": 0.0002772, "loss": 2.843, "step": 467 }, { "epoch": 0.06559215136650315, "grad_norm": 1.4711074829101562, "learning_rate": 0.0002778, "loss": 2.8146, "step": 468 }, { "epoch": 0.0657323055360897, "grad_norm": 0.789509117603302, "learning_rate": 0.0002784, "loss": 2.8189, "step": 469 }, { "epoch": 0.06587245970567625, "grad_norm": 0.5332419872283936, "learning_rate": 0.000279, "loss": 2.8548, "step": 470 }, { "epoch": 0.06601261387526279, "grad_norm": 1.6290559768676758, "learning_rate": 0.00027959999999999997, "loss": 2.8416, "step": 471 }, { "epoch": 0.06615276804484933, "grad_norm": 1.8316640853881836, "learning_rate": 0.0002802, "loss": 2.8407, "step": 472 }, { "epoch": 0.06629292221443588, "grad_norm": 1.1368061304092407, 
"learning_rate": 0.0002808, "loss": 2.8067, "step": 473 }, { "epoch": 0.06643307638402242, "grad_norm": 1.3229907751083374, "learning_rate": 0.00028139999999999996, "loss": 2.8425, "step": 474 }, { "epoch": 0.06657323055360898, "grad_norm": 1.197740077972412, "learning_rate": 0.00028199999999999997, "loss": 2.8299, "step": 475 }, { "epoch": 0.06671338472319552, "grad_norm": 0.820569634437561, "learning_rate": 0.0002826, "loss": 2.8054, "step": 476 }, { "epoch": 0.06685353889278206, "grad_norm": 1.2852872610092163, "learning_rate": 0.00028319999999999994, "loss": 2.8526, "step": 477 }, { "epoch": 0.0669936930623686, "grad_norm": 0.5447834730148315, "learning_rate": 0.00028379999999999996, "loss": 2.8504, "step": 478 }, { "epoch": 0.06713384723195515, "grad_norm": 0.9654664397239685, "learning_rate": 0.0002844, "loss": 2.8271, "step": 479 }, { "epoch": 0.0672740014015417, "grad_norm": 1.7507197856903076, "learning_rate": 0.000285, "loss": 2.8422, "step": 480 }, { "epoch": 0.06741415557112825, "grad_norm": 1.2217130661010742, "learning_rate": 0.00028559999999999995, "loss": 2.8106, "step": 481 }, { "epoch": 0.06755430974071479, "grad_norm": 0.8571678400039673, "learning_rate": 0.00028619999999999996, "loss": 2.8634, "step": 482 }, { "epoch": 0.06769446391030133, "grad_norm": 1.1033737659454346, "learning_rate": 0.0002868, "loss": 2.8464, "step": 483 }, { "epoch": 0.06783461807988787, "grad_norm": 1.261634349822998, "learning_rate": 0.00028739999999999994, "loss": 2.8423, "step": 484 }, { "epoch": 0.06797477224947442, "grad_norm": 0.5701166391372681, "learning_rate": 0.00028799999999999995, "loss": 2.8253, "step": 485 }, { "epoch": 0.06811492641906097, "grad_norm": 0.7327373623847961, "learning_rate": 0.00028859999999999997, "loss": 2.8154, "step": 486 }, { "epoch": 0.06825508058864752, "grad_norm": 1.3246729373931885, "learning_rate": 0.0002892, "loss": 2.8537, "step": 487 }, { "epoch": 0.06839523475823406, "grad_norm": 0.9053966403007507, "learning_rate": 
0.00028979999999999994, "loss": 2.8544, "step": 488 }, { "epoch": 0.0685353889278206, "grad_norm": 0.7421877980232239, "learning_rate": 0.00029039999999999996, "loss": 2.8277, "step": 489 }, { "epoch": 0.06867554309740714, "grad_norm": 0.4885466992855072, "learning_rate": 0.00029099999999999997, "loss": 2.8848, "step": 490 }, { "epoch": 0.06881569726699369, "grad_norm": 0.458811491727829, "learning_rate": 0.0002916, "loss": 2.8791, "step": 491 }, { "epoch": 0.06895585143658024, "grad_norm": 1.3304370641708374, "learning_rate": 0.00029219999999999995, "loss": 2.887, "step": 492 }, { "epoch": 0.06909600560616679, "grad_norm": 1.2078003883361816, "learning_rate": 0.00029279999999999996, "loss": 2.8148, "step": 493 }, { "epoch": 0.06923615977575333, "grad_norm": 1.063692569732666, "learning_rate": 0.0002934, "loss": 2.8957, "step": 494 }, { "epoch": 0.06937631394533987, "grad_norm": 0.6019052267074585, "learning_rate": 0.000294, "loss": 2.8422, "step": 495 }, { "epoch": 0.06951646811492641, "grad_norm": 1.3142274618148804, "learning_rate": 0.00029459999999999995, "loss": 2.8856, "step": 496 }, { "epoch": 0.06965662228451297, "grad_norm": 2.929884195327759, "learning_rate": 0.00029519999999999997, "loss": 2.8955, "step": 497 }, { "epoch": 0.06979677645409951, "grad_norm": 0.9539110660552979, "learning_rate": 0.0002958, "loss": 2.8155, "step": 498 }, { "epoch": 0.06993693062368606, "grad_norm": 3.578932762145996, "learning_rate": 0.0002964, "loss": 2.9668, "step": 499 }, { "epoch": 0.0700770847932726, "grad_norm": 2.2652204036712646, "learning_rate": 0.00029699999999999996, "loss": 2.847, "step": 500 }, { "epoch": 0.07021723896285914, "grad_norm": 0.4257313907146454, "learning_rate": 0.00029759999999999997, "loss": 2.854, "step": 501 }, { "epoch": 0.07035739313244568, "grad_norm": 0.9282848834991455, "learning_rate": 0.0002982, "loss": 2.8182, "step": 502 }, { "epoch": 0.07049754730203224, "grad_norm": 0.6917478442192078, "learning_rate": 0.0002988, "loss": 2.82, "step": 
503 }, { "epoch": 0.07063770147161878, "grad_norm": 1.0468342304229736, "learning_rate": 0.00029939999999999996, "loss": 2.859, "step": 504 }, { "epoch": 0.07077785564120533, "grad_norm": 0.547046959400177, "learning_rate": 0.0003, "loss": 2.7904, "step": 505 }, { "epoch": 0.07091800981079187, "grad_norm": 2.35677433013916, "learning_rate": 0.0002999856493661803, "loss": 2.8388, "step": 506 }, { "epoch": 0.07105816398037841, "grad_norm": 0.5450826287269592, "learning_rate": 0.0002999712987323607, "loss": 2.8602, "step": 507 }, { "epoch": 0.07119831814996497, "grad_norm": 1.2112269401550293, "learning_rate": 0.000299956948098541, "loss": 2.8173, "step": 508 }, { "epoch": 0.07133847231955151, "grad_norm": 0.8833276033401489, "learning_rate": 0.00029994259746472134, "loss": 2.8481, "step": 509 }, { "epoch": 0.07147862648913805, "grad_norm": 0.4758220314979553, "learning_rate": 0.00029992824683090167, "loss": 2.8409, "step": 510 }, { "epoch": 0.0716187806587246, "grad_norm": 1.2521374225616455, "learning_rate": 0.000299913896197082, "loss": 2.8397, "step": 511 }, { "epoch": 0.07175893482831114, "grad_norm": 1.1220290660858154, "learning_rate": 0.00029989954556326233, "loss": 2.813, "step": 512 }, { "epoch": 0.07189908899789768, "grad_norm": 2.468276262283325, "learning_rate": 0.0002998851949294427, "loss": 2.8517, "step": 513 }, { "epoch": 0.07203924316748424, "grad_norm": 2.1448941230773926, "learning_rate": 0.00029987084429562304, "loss": 2.8776, "step": 514 }, { "epoch": 0.07217939733707078, "grad_norm": 0.7374310493469238, "learning_rate": 0.00029985649366180337, "loss": 2.8523, "step": 515 }, { "epoch": 0.07231955150665732, "grad_norm": 1.072592854499817, "learning_rate": 0.0002998421430279837, "loss": 2.874, "step": 516 }, { "epoch": 0.07245970567624387, "grad_norm": 1.4461050033569336, "learning_rate": 0.0002998277923941641, "loss": 2.851, "step": 517 }, { "epoch": 0.07259985984583041, "grad_norm": 1.34160578250885, "learning_rate": 0.0002998134417603444, 
"loss": 2.8605, "step": 518 }, { "epoch": 0.07274001401541696, "grad_norm": 2.1639254093170166, "learning_rate": 0.00029979909112652474, "loss": 2.8328, "step": 519 }, { "epoch": 0.07288016818500351, "grad_norm": 0.6973652243614197, "learning_rate": 0.00029978474049270506, "loss": 2.8316, "step": 520 }, { "epoch": 0.07302032235459005, "grad_norm": 0.37315288186073303, "learning_rate": 0.0002997703898588854, "loss": 2.8319, "step": 521 }, { "epoch": 0.0731604765241766, "grad_norm": 0.37722694873809814, "learning_rate": 0.0002997560392250657, "loss": 2.8682, "step": 522 }, { "epoch": 0.07330063069376314, "grad_norm": 1.1548172235488892, "learning_rate": 0.0002997416885912461, "loss": 2.8356, "step": 523 }, { "epoch": 0.07344078486334968, "grad_norm": 2.373615264892578, "learning_rate": 0.00029972733795742643, "loss": 2.8617, "step": 524 }, { "epoch": 0.07358093903293624, "grad_norm": 1.537207007408142, "learning_rate": 0.00029971298732360676, "loss": 2.8203, "step": 525 }, { "epoch": 0.07372109320252278, "grad_norm": 0.3691214621067047, "learning_rate": 0.00029969863668978714, "loss": 2.8562, "step": 526 }, { "epoch": 0.07386124737210932, "grad_norm": 0.6084275841712952, "learning_rate": 0.0002996842860559675, "loss": 2.7979, "step": 527 }, { "epoch": 0.07400140154169586, "grad_norm": 2.023315668106079, "learning_rate": 0.0002996699354221478, "loss": 2.8636, "step": 528 }, { "epoch": 0.0741415557112824, "grad_norm": 1.2384412288665771, "learning_rate": 0.00029965558478832813, "loss": 2.8563, "step": 529 }, { "epoch": 0.07428170988086896, "grad_norm": 0.9338071942329407, "learning_rate": 0.00029964123415450846, "loss": 2.8837, "step": 530 }, { "epoch": 0.0744218640504555, "grad_norm": 1.2200719118118286, "learning_rate": 0.0002996268835206888, "loss": 2.8741, "step": 531 }, { "epoch": 0.07456201822004205, "grad_norm": 1.0358259677886963, "learning_rate": 0.0002996125328868691, "loss": 2.8885, "step": 532 }, { "epoch": 0.07470217238962859, "grad_norm": 
0.7400373816490173, "learning_rate": 0.0002995981822530495, "loss": 2.8382, "step": 533 }, { "epoch": 0.07484232655921513, "grad_norm": 2.8748083114624023, "learning_rate": 0.00029958383161922983, "loss": 2.8345, "step": 534 }, { "epoch": 0.07498248072880168, "grad_norm": 2.020559787750244, "learning_rate": 0.00029956948098541016, "loss": 2.885, "step": 535 }, { "epoch": 0.07512263489838823, "grad_norm": 3.0204548835754395, "learning_rate": 0.0002995551303515905, "loss": 2.8575, "step": 536 }, { "epoch": 0.07526278906797478, "grad_norm": 2.7605319023132324, "learning_rate": 0.00029954077971777087, "loss": 2.885, "step": 537 }, { "epoch": 0.07540294323756132, "grad_norm": 1.4202461242675781, "learning_rate": 0.0002995264290839512, "loss": 2.8707, "step": 538 }, { "epoch": 0.07554309740714786, "grad_norm": 0.6885740756988525, "learning_rate": 0.0002995120784501315, "loss": 2.8068, "step": 539 }, { "epoch": 0.0756832515767344, "grad_norm": 2.3399600982666016, "learning_rate": 0.00029949772781631185, "loss": 2.8882, "step": 540 }, { "epoch": 0.07582340574632096, "grad_norm": 1.3935046195983887, "learning_rate": 0.0002994833771824922, "loss": 2.8035, "step": 541 }, { "epoch": 0.0759635599159075, "grad_norm": 1.8794046640396118, "learning_rate": 0.00029946902654867256, "loss": 2.8572, "step": 542 }, { "epoch": 0.07610371408549405, "grad_norm": 2.469773769378662, "learning_rate": 0.0002994546759148529, "loss": 2.8994, "step": 543 }, { "epoch": 0.07624386825508059, "grad_norm": 1.048001766204834, "learning_rate": 0.0002994403252810332, "loss": 2.8137, "step": 544 }, { "epoch": 0.07638402242466713, "grad_norm": 0.8849368691444397, "learning_rate": 0.00029942597464721355, "loss": 2.8401, "step": 545 }, { "epoch": 0.07652417659425367, "grad_norm": 1.7072055339813232, "learning_rate": 0.0002994116240133939, "loss": 2.8557, "step": 546 }, { "epoch": 0.07666433076384023, "grad_norm": 1.740373134613037, "learning_rate": 0.0002993972733795742, "loss": 2.8559, "step": 547 }, { 
"epoch": 0.07680448493342677, "grad_norm": 1.8785969018936157, "learning_rate": 0.0002993829227457546, "loss": 2.8995, "step": 548 }, { "epoch": 0.07694463910301332, "grad_norm": 1.5170527696609497, "learning_rate": 0.0002993685721119349, "loss": 2.8803, "step": 549 }, { "epoch": 0.07708479327259986, "grad_norm": 1.2071936130523682, "learning_rate": 0.00029935422147811525, "loss": 2.8162, "step": 550 }, { "epoch": 0.0772249474421864, "grad_norm": 2.1247119903564453, "learning_rate": 0.0002993398708442956, "loss": 2.8225, "step": 551 }, { "epoch": 0.07736510161177294, "grad_norm": 0.5597912073135376, "learning_rate": 0.00029932552021047596, "loss": 2.8832, "step": 552 }, { "epoch": 0.0775052557813595, "grad_norm": 0.9041293859481812, "learning_rate": 0.0002993111695766563, "loss": 2.789, "step": 553 }, { "epoch": 0.07764540995094604, "grad_norm": 0.4143102467060089, "learning_rate": 0.0002992968189428366, "loss": 2.8113, "step": 554 }, { "epoch": 0.07778556412053259, "grad_norm": 0.9289295673370361, "learning_rate": 0.00029928246830901694, "loss": 2.8364, "step": 555 }, { "epoch": 0.07792571829011913, "grad_norm": 0.8136385083198547, "learning_rate": 0.00029926811767519727, "loss": 2.791, "step": 556 }, { "epoch": 0.07806587245970567, "grad_norm": 3.271901845932007, "learning_rate": 0.0002992537670413776, "loss": 2.9064, "step": 557 }, { "epoch": 0.07820602662929223, "grad_norm": 2.2082877159118652, "learning_rate": 0.000299239416407558, "loss": 2.8753, "step": 558 }, { "epoch": 0.07834618079887877, "grad_norm": 0.38986533880233765, "learning_rate": 0.0002992250657737383, "loss": 2.8168, "step": 559 }, { "epoch": 0.07848633496846531, "grad_norm": 1.8162808418273926, "learning_rate": 0.00029921071513991864, "loss": 2.8152, "step": 560 }, { "epoch": 0.07862648913805186, "grad_norm": 0.8369203209877014, "learning_rate": 0.000299196364506099, "loss": 2.8359, "step": 561 }, { "epoch": 0.0787666433076384, "grad_norm": 0.4745757281780243, "learning_rate": 
0.00029918201387227935, "loss": 2.8316, "step": 562 }, { "epoch": 0.07890679747722494, "grad_norm": 1.5335533618927002, "learning_rate": 0.0002991676632384597, "loss": 2.8297, "step": 563 }, { "epoch": 0.0790469516468115, "grad_norm": 1.0716702938079834, "learning_rate": 0.00029915331260464, "loss": 2.8689, "step": 564 }, { "epoch": 0.07918710581639804, "grad_norm": 1.863500952720642, "learning_rate": 0.00029913896197082034, "loss": 2.8462, "step": 565 }, { "epoch": 0.07932725998598458, "grad_norm": 1.3357001543045044, "learning_rate": 0.00029912461133700067, "loss": 2.7989, "step": 566 }, { "epoch": 0.07946741415557113, "grad_norm": 0.7774486541748047, "learning_rate": 0.000299110260703181, "loss": 2.8174, "step": 567 }, { "epoch": 0.07960756832515767, "grad_norm": 0.6659305691719055, "learning_rate": 0.0002990959100693614, "loss": 2.8042, "step": 568 }, { "epoch": 0.07974772249474422, "grad_norm": 0.5252679586410522, "learning_rate": 0.0002990815594355417, "loss": 2.8254, "step": 569 }, { "epoch": 0.07988787666433077, "grad_norm": 1.5993984937667847, "learning_rate": 0.00029906720880172203, "loss": 2.8631, "step": 570 }, { "epoch": 0.08002803083391731, "grad_norm": 1.2399847507476807, "learning_rate": 0.0002990528581679024, "loss": 2.8652, "step": 571 }, { "epoch": 0.08016818500350385, "grad_norm": 1.3382014036178589, "learning_rate": 0.00029903850753408275, "loss": 2.8044, "step": 572 }, { "epoch": 0.0803083391730904, "grad_norm": 0.36626508831977844, "learning_rate": 0.0002990241569002631, "loss": 2.8503, "step": 573 }, { "epoch": 0.08044849334267694, "grad_norm": 0.5636441707611084, "learning_rate": 0.0002990098062664434, "loss": 2.8446, "step": 574 }, { "epoch": 0.0805886475122635, "grad_norm": 0.975190281867981, "learning_rate": 0.00029899545563262373, "loss": 2.8376, "step": 575 }, { "epoch": 0.08072880168185004, "grad_norm": 1.7194899320602417, "learning_rate": 0.00029898110499880406, "loss": 2.876, "step": 576 }, { "epoch": 0.08086895585143658, 
"grad_norm": 1.046378254890442, "learning_rate": 0.00029896675436498444, "loss": 2.8959, "step": 577 }, { "epoch": 0.08100911002102312, "grad_norm": 0.40222662687301636, "learning_rate": 0.00029895240373116477, "loss": 2.8193, "step": 578 }, { "epoch": 0.08114926419060967, "grad_norm": 1.0115432739257812, "learning_rate": 0.0002989380530973451, "loss": 2.8054, "step": 579 }, { "epoch": 0.08128941836019622, "grad_norm": 0.8710485696792603, "learning_rate": 0.0002989237024635255, "loss": 2.8417, "step": 580 }, { "epoch": 0.08142957252978276, "grad_norm": 0.5470033288002014, "learning_rate": 0.0002989093518297058, "loss": 2.7937, "step": 581 }, { "epoch": 0.08156972669936931, "grad_norm": 2.266864776611328, "learning_rate": 0.00029889500119588614, "loss": 2.8503, "step": 582 }, { "epoch": 0.08170988086895585, "grad_norm": 0.5184596180915833, "learning_rate": 0.00029888065056206647, "loss": 2.8387, "step": 583 }, { "epoch": 0.08185003503854239, "grad_norm": 1.1281994581222534, "learning_rate": 0.0002988662999282468, "loss": 2.8761, "step": 584 }, { "epoch": 0.08199018920812894, "grad_norm": 0.36221277713775635, "learning_rate": 0.0002988519492944271, "loss": 2.8019, "step": 585 }, { "epoch": 0.08213034337771549, "grad_norm": 0.40339556336402893, "learning_rate": 0.00029883759866060745, "loss": 2.8583, "step": 586 }, { "epoch": 0.08227049754730204, "grad_norm": 0.8000438809394836, "learning_rate": 0.00029882324802678784, "loss": 2.8069, "step": 587 }, { "epoch": 0.08241065171688858, "grad_norm": 0.7097018361091614, "learning_rate": 0.00029880889739296817, "loss": 2.8529, "step": 588 }, { "epoch": 0.08255080588647512, "grad_norm": 0.8318480849266052, "learning_rate": 0.0002987945467591485, "loss": 2.8447, "step": 589 }, { "epoch": 0.08269096005606166, "grad_norm": 0.8203009366989136, "learning_rate": 0.0002987801961253289, "loss": 2.8843, "step": 590 }, { "epoch": 0.08283111422564822, "grad_norm": 0.8691034317016602, "learning_rate": 0.0002987658454915092, "loss": 
2.8418, "step": 591 }, { "epoch": 0.08297126839523476, "grad_norm": 0.8345413208007812, "learning_rate": 0.00029875149485768953, "loss": 2.8885, "step": 592 }, { "epoch": 0.0831114225648213, "grad_norm": 0.5269362330436707, "learning_rate": 0.00029873714422386986, "loss": 2.8721, "step": 593 }, { "epoch": 0.08325157673440785, "grad_norm": 0.8924299478530884, "learning_rate": 0.0002987227935900502, "loss": 2.8992, "step": 594 }, { "epoch": 0.08339173090399439, "grad_norm": 0.5429219007492065, "learning_rate": 0.0002987084429562305, "loss": 2.8454, "step": 595 }, { "epoch": 0.08353188507358093, "grad_norm": 0.7319108843803406, "learning_rate": 0.0002986940923224109, "loss": 2.8692, "step": 596 }, { "epoch": 0.08367203924316749, "grad_norm": 1.0079227685928345, "learning_rate": 0.00029867974168859123, "loss": 2.9254, "step": 597 }, { "epoch": 0.08381219341275403, "grad_norm": 1.6758877038955688, "learning_rate": 0.00029866539105477156, "loss": 2.8259, "step": 598 }, { "epoch": 0.08395234758234058, "grad_norm": 1.7681196928024292, "learning_rate": 0.0002986510404209519, "loss": 2.865, "step": 599 }, { "epoch": 0.08409250175192712, "grad_norm": 1.3744769096374512, "learning_rate": 0.0002986366897871322, "loss": 2.8459, "step": 600 }, { "epoch": 0.08423265592151366, "grad_norm": 0.495762437582016, "learning_rate": 0.0002986223391533126, "loss": 2.8304, "step": 601 }, { "epoch": 0.08437281009110022, "grad_norm": 1.6722474098205566, "learning_rate": 0.00029860798851949293, "loss": 2.8219, "step": 602 }, { "epoch": 0.08451296426068676, "grad_norm": 0.4749285876750946, "learning_rate": 0.00029859363788567326, "loss": 2.8286, "step": 603 }, { "epoch": 0.0846531184302733, "grad_norm": 0.6313146948814392, "learning_rate": 0.0002985792872518536, "loss": 2.8612, "step": 604 }, { "epoch": 0.08479327259985985, "grad_norm": 0.769989013671875, "learning_rate": 0.0002985649366180339, "loss": 2.8284, "step": 605 }, { "epoch": 0.08493342676944639, "grad_norm": 0.5903698205947876, 
"learning_rate": 0.0002985505859842143, "loss": 2.8803, "step": 606 }, { "epoch": 0.08507358093903293, "grad_norm": 0.622111976146698, "learning_rate": 0.0002985362353503946, "loss": 2.8348, "step": 607 }, { "epoch": 0.08521373510861949, "grad_norm": 0.9654092192649841, "learning_rate": 0.00029852188471657495, "loss": 2.8219, "step": 608 }, { "epoch": 0.08535388927820603, "grad_norm": 0.739698052406311, "learning_rate": 0.0002985075340827553, "loss": 2.8007, "step": 609 }, { "epoch": 0.08549404344779257, "grad_norm": 0.7439742088317871, "learning_rate": 0.0002984931834489356, "loss": 2.8283, "step": 610 }, { "epoch": 0.08563419761737912, "grad_norm": 0.5097655653953552, "learning_rate": 0.00029847883281511594, "loss": 2.8618, "step": 611 }, { "epoch": 0.08577435178696566, "grad_norm": 0.557064950466156, "learning_rate": 0.0002984644821812963, "loss": 2.8195, "step": 612 }, { "epoch": 0.0859145059565522, "grad_norm": 0.40500202775001526, "learning_rate": 0.00029845013154747665, "loss": 2.818, "step": 613 }, { "epoch": 0.08605466012613876, "grad_norm": 1.3171998262405396, "learning_rate": 0.000298435780913657, "loss": 2.835, "step": 614 }, { "epoch": 0.0861948142957253, "grad_norm": 0.8443118929862976, "learning_rate": 0.00029842143027983736, "loss": 2.838, "step": 615 }, { "epoch": 0.08633496846531184, "grad_norm": 0.5561873912811279, "learning_rate": 0.0002984070796460177, "loss": 2.8168, "step": 616 }, { "epoch": 0.08647512263489839, "grad_norm": 1.3321802616119385, "learning_rate": 0.000298392729012198, "loss": 2.7918, "step": 617 }, { "epoch": 0.08661527680448493, "grad_norm": 0.6840429306030273, "learning_rate": 0.00029837837837837835, "loss": 2.8632, "step": 618 }, { "epoch": 0.08675543097407148, "grad_norm": 0.5101912021636963, "learning_rate": 0.0002983640277445587, "loss": 2.8235, "step": 619 }, { "epoch": 0.08689558514365803, "grad_norm": 0.330929160118103, "learning_rate": 0.000298349677110739, "loss": 2.8051, "step": 620 }, { "epoch": 
0.08703573931324457, "grad_norm": 1.2912763357162476, "learning_rate": 0.00029833532647691933, "loss": 2.8092, "step": 621 }, { "epoch": 0.08717589348283111, "grad_norm": 0.45430678129196167, "learning_rate": 0.0002983209758430997, "loss": 2.8301, "step": 622 }, { "epoch": 0.08731604765241766, "grad_norm": 1.664919137954712, "learning_rate": 0.00029830662520928004, "loss": 2.8641, "step": 623 }, { "epoch": 0.0874562018220042, "grad_norm": 0.5690026879310608, "learning_rate": 0.0002982922745754604, "loss": 2.8253, "step": 624 }, { "epoch": 0.08759635599159075, "grad_norm": 0.49163636565208435, "learning_rate": 0.00029827792394164076, "loss": 2.7965, "step": 625 }, { "epoch": 0.0877365101611773, "grad_norm": 0.5771698951721191, "learning_rate": 0.0002982635733078211, "loss": 2.811, "step": 626 }, { "epoch": 0.08787666433076384, "grad_norm": 1.500550389289856, "learning_rate": 0.0002982492226740014, "loss": 2.8566, "step": 627 }, { "epoch": 0.08801681850035038, "grad_norm": 1.1520962715148926, "learning_rate": 0.00029823487204018174, "loss": 2.8206, "step": 628 }, { "epoch": 0.08815697266993693, "grad_norm": 0.9202871322631836, "learning_rate": 0.00029822052140636207, "loss": 2.8741, "step": 629 }, { "epoch": 0.08829712683952348, "grad_norm": 0.7967159152030945, "learning_rate": 0.0002982061707725424, "loss": 2.8376, "step": 630 }, { "epoch": 0.08843728100911002, "grad_norm": 1.4931164979934692, "learning_rate": 0.0002981918201387228, "loss": 2.8284, "step": 631 }, { "epoch": 0.08857743517869657, "grad_norm": 0.48530471324920654, "learning_rate": 0.0002981774695049031, "loss": 2.8405, "step": 632 }, { "epoch": 0.08871758934828311, "grad_norm": 0.8628910183906555, "learning_rate": 0.00029816311887108344, "loss": 2.8141, "step": 633 }, { "epoch": 0.08885774351786965, "grad_norm": 0.8304014205932617, "learning_rate": 0.0002981487682372638, "loss": 2.8201, "step": 634 }, { "epoch": 0.0889978976874562, "grad_norm": 0.3863701820373535, "learning_rate": 
0.00029813441760344415, "loss": 2.835, "step": 635 }, { "epoch": 0.08913805185704275, "grad_norm": 0.5625427961349487, "learning_rate": 0.0002981200669696245, "loss": 2.8139, "step": 636 }, { "epoch": 0.0892782060266293, "grad_norm": 0.5027657151222229, "learning_rate": 0.0002981057163358048, "loss": 2.772, "step": 637 }, { "epoch": 0.08941836019621584, "grad_norm": 1.7406222820281982, "learning_rate": 0.00029809136570198514, "loss": 2.8658, "step": 638 }, { "epoch": 0.08955851436580238, "grad_norm": 1.1116559505462646, "learning_rate": 0.00029807701506816546, "loss": 2.7864, "step": 639 }, { "epoch": 0.08969866853538892, "grad_norm": 0.6801770329475403, "learning_rate": 0.0002980626644343458, "loss": 2.8095, "step": 640 }, { "epoch": 0.08983882270497548, "grad_norm": 1.4479053020477295, "learning_rate": 0.0002980483138005262, "loss": 2.8471, "step": 641 }, { "epoch": 0.08997897687456202, "grad_norm": 0.5120633244514465, "learning_rate": 0.0002980339631667065, "loss": 2.8464, "step": 642 }, { "epoch": 0.09011913104414856, "grad_norm": 0.45446351170539856, "learning_rate": 0.00029801961253288683, "loss": 2.7928, "step": 643 }, { "epoch": 0.09025928521373511, "grad_norm": 0.9317551851272583, "learning_rate": 0.0002980052618990672, "loss": 2.8274, "step": 644 }, { "epoch": 0.09039943938332165, "grad_norm": 0.6883323192596436, "learning_rate": 0.00029799091126524754, "loss": 2.8461, "step": 645 }, { "epoch": 0.09053959355290819, "grad_norm": 0.7593468427658081, "learning_rate": 0.00029797656063142787, "loss": 2.7726, "step": 646 }, { "epoch": 0.09067974772249475, "grad_norm": 0.7465790510177612, "learning_rate": 0.0002979622099976082, "loss": 2.7725, "step": 647 }, { "epoch": 0.09081990189208129, "grad_norm": 1.7886598110198975, "learning_rate": 0.00029794785936378853, "loss": 2.8947, "step": 648 }, { "epoch": 0.09096005606166783, "grad_norm": 0.6924257874488831, "learning_rate": 0.00029793350872996886, "loss": 2.8607, "step": 649 }, { "epoch": 0.09110021023125438, 
"grad_norm": 1.339035987854004, "learning_rate": 0.00029791915809614924, "loss": 2.7822, "step": 650 }, { "epoch": 0.09124036440084092, "grad_norm": 0.43366163969039917, "learning_rate": 0.00029790480746232957, "loss": 2.7786, "step": 651 }, { "epoch": 0.09138051857042748, "grad_norm": 1.2496447563171387, "learning_rate": 0.0002978904568285099, "loss": 2.8206, "step": 652 }, { "epoch": 0.09152067274001402, "grad_norm": 1.8453083038330078, "learning_rate": 0.0002978761061946902, "loss": 2.8424, "step": 653 }, { "epoch": 0.09166082690960056, "grad_norm": 1.6689263582229614, "learning_rate": 0.0002978617555608706, "loss": 2.7919, "step": 654 }, { "epoch": 0.0918009810791871, "grad_norm": 2.8990964889526367, "learning_rate": 0.00029784740492705094, "loss": 2.8228, "step": 655 }, { "epoch": 0.09194113524877365, "grad_norm": 2.011852741241455, "learning_rate": 0.00029783305429323127, "loss": 2.8267, "step": 656 }, { "epoch": 0.09208128941836019, "grad_norm": 0.7403373718261719, "learning_rate": 0.0002978187036594116, "loss": 2.7918, "step": 657 }, { "epoch": 0.09222144358794675, "grad_norm": 0.7828468680381775, "learning_rate": 0.0002978043530255919, "loss": 2.8375, "step": 658 }, { "epoch": 0.09236159775753329, "grad_norm": 0.8046102523803711, "learning_rate": 0.00029779000239177225, "loss": 2.8533, "step": 659 }, { "epoch": 0.09250175192711983, "grad_norm": 1.337938904762268, "learning_rate": 0.00029777565175795264, "loss": 2.7909, "step": 660 }, { "epoch": 0.09264190609670638, "grad_norm": 0.45919352769851685, "learning_rate": 0.00029776130112413296, "loss": 2.7858, "step": 661 }, { "epoch": 0.09278206026629292, "grad_norm": 0.9781817197799683, "learning_rate": 0.0002977469504903133, "loss": 2.7464, "step": 662 }, { "epoch": 0.09292221443587946, "grad_norm": 0.5097095370292664, "learning_rate": 0.0002977325998564936, "loss": 2.7552, "step": 663 }, { "epoch": 0.09306236860546602, "grad_norm": 0.9436579346656799, "learning_rate": 0.000297718249222674, "loss": 2.7777, 
"step": 664 }, { "epoch": 0.09320252277505256, "grad_norm": 1.041588306427002, "learning_rate": 0.00029770389858885433, "loss": 2.8027, "step": 665 }, { "epoch": 0.0933426769446391, "grad_norm": 2.2087807655334473, "learning_rate": 0.00029768954795503466, "loss": 2.7533, "step": 666 }, { "epoch": 0.09348283111422565, "grad_norm": 0.5946235656738281, "learning_rate": 0.000297675197321215, "loss": 2.7246, "step": 667 }, { "epoch": 0.09362298528381219, "grad_norm": 1.53168785572052, "learning_rate": 0.0002976608466873953, "loss": 2.7586, "step": 668 }, { "epoch": 0.09376313945339874, "grad_norm": 0.6736552119255066, "learning_rate": 0.0002976464960535757, "loss": 2.758, "step": 669 }, { "epoch": 0.09390329362298529, "grad_norm": 2.449899196624756, "learning_rate": 0.00029763214541975603, "loss": 2.7873, "step": 670 }, { "epoch": 0.09404344779257183, "grad_norm": 1.226711630821228, "learning_rate": 0.00029761779478593636, "loss": 2.7737, "step": 671 }, { "epoch": 0.09418360196215837, "grad_norm": 1.5849742889404297, "learning_rate": 0.0002976034441521167, "loss": 2.7496, "step": 672 }, { "epoch": 0.09432375613174492, "grad_norm": 1.3360331058502197, "learning_rate": 0.000297589093518297, "loss": 2.7938, "step": 673 }, { "epoch": 0.09446391030133146, "grad_norm": 0.40812867879867554, "learning_rate": 0.00029757474288447734, "loss": 2.735, "step": 674 }, { "epoch": 0.09460406447091801, "grad_norm": 2.3891429901123047, "learning_rate": 0.00029756039225065767, "loss": 2.7499, "step": 675 }, { "epoch": 0.09474421864050456, "grad_norm": 1.6537995338439941, "learning_rate": 0.00029754604161683805, "loss": 2.732, "step": 676 }, { "epoch": 0.0948843728100911, "grad_norm": 0.6037951111793518, "learning_rate": 0.0002975316909830184, "loss": 2.7448, "step": 677 }, { "epoch": 0.09502452697967764, "grad_norm": 2.345980644226074, "learning_rate": 0.0002975173403491987, "loss": 2.7403, "step": 678 }, { "epoch": 0.09516468114926419, "grad_norm": 1.643855094909668, "learning_rate": 
0.0002975029897153791, "loss": 2.7589, "step": 679 }, { "epoch": 0.09530483531885074, "grad_norm": 1.035865306854248, "learning_rate": 0.0002974886390815594, "loss": 2.7081, "step": 680 }, { "epoch": 0.09544498948843728, "grad_norm": 0.4680897891521454, "learning_rate": 0.00029747428844773975, "loss": 2.6718, "step": 681 }, { "epoch": 0.09558514365802383, "grad_norm": 1.733031153678894, "learning_rate": 0.0002974599378139201, "loss": 2.703, "step": 682 }, { "epoch": 0.09572529782761037, "grad_norm": 0.830502986907959, "learning_rate": 0.0002974455871801004, "loss": 2.645, "step": 683 }, { "epoch": 0.09586545199719691, "grad_norm": 0.5354193449020386, "learning_rate": 0.00029743123654628074, "loss": 2.6269, "step": 684 }, { "epoch": 0.09600560616678346, "grad_norm": 0.7488672733306885, "learning_rate": 0.0002974168859124611, "loss": 2.6441, "step": 685 }, { "epoch": 0.09614576033637001, "grad_norm": 0.8250200152397156, "learning_rate": 0.00029740253527864145, "loss": 2.6969, "step": 686 }, { "epoch": 0.09628591450595655, "grad_norm": 3.3549208641052246, "learning_rate": 0.0002973881846448218, "loss": 2.767, "step": 687 }, { "epoch": 0.0964260686755431, "grad_norm": 1.539199709892273, "learning_rate": 0.0002973738340110021, "loss": 2.6454, "step": 688 }, { "epoch": 0.09656622284512964, "grad_norm": 1.9080121517181396, "learning_rate": 0.0002973594833771825, "loss": 2.6029, "step": 689 }, { "epoch": 0.09670637701471618, "grad_norm": 1.0665572881698608, "learning_rate": 0.0002973451327433628, "loss": 2.6352, "step": 690 }, { "epoch": 0.09684653118430274, "grad_norm": 1.162095308303833, "learning_rate": 0.00029733078210954315, "loss": 2.6054, "step": 691 }, { "epoch": 0.09698668535388928, "grad_norm": 0.8056411743164062, "learning_rate": 0.0002973164314757235, "loss": 2.5109, "step": 692 }, { "epoch": 0.09712683952347582, "grad_norm": 2.13920521736145, "learning_rate": 0.0002973020808419038, "loss": 2.5706, "step": 693 }, { "epoch": 0.09726699369306237, "grad_norm": 
0.9956919550895691, "learning_rate": 0.00029728773020808413, "loss": 2.4241, "step": 694 }, { "epoch": 0.09740714786264891, "grad_norm": 1.7647011280059814, "learning_rate": 0.0002972733795742645, "loss": 2.553, "step": 695 }, { "epoch": 0.09754730203223545, "grad_norm": 1.1218814849853516, "learning_rate": 0.00029725902894044484, "loss": 2.4433, "step": 696 }, { "epoch": 0.09768745620182201, "grad_norm": 1.4899669885635376, "learning_rate": 0.00029724467830662517, "loss": 2.5247, "step": 697 }, { "epoch": 0.09782761037140855, "grad_norm": 0.9114047884941101, "learning_rate": 0.00029723032767280555, "loss": 2.4372, "step": 698 }, { "epoch": 0.0979677645409951, "grad_norm": 1.6940371990203857, "learning_rate": 0.0002972159770389859, "loss": 2.5265, "step": 699 }, { "epoch": 0.09810791871058164, "grad_norm": 1.4025936126708984, "learning_rate": 0.0002972016264051662, "loss": 2.4353, "step": 700 }, { "epoch": 0.09824807288016818, "grad_norm": 1.901639461517334, "learning_rate": 0.00029718727577134654, "loss": 2.3507, "step": 701 }, { "epoch": 0.09838822704975474, "grad_norm": 0.7116155028343201, "learning_rate": 0.00029717292513752687, "loss": 2.41, "step": 702 }, { "epoch": 0.09852838121934128, "grad_norm": 0.8871269226074219, "learning_rate": 0.0002971585745037072, "loss": 2.2023, "step": 703 }, { "epoch": 0.09866853538892782, "grad_norm": 0.8017802834510803, "learning_rate": 0.0002971442238698876, "loss": 2.2224, "step": 704 }, { "epoch": 0.09880868955851436, "grad_norm": 1.6170402765274048, "learning_rate": 0.0002971298732360679, "loss": 2.1752, "step": 705 }, { "epoch": 0.09894884372810091, "grad_norm": 1.2236593961715698, "learning_rate": 0.00029711552260224824, "loss": 2.2152, "step": 706 }, { "epoch": 0.09908899789768745, "grad_norm": 0.8129385113716125, "learning_rate": 0.00029710117196842857, "loss": 2.1591, "step": 707 }, { "epoch": 0.099229152067274, "grad_norm": 0.7264969944953918, "learning_rate": 0.00029708682133460895, "loss": 2.1175, "step": 708 }, { 
"epoch": 0.09936930623686055, "grad_norm": 1.3700792789459229, "learning_rate": 0.0002970724707007893, "loss": 2.0763, "step": 709 }, { "epoch": 0.09950946040644709, "grad_norm": 0.8801718950271606, "learning_rate": 0.0002970581200669696, "loss": 1.9882, "step": 710 }, { "epoch": 0.09964961457603363, "grad_norm": 1.5149402618408203, "learning_rate": 0.00029704376943314993, "loss": 1.9982, "step": 711 }, { "epoch": 0.09978976874562018, "grad_norm": 0.797324001789093, "learning_rate": 0.00029702941879933026, "loss": 1.8953, "step": 712 }, { "epoch": 0.09992992291520673, "grad_norm": 0.704853355884552, "learning_rate": 0.0002970150681655106, "loss": 1.8684, "step": 713 }, { "epoch": 0.10007007708479328, "grad_norm": 1.9356935024261475, "learning_rate": 0.000297000717531691, "loss": 1.8943, "step": 714 }, { "epoch": 0.10021023125437982, "grad_norm": 1.0025293827056885, "learning_rate": 0.0002969863668978713, "loss": 1.8833, "step": 715 }, { "epoch": 0.10035038542396636, "grad_norm": 0.7290270924568176, "learning_rate": 0.00029697201626405163, "loss": 1.7453, "step": 716 }, { "epoch": 0.1004905395935529, "grad_norm": 0.9768915176391602, "learning_rate": 0.000296957665630232, "loss": 1.7353, "step": 717 }, { "epoch": 0.10063069376313945, "grad_norm": 0.9743488430976868, "learning_rate": 0.00029694331499641234, "loss": 1.6711, "step": 718 }, { "epoch": 0.100770847932726, "grad_norm": 0.8709413409233093, "learning_rate": 0.00029692896436259267, "loss": 1.6368, "step": 719 }, { "epoch": 0.10091100210231255, "grad_norm": 0.7288438677787781, "learning_rate": 0.000296914613728773, "loss": 1.5678, "step": 720 }, { "epoch": 0.10105115627189909, "grad_norm": 1.2401670217514038, "learning_rate": 0.00029690026309495333, "loss": 1.5383, "step": 721 }, { "epoch": 0.10119131044148563, "grad_norm": 0.7433953881263733, "learning_rate": 0.00029688591246113366, "loss": 1.3487, "step": 722 }, { "epoch": 0.10133146461107218, "grad_norm": 1.1703156232833862, "learning_rate": 
0.000296871561827314, "loss": 1.4389, "step": 723 }, { "epoch": 0.10147161878065872, "grad_norm": 1.0181443691253662, "learning_rate": 0.00029685721119349437, "loss": 1.2741, "step": 724 }, { "epoch": 0.10161177295024527, "grad_norm": 0.8912602663040161, "learning_rate": 0.0002968428605596747, "loss": 1.3337, "step": 725 }, { "epoch": 0.10175192711983182, "grad_norm": 1.1339879035949707, "learning_rate": 0.000296828509925855, "loss": 1.2564, "step": 726 }, { "epoch": 0.10189208128941836, "grad_norm": 1.0206938982009888, "learning_rate": 0.00029681415929203535, "loss": 1.2673, "step": 727 }, { "epoch": 0.1020322354590049, "grad_norm": 0.7952700853347778, "learning_rate": 0.00029679980865821574, "loss": 1.1733, "step": 728 }, { "epoch": 0.10217238962859145, "grad_norm": 1.1448856592178345, "learning_rate": 0.00029678545802439606, "loss": 1.1501, "step": 729 }, { "epoch": 0.102312543798178, "grad_norm": 0.8677394390106201, "learning_rate": 0.0002967711073905764, "loss": 1.1525, "step": 730 }, { "epoch": 0.10245269796776454, "grad_norm": 0.9588388204574585, "learning_rate": 0.0002967567567567567, "loss": 1.0873, "step": 731 }, { "epoch": 0.10259285213735109, "grad_norm": 0.9860378503799438, "learning_rate": 0.00029674240612293705, "loss": 1.1744, "step": 732 }, { "epoch": 0.10273300630693763, "grad_norm": 0.8007164001464844, "learning_rate": 0.00029672805548911743, "loss": 1.0184, "step": 733 }, { "epoch": 0.10287316047652417, "grad_norm": 0.9642511606216431, "learning_rate": 0.00029671370485529776, "loss": 0.9591, "step": 734 }, { "epoch": 0.10301331464611072, "grad_norm": 0.9371851086616516, "learning_rate": 0.0002966993542214781, "loss": 1.0169, "step": 735 }, { "epoch": 0.10315346881569727, "grad_norm": 1.197338581085205, "learning_rate": 0.0002966850035876584, "loss": 1.0416, "step": 736 }, { "epoch": 0.10329362298528381, "grad_norm": 0.8738648891448975, "learning_rate": 0.00029667065295383875, "loss": 1.0081, "step": 737 }, { "epoch": 0.10343377715487036, 
"grad_norm": 0.8442432284355164, "learning_rate": 0.0002966563023200191, "loss": 0.7859, "step": 738 }, { "epoch": 0.1035739313244569, "grad_norm": 0.9249336123466492, "learning_rate": 0.00029664195168619946, "loss": 0.9638, "step": 739 }, { "epoch": 0.10371408549404344, "grad_norm": 0.9109815359115601, "learning_rate": 0.0002966276010523798, "loss": 0.9012, "step": 740 }, { "epoch": 0.10385423966363, "grad_norm": 0.6473962664604187, "learning_rate": 0.0002966132504185601, "loss": 0.799, "step": 741 }, { "epoch": 0.10399439383321654, "grad_norm": 0.7730115056037903, "learning_rate": 0.00029659889978474044, "loss": 0.7371, "step": 742 }, { "epoch": 0.10413454800280308, "grad_norm": 1.0130146741867065, "learning_rate": 0.00029658454915092083, "loss": 0.8051, "step": 743 }, { "epoch": 0.10427470217238963, "grad_norm": 1.0508068799972534, "learning_rate": 0.00029657019851710116, "loss": 0.8287, "step": 744 }, { "epoch": 0.10441485634197617, "grad_norm": 0.9912812113761902, "learning_rate": 0.0002965558478832815, "loss": 0.7549, "step": 745 }, { "epoch": 0.10455501051156271, "grad_norm": 1.7838891744613647, "learning_rate": 0.0002965414972494618, "loss": 0.7836, "step": 746 }, { "epoch": 0.10469516468114927, "grad_norm": 1.3858511447906494, "learning_rate": 0.00029652714661564214, "loss": 0.7246, "step": 747 }, { "epoch": 0.10483531885073581, "grad_norm": 1.1503989696502686, "learning_rate": 0.00029651279598182247, "loss": 0.7644, "step": 748 }, { "epoch": 0.10497547302032235, "grad_norm": 2.166940689086914, "learning_rate": 0.00029649844534800285, "loss": 1.0979, "step": 749 }, { "epoch": 0.1051156271899089, "grad_norm": 4.8708672523498535, "learning_rate": 0.0002964840947141832, "loss": 1.3126, "step": 750 }, { "epoch": 0.10525578135949544, "grad_norm": 1.5065680742263794, "learning_rate": 0.0002964697440803635, "loss": 1.0039, "step": 751 }, { "epoch": 0.105395935529082, "grad_norm": 1.3567593097686768, "learning_rate": 0.0002964553934465439, "loss": 0.7777, "step": 
752 }, { "epoch": 0.10553608969866854, "grad_norm": 1.324194073677063, "learning_rate": 0.0002964410428127242, "loss": 0.8186, "step": 753 }, { "epoch": 0.10567624386825508, "grad_norm": 0.7472732067108154, "learning_rate": 0.00029642669217890455, "loss": 0.7239, "step": 754 }, { "epoch": 0.10581639803784162, "grad_norm": 0.6719563603401184, "learning_rate": 0.0002964123415450849, "loss": 0.6175, "step": 755 }, { "epoch": 0.10595655220742817, "grad_norm": 0.5964769721031189, "learning_rate": 0.0002963979909112652, "loss": 0.639, "step": 756 }, { "epoch": 0.10609670637701471, "grad_norm": 0.9903078675270081, "learning_rate": 0.00029638364027744554, "loss": 0.7978, "step": 757 }, { "epoch": 0.10623686054660127, "grad_norm": 1.1828066110610962, "learning_rate": 0.00029636928964362586, "loss": 0.791, "step": 758 }, { "epoch": 0.10637701471618781, "grad_norm": 0.5313505530357361, "learning_rate": 0.00029635493900980625, "loss": 0.6287, "step": 759 }, { "epoch": 0.10651716888577435, "grad_norm": 0.8737642168998718, "learning_rate": 0.0002963405883759866, "loss": 0.5805, "step": 760 }, { "epoch": 0.1066573230553609, "grad_norm": 0.9216781258583069, "learning_rate": 0.0002963262377421669, "loss": 0.664, "step": 761 }, { "epoch": 0.10679747722494744, "grad_norm": 0.8127090930938721, "learning_rate": 0.0002963118871083473, "loss": 0.6276, "step": 762 }, { "epoch": 0.106937631394534, "grad_norm": 1.3506488800048828, "learning_rate": 0.0002962975364745276, "loss": 0.5912, "step": 763 }, { "epoch": 0.10707778556412054, "grad_norm": 1.2340604066848755, "learning_rate": 0.00029628318584070794, "loss": 0.7388, "step": 764 }, { "epoch": 0.10721793973370708, "grad_norm": 0.669952929019928, "learning_rate": 0.00029626883520688827, "loss": 0.5855, "step": 765 }, { "epoch": 0.10735809390329362, "grad_norm": 0.6108424067497253, "learning_rate": 0.0002962544845730686, "loss": 0.6295, "step": 766 }, { "epoch": 0.10749824807288016, "grad_norm": 1.2293285131454468, "learning_rate": 
0.00029624013393924893, "loss": 0.6832, "step": 767 }, { "epoch": 0.10763840224246671, "grad_norm": 0.7247191667556763, "learning_rate": 0.0002962257833054293, "loss": 0.532, "step": 768 }, { "epoch": 0.10777855641205326, "grad_norm": 0.8430277109146118, "learning_rate": 0.00029621143267160964, "loss": 0.5374, "step": 769 }, { "epoch": 0.1079187105816398, "grad_norm": 0.5940694212913513, "learning_rate": 0.00029619708203778997, "loss": 0.5505, "step": 770 }, { "epoch": 0.10805886475122635, "grad_norm": 1.552704095840454, "learning_rate": 0.00029618273140397035, "loss": 0.5974, "step": 771 }, { "epoch": 0.10819901892081289, "grad_norm": 0.9161866903305054, "learning_rate": 0.0002961683807701507, "loss": 0.5543, "step": 772 }, { "epoch": 0.10833917309039943, "grad_norm": 0.663697361946106, "learning_rate": 0.000296154030136331, "loss": 0.4751, "step": 773 }, { "epoch": 0.10847932725998599, "grad_norm": 0.7411726117134094, "learning_rate": 0.00029613967950251134, "loss": 0.5135, "step": 774 }, { "epoch": 0.10861948142957253, "grad_norm": 0.6696982979774475, "learning_rate": 0.00029612532886869167, "loss": 0.5468, "step": 775 }, { "epoch": 0.10875963559915908, "grad_norm": 0.7200729250907898, "learning_rate": 0.000296110978234872, "loss": 0.5524, "step": 776 }, { "epoch": 0.10889978976874562, "grad_norm": 0.7118507027626038, "learning_rate": 0.0002960966276010523, "loss": 0.5293, "step": 777 }, { "epoch": 0.10903994393833216, "grad_norm": 0.9761529564857483, "learning_rate": 0.0002960822769672327, "loss": 0.5712, "step": 778 }, { "epoch": 0.1091800981079187, "grad_norm": 0.6491013169288635, "learning_rate": 0.00029606792633341303, "loss": 0.5138, "step": 779 }, { "epoch": 0.10932025227750526, "grad_norm": 1.068501591682434, "learning_rate": 0.00029605357569959336, "loss": 0.4659, "step": 780 }, { "epoch": 0.1094604064470918, "grad_norm": 1.220663070678711, "learning_rate": 0.00029603922506577375, "loss": 0.5934, "step": 781 }, { "epoch": 0.10960056061667835, 
"grad_norm": 1.1378167867660522, "learning_rate": 0.0002960248744319541, "loss": 0.4591, "step": 782 }, { "epoch": 0.10974071478626489, "grad_norm": 0.7223802208900452, "learning_rate": 0.0002960105237981344, "loss": 0.4731, "step": 783 }, { "epoch": 0.10988086895585143, "grad_norm": 0.5333732962608337, "learning_rate": 0.00029599617316431473, "loss": 0.4557, "step": 784 }, { "epoch": 0.11002102312543798, "grad_norm": 0.7551818490028381, "learning_rate": 0.00029598182253049506, "loss": 0.5145, "step": 785 }, { "epoch": 0.11016117729502453, "grad_norm": 1.171321988105774, "learning_rate": 0.0002959674718966754, "loss": 0.5494, "step": 786 }, { "epoch": 0.11030133146461107, "grad_norm": 0.6282427906990051, "learning_rate": 0.00029595312126285577, "loss": 0.4243, "step": 787 }, { "epoch": 0.11044148563419762, "grad_norm": 3.9415132999420166, "learning_rate": 0.0002959387706290361, "loss": 0.7097, "step": 788 }, { "epoch": 0.11058163980378416, "grad_norm": 1.1901054382324219, "learning_rate": 0.00029592441999521643, "loss": 0.4911, "step": 789 }, { "epoch": 0.1107217939733707, "grad_norm": 1.140281081199646, "learning_rate": 0.00029591006936139676, "loss": 0.5038, "step": 790 }, { "epoch": 0.11086194814295726, "grad_norm": 0.7657198905944824, "learning_rate": 0.00029589571872757714, "loss": 0.5436, "step": 791 }, { "epoch": 0.1110021023125438, "grad_norm": 1.3570201396942139, "learning_rate": 0.00029588136809375747, "loss": 0.5629, "step": 792 }, { "epoch": 0.11114225648213034, "grad_norm": 1.1226831674575806, "learning_rate": 0.0002958670174599378, "loss": 0.567, "step": 793 }, { "epoch": 0.11128241065171689, "grad_norm": 2.085481882095337, "learning_rate": 0.0002958526668261181, "loss": 0.6337, "step": 794 }, { "epoch": 0.11142256482130343, "grad_norm": 1.2003912925720215, "learning_rate": 0.00029583831619229845, "loss": 0.5762, "step": 795 }, { "epoch": 0.11156271899088997, "grad_norm": 0.7766419053077698, "learning_rate": 0.0002958239655584788, "loss": 0.4641, 
"step": 796 }, { "epoch": 0.11170287316047653, "grad_norm": 1.336724877357483, "learning_rate": 0.00029580961492465917, "loss": 0.5853, "step": 797 }, { "epoch": 0.11184302733006307, "grad_norm": 2.394106388092041, "learning_rate": 0.0002957952642908395, "loss": 0.7521, "step": 798 }, { "epoch": 0.11198318149964961, "grad_norm": 2.7776308059692383, "learning_rate": 0.0002957809136570198, "loss": 0.8316, "step": 799 }, { "epoch": 0.11212333566923616, "grad_norm": 3.571431875228882, "learning_rate": 0.00029576656302320015, "loss": 0.962, "step": 800 }, { "epoch": 0.1122634898388227, "grad_norm": 1.0274524688720703, "learning_rate": 0.0002957522123893805, "loss": 0.6233, "step": 801 }, { "epoch": 0.11240364400840926, "grad_norm": 0.9401232600212097, "learning_rate": 0.0002957378617555608, "loss": 0.5457, "step": 802 }, { "epoch": 0.1125437981779958, "grad_norm": 0.7741809487342834, "learning_rate": 0.0002957235111217412, "loss": 0.5304, "step": 803 }, { "epoch": 0.11268395234758234, "grad_norm": 0.7393455505371094, "learning_rate": 0.0002957091604879215, "loss": 0.5699, "step": 804 }, { "epoch": 0.11282410651716888, "grad_norm": 0.6533899903297424, "learning_rate": 0.00029569480985410185, "loss": 0.4363, "step": 805 }, { "epoch": 0.11296426068675543, "grad_norm": 0.8278070092201233, "learning_rate": 0.00029568045922028223, "loss": 0.5238, "step": 806 }, { "epoch": 0.11310441485634197, "grad_norm": 0.6343386173248291, "learning_rate": 0.00029566610858646256, "loss": 0.4623, "step": 807 }, { "epoch": 0.11324456902592853, "grad_norm": 0.7172487378120422, "learning_rate": 0.0002956517579526429, "loss": 0.4413, "step": 808 }, { "epoch": 0.11338472319551507, "grad_norm": 1.3451247215270996, "learning_rate": 0.0002956374073188232, "loss": 0.5434, "step": 809 }, { "epoch": 0.11352487736510161, "grad_norm": 0.6080853343009949, "learning_rate": 0.00029562305668500355, "loss": 0.4458, "step": 810 }, { "epoch": 0.11366503153468815, "grad_norm": 0.9870772361755371, 
"learning_rate": 0.0002956087060511839, "loss": 0.5647, "step": 811 }, { "epoch": 0.1138051857042747, "grad_norm": 0.8646849393844604, "learning_rate": 0.0002955943554173642, "loss": 0.4547, "step": 812 }, { "epoch": 0.11394533987386125, "grad_norm": 0.9252731800079346, "learning_rate": 0.0002955800047835446, "loss": 0.395, "step": 813 }, { "epoch": 0.1140854940434478, "grad_norm": 0.5344290733337402, "learning_rate": 0.0002955656541497249, "loss": 0.4093, "step": 814 }, { "epoch": 0.11422564821303434, "grad_norm": 0.7339096069335938, "learning_rate": 0.00029555130351590524, "loss": 0.506, "step": 815 }, { "epoch": 0.11436580238262088, "grad_norm": 0.45406267046928406, "learning_rate": 0.0002955369528820856, "loss": 0.3366, "step": 816 }, { "epoch": 0.11450595655220742, "grad_norm": 0.5326426029205322, "learning_rate": 0.00029552260224826595, "loss": 0.4403, "step": 817 }, { "epoch": 0.11464611072179397, "grad_norm": 0.48331156373023987, "learning_rate": 0.0002955082516144463, "loss": 0.3566, "step": 818 }, { "epoch": 0.11478626489138052, "grad_norm": 0.7945199608802795, "learning_rate": 0.0002954939009806266, "loss": 0.4666, "step": 819 }, { "epoch": 0.11492641906096707, "grad_norm": 0.7521847486495972, "learning_rate": 0.00029547955034680694, "loss": 0.4222, "step": 820 }, { "epoch": 0.11506657323055361, "grad_norm": 0.7186934947967529, "learning_rate": 0.00029546519971298727, "loss": 0.3886, "step": 821 }, { "epoch": 0.11520672740014015, "grad_norm": 0.8452358841896057, "learning_rate": 0.00029545084907916765, "loss": 0.4749, "step": 822 }, { "epoch": 0.1153468815697267, "grad_norm": 0.5826500058174133, "learning_rate": 0.000295436498445348, "loss": 0.3369, "step": 823 }, { "epoch": 0.11548703573931325, "grad_norm": 0.5616267323493958, "learning_rate": 0.0002954221478115283, "loss": 0.4216, "step": 824 }, { "epoch": 0.1156271899088998, "grad_norm": 1.2843900918960571, "learning_rate": 0.0002954077971777087, "loss": 0.4776, "step": 825 }, { "epoch": 
0.11576734407848634, "grad_norm": 0.7610683441162109, "learning_rate": 0.000295393446543889, "loss": 0.3296, "step": 826 }, { "epoch": 0.11590749824807288, "grad_norm": 1.0371572971343994, "learning_rate": 0.00029537909591006935, "loss": 0.4196, "step": 827 }, { "epoch": 0.11604765241765942, "grad_norm": 0.5327167510986328, "learning_rate": 0.0002953647452762497, "loss": 0.38, "step": 828 }, { "epoch": 0.11618780658724596, "grad_norm": 0.7355878353118896, "learning_rate": 0.00029535039464243, "loss": 0.4441, "step": 829 }, { "epoch": 0.11632796075683252, "grad_norm": 0.5058201551437378, "learning_rate": 0.00029533604400861033, "loss": 0.4299, "step": 830 }, { "epoch": 0.11646811492641906, "grad_norm": 0.7439339756965637, "learning_rate": 0.00029532169337479066, "loss": 0.369, "step": 831 }, { "epoch": 0.1166082690960056, "grad_norm": 0.6938100457191467, "learning_rate": 0.00029530734274097104, "loss": 0.37, "step": 832 }, { "epoch": 0.11674842326559215, "grad_norm": 0.5563863515853882, "learning_rate": 0.0002952929921071514, "loss": 0.2806, "step": 833 }, { "epoch": 0.11688857743517869, "grad_norm": 0.8294783234596252, "learning_rate": 0.0002952786414733317, "loss": 0.5081, "step": 834 }, { "epoch": 0.11702873160476523, "grad_norm": 0.5650732517242432, "learning_rate": 0.0002952642908395121, "loss": 0.3773, "step": 835 }, { "epoch": 0.11716888577435179, "grad_norm": 0.713193416595459, "learning_rate": 0.0002952499402056924, "loss": 0.3845, "step": 836 }, { "epoch": 0.11730903994393833, "grad_norm": 0.7967789173126221, "learning_rate": 0.00029523558957187274, "loss": 0.4202, "step": 837 }, { "epoch": 0.11744919411352488, "grad_norm": 0.6098101139068604, "learning_rate": 0.00029522123893805307, "loss": 0.4096, "step": 838 }, { "epoch": 0.11758934828311142, "grad_norm": 0.8075692057609558, "learning_rate": 0.0002952068883042334, "loss": 0.318, "step": 839 }, { "epoch": 0.11772950245269796, "grad_norm": 0.9130297899246216, "learning_rate": 0.00029519253767041373, 
"loss": 0.2853, "step": 840 }, { "epoch": 0.11786965662228452, "grad_norm": 1.4829981327056885, "learning_rate": 0.0002951781870365941, "loss": 0.494, "step": 841 }, { "epoch": 0.11800981079187106, "grad_norm": 0.7205755710601807, "learning_rate": 0.00029516383640277444, "loss": 0.3676, "step": 842 }, { "epoch": 0.1181499649614576, "grad_norm": 1.1660726070404053, "learning_rate": 0.00029514948576895477, "loss": 0.3481, "step": 843 }, { "epoch": 0.11829011913104415, "grad_norm": 2.141003131866455, "learning_rate": 0.00029513513513513515, "loss": 0.4338, "step": 844 }, { "epoch": 0.11843027330063069, "grad_norm": 1.3865693807601929, "learning_rate": 0.0002951207845013155, "loss": 0.673, "step": 845 }, { "epoch": 0.11857042747021723, "grad_norm": 1.0828391313552856, "learning_rate": 0.0002951064338674958, "loss": 0.5126, "step": 846 }, { "epoch": 0.11871058163980379, "grad_norm": 1.4041882753372192, "learning_rate": 0.00029509208323367614, "loss": 0.4598, "step": 847 }, { "epoch": 0.11885073580939033, "grad_norm": 1.1987242698669434, "learning_rate": 0.00029507773259985646, "loss": 0.4092, "step": 848 }, { "epoch": 0.11899088997897687, "grad_norm": 1.8898736238479614, "learning_rate": 0.0002950633819660368, "loss": 0.5179, "step": 849 }, { "epoch": 0.11913104414856342, "grad_norm": 2.226663827896118, "learning_rate": 0.0002950490313322171, "loss": 0.7412, "step": 850 }, { "epoch": 0.11927119831814996, "grad_norm": 0.7179242372512817, "learning_rate": 0.0002950346806983975, "loss": 0.451, "step": 851 }, { "epoch": 0.11941135248773652, "grad_norm": 0.9541609883308411, "learning_rate": 0.00029502033006457783, "loss": 0.426, "step": 852 }, { "epoch": 0.11955150665732306, "grad_norm": 0.8750336170196533, "learning_rate": 0.00029500597943075816, "loss": 0.4546, "step": 853 }, { "epoch": 0.1196916608269096, "grad_norm": 0.44224628806114197, "learning_rate": 0.00029499162879693854, "loss": 0.4102, "step": 854 }, { "epoch": 0.11983181499649614, "grad_norm": 
0.7057518362998962, "learning_rate": 0.00029497727816311887, "loss": 0.4254, "step": 855 }, { "epoch": 0.11997196916608269, "grad_norm": 0.6637173295021057, "learning_rate": 0.0002949629275292992, "loss": 0.3963, "step": 856 }, { "epoch": 0.12011212333566923, "grad_norm": 0.9395194053649902, "learning_rate": 0.00029494857689547953, "loss": 0.3936, "step": 857 }, { "epoch": 0.12025227750525579, "grad_norm": 0.42872217297554016, "learning_rate": 0.00029493422626165986, "loss": 0.2949, "step": 858 }, { "epoch": 0.12039243167484233, "grad_norm": 0.49341869354248047, "learning_rate": 0.0002949198756278402, "loss": 0.309, "step": 859 }, { "epoch": 0.12053258584442887, "grad_norm": 0.6677924394607544, "learning_rate": 0.00029490552499402057, "loss": 0.4954, "step": 860 }, { "epoch": 0.12067274001401541, "grad_norm": 0.6404086351394653, "learning_rate": 0.0002948911743602009, "loss": 0.3655, "step": 861 }, { "epoch": 0.12081289418360196, "grad_norm": 0.45750537514686584, "learning_rate": 0.0002948768237263812, "loss": 0.2629, "step": 862 }, { "epoch": 0.12095304835318851, "grad_norm": 0.7091655731201172, "learning_rate": 0.00029486247309256156, "loss": 0.3562, "step": 863 }, { "epoch": 0.12109320252277506, "grad_norm": 0.7352771759033203, "learning_rate": 0.0002948481224587419, "loss": 0.4015, "step": 864 }, { "epoch": 0.1212333566923616, "grad_norm": 0.4700618386268616, "learning_rate": 0.0002948337718249222, "loss": 0.3375, "step": 865 }, { "epoch": 0.12137351086194814, "grad_norm": 0.6827343106269836, "learning_rate": 0.00029481942119110254, "loss": 0.4001, "step": 866 }, { "epoch": 0.12151366503153468, "grad_norm": 0.6253527402877808, "learning_rate": 0.0002948050705572829, "loss": 0.3327, "step": 867 }, { "epoch": 0.12165381920112123, "grad_norm": 0.5779173970222473, "learning_rate": 0.00029479071992346325, "loss": 0.3193, "step": 868 }, { "epoch": 0.12179397337070778, "grad_norm": 0.8246491551399231, "learning_rate": 0.0002947763692896436, "loss": 0.3463, "step": 869 
}, { "epoch": 0.12193412754029433, "grad_norm": 0.8151153326034546, "learning_rate": 0.00029476201865582396, "loss": 0.3097, "step": 870 }, { "epoch": 0.12207428170988087, "grad_norm": 0.5913914442062378, "learning_rate": 0.0002947476680220043, "loss": 0.3373, "step": 871 }, { "epoch": 0.12221443587946741, "grad_norm": 0.76949143409729, "learning_rate": 0.0002947333173881846, "loss": 0.2267, "step": 872 }, { "epoch": 0.12235459004905395, "grad_norm": 0.4098033010959625, "learning_rate": 0.00029471896675436495, "loss": 0.3199, "step": 873 }, { "epoch": 0.12249474421864051, "grad_norm": 0.7013890147209167, "learning_rate": 0.0002947046161205453, "loss": 0.3377, "step": 874 }, { "epoch": 0.12263489838822705, "grad_norm": 0.41884222626686096, "learning_rate": 0.0002946902654867256, "loss": 0.2448, "step": 875 }, { "epoch": 0.1227750525578136, "grad_norm": 0.4423123598098755, "learning_rate": 0.000294675914852906, "loss": 0.2241, "step": 876 }, { "epoch": 0.12291520672740014, "grad_norm": 0.49844491481781006, "learning_rate": 0.0002946615642190863, "loss": 0.2462, "step": 877 }, { "epoch": 0.12305536089698668, "grad_norm": 0.7215579152107239, "learning_rate": 0.00029464721358526665, "loss": 0.2465, "step": 878 }, { "epoch": 0.12319551506657322, "grad_norm": 0.7288787364959717, "learning_rate": 0.00029463286295144703, "loss": 0.2142, "step": 879 }, { "epoch": 0.12333566923615978, "grad_norm": 0.6424992084503174, "learning_rate": 0.00029461851231762736, "loss": 0.2674, "step": 880 }, { "epoch": 0.12347582340574632, "grad_norm": 0.5809525847434998, "learning_rate": 0.0002946041616838077, "loss": 0.2308, "step": 881 }, { "epoch": 0.12361597757533287, "grad_norm": 0.5676800608634949, "learning_rate": 0.000294589811049988, "loss": 0.2667, "step": 882 }, { "epoch": 0.12375613174491941, "grad_norm": 0.8107067346572876, "learning_rate": 0.00029457546041616834, "loss": 0.3554, "step": 883 }, { "epoch": 0.12389628591450595, "grad_norm": 0.7875389456748962, "learning_rate": 
0.00029456110978234867, "loss": 0.3677, "step": 884 }, { "epoch": 0.12403644008409251, "grad_norm": 1.7270439863204956, "learning_rate": 0.000294546759148529, "loss": 0.3136, "step": 885 }, { "epoch": 0.12417659425367905, "grad_norm": 0.5410801768302917, "learning_rate": 0.0002945324085147094, "loss": 0.2402, "step": 886 }, { "epoch": 0.1243167484232656, "grad_norm": 0.6522603631019592, "learning_rate": 0.0002945180578808897, "loss": 0.3489, "step": 887 }, { "epoch": 0.12445690259285214, "grad_norm": 0.9362044334411621, "learning_rate": 0.00029450370724707004, "loss": 0.3319, "step": 888 }, { "epoch": 0.12459705676243868, "grad_norm": 1.1232060194015503, "learning_rate": 0.0002944893566132504, "loss": 0.2982, "step": 889 }, { "epoch": 0.12473721093202522, "grad_norm": 0.7525444030761719, "learning_rate": 0.00029447500597943075, "loss": 0.3529, "step": 890 }, { "epoch": 0.12487736510161178, "grad_norm": 0.5798377990722656, "learning_rate": 0.0002944606553456111, "loss": 0.3072, "step": 891 }, { "epoch": 0.1250175192711983, "grad_norm": 1.2238162755966187, "learning_rate": 0.0002944463047117914, "loss": 0.4976, "step": 892 }, { "epoch": 0.12515767344078485, "grad_norm": 0.6953086256980896, "learning_rate": 0.00029443195407797174, "loss": 0.27, "step": 893 }, { "epoch": 0.12529782761037142, "grad_norm": 0.41317448019981384, "learning_rate": 0.00029441760344415207, "loss": 0.2135, "step": 894 }, { "epoch": 0.12543798177995796, "grad_norm": 1.0593888759613037, "learning_rate": 0.00029440325281033245, "loss": 0.2639, "step": 895 }, { "epoch": 0.1255781359495445, "grad_norm": 1.0215004682540894, "learning_rate": 0.0002943889021765128, "loss": 0.2244, "step": 896 }, { "epoch": 0.12571829011913105, "grad_norm": 1.18927800655365, "learning_rate": 0.0002943745515426931, "loss": 0.3516, "step": 897 }, { "epoch": 0.1258584442887176, "grad_norm": 1.8573057651519775, "learning_rate": 0.00029436020090887343, "loss": 0.4673, "step": 898 }, { "epoch": 0.12599859845830413, 
"grad_norm": 1.3714181184768677, "learning_rate": 0.0002943458502750538, "loss": 0.2739, "step": 899 }, { "epoch": 0.12613875262789068, "grad_norm": 2.108304500579834, "learning_rate": 0.00029433149964123415, "loss": 0.4144, "step": 900 }, { "epoch": 0.12627890679747722, "grad_norm": 0.6717967987060547, "learning_rate": 0.0002943171490074145, "loss": 0.315, "step": 901 }, { "epoch": 0.12641906096706376, "grad_norm": 0.7445950508117676, "learning_rate": 0.0002943027983735948, "loss": 0.3095, "step": 902 }, { "epoch": 0.1265592151366503, "grad_norm": 0.7132701873779297, "learning_rate": 0.00029428844773977513, "loss": 0.2809, "step": 903 }, { "epoch": 0.12669936930623685, "grad_norm": 0.8447664976119995, "learning_rate": 0.00029427409710595546, "loss": 0.3618, "step": 904 }, { "epoch": 0.12683952347582342, "grad_norm": 0.8230907917022705, "learning_rate": 0.00029425974647213584, "loss": 0.2536, "step": 905 }, { "epoch": 0.12697967764540996, "grad_norm": 1.2871246337890625, "learning_rate": 0.00029424539583831617, "loss": 0.4985, "step": 906 }, { "epoch": 0.1271198318149965, "grad_norm": 0.39408767223358154, "learning_rate": 0.0002942310452044965, "loss": 0.2835, "step": 907 }, { "epoch": 0.12725998598458305, "grad_norm": 0.4728454351425171, "learning_rate": 0.0002942166945706769, "loss": 0.3499, "step": 908 }, { "epoch": 0.1274001401541696, "grad_norm": 1.3943095207214355, "learning_rate": 0.0002942023439368572, "loss": 0.3032, "step": 909 }, { "epoch": 0.12754029432375613, "grad_norm": 0.6031758785247803, "learning_rate": 0.00029418799330303754, "loss": 0.2535, "step": 910 }, { "epoch": 0.12768044849334267, "grad_norm": 0.8355891108512878, "learning_rate": 0.00029417364266921787, "loss": 0.3006, "step": 911 }, { "epoch": 0.12782060266292922, "grad_norm": 0.7548608779907227, "learning_rate": 0.0002941592920353982, "loss": 0.3633, "step": 912 }, { "epoch": 0.12796075683251576, "grad_norm": 0.839979350566864, "learning_rate": 0.0002941449414015785, "loss": 0.3087, 
"step": 913 }, { "epoch": 0.1281009110021023, "grad_norm": 0.6054525971412659, "learning_rate": 0.0002941305907677589, "loss": 0.2655, "step": 914 }, { "epoch": 0.12824106517168885, "grad_norm": 0.6736500859260559, "learning_rate": 0.00029411624013393924, "loss": 0.3425, "step": 915 }, { "epoch": 0.12838121934127542, "grad_norm": 0.6754444241523743, "learning_rate": 0.00029410188950011957, "loss": 0.3403, "step": 916 }, { "epoch": 0.12852137351086196, "grad_norm": 0.580228328704834, "learning_rate": 0.0002940875388662999, "loss": 0.2767, "step": 917 }, { "epoch": 0.1286615276804485, "grad_norm": 1.5820302963256836, "learning_rate": 0.0002940731882324803, "loss": 0.3742, "step": 918 }, { "epoch": 0.12880168185003504, "grad_norm": 0.7250005602836609, "learning_rate": 0.0002940588375986606, "loss": 0.2758, "step": 919 }, { "epoch": 0.1289418360196216, "grad_norm": 0.9791303873062134, "learning_rate": 0.00029404448696484093, "loss": 0.3885, "step": 920 }, { "epoch": 0.12908199018920813, "grad_norm": 0.3761806786060333, "learning_rate": 0.00029403013633102126, "loss": 0.2247, "step": 921 }, { "epoch": 0.12922214435879467, "grad_norm": 0.4687352478504181, "learning_rate": 0.0002940157856972016, "loss": 0.2165, "step": 922 }, { "epoch": 0.12936229852838121, "grad_norm": 0.7041298747062683, "learning_rate": 0.0002940014350633819, "loss": 0.2938, "step": 923 }, { "epoch": 0.12950245269796776, "grad_norm": 0.5893972516059875, "learning_rate": 0.0002939870844295623, "loss": 0.2835, "step": 924 }, { "epoch": 0.1296426068675543, "grad_norm": 0.9080823063850403, "learning_rate": 0.00029397273379574263, "loss": 0.2739, "step": 925 }, { "epoch": 0.12978276103714084, "grad_norm": 1.0706671476364136, "learning_rate": 0.00029395838316192296, "loss": 0.2591, "step": 926 }, { "epoch": 0.1299229152067274, "grad_norm": 0.9298347234725952, "learning_rate": 0.0002939440325281033, "loss": 0.347, "step": 927 }, { "epoch": 0.13006306937631396, "grad_norm": 0.8759827017784119, "learning_rate": 
0.0002939296818942836, "loss": 0.3267, "step": 928 }, { "epoch": 0.1302032235459005, "grad_norm": 0.4727971851825714, "learning_rate": 0.00029391533126046395, "loss": 0.2267, "step": 929 }, { "epoch": 0.13034337771548704, "grad_norm": 0.754137396812439, "learning_rate": 0.00029390098062664433, "loss": 0.2867, "step": 930 }, { "epoch": 0.13048353188507358, "grad_norm": 0.5513168573379517, "learning_rate": 0.00029388662999282466, "loss": 0.2234, "step": 931 }, { "epoch": 0.13062368605466013, "grad_norm": 0.6047320365905762, "learning_rate": 0.000293872279359005, "loss": 0.2005, "step": 932 }, { "epoch": 0.13076384022424667, "grad_norm": 1.0456677675247192, "learning_rate": 0.0002938579287251853, "loss": 0.3261, "step": 933 }, { "epoch": 0.1309039943938332, "grad_norm": 0.5368068218231201, "learning_rate": 0.0002938435780913657, "loss": 0.3023, "step": 934 }, { "epoch": 0.13104414856341975, "grad_norm": 0.32677504420280457, "learning_rate": 0.000293829227457546, "loss": 0.2282, "step": 935 }, { "epoch": 0.1311843027330063, "grad_norm": 0.8589951395988464, "learning_rate": 0.00029381487682372635, "loss": 0.2576, "step": 936 }, { "epoch": 0.13132445690259284, "grad_norm": 0.8622442483901978, "learning_rate": 0.0002938005261899067, "loss": 0.2456, "step": 937 }, { "epoch": 0.1314646110721794, "grad_norm": 0.7158130407333374, "learning_rate": 0.000293786175556087, "loss": 0.3435, "step": 938 }, { "epoch": 0.13160476524176595, "grad_norm": 0.965425968170166, "learning_rate": 0.00029377182492226734, "loss": 0.327, "step": 939 }, { "epoch": 0.1317449194113525, "grad_norm": 0.8095971941947937, "learning_rate": 0.0002937574742884477, "loss": 0.2978, "step": 940 }, { "epoch": 0.13188507358093904, "grad_norm": 0.8291435241699219, "learning_rate": 0.00029374312365462805, "loss": 0.3361, "step": 941 }, { "epoch": 0.13202522775052558, "grad_norm": 0.5811228156089783, "learning_rate": 0.0002937287730208084, "loss": 0.2784, "step": 942 }, { "epoch": 0.13216538192011212, "grad_norm": 
0.7495807409286499, "learning_rate": 0.00029371442238698876, "loss": 0.2987, "step": 943 }, { "epoch": 0.13230553608969867, "grad_norm": 0.980492115020752, "learning_rate": 0.0002937000717531691, "loss": 0.4014, "step": 944 }, { "epoch": 0.1324456902592852, "grad_norm": 0.9034101366996765, "learning_rate": 0.0002936857211193494, "loss": 0.1718, "step": 945 }, { "epoch": 0.13258584442887175, "grad_norm": 1.4471925497055054, "learning_rate": 0.00029367137048552975, "loss": 0.3174, "step": 946 }, { "epoch": 0.1327259985984583, "grad_norm": 3.6291677951812744, "learning_rate": 0.0002936570198517101, "loss": 0.4778, "step": 947 }, { "epoch": 0.13286615276804484, "grad_norm": 0.8271286487579346, "learning_rate": 0.0002936426692178904, "loss": 0.2167, "step": 948 }, { "epoch": 0.1330063069376314, "grad_norm": 4.98697566986084, "learning_rate": 0.00029362831858407073, "loss": 0.879, "step": 949 }, { "epoch": 0.13314646110721795, "grad_norm": 3.502244710922241, "learning_rate": 0.0002936139679502511, "loss": 1.2223, "step": 950 }, { "epoch": 0.1332866152768045, "grad_norm": 0.7069107890129089, "learning_rate": 0.00029359961731643144, "loss": 0.3105, "step": 951 }, { "epoch": 0.13342676944639104, "grad_norm": 0.5436837673187256, "learning_rate": 0.0002935852666826118, "loss": 0.2988, "step": 952 }, { "epoch": 0.13356692361597758, "grad_norm": 0.6500002145767212, "learning_rate": 0.00029357091604879216, "loss": 0.405, "step": 953 }, { "epoch": 0.13370707778556412, "grad_norm": 0.39849650859832764, "learning_rate": 0.0002935565654149725, "loss": 0.2854, "step": 954 }, { "epoch": 0.13384723195515066, "grad_norm": 0.5219706296920776, "learning_rate": 0.0002935422147811528, "loss": 0.2648, "step": 955 }, { "epoch": 0.1339873861247372, "grad_norm": 0.6016669869422913, "learning_rate": 0.00029352786414733314, "loss": 0.3059, "step": 956 }, { "epoch": 0.13412754029432375, "grad_norm": 0.6434925198554993, "learning_rate": 0.00029351351351351347, "loss": 0.3159, "step": 957 }, { 
"epoch": 0.1342676944639103, "grad_norm": 0.9862679839134216, "learning_rate": 0.0002934991628796938, "loss": 0.3284, "step": 958 }, { "epoch": 0.13440784863349683, "grad_norm": 0.5369930267333984, "learning_rate": 0.0002934848122458742, "loss": 0.3041, "step": 959 }, { "epoch": 0.1345480028030834, "grad_norm": 0.680402934551239, "learning_rate": 0.0002934704616120545, "loss": 0.3951, "step": 960 }, { "epoch": 0.13468815697266995, "grad_norm": 0.49302855134010315, "learning_rate": 0.00029345611097823484, "loss": 0.3065, "step": 961 }, { "epoch": 0.1348283111422565, "grad_norm": 0.3662201762199402, "learning_rate": 0.0002934417603444152, "loss": 0.2148, "step": 962 }, { "epoch": 0.13496846531184303, "grad_norm": 1.3564612865447998, "learning_rate": 0.00029342740971059555, "loss": 0.2587, "step": 963 }, { "epoch": 0.13510861948142958, "grad_norm": 0.9542006254196167, "learning_rate": 0.0002934130590767759, "loss": 0.3687, "step": 964 }, { "epoch": 0.13524877365101612, "grad_norm": 0.8196474313735962, "learning_rate": 0.0002933987084429562, "loss": 0.273, "step": 965 }, { "epoch": 0.13538892782060266, "grad_norm": 0.4538367986679077, "learning_rate": 0.00029338435780913654, "loss": 0.1551, "step": 966 }, { "epoch": 0.1355290819901892, "grad_norm": 0.537915050983429, "learning_rate": 0.00029337000717531686, "loss": 0.3536, "step": 967 }, { "epoch": 0.13566923615977575, "grad_norm": 0.5604998469352722, "learning_rate": 0.0002933556565414972, "loss": 0.2973, "step": 968 }, { "epoch": 0.1358093903293623, "grad_norm": 0.6562977433204651, "learning_rate": 0.0002933413059076776, "loss": 0.2933, "step": 969 }, { "epoch": 0.13594954449894883, "grad_norm": 0.5113474130630493, "learning_rate": 0.0002933269552738579, "loss": 0.3259, "step": 970 }, { "epoch": 0.1360896986685354, "grad_norm": 0.576876699924469, "learning_rate": 0.00029331260464003823, "loss": 0.2725, "step": 971 }, { "epoch": 0.13622985283812195, "grad_norm": 0.7563326954841614, "learning_rate": 
0.0002932982540062186, "loss": 0.2997, "step": 972 }, { "epoch": 0.1363700070077085, "grad_norm": 0.30106568336486816, "learning_rate": 0.00029328390337239894, "loss": 0.1572, "step": 973 }, { "epoch": 0.13651016117729503, "grad_norm": 1.384362816810608, "learning_rate": 0.00029326955273857927, "loss": 0.2946, "step": 974 }, { "epoch": 0.13665031534688157, "grad_norm": 0.5033469200134277, "learning_rate": 0.0002932552021047596, "loss": 0.2046, "step": 975 }, { "epoch": 0.13679046951646812, "grad_norm": 0.6536031365394592, "learning_rate": 0.00029324085147093993, "loss": 0.3499, "step": 976 }, { "epoch": 0.13693062368605466, "grad_norm": 0.5294567942619324, "learning_rate": 0.00029322650083712026, "loss": 0.2348, "step": 977 }, { "epoch": 0.1370707778556412, "grad_norm": 0.9334960579872131, "learning_rate": 0.00029321215020330064, "loss": 0.2194, "step": 978 }, { "epoch": 0.13721093202522774, "grad_norm": 0.5313480496406555, "learning_rate": 0.00029319779956948097, "loss": 0.1931, "step": 979 }, { "epoch": 0.1373510861948143, "grad_norm": 0.4705626368522644, "learning_rate": 0.0002931834489356613, "loss": 0.1537, "step": 980 }, { "epoch": 0.13749124036440083, "grad_norm": 0.7432977557182312, "learning_rate": 0.0002931690983018417, "loss": 0.3833, "step": 981 }, { "epoch": 0.13763139453398737, "grad_norm": 0.6052164435386658, "learning_rate": 0.000293154747668022, "loss": 0.2779, "step": 982 }, { "epoch": 0.13777154870357394, "grad_norm": 0.671550452709198, "learning_rate": 0.00029314039703420234, "loss": 0.3163, "step": 983 }, { "epoch": 0.13791170287316049, "grad_norm": 0.8387758731842041, "learning_rate": 0.00029312604640038267, "loss": 0.2762, "step": 984 }, { "epoch": 0.13805185704274703, "grad_norm": 0.6830567121505737, "learning_rate": 0.000293111695766563, "loss": 0.3395, "step": 985 }, { "epoch": 0.13819201121233357, "grad_norm": 0.9115906357765198, "learning_rate": 0.0002930973451327433, "loss": 0.2845, "step": 986 }, { "epoch": 0.1383321653819201, 
"grad_norm": 1.1519874334335327, "learning_rate": 0.00029308299449892365, "loss": 0.2206, "step": 987 }, { "epoch": 0.13847231955150666, "grad_norm": 0.5526348352432251, "learning_rate": 0.00029306864386510404, "loss": 0.2042, "step": 988 }, { "epoch": 0.1386124737210932, "grad_norm": 0.9735465049743652, "learning_rate": 0.00029305429323128436, "loss": 0.3277, "step": 989 }, { "epoch": 0.13875262789067974, "grad_norm": 0.7076131701469421, "learning_rate": 0.0002930399425974647, "loss": 0.2557, "step": 990 }, { "epoch": 0.13889278206026628, "grad_norm": 0.8041265606880188, "learning_rate": 0.000293025591963645, "loss": 0.2214, "step": 991 }, { "epoch": 0.13903293622985283, "grad_norm": 0.5854584574699402, "learning_rate": 0.00029301124132982535, "loss": 0.2721, "step": 992 }, { "epoch": 0.13917309039943937, "grad_norm": 0.9238445162773132, "learning_rate": 0.0002929968906960057, "loss": 0.2782, "step": 993 }, { "epoch": 0.13931324456902594, "grad_norm": 2.3666505813598633, "learning_rate": 0.00029298254006218606, "loss": 0.411, "step": 994 }, { "epoch": 0.13945339873861248, "grad_norm": 1.6303638219833374, "learning_rate": 0.0002929681894283664, "loss": 0.3095, "step": 995 }, { "epoch": 0.13959355290819903, "grad_norm": 0.9080213904380798, "learning_rate": 0.0002929538387945467, "loss": 0.2901, "step": 996 }, { "epoch": 0.13973370707778557, "grad_norm": 1.881140112876892, "learning_rate": 0.0002929394881607271, "loss": 0.3931, "step": 997 }, { "epoch": 0.1398738612473721, "grad_norm": 1.4179917573928833, "learning_rate": 0.00029292513752690743, "loss": 0.3517, "step": 998 }, { "epoch": 0.14001401541695865, "grad_norm": 1.5313589572906494, "learning_rate": 0.00029291078689308776, "loss": 0.2621, "step": 999 }, { "epoch": 0.1401541695865452, "grad_norm": 2.040766716003418, "learning_rate": 0.0002928964362592681, "loss": 0.2916, "step": 1000 }, { "epoch": 0.14029432375613174, "grad_norm": 0.6564692854881287, "learning_rate": 0.0002928820856254484, "loss": 0.2523, 
"step": 1001 }, { "epoch": 0.14043447792571828, "grad_norm": 0.7949646711349487, "learning_rate": 0.00029286773499162874, "loss": 0.2727, "step": 1002 }, { "epoch": 0.14057463209530482, "grad_norm": 0.9629088640213013, "learning_rate": 0.00029285338435780907, "loss": 0.2348, "step": 1003 }, { "epoch": 0.14071478626489137, "grad_norm": 0.9612164497375488, "learning_rate": 0.00029283903372398945, "loss": 0.2592, "step": 1004 }, { "epoch": 0.14085494043447794, "grad_norm": 0.8088884353637695, "learning_rate": 0.0002928246830901698, "loss": 0.2401, "step": 1005 }, { "epoch": 0.14099509460406448, "grad_norm": 0.6948018074035645, "learning_rate": 0.0002928103324563501, "loss": 0.2583, "step": 1006 }, { "epoch": 0.14113524877365102, "grad_norm": 0.5473814606666565, "learning_rate": 0.0002927959818225305, "loss": 0.2481, "step": 1007 }, { "epoch": 0.14127540294323757, "grad_norm": 0.4820880889892578, "learning_rate": 0.0002927816311887108, "loss": 0.2994, "step": 1008 }, { "epoch": 0.1414155571128241, "grad_norm": 0.46331194043159485, "learning_rate": 0.00029276728055489115, "loss": 0.1575, "step": 1009 }, { "epoch": 0.14155571128241065, "grad_norm": 0.6499099731445312, "learning_rate": 0.0002927529299210715, "loss": 0.2881, "step": 1010 }, { "epoch": 0.1416958654519972, "grad_norm": 1.3492460250854492, "learning_rate": 0.0002927385792872518, "loss": 0.2339, "step": 1011 }, { "epoch": 0.14183601962158374, "grad_norm": 0.37316715717315674, "learning_rate": 0.00029272422865343214, "loss": 0.1807, "step": 1012 }, { "epoch": 0.14197617379117028, "grad_norm": 0.9624108076095581, "learning_rate": 0.0002927098780196125, "loss": 0.3353, "step": 1013 }, { "epoch": 0.14211632796075682, "grad_norm": 0.8035147786140442, "learning_rate": 0.00029269552738579285, "loss": 0.2025, "step": 1014 }, { "epoch": 0.14225648213034336, "grad_norm": 0.4931930899620056, "learning_rate": 0.0002926811767519732, "loss": 0.2892, "step": 1015 }, { "epoch": 0.14239663629992994, "grad_norm": 
0.5515649318695068, "learning_rate": 0.00029266682611815356, "loss": 0.219, "step": 1016 }, { "epoch": 0.14253679046951648, "grad_norm": 0.5605678558349609, "learning_rate": 0.0002926524754843339, "loss": 0.2067, "step": 1017 }, { "epoch": 0.14267694463910302, "grad_norm": 0.772860050201416, "learning_rate": 0.0002926381248505142, "loss": 0.2783, "step": 1018 }, { "epoch": 0.14281709880868956, "grad_norm": 0.5818120837211609, "learning_rate": 0.00029262377421669455, "loss": 0.2423, "step": 1019 }, { "epoch": 0.1429572529782761, "grad_norm": 0.8352418541908264, "learning_rate": 0.0002926094235828749, "loss": 0.1817, "step": 1020 }, { "epoch": 0.14309740714786265, "grad_norm": 0.4489438235759735, "learning_rate": 0.0002925950729490552, "loss": 0.2192, "step": 1021 }, { "epoch": 0.1432375613174492, "grad_norm": 0.6198673248291016, "learning_rate": 0.00029258072231523553, "loss": 0.185, "step": 1022 }, { "epoch": 0.14337771548703573, "grad_norm": 0.5971577167510986, "learning_rate": 0.0002925663716814159, "loss": 0.3252, "step": 1023 }, { "epoch": 0.14351786965662228, "grad_norm": 0.8665616512298584, "learning_rate": 0.00029255202104759624, "loss": 0.2205, "step": 1024 }, { "epoch": 0.14365802382620882, "grad_norm": 1.0874003171920776, "learning_rate": 0.00029253767041377657, "loss": 0.2067, "step": 1025 }, { "epoch": 0.14379817799579536, "grad_norm": 0.5821864604949951, "learning_rate": 0.00029252331977995695, "loss": 0.287, "step": 1026 }, { "epoch": 0.14393833216538193, "grad_norm": 0.3562723994255066, "learning_rate": 0.0002925089691461373, "loss": 0.216, "step": 1027 }, { "epoch": 0.14407848633496848, "grad_norm": 0.5792339444160461, "learning_rate": 0.0002924946185123176, "loss": 0.2107, "step": 1028 }, { "epoch": 0.14421864050455502, "grad_norm": 0.6203877925872803, "learning_rate": 0.00029248026787849794, "loss": 0.1572, "step": 1029 }, { "epoch": 0.14435879467414156, "grad_norm": 0.5179003477096558, "learning_rate": 0.00029246591724467827, "loss": 0.2192, 
"step": 1030 }, { "epoch": 0.1444989488437281, "grad_norm": 0.4894259572029114, "learning_rate": 0.0002924515666108586, "loss": 0.2466, "step": 1031 }, { "epoch": 0.14463910301331465, "grad_norm": 0.6231358647346497, "learning_rate": 0.000292437215977039, "loss": 0.1881, "step": 1032 }, { "epoch": 0.1447792571829012, "grad_norm": 0.5783756971359253, "learning_rate": 0.0002924228653432193, "loss": 0.2478, "step": 1033 }, { "epoch": 0.14491941135248773, "grad_norm": 1.7217965126037598, "learning_rate": 0.00029240851470939964, "loss": 0.1941, "step": 1034 }, { "epoch": 0.14505956552207427, "grad_norm": 0.38905397057533264, "learning_rate": 0.00029239416407558, "loss": 0.1675, "step": 1035 }, { "epoch": 0.14519971969166082, "grad_norm": 0.638530969619751, "learning_rate": 0.00029237981344176035, "loss": 0.2163, "step": 1036 }, { "epoch": 0.14533987386124736, "grad_norm": 0.4328227937221527, "learning_rate": 0.0002923654628079407, "loss": 0.147, "step": 1037 }, { "epoch": 0.14548002803083393, "grad_norm": 1.1534838676452637, "learning_rate": 0.000292351112174121, "loss": 0.184, "step": 1038 }, { "epoch": 0.14562018220042047, "grad_norm": 0.5368075370788574, "learning_rate": 0.00029233676154030133, "loss": 0.1647, "step": 1039 }, { "epoch": 0.14576033637000702, "grad_norm": 0.3025093078613281, "learning_rate": 0.00029232241090648166, "loss": 0.0858, "step": 1040 }, { "epoch": 0.14590049053959356, "grad_norm": 0.7990778684616089, "learning_rate": 0.000292308060272662, "loss": 0.2087, "step": 1041 }, { "epoch": 0.1460406447091801, "grad_norm": 1.5308758020401, "learning_rate": 0.0002922937096388424, "loss": 0.3255, "step": 1042 }, { "epoch": 0.14618079887876664, "grad_norm": 2.1593265533447266, "learning_rate": 0.0002922793590050227, "loss": 0.2336, "step": 1043 }, { "epoch": 0.1463209530483532, "grad_norm": 1.7676151990890503, "learning_rate": 0.00029226500837120303, "loss": 0.3244, "step": 1044 }, { "epoch": 0.14646110721793973, "grad_norm": 0.5936906337738037, 
"learning_rate": 0.0002922506577373834, "loss": 0.1466, "step": 1045 }, { "epoch": 0.14660126138752627, "grad_norm": 0.7183467745780945, "learning_rate": 0.00029223630710356374, "loss": 0.1865, "step": 1046 }, { "epoch": 0.14674141555711281, "grad_norm": 1.5002034902572632, "learning_rate": 0.00029222195646974407, "loss": 0.2901, "step": 1047 }, { "epoch": 0.14688156972669936, "grad_norm": 0.51473468542099, "learning_rate": 0.0002922076058359244, "loss": 0.1346, "step": 1048 }, { "epoch": 0.14702172389628593, "grad_norm": 1.5852375030517578, "learning_rate": 0.00029219325520210473, "loss": 0.2854, "step": 1049 }, { "epoch": 0.14716187806587247, "grad_norm": 1.9233812093734741, "learning_rate": 0.00029217890456828506, "loss": 0.2318, "step": 1050 }, { "epoch": 0.147302032235459, "grad_norm": 0.4522407352924347, "learning_rate": 0.00029216455393446544, "loss": 0.1817, "step": 1051 }, { "epoch": 0.14744218640504556, "grad_norm": 0.5314527153968811, "learning_rate": 0.00029215020330064577, "loss": 0.2001, "step": 1052 }, { "epoch": 0.1475823405746321, "grad_norm": 0.45209234952926636, "learning_rate": 0.0002921358526668261, "loss": 0.2472, "step": 1053 }, { "epoch": 0.14772249474421864, "grad_norm": 0.3947276473045349, "learning_rate": 0.0002921215020330064, "loss": 0.2458, "step": 1054 }, { "epoch": 0.14786264891380518, "grad_norm": 0.7615723013877869, "learning_rate": 0.00029210715139918675, "loss": 0.2498, "step": 1055 }, { "epoch": 0.14800280308339173, "grad_norm": 0.4284062683582306, "learning_rate": 0.0002920928007653671, "loss": 0.2253, "step": 1056 }, { "epoch": 0.14814295725297827, "grad_norm": 0.690561830997467, "learning_rate": 0.0002920784501315474, "loss": 0.2275, "step": 1057 }, { "epoch": 0.1482831114225648, "grad_norm": 0.3718281090259552, "learning_rate": 0.0002920640994977278, "loss": 0.1717, "step": 1058 }, { "epoch": 0.14842326559215135, "grad_norm": 0.882584810256958, "learning_rate": 0.0002920497488639081, "loss": 0.2968, "step": 1059 }, { 
"epoch": 0.14856341976173792, "grad_norm": 0.8743811249732971, "learning_rate": 0.00029203539823008845, "loss": 0.2523, "step": 1060 }, { "epoch": 0.14870357393132447, "grad_norm": 0.6886244416236877, "learning_rate": 0.00029202104759626883, "loss": 0.2255, "step": 1061 }, { "epoch": 0.148843728100911, "grad_norm": 0.2720070481300354, "learning_rate": 0.00029200669696244916, "loss": 0.1723, "step": 1062 }, { "epoch": 0.14898388227049755, "grad_norm": 0.4363411068916321, "learning_rate": 0.0002919923463286295, "loss": 0.2229, "step": 1063 }, { "epoch": 0.1491240364400841, "grad_norm": 0.4638037085533142, "learning_rate": 0.0002919779956948098, "loss": 0.1472, "step": 1064 }, { "epoch": 0.14926419060967064, "grad_norm": 0.9076966643333435, "learning_rate": 0.00029196364506099015, "loss": 0.295, "step": 1065 }, { "epoch": 0.14940434477925718, "grad_norm": 0.4324796199798584, "learning_rate": 0.0002919492944271705, "loss": 0.1955, "step": 1066 }, { "epoch": 0.14954449894884372, "grad_norm": 2.00901460647583, "learning_rate": 0.00029193494379335086, "loss": 0.223, "step": 1067 }, { "epoch": 0.14968465311843027, "grad_norm": 0.5622586011886597, "learning_rate": 0.0002919205931595312, "loss": 0.1851, "step": 1068 }, { "epoch": 0.1498248072880168, "grad_norm": 0.6686916947364807, "learning_rate": 0.0002919062425257115, "loss": 0.2852, "step": 1069 }, { "epoch": 0.14996496145760335, "grad_norm": 1.70041823387146, "learning_rate": 0.0002918918918918919, "loss": 0.1535, "step": 1070 }, { "epoch": 0.15010511562718992, "grad_norm": 0.522042453289032, "learning_rate": 0.00029187754125807223, "loss": 0.2031, "step": 1071 }, { "epoch": 0.15024526979677646, "grad_norm": 1.0612695217132568, "learning_rate": 0.00029186319062425256, "loss": 0.3035, "step": 1072 }, { "epoch": 0.150385423966363, "grad_norm": 0.5846889019012451, "learning_rate": 0.0002918488399904329, "loss": 0.2559, "step": 1073 }, { "epoch": 0.15052557813594955, "grad_norm": 0.45221787691116333, "learning_rate": 
0.0002918344893566132, "loss": 0.2564, "step": 1074 }, { "epoch": 0.1506657323055361, "grad_norm": 0.4605032801628113, "learning_rate": 0.00029182013872279354, "loss": 0.2377, "step": 1075 }, { "epoch": 0.15080588647512264, "grad_norm": 0.6658544540405273, "learning_rate": 0.00029180578808897387, "loss": 0.3347, "step": 1076 }, { "epoch": 0.15094604064470918, "grad_norm": 0.8150544166564941, "learning_rate": 0.00029179143745515425, "loss": 0.2513, "step": 1077 }, { "epoch": 0.15108619481429572, "grad_norm": 0.32213959097862244, "learning_rate": 0.0002917770868213346, "loss": 0.1681, "step": 1078 }, { "epoch": 0.15122634898388226, "grad_norm": 1.1388335227966309, "learning_rate": 0.0002917627361875149, "loss": 0.3986, "step": 1079 }, { "epoch": 0.1513665031534688, "grad_norm": 0.9581372737884521, "learning_rate": 0.0002917483855536953, "loss": 0.2778, "step": 1080 }, { "epoch": 0.15150665732305535, "grad_norm": 0.6253133416175842, "learning_rate": 0.0002917340349198756, "loss": 0.2072, "step": 1081 }, { "epoch": 0.15164681149264192, "grad_norm": 0.34929677844047546, "learning_rate": 0.00029171968428605595, "loss": 0.1321, "step": 1082 }, { "epoch": 0.15178696566222846, "grad_norm": 0.3293485641479492, "learning_rate": 0.0002917053336522363, "loss": 0.1635, "step": 1083 }, { "epoch": 0.151927119831815, "grad_norm": 1.2029085159301758, "learning_rate": 0.0002916909830184166, "loss": 0.3263, "step": 1084 }, { "epoch": 0.15206727400140155, "grad_norm": 0.3753032982349396, "learning_rate": 0.00029167663238459694, "loss": 0.1284, "step": 1085 }, { "epoch": 0.1522074281709881, "grad_norm": 0.6432974338531494, "learning_rate": 0.0002916622817507773, "loss": 0.2181, "step": 1086 }, { "epoch": 0.15234758234057463, "grad_norm": 0.7160272598266602, "learning_rate": 0.00029164793111695765, "loss": 0.2578, "step": 1087 }, { "epoch": 0.15248773651016118, "grad_norm": 0.7639457583427429, "learning_rate": 0.000291633580483138, "loss": 0.2939, "step": 1088 }, { "epoch": 
0.15262789067974772, "grad_norm": 0.699263870716095, "learning_rate": 0.0002916192298493183, "loss": 0.2517, "step": 1089 }, { "epoch": 0.15276804484933426, "grad_norm": 0.5161824822425842, "learning_rate": 0.0002916048792154987, "loss": 0.1953, "step": 1090 }, { "epoch": 0.1529081990189208, "grad_norm": 0.4656703770160675, "learning_rate": 0.000291590528581679, "loss": 0.2245, "step": 1091 }, { "epoch": 0.15304835318850735, "grad_norm": 0.5346180200576782, "learning_rate": 0.00029157617794785934, "loss": 0.2257, "step": 1092 }, { "epoch": 0.15318850735809392, "grad_norm": 0.6907939314842224, "learning_rate": 0.00029156182731403967, "loss": 0.2394, "step": 1093 }, { "epoch": 0.15332866152768046, "grad_norm": 0.690552294254303, "learning_rate": 0.00029154747668022, "loss": 0.2181, "step": 1094 }, { "epoch": 0.153468815697267, "grad_norm": 0.43683817982673645, "learning_rate": 0.00029153312604640033, "loss": 0.1848, "step": 1095 }, { "epoch": 0.15360896986685355, "grad_norm": 0.48682838678359985, "learning_rate": 0.0002915187754125807, "loss": 0.1116, "step": 1096 }, { "epoch": 0.1537491240364401, "grad_norm": 1.0500648021697998, "learning_rate": 0.00029150442477876104, "loss": 0.3946, "step": 1097 }, { "epoch": 0.15388927820602663, "grad_norm": 2.116490364074707, "learning_rate": 0.00029149007414494137, "loss": 0.5003, "step": 1098 }, { "epoch": 0.15402943237561317, "grad_norm": 2.300340175628662, "learning_rate": 0.00029147572351112175, "loss": 0.638, "step": 1099 }, { "epoch": 0.15416958654519972, "grad_norm": 1.7466180324554443, "learning_rate": 0.0002914613728773021, "loss": 0.2625, "step": 1100 }, { "epoch": 0.15430974071478626, "grad_norm": 0.42691710591316223, "learning_rate": 0.0002914470222434824, "loss": 0.2048, "step": 1101 }, { "epoch": 0.1544498948843728, "grad_norm": 0.4420432448387146, "learning_rate": 0.00029143267160966274, "loss": 0.2027, "step": 1102 }, { "epoch": 0.15459004905395934, "grad_norm": 0.5113061666488647, "learning_rate": 
0.00029141832097584307, "loss": 0.1723, "step": 1103 }, { "epoch": 0.1547302032235459, "grad_norm": 0.6693875789642334, "learning_rate": 0.0002914039703420234, "loss": 0.176, "step": 1104 }, { "epoch": 0.15487035739313246, "grad_norm": 0.41856876015663147, "learning_rate": 0.0002913896197082038, "loss": 0.1345, "step": 1105 }, { "epoch": 0.155010511562719, "grad_norm": 0.8738510012626648, "learning_rate": 0.0002913752690743841, "loss": 0.2735, "step": 1106 }, { "epoch": 0.15515066573230554, "grad_norm": 0.8897049427032471, "learning_rate": 0.00029136091844056443, "loss": 0.2832, "step": 1107 }, { "epoch": 0.15529081990189209, "grad_norm": 0.48510468006134033, "learning_rate": 0.00029134656780674476, "loss": 0.1509, "step": 1108 }, { "epoch": 0.15543097407147863, "grad_norm": 0.94893878698349, "learning_rate": 0.00029133221717292515, "loss": 0.2569, "step": 1109 }, { "epoch": 0.15557112824106517, "grad_norm": 0.5670791268348694, "learning_rate": 0.0002913178665391055, "loss": 0.1519, "step": 1110 }, { "epoch": 0.1557112824106517, "grad_norm": 0.6965529918670654, "learning_rate": 0.0002913035159052858, "loss": 0.1842, "step": 1111 }, { "epoch": 0.15585143658023826, "grad_norm": 0.3937253952026367, "learning_rate": 0.00029128916527146613, "loss": 0.1999, "step": 1112 }, { "epoch": 0.1559915907498248, "grad_norm": 0.33624860644340515, "learning_rate": 0.00029127481463764646, "loss": 0.191, "step": 1113 }, { "epoch": 0.15613174491941134, "grad_norm": 0.9810729026794434, "learning_rate": 0.0002912604640038268, "loss": 0.1928, "step": 1114 }, { "epoch": 0.15627189908899788, "grad_norm": 0.638692319393158, "learning_rate": 0.00029124611337000717, "loss": 0.1824, "step": 1115 }, { "epoch": 0.15641205325858445, "grad_norm": 1.0449035167694092, "learning_rate": 0.0002912317627361875, "loss": 0.2524, "step": 1116 }, { "epoch": 0.156552207428171, "grad_norm": 0.7117040753364563, "learning_rate": 0.00029121741210236783, "loss": 0.1824, "step": 1117 }, { "epoch": 
0.15669236159775754, "grad_norm": 0.479743629693985, "learning_rate": 0.00029120306146854816, "loss": 0.2058, "step": 1118 }, { "epoch": 0.15683251576734408, "grad_norm": 0.689148485660553, "learning_rate": 0.0002911887108347285, "loss": 0.2216, "step": 1119 }, { "epoch": 0.15697266993693063, "grad_norm": 0.6677058339118958, "learning_rate": 0.0002911743602009088, "loss": 0.1864, "step": 1120 }, { "epoch": 0.15711282410651717, "grad_norm": 0.5702966451644897, "learning_rate": 0.0002911600095670892, "loss": 0.178, "step": 1121 }, { "epoch": 0.1572529782761037, "grad_norm": 0.7367022037506104, "learning_rate": 0.0002911456589332695, "loss": 0.1856, "step": 1122 }, { "epoch": 0.15739313244569025, "grad_norm": 0.5543919205665588, "learning_rate": 0.00029113130829944985, "loss": 0.1388, "step": 1123 }, { "epoch": 0.1575332866152768, "grad_norm": 0.4027252793312073, "learning_rate": 0.0002911169576656302, "loss": 0.1598, "step": 1124 }, { "epoch": 0.15767344078486334, "grad_norm": 0.5057381391525269, "learning_rate": 0.00029110260703181057, "loss": 0.1759, "step": 1125 }, { "epoch": 0.15781359495444988, "grad_norm": 0.36679747700691223, "learning_rate": 0.0002910882563979909, "loss": 0.1271, "step": 1126 }, { "epoch": 0.15795374912403645, "grad_norm": 0.36856910586357117, "learning_rate": 0.0002910739057641712, "loss": 0.0974, "step": 1127 }, { "epoch": 0.158093903293623, "grad_norm": 0.38675305247306824, "learning_rate": 0.00029105955513035155, "loss": 0.1521, "step": 1128 }, { "epoch": 0.15823405746320954, "grad_norm": 0.6520252227783203, "learning_rate": 0.0002910452044965319, "loss": 0.2322, "step": 1129 }, { "epoch": 0.15837421163279608, "grad_norm": 0.5653290748596191, "learning_rate": 0.0002910308538627122, "loss": 0.1934, "step": 1130 }, { "epoch": 0.15851436580238262, "grad_norm": 0.6227279901504517, "learning_rate": 0.0002910165032288926, "loss": 0.2423, "step": 1131 }, { "epoch": 0.15865451997196917, "grad_norm": 0.5886867642402649, "learning_rate": 
0.0002910021525950729, "loss": 0.186, "step": 1132 }, { "epoch": 0.1587946741415557, "grad_norm": 0.40995121002197266, "learning_rate": 0.00029098780196125325, "loss": 0.1735, "step": 1133 }, { "epoch": 0.15893482831114225, "grad_norm": 0.40254729986190796, "learning_rate": 0.00029097345132743363, "loss": 0.1902, "step": 1134 }, { "epoch": 0.1590749824807288, "grad_norm": 1.3046905994415283, "learning_rate": 0.00029095910069361396, "loss": 0.3026, "step": 1135 }, { "epoch": 0.15921513665031534, "grad_norm": 0.6232987642288208, "learning_rate": 0.0002909447500597943, "loss": 0.3754, "step": 1136 }, { "epoch": 0.15935529081990188, "grad_norm": 0.9278186559677124, "learning_rate": 0.0002909303994259746, "loss": 0.141, "step": 1137 }, { "epoch": 0.15949544498948845, "grad_norm": 0.37663301825523376, "learning_rate": 0.00029091604879215495, "loss": 0.1289, "step": 1138 }, { "epoch": 0.159635599159075, "grad_norm": 0.6425600647926331, "learning_rate": 0.0002909016981583353, "loss": 0.2836, "step": 1139 }, { "epoch": 0.15977575332866154, "grad_norm": 0.7579872608184814, "learning_rate": 0.00029088734752451566, "loss": 0.2834, "step": 1140 }, { "epoch": 0.15991590749824808, "grad_norm": 0.608771562576294, "learning_rate": 0.000290872996890696, "loss": 0.332, "step": 1141 }, { "epoch": 0.16005606166783462, "grad_norm": 0.6470134258270264, "learning_rate": 0.0002908586462568763, "loss": 0.2865, "step": 1142 }, { "epoch": 0.16019621583742116, "grad_norm": 0.6094939112663269, "learning_rate": 0.00029084429562305664, "loss": 0.1197, "step": 1143 }, { "epoch": 0.1603363700070077, "grad_norm": 0.7962201833724976, "learning_rate": 0.000290829944989237, "loss": 0.3345, "step": 1144 }, { "epoch": 0.16047652417659425, "grad_norm": 0.8785037994384766, "learning_rate": 0.00029081559435541735, "loss": 0.2335, "step": 1145 }, { "epoch": 0.1606166783461808, "grad_norm": 1.3294227123260498, "learning_rate": 0.0002908012437215977, "loss": 0.3035, "step": 1146 }, { "epoch": 
0.16075683251576733, "grad_norm": 0.6238111257553101, "learning_rate": 0.000290786893087778, "loss": 0.2428, "step": 1147 }, { "epoch": 0.16089698668535388, "grad_norm": 0.8492755889892578, "learning_rate": 0.00029077254245395834, "loss": 0.3282, "step": 1148 }, { "epoch": 0.16103714085494045, "grad_norm": 0.4155972898006439, "learning_rate": 0.00029075819182013867, "loss": 0.1277, "step": 1149 }, { "epoch": 0.161177295024527, "grad_norm": 2.380148410797119, "learning_rate": 0.00029074384118631905, "loss": 0.6203, "step": 1150 }, { "epoch": 0.16131744919411353, "grad_norm": 0.677034318447113, "learning_rate": 0.0002907294905524994, "loss": 0.2896, "step": 1151 }, { "epoch": 0.16145760336370008, "grad_norm": 0.45069196820259094, "learning_rate": 0.0002907151399186797, "loss": 0.2905, "step": 1152 }, { "epoch": 0.16159775753328662, "grad_norm": 0.33286333084106445, "learning_rate": 0.0002907007892848601, "loss": 0.2217, "step": 1153 }, { "epoch": 0.16173791170287316, "grad_norm": 0.49553343653678894, "learning_rate": 0.0002906864386510404, "loss": 0.1927, "step": 1154 }, { "epoch": 0.1618780658724597, "grad_norm": 0.4819639027118683, "learning_rate": 0.00029067208801722075, "loss": 0.1897, "step": 1155 }, { "epoch": 0.16201822004204625, "grad_norm": 0.35513758659362793, "learning_rate": 0.0002906577373834011, "loss": 0.1524, "step": 1156 }, { "epoch": 0.1621583742116328, "grad_norm": 0.7157977223396301, "learning_rate": 0.0002906433867495814, "loss": 0.3223, "step": 1157 }, { "epoch": 0.16229852838121933, "grad_norm": 0.41456320881843567, "learning_rate": 0.00029062903611576173, "loss": 0.2112, "step": 1158 }, { "epoch": 0.16243868255080587, "grad_norm": 0.41751816868782043, "learning_rate": 0.00029061468548194206, "loss": 0.1673, "step": 1159 }, { "epoch": 0.16257883672039244, "grad_norm": 0.4203392267227173, "learning_rate": 0.00029060033484812244, "loss": 0.1987, "step": 1160 }, { "epoch": 0.162718990889979, "grad_norm": 0.41105687618255615, "learning_rate": 
0.0002905859842143028, "loss": 0.1593, "step": 1161 }, { "epoch": 0.16285914505956553, "grad_norm": 0.8102649450302124, "learning_rate": 0.0002905716335804831, "loss": 0.276, "step": 1162 }, { "epoch": 0.16299929922915207, "grad_norm": 0.45249101519584656, "learning_rate": 0.0002905572829466635, "loss": 0.1379, "step": 1163 }, { "epoch": 0.16313945339873862, "grad_norm": 0.7759928703308105, "learning_rate": 0.0002905429323128438, "loss": 0.2219, "step": 1164 }, { "epoch": 0.16327960756832516, "grad_norm": 0.5148497819900513, "learning_rate": 0.00029052858167902414, "loss": 0.2177, "step": 1165 }, { "epoch": 0.1634197617379117, "grad_norm": 0.644158661365509, "learning_rate": 0.00029051423104520447, "loss": 0.192, "step": 1166 }, { "epoch": 0.16355991590749824, "grad_norm": 0.6915527582168579, "learning_rate": 0.0002904998804113848, "loss": 0.2209, "step": 1167 }, { "epoch": 0.16370007007708479, "grad_norm": 0.9675236344337463, "learning_rate": 0.00029048552977756513, "loss": 0.3563, "step": 1168 }, { "epoch": 0.16384022424667133, "grad_norm": 0.40057894587516785, "learning_rate": 0.0002904711791437455, "loss": 0.1739, "step": 1169 }, { "epoch": 0.16398037841625787, "grad_norm": 0.4606488049030304, "learning_rate": 0.00029045682850992584, "loss": 0.2186, "step": 1170 }, { "epoch": 0.16412053258584444, "grad_norm": 0.6207302212715149, "learning_rate": 0.00029044247787610617, "loss": 0.2411, "step": 1171 }, { "epoch": 0.16426068675543098, "grad_norm": 2.341611862182617, "learning_rate": 0.00029042812724228655, "loss": 0.1843, "step": 1172 }, { "epoch": 0.16440084092501753, "grad_norm": 0.5193768739700317, "learning_rate": 0.0002904137766084669, "loss": 0.1503, "step": 1173 }, { "epoch": 0.16454099509460407, "grad_norm": 0.40665102005004883, "learning_rate": 0.0002903994259746472, "loss": 0.1144, "step": 1174 }, { "epoch": 0.1646811492641906, "grad_norm": 0.4301244020462036, "learning_rate": 0.00029038507534082754, "loss": 0.2114, "step": 1175 }, { "epoch": 
0.16482130343377716, "grad_norm": 0.33480599522590637, "learning_rate": 0.00029037072470700786, "loss": 0.1172, "step": 1176 }, { "epoch": 0.1649614576033637, "grad_norm": 0.6966586112976074, "learning_rate": 0.0002903563740731882, "loss": 0.2555, "step": 1177 }, { "epoch": 0.16510161177295024, "grad_norm": 0.29830777645111084, "learning_rate": 0.0002903420234393685, "loss": 0.1183, "step": 1178 }, { "epoch": 0.16524176594253678, "grad_norm": 0.6931130886077881, "learning_rate": 0.0002903276728055489, "loss": 0.1152, "step": 1179 }, { "epoch": 0.16538192011212333, "grad_norm": 1.156959056854248, "learning_rate": 0.00029031332217172923, "loss": 0.1983, "step": 1180 }, { "epoch": 0.16552207428170987, "grad_norm": 0.4397577941417694, "learning_rate": 0.00029029897153790956, "loss": 0.1394, "step": 1181 }, { "epoch": 0.16566222845129644, "grad_norm": 0.6719510555267334, "learning_rate": 0.0002902846209040899, "loss": 0.2182, "step": 1182 }, { "epoch": 0.16580238262088298, "grad_norm": 0.7313705086708069, "learning_rate": 0.0002902702702702702, "loss": 0.1437, "step": 1183 }, { "epoch": 0.16594253679046952, "grad_norm": 0.5166248083114624, "learning_rate": 0.00029025591963645055, "loss": 0.1476, "step": 1184 }, { "epoch": 0.16608269096005607, "grad_norm": 0.7364222407341003, "learning_rate": 0.00029024156900263093, "loss": 0.1655, "step": 1185 }, { "epoch": 0.1662228451296426, "grad_norm": 0.30972933769226074, "learning_rate": 0.00029022721836881126, "loss": 0.1021, "step": 1186 }, { "epoch": 0.16636299929922915, "grad_norm": 0.8522205352783203, "learning_rate": 0.0002902128677349916, "loss": 0.1592, "step": 1187 }, { "epoch": 0.1665031534688157, "grad_norm": 0.41604235768318176, "learning_rate": 0.00029019851710117197, "loss": 0.2006, "step": 1188 }, { "epoch": 0.16664330763840224, "grad_norm": 0.8863270282745361, "learning_rate": 0.0002901841664673523, "loss": 0.168, "step": 1189 }, { "epoch": 0.16678346180798878, "grad_norm": 0.6913481950759888, "learning_rate": 
0.0002901698158335326, "loss": 0.2031, "step": 1190 }, { "epoch": 0.16692361597757532, "grad_norm": 0.5856875777244568, "learning_rate": 0.00029015546519971296, "loss": 0.1683, "step": 1191 }, { "epoch": 0.16706377014716187, "grad_norm": 1.1891721487045288, "learning_rate": 0.0002901411145658933, "loss": 0.4038, "step": 1192 }, { "epoch": 0.16720392431674844, "grad_norm": 0.7332741022109985, "learning_rate": 0.0002901267639320736, "loss": 0.3317, "step": 1193 }, { "epoch": 0.16734407848633498, "grad_norm": 0.5969712138175964, "learning_rate": 0.00029011241329825394, "loss": 0.1826, "step": 1194 }, { "epoch": 0.16748423265592152, "grad_norm": 0.69462651014328, "learning_rate": 0.0002900980626644343, "loss": 0.2139, "step": 1195 }, { "epoch": 0.16762438682550806, "grad_norm": 0.575545072555542, "learning_rate": 0.00029008371203061465, "loss": 0.1516, "step": 1196 }, { "epoch": 0.1677645409950946, "grad_norm": 0.9290174841880798, "learning_rate": 0.000290069361396795, "loss": 0.1988, "step": 1197 }, { "epoch": 0.16790469516468115, "grad_norm": 1.6319156885147095, "learning_rate": 0.00029005501076297536, "loss": 0.2739, "step": 1198 }, { "epoch": 0.1680448493342677, "grad_norm": 0.7445603609085083, "learning_rate": 0.0002900406601291557, "loss": 0.1989, "step": 1199 }, { "epoch": 0.16818500350385424, "grad_norm": 3.079102039337158, "learning_rate": 0.000290026309495336, "loss": 0.8688, "step": 1200 }, { "epoch": 0.16832515767344078, "grad_norm": 0.5677631497383118, "learning_rate": 0.00029001195886151635, "loss": 0.1762, "step": 1201 }, { "epoch": 0.16846531184302732, "grad_norm": 0.46313443779945374, "learning_rate": 0.0002899976082276967, "loss": 0.2257, "step": 1202 }, { "epoch": 0.16860546601261386, "grad_norm": 0.5117608308792114, "learning_rate": 0.000289983257593877, "loss": 0.241, "step": 1203 }, { "epoch": 0.16874562018220043, "grad_norm": 0.5307624936103821, "learning_rate": 0.0002899689069600574, "loss": 0.2225, "step": 1204 }, { "epoch": 
0.16888577435178698, "grad_norm": 0.5278559923171997, "learning_rate": 0.0002899545563262377, "loss": 0.198, "step": 1205 }, { "epoch": 0.16902592852137352, "grad_norm": 0.7261818051338196, "learning_rate": 0.00028994020569241805, "loss": 0.2842, "step": 1206 }, { "epoch": 0.16916608269096006, "grad_norm": 0.6507210731506348, "learning_rate": 0.00028992585505859843, "loss": 0.3037, "step": 1207 }, { "epoch": 0.1693062368605466, "grad_norm": 0.42979133129119873, "learning_rate": 0.00028991150442477876, "loss": 0.1736, "step": 1208 }, { "epoch": 0.16944639103013315, "grad_norm": 0.5951622724533081, "learning_rate": 0.0002898971537909591, "loss": 0.1306, "step": 1209 }, { "epoch": 0.1695865451997197, "grad_norm": 0.5094631314277649, "learning_rate": 0.0002898828031571394, "loss": 0.1604, "step": 1210 }, { "epoch": 0.16972669936930623, "grad_norm": 0.5467761754989624, "learning_rate": 0.00028986845252331974, "loss": 0.276, "step": 1211 }, { "epoch": 0.16986685353889278, "grad_norm": 0.4251236021518707, "learning_rate": 0.00028985410188950007, "loss": 0.1468, "step": 1212 }, { "epoch": 0.17000700770847932, "grad_norm": 0.5952158570289612, "learning_rate": 0.0002898397512556804, "loss": 0.169, "step": 1213 }, { "epoch": 0.17014716187806586, "grad_norm": 0.4211752116680145, "learning_rate": 0.0002898254006218608, "loss": 0.0981, "step": 1214 }, { "epoch": 0.1702873160476524, "grad_norm": 0.33614712953567505, "learning_rate": 0.0002898110499880411, "loss": 0.1024, "step": 1215 }, { "epoch": 0.17042747021723897, "grad_norm": 0.40574654936790466, "learning_rate": 0.00028979669935422144, "loss": 0.1809, "step": 1216 }, { "epoch": 0.17056762438682552, "grad_norm": 0.379718154668808, "learning_rate": 0.0002897823487204018, "loss": 0.1697, "step": 1217 }, { "epoch": 0.17070777855641206, "grad_norm": 0.6298273801803589, "learning_rate": 0.00028976799808658215, "loss": 0.1259, "step": 1218 }, { "epoch": 0.1708479327259986, "grad_norm": 0.5226387977600098, "learning_rate": 
0.0002897536474527625, "loss": 0.2276, "step": 1219 }, { "epoch": 0.17098808689558515, "grad_norm": 0.32652711868286133, "learning_rate": 0.0002897392968189428, "loss": 0.1374, "step": 1220 }, { "epoch": 0.1711282410651717, "grad_norm": 0.45564866065979004, "learning_rate": 0.00028972494618512314, "loss": 0.1694, "step": 1221 }, { "epoch": 0.17126839523475823, "grad_norm": 0.5506298542022705, "learning_rate": 0.00028971059555130347, "loss": 0.2138, "step": 1222 }, { "epoch": 0.17140854940434477, "grad_norm": 0.3355783224105835, "learning_rate": 0.00028969624491748385, "loss": 0.1219, "step": 1223 }, { "epoch": 0.17154870357393132, "grad_norm": 0.3820132613182068, "learning_rate": 0.0002896818942836642, "loss": 0.1469, "step": 1224 }, { "epoch": 0.17168885774351786, "grad_norm": 0.5089153051376343, "learning_rate": 0.0002896675436498445, "loss": 0.1831, "step": 1225 }, { "epoch": 0.1718290119131044, "grad_norm": 0.38970282673835754, "learning_rate": 0.0002896531930160249, "loss": 0.1031, "step": 1226 }, { "epoch": 0.17196916608269097, "grad_norm": 0.45952531695365906, "learning_rate": 0.0002896388423822052, "loss": 0.1464, "step": 1227 }, { "epoch": 0.17210932025227751, "grad_norm": 1.0034610033035278, "learning_rate": 0.00028962449174838555, "loss": 0.1909, "step": 1228 }, { "epoch": 0.17224947442186406, "grad_norm": 0.48172807693481445, "learning_rate": 0.0002896101411145659, "loss": 0.0866, "step": 1229 }, { "epoch": 0.1723896285914506, "grad_norm": 0.6868981719017029, "learning_rate": 0.0002895957904807462, "loss": 0.2065, "step": 1230 }, { "epoch": 0.17252978276103714, "grad_norm": 0.7861095070838928, "learning_rate": 0.00028958143984692653, "loss": 0.2421, "step": 1231 }, { "epoch": 0.17266993693062369, "grad_norm": 0.5128983855247498, "learning_rate": 0.00028956708921310686, "loss": 0.1752, "step": 1232 }, { "epoch": 0.17281009110021023, "grad_norm": 0.38063517212867737, "learning_rate": 0.00028955273857928724, "loss": 0.1847, "step": 1233 }, { "epoch": 
0.17295024526979677, "grad_norm": 0.5560049414634705, "learning_rate": 0.00028953838794546757, "loss": 0.2814, "step": 1234 }, { "epoch": 0.1730903994393833, "grad_norm": 0.9167608618736267, "learning_rate": 0.0002895240373116479, "loss": 0.2449, "step": 1235 }, { "epoch": 0.17323055360896986, "grad_norm": 0.28223785758018494, "learning_rate": 0.0002895096866778283, "loss": 0.0955, "step": 1236 }, { "epoch": 0.1733707077785564, "grad_norm": 0.5269321799278259, "learning_rate": 0.0002894953360440086, "loss": 0.2127, "step": 1237 }, { "epoch": 0.17351086194814297, "grad_norm": 0.7515736222267151, "learning_rate": 0.00028948098541018894, "loss": 0.1773, "step": 1238 }, { "epoch": 0.1736510161177295, "grad_norm": 0.4676588773727417, "learning_rate": 0.00028946663477636927, "loss": 0.1735, "step": 1239 }, { "epoch": 0.17379117028731605, "grad_norm": 0.9766138195991516, "learning_rate": 0.0002894522841425496, "loss": 0.2785, "step": 1240 }, { "epoch": 0.1739313244569026, "grad_norm": 1.1965867280960083, "learning_rate": 0.0002894379335087299, "loss": 0.3369, "step": 1241 }, { "epoch": 0.17407147862648914, "grad_norm": 0.7254909873008728, "learning_rate": 0.0002894235828749103, "loss": 0.218, "step": 1242 }, { "epoch": 0.17421163279607568, "grad_norm": 0.4795093834400177, "learning_rate": 0.00028940923224109064, "loss": 0.0886, "step": 1243 }, { "epoch": 0.17435178696566223, "grad_norm": 0.3527316749095917, "learning_rate": 0.00028939488160727097, "loss": 0.0889, "step": 1244 }, { "epoch": 0.17449194113524877, "grad_norm": 2.0535407066345215, "learning_rate": 0.0002893805309734513, "loss": 0.3005, "step": 1245 }, { "epoch": 0.1746320953048353, "grad_norm": 1.333620309829712, "learning_rate": 0.0002893661803396316, "loss": 0.376, "step": 1246 }, { "epoch": 0.17477224947442185, "grad_norm": 1.0444930791854858, "learning_rate": 0.00028935182970581195, "loss": 0.1221, "step": 1247 }, { "epoch": 0.1749124036440084, "grad_norm": 1.4253756999969482, "learning_rate": 
0.0002893374790719923, "loss": 0.2826, "step": 1248 }, { "epoch": 0.17505255781359497, "grad_norm": 3.2174179553985596, "learning_rate": 0.00028932312843817266, "loss": 0.2926, "step": 1249 }, { "epoch": 0.1751927119831815, "grad_norm": 7.869657039642334, "learning_rate": 0.000289308777804353, "loss": 0.3666, "step": 1250 }, { "epoch": 0.17533286615276805, "grad_norm": 0.5538425445556641, "learning_rate": 0.0002892944271705333, "loss": 0.1636, "step": 1251 }, { "epoch": 0.1754730203223546, "grad_norm": 0.5059335231781006, "learning_rate": 0.0002892800765367137, "loss": 0.2067, "step": 1252 }, { "epoch": 0.17561317449194114, "grad_norm": 0.5134457945823669, "learning_rate": 0.00028926572590289403, "loss": 0.2463, "step": 1253 }, { "epoch": 0.17575332866152768, "grad_norm": 0.5054817199707031, "learning_rate": 0.00028925137526907436, "loss": 0.1702, "step": 1254 }, { "epoch": 0.17589348283111422, "grad_norm": 0.555178701877594, "learning_rate": 0.0002892370246352547, "loss": 0.1271, "step": 1255 }, { "epoch": 0.17603363700070077, "grad_norm": 0.48891961574554443, "learning_rate": 0.000289222674001435, "loss": 0.1568, "step": 1256 }, { "epoch": 0.1761737911702873, "grad_norm": 0.4247031807899475, "learning_rate": 0.00028920832336761535, "loss": 0.1396, "step": 1257 }, { "epoch": 0.17631394533987385, "grad_norm": 0.49350419640541077, "learning_rate": 0.00028919397273379573, "loss": 0.1609, "step": 1258 }, { "epoch": 0.1764540995094604, "grad_norm": 0.457766592502594, "learning_rate": 0.00028917962209997606, "loss": 0.152, "step": 1259 }, { "epoch": 0.17659425367904696, "grad_norm": 0.6182723641395569, "learning_rate": 0.0002891652714661564, "loss": 0.1174, "step": 1260 }, { "epoch": 0.1767344078486335, "grad_norm": 0.41902482509613037, "learning_rate": 0.00028915092083233677, "loss": 0.1428, "step": 1261 }, { "epoch": 0.17687456201822005, "grad_norm": 0.5420647859573364, "learning_rate": 0.0002891365701985171, "loss": 0.1686, "step": 1262 }, { "epoch": 
0.1770147161878066, "grad_norm": 0.7247220873832703, "learning_rate": 0.0002891222195646974, "loss": 0.1538, "step": 1263 }, { "epoch": 0.17715487035739313, "grad_norm": 0.6142184138298035, "learning_rate": 0.00028910786893087775, "loss": 0.2385, "step": 1264 }, { "epoch": 0.17729502452697968, "grad_norm": 0.3689082860946655, "learning_rate": 0.0002890935182970581, "loss": 0.153, "step": 1265 }, { "epoch": 0.17743517869656622, "grad_norm": 0.38291871547698975, "learning_rate": 0.0002890791676632384, "loss": 0.0839, "step": 1266 }, { "epoch": 0.17757533286615276, "grad_norm": 0.504447877407074, "learning_rate": 0.00028906481702941874, "loss": 0.1569, "step": 1267 }, { "epoch": 0.1777154870357393, "grad_norm": 0.8035547137260437, "learning_rate": 0.0002890504663955991, "loss": 0.1979, "step": 1268 }, { "epoch": 0.17785564120532585, "grad_norm": 0.4465544819831848, "learning_rate": 0.00028903611576177945, "loss": 0.1963, "step": 1269 }, { "epoch": 0.1779957953749124, "grad_norm": 0.5887908339500427, "learning_rate": 0.0002890217651279598, "loss": 0.2743, "step": 1270 }, { "epoch": 0.17813594954449896, "grad_norm": 0.5897113084793091, "learning_rate": 0.00028900741449414016, "loss": 0.1245, "step": 1271 }, { "epoch": 0.1782761037140855, "grad_norm": 0.626499593257904, "learning_rate": 0.0002889930638603205, "loss": 0.1179, "step": 1272 }, { "epoch": 0.17841625788367205, "grad_norm": 0.4210624694824219, "learning_rate": 0.0002889787132265008, "loss": 0.1471, "step": 1273 }, { "epoch": 0.1785564120532586, "grad_norm": 0.5204456448554993, "learning_rate": 0.00028896436259268115, "loss": 0.2015, "step": 1274 }, { "epoch": 0.17869656622284513, "grad_norm": 0.3530441224575043, "learning_rate": 0.0002889500119588615, "loss": 0.1381, "step": 1275 }, { "epoch": 0.17883672039243168, "grad_norm": 0.4907524585723877, "learning_rate": 0.0002889356613250418, "loss": 0.0779, "step": 1276 }, { "epoch": 0.17897687456201822, "grad_norm": 0.32851338386535645, "learning_rate": 
0.0002889213106912222, "loss": 0.0708, "step": 1277 }, { "epoch": 0.17911702873160476, "grad_norm": 0.749977171421051, "learning_rate": 0.0002889069600574025, "loss": 0.1663, "step": 1278 }, { "epoch": 0.1792571829011913, "grad_norm": 0.7493078708648682, "learning_rate": 0.00028889260942358284, "loss": 0.3067, "step": 1279 }, { "epoch": 0.17939733707077785, "grad_norm": 1.1422078609466553, "learning_rate": 0.0002888782587897632, "loss": 0.1707, "step": 1280 }, { "epoch": 0.1795374912403644, "grad_norm": 0.5078191757202148, "learning_rate": 0.00028886390815594356, "loss": 0.1743, "step": 1281 }, { "epoch": 0.17967764540995096, "grad_norm": 0.3899461627006531, "learning_rate": 0.0002888495575221239, "loss": 0.0804, "step": 1282 }, { "epoch": 0.1798177995795375, "grad_norm": 0.4183603525161743, "learning_rate": 0.0002888352068883042, "loss": 0.1473, "step": 1283 }, { "epoch": 0.17995795374912404, "grad_norm": 0.3961012661457062, "learning_rate": 0.00028882085625448454, "loss": 0.2304, "step": 1284 }, { "epoch": 0.1800981079187106, "grad_norm": 0.5780870914459229, "learning_rate": 0.00028880650562066487, "loss": 0.1907, "step": 1285 }, { "epoch": 0.18023826208829713, "grad_norm": 0.6096864938735962, "learning_rate": 0.0002887921549868452, "loss": 0.2281, "step": 1286 }, { "epoch": 0.18037841625788367, "grad_norm": 0.3767504096031189, "learning_rate": 0.0002887778043530256, "loss": 0.1214, "step": 1287 }, { "epoch": 0.18051857042747022, "grad_norm": 0.4762822389602661, "learning_rate": 0.0002887634537192059, "loss": 0.115, "step": 1288 }, { "epoch": 0.18065872459705676, "grad_norm": 0.6894064545631409, "learning_rate": 0.00028874910308538624, "loss": 0.2195, "step": 1289 }, { "epoch": 0.1807988787666433, "grad_norm": 0.6677369475364685, "learning_rate": 0.0002887347524515666, "loss": 0.1651, "step": 1290 }, { "epoch": 0.18093903293622984, "grad_norm": 0.7982863187789917, "learning_rate": 0.00028872040181774695, "loss": 0.1831, "step": 1291 }, { "epoch": 
0.18107918710581639, "grad_norm": 1.0460959672927856, "learning_rate": 0.0002887060511839273, "loss": 0.2112, "step": 1292 }, { "epoch": 0.18121934127540296, "grad_norm": 1.3564528226852417, "learning_rate": 0.0002886917005501076, "loss": 0.1927, "step": 1293 }, { "epoch": 0.1813594954449895, "grad_norm": 0.20479118824005127, "learning_rate": 0.00028867734991628794, "loss": 0.0808, "step": 1294 }, { "epoch": 0.18149964961457604, "grad_norm": 2.3068814277648926, "learning_rate": 0.00028866299928246826, "loss": 0.2418, "step": 1295 }, { "epoch": 0.18163980378416258, "grad_norm": 1.7506601810455322, "learning_rate": 0.00028864864864864865, "loss": 0.2347, "step": 1296 }, { "epoch": 0.18177995795374913, "grad_norm": 0.8877953290939331, "learning_rate": 0.000288634298014829, "loss": 0.1053, "step": 1297 }, { "epoch": 0.18192011212333567, "grad_norm": 0.4029606282711029, "learning_rate": 0.0002886199473810093, "loss": 0.0948, "step": 1298 }, { "epoch": 0.1820602662929222, "grad_norm": 2.6804661750793457, "learning_rate": 0.00028860559674718963, "loss": 0.3532, "step": 1299 }, { "epoch": 0.18220042046250876, "grad_norm": 8.153922080993652, "learning_rate": 0.00028859124611337, "loss": 0.2741, "step": 1300 }, { "epoch": 0.1823405746320953, "grad_norm": 0.6255004405975342, "learning_rate": 0.00028857689547955034, "loss": 0.2317, "step": 1301 }, { "epoch": 0.18248072880168184, "grad_norm": 0.3549646735191345, "learning_rate": 0.00028856254484573067, "loss": 0.1315, "step": 1302 }, { "epoch": 0.18262088297126838, "grad_norm": 0.6672636866569519, "learning_rate": 0.000288548194211911, "loss": 0.1785, "step": 1303 }, { "epoch": 0.18276103714085495, "grad_norm": 0.2588634490966797, "learning_rate": 0.00028853384357809133, "loss": 0.1197, "step": 1304 }, { "epoch": 0.1829011913104415, "grad_norm": 0.9322260618209839, "learning_rate": 0.00028851949294427166, "loss": 0.1204, "step": 1305 }, { "epoch": 0.18304134548002804, "grad_norm": 0.8750333786010742, "learning_rate": 
0.00028850514231045204, "loss": 0.1787, "step": 1306 }, { "epoch": 0.18318149964961458, "grad_norm": 0.6682153940200806, "learning_rate": 0.00028849079167663237, "loss": 0.1977, "step": 1307 }, { "epoch": 0.18332165381920112, "grad_norm": 0.4636789858341217, "learning_rate": 0.0002884764410428127, "loss": 0.2323, "step": 1308 }, { "epoch": 0.18346180798878767, "grad_norm": 0.5990692377090454, "learning_rate": 0.000288462090408993, "loss": 0.1087, "step": 1309 }, { "epoch": 0.1836019621583742, "grad_norm": 0.7650004029273987, "learning_rate": 0.00028844773977517336, "loss": 0.2298, "step": 1310 }, { "epoch": 0.18374211632796075, "grad_norm": 0.5602273941040039, "learning_rate": 0.0002884333891413537, "loss": 0.1699, "step": 1311 }, { "epoch": 0.1838822704975473, "grad_norm": 0.5199315547943115, "learning_rate": 0.00028841903850753407, "loss": 0.2183, "step": 1312 }, { "epoch": 0.18402242466713384, "grad_norm": 0.608881950378418, "learning_rate": 0.0002884046878737144, "loss": 0.191, "step": 1313 }, { "epoch": 0.18416257883672038, "grad_norm": 0.48028749227523804, "learning_rate": 0.0002883903372398947, "loss": 0.1747, "step": 1314 }, { "epoch": 0.18430273300630695, "grad_norm": 0.4894683361053467, "learning_rate": 0.00028837598660607505, "loss": 0.1669, "step": 1315 }, { "epoch": 0.1844428871758935, "grad_norm": 0.48642149567604065, "learning_rate": 0.00028836163597225543, "loss": 0.1613, "step": 1316 }, { "epoch": 0.18458304134548004, "grad_norm": 0.9096436500549316, "learning_rate": 0.00028834728533843576, "loss": 0.1661, "step": 1317 }, { "epoch": 0.18472319551506658, "grad_norm": 0.3663023114204407, "learning_rate": 0.0002883329347046161, "loss": 0.0566, "step": 1318 }, { "epoch": 0.18486334968465312, "grad_norm": 0.34242933988571167, "learning_rate": 0.0002883185840707964, "loss": 0.1048, "step": 1319 }, { "epoch": 0.18500350385423966, "grad_norm": 0.4216375946998596, "learning_rate": 0.00028830423343697675, "loss": 0.1892, "step": 1320 }, { "epoch": 
0.1851436580238262, "grad_norm": 0.46315309405326843, "learning_rate": 0.0002882898828031571, "loss": 0.1357, "step": 1321 }, { "epoch": 0.18528381219341275, "grad_norm": 0.35083702206611633, "learning_rate": 0.00028827553216933746, "loss": 0.2003, "step": 1322 }, { "epoch": 0.1854239663629993, "grad_norm": 0.3840385973453522, "learning_rate": 0.0002882611815355178, "loss": 0.0842, "step": 1323 }, { "epoch": 0.18556412053258584, "grad_norm": 0.4021473824977875, "learning_rate": 0.0002882468309016981, "loss": 0.1556, "step": 1324 }, { "epoch": 0.18570427470217238, "grad_norm": 0.5646082758903503, "learning_rate": 0.0002882324802678785, "loss": 0.1444, "step": 1325 }, { "epoch": 0.18584442887175892, "grad_norm": 0.7728922367095947, "learning_rate": 0.00028821812963405883, "loss": 0.2644, "step": 1326 }, { "epoch": 0.1859845830413455, "grad_norm": 0.4661784768104553, "learning_rate": 0.00028820377900023916, "loss": 0.2211, "step": 1327 }, { "epoch": 0.18612473721093203, "grad_norm": 0.6245259046554565, "learning_rate": 0.0002881894283664195, "loss": 0.2828, "step": 1328 }, { "epoch": 0.18626489138051858, "grad_norm": 0.4976140558719635, "learning_rate": 0.0002881750777325998, "loss": 0.1125, "step": 1329 }, { "epoch": 0.18640504555010512, "grad_norm": 0.2560175061225891, "learning_rate": 0.00028816072709878014, "loss": 0.0535, "step": 1330 }, { "epoch": 0.18654519971969166, "grad_norm": 0.3945830762386322, "learning_rate": 0.0002881463764649605, "loss": 0.0972, "step": 1331 }, { "epoch": 0.1866853538892782, "grad_norm": 0.4562370181083679, "learning_rate": 0.00028813202583114085, "loss": 0.2328, "step": 1332 }, { "epoch": 0.18682550805886475, "grad_norm": 0.5884147882461548, "learning_rate": 0.0002881176751973212, "loss": 0.1608, "step": 1333 }, { "epoch": 0.1869656622284513, "grad_norm": 0.38829508423805237, "learning_rate": 0.0002881033245635015, "loss": 0.1331, "step": 1334 }, { "epoch": 0.18710581639803783, "grad_norm": 1.426316261291504, "learning_rate": 
0.0002880889739296819, "loss": 0.3349, "step": 1335 }, { "epoch": 0.18724597056762438, "grad_norm": 0.5064998269081116, "learning_rate": 0.0002880746232958622, "loss": 0.1161, "step": 1336 }, { "epoch": 0.18738612473721092, "grad_norm": 1.0166008472442627, "learning_rate": 0.00028806027266204255, "loss": 0.2141, "step": 1337 }, { "epoch": 0.1875262789067975, "grad_norm": 0.5756743550300598, "learning_rate": 0.0002880459220282229, "loss": 0.1583, "step": 1338 }, { "epoch": 0.18766643307638403, "grad_norm": 1.0782535076141357, "learning_rate": 0.0002880315713944032, "loss": 0.1775, "step": 1339 }, { "epoch": 0.18780658724597057, "grad_norm": 0.32486552000045776, "learning_rate": 0.00028801722076058354, "loss": 0.1643, "step": 1340 }, { "epoch": 0.18794674141555712, "grad_norm": 0.5556925535202026, "learning_rate": 0.0002880028701267639, "loss": 0.174, "step": 1341 }, { "epoch": 0.18808689558514366, "grad_norm": 0.811795175075531, "learning_rate": 0.00028798851949294425, "loss": 0.1511, "step": 1342 }, { "epoch": 0.1882270497547302, "grad_norm": 1.1259410381317139, "learning_rate": 0.0002879741688591246, "loss": 0.3106, "step": 1343 }, { "epoch": 0.18836720392431675, "grad_norm": 1.4740264415740967, "learning_rate": 0.00028795981822530496, "loss": 0.4054, "step": 1344 }, { "epoch": 0.1885073580939033, "grad_norm": 1.2227998971939087, "learning_rate": 0.0002879454675914853, "loss": 0.2528, "step": 1345 }, { "epoch": 0.18864751226348983, "grad_norm": 0.9776144027709961, "learning_rate": 0.0002879311169576656, "loss": 0.2043, "step": 1346 }, { "epoch": 0.18878766643307637, "grad_norm": 0.8177399635314941, "learning_rate": 0.00028791676632384595, "loss": 0.1982, "step": 1347 }, { "epoch": 0.18892782060266292, "grad_norm": 2.1901967525482178, "learning_rate": 0.0002879024156900263, "loss": 0.1518, "step": 1348 }, { "epoch": 0.1890679747722495, "grad_norm": 1.3925788402557373, "learning_rate": 0.0002878880650562066, "loss": 0.2656, "step": 1349 }, { "epoch": 
0.18920812894183603, "grad_norm": 4.939290523529053, "learning_rate": 0.00028787371442238693, "loss": 0.3708, "step": 1350 }, { "epoch": 0.18934828311142257, "grad_norm": 0.5326824188232422, "learning_rate": 0.0002878593637885673, "loss": 0.2052, "step": 1351 }, { "epoch": 0.18948843728100911, "grad_norm": 0.4210324287414551, "learning_rate": 0.00028784501315474764, "loss": 0.1451, "step": 1352 }, { "epoch": 0.18962859145059566, "grad_norm": 0.49748069047927856, "learning_rate": 0.00028783066252092797, "loss": 0.1511, "step": 1353 }, { "epoch": 0.1897687456201822, "grad_norm": 0.28506726026535034, "learning_rate": 0.00028781631188710835, "loss": 0.1263, "step": 1354 }, { "epoch": 0.18990889978976874, "grad_norm": 0.4613712430000305, "learning_rate": 0.0002878019612532887, "loss": 0.2148, "step": 1355 }, { "epoch": 0.19004905395935529, "grad_norm": 0.368649959564209, "learning_rate": 0.000287787610619469, "loss": 0.1333, "step": 1356 }, { "epoch": 0.19018920812894183, "grad_norm": 1.4647291898727417, "learning_rate": 0.00028777325998564934, "loss": 0.2487, "step": 1357 }, { "epoch": 0.19032936229852837, "grad_norm": 0.3617301285266876, "learning_rate": 0.00028775890935182967, "loss": 0.1319, "step": 1358 }, { "epoch": 0.1904695164681149, "grad_norm": 0.5744082927703857, "learning_rate": 0.00028774455871801, "loss": 0.1897, "step": 1359 }, { "epoch": 0.19060967063770148, "grad_norm": 0.5286508202552795, "learning_rate": 0.0002877302080841904, "loss": 0.1503, "step": 1360 }, { "epoch": 0.19074982480728803, "grad_norm": 0.661304771900177, "learning_rate": 0.0002877158574503707, "loss": 0.1246, "step": 1361 }, { "epoch": 0.19088997897687457, "grad_norm": 0.5126602053642273, "learning_rate": 0.00028770150681655104, "loss": 0.189, "step": 1362 }, { "epoch": 0.1910301331464611, "grad_norm": 0.3035547435283661, "learning_rate": 0.0002876871561827314, "loss": 0.0818, "step": 1363 }, { "epoch": 0.19117028731604765, "grad_norm": 0.6415405869483948, "learning_rate": 
0.00028767280554891175, "loss": 0.1362, "step": 1364 }, { "epoch": 0.1913104414856342, "grad_norm": 0.8118913769721985, "learning_rate": 0.0002876584549150921, "loss": 0.1395, "step": 1365 }, { "epoch": 0.19145059565522074, "grad_norm": 0.3385295271873474, "learning_rate": 0.0002876441042812724, "loss": 0.18, "step": 1366 }, { "epoch": 0.19159074982480728, "grad_norm": 0.6493491530418396, "learning_rate": 0.00028762975364745273, "loss": 0.2065, "step": 1367 }, { "epoch": 0.19173090399439383, "grad_norm": 0.7460654973983765, "learning_rate": 0.00028761540301363306, "loss": 0.1064, "step": 1368 }, { "epoch": 0.19187105816398037, "grad_norm": 0.7090083956718445, "learning_rate": 0.0002876010523798134, "loss": 0.1694, "step": 1369 }, { "epoch": 0.1920112123335669, "grad_norm": 0.34292009472846985, "learning_rate": 0.0002875867017459938, "loss": 0.1415, "step": 1370 }, { "epoch": 0.19215136650315348, "grad_norm": 0.48215794563293457, "learning_rate": 0.0002875723511121741, "loss": 0.1198, "step": 1371 }, { "epoch": 0.19229152067274002, "grad_norm": 0.5662413239479065, "learning_rate": 0.00028755800047835443, "loss": 0.1172, "step": 1372 }, { "epoch": 0.19243167484232657, "grad_norm": 0.6156641840934753, "learning_rate": 0.00028754364984453476, "loss": 0.2177, "step": 1373 }, { "epoch": 0.1925718290119131, "grad_norm": 0.4750995635986328, "learning_rate": 0.0002875292992107151, "loss": 0.1776, "step": 1374 }, { "epoch": 0.19271198318149965, "grad_norm": 0.5293868184089661, "learning_rate": 0.0002875149485768954, "loss": 0.1552, "step": 1375 }, { "epoch": 0.1928521373510862, "grad_norm": 0.9702204465866089, "learning_rate": 0.0002875005979430758, "loss": 0.1385, "step": 1376 }, { "epoch": 0.19299229152067274, "grad_norm": 0.38313114643096924, "learning_rate": 0.00028748624730925613, "loss": 0.132, "step": 1377 }, { "epoch": 0.19313244569025928, "grad_norm": 0.7046389579772949, "learning_rate": 0.00028747189667543646, "loss": 0.1788, "step": 1378 }, { "epoch": 
0.19327259985984582, "grad_norm": 0.5358118414878845, "learning_rate": 0.00028745754604161684, "loss": 0.0801, "step": 1379 }, { "epoch": 0.19341275402943237, "grad_norm": 0.47918954491615295, "learning_rate": 0.00028744319540779717, "loss": 0.154, "step": 1380 }, { "epoch": 0.1935529081990189, "grad_norm": 0.44163206219673157, "learning_rate": 0.0002874288447739775, "loss": 0.089, "step": 1381 }, { "epoch": 0.19369306236860548, "grad_norm": 0.6863482594490051, "learning_rate": 0.0002874144941401578, "loss": 0.1171, "step": 1382 }, { "epoch": 0.19383321653819202, "grad_norm": 0.45869338512420654, "learning_rate": 0.00028740014350633815, "loss": 0.1539, "step": 1383 }, { "epoch": 0.19397337070777856, "grad_norm": 0.47624510526657104, "learning_rate": 0.0002873857928725185, "loss": 0.0989, "step": 1384 }, { "epoch": 0.1941135248773651, "grad_norm": 0.4153755307197571, "learning_rate": 0.0002873714422386988, "loss": 0.1437, "step": 1385 }, { "epoch": 0.19425367904695165, "grad_norm": 0.5450621843338013, "learning_rate": 0.0002873570916048792, "loss": 0.1553, "step": 1386 }, { "epoch": 0.1943938332165382, "grad_norm": 0.3404199182987213, "learning_rate": 0.0002873427409710595, "loss": 0.0683, "step": 1387 }, { "epoch": 0.19453398738612473, "grad_norm": 0.7234523892402649, "learning_rate": 0.00028732839033723985, "loss": 0.0938, "step": 1388 }, { "epoch": 0.19467414155571128, "grad_norm": 0.9329773783683777, "learning_rate": 0.00028731403970342023, "loss": 0.115, "step": 1389 }, { "epoch": 0.19481429572529782, "grad_norm": 0.9350193738937378, "learning_rate": 0.00028729968906960056, "loss": 0.1766, "step": 1390 }, { "epoch": 0.19495444989488436, "grad_norm": 0.5233431458473206, "learning_rate": 0.0002872853384357809, "loss": 0.1586, "step": 1391 }, { "epoch": 0.1950946040644709, "grad_norm": 0.683952808380127, "learning_rate": 0.0002872709878019612, "loss": 0.1786, "step": 1392 }, { "epoch": 0.19523475823405748, "grad_norm": 1.8429738283157349, "learning_rate": 
0.00028725663716814155, "loss": 0.4211, "step": 1393 }, { "epoch": 0.19537491240364402, "grad_norm": 1.6243500709533691, "learning_rate": 0.0002872422865343219, "loss": 0.2126, "step": 1394 }, { "epoch": 0.19551506657323056, "grad_norm": 0.9260746240615845, "learning_rate": 0.00028722793590050226, "loss": 0.2339, "step": 1395 }, { "epoch": 0.1956552207428171, "grad_norm": 0.9837828278541565, "learning_rate": 0.0002872135852666826, "loss": 0.2341, "step": 1396 }, { "epoch": 0.19579537491240365, "grad_norm": 1.4956419467926025, "learning_rate": 0.0002871992346328629, "loss": 0.1598, "step": 1397 }, { "epoch": 0.1959355290819902, "grad_norm": 3.089427947998047, "learning_rate": 0.0002871848839990433, "loss": 0.1495, "step": 1398 }, { "epoch": 0.19607568325157673, "grad_norm": 2.4646904468536377, "learning_rate": 0.0002871705333652236, "loss": 0.467, "step": 1399 }, { "epoch": 0.19621583742116327, "grad_norm": 3.984333038330078, "learning_rate": 0.00028715618273140396, "loss": 0.4142, "step": 1400 }, { "epoch": 0.19635599159074982, "grad_norm": 0.7834465503692627, "learning_rate": 0.0002871418320975843, "loss": 0.3954, "step": 1401 }, { "epoch": 0.19649614576033636, "grad_norm": 0.5351545214653015, "learning_rate": 0.0002871274814637646, "loss": 0.1846, "step": 1402 }, { "epoch": 0.1966362999299229, "grad_norm": 0.6234658360481262, "learning_rate": 0.00028711313082994494, "loss": 0.2162, "step": 1403 }, { "epoch": 0.19677645409950947, "grad_norm": 0.414691299200058, "learning_rate": 0.00028709878019612527, "loss": 0.0986, "step": 1404 }, { "epoch": 0.19691660826909602, "grad_norm": 0.5032076239585876, "learning_rate": 0.00028708442956230565, "loss": 0.1129, "step": 1405 }, { "epoch": 0.19705676243868256, "grad_norm": 0.3615491986274719, "learning_rate": 0.000287070078928486, "loss": 0.1238, "step": 1406 }, { "epoch": 0.1971969166082691, "grad_norm": 0.4679245948791504, "learning_rate": 0.0002870557282946663, "loss": 0.2511, "step": 1407 }, { "epoch": 
0.19733707077785564, "grad_norm": 0.4984559714794159, "learning_rate": 0.0002870413776608467, "loss": 0.2127, "step": 1408 }, { "epoch": 0.1974772249474422, "grad_norm": 0.46827319264411926, "learning_rate": 0.000287027027027027, "loss": 0.1863, "step": 1409 }, { "epoch": 0.19761737911702873, "grad_norm": 0.339138388633728, "learning_rate": 0.00028701267639320735, "loss": 0.1832, "step": 1410 }, { "epoch": 0.19775753328661527, "grad_norm": 0.434836208820343, "learning_rate": 0.0002869983257593877, "loss": 0.1421, "step": 1411 }, { "epoch": 0.19789768745620182, "grad_norm": 0.39442136883735657, "learning_rate": 0.000286983975125568, "loss": 0.2238, "step": 1412 }, { "epoch": 0.19803784162578836, "grad_norm": 0.5383408665657043, "learning_rate": 0.00028696962449174834, "loss": 0.1585, "step": 1413 }, { "epoch": 0.1981779957953749, "grad_norm": 0.48914170265197754, "learning_rate": 0.0002869552738579287, "loss": 0.1656, "step": 1414 }, { "epoch": 0.19831814996496147, "grad_norm": 0.3213764429092407, "learning_rate": 0.00028694092322410905, "loss": 0.0942, "step": 1415 }, { "epoch": 0.198458304134548, "grad_norm": 0.41128483414649963, "learning_rate": 0.0002869265725902894, "loss": 0.1012, "step": 1416 }, { "epoch": 0.19859845830413456, "grad_norm": 1.0505154132843018, "learning_rate": 0.00028691222195646976, "loss": 0.1758, "step": 1417 }, { "epoch": 0.1987386124737211, "grad_norm": 0.5430999994277954, "learning_rate": 0.0002868978713226501, "loss": 0.1706, "step": 1418 }, { "epoch": 0.19887876664330764, "grad_norm": 0.35043174028396606, "learning_rate": 0.0002868835206888304, "loss": 0.1575, "step": 1419 }, { "epoch": 0.19901892081289418, "grad_norm": 0.5660715699195862, "learning_rate": 0.00028686917005501074, "loss": 0.2044, "step": 1420 }, { "epoch": 0.19915907498248073, "grad_norm": 0.7045791745185852, "learning_rate": 0.00028685481942119107, "loss": 0.0895, "step": 1421 }, { "epoch": 0.19929922915206727, "grad_norm": 0.6727139949798584, "learning_rate": 
0.0002868404687873714, "loss": 0.1915, "step": 1422 }, { "epoch": 0.1994393833216538, "grad_norm": 0.7017869353294373, "learning_rate": 0.00028682611815355173, "loss": 0.1363, "step": 1423 }, { "epoch": 0.19957953749124036, "grad_norm": 0.5484580993652344, "learning_rate": 0.0002868117675197321, "loss": 0.1651, "step": 1424 }, { "epoch": 0.1997196916608269, "grad_norm": 0.5813344717025757, "learning_rate": 0.00028679741688591244, "loss": 0.1533, "step": 1425 }, { "epoch": 0.19985984583041347, "grad_norm": 0.3388593792915344, "learning_rate": 0.00028678306625209277, "loss": 0.1263, "step": 1426 }, { "epoch": 0.2, "grad_norm": 0.5175798535346985, "learning_rate": 0.00028676871561827315, "loss": 0.1913, "step": 1427 }, { "epoch": 0.20014015416958655, "grad_norm": 0.9213618636131287, "learning_rate": 0.0002867543649844535, "loss": 0.1763, "step": 1428 }, { "epoch": 0.2002803083391731, "grad_norm": 0.422063410282135, "learning_rate": 0.0002867400143506338, "loss": 0.1423, "step": 1429 }, { "epoch": 0.20042046250875964, "grad_norm": 1.2014931440353394, "learning_rate": 0.00028672566371681414, "loss": 0.1637, "step": 1430 }, { "epoch": 0.20056061667834618, "grad_norm": 0.5003225207328796, "learning_rate": 0.00028671131308299447, "loss": 0.2425, "step": 1431 }, { "epoch": 0.20070077084793272, "grad_norm": 0.42734184861183167, "learning_rate": 0.0002866969624491748, "loss": 0.1308, "step": 1432 }, { "epoch": 0.20084092501751927, "grad_norm": 0.7353927493095398, "learning_rate": 0.0002866826118153552, "loss": 0.1463, "step": 1433 }, { "epoch": 0.2009810791871058, "grad_norm": 0.4108189046382904, "learning_rate": 0.0002866682611815355, "loss": 0.091, "step": 1434 }, { "epoch": 0.20112123335669235, "grad_norm": 0.8403388261795044, "learning_rate": 0.00028665391054771583, "loss": 0.2363, "step": 1435 }, { "epoch": 0.2012613875262789, "grad_norm": 1.2731560468673706, "learning_rate": 0.00028663955991389616, "loss": 0.1101, "step": 1436 }, { "epoch": 0.20140154169586547, 
"grad_norm": 0.630267322063446, "learning_rate": 0.0002866252092800765, "loss": 0.1613, "step": 1437 }, { "epoch": 0.201541695865452, "grad_norm": 0.3950931429862976, "learning_rate": 0.0002866108586462568, "loss": 0.1389, "step": 1438 }, { "epoch": 0.20168185003503855, "grad_norm": 0.8729126453399658, "learning_rate": 0.0002865965080124372, "loss": 0.1055, "step": 1439 }, { "epoch": 0.2018220042046251, "grad_norm": 0.677051842212677, "learning_rate": 0.00028658215737861753, "loss": 0.1355, "step": 1440 }, { "epoch": 0.20196215837421164, "grad_norm": 0.24138006567955017, "learning_rate": 0.00028656780674479786, "loss": 0.0625, "step": 1441 }, { "epoch": 0.20210231254379818, "grad_norm": 0.36985257267951965, "learning_rate": 0.0002865534561109782, "loss": 0.0869, "step": 1442 }, { "epoch": 0.20224246671338472, "grad_norm": 0.8477890491485596, "learning_rate": 0.00028653910547715857, "loss": 0.1193, "step": 1443 }, { "epoch": 0.20238262088297126, "grad_norm": 0.593531608581543, "learning_rate": 0.0002865247548433389, "loss": 0.09, "step": 1444 }, { "epoch": 0.2025227750525578, "grad_norm": 0.7947191596031189, "learning_rate": 0.00028651040420951923, "loss": 0.1022, "step": 1445 }, { "epoch": 0.20266292922214435, "grad_norm": 0.9791816473007202, "learning_rate": 0.00028649605357569956, "loss": 0.1804, "step": 1446 }, { "epoch": 0.2028030833917309, "grad_norm": 2.58573842048645, "learning_rate": 0.0002864817029418799, "loss": 0.2212, "step": 1447 }, { "epoch": 0.20294323756131744, "grad_norm": 1.0994421243667603, "learning_rate": 0.0002864673523080602, "loss": 0.2621, "step": 1448 }, { "epoch": 0.203083391730904, "grad_norm": 4.8346734046936035, "learning_rate": 0.0002864530016742406, "loss": 0.6657, "step": 1449 }, { "epoch": 0.20322354590049055, "grad_norm": 1.9288171529769897, "learning_rate": 0.0002864386510404209, "loss": 0.1628, "step": 1450 }, { "epoch": 0.2033637000700771, "grad_norm": 0.9603410363197327, "learning_rate": 0.00028642430040660125, "loss": 0.217, 
"step": 1451 }, { "epoch": 0.20350385423966363, "grad_norm": 0.513755202293396, "learning_rate": 0.00028640994977278164, "loss": 0.1873, "step": 1452 }, { "epoch": 0.20364400840925018, "grad_norm": 0.3526681065559387, "learning_rate": 0.00028639559913896197, "loss": 0.0829, "step": 1453 }, { "epoch": 0.20378416257883672, "grad_norm": 0.7088523507118225, "learning_rate": 0.0002863812485051423, "loss": 0.1644, "step": 1454 }, { "epoch": 0.20392431674842326, "grad_norm": 0.420288622379303, "learning_rate": 0.0002863668978713226, "loss": 0.089, "step": 1455 }, { "epoch": 0.2040644709180098, "grad_norm": 0.5015362501144409, "learning_rate": 0.00028635254723750295, "loss": 0.179, "step": 1456 }, { "epoch": 0.20420462508759635, "grad_norm": 1.0615626573562622, "learning_rate": 0.0002863381966036833, "loss": 0.166, "step": 1457 }, { "epoch": 0.2043447792571829, "grad_norm": 0.793479859828949, "learning_rate": 0.0002863238459698636, "loss": 0.1935, "step": 1458 }, { "epoch": 0.20448493342676943, "grad_norm": 0.48837974667549133, "learning_rate": 0.000286309495336044, "loss": 0.1676, "step": 1459 }, { "epoch": 0.204625087596356, "grad_norm": 0.8710083365440369, "learning_rate": 0.0002862951447022243, "loss": 0.2354, "step": 1460 }, { "epoch": 0.20476524176594255, "grad_norm": 0.4601697325706482, "learning_rate": 0.00028628079406840465, "loss": 0.1746, "step": 1461 }, { "epoch": 0.2049053959355291, "grad_norm": 0.4005391299724579, "learning_rate": 0.00028626644343458503, "loss": 0.0803, "step": 1462 }, { "epoch": 0.20504555010511563, "grad_norm": 0.75310879945755, "learning_rate": 0.00028625209280076536, "loss": 0.1524, "step": 1463 }, { "epoch": 0.20518570427470217, "grad_norm": 0.5398361086845398, "learning_rate": 0.0002862377421669457, "loss": 0.183, "step": 1464 }, { "epoch": 0.20532585844428872, "grad_norm": 0.29483023285865784, "learning_rate": 0.000286223391533126, "loss": 0.0921, "step": 1465 }, { "epoch": 0.20546601261387526, "grad_norm": 0.7226589918136597, 
"learning_rate": 0.00028620904089930635, "loss": 0.146, "step": 1466 }, { "epoch": 0.2056061667834618, "grad_norm": 0.799090564250946, "learning_rate": 0.0002861946902654867, "loss": 0.2097, "step": 1467 }, { "epoch": 0.20574632095304835, "grad_norm": 0.7113801836967468, "learning_rate": 0.00028618033963166706, "loss": 0.158, "step": 1468 }, { "epoch": 0.2058864751226349, "grad_norm": 0.3885248303413391, "learning_rate": 0.0002861659889978474, "loss": 0.1376, "step": 1469 }, { "epoch": 0.20602662929222143, "grad_norm": 0.5118241310119629, "learning_rate": 0.0002861516383640277, "loss": 0.1761, "step": 1470 }, { "epoch": 0.206166783461808, "grad_norm": 0.4573477804660797, "learning_rate": 0.0002861372877302081, "loss": 0.1556, "step": 1471 }, { "epoch": 0.20630693763139454, "grad_norm": 0.5322966575622559, "learning_rate": 0.0002861229370963884, "loss": 0.194, "step": 1472 }, { "epoch": 0.2064470918009811, "grad_norm": 0.5503672957420349, "learning_rate": 0.00028610858646256875, "loss": 0.212, "step": 1473 }, { "epoch": 0.20658724597056763, "grad_norm": 0.5848712921142578, "learning_rate": 0.0002860942358287491, "loss": 0.1735, "step": 1474 }, { "epoch": 0.20672740014015417, "grad_norm": 0.8324587345123291, "learning_rate": 0.0002860798851949294, "loss": 0.1932, "step": 1475 }, { "epoch": 0.20686755430974071, "grad_norm": 0.4048483073711395, "learning_rate": 0.00028606553456110974, "loss": 0.1362, "step": 1476 }, { "epoch": 0.20700770847932726, "grad_norm": 0.46509283781051636, "learning_rate": 0.00028605118392729007, "loss": 0.0806, "step": 1477 }, { "epoch": 0.2071478626489138, "grad_norm": 0.5476142168045044, "learning_rate": 0.00028603683329347045, "loss": 0.1505, "step": 1478 }, { "epoch": 0.20728801681850034, "grad_norm": 0.7694693803787231, "learning_rate": 0.0002860224826596508, "loss": 0.1832, "step": 1479 }, { "epoch": 0.20742817098808689, "grad_norm": 0.7152474522590637, "learning_rate": 0.0002860081320258311, "loss": 0.1734, "step": 1480 }, { "epoch": 
0.20756832515767343, "grad_norm": 0.8956233859062195, "learning_rate": 0.0002859937813920115, "loss": 0.2818, "step": 1481 }, { "epoch": 0.20770847932726, "grad_norm": 0.41789087653160095, "learning_rate": 0.0002859794307581918, "loss": 0.1164, "step": 1482 }, { "epoch": 0.20784863349684654, "grad_norm": 0.5818623900413513, "learning_rate": 0.00028596508012437215, "loss": 0.1244, "step": 1483 }, { "epoch": 0.20798878766643308, "grad_norm": 0.4546411335468292, "learning_rate": 0.0002859507294905525, "loss": 0.1317, "step": 1484 }, { "epoch": 0.20812894183601963, "grad_norm": 0.7050706744194031, "learning_rate": 0.0002859363788567328, "loss": 0.2242, "step": 1485 }, { "epoch": 0.20826909600560617, "grad_norm": 0.40126219391822815, "learning_rate": 0.00028592202822291313, "loss": 0.0961, "step": 1486 }, { "epoch": 0.2084092501751927, "grad_norm": 0.2570498585700989, "learning_rate": 0.0002859076775890935, "loss": 0.085, "step": 1487 }, { "epoch": 0.20854940434477925, "grad_norm": 0.48797452449798584, "learning_rate": 0.00028589332695527384, "loss": 0.1351, "step": 1488 }, { "epoch": 0.2086895585143658, "grad_norm": 0.4947861135005951, "learning_rate": 0.0002858789763214542, "loss": 0.1715, "step": 1489 }, { "epoch": 0.20882971268395234, "grad_norm": 0.5540120601654053, "learning_rate": 0.0002858646256876345, "loss": 0.1817, "step": 1490 }, { "epoch": 0.20896986685353888, "grad_norm": 0.4998747408390045, "learning_rate": 0.0002858502750538149, "loss": 0.1797, "step": 1491 }, { "epoch": 0.20911002102312543, "grad_norm": 0.3598507344722748, "learning_rate": 0.0002858359244199952, "loss": 0.0924, "step": 1492 }, { "epoch": 0.209250175192712, "grad_norm": 1.6876142024993896, "learning_rate": 0.00028582157378617554, "loss": 0.112, "step": 1493 }, { "epoch": 0.20939032936229854, "grad_norm": 0.6825133562088013, "learning_rate": 0.00028580722315235587, "loss": 0.1467, "step": 1494 }, { "epoch": 0.20953048353188508, "grad_norm": 1.0023508071899414, "learning_rate": 
0.0002857928725185362, "loss": 0.2285, "step": 1495 }, { "epoch": 0.20967063770147162, "grad_norm": 0.6522983312606812, "learning_rate": 0.00028577852188471653, "loss": 0.1628, "step": 1496 }, { "epoch": 0.20981079187105817, "grad_norm": 0.8857536911964417, "learning_rate": 0.0002857641712508969, "loss": 0.2025, "step": 1497 }, { "epoch": 0.2099509460406447, "grad_norm": 1.0567398071289062, "learning_rate": 0.00028574982061707724, "loss": 0.3421, "step": 1498 }, { "epoch": 0.21009110021023125, "grad_norm": 2.0932374000549316, "learning_rate": 0.00028573546998325757, "loss": 0.2864, "step": 1499 }, { "epoch": 0.2102312543798178, "grad_norm": 1.6516791582107544, "learning_rate": 0.0002857211193494379, "loss": 0.4194, "step": 1500 }, { "epoch": 0.21037140854940434, "grad_norm": 0.5387606024742126, "learning_rate": 0.0002857067687156182, "loss": 0.1382, "step": 1501 }, { "epoch": 0.21051156271899088, "grad_norm": 0.8462368845939636, "learning_rate": 0.00028569241808179855, "loss": 0.3042, "step": 1502 }, { "epoch": 0.21065171688857742, "grad_norm": 0.3508150577545166, "learning_rate": 0.00028567806744797894, "loss": 0.1083, "step": 1503 }, { "epoch": 0.210791871058164, "grad_norm": 0.6700693368911743, "learning_rate": 0.00028566371681415926, "loss": 0.1737, "step": 1504 }, { "epoch": 0.21093202522775054, "grad_norm": 0.27587416768074036, "learning_rate": 0.0002856493661803396, "loss": 0.092, "step": 1505 }, { "epoch": 0.21107217939733708, "grad_norm": 0.5318671464920044, "learning_rate": 0.0002856350155465199, "loss": 0.2715, "step": 1506 }, { "epoch": 0.21121233356692362, "grad_norm": 0.6454751491546631, "learning_rate": 0.0002856206649127003, "loss": 0.2197, "step": 1507 }, { "epoch": 0.21135248773651016, "grad_norm": 0.33636653423309326, "learning_rate": 0.00028560631427888063, "loss": 0.1302, "step": 1508 }, { "epoch": 0.2114926419060967, "grad_norm": 0.44161441922187805, "learning_rate": 0.00028559196364506096, "loss": 0.1617, "step": 1509 }, { "epoch": 
0.21163279607568325, "grad_norm": 0.2663300931453705, "learning_rate": 0.0002855776130112413, "loss": 0.0803, "step": 1510 }, { "epoch": 0.2117729502452698, "grad_norm": 0.5626079440116882, "learning_rate": 0.0002855632623774216, "loss": 0.1307, "step": 1511 }, { "epoch": 0.21191310441485633, "grad_norm": 0.5090451836585999, "learning_rate": 0.00028554891174360195, "loss": 0.1519, "step": 1512 }, { "epoch": 0.21205325858444288, "grad_norm": 0.5128536820411682, "learning_rate": 0.00028553456110978233, "loss": 0.1458, "step": 1513 }, { "epoch": 0.21219341275402942, "grad_norm": 0.43973883986473083, "learning_rate": 0.00028552021047596266, "loss": 0.1516, "step": 1514 }, { "epoch": 0.212333566923616, "grad_norm": 0.36193305253982544, "learning_rate": 0.000285505859842143, "loss": 0.0904, "step": 1515 }, { "epoch": 0.21247372109320253, "grad_norm": 0.6568478941917419, "learning_rate": 0.00028549150920832337, "loss": 0.1232, "step": 1516 }, { "epoch": 0.21261387526278908, "grad_norm": 0.5821335315704346, "learning_rate": 0.0002854771585745037, "loss": 0.1279, "step": 1517 }, { "epoch": 0.21275402943237562, "grad_norm": 0.3105257749557495, "learning_rate": 0.000285462807940684, "loss": 0.1304, "step": 1518 }, { "epoch": 0.21289418360196216, "grad_norm": 0.5969409346580505, "learning_rate": 0.00028544845730686436, "loss": 0.1366, "step": 1519 }, { "epoch": 0.2130343377715487, "grad_norm": 0.8048906326293945, "learning_rate": 0.0002854341066730447, "loss": 0.099, "step": 1520 }, { "epoch": 0.21317449194113525, "grad_norm": 0.3927711248397827, "learning_rate": 0.000285419756039225, "loss": 0.1209, "step": 1521 }, { "epoch": 0.2133146461107218, "grad_norm": 0.523062527179718, "learning_rate": 0.0002854054054054054, "loss": 0.165, "step": 1522 }, { "epoch": 0.21345480028030833, "grad_norm": 1.0338835716247559, "learning_rate": 0.0002853910547715857, "loss": 0.1858, "step": 1523 }, { "epoch": 0.21359495444989487, "grad_norm": 1.2094815969467163, "learning_rate": 
0.00028537670413776605, "loss": 0.1197, "step": 1524 }, { "epoch": 0.21373510861948142, "grad_norm": 0.6911324262619019, "learning_rate": 0.0002853623535039464, "loss": 0.1792, "step": 1525 }, { "epoch": 0.213875262789068, "grad_norm": 0.8866291642189026, "learning_rate": 0.00028534800287012676, "loss": 0.2176, "step": 1526 }, { "epoch": 0.21401541695865453, "grad_norm": 0.48767244815826416, "learning_rate": 0.0002853336522363071, "loss": 0.1429, "step": 1527 }, { "epoch": 0.21415557112824107, "grad_norm": 0.27532628178596497, "learning_rate": 0.0002853193016024874, "loss": 0.0895, "step": 1528 }, { "epoch": 0.21429572529782762, "grad_norm": 0.4672256112098694, "learning_rate": 0.00028530495096866775, "loss": 0.2238, "step": 1529 }, { "epoch": 0.21443587946741416, "grad_norm": 0.3992745280265808, "learning_rate": 0.0002852906003348481, "loss": 0.1187, "step": 1530 }, { "epoch": 0.2145760336370007, "grad_norm": 0.7503077983856201, "learning_rate": 0.0002852762497010284, "loss": 0.1155, "step": 1531 }, { "epoch": 0.21471618780658724, "grad_norm": 0.47434207797050476, "learning_rate": 0.0002852618990672088, "loss": 0.1223, "step": 1532 }, { "epoch": 0.2148563419761738, "grad_norm": 0.26094192266464233, "learning_rate": 0.0002852475484333891, "loss": 0.1307, "step": 1533 }, { "epoch": 0.21499649614576033, "grad_norm": 0.8494062423706055, "learning_rate": 0.00028523319779956945, "loss": 0.1225, "step": 1534 }, { "epoch": 0.21513665031534687, "grad_norm": 0.5196310877799988, "learning_rate": 0.00028521884716574983, "loss": 0.1435, "step": 1535 }, { "epoch": 0.21527680448493342, "grad_norm": 0.6249077916145325, "learning_rate": 0.00028520449653193016, "loss": 0.141, "step": 1536 }, { "epoch": 0.21541695865451999, "grad_norm": 0.37485843896865845, "learning_rate": 0.0002851901458981105, "loss": 0.1085, "step": 1537 }, { "epoch": 0.21555711282410653, "grad_norm": 0.4761439859867096, "learning_rate": 0.0002851757952642908, "loss": 0.1515, "step": 1538 }, { "epoch": 
0.21569726699369307, "grad_norm": 0.4248480200767517, "learning_rate": 0.00028516144463047114, "loss": 0.1783, "step": 1539 }, { "epoch": 0.2158374211632796, "grad_norm": 0.4296650290489197, "learning_rate": 0.00028514709399665147, "loss": 0.1289, "step": 1540 }, { "epoch": 0.21597757533286616, "grad_norm": 0.708044707775116, "learning_rate": 0.0002851327433628318, "loss": 0.1794, "step": 1541 }, { "epoch": 0.2161177295024527, "grad_norm": 1.3418065309524536, "learning_rate": 0.0002851183927290122, "loss": 0.1624, "step": 1542 }, { "epoch": 0.21625788367203924, "grad_norm": 0.4759913980960846, "learning_rate": 0.0002851040420951925, "loss": 0.1792, "step": 1543 }, { "epoch": 0.21639803784162578, "grad_norm": 0.5173408389091492, "learning_rate": 0.00028508969146137284, "loss": 0.2286, "step": 1544 }, { "epoch": 0.21653819201121233, "grad_norm": 0.9507098197937012, "learning_rate": 0.0002850753408275532, "loss": 0.3068, "step": 1545 }, { "epoch": 0.21667834618079887, "grad_norm": 0.7133569121360779, "learning_rate": 0.00028506099019373355, "loss": 0.1311, "step": 1546 }, { "epoch": 0.2168185003503854, "grad_norm": 1.594622254371643, "learning_rate": 0.0002850466395599139, "loss": 0.6437, "step": 1547 }, { "epoch": 0.21695865451997198, "grad_norm": 0.3971281349658966, "learning_rate": 0.0002850322889260942, "loss": 0.0561, "step": 1548 }, { "epoch": 0.21709880868955853, "grad_norm": 2.201596975326538, "learning_rate": 0.00028501793829227454, "loss": 0.3337, "step": 1549 }, { "epoch": 0.21723896285914507, "grad_norm": 1.44367253780365, "learning_rate": 0.00028500358765845487, "loss": 0.3225, "step": 1550 }, { "epoch": 0.2173791170287316, "grad_norm": 0.5062150359153748, "learning_rate": 0.00028498923702463525, "loss": 0.1961, "step": 1551 }, { "epoch": 0.21751927119831815, "grad_norm": 0.30372190475463867, "learning_rate": 0.0002849748863908156, "loss": 0.1103, "step": 1552 }, { "epoch": 0.2176594253679047, "grad_norm": 0.35695815086364746, "learning_rate": 
0.0002849605357569959, "loss": 0.1488, "step": 1553 }, { "epoch": 0.21779957953749124, "grad_norm": 0.3762584328651428, "learning_rate": 0.0002849461851231763, "loss": 0.1434, "step": 1554 }, { "epoch": 0.21793973370707778, "grad_norm": 0.45391854643821716, "learning_rate": 0.0002849318344893566, "loss": 0.1492, "step": 1555 }, { "epoch": 0.21807988787666432, "grad_norm": 0.5545883178710938, "learning_rate": 0.00028491748385553695, "loss": 0.1336, "step": 1556 }, { "epoch": 0.21822004204625087, "grad_norm": 0.4484400451183319, "learning_rate": 0.0002849031332217173, "loss": 0.1452, "step": 1557 }, { "epoch": 0.2183601962158374, "grad_norm": 0.49088966846466064, "learning_rate": 0.0002848887825878976, "loss": 0.2148, "step": 1558 }, { "epoch": 0.21850035038542395, "grad_norm": 0.5772733092308044, "learning_rate": 0.00028487443195407793, "loss": 0.1981, "step": 1559 }, { "epoch": 0.21864050455501052, "grad_norm": 0.5382531881332397, "learning_rate": 0.00028486008132025826, "loss": 0.188, "step": 1560 }, { "epoch": 0.21878065872459707, "grad_norm": 0.2525745630264282, "learning_rate": 0.00028484573068643864, "loss": 0.0731, "step": 1561 }, { "epoch": 0.2189208128941836, "grad_norm": 0.5901613235473633, "learning_rate": 0.00028483138005261897, "loss": 0.1267, "step": 1562 }, { "epoch": 0.21906096706377015, "grad_norm": 0.38726237416267395, "learning_rate": 0.0002848170294187993, "loss": 0.077, "step": 1563 }, { "epoch": 0.2192011212333567, "grad_norm": 0.5521304607391357, "learning_rate": 0.00028480267878497963, "loss": 0.1845, "step": 1564 }, { "epoch": 0.21934127540294324, "grad_norm": 0.664147138595581, "learning_rate": 0.00028478832815115996, "loss": 0.169, "step": 1565 }, { "epoch": 0.21948142957252978, "grad_norm": 0.5293416380882263, "learning_rate": 0.00028477397751734034, "loss": 0.2059, "step": 1566 }, { "epoch": 0.21962158374211632, "grad_norm": 0.4721493124961853, "learning_rate": 0.00028475962688352067, "loss": 0.1252, "step": 1567 }, { "epoch": 
0.21976173791170286, "grad_norm": 1.5911215543746948, "learning_rate": 0.000284745276249701, "loss": 0.2105, "step": 1568 }, { "epoch": 0.2199018920812894, "grad_norm": 0.4312524199485779, "learning_rate": 0.0002847309256158813, "loss": 0.1612, "step": 1569 }, { "epoch": 0.22004204625087595, "grad_norm": 0.4137956500053406, "learning_rate": 0.0002847165749820617, "loss": 0.0973, "step": 1570 }, { "epoch": 0.22018220042046252, "grad_norm": 0.568093478679657, "learning_rate": 0.00028470222434824204, "loss": 0.1812, "step": 1571 }, { "epoch": 0.22032235459004906, "grad_norm": 0.5128418803215027, "learning_rate": 0.00028468787371442237, "loss": 0.1782, "step": 1572 }, { "epoch": 0.2204625087596356, "grad_norm": 0.4304228723049164, "learning_rate": 0.0002846735230806027, "loss": 0.1027, "step": 1573 }, { "epoch": 0.22060266292922215, "grad_norm": 0.7367760539054871, "learning_rate": 0.000284659172446783, "loss": 0.2063, "step": 1574 }, { "epoch": 0.2207428170988087, "grad_norm": 0.5827558040618896, "learning_rate": 0.00028464482181296335, "loss": 0.1445, "step": 1575 }, { "epoch": 0.22088297126839523, "grad_norm": 0.4219644069671631, "learning_rate": 0.0002846304711791437, "loss": 0.1038, "step": 1576 }, { "epoch": 0.22102312543798178, "grad_norm": 0.4029349982738495, "learning_rate": 0.00028461612054532406, "loss": 0.08, "step": 1577 }, { "epoch": 0.22116327960756832, "grad_norm": 0.6098003387451172, "learning_rate": 0.0002846017699115044, "loss": 0.1589, "step": 1578 }, { "epoch": 0.22130343377715486, "grad_norm": 0.7220091223716736, "learning_rate": 0.0002845874192776847, "loss": 0.0989, "step": 1579 }, { "epoch": 0.2214435879467414, "grad_norm": 0.5958103537559509, "learning_rate": 0.0002845730686438651, "loss": 0.0983, "step": 1580 }, { "epoch": 0.22158374211632795, "grad_norm": 0.6537734866142273, "learning_rate": 0.00028455871801004543, "loss": 0.168, "step": 1581 }, { "epoch": 0.22172389628591452, "grad_norm": 0.3661618232727051, "learning_rate": 
0.00028454436737622576, "loss": 0.1639, "step": 1582 }, { "epoch": 0.22186405045550106, "grad_norm": 0.5802404284477234, "learning_rate": 0.0002845300167424061, "loss": 0.1556, "step": 1583 }, { "epoch": 0.2220042046250876, "grad_norm": 0.7161227464675903, "learning_rate": 0.0002845156661085864, "loss": 0.1229, "step": 1584 }, { "epoch": 0.22214435879467415, "grad_norm": 0.32338374853134155, "learning_rate": 0.00028450131547476674, "loss": 0.1018, "step": 1585 }, { "epoch": 0.2222845129642607, "grad_norm": 0.43043947219848633, "learning_rate": 0.00028448696484094713, "loss": 0.1294, "step": 1586 }, { "epoch": 0.22242466713384723, "grad_norm": 1.0216281414031982, "learning_rate": 0.00028447261420712746, "loss": 0.2008, "step": 1587 }, { "epoch": 0.22256482130343377, "grad_norm": 0.8226951360702515, "learning_rate": 0.0002844582635733078, "loss": 0.1864, "step": 1588 }, { "epoch": 0.22270497547302032, "grad_norm": 0.5976907014846802, "learning_rate": 0.00028444391293948817, "loss": 0.1547, "step": 1589 }, { "epoch": 0.22284512964260686, "grad_norm": 0.3465006947517395, "learning_rate": 0.0002844295623056685, "loss": 0.0855, "step": 1590 }, { "epoch": 0.2229852838121934, "grad_norm": 2.0604941844940186, "learning_rate": 0.0002844152116718488, "loss": 0.2475, "step": 1591 }, { "epoch": 0.22312543798177994, "grad_norm": 0.43012574315071106, "learning_rate": 0.00028440086103802915, "loss": 0.1734, "step": 1592 }, { "epoch": 0.22326559215136652, "grad_norm": 0.5485913157463074, "learning_rate": 0.0002843865104042095, "loss": 0.1396, "step": 1593 }, { "epoch": 0.22340574632095306, "grad_norm": 0.5287312865257263, "learning_rate": 0.0002843721597703898, "loss": 0.1607, "step": 1594 }, { "epoch": 0.2235459004905396, "grad_norm": 1.0549744367599487, "learning_rate": 0.00028435780913657014, "loss": 0.1337, "step": 1595 }, { "epoch": 0.22368605466012614, "grad_norm": 0.39720407128334045, "learning_rate": 0.0002843434585027505, "loss": 0.0794, "step": 1596 }, { "epoch": 
0.2238262088297127, "grad_norm": 0.4827709197998047, "learning_rate": 0.00028432910786893085, "loss": 0.1158, "step": 1597 }, { "epoch": 0.22396636299929923, "grad_norm": 0.7434794902801514, "learning_rate": 0.0002843147572351112, "loss": 0.1725, "step": 1598 }, { "epoch": 0.22410651716888577, "grad_norm": 2.4218194484710693, "learning_rate": 0.00028430040660129156, "loss": 0.197, "step": 1599 }, { "epoch": 0.22424667133847231, "grad_norm": 4.562673091888428, "learning_rate": 0.0002842860559674719, "loss": 0.7878, "step": 1600 }, { "epoch": 0.22438682550805886, "grad_norm": 0.4756203889846802, "learning_rate": 0.0002842717053336522, "loss": 0.1586, "step": 1601 }, { "epoch": 0.2245269796776454, "grad_norm": 0.28987544775009155, "learning_rate": 0.00028425735469983255, "loss": 0.0946, "step": 1602 }, { "epoch": 0.22466713384723194, "grad_norm": 0.5316409468650818, "learning_rate": 0.0002842430040660129, "loss": 0.0897, "step": 1603 }, { "epoch": 0.2248072880168185, "grad_norm": 0.5533539056777954, "learning_rate": 0.0002842286534321932, "loss": 0.1509, "step": 1604 }, { "epoch": 0.22494744218640506, "grad_norm": 0.3826991319656372, "learning_rate": 0.0002842143027983736, "loss": 0.1302, "step": 1605 }, { "epoch": 0.2250875963559916, "grad_norm": 0.6411821246147156, "learning_rate": 0.0002841999521645539, "loss": 0.1069, "step": 1606 }, { "epoch": 0.22522775052557814, "grad_norm": 1.9257643222808838, "learning_rate": 0.00028418560153073424, "loss": 0.094, "step": 1607 }, { "epoch": 0.22536790469516468, "grad_norm": 0.4175720512866974, "learning_rate": 0.0002841712508969146, "loss": 0.0768, "step": 1608 }, { "epoch": 0.22550805886475123, "grad_norm": 0.4755411446094513, "learning_rate": 0.00028415690026309496, "loss": 0.1902, "step": 1609 }, { "epoch": 0.22564821303433777, "grad_norm": 0.8655763864517212, "learning_rate": 0.0002841425496292753, "loss": 0.2774, "step": 1610 }, { "epoch": 0.2257883672039243, "grad_norm": 0.7017194628715515, "learning_rate": 
0.0002841281989954556, "loss": 0.2686, "step": 1611 }, { "epoch": 0.22592852137351085, "grad_norm": 0.49989691376686096, "learning_rate": 0.00028411384836163594, "loss": 0.1321, "step": 1612 }, { "epoch": 0.2260686755430974, "grad_norm": 0.43530431389808655, "learning_rate": 0.00028409949772781627, "loss": 0.1604, "step": 1613 }, { "epoch": 0.22620882971268394, "grad_norm": 0.6284531354904175, "learning_rate": 0.0002840851470939966, "loss": 0.2665, "step": 1614 }, { "epoch": 0.2263489838822705, "grad_norm": 0.40533867478370667, "learning_rate": 0.000284070796460177, "loss": 0.1033, "step": 1615 }, { "epoch": 0.22648913805185705, "grad_norm": 1.365997314453125, "learning_rate": 0.0002840564458263573, "loss": 0.1557, "step": 1616 }, { "epoch": 0.2266292922214436, "grad_norm": 0.4368007481098175, "learning_rate": 0.00028404209519253764, "loss": 0.1064, "step": 1617 }, { "epoch": 0.22676944639103014, "grad_norm": 0.5887719392776489, "learning_rate": 0.000284027744558718, "loss": 0.275, "step": 1618 }, { "epoch": 0.22690960056061668, "grad_norm": 0.3346814215183258, "learning_rate": 0.00028401339392489835, "loss": 0.0857, "step": 1619 }, { "epoch": 0.22704975473020322, "grad_norm": 0.46464040875434875, "learning_rate": 0.0002839990432910787, "loss": 0.1826, "step": 1620 }, { "epoch": 0.22718990889978977, "grad_norm": 0.5752182602882385, "learning_rate": 0.000283984692657259, "loss": 0.2196, "step": 1621 }, { "epoch": 0.2273300630693763, "grad_norm": 0.5127241611480713, "learning_rate": 0.00028397034202343934, "loss": 0.1482, "step": 1622 }, { "epoch": 0.22747021723896285, "grad_norm": 0.7479202747344971, "learning_rate": 0.00028395599138961966, "loss": 0.1001, "step": 1623 }, { "epoch": 0.2276103714085494, "grad_norm": 0.4733017683029175, "learning_rate": 0.00028394164075580005, "loss": 0.1854, "step": 1624 }, { "epoch": 0.22775052557813594, "grad_norm": 0.6494266390800476, "learning_rate": 0.0002839272901219804, "loss": 0.0679, "step": 1625 }, { "epoch": 
0.2278906797477225, "grad_norm": 0.6814122796058655, "learning_rate": 0.0002839129394881607, "loss": 0.157, "step": 1626 }, { "epoch": 0.22803083391730905, "grad_norm": 1.1917665004730225, "learning_rate": 0.00028389858885434103, "loss": 0.2648, "step": 1627 }, { "epoch": 0.2281709880868956, "grad_norm": 0.3667110502719879, "learning_rate": 0.00028388423822052136, "loss": 0.0965, "step": 1628 }, { "epoch": 0.22831114225648214, "grad_norm": 0.4370315968990326, "learning_rate": 0.0002838698875867017, "loss": 0.113, "step": 1629 }, { "epoch": 0.22845129642606868, "grad_norm": 0.2094385325908661, "learning_rate": 0.00028385553695288207, "loss": 0.0975, "step": 1630 }, { "epoch": 0.22859145059565522, "grad_norm": 0.5804761052131653, "learning_rate": 0.0002838411863190624, "loss": 0.2258, "step": 1631 }, { "epoch": 0.22873160476524176, "grad_norm": 0.632855236530304, "learning_rate": 0.00028382683568524273, "loss": 0.166, "step": 1632 }, { "epoch": 0.2288717589348283, "grad_norm": 0.4397091567516327, "learning_rate": 0.00028381248505142306, "loss": 0.1756, "step": 1633 }, { "epoch": 0.22901191310441485, "grad_norm": 0.4569563567638397, "learning_rate": 0.00028379813441760344, "loss": 0.1451, "step": 1634 }, { "epoch": 0.2291520672740014, "grad_norm": 0.2829950153827667, "learning_rate": 0.00028378378378378377, "loss": 0.0757, "step": 1635 }, { "epoch": 0.22929222144358793, "grad_norm": 0.3748777210712433, "learning_rate": 0.0002837694331499641, "loss": 0.1053, "step": 1636 }, { "epoch": 0.2294323756131745, "grad_norm": 0.6762419939041138, "learning_rate": 0.0002837550825161444, "loss": 0.1543, "step": 1637 }, { "epoch": 0.22957252978276105, "grad_norm": 0.5129497647285461, "learning_rate": 0.00028374073188232475, "loss": 0.1149, "step": 1638 }, { "epoch": 0.2297126839523476, "grad_norm": 0.8427003622055054, "learning_rate": 0.0002837263812485051, "loss": 0.2351, "step": 1639 }, { "epoch": 0.22985283812193413, "grad_norm": 0.6942387819290161, "learning_rate": 
0.00028371203061468547, "loss": 0.1763, "step": 1640 }, { "epoch": 0.22999299229152068, "grad_norm": 0.6184607148170471, "learning_rate": 0.0002836976799808658, "loss": 0.1157, "step": 1641 }, { "epoch": 0.23013314646110722, "grad_norm": 0.38860252499580383, "learning_rate": 0.0002836833293470461, "loss": 0.1023, "step": 1642 }, { "epoch": 0.23027330063069376, "grad_norm": 1.0864946842193604, "learning_rate": 0.0002836689787132265, "loss": 0.2841, "step": 1643 }, { "epoch": 0.2304134548002803, "grad_norm": 0.30519142746925354, "learning_rate": 0.00028365462807940683, "loss": 0.1072, "step": 1644 }, { "epoch": 0.23055360896986685, "grad_norm": 4.428708076477051, "learning_rate": 0.00028364027744558716, "loss": 0.3774, "step": 1645 }, { "epoch": 0.2306937631394534, "grad_norm": 0.7120837569236755, "learning_rate": 0.0002836259268117675, "loss": 0.1201, "step": 1646 }, { "epoch": 0.23083391730903993, "grad_norm": 1.7699705362319946, "learning_rate": 0.0002836115761779478, "loss": 0.5443, "step": 1647 }, { "epoch": 0.2309740714786265, "grad_norm": 0.9952479004859924, "learning_rate": 0.00028359722554412815, "loss": 0.1792, "step": 1648 }, { "epoch": 0.23111422564821305, "grad_norm": 1.3076952695846558, "learning_rate": 0.0002835828749103085, "loss": 0.2112, "step": 1649 }, { "epoch": 0.2312543798177996, "grad_norm": 1.6938302516937256, "learning_rate": 0.00028356852427648886, "loss": 0.4612, "step": 1650 }, { "epoch": 0.23139453398738613, "grad_norm": 0.33778470754623413, "learning_rate": 0.0002835541736426692, "loss": 0.1282, "step": 1651 }, { "epoch": 0.23153468815697267, "grad_norm": 0.5366274118423462, "learning_rate": 0.0002835398230088495, "loss": 0.1808, "step": 1652 }, { "epoch": 0.23167484232655922, "grad_norm": 0.6916155815124512, "learning_rate": 0.0002835254723750299, "loss": 0.2167, "step": 1653 }, { "epoch": 0.23181499649614576, "grad_norm": 0.6328081488609314, "learning_rate": 0.00028351112174121023, "loss": 0.1434, "step": 1654 }, { "epoch": 
0.2319551506657323, "grad_norm": 0.6392910480499268, "learning_rate": 0.00028349677110739056, "loss": 0.1037, "step": 1655 }, { "epoch": 0.23209530483531884, "grad_norm": 0.2952549457550049, "learning_rate": 0.0002834824204735709, "loss": 0.066, "step": 1656 }, { "epoch": 0.2322354590049054, "grad_norm": 0.6679016947746277, "learning_rate": 0.0002834680698397512, "loss": 0.2282, "step": 1657 }, { "epoch": 0.23237561317449193, "grad_norm": 0.354834645986557, "learning_rate": 0.00028345371920593154, "loss": 0.0842, "step": 1658 }, { "epoch": 0.2325157673440785, "grad_norm": 0.4278321862220764, "learning_rate": 0.0002834393685721119, "loss": 0.1135, "step": 1659 }, { "epoch": 0.23265592151366504, "grad_norm": 1.2079992294311523, "learning_rate": 0.00028342501793829225, "loss": 0.2546, "step": 1660 }, { "epoch": 0.23279607568325159, "grad_norm": 0.44434207677841187, "learning_rate": 0.0002834106673044726, "loss": 0.1513, "step": 1661 }, { "epoch": 0.23293622985283813, "grad_norm": 0.7500813603401184, "learning_rate": 0.00028339631667065297, "loss": 0.2029, "step": 1662 }, { "epoch": 0.23307638402242467, "grad_norm": 0.41560229659080505, "learning_rate": 0.0002833819660368333, "loss": 0.175, "step": 1663 }, { "epoch": 0.2332165381920112, "grad_norm": 0.27083083987236023, "learning_rate": 0.0002833676154030136, "loss": 0.0919, "step": 1664 }, { "epoch": 0.23335669236159776, "grad_norm": 0.47899481654167175, "learning_rate": 0.00028335326476919395, "loss": 0.0977, "step": 1665 }, { "epoch": 0.2334968465311843, "grad_norm": 0.9057362675666809, "learning_rate": 0.0002833389141353743, "loss": 0.1418, "step": 1666 }, { "epoch": 0.23363700070077084, "grad_norm": 0.868535578250885, "learning_rate": 0.0002833245635015546, "loss": 0.1956, "step": 1667 }, { "epoch": 0.23377715487035738, "grad_norm": 0.9943937063217163, "learning_rate": 0.00028331021286773494, "loss": 0.1581, "step": 1668 }, { "epoch": 0.23391730903994393, "grad_norm": 0.6929216980934143, "learning_rate": 
0.0002832958622339153, "loss": 0.1216, "step": 1669 }, { "epoch": 0.23405746320953047, "grad_norm": 0.3878985643386841, "learning_rate": 0.00028328151160009565, "loss": 0.115, "step": 1670 }, { "epoch": 0.23419761737911704, "grad_norm": 0.8233346939086914, "learning_rate": 0.000283267160966276, "loss": 0.235, "step": 1671 }, { "epoch": 0.23433777154870358, "grad_norm": 0.40506336092948914, "learning_rate": 0.00028325281033245636, "loss": 0.0697, "step": 1672 }, { "epoch": 0.23447792571829013, "grad_norm": 0.8572129011154175, "learning_rate": 0.0002832384596986367, "loss": 0.0907, "step": 1673 }, { "epoch": 0.23461807988787667, "grad_norm": 0.3173794150352478, "learning_rate": 0.000283224109064817, "loss": 0.1381, "step": 1674 }, { "epoch": 0.2347582340574632, "grad_norm": 0.40838271379470825, "learning_rate": 0.00028320975843099735, "loss": 0.1155, "step": 1675 }, { "epoch": 0.23489838822704975, "grad_norm": 0.37829676270484924, "learning_rate": 0.0002831954077971777, "loss": 0.0765, "step": 1676 }, { "epoch": 0.2350385423966363, "grad_norm": 0.2388998419046402, "learning_rate": 0.000283181057163358, "loss": 0.0664, "step": 1677 }, { "epoch": 0.23517869656622284, "grad_norm": 1.0567635297775269, "learning_rate": 0.0002831667065295384, "loss": 0.104, "step": 1678 }, { "epoch": 0.23531885073580938, "grad_norm": 0.5665960311889648, "learning_rate": 0.0002831523558957187, "loss": 0.1595, "step": 1679 }, { "epoch": 0.23545900490539592, "grad_norm": 0.37826770544052124, "learning_rate": 0.00028313800526189904, "loss": 0.0732, "step": 1680 }, { "epoch": 0.23559915907498247, "grad_norm": 0.5304650664329529, "learning_rate": 0.00028312365462807937, "loss": 0.0886, "step": 1681 }, { "epoch": 0.23573931324456904, "grad_norm": 0.4689146876335144, "learning_rate": 0.00028310930399425975, "loss": 0.1095, "step": 1682 }, { "epoch": 0.23587946741415558, "grad_norm": 0.5237705707550049, "learning_rate": 0.0002830949533604401, "loss": 0.1324, "step": 1683 }, { "epoch": 
0.23601962158374212, "grad_norm": 0.586449384689331, "learning_rate": 0.0002830806027266204, "loss": 0.1118, "step": 1684 }, { "epoch": 0.23615977575332867, "grad_norm": 0.8396391272544861, "learning_rate": 0.00028306625209280074, "loss": 0.209, "step": 1685 }, { "epoch": 0.2362999299229152, "grad_norm": 0.5026506185531616, "learning_rate": 0.00028305190145898107, "loss": 0.0844, "step": 1686 }, { "epoch": 0.23644008409250175, "grad_norm": 0.3826777935028076, "learning_rate": 0.0002830375508251614, "loss": 0.1025, "step": 1687 }, { "epoch": 0.2365802382620883, "grad_norm": 0.34839966893196106, "learning_rate": 0.0002830232001913418, "loss": 0.1385, "step": 1688 }, { "epoch": 0.23672039243167484, "grad_norm": 0.42290136218070984, "learning_rate": 0.0002830088495575221, "loss": 0.0731, "step": 1689 }, { "epoch": 0.23686054660126138, "grad_norm": 1.7799055576324463, "learning_rate": 0.00028299449892370244, "loss": 0.276, "step": 1690 }, { "epoch": 0.23700070077084792, "grad_norm": 1.0220530033111572, "learning_rate": 0.00028298014828988276, "loss": 0.2766, "step": 1691 }, { "epoch": 0.23714085494043446, "grad_norm": 0.5796447396278381, "learning_rate": 0.0002829657976560631, "loss": 0.1435, "step": 1692 }, { "epoch": 0.23728100911002104, "grad_norm": 0.711348295211792, "learning_rate": 0.0002829514470222435, "loss": 0.265, "step": 1693 }, { "epoch": 0.23742116327960758, "grad_norm": 1.6933900117874146, "learning_rate": 0.0002829370963884238, "loss": 0.1644, "step": 1694 }, { "epoch": 0.23756131744919412, "grad_norm": 0.5183990597724915, "learning_rate": 0.00028292274575460413, "loss": 0.096, "step": 1695 }, { "epoch": 0.23770147161878066, "grad_norm": 0.5787401795387268, "learning_rate": 0.00028290839512078446, "loss": 0.1613, "step": 1696 }, { "epoch": 0.2378416257883672, "grad_norm": 0.621388852596283, "learning_rate": 0.00028289404448696484, "loss": 0.0753, "step": 1697 }, { "epoch": 0.23798177995795375, "grad_norm": 1.5828367471694946, "learning_rate": 
0.0002828796938531452, "loss": 0.3064, "step": 1698 }, { "epoch": 0.2381219341275403, "grad_norm": 0.8595340847969055, "learning_rate": 0.0002828653432193255, "loss": 0.3176, "step": 1699 }, { "epoch": 0.23826208829712683, "grad_norm": 13.367734909057617, "learning_rate": 0.00028285099258550583, "loss": 0.6157, "step": 1700 }, { "epoch": 0.23840224246671338, "grad_norm": 0.7175517678260803, "learning_rate": 0.00028283664195168616, "loss": 0.1334, "step": 1701 }, { "epoch": 0.23854239663629992, "grad_norm": 0.9889596104621887, "learning_rate": 0.0002828222913178665, "loss": 0.2325, "step": 1702 }, { "epoch": 0.23868255080588646, "grad_norm": 0.31712815165519714, "learning_rate": 0.0002828079406840468, "loss": 0.0906, "step": 1703 }, { "epoch": 0.23882270497547303, "grad_norm": 0.7756525278091431, "learning_rate": 0.0002827935900502272, "loss": 0.1938, "step": 1704 }, { "epoch": 0.23896285914505958, "grad_norm": 0.8887304663658142, "learning_rate": 0.00028277923941640753, "loss": 0.1422, "step": 1705 }, { "epoch": 0.23910301331464612, "grad_norm": 0.2697232663631439, "learning_rate": 0.00028276488878258786, "loss": 0.0939, "step": 1706 }, { "epoch": 0.23924316748423266, "grad_norm": 0.38379180431365967, "learning_rate": 0.00028275053814876824, "loss": 0.1197, "step": 1707 }, { "epoch": 0.2393833216538192, "grad_norm": 0.5237014293670654, "learning_rate": 0.00028273618751494857, "loss": 0.16, "step": 1708 }, { "epoch": 0.23952347582340575, "grad_norm": 0.8489036560058594, "learning_rate": 0.0002827218368811289, "loss": 0.1444, "step": 1709 }, { "epoch": 0.2396636299929923, "grad_norm": 0.5384160876274109, "learning_rate": 0.0002827074862473092, "loss": 0.1563, "step": 1710 }, { "epoch": 0.23980378416257883, "grad_norm": 0.8031654357910156, "learning_rate": 0.00028269313561348955, "loss": 0.1999, "step": 1711 }, { "epoch": 0.23994393833216537, "grad_norm": 0.5967836380004883, "learning_rate": 0.0002826787849796699, "loss": 0.1206, "step": 1712 }, { "epoch": 
0.24008409250175192, "grad_norm": 0.6417787671089172, "learning_rate": 0.00028266443434585026, "loss": 0.2116, "step": 1713 }, { "epoch": 0.24022424667133846, "grad_norm": 0.5220252275466919, "learning_rate": 0.0002826500837120306, "loss": 0.1285, "step": 1714 }, { "epoch": 0.24036440084092503, "grad_norm": 0.5320206880569458, "learning_rate": 0.0002826357330782109, "loss": 0.1857, "step": 1715 }, { "epoch": 0.24050455501051157, "grad_norm": 0.35923105478286743, "learning_rate": 0.00028262138244439125, "loss": 0.1292, "step": 1716 }, { "epoch": 0.24064470918009812, "grad_norm": 0.7133763432502747, "learning_rate": 0.00028260703181057163, "loss": 0.1718, "step": 1717 }, { "epoch": 0.24078486334968466, "grad_norm": 0.5643021464347839, "learning_rate": 0.00028259268117675196, "loss": 0.1419, "step": 1718 }, { "epoch": 0.2409250175192712, "grad_norm": 0.27440154552459717, "learning_rate": 0.0002825783305429323, "loss": 0.0855, "step": 1719 }, { "epoch": 0.24106517168885774, "grad_norm": 0.46709468960762024, "learning_rate": 0.0002825639799091126, "loss": 0.1914, "step": 1720 }, { "epoch": 0.24120532585844429, "grad_norm": 0.2519575357437134, "learning_rate": 0.00028254962927529295, "loss": 0.0499, "step": 1721 }, { "epoch": 0.24134548002803083, "grad_norm": 0.5738582611083984, "learning_rate": 0.0002825352786414733, "loss": 0.1764, "step": 1722 }, { "epoch": 0.24148563419761737, "grad_norm": 0.43622833490371704, "learning_rate": 0.00028252092800765366, "loss": 0.1487, "step": 1723 }, { "epoch": 0.24162578836720391, "grad_norm": 0.4310140311717987, "learning_rate": 0.000282506577373834, "loss": 0.1111, "step": 1724 }, { "epoch": 0.24176594253679046, "grad_norm": 0.36717653274536133, "learning_rate": 0.0002824922267400143, "loss": 0.144, "step": 1725 }, { "epoch": 0.24190609670637703, "grad_norm": 0.34282341599464417, "learning_rate": 0.0002824778761061947, "loss": 0.0768, "step": 1726 }, { "epoch": 0.24204625087596357, "grad_norm": 0.7245255708694458, "learning_rate": 
0.000282463525472375, "loss": 0.2162, "step": 1727 }, { "epoch": 0.2421864050455501, "grad_norm": 0.5724814534187317, "learning_rate": 0.00028244917483855536, "loss": 0.1002, "step": 1728 }, { "epoch": 0.24232655921513666, "grad_norm": 0.5859802961349487, "learning_rate": 0.0002824348242047357, "loss": 0.0752, "step": 1729 }, { "epoch": 0.2424667133847232, "grad_norm": 0.7630148530006409, "learning_rate": 0.000282420473570916, "loss": 0.0818, "step": 1730 }, { "epoch": 0.24260686755430974, "grad_norm": 0.41757214069366455, "learning_rate": 0.00028240612293709634, "loss": 0.109, "step": 1731 }, { "epoch": 0.24274702172389628, "grad_norm": 0.547549307346344, "learning_rate": 0.0002823917723032767, "loss": 0.126, "step": 1732 }, { "epoch": 0.24288717589348283, "grad_norm": 0.4793872833251953, "learning_rate": 0.00028237742166945705, "loss": 0.1508, "step": 1733 }, { "epoch": 0.24302733006306937, "grad_norm": 0.5726809501647949, "learning_rate": 0.0002823630710356374, "loss": 0.1467, "step": 1734 }, { "epoch": 0.2431674842326559, "grad_norm": 0.43208590149879456, "learning_rate": 0.0002823487204018177, "loss": 0.1654, "step": 1735 }, { "epoch": 0.24330763840224245, "grad_norm": 0.43308356404304504, "learning_rate": 0.0002823343697679981, "loss": 0.0821, "step": 1736 }, { "epoch": 0.24344779257182902, "grad_norm": 0.48286673426628113, "learning_rate": 0.0002823200191341784, "loss": 0.1521, "step": 1737 }, { "epoch": 0.24358794674141557, "grad_norm": 0.4979013204574585, "learning_rate": 0.00028230566850035875, "loss": 0.1914, "step": 1738 }, { "epoch": 0.2437281009110021, "grad_norm": 0.637593150138855, "learning_rate": 0.0002822913178665391, "loss": 0.1922, "step": 1739 }, { "epoch": 0.24386825508058865, "grad_norm": 0.6245976090431213, "learning_rate": 0.0002822769672327194, "loss": 0.1741, "step": 1740 }, { "epoch": 0.2440084092501752, "grad_norm": 0.548738420009613, "learning_rate": 0.00028226261659889973, "loss": 0.157, "step": 1741 }, { "epoch": 
0.24414856341976174, "grad_norm": 0.5295251607894897, "learning_rate": 0.0002822482659650801, "loss": 0.2166, "step": 1742 }, { "epoch": 0.24428871758934828, "grad_norm": 0.7167810201644897, "learning_rate": 0.00028223391533126045, "loss": 0.0407, "step": 1743 }, { "epoch": 0.24442887175893482, "grad_norm": 0.5691710114479065, "learning_rate": 0.0002822195646974408, "loss": 0.1082, "step": 1744 }, { "epoch": 0.24456902592852137, "grad_norm": 0.4993327856063843, "learning_rate": 0.00028220521406362116, "loss": 0.0635, "step": 1745 }, { "epoch": 0.2447091800981079, "grad_norm": 2.1290979385375977, "learning_rate": 0.0002821908634298015, "loss": 0.2961, "step": 1746 }, { "epoch": 0.24484933426769445, "grad_norm": 1.4649319648742676, "learning_rate": 0.0002821765127959818, "loss": 0.122, "step": 1747 }, { "epoch": 0.24498948843728102, "grad_norm": 0.8563714027404785, "learning_rate": 0.00028216216216216214, "loss": 0.1444, "step": 1748 }, { "epoch": 0.24512964260686756, "grad_norm": 2.282214403152466, "learning_rate": 0.00028214781152834247, "loss": 0.3792, "step": 1749 }, { "epoch": 0.2452697967764541, "grad_norm": 1.725813627243042, "learning_rate": 0.0002821334608945228, "loss": 0.2854, "step": 1750 }, { "epoch": 0.24540995094604065, "grad_norm": 0.451020747423172, "learning_rate": 0.00028211911026070313, "loss": 0.1077, "step": 1751 }, { "epoch": 0.2455501051156272, "grad_norm": 0.7115446925163269, "learning_rate": 0.0002821047596268835, "loss": 0.1016, "step": 1752 }, { "epoch": 0.24569025928521374, "grad_norm": 0.6940094232559204, "learning_rate": 0.00028209040899306384, "loss": 0.2974, "step": 1753 }, { "epoch": 0.24583041345480028, "grad_norm": 0.4503176212310791, "learning_rate": 0.00028207605835924417, "loss": 0.1457, "step": 1754 }, { "epoch": 0.24597056762438682, "grad_norm": 0.2638428509235382, "learning_rate": 0.0002820617077254245, "loss": 0.1091, "step": 1755 }, { "epoch": 0.24611072179397336, "grad_norm": 0.4660966694355011, "learning_rate": 
0.0002820473570916048, "loss": 0.1168, "step": 1756 }, { "epoch": 0.2462508759635599, "grad_norm": 0.3073606491088867, "learning_rate": 0.0002820330064577852, "loss": 0.0645, "step": 1757 }, { "epoch": 0.24639103013314645, "grad_norm": 0.2171139121055603, "learning_rate": 0.00028201865582396554, "loss": 0.0749, "step": 1758 }, { "epoch": 0.24653118430273302, "grad_norm": 0.5129942297935486, "learning_rate": 0.00028200430519014587, "loss": 0.2485, "step": 1759 }, { "epoch": 0.24667133847231956, "grad_norm": 0.329819917678833, "learning_rate": 0.0002819899545563262, "loss": 0.1029, "step": 1760 }, { "epoch": 0.2468114926419061, "grad_norm": 0.13715021312236786, "learning_rate": 0.0002819756039225066, "loss": 0.0382, "step": 1761 }, { "epoch": 0.24695164681149265, "grad_norm": 1.19892156124115, "learning_rate": 0.0002819612532886869, "loss": 0.2696, "step": 1762 }, { "epoch": 0.2470918009810792, "grad_norm": 0.6392252445220947, "learning_rate": 0.00028194690265486723, "loss": 0.1501, "step": 1763 }, { "epoch": 0.24723195515066573, "grad_norm": 0.36968758702278137, "learning_rate": 0.00028193255202104756, "loss": 0.1253, "step": 1764 }, { "epoch": 0.24737210932025228, "grad_norm": 0.5530754923820496, "learning_rate": 0.0002819182013872279, "loss": 0.1945, "step": 1765 }, { "epoch": 0.24751226348983882, "grad_norm": 0.4176405668258667, "learning_rate": 0.0002819038507534082, "loss": 0.1087, "step": 1766 }, { "epoch": 0.24765241765942536, "grad_norm": 0.7563183903694153, "learning_rate": 0.00028188950011958855, "loss": 0.2602, "step": 1767 }, { "epoch": 0.2477925718290119, "grad_norm": 0.455348402261734, "learning_rate": 0.00028187514948576893, "loss": 0.0768, "step": 1768 }, { "epoch": 0.24793272599859845, "grad_norm": 0.4709351360797882, "learning_rate": 0.00028186079885194926, "loss": 0.1149, "step": 1769 }, { "epoch": 0.24807288016818502, "grad_norm": 0.4431311786174774, "learning_rate": 0.0002818464482181296, "loss": 0.1066, "step": 1770 }, { "epoch": 
0.24821303433777156, "grad_norm": 0.29459160566329956, "learning_rate": 0.00028183209758430997, "loss": 0.0773, "step": 1771 }, { "epoch": 0.2483531885073581, "grad_norm": 0.6708833575248718, "learning_rate": 0.0002818177469504903, "loss": 0.1052, "step": 1772 }, { "epoch": 0.24849334267694465, "grad_norm": 0.3578279912471771, "learning_rate": 0.00028180339631667063, "loss": 0.0929, "step": 1773 }, { "epoch": 0.2486334968465312, "grad_norm": 0.35013461112976074, "learning_rate": 0.00028178904568285096, "loss": 0.1361, "step": 1774 }, { "epoch": 0.24877365101611773, "grad_norm": 1.1069989204406738, "learning_rate": 0.0002817746950490313, "loss": 0.2124, "step": 1775 }, { "epoch": 0.24891380518570427, "grad_norm": 0.41144025325775146, "learning_rate": 0.0002817603444152116, "loss": 0.1135, "step": 1776 }, { "epoch": 0.24905395935529082, "grad_norm": 0.3035569190979004, "learning_rate": 0.000281745993781392, "loss": 0.091, "step": 1777 }, { "epoch": 0.24919411352487736, "grad_norm": 0.4869634807109833, "learning_rate": 0.0002817316431475723, "loss": 0.1446, "step": 1778 }, { "epoch": 0.2493342676944639, "grad_norm": 0.46246063709259033, "learning_rate": 0.00028171729251375265, "loss": 0.1514, "step": 1779 }, { "epoch": 0.24947442186405044, "grad_norm": 0.48864373564720154, "learning_rate": 0.00028170294187993304, "loss": 0.1294, "step": 1780 }, { "epoch": 0.24961457603363701, "grad_norm": 0.7949655652046204, "learning_rate": 0.00028168859124611337, "loss": 0.1426, "step": 1781 }, { "epoch": 0.24975473020322356, "grad_norm": 0.3172481656074524, "learning_rate": 0.0002816742406122937, "loss": 0.124, "step": 1782 }, { "epoch": 0.2498948843728101, "grad_norm": 0.5989638566970825, "learning_rate": 0.000281659889978474, "loss": 0.1502, "step": 1783 }, { "epoch": 0.2500350385423966, "grad_norm": 0.3476879298686981, "learning_rate": 0.00028164553934465435, "loss": 0.0561, "step": 1784 }, { "epoch": 0.2501751927119832, "grad_norm": 0.3938239812850952, "learning_rate": 
0.0002816311887108347, "loss": 0.0708, "step": 1785 }, { "epoch": 0.2503153468815697, "grad_norm": 0.42315611243247986, "learning_rate": 0.000281616838077015, "loss": 0.1009, "step": 1786 }, { "epoch": 0.25045550105115627, "grad_norm": 0.7514938116073608, "learning_rate": 0.0002816024874431954, "loss": 0.2724, "step": 1787 }, { "epoch": 0.25059565522074284, "grad_norm": 0.6184118390083313, "learning_rate": 0.0002815881368093757, "loss": 0.1858, "step": 1788 }, { "epoch": 0.25073580939032936, "grad_norm": 0.8807806968688965, "learning_rate": 0.00028157378617555605, "loss": 0.2336, "step": 1789 }, { "epoch": 0.2508759635599159, "grad_norm": 0.9115694761276245, "learning_rate": 0.00028155943554173643, "loss": 0.1494, "step": 1790 }, { "epoch": 0.25101611772950244, "grad_norm": 0.9098187685012817, "learning_rate": 0.00028154508490791676, "loss": 0.1941, "step": 1791 }, { "epoch": 0.251156271899089, "grad_norm": 0.44079074263572693, "learning_rate": 0.0002815307342740971, "loss": 0.143, "step": 1792 }, { "epoch": 0.2512964260686755, "grad_norm": 0.6528348326683044, "learning_rate": 0.0002815163836402774, "loss": 0.1396, "step": 1793 }, { "epoch": 0.2514365802382621, "grad_norm": 0.6168358325958252, "learning_rate": 0.00028150203300645774, "loss": 0.135, "step": 1794 }, { "epoch": 0.2515767344078486, "grad_norm": 0.700537919998169, "learning_rate": 0.0002814876823726381, "loss": 0.222, "step": 1795 }, { "epoch": 0.2517168885774352, "grad_norm": 0.21935659646987915, "learning_rate": 0.00028147333173881846, "loss": 0.03, "step": 1796 }, { "epoch": 0.2518570427470217, "grad_norm": 1.1119674444198608, "learning_rate": 0.0002814589811049988, "loss": 0.2208, "step": 1797 }, { "epoch": 0.25199719691660827, "grad_norm": 0.7732111811637878, "learning_rate": 0.0002814446304711791, "loss": 0.2581, "step": 1798 }, { "epoch": 0.25213735108619484, "grad_norm": 1.658157229423523, "learning_rate": 0.0002814302798373595, "loss": 0.1683, "step": 1799 }, { "epoch": 0.25227750525578135, 
"grad_norm": 0.7387439608573914, "learning_rate": 0.0002814159292035398, "loss": 0.1923, "step": 1800 }, { "epoch": 0.2524176594253679, "grad_norm": 0.43980246782302856, "learning_rate": 0.00028140157856972015, "loss": 0.1267, "step": 1801 }, { "epoch": 0.25255781359495444, "grad_norm": 0.5206082463264465, "learning_rate": 0.0002813872279359005, "loss": 0.1216, "step": 1802 }, { "epoch": 0.252697967764541, "grad_norm": 0.8885549306869507, "learning_rate": 0.0002813728773020808, "loss": 0.2284, "step": 1803 }, { "epoch": 0.2528381219341275, "grad_norm": 0.3616775870323181, "learning_rate": 0.00028135852666826114, "loss": 0.0819, "step": 1804 }, { "epoch": 0.2529782761037141, "grad_norm": 0.3501027226448059, "learning_rate": 0.00028134417603444147, "loss": 0.1188, "step": 1805 }, { "epoch": 0.2531184302733006, "grad_norm": 0.4040818214416504, "learning_rate": 0.00028132982540062185, "loss": 0.1001, "step": 1806 }, { "epoch": 0.2532585844428872, "grad_norm": 0.6182248592376709, "learning_rate": 0.0002813154747668022, "loss": 0.2516, "step": 1807 }, { "epoch": 0.2533987386124737, "grad_norm": 0.7381235361099243, "learning_rate": 0.0002813011241329825, "loss": 0.2194, "step": 1808 }, { "epoch": 0.25353889278206027, "grad_norm": 0.48105478286743164, "learning_rate": 0.0002812867734991629, "loss": 0.1115, "step": 1809 }, { "epoch": 0.25367904695164684, "grad_norm": 0.5309714674949646, "learning_rate": 0.0002812724228653432, "loss": 0.1236, "step": 1810 }, { "epoch": 0.25381920112123335, "grad_norm": 0.5463789105415344, "learning_rate": 0.00028125807223152355, "loss": 0.0989, "step": 1811 }, { "epoch": 0.2539593552908199, "grad_norm": 0.5178471803665161, "learning_rate": 0.0002812437215977039, "loss": 0.1162, "step": 1812 }, { "epoch": 0.25409950946040644, "grad_norm": 0.5952447056770325, "learning_rate": 0.0002812293709638842, "loss": 0.172, "step": 1813 }, { "epoch": 0.254239663629993, "grad_norm": 0.27206680178642273, "learning_rate": 0.00028121502033006453, "loss": 
0.0503, "step": 1814 }, { "epoch": 0.2543798177995795, "grad_norm": 0.4301283657550812, "learning_rate": 0.0002812006696962449, "loss": 0.096, "step": 1815 }, { "epoch": 0.2545199719691661, "grad_norm": 0.4496886134147644, "learning_rate": 0.00028118631906242524, "loss": 0.1711, "step": 1816 }, { "epoch": 0.2546601261387526, "grad_norm": 0.9504410624504089, "learning_rate": 0.00028117196842860557, "loss": 0.1346, "step": 1817 }, { "epoch": 0.2548002803083392, "grad_norm": 0.47178420424461365, "learning_rate": 0.0002811576177947859, "loss": 0.0894, "step": 1818 }, { "epoch": 0.2549404344779257, "grad_norm": 0.4664304852485657, "learning_rate": 0.00028114326716096623, "loss": 0.1447, "step": 1819 }, { "epoch": 0.25508058864751226, "grad_norm": 0.7794228196144104, "learning_rate": 0.0002811289165271466, "loss": 0.1076, "step": 1820 }, { "epoch": 0.25522074281709883, "grad_norm": 0.41424092650413513, "learning_rate": 0.00028111456589332694, "loss": 0.1063, "step": 1821 }, { "epoch": 0.25536089698668535, "grad_norm": 0.3069503903388977, "learning_rate": 0.00028110021525950727, "loss": 0.1064, "step": 1822 }, { "epoch": 0.2555010511562719, "grad_norm": 0.6126590371131897, "learning_rate": 0.0002810858646256876, "loss": 0.1543, "step": 1823 }, { "epoch": 0.25564120532585843, "grad_norm": 0.8415027856826782, "learning_rate": 0.0002810715139918679, "loss": 0.1141, "step": 1824 }, { "epoch": 0.255781359495445, "grad_norm": 0.5680672526359558, "learning_rate": 0.0002810571633580483, "loss": 0.1598, "step": 1825 }, { "epoch": 0.2559215136650315, "grad_norm": 0.38887882232666016, "learning_rate": 0.00028104281272422864, "loss": 0.1737, "step": 1826 }, { "epoch": 0.2560616678346181, "grad_norm": 0.634729266166687, "learning_rate": 0.00028102846209040897, "loss": 0.1353, "step": 1827 }, { "epoch": 0.2562018220042046, "grad_norm": 0.3087935745716095, "learning_rate": 0.0002810141114565893, "loss": 0.0708, "step": 1828 }, { "epoch": 0.2563419761737912, "grad_norm": 
0.6217717528343201, "learning_rate": 0.0002809997608227696, "loss": 0.1578, "step": 1829 }, { "epoch": 0.2564821303433777, "grad_norm": 0.8649902939796448, "learning_rate": 0.00028098541018894995, "loss": 0.1094, "step": 1830 }, { "epoch": 0.25662228451296426, "grad_norm": 0.6316784024238586, "learning_rate": 0.00028097105955513034, "loss": 0.2179, "step": 1831 }, { "epoch": 0.25676243868255083, "grad_norm": 0.9165083765983582, "learning_rate": 0.00028095670892131066, "loss": 0.286, "step": 1832 }, { "epoch": 0.25690259285213735, "grad_norm": 0.39727166295051575, "learning_rate": 0.000280942358287491, "loss": 0.0984, "step": 1833 }, { "epoch": 0.2570427470217239, "grad_norm": 0.3837035298347473, "learning_rate": 0.0002809280076536714, "loss": 0.0675, "step": 1834 }, { "epoch": 0.25718290119131043, "grad_norm": 0.7482810020446777, "learning_rate": 0.0002809136570198517, "loss": 0.2355, "step": 1835 }, { "epoch": 0.257323055360897, "grad_norm": 0.4094725549221039, "learning_rate": 0.00028089930638603203, "loss": 0.182, "step": 1836 }, { "epoch": 0.2574632095304835, "grad_norm": 0.3261123299598694, "learning_rate": 0.00028088495575221236, "loss": 0.1948, "step": 1837 }, { "epoch": 0.2576033637000701, "grad_norm": 0.8239557147026062, "learning_rate": 0.0002808706051183927, "loss": 0.0818, "step": 1838 }, { "epoch": 0.2577435178696566, "grad_norm": 0.2915264368057251, "learning_rate": 0.000280856254484573, "loss": 0.0542, "step": 1839 }, { "epoch": 0.2578836720392432, "grad_norm": 0.8499298095703125, "learning_rate": 0.00028084190385075335, "loss": 0.1301, "step": 1840 }, { "epoch": 0.2580238262088297, "grad_norm": 0.37676772475242615, "learning_rate": 0.00028082755321693373, "loss": 0.1291, "step": 1841 }, { "epoch": 0.25816398037841626, "grad_norm": 0.8743988275527954, "learning_rate": 0.00028081320258311406, "loss": 0.1639, "step": 1842 }, { "epoch": 0.25830413454800283, "grad_norm": 0.7587233781814575, "learning_rate": 0.0002807988519492944, "loss": 0.1182, "step": 
1843 }, { "epoch": 0.25844428871758934, "grad_norm": 0.36376047134399414, "learning_rate": 0.00028078450131547477, "loss": 0.0992, "step": 1844 }, { "epoch": 0.2585844428871759, "grad_norm": 0.5320486426353455, "learning_rate": 0.0002807701506816551, "loss": 0.0972, "step": 1845 }, { "epoch": 0.25872459705676243, "grad_norm": 0.6233716011047363, "learning_rate": 0.0002807558000478354, "loss": 0.0923, "step": 1846 }, { "epoch": 0.258864751226349, "grad_norm": 0.6135765910148621, "learning_rate": 0.00028074144941401575, "loss": 0.1878, "step": 1847 }, { "epoch": 0.2590049053959355, "grad_norm": 1.0948060750961304, "learning_rate": 0.0002807270987801961, "loss": 0.1315, "step": 1848 }, { "epoch": 0.2591450595655221, "grad_norm": 0.6060850620269775, "learning_rate": 0.0002807127481463764, "loss": 0.0968, "step": 1849 }, { "epoch": 0.2592852137351086, "grad_norm": 0.9469805955886841, "learning_rate": 0.0002806983975125568, "loss": 0.1142, "step": 1850 }, { "epoch": 0.25942536790469517, "grad_norm": 0.6662535667419434, "learning_rate": 0.0002806840468787371, "loss": 0.2374, "step": 1851 }, { "epoch": 0.2595655220742817, "grad_norm": 0.25526994466781616, "learning_rate": 0.00028066969624491745, "loss": 0.0783, "step": 1852 }, { "epoch": 0.25970567624386826, "grad_norm": 0.3429538309574127, "learning_rate": 0.00028065534561109783, "loss": 0.076, "step": 1853 }, { "epoch": 0.2598458304134548, "grad_norm": 0.6768313646316528, "learning_rate": 0.00028064099497727816, "loss": 0.1519, "step": 1854 }, { "epoch": 0.25998598458304134, "grad_norm": 0.36945641040802, "learning_rate": 0.0002806266443434585, "loss": 0.0899, "step": 1855 }, { "epoch": 0.2601261387526279, "grad_norm": 0.5451635718345642, "learning_rate": 0.0002806122937096388, "loss": 0.1678, "step": 1856 }, { "epoch": 0.2602662929222144, "grad_norm": 0.9854544997215271, "learning_rate": 0.00028059794307581915, "loss": 0.1791, "step": 1857 }, { "epoch": 0.260406447091801, "grad_norm": 0.576852560043335, "learning_rate": 
0.0002805835924419995, "loss": 0.2015, "step": 1858 }, { "epoch": 0.2605466012613875, "grad_norm": 0.4926300048828125, "learning_rate": 0.0002805692418081798, "loss": 0.1634, "step": 1859 }, { "epoch": 0.2606867554309741, "grad_norm": 0.6340547800064087, "learning_rate": 0.0002805548911743602, "loss": 0.1112, "step": 1860 }, { "epoch": 0.2608269096005606, "grad_norm": 0.479902058839798, "learning_rate": 0.0002805405405405405, "loss": 0.1006, "step": 1861 }, { "epoch": 0.26096706377014717, "grad_norm": 0.4118190407752991, "learning_rate": 0.00028052618990672085, "loss": 0.066, "step": 1862 }, { "epoch": 0.2611072179397337, "grad_norm": 0.9400238990783691, "learning_rate": 0.00028051183927290123, "loss": 0.1515, "step": 1863 }, { "epoch": 0.26124737210932025, "grad_norm": 0.33944255113601685, "learning_rate": 0.00028049748863908156, "loss": 0.1046, "step": 1864 }, { "epoch": 0.2613875262789068, "grad_norm": 0.34444308280944824, "learning_rate": 0.0002804831380052619, "loss": 0.1608, "step": 1865 }, { "epoch": 0.26152768044849334, "grad_norm": 0.3691379129886627, "learning_rate": 0.0002804687873714422, "loss": 0.13, "step": 1866 }, { "epoch": 0.2616678346180799, "grad_norm": 0.24631360173225403, "learning_rate": 0.00028045443673762254, "loss": 0.0661, "step": 1867 }, { "epoch": 0.2618079887876664, "grad_norm": 1.240393877029419, "learning_rate": 0.00028044008610380287, "loss": 0.163, "step": 1868 }, { "epoch": 0.261948142957253, "grad_norm": 0.6703735589981079, "learning_rate": 0.00028042573546998325, "loss": 0.0774, "step": 1869 }, { "epoch": 0.2620882971268395, "grad_norm": 0.5705479979515076, "learning_rate": 0.0002804113848361636, "loss": 0.1566, "step": 1870 }, { "epoch": 0.2622284512964261, "grad_norm": 0.5477330684661865, "learning_rate": 0.0002803970342023439, "loss": 0.1033, "step": 1871 }, { "epoch": 0.2623686054660126, "grad_norm": 0.23318515717983246, "learning_rate": 0.00028038268356852424, "loss": 0.0696, "step": 1872 }, { "epoch": 0.26250875963559916, 
"grad_norm": 0.5640891194343567, "learning_rate": 0.0002803683329347046, "loss": 0.1741, "step": 1873 }, { "epoch": 0.2626489138051857, "grad_norm": 0.24505697190761566, "learning_rate": 0.00028035398230088495, "loss": 0.0481, "step": 1874 }, { "epoch": 0.26278906797477225, "grad_norm": 0.5220059156417847, "learning_rate": 0.0002803396316670653, "loss": 0.0798, "step": 1875 }, { "epoch": 0.2629292221443588, "grad_norm": 0.47825273871421814, "learning_rate": 0.0002803252810332456, "loss": 0.1447, "step": 1876 }, { "epoch": 0.26306937631394534, "grad_norm": 0.5538408756256104, "learning_rate": 0.00028031093039942594, "loss": 0.2263, "step": 1877 }, { "epoch": 0.2632095304835319, "grad_norm": 0.7235707640647888, "learning_rate": 0.00028029657976560627, "loss": 0.1822, "step": 1878 }, { "epoch": 0.2633496846531184, "grad_norm": 0.41711851954460144, "learning_rate": 0.00028028222913178665, "loss": 0.1532, "step": 1879 }, { "epoch": 0.263489838822705, "grad_norm": 0.48717182874679565, "learning_rate": 0.000280267878497967, "loss": 0.1238, "step": 1880 }, { "epoch": 0.2636299929922915, "grad_norm": 0.3388001024723053, "learning_rate": 0.0002802535278641473, "loss": 0.1096, "step": 1881 }, { "epoch": 0.2637701471618781, "grad_norm": 0.7721616625785828, "learning_rate": 0.00028023917723032763, "loss": 0.1976, "step": 1882 }, { "epoch": 0.2639103013314646, "grad_norm": 0.6086646318435669, "learning_rate": 0.000280224826596508, "loss": 0.1601, "step": 1883 }, { "epoch": 0.26405045550105116, "grad_norm": 0.4716200828552246, "learning_rate": 0.00028021047596268835, "loss": 0.1791, "step": 1884 }, { "epoch": 0.2641906096706377, "grad_norm": 0.6042973399162292, "learning_rate": 0.0002801961253288687, "loss": 0.1304, "step": 1885 }, { "epoch": 0.26433076384022425, "grad_norm": 0.43370291590690613, "learning_rate": 0.000280181774695049, "loss": 0.1164, "step": 1886 }, { "epoch": 0.2644709180098108, "grad_norm": 0.7117361426353455, "learning_rate": 0.00028016742406122933, "loss": 
0.1952, "step": 1887 }, { "epoch": 0.26461107217939733, "grad_norm": 0.4921676218509674, "learning_rate": 0.0002801530734274097, "loss": 0.0949, "step": 1888 }, { "epoch": 0.2647512263489839, "grad_norm": 0.5655998587608337, "learning_rate": 0.00028013872279359004, "loss": 0.1588, "step": 1889 }, { "epoch": 0.2648913805185704, "grad_norm": 0.649277925491333, "learning_rate": 0.00028012437215977037, "loss": 0.2138, "step": 1890 }, { "epoch": 0.265031534688157, "grad_norm": 0.29094550013542175, "learning_rate": 0.0002801100215259507, "loss": 0.0561, "step": 1891 }, { "epoch": 0.2651716888577435, "grad_norm": 0.42921003699302673, "learning_rate": 0.00028009567089213103, "loss": 0.1148, "step": 1892 }, { "epoch": 0.2653118430273301, "grad_norm": 0.5670558214187622, "learning_rate": 0.00028008132025831136, "loss": 0.1614, "step": 1893 }, { "epoch": 0.2654519971969166, "grad_norm": 0.5095421075820923, "learning_rate": 0.0002800669696244917, "loss": 0.1247, "step": 1894 }, { "epoch": 0.26559215136650316, "grad_norm": 0.8093964457511902, "learning_rate": 0.00028005261899067207, "loss": 0.2039, "step": 1895 }, { "epoch": 0.2657323055360897, "grad_norm": 0.5574644207954407, "learning_rate": 0.0002800382683568524, "loss": 0.1065, "step": 1896 }, { "epoch": 0.26587245970567625, "grad_norm": 1.0862740278244019, "learning_rate": 0.0002800239177230327, "loss": 0.195, "step": 1897 }, { "epoch": 0.2660126138752628, "grad_norm": 0.887651264667511, "learning_rate": 0.0002800095670892131, "loss": 0.1834, "step": 1898 }, { "epoch": 0.26615276804484933, "grad_norm": 1.5075024366378784, "learning_rate": 0.00027999521645539344, "loss": 0.3211, "step": 1899 }, { "epoch": 0.2662929222144359, "grad_norm": 1.5829147100448608, "learning_rate": 0.00027998086582157376, "loss": 0.3588, "step": 1900 }, { "epoch": 0.2664330763840224, "grad_norm": 0.47432902455329895, "learning_rate": 0.0002799665151877541, "loss": 0.1559, "step": 1901 }, { "epoch": 0.266573230553609, "grad_norm": 
0.5671768188476562, "learning_rate": 0.0002799521645539344, "loss": 0.1576, "step": 1902 }, { "epoch": 0.2667133847231955, "grad_norm": 0.6967267990112305, "learning_rate": 0.00027993781392011475, "loss": 0.1389, "step": 1903 }, { "epoch": 0.26685353889278207, "grad_norm": 0.7033321261405945, "learning_rate": 0.00027992346328629513, "loss": 0.1995, "step": 1904 }, { "epoch": 0.2669936930623686, "grad_norm": 0.4978240728378296, "learning_rate": 0.00027990911265247546, "loss": 0.1443, "step": 1905 }, { "epoch": 0.26713384723195516, "grad_norm": 0.37465062737464905, "learning_rate": 0.0002798947620186558, "loss": 0.0831, "step": 1906 }, { "epoch": 0.26727400140154167, "grad_norm": 0.25860920548439026, "learning_rate": 0.0002798804113848361, "loss": 0.0893, "step": 1907 }, { "epoch": 0.26741415557112824, "grad_norm": 0.30216631293296814, "learning_rate": 0.0002798660607510165, "loss": 0.0761, "step": 1908 }, { "epoch": 0.2675543097407148, "grad_norm": 0.4658152759075165, "learning_rate": 0.00027985171011719683, "loss": 0.1566, "step": 1909 }, { "epoch": 0.26769446391030133, "grad_norm": 0.32072290778160095, "learning_rate": 0.00027983735948337716, "loss": 0.1108, "step": 1910 }, { "epoch": 0.2678346180798879, "grad_norm": 0.8918739557266235, "learning_rate": 0.0002798230088495575, "loss": 0.145, "step": 1911 }, { "epoch": 0.2679747722494744, "grad_norm": 0.6238973140716553, "learning_rate": 0.0002798086582157378, "loss": 0.1087, "step": 1912 }, { "epoch": 0.268114926419061, "grad_norm": 0.5043307542800903, "learning_rate": 0.00027979430758191814, "loss": 0.1154, "step": 1913 }, { "epoch": 0.2682550805886475, "grad_norm": 0.27551767230033875, "learning_rate": 0.00027977995694809853, "loss": 0.0949, "step": 1914 }, { "epoch": 0.26839523475823407, "grad_norm": 0.43672341108322144, "learning_rate": 0.00027976560631427886, "loss": 0.1264, "step": 1915 }, { "epoch": 0.2685353889278206, "grad_norm": 0.4689143896102905, "learning_rate": 0.0002797512556804592, "loss": 0.0713, 
"step": 1916 }, { "epoch": 0.26867554309740715, "grad_norm": 0.6661012768745422, "learning_rate": 0.00027973690504663957, "loss": 0.1595, "step": 1917 }, { "epoch": 0.26881569726699367, "grad_norm": 0.30119356513023376, "learning_rate": 0.0002797225544128199, "loss": 0.0826, "step": 1918 }, { "epoch": 0.26895585143658024, "grad_norm": 0.6648711562156677, "learning_rate": 0.0002797082037790002, "loss": 0.1563, "step": 1919 }, { "epoch": 0.2690960056061668, "grad_norm": 0.5036062002182007, "learning_rate": 0.00027969385314518055, "loss": 0.203, "step": 1920 }, { "epoch": 0.2692361597757533, "grad_norm": 0.7475723028182983, "learning_rate": 0.0002796795025113609, "loss": 0.1058, "step": 1921 }, { "epoch": 0.2693763139453399, "grad_norm": 0.5780730843544006, "learning_rate": 0.0002796651518775412, "loss": 0.0567, "step": 1922 }, { "epoch": 0.2695164681149264, "grad_norm": 0.5885787606239319, "learning_rate": 0.0002796508012437216, "loss": 0.1979, "step": 1923 }, { "epoch": 0.269656622284513, "grad_norm": 0.590232789516449, "learning_rate": 0.0002796364506099019, "loss": 0.1878, "step": 1924 }, { "epoch": 0.2697967764540995, "grad_norm": 0.42100223898887634, "learning_rate": 0.00027962209997608225, "loss": 0.1478, "step": 1925 }, { "epoch": 0.26993693062368607, "grad_norm": 0.9727389216423035, "learning_rate": 0.0002796077493422626, "loss": 0.1672, "step": 1926 }, { "epoch": 0.2700770847932726, "grad_norm": 0.20500816404819489, "learning_rate": 0.00027959339870844296, "loss": 0.0528, "step": 1927 }, { "epoch": 0.27021723896285915, "grad_norm": 0.3219503164291382, "learning_rate": 0.0002795790480746233, "loss": 0.0874, "step": 1928 }, { "epoch": 0.27035739313244567, "grad_norm": 0.7591564059257507, "learning_rate": 0.0002795646974408036, "loss": 0.2104, "step": 1929 }, { "epoch": 0.27049754730203224, "grad_norm": 0.48872920870780945, "learning_rate": 0.00027955034680698395, "loss": 0.0533, "step": 1930 }, { "epoch": 0.2706377014716188, "grad_norm": 0.6220650672912598, 
"learning_rate": 0.0002795359961731643, "loss": 0.1051, "step": 1931 }, { "epoch": 0.2707778556412053, "grad_norm": 0.8095846772193909, "learning_rate": 0.0002795216455393446, "loss": 0.421, "step": 1932 }, { "epoch": 0.2709180098107919, "grad_norm": 0.3527679741382599, "learning_rate": 0.000279507294905525, "loss": 0.2359, "step": 1933 }, { "epoch": 0.2710581639803784, "grad_norm": 0.40157654881477356, "learning_rate": 0.0002794929442717053, "loss": 0.075, "step": 1934 }, { "epoch": 0.271198318149965, "grad_norm": 0.25354140996932983, "learning_rate": 0.00027947859363788564, "loss": 0.1114, "step": 1935 }, { "epoch": 0.2713384723195515, "grad_norm": 0.4825376868247986, "learning_rate": 0.000279464243004066, "loss": 0.1007, "step": 1936 }, { "epoch": 0.27147862648913806, "grad_norm": 0.4860278069972992, "learning_rate": 0.00027944989237024636, "loss": 0.1568, "step": 1937 }, { "epoch": 0.2716187806587246, "grad_norm": 1.5211390256881714, "learning_rate": 0.0002794355417364267, "loss": 0.109, "step": 1938 }, { "epoch": 0.27175893482831115, "grad_norm": 0.7738736271858215, "learning_rate": 0.000279421191102607, "loss": 0.1209, "step": 1939 }, { "epoch": 0.27189908899789766, "grad_norm": 0.7095392346382141, "learning_rate": 0.00027940684046878734, "loss": 0.1307, "step": 1940 }, { "epoch": 0.27203924316748423, "grad_norm": 0.5415706634521484, "learning_rate": 0.00027939248983496767, "loss": 0.1052, "step": 1941 }, { "epoch": 0.2721793973370708, "grad_norm": 1.4570891857147217, "learning_rate": 0.000279378139201148, "loss": 0.1442, "step": 1942 }, { "epoch": 0.2723195515066573, "grad_norm": 0.6593202948570251, "learning_rate": 0.0002793637885673284, "loss": 0.1015, "step": 1943 }, { "epoch": 0.2724597056762439, "grad_norm": 0.48740774393081665, "learning_rate": 0.0002793494379335087, "loss": 0.1103, "step": 1944 }, { "epoch": 0.2725998598458304, "grad_norm": 0.61367267370224, "learning_rate": 0.00027933508729968904, "loss": 0.1874, "step": 1945 }, { "epoch": 
0.272740014015417, "grad_norm": 0.623350977897644, "learning_rate": 0.00027932073666586937, "loss": 0.1305, "step": 1946 }, { "epoch": 0.2728801681850035, "grad_norm": 0.7416171431541443, "learning_rate": 0.00027930638603204975, "loss": 0.1333, "step": 1947 }, { "epoch": 0.27302032235459006, "grad_norm": 1.5345262289047241, "learning_rate": 0.0002792920353982301, "loss": 0.3047, "step": 1948 }, { "epoch": 0.2731604765241766, "grad_norm": 0.9212145209312439, "learning_rate": 0.0002792776847644104, "loss": 0.2816, "step": 1949 }, { "epoch": 0.27330063069376315, "grad_norm": 1.7418906688690186, "learning_rate": 0.00027926333413059074, "loss": 0.2853, "step": 1950 }, { "epoch": 0.27344078486334966, "grad_norm": 0.3374670743942261, "learning_rate": 0.00027924898349677106, "loss": 0.1446, "step": 1951 }, { "epoch": 0.27358093903293623, "grad_norm": 0.2873058617115021, "learning_rate": 0.00027923463286295145, "loss": 0.0837, "step": 1952 }, { "epoch": 0.2737210932025228, "grad_norm": 0.4602125287055969, "learning_rate": 0.0002792202822291318, "loss": 0.1722, "step": 1953 }, { "epoch": 0.2738612473721093, "grad_norm": 0.45416730642318726, "learning_rate": 0.0002792059315953121, "loss": 0.1651, "step": 1954 }, { "epoch": 0.2740014015416959, "grad_norm": 0.3982296884059906, "learning_rate": 0.00027919158096149243, "loss": 0.1645, "step": 1955 }, { "epoch": 0.2741415557112824, "grad_norm": 0.48279327154159546, "learning_rate": 0.00027917723032767276, "loss": 0.1068, "step": 1956 }, { "epoch": 0.274281709880869, "grad_norm": 0.7379068732261658, "learning_rate": 0.0002791628796938531, "loss": 0.2246, "step": 1957 }, { "epoch": 0.2744218640504555, "grad_norm": 0.45647430419921875, "learning_rate": 0.00027914852906003347, "loss": 0.1772, "step": 1958 }, { "epoch": 0.27456201822004206, "grad_norm": 0.3247532546520233, "learning_rate": 0.0002791341784262138, "loss": 0.0913, "step": 1959 }, { "epoch": 0.2747021723896286, "grad_norm": 0.5176326632499695, "learning_rate": 
0.00027911982779239413, "loss": 0.1146, "step": 1960 }, { "epoch": 0.27484232655921514, "grad_norm": 0.31669044494628906, "learning_rate": 0.00027910547715857446, "loss": 0.088, "step": 1961 }, { "epoch": 0.27498248072880166, "grad_norm": 0.40046054124832153, "learning_rate": 0.00027909112652475484, "loss": 0.069, "step": 1962 }, { "epoch": 0.27512263489838823, "grad_norm": 1.0397855043411255, "learning_rate": 0.00027907677589093517, "loss": 0.185, "step": 1963 }, { "epoch": 0.27526278906797474, "grad_norm": 0.7936228513717651, "learning_rate": 0.0002790624252571155, "loss": 0.1904, "step": 1964 }, { "epoch": 0.2754029432375613, "grad_norm": 0.31362998485565186, "learning_rate": 0.0002790480746232958, "loss": 0.0623, "step": 1965 }, { "epoch": 0.2755430974071479, "grad_norm": 0.5750884413719177, "learning_rate": 0.00027903372398947615, "loss": 0.17, "step": 1966 }, { "epoch": 0.2756832515767344, "grad_norm": 0.205519899725914, "learning_rate": 0.0002790193733556565, "loss": 0.0783, "step": 1967 }, { "epoch": 0.27582340574632097, "grad_norm": 0.9362422823905945, "learning_rate": 0.00027900502272183687, "loss": 0.1199, "step": 1968 }, { "epoch": 0.2759635599159075, "grad_norm": 0.5203693509101868, "learning_rate": 0.0002789906720880172, "loss": 0.1317, "step": 1969 }, { "epoch": 0.27610371408549406, "grad_norm": 0.37986159324645996, "learning_rate": 0.0002789763214541975, "loss": 0.1756, "step": 1970 }, { "epoch": 0.27624386825508057, "grad_norm": 0.5829340219497681, "learning_rate": 0.0002789619708203779, "loss": 0.1251, "step": 1971 }, { "epoch": 0.27638402242466714, "grad_norm": 0.2515009343624115, "learning_rate": 0.00027894762018655823, "loss": 0.0744, "step": 1972 }, { "epoch": 0.27652417659425366, "grad_norm": 0.6552347540855408, "learning_rate": 0.00027893326955273856, "loss": 0.1315, "step": 1973 }, { "epoch": 0.2766643307638402, "grad_norm": 0.288085401058197, "learning_rate": 0.0002789189189189189, "loss": 0.123, "step": 1974 }, { "epoch": 
0.27680448493342674, "grad_norm": 0.45238152146339417, "learning_rate": 0.0002789045682850992, "loss": 0.1116, "step": 1975 }, { "epoch": 0.2769446391030133, "grad_norm": 0.47496235370635986, "learning_rate": 0.00027889021765127955, "loss": 0.1037, "step": 1976 }, { "epoch": 0.2770847932725999, "grad_norm": 0.8938223719596863, "learning_rate": 0.0002788758670174599, "loss": 0.1688, "step": 1977 }, { "epoch": 0.2772249474421864, "grad_norm": 0.6534609794616699, "learning_rate": 0.00027886151638364026, "loss": 0.181, "step": 1978 }, { "epoch": 0.27736510161177297, "grad_norm": 0.5342714190483093, "learning_rate": 0.0002788471657498206, "loss": 0.1323, "step": 1979 }, { "epoch": 0.2775052557813595, "grad_norm": 0.30895674228668213, "learning_rate": 0.0002788328151160009, "loss": 0.0859, "step": 1980 }, { "epoch": 0.27764540995094605, "grad_norm": 0.39004939794540405, "learning_rate": 0.0002788184644821813, "loss": 0.1137, "step": 1981 }, { "epoch": 0.27778556412053257, "grad_norm": 0.5131301283836365, "learning_rate": 0.00027880411384836163, "loss": 0.1123, "step": 1982 }, { "epoch": 0.27792571829011914, "grad_norm": 0.9090350270271301, "learning_rate": 0.00027878976321454196, "loss": 0.1331, "step": 1983 }, { "epoch": 0.27806587245970565, "grad_norm": 0.2680957615375519, "learning_rate": 0.0002787754125807223, "loss": 0.057, "step": 1984 }, { "epoch": 0.2782060266292922, "grad_norm": 0.3268495500087738, "learning_rate": 0.0002787610619469026, "loss": 0.0908, "step": 1985 }, { "epoch": 0.27834618079887874, "grad_norm": 0.9438428282737732, "learning_rate": 0.00027874671131308294, "loss": 0.1241, "step": 1986 }, { "epoch": 0.2784863349684653, "grad_norm": 0.8221608996391296, "learning_rate": 0.0002787323606792633, "loss": 0.1307, "step": 1987 }, { "epoch": 0.2786264891380519, "grad_norm": 0.25961288809776306, "learning_rate": 0.00027871801004544365, "loss": 0.0677, "step": 1988 }, { "epoch": 0.2787666433076384, "grad_norm": 0.3286683261394501, "learning_rate": 
0.000278703659411624, "loss": 0.1018, "step": 1989 }, { "epoch": 0.27890679747722497, "grad_norm": 0.3367217779159546, "learning_rate": 0.00027868930877780437, "loss": 0.0607, "step": 1990 }, { "epoch": 0.2790469516468115, "grad_norm": 0.36401456594467163, "learning_rate": 0.0002786749581439847, "loss": 0.099, "step": 1991 }, { "epoch": 0.27918710581639805, "grad_norm": 1.0048301219940186, "learning_rate": 0.000278660607510165, "loss": 0.1441, "step": 1992 }, { "epoch": 0.27932725998598457, "grad_norm": 0.8426941633224487, "learning_rate": 0.00027864625687634535, "loss": 0.1515, "step": 1993 }, { "epoch": 0.27946741415557114, "grad_norm": 0.6565719246864319, "learning_rate": 0.0002786319062425257, "loss": 0.2035, "step": 1994 }, { "epoch": 0.27960756832515765, "grad_norm": 1.0536584854125977, "learning_rate": 0.000278617555608706, "loss": 0.3281, "step": 1995 }, { "epoch": 0.2797477224947442, "grad_norm": 0.9979150891304016, "learning_rate": 0.00027860320497488634, "loss": 0.1591, "step": 1996 }, { "epoch": 0.27988787666433074, "grad_norm": 1.3841400146484375, "learning_rate": 0.0002785888543410667, "loss": 0.262, "step": 1997 }, { "epoch": 0.2800280308339173, "grad_norm": 0.6444082856178284, "learning_rate": 0.00027857450370724705, "loss": 0.1298, "step": 1998 }, { "epoch": 0.2801681850035039, "grad_norm": 1.870656967163086, "learning_rate": 0.0002785601530734274, "loss": 0.2845, "step": 1999 }, { "epoch": 0.2803083391730904, "grad_norm": 2.030590057373047, "learning_rate": 0.00027854580243960776, "loss": 0.2723, "step": 2000 }, { "epoch": 0.28044849334267696, "grad_norm": 0.5691972970962524, "learning_rate": 0.0002785314518057881, "loss": 0.2209, "step": 2001 }, { "epoch": 0.2805886475122635, "grad_norm": 0.8753064870834351, "learning_rate": 0.0002785171011719684, "loss": 0.0718, "step": 2002 }, { "epoch": 0.28072880168185005, "grad_norm": 0.178070068359375, "learning_rate": 0.00027850275053814875, "loss": 0.0348, "step": 2003 }, { "epoch": 0.28086895585143656, 
"grad_norm": 0.2784869074821472, "learning_rate": 0.0002784883999043291, "loss": 0.0897, "step": 2004 }, { "epoch": 0.28100911002102313, "grad_norm": 0.6817112565040588, "learning_rate": 0.0002784740492705094, "loss": 0.0816, "step": 2005 }, { "epoch": 0.28114926419060965, "grad_norm": 0.4369611144065857, "learning_rate": 0.0002784596986366898, "loss": 0.114, "step": 2006 }, { "epoch": 0.2812894183601962, "grad_norm": 0.473345011472702, "learning_rate": 0.0002784453480028701, "loss": 0.1443, "step": 2007 }, { "epoch": 0.28142957252978273, "grad_norm": 0.41427335143089294, "learning_rate": 0.00027843099736905044, "loss": 0.1758, "step": 2008 }, { "epoch": 0.2815697266993693, "grad_norm": 0.40301406383514404, "learning_rate": 0.00027841664673523077, "loss": 0.0994, "step": 2009 }, { "epoch": 0.2817098808689559, "grad_norm": 0.44364669919013977, "learning_rate": 0.00027840229610141115, "loss": 0.1118, "step": 2010 }, { "epoch": 0.2818500350385424, "grad_norm": 0.48147669434547424, "learning_rate": 0.0002783879454675915, "loss": 0.1248, "step": 2011 }, { "epoch": 0.28199018920812896, "grad_norm": 0.293087363243103, "learning_rate": 0.0002783735948337718, "loss": 0.1014, "step": 2012 }, { "epoch": 0.2821303433777155, "grad_norm": 0.6260691285133362, "learning_rate": 0.00027835924419995214, "loss": 0.1585, "step": 2013 }, { "epoch": 0.28227049754730205, "grad_norm": 0.28251245617866516, "learning_rate": 0.00027834489356613247, "loss": 0.0681, "step": 2014 }, { "epoch": 0.28241065171688856, "grad_norm": 0.4360058605670929, "learning_rate": 0.0002783305429323128, "loss": 0.1168, "step": 2015 }, { "epoch": 0.28255080588647513, "grad_norm": 0.7703859806060791, "learning_rate": 0.0002783161922984932, "loss": 0.0812, "step": 2016 }, { "epoch": 0.28269096005606165, "grad_norm": 0.7642639875411987, "learning_rate": 0.0002783018416646735, "loss": 0.0815, "step": 2017 }, { "epoch": 0.2828311142256482, "grad_norm": 1.523699164390564, "learning_rate": 0.00027828749103085384, "loss": 
0.1542, "step": 2018 }, { "epoch": 0.28297126839523473, "grad_norm": 0.9008207321166992, "learning_rate": 0.00027827314039703416, "loss": 0.1171, "step": 2019 }, { "epoch": 0.2831114225648213, "grad_norm": 0.40990594029426575, "learning_rate": 0.0002782587897632145, "loss": 0.1253, "step": 2020 }, { "epoch": 0.2832515767344079, "grad_norm": 0.33235472440719604, "learning_rate": 0.0002782444391293948, "loss": 0.1417, "step": 2021 }, { "epoch": 0.2833917309039944, "grad_norm": 0.7677909135818481, "learning_rate": 0.0002782300884955752, "loss": 0.1365, "step": 2022 }, { "epoch": 0.28353188507358096, "grad_norm": 0.34211885929107666, "learning_rate": 0.00027821573786175553, "loss": 0.0595, "step": 2023 }, { "epoch": 0.2836720392431675, "grad_norm": 0.6408612132072449, "learning_rate": 0.00027820138722793586, "loss": 0.1728, "step": 2024 }, { "epoch": 0.28381219341275404, "grad_norm": 0.5557684302330017, "learning_rate": 0.00027818703659411624, "loss": 0.1429, "step": 2025 }, { "epoch": 0.28395234758234056, "grad_norm": 0.45655104517936707, "learning_rate": 0.0002781726859602966, "loss": 0.0908, "step": 2026 }, { "epoch": 0.28409250175192713, "grad_norm": 0.8670726418495178, "learning_rate": 0.0002781583353264769, "loss": 0.0976, "step": 2027 }, { "epoch": 0.28423265592151364, "grad_norm": 0.7119932770729065, "learning_rate": 0.00027814398469265723, "loss": 0.1147, "step": 2028 }, { "epoch": 0.2843728100911002, "grad_norm": 0.45708343386650085, "learning_rate": 0.00027812963405883756, "loss": 0.1077, "step": 2029 }, { "epoch": 0.28451296426068673, "grad_norm": 0.26318418979644775, "learning_rate": 0.0002781152834250179, "loss": 0.0616, "step": 2030 }, { "epoch": 0.2846531184302733, "grad_norm": 0.7760747075080872, "learning_rate": 0.0002781009327911982, "loss": 0.086, "step": 2031 }, { "epoch": 0.28479327259985987, "grad_norm": 1.0396136045455933, "learning_rate": 0.0002780865821573786, "loss": 0.1421, "step": 2032 }, { "epoch": 0.2849334267694464, "grad_norm": 
0.6434550285339355, "learning_rate": 0.00027807223152355893, "loss": 0.1136, "step": 2033 }, { "epoch": 0.28507358093903296, "grad_norm": 0.3558732867240906, "learning_rate": 0.00027805788088973926, "loss": 0.0528, "step": 2034 }, { "epoch": 0.28521373510861947, "grad_norm": 0.6915003657341003, "learning_rate": 0.00027804353025591964, "loss": 0.2294, "step": 2035 }, { "epoch": 0.28535388927820604, "grad_norm": 0.6733664274215698, "learning_rate": 0.00027802917962209997, "loss": 0.1216, "step": 2036 }, { "epoch": 0.28549404344779256, "grad_norm": 0.9360905885696411, "learning_rate": 0.0002780148289882803, "loss": 0.1214, "step": 2037 }, { "epoch": 0.2856341976173791, "grad_norm": 1.0321345329284668, "learning_rate": 0.0002780004783544606, "loss": 0.2451, "step": 2038 }, { "epoch": 0.28577435178696564, "grad_norm": 0.775350034236908, "learning_rate": 0.00027798612772064095, "loss": 0.0924, "step": 2039 }, { "epoch": 0.2859145059565522, "grad_norm": 2.416033983230591, "learning_rate": 0.0002779717770868213, "loss": 0.3264, "step": 2040 }, { "epoch": 0.2860546601261387, "grad_norm": 0.5261764526367188, "learning_rate": 0.00027795742645300166, "loss": 0.0972, "step": 2041 }, { "epoch": 0.2861948142957253, "grad_norm": 0.5157500505447388, "learning_rate": 0.000277943075819182, "loss": 0.1498, "step": 2042 }, { "epoch": 0.28633496846531187, "grad_norm": 0.8179082274436951, "learning_rate": 0.0002779287251853623, "loss": 0.176, "step": 2043 }, { "epoch": 0.2864751226348984, "grad_norm": 0.9213723540306091, "learning_rate": 0.0002779143745515427, "loss": 0.179, "step": 2044 }, { "epoch": 0.28661527680448495, "grad_norm": 0.7316744327545166, "learning_rate": 0.00027790002391772303, "loss": 0.2004, "step": 2045 }, { "epoch": 0.28675543097407147, "grad_norm": 0.3566551208496094, "learning_rate": 0.00027788567328390336, "loss": 0.0421, "step": 2046 }, { "epoch": 0.28689558514365804, "grad_norm": 1.1435343027114868, "learning_rate": 0.0002778713226500837, "loss": 0.259, "step": 
2047 }, { "epoch": 0.28703573931324455, "grad_norm": 1.2187875509262085, "learning_rate": 0.000277856972016264, "loss": 0.1821, "step": 2048 }, { "epoch": 0.2871758934828311, "grad_norm": 2.9546449184417725, "learning_rate": 0.00027784262138244435, "loss": 0.2565, "step": 2049 }, { "epoch": 0.28731604765241764, "grad_norm": 3.7390589714050293, "learning_rate": 0.0002778282707486247, "loss": 0.4227, "step": 2050 }, { "epoch": 0.2874562018220042, "grad_norm": 0.783820629119873, "learning_rate": 0.00027781392011480506, "loss": 0.2159, "step": 2051 }, { "epoch": 0.2875963559915907, "grad_norm": 0.3262910544872284, "learning_rate": 0.0002777995694809854, "loss": 0.07, "step": 2052 }, { "epoch": 0.2877365101611773, "grad_norm": 0.5076556205749512, "learning_rate": 0.0002777852188471657, "loss": 0.0809, "step": 2053 }, { "epoch": 0.28787666433076387, "grad_norm": 0.4554128050804138, "learning_rate": 0.0002777708682133461, "loss": 0.1281, "step": 2054 }, { "epoch": 0.2880168185003504, "grad_norm": 0.6828322410583496, "learning_rate": 0.0002777565175795264, "loss": 0.1742, "step": 2055 }, { "epoch": 0.28815697266993695, "grad_norm": 0.45777180790901184, "learning_rate": 0.00027774216694570676, "loss": 0.1594, "step": 2056 }, { "epoch": 0.28829712683952347, "grad_norm": 0.8016679883003235, "learning_rate": 0.0002777278163118871, "loss": 0.0888, "step": 2057 }, { "epoch": 0.28843728100911004, "grad_norm": 0.48551881313323975, "learning_rate": 0.0002777134656780674, "loss": 0.1238, "step": 2058 }, { "epoch": 0.28857743517869655, "grad_norm": 0.399253249168396, "learning_rate": 0.00027769911504424774, "loss": 0.1403, "step": 2059 }, { "epoch": 0.2887175893482831, "grad_norm": 1.1162843704223633, "learning_rate": 0.0002776847644104281, "loss": 0.096, "step": 2060 }, { "epoch": 0.28885774351786964, "grad_norm": 0.4744342267513275, "learning_rate": 0.00027767041377660845, "loss": 0.1264, "step": 2061 }, { "epoch": 0.2889978976874562, "grad_norm": 0.4353397786617279, 
"learning_rate": 0.0002776560631427888, "loss": 0.0911, "step": 2062 }, { "epoch": 0.2891380518570427, "grad_norm": 0.5171084403991699, "learning_rate": 0.0002776417125089691, "loss": 0.0827, "step": 2063 }, { "epoch": 0.2892782060266293, "grad_norm": 0.5962603092193604, "learning_rate": 0.0002776273618751495, "loss": 0.0975, "step": 2064 }, { "epoch": 0.28941836019621586, "grad_norm": 0.884111762046814, "learning_rate": 0.0002776130112413298, "loss": 0.1641, "step": 2065 }, { "epoch": 0.2895585143658024, "grad_norm": 0.4736906886100769, "learning_rate": 0.00027759866060751015, "loss": 0.1806, "step": 2066 }, { "epoch": 0.28969866853538895, "grad_norm": 0.5883544683456421, "learning_rate": 0.0002775843099736905, "loss": 0.1239, "step": 2067 }, { "epoch": 0.28983882270497546, "grad_norm": 0.6384047269821167, "learning_rate": 0.0002775699593398708, "loss": 0.1671, "step": 2068 }, { "epoch": 0.28997897687456203, "grad_norm": 0.7631522417068481, "learning_rate": 0.00027755560870605113, "loss": 0.0713, "step": 2069 }, { "epoch": 0.29011913104414855, "grad_norm": 0.6706103086471558, "learning_rate": 0.0002775412580722315, "loss": 0.1271, "step": 2070 }, { "epoch": 0.2902592852137351, "grad_norm": 0.3679930567741394, "learning_rate": 0.00027752690743841185, "loss": 0.1174, "step": 2071 }, { "epoch": 0.29039943938332163, "grad_norm": 0.416925311088562, "learning_rate": 0.0002775125568045922, "loss": 0.1083, "step": 2072 }, { "epoch": 0.2905395935529082, "grad_norm": 0.6641778349876404, "learning_rate": 0.0002774982061707725, "loss": 0.1367, "step": 2073 }, { "epoch": 0.2906797477224947, "grad_norm": 0.5840435028076172, "learning_rate": 0.0002774838555369529, "loss": 0.0973, "step": 2074 }, { "epoch": 0.2908199018920813, "grad_norm": 0.8699474930763245, "learning_rate": 0.0002774695049031332, "loss": 0.1856, "step": 2075 }, { "epoch": 0.29096005606166786, "grad_norm": 0.5407617688179016, "learning_rate": 0.00027745515426931354, "loss": 0.0798, "step": 2076 }, { "epoch": 
0.2911002102312544, "grad_norm": 0.4400215148925781, "learning_rate": 0.00027744080363549387, "loss": 0.1526, "step": 2077 }, { "epoch": 0.29124036440084095, "grad_norm": 0.6826701760292053, "learning_rate": 0.0002774264530016742, "loss": 0.1006, "step": 2078 }, { "epoch": 0.29138051857042746, "grad_norm": 0.33418065309524536, "learning_rate": 0.0002774121023678546, "loss": 0.0771, "step": 2079 }, { "epoch": 0.29152067274001403, "grad_norm": 0.46389448642730713, "learning_rate": 0.0002773977517340349, "loss": 0.1087, "step": 2080 }, { "epoch": 0.29166082690960055, "grad_norm": 0.80023592710495, "learning_rate": 0.00027738340110021524, "loss": 0.115, "step": 2081 }, { "epoch": 0.2918009810791871, "grad_norm": 0.9113709926605225, "learning_rate": 0.00027736905046639557, "loss": 0.106, "step": 2082 }, { "epoch": 0.29194113524877363, "grad_norm": 0.8177204728126526, "learning_rate": 0.0002773546998325759, "loss": 0.1477, "step": 2083 }, { "epoch": 0.2920812894183602, "grad_norm": 0.9234193563461304, "learning_rate": 0.0002773403491987562, "loss": 0.2335, "step": 2084 }, { "epoch": 0.2922214435879467, "grad_norm": 0.30937492847442627, "learning_rate": 0.00027732599856493655, "loss": 0.1049, "step": 2085 }, { "epoch": 0.2923615977575333, "grad_norm": 0.6365763545036316, "learning_rate": 0.00027731164793111694, "loss": 0.094, "step": 2086 }, { "epoch": 0.29250175192711986, "grad_norm": 0.7770087718963623, "learning_rate": 0.00027729729729729727, "loss": 0.1203, "step": 2087 }, { "epoch": 0.2926419060967064, "grad_norm": 1.9098001718521118, "learning_rate": 0.0002772829466634776, "loss": 0.2285, "step": 2088 }, { "epoch": 0.29278206026629294, "grad_norm": 0.9815558791160583, "learning_rate": 0.000277268596029658, "loss": 0.1604, "step": 2089 }, { "epoch": 0.29292221443587946, "grad_norm": 0.6442099809646606, "learning_rate": 0.0002772542453958383, "loss": 0.1251, "step": 2090 }, { "epoch": 0.29306236860546603, "grad_norm": 0.6111890077590942, "learning_rate": 
0.00027723989476201863, "loss": 0.0643, "step": 2091 }, { "epoch": 0.29320252277505254, "grad_norm": 1.4717985391616821, "learning_rate": 0.00027722554412819896, "loss": 0.1283, "step": 2092 }, { "epoch": 0.2933426769446391, "grad_norm": 0.5396863222122192, "learning_rate": 0.0002772111934943793, "loss": 0.1471, "step": 2093 }, { "epoch": 0.29348283111422563, "grad_norm": 2.047018051147461, "learning_rate": 0.0002771968428605596, "loss": 0.2377, "step": 2094 }, { "epoch": 0.2936229852838122, "grad_norm": 0.45205017924308777, "learning_rate": 0.00027718249222674, "loss": 0.1273, "step": 2095 }, { "epoch": 0.2937631394533987, "grad_norm": 0.9719533920288086, "learning_rate": 0.00027716814159292033, "loss": 0.2856, "step": 2096 }, { "epoch": 0.2939032936229853, "grad_norm": 0.8585732579231262, "learning_rate": 0.00027715379095910066, "loss": 0.1802, "step": 2097 }, { "epoch": 0.29404344779257185, "grad_norm": 1.4114614725112915, "learning_rate": 0.000277139440325281, "loss": 0.3464, "step": 2098 }, { "epoch": 0.29418360196215837, "grad_norm": 1.4573348760604858, "learning_rate": 0.00027712508969146137, "loss": 0.362, "step": 2099 }, { "epoch": 0.29432375613174494, "grad_norm": 1.972069263458252, "learning_rate": 0.0002771107390576417, "loss": 0.1895, "step": 2100 }, { "epoch": 0.29446391030133146, "grad_norm": 0.515295147895813, "learning_rate": 0.00027709638842382203, "loss": 0.1237, "step": 2101 }, { "epoch": 0.294604064470918, "grad_norm": 0.3586462736129761, "learning_rate": 0.00027708203779000236, "loss": 0.1471, "step": 2102 }, { "epoch": 0.29474421864050454, "grad_norm": 0.5159236192703247, "learning_rate": 0.0002770676871561827, "loss": 0.082, "step": 2103 }, { "epoch": 0.2948843728100911, "grad_norm": 0.5777571797370911, "learning_rate": 0.000277053336522363, "loss": 0.1718, "step": 2104 }, { "epoch": 0.2950245269796776, "grad_norm": 0.44277429580688477, "learning_rate": 0.0002770389858885434, "loss": 0.1499, "step": 2105 }, { "epoch": 0.2951646811492642, 
"grad_norm": 0.757902979850769, "learning_rate": 0.0002770246352547237, "loss": 0.1492, "step": 2106 }, { "epoch": 0.2953048353188507, "grad_norm": 0.5982668399810791, "learning_rate": 0.00027701028462090405, "loss": 0.0845, "step": 2107 }, { "epoch": 0.2954449894884373, "grad_norm": 0.5739657878875732, "learning_rate": 0.00027699593398708444, "loss": 0.1285, "step": 2108 }, { "epoch": 0.29558514365802385, "grad_norm": 0.6959678530693054, "learning_rate": 0.00027698158335326477, "loss": 0.1415, "step": 2109 }, { "epoch": 0.29572529782761037, "grad_norm": 0.630250871181488, "learning_rate": 0.0002769672327194451, "loss": 0.1056, "step": 2110 }, { "epoch": 0.29586545199719694, "grad_norm": 0.36131590604782104, "learning_rate": 0.0002769528820856254, "loss": 0.092, "step": 2111 }, { "epoch": 0.29600560616678345, "grad_norm": 0.7285513877868652, "learning_rate": 0.00027693853145180575, "loss": 0.1308, "step": 2112 }, { "epoch": 0.29614576033637, "grad_norm": 0.640658974647522, "learning_rate": 0.0002769241808179861, "loss": 0.2182, "step": 2113 }, { "epoch": 0.29628591450595654, "grad_norm": 0.43515151739120483, "learning_rate": 0.00027690983018416646, "loss": 0.1212, "step": 2114 }, { "epoch": 0.2964260686755431, "grad_norm": 0.4214324951171875, "learning_rate": 0.0002768954795503468, "loss": 0.0648, "step": 2115 }, { "epoch": 0.2965662228451296, "grad_norm": 0.31286269426345825, "learning_rate": 0.0002768811289165271, "loss": 0.094, "step": 2116 }, { "epoch": 0.2967063770147162, "grad_norm": 0.657855749130249, "learning_rate": 0.00027686677828270745, "loss": 0.1252, "step": 2117 }, { "epoch": 0.2968465311843027, "grad_norm": 0.5995632410049438, "learning_rate": 0.00027685242764888783, "loss": 0.1427, "step": 2118 }, { "epoch": 0.2969866853538893, "grad_norm": 0.6053673028945923, "learning_rate": 0.00027683807701506816, "loss": 0.1685, "step": 2119 }, { "epoch": 0.29712683952347585, "grad_norm": 0.2206595540046692, "learning_rate": 0.0002768237263812485, "loss": 
0.0544, "step": 2120 }, { "epoch": 0.29726699369306236, "grad_norm": 0.4044966995716095, "learning_rate": 0.0002768093757474288, "loss": 0.1102, "step": 2121 }, { "epoch": 0.29740714786264894, "grad_norm": 0.6795877814292908, "learning_rate": 0.00027679502511360914, "loss": 0.1473, "step": 2122 }, { "epoch": 0.29754730203223545, "grad_norm": 0.3030228614807129, "learning_rate": 0.0002767806744797895, "loss": 0.0765, "step": 2123 }, { "epoch": 0.297687456201822, "grad_norm": 0.41661736369132996, "learning_rate": 0.00027676632384596986, "loss": 0.1451, "step": 2124 }, { "epoch": 0.29782761037140854, "grad_norm": 0.26537394523620605, "learning_rate": 0.0002767519732121502, "loss": 0.0769, "step": 2125 }, { "epoch": 0.2979677645409951, "grad_norm": 0.6869179010391235, "learning_rate": 0.0002767376225783305, "loss": 0.1007, "step": 2126 }, { "epoch": 0.2981079187105816, "grad_norm": 0.3951539993286133, "learning_rate": 0.0002767232719445109, "loss": 0.1099, "step": 2127 }, { "epoch": 0.2982480728801682, "grad_norm": 0.812346339225769, "learning_rate": 0.0002767089213106912, "loss": 0.1996, "step": 2128 }, { "epoch": 0.2983882270497547, "grad_norm": 0.5661276578903198, "learning_rate": 0.00027669457067687155, "loss": 0.079, "step": 2129 }, { "epoch": 0.2985283812193413, "grad_norm": 0.6014966368675232, "learning_rate": 0.0002766802200430519, "loss": 0.119, "step": 2130 }, { "epoch": 0.29866853538892785, "grad_norm": 0.6107602119445801, "learning_rate": 0.0002766658694092322, "loss": 0.1715, "step": 2131 }, { "epoch": 0.29880868955851436, "grad_norm": 0.4255887269973755, "learning_rate": 0.00027665151877541254, "loss": 0.1627, "step": 2132 }, { "epoch": 0.29894884372810093, "grad_norm": 0.9967101216316223, "learning_rate": 0.00027663716814159287, "loss": 0.1354, "step": 2133 }, { "epoch": 0.29908899789768745, "grad_norm": 0.6783173680305481, "learning_rate": 0.00027662281750777325, "loss": 0.1371, "step": 2134 }, { "epoch": 0.299229152067274, "grad_norm": 
0.5224384665489197, "learning_rate": 0.0002766084668739536, "loss": 0.1273, "step": 2135 }, { "epoch": 0.29936930623686053, "grad_norm": 1.199905514717102, "learning_rate": 0.0002765941162401339, "loss": 0.1542, "step": 2136 }, { "epoch": 0.2995094604064471, "grad_norm": 0.5567576289176941, "learning_rate": 0.0002765797656063143, "loss": 0.1095, "step": 2137 }, { "epoch": 0.2996496145760336, "grad_norm": 0.2655661404132843, "learning_rate": 0.0002765654149724946, "loss": 0.0691, "step": 2138 }, { "epoch": 0.2997897687456202, "grad_norm": 0.6687948107719421, "learning_rate": 0.00027655106433867495, "loss": 0.1734, "step": 2139 }, { "epoch": 0.2999299229152067, "grad_norm": 0.6922125220298767, "learning_rate": 0.0002765367137048553, "loss": 0.1694, "step": 2140 }, { "epoch": 0.3000700770847933, "grad_norm": 0.2712704837322235, "learning_rate": 0.0002765223630710356, "loss": 0.0718, "step": 2141 }, { "epoch": 0.30021023125437984, "grad_norm": 0.9073896408081055, "learning_rate": 0.00027650801243721593, "loss": 0.1681, "step": 2142 }, { "epoch": 0.30035038542396636, "grad_norm": 0.5596049427986145, "learning_rate": 0.0002764936618033963, "loss": 0.157, "step": 2143 }, { "epoch": 0.30049053959355293, "grad_norm": 0.5209670066833496, "learning_rate": 0.00027647931116957664, "loss": 0.1109, "step": 2144 }, { "epoch": 0.30063069376313944, "grad_norm": 0.7527725696563721, "learning_rate": 0.00027646496053575697, "loss": 0.2062, "step": 2145 }, { "epoch": 0.300770847932726, "grad_norm": 0.6075131297111511, "learning_rate": 0.0002764506099019373, "loss": 0.222, "step": 2146 }, { "epoch": 0.30091100210231253, "grad_norm": 1.0059218406677246, "learning_rate": 0.00027643625926811763, "loss": 0.1261, "step": 2147 }, { "epoch": 0.3010511562718991, "grad_norm": 2.278404474258423, "learning_rate": 0.00027642190863429796, "loss": 0.3453, "step": 2148 }, { "epoch": 0.3011913104414856, "grad_norm": 2.37385892868042, "learning_rate": 0.00027640755800047834, "loss": 0.0978, "step": 2149 
}, { "epoch": 0.3013314646110722, "grad_norm": 2.6766862869262695, "learning_rate": 0.00027639320736665867, "loss": 0.1361, "step": 2150 }, { "epoch": 0.3014716187806587, "grad_norm": 0.46212106943130493, "learning_rate": 0.000276378856732839, "loss": 0.1649, "step": 2151 }, { "epoch": 0.30161177295024527, "grad_norm": 0.6219832301139832, "learning_rate": 0.0002763645060990193, "loss": 0.1395, "step": 2152 }, { "epoch": 0.30175192711983184, "grad_norm": 0.4304855465888977, "learning_rate": 0.0002763501554651997, "loss": 0.1202, "step": 2153 }, { "epoch": 0.30189208128941836, "grad_norm": 0.7878482341766357, "learning_rate": 0.00027633580483138004, "loss": 0.1496, "step": 2154 }, { "epoch": 0.3020322354590049, "grad_norm": 0.48217785358428955, "learning_rate": 0.00027632145419756037, "loss": 0.1022, "step": 2155 }, { "epoch": 0.30217238962859144, "grad_norm": 0.4914371371269226, "learning_rate": 0.0002763071035637407, "loss": 0.0931, "step": 2156 }, { "epoch": 0.302312543798178, "grad_norm": 0.46999767422676086, "learning_rate": 0.000276292752929921, "loss": 0.1325, "step": 2157 }, { "epoch": 0.3024526979677645, "grad_norm": 0.328476220369339, "learning_rate": 0.00027627840229610135, "loss": 0.1263, "step": 2158 }, { "epoch": 0.3025928521373511, "grad_norm": 0.729601263999939, "learning_rate": 0.00027626405166228174, "loss": 0.1066, "step": 2159 }, { "epoch": 0.3027330063069376, "grad_norm": 0.3119988739490509, "learning_rate": 0.00027624970102846206, "loss": 0.0844, "step": 2160 }, { "epoch": 0.3028731604765242, "grad_norm": 0.7414813041687012, "learning_rate": 0.0002762353503946424, "loss": 0.2207, "step": 2161 }, { "epoch": 0.3030133146461107, "grad_norm": 0.469025194644928, "learning_rate": 0.0002762209997608228, "loss": 0.1302, "step": 2162 }, { "epoch": 0.30315346881569727, "grad_norm": 0.684956967830658, "learning_rate": 0.0002762066491270031, "loss": 0.2083, "step": 2163 }, { "epoch": 0.30329362298528384, "grad_norm": 0.4641209542751312, "learning_rate": 
0.00027619229849318343, "loss": 0.1101, "step": 2164 }, { "epoch": 0.30343377715487035, "grad_norm": 0.29255983233451843, "learning_rate": 0.00027617794785936376, "loss": 0.108, "step": 2165 }, { "epoch": 0.3035739313244569, "grad_norm": 1.2404125928878784, "learning_rate": 0.0002761635972255441, "loss": 0.2365, "step": 2166 }, { "epoch": 0.30371408549404344, "grad_norm": 0.3747747540473938, "learning_rate": 0.0002761492465917244, "loss": 0.1346, "step": 2167 }, { "epoch": 0.30385423966363, "grad_norm": 0.41646280884742737, "learning_rate": 0.00027613489595790475, "loss": 0.1159, "step": 2168 }, { "epoch": 0.3039943938332165, "grad_norm": 0.5553839802742004, "learning_rate": 0.00027612054532408513, "loss": 0.0909, "step": 2169 }, { "epoch": 0.3041345480028031, "grad_norm": 0.20594193041324615, "learning_rate": 0.00027610619469026546, "loss": 0.0503, "step": 2170 }, { "epoch": 0.3042747021723896, "grad_norm": 0.503252387046814, "learning_rate": 0.0002760918440564458, "loss": 0.115, "step": 2171 }, { "epoch": 0.3044148563419762, "grad_norm": 0.645164966583252, "learning_rate": 0.00027607749342262617, "loss": 0.1806, "step": 2172 }, { "epoch": 0.3045550105115627, "grad_norm": 0.5107470750808716, "learning_rate": 0.0002760631427888065, "loss": 0.1072, "step": 2173 }, { "epoch": 0.30469516468114927, "grad_norm": 0.41755470633506775, "learning_rate": 0.0002760487921549868, "loss": 0.2055, "step": 2174 }, { "epoch": 0.30483531885073584, "grad_norm": 0.3775356411933899, "learning_rate": 0.00027603444152116715, "loss": 0.1298, "step": 2175 }, { "epoch": 0.30497547302032235, "grad_norm": 0.6275502443313599, "learning_rate": 0.0002760200908873475, "loss": 0.1854, "step": 2176 }, { "epoch": 0.3051156271899089, "grad_norm": 0.17002259194850922, "learning_rate": 0.0002760057402535278, "loss": 0.0361, "step": 2177 }, { "epoch": 0.30525578135949544, "grad_norm": 0.5578188300132751, "learning_rate": 0.0002759913896197082, "loss": 0.1732, "step": 2178 }, { "epoch": 
0.305395935529082, "grad_norm": 0.4350545406341553, "learning_rate": 0.0002759770389858885, "loss": 0.1123, "step": 2179 }, { "epoch": 0.3055360896986685, "grad_norm": 0.6175668239593506, "learning_rate": 0.00027596268835206885, "loss": 0.1147, "step": 2180 }, { "epoch": 0.3056762438682551, "grad_norm": 0.4442678689956665, "learning_rate": 0.00027594833771824923, "loss": 0.1469, "step": 2181 }, { "epoch": 0.3058163980378416, "grad_norm": 0.4055914580821991, "learning_rate": 0.00027593398708442956, "loss": 0.101, "step": 2182 }, { "epoch": 0.3059565522074282, "grad_norm": 0.4393932521343231, "learning_rate": 0.0002759196364506099, "loss": 0.1427, "step": 2183 }, { "epoch": 0.3060967063770147, "grad_norm": 0.41718894243240356, "learning_rate": 0.0002759052858167902, "loss": 0.0872, "step": 2184 }, { "epoch": 0.30623686054660126, "grad_norm": 0.5062853693962097, "learning_rate": 0.00027589093518297055, "loss": 0.1061, "step": 2185 }, { "epoch": 0.30637701471618783, "grad_norm": 0.4873693585395813, "learning_rate": 0.0002758765845491509, "loss": 0.0687, "step": 2186 }, { "epoch": 0.30651716888577435, "grad_norm": 0.2480587512254715, "learning_rate": 0.0002758622339153312, "loss": 0.046, "step": 2187 }, { "epoch": 0.3066573230553609, "grad_norm": 0.6864371299743652, "learning_rate": 0.0002758478832815116, "loss": 0.0848, "step": 2188 }, { "epoch": 0.30679747722494743, "grad_norm": 0.4345424473285675, "learning_rate": 0.0002758335326476919, "loss": 0.061, "step": 2189 }, { "epoch": 0.306937631394534, "grad_norm": 1.7501213550567627, "learning_rate": 0.00027581918201387225, "loss": 0.2407, "step": 2190 }, { "epoch": 0.3070777855641205, "grad_norm": 0.7332161068916321, "learning_rate": 0.00027580483138005263, "loss": 0.3168, "step": 2191 }, { "epoch": 0.3072179397337071, "grad_norm": 0.611071765422821, "learning_rate": 0.00027579048074623296, "loss": 0.0883, "step": 2192 }, { "epoch": 0.3073580939032936, "grad_norm": 0.386831670999527, "learning_rate": 
0.0002757761301124133, "loss": 0.0785, "step": 2193 }, { "epoch": 0.3074982480728802, "grad_norm": 1.0226197242736816, "learning_rate": 0.0002757617794785936, "loss": 0.294, "step": 2194 }, { "epoch": 0.3076384022424667, "grad_norm": 0.9176251888275146, "learning_rate": 0.00027574742884477394, "loss": 0.3836, "step": 2195 }, { "epoch": 0.30777855641205326, "grad_norm": 2.0792934894561768, "learning_rate": 0.00027573307821095427, "loss": 0.2776, "step": 2196 }, { "epoch": 0.3079187105816398, "grad_norm": 1.2955394983291626, "learning_rate": 0.00027571872757713465, "loss": 0.5569, "step": 2197 }, { "epoch": 0.30805886475122635, "grad_norm": 1.8353787660598755, "learning_rate": 0.000275704376943315, "loss": 0.3247, "step": 2198 }, { "epoch": 0.3081990189208129, "grad_norm": 0.8331082463264465, "learning_rate": 0.0002756900263094953, "loss": 0.2205, "step": 2199 }, { "epoch": 0.30833917309039943, "grad_norm": 1.3991813659667969, "learning_rate": 0.00027567567567567564, "loss": 0.1944, "step": 2200 }, { "epoch": 0.308479327259986, "grad_norm": 0.2933048605918884, "learning_rate": 0.000275661325041856, "loss": 0.1416, "step": 2201 }, { "epoch": 0.3086194814295725, "grad_norm": 0.3607500493526459, "learning_rate": 0.00027564697440803635, "loss": 0.1689, "step": 2202 }, { "epoch": 0.3087596355991591, "grad_norm": 0.5643554925918579, "learning_rate": 0.0002756326237742167, "loss": 0.2199, "step": 2203 }, { "epoch": 0.3088997897687456, "grad_norm": 0.36085689067840576, "learning_rate": 0.000275618273140397, "loss": 0.1228, "step": 2204 }, { "epoch": 0.3090399439383322, "grad_norm": 0.39612624049186707, "learning_rate": 0.00027560392250657734, "loss": 0.1684, "step": 2205 }, { "epoch": 0.3091800981079187, "grad_norm": 0.6036938428878784, "learning_rate": 0.00027558957187275767, "loss": 0.1474, "step": 2206 }, { "epoch": 0.30932025227750526, "grad_norm": 0.37125927209854126, "learning_rate": 0.00027557522123893805, "loss": 0.1597, "step": 2207 }, { "epoch": 0.3094604064470918, 
"grad_norm": 0.42839667201042175, "learning_rate": 0.0002755608706051184, "loss": 0.1155, "step": 2208 }, { "epoch": 0.30960056061667834, "grad_norm": 0.2159223109483719, "learning_rate": 0.0002755465199712987, "loss": 0.0748, "step": 2209 }, { "epoch": 0.3097407147862649, "grad_norm": 0.27447277307510376, "learning_rate": 0.00027553216933747903, "loss": 0.0879, "step": 2210 }, { "epoch": 0.30988086895585143, "grad_norm": 0.7030569314956665, "learning_rate": 0.00027551781870365936, "loss": 0.1659, "step": 2211 }, { "epoch": 0.310021023125438, "grad_norm": 0.5806667804718018, "learning_rate": 0.0002755034680698397, "loss": 0.1167, "step": 2212 }, { "epoch": 0.3101611772950245, "grad_norm": 0.44887638092041016, "learning_rate": 0.0002754891174360201, "loss": 0.1281, "step": 2213 }, { "epoch": 0.3103013314646111, "grad_norm": 0.8567746877670288, "learning_rate": 0.0002754747668022004, "loss": 0.2058, "step": 2214 }, { "epoch": 0.3104414856341976, "grad_norm": 0.7291098237037659, "learning_rate": 0.00027546041616838073, "loss": 0.1576, "step": 2215 }, { "epoch": 0.31058163980378417, "grad_norm": 0.3068133294582367, "learning_rate": 0.0002754460655345611, "loss": 0.0861, "step": 2216 }, { "epoch": 0.3107217939733707, "grad_norm": 0.7029969692230225, "learning_rate": 0.00027543171490074144, "loss": 0.0961, "step": 2217 }, { "epoch": 0.31086194814295726, "grad_norm": 0.6289925575256348, "learning_rate": 0.00027541736426692177, "loss": 0.0877, "step": 2218 }, { "epoch": 0.31100210231254377, "grad_norm": 0.8315983414649963, "learning_rate": 0.0002754030136331021, "loss": 0.1617, "step": 2219 }, { "epoch": 0.31114225648213034, "grad_norm": 0.3475262224674225, "learning_rate": 0.00027538866299928243, "loss": 0.0862, "step": 2220 }, { "epoch": 0.3112824106517169, "grad_norm": 0.4499328136444092, "learning_rate": 0.00027537431236546276, "loss": 0.1697, "step": 2221 }, { "epoch": 0.3114225648213034, "grad_norm": 0.3628060221672058, "learning_rate": 0.0002753599617316431, "loss": 
0.1003, "step": 2222 }, { "epoch": 0.31156271899089, "grad_norm": 0.4234078526496887, "learning_rate": 0.00027534561109782347, "loss": 0.0853, "step": 2223 }, { "epoch": 0.3117028731604765, "grad_norm": 0.6662800312042236, "learning_rate": 0.0002753312604640038, "loss": 0.1542, "step": 2224 }, { "epoch": 0.3118430273300631, "grad_norm": 0.5647245049476624, "learning_rate": 0.0002753169098301841, "loss": 0.0793, "step": 2225 }, { "epoch": 0.3119831814996496, "grad_norm": 1.3708993196487427, "learning_rate": 0.0002753025591963645, "loss": 0.0948, "step": 2226 }, { "epoch": 0.31212333566923617, "grad_norm": 0.4312724471092224, "learning_rate": 0.00027528820856254484, "loss": 0.1168, "step": 2227 }, { "epoch": 0.3122634898388227, "grad_norm": 0.5120587944984436, "learning_rate": 0.00027527385792872516, "loss": 0.0557, "step": 2228 }, { "epoch": 0.31240364400840925, "grad_norm": 0.3675335645675659, "learning_rate": 0.0002752595072949055, "loss": 0.0964, "step": 2229 }, { "epoch": 0.31254379817799577, "grad_norm": 0.6422421336174011, "learning_rate": 0.0002752451566610858, "loss": 0.1545, "step": 2230 }, { "epoch": 0.31268395234758234, "grad_norm": 0.33705493807792664, "learning_rate": 0.00027523080602726615, "loss": 0.0765, "step": 2231 }, { "epoch": 0.3128241065171689, "grad_norm": 0.3716358542442322, "learning_rate": 0.00027521645539344653, "loss": 0.0688, "step": 2232 }, { "epoch": 0.3129642606867554, "grad_norm": 0.8271526098251343, "learning_rate": 0.00027520210475962686, "loss": 0.1556, "step": 2233 }, { "epoch": 0.313104414856342, "grad_norm": 0.5413151383399963, "learning_rate": 0.0002751877541258072, "loss": 0.0599, "step": 2234 }, { "epoch": 0.3132445690259285, "grad_norm": 0.6147760152816772, "learning_rate": 0.0002751734034919876, "loss": 0.1059, "step": 2235 }, { "epoch": 0.3133847231955151, "grad_norm": 0.5133717060089111, "learning_rate": 0.0002751590528581679, "loss": 0.137, "step": 2236 }, { "epoch": 0.3135248773651016, "grad_norm": 0.2922382056713104, 
"learning_rate": 0.00027514470222434823, "loss": 0.0415, "step": 2237 }, { "epoch": 0.31366503153468817, "grad_norm": 0.6263818144798279, "learning_rate": 0.00027513035159052856, "loss": 0.0994, "step": 2238 }, { "epoch": 0.3138051857042747, "grad_norm": 0.3334684371948242, "learning_rate": 0.0002751160009567089, "loss": 0.0806, "step": 2239 }, { "epoch": 0.31394533987386125, "grad_norm": 0.4657023549079895, "learning_rate": 0.0002751016503228892, "loss": 0.2023, "step": 2240 }, { "epoch": 0.31408549404344777, "grad_norm": 0.43214061856269836, "learning_rate": 0.00027508729968906954, "loss": 0.0662, "step": 2241 }, { "epoch": 0.31422564821303434, "grad_norm": 0.6005951166152954, "learning_rate": 0.00027507294905524993, "loss": 0.2053, "step": 2242 }, { "epoch": 0.3143658023826209, "grad_norm": 0.6431288719177246, "learning_rate": 0.00027505859842143026, "loss": 0.1224, "step": 2243 }, { "epoch": 0.3145059565522074, "grad_norm": 0.7339452505111694, "learning_rate": 0.0002750442477876106, "loss": 0.2113, "step": 2244 }, { "epoch": 0.314646110721794, "grad_norm": 1.1418890953063965, "learning_rate": 0.00027502989715379097, "loss": 0.2087, "step": 2245 }, { "epoch": 0.3147862648913805, "grad_norm": 0.8680152893066406, "learning_rate": 0.0002750155465199713, "loss": 0.2276, "step": 2246 }, { "epoch": 0.3149264190609671, "grad_norm": 1.125942587852478, "learning_rate": 0.0002750011958861516, "loss": 0.1291, "step": 2247 }, { "epoch": 0.3150665732305536, "grad_norm": 0.7187932133674622, "learning_rate": 0.00027498684525233195, "loss": 0.18, "step": 2248 }, { "epoch": 0.31520672740014016, "grad_norm": 4.580008029937744, "learning_rate": 0.0002749724946185123, "loss": 0.2097, "step": 2249 }, { "epoch": 0.3153468815697267, "grad_norm": 3.5999698638916016, "learning_rate": 0.0002749581439846926, "loss": 0.1976, "step": 2250 }, { "epoch": 0.31548703573931325, "grad_norm": 0.9778472781181335, "learning_rate": 0.000274943793350873, "loss": 0.1211, "step": 2251 }, { "epoch": 
0.31562718990889976, "grad_norm": 0.5670143365859985, "learning_rate": 0.0002749294427170533, "loss": 0.1103, "step": 2252 }, { "epoch": 0.31576734407848633, "grad_norm": 0.3790888488292694, "learning_rate": 0.00027491509208323365, "loss": 0.1399, "step": 2253 }, { "epoch": 0.3159074982480729, "grad_norm": 0.3025812804698944, "learning_rate": 0.00027490074144941403, "loss": 0.083, "step": 2254 }, { "epoch": 0.3160476524176594, "grad_norm": 0.3784865438938141, "learning_rate": 0.00027488639081559436, "loss": 0.0829, "step": 2255 }, { "epoch": 0.316187806587246, "grad_norm": 0.35081565380096436, "learning_rate": 0.0002748720401817747, "loss": 0.0657, "step": 2256 }, { "epoch": 0.3163279607568325, "grad_norm": 0.4120871126651764, "learning_rate": 0.000274857689547955, "loss": 0.1125, "step": 2257 }, { "epoch": 0.3164681149264191, "grad_norm": 1.0590656995773315, "learning_rate": 0.00027484333891413535, "loss": 0.0828, "step": 2258 }, { "epoch": 0.3166082690960056, "grad_norm": 0.5151995420455933, "learning_rate": 0.0002748289882803157, "loss": 0.1685, "step": 2259 }, { "epoch": 0.31674842326559216, "grad_norm": 0.416642963886261, "learning_rate": 0.000274814637646496, "loss": 0.1337, "step": 2260 }, { "epoch": 0.3168885774351787, "grad_norm": 0.2912634015083313, "learning_rate": 0.0002748002870126764, "loss": 0.052, "step": 2261 }, { "epoch": 0.31702873160476525, "grad_norm": 0.43086275458335876, "learning_rate": 0.0002747859363788567, "loss": 0.1773, "step": 2262 }, { "epoch": 0.31716888577435176, "grad_norm": 0.6742277145385742, "learning_rate": 0.00027477158574503704, "loss": 0.1893, "step": 2263 }, { "epoch": 0.31730903994393833, "grad_norm": 0.21309377253055573, "learning_rate": 0.0002747572351112174, "loss": 0.0588, "step": 2264 }, { "epoch": 0.3174491941135249, "grad_norm": 0.30748504400253296, "learning_rate": 0.00027474288447739776, "loss": 0.0832, "step": 2265 }, { "epoch": 0.3175893482831114, "grad_norm": 0.4409842789173126, "learning_rate": 
0.0002747285338435781, "loss": 0.1029, "step": 2266 }, { "epoch": 0.317729502452698, "grad_norm": 0.4778917729854584, "learning_rate": 0.0002747141832097584, "loss": 0.0937, "step": 2267 }, { "epoch": 0.3178696566222845, "grad_norm": 0.31895700097084045, "learning_rate": 0.00027469983257593874, "loss": 0.1028, "step": 2268 }, { "epoch": 0.3180098107918711, "grad_norm": 0.4421759247779846, "learning_rate": 0.00027468548194211907, "loss": 0.1301, "step": 2269 }, { "epoch": 0.3181499649614576, "grad_norm": 0.5262652635574341, "learning_rate": 0.00027467113130829945, "loss": 0.0738, "step": 2270 }, { "epoch": 0.31829011913104416, "grad_norm": 1.1306207180023193, "learning_rate": 0.0002746567806744798, "loss": 0.1503, "step": 2271 }, { "epoch": 0.3184302733006307, "grad_norm": 0.4872584342956543, "learning_rate": 0.0002746424300406601, "loss": 0.1322, "step": 2272 }, { "epoch": 0.31857042747021724, "grad_norm": 0.6786035895347595, "learning_rate": 0.00027462807940684044, "loss": 0.1121, "step": 2273 }, { "epoch": 0.31871058163980376, "grad_norm": 0.7159054279327393, "learning_rate": 0.00027461372877302077, "loss": 0.1216, "step": 2274 }, { "epoch": 0.31885073580939033, "grad_norm": 0.6088541746139526, "learning_rate": 0.0002745993781392011, "loss": 0.0756, "step": 2275 }, { "epoch": 0.3189908899789769, "grad_norm": 0.41552990674972534, "learning_rate": 0.0002745850275053814, "loss": 0.1689, "step": 2276 }, { "epoch": 0.3191310441485634, "grad_norm": 0.5906903147697449, "learning_rate": 0.0002745706768715618, "loss": 0.1266, "step": 2277 }, { "epoch": 0.31927119831815, "grad_norm": 0.5163956880569458, "learning_rate": 0.00027455632623774213, "loss": 0.1437, "step": 2278 }, { "epoch": 0.3194113524877365, "grad_norm": 0.5164892673492432, "learning_rate": 0.00027454197560392246, "loss": 0.1044, "step": 2279 }, { "epoch": 0.31955150665732307, "grad_norm": 0.6598469018936157, "learning_rate": 0.00027452762497010285, "loss": 0.149, "step": 2280 }, { "epoch": 
0.3196916608269096, "grad_norm": 0.4031849503517151, "learning_rate": 0.0002745132743362832, "loss": 0.0607, "step": 2281 }, { "epoch": 0.31983181499649616, "grad_norm": 0.42672184109687805, "learning_rate": 0.0002744989237024635, "loss": 0.1155, "step": 2282 }, { "epoch": 0.31997196916608267, "grad_norm": 0.3876674771308899, "learning_rate": 0.00027448457306864383, "loss": 0.1112, "step": 2283 }, { "epoch": 0.32011212333566924, "grad_norm": 0.37237003445625305, "learning_rate": 0.00027447022243482416, "loss": 0.1216, "step": 2284 }, { "epoch": 0.32025227750525576, "grad_norm": 0.2973545789718628, "learning_rate": 0.0002744558718010045, "loss": 0.1295, "step": 2285 }, { "epoch": 0.3203924316748423, "grad_norm": 0.40184876322746277, "learning_rate": 0.00027444152116718487, "loss": 0.0885, "step": 2286 }, { "epoch": 0.3205325858444289, "grad_norm": 0.35069435834884644, "learning_rate": 0.0002744271705333652, "loss": 0.0852, "step": 2287 }, { "epoch": 0.3206727400140154, "grad_norm": 0.854923665523529, "learning_rate": 0.00027441281989954553, "loss": 0.1985, "step": 2288 }, { "epoch": 0.320812894183602, "grad_norm": 0.4210989773273468, "learning_rate": 0.0002743984692657259, "loss": 0.1204, "step": 2289 }, { "epoch": 0.3209530483531885, "grad_norm": 0.5041469931602478, "learning_rate": 0.00027438411863190624, "loss": 0.0988, "step": 2290 }, { "epoch": 0.32109320252277507, "grad_norm": 0.4445111155509949, "learning_rate": 0.00027436976799808657, "loss": 0.1457, "step": 2291 }, { "epoch": 0.3212333566923616, "grad_norm": 1.157723307609558, "learning_rate": 0.0002743554173642669, "loss": 0.1566, "step": 2292 }, { "epoch": 0.32137351086194815, "grad_norm": 0.4383234977722168, "learning_rate": 0.0002743410667304472, "loss": 0.0529, "step": 2293 }, { "epoch": 0.32151366503153467, "grad_norm": 1.1336066722869873, "learning_rate": 0.00027432671609662755, "loss": 0.0918, "step": 2294 }, { "epoch": 0.32165381920112124, "grad_norm": 0.6418235301971436, "learning_rate": 
0.0002743123654628079, "loss": 0.1019, "step": 2295 }, { "epoch": 0.32179397337070775, "grad_norm": 0.8508378863334656, "learning_rate": 0.00027429801482898827, "loss": 0.1527, "step": 2296 }, { "epoch": 0.3219341275402943, "grad_norm": 2.3819377422332764, "learning_rate": 0.0002742836641951686, "loss": 0.2723, "step": 2297 }, { "epoch": 0.3220742817098809, "grad_norm": 1.8200066089630127, "learning_rate": 0.0002742693135613489, "loss": 0.2375, "step": 2298 }, { "epoch": 0.3222144358794674, "grad_norm": 0.1963467001914978, "learning_rate": 0.0002742549629275293, "loss": 0.0159, "step": 2299 }, { "epoch": 0.322354590049054, "grad_norm": 1.9495701789855957, "learning_rate": 0.00027424061229370963, "loss": 0.3231, "step": 2300 }, { "epoch": 0.3224947442186405, "grad_norm": 0.4890349507331848, "learning_rate": 0.00027422626165988996, "loss": 0.2164, "step": 2301 }, { "epoch": 0.32263489838822706, "grad_norm": 0.28434503078460693, "learning_rate": 0.0002742119110260703, "loss": 0.0338, "step": 2302 }, { "epoch": 0.3227750525578136, "grad_norm": 0.46354666352272034, "learning_rate": 0.0002741975603922506, "loss": 0.1001, "step": 2303 }, { "epoch": 0.32291520672740015, "grad_norm": 0.34605783224105835, "learning_rate": 0.00027418320975843095, "loss": 0.102, "step": 2304 }, { "epoch": 0.32305536089698667, "grad_norm": 0.30339422821998596, "learning_rate": 0.00027416885912461133, "loss": 0.0734, "step": 2305 }, { "epoch": 0.32319551506657324, "grad_norm": 1.1335664987564087, "learning_rate": 0.00027415450849079166, "loss": 0.1667, "step": 2306 }, { "epoch": 0.32333566923615975, "grad_norm": 0.4269837439060211, "learning_rate": 0.000274140157856972, "loss": 0.1496, "step": 2307 }, { "epoch": 0.3234758234057463, "grad_norm": 0.3875157833099365, "learning_rate": 0.0002741258072231523, "loss": 0.1133, "step": 2308 }, { "epoch": 0.3236159775753329, "grad_norm": 0.45066824555397034, "learning_rate": 0.0002741114565893327, "loss": 0.0993, "step": 2309 }, { "epoch": 
0.3237561317449194, "grad_norm": 0.450717568397522, "learning_rate": 0.00027409710595551303, "loss": 0.0484, "step": 2310 }, { "epoch": 0.323896285914506, "grad_norm": 0.46739262342453003, "learning_rate": 0.00027408275532169336, "loss": 0.1357, "step": 2311 }, { "epoch": 0.3240364400840925, "grad_norm": 0.5234241485595703, "learning_rate": 0.0002740684046878737, "loss": 0.1473, "step": 2312 }, { "epoch": 0.32417659425367906, "grad_norm": 0.478306382894516, "learning_rate": 0.000274054054054054, "loss": 0.159, "step": 2313 }, { "epoch": 0.3243167484232656, "grad_norm": 0.48222991824150085, "learning_rate": 0.00027403970342023434, "loss": 0.1308, "step": 2314 }, { "epoch": 0.32445690259285215, "grad_norm": 0.2981216013431549, "learning_rate": 0.0002740253527864147, "loss": 0.1422, "step": 2315 }, { "epoch": 0.32459705676243866, "grad_norm": 0.2919451892375946, "learning_rate": 0.00027401100215259505, "loss": 0.1156, "step": 2316 }, { "epoch": 0.32473721093202523, "grad_norm": 0.3588232100009918, "learning_rate": 0.0002739966515187754, "loss": 0.0591, "step": 2317 }, { "epoch": 0.32487736510161175, "grad_norm": 0.4017181992530823, "learning_rate": 0.00027398230088495577, "loss": 0.1042, "step": 2318 }, { "epoch": 0.3250175192711983, "grad_norm": 0.5259623527526855, "learning_rate": 0.0002739679502511361, "loss": 0.0716, "step": 2319 }, { "epoch": 0.3251576734407849, "grad_norm": 0.59149169921875, "learning_rate": 0.0002739535996173164, "loss": 0.1199, "step": 2320 }, { "epoch": 0.3252978276103714, "grad_norm": 0.8077289462089539, "learning_rate": 0.00027393924898349675, "loss": 0.0706, "step": 2321 }, { "epoch": 0.325437981779958, "grad_norm": 0.2897542715072632, "learning_rate": 0.0002739248983496771, "loss": 0.0562, "step": 2322 }, { "epoch": 0.3255781359495445, "grad_norm": 0.5956229567527771, "learning_rate": 0.0002739105477158574, "loss": 0.1163, "step": 2323 }, { "epoch": 0.32571829011913106, "grad_norm": 0.6491693258285522, "learning_rate": 
0.00027389619708203774, "loss": 0.1054, "step": 2324 }, { "epoch": 0.3258584442887176, "grad_norm": 0.4410995841026306, "learning_rate": 0.0002738818464482181, "loss": 0.0892, "step": 2325 }, { "epoch": 0.32599859845830415, "grad_norm": 0.5765685439109802, "learning_rate": 0.00027386749581439845, "loss": 0.1489, "step": 2326 }, { "epoch": 0.32613875262789066, "grad_norm": 0.2881010174751282, "learning_rate": 0.0002738531451805788, "loss": 0.1369, "step": 2327 }, { "epoch": 0.32627890679747723, "grad_norm": 0.27360406517982483, "learning_rate": 0.00027383879454675916, "loss": 0.0889, "step": 2328 }, { "epoch": 0.32641906096706375, "grad_norm": 0.4190860390663147, "learning_rate": 0.0002738244439129395, "loss": 0.1312, "step": 2329 }, { "epoch": 0.3265592151366503, "grad_norm": 0.31932252645492554, "learning_rate": 0.0002738100932791198, "loss": 0.0836, "step": 2330 }, { "epoch": 0.3266993693062369, "grad_norm": 0.4889891743659973, "learning_rate": 0.00027379574264530014, "loss": 0.0817, "step": 2331 }, { "epoch": 0.3268395234758234, "grad_norm": 0.653013288974762, "learning_rate": 0.0002737813920114805, "loss": 0.0734, "step": 2332 }, { "epoch": 0.32697967764540997, "grad_norm": 0.20587152242660522, "learning_rate": 0.0002737670413776608, "loss": 0.0331, "step": 2333 }, { "epoch": 0.3271198318149965, "grad_norm": 0.41813984513282776, "learning_rate": 0.0002737526907438412, "loss": 0.0714, "step": 2334 }, { "epoch": 0.32725998598458306, "grad_norm": 0.4206618368625641, "learning_rate": 0.0002737383401100215, "loss": 0.0963, "step": 2335 }, { "epoch": 0.32740014015416957, "grad_norm": 0.5330125093460083, "learning_rate": 0.00027372398947620184, "loss": 0.1573, "step": 2336 }, { "epoch": 0.32754029432375614, "grad_norm": 0.6578435301780701, "learning_rate": 0.00027370963884238217, "loss": 0.1191, "step": 2337 }, { "epoch": 0.32768044849334266, "grad_norm": 0.7211946249008179, "learning_rate": 0.0002736952882085625, "loss": 0.1443, "step": 2338 }, { "epoch": 
0.32782060266292923, "grad_norm": 0.5612217783927917, "learning_rate": 0.00027368093757474283, "loss": 0.045, "step": 2339 }, { "epoch": 0.32796075683251574, "grad_norm": 0.20903946459293365, "learning_rate": 0.0002736665869409232, "loss": 0.0442, "step": 2340 }, { "epoch": 0.3281009110021023, "grad_norm": 0.5159934163093567, "learning_rate": 0.00027365223630710354, "loss": 0.1769, "step": 2341 }, { "epoch": 0.3282410651716889, "grad_norm": 1.0761363506317139, "learning_rate": 0.00027363788567328387, "loss": 0.0786, "step": 2342 }, { "epoch": 0.3283812193412754, "grad_norm": 0.5273336172103882, "learning_rate": 0.0002736235350394642, "loss": 0.0591, "step": 2343 }, { "epoch": 0.32852137351086197, "grad_norm": 0.9999638199806213, "learning_rate": 0.0002736091844056446, "loss": 0.0878, "step": 2344 }, { "epoch": 0.3286615276804485, "grad_norm": 0.7054980397224426, "learning_rate": 0.0002735948337718249, "loss": 0.1423, "step": 2345 }, { "epoch": 0.32880168185003505, "grad_norm": 1.0636564493179321, "learning_rate": 0.00027358048313800524, "loss": 0.1585, "step": 2346 }, { "epoch": 0.32894183601962157, "grad_norm": 2.299084186553955, "learning_rate": 0.00027356613250418556, "loss": 0.2647, "step": 2347 }, { "epoch": 0.32908199018920814, "grad_norm": 1.3058886528015137, "learning_rate": 0.0002735517818703659, "loss": 0.4329, "step": 2348 }, { "epoch": 0.32922214435879465, "grad_norm": 0.9073130488395691, "learning_rate": 0.0002735374312365462, "loss": 0.111, "step": 2349 }, { "epoch": 0.3293622985283812, "grad_norm": 1.7146743535995483, "learning_rate": 0.0002735230806027266, "loss": 0.4954, "step": 2350 }, { "epoch": 0.32950245269796774, "grad_norm": 0.5724501013755798, "learning_rate": 0.00027350872996890693, "loss": 0.1379, "step": 2351 }, { "epoch": 0.3296426068675543, "grad_norm": 0.7578223943710327, "learning_rate": 0.00027349437933508726, "loss": 0.1376, "step": 2352 }, { "epoch": 0.3297827610371409, "grad_norm": 0.4114321768283844, "learning_rate": 
0.00027348002870126764, "loss": 0.1206, "step": 2353 }, { "epoch": 0.3299229152067274, "grad_norm": 0.4495943486690521, "learning_rate": 0.00027346567806744797, "loss": 0.1106, "step": 2354 }, { "epoch": 0.33006306937631397, "grad_norm": 0.42052367329597473, "learning_rate": 0.0002734513274336283, "loss": 0.1106, "step": 2355 }, { "epoch": 0.3302032235459005, "grad_norm": 0.5298296809196472, "learning_rate": 0.00027343697679980863, "loss": 0.1954, "step": 2356 }, { "epoch": 0.33034337771548705, "grad_norm": 0.40673714876174927, "learning_rate": 0.00027342262616598896, "loss": 0.1229, "step": 2357 }, { "epoch": 0.33048353188507357, "grad_norm": 0.4631592035293579, "learning_rate": 0.0002734082755321693, "loss": 0.1185, "step": 2358 }, { "epoch": 0.33062368605466014, "grad_norm": 0.29532358050346375, "learning_rate": 0.0002733939248983496, "loss": 0.1155, "step": 2359 }, { "epoch": 0.33076384022424665, "grad_norm": 0.15663360059261322, "learning_rate": 0.00027337957426453, "loss": 0.041, "step": 2360 }, { "epoch": 0.3309039943938332, "grad_norm": 0.30265161395072937, "learning_rate": 0.0002733652236307103, "loss": 0.1252, "step": 2361 }, { "epoch": 0.33104414856341974, "grad_norm": 0.2407407909631729, "learning_rate": 0.00027335087299689066, "loss": 0.0421, "step": 2362 }, { "epoch": 0.3311843027330063, "grad_norm": 0.24498705565929413, "learning_rate": 0.00027333652236307104, "loss": 0.0686, "step": 2363 }, { "epoch": 0.3313244569025929, "grad_norm": 0.3211524784564972, "learning_rate": 0.00027332217172925137, "loss": 0.1107, "step": 2364 }, { "epoch": 0.3314646110721794, "grad_norm": 0.4031507074832916, "learning_rate": 0.0002733078210954317, "loss": 0.1145, "step": 2365 }, { "epoch": 0.33160476524176596, "grad_norm": 0.35542893409729004, "learning_rate": 0.000273293470461612, "loss": 0.1093, "step": 2366 }, { "epoch": 0.3317449194113525, "grad_norm": 0.8909142017364502, "learning_rate": 0.00027327911982779235, "loss": 0.1011, "step": 2367 }, { "epoch": 
0.33188507358093905, "grad_norm": 0.6860010623931885, "learning_rate": 0.0002732647691939727, "loss": 0.1648, "step": 2368 }, { "epoch": 0.33202522775052556, "grad_norm": 0.6805834770202637, "learning_rate": 0.00027325041856015306, "loss": 0.0957, "step": 2369 }, { "epoch": 0.33216538192011213, "grad_norm": 0.31497296690940857, "learning_rate": 0.0002732360679263334, "loss": 0.0735, "step": 2370 }, { "epoch": 0.33230553608969865, "grad_norm": 0.2355368733406067, "learning_rate": 0.0002732217172925137, "loss": 0.0729, "step": 2371 }, { "epoch": 0.3324456902592852, "grad_norm": 0.2858748137950897, "learning_rate": 0.0002732073666586941, "loss": 0.0703, "step": 2372 }, { "epoch": 0.33258584442887174, "grad_norm": 0.9212529063224792, "learning_rate": 0.00027319301602487443, "loss": 0.1264, "step": 2373 }, { "epoch": 0.3327259985984583, "grad_norm": 0.4797137379646301, "learning_rate": 0.00027317866539105476, "loss": 0.0964, "step": 2374 }, { "epoch": 0.3328661527680449, "grad_norm": 0.5888754725456238, "learning_rate": 0.0002731643147572351, "loss": 0.1047, "step": 2375 }, { "epoch": 0.3330063069376314, "grad_norm": 1.2329363822937012, "learning_rate": 0.0002731499641234154, "loss": 0.0467, "step": 2376 }, { "epoch": 0.33314646110721796, "grad_norm": 0.4240868091583252, "learning_rate": 0.00027313561348959575, "loss": 0.1885, "step": 2377 }, { "epoch": 0.3332866152768045, "grad_norm": 0.5215107798576355, "learning_rate": 0.0002731212628557761, "loss": 0.16, "step": 2378 }, { "epoch": 0.33342676944639105, "grad_norm": 0.7044126391410828, "learning_rate": 0.00027310691222195646, "loss": 0.1282, "step": 2379 }, { "epoch": 0.33356692361597756, "grad_norm": 0.4650145173072815, "learning_rate": 0.0002730925615881368, "loss": 0.0771, "step": 2380 }, { "epoch": 0.33370707778556413, "grad_norm": 0.7449501752853394, "learning_rate": 0.0002730782109543171, "loss": 0.1764, "step": 2381 }, { "epoch": 0.33384723195515065, "grad_norm": 0.53734290599823, "learning_rate": 
0.0002730638603204975, "loss": 0.1278, "step": 2382 }, { "epoch": 0.3339873861247372, "grad_norm": 0.822650134563446, "learning_rate": 0.0002730495096866778, "loss": 0.0721, "step": 2383 }, { "epoch": 0.33412754029432373, "grad_norm": 0.2684287428855896, "learning_rate": 0.00027303515905285815, "loss": 0.127, "step": 2384 }, { "epoch": 0.3342676944639103, "grad_norm": 0.2761191427707672, "learning_rate": 0.0002730208084190385, "loss": 0.0441, "step": 2385 }, { "epoch": 0.3344078486334969, "grad_norm": 0.39176127314567566, "learning_rate": 0.0002730064577852188, "loss": 0.1067, "step": 2386 }, { "epoch": 0.3345480028030834, "grad_norm": 0.21785077452659607, "learning_rate": 0.00027299210715139914, "loss": 0.0578, "step": 2387 }, { "epoch": 0.33468815697266996, "grad_norm": 0.266147643327713, "learning_rate": 0.0002729777565175795, "loss": 0.0525, "step": 2388 }, { "epoch": 0.3348283111422565, "grad_norm": 0.35395312309265137, "learning_rate": 0.00027296340588375985, "loss": 0.1046, "step": 2389 }, { "epoch": 0.33496846531184304, "grad_norm": 0.31794142723083496, "learning_rate": 0.0002729490552499402, "loss": 0.0561, "step": 2390 }, { "epoch": 0.33510861948142956, "grad_norm": 0.5520455241203308, "learning_rate": 0.00027293470461612056, "loss": 0.1263, "step": 2391 }, { "epoch": 0.33524877365101613, "grad_norm": 0.3143710494041443, "learning_rate": 0.0002729203539823009, "loss": 0.0439, "step": 2392 }, { "epoch": 0.33538892782060264, "grad_norm": 0.5282881855964661, "learning_rate": 0.0002729060033484812, "loss": 0.2055, "step": 2393 }, { "epoch": 0.3355290819901892, "grad_norm": 0.3690708577632904, "learning_rate": 0.00027289165271466155, "loss": 0.0657, "step": 2394 }, { "epoch": 0.33566923615977573, "grad_norm": 0.34283822774887085, "learning_rate": 0.0002728773020808419, "loss": 0.1089, "step": 2395 }, { "epoch": 0.3358093903293623, "grad_norm": 0.6684614419937134, "learning_rate": 0.0002728629514470222, "loss": 0.1495, "step": 2396 }, { "epoch": 
0.33594954449894887, "grad_norm": 0.2404477596282959, "learning_rate": 0.00027284860081320253, "loss": 0.081, "step": 2397 }, { "epoch": 0.3360896986685354, "grad_norm": 0.7326449751853943, "learning_rate": 0.0002728342501793829, "loss": 0.1415, "step": 2398 }, { "epoch": 0.33622985283812196, "grad_norm": 6.717862606048584, "learning_rate": 0.00027281989954556325, "loss": 0.3853, "step": 2399 }, { "epoch": 0.33637000700770847, "grad_norm": 0.7335455417633057, "learning_rate": 0.0002728055489117436, "loss": 0.1097, "step": 2400 }, { "epoch": 0.33651016117729504, "grad_norm": 0.5232634544372559, "learning_rate": 0.0002727911982779239, "loss": 0.1818, "step": 2401 }, { "epoch": 0.33665031534688156, "grad_norm": 0.28279921412467957, "learning_rate": 0.00027277684764410423, "loss": 0.0704, "step": 2402 }, { "epoch": 0.3367904695164681, "grad_norm": 1.2273950576782227, "learning_rate": 0.00027276249701028456, "loss": 0.0747, "step": 2403 }, { "epoch": 0.33693062368605464, "grad_norm": 0.27787306904792786, "learning_rate": 0.00027274814637646494, "loss": 0.0369, "step": 2404 }, { "epoch": 0.3370707778556412, "grad_norm": 0.42845991253852844, "learning_rate": 0.00027273379574264527, "loss": 0.1509, "step": 2405 }, { "epoch": 0.3372109320252277, "grad_norm": 0.21993932127952576, "learning_rate": 0.0002727194451088256, "loss": 0.0511, "step": 2406 }, { "epoch": 0.3373510861948143, "grad_norm": 0.4367145001888275, "learning_rate": 0.000272705094475006, "loss": 0.1215, "step": 2407 }, { "epoch": 0.33749124036440087, "grad_norm": 1.1007795333862305, "learning_rate": 0.0002726907438411863, "loss": 0.1673, "step": 2408 }, { "epoch": 0.3376313945339874, "grad_norm": 0.5768246650695801, "learning_rate": 0.00027267639320736664, "loss": 0.1904, "step": 2409 }, { "epoch": 0.33777154870357395, "grad_norm": 0.6666380167007446, "learning_rate": 0.00027266204257354697, "loss": 0.1605, "step": 2410 }, { "epoch": 0.33791170287316047, "grad_norm": 0.2841531038284302, "learning_rate": 
0.0002726476919397273, "loss": 0.0635, "step": 2411 }, { "epoch": 0.33805185704274704, "grad_norm": 0.3090771734714508, "learning_rate": 0.0002726333413059076, "loss": 0.082, "step": 2412 }, { "epoch": 0.33819201121233355, "grad_norm": 0.37035658955574036, "learning_rate": 0.00027261899067208795, "loss": 0.0988, "step": 2413 }, { "epoch": 0.3383321653819201, "grad_norm": 0.19008462131023407, "learning_rate": 0.00027260464003826834, "loss": 0.0361, "step": 2414 }, { "epoch": 0.33847231955150664, "grad_norm": 0.36050063371658325, "learning_rate": 0.00027259028940444867, "loss": 0.0656, "step": 2415 }, { "epoch": 0.3386124737210932, "grad_norm": 0.39316341280937195, "learning_rate": 0.000272575938770629, "loss": 0.1009, "step": 2416 }, { "epoch": 0.3387526278906797, "grad_norm": 0.5990994572639465, "learning_rate": 0.0002725615881368094, "loss": 0.1116, "step": 2417 }, { "epoch": 0.3388927820602663, "grad_norm": 0.3368386924266815, "learning_rate": 0.0002725472375029897, "loss": 0.1009, "step": 2418 }, { "epoch": 0.3390329362298528, "grad_norm": 0.3756292164325714, "learning_rate": 0.00027253288686917003, "loss": 0.1382, "step": 2419 }, { "epoch": 0.3391730903994394, "grad_norm": 0.5949392318725586, "learning_rate": 0.00027251853623535036, "loss": 0.1747, "step": 2420 }, { "epoch": 0.33931324456902595, "grad_norm": 0.36781832575798035, "learning_rate": 0.0002725041856015307, "loss": 0.0894, "step": 2421 }, { "epoch": 0.33945339873861247, "grad_norm": 0.7466620802879333, "learning_rate": 0.000272489834967711, "loss": 0.1488, "step": 2422 }, { "epoch": 0.33959355290819904, "grad_norm": 0.4443918466567993, "learning_rate": 0.0002724754843338914, "loss": 0.0925, "step": 2423 }, { "epoch": 0.33973370707778555, "grad_norm": 0.43173474073410034, "learning_rate": 0.00027246113370007173, "loss": 0.1676, "step": 2424 }, { "epoch": 0.3398738612473721, "grad_norm": 0.37713611125946045, "learning_rate": 0.00027244678306625206, "loss": 0.0608, "step": 2425 }, { "epoch": 
0.34001401541695864, "grad_norm": 0.42651447653770447, "learning_rate": 0.00027243243243243244, "loss": 0.1129, "step": 2426 }, { "epoch": 0.3401541695865452, "grad_norm": 3.156534194946289, "learning_rate": 0.00027241808179861277, "loss": 0.1655, "step": 2427 }, { "epoch": 0.3402943237561317, "grad_norm": 0.6820510625839233, "learning_rate": 0.0002724037311647931, "loss": 0.1953, "step": 2428 }, { "epoch": 0.3404344779257183, "grad_norm": 0.8211063146591187, "learning_rate": 0.00027238938053097343, "loss": 0.0797, "step": 2429 }, { "epoch": 0.3405746320953048, "grad_norm": 0.643652081489563, "learning_rate": 0.00027237502989715376, "loss": 0.1635, "step": 2430 }, { "epoch": 0.3407147862648914, "grad_norm": 0.4518626928329468, "learning_rate": 0.0002723606792633341, "loss": 0.0927, "step": 2431 }, { "epoch": 0.34085494043447795, "grad_norm": 0.7045007944107056, "learning_rate": 0.0002723463286295144, "loss": 0.1078, "step": 2432 }, { "epoch": 0.34099509460406446, "grad_norm": 0.32810983061790466, "learning_rate": 0.0002723319779956948, "loss": 0.1296, "step": 2433 }, { "epoch": 0.34113524877365103, "grad_norm": 0.28168684244155884, "learning_rate": 0.0002723176273618751, "loss": 0.0869, "step": 2434 }, { "epoch": 0.34127540294323755, "grad_norm": 0.6390880346298218, "learning_rate": 0.00027230327672805545, "loss": 0.0685, "step": 2435 }, { "epoch": 0.3414155571128241, "grad_norm": 0.569926917552948, "learning_rate": 0.00027228892609423584, "loss": 0.0687, "step": 2436 }, { "epoch": 0.34155571128241063, "grad_norm": 1.0435515642166138, "learning_rate": 0.00027227457546041616, "loss": 0.1286, "step": 2437 }, { "epoch": 0.3416958654519972, "grad_norm": 0.20368793606758118, "learning_rate": 0.0002722602248265965, "loss": 0.0492, "step": 2438 }, { "epoch": 0.3418360196215837, "grad_norm": 0.23373551666736603, "learning_rate": 0.0002722458741927768, "loss": 0.0376, "step": 2439 }, { "epoch": 0.3419761737911703, "grad_norm": 0.47204384207725525, "learning_rate": 
0.00027223152355895715, "loss": 0.0353, "step": 2440 }, { "epoch": 0.3421163279607568, "grad_norm": 0.4461885392665863, "learning_rate": 0.0002722171729251375, "loss": 0.1415, "step": 2441 }, { "epoch": 0.3422564821303434, "grad_norm": 0.9008255004882812, "learning_rate": 0.00027220282229131786, "loss": 0.148, "step": 2442 }, { "epoch": 0.34239663629992995, "grad_norm": 0.5254664421081543, "learning_rate": 0.0002721884716574982, "loss": 0.0916, "step": 2443 }, { "epoch": 0.34253679046951646, "grad_norm": 1.1218018531799316, "learning_rate": 0.0002721741210236785, "loss": 0.1065, "step": 2444 }, { "epoch": 0.34267694463910303, "grad_norm": 2.630772590637207, "learning_rate": 0.0002721597703898589, "loss": 0.1218, "step": 2445 }, { "epoch": 0.34281709880868955, "grad_norm": 0.7606246471405029, "learning_rate": 0.00027214541975603923, "loss": 0.2005, "step": 2446 }, { "epoch": 0.3429572529782761, "grad_norm": 1.449053406715393, "learning_rate": 0.00027213106912221956, "loss": 0.199, "step": 2447 }, { "epoch": 0.34309740714786263, "grad_norm": 1.8010755777359009, "learning_rate": 0.0002721167184883999, "loss": 0.2187, "step": 2448 }, { "epoch": 0.3432375613174492, "grad_norm": 0.8930305242538452, "learning_rate": 0.0002721023678545802, "loss": 0.1005, "step": 2449 }, { "epoch": 0.3433777154870357, "grad_norm": 0.7936392426490784, "learning_rate": 0.00027208801722076054, "loss": 0.1686, "step": 2450 }, { "epoch": 0.3435178696566223, "grad_norm": 0.6133139729499817, "learning_rate": 0.0002720736665869409, "loss": 0.1502, "step": 2451 }, { "epoch": 0.3436580238262088, "grad_norm": 0.7587975263595581, "learning_rate": 0.00027205931595312126, "loss": 0.1817, "step": 2452 }, { "epoch": 0.3437981779957954, "grad_norm": 0.4359076917171478, "learning_rate": 0.0002720449653193016, "loss": 0.0672, "step": 2453 }, { "epoch": 0.34393833216538194, "grad_norm": 0.46345996856689453, "learning_rate": 0.0002720306146854819, "loss": 0.0888, "step": 2454 }, { "epoch": 0.34407848633496846, 
"grad_norm": 0.4289470613002777, "learning_rate": 0.0002720162640516623, "loss": 0.0948, "step": 2455 }, { "epoch": 0.34421864050455503, "grad_norm": 1.0907361507415771, "learning_rate": 0.0002720019134178426, "loss": 0.1187, "step": 2456 }, { "epoch": 0.34435879467414154, "grad_norm": 0.43418994545936584, "learning_rate": 0.00027198756278402295, "loss": 0.1056, "step": 2457 }, { "epoch": 0.3444989488437281, "grad_norm": 0.33865877985954285, "learning_rate": 0.0002719732121502033, "loss": 0.0924, "step": 2458 }, { "epoch": 0.34463910301331463, "grad_norm": 0.29642388224601746, "learning_rate": 0.0002719588615163836, "loss": 0.0834, "step": 2459 }, { "epoch": 0.3447792571829012, "grad_norm": 0.28908008337020874, "learning_rate": 0.00027194451088256394, "loss": 0.1045, "step": 2460 }, { "epoch": 0.3449194113524877, "grad_norm": 0.4186856150627136, "learning_rate": 0.0002719301602487443, "loss": 0.0856, "step": 2461 }, { "epoch": 0.3450595655220743, "grad_norm": 0.4745382070541382, "learning_rate": 0.00027191580961492465, "loss": 0.1195, "step": 2462 }, { "epoch": 0.3451997196916608, "grad_norm": 0.7490723729133606, "learning_rate": 0.000271901458981105, "loss": 0.1232, "step": 2463 }, { "epoch": 0.34533987386124737, "grad_norm": 1.2677481174468994, "learning_rate": 0.0002718871083472853, "loss": 0.1933, "step": 2464 }, { "epoch": 0.34548002803083394, "grad_norm": 0.7623156905174255, "learning_rate": 0.00027187275771346564, "loss": 0.1704, "step": 2465 }, { "epoch": 0.34562018220042046, "grad_norm": 0.5481401085853577, "learning_rate": 0.00027185840707964596, "loss": 0.2198, "step": 2466 }, { "epoch": 0.345760336370007, "grad_norm": 0.6694737672805786, "learning_rate": 0.0002718440564458263, "loss": 0.1672, "step": 2467 }, { "epoch": 0.34590049053959354, "grad_norm": 0.6540376543998718, "learning_rate": 0.0002718297058120067, "loss": 0.238, "step": 2468 }, { "epoch": 0.3460406447091801, "grad_norm": 0.6377232074737549, "learning_rate": 0.000271815355178187, "loss": 
0.2207, "step": 2469 }, { "epoch": 0.3461807988787666, "grad_norm": 0.4143263101577759, "learning_rate": 0.00027180100454436733, "loss": 0.0804, "step": 2470 }, { "epoch": 0.3463209530483532, "grad_norm": 0.4887005090713501, "learning_rate": 0.0002717866539105477, "loss": 0.1923, "step": 2471 }, { "epoch": 0.3464611072179397, "grad_norm": 0.4390082359313965, "learning_rate": 0.00027177230327672804, "loss": 0.0777, "step": 2472 }, { "epoch": 0.3466012613875263, "grad_norm": 0.22069427371025085, "learning_rate": 0.00027175795264290837, "loss": 0.0511, "step": 2473 }, { "epoch": 0.3467414155571128, "grad_norm": 0.417019784450531, "learning_rate": 0.0002717436020090887, "loss": 0.0865, "step": 2474 }, { "epoch": 0.34688156972669937, "grad_norm": 0.6320773959159851, "learning_rate": 0.00027172925137526903, "loss": 0.0638, "step": 2475 }, { "epoch": 0.34702172389628594, "grad_norm": 0.33546149730682373, "learning_rate": 0.00027171490074144936, "loss": 0.0822, "step": 2476 }, { "epoch": 0.34716187806587245, "grad_norm": 0.9183886051177979, "learning_rate": 0.00027170055010762974, "loss": 0.1159, "step": 2477 }, { "epoch": 0.347302032235459, "grad_norm": 0.5276036262512207, "learning_rate": 0.00027168619947381007, "loss": 0.1684, "step": 2478 }, { "epoch": 0.34744218640504554, "grad_norm": 0.33101022243499756, "learning_rate": 0.0002716718488399904, "loss": 0.0965, "step": 2479 }, { "epoch": 0.3475823405746321, "grad_norm": 0.34179821610450745, "learning_rate": 0.0002716574982061708, "loss": 0.111, "step": 2480 }, { "epoch": 0.3477224947442186, "grad_norm": 0.3510150909423828, "learning_rate": 0.0002716431475723511, "loss": 0.0954, "step": 2481 }, { "epoch": 0.3478626489138052, "grad_norm": 0.39792853593826294, "learning_rate": 0.00027162879693853144, "loss": 0.0898, "step": 2482 }, { "epoch": 0.3480028030833917, "grad_norm": 0.3537274897098541, "learning_rate": 0.00027161444630471177, "loss": 0.1344, "step": 2483 }, { "epoch": 0.3481429572529783, "grad_norm": 
0.3773926794528961, "learning_rate": 0.0002716000956708921, "loss": 0.0793, "step": 2484 }, { "epoch": 0.3482831114225648, "grad_norm": 0.5031774044036865, "learning_rate": 0.0002715857450370724, "loss": 0.1004, "step": 2485 }, { "epoch": 0.34842326559215137, "grad_norm": 0.2686522901058197, "learning_rate": 0.00027157139440325275, "loss": 0.066, "step": 2486 }, { "epoch": 0.34856341976173794, "grad_norm": 0.4645906984806061, "learning_rate": 0.00027155704376943313, "loss": 0.1754, "step": 2487 }, { "epoch": 0.34870357393132445, "grad_norm": 0.34407687187194824, "learning_rate": 0.00027154269313561346, "loss": 0.0742, "step": 2488 }, { "epoch": 0.348843728100911, "grad_norm": 0.34344732761383057, "learning_rate": 0.0002715283425017938, "loss": 0.1301, "step": 2489 }, { "epoch": 0.34898388227049754, "grad_norm": 0.514249324798584, "learning_rate": 0.0002715139918679742, "loss": 0.1335, "step": 2490 }, { "epoch": 0.3491240364400841, "grad_norm": 0.40635946393013, "learning_rate": 0.0002714996412341545, "loss": 0.0584, "step": 2491 }, { "epoch": 0.3492641906096706, "grad_norm": 0.8239240646362305, "learning_rate": 0.00027148529060033483, "loss": 0.3027, "step": 2492 }, { "epoch": 0.3494043447792572, "grad_norm": 0.2898925244808197, "learning_rate": 0.00027147093996651516, "loss": 0.0396, "step": 2493 }, { "epoch": 0.3495444989488437, "grad_norm": 0.6368575692176819, "learning_rate": 0.0002714565893326955, "loss": 0.1457, "step": 2494 }, { "epoch": 0.3496846531184303, "grad_norm": 0.664352297782898, "learning_rate": 0.0002714422386988758, "loss": 0.1002, "step": 2495 }, { "epoch": 0.3498248072880168, "grad_norm": 0.7303014397621155, "learning_rate": 0.0002714278880650562, "loss": 0.1534, "step": 2496 }, { "epoch": 0.34996496145760336, "grad_norm": 0.4517455995082855, "learning_rate": 0.00027141353743123653, "loss": 0.0741, "step": 2497 }, { "epoch": 0.35010511562718993, "grad_norm": 1.1914290189743042, "learning_rate": 0.00027139918679741686, "loss": 0.1185, "step": 
2498 }, { "epoch": 0.35024526979677645, "grad_norm": 2.456247568130493, "learning_rate": 0.0002713848361635972, "loss": 0.1924, "step": 2499 }, { "epoch": 0.350385423966363, "grad_norm": 2.8768482208251953, "learning_rate": 0.00027137048552977757, "loss": 0.3982, "step": 2500 }, { "epoch": 0.35052557813594953, "grad_norm": 0.44898679852485657, "learning_rate": 0.0002713561348959579, "loss": 0.0891, "step": 2501 }, { "epoch": 0.3506657323055361, "grad_norm": 0.4719737768173218, "learning_rate": 0.0002713417842621382, "loss": 0.1062, "step": 2502 }, { "epoch": 0.3508058864751226, "grad_norm": 0.5575537085533142, "learning_rate": 0.00027132743362831855, "loss": 0.1219, "step": 2503 }, { "epoch": 0.3509460406447092, "grad_norm": 0.5102971792221069, "learning_rate": 0.0002713130829944989, "loss": 0.1151, "step": 2504 }, { "epoch": 0.3510861948142957, "grad_norm": 0.8450426459312439, "learning_rate": 0.0002712987323606792, "loss": 0.1439, "step": 2505 }, { "epoch": 0.3512263489838823, "grad_norm": 0.608574390411377, "learning_rate": 0.0002712843817268596, "loss": 0.0638, "step": 2506 }, { "epoch": 0.3513665031534688, "grad_norm": 0.38713815808296204, "learning_rate": 0.0002712700310930399, "loss": 0.1224, "step": 2507 }, { "epoch": 0.35150665732305536, "grad_norm": 0.5570006370544434, "learning_rate": 0.00027125568045922025, "loss": 0.0665, "step": 2508 }, { "epoch": 0.35164681149264193, "grad_norm": 0.47272542119026184, "learning_rate": 0.00027124132982540063, "loss": 0.1593, "step": 2509 }, { "epoch": 0.35178696566222845, "grad_norm": 0.32512497901916504, "learning_rate": 0.00027122697919158096, "loss": 0.097, "step": 2510 }, { "epoch": 0.351927119831815, "grad_norm": 0.3275410830974579, "learning_rate": 0.0002712126285577613, "loss": 0.0978, "step": 2511 }, { "epoch": 0.35206727400140153, "grad_norm": 0.9663432240486145, "learning_rate": 0.0002711982779239416, "loss": 0.1423, "step": 2512 }, { "epoch": 0.3522074281709881, "grad_norm": 0.8780844807624817, 
"learning_rate": 0.00027118392729012195, "loss": 0.0753, "step": 2513 }, { "epoch": 0.3523475823405746, "grad_norm": 0.4171141982078552, "learning_rate": 0.0002711695766563023, "loss": 0.1024, "step": 2514 }, { "epoch": 0.3524877365101612, "grad_norm": 0.38195255398750305, "learning_rate": 0.00027115522602248266, "loss": 0.0956, "step": 2515 }, { "epoch": 0.3526278906797477, "grad_norm": 0.616711437702179, "learning_rate": 0.000271140875388663, "loss": 0.0632, "step": 2516 }, { "epoch": 0.3527680448493343, "grad_norm": 0.3601103127002716, "learning_rate": 0.0002711265247548433, "loss": 0.1041, "step": 2517 }, { "epoch": 0.3529081990189208, "grad_norm": 0.4452815055847168, "learning_rate": 0.00027111217412102365, "loss": 0.0983, "step": 2518 }, { "epoch": 0.35304835318850736, "grad_norm": 0.618134081363678, "learning_rate": 0.00027109782348720403, "loss": 0.1594, "step": 2519 }, { "epoch": 0.35318850735809393, "grad_norm": 0.31251460313796997, "learning_rate": 0.00027108347285338436, "loss": 0.0936, "step": 2520 }, { "epoch": 0.35332866152768044, "grad_norm": 0.37265312671661377, "learning_rate": 0.0002710691222195647, "loss": 0.0989, "step": 2521 }, { "epoch": 0.353468815697267, "grad_norm": 0.629951000213623, "learning_rate": 0.000271054771585745, "loss": 0.0924, "step": 2522 }, { "epoch": 0.35360896986685353, "grad_norm": 0.3488209843635559, "learning_rate": 0.00027104042095192534, "loss": 0.0513, "step": 2523 }, { "epoch": 0.3537491240364401, "grad_norm": 0.8103635907173157, "learning_rate": 0.00027102607031810567, "loss": 0.1135, "step": 2524 }, { "epoch": 0.3538892782060266, "grad_norm": 0.3749235272407532, "learning_rate": 0.00027101171968428605, "loss": 0.1301, "step": 2525 }, { "epoch": 0.3540294323756132, "grad_norm": 0.2901221215724945, "learning_rate": 0.0002709973690504664, "loss": 0.0809, "step": 2526 }, { "epoch": 0.3541695865451997, "grad_norm": 0.6542328596115112, "learning_rate": 0.0002709830184166467, "loss": 0.1007, "step": 2527 }, { "epoch": 
0.35430974071478627, "grad_norm": 0.49765893816947937, "learning_rate": 0.00027096866778282704, "loss": 0.0822, "step": 2528 }, { "epoch": 0.3544498948843728, "grad_norm": 0.44325828552246094, "learning_rate": 0.00027095431714900737, "loss": 0.0677, "step": 2529 }, { "epoch": 0.35459004905395936, "grad_norm": 0.32594650983810425, "learning_rate": 0.0002709399665151877, "loss": 0.0702, "step": 2530 }, { "epoch": 0.3547302032235459, "grad_norm": 0.2752164900302887, "learning_rate": 0.0002709256158813681, "loss": 0.0291, "step": 2531 }, { "epoch": 0.35487035739313244, "grad_norm": 0.43771252036094666, "learning_rate": 0.0002709112652475484, "loss": 0.1288, "step": 2532 }, { "epoch": 0.355010511562719, "grad_norm": 0.27745795249938965, "learning_rate": 0.00027089691461372874, "loss": 0.0717, "step": 2533 }, { "epoch": 0.3551506657323055, "grad_norm": 0.3869256377220154, "learning_rate": 0.00027088256397990907, "loss": 0.1279, "step": 2534 }, { "epoch": 0.3552908199018921, "grad_norm": 0.37020087242126465, "learning_rate": 0.00027086821334608945, "loss": 0.0666, "step": 2535 }, { "epoch": 0.3554309740714786, "grad_norm": 1.041060447692871, "learning_rate": 0.0002708538627122698, "loss": 0.1882, "step": 2536 }, { "epoch": 0.3555711282410652, "grad_norm": 1.5964288711547852, "learning_rate": 0.0002708395120784501, "loss": 0.1383, "step": 2537 }, { "epoch": 0.3557112824106517, "grad_norm": 0.39310428500175476, "learning_rate": 0.00027082516144463043, "loss": 0.0718, "step": 2538 }, { "epoch": 0.35585143658023827, "grad_norm": 0.22166696190834045, "learning_rate": 0.00027081081081081076, "loss": 0.0422, "step": 2539 }, { "epoch": 0.3559915907498248, "grad_norm": 0.4267207980155945, "learning_rate": 0.0002707964601769911, "loss": 0.1146, "step": 2540 }, { "epoch": 0.35613174491941135, "grad_norm": 0.7645246982574463, "learning_rate": 0.0002707821095431715, "loss": 0.1091, "step": 2541 }, { "epoch": 0.3562718990889979, "grad_norm": 0.37269437313079834, "learning_rate": 
0.0002707677589093518, "loss": 0.113, "step": 2542 }, { "epoch": 0.35641205325858444, "grad_norm": 0.5857687592506409, "learning_rate": 0.00027075340827553213, "loss": 0.0876, "step": 2543 }, { "epoch": 0.356552207428171, "grad_norm": 0.40357160568237305, "learning_rate": 0.0002707390576417125, "loss": 0.1032, "step": 2544 }, { "epoch": 0.3566923615977575, "grad_norm": 0.44467687606811523, "learning_rate": 0.00027072470700789284, "loss": 0.1375, "step": 2545 }, { "epoch": 0.3568325157673441, "grad_norm": 0.4880650043487549, "learning_rate": 0.00027071035637407317, "loss": 0.1046, "step": 2546 }, { "epoch": 0.3569726699369306, "grad_norm": 2.2533798217773438, "learning_rate": 0.0002706960057402535, "loss": 0.2249, "step": 2547 }, { "epoch": 0.3571128241065172, "grad_norm": 4.149808406829834, "learning_rate": 0.00027068165510643383, "loss": 0.1759, "step": 2548 }, { "epoch": 0.3572529782761037, "grad_norm": 0.460557222366333, "learning_rate": 0.00027066730447261416, "loss": 0.0493, "step": 2549 }, { "epoch": 0.35739313244569026, "grad_norm": 0.47430360317230225, "learning_rate": 0.00027065295383879454, "loss": 0.1308, "step": 2550 }, { "epoch": 0.3575332866152768, "grad_norm": 0.6032901406288147, "learning_rate": 0.00027063860320497487, "loss": 0.1162, "step": 2551 }, { "epoch": 0.35767344078486335, "grad_norm": 0.5174827575683594, "learning_rate": 0.0002706242525711552, "loss": 0.141, "step": 2552 }, { "epoch": 0.3578135949544499, "grad_norm": 0.4386367201805115, "learning_rate": 0.0002706099019373355, "loss": 0.1585, "step": 2553 }, { "epoch": 0.35795374912403644, "grad_norm": 0.3321300446987152, "learning_rate": 0.0002705955513035159, "loss": 0.0801, "step": 2554 }, { "epoch": 0.358093903293623, "grad_norm": 0.8595570921897888, "learning_rate": 0.00027058120066969624, "loss": 0.0832, "step": 2555 }, { "epoch": 0.3582340574632095, "grad_norm": 0.34902796149253845, "learning_rate": 0.00027056685003587656, "loss": 0.0685, "step": 2556 }, { "epoch": 
0.3583742116327961, "grad_norm": 0.7868647575378418, "learning_rate": 0.0002705524994020569, "loss": 0.1004, "step": 2557 }, { "epoch": 0.3585143658023826, "grad_norm": 0.2486821860074997, "learning_rate": 0.0002705381487682372, "loss": 0.0649, "step": 2558 }, { "epoch": 0.3586545199719692, "grad_norm": 0.37497857213020325, "learning_rate": 0.00027052379813441755, "loss": 0.1346, "step": 2559 }, { "epoch": 0.3587946741415557, "grad_norm": 0.4706312119960785, "learning_rate": 0.00027050944750059793, "loss": 0.0804, "step": 2560 }, { "epoch": 0.35893482831114226, "grad_norm": 1.0178109407424927, "learning_rate": 0.00027049509686677826, "loss": 0.2915, "step": 2561 }, { "epoch": 0.3590749824807288, "grad_norm": 0.5455099940299988, "learning_rate": 0.0002704807462329586, "loss": 0.098, "step": 2562 }, { "epoch": 0.35921513665031535, "grad_norm": 0.5274816751480103, "learning_rate": 0.00027046639559913897, "loss": 0.0971, "step": 2563 }, { "epoch": 0.3593552908199019, "grad_norm": 0.6896162033081055, "learning_rate": 0.0002704520449653193, "loss": 0.2009, "step": 2564 }, { "epoch": 0.35949544498948843, "grad_norm": 0.36828720569610596, "learning_rate": 0.00027043769433149963, "loss": 0.0989, "step": 2565 }, { "epoch": 0.359635599159075, "grad_norm": 0.656211256980896, "learning_rate": 0.00027042334369767996, "loss": 0.1403, "step": 2566 }, { "epoch": 0.3597757533286615, "grad_norm": 0.29173436760902405, "learning_rate": 0.0002704089930638603, "loss": 0.089, "step": 2567 }, { "epoch": 0.3599159074982481, "grad_norm": 0.24642011523246765, "learning_rate": 0.0002703946424300406, "loss": 0.0272, "step": 2568 }, { "epoch": 0.3600560616678346, "grad_norm": 0.6337753534317017, "learning_rate": 0.00027038029179622094, "loss": 0.0785, "step": 2569 }, { "epoch": 0.3601962158374212, "grad_norm": 0.4922681450843811, "learning_rate": 0.0002703659411624013, "loss": 0.1003, "step": 2570 }, { "epoch": 0.3603363700070077, "grad_norm": 0.3286852538585663, "learning_rate": 
0.00027035159052858166, "loss": 0.0703, "step": 2571 }, { "epoch": 0.36047652417659426, "grad_norm": 0.5443809032440186, "learning_rate": 0.000270337239894762, "loss": 0.1734, "step": 2572 }, { "epoch": 0.3606166783461808, "grad_norm": 0.3691226840019226, "learning_rate": 0.00027032288926094237, "loss": 0.0935, "step": 2573 }, { "epoch": 0.36075683251576735, "grad_norm": 0.6144779324531555, "learning_rate": 0.0002703085386271227, "loss": 0.0791, "step": 2574 }, { "epoch": 0.3608969866853539, "grad_norm": 0.3534158170223236, "learning_rate": 0.000270294187993303, "loss": 0.0963, "step": 2575 }, { "epoch": 0.36103714085494043, "grad_norm": 0.23296035826206207, "learning_rate": 0.00027027983735948335, "loss": 0.0369, "step": 2576 }, { "epoch": 0.361177295024527, "grad_norm": 0.3788452446460724, "learning_rate": 0.0002702654867256637, "loss": 0.0674, "step": 2577 }, { "epoch": 0.3613174491941135, "grad_norm": 0.6378711462020874, "learning_rate": 0.000270251136091844, "loss": 0.1474, "step": 2578 }, { "epoch": 0.3614576033637001, "grad_norm": 0.23953083157539368, "learning_rate": 0.0002702367854580244, "loss": 0.0452, "step": 2579 }, { "epoch": 0.3615977575332866, "grad_norm": 0.37547391653060913, "learning_rate": 0.0002702224348242047, "loss": 0.0911, "step": 2580 }, { "epoch": 0.36173791170287317, "grad_norm": 0.5833947062492371, "learning_rate": 0.00027020808419038505, "loss": 0.0793, "step": 2581 }, { "epoch": 0.3618780658724597, "grad_norm": 0.4846550226211548, "learning_rate": 0.00027019373355656543, "loss": 0.1074, "step": 2582 }, { "epoch": 0.36201822004204626, "grad_norm": 2.035660982131958, "learning_rate": 0.00027017938292274576, "loss": 0.2078, "step": 2583 }, { "epoch": 0.36215837421163277, "grad_norm": 0.30998244881629944, "learning_rate": 0.0002701650322889261, "loss": 0.0521, "step": 2584 }, { "epoch": 0.36229852838121934, "grad_norm": 0.8155659437179565, "learning_rate": 0.0002701506816551064, "loss": 0.0759, "step": 2585 }, { "epoch": 
0.3624386825508059, "grad_norm": 0.32316064834594727, "learning_rate": 0.00027013633102128675, "loss": 0.1013, "step": 2586 }, { "epoch": 0.36257883672039243, "grad_norm": 0.19872768223285675, "learning_rate": 0.0002701219803874671, "loss": 0.0499, "step": 2587 }, { "epoch": 0.362718990889979, "grad_norm": 0.6700904369354248, "learning_rate": 0.0002701076297536474, "loss": 0.125, "step": 2588 }, { "epoch": 0.3628591450595655, "grad_norm": 0.41210803389549255, "learning_rate": 0.0002700932791198278, "loss": 0.1046, "step": 2589 }, { "epoch": 0.3629992992291521, "grad_norm": 0.6869544982910156, "learning_rate": 0.0002700789284860081, "loss": 0.0856, "step": 2590 }, { "epoch": 0.3631394533987386, "grad_norm": 1.2647379636764526, "learning_rate": 0.00027006457785218844, "loss": 0.2693, "step": 2591 }, { "epoch": 0.36327960756832517, "grad_norm": 0.34526896476745605, "learning_rate": 0.00027005022721836877, "loss": 0.0904, "step": 2592 }, { "epoch": 0.3634197617379117, "grad_norm": 0.43056705594062805, "learning_rate": 0.0002700358765845491, "loss": 0.1129, "step": 2593 }, { "epoch": 0.36355991590749825, "grad_norm": 1.3661410808563232, "learning_rate": 0.00027002152595072943, "loss": 0.1855, "step": 2594 }, { "epoch": 0.36370007007708477, "grad_norm": 0.49362099170684814, "learning_rate": 0.0002700071753169098, "loss": 0.091, "step": 2595 }, { "epoch": 0.36384022424667134, "grad_norm": 0.9674927592277527, "learning_rate": 0.00026999282468309014, "loss": 0.1332, "step": 2596 }, { "epoch": 0.3639803784162579, "grad_norm": 0.7007322311401367, "learning_rate": 0.00026997847404927047, "loss": 0.2655, "step": 2597 }, { "epoch": 0.3641205325858444, "grad_norm": 1.149762511253357, "learning_rate": 0.00026996412341545085, "loss": 0.067, "step": 2598 }, { "epoch": 0.364260686755431, "grad_norm": 2.4529738426208496, "learning_rate": 0.0002699497727816312, "loss": 0.1923, "step": 2599 }, { "epoch": 0.3644008409250175, "grad_norm": 2.5562126636505127, "learning_rate": 
0.0002699354221478115, "loss": 0.3356, "step": 2600 }, { "epoch": 0.3645409950946041, "grad_norm": 0.4913303554058075, "learning_rate": 0.00026992107151399184, "loss": 0.106, "step": 2601 }, { "epoch": 0.3646811492641906, "grad_norm": 0.35753095149993896, "learning_rate": 0.00026990672088017217, "loss": 0.0644, "step": 2602 }, { "epoch": 0.36482130343377717, "grad_norm": 0.27814993262290955, "learning_rate": 0.0002698923702463525, "loss": 0.0594, "step": 2603 }, { "epoch": 0.3649614576033637, "grad_norm": 0.5234560966491699, "learning_rate": 0.0002698780196125328, "loss": 0.2094, "step": 2604 }, { "epoch": 0.36510161177295025, "grad_norm": 0.45245644450187683, "learning_rate": 0.0002698636689787132, "loss": 0.1318, "step": 2605 }, { "epoch": 0.36524176594253677, "grad_norm": 0.4091087281703949, "learning_rate": 0.00026984931834489353, "loss": 0.0882, "step": 2606 }, { "epoch": 0.36538192011212334, "grad_norm": 0.45572516322135925, "learning_rate": 0.00026983496771107386, "loss": 0.1447, "step": 2607 }, { "epoch": 0.3655220742817099, "grad_norm": 0.2645706236362457, "learning_rate": 0.00026982061707725425, "loss": 0.0735, "step": 2608 }, { "epoch": 0.3656622284512964, "grad_norm": 0.2757112681865692, "learning_rate": 0.0002698062664434346, "loss": 0.0578, "step": 2609 }, { "epoch": 0.365802382620883, "grad_norm": 0.4935554265975952, "learning_rate": 0.0002697919158096149, "loss": 0.0988, "step": 2610 }, { "epoch": 0.3659425367904695, "grad_norm": 0.5613978505134583, "learning_rate": 0.00026977756517579523, "loss": 0.0946, "step": 2611 }, { "epoch": 0.3660826909600561, "grad_norm": 0.7950171828269958, "learning_rate": 0.00026976321454197556, "loss": 0.1646, "step": 2612 }, { "epoch": 0.3662228451296426, "grad_norm": 0.4371897280216217, "learning_rate": 0.0002697488639081559, "loss": 0.0887, "step": 2613 }, { "epoch": 0.36636299929922916, "grad_norm": 0.36467158794403076, "learning_rate": 0.00026973451327433627, "loss": 0.0458, "step": 2614 }, { "epoch": 
0.3665031534688157, "grad_norm": 0.3143620193004608, "learning_rate": 0.0002697201626405166, "loss": 0.0531, "step": 2615 }, { "epoch": 0.36664330763840225, "grad_norm": 0.379914790391922, "learning_rate": 0.00026970581200669693, "loss": 0.1284, "step": 2616 }, { "epoch": 0.36678346180798876, "grad_norm": 2.0873301029205322, "learning_rate": 0.0002696914613728773, "loss": 0.0807, "step": 2617 }, { "epoch": 0.36692361597757533, "grad_norm": 0.43054527044296265, "learning_rate": 0.00026967711073905764, "loss": 0.0463, "step": 2618 }, { "epoch": 0.3670637701471619, "grad_norm": 0.4760350286960602, "learning_rate": 0.00026966276010523797, "loss": 0.0826, "step": 2619 }, { "epoch": 0.3672039243167484, "grad_norm": 0.9995202422142029, "learning_rate": 0.0002696484094714183, "loss": 0.1378, "step": 2620 }, { "epoch": 0.367344078486335, "grad_norm": 0.7339271903038025, "learning_rate": 0.0002696340588375986, "loss": 0.1573, "step": 2621 }, { "epoch": 0.3674842326559215, "grad_norm": 0.6659233570098877, "learning_rate": 0.00026961970820377895, "loss": 0.0768, "step": 2622 }, { "epoch": 0.3676243868255081, "grad_norm": 0.38232073187828064, "learning_rate": 0.0002696053575699593, "loss": 0.0535, "step": 2623 }, { "epoch": 0.3677645409950946, "grad_norm": 0.6639416217803955, "learning_rate": 0.00026959100693613967, "loss": 0.1746, "step": 2624 }, { "epoch": 0.36790469516468116, "grad_norm": 0.614084005355835, "learning_rate": 0.00026957665630232, "loss": 0.1573, "step": 2625 }, { "epoch": 0.3680448493342677, "grad_norm": 0.4498412311077118, "learning_rate": 0.0002695623056685003, "loss": 0.1079, "step": 2626 }, { "epoch": 0.36818500350385425, "grad_norm": 0.6188480854034424, "learning_rate": 0.0002695479550346807, "loss": 0.065, "step": 2627 }, { "epoch": 0.36832515767344076, "grad_norm": 0.4791368544101715, "learning_rate": 0.00026953360440086103, "loss": 0.0572, "step": 2628 }, { "epoch": 0.36846531184302733, "grad_norm": 0.6614050269126892, "learning_rate": 
0.00026951925376704136, "loss": 0.1763, "step": 2629 }, { "epoch": 0.3686054660126139, "grad_norm": 0.4944930076599121, "learning_rate": 0.0002695049031332217, "loss": 0.0964, "step": 2630 }, { "epoch": 0.3687456201822004, "grad_norm": 0.38349610567092896, "learning_rate": 0.000269490552499402, "loss": 0.0554, "step": 2631 }, { "epoch": 0.368885774351787, "grad_norm": 0.3273887038230896, "learning_rate": 0.00026947620186558235, "loss": 0.0505, "step": 2632 }, { "epoch": 0.3690259285213735, "grad_norm": 0.5440751314163208, "learning_rate": 0.00026946185123176273, "loss": 0.1368, "step": 2633 }, { "epoch": 0.3691660826909601, "grad_norm": 0.2655757963657379, "learning_rate": 0.00026944750059794306, "loss": 0.061, "step": 2634 }, { "epoch": 0.3693062368605466, "grad_norm": 0.4824255406856537, "learning_rate": 0.0002694331499641234, "loss": 0.1301, "step": 2635 }, { "epoch": 0.36944639103013316, "grad_norm": 0.4690677225589752, "learning_rate": 0.00026941879933030377, "loss": 0.1251, "step": 2636 }, { "epoch": 0.3695865451997197, "grad_norm": 0.40310895442962646, "learning_rate": 0.0002694044486964841, "loss": 0.1116, "step": 2637 }, { "epoch": 0.36972669936930624, "grad_norm": 0.3774832785129547, "learning_rate": 0.00026939009806266443, "loss": 0.0761, "step": 2638 }, { "epoch": 0.36986685353889276, "grad_norm": 0.41188836097717285, "learning_rate": 0.00026937574742884476, "loss": 0.0666, "step": 2639 }, { "epoch": 0.37000700770847933, "grad_norm": 0.6129133701324463, "learning_rate": 0.0002693613967950251, "loss": 0.1096, "step": 2640 }, { "epoch": 0.3701471618780659, "grad_norm": 0.35266202688217163, "learning_rate": 0.0002693470461612054, "loss": 0.0914, "step": 2641 }, { "epoch": 0.3702873160476524, "grad_norm": 0.6404738426208496, "learning_rate": 0.00026933269552738574, "loss": 0.104, "step": 2642 }, { "epoch": 0.370427470217239, "grad_norm": 1.3113828897476196, "learning_rate": 0.0002693183448935661, "loss": 0.1766, "step": 2643 }, { "epoch": 
0.3705676243868255, "grad_norm": 1.2462184429168701, "learning_rate": 0.00026930399425974645, "loss": 0.142, "step": 2644 }, { "epoch": 0.37070777855641207, "grad_norm": 0.9791223406791687, "learning_rate": 0.0002692896436259268, "loss": 0.0822, "step": 2645 }, { "epoch": 0.3708479327259986, "grad_norm": 1.0689698457717896, "learning_rate": 0.00026927529299210716, "loss": 0.2384, "step": 2646 }, { "epoch": 0.37098808689558516, "grad_norm": 0.2582118511199951, "learning_rate": 0.0002692609423582875, "loss": 0.0345, "step": 2647 }, { "epoch": 0.37112824106517167, "grad_norm": 2.33042311668396, "learning_rate": 0.0002692465917244678, "loss": 0.4221, "step": 2648 }, { "epoch": 0.37126839523475824, "grad_norm": 9.557506561279297, "learning_rate": 0.00026923224109064815, "loss": 0.5129, "step": 2649 }, { "epoch": 0.37140854940434476, "grad_norm": 2.5115840435028076, "learning_rate": 0.0002692178904568285, "loss": 0.515, "step": 2650 }, { "epoch": 0.3715487035739313, "grad_norm": 0.2643502354621887, "learning_rate": 0.0002692035398230088, "loss": 0.1052, "step": 2651 }, { "epoch": 0.37168885774351784, "grad_norm": 0.5183128118515015, "learning_rate": 0.0002691891891891892, "loss": 0.13, "step": 2652 }, { "epoch": 0.3718290119131044, "grad_norm": 0.5094909071922302, "learning_rate": 0.0002691748385553695, "loss": 0.0855, "step": 2653 }, { "epoch": 0.371969166082691, "grad_norm": 0.37950798869132996, "learning_rate": 0.00026916048792154985, "loss": 0.0853, "step": 2654 }, { "epoch": 0.3721093202522775, "grad_norm": 0.3840545117855072, "learning_rate": 0.0002691461372877302, "loss": 0.0783, "step": 2655 }, { "epoch": 0.37224947442186407, "grad_norm": 0.41216596961021423, "learning_rate": 0.0002691317866539105, "loss": 0.1514, "step": 2656 }, { "epoch": 0.3723896285914506, "grad_norm": 0.39185765385627747, "learning_rate": 0.00026911743602009083, "loss": 0.1446, "step": 2657 }, { "epoch": 0.37252978276103715, "grad_norm": 0.4257045388221741, "learning_rate": 
0.00026910308538627116, "loss": 0.0753, "step": 2658 }, { "epoch": 0.37266993693062367, "grad_norm": 0.24160675704479218, "learning_rate": 0.00026908873475245154, "loss": 0.0529, "step": 2659 }, { "epoch": 0.37281009110021024, "grad_norm": 0.38580718636512756, "learning_rate": 0.0002690743841186319, "loss": 0.1124, "step": 2660 }, { "epoch": 0.37295024526979675, "grad_norm": 0.5381124019622803, "learning_rate": 0.0002690600334848122, "loss": 0.1175, "step": 2661 }, { "epoch": 0.3730903994393833, "grad_norm": 0.6174787282943726, "learning_rate": 0.0002690456828509926, "loss": 0.1924, "step": 2662 }, { "epoch": 0.37323055360896984, "grad_norm": 0.4316098690032959, "learning_rate": 0.0002690313322171729, "loss": 0.0926, "step": 2663 }, { "epoch": 0.3733707077785564, "grad_norm": 0.23370975255966187, "learning_rate": 0.00026901698158335324, "loss": 0.063, "step": 2664 }, { "epoch": 0.373510861948143, "grad_norm": 0.6281079053878784, "learning_rate": 0.00026900263094953357, "loss": 0.1527, "step": 2665 }, { "epoch": 0.3736510161177295, "grad_norm": 0.6845026016235352, "learning_rate": 0.0002689882803157139, "loss": 0.1397, "step": 2666 }, { "epoch": 0.37379117028731607, "grad_norm": 0.43456852436065674, "learning_rate": 0.00026897392968189423, "loss": 0.1373, "step": 2667 }, { "epoch": 0.3739313244569026, "grad_norm": 0.46237489581108093, "learning_rate": 0.0002689595790480746, "loss": 0.2164, "step": 2668 }, { "epoch": 0.37407147862648915, "grad_norm": 0.2265549749135971, "learning_rate": 0.00026894522841425494, "loss": 0.0514, "step": 2669 }, { "epoch": 0.37421163279607567, "grad_norm": 0.6893908381462097, "learning_rate": 0.00026893087778043527, "loss": 0.1429, "step": 2670 }, { "epoch": 0.37435178696566224, "grad_norm": 0.46016186475753784, "learning_rate": 0.00026891652714661565, "loss": 0.1524, "step": 2671 }, { "epoch": 0.37449194113524875, "grad_norm": 0.7288147807121277, "learning_rate": 0.000268902176512796, "loss": 0.0862, "step": 2672 }, { "epoch": 
0.3746320953048353, "grad_norm": 0.5569015145301819, "learning_rate": 0.0002688878258789763, "loss": 0.0597, "step": 2673 }, { "epoch": 0.37477224947442184, "grad_norm": 0.6619922518730164, "learning_rate": 0.00026887347524515664, "loss": 0.1766, "step": 2674 }, { "epoch": 0.3749124036440084, "grad_norm": 0.43870124220848083, "learning_rate": 0.00026885912461133696, "loss": 0.0611, "step": 2675 }, { "epoch": 0.375052557813595, "grad_norm": 0.5007179975509644, "learning_rate": 0.0002688447739775173, "loss": 0.0612, "step": 2676 }, { "epoch": 0.3751927119831815, "grad_norm": 0.668543815612793, "learning_rate": 0.0002688304233436976, "loss": 0.1205, "step": 2677 }, { "epoch": 0.37533286615276806, "grad_norm": 0.635049045085907, "learning_rate": 0.000268816072709878, "loss": 0.1249, "step": 2678 }, { "epoch": 0.3754730203223546, "grad_norm": 0.4230019152164459, "learning_rate": 0.00026880172207605833, "loss": 0.0506, "step": 2679 }, { "epoch": 0.37561317449194115, "grad_norm": 0.522056519985199, "learning_rate": 0.00026878737144223866, "loss": 0.0971, "step": 2680 }, { "epoch": 0.37575332866152766, "grad_norm": 0.4881804287433624, "learning_rate": 0.00026877302080841904, "loss": 0.0421, "step": 2681 }, { "epoch": 0.37589348283111423, "grad_norm": 0.4913877248764038, "learning_rate": 0.00026875867017459937, "loss": 0.0579, "step": 2682 }, { "epoch": 0.37603363700070075, "grad_norm": 0.39831966161727905, "learning_rate": 0.0002687443195407797, "loss": 0.0855, "step": 2683 }, { "epoch": 0.3761737911702873, "grad_norm": 0.7055082321166992, "learning_rate": 0.00026872996890696003, "loss": 0.1354, "step": 2684 }, { "epoch": 0.37631394533987383, "grad_norm": 0.6139459013938904, "learning_rate": 0.00026871561827314036, "loss": 0.1284, "step": 2685 }, { "epoch": 0.3764540995094604, "grad_norm": 0.4901534616947174, "learning_rate": 0.0002687012676393207, "loss": 0.0991, "step": 2686 }, { "epoch": 0.376594253679047, "grad_norm": 0.47301438450813293, "learning_rate": 
0.00026868691700550107, "loss": 0.0584, "step": 2687 }, { "epoch": 0.3767344078486335, "grad_norm": 0.28055500984191895, "learning_rate": 0.0002686725663716814, "loss": 0.0614, "step": 2688 }, { "epoch": 0.37687456201822006, "grad_norm": 0.27549150586128235, "learning_rate": 0.0002686582157378617, "loss": 0.0478, "step": 2689 }, { "epoch": 0.3770147161878066, "grad_norm": 0.4826743006706238, "learning_rate": 0.00026864386510404206, "loss": 0.1763, "step": 2690 }, { "epoch": 0.37715487035739315, "grad_norm": 0.6995766162872314, "learning_rate": 0.00026862951447022244, "loss": 0.1223, "step": 2691 }, { "epoch": 0.37729502452697966, "grad_norm": 0.5631874799728394, "learning_rate": 0.00026861516383640277, "loss": 0.1884, "step": 2692 }, { "epoch": 0.37743517869656623, "grad_norm": 1.017300009727478, "learning_rate": 0.0002686008132025831, "loss": 0.3021, "step": 2693 }, { "epoch": 0.37757533286615275, "grad_norm": 0.5196384787559509, "learning_rate": 0.0002685864625687634, "loss": 0.0587, "step": 2694 }, { "epoch": 0.3777154870357393, "grad_norm": 1.3323756456375122, "learning_rate": 0.00026857211193494375, "loss": 0.0605, "step": 2695 }, { "epoch": 0.37785564120532583, "grad_norm": 5.389350891113281, "learning_rate": 0.0002685577613011241, "loss": 0.4775, "step": 2696 }, { "epoch": 0.3779957953749124, "grad_norm": 0.7972546815872192, "learning_rate": 0.00026854341066730446, "loss": 0.2121, "step": 2697 }, { "epoch": 0.378135949544499, "grad_norm": 1.7250601053237915, "learning_rate": 0.0002685290600334848, "loss": 0.4052, "step": 2698 }, { "epoch": 0.3782761037140855, "grad_norm": 1.4964070320129395, "learning_rate": 0.0002685147093996651, "loss": 0.1881, "step": 2699 }, { "epoch": 0.37841625788367206, "grad_norm": 0.9878752827644348, "learning_rate": 0.0002685003587658455, "loss": 0.1696, "step": 2700 }, { "epoch": 0.3785564120532586, "grad_norm": 0.4222157597541809, "learning_rate": 0.00026848600813202583, "loss": 0.1127, "step": 2701 }, { "epoch": 
0.37869656622284514, "grad_norm": 0.7448936104774475, "learning_rate": 0.00026847165749820616, "loss": 0.1394, "step": 2702 }, { "epoch": 0.37883672039243166, "grad_norm": 0.3859920799732208, "learning_rate": 0.0002684573068643865, "loss": 0.0808, "step": 2703 }, { "epoch": 0.37897687456201823, "grad_norm": 0.29533955454826355, "learning_rate": 0.0002684429562305668, "loss": 0.0698, "step": 2704 }, { "epoch": 0.37911702873160474, "grad_norm": 0.5440114736557007, "learning_rate": 0.00026842860559674715, "loss": 0.1567, "step": 2705 }, { "epoch": 0.3792571829011913, "grad_norm": 0.2795041799545288, "learning_rate": 0.00026841425496292753, "loss": 0.0693, "step": 2706 }, { "epoch": 0.37939733707077783, "grad_norm": 0.21150372922420502, "learning_rate": 0.00026839990432910786, "loss": 0.0974, "step": 2707 }, { "epoch": 0.3795374912403644, "grad_norm": 0.4844374358654022, "learning_rate": 0.0002683855536952882, "loss": 0.1669, "step": 2708 }, { "epoch": 0.37967764540995097, "grad_norm": 0.3226596713066101, "learning_rate": 0.0002683712030614685, "loss": 0.0809, "step": 2709 }, { "epoch": 0.3798177995795375, "grad_norm": 0.6389403939247131, "learning_rate": 0.0002683568524276489, "loss": 0.0491, "step": 2710 }, { "epoch": 0.37995795374912406, "grad_norm": 0.33429408073425293, "learning_rate": 0.0002683425017938292, "loss": 0.1099, "step": 2711 }, { "epoch": 0.38009810791871057, "grad_norm": 0.7514497637748718, "learning_rate": 0.00026832815116000955, "loss": 0.1274, "step": 2712 }, { "epoch": 0.38023826208829714, "grad_norm": 0.2460016906261444, "learning_rate": 0.0002683138005261899, "loss": 0.0586, "step": 2713 }, { "epoch": 0.38037841625788366, "grad_norm": 0.424694687128067, "learning_rate": 0.0002682994498923702, "loss": 0.0435, "step": 2714 }, { "epoch": 0.3805185704274702, "grad_norm": 0.24495062232017517, "learning_rate": 0.00026828509925855054, "loss": 0.0467, "step": 2715 }, { "epoch": 0.38065872459705674, "grad_norm": 0.631500244140625, "learning_rate": 
0.0002682707486247309, "loss": 0.1284, "step": 2716 }, { "epoch": 0.3807988787666433, "grad_norm": 0.5932847261428833, "learning_rate": 0.00026825639799091125, "loss": 0.125, "step": 2717 }, { "epoch": 0.3809390329362298, "grad_norm": 0.7991192936897278, "learning_rate": 0.0002682420473570916, "loss": 0.0797, "step": 2718 }, { "epoch": 0.3810791871058164, "grad_norm": 0.8800172209739685, "learning_rate": 0.0002682276967232719, "loss": 0.1538, "step": 2719 }, { "epoch": 0.38121934127540297, "grad_norm": 0.6050055623054504, "learning_rate": 0.00026821334608945224, "loss": 0.1026, "step": 2720 }, { "epoch": 0.3813594954449895, "grad_norm": 0.3597300350666046, "learning_rate": 0.00026819899545563257, "loss": 0.0788, "step": 2721 }, { "epoch": 0.38149964961457605, "grad_norm": 0.45745959877967834, "learning_rate": 0.00026818464482181295, "loss": 0.1535, "step": 2722 }, { "epoch": 0.38163980378416257, "grad_norm": 0.38664647936820984, "learning_rate": 0.0002681702941879933, "loss": 0.112, "step": 2723 }, { "epoch": 0.38177995795374914, "grad_norm": 0.2691517174243927, "learning_rate": 0.0002681559435541736, "loss": 0.0814, "step": 2724 }, { "epoch": 0.38192011212333565, "grad_norm": 1.1752463579177856, "learning_rate": 0.00026814159292035393, "loss": 0.2208, "step": 2725 }, { "epoch": 0.3820602662929222, "grad_norm": 0.2488446980714798, "learning_rate": 0.0002681272422865343, "loss": 0.0715, "step": 2726 }, { "epoch": 0.38220042046250874, "grad_norm": 0.3645718991756439, "learning_rate": 0.00026811289165271465, "loss": 0.0512, "step": 2727 }, { "epoch": 0.3823405746320953, "grad_norm": 0.581358015537262, "learning_rate": 0.000268098541018895, "loss": 0.1292, "step": 2728 }, { "epoch": 0.3824807288016818, "grad_norm": 0.26961782574653625, "learning_rate": 0.0002680841903850753, "loss": 0.0528, "step": 2729 }, { "epoch": 0.3826208829712684, "grad_norm": 0.6025241017341614, "learning_rate": 0.00026806983975125563, "loss": 0.1213, "step": 2730 }, { "epoch": 
0.38276103714085496, "grad_norm": 0.39697104692459106, "learning_rate": 0.00026805548911743596, "loss": 0.1117, "step": 2731 }, { "epoch": 0.3829011913104415, "grad_norm": 0.5270704627037048, "learning_rate": 0.00026804113848361634, "loss": 0.0867, "step": 2732 }, { "epoch": 0.38304134548002805, "grad_norm": 2.1186790466308594, "learning_rate": 0.00026802678784979667, "loss": 0.2197, "step": 2733 }, { "epoch": 0.38318149964961457, "grad_norm": 0.2457168698310852, "learning_rate": 0.000268012437215977, "loss": 0.0489, "step": 2734 }, { "epoch": 0.38332165381920114, "grad_norm": 0.6924535036087036, "learning_rate": 0.0002679980865821574, "loss": 0.0848, "step": 2735 }, { "epoch": 0.38346180798878765, "grad_norm": 0.2335180789232254, "learning_rate": 0.0002679837359483377, "loss": 0.0556, "step": 2736 }, { "epoch": 0.3836019621583742, "grad_norm": 0.23670127987861633, "learning_rate": 0.00026796938531451804, "loss": 0.0452, "step": 2737 }, { "epoch": 0.38374211632796074, "grad_norm": 0.27039679884910583, "learning_rate": 0.00026795503468069837, "loss": 0.1049, "step": 2738 }, { "epoch": 0.3838822704975473, "grad_norm": 0.4019028842449188, "learning_rate": 0.0002679406840468787, "loss": 0.0846, "step": 2739 }, { "epoch": 0.3840224246671338, "grad_norm": 1.0159893035888672, "learning_rate": 0.000267926333413059, "loss": 0.1307, "step": 2740 }, { "epoch": 0.3841625788367204, "grad_norm": 0.2937857508659363, "learning_rate": 0.0002679119827792394, "loss": 0.0524, "step": 2741 }, { "epoch": 0.38430273300630696, "grad_norm": 1.8584258556365967, "learning_rate": 0.00026789763214541974, "loss": 0.3712, "step": 2742 }, { "epoch": 0.3844428871758935, "grad_norm": 0.5510191917419434, "learning_rate": 0.00026788328151160007, "loss": 0.0927, "step": 2743 }, { "epoch": 0.38458304134548005, "grad_norm": 1.2359387874603271, "learning_rate": 0.0002678689308777804, "loss": 0.2157, "step": 2744 }, { "epoch": 0.38472319551506656, "grad_norm": 0.6061068177223206, "learning_rate": 
0.0002678545802439608, "loss": 0.1064, "step": 2745 }, { "epoch": 0.38486334968465313, "grad_norm": 0.4549236297607422, "learning_rate": 0.0002678402296101411, "loss": 0.0746, "step": 2746 }, { "epoch": 0.38500350385423965, "grad_norm": 0.34904977679252625, "learning_rate": 0.00026782587897632143, "loss": 0.0519, "step": 2747 }, { "epoch": 0.3851436580238262, "grad_norm": 0.4819194972515106, "learning_rate": 0.00026781152834250176, "loss": 0.2932, "step": 2748 }, { "epoch": 0.38528381219341273, "grad_norm": 2.390181541442871, "learning_rate": 0.0002677971777086821, "loss": 0.1389, "step": 2749 }, { "epoch": 0.3854239663629993, "grad_norm": 2.2200119495391846, "learning_rate": 0.0002677828270748624, "loss": 0.5359, "step": 2750 }, { "epoch": 0.3855641205325858, "grad_norm": 0.33571431040763855, "learning_rate": 0.0002677684764410428, "loss": 0.0552, "step": 2751 }, { "epoch": 0.3857042747021724, "grad_norm": 0.41551390290260315, "learning_rate": 0.00026775412580722313, "loss": 0.1274, "step": 2752 }, { "epoch": 0.38584442887175896, "grad_norm": 0.4421321451663971, "learning_rate": 0.00026773977517340346, "loss": 0.1504, "step": 2753 }, { "epoch": 0.3859845830413455, "grad_norm": 0.39728522300720215, "learning_rate": 0.00026772542453958384, "loss": 0.144, "step": 2754 }, { "epoch": 0.38612473721093205, "grad_norm": 0.4899279773235321, "learning_rate": 0.00026771107390576417, "loss": 0.1237, "step": 2755 }, { "epoch": 0.38626489138051856, "grad_norm": 0.6530525088310242, "learning_rate": 0.0002676967232719445, "loss": 0.124, "step": 2756 }, { "epoch": 0.38640504555010513, "grad_norm": 0.30461323261260986, "learning_rate": 0.00026768237263812483, "loss": 0.0963, "step": 2757 }, { "epoch": 0.38654519971969165, "grad_norm": 0.4590955376625061, "learning_rate": 0.00026766802200430516, "loss": 0.1259, "step": 2758 }, { "epoch": 0.3866853538892782, "grad_norm": 0.30805009603500366, "learning_rate": 0.0002676536713704855, "loss": 0.0824, "step": 2759 }, { "epoch": 
0.38682550805886473, "grad_norm": 0.3916451930999756, "learning_rate": 0.0002676393207366658, "loss": 0.113, "step": 2760 }, { "epoch": 0.3869656622284513, "grad_norm": 0.33900728821754456, "learning_rate": 0.0002676249701028462, "loss": 0.0649, "step": 2761 }, { "epoch": 0.3871058163980378, "grad_norm": 0.37784919142723083, "learning_rate": 0.0002676106194690265, "loss": 0.053, "step": 2762 }, { "epoch": 0.3872459705676244, "grad_norm": 0.47026288509368896, "learning_rate": 0.00026759626883520685, "loss": 0.1056, "step": 2763 }, { "epoch": 0.38738612473721096, "grad_norm": 0.3922427296638489, "learning_rate": 0.00026758191820138724, "loss": 0.0546, "step": 2764 }, { "epoch": 0.3875262789067975, "grad_norm": 0.7709530591964722, "learning_rate": 0.00026756756756756756, "loss": 0.143, "step": 2765 }, { "epoch": 0.38766643307638404, "grad_norm": 0.4271753132343292, "learning_rate": 0.0002675532169337479, "loss": 0.0875, "step": 2766 }, { "epoch": 0.38780658724597056, "grad_norm": 0.262979656457901, "learning_rate": 0.0002675388662999282, "loss": 0.069, "step": 2767 }, { "epoch": 0.38794674141555713, "grad_norm": 0.5255424976348877, "learning_rate": 0.00026752451566610855, "loss": 0.0924, "step": 2768 }, { "epoch": 0.38808689558514364, "grad_norm": 0.30937355756759644, "learning_rate": 0.0002675101650322889, "loss": 0.1292, "step": 2769 }, { "epoch": 0.3882270497547302, "grad_norm": 0.37371179461479187, "learning_rate": 0.00026749581439846926, "loss": 0.0752, "step": 2770 }, { "epoch": 0.38836720392431673, "grad_norm": 0.5643699765205383, "learning_rate": 0.0002674814637646496, "loss": 0.2093, "step": 2771 }, { "epoch": 0.3885073580939033, "grad_norm": 0.38963592052459717, "learning_rate": 0.0002674671131308299, "loss": 0.1255, "step": 2772 }, { "epoch": 0.3886475122634898, "grad_norm": 0.38058674335479736, "learning_rate": 0.0002674527624970103, "loss": 0.1023, "step": 2773 }, { "epoch": 0.3887876664330764, "grad_norm": 0.8996592164039612, "learning_rate": 
0.00026743841186319063, "loss": 0.0876, "step": 2774 }, { "epoch": 0.38892782060266295, "grad_norm": 0.6100387573242188, "learning_rate": 0.00026742406122937096, "loss": 0.1035, "step": 2775 }, { "epoch": 0.38906797477224947, "grad_norm": 0.662075400352478, "learning_rate": 0.0002674097105955513, "loss": 0.1833, "step": 2776 }, { "epoch": 0.38920812894183604, "grad_norm": 0.5425719022750854, "learning_rate": 0.0002673953599617316, "loss": 0.1492, "step": 2777 }, { "epoch": 0.38934828311142256, "grad_norm": 0.5210924744606018, "learning_rate": 0.00026738100932791194, "loss": 0.1409, "step": 2778 }, { "epoch": 0.3894884372810091, "grad_norm": 0.6877641677856445, "learning_rate": 0.0002673666586940923, "loss": 0.1507, "step": 2779 }, { "epoch": 0.38962859145059564, "grad_norm": 0.5524642467498779, "learning_rate": 0.00026735230806027266, "loss": 0.0747, "step": 2780 }, { "epoch": 0.3897687456201822, "grad_norm": 0.33044764399528503, "learning_rate": 0.000267337957426453, "loss": 0.0613, "step": 2781 }, { "epoch": 0.3899088997897687, "grad_norm": 0.7215332388877869, "learning_rate": 0.0002673236067926333, "loss": 0.1626, "step": 2782 }, { "epoch": 0.3900490539593553, "grad_norm": 0.5219545960426331, "learning_rate": 0.00026730925615881364, "loss": 0.0705, "step": 2783 }, { "epoch": 0.3901892081289418, "grad_norm": 0.5790599584579468, "learning_rate": 0.00026729490552499397, "loss": 0.0681, "step": 2784 }, { "epoch": 0.3903293622985284, "grad_norm": 0.4165177047252655, "learning_rate": 0.0002672805548911743, "loss": 0.0988, "step": 2785 }, { "epoch": 0.39046951646811495, "grad_norm": 0.4054563045501709, "learning_rate": 0.0002672662042573547, "loss": 0.1123, "step": 2786 }, { "epoch": 0.39060967063770147, "grad_norm": 0.2322995364665985, "learning_rate": 0.000267251853623535, "loss": 0.0698, "step": 2787 }, { "epoch": 0.39074982480728804, "grad_norm": 0.3329775035381317, "learning_rate": 0.00026723750298971534, "loss": 0.0646, "step": 2788 }, { "epoch": 
0.39088997897687455, "grad_norm": 0.3626221716403961, "learning_rate": 0.0002672231523558957, "loss": 0.1145, "step": 2789 }, { "epoch": 0.3910301331464611, "grad_norm": 0.36956289410591125, "learning_rate": 0.00026720880172207605, "loss": 0.079, "step": 2790 }, { "epoch": 0.39117028731604764, "grad_norm": 1.1502017974853516, "learning_rate": 0.0002671944510882564, "loss": 0.1271, "step": 2791 }, { "epoch": 0.3913104414856342, "grad_norm": 0.8659423589706421, "learning_rate": 0.0002671801004544367, "loss": 0.1307, "step": 2792 }, { "epoch": 0.3914505956552207, "grad_norm": 0.8645521998405457, "learning_rate": 0.00026716574982061704, "loss": 0.0852, "step": 2793 }, { "epoch": 0.3915907498248073, "grad_norm": 0.6566555500030518, "learning_rate": 0.00026715139918679736, "loss": 0.1364, "step": 2794 }, { "epoch": 0.3917309039943938, "grad_norm": 0.8128021955490112, "learning_rate": 0.0002671370485529777, "loss": 0.1103, "step": 2795 }, { "epoch": 0.3918710581639804, "grad_norm": 1.0918937921524048, "learning_rate": 0.0002671226979191581, "loss": 0.2245, "step": 2796 }, { "epoch": 0.39201121233356695, "grad_norm": 0.741801381111145, "learning_rate": 0.0002671083472853384, "loss": 0.2202, "step": 2797 }, { "epoch": 0.39215136650315346, "grad_norm": 0.4222448170185089, "learning_rate": 0.00026709399665151873, "loss": 0.0374, "step": 2798 }, { "epoch": 0.39229152067274004, "grad_norm": 0.5807374119758606, "learning_rate": 0.0002670796460176991, "loss": 0.1535, "step": 2799 }, { "epoch": 0.39243167484232655, "grad_norm": 1.4028120040893555, "learning_rate": 0.00026706529538387944, "loss": 0.1488, "step": 2800 }, { "epoch": 0.3925718290119131, "grad_norm": 0.41136837005615234, "learning_rate": 0.00026705094475005977, "loss": 0.1106, "step": 2801 }, { "epoch": 0.39271198318149964, "grad_norm": 0.35249119997024536, "learning_rate": 0.0002670365941162401, "loss": 0.0743, "step": 2802 }, { "epoch": 0.3928521373510862, "grad_norm": 0.5796114206314087, "learning_rate": 
0.00026702224348242043, "loss": 0.15, "step": 2803 }, { "epoch": 0.3929922915206727, "grad_norm": 0.28602728247642517, "learning_rate": 0.00026700789284860076, "loss": 0.0789, "step": 2804 }, { "epoch": 0.3931324456902593, "grad_norm": 0.39246585965156555, "learning_rate": 0.00026699354221478114, "loss": 0.0915, "step": 2805 }, { "epoch": 0.3932725998598458, "grad_norm": 0.7621869444847107, "learning_rate": 0.00026697919158096147, "loss": 0.1138, "step": 2806 }, { "epoch": 0.3934127540294324, "grad_norm": 0.52781081199646, "learning_rate": 0.0002669648409471418, "loss": 0.1115, "step": 2807 }, { "epoch": 0.39355290819901895, "grad_norm": 0.5050719976425171, "learning_rate": 0.0002669504903133222, "loss": 0.1652, "step": 2808 }, { "epoch": 0.39369306236860546, "grad_norm": 0.8677018880844116, "learning_rate": 0.0002669361396795025, "loss": 0.2224, "step": 2809 }, { "epoch": 0.39383321653819203, "grad_norm": 0.6989110708236694, "learning_rate": 0.00026692178904568284, "loss": 0.1402, "step": 2810 }, { "epoch": 0.39397337070777855, "grad_norm": 0.4217401146888733, "learning_rate": 0.00026690743841186317, "loss": 0.0703, "step": 2811 }, { "epoch": 0.3941135248773651, "grad_norm": 0.6364090442657471, "learning_rate": 0.0002668930877780435, "loss": 0.1797, "step": 2812 }, { "epoch": 0.39425367904695163, "grad_norm": 0.7878068685531616, "learning_rate": 0.0002668787371442238, "loss": 0.1714, "step": 2813 }, { "epoch": 0.3943938332165382, "grad_norm": 0.5863538384437561, "learning_rate": 0.00026686438651040415, "loss": 0.1672, "step": 2814 }, { "epoch": 0.3945339873861247, "grad_norm": 0.7067665457725525, "learning_rate": 0.00026685003587658453, "loss": 0.1089, "step": 2815 }, { "epoch": 0.3946741415557113, "grad_norm": 0.903645932674408, "learning_rate": 0.00026683568524276486, "loss": 0.1005, "step": 2816 }, { "epoch": 0.3948142957252978, "grad_norm": 0.6870549917221069, "learning_rate": 0.0002668213346089452, "loss": 0.1138, "step": 2817 }, { "epoch": 
0.3949544498948844, "grad_norm": 0.5287951827049255, "learning_rate": 0.0002668069839751256, "loss": 0.1159, "step": 2818 }, { "epoch": 0.39509460406447094, "grad_norm": 0.5331532955169678, "learning_rate": 0.0002667926333413059, "loss": 0.0755, "step": 2819 }, { "epoch": 0.39523475823405746, "grad_norm": 0.5666272044181824, "learning_rate": 0.00026677828270748623, "loss": 0.1031, "step": 2820 }, { "epoch": 0.39537491240364403, "grad_norm": 0.36146727204322815, "learning_rate": 0.00026676393207366656, "loss": 0.1319, "step": 2821 }, { "epoch": 0.39551506657323054, "grad_norm": 0.2870951294898987, "learning_rate": 0.0002667495814398469, "loss": 0.0896, "step": 2822 }, { "epoch": 0.3956552207428171, "grad_norm": 0.274265855550766, "learning_rate": 0.0002667352308060272, "loss": 0.0828, "step": 2823 }, { "epoch": 0.39579537491240363, "grad_norm": 0.33852601051330566, "learning_rate": 0.0002667208801722076, "loss": 0.0799, "step": 2824 }, { "epoch": 0.3959355290819902, "grad_norm": 0.6449515223503113, "learning_rate": 0.00026670652953838793, "loss": 0.1358, "step": 2825 }, { "epoch": 0.3960756832515767, "grad_norm": 0.3763137459754944, "learning_rate": 0.00026669217890456826, "loss": 0.0643, "step": 2826 }, { "epoch": 0.3962158374211633, "grad_norm": 0.41076070070266724, "learning_rate": 0.00026667782827074864, "loss": 0.063, "step": 2827 }, { "epoch": 0.3963559915907498, "grad_norm": 0.5818364024162292, "learning_rate": 0.00026666347763692897, "loss": 0.124, "step": 2828 }, { "epoch": 0.39649614576033637, "grad_norm": 0.6228339672088623, "learning_rate": 0.0002666491270031093, "loss": 0.1254, "step": 2829 }, { "epoch": 0.39663629992992294, "grad_norm": 0.47882217168807983, "learning_rate": 0.0002666347763692896, "loss": 0.0891, "step": 2830 }, { "epoch": 0.39677645409950946, "grad_norm": 0.35936740040779114, "learning_rate": 0.00026662042573546995, "loss": 0.1275, "step": 2831 }, { "epoch": 0.396916608269096, "grad_norm": 0.5200803279876709, "learning_rate": 
0.0002666060751016503, "loss": 0.1202, "step": 2832 }, { "epoch": 0.39705676243868254, "grad_norm": 0.3239125907421112, "learning_rate": 0.0002665917244678306, "loss": 0.0439, "step": 2833 }, { "epoch": 0.3971969166082691, "grad_norm": 0.44537585973739624, "learning_rate": 0.000266577373834011, "loss": 0.1007, "step": 2834 }, { "epoch": 0.3973370707778556, "grad_norm": 0.3647289276123047, "learning_rate": 0.0002665630232001913, "loss": 0.0237, "step": 2835 }, { "epoch": 0.3974772249474422, "grad_norm": 0.46190890669822693, "learning_rate": 0.00026654867256637165, "loss": 0.1221, "step": 2836 }, { "epoch": 0.3976173791170287, "grad_norm": 0.4426904618740082, "learning_rate": 0.00026653432193255203, "loss": 0.14, "step": 2837 }, { "epoch": 0.3977575332866153, "grad_norm": 1.7598018646240234, "learning_rate": 0.00026651997129873236, "loss": 0.1223, "step": 2838 }, { "epoch": 0.3978976874562018, "grad_norm": 1.075234055519104, "learning_rate": 0.0002665056206649127, "loss": 0.1454, "step": 2839 }, { "epoch": 0.39803784162578837, "grad_norm": 1.100995421409607, "learning_rate": 0.000266491270031093, "loss": 0.1231, "step": 2840 }, { "epoch": 0.39817799579537494, "grad_norm": 0.5203006267547607, "learning_rate": 0.00026647691939727335, "loss": 0.1033, "step": 2841 }, { "epoch": 0.39831814996496145, "grad_norm": 0.49494871497154236, "learning_rate": 0.0002664625687634537, "loss": 0.1666, "step": 2842 }, { "epoch": 0.398458304134548, "grad_norm": 0.6560659408569336, "learning_rate": 0.00026644821812963406, "loss": 0.1705, "step": 2843 }, { "epoch": 0.39859845830413454, "grad_norm": 0.702608585357666, "learning_rate": 0.0002664338674958144, "loss": 0.1548, "step": 2844 }, { "epoch": 0.3987386124737211, "grad_norm": 1.7253023386001587, "learning_rate": 0.0002664195168619947, "loss": 0.1621, "step": 2845 }, { "epoch": 0.3988787666433076, "grad_norm": 0.5822128057479858, "learning_rate": 0.00026640516622817505, "loss": 0.1396, "step": 2846 }, { "epoch": 0.3990189208128942, 
"grad_norm": 1.302255392074585, "learning_rate": 0.0002663908155943554, "loss": 0.2037, "step": 2847 }, { "epoch": 0.3991590749824807, "grad_norm": 0.3844779133796692, "learning_rate": 0.0002663764649605357, "loss": 0.0452, "step": 2848 }, { "epoch": 0.3992992291520673, "grad_norm": 1.5568729639053345, "learning_rate": 0.0002663621143267161, "loss": 0.2632, "step": 2849 }, { "epoch": 0.3994393833216538, "grad_norm": 1.6648600101470947, "learning_rate": 0.0002663477636928964, "loss": 0.2718, "step": 2850 }, { "epoch": 0.39957953749124037, "grad_norm": 0.37880682945251465, "learning_rate": 0.00026633341305907674, "loss": 0.1197, "step": 2851 }, { "epoch": 0.39971969166082694, "grad_norm": 0.6926692724227905, "learning_rate": 0.00026631906242525707, "loss": 0.1162, "step": 2852 }, { "epoch": 0.39985984583041345, "grad_norm": 0.34722068905830383, "learning_rate": 0.00026630471179143745, "loss": 0.1213, "step": 2853 }, { "epoch": 0.4, "grad_norm": 0.29237687587738037, "learning_rate": 0.0002662903611576178, "loss": 0.043, "step": 2854 }, { "epoch": 0.40014015416958654, "grad_norm": 0.3461633622646332, "learning_rate": 0.0002662760105237981, "loss": 0.1524, "step": 2855 }, { "epoch": 0.4002803083391731, "grad_norm": 0.3609064519405365, "learning_rate": 0.00026626165988997844, "loss": 0.1184, "step": 2856 }, { "epoch": 0.4004204625087596, "grad_norm": 0.3734062910079956, "learning_rate": 0.00026624730925615877, "loss": 0.1052, "step": 2857 }, { "epoch": 0.4005606166783462, "grad_norm": 0.1862742304801941, "learning_rate": 0.0002662329586223391, "loss": 0.053, "step": 2858 }, { "epoch": 0.4007007708479327, "grad_norm": 0.3383943736553192, "learning_rate": 0.0002662186079885195, "loss": 0.1108, "step": 2859 }, { "epoch": 0.4008409250175193, "grad_norm": 0.6033865213394165, "learning_rate": 0.0002662042573546998, "loss": 0.12, "step": 2860 }, { "epoch": 0.4009810791871058, "grad_norm": 0.3873053193092346, "learning_rate": 0.00026618990672088014, "loss": 0.1069, "step": 2861 
}, { "epoch": 0.40112123335669236, "grad_norm": 0.491430401802063, "learning_rate": 0.0002661755560870605, "loss": 0.1415, "step": 2862 }, { "epoch": 0.40126138752627893, "grad_norm": 0.44992104172706604, "learning_rate": 0.00026616120545324085, "loss": 0.0965, "step": 2863 }, { "epoch": 0.40140154169586545, "grad_norm": 0.5101977586746216, "learning_rate": 0.0002661468548194212, "loss": 0.1512, "step": 2864 }, { "epoch": 0.401541695865452, "grad_norm": 0.2565525472164154, "learning_rate": 0.0002661325041856015, "loss": 0.0809, "step": 2865 }, { "epoch": 0.40168185003503853, "grad_norm": 0.4568197429180145, "learning_rate": 0.00026611815355178183, "loss": 0.1424, "step": 2866 }, { "epoch": 0.4018220042046251, "grad_norm": 0.3124846816062927, "learning_rate": 0.00026610380291796216, "loss": 0.0816, "step": 2867 }, { "epoch": 0.4019621583742116, "grad_norm": 0.4760637581348419, "learning_rate": 0.0002660894522841425, "loss": 0.0587, "step": 2868 }, { "epoch": 0.4021023125437982, "grad_norm": 0.40039730072021484, "learning_rate": 0.0002660751016503229, "loss": 0.1076, "step": 2869 }, { "epoch": 0.4022424667133847, "grad_norm": 0.37872010469436646, "learning_rate": 0.0002660607510165032, "loss": 0.0934, "step": 2870 }, { "epoch": 0.4023826208829713, "grad_norm": 0.657885730266571, "learning_rate": 0.00026604640038268353, "loss": 0.1092, "step": 2871 }, { "epoch": 0.4025227750525578, "grad_norm": 0.3977409601211548, "learning_rate": 0.0002660320497488639, "loss": 0.1252, "step": 2872 }, { "epoch": 0.40266292922214436, "grad_norm": 0.4142765700817108, "learning_rate": 0.00026601769911504424, "loss": 0.1236, "step": 2873 }, { "epoch": 0.40280308339173093, "grad_norm": 0.4369869828224182, "learning_rate": 0.00026600334848122457, "loss": 0.0793, "step": 2874 }, { "epoch": 0.40294323756131745, "grad_norm": 0.6777017712593079, "learning_rate": 0.0002659889978474049, "loss": 0.1849, "step": 2875 }, { "epoch": 0.403083391730904, "grad_norm": 0.5213425755500793, "learning_rate": 
0.00026597464721358523, "loss": 0.0817, "step": 2876 }, { "epoch": 0.40322354590049053, "grad_norm": 0.2951483130455017, "learning_rate": 0.00026596029657976556, "loss": 0.0941, "step": 2877 }, { "epoch": 0.4033637000700771, "grad_norm": 0.41839635372161865, "learning_rate": 0.00026594594594594594, "loss": 0.1444, "step": 2878 }, { "epoch": 0.4035038542396636, "grad_norm": 0.7733237743377686, "learning_rate": 0.00026593159531212627, "loss": 0.1435, "step": 2879 }, { "epoch": 0.4036440084092502, "grad_norm": 0.43835964798927307, "learning_rate": 0.0002659172446783066, "loss": 0.0766, "step": 2880 }, { "epoch": 0.4037841625788367, "grad_norm": 0.6225267648696899, "learning_rate": 0.0002659028940444869, "loss": 0.1538, "step": 2881 }, { "epoch": 0.4039243167484233, "grad_norm": 0.5458643436431885, "learning_rate": 0.0002658885434106673, "loss": 0.1374, "step": 2882 }, { "epoch": 0.4040644709180098, "grad_norm": 0.39032381772994995, "learning_rate": 0.00026587419277684764, "loss": 0.0909, "step": 2883 }, { "epoch": 0.40420462508759636, "grad_norm": 0.14219030737876892, "learning_rate": 0.00026585984214302796, "loss": 0.045, "step": 2884 }, { "epoch": 0.4043447792571829, "grad_norm": 0.5623557567596436, "learning_rate": 0.0002658454915092083, "loss": 0.1137, "step": 2885 }, { "epoch": 0.40448493342676944, "grad_norm": 0.05237596482038498, "learning_rate": 0.0002658311408753886, "loss": 0.0092, "step": 2886 }, { "epoch": 0.404625087596356, "grad_norm": 0.5156688690185547, "learning_rate": 0.00026581679024156895, "loss": 0.1508, "step": 2887 }, { "epoch": 0.40476524176594253, "grad_norm": 0.4336191713809967, "learning_rate": 0.00026580243960774933, "loss": 0.1281, "step": 2888 }, { "epoch": 0.4049053959355291, "grad_norm": 0.5453097224235535, "learning_rate": 0.00026578808897392966, "loss": 0.0864, "step": 2889 }, { "epoch": 0.4050455501051156, "grad_norm": 0.49457165598869324, "learning_rate": 0.00026577373834011, "loss": 0.0727, "step": 2890 }, { "epoch": 
0.4051857042747022, "grad_norm": 0.42322638630867004, "learning_rate": 0.00026575938770629037, "loss": 0.0763, "step": 2891 }, { "epoch": 0.4053258584442887, "grad_norm": 0.2601456344127655, "learning_rate": 0.0002657450370724707, "loss": 0.049, "step": 2892 }, { "epoch": 0.40546601261387527, "grad_norm": 0.38877516984939575, "learning_rate": 0.00026573068643865103, "loss": 0.0971, "step": 2893 }, { "epoch": 0.4056061667834618, "grad_norm": 0.5565824508666992, "learning_rate": 0.00026571633580483136, "loss": 0.072, "step": 2894 }, { "epoch": 0.40574632095304836, "grad_norm": 0.3686055541038513, "learning_rate": 0.0002657019851710117, "loss": 0.1028, "step": 2895 }, { "epoch": 0.40588647512263487, "grad_norm": 0.8749672770500183, "learning_rate": 0.000265687634537192, "loss": 0.1768, "step": 2896 }, { "epoch": 0.40602662929222144, "grad_norm": 3.2944326400756836, "learning_rate": 0.0002656732839033724, "loss": 0.1289, "step": 2897 }, { "epoch": 0.406166783461808, "grad_norm": 2.763009548187256, "learning_rate": 0.0002656589332695527, "loss": 0.0903, "step": 2898 }, { "epoch": 0.4063069376313945, "grad_norm": 0.6687091588973999, "learning_rate": 0.00026564458263573306, "loss": 0.0438, "step": 2899 }, { "epoch": 0.4064470918009811, "grad_norm": 1.7816590070724487, "learning_rate": 0.0002656302320019134, "loss": 0.2906, "step": 2900 }, { "epoch": 0.4065872459705676, "grad_norm": 0.31545910239219666, "learning_rate": 0.00026561588136809377, "loss": 0.0616, "step": 2901 }, { "epoch": 0.4067274001401542, "grad_norm": 0.8465245366096497, "learning_rate": 0.0002656015307342741, "loss": 0.0696, "step": 2902 }, { "epoch": 0.4068675543097407, "grad_norm": 0.5099138021469116, "learning_rate": 0.0002655871801004544, "loss": 0.1222, "step": 2903 }, { "epoch": 0.40700770847932727, "grad_norm": 0.30457115173339844, "learning_rate": 0.00026557282946663475, "loss": 0.0744, "step": 2904 }, { "epoch": 0.4071478626489138, "grad_norm": 0.37230920791625977, "learning_rate": 
0.0002655584788328151, "loss": 0.0819, "step": 2905 }, { "epoch": 0.40728801681850035, "grad_norm": 0.3571549654006958, "learning_rate": 0.0002655441281989954, "loss": 0.1272, "step": 2906 }, { "epoch": 0.40742817098808687, "grad_norm": 0.7391756772994995, "learning_rate": 0.0002655297775651758, "loss": 0.1658, "step": 2907 }, { "epoch": 0.40756832515767344, "grad_norm": 0.27977412939071655, "learning_rate": 0.0002655154269313561, "loss": 0.1013, "step": 2908 }, { "epoch": 0.40770847932726, "grad_norm": 0.5675486326217651, "learning_rate": 0.00026550107629753645, "loss": 0.093, "step": 2909 }, { "epoch": 0.4078486334968465, "grad_norm": 0.4636172950267792, "learning_rate": 0.0002654867256637168, "loss": 0.1128, "step": 2910 }, { "epoch": 0.4079887876664331, "grad_norm": 0.4461061954498291, "learning_rate": 0.0002654723750298971, "loss": 0.0874, "step": 2911 }, { "epoch": 0.4081289418360196, "grad_norm": 0.8013656139373779, "learning_rate": 0.0002654580243960775, "loss": 0.2077, "step": 2912 }, { "epoch": 0.4082690960056062, "grad_norm": 0.41661831736564636, "learning_rate": 0.0002654436737622578, "loss": 0.1508, "step": 2913 }, { "epoch": 0.4084092501751927, "grad_norm": 0.6267321109771729, "learning_rate": 0.00026542932312843815, "loss": 0.1069, "step": 2914 }, { "epoch": 0.40854940434477927, "grad_norm": 0.41701653599739075, "learning_rate": 0.0002654149724946185, "loss": 0.0746, "step": 2915 }, { "epoch": 0.4086895585143658, "grad_norm": 0.5108296871185303, "learning_rate": 0.0002654006218607988, "loss": 0.1085, "step": 2916 }, { "epoch": 0.40882971268395235, "grad_norm": 0.45943978428840637, "learning_rate": 0.0002653862712269792, "loss": 0.075, "step": 2917 }, { "epoch": 0.40896986685353887, "grad_norm": 0.1671084612607956, "learning_rate": 0.0002653719205931595, "loss": 0.0355, "step": 2918 }, { "epoch": 0.40911002102312544, "grad_norm": 0.684384822845459, "learning_rate": 0.00026535756995933984, "loss": 0.0888, "step": 2919 }, { "epoch": 0.409250175192712, 
"grad_norm": 0.6261886358261108, "learning_rate": 0.00026534321932552017, "loss": 0.1541, "step": 2920 }, { "epoch": 0.4093903293622985, "grad_norm": 0.816017746925354, "learning_rate": 0.0002653288686917005, "loss": 0.1254, "step": 2921 }, { "epoch": 0.4095304835318851, "grad_norm": 0.5166419148445129, "learning_rate": 0.00026531451805788083, "loss": 0.1393, "step": 2922 }, { "epoch": 0.4096706377014716, "grad_norm": 0.7215385437011719, "learning_rate": 0.0002653001674240612, "loss": 0.2085, "step": 2923 }, { "epoch": 0.4098107918710582, "grad_norm": 0.4829675853252411, "learning_rate": 0.00026528581679024154, "loss": 0.1129, "step": 2924 }, { "epoch": 0.4099509460406447, "grad_norm": 0.4476076364517212, "learning_rate": 0.00026527146615642187, "loss": 0.0855, "step": 2925 }, { "epoch": 0.41009110021023126, "grad_norm": 0.42605283856391907, "learning_rate": 0.00026525711552260225, "loss": 0.0879, "step": 2926 }, { "epoch": 0.4102312543798178, "grad_norm": 0.5707522034645081, "learning_rate": 0.0002652427648887826, "loss": 0.1281, "step": 2927 }, { "epoch": 0.41037140854940435, "grad_norm": 0.31168338656425476, "learning_rate": 0.0002652284142549629, "loss": 0.0618, "step": 2928 }, { "epoch": 0.41051156271899086, "grad_norm": 0.7115208506584167, "learning_rate": 0.00026521406362114324, "loss": 0.1368, "step": 2929 }, { "epoch": 0.41065171688857743, "grad_norm": 0.38857266306877136, "learning_rate": 0.00026519971298732357, "loss": 0.0938, "step": 2930 }, { "epoch": 0.410791871058164, "grad_norm": 0.9774143695831299, "learning_rate": 0.0002651853623535039, "loss": 0.1251, "step": 2931 }, { "epoch": 0.4109320252277505, "grad_norm": 0.4362261891365051, "learning_rate": 0.0002651710117196843, "loss": 0.0557, "step": 2932 }, { "epoch": 0.4110721793973371, "grad_norm": 0.33460697531700134, "learning_rate": 0.0002651566610858646, "loss": 0.0686, "step": 2933 }, { "epoch": 0.4112123335669236, "grad_norm": 0.2996613681316376, "learning_rate": 0.00026514231045204493, "loss": 
0.0786, "step": 2934 }, { "epoch": 0.4113524877365102, "grad_norm": 0.5538722276687622, "learning_rate": 0.00026512795981822526, "loss": 0.0586, "step": 2935 }, { "epoch": 0.4114926419060967, "grad_norm": 0.9747188091278076, "learning_rate": 0.00026511360918440565, "loss": 0.2031, "step": 2936 }, { "epoch": 0.41163279607568326, "grad_norm": 0.3425869047641754, "learning_rate": 0.000265099258550586, "loss": 0.0454, "step": 2937 }, { "epoch": 0.4117729502452698, "grad_norm": 0.820833683013916, "learning_rate": 0.0002650849079167663, "loss": 0.0933, "step": 2938 }, { "epoch": 0.41191310441485635, "grad_norm": 0.562213659286499, "learning_rate": 0.00026507055728294663, "loss": 0.1649, "step": 2939 }, { "epoch": 0.41205325858444286, "grad_norm": 0.28278765082359314, "learning_rate": 0.00026505620664912696, "loss": 0.0369, "step": 2940 }, { "epoch": 0.41219341275402943, "grad_norm": 0.8125547766685486, "learning_rate": 0.0002650418560153073, "loss": 0.1452, "step": 2941 }, { "epoch": 0.412333566923616, "grad_norm": 0.45113614201545715, "learning_rate": 0.00026502750538148767, "loss": 0.1266, "step": 2942 }, { "epoch": 0.4124737210932025, "grad_norm": 0.933447539806366, "learning_rate": 0.000265013154747668, "loss": 0.0754, "step": 2943 }, { "epoch": 0.4126138752627891, "grad_norm": 0.5742002725601196, "learning_rate": 0.00026499880411384833, "loss": 0.0769, "step": 2944 }, { "epoch": 0.4127540294323756, "grad_norm": 0.9658991098403931, "learning_rate": 0.0002649844534800287, "loss": 0.0815, "step": 2945 }, { "epoch": 0.4128941836019622, "grad_norm": 1.565278172492981, "learning_rate": 0.00026497010284620904, "loss": 0.2244, "step": 2946 }, { "epoch": 0.4130343377715487, "grad_norm": 0.43459033966064453, "learning_rate": 0.00026495575221238937, "loss": 0.0706, "step": 2947 }, { "epoch": 0.41317449194113526, "grad_norm": 0.7879146337509155, "learning_rate": 0.0002649414015785697, "loss": 0.1087, "step": 2948 }, { "epoch": 0.4133146461107218, "grad_norm": 
1.9656546115875244, "learning_rate": 0.00026492705094475, "loss": 0.2337, "step": 2949 }, { "epoch": 0.41345480028030834, "grad_norm": 4.229058742523193, "learning_rate": 0.00026491270031093035, "loss": 0.2678, "step": 2950 }, { "epoch": 0.41359495444989486, "grad_norm": 0.1756371259689331, "learning_rate": 0.0002648983496771107, "loss": 0.0332, "step": 2951 }, { "epoch": 0.41373510861948143, "grad_norm": 0.4779178202152252, "learning_rate": 0.00026488399904329107, "loss": 0.1091, "step": 2952 }, { "epoch": 0.413875262789068, "grad_norm": 0.254630446434021, "learning_rate": 0.0002648696484094714, "loss": 0.1228, "step": 2953 }, { "epoch": 0.4140154169586545, "grad_norm": 0.7055188417434692, "learning_rate": 0.0002648552977756517, "loss": 0.1322, "step": 2954 }, { "epoch": 0.4141555711282411, "grad_norm": 0.39890941977500916, "learning_rate": 0.0002648409471418321, "loss": 0.0892, "step": 2955 }, { "epoch": 0.4142957252978276, "grad_norm": 0.37668585777282715, "learning_rate": 0.00026482659650801243, "loss": 0.1193, "step": 2956 }, { "epoch": 0.41443587946741417, "grad_norm": 0.38553062081336975, "learning_rate": 0.00026481224587419276, "loss": 0.1208, "step": 2957 }, { "epoch": 0.4145760336370007, "grad_norm": 0.4968641400337219, "learning_rate": 0.0002647978952403731, "loss": 0.1963, "step": 2958 }, { "epoch": 0.41471618780658726, "grad_norm": 0.25200724601745605, "learning_rate": 0.0002647835446065534, "loss": 0.0918, "step": 2959 }, { "epoch": 0.41485634197617377, "grad_norm": 0.49894216656684875, "learning_rate": 0.00026476919397273375, "loss": 0.1804, "step": 2960 }, { "epoch": 0.41499649614576034, "grad_norm": 0.3575514554977417, "learning_rate": 0.00026475484333891413, "loss": 0.1462, "step": 2961 }, { "epoch": 0.41513665031534686, "grad_norm": 0.27937018871307373, "learning_rate": 0.00026474049270509446, "loss": 0.1234, "step": 2962 }, { "epoch": 0.4152768044849334, "grad_norm": 0.20667894184589386, "learning_rate": 0.0002647261420712748, "loss": 0.0405, 
"step": 2963 }, { "epoch": 0.41541695865452, "grad_norm": 0.39918091893196106, "learning_rate": 0.00026471179143745517, "loss": 0.1539, "step": 2964 }, { "epoch": 0.4155571128241065, "grad_norm": 0.4819334149360657, "learning_rate": 0.0002646974408036355, "loss": 0.0692, "step": 2965 }, { "epoch": 0.4156972669936931, "grad_norm": 0.32738980650901794, "learning_rate": 0.00026468309016981583, "loss": 0.1191, "step": 2966 }, { "epoch": 0.4158374211632796, "grad_norm": 0.353799432516098, "learning_rate": 0.00026466873953599616, "loss": 0.1041, "step": 2967 }, { "epoch": 0.41597757533286617, "grad_norm": 0.32365113496780396, "learning_rate": 0.0002646543889021765, "loss": 0.1183, "step": 2968 }, { "epoch": 0.4161177295024527, "grad_norm": 0.3455530107021332, "learning_rate": 0.0002646400382683568, "loss": 0.0691, "step": 2969 }, { "epoch": 0.41625788367203925, "grad_norm": 0.28271186351776123, "learning_rate": 0.00026462568763453714, "loss": 0.0503, "step": 2970 }, { "epoch": 0.41639803784162577, "grad_norm": 0.26132267713546753, "learning_rate": 0.0002646113370007175, "loss": 0.1201, "step": 2971 }, { "epoch": 0.41653819201121234, "grad_norm": 0.2868199050426483, "learning_rate": 0.00026459698636689785, "loss": 0.129, "step": 2972 }, { "epoch": 0.41667834618079885, "grad_norm": 0.41328686475753784, "learning_rate": 0.0002645826357330782, "loss": 0.0912, "step": 2973 }, { "epoch": 0.4168185003503854, "grad_norm": 0.42311835289001465, "learning_rate": 0.0002645682850992585, "loss": 0.0916, "step": 2974 }, { "epoch": 0.416958654519972, "grad_norm": 0.23855365812778473, "learning_rate": 0.00026455393446543884, "loss": 0.0711, "step": 2975 }, { "epoch": 0.4170988086895585, "grad_norm": 0.6829186081886292, "learning_rate": 0.0002645395838316192, "loss": 0.1439, "step": 2976 }, { "epoch": 0.4172389628591451, "grad_norm": 0.35768362879753113, "learning_rate": 0.00026452523319779955, "loss": 0.1002, "step": 2977 }, { "epoch": 0.4173791170287316, "grad_norm": 0.9309812188148499, 
"learning_rate": 0.0002645108825639799, "loss": 0.1423, "step": 2978 }, { "epoch": 0.41751927119831816, "grad_norm": 0.3020429313182831, "learning_rate": 0.0002644965319301602, "loss": 0.0776, "step": 2979 }, { "epoch": 0.4176594253679047, "grad_norm": 0.8399023413658142, "learning_rate": 0.0002644821812963406, "loss": 0.1276, "step": 2980 }, { "epoch": 0.41779957953749125, "grad_norm": 0.41155946254730225, "learning_rate": 0.0002644678306625209, "loss": 0.0882, "step": 2981 }, { "epoch": 0.41793973370707777, "grad_norm": 1.7410348653793335, "learning_rate": 0.00026445348002870125, "loss": 0.0627, "step": 2982 }, { "epoch": 0.41807988787666434, "grad_norm": 0.4673553705215454, "learning_rate": 0.0002644391293948816, "loss": 0.1099, "step": 2983 }, { "epoch": 0.41822004204625085, "grad_norm": 0.652702271938324, "learning_rate": 0.0002644247787610619, "loss": 0.1006, "step": 2984 }, { "epoch": 0.4183601962158374, "grad_norm": 0.5195550918579102, "learning_rate": 0.00026441042812724223, "loss": 0.1085, "step": 2985 }, { "epoch": 0.418500350385424, "grad_norm": 0.535480260848999, "learning_rate": 0.00026439607749342256, "loss": 0.06, "step": 2986 }, { "epoch": 0.4186405045550105, "grad_norm": 0.47558096051216125, "learning_rate": 0.00026438172685960294, "loss": 0.0361, "step": 2987 }, { "epoch": 0.4187806587245971, "grad_norm": 0.34890055656433105, "learning_rate": 0.0002643673762257833, "loss": 0.0718, "step": 2988 }, { "epoch": 0.4189208128941836, "grad_norm": 0.4973667860031128, "learning_rate": 0.0002643530255919636, "loss": 0.1232, "step": 2989 }, { "epoch": 0.41906096706377016, "grad_norm": 0.36426812410354614, "learning_rate": 0.000264338674958144, "loss": 0.0783, "step": 2990 }, { "epoch": 0.4192011212333567, "grad_norm": 0.41723981499671936, "learning_rate": 0.0002643243243243243, "loss": 0.0778, "step": 2991 }, { "epoch": 0.41934127540294325, "grad_norm": 0.642321765422821, "learning_rate": 0.00026430997369050464, "loss": 0.1686, "step": 2992 }, { "epoch": 
0.41948142957252976, "grad_norm": 0.562143862247467, "learning_rate": 0.00026429562305668497, "loss": 0.0699, "step": 2993 }, { "epoch": 0.41962158374211633, "grad_norm": 0.5614160895347595, "learning_rate": 0.0002642812724228653, "loss": 0.0604, "step": 2994 }, { "epoch": 0.41976173791170285, "grad_norm": 0.9486021399497986, "learning_rate": 0.00026426692178904563, "loss": 0.2225, "step": 2995 }, { "epoch": 0.4199018920812894, "grad_norm": 0.42924001812934875, "learning_rate": 0.000264252571155226, "loss": 0.0718, "step": 2996 }, { "epoch": 0.420042046250876, "grad_norm": 0.39702117443084717, "learning_rate": 0.00026423822052140634, "loss": 0.0527, "step": 2997 }, { "epoch": 0.4201822004204625, "grad_norm": 0.39967837929725647, "learning_rate": 0.00026422386988758667, "loss": 0.1176, "step": 2998 }, { "epoch": 0.4203223545900491, "grad_norm": 0.949603259563446, "learning_rate": 0.00026420951925376705, "loss": 0.2238, "step": 2999 }, { "epoch": 0.4204625087596356, "grad_norm": 0.5017886161804199, "learning_rate": 0.0002641951686199474, "loss": 0.0277, "step": 3000 }, { "epoch": 0.42060266292922216, "grad_norm": 0.7275369763374329, "learning_rate": 0.0002641808179861277, "loss": 0.1272, "step": 3001 }, { "epoch": 0.4207428170988087, "grad_norm": 0.4509636461734772, "learning_rate": 0.00026416646735230804, "loss": 0.1376, "step": 3002 }, { "epoch": 0.42088297126839525, "grad_norm": 0.5083564519882202, "learning_rate": 0.00026415211671848836, "loss": 0.1072, "step": 3003 }, { "epoch": 0.42102312543798176, "grad_norm": 0.5507363080978394, "learning_rate": 0.0002641377660846687, "loss": 0.1756, "step": 3004 }, { "epoch": 0.42116327960756833, "grad_norm": 0.6499862670898438, "learning_rate": 0.000264123415450849, "loss": 0.0708, "step": 3005 }, { "epoch": 0.42130343377715485, "grad_norm": 0.5804831981658936, "learning_rate": 0.0002641090648170294, "loss": 0.0764, "step": 3006 }, { "epoch": 0.4214435879467414, "grad_norm": 0.3736114501953125, "learning_rate": 
0.00026409471418320973, "loss": 0.1008, "step": 3007 }, { "epoch": 0.421583742116328, "grad_norm": 0.35122519731521606, "learning_rate": 0.00026408036354939006, "loss": 0.1304, "step": 3008 }, { "epoch": 0.4217238962859145, "grad_norm": 0.5111154913902283, "learning_rate": 0.00026406601291557044, "loss": 0.1603, "step": 3009 }, { "epoch": 0.42186405045550107, "grad_norm": 0.638640284538269, "learning_rate": 0.00026405166228175077, "loss": 0.1403, "step": 3010 }, { "epoch": 0.4220042046250876, "grad_norm": 0.3681301772594452, "learning_rate": 0.0002640373116479311, "loss": 0.0563, "step": 3011 }, { "epoch": 0.42214435879467416, "grad_norm": 0.17159447073936462, "learning_rate": 0.00026402296101411143, "loss": 0.0441, "step": 3012 }, { "epoch": 0.42228451296426067, "grad_norm": 0.12306694686412811, "learning_rate": 0.00026400861038029176, "loss": 0.0191, "step": 3013 }, { "epoch": 0.42242466713384724, "grad_norm": 0.36573100090026855, "learning_rate": 0.0002639942597464721, "loss": 0.0903, "step": 3014 }, { "epoch": 0.42256482130343376, "grad_norm": 0.5017110109329224, "learning_rate": 0.00026397990911265247, "loss": 0.159, "step": 3015 }, { "epoch": 0.42270497547302033, "grad_norm": 0.4422838091850281, "learning_rate": 0.0002639655584788328, "loss": 0.0761, "step": 3016 }, { "epoch": 0.42284512964260684, "grad_norm": 0.34144967794418335, "learning_rate": 0.0002639512078450131, "loss": 0.1163, "step": 3017 }, { "epoch": 0.4229852838121934, "grad_norm": 0.5035829544067383, "learning_rate": 0.0002639368572111935, "loss": 0.2362, "step": 3018 }, { "epoch": 0.42312543798178, "grad_norm": 0.5085100531578064, "learning_rate": 0.00026392250657737384, "loss": 0.1074, "step": 3019 }, { "epoch": 0.4232655921513665, "grad_norm": 0.6563408374786377, "learning_rate": 0.00026390815594355417, "loss": 0.1482, "step": 3020 }, { "epoch": 0.42340574632095307, "grad_norm": 0.4050714373588562, "learning_rate": 0.0002638938053097345, "loss": 0.1033, "step": 3021 }, { "epoch": 
0.4235459004905396, "grad_norm": 0.6079158186912537, "learning_rate": 0.0002638794546759148, "loss": 0.1027, "step": 3022 }, { "epoch": 0.42368605466012615, "grad_norm": 0.36508792638778687, "learning_rate": 0.00026386510404209515, "loss": 0.0756, "step": 3023 }, { "epoch": 0.42382620882971267, "grad_norm": 0.5695202946662903, "learning_rate": 0.0002638507534082755, "loss": 0.0869, "step": 3024 }, { "epoch": 0.42396636299929924, "grad_norm": 0.2257179319858551, "learning_rate": 0.00026383640277445586, "loss": 0.053, "step": 3025 }, { "epoch": 0.42410651716888575, "grad_norm": 0.3668532073497772, "learning_rate": 0.0002638220521406362, "loss": 0.0697, "step": 3026 }, { "epoch": 0.4242466713384723, "grad_norm": 0.33470430970191956, "learning_rate": 0.0002638077015068165, "loss": 0.1208, "step": 3027 }, { "epoch": 0.42438682550805884, "grad_norm": 0.24994029104709625, "learning_rate": 0.0002637933508729969, "loss": 0.0927, "step": 3028 }, { "epoch": 0.4245269796776454, "grad_norm": 0.5565463900566101, "learning_rate": 0.00026377900023917723, "loss": 0.0777, "step": 3029 }, { "epoch": 0.424667133847232, "grad_norm": 0.4051344692707062, "learning_rate": 0.00026376464960535756, "loss": 0.1001, "step": 3030 }, { "epoch": 0.4248072880168185, "grad_norm": 0.34019601345062256, "learning_rate": 0.0002637502989715379, "loss": 0.1183, "step": 3031 }, { "epoch": 0.42494744218640507, "grad_norm": 0.42420047521591187, "learning_rate": 0.0002637359483377182, "loss": 0.0537, "step": 3032 }, { "epoch": 0.4250875963559916, "grad_norm": 0.25984880328178406, "learning_rate": 0.00026372159770389855, "loss": 0.1038, "step": 3033 }, { "epoch": 0.42522775052557815, "grad_norm": 1.339253306388855, "learning_rate": 0.00026370724707007893, "loss": 0.1437, "step": 3034 }, { "epoch": 0.42536790469516467, "grad_norm": 0.732659637928009, "learning_rate": 0.00026369289643625926, "loss": 0.127, "step": 3035 }, { "epoch": 0.42550805886475124, "grad_norm": 0.4371141493320465, "learning_rate": 
0.0002636785458024396, "loss": 0.1288, "step": 3036 }, { "epoch": 0.42564821303433775, "grad_norm": 0.6234946250915527, "learning_rate": 0.0002636641951686199, "loss": 0.1709, "step": 3037 }, { "epoch": 0.4257883672039243, "grad_norm": 0.3678855001926422, "learning_rate": 0.00026364984453480024, "loss": 0.1082, "step": 3038 }, { "epoch": 0.42592852137351084, "grad_norm": 0.5632939338684082, "learning_rate": 0.0002636354939009806, "loss": 0.1043, "step": 3039 }, { "epoch": 0.4260686755430974, "grad_norm": 0.761344850063324, "learning_rate": 0.00026362114326716095, "loss": 0.2318, "step": 3040 }, { "epoch": 0.426208829712684, "grad_norm": 0.28364187479019165, "learning_rate": 0.0002636067926333413, "loss": 0.066, "step": 3041 }, { "epoch": 0.4263489838822705, "grad_norm": 0.20622359216213226, "learning_rate": 0.0002635924419995216, "loss": 0.0352, "step": 3042 }, { "epoch": 0.42648913805185706, "grad_norm": 0.8606165647506714, "learning_rate": 0.00026357809136570194, "loss": 0.202, "step": 3043 }, { "epoch": 0.4266292922214436, "grad_norm": 0.6587481498718262, "learning_rate": 0.0002635637407318823, "loss": 0.2369, "step": 3044 }, { "epoch": 0.42676944639103015, "grad_norm": 0.7817203402519226, "learning_rate": 0.00026354939009806265, "loss": 0.0755, "step": 3045 }, { "epoch": 0.42690960056061666, "grad_norm": 0.9240639209747314, "learning_rate": 0.000263535039464243, "loss": 0.0997, "step": 3046 }, { "epoch": 0.42704975473020323, "grad_norm": 0.5465213656425476, "learning_rate": 0.0002635206888304233, "loss": 0.0573, "step": 3047 }, { "epoch": 0.42718990889978975, "grad_norm": 1.686435341835022, "learning_rate": 0.00026350633819660364, "loss": 0.2592, "step": 3048 }, { "epoch": 0.4273300630693763, "grad_norm": 1.7949013710021973, "learning_rate": 0.00026349198756278397, "loss": 0.3234, "step": 3049 }, { "epoch": 0.42747021723896284, "grad_norm": 1.3989791870117188, "learning_rate": 0.00026347763692896435, "loss": 0.3474, "step": 3050 }, { "epoch": 
0.4276103714085494, "grad_norm": 0.4435987174510956, "learning_rate": 0.0002634632862951447, "loss": 0.1072, "step": 3051 }, { "epoch": 0.427750525578136, "grad_norm": 0.3666754961013794, "learning_rate": 0.000263448935661325, "loss": 0.1159, "step": 3052 }, { "epoch": 0.4278906797477225, "grad_norm": 0.37432336807250977, "learning_rate": 0.0002634345850275054, "loss": 0.0645, "step": 3053 }, { "epoch": 0.42803083391730906, "grad_norm": 0.26446565985679626, "learning_rate": 0.0002634202343936857, "loss": 0.0672, "step": 3054 }, { "epoch": 0.4281709880868956, "grad_norm": 0.4468914270401001, "learning_rate": 0.00026340588375986605, "loss": 0.1082, "step": 3055 }, { "epoch": 0.42831114225648215, "grad_norm": 0.3955155611038208, "learning_rate": 0.0002633915331260464, "loss": 0.0851, "step": 3056 }, { "epoch": 0.42845129642606866, "grad_norm": 0.42428505420684814, "learning_rate": 0.0002633771824922267, "loss": 0.1129, "step": 3057 }, { "epoch": 0.42859145059565523, "grad_norm": 0.3192806839942932, "learning_rate": 0.00026336283185840703, "loss": 0.0899, "step": 3058 }, { "epoch": 0.42873160476524175, "grad_norm": 0.1797448843717575, "learning_rate": 0.00026334848122458736, "loss": 0.0711, "step": 3059 }, { "epoch": 0.4288717589348283, "grad_norm": 0.5377113223075867, "learning_rate": 0.00026333413059076774, "loss": 0.1953, "step": 3060 }, { "epoch": 0.42901191310441483, "grad_norm": 0.32639339566230774, "learning_rate": 0.00026331977995694807, "loss": 0.1044, "step": 3061 }, { "epoch": 0.4291520672740014, "grad_norm": 0.292711079120636, "learning_rate": 0.0002633054293231284, "loss": 0.0798, "step": 3062 }, { "epoch": 0.429292221443588, "grad_norm": 0.3188745975494385, "learning_rate": 0.0002632910786893088, "loss": 0.0716, "step": 3063 }, { "epoch": 0.4294323756131745, "grad_norm": 0.35329943895339966, "learning_rate": 0.0002632767280554891, "loss": 0.1174, "step": 3064 }, { "epoch": 0.42957252978276106, "grad_norm": 0.254645437002182, "learning_rate": 
0.00026326237742166944, "loss": 0.063, "step": 3065 }, { "epoch": 0.4297126839523476, "grad_norm": 0.2007201761007309, "learning_rate": 0.00026324802678784977, "loss": 0.0603, "step": 3066 }, { "epoch": 0.42985283812193414, "grad_norm": 0.18286974728107452, "learning_rate": 0.0002632336761540301, "loss": 0.0271, "step": 3067 }, { "epoch": 0.42999299229152066, "grad_norm": 0.2654685080051422, "learning_rate": 0.0002632193255202104, "loss": 0.0739, "step": 3068 }, { "epoch": 0.43013314646110723, "grad_norm": 0.4285154342651367, "learning_rate": 0.0002632049748863908, "loss": 0.1153, "step": 3069 }, { "epoch": 0.43027330063069374, "grad_norm": 0.6475933790206909, "learning_rate": 0.00026319062425257114, "loss": 0.1119, "step": 3070 }, { "epoch": 0.4304134548002803, "grad_norm": 0.40413999557495117, "learning_rate": 0.00026317627361875147, "loss": 0.083, "step": 3071 }, { "epoch": 0.43055360896986683, "grad_norm": 1.027583360671997, "learning_rate": 0.00026316192298493185, "loss": 0.1676, "step": 3072 }, { "epoch": 0.4306937631394534, "grad_norm": 0.8123639225959778, "learning_rate": 0.0002631475723511122, "loss": 0.0651, "step": 3073 }, { "epoch": 0.43083391730903997, "grad_norm": 0.3901086151599884, "learning_rate": 0.0002631332217172925, "loss": 0.0763, "step": 3074 }, { "epoch": 0.4309740714786265, "grad_norm": 0.15434011816978455, "learning_rate": 0.00026311887108347283, "loss": 0.0501, "step": 3075 }, { "epoch": 0.43111422564821306, "grad_norm": 0.6387568116188049, "learning_rate": 0.00026310452044965316, "loss": 0.0862, "step": 3076 }, { "epoch": 0.43125437981779957, "grad_norm": 0.44356054067611694, "learning_rate": 0.0002630901698158335, "loss": 0.0906, "step": 3077 }, { "epoch": 0.43139453398738614, "grad_norm": 1.3771541118621826, "learning_rate": 0.0002630758191820138, "loss": 0.1049, "step": 3078 }, { "epoch": 0.43153468815697266, "grad_norm": 0.4895625412464142, "learning_rate": 0.0002630614685481942, "loss": 0.0343, "step": 3079 }, { "epoch": 
0.4316748423265592, "grad_norm": 0.5825363397598267, "learning_rate": 0.00026304711791437453, "loss": 0.0785, "step": 3080 }, { "epoch": 0.43181499649614574, "grad_norm": 0.4963255524635315, "learning_rate": 0.00026303276728055486, "loss": 0.0911, "step": 3081 }, { "epoch": 0.4319551506657323, "grad_norm": 0.3956547975540161, "learning_rate": 0.00026301841664673524, "loss": 0.0966, "step": 3082 }, { "epoch": 0.4320953048353188, "grad_norm": 0.18339678645133972, "learning_rate": 0.00026300406601291557, "loss": 0.0325, "step": 3083 }, { "epoch": 0.4322354590049054, "grad_norm": 0.2436743974685669, "learning_rate": 0.0002629897153790959, "loss": 0.045, "step": 3084 }, { "epoch": 0.43237561317449197, "grad_norm": 0.24353475868701935, "learning_rate": 0.00026297536474527623, "loss": 0.1093, "step": 3085 }, { "epoch": 0.4325157673440785, "grad_norm": 0.5746607184410095, "learning_rate": 0.00026296101411145656, "loss": 0.1071, "step": 3086 }, { "epoch": 0.43265592151366505, "grad_norm": 0.3748045861721039, "learning_rate": 0.0002629466634776369, "loss": 0.086, "step": 3087 }, { "epoch": 0.43279607568325157, "grad_norm": 0.2613745331764221, "learning_rate": 0.00026293231284381727, "loss": 0.0349, "step": 3088 }, { "epoch": 0.43293622985283814, "grad_norm": 0.3729539215564728, "learning_rate": 0.0002629179622099976, "loss": 0.0576, "step": 3089 }, { "epoch": 0.43307638402242465, "grad_norm": 0.5155344009399414, "learning_rate": 0.0002629036115761779, "loss": 0.1923, "step": 3090 }, { "epoch": 0.4332165381920112, "grad_norm": 0.1734478771686554, "learning_rate": 0.00026288926094235825, "loss": 0.0357, "step": 3091 }, { "epoch": 0.43335669236159774, "grad_norm": 0.5360126495361328, "learning_rate": 0.00026287491030853864, "loss": 0.1009, "step": 3092 }, { "epoch": 0.4334968465311843, "grad_norm": 0.43895745277404785, "learning_rate": 0.00026286055967471896, "loss": 0.0717, "step": 3093 }, { "epoch": 0.4336370007007708, "grad_norm": 0.3264348804950714, "learning_rate": 
0.0002628462090408993, "loss": 0.041, "step": 3094 }, { "epoch": 0.4337771548703574, "grad_norm": 0.28762224316596985, "learning_rate": 0.0002628318584070796, "loss": 0.0367, "step": 3095 }, { "epoch": 0.43391730903994397, "grad_norm": 0.3108057975769043, "learning_rate": 0.00026281750777325995, "loss": 0.0428, "step": 3096 }, { "epoch": 0.4340574632095305, "grad_norm": 0.7983070015907288, "learning_rate": 0.0002628031571394403, "loss": 0.1611, "step": 3097 }, { "epoch": 0.43419761737911705, "grad_norm": 2.7643096446990967, "learning_rate": 0.00026278880650562066, "loss": 0.1842, "step": 3098 }, { "epoch": 0.43433777154870357, "grad_norm": 1.5378133058547974, "learning_rate": 0.000262774455871801, "loss": 0.1501, "step": 3099 }, { "epoch": 0.43447792571829014, "grad_norm": 1.134103536605835, "learning_rate": 0.0002627601052379813, "loss": 0.0855, "step": 3100 }, { "epoch": 0.43461807988787665, "grad_norm": 0.6849503517150879, "learning_rate": 0.00026274575460416165, "loss": 0.1099, "step": 3101 }, { "epoch": 0.4347582340574632, "grad_norm": 0.5533718466758728, "learning_rate": 0.000262731403970342, "loss": 0.128, "step": 3102 }, { "epoch": 0.43489838822704974, "grad_norm": 0.428962767124176, "learning_rate": 0.00026271705333652236, "loss": 0.0804, "step": 3103 }, { "epoch": 0.4350385423966363, "grad_norm": 0.6989350318908691, "learning_rate": 0.0002627027027027027, "loss": 0.1008, "step": 3104 }, { "epoch": 0.4351786965662228, "grad_norm": 0.7078231573104858, "learning_rate": 0.000262688352068883, "loss": 0.1058, "step": 3105 }, { "epoch": 0.4353188507358094, "grad_norm": 0.3282186985015869, "learning_rate": 0.00026267400143506334, "loss": 0.0401, "step": 3106 }, { "epoch": 0.43545900490539596, "grad_norm": 0.38179269433021545, "learning_rate": 0.0002626596508012437, "loss": 0.1186, "step": 3107 }, { "epoch": 0.4355991590749825, "grad_norm": 0.40050217509269714, "learning_rate": 0.00026264530016742406, "loss": 0.0942, "step": 3108 }, { "epoch": 0.43573931324456905, 
"grad_norm": 0.582998514175415, "learning_rate": 0.0002626309495336044, "loss": 0.1446, "step": 3109 }, { "epoch": 0.43587946741415556, "grad_norm": 0.479499876499176, "learning_rate": 0.0002626165988997847, "loss": 0.1304, "step": 3110 }, { "epoch": 0.43601962158374213, "grad_norm": 0.43234512209892273, "learning_rate": 0.00026260224826596504, "loss": 0.0875, "step": 3111 }, { "epoch": 0.43615977575332865, "grad_norm": 0.5029392242431641, "learning_rate": 0.00026258789763214537, "loss": 0.0653, "step": 3112 }, { "epoch": 0.4362999299229152, "grad_norm": 0.4136073887348175, "learning_rate": 0.0002625735469983257, "loss": 0.0685, "step": 3113 }, { "epoch": 0.43644008409250173, "grad_norm": 0.2521909773349762, "learning_rate": 0.0002625591963645061, "loss": 0.0494, "step": 3114 }, { "epoch": 0.4365802382620883, "grad_norm": 0.32898223400115967, "learning_rate": 0.0002625448457306864, "loss": 0.0833, "step": 3115 }, { "epoch": 0.4367203924316748, "grad_norm": 0.354140967130661, "learning_rate": 0.00026253049509686674, "loss": 0.057, "step": 3116 }, { "epoch": 0.4368605466012614, "grad_norm": 0.5951328277587891, "learning_rate": 0.0002625161444630471, "loss": 0.1146, "step": 3117 }, { "epoch": 0.4370007007708479, "grad_norm": 0.4011349380016327, "learning_rate": 0.00026250179382922745, "loss": 0.1438, "step": 3118 }, { "epoch": 0.4371408549404345, "grad_norm": 0.28471773862838745, "learning_rate": 0.0002624874431954078, "loss": 0.0847, "step": 3119 }, { "epoch": 0.43728100911002105, "grad_norm": 0.269428551197052, "learning_rate": 0.0002624730925615881, "loss": 0.0586, "step": 3120 }, { "epoch": 0.43742116327960756, "grad_norm": 0.6035513281822205, "learning_rate": 0.00026245874192776844, "loss": 0.0608, "step": 3121 }, { "epoch": 0.43756131744919413, "grad_norm": 0.43457916378974915, "learning_rate": 0.00026244439129394876, "loss": 0.0613, "step": 3122 }, { "epoch": 0.43770147161878065, "grad_norm": 0.36663711071014404, "learning_rate": 0.00026243004066012915, "loss": 
0.0665, "step": 3123 }, { "epoch": 0.4378416257883672, "grad_norm": 0.26888251304626465, "learning_rate": 0.0002624156900263095, "loss": 0.0907, "step": 3124 }, { "epoch": 0.43798177995795373, "grad_norm": 0.2705402970314026, "learning_rate": 0.0002624013393924898, "loss": 0.0944, "step": 3125 }, { "epoch": 0.4381219341275403, "grad_norm": 0.5814818143844604, "learning_rate": 0.00026238698875867013, "loss": 0.1185, "step": 3126 }, { "epoch": 0.4382620882971268, "grad_norm": 1.2210744619369507, "learning_rate": 0.0002623726381248505, "loss": 0.1448, "step": 3127 }, { "epoch": 0.4384022424667134, "grad_norm": 0.1287975311279297, "learning_rate": 0.00026235828749103084, "loss": 0.0172, "step": 3128 }, { "epoch": 0.4385423966362999, "grad_norm": 0.40411970019340515, "learning_rate": 0.00026234393685721117, "loss": 0.0874, "step": 3129 }, { "epoch": 0.4386825508058865, "grad_norm": 0.43082526326179504, "learning_rate": 0.0002623295862233915, "loss": 0.1773, "step": 3130 }, { "epoch": 0.43882270497547304, "grad_norm": 0.19812557101249695, "learning_rate": 0.00026231523558957183, "loss": 0.0472, "step": 3131 }, { "epoch": 0.43896285914505956, "grad_norm": 0.9664269685745239, "learning_rate": 0.00026230088495575216, "loss": 0.1734, "step": 3132 }, { "epoch": 0.43910301331464613, "grad_norm": 0.332105427980423, "learning_rate": 0.00026228653432193254, "loss": 0.0914, "step": 3133 }, { "epoch": 0.43924316748423264, "grad_norm": 0.2724143862724304, "learning_rate": 0.00026227218368811287, "loss": 0.0423, "step": 3134 }, { "epoch": 0.4393833216538192, "grad_norm": 0.4498167634010315, "learning_rate": 0.0002622578330542932, "loss": 0.0532, "step": 3135 }, { "epoch": 0.43952347582340573, "grad_norm": 0.52696293592453, "learning_rate": 0.0002622434824204736, "loss": 0.0794, "step": 3136 }, { "epoch": 0.4396636299929923, "grad_norm": 0.719615638256073, "learning_rate": 0.0002622291317866539, "loss": 0.0933, "step": 3137 }, { "epoch": 0.4398037841625788, "grad_norm": 
0.8044444918632507, "learning_rate": 0.00026221478115283424, "loss": 0.1427, "step": 3138 }, { "epoch": 0.4399439383321654, "grad_norm": 0.3632006347179413, "learning_rate": 0.00026220043051901457, "loss": 0.0681, "step": 3139 }, { "epoch": 0.4400840925017519, "grad_norm": 0.22276180982589722, "learning_rate": 0.0002621860798851949, "loss": 0.0323, "step": 3140 }, { "epoch": 0.44022424667133847, "grad_norm": 0.45727425813674927, "learning_rate": 0.0002621717292513752, "loss": 0.0811, "step": 3141 }, { "epoch": 0.44036440084092504, "grad_norm": 0.8930152058601379, "learning_rate": 0.0002621573786175556, "loss": 0.2072, "step": 3142 }, { "epoch": 0.44050455501051156, "grad_norm": 1.0207765102386475, "learning_rate": 0.00026214302798373593, "loss": 0.1582, "step": 3143 }, { "epoch": 0.4406447091800981, "grad_norm": 0.5111526250839233, "learning_rate": 0.00026212867734991626, "loss": 0.0848, "step": 3144 }, { "epoch": 0.44078486334968464, "grad_norm": 0.6272589564323425, "learning_rate": 0.0002621143267160966, "loss": 0.1635, "step": 3145 }, { "epoch": 0.4409250175192712, "grad_norm": 0.732872724533081, "learning_rate": 0.000262099976082277, "loss": 0.1136, "step": 3146 }, { "epoch": 0.4410651716888577, "grad_norm": 0.5442157983779907, "learning_rate": 0.0002620856254484573, "loss": 0.1094, "step": 3147 }, { "epoch": 0.4412053258584443, "grad_norm": 0.8131890892982483, "learning_rate": 0.00026207127481463763, "loss": 0.0801, "step": 3148 }, { "epoch": 0.4413454800280308, "grad_norm": 2.3645200729370117, "learning_rate": 0.00026205692418081796, "loss": 0.3437, "step": 3149 }, { "epoch": 0.4414856341976174, "grad_norm": 0.8918802738189697, "learning_rate": 0.0002620425735469983, "loss": 0.1558, "step": 3150 }, { "epoch": 0.4416257883672039, "grad_norm": 0.7216973900794983, "learning_rate": 0.0002620282229131786, "loss": 0.1064, "step": 3151 }, { "epoch": 0.44176594253679047, "grad_norm": 0.4748555123806, "learning_rate": 0.000262013872279359, "loss": 0.1197, "step": 3152 
}, { "epoch": 0.44190609670637704, "grad_norm": 0.37840035557746887, "learning_rate": 0.00026199952164553933, "loss": 0.1133, "step": 3153 }, { "epoch": 0.44204625087596355, "grad_norm": 0.4857287108898163, "learning_rate": 0.00026198517101171966, "loss": 0.0831, "step": 3154 }, { "epoch": 0.4421864050455501, "grad_norm": 0.24071750044822693, "learning_rate": 0.00026197082037790004, "loss": 0.0364, "step": 3155 }, { "epoch": 0.44232655921513664, "grad_norm": 1.2475543022155762, "learning_rate": 0.00026195646974408037, "loss": 0.1407, "step": 3156 }, { "epoch": 0.4424667133847232, "grad_norm": 1.206235647201538, "learning_rate": 0.0002619421191102607, "loss": 0.148, "step": 3157 }, { "epoch": 0.4426068675543097, "grad_norm": 0.5146431922912598, "learning_rate": 0.000261927768476441, "loss": 0.0851, "step": 3158 }, { "epoch": 0.4427470217238963, "grad_norm": 0.21378140151500702, "learning_rate": 0.00026191341784262135, "loss": 0.0452, "step": 3159 }, { "epoch": 0.4428871758934828, "grad_norm": 0.690898597240448, "learning_rate": 0.0002618990672088017, "loss": 0.1493, "step": 3160 }, { "epoch": 0.4430273300630694, "grad_norm": 0.20644928514957428, "learning_rate": 0.000261884716574982, "loss": 0.0237, "step": 3161 }, { "epoch": 0.4431674842326559, "grad_norm": 0.6293278932571411, "learning_rate": 0.0002618703659411624, "loss": 0.138, "step": 3162 }, { "epoch": 0.44330763840224247, "grad_norm": 0.40677598118782043, "learning_rate": 0.0002618560153073427, "loss": 0.0668, "step": 3163 }, { "epoch": 0.44344779257182904, "grad_norm": 0.5735763311386108, "learning_rate": 0.00026184166467352305, "loss": 0.1093, "step": 3164 }, { "epoch": 0.44358794674141555, "grad_norm": 0.3656761646270752, "learning_rate": 0.0002618273140397034, "loss": 0.0927, "step": 3165 }, { "epoch": 0.4437281009110021, "grad_norm": 0.6591564416885376, "learning_rate": 0.00026181296340588376, "loss": 0.0832, "step": 3166 }, { "epoch": 0.44386825508058864, "grad_norm": 0.2716527581214905, 
"learning_rate": 0.0002617986127720641, "loss": 0.0705, "step": 3167 }, { "epoch": 0.4440084092501752, "grad_norm": 0.5181809067726135, "learning_rate": 0.0002617842621382444, "loss": 0.1406, "step": 3168 }, { "epoch": 0.4441485634197617, "grad_norm": 0.6783599257469177, "learning_rate": 0.00026176991150442475, "loss": 0.1937, "step": 3169 }, { "epoch": 0.4442887175893483, "grad_norm": 0.434597373008728, "learning_rate": 0.0002617555608706051, "loss": 0.0832, "step": 3170 }, { "epoch": 0.4444288717589348, "grad_norm": 0.20813724398612976, "learning_rate": 0.00026174121023678546, "loss": 0.0746, "step": 3171 }, { "epoch": 0.4445690259285214, "grad_norm": 0.5710974931716919, "learning_rate": 0.0002617268596029658, "loss": 0.1291, "step": 3172 }, { "epoch": 0.4447091800981079, "grad_norm": 0.4765787720680237, "learning_rate": 0.0002617125089691461, "loss": 0.1318, "step": 3173 }, { "epoch": 0.44484933426769446, "grad_norm": 0.15755750238895416, "learning_rate": 0.00026169815833532645, "loss": 0.0489, "step": 3174 }, { "epoch": 0.44498948843728103, "grad_norm": 0.4330451786518097, "learning_rate": 0.0002616838077015068, "loss": 0.1133, "step": 3175 }, { "epoch": 0.44512964260686755, "grad_norm": 0.4515622854232788, "learning_rate": 0.0002616694570676871, "loss": 0.0789, "step": 3176 }, { "epoch": 0.4452697967764541, "grad_norm": 0.29335352778434753, "learning_rate": 0.00026165510643386743, "loss": 0.0448, "step": 3177 }, { "epoch": 0.44540995094604063, "grad_norm": 0.18906913697719574, "learning_rate": 0.0002616407558000478, "loss": 0.0611, "step": 3178 }, { "epoch": 0.4455501051156272, "grad_norm": 0.7547434568405151, "learning_rate": 0.00026162640516622814, "loss": 0.1624, "step": 3179 }, { "epoch": 0.4456902592852137, "grad_norm": 0.283898264169693, "learning_rate": 0.00026161205453240847, "loss": 0.0698, "step": 3180 }, { "epoch": 0.4458304134548003, "grad_norm": 0.4482957124710083, "learning_rate": 0.00026159770389858885, "loss": 0.0663, "step": 3181 }, { "epoch": 
0.4459705676243868, "grad_norm": 0.4283231198787689, "learning_rate": 0.0002615833532647692, "loss": 0.1118, "step": 3182 }, { "epoch": 0.4461107217939734, "grad_norm": 0.7021906971931458, "learning_rate": 0.0002615690026309495, "loss": 0.1235, "step": 3183 }, { "epoch": 0.4462508759635599, "grad_norm": 0.42637211084365845, "learning_rate": 0.00026155465199712984, "loss": 0.2485, "step": 3184 }, { "epoch": 0.44639103013314646, "grad_norm": 0.6978999972343445, "learning_rate": 0.00026154030136331017, "loss": 0.0936, "step": 3185 }, { "epoch": 0.44653118430273303, "grad_norm": 1.0158802270889282, "learning_rate": 0.0002615259507294905, "loss": 0.1193, "step": 3186 }, { "epoch": 0.44667133847231955, "grad_norm": 0.4636441767215729, "learning_rate": 0.0002615116000956709, "loss": 0.1001, "step": 3187 }, { "epoch": 0.4468114926419061, "grad_norm": 0.5528663396835327, "learning_rate": 0.0002614972494618512, "loss": 0.1333, "step": 3188 }, { "epoch": 0.44695164681149263, "grad_norm": 0.4500793516635895, "learning_rate": 0.00026148289882803154, "loss": 0.0912, "step": 3189 }, { "epoch": 0.4470918009810792, "grad_norm": 0.2766513526439667, "learning_rate": 0.0002614685481942119, "loss": 0.074, "step": 3190 }, { "epoch": 0.4472319551506657, "grad_norm": 0.5607905983924866, "learning_rate": 0.00026145419756039225, "loss": 0.1008, "step": 3191 }, { "epoch": 0.4473721093202523, "grad_norm": 0.8401314616203308, "learning_rate": 0.0002614398469265726, "loss": 0.1189, "step": 3192 }, { "epoch": 0.4475122634898388, "grad_norm": 1.1839956045150757, "learning_rate": 0.0002614254962927529, "loss": 0.157, "step": 3193 }, { "epoch": 0.4476524176594254, "grad_norm": 0.49879908561706543, "learning_rate": 0.00026141114565893323, "loss": 0.0865, "step": 3194 }, { "epoch": 0.4477925718290119, "grad_norm": 0.6396143436431885, "learning_rate": 0.00026139679502511356, "loss": 0.0882, "step": 3195 }, { "epoch": 0.44793272599859846, "grad_norm": 0.859186053276062, "learning_rate": 
0.0002613824443912939, "loss": 0.0736, "step": 3196 }, { "epoch": 0.44807288016818503, "grad_norm": 2.0409865379333496, "learning_rate": 0.0002613680937574743, "loss": 0.2236, "step": 3197 }, { "epoch": 0.44821303433777154, "grad_norm": 5.258183002471924, "learning_rate": 0.0002613537431236546, "loss": 0.3042, "step": 3198 }, { "epoch": 0.4483531885073581, "grad_norm": 0.9645035862922668, "learning_rate": 0.00026133939248983493, "loss": 0.0763, "step": 3199 }, { "epoch": 0.44849334267694463, "grad_norm": 2.257209539413452, "learning_rate": 0.0002613250418560153, "loss": 0.1904, "step": 3200 }, { "epoch": 0.4486334968465312, "grad_norm": 0.4620717465877533, "learning_rate": 0.00026131069122219564, "loss": 0.1101, "step": 3201 }, { "epoch": 0.4487736510161177, "grad_norm": 0.32010000944137573, "learning_rate": 0.00026129634058837597, "loss": 0.0874, "step": 3202 }, { "epoch": 0.4489138051857043, "grad_norm": 0.6990892291069031, "learning_rate": 0.0002612819899545563, "loss": 0.0711, "step": 3203 }, { "epoch": 0.4490539593552908, "grad_norm": 0.4191516637802124, "learning_rate": 0.00026126763932073663, "loss": 0.073, "step": 3204 }, { "epoch": 0.44919411352487737, "grad_norm": 0.3744420111179352, "learning_rate": 0.00026125328868691696, "loss": 0.0502, "step": 3205 }, { "epoch": 0.4493342676944639, "grad_norm": 1.075827717781067, "learning_rate": 0.00026123893805309734, "loss": 0.1288, "step": 3206 }, { "epoch": 0.44947442186405046, "grad_norm": 0.42123499512672424, "learning_rate": 0.00026122458741927767, "loss": 0.1185, "step": 3207 }, { "epoch": 0.449614576033637, "grad_norm": 1.8295400142669678, "learning_rate": 0.000261210236785458, "loss": 0.1123, "step": 3208 }, { "epoch": 0.44975473020322354, "grad_norm": 0.6613677144050598, "learning_rate": 0.0002611958861516384, "loss": 0.0904, "step": 3209 }, { "epoch": 0.4498948843728101, "grad_norm": 0.31467679142951965, "learning_rate": 0.0002611815355178187, "loss": 0.0749, "step": 3210 }, { "epoch": 0.4500350385423966, 
"grad_norm": 0.46356335282325745, "learning_rate": 0.00026116718488399904, "loss": 0.1156, "step": 3211 }, { "epoch": 0.4501751927119832, "grad_norm": 0.6180557608604431, "learning_rate": 0.00026115283425017936, "loss": 0.085, "step": 3212 }, { "epoch": 0.4503153468815697, "grad_norm": 0.37459442019462585, "learning_rate": 0.0002611384836163597, "loss": 0.0633, "step": 3213 }, { "epoch": 0.4504555010511563, "grad_norm": 1.0285698175430298, "learning_rate": 0.00026112413298254, "loss": 0.3154, "step": 3214 }, { "epoch": 0.4505956552207428, "grad_norm": 1.2040151357650757, "learning_rate": 0.00026110978234872035, "loss": 0.1674, "step": 3215 }, { "epoch": 0.45073580939032937, "grad_norm": 0.5814149975776672, "learning_rate": 0.00026109543171490073, "loss": 0.1476, "step": 3216 }, { "epoch": 0.4508759635599159, "grad_norm": 0.4208332300186157, "learning_rate": 0.00026108108108108106, "loss": 0.1416, "step": 3217 }, { "epoch": 0.45101611772950245, "grad_norm": 0.798169732093811, "learning_rate": 0.0002610667304472614, "loss": 0.102, "step": 3218 }, { "epoch": 0.451156271899089, "grad_norm": 0.6296437978744507, "learning_rate": 0.00026105237981344177, "loss": 0.0909, "step": 3219 }, { "epoch": 0.45129642606867554, "grad_norm": 0.5009991526603699, "learning_rate": 0.0002610380291796221, "loss": 0.1791, "step": 3220 }, { "epoch": 0.4514365802382621, "grad_norm": 0.3721868693828583, "learning_rate": 0.00026102367854580243, "loss": 0.103, "step": 3221 }, { "epoch": 0.4515767344078486, "grad_norm": 0.5548828840255737, "learning_rate": 0.00026100932791198276, "loss": 0.1465, "step": 3222 }, { "epoch": 0.4517168885774352, "grad_norm": 0.44447898864746094, "learning_rate": 0.0002609949772781631, "loss": 0.0832, "step": 3223 }, { "epoch": 0.4518570427470217, "grad_norm": 0.3573371171951294, "learning_rate": 0.0002609806266443434, "loss": 0.0988, "step": 3224 }, { "epoch": 0.4519971969166083, "grad_norm": 0.45428574085235596, "learning_rate": 0.0002609662760105238, "loss": 0.161, 
"step": 3225 }, { "epoch": 0.4521373510861948, "grad_norm": 0.23204457759857178, "learning_rate": 0.0002609519253767041, "loss": 0.0699, "step": 3226 }, { "epoch": 0.45227750525578136, "grad_norm": 0.19380664825439453, "learning_rate": 0.00026093757474288446, "loss": 0.0558, "step": 3227 }, { "epoch": 0.4524176594253679, "grad_norm": 0.4129902720451355, "learning_rate": 0.0002609232241090648, "loss": 0.1388, "step": 3228 }, { "epoch": 0.45255781359495445, "grad_norm": 0.27421775460243225, "learning_rate": 0.0002609088734752451, "loss": 0.0512, "step": 3229 }, { "epoch": 0.452697967764541, "grad_norm": 0.28085988759994507, "learning_rate": 0.0002608945228414255, "loss": 0.0969, "step": 3230 }, { "epoch": 0.45283812193412754, "grad_norm": 0.3167339861392975, "learning_rate": 0.0002608801722076058, "loss": 0.077, "step": 3231 }, { "epoch": 0.4529782761037141, "grad_norm": 0.8371204137802124, "learning_rate": 0.00026086582157378615, "loss": 0.1373, "step": 3232 }, { "epoch": 0.4531184302733006, "grad_norm": 0.3662284016609192, "learning_rate": 0.0002608514709399665, "loss": 0.0511, "step": 3233 }, { "epoch": 0.4532585844428872, "grad_norm": 0.44114458560943604, "learning_rate": 0.0002608371203061468, "loss": 0.1005, "step": 3234 }, { "epoch": 0.4533987386124737, "grad_norm": 0.2829136252403259, "learning_rate": 0.0002608227696723272, "loss": 0.0616, "step": 3235 }, { "epoch": 0.4535388927820603, "grad_norm": 0.27263015508651733, "learning_rate": 0.0002608084190385075, "loss": 0.1114, "step": 3236 }, { "epoch": 0.4536790469516468, "grad_norm": 0.4303293526172638, "learning_rate": 0.00026079406840468785, "loss": 0.052, "step": 3237 }, { "epoch": 0.45381920112123336, "grad_norm": 0.4607178568840027, "learning_rate": 0.0002607797177708682, "loss": 0.2341, "step": 3238 }, { "epoch": 0.4539593552908199, "grad_norm": 0.7156077027320862, "learning_rate": 0.0002607653671370485, "loss": 0.0473, "step": 3239 }, { "epoch": 0.45409950946040645, "grad_norm": 0.5410364866256714, 
"learning_rate": 0.00026075101650322883, "loss": 0.1205, "step": 3240 }, { "epoch": 0.454239663629993, "grad_norm": 0.42635980248451233, "learning_rate": 0.0002607366658694092, "loss": 0.0563, "step": 3241 }, { "epoch": 0.45437981779957953, "grad_norm": 0.8016296625137329, "learning_rate": 0.00026072231523558955, "loss": 0.1667, "step": 3242 }, { "epoch": 0.4545199719691661, "grad_norm": 0.5985812544822693, "learning_rate": 0.0002607079646017699, "loss": 0.1038, "step": 3243 }, { "epoch": 0.4546601261387526, "grad_norm": 0.49753767251968384, "learning_rate": 0.00026069361396795026, "loss": 0.0713, "step": 3244 }, { "epoch": 0.4548002803083392, "grad_norm": 0.711574375629425, "learning_rate": 0.0002606792633341306, "loss": 0.0612, "step": 3245 }, { "epoch": 0.4549404344779257, "grad_norm": 0.42461737990379333, "learning_rate": 0.0002606649127003109, "loss": 0.057, "step": 3246 }, { "epoch": 0.4550805886475123, "grad_norm": 1.2752445936203003, "learning_rate": 0.00026065056206649124, "loss": 0.2273, "step": 3247 }, { "epoch": 0.4552207428170988, "grad_norm": 1.6907846927642822, "learning_rate": 0.00026063621143267157, "loss": 0.1058, "step": 3248 }, { "epoch": 0.45536089698668536, "grad_norm": 0.2871204912662506, "learning_rate": 0.0002606218607988519, "loss": 0.0783, "step": 3249 }, { "epoch": 0.4555010511562719, "grad_norm": 5.855483531951904, "learning_rate": 0.00026060751016503223, "loss": 0.3161, "step": 3250 }, { "epoch": 0.45564120532585844, "grad_norm": 0.41367432475090027, "learning_rate": 0.0002605931595312126, "loss": 0.076, "step": 3251 }, { "epoch": 0.455781359495445, "grad_norm": 0.3567110002040863, "learning_rate": 0.00026057880889739294, "loss": 0.0884, "step": 3252 }, { "epoch": 0.45592151366503153, "grad_norm": 0.8167653679847717, "learning_rate": 0.00026056445826357327, "loss": 0.1222, "step": 3253 }, { "epoch": 0.4560616678346181, "grad_norm": 0.29383742809295654, "learning_rate": 0.00026055010762975365, "loss": 0.0604, "step": 3254 }, { "epoch": 
0.4562018220042046, "grad_norm": 0.36296477913856506, "learning_rate": 0.000260535756995934, "loss": 0.0421, "step": 3255 }, { "epoch": 0.4563419761737912, "grad_norm": 0.886231005191803, "learning_rate": 0.0002605214063621143, "loss": 0.1239, "step": 3256 }, { "epoch": 0.4564821303433777, "grad_norm": 0.69781494140625, "learning_rate": 0.00026050705572829464, "loss": 0.114, "step": 3257 }, { "epoch": 0.45662228451296427, "grad_norm": 0.3201445937156677, "learning_rate": 0.00026049270509447497, "loss": 0.0915, "step": 3258 }, { "epoch": 0.4567624386825508, "grad_norm": 0.5376164317131042, "learning_rate": 0.0002604783544606553, "loss": 0.0868, "step": 3259 }, { "epoch": 0.45690259285213736, "grad_norm": 0.3929520547389984, "learning_rate": 0.0002604640038268357, "loss": 0.103, "step": 3260 }, { "epoch": 0.45704274702172387, "grad_norm": 0.5999123454093933, "learning_rate": 0.000260449653193016, "loss": 0.0957, "step": 3261 }, { "epoch": 0.45718290119131044, "grad_norm": 0.983153760433197, "learning_rate": 0.00026043530255919633, "loss": 0.181, "step": 3262 }, { "epoch": 0.457323055360897, "grad_norm": 0.48948073387145996, "learning_rate": 0.0002604209519253767, "loss": 0.1591, "step": 3263 }, { "epoch": 0.4574632095304835, "grad_norm": 0.7965589761734009, "learning_rate": 0.00026040660129155705, "loss": 0.0592, "step": 3264 }, { "epoch": 0.4576033637000701, "grad_norm": 0.7127774357795715, "learning_rate": 0.0002603922506577374, "loss": 0.085, "step": 3265 }, { "epoch": 0.4577435178696566, "grad_norm": 0.41104570031166077, "learning_rate": 0.0002603779000239177, "loss": 0.0611, "step": 3266 }, { "epoch": 0.4578836720392432, "grad_norm": 1.4193328619003296, "learning_rate": 0.00026036354939009803, "loss": 0.0796, "step": 3267 }, { "epoch": 0.4580238262088297, "grad_norm": 0.2687203884124756, "learning_rate": 0.00026034919875627836, "loss": 0.056, "step": 3268 }, { "epoch": 0.45816398037841627, "grad_norm": 0.4520364999771118, "learning_rate": 0.0002603348481224587, 
"loss": 0.0476, "step": 3269 }, { "epoch": 0.4583041345480028, "grad_norm": 0.4314827620983124, "learning_rate": 0.00026032049748863907, "loss": 0.1113, "step": 3270 }, { "epoch": 0.45844428871758935, "grad_norm": 0.26888686418533325, "learning_rate": 0.0002603061468548194, "loss": 0.0425, "step": 3271 }, { "epoch": 0.45858444288717587, "grad_norm": 0.4072524607181549, "learning_rate": 0.00026029179622099973, "loss": 0.07, "step": 3272 }, { "epoch": 0.45872459705676244, "grad_norm": 0.29469847679138184, "learning_rate": 0.0002602774455871801, "loss": 0.1197, "step": 3273 }, { "epoch": 0.458864751226349, "grad_norm": 0.5500627756118774, "learning_rate": 0.00026026309495336044, "loss": 0.1291, "step": 3274 }, { "epoch": 0.4590049053959355, "grad_norm": 0.3043517470359802, "learning_rate": 0.00026024874431954077, "loss": 0.048, "step": 3275 }, { "epoch": 0.4591450595655221, "grad_norm": 0.977124810218811, "learning_rate": 0.0002602343936857211, "loss": 0.0884, "step": 3276 }, { "epoch": 0.4592852137351086, "grad_norm": 0.5323735475540161, "learning_rate": 0.0002602200430519014, "loss": 0.077, "step": 3277 }, { "epoch": 0.4594253679046952, "grad_norm": 0.7924557328224182, "learning_rate": 0.00026020569241808175, "loss": 0.0493, "step": 3278 }, { "epoch": 0.4595655220742817, "grad_norm": 0.3552142381668091, "learning_rate": 0.00026019134178426214, "loss": 0.1585, "step": 3279 }, { "epoch": 0.45970567624386827, "grad_norm": 0.5548868179321289, "learning_rate": 0.00026017699115044247, "loss": 0.1511, "step": 3280 }, { "epoch": 0.4598458304134548, "grad_norm": 0.25325852632522583, "learning_rate": 0.0002601626405166228, "loss": 0.0745, "step": 3281 }, { "epoch": 0.45998598458304135, "grad_norm": 0.14024783670902252, "learning_rate": 0.0002601482898828031, "loss": 0.0118, "step": 3282 }, { "epoch": 0.46012613875262787, "grad_norm": 0.530278205871582, "learning_rate": 0.0002601339392489835, "loss": 0.0631, "step": 3283 }, { "epoch": 0.46026629292221444, "grad_norm": 
0.4354083836078644, "learning_rate": 0.00026011958861516383, "loss": 0.1464, "step": 3284 }, { "epoch": 0.460406447091801, "grad_norm": 0.44976821541786194, "learning_rate": 0.00026010523798134416, "loss": 0.1149, "step": 3285 }, { "epoch": 0.4605466012613875, "grad_norm": 0.9277240037918091, "learning_rate": 0.0002600908873475245, "loss": 0.1709, "step": 3286 }, { "epoch": 0.4606867554309741, "grad_norm": 0.2960893213748932, "learning_rate": 0.0002600765367137048, "loss": 0.0679, "step": 3287 }, { "epoch": 0.4608269096005606, "grad_norm": 0.5488150119781494, "learning_rate": 0.00026006218607988515, "loss": 0.0858, "step": 3288 }, { "epoch": 0.4609670637701472, "grad_norm": 0.5408404469490051, "learning_rate": 0.00026004783544606553, "loss": 0.0467, "step": 3289 }, { "epoch": 0.4611072179397337, "grad_norm": 0.4792618155479431, "learning_rate": 0.00026003348481224586, "loss": 0.1906, "step": 3290 }, { "epoch": 0.46124737210932026, "grad_norm": 0.2402646541595459, "learning_rate": 0.0002600191341784262, "loss": 0.0609, "step": 3291 }, { "epoch": 0.4613875262789068, "grad_norm": 0.4909380078315735, "learning_rate": 0.0002600047835446065, "loss": 0.0445, "step": 3292 }, { "epoch": 0.46152768044849335, "grad_norm": 0.6875008940696716, "learning_rate": 0.0002599904329107869, "loss": 0.1174, "step": 3293 }, { "epoch": 0.46166783461807986, "grad_norm": 1.5048495531082153, "learning_rate": 0.00025997608227696723, "loss": 0.0833, "step": 3294 }, { "epoch": 0.46180798878766643, "grad_norm": 0.39396318793296814, "learning_rate": 0.00025996173164314756, "loss": 0.0897, "step": 3295 }, { "epoch": 0.461948142957253, "grad_norm": 0.6658831238746643, "learning_rate": 0.0002599473810093279, "loss": 0.2576, "step": 3296 }, { "epoch": 0.4620882971268395, "grad_norm": 0.3684833347797394, "learning_rate": 0.0002599330303755082, "loss": 0.0584, "step": 3297 }, { "epoch": 0.4622284512964261, "grad_norm": 1.6234478950500488, "learning_rate": 0.0002599186797416886, "loss": 0.2005, "step": 
3298 }, { "epoch": 0.4623686054660126, "grad_norm": 0.48739907145500183, "learning_rate": 0.0002599043291078689, "loss": 0.0398, "step": 3299 }, { "epoch": 0.4625087596355992, "grad_norm": 5.181631565093994, "learning_rate": 0.00025988997847404925, "loss": 0.5623, "step": 3300 }, { "epoch": 0.4626489138051857, "grad_norm": 0.41347596049308777, "learning_rate": 0.0002598756278402296, "loss": 0.1104, "step": 3301 }, { "epoch": 0.46278906797477226, "grad_norm": 0.6285414099693298, "learning_rate": 0.0002598612772064099, "loss": 0.1059, "step": 3302 }, { "epoch": 0.4629292221443588, "grad_norm": 0.146291583776474, "learning_rate": 0.00025984692657259024, "loss": 0.0398, "step": 3303 }, { "epoch": 0.46306937631394535, "grad_norm": 0.8626413941383362, "learning_rate": 0.00025983257593877057, "loss": 0.1472, "step": 3304 }, { "epoch": 0.46320953048353186, "grad_norm": 0.34750375151634216, "learning_rate": 0.00025981822530495095, "loss": 0.1122, "step": 3305 }, { "epoch": 0.46334968465311843, "grad_norm": 0.3703271746635437, "learning_rate": 0.0002598038746711313, "loss": 0.1117, "step": 3306 }, { "epoch": 0.463489838822705, "grad_norm": 0.42451179027557373, "learning_rate": 0.0002597895240373116, "loss": 0.1377, "step": 3307 }, { "epoch": 0.4636299929922915, "grad_norm": 0.6143540143966675, "learning_rate": 0.000259775173403492, "loss": 0.1384, "step": 3308 }, { "epoch": 0.4637701471618781, "grad_norm": 0.7038992643356323, "learning_rate": 0.0002597608227696723, "loss": 0.1523, "step": 3309 }, { "epoch": 0.4639103013314646, "grad_norm": 0.23753102123737335, "learning_rate": 0.00025974647213585265, "loss": 0.0499, "step": 3310 }, { "epoch": 0.4640504555010512, "grad_norm": 0.19012613594532013, "learning_rate": 0.000259732121502033, "loss": 0.0386, "step": 3311 }, { "epoch": 0.4641906096706377, "grad_norm": 0.2870272696018219, "learning_rate": 0.0002597177708682133, "loss": 0.0762, "step": 3312 }, { "epoch": 0.46433076384022426, "grad_norm": 0.35737189650535583, 
"learning_rate": 0.00025970342023439363, "loss": 0.0603, "step": 3313 }, { "epoch": 0.4644709180098108, "grad_norm": 0.36687299609184265, "learning_rate": 0.000259689069600574, "loss": 0.0919, "step": 3314 }, { "epoch": 0.46461107217939734, "grad_norm": 0.5802489519119263, "learning_rate": 0.00025967471896675434, "loss": 0.1383, "step": 3315 }, { "epoch": 0.46475122634898386, "grad_norm": 0.6128777265548706, "learning_rate": 0.00025966036833293467, "loss": 0.1645, "step": 3316 }, { "epoch": 0.46489138051857043, "grad_norm": 0.3619915544986725, "learning_rate": 0.000259646017699115, "loss": 0.0884, "step": 3317 }, { "epoch": 0.465031534688157, "grad_norm": 0.5430887937545776, "learning_rate": 0.0002596316670652954, "loss": 0.1596, "step": 3318 }, { "epoch": 0.4651716888577435, "grad_norm": 0.4762561023235321, "learning_rate": 0.0002596173164314757, "loss": 0.1249, "step": 3319 }, { "epoch": 0.4653118430273301, "grad_norm": 0.39257121086120605, "learning_rate": 0.00025960296579765604, "loss": 0.0724, "step": 3320 }, { "epoch": 0.4654519971969166, "grad_norm": 0.5943988561630249, "learning_rate": 0.00025958861516383637, "loss": 0.0888, "step": 3321 }, { "epoch": 0.46559215136650317, "grad_norm": 0.34692972898483276, "learning_rate": 0.0002595742645300167, "loss": 0.1072, "step": 3322 }, { "epoch": 0.4657323055360897, "grad_norm": 0.29024553298950195, "learning_rate": 0.000259559913896197, "loss": 0.0634, "step": 3323 }, { "epoch": 0.46587245970567626, "grad_norm": 0.36643385887145996, "learning_rate": 0.0002595455632623774, "loss": 0.0785, "step": 3324 }, { "epoch": 0.46601261387526277, "grad_norm": 0.3790509104728699, "learning_rate": 0.00025953121262855774, "loss": 0.0872, "step": 3325 }, { "epoch": 0.46615276804484934, "grad_norm": 0.2536316514015198, "learning_rate": 0.00025951686199473807, "loss": 0.1019, "step": 3326 }, { "epoch": 0.46629292221443586, "grad_norm": 0.3769552409648895, "learning_rate": 0.00025950251136091845, "loss": 0.0718, "step": 3327 }, { 
"epoch": 0.4664330763840224, "grad_norm": 0.1781432032585144, "learning_rate": 0.0002594881607270988, "loss": 0.0298, "step": 3328 }, { "epoch": 0.466573230553609, "grad_norm": 0.35734546184539795, "learning_rate": 0.0002594738100932791, "loss": 0.0477, "step": 3329 }, { "epoch": 0.4667133847231955, "grad_norm": 1.0124000310897827, "learning_rate": 0.00025945945945945944, "loss": 0.1497, "step": 3330 }, { "epoch": 0.4668535388927821, "grad_norm": 0.1277385950088501, "learning_rate": 0.00025944510882563976, "loss": 0.0372, "step": 3331 }, { "epoch": 0.4669936930623686, "grad_norm": 0.6442874073982239, "learning_rate": 0.0002594307581918201, "loss": 0.1137, "step": 3332 }, { "epoch": 0.46713384723195517, "grad_norm": 0.3189310133457184, "learning_rate": 0.0002594164075580005, "loss": 0.0777, "step": 3333 }, { "epoch": 0.4672740014015417, "grad_norm": 0.5977842807769775, "learning_rate": 0.0002594020569241808, "loss": 0.1426, "step": 3334 }, { "epoch": 0.46741415557112825, "grad_norm": 0.3326079547405243, "learning_rate": 0.00025938770629036113, "loss": 0.0406, "step": 3335 }, { "epoch": 0.46755430974071477, "grad_norm": 0.7526765465736389, "learning_rate": 0.00025937335565654146, "loss": 0.1369, "step": 3336 }, { "epoch": 0.46769446391030134, "grad_norm": 0.8409889340400696, "learning_rate": 0.00025935900502272184, "loss": 0.1491, "step": 3337 }, { "epoch": 0.46783461807988785, "grad_norm": 0.2698783576488495, "learning_rate": 0.00025934465438890217, "loss": 0.073, "step": 3338 }, { "epoch": 0.4679747722494744, "grad_norm": 1.8809798955917358, "learning_rate": 0.0002593303037550825, "loss": 0.166, "step": 3339 }, { "epoch": 0.46811492641906094, "grad_norm": 0.35007593035697937, "learning_rate": 0.00025931595312126283, "loss": 0.0447, "step": 3340 }, { "epoch": 0.4682550805886475, "grad_norm": 0.7924603223800659, "learning_rate": 0.00025930160248744316, "loss": 0.0846, "step": 3341 }, { "epoch": 0.4683952347582341, "grad_norm": 0.6533512473106384, "learning_rate": 
0.0002592872518536235, "loss": 0.1827, "step": 3342 }, { "epoch": 0.4685353889278206, "grad_norm": 0.5200726985931396, "learning_rate": 0.00025927290121980387, "loss": 0.1278, "step": 3343 }, { "epoch": 0.46867554309740717, "grad_norm": 1.2893496751785278, "learning_rate": 0.0002592585505859842, "loss": 0.2467, "step": 3344 }, { "epoch": 0.4688156972669937, "grad_norm": 1.8457653522491455, "learning_rate": 0.0002592441999521645, "loss": 0.3387, "step": 3345 }, { "epoch": 0.46895585143658025, "grad_norm": 0.6498123407363892, "learning_rate": 0.0002592298493183449, "loss": 0.1305, "step": 3346 }, { "epoch": 0.46909600560616677, "grad_norm": 1.3193391561508179, "learning_rate": 0.00025921549868452524, "loss": 0.2333, "step": 3347 }, { "epoch": 0.46923615977575334, "grad_norm": 0.5757919549942017, "learning_rate": 0.00025920114805070557, "loss": 0.091, "step": 3348 }, { "epoch": 0.46937631394533985, "grad_norm": 1.8101173639297485, "learning_rate": 0.0002591867974168859, "loss": 0.2772, "step": 3349 }, { "epoch": 0.4695164681149264, "grad_norm": 1.8984631299972534, "learning_rate": 0.0002591724467830662, "loss": 0.1928, "step": 3350 }, { "epoch": 0.46965662228451294, "grad_norm": 0.33556658029556274, "learning_rate": 0.00025915809614924655, "loss": 0.1031, "step": 3351 }, { "epoch": 0.4697967764540995, "grad_norm": 0.4323277473449707, "learning_rate": 0.0002591437455154269, "loss": 0.0994, "step": 3352 }, { "epoch": 0.4699369306236861, "grad_norm": 0.2132180631160736, "learning_rate": 0.00025912939488160726, "loss": 0.1123, "step": 3353 }, { "epoch": 0.4700770847932726, "grad_norm": 0.24832658469676971, "learning_rate": 0.0002591150442477876, "loss": 0.0786, "step": 3354 }, { "epoch": 0.47021723896285916, "grad_norm": 0.28085970878601074, "learning_rate": 0.0002591006936139679, "loss": 0.0774, "step": 3355 }, { "epoch": 0.4703573931324457, "grad_norm": 0.3220592737197876, "learning_rate": 0.00025908634298014825, "loss": 0.1013, "step": 3356 }, { "epoch": 
0.47049754730203225, "grad_norm": 0.24233441054821014, "learning_rate": 0.00025907199234632863, "loss": 0.0624, "step": 3357 }, { "epoch": 0.47063770147161876, "grad_norm": 1.9785246849060059, "learning_rate": 0.00025905764171250896, "loss": 0.1313, "step": 3358 }, { "epoch": 0.47077785564120533, "grad_norm": 0.7743126153945923, "learning_rate": 0.0002590432910786893, "loss": 0.1037, "step": 3359 }, { "epoch": 0.47091800981079185, "grad_norm": 0.4001225233078003, "learning_rate": 0.0002590289404448696, "loss": 0.0623, "step": 3360 }, { "epoch": 0.4710581639803784, "grad_norm": 0.5371206998825073, "learning_rate": 0.00025901458981104995, "loss": 0.158, "step": 3361 }, { "epoch": 0.47119831814996493, "grad_norm": 0.26361533999443054, "learning_rate": 0.00025900023917723033, "loss": 0.0843, "step": 3362 }, { "epoch": 0.4713384723195515, "grad_norm": 0.4106167256832123, "learning_rate": 0.00025898588854341066, "loss": 0.1343, "step": 3363 }, { "epoch": 0.4714786264891381, "grad_norm": 0.2720377445220947, "learning_rate": 0.000258971537909591, "loss": 0.0826, "step": 3364 }, { "epoch": 0.4716187806587246, "grad_norm": 0.7385671138763428, "learning_rate": 0.0002589571872757713, "loss": 0.1856, "step": 3365 }, { "epoch": 0.47175893482831116, "grad_norm": 0.19625452160835266, "learning_rate": 0.00025894283664195164, "loss": 0.04, "step": 3366 }, { "epoch": 0.4718990889978977, "grad_norm": 0.3260664939880371, "learning_rate": 0.00025892848600813197, "loss": 0.0909, "step": 3367 }, { "epoch": 0.47203924316748425, "grad_norm": 0.9174659848213196, "learning_rate": 0.00025891413537431235, "loss": 0.0885, "step": 3368 }, { "epoch": 0.47217939733707076, "grad_norm": 0.3643873631954193, "learning_rate": 0.0002588997847404927, "loss": 0.1192, "step": 3369 }, { "epoch": 0.47231955150665733, "grad_norm": 0.29093697667121887, "learning_rate": 0.000258885434106673, "loss": 0.0406, "step": 3370 }, { "epoch": 0.47245970567624385, "grad_norm": 0.31742843985557556, "learning_rate": 
0.00025887108347285334, "loss": 0.0872, "step": 3371 }, { "epoch": 0.4725998598458304, "grad_norm": 0.3487047255039215, "learning_rate": 0.0002588567328390337, "loss": 0.0323, "step": 3372 }, { "epoch": 0.47274001401541693, "grad_norm": 0.5013546943664551, "learning_rate": 0.00025884238220521405, "loss": 0.1103, "step": 3373 }, { "epoch": 0.4728801681850035, "grad_norm": 1.6304062604904175, "learning_rate": 0.0002588280315713944, "loss": 0.1782, "step": 3374 }, { "epoch": 0.4730203223545901, "grad_norm": 0.18943342566490173, "learning_rate": 0.0002588136809375747, "loss": 0.0426, "step": 3375 }, { "epoch": 0.4731604765241766, "grad_norm": 0.8060956597328186, "learning_rate": 0.00025879933030375504, "loss": 0.1552, "step": 3376 }, { "epoch": 0.47330063069376316, "grad_norm": 0.3587362468242645, "learning_rate": 0.00025878497966993537, "loss": 0.0587, "step": 3377 }, { "epoch": 0.4734407848633497, "grad_norm": 0.7555720806121826, "learning_rate": 0.00025877062903611575, "loss": 0.1788, "step": 3378 }, { "epoch": 0.47358093903293624, "grad_norm": 0.1957497000694275, "learning_rate": 0.0002587562784022961, "loss": 0.0349, "step": 3379 }, { "epoch": 0.47372109320252276, "grad_norm": 0.5665290355682373, "learning_rate": 0.0002587419277684764, "loss": 0.1349, "step": 3380 }, { "epoch": 0.47386124737210933, "grad_norm": 0.6021792888641357, "learning_rate": 0.0002587275771346568, "loss": 0.1201, "step": 3381 }, { "epoch": 0.47400140154169584, "grad_norm": 0.7500127553939819, "learning_rate": 0.0002587132265008371, "loss": 0.0763, "step": 3382 }, { "epoch": 0.4741415557112824, "grad_norm": 0.3456926643848419, "learning_rate": 0.00025869887586701745, "loss": 0.1127, "step": 3383 }, { "epoch": 0.47428170988086893, "grad_norm": 0.1884390115737915, "learning_rate": 0.0002586845252331978, "loss": 0.042, "step": 3384 }, { "epoch": 0.4744218640504555, "grad_norm": 0.582822859287262, "learning_rate": 0.0002586701745993781, "loss": 0.0935, "step": 3385 }, { "epoch": 
0.47456201822004207, "grad_norm": 0.46902984380722046, "learning_rate": 0.00025865582396555843, "loss": 0.1416, "step": 3386 }, { "epoch": 0.4747021723896286, "grad_norm": 0.9830765724182129, "learning_rate": 0.00025864147333173876, "loss": 0.2058, "step": 3387 }, { "epoch": 0.47484232655921516, "grad_norm": 0.7563696503639221, "learning_rate": 0.00025862712269791914, "loss": 0.1169, "step": 3388 }, { "epoch": 0.47498248072880167, "grad_norm": 0.6189610362052917, "learning_rate": 0.00025861277206409947, "loss": 0.1398, "step": 3389 }, { "epoch": 0.47512263489838824, "grad_norm": 0.36645278334617615, "learning_rate": 0.0002585984214302798, "loss": 0.0562, "step": 3390 }, { "epoch": 0.47526278906797476, "grad_norm": 0.5037279725074768, "learning_rate": 0.0002585840707964602, "loss": 0.145, "step": 3391 }, { "epoch": 0.4754029432375613, "grad_norm": 0.7555498480796814, "learning_rate": 0.0002585697201626405, "loss": 0.1208, "step": 3392 }, { "epoch": 0.47554309740714784, "grad_norm": 0.7340646982192993, "learning_rate": 0.00025855536952882084, "loss": 0.3138, "step": 3393 }, { "epoch": 0.4756832515767344, "grad_norm": 0.6181073188781738, "learning_rate": 0.00025854101889500117, "loss": 0.3114, "step": 3394 }, { "epoch": 0.4758234057463209, "grad_norm": 0.4733201563358307, "learning_rate": 0.0002585266682611815, "loss": 0.0942, "step": 3395 }, { "epoch": 0.4759635599159075, "grad_norm": 0.7280821800231934, "learning_rate": 0.0002585123176273618, "loss": 0.1755, "step": 3396 }, { "epoch": 0.47610371408549407, "grad_norm": 1.449250340461731, "learning_rate": 0.0002584979669935422, "loss": 0.3941, "step": 3397 }, { "epoch": 0.4762438682550806, "grad_norm": 0.5744331479072571, "learning_rate": 0.00025848361635972254, "loss": 0.1021, "step": 3398 }, { "epoch": 0.47638402242466715, "grad_norm": 1.5846236944198608, "learning_rate": 0.00025846926572590286, "loss": 0.1858, "step": 3399 }, { "epoch": 0.47652417659425367, "grad_norm": 0.42692530155181885, "learning_rate": 
0.00025845491509208325, "loss": 0.0537, "step": 3400 }, { "epoch": 0.47666433076384024, "grad_norm": 0.32353678345680237, "learning_rate": 0.0002584405644582636, "loss": 0.0778, "step": 3401 }, { "epoch": 0.47680448493342675, "grad_norm": 0.6987240314483643, "learning_rate": 0.0002584262138244439, "loss": 0.1612, "step": 3402 }, { "epoch": 0.4769446391030133, "grad_norm": 0.34401682019233704, "learning_rate": 0.00025841186319062423, "loss": 0.1156, "step": 3403 }, { "epoch": 0.47708479327259984, "grad_norm": 0.37349554896354675, "learning_rate": 0.00025839751255680456, "loss": 0.1284, "step": 3404 }, { "epoch": 0.4772249474421864, "grad_norm": 0.6728724837303162, "learning_rate": 0.0002583831619229849, "loss": 0.1626, "step": 3405 }, { "epoch": 0.4773651016117729, "grad_norm": 0.4453674852848053, "learning_rate": 0.0002583688112891652, "loss": 0.156, "step": 3406 }, { "epoch": 0.4775052557813595, "grad_norm": 1.1556479930877686, "learning_rate": 0.0002583544606553456, "loss": 0.1456, "step": 3407 }, { "epoch": 0.47764540995094606, "grad_norm": 0.8492551445960999, "learning_rate": 0.00025834011002152593, "loss": 0.17, "step": 3408 }, { "epoch": 0.4777855641205326, "grad_norm": 0.416910856962204, "learning_rate": 0.00025832575938770626, "loss": 0.1274, "step": 3409 }, { "epoch": 0.47792571829011915, "grad_norm": 0.5362917184829712, "learning_rate": 0.00025831140875388664, "loss": 0.1398, "step": 3410 }, { "epoch": 0.47806587245970567, "grad_norm": 0.23010285198688507, "learning_rate": 0.00025829705812006697, "loss": 0.0932, "step": 3411 }, { "epoch": 0.47820602662929224, "grad_norm": 0.42692089080810547, "learning_rate": 0.0002582827074862473, "loss": 0.0968, "step": 3412 }, { "epoch": 0.47834618079887875, "grad_norm": 0.9318661689758301, "learning_rate": 0.00025826835685242763, "loss": 0.0882, "step": 3413 }, { "epoch": 0.4784863349684653, "grad_norm": 0.3168836534023285, "learning_rate": 0.00025825400621860796, "loss": 0.0579, "step": 3414 }, { "epoch": 
0.47862648913805184, "grad_norm": 0.7715312838554382, "learning_rate": 0.0002582396555847883, "loss": 0.1876, "step": 3415 }, { "epoch": 0.4787666433076384, "grad_norm": 0.8471894860267639, "learning_rate": 0.00025822530495096867, "loss": 0.1608, "step": 3416 }, { "epoch": 0.4789067974772249, "grad_norm": 0.8708312511444092, "learning_rate": 0.000258210954317149, "loss": 0.1306, "step": 3417 }, { "epoch": 0.4790469516468115, "grad_norm": 0.41459688544273376, "learning_rate": 0.0002581966036833293, "loss": 0.1796, "step": 3418 }, { "epoch": 0.47918710581639806, "grad_norm": 0.3730902671813965, "learning_rate": 0.00025818225304950965, "loss": 0.1133, "step": 3419 }, { "epoch": 0.4793272599859846, "grad_norm": 0.288470596075058, "learning_rate": 0.00025816790241569004, "loss": 0.0992, "step": 3420 }, { "epoch": 0.47946741415557115, "grad_norm": 0.3027913570404053, "learning_rate": 0.00025815355178187036, "loss": 0.0575, "step": 3421 }, { "epoch": 0.47960756832515766, "grad_norm": 0.30857786536216736, "learning_rate": 0.0002581392011480507, "loss": 0.1214, "step": 3422 }, { "epoch": 0.47974772249474423, "grad_norm": 0.35822445154190063, "learning_rate": 0.000258124850514231, "loss": 0.0801, "step": 3423 }, { "epoch": 0.47988787666433075, "grad_norm": 0.3174699544906616, "learning_rate": 0.00025811049988041135, "loss": 0.0681, "step": 3424 }, { "epoch": 0.4800280308339173, "grad_norm": 0.3112591505050659, "learning_rate": 0.0002580961492465917, "loss": 0.1038, "step": 3425 }, { "epoch": 0.48016818500350383, "grad_norm": 0.7893008589744568, "learning_rate": 0.00025808179861277206, "loss": 0.1985, "step": 3426 }, { "epoch": 0.4803083391730904, "grad_norm": 0.49721333384513855, "learning_rate": 0.0002580674479789524, "loss": 0.1054, "step": 3427 }, { "epoch": 0.4804484933426769, "grad_norm": 0.6634990572929382, "learning_rate": 0.0002580530973451327, "loss": 0.1543, "step": 3428 }, { "epoch": 0.4805886475122635, "grad_norm": 0.7477396130561829, "learning_rate": 
0.00025803874671131305, "loss": 0.1571, "step": 3429 }, { "epoch": 0.48072880168185006, "grad_norm": 0.397464394569397, "learning_rate": 0.0002580243960774934, "loss": 0.1483, "step": 3430 }, { "epoch": 0.4808689558514366, "grad_norm": 0.5868679881095886, "learning_rate": 0.0002580100454436737, "loss": 0.1224, "step": 3431 }, { "epoch": 0.48100911002102315, "grad_norm": 0.3429774045944214, "learning_rate": 0.0002579956948098541, "loss": 0.0623, "step": 3432 }, { "epoch": 0.48114926419060966, "grad_norm": 0.5995844602584839, "learning_rate": 0.0002579813441760344, "loss": 0.1672, "step": 3433 }, { "epoch": 0.48128941836019623, "grad_norm": 0.21627332270145416, "learning_rate": 0.00025796699354221474, "loss": 0.0277, "step": 3434 }, { "epoch": 0.48142957252978275, "grad_norm": 0.3642168939113617, "learning_rate": 0.0002579526429083951, "loss": 0.0414, "step": 3435 }, { "epoch": 0.4815697266993693, "grad_norm": 0.6434520483016968, "learning_rate": 0.00025793829227457546, "loss": 0.039, "step": 3436 }, { "epoch": 0.48170988086895583, "grad_norm": 0.1669931262731552, "learning_rate": 0.0002579239416407558, "loss": 0.0321, "step": 3437 }, { "epoch": 0.4818500350385424, "grad_norm": 0.3662303388118744, "learning_rate": 0.0002579095910069361, "loss": 0.1002, "step": 3438 }, { "epoch": 0.4819901892081289, "grad_norm": 0.9763455986976624, "learning_rate": 0.00025789524037311644, "loss": 0.1275, "step": 3439 }, { "epoch": 0.4821303433777155, "grad_norm": 0.7734778523445129, "learning_rate": 0.00025788088973929677, "loss": 0.2105, "step": 3440 }, { "epoch": 0.48227049754730206, "grad_norm": 0.6286949515342712, "learning_rate": 0.0002578665391054771, "loss": 0.0695, "step": 3441 }, { "epoch": 0.48241065171688857, "grad_norm": 0.19085323810577393, "learning_rate": 0.0002578521884716575, "loss": 0.0724, "step": 3442 }, { "epoch": 0.48255080588647514, "grad_norm": 0.78294837474823, "learning_rate": 0.0002578378378378378, "loss": 0.1144, "step": 3443 }, { "epoch": 
0.48269096005606166, "grad_norm": 0.39815643429756165, "learning_rate": 0.00025782348720401814, "loss": 0.0887, "step": 3444 }, { "epoch": 0.48283111422564823, "grad_norm": 0.9066981077194214, "learning_rate": 0.0002578091365701985, "loss": 0.2532, "step": 3445 }, { "epoch": 0.48297126839523474, "grad_norm": 0.7563006281852722, "learning_rate": 0.00025779478593637885, "loss": 0.105, "step": 3446 }, { "epoch": 0.4831114225648213, "grad_norm": 0.5843519568443298, "learning_rate": 0.0002577804353025592, "loss": 0.2324, "step": 3447 }, { "epoch": 0.48325157673440783, "grad_norm": 1.0399240255355835, "learning_rate": 0.0002577660846687395, "loss": 0.1119, "step": 3448 }, { "epoch": 0.4833917309039944, "grad_norm": 1.2726222276687622, "learning_rate": 0.00025775173403491983, "loss": 0.1689, "step": 3449 }, { "epoch": 0.4835318850735809, "grad_norm": 1.1648828983306885, "learning_rate": 0.00025773738340110016, "loss": 0.0367, "step": 3450 }, { "epoch": 0.4836720392431675, "grad_norm": 0.4012549817562103, "learning_rate": 0.00025772303276728055, "loss": 0.1036, "step": 3451 }, { "epoch": 0.48381219341275405, "grad_norm": 0.4353487193584442, "learning_rate": 0.0002577086821334609, "loss": 0.1004, "step": 3452 }, { "epoch": 0.48395234758234057, "grad_norm": 0.7935703992843628, "learning_rate": 0.0002576943314996412, "loss": 0.0752, "step": 3453 }, { "epoch": 0.48409250175192714, "grad_norm": 0.5132361054420471, "learning_rate": 0.0002576799808658216, "loss": 0.092, "step": 3454 }, { "epoch": 0.48423265592151365, "grad_norm": 0.6994789838790894, "learning_rate": 0.0002576656302320019, "loss": 0.1422, "step": 3455 }, { "epoch": 0.4843728100911002, "grad_norm": 0.9248461127281189, "learning_rate": 0.00025765127959818224, "loss": 0.0677, "step": 3456 }, { "epoch": 0.48451296426068674, "grad_norm": 0.5581263303756714, "learning_rate": 0.00025763692896436257, "loss": 0.1042, "step": 3457 }, { "epoch": 0.4846531184302733, "grad_norm": 0.31875401735305786, "learning_rate": 
0.0002576225783305429, "loss": 0.0621, "step": 3458 }, { "epoch": 0.4847932725998598, "grad_norm": 0.3572028577327728, "learning_rate": 0.00025760822769672323, "loss": 0.0955, "step": 3459 }, { "epoch": 0.4849334267694464, "grad_norm": 0.5527826547622681, "learning_rate": 0.00025759387706290356, "loss": 0.1427, "step": 3460 }, { "epoch": 0.4850735809390329, "grad_norm": 0.8580110669136047, "learning_rate": 0.00025757952642908394, "loss": 0.1618, "step": 3461 }, { "epoch": 0.4852137351086195, "grad_norm": 0.8033840656280518, "learning_rate": 0.00025756517579526427, "loss": 0.1871, "step": 3462 }, { "epoch": 0.48535388927820605, "grad_norm": 0.3018549680709839, "learning_rate": 0.0002575508251614446, "loss": 0.0674, "step": 3463 }, { "epoch": 0.48549404344779257, "grad_norm": 0.7677834033966064, "learning_rate": 0.000257536474527625, "loss": 0.0978, "step": 3464 }, { "epoch": 0.48563419761737914, "grad_norm": 0.13359415531158447, "learning_rate": 0.0002575221238938053, "loss": 0.0236, "step": 3465 }, { "epoch": 0.48577435178696565, "grad_norm": 0.4001922607421875, "learning_rate": 0.00025750777325998564, "loss": 0.1413, "step": 3466 }, { "epoch": 0.4859145059565522, "grad_norm": 0.4840030074119568, "learning_rate": 0.00025749342262616597, "loss": 0.1417, "step": 3467 }, { "epoch": 0.48605466012613874, "grad_norm": 0.5108070373535156, "learning_rate": 0.0002574790719923463, "loss": 0.1163, "step": 3468 }, { "epoch": 0.4861948142957253, "grad_norm": 0.5165778994560242, "learning_rate": 0.0002574647213585266, "loss": 0.1082, "step": 3469 }, { "epoch": 0.4863349684653118, "grad_norm": 0.37278759479522705, "learning_rate": 0.000257450370724707, "loss": 0.1251, "step": 3470 }, { "epoch": 0.4864751226348984, "grad_norm": 0.40070751309394836, "learning_rate": 0.00025743602009088733, "loss": 0.0892, "step": 3471 }, { "epoch": 0.4866152768044849, "grad_norm": 0.19404344260692596, "learning_rate": 0.00025742166945706766, "loss": 0.0361, "step": 3472 }, { "epoch": 
0.4867554309740715, "grad_norm": 1.0165289640426636, "learning_rate": 0.000257407318823248, "loss": 0.1069, "step": 3473 }, { "epoch": 0.48689558514365805, "grad_norm": 0.36197417974472046, "learning_rate": 0.0002573929681894284, "loss": 0.1036, "step": 3474 }, { "epoch": 0.48703573931324456, "grad_norm": 0.550611674785614, "learning_rate": 0.0002573786175556087, "loss": 0.098, "step": 3475 }, { "epoch": 0.48717589348283113, "grad_norm": 0.6606674790382385, "learning_rate": 0.00025736426692178903, "loss": 0.093, "step": 3476 }, { "epoch": 0.48731604765241765, "grad_norm": 0.6661083102226257, "learning_rate": 0.00025734991628796936, "loss": 0.0694, "step": 3477 }, { "epoch": 0.4874562018220042, "grad_norm": 0.6594652533531189, "learning_rate": 0.0002573355656541497, "loss": 0.0879, "step": 3478 }, { "epoch": 0.48759635599159074, "grad_norm": 0.24664607644081116, "learning_rate": 0.00025732121502033, "loss": 0.0616, "step": 3479 }, { "epoch": 0.4877365101611773, "grad_norm": 0.4766852557659149, "learning_rate": 0.0002573068643865104, "loss": 0.0827, "step": 3480 }, { "epoch": 0.4878766643307638, "grad_norm": 0.6749598383903503, "learning_rate": 0.00025729251375269073, "loss": 0.141, "step": 3481 }, { "epoch": 0.4880168185003504, "grad_norm": 0.9280956983566284, "learning_rate": 0.00025727816311887106, "loss": 0.0943, "step": 3482 }, { "epoch": 0.4881569726699369, "grad_norm": 0.3066399097442627, "learning_rate": 0.00025726381248505144, "loss": 0.058, "step": 3483 }, { "epoch": 0.4882971268395235, "grad_norm": 0.3830578327178955, "learning_rate": 0.00025724946185123177, "loss": 0.093, "step": 3484 }, { "epoch": 0.48843728100911005, "grad_norm": 0.3635070323944092, "learning_rate": 0.0002572351112174121, "loss": 0.0965, "step": 3485 }, { "epoch": 0.48857743517869656, "grad_norm": 2.309699773788452, "learning_rate": 0.0002572207605835924, "loss": 0.1327, "step": 3486 }, { "epoch": 0.48871758934828313, "grad_norm": 0.6561049222946167, "learning_rate": 
0.00025720640994977275, "loss": 0.1025, "step": 3487 }, { "epoch": 0.48885774351786965, "grad_norm": 0.3517068028450012, "learning_rate": 0.0002571920593159531, "loss": 0.0402, "step": 3488 }, { "epoch": 0.4889978976874562, "grad_norm": 0.3044321537017822, "learning_rate": 0.00025717770868213347, "loss": 0.0799, "step": 3489 }, { "epoch": 0.48913805185704273, "grad_norm": 0.36944735050201416, "learning_rate": 0.0002571633580483138, "loss": 0.1074, "step": 3490 }, { "epoch": 0.4892782060266293, "grad_norm": 1.395851731300354, "learning_rate": 0.0002571490074144941, "loss": 0.2275, "step": 3491 }, { "epoch": 0.4894183601962158, "grad_norm": 1.0201665163040161, "learning_rate": 0.00025713465678067445, "loss": 0.2468, "step": 3492 }, { "epoch": 0.4895585143658024, "grad_norm": 1.5163003206253052, "learning_rate": 0.0002571203061468548, "loss": 0.2178, "step": 3493 }, { "epoch": 0.4896986685353889, "grad_norm": 1.0206705331802368, "learning_rate": 0.0002571059555130351, "loss": 0.2149, "step": 3494 }, { "epoch": 0.4898388227049755, "grad_norm": 0.3390774130821228, "learning_rate": 0.00025709160487921544, "loss": 0.0213, "step": 3495 }, { "epoch": 0.48997897687456204, "grad_norm": 2.1452643871307373, "learning_rate": 0.0002570772542453958, "loss": 0.1764, "step": 3496 }, { "epoch": 0.49011913104414856, "grad_norm": 2.654993772506714, "learning_rate": 0.00025706290361157615, "loss": 0.4339, "step": 3497 }, { "epoch": 0.49025928521373513, "grad_norm": 0.6839194893836975, "learning_rate": 0.0002570485529777565, "loss": 0.1325, "step": 3498 }, { "epoch": 0.49039943938332164, "grad_norm": 6.174987316131592, "learning_rate": 0.00025703420234393686, "loss": 0.4508, "step": 3499 }, { "epoch": 0.4905395935529082, "grad_norm": 8.566628456115723, "learning_rate": 0.0002570198517101172, "loss": 0.325, "step": 3500 }, { "epoch": 0.49067974772249473, "grad_norm": 0.1667787879705429, "learning_rate": 0.0002570055010762975, "loss": 0.0534, "step": 3501 }, { "epoch": 0.4908199018920813, 
"grad_norm": 0.30611181259155273, "learning_rate": 0.00025699115044247784, "loss": 0.0828, "step": 3502 }, { "epoch": 0.4909600560616678, "grad_norm": 0.3280355632305145, "learning_rate": 0.0002569767998086582, "loss": 0.0964, "step": 3503 }, { "epoch": 0.4911002102312544, "grad_norm": 0.1987239569425583, "learning_rate": 0.0002569624491748385, "loss": 0.0428, "step": 3504 }, { "epoch": 0.4912403644008409, "grad_norm": 0.31350991129875183, "learning_rate": 0.0002569480985410189, "loss": 0.1097, "step": 3505 }, { "epoch": 0.49138051857042747, "grad_norm": 0.3353220224380493, "learning_rate": 0.0002569337479071992, "loss": 0.1157, "step": 3506 }, { "epoch": 0.49152067274001404, "grad_norm": 0.7625632286071777, "learning_rate": 0.00025691939727337954, "loss": 0.1397, "step": 3507 }, { "epoch": 0.49166082690960056, "grad_norm": 0.27706798911094666, "learning_rate": 0.00025690504663955987, "loss": 0.0687, "step": 3508 }, { "epoch": 0.4918009810791871, "grad_norm": 0.20815348625183105, "learning_rate": 0.00025689069600574025, "loss": 0.0612, "step": 3509 }, { "epoch": 0.49194113524877364, "grad_norm": 0.7141574621200562, "learning_rate": 0.0002568763453719206, "loss": 0.1223, "step": 3510 }, { "epoch": 0.4920812894183602, "grad_norm": 0.5768548250198364, "learning_rate": 0.0002568619947381009, "loss": 0.1378, "step": 3511 }, { "epoch": 0.4922214435879467, "grad_norm": 0.40852946043014526, "learning_rate": 0.00025684764410428124, "loss": 0.1122, "step": 3512 }, { "epoch": 0.4923615977575333, "grad_norm": 0.3819160759449005, "learning_rate": 0.00025683329347046157, "loss": 0.1262, "step": 3513 }, { "epoch": 0.4925017519271198, "grad_norm": 0.48255541920661926, "learning_rate": 0.0002568189428366419, "loss": 0.1082, "step": 3514 }, { "epoch": 0.4926419060967064, "grad_norm": 0.6783628463745117, "learning_rate": 0.0002568045922028223, "loss": 0.1658, "step": 3515 }, { "epoch": 0.4927820602662929, "grad_norm": 0.43569812178611755, "learning_rate": 0.0002567902415690026, 
"loss": 0.1546, "step": 3516 }, { "epoch": 0.49292221443587947, "grad_norm": 0.5011049509048462, "learning_rate": 0.00025677589093518294, "loss": 0.0901, "step": 3517 }, { "epoch": 0.49306236860546604, "grad_norm": 0.22206604480743408, "learning_rate": 0.0002567615403013633, "loss": 0.0677, "step": 3518 }, { "epoch": 0.49320252277505255, "grad_norm": 0.5132867693901062, "learning_rate": 0.00025674718966754365, "loss": 0.133, "step": 3519 }, { "epoch": 0.4933426769446391, "grad_norm": 0.7029857635498047, "learning_rate": 0.000256732839033724, "loss": 0.1019, "step": 3520 }, { "epoch": 0.49348283111422564, "grad_norm": 0.41205543279647827, "learning_rate": 0.0002567184883999043, "loss": 0.0791, "step": 3521 }, { "epoch": 0.4936229852838122, "grad_norm": 0.24129436910152435, "learning_rate": 0.00025670413776608463, "loss": 0.0907, "step": 3522 }, { "epoch": 0.4937631394533987, "grad_norm": 0.5285700559616089, "learning_rate": 0.00025668978713226496, "loss": 0.0735, "step": 3523 }, { "epoch": 0.4939032936229853, "grad_norm": 0.5766040086746216, "learning_rate": 0.00025667543649844534, "loss": 0.1658, "step": 3524 }, { "epoch": 0.4940434477925718, "grad_norm": 0.43381479382514954, "learning_rate": 0.00025666108586462567, "loss": 0.0587, "step": 3525 }, { "epoch": 0.4941836019621584, "grad_norm": 0.27597546577453613, "learning_rate": 0.000256646735230806, "loss": 0.1155, "step": 3526 }, { "epoch": 0.4943237561317449, "grad_norm": 0.5752700567245483, "learning_rate": 0.00025663238459698633, "loss": 0.1109, "step": 3527 }, { "epoch": 0.49446391030133147, "grad_norm": 0.3988467752933502, "learning_rate": 0.0002566180339631667, "loss": 0.0554, "step": 3528 }, { "epoch": 0.49460406447091804, "grad_norm": 0.7903053164482117, "learning_rate": 0.00025660368332934704, "loss": 0.1161, "step": 3529 }, { "epoch": 0.49474421864050455, "grad_norm": 0.5980937480926514, "learning_rate": 0.00025658933269552737, "loss": 0.1469, "step": 3530 }, { "epoch": 0.4948843728100911, "grad_norm": 
0.5819936394691467, "learning_rate": 0.0002565749820617077, "loss": 0.1056, "step": 3531 }, { "epoch": 0.49502452697967764, "grad_norm": 0.8243651986122131, "learning_rate": 0.000256560631427888, "loss": 0.1388, "step": 3532 }, { "epoch": 0.4951646811492642, "grad_norm": 1.1008387804031372, "learning_rate": 0.00025654628079406836, "loss": 0.0849, "step": 3533 }, { "epoch": 0.4953048353188507, "grad_norm": 0.3780296742916107, "learning_rate": 0.00025653193016024874, "loss": 0.1, "step": 3534 }, { "epoch": 0.4954449894884373, "grad_norm": 0.351131796836853, "learning_rate": 0.00025651757952642907, "loss": 0.1352, "step": 3535 }, { "epoch": 0.4955851436580238, "grad_norm": 0.20896701514720917, "learning_rate": 0.0002565032288926094, "loss": 0.0608, "step": 3536 }, { "epoch": 0.4957252978276104, "grad_norm": 0.19262166321277618, "learning_rate": 0.0002564888782587898, "loss": 0.033, "step": 3537 }, { "epoch": 0.4958654519971969, "grad_norm": 0.6666895151138306, "learning_rate": 0.0002564745276249701, "loss": 0.1253, "step": 3538 }, { "epoch": 0.49600560616678346, "grad_norm": 0.5336970686912537, "learning_rate": 0.00025646017699115044, "loss": 0.2133, "step": 3539 }, { "epoch": 0.49614576033637003, "grad_norm": 0.5073505640029907, "learning_rate": 0.00025644582635733076, "loss": 0.0923, "step": 3540 }, { "epoch": 0.49628591450595655, "grad_norm": 0.29231294989585876, "learning_rate": 0.0002564314757235111, "loss": 0.0425, "step": 3541 }, { "epoch": 0.4964260686755431, "grad_norm": 0.30701613426208496, "learning_rate": 0.0002564171250896914, "loss": 0.1324, "step": 3542 }, { "epoch": 0.49656622284512963, "grad_norm": 0.6268811821937561, "learning_rate": 0.00025640277445587175, "loss": 0.1805, "step": 3543 }, { "epoch": 0.4967063770147162, "grad_norm": 0.4305824935436249, "learning_rate": 0.00025638842382205213, "loss": 0.047, "step": 3544 }, { "epoch": 0.4968465311843027, "grad_norm": 0.5213370323181152, "learning_rate": 0.00025637407318823246, "loss": 0.0772, "step": 
3545 }, { "epoch": 0.4969866853538893, "grad_norm": 1.497339129447937, "learning_rate": 0.0002563597225544128, "loss": 0.2284, "step": 3546 }, { "epoch": 0.4971268395234758, "grad_norm": 1.0039198398590088, "learning_rate": 0.00025634537192059317, "loss": 0.1747, "step": 3547 }, { "epoch": 0.4972669936930624, "grad_norm": 2.4202349185943604, "learning_rate": 0.0002563310212867735, "loss": 0.2279, "step": 3548 }, { "epoch": 0.4974071478626489, "grad_norm": 1.3376708030700684, "learning_rate": 0.00025631667065295383, "loss": 0.2251, "step": 3549 }, { "epoch": 0.49754730203223546, "grad_norm": 0.34456321597099304, "learning_rate": 0.00025630232001913416, "loss": 0.0474, "step": 3550 }, { "epoch": 0.49768745620182203, "grad_norm": 0.4071559011936188, "learning_rate": 0.0002562879693853145, "loss": 0.063, "step": 3551 }, { "epoch": 0.49782761037140855, "grad_norm": 0.25602662563323975, "learning_rate": 0.0002562736187514948, "loss": 0.0572, "step": 3552 }, { "epoch": 0.4979677645409951, "grad_norm": 0.3327733278274536, "learning_rate": 0.0002562592681176752, "loss": 0.0515, "step": 3553 }, { "epoch": 0.49810791871058163, "grad_norm": 0.6401887536048889, "learning_rate": 0.0002562449174838555, "loss": 0.1129, "step": 3554 }, { "epoch": 0.4982480728801682, "grad_norm": 0.37373971939086914, "learning_rate": 0.00025623056685003585, "loss": 0.0688, "step": 3555 }, { "epoch": 0.4983882270497547, "grad_norm": 0.745818018913269, "learning_rate": 0.0002562162162162162, "loss": 0.0769, "step": 3556 }, { "epoch": 0.4985283812193413, "grad_norm": 0.5485659837722778, "learning_rate": 0.0002562018655823965, "loss": 0.095, "step": 3557 }, { "epoch": 0.4986685353889278, "grad_norm": 0.5274074077606201, "learning_rate": 0.00025618751494857684, "loss": 0.0817, "step": 3558 }, { "epoch": 0.4988086895585144, "grad_norm": 0.4357086420059204, "learning_rate": 0.0002561731643147572, "loss": 0.0887, "step": 3559 }, { "epoch": 0.4989488437281009, "grad_norm": 0.365132212638855, "learning_rate": 
0.00025615881368093755, "loss": 0.0797, "step": 3560 }, { "epoch": 0.49908899789768746, "grad_norm": 0.305817186832428, "learning_rate": 0.0002561444630471179, "loss": 0.0603, "step": 3561 }, { "epoch": 0.49922915206727403, "grad_norm": 0.8316388130187988, "learning_rate": 0.0002561301124132982, "loss": 0.1481, "step": 3562 }, { "epoch": 0.49936930623686054, "grad_norm": 0.30767759680747986, "learning_rate": 0.0002561157617794786, "loss": 0.0433, "step": 3563 }, { "epoch": 0.4995094604064471, "grad_norm": 0.47065308690071106, "learning_rate": 0.0002561014111456589, "loss": 0.0997, "step": 3564 }, { "epoch": 0.49964961457603363, "grad_norm": 0.5954514741897583, "learning_rate": 0.00025608706051183925, "loss": 0.1261, "step": 3565 }, { "epoch": 0.4997897687456202, "grad_norm": 0.21949557960033417, "learning_rate": 0.0002560727098780196, "loss": 0.0429, "step": 3566 }, { "epoch": 0.4999299229152067, "grad_norm": 0.4462909400463104, "learning_rate": 0.0002560583592441999, "loss": 0.0542, "step": 3567 }, { "epoch": 0.5000700770847932, "grad_norm": 0.5338721871376038, "learning_rate": 0.00025604400861038023, "loss": 0.1435, "step": 3568 }, { "epoch": 0.5002102312543798, "grad_norm": 1.80253005027771, "learning_rate": 0.0002560296579765606, "loss": 0.1108, "step": 3569 }, { "epoch": 0.5003503854239664, "grad_norm": 0.5433024168014526, "learning_rate": 0.00025601530734274095, "loss": 0.1331, "step": 3570 }, { "epoch": 0.5004905395935529, "grad_norm": 0.5902112126350403, "learning_rate": 0.0002560009567089213, "loss": 0.0402, "step": 3571 }, { "epoch": 0.5006306937631394, "grad_norm": 0.4077446758747101, "learning_rate": 0.00025598660607510166, "loss": 0.0749, "step": 3572 }, { "epoch": 0.500770847932726, "grad_norm": 0.7763643264770508, "learning_rate": 0.000255972255441282, "loss": 0.0686, "step": 3573 }, { "epoch": 0.5009110021023125, "grad_norm": 0.5264174342155457, "learning_rate": 0.0002559579048074623, "loss": 0.1041, "step": 3574 }, { "epoch": 0.5010511562718991, 
"grad_norm": 0.5348429083824158, "learning_rate": 0.00025594355417364264, "loss": 0.1276, "step": 3575 }, { "epoch": 0.5011913104414857, "grad_norm": 0.15242506563663483, "learning_rate": 0.00025592920353982297, "loss": 0.0361, "step": 3576 }, { "epoch": 0.5013314646110721, "grad_norm": 0.3991434872150421, "learning_rate": 0.0002559148529060033, "loss": 0.075, "step": 3577 }, { "epoch": 0.5014716187806587, "grad_norm": 0.29129812121391296, "learning_rate": 0.00025590050227218363, "loss": 0.0662, "step": 3578 }, { "epoch": 0.5016117729502453, "grad_norm": 0.2888050079345703, "learning_rate": 0.000255886151638364, "loss": 0.0913, "step": 3579 }, { "epoch": 0.5017519271198319, "grad_norm": 0.5633611083030701, "learning_rate": 0.00025587180100454434, "loss": 0.064, "step": 3580 }, { "epoch": 0.5018920812894183, "grad_norm": 0.620185911655426, "learning_rate": 0.00025585745037072467, "loss": 0.1173, "step": 3581 }, { "epoch": 0.5020322354590049, "grad_norm": 0.3774522542953491, "learning_rate": 0.00025584309973690505, "loss": 0.1091, "step": 3582 }, { "epoch": 0.5021723896285915, "grad_norm": 0.610437273979187, "learning_rate": 0.0002558287491030854, "loss": 0.1707, "step": 3583 }, { "epoch": 0.502312543798178, "grad_norm": 0.7433859705924988, "learning_rate": 0.0002558143984692657, "loss": 0.168, "step": 3584 }, { "epoch": 0.5024526979677646, "grad_norm": 0.37567129731178284, "learning_rate": 0.00025580004783544604, "loss": 0.1214, "step": 3585 }, { "epoch": 0.502592852137351, "grad_norm": 0.504042387008667, "learning_rate": 0.00025578569720162637, "loss": 0.1119, "step": 3586 }, { "epoch": 0.5027330063069376, "grad_norm": 0.6103062629699707, "learning_rate": 0.0002557713465678067, "loss": 0.0884, "step": 3587 }, { "epoch": 0.5028731604765242, "grad_norm": 0.7486506104469299, "learning_rate": 0.0002557569959339871, "loss": 0.094, "step": 3588 }, { "epoch": 0.5030133146461108, "grad_norm": 0.6592079401016235, "learning_rate": 0.0002557426453001674, "loss": 0.0779, 
"step": 3589 }, { "epoch": 0.5031534688156972, "grad_norm": 0.3918934762477875, "learning_rate": 0.00025572829466634773, "loss": 0.0587, "step": 3590 }, { "epoch": 0.5032936229852838, "grad_norm": 0.38877201080322266, "learning_rate": 0.0002557139440325281, "loss": 0.0316, "step": 3591 }, { "epoch": 0.5034337771548704, "grad_norm": 0.1634371280670166, "learning_rate": 0.00025569959339870845, "loss": 0.014, "step": 3592 }, { "epoch": 0.5035739313244569, "grad_norm": 0.8864071369171143, "learning_rate": 0.0002556852427648888, "loss": 0.1071, "step": 3593 }, { "epoch": 0.5037140854940434, "grad_norm": 0.34695908427238464, "learning_rate": 0.0002556708921310691, "loss": 0.0266, "step": 3594 }, { "epoch": 0.50385423966363, "grad_norm": 0.4481075406074524, "learning_rate": 0.00025565654149724943, "loss": 0.0984, "step": 3595 }, { "epoch": 0.5039943938332165, "grad_norm": 1.075195550918579, "learning_rate": 0.00025564219086342976, "loss": 0.1276, "step": 3596 }, { "epoch": 0.5041345480028031, "grad_norm": 0.3739490211009979, "learning_rate": 0.0002556278402296101, "loss": 0.0578, "step": 3597 }, { "epoch": 0.5042747021723897, "grad_norm": 0.6815117001533508, "learning_rate": 0.00025561348959579047, "loss": 0.0491, "step": 3598 }, { "epoch": 0.5044148563419761, "grad_norm": 1.9127668142318726, "learning_rate": 0.0002555991389619708, "loss": 0.2415, "step": 3599 }, { "epoch": 0.5045550105115627, "grad_norm": 2.4393773078918457, "learning_rate": 0.00025558478832815113, "loss": 0.2747, "step": 3600 }, { "epoch": 0.5046951646811493, "grad_norm": 0.24543632566928864, "learning_rate": 0.0002555704376943315, "loss": 0.0723, "step": 3601 }, { "epoch": 0.5048353188507358, "grad_norm": 0.2963375449180603, "learning_rate": 0.00025555608706051184, "loss": 0.0776, "step": 3602 }, { "epoch": 0.5049754730203223, "grad_norm": 0.36004024744033813, "learning_rate": 0.00025554173642669217, "loss": 0.0857, "step": 3603 }, { "epoch": 0.5051156271899089, "grad_norm": 0.36819007992744446, 
"learning_rate": 0.0002555273857928725, "loss": 0.0936, "step": 3604 }, { "epoch": 0.5052557813594954, "grad_norm": 0.48663437366485596, "learning_rate": 0.0002555130351590528, "loss": 0.1217, "step": 3605 }, { "epoch": 0.505395935529082, "grad_norm": 0.2748851478099823, "learning_rate": 0.00025549868452523315, "loss": 0.0806, "step": 3606 }, { "epoch": 0.5055360896986686, "grad_norm": 0.405549019575119, "learning_rate": 0.00025548433389141354, "loss": 0.1514, "step": 3607 }, { "epoch": 0.505676243868255, "grad_norm": 0.531527042388916, "learning_rate": 0.00025546998325759386, "loss": 0.1428, "step": 3608 }, { "epoch": 0.5058163980378416, "grad_norm": 0.4019516706466675, "learning_rate": 0.0002554556326237742, "loss": 0.1675, "step": 3609 }, { "epoch": 0.5059565522074282, "grad_norm": 0.27313774824142456, "learning_rate": 0.0002554412819899546, "loss": 0.0527, "step": 3610 }, { "epoch": 0.5060967063770148, "grad_norm": 0.3598933517932892, "learning_rate": 0.0002554269313561349, "loss": 0.0526, "step": 3611 }, { "epoch": 0.5062368605466012, "grad_norm": 0.6425841450691223, "learning_rate": 0.00025541258072231523, "loss": 0.0658, "step": 3612 }, { "epoch": 0.5063770147161878, "grad_norm": 0.2769329845905304, "learning_rate": 0.00025539823008849556, "loss": 0.1075, "step": 3613 }, { "epoch": 0.5065171688857744, "grad_norm": 0.06827756017446518, "learning_rate": 0.0002553838794546759, "loss": 0.0117, "step": 3614 }, { "epoch": 0.5066573230553609, "grad_norm": 0.34457892179489136, "learning_rate": 0.0002553695288208562, "loss": 0.0481, "step": 3615 }, { "epoch": 0.5067974772249474, "grad_norm": 0.6185826063156128, "learning_rate": 0.00025535517818703655, "loss": 0.1218, "step": 3616 }, { "epoch": 0.506937631394534, "grad_norm": 0.3842414915561676, "learning_rate": 0.00025534082755321693, "loss": 0.0677, "step": 3617 }, { "epoch": 0.5070777855641205, "grad_norm": 0.7708631753921509, "learning_rate": 0.00025532647691939726, "loss": 0.1292, "step": 3618 }, { "epoch": 
0.5072179397337071, "grad_norm": 0.4499856233596802, "learning_rate": 0.0002553121262855776, "loss": 0.0821, "step": 3619 }, { "epoch": 0.5073580939032937, "grad_norm": 0.4605194330215454, "learning_rate": 0.0002552977756517579, "loss": 0.0744, "step": 3620 }, { "epoch": 0.5074982480728801, "grad_norm": 0.2140355408191681, "learning_rate": 0.00025528342501793824, "loss": 0.0762, "step": 3621 }, { "epoch": 0.5076384022424667, "grad_norm": 0.3987409770488739, "learning_rate": 0.0002552690743841186, "loss": 0.0593, "step": 3622 }, { "epoch": 0.5077785564120533, "grad_norm": 0.22239546477794647, "learning_rate": 0.00025525472375029896, "loss": 0.0357, "step": 3623 }, { "epoch": 0.5079187105816398, "grad_norm": 0.43409937620162964, "learning_rate": 0.0002552403731164793, "loss": 0.0626, "step": 3624 }, { "epoch": 0.5080588647512263, "grad_norm": 0.32065635919570923, "learning_rate": 0.0002552260224826596, "loss": 0.0379, "step": 3625 }, { "epoch": 0.5081990189208129, "grad_norm": 0.5844007730484009, "learning_rate": 0.00025521167184884, "loss": 0.1395, "step": 3626 }, { "epoch": 0.5083391730903994, "grad_norm": 0.424710214138031, "learning_rate": 0.0002551973212150203, "loss": 0.0891, "step": 3627 }, { "epoch": 0.508479327259986, "grad_norm": 0.6183585524559021, "learning_rate": 0.00025518297058120065, "loss": 0.1645, "step": 3628 }, { "epoch": 0.5086194814295726, "grad_norm": 0.8181776404380798, "learning_rate": 0.000255168619947381, "loss": 0.1303, "step": 3629 }, { "epoch": 0.508759635599159, "grad_norm": 0.581883430480957, "learning_rate": 0.0002551542693135613, "loss": 0.0545, "step": 3630 }, { "epoch": 0.5088997897687456, "grad_norm": 1.1187405586242676, "learning_rate": 0.00025513991867974164, "loss": 0.2164, "step": 3631 }, { "epoch": 0.5090399439383322, "grad_norm": 0.4822360575199127, "learning_rate": 0.00025512556804592197, "loss": 0.1014, "step": 3632 }, { "epoch": 0.5091800981079188, "grad_norm": 0.23033207654953003, "learning_rate": 0.00025511121741210235, 
"loss": 0.0555, "step": 3633 }, { "epoch": 0.5093202522775052, "grad_norm": 0.3463762402534485, "learning_rate": 0.0002550968667782827, "loss": 0.079, "step": 3634 }, { "epoch": 0.5094604064470918, "grad_norm": 0.7652960419654846, "learning_rate": 0.000255082516144463, "loss": 0.0931, "step": 3635 }, { "epoch": 0.5096005606166784, "grad_norm": 0.2973748445510864, "learning_rate": 0.0002550681655106434, "loss": 0.0567, "step": 3636 }, { "epoch": 0.5097407147862649, "grad_norm": 0.660727858543396, "learning_rate": 0.0002550538148768237, "loss": 0.0993, "step": 3637 }, { "epoch": 0.5098808689558514, "grad_norm": 0.504641592502594, "learning_rate": 0.00025503946424300405, "loss": 0.0945, "step": 3638 }, { "epoch": 0.510021023125438, "grad_norm": 0.31552356481552124, "learning_rate": 0.0002550251136091844, "loss": 0.1125, "step": 3639 }, { "epoch": 0.5101611772950245, "grad_norm": 0.5281522870063782, "learning_rate": 0.0002550107629753647, "loss": 0.134, "step": 3640 }, { "epoch": 0.5103013314646111, "grad_norm": 0.2179023176431656, "learning_rate": 0.00025499641234154503, "loss": 0.0381, "step": 3641 }, { "epoch": 0.5104414856341977, "grad_norm": 0.24178653955459595, "learning_rate": 0.0002549820617077254, "loss": 0.0687, "step": 3642 }, { "epoch": 0.5105816398037841, "grad_norm": 0.40493133664131165, "learning_rate": 0.00025496771107390574, "loss": 0.0782, "step": 3643 }, { "epoch": 0.5107217939733707, "grad_norm": 0.26079076528549194, "learning_rate": 0.00025495336044008607, "loss": 0.0619, "step": 3644 }, { "epoch": 0.5108619481429573, "grad_norm": 0.4745108187198639, "learning_rate": 0.00025493900980626646, "loss": 0.1587, "step": 3645 }, { "epoch": 0.5110021023125438, "grad_norm": 0.32317304611206055, "learning_rate": 0.0002549246591724468, "loss": 0.0521, "step": 3646 }, { "epoch": 0.5111422564821303, "grad_norm": 0.9599839448928833, "learning_rate": 0.0002549103085386271, "loss": 0.2165, "step": 3647 }, { "epoch": 0.5112824106517169, "grad_norm": 
0.37458568811416626, "learning_rate": 0.00025489595790480744, "loss": 0.0778, "step": 3648 }, { "epoch": 0.5114225648213034, "grad_norm": 0.9396136999130249, "learning_rate": 0.00025488160727098777, "loss": 0.2227, "step": 3649 }, { "epoch": 0.51156271899089, "grad_norm": 3.522144317626953, "learning_rate": 0.0002548672566371681, "loss": 0.6797, "step": 3650 }, { "epoch": 0.5117028731604766, "grad_norm": 0.5679569840431213, "learning_rate": 0.0002548529060033484, "loss": 0.0692, "step": 3651 }, { "epoch": 0.511843027330063, "grad_norm": 0.2392069697380066, "learning_rate": 0.0002548385553695288, "loss": 0.0502, "step": 3652 }, { "epoch": 0.5119831814996496, "grad_norm": 0.44987377524375916, "learning_rate": 0.00025482420473570914, "loss": 0.1429, "step": 3653 }, { "epoch": 0.5121233356692362, "grad_norm": 0.2524642050266266, "learning_rate": 0.00025480985410188947, "loss": 0.0616, "step": 3654 }, { "epoch": 0.5122634898388227, "grad_norm": 0.23557986319065094, "learning_rate": 0.00025479550346806985, "loss": 0.0783, "step": 3655 }, { "epoch": 0.5124036440084092, "grad_norm": 0.3249613046646118, "learning_rate": 0.0002547811528342502, "loss": 0.0685, "step": 3656 }, { "epoch": 0.5125437981779958, "grad_norm": 0.5398142337799072, "learning_rate": 0.0002547668022004305, "loss": 0.1051, "step": 3657 }, { "epoch": 0.5126839523475824, "grad_norm": 0.45770299434661865, "learning_rate": 0.00025475245156661084, "loss": 0.188, "step": 3658 }, { "epoch": 0.5128241065171689, "grad_norm": 0.4042181670665741, "learning_rate": 0.00025473810093279116, "loss": 0.0848, "step": 3659 }, { "epoch": 0.5129642606867554, "grad_norm": 0.26377978920936584, "learning_rate": 0.0002547237502989715, "loss": 0.0508, "step": 3660 }, { "epoch": 0.513104414856342, "grad_norm": 0.41191262006759644, "learning_rate": 0.0002547093996651519, "loss": 0.0947, "step": 3661 }, { "epoch": 0.5132445690259285, "grad_norm": 0.475039005279541, "learning_rate": 0.0002546950490313322, "loss": 0.0907, "step": 3662 
}, { "epoch": 0.5133847231955151, "grad_norm": 0.5674811601638794, "learning_rate": 0.00025468069839751253, "loss": 0.1717, "step": 3663 }, { "epoch": 0.5135248773651017, "grad_norm": 0.4234640300273895, "learning_rate": 0.0002546663477636929, "loss": 0.1178, "step": 3664 }, { "epoch": 0.5136650315346881, "grad_norm": 0.41077032685279846, "learning_rate": 0.00025465199712987324, "loss": 0.1458, "step": 3665 }, { "epoch": 0.5138051857042747, "grad_norm": 0.3074701130390167, "learning_rate": 0.00025463764649605357, "loss": 0.0943, "step": 3666 }, { "epoch": 0.5139453398738613, "grad_norm": 0.412673681974411, "learning_rate": 0.0002546232958622339, "loss": 0.0831, "step": 3667 }, { "epoch": 0.5140854940434478, "grad_norm": 0.4425532817840576, "learning_rate": 0.00025460894522841423, "loss": 0.0777, "step": 3668 }, { "epoch": 0.5142256482130343, "grad_norm": 0.48464179039001465, "learning_rate": 0.00025459459459459456, "loss": 0.0734, "step": 3669 }, { "epoch": 0.5143658023826209, "grad_norm": 0.5093382000923157, "learning_rate": 0.0002545802439607749, "loss": 0.0691, "step": 3670 }, { "epoch": 0.5145059565522074, "grad_norm": 0.31960538029670715, "learning_rate": 0.00025456589332695527, "loss": 0.048, "step": 3671 }, { "epoch": 0.514646110721794, "grad_norm": 0.5682074427604675, "learning_rate": 0.0002545515426931356, "loss": 0.1239, "step": 3672 }, { "epoch": 0.5147862648913806, "grad_norm": 0.6105221509933472, "learning_rate": 0.0002545371920593159, "loss": 0.1195, "step": 3673 }, { "epoch": 0.514926419060967, "grad_norm": 0.1577884405851364, "learning_rate": 0.0002545228414254963, "loss": 0.0249, "step": 3674 }, { "epoch": 0.5150665732305536, "grad_norm": 0.6720603108406067, "learning_rate": 0.00025450849079167664, "loss": 0.1105, "step": 3675 }, { "epoch": 0.5152067274001402, "grad_norm": 0.4378015995025635, "learning_rate": 0.00025449414015785697, "loss": 0.0671, "step": 3676 }, { "epoch": 0.5153468815697267, "grad_norm": 0.4326946437358856, "learning_rate": 
0.0002544797895240373, "loss": 0.1354, "step": 3677 }, { "epoch": 0.5154870357393132, "grad_norm": 0.3068216145038605, "learning_rate": 0.0002544654388902176, "loss": 0.1346, "step": 3678 }, { "epoch": 0.5156271899088998, "grad_norm": 0.405368447303772, "learning_rate": 0.00025445108825639795, "loss": 0.1416, "step": 3679 }, { "epoch": 0.5157673440784863, "grad_norm": 0.6262340545654297, "learning_rate": 0.00025443673762257833, "loss": 0.1014, "step": 3680 }, { "epoch": 0.5159074982480729, "grad_norm": 0.38591116666793823, "learning_rate": 0.00025442238698875866, "loss": 0.0652, "step": 3681 }, { "epoch": 0.5160476524176594, "grad_norm": 0.3750234544277191, "learning_rate": 0.000254408036354939, "loss": 0.0707, "step": 3682 }, { "epoch": 0.516187806587246, "grad_norm": 0.7763251662254333, "learning_rate": 0.0002543936857211193, "loss": 0.1007, "step": 3683 }, { "epoch": 0.5163279607568325, "grad_norm": 0.5143107175827026, "learning_rate": 0.00025437933508729965, "loss": 0.0745, "step": 3684 }, { "epoch": 0.5164681149264191, "grad_norm": 0.684270977973938, "learning_rate": 0.00025436498445348, "loss": 0.0437, "step": 3685 }, { "epoch": 0.5166082690960057, "grad_norm": 0.7050482034683228, "learning_rate": 0.0002543506338196603, "loss": 0.1121, "step": 3686 }, { "epoch": 0.5167484232655921, "grad_norm": 0.8040270209312439, "learning_rate": 0.0002543362831858407, "loss": 0.0722, "step": 3687 }, { "epoch": 0.5168885774351787, "grad_norm": 0.36413830518722534, "learning_rate": 0.000254321932552021, "loss": 0.1103, "step": 3688 }, { "epoch": 0.5170287316047653, "grad_norm": 0.6621695756912231, "learning_rate": 0.00025430758191820135, "loss": 0.0611, "step": 3689 }, { "epoch": 0.5171688857743518, "grad_norm": 0.4166024923324585, "learning_rate": 0.00025429323128438173, "loss": 0.0965, "step": 3690 }, { "epoch": 0.5173090399439383, "grad_norm": 0.7858618497848511, "learning_rate": 0.00025427888065056206, "loss": 0.0901, "step": 3691 }, { "epoch": 0.5174491941135249, 
"grad_norm": 0.38334763050079346, "learning_rate": 0.0002542645300167424, "loss": 0.0873, "step": 3692 }, { "epoch": 0.5175893482831114, "grad_norm": 0.5154088735580444, "learning_rate": 0.0002542501793829227, "loss": 0.1209, "step": 3693 }, { "epoch": 0.517729502452698, "grad_norm": 0.5021282434463501, "learning_rate": 0.00025423582874910304, "loss": 0.1728, "step": 3694 }, { "epoch": 0.5178696566222845, "grad_norm": 0.9212284684181213, "learning_rate": 0.00025422147811528337, "loss": 0.1217, "step": 3695 }, { "epoch": 0.518009810791871, "grad_norm": 0.6294364929199219, "learning_rate": 0.00025420712748146375, "loss": 0.1127, "step": 3696 }, { "epoch": 0.5181499649614576, "grad_norm": 1.495061993598938, "learning_rate": 0.0002541927768476441, "loss": 0.0955, "step": 3697 }, { "epoch": 0.5182901191310442, "grad_norm": 0.6642441153526306, "learning_rate": 0.0002541784262138244, "loss": 0.0836, "step": 3698 }, { "epoch": 0.5184302733006307, "grad_norm": 1.2072820663452148, "learning_rate": 0.0002541640755800048, "loss": 0.1559, "step": 3699 }, { "epoch": 0.5185704274702172, "grad_norm": 1.116833209991455, "learning_rate": 0.0002541497249461851, "loss": 0.3666, "step": 3700 }, { "epoch": 0.5187105816398038, "grad_norm": 0.39377114176750183, "learning_rate": 0.00025413537431236545, "loss": 0.0762, "step": 3701 }, { "epoch": 0.5188507358093903, "grad_norm": 0.5531027317047119, "learning_rate": 0.0002541210236785458, "loss": 0.1778, "step": 3702 }, { "epoch": 0.5189908899789769, "grad_norm": 0.4501936137676239, "learning_rate": 0.0002541066730447261, "loss": 0.0614, "step": 3703 }, { "epoch": 0.5191310441485634, "grad_norm": 0.5126311779022217, "learning_rate": 0.00025409232241090644, "loss": 0.0613, "step": 3704 }, { "epoch": 0.5192711983181499, "grad_norm": 0.41879671812057495, "learning_rate": 0.00025407797177708677, "loss": 0.0498, "step": 3705 }, { "epoch": 0.5194113524877365, "grad_norm": 0.4197855591773987, "learning_rate": 0.00025406362114326715, "loss": 0.0974, 
"step": 3706 }, { "epoch": 0.5195515066573231, "grad_norm": 0.8715519309043884, "learning_rate": 0.0002540492705094475, "loss": 0.1314, "step": 3707 }, { "epoch": 0.5196916608269097, "grad_norm": 0.636552631855011, "learning_rate": 0.0002540349198756278, "loss": 0.1547, "step": 3708 }, { "epoch": 0.5198318149964961, "grad_norm": 0.44017305970191956, "learning_rate": 0.0002540205692418082, "loss": 0.087, "step": 3709 }, { "epoch": 0.5199719691660827, "grad_norm": 0.18093568086624146, "learning_rate": 0.0002540062186079885, "loss": 0.0374, "step": 3710 }, { "epoch": 0.5201121233356693, "grad_norm": 0.3853932321071625, "learning_rate": 0.00025399186797416885, "loss": 0.1017, "step": 3711 }, { "epoch": 0.5202522775052558, "grad_norm": 0.25976938009262085, "learning_rate": 0.0002539775173403492, "loss": 0.076, "step": 3712 }, { "epoch": 0.5203924316748423, "grad_norm": 0.35607048869132996, "learning_rate": 0.0002539631667065295, "loss": 0.0831, "step": 3713 }, { "epoch": 0.5205325858444289, "grad_norm": 0.4062139689922333, "learning_rate": 0.00025394881607270983, "loss": 0.1315, "step": 3714 }, { "epoch": 0.5206727400140154, "grad_norm": 0.8705641031265259, "learning_rate": 0.0002539344654388902, "loss": 0.0913, "step": 3715 }, { "epoch": 0.520812894183602, "grad_norm": 0.4616554081439972, "learning_rate": 0.00025392011480507054, "loss": 0.1012, "step": 3716 }, { "epoch": 0.5209530483531885, "grad_norm": 0.49104246497154236, "learning_rate": 0.00025390576417125087, "loss": 0.0935, "step": 3717 }, { "epoch": 0.521093202522775, "grad_norm": 0.4312751293182373, "learning_rate": 0.0002538914135374312, "loss": 0.0405, "step": 3718 }, { "epoch": 0.5212333566923616, "grad_norm": 0.36991086602211, "learning_rate": 0.0002538770629036116, "loss": 0.0865, "step": 3719 }, { "epoch": 0.5213735108619482, "grad_norm": 0.3838135600090027, "learning_rate": 0.0002538627122697919, "loss": 0.0939, "step": 3720 }, { "epoch": 0.5215136650315347, "grad_norm": 0.9714263677597046, 
"learning_rate": 0.00025384836163597224, "loss": 0.1767, "step": 3721 }, { "epoch": 0.5216538192011212, "grad_norm": 0.4344284236431122, "learning_rate": 0.00025383401100215257, "loss": 0.1585, "step": 3722 }, { "epoch": 0.5217939733707078, "grad_norm": 0.40689384937286377, "learning_rate": 0.0002538196603683329, "loss": 0.0677, "step": 3723 }, { "epoch": 0.5219341275402943, "grad_norm": 0.7999861836433411, "learning_rate": 0.0002538053097345132, "loss": 0.1, "step": 3724 }, { "epoch": 0.5220742817098809, "grad_norm": 0.5300484895706177, "learning_rate": 0.0002537909591006936, "loss": 0.1454, "step": 3725 }, { "epoch": 0.5222144358794674, "grad_norm": 0.5659837126731873, "learning_rate": 0.00025377660846687394, "loss": 0.1146, "step": 3726 }, { "epoch": 0.5223545900490539, "grad_norm": 0.5480971932411194, "learning_rate": 0.00025376225783305426, "loss": 0.081, "step": 3727 }, { "epoch": 0.5224947442186405, "grad_norm": 0.5467221736907959, "learning_rate": 0.00025374790719923465, "loss": 0.1481, "step": 3728 }, { "epoch": 0.5226348983882271, "grad_norm": 0.26411089301109314, "learning_rate": 0.000253733556565415, "loss": 0.0447, "step": 3729 }, { "epoch": 0.5227750525578136, "grad_norm": 0.4189164340496063, "learning_rate": 0.0002537192059315953, "loss": 0.1339, "step": 3730 }, { "epoch": 0.5229152067274001, "grad_norm": 0.4686305820941925, "learning_rate": 0.00025370485529777563, "loss": 0.0545, "step": 3731 }, { "epoch": 0.5230553608969867, "grad_norm": 0.6158311367034912, "learning_rate": 0.00025369050466395596, "loss": 0.1736, "step": 3732 }, { "epoch": 0.5231955150665732, "grad_norm": 0.48443883657455444, "learning_rate": 0.0002536761540301363, "loss": 0.0679, "step": 3733 }, { "epoch": 0.5233356692361598, "grad_norm": 0.45886072516441345, "learning_rate": 0.0002536618033963166, "loss": 0.0812, "step": 3734 }, { "epoch": 0.5234758234057463, "grad_norm": 0.34823763370513916, "learning_rate": 0.000253647452762497, "loss": 0.0827, "step": 3735 }, { "epoch": 
0.5236159775753328, "grad_norm": 0.5238205194473267, "learning_rate": 0.00025363310212867733, "loss": 0.1046, "step": 3736 }, { "epoch": 0.5237561317449194, "grad_norm": 0.7899017930030823, "learning_rate": 0.00025361875149485766, "loss": 0.079, "step": 3737 }, { "epoch": 0.523896285914506, "grad_norm": 0.5810121893882751, "learning_rate": 0.00025360440086103804, "loss": 0.1014, "step": 3738 }, { "epoch": 0.5240364400840924, "grad_norm": 0.291208952665329, "learning_rate": 0.00025359005022721837, "loss": 0.0477, "step": 3739 }, { "epoch": 0.524176594253679, "grad_norm": 0.4190904498100281, "learning_rate": 0.0002535756995933987, "loss": 0.1305, "step": 3740 }, { "epoch": 0.5243167484232656, "grad_norm": 0.8952327370643616, "learning_rate": 0.00025356134895957903, "loss": 0.0828, "step": 3741 }, { "epoch": 0.5244569025928522, "grad_norm": 0.3364610970020294, "learning_rate": 0.00025354699832575936, "loss": 0.1618, "step": 3742 }, { "epoch": 0.5245970567624387, "grad_norm": 0.4337835907936096, "learning_rate": 0.0002535326476919397, "loss": 0.0525, "step": 3743 }, { "epoch": 0.5247372109320252, "grad_norm": 0.7692883610725403, "learning_rate": 0.00025351829705812007, "loss": 0.0859, "step": 3744 }, { "epoch": 0.5248773651016118, "grad_norm": 1.7545913457870483, "learning_rate": 0.0002535039464243004, "loss": 0.1859, "step": 3745 }, { "epoch": 0.5250175192711983, "grad_norm": 2.708228826522827, "learning_rate": 0.0002534895957904807, "loss": 0.7529, "step": 3746 }, { "epoch": 0.5251576734407849, "grad_norm": 1.251065969467163, "learning_rate": 0.00025347524515666105, "loss": 0.3325, "step": 3747 }, { "epoch": 0.5252978276103714, "grad_norm": 1.7777938842773438, "learning_rate": 0.0002534608945228414, "loss": 0.2494, "step": 3748 }, { "epoch": 0.5254379817799579, "grad_norm": 1.7284437417984009, "learning_rate": 0.0002534465438890217, "loss": 0.1349, "step": 3749 }, { "epoch": 0.5255781359495445, "grad_norm": 1.3511062860488892, "learning_rate": 0.0002534321932552021, 
"loss": 0.0857, "step": 3750 }, { "epoch": 0.5257182901191311, "grad_norm": 0.7963975071907043, "learning_rate": 0.0002534178426213824, "loss": 0.089, "step": 3751 }, { "epoch": 0.5258584442887176, "grad_norm": 0.2531070411205292, "learning_rate": 0.00025340349198756275, "loss": 0.0537, "step": 3752 }, { "epoch": 0.5259985984583041, "grad_norm": 0.5108381509780884, "learning_rate": 0.0002533891413537431, "loss": 0.117, "step": 3753 }, { "epoch": 0.5261387526278907, "grad_norm": 0.46662551164627075, "learning_rate": 0.00025337479071992346, "loss": 0.1447, "step": 3754 }, { "epoch": 0.5262789067974772, "grad_norm": 0.5900042057037354, "learning_rate": 0.0002533604400861038, "loss": 0.0514, "step": 3755 }, { "epoch": 0.5264190609670638, "grad_norm": 0.4459666609764099, "learning_rate": 0.0002533460894522841, "loss": 0.1248, "step": 3756 }, { "epoch": 0.5265592151366503, "grad_norm": 0.5731019377708435, "learning_rate": 0.00025333173881846445, "loss": 0.1944, "step": 3757 }, { "epoch": 0.5266993693062368, "grad_norm": 0.811814546585083, "learning_rate": 0.0002533173881846448, "loss": 0.1698, "step": 3758 }, { "epoch": 0.5268395234758234, "grad_norm": 0.2540329396724701, "learning_rate": 0.0002533030375508251, "loss": 0.0745, "step": 3759 }, { "epoch": 0.52697967764541, "grad_norm": 0.2398877888917923, "learning_rate": 0.0002532886869170055, "loss": 0.0481, "step": 3760 }, { "epoch": 0.5271198318149964, "grad_norm": 0.18241244554519653, "learning_rate": 0.0002532743362831858, "loss": 0.0255, "step": 3761 }, { "epoch": 0.527259985984583, "grad_norm": 0.5653223395347595, "learning_rate": 0.00025325998564936614, "loss": 0.1234, "step": 3762 }, { "epoch": 0.5274001401541696, "grad_norm": 0.2712160050868988, "learning_rate": 0.0002532456350155465, "loss": 0.079, "step": 3763 }, { "epoch": 0.5275402943237562, "grad_norm": 0.6309765577316284, "learning_rate": 0.00025323128438172686, "loss": 0.0835, "step": 3764 }, { "epoch": 0.5276804484933427, "grad_norm": 0.5536413192749023, 
"learning_rate": 0.0002532169337479072, "loss": 0.1897, "step": 3765 }, { "epoch": 0.5278206026629292, "grad_norm": 0.19743096828460693, "learning_rate": 0.0002532025831140875, "loss": 0.0493, "step": 3766 }, { "epoch": 0.5279607568325158, "grad_norm": 0.4978054165840149, "learning_rate": 0.00025318823248026784, "loss": 0.0894, "step": 3767 }, { "epoch": 0.5281009110021023, "grad_norm": 0.7236056923866272, "learning_rate": 0.00025317388184644817, "loss": 0.1684, "step": 3768 }, { "epoch": 0.5282410651716889, "grad_norm": 0.4210502803325653, "learning_rate": 0.0002531595312126285, "loss": 0.1019, "step": 3769 }, { "epoch": 0.5283812193412754, "grad_norm": 0.3556743860244751, "learning_rate": 0.0002531451805788089, "loss": 0.0692, "step": 3770 }, { "epoch": 0.5285213735108619, "grad_norm": 0.3373337984085083, "learning_rate": 0.0002531308299449892, "loss": 0.0596, "step": 3771 }, { "epoch": 0.5286615276804485, "grad_norm": 0.5512180924415588, "learning_rate": 0.00025311647931116954, "loss": 0.1073, "step": 3772 }, { "epoch": 0.5288016818500351, "grad_norm": 0.7351663708686829, "learning_rate": 0.0002531021286773499, "loss": 0.035, "step": 3773 }, { "epoch": 0.5289418360196216, "grad_norm": 0.414307564496994, "learning_rate": 0.00025308777804353025, "loss": 0.115, "step": 3774 }, { "epoch": 0.5290819901892081, "grad_norm": 0.8039710521697998, "learning_rate": 0.0002530734274097106, "loss": 0.2339, "step": 3775 }, { "epoch": 0.5292221443587947, "grad_norm": 0.3805621564388275, "learning_rate": 0.0002530590767758909, "loss": 0.0833, "step": 3776 }, { "epoch": 0.5293622985283812, "grad_norm": 0.3147873282432556, "learning_rate": 0.00025304472614207123, "loss": 0.0593, "step": 3777 }, { "epoch": 0.5295024526979678, "grad_norm": 0.43599745631217957, "learning_rate": 0.00025303037550825156, "loss": 0.086, "step": 3778 }, { "epoch": 0.5296426068675543, "grad_norm": 0.46452921628952026, "learning_rate": 0.00025301602487443195, "loss": 0.0752, "step": 3779 }, { "epoch": 
0.5297827610371408, "grad_norm": 0.3302789330482483, "learning_rate": 0.0002530016742406123, "loss": 0.1245, "step": 3780 }, { "epoch": 0.5299229152067274, "grad_norm": 0.32080042362213135, "learning_rate": 0.0002529873236067926, "loss": 0.0673, "step": 3781 }, { "epoch": 0.530063069376314, "grad_norm": 0.480005145072937, "learning_rate": 0.000252972972972973, "loss": 0.0665, "step": 3782 }, { "epoch": 0.5302032235459004, "grad_norm": 0.4755353331565857, "learning_rate": 0.0002529586223391533, "loss": 0.0865, "step": 3783 }, { "epoch": 0.530343377715487, "grad_norm": 0.2493240088224411, "learning_rate": 0.00025294427170533364, "loss": 0.0357, "step": 3784 }, { "epoch": 0.5304835318850736, "grad_norm": 0.44424477219581604, "learning_rate": 0.00025292992107151397, "loss": 0.0698, "step": 3785 }, { "epoch": 0.5306236860546601, "grad_norm": 0.3517785966396332, "learning_rate": 0.0002529155704376943, "loss": 0.1085, "step": 3786 }, { "epoch": 0.5307638402242467, "grad_norm": 0.6620528101921082, "learning_rate": 0.00025290121980387463, "loss": 0.1684, "step": 3787 }, { "epoch": 0.5309039943938332, "grad_norm": 0.36682337522506714, "learning_rate": 0.00025288686917005496, "loss": 0.0473, "step": 3788 }, { "epoch": 0.5310441485634197, "grad_norm": 0.5843929648399353, "learning_rate": 0.00025287251853623534, "loss": 0.0641, "step": 3789 }, { "epoch": 0.5311843027330063, "grad_norm": 0.3687399923801422, "learning_rate": 0.00025285816790241567, "loss": 0.0766, "step": 3790 }, { "epoch": 0.5313244569025929, "grad_norm": 0.39288610219955444, "learning_rate": 0.000252843817268596, "loss": 0.1129, "step": 3791 }, { "epoch": 0.5314646110721793, "grad_norm": 0.48271477222442627, "learning_rate": 0.0002528294666347764, "loss": 0.1261, "step": 3792 }, { "epoch": 0.5316047652417659, "grad_norm": 0.6124277710914612, "learning_rate": 0.0002528151160009567, "loss": 0.2185, "step": 3793 }, { "epoch": 0.5317449194113525, "grad_norm": 1.0658903121948242, "learning_rate": 
0.00025280076536713704, "loss": 0.1458, "step": 3794 }, { "epoch": 0.5318850735809391, "grad_norm": 0.4022054672241211, "learning_rate": 0.00025278641473331737, "loss": 0.0646, "step": 3795 }, { "epoch": 0.5320252277505256, "grad_norm": 0.6764711141586304, "learning_rate": 0.0002527720640994977, "loss": 0.1157, "step": 3796 }, { "epoch": 0.5321653819201121, "grad_norm": 1.4497134685516357, "learning_rate": 0.000252757713465678, "loss": 0.1317, "step": 3797 }, { "epoch": 0.5323055360896987, "grad_norm": 0.482147753238678, "learning_rate": 0.0002527433628318584, "loss": 0.0587, "step": 3798 }, { "epoch": 0.5324456902592852, "grad_norm": 1.0537500381469727, "learning_rate": 0.00025272901219803873, "loss": 0.289, "step": 3799 }, { "epoch": 0.5325858444288718, "grad_norm": 0.7479227185249329, "learning_rate": 0.00025271466156421906, "loss": 0.2757, "step": 3800 }, { "epoch": 0.5327259985984583, "grad_norm": 0.43164584040641785, "learning_rate": 0.00025270031093039945, "loss": 0.0546, "step": 3801 }, { "epoch": 0.5328661527680448, "grad_norm": 0.5817529559135437, "learning_rate": 0.0002526859602965798, "loss": 0.0922, "step": 3802 }, { "epoch": 0.5330063069376314, "grad_norm": 0.20474418997764587, "learning_rate": 0.0002526716096627601, "loss": 0.0376, "step": 3803 }, { "epoch": 0.533146461107218, "grad_norm": 0.14792661368846893, "learning_rate": 0.00025265725902894043, "loss": 0.0286, "step": 3804 }, { "epoch": 0.5332866152768044, "grad_norm": 0.485036700963974, "learning_rate": 0.00025264290839512076, "loss": 0.0606, "step": 3805 }, { "epoch": 0.533426769446391, "grad_norm": 1.491607666015625, "learning_rate": 0.0002526285577613011, "loss": 0.0684, "step": 3806 }, { "epoch": 0.5335669236159776, "grad_norm": 1.566177248954773, "learning_rate": 0.0002526142071274814, "loss": 0.1531, "step": 3807 }, { "epoch": 0.5337070777855641, "grad_norm": 0.17010587453842163, "learning_rate": 0.0002525998564936618, "loss": 0.0436, "step": 3808 }, { "epoch": 0.5338472319551507, 
"grad_norm": 0.42140185832977295, "learning_rate": 0.00025258550585984213, "loss": 0.0678, "step": 3809 }, { "epoch": 0.5339873861247372, "grad_norm": 0.33653318881988525, "learning_rate": 0.00025257115522602246, "loss": 0.1267, "step": 3810 }, { "epoch": 0.5341275402943237, "grad_norm": 0.2026250660419464, "learning_rate": 0.0002525568045922028, "loss": 0.0687, "step": 3811 }, { "epoch": 0.5342676944639103, "grad_norm": 0.34078654646873474, "learning_rate": 0.0002525424539583831, "loss": 0.0603, "step": 3812 }, { "epoch": 0.5344078486334969, "grad_norm": 0.47780469059944153, "learning_rate": 0.00025252810332456344, "loss": 0.1145, "step": 3813 }, { "epoch": 0.5345480028030833, "grad_norm": 0.1717555820941925, "learning_rate": 0.0002525137526907438, "loss": 0.0524, "step": 3814 }, { "epoch": 0.5346881569726699, "grad_norm": 0.4112985134124756, "learning_rate": 0.00025249940205692415, "loss": 0.0686, "step": 3815 }, { "epoch": 0.5348283111422565, "grad_norm": 0.696688711643219, "learning_rate": 0.0002524850514231045, "loss": 0.1908, "step": 3816 }, { "epoch": 0.534968465311843, "grad_norm": 0.5086672902107239, "learning_rate": 0.00025247070078928487, "loss": 0.1447, "step": 3817 }, { "epoch": 0.5351086194814296, "grad_norm": 0.43156856298446655, "learning_rate": 0.0002524563501554652, "loss": 0.0527, "step": 3818 }, { "epoch": 0.5352487736510161, "grad_norm": 0.6361207962036133, "learning_rate": 0.0002524419995216455, "loss": 0.1017, "step": 3819 }, { "epoch": 0.5353889278206027, "grad_norm": 0.4026937782764435, "learning_rate": 0.00025242764888782585, "loss": 0.0967, "step": 3820 }, { "epoch": 0.5355290819901892, "grad_norm": 0.28759562969207764, "learning_rate": 0.0002524132982540062, "loss": 0.062, "step": 3821 }, { "epoch": 0.5356692361597758, "grad_norm": 0.8487041592597961, "learning_rate": 0.0002523989476201865, "loss": 0.1786, "step": 3822 }, { "epoch": 0.5358093903293623, "grad_norm": 0.48422151803970337, "learning_rate": 0.00025238459698636684, "loss": 
0.0814, "step": 3823 }, { "epoch": 0.5359495444989488, "grad_norm": 0.3119770586490631, "learning_rate": 0.0002523702463525472, "loss": 0.0449, "step": 3824 }, { "epoch": 0.5360896986685354, "grad_norm": 0.778696596622467, "learning_rate": 0.00025235589571872755, "loss": 0.064, "step": 3825 }, { "epoch": 0.536229852838122, "grad_norm": 0.7794495820999146, "learning_rate": 0.0002523415450849079, "loss": 0.1609, "step": 3826 }, { "epoch": 0.5363700070077084, "grad_norm": 0.40856996178627014, "learning_rate": 0.00025232719445108826, "loss": 0.1005, "step": 3827 }, { "epoch": 0.536510161177295, "grad_norm": 0.46720045804977417, "learning_rate": 0.0002523128438172686, "loss": 0.0853, "step": 3828 }, { "epoch": 0.5366503153468816, "grad_norm": 0.47222110629081726, "learning_rate": 0.0002522984931834489, "loss": 0.0675, "step": 3829 }, { "epoch": 0.5367904695164681, "grad_norm": 0.3303963243961334, "learning_rate": 0.00025228414254962924, "loss": 0.0611, "step": 3830 }, { "epoch": 0.5369306236860547, "grad_norm": 0.34422165155410767, "learning_rate": 0.0002522697919158096, "loss": 0.1263, "step": 3831 }, { "epoch": 0.5370707778556412, "grad_norm": 0.7670153975486755, "learning_rate": 0.0002522554412819899, "loss": 0.188, "step": 3832 }, { "epoch": 0.5372109320252277, "grad_norm": 0.7226948738098145, "learning_rate": 0.0002522410906481703, "loss": 0.1451, "step": 3833 }, { "epoch": 0.5373510861948143, "grad_norm": 0.5015245676040649, "learning_rate": 0.0002522267400143506, "loss": 0.0627, "step": 3834 }, { "epoch": 0.5374912403644009, "grad_norm": 0.8689934611320496, "learning_rate": 0.00025221238938053094, "loss": 0.1289, "step": 3835 }, { "epoch": 0.5376313945339873, "grad_norm": 0.5005568265914917, "learning_rate": 0.0002521980387467113, "loss": 0.1736, "step": 3836 }, { "epoch": 0.5377715487035739, "grad_norm": 0.4905838668346405, "learning_rate": 0.00025218368811289165, "loss": 0.0801, "step": 3837 }, { "epoch": 0.5379117028731605, "grad_norm": 0.6516584753990173, 
"learning_rate": 0.000252169337479072, "loss": 0.1032, "step": 3838 }, { "epoch": 0.538051857042747, "grad_norm": 0.5888903141021729, "learning_rate": 0.0002521549868452523, "loss": 0.0943, "step": 3839 }, { "epoch": 0.5381920112123336, "grad_norm": 0.7239446043968201, "learning_rate": 0.00025214063621143264, "loss": 0.0973, "step": 3840 }, { "epoch": 0.5383321653819201, "grad_norm": 0.46840357780456543, "learning_rate": 0.00025212628557761297, "loss": 0.1203, "step": 3841 }, { "epoch": 0.5384723195515067, "grad_norm": 0.5807044506072998, "learning_rate": 0.0002521119349437933, "loss": 0.1201, "step": 3842 }, { "epoch": 0.5386124737210932, "grad_norm": 0.3918488323688507, "learning_rate": 0.0002520975843099737, "loss": 0.1207, "step": 3843 }, { "epoch": 0.5387526278906798, "grad_norm": 0.4665324091911316, "learning_rate": 0.000252083233676154, "loss": 0.0871, "step": 3844 }, { "epoch": 0.5388927820602663, "grad_norm": 1.6179182529449463, "learning_rate": 0.00025206888304233434, "loss": 0.2216, "step": 3845 }, { "epoch": 0.5390329362298528, "grad_norm": 0.9539428353309631, "learning_rate": 0.0002520545324085147, "loss": 0.2424, "step": 3846 }, { "epoch": 0.5391730903994394, "grad_norm": 1.303011178970337, "learning_rate": 0.00025204018177469505, "loss": 0.2224, "step": 3847 }, { "epoch": 0.539313244569026, "grad_norm": 0.44303184747695923, "learning_rate": 0.0002520258311408754, "loss": 0.0357, "step": 3848 }, { "epoch": 0.5394533987386124, "grad_norm": 2.4665374755859375, "learning_rate": 0.0002520114805070557, "loss": 0.4088, "step": 3849 }, { "epoch": 0.539593552908199, "grad_norm": 0.34303978085517883, "learning_rate": 0.00025199712987323603, "loss": 0.017, "step": 3850 }, { "epoch": 0.5397337070777856, "grad_norm": 0.2562355697154999, "learning_rate": 0.00025198277923941636, "loss": 0.0653, "step": 3851 }, { "epoch": 0.5398738612473721, "grad_norm": 0.37615686655044556, "learning_rate": 0.00025196842860559674, "loss": 0.1234, "step": 3852 }, { "epoch": 
0.5400140154169587, "grad_norm": 0.6752070188522339, "learning_rate": 0.00025195407797177707, "loss": 0.0934, "step": 3853 }, { "epoch": 0.5401541695865452, "grad_norm": 0.7379781603813171, "learning_rate": 0.0002519397273379574, "loss": 0.1235, "step": 3854 }, { "epoch": 0.5402943237561317, "grad_norm": 0.34804943203926086, "learning_rate": 0.0002519253767041378, "loss": 0.0816, "step": 3855 }, { "epoch": 0.5404344779257183, "grad_norm": 0.34477242827415466, "learning_rate": 0.0002519110260703181, "loss": 0.1409, "step": 3856 }, { "epoch": 0.5405746320953049, "grad_norm": 0.31011903285980225, "learning_rate": 0.00025189667543649844, "loss": 0.07, "step": 3857 }, { "epoch": 0.5407147862648913, "grad_norm": 0.6184453964233398, "learning_rate": 0.00025188232480267877, "loss": 0.0843, "step": 3858 }, { "epoch": 0.5408549404344779, "grad_norm": 0.5770922303199768, "learning_rate": 0.0002518679741688591, "loss": 0.1214, "step": 3859 }, { "epoch": 0.5409950946040645, "grad_norm": 0.5008496046066284, "learning_rate": 0.0002518536235350394, "loss": 0.1714, "step": 3860 }, { "epoch": 0.541135248773651, "grad_norm": 0.2567344307899475, "learning_rate": 0.00025183927290121976, "loss": 0.0547, "step": 3861 }, { "epoch": 0.5412754029432376, "grad_norm": 0.2598913013935089, "learning_rate": 0.00025182492226740014, "loss": 0.1308, "step": 3862 }, { "epoch": 0.5414155571128241, "grad_norm": 0.33479151129722595, "learning_rate": 0.00025181057163358047, "loss": 0.1038, "step": 3863 }, { "epoch": 0.5415557112824106, "grad_norm": 0.5118029713630676, "learning_rate": 0.0002517962209997608, "loss": 0.0638, "step": 3864 }, { "epoch": 0.5416958654519972, "grad_norm": 0.5354476571083069, "learning_rate": 0.0002517818703659412, "loss": 0.0702, "step": 3865 }, { "epoch": 0.5418360196215838, "grad_norm": 1.1596344709396362, "learning_rate": 0.0002517675197321215, "loss": 0.3, "step": 3866 }, { "epoch": 0.5419761737911702, "grad_norm": 0.25599274039268494, "learning_rate": 
0.00025175316909830184, "loss": 0.0861, "step": 3867 }, { "epoch": 0.5421163279607568, "grad_norm": 0.6188617944717407, "learning_rate": 0.00025173881846448216, "loss": 0.1269, "step": 3868 }, { "epoch": 0.5422564821303434, "grad_norm": 0.3649226725101471, "learning_rate": 0.0002517244678306625, "loss": 0.1292, "step": 3869 }, { "epoch": 0.54239663629993, "grad_norm": 0.686793327331543, "learning_rate": 0.0002517101171968428, "loss": 0.3501, "step": 3870 }, { "epoch": 0.5425367904695164, "grad_norm": 0.25735345482826233, "learning_rate": 0.0002516957665630232, "loss": 0.1089, "step": 3871 }, { "epoch": 0.542676944639103, "grad_norm": 0.33736652135849, "learning_rate": 0.00025168141592920353, "loss": 0.108, "step": 3872 }, { "epoch": 0.5428170988086896, "grad_norm": 0.6944626569747925, "learning_rate": 0.00025166706529538386, "loss": 0.1366, "step": 3873 }, { "epoch": 0.5429572529782761, "grad_norm": 0.34482455253601074, "learning_rate": 0.0002516527146615642, "loss": 0.1214, "step": 3874 }, { "epoch": 0.5430974071478627, "grad_norm": 0.38985881209373474, "learning_rate": 0.0002516383640277445, "loss": 0.1173, "step": 3875 }, { "epoch": 0.5432375613174492, "grad_norm": 1.4831211566925049, "learning_rate": 0.00025162401339392485, "loss": 0.1882, "step": 3876 }, { "epoch": 0.5433777154870357, "grad_norm": 0.41491633653640747, "learning_rate": 0.0002516096627601052, "loss": 0.1124, "step": 3877 }, { "epoch": 0.5435178696566223, "grad_norm": 0.5769524574279785, "learning_rate": 0.00025159531212628556, "loss": 0.1211, "step": 3878 }, { "epoch": 0.5436580238262089, "grad_norm": 0.33956480026245117, "learning_rate": 0.0002515809614924659, "loss": 0.0898, "step": 3879 }, { "epoch": 0.5437981779957953, "grad_norm": 0.943324089050293, "learning_rate": 0.0002515666108586462, "loss": 0.1766, "step": 3880 }, { "epoch": 0.5439383321653819, "grad_norm": 0.618470311164856, "learning_rate": 0.0002515522602248266, "loss": 0.1246, "step": 3881 }, { "epoch": 0.5440784863349685, 
"grad_norm": 0.7491851449012756, "learning_rate": 0.0002515379095910069, "loss": 0.1039, "step": 3882 }, { "epoch": 0.544218640504555, "grad_norm": 0.38389891386032104, "learning_rate": 0.00025152355895718725, "loss": 0.1167, "step": 3883 }, { "epoch": 0.5443587946741416, "grad_norm": 0.3989713788032532, "learning_rate": 0.0002515092083233676, "loss": 0.0456, "step": 3884 }, { "epoch": 0.5444989488437281, "grad_norm": 0.6277985572814941, "learning_rate": 0.0002514948576895479, "loss": 0.0933, "step": 3885 }, { "epoch": 0.5446391030133146, "grad_norm": 0.40347084403038025, "learning_rate": 0.00025148050705572824, "loss": 0.0725, "step": 3886 }, { "epoch": 0.5447792571829012, "grad_norm": 0.5757158994674683, "learning_rate": 0.0002514661564219086, "loss": 0.1307, "step": 3887 }, { "epoch": 0.5449194113524878, "grad_norm": 0.6314204335212708, "learning_rate": 0.00025145180578808895, "loss": 0.2435, "step": 3888 }, { "epoch": 0.5450595655220742, "grad_norm": 0.34765008091926575, "learning_rate": 0.0002514374551542693, "loss": 0.0891, "step": 3889 }, { "epoch": 0.5451997196916608, "grad_norm": 0.2535039484500885, "learning_rate": 0.00025142310452044966, "loss": 0.0591, "step": 3890 }, { "epoch": 0.5453398738612474, "grad_norm": 2.5984435081481934, "learning_rate": 0.00025140875388663, "loss": 0.2619, "step": 3891 }, { "epoch": 0.545480028030834, "grad_norm": 0.5420457720756531, "learning_rate": 0.0002513944032528103, "loss": 0.07, "step": 3892 }, { "epoch": 0.5456201822004204, "grad_norm": 0.5296517014503479, "learning_rate": 0.00025138005261899065, "loss": 0.105, "step": 3893 }, { "epoch": 0.545760336370007, "grad_norm": 0.6605236530303955, "learning_rate": 0.000251365701985171, "loss": 0.1957, "step": 3894 }, { "epoch": 0.5459004905395936, "grad_norm": 0.8708084225654602, "learning_rate": 0.0002513513513513513, "loss": 0.0853, "step": 3895 }, { "epoch": 0.5460406447091801, "grad_norm": 0.8178973197937012, "learning_rate": 0.00025133700071753163, "loss": 0.1541, 
"step": 3896 }, { "epoch": 0.5461807988787667, "grad_norm": 1.6926020383834839, "learning_rate": 0.000251322650083712, "loss": 0.2722, "step": 3897 }, { "epoch": 0.5463209530483532, "grad_norm": 0.3676035404205322, "learning_rate": 0.00025130829944989235, "loss": 0.0206, "step": 3898 }, { "epoch": 0.5464611072179397, "grad_norm": 0.8457313179969788, "learning_rate": 0.0002512939488160727, "loss": 0.117, "step": 3899 }, { "epoch": 0.5466012613875263, "grad_norm": 2.192903757095337, "learning_rate": 0.00025127959818225306, "loss": 0.5269, "step": 3900 }, { "epoch": 0.5467414155571129, "grad_norm": 0.48463279008865356, "learning_rate": 0.0002512652475484334, "loss": 0.1266, "step": 3901 }, { "epoch": 0.5468815697266993, "grad_norm": 0.3723304569721222, "learning_rate": 0.0002512508969146137, "loss": 0.1186, "step": 3902 }, { "epoch": 0.5470217238962859, "grad_norm": 0.5379852056503296, "learning_rate": 0.00025123654628079404, "loss": 0.1144, "step": 3903 }, { "epoch": 0.5471618780658725, "grad_norm": 0.24861934781074524, "learning_rate": 0.00025122219564697437, "loss": 0.0879, "step": 3904 }, { "epoch": 0.547302032235459, "grad_norm": 0.4434549808502197, "learning_rate": 0.0002512078450131547, "loss": 0.1819, "step": 3905 }, { "epoch": 0.5474421864050456, "grad_norm": 0.5221505761146545, "learning_rate": 0.0002511934943793351, "loss": 0.1046, "step": 3906 }, { "epoch": 0.5475823405746321, "grad_norm": 0.6591808795928955, "learning_rate": 0.0002511791437455154, "loss": 0.1118, "step": 3907 }, { "epoch": 0.5477224947442186, "grad_norm": 1.1716365814208984, "learning_rate": 0.00025116479311169574, "loss": 0.119, "step": 3908 }, { "epoch": 0.5478626489138052, "grad_norm": 0.5857277512550354, "learning_rate": 0.00025115044247787607, "loss": 0.1747, "step": 3909 }, { "epoch": 0.5480028030833918, "grad_norm": 0.29456159472465515, "learning_rate": 0.00025113609184405645, "loss": 0.0642, "step": 3910 }, { "epoch": 0.5481429572529782, "grad_norm": 0.45870885252952576, 
"learning_rate": 0.0002511217412102368, "loss": 0.0503, "step": 3911 }, { "epoch": 0.5482831114225648, "grad_norm": 0.45464789867401123, "learning_rate": 0.0002511073905764171, "loss": 0.0872, "step": 3912 }, { "epoch": 0.5484232655921514, "grad_norm": 0.40002551674842834, "learning_rate": 0.00025109303994259744, "loss": 0.076, "step": 3913 }, { "epoch": 0.548563419761738, "grad_norm": 0.4472227990627289, "learning_rate": 0.00025107868930877777, "loss": 0.0843, "step": 3914 }, { "epoch": 0.5487035739313244, "grad_norm": 1.1812400817871094, "learning_rate": 0.0002510643386749581, "loss": 0.3072, "step": 3915 }, { "epoch": 0.548843728100911, "grad_norm": 0.6882503628730774, "learning_rate": 0.0002510499880411385, "loss": 0.2001, "step": 3916 }, { "epoch": 0.5489838822704975, "grad_norm": 0.5562772750854492, "learning_rate": 0.0002510356374073188, "loss": 0.0554, "step": 3917 }, { "epoch": 0.5491240364400841, "grad_norm": 0.38235458731651306, "learning_rate": 0.00025102128677349913, "loss": 0.1116, "step": 3918 }, { "epoch": 0.5492641906096707, "grad_norm": 0.3034262955188751, "learning_rate": 0.0002510069361396795, "loss": 0.0751, "step": 3919 }, { "epoch": 0.5494043447792571, "grad_norm": 0.36013907194137573, "learning_rate": 0.00025099258550585985, "loss": 0.0735, "step": 3920 }, { "epoch": 0.5495444989488437, "grad_norm": 0.378262996673584, "learning_rate": 0.0002509782348720402, "loss": 0.0862, "step": 3921 }, { "epoch": 0.5496846531184303, "grad_norm": 0.21409563720226288, "learning_rate": 0.0002509638842382205, "loss": 0.0461, "step": 3922 }, { "epoch": 0.5498248072880169, "grad_norm": 0.7046626210212708, "learning_rate": 0.00025094953360440083, "loss": 0.1074, "step": 3923 }, { "epoch": 0.5499649614576033, "grad_norm": 0.16295380890369415, "learning_rate": 0.00025093518297058116, "loss": 0.0396, "step": 3924 }, { "epoch": 0.5501051156271899, "grad_norm": 0.56124347448349, "learning_rate": 0.00025092083233676154, "loss": 0.1418, "step": 3925 }, { "epoch": 
0.5502452697967765, "grad_norm": 0.6420082449913025, "learning_rate": 0.00025090648170294187, "loss": 0.1873, "step": 3926 }, { "epoch": 0.550385423966363, "grad_norm": 0.488039493560791, "learning_rate": 0.0002508921310691222, "loss": 0.1087, "step": 3927 }, { "epoch": 0.5505255781359495, "grad_norm": 0.7287023067474365, "learning_rate": 0.00025087778043530253, "loss": 0.1071, "step": 3928 }, { "epoch": 0.5506657323055361, "grad_norm": 0.34821316599845886, "learning_rate": 0.0002508634298014829, "loss": 0.0925, "step": 3929 }, { "epoch": 0.5508058864751226, "grad_norm": 0.8709393739700317, "learning_rate": 0.00025084907916766324, "loss": 0.1277, "step": 3930 }, { "epoch": 0.5509460406447092, "grad_norm": 0.6097391247749329, "learning_rate": 0.00025083472853384357, "loss": 0.0714, "step": 3931 }, { "epoch": 0.5510861948142958, "grad_norm": 0.5883945822715759, "learning_rate": 0.0002508203779000239, "loss": 0.1688, "step": 3932 }, { "epoch": 0.5512263489838822, "grad_norm": 0.4826839566230774, "learning_rate": 0.0002508060272662042, "loss": 0.0724, "step": 3933 }, { "epoch": 0.5513665031534688, "grad_norm": 0.7326350808143616, "learning_rate": 0.00025079167663238455, "loss": 0.0539, "step": 3934 }, { "epoch": 0.5515066573230554, "grad_norm": 0.35331735014915466, "learning_rate": 0.00025077732599856494, "loss": 0.0556, "step": 3935 }, { "epoch": 0.5516468114926419, "grad_norm": 0.4192046821117401, "learning_rate": 0.00025076297536474526, "loss": 0.0539, "step": 3936 }, { "epoch": 0.5517869656622284, "grad_norm": 0.6254921555519104, "learning_rate": 0.0002507486247309256, "loss": 0.0825, "step": 3937 }, { "epoch": 0.551927119831815, "grad_norm": 0.7162885665893555, "learning_rate": 0.0002507342740971059, "loss": 0.0957, "step": 3938 }, { "epoch": 0.5520672740014015, "grad_norm": 0.5629931092262268, "learning_rate": 0.00025071992346328625, "loss": 0.0493, "step": 3939 }, { "epoch": 0.5522074281709881, "grad_norm": 0.6187030076980591, "learning_rate": 
0.0002507055728294666, "loss": 0.0646, "step": 3940 }, { "epoch": 0.5523475823405747, "grad_norm": 0.6613685488700867, "learning_rate": 0.00025069122219564696, "loss": 0.1355, "step": 3941 }, { "epoch": 0.5524877365101611, "grad_norm": 0.7194153666496277, "learning_rate": 0.0002506768715618273, "loss": 0.0883, "step": 3942 }, { "epoch": 0.5526278906797477, "grad_norm": 0.7751923203468323, "learning_rate": 0.0002506625209280076, "loss": 0.1585, "step": 3943 }, { "epoch": 0.5527680448493343, "grad_norm": 0.4823018014431, "learning_rate": 0.00025064817029418795, "loss": 0.136, "step": 3944 }, { "epoch": 0.5529081990189209, "grad_norm": 0.5525204539299011, "learning_rate": 0.00025063381966036833, "loss": 0.0681, "step": 3945 }, { "epoch": 0.5530483531885073, "grad_norm": 0.7681614756584167, "learning_rate": 0.00025061946902654866, "loss": 0.0997, "step": 3946 }, { "epoch": 0.5531885073580939, "grad_norm": 0.36747393012046814, "learning_rate": 0.000250605118392729, "loss": 0.087, "step": 3947 }, { "epoch": 0.5533286615276805, "grad_norm": 0.6003926992416382, "learning_rate": 0.0002505907677589093, "loss": 0.1021, "step": 3948 }, { "epoch": 0.553468815697267, "grad_norm": 0.624787449836731, "learning_rate": 0.00025057641712508964, "loss": 0.1031, "step": 3949 }, { "epoch": 0.5536089698668535, "grad_norm": 2.234396457672119, "learning_rate": 0.00025056206649127, "loss": 0.6069, "step": 3950 }, { "epoch": 0.55374912403644, "grad_norm": 0.2522006928920746, "learning_rate": 0.00025054771585745036, "loss": 0.0566, "step": 3951 }, { "epoch": 0.5538892782060266, "grad_norm": 0.40768468379974365, "learning_rate": 0.0002505333652236307, "loss": 0.0913, "step": 3952 }, { "epoch": 0.5540294323756132, "grad_norm": 0.2317289113998413, "learning_rate": 0.000250519014589811, "loss": 0.0453, "step": 3953 }, { "epoch": 0.5541695865451998, "grad_norm": 0.63600093126297, "learning_rate": 0.0002505046639559914, "loss": 0.1177, "step": 3954 }, { "epoch": 0.5543097407147862, "grad_norm": 
0.5825256705284119, "learning_rate": 0.0002504903133221717, "loss": 0.1402, "step": 3955 }, { "epoch": 0.5544498948843728, "grad_norm": 0.3389924466609955, "learning_rate": 0.00025047596268835205, "loss": 0.1094, "step": 3956 }, { "epoch": 0.5545900490539594, "grad_norm": 0.6104356646537781, "learning_rate": 0.0002504616120545324, "loss": 0.1691, "step": 3957 }, { "epoch": 0.5547302032235459, "grad_norm": 0.1497412919998169, "learning_rate": 0.0002504472614207127, "loss": 0.0296, "step": 3958 }, { "epoch": 0.5548703573931324, "grad_norm": 0.4638878405094147, "learning_rate": 0.00025043291078689304, "loss": 0.0746, "step": 3959 }, { "epoch": 0.555010511562719, "grad_norm": 0.4135279953479767, "learning_rate": 0.0002504185601530734, "loss": 0.1493, "step": 3960 }, { "epoch": 0.5551506657323055, "grad_norm": 0.3829207122325897, "learning_rate": 0.00025040420951925375, "loss": 0.1188, "step": 3961 }, { "epoch": 0.5552908199018921, "grad_norm": 0.2558688819408417, "learning_rate": 0.0002503898588854341, "loss": 0.0628, "step": 3962 }, { "epoch": 0.5554309740714787, "grad_norm": 0.4334415793418884, "learning_rate": 0.0002503755082516144, "loss": 0.0449, "step": 3963 }, { "epoch": 0.5555711282410651, "grad_norm": 0.46161124110221863, "learning_rate": 0.0002503611576177948, "loss": 0.08, "step": 3964 }, { "epoch": 0.5557112824106517, "grad_norm": 0.6392189860343933, "learning_rate": 0.0002503468069839751, "loss": 0.1077, "step": 3965 }, { "epoch": 0.5558514365802383, "grad_norm": 0.6778882741928101, "learning_rate": 0.00025033245635015545, "loss": 0.1234, "step": 3966 }, { "epoch": 0.5559915907498248, "grad_norm": 0.3571174144744873, "learning_rate": 0.0002503181057163358, "loss": 0.0796, "step": 3967 }, { "epoch": 0.5561317449194113, "grad_norm": 0.5710231065750122, "learning_rate": 0.0002503037550825161, "loss": 0.1053, "step": 3968 }, { "epoch": 0.5562718990889979, "grad_norm": 0.4297938048839569, "learning_rate": 0.00025028940444869643, "loss": 0.1299, "step": 3969 }, 
{ "epoch": 0.5564120532585844, "grad_norm": 0.34932661056518555, "learning_rate": 0.0002502750538148768, "loss": 0.0847, "step": 3970 }, { "epoch": 0.556552207428171, "grad_norm": 0.82928866147995, "learning_rate": 0.00025026070318105714, "loss": 0.076, "step": 3971 }, { "epoch": 0.5566923615977575, "grad_norm": 0.36403122544288635, "learning_rate": 0.00025024635254723747, "loss": 0.1072, "step": 3972 }, { "epoch": 0.556832515767344, "grad_norm": 0.6261738538742065, "learning_rate": 0.00025023200191341786, "loss": 0.0855, "step": 3973 }, { "epoch": 0.5569726699369306, "grad_norm": 0.39374426007270813, "learning_rate": 0.0002502176512795982, "loss": 0.117, "step": 3974 }, { "epoch": 0.5571128241065172, "grad_norm": 0.26938101649284363, "learning_rate": 0.0002502033006457785, "loss": 0.1074, "step": 3975 }, { "epoch": 0.5572529782761038, "grad_norm": 0.2874991297721863, "learning_rate": 0.00025018895001195884, "loss": 0.0434, "step": 3976 }, { "epoch": 0.5573931324456902, "grad_norm": 0.19152790307998657, "learning_rate": 0.00025017459937813917, "loss": 0.0195, "step": 3977 }, { "epoch": 0.5575332866152768, "grad_norm": 0.7606933116912842, "learning_rate": 0.0002501602487443195, "loss": 0.117, "step": 3978 }, { "epoch": 0.5576734407848634, "grad_norm": 0.3154715299606323, "learning_rate": 0.0002501458981104998, "loss": 0.0322, "step": 3979 }, { "epoch": 0.5578135949544499, "grad_norm": 0.6483924388885498, "learning_rate": 0.0002501315474766802, "loss": 0.0402, "step": 3980 }, { "epoch": 0.5579537491240364, "grad_norm": 0.35352852940559387, "learning_rate": 0.00025011719684286054, "loss": 0.0186, "step": 3981 }, { "epoch": 0.558093903293623, "grad_norm": 0.646351158618927, "learning_rate": 0.00025010284620904087, "loss": 0.1501, "step": 3982 }, { "epoch": 0.5582340574632095, "grad_norm": 0.28428393602371216, "learning_rate": 0.00025008849557522125, "loss": 0.0779, "step": 3983 }, { "epoch": 0.5583742116327961, "grad_norm": 0.24462680518627167, "learning_rate": 
0.0002500741449414016, "loss": 0.0289, "step": 3984 }, { "epoch": 0.5585143658023827, "grad_norm": 0.8907936811447144, "learning_rate": 0.0002500597943075819, "loss": 0.1823, "step": 3985 }, { "epoch": 0.5586545199719691, "grad_norm": 0.3714198172092438, "learning_rate": 0.00025004544367376223, "loss": 0.0796, "step": 3986 }, { "epoch": 0.5587946741415557, "grad_norm": 0.26748713850975037, "learning_rate": 0.00025003109303994256, "loss": 0.0749, "step": 3987 }, { "epoch": 0.5589348283111423, "grad_norm": 0.3552827537059784, "learning_rate": 0.0002500167424061229, "loss": 0.0674, "step": 3988 }, { "epoch": 0.5590749824807288, "grad_norm": 0.30290699005126953, "learning_rate": 0.0002500023917723033, "loss": 0.1022, "step": 3989 }, { "epoch": 0.5592151366503153, "grad_norm": 0.38541123270988464, "learning_rate": 0.0002499880411384836, "loss": 0.0574, "step": 3990 }, { "epoch": 0.5593552908199019, "grad_norm": 0.8959506154060364, "learning_rate": 0.00024997369050466393, "loss": 0.1114, "step": 3991 }, { "epoch": 0.5594954449894884, "grad_norm": 0.46974635124206543, "learning_rate": 0.0002499593398708443, "loss": 0.0544, "step": 3992 }, { "epoch": 0.559635599159075, "grad_norm": 0.5642286539077759, "learning_rate": 0.00024994498923702464, "loss": 0.1147, "step": 3993 }, { "epoch": 0.5597757533286615, "grad_norm": 0.6111794114112854, "learning_rate": 0.00024993063860320497, "loss": 0.1448, "step": 3994 }, { "epoch": 0.559915907498248, "grad_norm": 0.3942194879055023, "learning_rate": 0.0002499162879693853, "loss": 0.0425, "step": 3995 }, { "epoch": 0.5600560616678346, "grad_norm": 0.3252815008163452, "learning_rate": 0.00024990193733556563, "loss": 0.0609, "step": 3996 }, { "epoch": 0.5601962158374212, "grad_norm": 1.5041699409484863, "learning_rate": 0.00024988758670174596, "loss": 0.1565, "step": 3997 }, { "epoch": 0.5603363700070078, "grad_norm": 2.7567338943481445, "learning_rate": 0.0002498732360679263, "loss": 0.3778, "step": 3998 }, { "epoch": 0.5604765241765942, 
"grad_norm": 1.8308205604553223, "learning_rate": 0.00024985888543410667, "loss": 0.2435, "step": 3999 }, { "epoch": 0.5606166783461808, "grad_norm": 1.7873958349227905, "learning_rate": 0.000249844534800287, "loss": 0.364, "step": 4000 }, { "epoch": 0.5607568325157674, "grad_norm": 0.4975601136684418, "learning_rate": 0.0002498301841664673, "loss": 0.1152, "step": 4001 }, { "epoch": 0.5608969866853539, "grad_norm": 0.3295333683490753, "learning_rate": 0.00024981583353264765, "loss": 0.081, "step": 4002 }, { "epoch": 0.5610371408549404, "grad_norm": 0.3756139874458313, "learning_rate": 0.000249801482898828, "loss": 0.1317, "step": 4003 }, { "epoch": 0.561177295024527, "grad_norm": 0.48621007800102234, "learning_rate": 0.0002497871322650083, "loss": 0.1518, "step": 4004 }, { "epoch": 0.5613174491941135, "grad_norm": 0.3367638885974884, "learning_rate": 0.0002497727816311887, "loss": 0.1321, "step": 4005 }, { "epoch": 0.5614576033637001, "grad_norm": 0.2516014277935028, "learning_rate": 0.000249758430997369, "loss": 0.0399, "step": 4006 }, { "epoch": 0.5615977575332867, "grad_norm": 0.44831156730651855, "learning_rate": 0.00024974408036354935, "loss": 0.0791, "step": 4007 }, { "epoch": 0.5617379117028731, "grad_norm": 0.5114452242851257, "learning_rate": 0.00024972972972972973, "loss": 0.1112, "step": 4008 }, { "epoch": 0.5618780658724597, "grad_norm": 0.21150460839271545, "learning_rate": 0.00024971537909591006, "loss": 0.0429, "step": 4009 }, { "epoch": 0.5620182200420463, "grad_norm": 0.3199412226676941, "learning_rate": 0.0002497010284620904, "loss": 0.0751, "step": 4010 }, { "epoch": 0.5621583742116328, "grad_norm": 0.6559122800827026, "learning_rate": 0.0002496866778282707, "loss": 0.1154, "step": 4011 }, { "epoch": 0.5622985283812193, "grad_norm": 0.37633848190307617, "learning_rate": 0.00024967232719445105, "loss": 0.0768, "step": 4012 }, { "epoch": 0.5624386825508059, "grad_norm": 0.27470019459724426, "learning_rate": 0.0002496579765606314, "loss": 0.1047, 
"step": 4013 }, { "epoch": 0.5625788367203924, "grad_norm": 0.4732481837272644, "learning_rate": 0.0002496436259268117, "loss": 0.1782, "step": 4014 }, { "epoch": 0.562718990889979, "grad_norm": 0.3817456364631653, "learning_rate": 0.0002496292752929921, "loss": 0.1026, "step": 4015 }, { "epoch": 0.5628591450595655, "grad_norm": 0.6931123733520508, "learning_rate": 0.0002496149246591724, "loss": 0.1483, "step": 4016 }, { "epoch": 0.562999299229152, "grad_norm": 0.3439079225063324, "learning_rate": 0.00024960057402535275, "loss": 0.1594, "step": 4017 }, { "epoch": 0.5631394533987386, "grad_norm": 0.47241100668907166, "learning_rate": 0.00024958622339153313, "loss": 0.0952, "step": 4018 }, { "epoch": 0.5632796075683252, "grad_norm": 0.2326921820640564, "learning_rate": 0.00024957187275771346, "loss": 0.0464, "step": 4019 }, { "epoch": 0.5634197617379118, "grad_norm": 0.4949483275413513, "learning_rate": 0.0002495575221238938, "loss": 0.1201, "step": 4020 }, { "epoch": 0.5635599159074982, "grad_norm": 0.26340755820274353, "learning_rate": 0.0002495431714900741, "loss": 0.0397, "step": 4021 }, { "epoch": 0.5637000700770848, "grad_norm": 0.42154282331466675, "learning_rate": 0.00024952882085625444, "loss": 0.0497, "step": 4022 }, { "epoch": 0.5638402242466714, "grad_norm": 0.42211347818374634, "learning_rate": 0.00024951447022243477, "loss": 0.063, "step": 4023 }, { "epoch": 0.5639803784162579, "grad_norm": 0.40765026211738586, "learning_rate": 0.00024950011958861515, "loss": 0.1137, "step": 4024 }, { "epoch": 0.5641205325858444, "grad_norm": 0.5014655590057373, "learning_rate": 0.0002494857689547955, "loss": 0.1083, "step": 4025 }, { "epoch": 0.564260686755431, "grad_norm": 0.5584135055541992, "learning_rate": 0.0002494714183209758, "loss": 0.1316, "step": 4026 }, { "epoch": 0.5644008409250175, "grad_norm": 0.49080297350883484, "learning_rate": 0.0002494570676871562, "loss": 0.0619, "step": 4027 }, { "epoch": 0.5645409950946041, "grad_norm": 0.5462132096290588, 
"learning_rate": 0.0002494427170533365, "loss": 0.1033, "step": 4028 }, { "epoch": 0.5646811492641907, "grad_norm": 0.7864841818809509, "learning_rate": 0.00024942836641951685, "loss": 0.1765, "step": 4029 }, { "epoch": 0.5648213034337771, "grad_norm": 0.19623635709285736, "learning_rate": 0.0002494140157856972, "loss": 0.0295, "step": 4030 }, { "epoch": 0.5649614576033637, "grad_norm": 1.5547457933425903, "learning_rate": 0.0002493996651518775, "loss": 0.1645, "step": 4031 }, { "epoch": 0.5651016117729503, "grad_norm": 0.25827813148498535, "learning_rate": 0.00024938531451805784, "loss": 0.0641, "step": 4032 }, { "epoch": 0.5652417659425368, "grad_norm": 0.5804337859153748, "learning_rate": 0.00024937096388423817, "loss": 0.0906, "step": 4033 }, { "epoch": 0.5653819201121233, "grad_norm": 0.21657679975032806, "learning_rate": 0.00024935661325041855, "loss": 0.0441, "step": 4034 }, { "epoch": 0.5655220742817099, "grad_norm": 0.41447126865386963, "learning_rate": 0.0002493422626165989, "loss": 0.1223, "step": 4035 }, { "epoch": 0.5656622284512964, "grad_norm": 0.5161694884300232, "learning_rate": 0.0002493279119827792, "loss": 0.131, "step": 4036 }, { "epoch": 0.565802382620883, "grad_norm": 0.36643195152282715, "learning_rate": 0.0002493135613489596, "loss": 0.056, "step": 4037 }, { "epoch": 0.5659425367904695, "grad_norm": 0.4670483469963074, "learning_rate": 0.0002492992107151399, "loss": 0.1201, "step": 4038 }, { "epoch": 0.566082690960056, "grad_norm": 0.5224334001541138, "learning_rate": 0.00024928486008132024, "loss": 0.103, "step": 4039 }, { "epoch": 0.5662228451296426, "grad_norm": 0.42706790566444397, "learning_rate": 0.0002492705094475006, "loss": 0.0639, "step": 4040 }, { "epoch": 0.5663629992992292, "grad_norm": 0.2869316339492798, "learning_rate": 0.0002492561588136809, "loss": 0.0215, "step": 4041 }, { "epoch": 0.5665031534688157, "grad_norm": 0.40846318006515503, "learning_rate": 0.00024924180817986123, "loss": 0.0555, "step": 4042 }, { "epoch": 
0.5666433076384022, "grad_norm": 0.26569992303848267, "learning_rate": 0.0002492274575460416, "loss": 0.0293, "step": 4043 }, { "epoch": 0.5667834618079888, "grad_norm": 0.8932842016220093, "learning_rate": 0.00024921310691222194, "loss": 0.0543, "step": 4044 }, { "epoch": 0.5669236159775753, "grad_norm": 0.5756842494010925, "learning_rate": 0.00024919875627840227, "loss": 0.0882, "step": 4045 }, { "epoch": 0.5670637701471619, "grad_norm": 0.3436899185180664, "learning_rate": 0.00024918440564458265, "loss": 0.0415, "step": 4046 }, { "epoch": 0.5672039243167484, "grad_norm": 2.1847753524780273, "learning_rate": 0.000249170055010763, "loss": 0.5203, "step": 4047 }, { "epoch": 0.567344078486335, "grad_norm": 1.7849920988082886, "learning_rate": 0.0002491557043769433, "loss": 0.3395, "step": 4048 }, { "epoch": 0.5674842326559215, "grad_norm": 1.6180174350738525, "learning_rate": 0.00024914135374312364, "loss": 0.0328, "step": 4049 }, { "epoch": 0.5676243868255081, "grad_norm": 3.5772085189819336, "learning_rate": 0.00024912700310930397, "loss": 0.6015, "step": 4050 }, { "epoch": 0.5677645409950947, "grad_norm": 0.35565653443336487, "learning_rate": 0.0002491126524754843, "loss": 0.0595, "step": 4051 }, { "epoch": 0.5679046951646811, "grad_norm": 0.6185441613197327, "learning_rate": 0.0002490983018416646, "loss": 0.1611, "step": 4052 }, { "epoch": 0.5680448493342677, "grad_norm": 0.38222718238830566, "learning_rate": 0.000249083951207845, "loss": 0.1053, "step": 4053 }, { "epoch": 0.5681850035038543, "grad_norm": 0.6351040601730347, "learning_rate": 0.00024906960057402534, "loss": 0.0941, "step": 4054 }, { "epoch": 0.5683251576734408, "grad_norm": 0.21364746987819672, "learning_rate": 0.00024905524994020566, "loss": 0.0187, "step": 4055 }, { "epoch": 0.5684653118430273, "grad_norm": 0.15619692206382751, "learning_rate": 0.00024904089930638605, "loss": 0.0398, "step": 4056 }, { "epoch": 0.5686054660126139, "grad_norm": 0.536992609500885, "learning_rate": 
0.0002490265486725664, "loss": 0.1884, "step": 4057 }, { "epoch": 0.5687456201822004, "grad_norm": 0.5837216377258301, "learning_rate": 0.0002490121980387467, "loss": 0.196, "step": 4058 }, { "epoch": 0.568885774351787, "grad_norm": 0.34991973638534546, "learning_rate": 0.00024899784740492703, "loss": 0.1003, "step": 4059 }, { "epoch": 0.5690259285213735, "grad_norm": 0.42867881059646606, "learning_rate": 0.00024898349677110736, "loss": 0.0864, "step": 4060 }, { "epoch": 0.56916608269096, "grad_norm": 0.5674730539321899, "learning_rate": 0.0002489691461372877, "loss": 0.0794, "step": 4061 }, { "epoch": 0.5693062368605466, "grad_norm": 1.384779930114746, "learning_rate": 0.00024895479550346807, "loss": 0.1561, "step": 4062 }, { "epoch": 0.5694463910301332, "grad_norm": 0.6737813949584961, "learning_rate": 0.0002489404448696484, "loss": 0.1744, "step": 4063 }, { "epoch": 0.5695865451997197, "grad_norm": 0.24421334266662598, "learning_rate": 0.00024892609423582873, "loss": 0.0753, "step": 4064 }, { "epoch": 0.5697266993693062, "grad_norm": 0.7380961775779724, "learning_rate": 0.00024891174360200906, "loss": 0.2025, "step": 4065 }, { "epoch": 0.5698668535388928, "grad_norm": 0.39453673362731934, "learning_rate": 0.0002488973929681894, "loss": 0.0693, "step": 4066 }, { "epoch": 0.5700070077084793, "grad_norm": 0.5544195771217346, "learning_rate": 0.0002488830423343697, "loss": 0.105, "step": 4067 }, { "epoch": 0.5701471618780659, "grad_norm": 0.35562485456466675, "learning_rate": 0.0002488686917005501, "loss": 0.0881, "step": 4068 }, { "epoch": 0.5702873160476524, "grad_norm": 0.3294217586517334, "learning_rate": 0.0002488543410667304, "loss": 0.086, "step": 4069 }, { "epoch": 0.5704274702172389, "grad_norm": 0.22964254021644592, "learning_rate": 0.00024883999043291076, "loss": 0.0487, "step": 4070 }, { "epoch": 0.5705676243868255, "grad_norm": 0.34469130635261536, "learning_rate": 0.0002488256397990911, "loss": 0.1562, "step": 4071 }, { "epoch": 0.5707077785564121, 
"grad_norm": 0.5755208134651184, "learning_rate": 0.00024881128916527147, "loss": 0.0813, "step": 4072 }, { "epoch": 0.5708479327259987, "grad_norm": 0.412884384393692, "learning_rate": 0.0002487969385314518, "loss": 0.0746, "step": 4073 }, { "epoch": 0.5709880868955851, "grad_norm": 0.5088204741477966, "learning_rate": 0.0002487825878976321, "loss": 0.0871, "step": 4074 }, { "epoch": 0.5711282410651717, "grad_norm": 0.4229466915130615, "learning_rate": 0.00024876823726381245, "loss": 0.1073, "step": 4075 }, { "epoch": 0.5712683952347583, "grad_norm": 0.5490579605102539, "learning_rate": 0.0002487538866299928, "loss": 0.1335, "step": 4076 }, { "epoch": 0.5714085494043448, "grad_norm": 0.5359660387039185, "learning_rate": 0.0002487395359961731, "loss": 0.1896, "step": 4077 }, { "epoch": 0.5715487035739313, "grad_norm": 0.45330074429512024, "learning_rate": 0.0002487251853623535, "loss": 0.1007, "step": 4078 }, { "epoch": 0.5716888577435179, "grad_norm": 0.35710084438323975, "learning_rate": 0.0002487108347285338, "loss": 0.0946, "step": 4079 }, { "epoch": 0.5718290119131044, "grad_norm": 0.712883710861206, "learning_rate": 0.00024869648409471415, "loss": 0.0575, "step": 4080 }, { "epoch": 0.571969166082691, "grad_norm": 1.0127742290496826, "learning_rate": 0.00024868213346089453, "loss": 0.2128, "step": 4081 }, { "epoch": 0.5721093202522775, "grad_norm": 0.5833719968795776, "learning_rate": 0.00024866778282707486, "loss": 0.1671, "step": 4082 }, { "epoch": 0.572249474421864, "grad_norm": 0.8719350695610046, "learning_rate": 0.0002486534321932552, "loss": 0.0533, "step": 4083 }, { "epoch": 0.5723896285914506, "grad_norm": 0.271806538105011, "learning_rate": 0.0002486390815594355, "loss": 0.0453, "step": 4084 }, { "epoch": 0.5725297827610372, "grad_norm": 0.3668370246887207, "learning_rate": 0.00024862473092561585, "loss": 0.0832, "step": 4085 }, { "epoch": 0.5726699369306237, "grad_norm": 0.8421066403388977, "learning_rate": 0.0002486103802917962, "loss": 0.093, 
"step": 4086 }, { "epoch": 0.5728100911002102, "grad_norm": 0.32390454411506653, "learning_rate": 0.0002485960296579765, "loss": 0.0328, "step": 4087 }, { "epoch": 0.5729502452697968, "grad_norm": 0.5096945762634277, "learning_rate": 0.0002485816790241569, "loss": 0.0521, "step": 4088 }, { "epoch": 0.5730903994393833, "grad_norm": 0.30692732334136963, "learning_rate": 0.0002485673283903372, "loss": 0.0471, "step": 4089 }, { "epoch": 0.5732305536089699, "grad_norm": 0.4252639710903168, "learning_rate": 0.00024855297775651754, "loss": 0.1007, "step": 4090 }, { "epoch": 0.5733707077785564, "grad_norm": 0.24195390939712524, "learning_rate": 0.0002485386271226979, "loss": 0.0239, "step": 4091 }, { "epoch": 0.5735108619481429, "grad_norm": 0.6424862146377563, "learning_rate": 0.00024852427648887825, "loss": 0.083, "step": 4092 }, { "epoch": 0.5736510161177295, "grad_norm": 1.4497126340866089, "learning_rate": 0.0002485099258550586, "loss": 0.1553, "step": 4093 }, { "epoch": 0.5737911702873161, "grad_norm": 0.23256167769432068, "learning_rate": 0.0002484955752212389, "loss": 0.048, "step": 4094 }, { "epoch": 0.5739313244569026, "grad_norm": 0.904105007648468, "learning_rate": 0.00024848122458741924, "loss": 0.0256, "step": 4095 }, { "epoch": 0.5740714786264891, "grad_norm": 1.0455588102340698, "learning_rate": 0.00024846687395359957, "loss": 0.101, "step": 4096 }, { "epoch": 0.5742116327960757, "grad_norm": 0.6662237048149109, "learning_rate": 0.00024845252331977995, "loss": 0.1583, "step": 4097 }, { "epoch": 0.5743517869656622, "grad_norm": 0.3509131073951721, "learning_rate": 0.0002484381726859603, "loss": 0.1066, "step": 4098 }, { "epoch": 0.5744919411352488, "grad_norm": 2.289806365966797, "learning_rate": 0.0002484238220521406, "loss": 0.4292, "step": 4099 }, { "epoch": 0.5746320953048353, "grad_norm": 3.5445199012756348, "learning_rate": 0.00024840947141832094, "loss": 0.2655, "step": 4100 }, { "epoch": 0.5747722494744218, "grad_norm": 0.46635720133781433, 
"learning_rate": 0.0002483951207845013, "loss": 0.0838, "step": 4101 }, { "epoch": 0.5749124036440084, "grad_norm": 0.5635322332382202, "learning_rate": 0.00024838077015068165, "loss": 0.1241, "step": 4102 }, { "epoch": 0.575052557813595, "grad_norm": 0.29454296827316284, "learning_rate": 0.000248366419516862, "loss": 0.0616, "step": 4103 }, { "epoch": 0.5751927119831814, "grad_norm": 0.6339954733848572, "learning_rate": 0.0002483520688830423, "loss": 0.0947, "step": 4104 }, { "epoch": 0.575332866152768, "grad_norm": 1.1464428901672363, "learning_rate": 0.00024833771824922263, "loss": 0.1619, "step": 4105 }, { "epoch": 0.5754730203223546, "grad_norm": 0.35010188817977905, "learning_rate": 0.00024832336761540296, "loss": 0.0692, "step": 4106 }, { "epoch": 0.5756131744919412, "grad_norm": 0.6388195753097534, "learning_rate": 0.00024830901698158335, "loss": 0.1135, "step": 4107 }, { "epoch": 0.5757533286615277, "grad_norm": 0.4957703948020935, "learning_rate": 0.0002482946663477637, "loss": 0.1491, "step": 4108 }, { "epoch": 0.5758934828311142, "grad_norm": 0.5899906158447266, "learning_rate": 0.000248280315713944, "loss": 0.1167, "step": 4109 }, { "epoch": 0.5760336370007008, "grad_norm": 0.5695058703422546, "learning_rate": 0.0002482659650801244, "loss": 0.0461, "step": 4110 }, { "epoch": 0.5761737911702873, "grad_norm": 0.4779747724533081, "learning_rate": 0.0002482516144463047, "loss": 0.1222, "step": 4111 }, { "epoch": 0.5763139453398739, "grad_norm": 0.46240243315696716, "learning_rate": 0.00024823726381248504, "loss": 0.1495, "step": 4112 }, { "epoch": 0.5764540995094604, "grad_norm": 0.5702279806137085, "learning_rate": 0.00024822291317866537, "loss": 0.071, "step": 4113 }, { "epoch": 0.5765942536790469, "grad_norm": 0.3322455585002899, "learning_rate": 0.0002482085625448457, "loss": 0.0652, "step": 4114 }, { "epoch": 0.5767344078486335, "grad_norm": 0.16973331570625305, "learning_rate": 0.00024819421191102603, "loss": 0.0563, "step": 4115 }, { "epoch": 
0.5768745620182201, "grad_norm": 0.290131539106369, "learning_rate": 0.0002481798612772064, "loss": 0.0844, "step": 4116 }, { "epoch": 0.5770147161878066, "grad_norm": 0.5400785207748413, "learning_rate": 0.00024816551064338674, "loss": 0.1359, "step": 4117 }, { "epoch": 0.5771548703573931, "grad_norm": 0.3293546438217163, "learning_rate": 0.00024815116000956707, "loss": 0.0684, "step": 4118 }, { "epoch": 0.5772950245269797, "grad_norm": 0.2727906107902527, "learning_rate": 0.0002481368093757474, "loss": 0.0525, "step": 4119 }, { "epoch": 0.5774351786965662, "grad_norm": 0.6346110105514526, "learning_rate": 0.0002481224587419278, "loss": 0.0634, "step": 4120 }, { "epoch": 0.5775753328661528, "grad_norm": 0.7491541504859924, "learning_rate": 0.0002481081081081081, "loss": 0.0766, "step": 4121 }, { "epoch": 0.5777154870357393, "grad_norm": 0.15558725595474243, "learning_rate": 0.00024809375747428844, "loss": 0.036, "step": 4122 }, { "epoch": 0.5778556412053258, "grad_norm": 0.6777389645576477, "learning_rate": 0.00024807940684046877, "loss": 0.054, "step": 4123 }, { "epoch": 0.5779957953749124, "grad_norm": 0.6266332864761353, "learning_rate": 0.0002480650562066491, "loss": 0.1201, "step": 4124 }, { "epoch": 0.578135949544499, "grad_norm": 0.40791377425193787, "learning_rate": 0.0002480507055728294, "loss": 0.0563, "step": 4125 }, { "epoch": 0.5782761037140854, "grad_norm": 1.1017180681228638, "learning_rate": 0.0002480363549390098, "loss": 0.2022, "step": 4126 }, { "epoch": 0.578416257883672, "grad_norm": 0.44981738924980164, "learning_rate": 0.00024802200430519013, "loss": 0.0508, "step": 4127 }, { "epoch": 0.5785564120532586, "grad_norm": 0.7006047368049622, "learning_rate": 0.00024800765367137046, "loss": 0.1259, "step": 4128 }, { "epoch": 0.5786965662228452, "grad_norm": 0.3415127992630005, "learning_rate": 0.0002479933030375508, "loss": 0.0794, "step": 4129 }, { "epoch": 0.5788367203924317, "grad_norm": 0.5654847025871277, "learning_rate": 
0.0002479789524037311, "loss": 0.1126, "step": 4130 }, { "epoch": 0.5789768745620182, "grad_norm": 0.4290890097618103, "learning_rate": 0.00024796460176991145, "loss": 0.0566, "step": 4131 }, { "epoch": 0.5791170287316048, "grad_norm": 1.3742039203643799, "learning_rate": 0.00024795025113609183, "loss": 0.1048, "step": 4132 }, { "epoch": 0.5792571829011913, "grad_norm": 0.4091750383377075, "learning_rate": 0.00024793590050227216, "loss": 0.0553, "step": 4133 }, { "epoch": 0.5793973370707779, "grad_norm": 0.2758570909500122, "learning_rate": 0.0002479215498684525, "loss": 0.0683, "step": 4134 }, { "epoch": 0.5795374912403644, "grad_norm": 0.4589548707008362, "learning_rate": 0.0002479071992346328, "loss": 0.0775, "step": 4135 }, { "epoch": 0.5796776454099509, "grad_norm": 0.5256208181381226, "learning_rate": 0.0002478928486008132, "loss": 0.0917, "step": 4136 }, { "epoch": 0.5798177995795375, "grad_norm": 0.6021117568016052, "learning_rate": 0.00024787849796699353, "loss": 0.0724, "step": 4137 }, { "epoch": 0.5799579537491241, "grad_norm": 0.37503284215927124, "learning_rate": 0.00024786414733317386, "loss": 0.0459, "step": 4138 }, { "epoch": 0.5800981079187106, "grad_norm": 1.1888717412948608, "learning_rate": 0.0002478497966993542, "loss": 0.1642, "step": 4139 }, { "epoch": 0.5802382620882971, "grad_norm": 0.5471056699752808, "learning_rate": 0.0002478354460655345, "loss": 0.1095, "step": 4140 }, { "epoch": 0.5803784162578837, "grad_norm": 0.42414405941963196, "learning_rate": 0.00024782109543171484, "loss": 0.0668, "step": 4141 }, { "epoch": 0.5805185704274702, "grad_norm": 0.565610945224762, "learning_rate": 0.0002478067447978952, "loss": 0.0669, "step": 4142 }, { "epoch": 0.5806587245970568, "grad_norm": 1.8306336402893066, "learning_rate": 0.00024779239416407555, "loss": 0.099, "step": 4143 }, { "epoch": 0.5807988787666433, "grad_norm": 0.6975414752960205, "learning_rate": 0.0002477780435302559, "loss": 0.0682, "step": 4144 }, { "epoch": 0.5809390329362298, 
"grad_norm": 0.38210222125053406, "learning_rate": 0.00024776369289643626, "loss": 0.09, "step": 4145 }, { "epoch": 0.5810791871058164, "grad_norm": 0.931677520275116, "learning_rate": 0.0002477493422626166, "loss": 0.0767, "step": 4146 }, { "epoch": 0.581219341275403, "grad_norm": 0.7239224314689636, "learning_rate": 0.0002477349916287969, "loss": 0.2869, "step": 4147 }, { "epoch": 0.5813594954449894, "grad_norm": 0.3559267520904541, "learning_rate": 0.00024772064099497725, "loss": 0.0428, "step": 4148 }, { "epoch": 0.581499649614576, "grad_norm": 2.6591994762420654, "learning_rate": 0.0002477062903611576, "loss": 0.3192, "step": 4149 }, { "epoch": 0.5816398037841626, "grad_norm": 1.7101879119873047, "learning_rate": 0.0002476919397273379, "loss": 0.3687, "step": 4150 }, { "epoch": 0.5817799579537491, "grad_norm": 0.552477240562439, "learning_rate": 0.0002476775890935183, "loss": 0.0997, "step": 4151 }, { "epoch": 0.5819201121233357, "grad_norm": 0.8018133640289307, "learning_rate": 0.0002476632384596986, "loss": 0.1595, "step": 4152 }, { "epoch": 0.5820602662929222, "grad_norm": 0.46562767028808594, "learning_rate": 0.00024764888782587895, "loss": 0.1232, "step": 4153 }, { "epoch": 0.5822004204625087, "grad_norm": 0.5799255967140198, "learning_rate": 0.0002476345371920593, "loss": 0.0703, "step": 4154 }, { "epoch": 0.5823405746320953, "grad_norm": 0.5444428324699402, "learning_rate": 0.00024762018655823966, "loss": 0.1088, "step": 4155 }, { "epoch": 0.5824807288016819, "grad_norm": 0.44830042123794556, "learning_rate": 0.00024760583592442, "loss": 0.1067, "step": 4156 }, { "epoch": 0.5826208829712684, "grad_norm": 0.29601743817329407, "learning_rate": 0.0002475914852906003, "loss": 0.0516, "step": 4157 }, { "epoch": 0.5827610371408549, "grad_norm": 0.5629777312278748, "learning_rate": 0.00024757713465678064, "loss": 0.0786, "step": 4158 }, { "epoch": 0.5829011913104415, "grad_norm": 0.5988829135894775, "learning_rate": 0.000247562784022961, "loss": 0.1206, 
"step": 4159 }, { "epoch": 0.5830413454800281, "grad_norm": 0.35152924060821533, "learning_rate": 0.0002475484333891413, "loss": 0.0503, "step": 4160 }, { "epoch": 0.5831814996496145, "grad_norm": 0.4479272663593292, "learning_rate": 0.0002475340827553217, "loss": 0.0567, "step": 4161 }, { "epoch": 0.5833216538192011, "grad_norm": 0.3762878179550171, "learning_rate": 0.000247519732121502, "loss": 0.0888, "step": 4162 }, { "epoch": 0.5834618079887877, "grad_norm": 0.36845239996910095, "learning_rate": 0.00024750538148768234, "loss": 0.0681, "step": 4163 }, { "epoch": 0.5836019621583742, "grad_norm": 0.6185504794120789, "learning_rate": 0.0002474910308538627, "loss": 0.166, "step": 4164 }, { "epoch": 0.5837421163279608, "grad_norm": 0.39327865839004517, "learning_rate": 0.00024747668022004305, "loss": 0.1149, "step": 4165 }, { "epoch": 0.5838822704975473, "grad_norm": 0.33189845085144043, "learning_rate": 0.0002474623295862234, "loss": 0.1345, "step": 4166 }, { "epoch": 0.5840224246671338, "grad_norm": 0.5765977501869202, "learning_rate": 0.0002474479789524037, "loss": 0.1014, "step": 4167 }, { "epoch": 0.5841625788367204, "grad_norm": 0.18811741471290588, "learning_rate": 0.00024743362831858404, "loss": 0.0539, "step": 4168 }, { "epoch": 0.584302733006307, "grad_norm": 0.3931630253791809, "learning_rate": 0.00024741927768476437, "loss": 0.081, "step": 4169 }, { "epoch": 0.5844428871758934, "grad_norm": 0.3942629098892212, "learning_rate": 0.0002474049270509447, "loss": 0.0766, "step": 4170 }, { "epoch": 0.58458304134548, "grad_norm": 0.3167996108531952, "learning_rate": 0.0002473905764171251, "loss": 0.0731, "step": 4171 }, { "epoch": 0.5847231955150666, "grad_norm": 0.3922867774963379, "learning_rate": 0.0002473762257833054, "loss": 0.0663, "step": 4172 }, { "epoch": 0.5848633496846531, "grad_norm": 0.40867912769317627, "learning_rate": 0.00024736187514948574, "loss": 0.0992, "step": 4173 }, { "epoch": 0.5850035038542397, "grad_norm": 0.41855743527412415, 
"learning_rate": 0.0002473475245156661, "loss": 0.091, "step": 4174 }, { "epoch": 0.5851436580238262, "grad_norm": 0.28520187735557556, "learning_rate": 0.00024733317388184645, "loss": 0.1134, "step": 4175 }, { "epoch": 0.5852838121934127, "grad_norm": 0.7383517026901245, "learning_rate": 0.0002473188232480268, "loss": 0.091, "step": 4176 }, { "epoch": 0.5854239663629993, "grad_norm": 0.6429657936096191, "learning_rate": 0.0002473044726142071, "loss": 0.1305, "step": 4177 }, { "epoch": 0.5855641205325859, "grad_norm": 0.9740548133850098, "learning_rate": 0.00024729012198038743, "loss": 0.1399, "step": 4178 }, { "epoch": 0.5857042747021723, "grad_norm": 0.7544068098068237, "learning_rate": 0.00024727577134656776, "loss": 0.087, "step": 4179 }, { "epoch": 0.5858444288717589, "grad_norm": 0.30386796593666077, "learning_rate": 0.00024726142071274814, "loss": 0.0606, "step": 4180 }, { "epoch": 0.5859845830413455, "grad_norm": 0.3910101354122162, "learning_rate": 0.00024724707007892847, "loss": 0.0623, "step": 4181 }, { "epoch": 0.5861247372109321, "grad_norm": 0.3714311420917511, "learning_rate": 0.0002472327194451088, "loss": 0.1029, "step": 4182 }, { "epoch": 0.5862648913805185, "grad_norm": 0.3898859918117523, "learning_rate": 0.0002472183688112892, "loss": 0.0612, "step": 4183 }, { "epoch": 0.5864050455501051, "grad_norm": 1.8018828630447388, "learning_rate": 0.0002472040181774695, "loss": 0.076, "step": 4184 }, { "epoch": 0.5865451997196917, "grad_norm": 0.35695692896842957, "learning_rate": 0.00024718966754364984, "loss": 0.1092, "step": 4185 }, { "epoch": 0.5866853538892782, "grad_norm": 0.3678378462791443, "learning_rate": 0.00024717531690983017, "loss": 0.0538, "step": 4186 }, { "epoch": 0.5868255080588648, "grad_norm": 0.2878757417201996, "learning_rate": 0.0002471609662760105, "loss": 0.1163, "step": 4187 }, { "epoch": 0.5869656622284513, "grad_norm": 0.6163018941879272, "learning_rate": 0.0002471466156421908, "loss": 0.1305, "step": 4188 }, { "epoch": 
0.5871058163980378, "grad_norm": 0.28597933053970337, "learning_rate": 0.00024713226500837116, "loss": 0.0423, "step": 4189 }, { "epoch": 0.5872459705676244, "grad_norm": 0.41139349341392517, "learning_rate": 0.00024711791437455154, "loss": 0.0457, "step": 4190 }, { "epoch": 0.587386124737211, "grad_norm": 0.6114638447761536, "learning_rate": 0.00024710356374073187, "loss": 0.0901, "step": 4191 }, { "epoch": 0.5875262789067974, "grad_norm": 0.24156561493873596, "learning_rate": 0.0002470892131069122, "loss": 0.0479, "step": 4192 }, { "epoch": 0.587666433076384, "grad_norm": 0.660129964351654, "learning_rate": 0.0002470748624730925, "loss": 0.0646, "step": 4193 }, { "epoch": 0.5878065872459706, "grad_norm": 0.6134944558143616, "learning_rate": 0.00024706051183927285, "loss": 0.0892, "step": 4194 }, { "epoch": 0.5879467414155571, "grad_norm": 0.3676808178424835, "learning_rate": 0.00024704616120545323, "loss": 0.1095, "step": 4195 }, { "epoch": 0.5880868955851437, "grad_norm": 0.47366175055503845, "learning_rate": 0.00024703181057163356, "loss": 0.0537, "step": 4196 }, { "epoch": 0.5882270497547302, "grad_norm": 0.8181947469711304, "learning_rate": 0.0002470174599378139, "loss": 0.1164, "step": 4197 }, { "epoch": 0.5883672039243167, "grad_norm": 0.4179781973361969, "learning_rate": 0.0002470031093039942, "loss": 0.0861, "step": 4198 }, { "epoch": 0.5885073580939033, "grad_norm": 0.7941940426826477, "learning_rate": 0.0002469887586701746, "loss": 0.1255, "step": 4199 }, { "epoch": 0.5886475122634899, "grad_norm": 1.4141478538513184, "learning_rate": 0.00024697440803635493, "loss": 0.2585, "step": 4200 }, { "epoch": 0.5887876664330763, "grad_norm": 0.6340208649635315, "learning_rate": 0.00024696005740253526, "loss": 0.0661, "step": 4201 }, { "epoch": 0.5889278206026629, "grad_norm": 0.6301118731498718, "learning_rate": 0.0002469457067687156, "loss": 0.1244, "step": 4202 }, { "epoch": 0.5890679747722495, "grad_norm": 0.3215789794921875, "learning_rate": 
0.0002469313561348959, "loss": 0.0701, "step": 4203 }, { "epoch": 0.589208128941836, "grad_norm": 0.2909778952598572, "learning_rate": 0.00024691700550107625, "loss": 0.0475, "step": 4204 }, { "epoch": 0.5893482831114225, "grad_norm": 0.49275925755500793, "learning_rate": 0.0002469026548672566, "loss": 0.1673, "step": 4205 }, { "epoch": 0.5894884372810091, "grad_norm": 0.3579420745372772, "learning_rate": 0.00024688830423343696, "loss": 0.1337, "step": 4206 }, { "epoch": 0.5896285914505957, "grad_norm": 0.453367680311203, "learning_rate": 0.0002468739535996173, "loss": 0.0597, "step": 4207 }, { "epoch": 0.5897687456201822, "grad_norm": 0.9823862314224243, "learning_rate": 0.0002468596029657976, "loss": 0.0816, "step": 4208 }, { "epoch": 0.5899088997897688, "grad_norm": 0.43889015913009644, "learning_rate": 0.000246845252331978, "loss": 0.1123, "step": 4209 }, { "epoch": 0.5900490539593553, "grad_norm": 0.16822578012943268, "learning_rate": 0.0002468309016981583, "loss": 0.0227, "step": 4210 }, { "epoch": 0.5901892081289418, "grad_norm": 0.7074646949768066, "learning_rate": 0.00024681655106433865, "loss": 0.092, "step": 4211 }, { "epoch": 0.5903293622985284, "grad_norm": 0.35537704825401306, "learning_rate": 0.000246802200430519, "loss": 0.0877, "step": 4212 }, { "epoch": 0.590469516468115, "grad_norm": 0.28335681557655334, "learning_rate": 0.0002467878497966993, "loss": 0.1035, "step": 4213 }, { "epoch": 0.5906096706377014, "grad_norm": 0.17370456457138062, "learning_rate": 0.00024677349916287964, "loss": 0.0659, "step": 4214 }, { "epoch": 0.590749824807288, "grad_norm": 0.4285907745361328, "learning_rate": 0.00024675914852906, "loss": 0.0802, "step": 4215 }, { "epoch": 0.5908899789768746, "grad_norm": 0.36889031529426575, "learning_rate": 0.00024674479789524035, "loss": 0.046, "step": 4216 }, { "epoch": 0.5910301331464611, "grad_norm": 0.3619019091129303, "learning_rate": 0.0002467304472614207, "loss": 0.1123, "step": 4217 }, { "epoch": 0.5911702873160477, 
"grad_norm": 0.3716450333595276, "learning_rate": 0.00024671609662760106, "loss": 0.118, "step": 4218 }, { "epoch": 0.5913104414856342, "grad_norm": 0.5450120568275452, "learning_rate": 0.0002467017459937814, "loss": 0.0497, "step": 4219 }, { "epoch": 0.5914505956552207, "grad_norm": 0.43615761399269104, "learning_rate": 0.0002466873953599617, "loss": 0.092, "step": 4220 }, { "epoch": 0.5915907498248073, "grad_norm": 0.6440950632095337, "learning_rate": 0.00024667304472614205, "loss": 0.0817, "step": 4221 }, { "epoch": 0.5917309039943939, "grad_norm": 0.34157589077949524, "learning_rate": 0.0002466586940923224, "loss": 0.0612, "step": 4222 }, { "epoch": 0.5918710581639803, "grad_norm": 0.29978910088539124, "learning_rate": 0.0002466443434585027, "loss": 0.0744, "step": 4223 }, { "epoch": 0.5920112123335669, "grad_norm": 0.3031977415084839, "learning_rate": 0.00024662999282468303, "loss": 0.122, "step": 4224 }, { "epoch": 0.5921513665031535, "grad_norm": 0.2998000681400299, "learning_rate": 0.0002466156421908634, "loss": 0.0563, "step": 4225 }, { "epoch": 0.59229152067274, "grad_norm": 0.288450688123703, "learning_rate": 0.00024660129155704375, "loss": 0.065, "step": 4226 }, { "epoch": 0.5924316748423265, "grad_norm": 0.5636252164840698, "learning_rate": 0.0002465869409232241, "loss": 0.2363, "step": 4227 }, { "epoch": 0.5925718290119131, "grad_norm": 0.7118433117866516, "learning_rate": 0.00024657259028940446, "loss": 0.0742, "step": 4228 }, { "epoch": 0.5927119831814996, "grad_norm": 0.31525370478630066, "learning_rate": 0.0002465582396555848, "loss": 0.0596, "step": 4229 }, { "epoch": 0.5928521373510862, "grad_norm": 0.36143434047698975, "learning_rate": 0.0002465438890217651, "loss": 0.0937, "step": 4230 }, { "epoch": 0.5929922915206728, "grad_norm": 0.7440447211265564, "learning_rate": 0.00024652953838794544, "loss": 0.1241, "step": 4231 }, { "epoch": 0.5931324456902592, "grad_norm": 0.38406074047088623, "learning_rate": 0.00024651518775412577, "loss": 0.1021, 
"step": 4232 }, { "epoch": 0.5932725998598458, "grad_norm": 0.33659985661506653, "learning_rate": 0.0002465008371203061, "loss": 0.0783, "step": 4233 }, { "epoch": 0.5934127540294324, "grad_norm": 0.4978031516075134, "learning_rate": 0.0002464864864864865, "loss": 0.0741, "step": 4234 }, { "epoch": 0.593552908199019, "grad_norm": 0.4831142723560333, "learning_rate": 0.0002464721358526668, "loss": 0.1532, "step": 4235 }, { "epoch": 0.5936930623686054, "grad_norm": 0.5020435452461243, "learning_rate": 0.00024645778521884714, "loss": 0.1099, "step": 4236 }, { "epoch": 0.593833216538192, "grad_norm": 0.3171883821487427, "learning_rate": 0.0002464434345850275, "loss": 0.0446, "step": 4237 }, { "epoch": 0.5939733707077786, "grad_norm": 0.3256409466266632, "learning_rate": 0.00024642908395120785, "loss": 0.1188, "step": 4238 }, { "epoch": 0.5941135248773651, "grad_norm": 0.5025069713592529, "learning_rate": 0.0002464147333173882, "loss": 0.23, "step": 4239 }, { "epoch": 0.5942536790469517, "grad_norm": 1.327749490737915, "learning_rate": 0.0002464003826835685, "loss": 0.0435, "step": 4240 }, { "epoch": 0.5943938332165382, "grad_norm": 1.1041979789733887, "learning_rate": 0.00024638603204974884, "loss": 0.1733, "step": 4241 }, { "epoch": 0.5945339873861247, "grad_norm": 0.6252986788749695, "learning_rate": 0.00024637168141592917, "loss": 0.1012, "step": 4242 }, { "epoch": 0.5946741415557113, "grad_norm": 0.4027010202407837, "learning_rate": 0.0002463573307821095, "loss": 0.1079, "step": 4243 }, { "epoch": 0.5948142957252979, "grad_norm": 0.6175240278244019, "learning_rate": 0.0002463429801482899, "loss": 0.1914, "step": 4244 }, { "epoch": 0.5949544498948843, "grad_norm": 0.3476596474647522, "learning_rate": 0.0002463286295144702, "loss": 0.1145, "step": 4245 }, { "epoch": 0.5950946040644709, "grad_norm": 0.364168256521225, "learning_rate": 0.00024631427888065053, "loss": 0.0803, "step": 4246 }, { "epoch": 0.5952347582340575, "grad_norm": 0.9181652665138245, 
"learning_rate": 0.0002462999282468309, "loss": 0.4212, "step": 4247 }, { "epoch": 0.595374912403644, "grad_norm": 0.6801532506942749, "learning_rate": 0.00024628557761301124, "loss": 0.3116, "step": 4248 }, { "epoch": 0.5955150665732305, "grad_norm": 0.6569100022315979, "learning_rate": 0.0002462712269791916, "loss": 0.1415, "step": 4249 }, { "epoch": 0.5956552207428171, "grad_norm": 0.5212199091911316, "learning_rate": 0.0002462568763453719, "loss": 0.0629, "step": 4250 }, { "epoch": 0.5957953749124036, "grad_norm": 0.48831114172935486, "learning_rate": 0.00024624252571155223, "loss": 0.1779, "step": 4251 }, { "epoch": 0.5959355290819902, "grad_norm": 0.1731216460466385, "learning_rate": 0.00024622817507773256, "loss": 0.0329, "step": 4252 }, { "epoch": 0.5960756832515768, "grad_norm": 0.3428407609462738, "learning_rate": 0.00024621382444391294, "loss": 0.0972, "step": 4253 }, { "epoch": 0.5962158374211632, "grad_norm": 0.3357507586479187, "learning_rate": 0.00024619947381009327, "loss": 0.0578, "step": 4254 }, { "epoch": 0.5963559915907498, "grad_norm": 0.27311787009239197, "learning_rate": 0.0002461851231762736, "loss": 0.0872, "step": 4255 }, { "epoch": 0.5964961457603364, "grad_norm": 0.34937262535095215, "learning_rate": 0.00024617077254245393, "loss": 0.0711, "step": 4256 }, { "epoch": 0.596636299929923, "grad_norm": 0.2117593139410019, "learning_rate": 0.00024615642190863426, "loss": 0.0533, "step": 4257 }, { "epoch": 0.5967764540995094, "grad_norm": 0.3577021062374115, "learning_rate": 0.0002461420712748146, "loss": 0.158, "step": 4258 }, { "epoch": 0.596916608269096, "grad_norm": 0.19032466411590576, "learning_rate": 0.00024612772064099497, "loss": 0.0586, "step": 4259 }, { "epoch": 0.5970567624386826, "grad_norm": 0.7568800449371338, "learning_rate": 0.0002461133700071753, "loss": 0.0689, "step": 4260 }, { "epoch": 0.5971969166082691, "grad_norm": 0.8177509903907776, "learning_rate": 0.0002460990193733556, "loss": 0.0727, "step": 4261 }, { "epoch": 
0.5973370707778557, "grad_norm": 0.41709256172180176, "learning_rate": 0.00024608466873953595, "loss": 0.0788, "step": 4262 }, { "epoch": 0.5974772249474422, "grad_norm": 0.3124392628669739, "learning_rate": 0.00024607031810571634, "loss": 0.0566, "step": 4263 }, { "epoch": 0.5976173791170287, "grad_norm": 0.16209626197814941, "learning_rate": 0.00024605596747189666, "loss": 0.0379, "step": 4264 }, { "epoch": 0.5977575332866153, "grad_norm": 1.0296924114227295, "learning_rate": 0.000246041616838077, "loss": 0.0919, "step": 4265 }, { "epoch": 0.5978976874562019, "grad_norm": 0.3040546178817749, "learning_rate": 0.0002460272662042573, "loss": 0.0817, "step": 4266 }, { "epoch": 0.5980378416257883, "grad_norm": 0.9100993275642395, "learning_rate": 0.00024601291557043765, "loss": 0.1831, "step": 4267 }, { "epoch": 0.5981779957953749, "grad_norm": 0.2755889892578125, "learning_rate": 0.000245998564936618, "loss": 0.0284, "step": 4268 }, { "epoch": 0.5983181499649615, "grad_norm": 0.3427288234233856, "learning_rate": 0.00024598421430279836, "loss": 0.0644, "step": 4269 }, { "epoch": 0.598458304134548, "grad_norm": 0.5269371867179871, "learning_rate": 0.0002459698636689787, "loss": 0.0607, "step": 4270 }, { "epoch": 0.5985984583041345, "grad_norm": 0.4627378582954407, "learning_rate": 0.000245955513035159, "loss": 0.0983, "step": 4271 }, { "epoch": 0.5987386124737211, "grad_norm": 0.3181998133659363, "learning_rate": 0.0002459411624013394, "loss": 0.0462, "step": 4272 }, { "epoch": 0.5988787666433076, "grad_norm": 0.30405065417289734, "learning_rate": 0.00024592681176751973, "loss": 0.0772, "step": 4273 }, { "epoch": 0.5990189208128942, "grad_norm": 0.3996593952178955, "learning_rate": 0.00024591246113370006, "loss": 0.1246, "step": 4274 }, { "epoch": 0.5991590749824808, "grad_norm": 0.468820720911026, "learning_rate": 0.0002458981104998804, "loss": 0.1081, "step": 4275 }, { "epoch": 0.5992992291520672, "grad_norm": 0.45422908663749695, "learning_rate": 
0.0002458837598660607, "loss": 0.1535, "step": 4276 }, { "epoch": 0.5994393833216538, "grad_norm": 0.5360621213912964, "learning_rate": 0.00024586940923224104, "loss": 0.1074, "step": 4277 }, { "epoch": 0.5995795374912404, "grad_norm": 0.4938092529773712, "learning_rate": 0.00024585505859842137, "loss": 0.0698, "step": 4278 }, { "epoch": 0.599719691660827, "grad_norm": 0.35549435019493103, "learning_rate": 0.00024584070796460176, "loss": 0.034, "step": 4279 }, { "epoch": 0.5998598458304134, "grad_norm": 0.24330860376358032, "learning_rate": 0.0002458263573307821, "loss": 0.0921, "step": 4280 }, { "epoch": 0.6, "grad_norm": 0.4356054663658142, "learning_rate": 0.0002458120066969624, "loss": 0.2351, "step": 4281 }, { "epoch": 0.6001401541695865, "grad_norm": 0.4803878664970398, "learning_rate": 0.0002457976560631428, "loss": 0.1899, "step": 4282 }, { "epoch": 0.6002803083391731, "grad_norm": 0.5061917901039124, "learning_rate": 0.0002457833054293231, "loss": 0.0495, "step": 4283 }, { "epoch": 0.6004204625087597, "grad_norm": 0.4666076898574829, "learning_rate": 0.00024576895479550345, "loss": 0.1593, "step": 4284 }, { "epoch": 0.6005606166783461, "grad_norm": 0.8980773091316223, "learning_rate": 0.0002457546041616838, "loss": 0.2015, "step": 4285 }, { "epoch": 0.6007007708479327, "grad_norm": 0.5777843594551086, "learning_rate": 0.0002457402535278641, "loss": 0.0676, "step": 4286 }, { "epoch": 0.6008409250175193, "grad_norm": 0.993579626083374, "learning_rate": 0.00024572590289404444, "loss": 0.0721, "step": 4287 }, { "epoch": 0.6009810791871059, "grad_norm": 0.5573502779006958, "learning_rate": 0.0002457115522602248, "loss": 0.1125, "step": 4288 }, { "epoch": 0.6011212333566923, "grad_norm": 0.2585557699203491, "learning_rate": 0.00024569720162640515, "loss": 0.0643, "step": 4289 }, { "epoch": 0.6012613875262789, "grad_norm": 0.46761780977249146, "learning_rate": 0.0002456828509925855, "loss": 0.0649, "step": 4290 }, { "epoch": 0.6014015416958655, "grad_norm": 
0.7123151421546936, "learning_rate": 0.0002456685003587658, "loss": 0.1156, "step": 4291 }, { "epoch": 0.601541695865452, "grad_norm": 0.6322957277297974, "learning_rate": 0.0002456541497249462, "loss": 0.085, "step": 4292 }, { "epoch": 0.6016818500350385, "grad_norm": 0.7120218873023987, "learning_rate": 0.0002456397990911265, "loss": 0.1058, "step": 4293 }, { "epoch": 0.6018220042046251, "grad_norm": 0.6270059943199158, "learning_rate": 0.00024562544845730685, "loss": 0.1428, "step": 4294 }, { "epoch": 0.6019621583742116, "grad_norm": 0.2194758653640747, "learning_rate": 0.0002456110978234872, "loss": 0.041, "step": 4295 }, { "epoch": 0.6021023125437982, "grad_norm": 0.5218814015388489, "learning_rate": 0.0002455967471896675, "loss": 0.066, "step": 4296 }, { "epoch": 0.6022424667133848, "grad_norm": 0.644420862197876, "learning_rate": 0.00024558239655584783, "loss": 0.0599, "step": 4297 }, { "epoch": 0.6023826208829712, "grad_norm": 0.9728002548217773, "learning_rate": 0.0002455680459220282, "loss": 0.0512, "step": 4298 }, { "epoch": 0.6025227750525578, "grad_norm": 1.218588948249817, "learning_rate": 0.00024555369528820854, "loss": 0.1672, "step": 4299 }, { "epoch": 0.6026629292221444, "grad_norm": 1.6218665838241577, "learning_rate": 0.00024553934465438887, "loss": 0.1723, "step": 4300 }, { "epoch": 0.6028030833917309, "grad_norm": 0.27974990010261536, "learning_rate": 0.00024552499402056925, "loss": 0.0727, "step": 4301 }, { "epoch": 0.6029432375613174, "grad_norm": 0.3434129059314728, "learning_rate": 0.0002455106433867496, "loss": 0.1364, "step": 4302 }, { "epoch": 0.603083391730904, "grad_norm": 0.2227613776922226, "learning_rate": 0.0002454962927529299, "loss": 0.0634, "step": 4303 }, { "epoch": 0.6032235459004905, "grad_norm": 0.3121270537376404, "learning_rate": 0.00024548194211911024, "loss": 0.0351, "step": 4304 }, { "epoch": 0.6033637000700771, "grad_norm": 0.21233882009983063, "learning_rate": 0.00024546759148529057, "loss": 0.0408, "step": 4305 }, { 
"epoch": 0.6035038542396637, "grad_norm": 0.3764982521533966, "learning_rate": 0.0002454532408514709, "loss": 0.0698, "step": 4306 }, { "epoch": 0.6036440084092501, "grad_norm": 0.7671663761138916, "learning_rate": 0.0002454388902176513, "loss": 0.0777, "step": 4307 }, { "epoch": 0.6037841625788367, "grad_norm": 0.3349737823009491, "learning_rate": 0.0002454245395838316, "loss": 0.1156, "step": 4308 }, { "epoch": 0.6039243167484233, "grad_norm": 0.49075812101364136, "learning_rate": 0.00024541018895001194, "loss": 0.0966, "step": 4309 }, { "epoch": 0.6040644709180099, "grad_norm": 0.7330748438835144, "learning_rate": 0.00024539583831619227, "loss": 0.1238, "step": 4310 }, { "epoch": 0.6042046250875963, "grad_norm": 0.2991892099380493, "learning_rate": 0.00024538148768237265, "loss": 0.0803, "step": 4311 }, { "epoch": 0.6043447792571829, "grad_norm": 0.7433737516403198, "learning_rate": 0.000245367137048553, "loss": 0.0734, "step": 4312 }, { "epoch": 0.6044849334267695, "grad_norm": 0.495500385761261, "learning_rate": 0.0002453527864147333, "loss": 0.0713, "step": 4313 }, { "epoch": 0.604625087596356, "grad_norm": 0.4748336970806122, "learning_rate": 0.00024533843578091363, "loss": 0.1337, "step": 4314 }, { "epoch": 0.6047652417659425, "grad_norm": 0.46583452820777893, "learning_rate": 0.00024532408514709396, "loss": 0.0955, "step": 4315 }, { "epoch": 0.604905395935529, "grad_norm": 0.4464094042778015, "learning_rate": 0.0002453097345132743, "loss": 0.0996, "step": 4316 }, { "epoch": 0.6050455501051156, "grad_norm": 0.4841650426387787, "learning_rate": 0.0002452953838794547, "loss": 0.1073, "step": 4317 }, { "epoch": 0.6051857042747022, "grad_norm": 0.8044472932815552, "learning_rate": 0.000245281033245635, "loss": 0.0814, "step": 4318 }, { "epoch": 0.6053258584442888, "grad_norm": 0.7630645632743835, "learning_rate": 0.00024526668261181533, "loss": 0.154, "step": 4319 }, { "epoch": 0.6054660126138752, "grad_norm": 0.4888552725315094, "learning_rate": 
0.00024525233197799566, "loss": 0.087, "step": 4320 }, { "epoch": 0.6056061667834618, "grad_norm": 0.44488105177879333, "learning_rate": 0.000245237981344176, "loss": 0.0751, "step": 4321 }, { "epoch": 0.6057463209530484, "grad_norm": 0.7497250437736511, "learning_rate": 0.00024522363071035637, "loss": 0.1415, "step": 4322 }, { "epoch": 0.6058864751226349, "grad_norm": 0.47417131066322327, "learning_rate": 0.0002452092800765367, "loss": 0.0748, "step": 4323 }, { "epoch": 0.6060266292922214, "grad_norm": 0.269990473985672, "learning_rate": 0.00024519492944271703, "loss": 0.0704, "step": 4324 }, { "epoch": 0.606166783461808, "grad_norm": 0.2408597618341446, "learning_rate": 0.00024518057880889736, "loss": 0.0674, "step": 4325 }, { "epoch": 0.6063069376313945, "grad_norm": 0.5705625414848328, "learning_rate": 0.0002451662281750777, "loss": 0.1137, "step": 4326 }, { "epoch": 0.6064470918009811, "grad_norm": 0.2860572040081024, "learning_rate": 0.00024515187754125807, "loss": 0.0821, "step": 4327 }, { "epoch": 0.6065872459705677, "grad_norm": 0.9140095114707947, "learning_rate": 0.0002451375269074384, "loss": 0.1939, "step": 4328 }, { "epoch": 0.6067274001401541, "grad_norm": 0.32056695222854614, "learning_rate": 0.0002451231762736187, "loss": 0.0553, "step": 4329 }, { "epoch": 0.6068675543097407, "grad_norm": 0.38588041067123413, "learning_rate": 0.00024510882563979905, "loss": 0.1023, "step": 4330 }, { "epoch": 0.6070077084793273, "grad_norm": 0.4503379762172699, "learning_rate": 0.0002450944750059794, "loss": 0.0866, "step": 4331 }, { "epoch": 0.6071478626489138, "grad_norm": 0.48218831419944763, "learning_rate": 0.0002450801243721597, "loss": 0.075, "step": 4332 }, { "epoch": 0.6072880168185003, "grad_norm": 0.7645440697669983, "learning_rate": 0.0002450657737383401, "loss": 0.1228, "step": 4333 }, { "epoch": 0.6074281709880869, "grad_norm": 0.28834107518196106, "learning_rate": 0.0002450514231045204, "loss": 0.0961, "step": 4334 }, { "epoch": 0.6075683251576735, 
"grad_norm": 0.8012709617614746, "learning_rate": 0.00024503707247070075, "loss": 0.1517, "step": 4335 }, { "epoch": 0.60770847932726, "grad_norm": 0.3561074435710907, "learning_rate": 0.00024502272183688113, "loss": 0.08, "step": 4336 }, { "epoch": 0.6078486334968465, "grad_norm": 0.6639081239700317, "learning_rate": 0.00024500837120306146, "loss": 0.0664, "step": 4337 }, { "epoch": 0.607988787666433, "grad_norm": 0.7292010188102722, "learning_rate": 0.0002449940205692418, "loss": 0.0639, "step": 4338 }, { "epoch": 0.6081289418360196, "grad_norm": 0.5321303606033325, "learning_rate": 0.0002449796699354221, "loss": 0.1339, "step": 4339 }, { "epoch": 0.6082690960056062, "grad_norm": 1.0508623123168945, "learning_rate": 0.00024496531930160245, "loss": 0.1713, "step": 4340 }, { "epoch": 0.6084092501751928, "grad_norm": 0.8318646550178528, "learning_rate": 0.0002449509686677828, "loss": 0.0727, "step": 4341 }, { "epoch": 0.6085494043447792, "grad_norm": 0.3840405344963074, "learning_rate": 0.00024493661803396316, "loss": 0.0468, "step": 4342 }, { "epoch": 0.6086895585143658, "grad_norm": 0.3322233557701111, "learning_rate": 0.0002449222674001435, "loss": 0.0427, "step": 4343 }, { "epoch": 0.6088297126839524, "grad_norm": 0.5010483264923096, "learning_rate": 0.0002449079167663238, "loss": 0.096, "step": 4344 }, { "epoch": 0.6089698668535389, "grad_norm": 1.0068780183792114, "learning_rate": 0.00024489356613250415, "loss": 0.0412, "step": 4345 }, { "epoch": 0.6091100210231254, "grad_norm": 0.8120301365852356, "learning_rate": 0.00024487921549868453, "loss": 0.0981, "step": 4346 }, { "epoch": 0.609250175192712, "grad_norm": 0.9413319826126099, "learning_rate": 0.00024486486486486486, "loss": 0.058, "step": 4347 }, { "epoch": 0.6093903293622985, "grad_norm": 2.320493221282959, "learning_rate": 0.0002448505142310452, "loss": 0.0763, "step": 4348 }, { "epoch": 0.6095304835318851, "grad_norm": 1.0113309621810913, "learning_rate": 0.0002448361635972255, "loss": 0.2882, "step": 
4349 }, { "epoch": 0.6096706377014717, "grad_norm": 3.857027292251587, "learning_rate": 0.00024482181296340584, "loss": 0.3451, "step": 4350 }, { "epoch": 0.6098107918710581, "grad_norm": 0.29157987236976624, "learning_rate": 0.00024480746232958617, "loss": 0.0332, "step": 4351 }, { "epoch": 0.6099509460406447, "grad_norm": 0.3212697505950928, "learning_rate": 0.00024479311169576655, "loss": 0.0384, "step": 4352 }, { "epoch": 0.6100911002102313, "grad_norm": 0.25363045930862427, "learning_rate": 0.0002447787610619469, "loss": 0.0756, "step": 4353 }, { "epoch": 0.6102312543798178, "grad_norm": 0.3170822858810425, "learning_rate": 0.0002447644104281272, "loss": 0.0425, "step": 4354 }, { "epoch": 0.6103714085494043, "grad_norm": 0.3066902756690979, "learning_rate": 0.0002447500597943076, "loss": 0.0878, "step": 4355 }, { "epoch": 0.6105115627189909, "grad_norm": 0.3292471468448639, "learning_rate": 0.0002447357091604879, "loss": 0.081, "step": 4356 }, { "epoch": 0.6106517168885774, "grad_norm": 0.4762860834598541, "learning_rate": 0.00024472135852666825, "loss": 0.1618, "step": 4357 }, { "epoch": 0.610791871058164, "grad_norm": 0.35840871930122375, "learning_rate": 0.0002447070078928486, "loss": 0.0835, "step": 4358 }, { "epoch": 0.6109320252277505, "grad_norm": 0.7974569201469421, "learning_rate": 0.0002446926572590289, "loss": 0.0971, "step": 4359 }, { "epoch": 0.611072179397337, "grad_norm": 0.1929108202457428, "learning_rate": 0.00024467830662520924, "loss": 0.0505, "step": 4360 }, { "epoch": 0.6112123335669236, "grad_norm": 0.7492509484291077, "learning_rate": 0.00024466395599138956, "loss": 0.0611, "step": 4361 }, { "epoch": 0.6113524877365102, "grad_norm": 0.3383336663246155, "learning_rate": 0.00024464960535756995, "loss": 0.0606, "step": 4362 }, { "epoch": 0.6114926419060968, "grad_norm": 0.3851757049560547, "learning_rate": 0.0002446352547237503, "loss": 0.0903, "step": 4363 }, { "epoch": 0.6116327960756832, "grad_norm": 0.797488272190094, "learning_rate": 
0.0002446209040899306, "loss": 0.1297, "step": 4364 }, { "epoch": 0.6117729502452698, "grad_norm": 0.487002968788147, "learning_rate": 0.000244606553456111, "loss": 0.0707, "step": 4365 }, { "epoch": 0.6119131044148564, "grad_norm": 0.39489510655403137, "learning_rate": 0.0002445922028222913, "loss": 0.0665, "step": 4366 }, { "epoch": 0.6120532585844429, "grad_norm": 0.49685123562812805, "learning_rate": 0.00024457785218847164, "loss": 0.1061, "step": 4367 }, { "epoch": 0.6121934127540294, "grad_norm": 0.3775698244571686, "learning_rate": 0.000244563501554652, "loss": 0.1033, "step": 4368 }, { "epoch": 0.612333566923616, "grad_norm": 0.29300031065940857, "learning_rate": 0.0002445491509208323, "loss": 0.0468, "step": 4369 }, { "epoch": 0.6124737210932025, "grad_norm": 0.20870809257030487, "learning_rate": 0.00024453480028701263, "loss": 0.0246, "step": 4370 }, { "epoch": 0.6126138752627891, "grad_norm": 0.6352375745773315, "learning_rate": 0.000244520449653193, "loss": 0.1485, "step": 4371 }, { "epoch": 0.6127540294323757, "grad_norm": 0.28585219383239746, "learning_rate": 0.00024450609901937334, "loss": 0.0896, "step": 4372 }, { "epoch": 0.6128941836019621, "grad_norm": 0.3606799244880676, "learning_rate": 0.00024449174838555367, "loss": 0.0567, "step": 4373 }, { "epoch": 0.6130343377715487, "grad_norm": 0.46540823578834534, "learning_rate": 0.00024447739775173405, "loss": 0.0842, "step": 4374 }, { "epoch": 0.6131744919411353, "grad_norm": 0.7603920102119446, "learning_rate": 0.0002444630471179144, "loss": 0.086, "step": 4375 }, { "epoch": 0.6133146461107218, "grad_norm": 0.36296015977859497, "learning_rate": 0.0002444486964840947, "loss": 0.0768, "step": 4376 }, { "epoch": 0.6134548002803083, "grad_norm": 0.28165048360824585, "learning_rate": 0.00024443434585027504, "loss": 0.0734, "step": 4377 }, { "epoch": 0.6135949544498949, "grad_norm": 0.37743857502937317, "learning_rate": 0.00024441999521645537, "loss": 0.071, "step": 4378 }, { "epoch": 0.6137351086194814, 
"grad_norm": 0.37869128584861755, "learning_rate": 0.0002444056445826357, "loss": 0.076, "step": 4379 }, { "epoch": 0.613875262789068, "grad_norm": 0.1750558465719223, "learning_rate": 0.000244391293948816, "loss": 0.0248, "step": 4380 }, { "epoch": 0.6140154169586545, "grad_norm": 0.2464558631181717, "learning_rate": 0.0002443769433149964, "loss": 0.0504, "step": 4381 }, { "epoch": 0.614155571128241, "grad_norm": 0.1975264698266983, "learning_rate": 0.00024436259268117674, "loss": 0.0429, "step": 4382 }, { "epoch": 0.6142957252978276, "grad_norm": 0.39335569739341736, "learning_rate": 0.00024434824204735706, "loss": 0.0813, "step": 4383 }, { "epoch": 0.6144358794674142, "grad_norm": 0.3592526316642761, "learning_rate": 0.0002443338914135374, "loss": 0.0644, "step": 4384 }, { "epoch": 0.6145760336370008, "grad_norm": 0.7763746976852417, "learning_rate": 0.0002443195407797177, "loss": 0.1568, "step": 4385 }, { "epoch": 0.6147161878065872, "grad_norm": 1.4262676239013672, "learning_rate": 0.0002443051901458981, "loss": 0.412, "step": 4386 }, { "epoch": 0.6148563419761738, "grad_norm": 0.6510946750640869, "learning_rate": 0.00024429083951207843, "loss": 0.1386, "step": 4387 }, { "epoch": 0.6149964961457604, "grad_norm": 0.3391013443470001, "learning_rate": 0.00024427648887825876, "loss": 0.1077, "step": 4388 }, { "epoch": 0.6151366503153469, "grad_norm": 0.827523410320282, "learning_rate": 0.0002442621382444391, "loss": 0.1334, "step": 4389 }, { "epoch": 0.6152768044849334, "grad_norm": 0.3507126569747925, "learning_rate": 0.00024424778761061947, "loss": 0.1238, "step": 4390 }, { "epoch": 0.61541695865452, "grad_norm": 0.8287339210510254, "learning_rate": 0.0002442334369767998, "loss": 0.1035, "step": 4391 }, { "epoch": 0.6155571128241065, "grad_norm": 0.7334623336791992, "learning_rate": 0.00024421908634298013, "loss": 0.1257, "step": 4392 }, { "epoch": 0.6156972669936931, "grad_norm": 0.8060169816017151, "learning_rate": 0.00024420473570916046, "loss": 0.0632, 
"step": 4393 }, { "epoch": 0.6158374211632796, "grad_norm": 0.49323779344558716, "learning_rate": 0.0002441903850753408, "loss": 0.1475, "step": 4394 }, { "epoch": 0.6159775753328661, "grad_norm": 1.107265830039978, "learning_rate": 0.0002441760344415211, "loss": 0.1472, "step": 4395 }, { "epoch": 0.6161177295024527, "grad_norm": 0.4396919310092926, "learning_rate": 0.00024416168380770144, "loss": 0.0765, "step": 4396 }, { "epoch": 0.6162578836720393, "grad_norm": 0.8279666900634766, "learning_rate": 0.0002441473331738818, "loss": 0.1933, "step": 4397 }, { "epoch": 0.6163980378416258, "grad_norm": 2.0156943798065186, "learning_rate": 0.00024413298254006216, "loss": 0.1694, "step": 4398 }, { "epoch": 0.6165381920112123, "grad_norm": 1.5239709615707397, "learning_rate": 0.00024411863190624248, "loss": 0.3152, "step": 4399 }, { "epoch": 0.6166783461807989, "grad_norm": 2.478585720062256, "learning_rate": 0.00024410428127242284, "loss": 0.3303, "step": 4400 }, { "epoch": 0.6168185003503854, "grad_norm": 0.2714337110519409, "learning_rate": 0.0002440899306386032, "loss": 0.111, "step": 4401 }, { "epoch": 0.616958654519972, "grad_norm": 0.4110240042209625, "learning_rate": 0.00024407558000478352, "loss": 0.1119, "step": 4402 }, { "epoch": 0.6170988086895585, "grad_norm": 0.31719252467155457, "learning_rate": 0.00024406122937096388, "loss": 0.0832, "step": 4403 }, { "epoch": 0.617238962859145, "grad_norm": 0.3928353786468506, "learning_rate": 0.0002440468787371442, "loss": 0.1329, "step": 4404 }, { "epoch": 0.6173791170287316, "grad_norm": 0.46870750188827515, "learning_rate": 0.00024403252810332454, "loss": 0.0995, "step": 4405 }, { "epoch": 0.6175192711983182, "grad_norm": 0.3520171642303467, "learning_rate": 0.0002440181774695049, "loss": 0.1214, "step": 4406 }, { "epoch": 0.6176594253679047, "grad_norm": 0.30453261733055115, "learning_rate": 0.00024400382683568522, "loss": 0.0791, "step": 4407 }, { "epoch": 0.6177995795374912, "grad_norm": 0.25287672877311707, 
"learning_rate": 0.00024398947620186555, "loss": 0.0835, "step": 4408 }, { "epoch": 0.6179397337070778, "grad_norm": 0.39686262607574463, "learning_rate": 0.0002439751255680459, "loss": 0.1069, "step": 4409 }, { "epoch": 0.6180798878766643, "grad_norm": 0.38980939984321594, "learning_rate": 0.00024396077493422623, "loss": 0.1512, "step": 4410 }, { "epoch": 0.6182200420462509, "grad_norm": 0.11704090237617493, "learning_rate": 0.00024394642430040656, "loss": 0.0191, "step": 4411 }, { "epoch": 0.6183601962158374, "grad_norm": 0.23291423916816711, "learning_rate": 0.00024393207366658694, "loss": 0.0843, "step": 4412 }, { "epoch": 0.618500350385424, "grad_norm": 0.19935207068920135, "learning_rate": 0.00024391772303276727, "loss": 0.0629, "step": 4413 }, { "epoch": 0.6186405045550105, "grad_norm": 0.5691215395927429, "learning_rate": 0.0002439033723989476, "loss": 0.0903, "step": 4414 }, { "epoch": 0.6187806587245971, "grad_norm": 0.22797924280166626, "learning_rate": 0.00024388902176512793, "loss": 0.0646, "step": 4415 }, { "epoch": 0.6189208128941835, "grad_norm": 0.23018260300159454, "learning_rate": 0.00024387467113130829, "loss": 0.0932, "step": 4416 }, { "epoch": 0.6190609670637701, "grad_norm": 0.6204971671104431, "learning_rate": 0.00024386032049748861, "loss": 0.1055, "step": 4417 }, { "epoch": 0.6192011212333567, "grad_norm": 0.4983014464378357, "learning_rate": 0.00024384596986366894, "loss": 0.1327, "step": 4418 }, { "epoch": 0.6193412754029433, "grad_norm": 0.5641348958015442, "learning_rate": 0.0002438316192298493, "loss": 0.1468, "step": 4419 }, { "epoch": 0.6194814295725298, "grad_norm": 0.31819379329681396, "learning_rate": 0.00024381726859602963, "loss": 0.0976, "step": 4420 }, { "epoch": 0.6196215837421163, "grad_norm": 0.36068475246429443, "learning_rate": 0.00024380291796220996, "loss": 0.0673, "step": 4421 }, { "epoch": 0.6197617379117029, "grad_norm": 0.6764531135559082, "learning_rate": 0.00024378856732839034, "loss": 0.1174, "step": 4422 }, { 
"epoch": 0.6199018920812894, "grad_norm": 0.3103175461292267, "learning_rate": 0.00024377421669457067, "loss": 0.0481, "step": 4423 }, { "epoch": 0.620042046250876, "grad_norm": 0.4179884195327759, "learning_rate": 0.000243759866060751, "loss": 0.2048, "step": 4424 }, { "epoch": 0.6201822004204625, "grad_norm": 0.4337652027606964, "learning_rate": 0.00024374551542693135, "loss": 0.0958, "step": 4425 }, { "epoch": 0.620322354590049, "grad_norm": 0.4864049255847931, "learning_rate": 0.00024373116479311168, "loss": 0.0633, "step": 4426 }, { "epoch": 0.6204625087596356, "grad_norm": 0.5863664150238037, "learning_rate": 0.000243716814159292, "loss": 0.1467, "step": 4427 }, { "epoch": 0.6206026629292222, "grad_norm": 0.2796110510826111, "learning_rate": 0.00024370246352547236, "loss": 0.0379, "step": 4428 }, { "epoch": 0.6207428170988087, "grad_norm": 0.5027040243148804, "learning_rate": 0.0002436881128916527, "loss": 0.0806, "step": 4429 }, { "epoch": 0.6208829712683952, "grad_norm": 0.534468948841095, "learning_rate": 0.00024367376225783302, "loss": 0.1218, "step": 4430 }, { "epoch": 0.6210231254379818, "grad_norm": 0.510786771774292, "learning_rate": 0.00024365941162401335, "loss": 0.184, "step": 4431 }, { "epoch": 0.6211632796075683, "grad_norm": 0.41986384987831116, "learning_rate": 0.0002436450609901937, "loss": 0.0406, "step": 4432 }, { "epoch": 0.6213034337771549, "grad_norm": 0.34594154357910156, "learning_rate": 0.00024363071035637406, "loss": 0.0668, "step": 4433 }, { "epoch": 0.6214435879467414, "grad_norm": 0.3428405523300171, "learning_rate": 0.0002436163597225544, "loss": 0.0431, "step": 4434 }, { "epoch": 0.6215837421163279, "grad_norm": 0.4442746341228485, "learning_rate": 0.00024360200908873475, "loss": 0.0855, "step": 4435 }, { "epoch": 0.6217238962859145, "grad_norm": 0.36158838868141174, "learning_rate": 0.00024358765845491507, "loss": 0.0686, "step": 4436 }, { "epoch": 0.6218640504555011, "grad_norm": 0.41118520498275757, "learning_rate": 
0.0002435733078210954, "loss": 0.094, "step": 4437 }, { "epoch": 0.6220042046250875, "grad_norm": 0.7667691707611084, "learning_rate": 0.00024355895718727576, "loss": 0.1374, "step": 4438 }, { "epoch": 0.6221443587946741, "grad_norm": 0.3761586546897888, "learning_rate": 0.0002435446065534561, "loss": 0.0448, "step": 4439 }, { "epoch": 0.6222845129642607, "grad_norm": 0.5398222208023071, "learning_rate": 0.00024353025591963642, "loss": 0.0936, "step": 4440 }, { "epoch": 0.6224246671338473, "grad_norm": 0.4447212517261505, "learning_rate": 0.00024351590528581677, "loss": 0.1139, "step": 4441 }, { "epoch": 0.6225648213034338, "grad_norm": 0.36371704936027527, "learning_rate": 0.0002435015546519971, "loss": 0.0634, "step": 4442 }, { "epoch": 0.6227049754730203, "grad_norm": 0.560509204864502, "learning_rate": 0.00024348720401817743, "loss": 0.0689, "step": 4443 }, { "epoch": 0.6228451296426069, "grad_norm": 0.4575555920600891, "learning_rate": 0.0002434728533843578, "loss": 0.0301, "step": 4444 }, { "epoch": 0.6229852838121934, "grad_norm": 0.3562361001968384, "learning_rate": 0.00024345850275053814, "loss": 0.091, "step": 4445 }, { "epoch": 0.62312543798178, "grad_norm": 1.1659826040267944, "learning_rate": 0.00024344415211671847, "loss": 0.257, "step": 4446 }, { "epoch": 0.6232655921513665, "grad_norm": 0.4810757040977478, "learning_rate": 0.00024342980148289882, "loss": 0.1023, "step": 4447 }, { "epoch": 0.623405746320953, "grad_norm": 2.876710891723633, "learning_rate": 0.00024341545084907915, "loss": 0.0887, "step": 4448 }, { "epoch": 0.6235459004905396, "grad_norm": 0.4492841362953186, "learning_rate": 0.00024340110021525948, "loss": 0.0571, "step": 4449 }, { "epoch": 0.6236860546601262, "grad_norm": 2.8683910369873047, "learning_rate": 0.0002433867495814398, "loss": 0.2872, "step": 4450 }, { "epoch": 0.6238262088297127, "grad_norm": 0.38978391885757446, "learning_rate": 0.00024337239894762017, "loss": 0.0873, "step": 4451 }, { "epoch": 0.6239663629992992, 
"grad_norm": 0.6746708750724792, "learning_rate": 0.0002433580483138005, "loss": 0.1101, "step": 4452 }, { "epoch": 0.6241065171688858, "grad_norm": 0.8837757110595703, "learning_rate": 0.00024334369767998082, "loss": 0.066, "step": 4453 }, { "epoch": 0.6242466713384723, "grad_norm": 0.6266793608665466, "learning_rate": 0.0002433293470461612, "loss": 0.1132, "step": 4454 }, { "epoch": 0.6243868255080589, "grad_norm": 0.65644371509552, "learning_rate": 0.00024331499641234153, "loss": 0.1313, "step": 4455 }, { "epoch": 0.6245269796776454, "grad_norm": 0.3828071355819702, "learning_rate": 0.00024330064577852186, "loss": 0.0594, "step": 4456 }, { "epoch": 0.6246671338472319, "grad_norm": 0.2612523138523102, "learning_rate": 0.00024328629514470222, "loss": 0.085, "step": 4457 }, { "epoch": 0.6248072880168185, "grad_norm": 0.26661962270736694, "learning_rate": 0.00024327194451088255, "loss": 0.063, "step": 4458 }, { "epoch": 0.6249474421864051, "grad_norm": 0.383114755153656, "learning_rate": 0.00024325759387706288, "loss": 0.0802, "step": 4459 }, { "epoch": 0.6250875963559915, "grad_norm": 0.3588640093803406, "learning_rate": 0.00024324324324324323, "loss": 0.0834, "step": 4460 }, { "epoch": 0.6252277505255781, "grad_norm": 0.14527222514152527, "learning_rate": 0.00024322889260942356, "loss": 0.0262, "step": 4461 }, { "epoch": 0.6253679046951647, "grad_norm": 0.3797816336154938, "learning_rate": 0.0002432145419756039, "loss": 0.0754, "step": 4462 }, { "epoch": 0.6255080588647512, "grad_norm": 0.42258310317993164, "learning_rate": 0.00024320019134178424, "loss": 0.0364, "step": 4463 }, { "epoch": 0.6256482130343378, "grad_norm": 0.5546248555183411, "learning_rate": 0.00024318584070796457, "loss": 0.0668, "step": 4464 }, { "epoch": 0.6257883672039243, "grad_norm": 0.2608524560928345, "learning_rate": 0.00024317149007414493, "loss": 0.0336, "step": 4465 }, { "epoch": 0.6259285213735108, "grad_norm": 0.5189929008483887, "learning_rate": 0.00024315713944032526, "loss": 
0.0721, "step": 4466 }, { "epoch": 0.6260686755430974, "grad_norm": 0.40395602583885193, "learning_rate": 0.0002431427888065056, "loss": 0.0432, "step": 4467 }, { "epoch": 0.626208829712684, "grad_norm": 1.1061335802078247, "learning_rate": 0.00024312843817268594, "loss": 0.0513, "step": 4468 }, { "epoch": 0.6263489838822704, "grad_norm": 0.3939414322376251, "learning_rate": 0.00024311408753886627, "loss": 0.0877, "step": 4469 }, { "epoch": 0.626489138051857, "grad_norm": 0.6109133958816528, "learning_rate": 0.00024309973690504662, "loss": 0.1487, "step": 4470 }, { "epoch": 0.6266292922214436, "grad_norm": 1.2329773902893066, "learning_rate": 0.00024308538627122695, "loss": 0.1253, "step": 4471 }, { "epoch": 0.6267694463910302, "grad_norm": 0.4583156704902649, "learning_rate": 0.00024307103563740728, "loss": 0.0603, "step": 4472 }, { "epoch": 0.6269096005606167, "grad_norm": 0.7002905011177063, "learning_rate": 0.00024305668500358764, "loss": 0.3109, "step": 4473 }, { "epoch": 0.6270497547302032, "grad_norm": 0.5552048683166504, "learning_rate": 0.00024304233436976797, "loss": 0.2018, "step": 4474 }, { "epoch": 0.6271899088997898, "grad_norm": 0.5952927470207214, "learning_rate": 0.0002430279837359483, "loss": 0.1302, "step": 4475 }, { "epoch": 0.6273300630693763, "grad_norm": 0.32670772075653076, "learning_rate": 0.00024301363310212868, "loss": 0.0501, "step": 4476 }, { "epoch": 0.6274702172389629, "grad_norm": 0.27814510464668274, "learning_rate": 0.000242999282468309, "loss": 0.0888, "step": 4477 }, { "epoch": 0.6276103714085494, "grad_norm": 0.4076862931251526, "learning_rate": 0.00024298493183448933, "loss": 0.0935, "step": 4478 }, { "epoch": 0.6277505255781359, "grad_norm": 0.3227880597114563, "learning_rate": 0.0002429705812006697, "loss": 0.0814, "step": 4479 }, { "epoch": 0.6278906797477225, "grad_norm": 0.7244734764099121, "learning_rate": 0.00024295623056685002, "loss": 0.145, "step": 4480 }, { "epoch": 0.6280308339173091, "grad_norm": 
0.30963683128356934, "learning_rate": 0.00024294187993303035, "loss": 0.059, "step": 4481 }, { "epoch": 0.6281709880868955, "grad_norm": 0.3892687261104584, "learning_rate": 0.0002429275292992107, "loss": 0.0925, "step": 4482 }, { "epoch": 0.6283111422564821, "grad_norm": 0.45980051159858704, "learning_rate": 0.00024291317866539103, "loss": 0.0857, "step": 4483 }, { "epoch": 0.6284512964260687, "grad_norm": 0.22275343537330627, "learning_rate": 0.00024289882803157136, "loss": 0.0815, "step": 4484 }, { "epoch": 0.6285914505956552, "grad_norm": 0.08691888302564621, "learning_rate": 0.0002428844773977517, "loss": 0.0202, "step": 4485 }, { "epoch": 0.6287316047652418, "grad_norm": 0.498346745967865, "learning_rate": 0.00024287012676393207, "loss": 0.1034, "step": 4486 }, { "epoch": 0.6288717589348283, "grad_norm": 0.3046766221523285, "learning_rate": 0.0002428557761301124, "loss": 0.0921, "step": 4487 }, { "epoch": 0.6290119131044148, "grad_norm": 0.3708658516407013, "learning_rate": 0.00024284142549629273, "loss": 0.0756, "step": 4488 }, { "epoch": 0.6291520672740014, "grad_norm": 0.49132239818573, "learning_rate": 0.00024282707486247308, "loss": 0.1751, "step": 4489 }, { "epoch": 0.629292221443588, "grad_norm": 0.37744471430778503, "learning_rate": 0.0002428127242286534, "loss": 0.0941, "step": 4490 }, { "epoch": 0.6294323756131744, "grad_norm": 0.4208647608757019, "learning_rate": 0.00024279837359483374, "loss": 0.0893, "step": 4491 }, { "epoch": 0.629572529782761, "grad_norm": 0.928225040435791, "learning_rate": 0.0002427840229610141, "loss": 0.1669, "step": 4492 }, { "epoch": 0.6297126839523476, "grad_norm": 0.12094986438751221, "learning_rate": 0.00024276967232719443, "loss": 0.0171, "step": 4493 }, { "epoch": 0.6298528381219342, "grad_norm": 0.6277962327003479, "learning_rate": 0.00024275532169337475, "loss": 0.1133, "step": 4494 }, { "epoch": 0.6299929922915207, "grad_norm": 0.4300065338611603, "learning_rate": 0.0002427409710595551, "loss": 0.0739, "step": 
4495 }, { "epoch": 0.6301331464611072, "grad_norm": 0.383602112531662, "learning_rate": 0.00024272662042573544, "loss": 0.0612, "step": 4496 }, { "epoch": 0.6302733006306938, "grad_norm": 0.8136420845985413, "learning_rate": 0.0002427122697919158, "loss": 0.4323, "step": 4497 }, { "epoch": 0.6304134548002803, "grad_norm": 2.779512882232666, "learning_rate": 0.00024269791915809615, "loss": 0.2818, "step": 4498 }, { "epoch": 0.6305536089698669, "grad_norm": 1.7227791547775269, "learning_rate": 0.00024268356852427648, "loss": 0.4441, "step": 4499 }, { "epoch": 0.6306937631394534, "grad_norm": 3.11232852935791, "learning_rate": 0.0002426692178904568, "loss": 0.2778, "step": 4500 }, { "epoch": 0.6308339173090399, "grad_norm": 0.5326541662216187, "learning_rate": 0.00024265486725663714, "loss": 0.1259, "step": 4501 }, { "epoch": 0.6309740714786265, "grad_norm": 0.3800836205482483, "learning_rate": 0.0002426405166228175, "loss": 0.1182, "step": 4502 }, { "epoch": 0.6311142256482131, "grad_norm": 0.3213178217411041, "learning_rate": 0.00024262616598899782, "loss": 0.0744, "step": 4503 }, { "epoch": 0.6312543798177995, "grad_norm": 0.2859538793563843, "learning_rate": 0.00024261181535517815, "loss": 0.1095, "step": 4504 }, { "epoch": 0.6313945339873861, "grad_norm": 0.3558676242828369, "learning_rate": 0.0002425974647213585, "loss": 0.0637, "step": 4505 }, { "epoch": 0.6315346881569727, "grad_norm": 0.45185384154319763, "learning_rate": 0.00024258311408753883, "loss": 0.058, "step": 4506 }, { "epoch": 0.6316748423265592, "grad_norm": 0.3520965874195099, "learning_rate": 0.00024256876345371916, "loss": 0.0732, "step": 4507 }, { "epoch": 0.6318149964961458, "grad_norm": 0.7532474398612976, "learning_rate": 0.00024255441281989954, "loss": 0.0447, "step": 4508 }, { "epoch": 0.6319551506657323, "grad_norm": 0.25900688767433167, "learning_rate": 0.00024254006218607987, "loss": 0.0613, "step": 4509 }, { "epoch": 0.6320953048353188, "grad_norm": 0.7063947916030884, "learning_rate": 
0.0002425257115522602, "loss": 0.067, "step": 4510 }, { "epoch": 0.6322354590049054, "grad_norm": 0.5420425534248352, "learning_rate": 0.00024251136091844056, "loss": 0.1357, "step": 4511 }, { "epoch": 0.632375613174492, "grad_norm": 0.4076794683933258, "learning_rate": 0.00024249701028462089, "loss": 0.0741, "step": 4512 }, { "epoch": 0.6325157673440784, "grad_norm": 0.2640433609485626, "learning_rate": 0.00024248265965080121, "loss": 0.0798, "step": 4513 }, { "epoch": 0.632655921513665, "grad_norm": 0.48513099551200867, "learning_rate": 0.00024246830901698157, "loss": 0.0627, "step": 4514 }, { "epoch": 0.6327960756832516, "grad_norm": 0.4062906503677368, "learning_rate": 0.0002424539583831619, "loss": 0.0986, "step": 4515 }, { "epoch": 0.6329362298528382, "grad_norm": 0.398327499628067, "learning_rate": 0.00024243960774934223, "loss": 0.1177, "step": 4516 }, { "epoch": 0.6330763840224247, "grad_norm": 0.5958709716796875, "learning_rate": 0.0002424252571155226, "loss": 0.1484, "step": 4517 }, { "epoch": 0.6332165381920112, "grad_norm": 0.4138696789741516, "learning_rate": 0.00024241090648170294, "loss": 0.0476, "step": 4518 }, { "epoch": 0.6333566923615978, "grad_norm": 0.3402014672756195, "learning_rate": 0.00024239655584788327, "loss": 0.0467, "step": 4519 }, { "epoch": 0.6334968465311843, "grad_norm": 0.9100338220596313, "learning_rate": 0.0002423822052140636, "loss": 0.0972, "step": 4520 }, { "epoch": 0.6336370007007709, "grad_norm": 0.23704035580158234, "learning_rate": 0.00024236785458024395, "loss": 0.0418, "step": 4521 }, { "epoch": 0.6337771548703574, "grad_norm": 0.8553435802459717, "learning_rate": 0.00024235350394642428, "loss": 0.0573, "step": 4522 }, { "epoch": 0.6339173090399439, "grad_norm": 0.48949527740478516, "learning_rate": 0.0002423391533126046, "loss": 0.0934, "step": 4523 }, { "epoch": 0.6340574632095305, "grad_norm": 1.2994810342788696, "learning_rate": 0.00024232480267878496, "loss": 0.1191, "step": 4524 }, { "epoch": 0.6341976173791171, 
"grad_norm": 0.16651922464370728, "learning_rate": 0.0002423104520449653, "loss": 0.0198, "step": 4525 }, { "epoch": 0.6343377715487035, "grad_norm": 0.38856351375579834, "learning_rate": 0.00024229610141114562, "loss": 0.0726, "step": 4526 }, { "epoch": 0.6344779257182901, "grad_norm": 0.22474133968353271, "learning_rate": 0.00024228175077732598, "loss": 0.0545, "step": 4527 }, { "epoch": 0.6346180798878767, "grad_norm": 0.6340205669403076, "learning_rate": 0.00024226740014350633, "loss": 0.1177, "step": 4528 }, { "epoch": 0.6347582340574632, "grad_norm": 0.5469197034835815, "learning_rate": 0.00024225304950968666, "loss": 0.1142, "step": 4529 }, { "epoch": 0.6348983882270498, "grad_norm": 0.24882544577121735, "learning_rate": 0.00024223869887586702, "loss": 0.0206, "step": 4530 }, { "epoch": 0.6350385423966363, "grad_norm": 0.4992559552192688, "learning_rate": 0.00024222434824204734, "loss": 0.0444, "step": 4531 }, { "epoch": 0.6351786965662228, "grad_norm": 0.38363006711006165, "learning_rate": 0.00024220999760822767, "loss": 0.1327, "step": 4532 }, { "epoch": 0.6353188507358094, "grad_norm": 0.47136613726615906, "learning_rate": 0.00024219564697440803, "loss": 0.0883, "step": 4533 }, { "epoch": 0.635459004905396, "grad_norm": 0.38705387711524963, "learning_rate": 0.00024218129634058836, "loss": 0.0336, "step": 4534 }, { "epoch": 0.6355991590749824, "grad_norm": 0.44826316833496094, "learning_rate": 0.00024216694570676869, "loss": 0.1161, "step": 4535 }, { "epoch": 0.635739313244569, "grad_norm": 0.8910202383995056, "learning_rate": 0.00024215259507294901, "loss": 0.0834, "step": 4536 }, { "epoch": 0.6358794674141556, "grad_norm": 0.3817906379699707, "learning_rate": 0.00024213824443912937, "loss": 0.0529, "step": 4537 }, { "epoch": 0.6360196215837421, "grad_norm": 0.46518051624298096, "learning_rate": 0.0002421238938053097, "loss": 0.102, "step": 4538 }, { "epoch": 0.6361597757533287, "grad_norm": 0.18469616770744324, "learning_rate": 0.00024210954317149003, 
"loss": 0.0309, "step": 4539 }, { "epoch": 0.6362999299229152, "grad_norm": 0.6559878587722778, "learning_rate": 0.0002420951925376704, "loss": 0.0748, "step": 4540 }, { "epoch": 0.6364400840925017, "grad_norm": 0.24291583895683289, "learning_rate": 0.00024208084190385074, "loss": 0.0147, "step": 4541 }, { "epoch": 0.6365802382620883, "grad_norm": 0.3506116271018982, "learning_rate": 0.00024206649127003107, "loss": 0.092, "step": 4542 }, { "epoch": 0.6367203924316749, "grad_norm": 0.7008859515190125, "learning_rate": 0.00024205214063621142, "loss": 0.0799, "step": 4543 }, { "epoch": 0.6368605466012613, "grad_norm": 0.36548927426338196, "learning_rate": 0.00024203779000239175, "loss": 0.0581, "step": 4544 }, { "epoch": 0.6370007007708479, "grad_norm": 1.1923069953918457, "learning_rate": 0.00024202343936857208, "loss": 0.2091, "step": 4545 }, { "epoch": 0.6371408549404345, "grad_norm": 0.2648469805717468, "learning_rate": 0.00024200908873475244, "loss": 0.0088, "step": 4546 }, { "epoch": 0.6372810091100211, "grad_norm": 0.5329459309577942, "learning_rate": 0.00024199473810093276, "loss": 0.0616, "step": 4547 }, { "epoch": 0.6374211632796075, "grad_norm": 2.047420024871826, "learning_rate": 0.0002419803874671131, "loss": 0.1895, "step": 4548 }, { "epoch": 0.6375613174491941, "grad_norm": 11.075716018676758, "learning_rate": 0.00024196603683329348, "loss": 0.336, "step": 4549 }, { "epoch": 0.6377014716187807, "grad_norm": 2.1514623165130615, "learning_rate": 0.0002419516861994738, "loss": 0.1376, "step": 4550 }, { "epoch": 0.6378416257883672, "grad_norm": 0.6289103031158447, "learning_rate": 0.00024193733556565413, "loss": 0.0803, "step": 4551 }, { "epoch": 0.6379817799579538, "grad_norm": 0.3676750659942627, "learning_rate": 0.00024192298493183446, "loss": 0.0837, "step": 4552 }, { "epoch": 0.6381219341275403, "grad_norm": 0.46150317788124084, "learning_rate": 0.00024190863429801482, "loss": 0.1248, "step": 4553 }, { "epoch": 0.6382620882971268, "grad_norm": 
0.46485984325408936, "learning_rate": 0.00024189428366419515, "loss": 0.0984, "step": 4554 }, { "epoch": 0.6384022424667134, "grad_norm": 0.5445472598075867, "learning_rate": 0.00024187993303037547, "loss": 0.1094, "step": 4555 }, { "epoch": 0.6385423966363, "grad_norm": 0.4286209046840668, "learning_rate": 0.00024186558239655583, "loss": 0.1566, "step": 4556 }, { "epoch": 0.6386825508058864, "grad_norm": 0.45852240920066833, "learning_rate": 0.00024185123176273616, "loss": 0.0801, "step": 4557 }, { "epoch": 0.638822704975473, "grad_norm": 0.27761003375053406, "learning_rate": 0.0002418368811289165, "loss": 0.037, "step": 4558 }, { "epoch": 0.6389628591450596, "grad_norm": 0.23453634977340698, "learning_rate": 0.00024182253049509684, "loss": 0.0557, "step": 4559 }, { "epoch": 0.6391030133146461, "grad_norm": 0.2455267310142517, "learning_rate": 0.0002418081798612772, "loss": 0.0303, "step": 4560 }, { "epoch": 0.6392431674842327, "grad_norm": 0.5576555132865906, "learning_rate": 0.00024179382922745753, "loss": 0.0907, "step": 4561 }, { "epoch": 0.6393833216538192, "grad_norm": 0.36986711621284485, "learning_rate": 0.00024177947859363788, "loss": 0.0467, "step": 4562 }, { "epoch": 0.6395234758234057, "grad_norm": 0.5336852073669434, "learning_rate": 0.0002417651279598182, "loss": 0.0908, "step": 4563 }, { "epoch": 0.6396636299929923, "grad_norm": 0.754698634147644, "learning_rate": 0.00024175077732599854, "loss": 0.1407, "step": 4564 }, { "epoch": 0.6398037841625789, "grad_norm": 0.3857322335243225, "learning_rate": 0.0002417364266921789, "loss": 0.0639, "step": 4565 }, { "epoch": 0.6399439383321653, "grad_norm": 0.8895483613014221, "learning_rate": 0.00024172207605835922, "loss": 0.0936, "step": 4566 }, { "epoch": 0.6400840925017519, "grad_norm": 0.3353564739227295, "learning_rate": 0.00024170772542453955, "loss": 0.1098, "step": 4567 }, { "epoch": 0.6402242466713385, "grad_norm": 0.4150405824184418, "learning_rate": 0.0002416933747907199, "loss": 0.0609, "step": 
4568 }, { "epoch": 0.640364400840925, "grad_norm": 0.20065344870090485, "learning_rate": 0.00024167902415690024, "loss": 0.0339, "step": 4569 }, { "epoch": 0.6405045550105115, "grad_norm": 0.3569844365119934, "learning_rate": 0.00024166467352308056, "loss": 0.0917, "step": 4570 }, { "epoch": 0.6406447091800981, "grad_norm": 0.4130513072013855, "learning_rate": 0.0002416503228892609, "loss": 0.1194, "step": 4571 }, { "epoch": 0.6407848633496847, "grad_norm": 0.22559505701065063, "learning_rate": 0.00024163597225544128, "loss": 0.0839, "step": 4572 }, { "epoch": 0.6409250175192712, "grad_norm": 0.18988323211669922, "learning_rate": 0.0002416216216216216, "loss": 0.028, "step": 4573 }, { "epoch": 0.6410651716888578, "grad_norm": 0.49719610810279846, "learning_rate": 0.00024160727098780193, "loss": 0.0615, "step": 4574 }, { "epoch": 0.6412053258584443, "grad_norm": 0.2265058010816574, "learning_rate": 0.0002415929203539823, "loss": 0.0342, "step": 4575 }, { "epoch": 0.6413454800280308, "grad_norm": 0.4475458562374115, "learning_rate": 0.00024157856972016262, "loss": 0.0708, "step": 4576 }, { "epoch": 0.6414856341976174, "grad_norm": 0.2926807403564453, "learning_rate": 0.00024156421908634295, "loss": 0.0429, "step": 4577 }, { "epoch": 0.641625788367204, "grad_norm": 0.16132262349128723, "learning_rate": 0.0002415498684525233, "loss": 0.032, "step": 4578 }, { "epoch": 0.6417659425367904, "grad_norm": 0.5955548286437988, "learning_rate": 0.00024153551781870363, "loss": 0.065, "step": 4579 }, { "epoch": 0.641906096706377, "grad_norm": 0.413908451795578, "learning_rate": 0.00024152116718488396, "loss": 0.0698, "step": 4580 }, { "epoch": 0.6420462508759636, "grad_norm": 0.27634891867637634, "learning_rate": 0.00024150681655106434, "loss": 0.1072, "step": 4581 }, { "epoch": 0.6421864050455501, "grad_norm": 0.4600512385368347, "learning_rate": 0.00024149246591724467, "loss": 0.0664, "step": 4582 }, { "epoch": 0.6423265592151367, "grad_norm": 0.27367252111434937, 
"learning_rate": 0.000241478115283425, "loss": 0.0354, "step": 4583 }, { "epoch": 0.6424667133847232, "grad_norm": 0.44200485944747925, "learning_rate": 0.00024146376464960535, "loss": 0.0658, "step": 4584 }, { "epoch": 0.6426068675543097, "grad_norm": 0.9172531366348267, "learning_rate": 0.00024144941401578568, "loss": 0.1618, "step": 4585 }, { "epoch": 0.6427470217238963, "grad_norm": 0.4246320128440857, "learning_rate": 0.000241435063381966, "loss": 0.0349, "step": 4586 }, { "epoch": 0.6428871758934829, "grad_norm": 0.7287715077400208, "learning_rate": 0.00024142071274814634, "loss": 0.1912, "step": 4587 }, { "epoch": 0.6430273300630693, "grad_norm": 0.25295770168304443, "learning_rate": 0.0002414063621143267, "loss": 0.0534, "step": 4588 }, { "epoch": 0.6431674842326559, "grad_norm": 0.959119975566864, "learning_rate": 0.00024139201148050702, "loss": 0.1385, "step": 4589 }, { "epoch": 0.6433076384022425, "grad_norm": 0.5495650172233582, "learning_rate": 0.00024137766084668735, "loss": 0.1598, "step": 4590 }, { "epoch": 0.643447792571829, "grad_norm": 0.36835548281669617, "learning_rate": 0.0002413633102128677, "loss": 0.0543, "step": 4591 }, { "epoch": 0.6435879467414155, "grad_norm": 0.6164745092391968, "learning_rate": 0.00024134895957904806, "loss": 0.0457, "step": 4592 }, { "epoch": 0.6437281009110021, "grad_norm": 0.3202774226665497, "learning_rate": 0.0002413346089452284, "loss": 0.049, "step": 4593 }, { "epoch": 0.6438682550805886, "grad_norm": 0.38736215233802795, "learning_rate": 0.00024132025831140875, "loss": 0.2323, "step": 4594 }, { "epoch": 0.6440084092501752, "grad_norm": 0.7943335771560669, "learning_rate": 0.00024130590767758908, "loss": 0.0683, "step": 4595 }, { "epoch": 0.6441485634197618, "grad_norm": 0.36901378631591797, "learning_rate": 0.0002412915570437694, "loss": 0.0815, "step": 4596 }, { "epoch": 0.6442887175893482, "grad_norm": 0.9580649733543396, "learning_rate": 0.00024127720640994976, "loss": 0.2632, "step": 4597 }, { "epoch": 
0.6444288717589348, "grad_norm": 0.7454104423522949, "learning_rate": 0.0002412628557761301, "loss": 0.1209, "step": 4598 }, { "epoch": 0.6445690259285214, "grad_norm": 2.1465260982513428, "learning_rate": 0.00024124850514231042, "loss": 0.1245, "step": 4599 }, { "epoch": 0.644709180098108, "grad_norm": 1.2225797176361084, "learning_rate": 0.00024123415450849077, "loss": 0.356, "step": 4600 }, { "epoch": 0.6448493342676944, "grad_norm": 0.29114487767219543, "learning_rate": 0.0002412198038746711, "loss": 0.1, "step": 4601 }, { "epoch": 0.644989488437281, "grad_norm": 0.3677774667739868, "learning_rate": 0.00024120545324085143, "loss": 0.0582, "step": 4602 }, { "epoch": 0.6451296426068676, "grad_norm": 0.22189921140670776, "learning_rate": 0.00024119110260703181, "loss": 0.0756, "step": 4603 }, { "epoch": 0.6452697967764541, "grad_norm": 0.4159783124923706, "learning_rate": 0.00024117675197321214, "loss": 0.1066, "step": 4604 }, { "epoch": 0.6454099509460406, "grad_norm": 0.3475879728794098, "learning_rate": 0.00024116240133939247, "loss": 0.0708, "step": 4605 }, { "epoch": 0.6455501051156272, "grad_norm": 0.2770099341869354, "learning_rate": 0.0002411480507055728, "loss": 0.121, "step": 4606 }, { "epoch": 0.6456902592852137, "grad_norm": 0.9238994121551514, "learning_rate": 0.00024113370007175316, "loss": 0.0834, "step": 4607 }, { "epoch": 0.6458304134548003, "grad_norm": 0.29952025413513184, "learning_rate": 0.00024111934943793348, "loss": 0.1106, "step": 4608 }, { "epoch": 0.6459705676243869, "grad_norm": 0.2555348575115204, "learning_rate": 0.0002411049988041138, "loss": 0.0633, "step": 4609 }, { "epoch": 0.6461107217939733, "grad_norm": 0.42149806022644043, "learning_rate": 0.00024109064817029417, "loss": 0.1139, "step": 4610 }, { "epoch": 0.6462508759635599, "grad_norm": 0.5292149782180786, "learning_rate": 0.0002410762975364745, "loss": 0.0853, "step": 4611 }, { "epoch": 0.6463910301331465, "grad_norm": 0.6884406805038452, "learning_rate": 
0.00024106194690265483, "loss": 0.1194, "step": 4612 }, { "epoch": 0.646531184302733, "grad_norm": 0.37258827686309814, "learning_rate": 0.0002410475962688352, "loss": 0.0624, "step": 4613 }, { "epoch": 0.6466713384723195, "grad_norm": 0.8067600727081299, "learning_rate": 0.00024103324563501554, "loss": 0.0473, "step": 4614 }, { "epoch": 0.6468114926419061, "grad_norm": 0.33629342913627625, "learning_rate": 0.00024101889500119587, "loss": 0.1091, "step": 4615 }, { "epoch": 0.6469516468114926, "grad_norm": 0.572068452835083, "learning_rate": 0.00024100454436737622, "loss": 0.1286, "step": 4616 }, { "epoch": 0.6470918009810792, "grad_norm": 0.3866429924964905, "learning_rate": 0.00024099019373355655, "loss": 0.0723, "step": 4617 }, { "epoch": 0.6472319551506658, "grad_norm": 0.23980627954006195, "learning_rate": 0.00024097584309973688, "loss": 0.0529, "step": 4618 }, { "epoch": 0.6473721093202522, "grad_norm": 0.7995784878730774, "learning_rate": 0.00024096149246591723, "loss": 0.16, "step": 4619 }, { "epoch": 0.6475122634898388, "grad_norm": 1.1471000909805298, "learning_rate": 0.00024094714183209756, "loss": 0.1222, "step": 4620 }, { "epoch": 0.6476524176594254, "grad_norm": 0.42206305265426636, "learning_rate": 0.0002409327911982779, "loss": 0.0744, "step": 4621 }, { "epoch": 0.647792571829012, "grad_norm": 0.48790743947029114, "learning_rate": 0.00024091844056445822, "loss": 0.0645, "step": 4622 }, { "epoch": 0.6479327259985984, "grad_norm": 0.3827284574508667, "learning_rate": 0.0002409040899306386, "loss": 0.0614, "step": 4623 }, { "epoch": 0.648072880168185, "grad_norm": 0.5078491568565369, "learning_rate": 0.00024088973929681893, "loss": 0.06, "step": 4624 }, { "epoch": 0.6482130343377716, "grad_norm": 0.13038191199302673, "learning_rate": 0.00024087538866299926, "loss": 0.009, "step": 4625 }, { "epoch": 0.6483531885073581, "grad_norm": 0.3788299560546875, "learning_rate": 0.00024086103802917961, "loss": 0.0476, "step": 4626 }, { "epoch": 0.6484933426769446, 
"grad_norm": 0.2752673029899597, "learning_rate": 0.00024084668739535994, "loss": 0.0922, "step": 4627 }, { "epoch": 0.6486334968465312, "grad_norm": 0.28419870138168335, "learning_rate": 0.00024083233676154027, "loss": 0.0317, "step": 4628 }, { "epoch": 0.6487736510161177, "grad_norm": 0.7177084684371948, "learning_rate": 0.00024081798612772063, "loss": 0.1128, "step": 4629 }, { "epoch": 0.6489138051857043, "grad_norm": 0.2612219750881195, "learning_rate": 0.00024080363549390096, "loss": 0.0531, "step": 4630 }, { "epoch": 0.6490539593552909, "grad_norm": 0.41845041513442993, "learning_rate": 0.00024078928486008128, "loss": 0.05, "step": 4631 }, { "epoch": 0.6491941135248773, "grad_norm": 0.3815973103046417, "learning_rate": 0.00024077493422626164, "loss": 0.094, "step": 4632 }, { "epoch": 0.6493342676944639, "grad_norm": 0.3043394684791565, "learning_rate": 0.00024076058359244197, "loss": 0.0848, "step": 4633 }, { "epoch": 0.6494744218640505, "grad_norm": 0.3352712094783783, "learning_rate": 0.0002407462329586223, "loss": 0.0247, "step": 4634 }, { "epoch": 0.649614576033637, "grad_norm": 0.7556617856025696, "learning_rate": 0.00024073188232480268, "loss": 0.1307, "step": 4635 }, { "epoch": 0.6497547302032235, "grad_norm": 0.6536099314689636, "learning_rate": 0.000240717531690983, "loss": 0.028, "step": 4636 }, { "epoch": 0.6498948843728101, "grad_norm": 0.49741828441619873, "learning_rate": 0.00024070318105716334, "loss": 0.0838, "step": 4637 }, { "epoch": 0.6500350385423966, "grad_norm": 0.8072445392608643, "learning_rate": 0.0002406888304233437, "loss": 0.0669, "step": 4638 }, { "epoch": 0.6501751927119832, "grad_norm": 0.6338628530502319, "learning_rate": 0.00024067447978952402, "loss": 0.1524, "step": 4639 }, { "epoch": 0.6503153468815698, "grad_norm": 0.24803027510643005, "learning_rate": 0.00024066012915570435, "loss": 0.076, "step": 4640 }, { "epoch": 0.6504555010511562, "grad_norm": 0.2887865900993347, "learning_rate": 0.00024064577852188468, "loss": 
0.0583, "step": 4641 }, { "epoch": 0.6505956552207428, "grad_norm": 0.3580440580844879, "learning_rate": 0.00024063142788806503, "loss": 0.0844, "step": 4642 }, { "epoch": 0.6507358093903294, "grad_norm": 1.5213966369628906, "learning_rate": 0.00024061707725424536, "loss": 0.1455, "step": 4643 }, { "epoch": 0.650875963559916, "grad_norm": 0.40449780225753784, "learning_rate": 0.0002406027266204257, "loss": 0.0878, "step": 4644 }, { "epoch": 0.6510161177295024, "grad_norm": 0.5398775339126587, "learning_rate": 0.00024058837598660607, "loss": 0.0664, "step": 4645 }, { "epoch": 0.651156271899089, "grad_norm": 1.061957836151123, "learning_rate": 0.0002405740253527864, "loss": 0.1866, "step": 4646 }, { "epoch": 0.6512964260686755, "grad_norm": 1.5541675090789795, "learning_rate": 0.00024055967471896673, "loss": 0.2731, "step": 4647 }, { "epoch": 0.6514365802382621, "grad_norm": 1.4449336528778076, "learning_rate": 0.0002405453240851471, "loss": 0.2413, "step": 4648 }, { "epoch": 0.6515767344078486, "grad_norm": 1.1806188821792603, "learning_rate": 0.00024053097345132742, "loss": 0.088, "step": 4649 }, { "epoch": 0.6517168885774351, "grad_norm": 1.8111268281936646, "learning_rate": 0.00024051662281750774, "loss": 0.1807, "step": 4650 }, { "epoch": 0.6518570427470217, "grad_norm": 0.18268246948719025, "learning_rate": 0.0002405022721836881, "loss": 0.0353, "step": 4651 }, { "epoch": 0.6519971969166083, "grad_norm": 0.2123505026102066, "learning_rate": 0.00024048792154986843, "loss": 0.042, "step": 4652 }, { "epoch": 0.6521373510861949, "grad_norm": 0.32514774799346924, "learning_rate": 0.00024047357091604876, "loss": 0.0827, "step": 4653 }, { "epoch": 0.6522775052557813, "grad_norm": 0.18817712366580963, "learning_rate": 0.0002404592202822291, "loss": 0.0414, "step": 4654 }, { "epoch": 0.6524176594253679, "grad_norm": 0.4933124780654907, "learning_rate": 0.00024044486964840947, "loss": 0.1173, "step": 4655 }, { "epoch": 0.6525578135949545, "grad_norm": 0.5564436316490173, 
"learning_rate": 0.0002404305190145898, "loss": 0.0875, "step": 4656 }, { "epoch": 0.652697967764541, "grad_norm": 0.5919475555419922, "learning_rate": 0.00024041616838077013, "loss": 0.1728, "step": 4657 }, { "epoch": 0.6528381219341275, "grad_norm": 0.28961148858070374, "learning_rate": 0.00024040181774695048, "loss": 0.1137, "step": 4658 }, { "epoch": 0.6529782761037141, "grad_norm": 0.36180225014686584, "learning_rate": 0.0002403874671131308, "loss": 0.1185, "step": 4659 }, { "epoch": 0.6531184302733006, "grad_norm": 0.46975764632225037, "learning_rate": 0.00024037311647931114, "loss": 0.0932, "step": 4660 }, { "epoch": 0.6532585844428872, "grad_norm": 0.46723005175590515, "learning_rate": 0.0002403587658454915, "loss": 0.1139, "step": 4661 }, { "epoch": 0.6533987386124738, "grad_norm": 0.36402279138565063, "learning_rate": 0.00024034441521167182, "loss": 0.0828, "step": 4662 }, { "epoch": 0.6535388927820602, "grad_norm": 0.3752870559692383, "learning_rate": 0.00024033006457785215, "loss": 0.0681, "step": 4663 }, { "epoch": 0.6536790469516468, "grad_norm": 0.313428670167923, "learning_rate": 0.0002403157139440325, "loss": 0.0622, "step": 4664 }, { "epoch": 0.6538192011212334, "grad_norm": 0.5038762092590332, "learning_rate": 0.00024030136331021284, "loss": 0.0642, "step": 4665 }, { "epoch": 0.6539593552908199, "grad_norm": 0.26616787910461426, "learning_rate": 0.00024028701267639316, "loss": 0.0495, "step": 4666 }, { "epoch": 0.6540995094604064, "grad_norm": 0.30009329319000244, "learning_rate": 0.00024027266204257355, "loss": 0.04, "step": 4667 }, { "epoch": 0.654239663629993, "grad_norm": 0.4667293131351471, "learning_rate": 0.00024025831140875388, "loss": 0.1864, "step": 4668 }, { "epoch": 0.6543798177995795, "grad_norm": 0.48057129979133606, "learning_rate": 0.0002402439607749342, "loss": 0.1088, "step": 4669 }, { "epoch": 0.6545199719691661, "grad_norm": 0.5876733064651489, "learning_rate": 0.00024022961014111456, "loss": 0.0927, "step": 4670 }, { "epoch": 
0.6546601261387526, "grad_norm": 0.6569058895111084, "learning_rate": 0.0002402152595072949, "loss": 0.043, "step": 4671 }, { "epoch": 0.6548002803083391, "grad_norm": 0.7138683795928955, "learning_rate": 0.00024020090887347522, "loss": 0.14, "step": 4672 }, { "epoch": 0.6549404344779257, "grad_norm": 0.5391191244125366, "learning_rate": 0.00024018655823965557, "loss": 0.1094, "step": 4673 }, { "epoch": 0.6550805886475123, "grad_norm": 0.3928048610687256, "learning_rate": 0.0002401722076058359, "loss": 0.0854, "step": 4674 }, { "epoch": 0.6552207428170989, "grad_norm": 0.605776309967041, "learning_rate": 0.00024015785697201623, "loss": 0.1096, "step": 4675 }, { "epoch": 0.6553608969866853, "grad_norm": 0.22983290255069733, "learning_rate": 0.00024014350633819656, "loss": 0.0677, "step": 4676 }, { "epoch": 0.6555010511562719, "grad_norm": 0.2627416253089905, "learning_rate": 0.00024012915570437694, "loss": 0.0863, "step": 4677 }, { "epoch": 0.6556412053258585, "grad_norm": 0.25304800271987915, "learning_rate": 0.00024011480507055727, "loss": 0.0625, "step": 4678 }, { "epoch": 0.655781359495445, "grad_norm": 0.7146767377853394, "learning_rate": 0.0002401004544367376, "loss": 0.1076, "step": 4679 }, { "epoch": 0.6559215136650315, "grad_norm": 0.8054242134094238, "learning_rate": 0.00024008610380291795, "loss": 0.1975, "step": 4680 }, { "epoch": 0.6560616678346181, "grad_norm": 0.3236759305000305, "learning_rate": 0.00024007175316909828, "loss": 0.0466, "step": 4681 }, { "epoch": 0.6562018220042046, "grad_norm": 0.28964611887931824, "learning_rate": 0.0002400574025352786, "loss": 0.0635, "step": 4682 }, { "epoch": 0.6563419761737912, "grad_norm": 0.48297131061553955, "learning_rate": 0.00024004305190145897, "loss": 0.0837, "step": 4683 }, { "epoch": 0.6564821303433778, "grad_norm": 0.16519542038440704, "learning_rate": 0.0002400287012676393, "loss": 0.0458, "step": 4684 }, { "epoch": 0.6566222845129642, "grad_norm": 0.4170892536640167, "learning_rate": 
0.00024001435063381962, "loss": 0.0595, "step": 4685 }, { "epoch": 0.6567624386825508, "grad_norm": 0.31840410828590393, "learning_rate": 0.00023999999999999998, "loss": 0.0551, "step": 4686 }, { "epoch": 0.6569025928521374, "grad_norm": 0.4003826677799225, "learning_rate": 0.00023998564936618033, "loss": 0.0468, "step": 4687 }, { "epoch": 0.6570427470217239, "grad_norm": 0.5897322297096252, "learning_rate": 0.00023997129873236066, "loss": 0.1636, "step": 4688 }, { "epoch": 0.6571829011913104, "grad_norm": 0.3908691704273224, "learning_rate": 0.00023995694809854102, "loss": 0.0128, "step": 4689 }, { "epoch": 0.657323055360897, "grad_norm": 0.3724898397922516, "learning_rate": 0.00023994259746472135, "loss": 0.0766, "step": 4690 }, { "epoch": 0.6574632095304835, "grad_norm": 0.40407121181488037, "learning_rate": 0.00023992824683090168, "loss": 0.0506, "step": 4691 }, { "epoch": 0.6576033637000701, "grad_norm": 0.3172832131385803, "learning_rate": 0.000239913896197082, "loss": 0.0763, "step": 4692 }, { "epoch": 0.6577435178696566, "grad_norm": 0.32272204756736755, "learning_rate": 0.00023989954556326236, "loss": 0.043, "step": 4693 }, { "epoch": 0.6578836720392431, "grad_norm": 0.6650473475456238, "learning_rate": 0.0002398851949294427, "loss": 0.1666, "step": 4694 }, { "epoch": 0.6580238262088297, "grad_norm": 0.5213014483451843, "learning_rate": 0.00023987084429562302, "loss": 0.1701, "step": 4695 }, { "epoch": 0.6581639803784163, "grad_norm": 0.49516984820365906, "learning_rate": 0.00023985649366180337, "loss": 0.1059, "step": 4696 }, { "epoch": 0.6583041345480029, "grad_norm": 0.4641109108924866, "learning_rate": 0.0002398421430279837, "loss": 0.0857, "step": 4697 }, { "epoch": 0.6584442887175893, "grad_norm": 4.261027812957764, "learning_rate": 0.00023982779239416403, "loss": 0.1845, "step": 4698 }, { "epoch": 0.6585844428871759, "grad_norm": 0.8818010091781616, "learning_rate": 0.0002398134417603444, "loss": 0.1359, "step": 4699 }, { "epoch": 
0.6587245970567625, "grad_norm": 2.034306049346924, "learning_rate": 0.00023979909112652474, "loss": 0.1046, "step": 4700 }, { "epoch": 0.658864751226349, "grad_norm": 0.5487805604934692, "learning_rate": 0.00023978474049270507, "loss": 0.1404, "step": 4701 }, { "epoch": 0.6590049053959355, "grad_norm": 0.8866620659828186, "learning_rate": 0.00023977038985888543, "loss": 0.1678, "step": 4702 }, { "epoch": 0.659145059565522, "grad_norm": 0.3447425365447998, "learning_rate": 0.00023975603922506575, "loss": 0.0572, "step": 4703 }, { "epoch": 0.6592852137351086, "grad_norm": 0.31762513518333435, "learning_rate": 0.00023974168859124608, "loss": 0.0557, "step": 4704 }, { "epoch": 0.6594253679046952, "grad_norm": 0.6727063059806824, "learning_rate": 0.00023972733795742644, "loss": 0.1323, "step": 4705 }, { "epoch": 0.6595655220742818, "grad_norm": 0.323579877614975, "learning_rate": 0.00023971298732360677, "loss": 0.0524, "step": 4706 }, { "epoch": 0.6597056762438682, "grad_norm": 0.3233967423439026, "learning_rate": 0.0002396986366897871, "loss": 0.0605, "step": 4707 }, { "epoch": 0.6598458304134548, "grad_norm": 0.2196207493543625, "learning_rate": 0.00023968428605596748, "loss": 0.0484, "step": 4708 }, { "epoch": 0.6599859845830414, "grad_norm": 0.2329094111919403, "learning_rate": 0.0002396699354221478, "loss": 0.085, "step": 4709 }, { "epoch": 0.6601261387526279, "grad_norm": 0.43465256690979004, "learning_rate": 0.00023965558478832814, "loss": 0.1119, "step": 4710 }, { "epoch": 0.6602662929222144, "grad_norm": 0.2841493785381317, "learning_rate": 0.00023964123415450846, "loss": 0.0525, "step": 4711 }, { "epoch": 0.660406447091801, "grad_norm": 0.47998476028442383, "learning_rate": 0.00023962688352068882, "loss": 0.0859, "step": 4712 }, { "epoch": 0.6605466012613875, "grad_norm": 0.399516761302948, "learning_rate": 0.00023961253288686915, "loss": 0.0758, "step": 4713 }, { "epoch": 0.6606867554309741, "grad_norm": 0.33252906799316406, "learning_rate": 
0.00023959818225304948, "loss": 0.0659, "step": 4714 }, { "epoch": 0.6608269096005606, "grad_norm": 0.2603592276573181, "learning_rate": 0.00023958383161922983, "loss": 0.0851, "step": 4715 }, { "epoch": 0.6609670637701471, "grad_norm": 0.23905311524868011, "learning_rate": 0.00023956948098541016, "loss": 0.0539, "step": 4716 }, { "epoch": 0.6611072179397337, "grad_norm": 0.5367918014526367, "learning_rate": 0.0002395551303515905, "loss": 0.0949, "step": 4717 }, { "epoch": 0.6612473721093203, "grad_norm": 0.5777433514595032, "learning_rate": 0.00023954077971777085, "loss": 0.1243, "step": 4718 }, { "epoch": 0.6613875262789068, "grad_norm": 0.5530431866645813, "learning_rate": 0.0002395264290839512, "loss": 0.068, "step": 4719 }, { "epoch": 0.6615276804484933, "grad_norm": 0.33826640248298645, "learning_rate": 0.00023951207845013153, "loss": 0.0496, "step": 4720 }, { "epoch": 0.6616678346180799, "grad_norm": 0.432858943939209, "learning_rate": 0.00023949772781631189, "loss": 0.1, "step": 4721 }, { "epoch": 0.6618079887876664, "grad_norm": 0.34647655487060547, "learning_rate": 0.00023948337718249221, "loss": 0.0985, "step": 4722 }, { "epoch": 0.661948142957253, "grad_norm": 0.48452380299568176, "learning_rate": 0.00023946902654867254, "loss": 0.1269, "step": 4723 }, { "epoch": 0.6620882971268395, "grad_norm": 0.2846594452857971, "learning_rate": 0.0002394546759148529, "loss": 0.0564, "step": 4724 }, { "epoch": 0.662228451296426, "grad_norm": 0.41883227229118347, "learning_rate": 0.00023944032528103323, "loss": 0.0809, "step": 4725 }, { "epoch": 0.6623686054660126, "grad_norm": 0.5125524997711182, "learning_rate": 0.00023942597464721356, "loss": 0.0947, "step": 4726 }, { "epoch": 0.6625087596355992, "grad_norm": 0.6130154132843018, "learning_rate": 0.00023941162401339388, "loss": 0.0821, "step": 4727 }, { "epoch": 0.6626489138051858, "grad_norm": 0.7663873434066772, "learning_rate": 0.00023939727337957424, "loss": 0.1176, "step": 4728 }, { "epoch": 0.6627890679747722, 
"grad_norm": 0.8150898814201355, "learning_rate": 0.00023938292274575457, "loss": 0.0406, "step": 4729 }, { "epoch": 0.6629292221443588, "grad_norm": 0.34998074173927307, "learning_rate": 0.0002393685721119349, "loss": 0.0529, "step": 4730 }, { "epoch": 0.6630693763139454, "grad_norm": 0.28779566287994385, "learning_rate": 0.00023935422147811528, "loss": 0.0688, "step": 4731 }, { "epoch": 0.6632095304835319, "grad_norm": 0.5959911942481995, "learning_rate": 0.0002393398708442956, "loss": 0.0879, "step": 4732 }, { "epoch": 0.6633496846531184, "grad_norm": 0.47114622592926025, "learning_rate": 0.00023932552021047594, "loss": 0.0975, "step": 4733 }, { "epoch": 0.663489838822705, "grad_norm": 0.45995211601257324, "learning_rate": 0.0002393111695766563, "loss": 0.1008, "step": 4734 }, { "epoch": 0.6636299929922915, "grad_norm": 0.604130744934082, "learning_rate": 0.00023929681894283662, "loss": 0.108, "step": 4735 }, { "epoch": 0.6637701471618781, "grad_norm": 0.5945006012916565, "learning_rate": 0.00023928246830901695, "loss": 0.1285, "step": 4736 }, { "epoch": 0.6639103013314646, "grad_norm": 0.43148139119148254, "learning_rate": 0.0002392681176751973, "loss": 0.1201, "step": 4737 }, { "epoch": 0.6640504555010511, "grad_norm": 0.4388241767883301, "learning_rate": 0.00023925376704137763, "loss": 0.0963, "step": 4738 }, { "epoch": 0.6641906096706377, "grad_norm": 0.7865726351737976, "learning_rate": 0.00023923941640755796, "loss": 0.0858, "step": 4739 }, { "epoch": 0.6643307638402243, "grad_norm": 0.3987134099006653, "learning_rate": 0.00023922506577373834, "loss": 0.1214, "step": 4740 }, { "epoch": 0.6644709180098108, "grad_norm": 0.3827250897884369, "learning_rate": 0.00023921071513991867, "loss": 0.06, "step": 4741 }, { "epoch": 0.6646110721793973, "grad_norm": 0.554088830947876, "learning_rate": 0.000239196364506099, "loss": 0.1411, "step": 4742 }, { "epoch": 0.6647512263489839, "grad_norm": 0.623368501663208, "learning_rate": 0.00023918201387227936, "loss": 0.1916, 
"step": 4743 }, { "epoch": 0.6648913805185704, "grad_norm": 0.5339826941490173, "learning_rate": 0.00023916766323845969, "loss": 0.0997, "step": 4744 }, { "epoch": 0.665031534688157, "grad_norm": 1.4671242237091064, "learning_rate": 0.00023915331260464001, "loss": 0.1852, "step": 4745 }, { "epoch": 0.6651716888577435, "grad_norm": 1.4832878112792969, "learning_rate": 0.00023913896197082034, "loss": 0.1548, "step": 4746 }, { "epoch": 0.66531184302733, "grad_norm": 0.9231269955635071, "learning_rate": 0.0002391246113370007, "loss": 0.1329, "step": 4747 }, { "epoch": 0.6654519971969166, "grad_norm": 0.6684432029724121, "learning_rate": 0.00023911026070318103, "loss": 0.1273, "step": 4748 }, { "epoch": 0.6655921513665032, "grad_norm": 0.8273610472679138, "learning_rate": 0.00023909591006936136, "loss": 0.1433, "step": 4749 }, { "epoch": 0.6657323055360898, "grad_norm": 0.9958446621894836, "learning_rate": 0.00023908155943554174, "loss": 0.2772, "step": 4750 }, { "epoch": 0.6658724597056762, "grad_norm": 0.2235517054796219, "learning_rate": 0.00023906720880172207, "loss": 0.0532, "step": 4751 }, { "epoch": 0.6660126138752628, "grad_norm": 0.40702909231185913, "learning_rate": 0.0002390528581679024, "loss": 0.1269, "step": 4752 }, { "epoch": 0.6661527680448494, "grad_norm": 0.25221917033195496, "learning_rate": 0.00023903850753408275, "loss": 0.0645, "step": 4753 }, { "epoch": 0.6662929222144359, "grad_norm": 0.36963382363319397, "learning_rate": 0.00023902415690026308, "loss": 0.0915, "step": 4754 }, { "epoch": 0.6664330763840224, "grad_norm": 0.3213574290275574, "learning_rate": 0.0002390098062664434, "loss": 0.1093, "step": 4755 }, { "epoch": 0.666573230553609, "grad_norm": 0.6365625858306885, "learning_rate": 0.00023899545563262376, "loss": 0.1657, "step": 4756 }, { "epoch": 0.6667133847231955, "grad_norm": 0.2171628326177597, "learning_rate": 0.0002389811049988041, "loss": 0.0622, "step": 4757 }, { "epoch": 0.6668535388927821, "grad_norm": 0.6924606561660767, 
"learning_rate": 0.00023896675436498442, "loss": 0.0916, "step": 4758 }, { "epoch": 0.6669936930623686, "grad_norm": 0.3006793260574341, "learning_rate": 0.00023895240373116478, "loss": 0.0835, "step": 4759 }, { "epoch": 0.6671338472319551, "grad_norm": 0.2549748420715332, "learning_rate": 0.0002389380530973451, "loss": 0.0573, "step": 4760 }, { "epoch": 0.6672740014015417, "grad_norm": 0.35190248489379883, "learning_rate": 0.00023892370246352543, "loss": 0.0804, "step": 4761 }, { "epoch": 0.6674141555711283, "grad_norm": 0.39118215441703796, "learning_rate": 0.00023890935182970576, "loss": 0.0443, "step": 4762 }, { "epoch": 0.6675543097407148, "grad_norm": 0.4683927595615387, "learning_rate": 0.00023889500119588615, "loss": 0.0437, "step": 4763 }, { "epoch": 0.6676944639103013, "grad_norm": 0.7677825689315796, "learning_rate": 0.00023888065056206647, "loss": 0.1322, "step": 4764 }, { "epoch": 0.6678346180798879, "grad_norm": 0.40390464663505554, "learning_rate": 0.0002388662999282468, "loss": 0.0959, "step": 4765 }, { "epoch": 0.6679747722494744, "grad_norm": 0.6339204907417297, "learning_rate": 0.00023885194929442716, "loss": 0.0531, "step": 4766 }, { "epoch": 0.668114926419061, "grad_norm": 0.5264211893081665, "learning_rate": 0.0002388375986606075, "loss": 0.1239, "step": 4767 }, { "epoch": 0.6682550805886475, "grad_norm": 0.4180755615234375, "learning_rate": 0.00023882324802678782, "loss": 0.0635, "step": 4768 }, { "epoch": 0.668395234758234, "grad_norm": 1.2219139337539673, "learning_rate": 0.00023880889739296817, "loss": 0.1783, "step": 4769 }, { "epoch": 0.6685353889278206, "grad_norm": 0.42783322930336, "learning_rate": 0.0002387945467591485, "loss": 0.1009, "step": 4770 }, { "epoch": 0.6686755430974072, "grad_norm": 0.3901127278804779, "learning_rate": 0.00023878019612532883, "loss": 0.0391, "step": 4771 }, { "epoch": 0.6688156972669937, "grad_norm": 0.30918020009994507, "learning_rate": 0.0002387658454915092, "loss": 0.1143, "step": 4772 }, { "epoch": 
0.6689558514365802, "grad_norm": 0.4193136394023895, "learning_rate": 0.00023875149485768954, "loss": 0.0811, "step": 4773 }, { "epoch": 0.6690960056061668, "grad_norm": 0.2928798496723175, "learning_rate": 0.00023873714422386987, "loss": 0.0325, "step": 4774 }, { "epoch": 0.6692361597757533, "grad_norm": 0.5497828125953674, "learning_rate": 0.00023872279359005022, "loss": 0.0795, "step": 4775 }, { "epoch": 0.6693763139453399, "grad_norm": 0.3676680326461792, "learning_rate": 0.00023870844295623055, "loss": 0.0625, "step": 4776 }, { "epoch": 0.6695164681149264, "grad_norm": 0.31779536604881287, "learning_rate": 0.00023869409232241088, "loss": 0.0331, "step": 4777 }, { "epoch": 0.669656622284513, "grad_norm": 1.0345954895019531, "learning_rate": 0.00023867974168859124, "loss": 0.1395, "step": 4778 }, { "epoch": 0.6697967764540995, "grad_norm": 0.6910383701324463, "learning_rate": 0.00023866539105477157, "loss": 0.0505, "step": 4779 }, { "epoch": 0.6699369306236861, "grad_norm": 0.38107091188430786, "learning_rate": 0.0002386510404209519, "loss": 0.086, "step": 4780 }, { "epoch": 0.6700770847932725, "grad_norm": 0.4565012454986572, "learning_rate": 0.00023863668978713222, "loss": 0.1188, "step": 4781 }, { "epoch": 0.6702172389628591, "grad_norm": 0.40541872382164, "learning_rate": 0.0002386223391533126, "loss": 0.1722, "step": 4782 }, { "epoch": 0.6703573931324457, "grad_norm": 0.42854994535446167, "learning_rate": 0.00023860798851949293, "loss": 0.1306, "step": 4783 }, { "epoch": 0.6704975473020323, "grad_norm": 0.9302937984466553, "learning_rate": 0.00023859363788567326, "loss": 0.1103, "step": 4784 }, { "epoch": 0.6706377014716188, "grad_norm": 0.23054715991020203, "learning_rate": 0.00023857928725185362, "loss": 0.0615, "step": 4785 }, { "epoch": 0.6707778556412053, "grad_norm": 0.56084805727005, "learning_rate": 0.00023856493661803395, "loss": 0.1594, "step": 4786 }, { "epoch": 0.6709180098107919, "grad_norm": 0.5259038209915161, "learning_rate": 
0.00023855058598421427, "loss": 0.1421, "step": 4787 }, { "epoch": 0.6710581639803784, "grad_norm": 0.22728697955608368, "learning_rate": 0.00023853623535039463, "loss": 0.0463, "step": 4788 }, { "epoch": 0.671198318149965, "grad_norm": 0.5576725006103516, "learning_rate": 0.00023852188471657496, "loss": 0.1141, "step": 4789 }, { "epoch": 0.6713384723195515, "grad_norm": 0.3839956223964691, "learning_rate": 0.0002385075340827553, "loss": 0.0439, "step": 4790 }, { "epoch": 0.671478626489138, "grad_norm": 1.3236631155014038, "learning_rate": 0.00023849318344893564, "loss": 0.1423, "step": 4791 }, { "epoch": 0.6716187806587246, "grad_norm": 1.4242830276489258, "learning_rate": 0.00023847883281511597, "loss": 0.1033, "step": 4792 }, { "epoch": 0.6717589348283112, "grad_norm": 0.7619696855545044, "learning_rate": 0.0002384644821812963, "loss": 0.0761, "step": 4793 }, { "epoch": 0.6718990889978977, "grad_norm": 1.0667437314987183, "learning_rate": 0.00023845013154747668, "loss": 0.3012, "step": 4794 }, { "epoch": 0.6720392431674842, "grad_norm": 0.4316199719905853, "learning_rate": 0.000238435780913657, "loss": 0.1149, "step": 4795 }, { "epoch": 0.6721793973370708, "grad_norm": 0.636736273765564, "learning_rate": 0.00023842143027983734, "loss": 0.0896, "step": 4796 }, { "epoch": 0.6723195515066573, "grad_norm": 1.3274928331375122, "learning_rate": 0.00023840707964601767, "loss": 0.0921, "step": 4797 }, { "epoch": 0.6724597056762439, "grad_norm": 1.606554388999939, "learning_rate": 0.00023839272901219802, "loss": 0.153, "step": 4798 }, { "epoch": 0.6725998598458304, "grad_norm": 0.6033066511154175, "learning_rate": 0.00023837837837837835, "loss": 0.0965, "step": 4799 }, { "epoch": 0.6727400140154169, "grad_norm": 0.1443236917257309, "learning_rate": 0.00023836402774455868, "loss": 0.0135, "step": 4800 }, { "epoch": 0.6728801681850035, "grad_norm": 0.2583099901676178, "learning_rate": 0.00023834967711073904, "loss": 0.0416, "step": 4801 }, { "epoch": 0.6730203223545901, 
"grad_norm": 0.43310612440109253, "learning_rate": 0.00023833532647691937, "loss": 0.0387, "step": 4802 }, { "epoch": 0.6731604765241765, "grad_norm": 0.39550283551216125, "learning_rate": 0.0002383209758430997, "loss": 0.0933, "step": 4803 }, { "epoch": 0.6733006306937631, "grad_norm": 0.8012619614601135, "learning_rate": 0.00023830662520928008, "loss": 0.143, "step": 4804 }, { "epoch": 0.6734407848633497, "grad_norm": 0.31968745589256287, "learning_rate": 0.0002382922745754604, "loss": 0.0795, "step": 4805 }, { "epoch": 0.6735809390329363, "grad_norm": 0.3046531677246094, "learning_rate": 0.00023827792394164073, "loss": 0.0901, "step": 4806 }, { "epoch": 0.6737210932025228, "grad_norm": 0.629946768283844, "learning_rate": 0.0002382635733078211, "loss": 0.0539, "step": 4807 }, { "epoch": 0.6738612473721093, "grad_norm": 0.23978684842586517, "learning_rate": 0.00023824922267400142, "loss": 0.0628, "step": 4808 }, { "epoch": 0.6740014015416959, "grad_norm": 0.36997658014297485, "learning_rate": 0.00023823487204018175, "loss": 0.1285, "step": 4809 }, { "epoch": 0.6741415557112824, "grad_norm": 0.21985359489917755, "learning_rate": 0.0002382205214063621, "loss": 0.0638, "step": 4810 }, { "epoch": 0.674281709880869, "grad_norm": 0.305414617061615, "learning_rate": 0.00023820617077254243, "loss": 0.081, "step": 4811 }, { "epoch": 0.6744218640504555, "grad_norm": 0.4753924608230591, "learning_rate": 0.00023819182013872276, "loss": 0.0756, "step": 4812 }, { "epoch": 0.674562018220042, "grad_norm": 0.37053221464157104, "learning_rate": 0.00023817746950490312, "loss": 0.0663, "step": 4813 }, { "epoch": 0.6747021723896286, "grad_norm": 0.5205087065696716, "learning_rate": 0.00023816311887108347, "loss": 0.0875, "step": 4814 }, { "epoch": 0.6748423265592152, "grad_norm": 0.3467317223548889, "learning_rate": 0.0002381487682372638, "loss": 0.0612, "step": 4815 }, { "epoch": 0.6749824807288017, "grad_norm": 0.20218747854232788, "learning_rate": 0.00023813441760344413, "loss": 
0.0481, "step": 4816 }, { "epoch": 0.6751226348983882, "grad_norm": 0.14742673933506012, "learning_rate": 0.00023812006696962448, "loss": 0.0291, "step": 4817 }, { "epoch": 0.6752627890679748, "grad_norm": 0.5972278714179993, "learning_rate": 0.0002381057163358048, "loss": 0.0621, "step": 4818 }, { "epoch": 0.6754029432375613, "grad_norm": 0.43682608008384705, "learning_rate": 0.00023809136570198514, "loss": 0.0953, "step": 4819 }, { "epoch": 0.6755430974071479, "grad_norm": 1.4963914155960083, "learning_rate": 0.0002380770150681655, "loss": 0.2019, "step": 4820 }, { "epoch": 0.6756832515767344, "grad_norm": 0.315142959356308, "learning_rate": 0.00023806266443434583, "loss": 0.0256, "step": 4821 }, { "epoch": 0.6758234057463209, "grad_norm": 0.48459675908088684, "learning_rate": 0.00023804831380052615, "loss": 0.0789, "step": 4822 }, { "epoch": 0.6759635599159075, "grad_norm": 1.2829340696334839, "learning_rate": 0.0002380339631667065, "loss": 0.1706, "step": 4823 }, { "epoch": 0.6761037140854941, "grad_norm": 0.4187527000904083, "learning_rate": 0.00023801961253288684, "loss": 0.0577, "step": 4824 }, { "epoch": 0.6762438682550805, "grad_norm": 0.5584620237350464, "learning_rate": 0.00023800526189906717, "loss": 0.1846, "step": 4825 }, { "epoch": 0.6763840224246671, "grad_norm": 0.26539307832717896, "learning_rate": 0.00023799091126524755, "loss": 0.017, "step": 4826 }, { "epoch": 0.6765241765942537, "grad_norm": 0.3480900228023529, "learning_rate": 0.00023797656063142788, "loss": 0.0231, "step": 4827 }, { "epoch": 0.6766643307638402, "grad_norm": 0.6160892844200134, "learning_rate": 0.0002379622099976082, "loss": 0.1144, "step": 4828 }, { "epoch": 0.6768044849334268, "grad_norm": 0.10382203757762909, "learning_rate": 0.00023794785936378856, "loss": 0.02, "step": 4829 }, { "epoch": 0.6769446391030133, "grad_norm": 0.3970741331577301, "learning_rate": 0.0002379335087299689, "loss": 0.0416, "step": 4830 }, { "epoch": 0.6770847932725998, "grad_norm": 
0.33715978264808655, "learning_rate": 0.00023791915809614922, "loss": 0.0909, "step": 4831 }, { "epoch": 0.6772249474421864, "grad_norm": 0.292333722114563, "learning_rate": 0.00023790480746232955, "loss": 0.0417, "step": 4832 }, { "epoch": 0.677365101611773, "grad_norm": 0.33026257157325745, "learning_rate": 0.0002378904568285099, "loss": 0.078, "step": 4833 }, { "epoch": 0.6775052557813595, "grad_norm": 0.3665729761123657, "learning_rate": 0.00023787610619469023, "loss": 0.0648, "step": 4834 }, { "epoch": 0.677645409950946, "grad_norm": 0.40561768412590027, "learning_rate": 0.00023786175556087056, "loss": 0.0338, "step": 4835 }, { "epoch": 0.6777855641205326, "grad_norm": 0.6484575867652893, "learning_rate": 0.00023784740492705094, "loss": 0.0898, "step": 4836 }, { "epoch": 0.6779257182901192, "grad_norm": 0.4605390429496765, "learning_rate": 0.00023783305429323127, "loss": 0.0819, "step": 4837 }, { "epoch": 0.6780658724597056, "grad_norm": 0.5616440176963806, "learning_rate": 0.0002378187036594116, "loss": 0.086, "step": 4838 }, { "epoch": 0.6782060266292922, "grad_norm": 0.44194066524505615, "learning_rate": 0.00023780435302559196, "loss": 0.0987, "step": 4839 }, { "epoch": 0.6783461807988788, "grad_norm": 0.7424604892730713, "learning_rate": 0.00023779000239177228, "loss": 0.1066, "step": 4840 }, { "epoch": 0.6784863349684653, "grad_norm": 0.4017472565174103, "learning_rate": 0.0002377756517579526, "loss": 0.0896, "step": 4841 }, { "epoch": 0.6786264891380519, "grad_norm": 0.2393740564584732, "learning_rate": 0.00023776130112413297, "loss": 0.0822, "step": 4842 }, { "epoch": 0.6787666433076384, "grad_norm": 0.26493439078330994, "learning_rate": 0.0002377469504903133, "loss": 0.045, "step": 4843 }, { "epoch": 0.6789067974772249, "grad_norm": 0.82822185754776, "learning_rate": 0.00023773259985649363, "loss": 0.0863, "step": 4844 }, { "epoch": 0.6790469516468115, "grad_norm": 0.40902775526046753, "learning_rate": 0.000237718249222674, "loss": 0.0507, "step": 4845 
}, { "epoch": 0.6791871058163981, "grad_norm": 2.1106908321380615, "learning_rate": 0.00023770389858885434, "loss": 0.2549, "step": 4846 }, { "epoch": 0.6793272599859845, "grad_norm": 0.7432109117507935, "learning_rate": 0.00023768954795503467, "loss": 0.0946, "step": 4847 }, { "epoch": 0.6794674141555711, "grad_norm": 0.15034587681293488, "learning_rate": 0.000237675197321215, "loss": 0.0112, "step": 4848 }, { "epoch": 0.6796075683251577, "grad_norm": 0.6746618151664734, "learning_rate": 0.00023766084668739535, "loss": 0.0554, "step": 4849 }, { "epoch": 0.6797477224947442, "grad_norm": 1.48973548412323, "learning_rate": 0.00023764649605357568, "loss": 0.2704, "step": 4850 }, { "epoch": 0.6798878766643308, "grad_norm": 0.29558584094047546, "learning_rate": 0.000237632145419756, "loss": 0.047, "step": 4851 }, { "epoch": 0.6800280308339173, "grad_norm": 0.29801949858665466, "learning_rate": 0.00023761779478593636, "loss": 0.0794, "step": 4852 }, { "epoch": 0.6801681850035038, "grad_norm": 0.33727243542671204, "learning_rate": 0.0002376034441521167, "loss": 0.0582, "step": 4853 }, { "epoch": 0.6803083391730904, "grad_norm": 0.358218252658844, "learning_rate": 0.00023758909351829702, "loss": 0.0495, "step": 4854 }, { "epoch": 0.680448493342677, "grad_norm": 0.7154395580291748, "learning_rate": 0.00023757474288447738, "loss": 0.0896, "step": 4855 }, { "epoch": 0.6805886475122634, "grad_norm": 0.42337000370025635, "learning_rate": 0.0002375603922506577, "loss": 0.0421, "step": 4856 }, { "epoch": 0.68072880168185, "grad_norm": 0.40004295110702515, "learning_rate": 0.00023754604161683803, "loss": 0.1212, "step": 4857 }, { "epoch": 0.6808689558514366, "grad_norm": 0.6850024461746216, "learning_rate": 0.00023753169098301842, "loss": 0.0956, "step": 4858 }, { "epoch": 0.6810091100210232, "grad_norm": 0.5558722019195557, "learning_rate": 0.00023751734034919874, "loss": 0.0788, "step": 4859 }, { "epoch": 0.6811492641906096, "grad_norm": 0.2649074196815491, "learning_rate": 
0.00023750298971537907, "loss": 0.0384, "step": 4860 }, { "epoch": 0.6812894183601962, "grad_norm": 0.3291005790233612, "learning_rate": 0.00023748863908155943, "loss": 0.0307, "step": 4861 }, { "epoch": 0.6814295725297828, "grad_norm": 0.33707088232040405, "learning_rate": 0.00023747428844773976, "loss": 0.046, "step": 4862 }, { "epoch": 0.6815697266993693, "grad_norm": 0.5303124189376831, "learning_rate": 0.00023745993781392009, "loss": 0.094, "step": 4863 }, { "epoch": 0.6817098808689559, "grad_norm": 0.6422899961471558, "learning_rate": 0.00023744558718010044, "loss": 0.1121, "step": 4864 }, { "epoch": 0.6818500350385424, "grad_norm": 0.6890474557876587, "learning_rate": 0.00023743123654628077, "loss": 0.1075, "step": 4865 }, { "epoch": 0.6819901892081289, "grad_norm": 0.2981206774711609, "learning_rate": 0.0002374168859124611, "loss": 0.0573, "step": 4866 }, { "epoch": 0.6821303433777155, "grad_norm": 0.37856847047805786, "learning_rate": 0.00023740253527864143, "loss": 0.0746, "step": 4867 }, { "epoch": 0.6822704975473021, "grad_norm": 0.19851186871528625, "learning_rate": 0.0002373881846448218, "loss": 0.0917, "step": 4868 }, { "epoch": 0.6824106517168885, "grad_norm": 0.2003358155488968, "learning_rate": 0.00023737383401100214, "loss": 0.0579, "step": 4869 }, { "epoch": 0.6825508058864751, "grad_norm": 0.44520801305770874, "learning_rate": 0.00023735948337718247, "loss": 0.0659, "step": 4870 }, { "epoch": 0.6826909600560617, "grad_norm": 1.0162887573242188, "learning_rate": 0.00023734513274336282, "loss": 0.0932, "step": 4871 }, { "epoch": 0.6828311142256482, "grad_norm": 0.6490917205810547, "learning_rate": 0.00023733078210954315, "loss": 0.122, "step": 4872 }, { "epoch": 0.6829712683952348, "grad_norm": 0.44667473435401917, "learning_rate": 0.00023731643147572348, "loss": 0.103, "step": 4873 }, { "epoch": 0.6831114225648213, "grad_norm": 0.8786742687225342, "learning_rate": 0.00023730208084190384, "loss": 0.123, "step": 4874 }, { "epoch": 
0.6832515767344078, "grad_norm": 0.4250861406326294, "learning_rate": 0.00023728773020808416, "loss": 0.1716, "step": 4875 }, { "epoch": 0.6833917309039944, "grad_norm": 0.5772868394851685, "learning_rate": 0.0002372733795742645, "loss": 0.1228, "step": 4876 }, { "epoch": 0.683531885073581, "grad_norm": 0.4319916367530823, "learning_rate": 0.00023725902894044488, "loss": 0.0863, "step": 4877 }, { "epoch": 0.6836720392431674, "grad_norm": 0.28940045833587646, "learning_rate": 0.0002372446783066252, "loss": 0.0567, "step": 4878 }, { "epoch": 0.683812193412754, "grad_norm": 0.6128058433532715, "learning_rate": 0.00023723032767280553, "loss": 0.0551, "step": 4879 }, { "epoch": 0.6839523475823406, "grad_norm": 0.7274519801139832, "learning_rate": 0.0002372159770389859, "loss": 0.1297, "step": 4880 }, { "epoch": 0.6840925017519272, "grad_norm": 0.23557989299297333, "learning_rate": 0.00023720162640516622, "loss": 0.0243, "step": 4881 }, { "epoch": 0.6842326559215136, "grad_norm": 0.16109292209148407, "learning_rate": 0.00023718727577134655, "loss": 0.0561, "step": 4882 }, { "epoch": 0.6843728100911002, "grad_norm": 0.4620678424835205, "learning_rate": 0.00023717292513752687, "loss": 0.0739, "step": 4883 }, { "epoch": 0.6845129642606868, "grad_norm": 0.72284334897995, "learning_rate": 0.00023715857450370723, "loss": 0.0977, "step": 4884 }, { "epoch": 0.6846531184302733, "grad_norm": 0.6425224542617798, "learning_rate": 0.00023714422386988756, "loss": 0.1987, "step": 4885 }, { "epoch": 0.6847932725998599, "grad_norm": 0.7455645799636841, "learning_rate": 0.0002371298732360679, "loss": 0.1035, "step": 4886 }, { "epoch": 0.6849334267694464, "grad_norm": 0.4513646960258484, "learning_rate": 0.00023711552260224824, "loss": 0.0863, "step": 4887 }, { "epoch": 0.6850735809390329, "grad_norm": 0.24903219938278198, "learning_rate": 0.00023710117196842857, "loss": 0.0683, "step": 4888 }, { "epoch": 0.6852137351086195, "grad_norm": 1.014467477798462, "learning_rate": 
0.0002370868213346089, "loss": 0.1482, "step": 4889 }, { "epoch": 0.6853538892782061, "grad_norm": 0.8224611282348633, "learning_rate": 0.00023707247070078928, "loss": 0.0922, "step": 4890 }, { "epoch": 0.6854940434477925, "grad_norm": 1.4471269845962524, "learning_rate": 0.0002370581200669696, "loss": 0.1765, "step": 4891 }, { "epoch": 0.6856341976173791, "grad_norm": 0.2019975632429123, "learning_rate": 0.00023704376943314994, "loss": 0.0409, "step": 4892 }, { "epoch": 0.6857743517869657, "grad_norm": 0.7212958931922913, "learning_rate": 0.0002370294187993303, "loss": 0.0952, "step": 4893 }, { "epoch": 0.6859145059565522, "grad_norm": 0.7052573561668396, "learning_rate": 0.00023701506816551062, "loss": 0.1361, "step": 4894 }, { "epoch": 0.6860546601261388, "grad_norm": 0.8826233148574829, "learning_rate": 0.00023700071753169095, "loss": 0.4416, "step": 4895 }, { "epoch": 0.6861948142957253, "grad_norm": 0.6817396283149719, "learning_rate": 0.0002369863668978713, "loss": 0.0894, "step": 4896 }, { "epoch": 0.6863349684653118, "grad_norm": 2.287858724594116, "learning_rate": 0.00023697201626405164, "loss": 0.1601, "step": 4897 }, { "epoch": 0.6864751226348984, "grad_norm": 0.5211647748947144, "learning_rate": 0.00023695766563023196, "loss": 0.027, "step": 4898 }, { "epoch": 0.686615276804485, "grad_norm": 2.598926067352295, "learning_rate": 0.00023694331499641235, "loss": 0.1217, "step": 4899 }, { "epoch": 0.6867554309740714, "grad_norm": 3.092555284500122, "learning_rate": 0.00023692896436259268, "loss": 0.3096, "step": 4900 }, { "epoch": 0.686895585143658, "grad_norm": 0.3360291123390198, "learning_rate": 0.000236914613728773, "loss": 0.0823, "step": 4901 }, { "epoch": 0.6870357393132446, "grad_norm": 0.47223180532455444, "learning_rate": 0.00023690026309495333, "loss": 0.0685, "step": 4902 }, { "epoch": 0.6871758934828311, "grad_norm": 0.5508928894996643, "learning_rate": 0.0002368859124611337, "loss": 0.0657, "step": 4903 }, { "epoch": 0.6873160476524176, 
"grad_norm": 0.34525009989738464, "learning_rate": 0.00023687156182731402, "loss": 0.1164, "step": 4904 }, { "epoch": 0.6874562018220042, "grad_norm": 0.4783444106578827, "learning_rate": 0.00023685721119349435, "loss": 0.121, "step": 4905 }, { "epoch": 0.6875963559915907, "grad_norm": 0.22666792571544647, "learning_rate": 0.0002368428605596747, "loss": 0.0461, "step": 4906 }, { "epoch": 0.6877365101611773, "grad_norm": 0.5437456369400024, "learning_rate": 0.00023682850992585503, "loss": 0.1436, "step": 4907 }, { "epoch": 0.6878766643307639, "grad_norm": 0.3524503707885742, "learning_rate": 0.00023681415929203536, "loss": 0.113, "step": 4908 }, { "epoch": 0.6880168185003503, "grad_norm": 0.6922323703765869, "learning_rate": 0.00023679980865821574, "loss": 0.0983, "step": 4909 }, { "epoch": 0.6881569726699369, "grad_norm": 0.3491920232772827, "learning_rate": 0.00023678545802439607, "loss": 0.0875, "step": 4910 }, { "epoch": 0.6882971268395235, "grad_norm": 0.6609526872634888, "learning_rate": 0.0002367711073905764, "loss": 0.0898, "step": 4911 }, { "epoch": 0.6884372810091101, "grad_norm": 0.40448012948036194, "learning_rate": 0.00023675675675675675, "loss": 0.0804, "step": 4912 }, { "epoch": 0.6885774351786965, "grad_norm": 0.7104846835136414, "learning_rate": 0.00023674240612293708, "loss": 0.1181, "step": 4913 }, { "epoch": 0.6887175893482831, "grad_norm": 0.4720240831375122, "learning_rate": 0.0002367280554891174, "loss": 0.096, "step": 4914 }, { "epoch": 0.6888577435178697, "grad_norm": 0.1936282068490982, "learning_rate": 0.00023671370485529777, "loss": 0.0398, "step": 4915 }, { "epoch": 0.6889978976874562, "grad_norm": 0.3752985894680023, "learning_rate": 0.0002366993542214781, "loss": 0.0801, "step": 4916 }, { "epoch": 0.6891380518570428, "grad_norm": 0.4167878329753876, "learning_rate": 0.00023668500358765842, "loss": 0.1084, "step": 4917 }, { "epoch": 0.6892782060266293, "grad_norm": 0.47314736247062683, "learning_rate": 0.00023667065295383875, "loss": 
0.0604, "step": 4918 }, { "epoch": 0.6894183601962158, "grad_norm": 0.8354405164718628, "learning_rate": 0.0002366563023200191, "loss": 0.0739, "step": 4919 }, { "epoch": 0.6895585143658024, "grad_norm": 0.5121402144432068, "learning_rate": 0.00023664195168619944, "loss": 0.0541, "step": 4920 }, { "epoch": 0.689698668535389, "grad_norm": 0.4273848235607147, "learning_rate": 0.00023662760105237977, "loss": 0.0501, "step": 4921 }, { "epoch": 0.6898388227049754, "grad_norm": 0.6261603832244873, "learning_rate": 0.00023661325041856015, "loss": 0.1467, "step": 4922 }, { "epoch": 0.689978976874562, "grad_norm": 0.46040821075439453, "learning_rate": 0.00023659889978474048, "loss": 0.0941, "step": 4923 }, { "epoch": 0.6901191310441486, "grad_norm": 0.7623235583305359, "learning_rate": 0.0002365845491509208, "loss": 0.1054, "step": 4924 }, { "epoch": 0.6902592852137351, "grad_norm": 0.2708573043346405, "learning_rate": 0.00023657019851710116, "loss": 0.0632, "step": 4925 }, { "epoch": 0.6903994393833216, "grad_norm": 0.16677004098892212, "learning_rate": 0.0002365558478832815, "loss": 0.0364, "step": 4926 }, { "epoch": 0.6905395935529082, "grad_norm": 0.2472703903913498, "learning_rate": 0.00023654149724946182, "loss": 0.0342, "step": 4927 }, { "epoch": 0.6906797477224947, "grad_norm": 0.16047130525112152, "learning_rate": 0.00023652714661564217, "loss": 0.0274, "step": 4928 }, { "epoch": 0.6908199018920813, "grad_norm": 0.134731262922287, "learning_rate": 0.0002365127959818225, "loss": 0.0236, "step": 4929 }, { "epoch": 0.6909600560616679, "grad_norm": 0.2427656054496765, "learning_rate": 0.00023649844534800283, "loss": 0.076, "step": 4930 }, { "epoch": 0.6911002102312543, "grad_norm": 0.41598305106163025, "learning_rate": 0.00023648409471418321, "loss": 0.11, "step": 4931 }, { "epoch": 0.6912403644008409, "grad_norm": 0.48773691058158875, "learning_rate": 0.00023646974408036354, "loss": 0.1063, "step": 4932 }, { "epoch": 0.6913805185704275, "grad_norm": 
0.3838995397090912, "learning_rate": 0.00023645539344654387, "loss": 0.0862, "step": 4933 }, { "epoch": 0.691520672740014, "grad_norm": 0.8167698383331299, "learning_rate": 0.00023644104281272423, "loss": 0.1032, "step": 4934 }, { "epoch": 0.6916608269096005, "grad_norm": 0.32955455780029297, "learning_rate": 0.00023642669217890456, "loss": 0.1011, "step": 4935 }, { "epoch": 0.6918009810791871, "grad_norm": 0.23160520195960999, "learning_rate": 0.00023641234154508488, "loss": 0.051, "step": 4936 }, { "epoch": 0.6919411352487737, "grad_norm": 0.17174088954925537, "learning_rate": 0.0002363979909112652, "loss": 0.0662, "step": 4937 }, { "epoch": 0.6920812894183602, "grad_norm": 0.345893919467926, "learning_rate": 0.00023638364027744557, "loss": 0.0329, "step": 4938 }, { "epoch": 0.6922214435879468, "grad_norm": 0.4581599831581116, "learning_rate": 0.0002363692896436259, "loss": 0.1207, "step": 4939 }, { "epoch": 0.6923615977575333, "grad_norm": 0.6067915558815002, "learning_rate": 0.00023635493900980623, "loss": 0.0954, "step": 4940 }, { "epoch": 0.6925017519271198, "grad_norm": 0.6950394511222839, "learning_rate": 0.0002363405883759866, "loss": 0.083, "step": 4941 }, { "epoch": 0.6926419060967064, "grad_norm": 0.47026780247688293, "learning_rate": 0.00023632623774216694, "loss": 0.073, "step": 4942 }, { "epoch": 0.692782060266293, "grad_norm": 0.25256428122520447, "learning_rate": 0.00023631188710834726, "loss": 0.0817, "step": 4943 }, { "epoch": 0.6929222144358794, "grad_norm": 0.3354937732219696, "learning_rate": 0.00023629753647452762, "loss": 0.0655, "step": 4944 }, { "epoch": 0.693062368605466, "grad_norm": 0.27554529905319214, "learning_rate": 0.00023628318584070795, "loss": 0.0711, "step": 4945 }, { "epoch": 0.6932025227750526, "grad_norm": 0.4474349915981293, "learning_rate": 0.00023626883520688828, "loss": 0.111, "step": 4946 }, { "epoch": 0.6933426769446391, "grad_norm": 0.5785543918609619, "learning_rate": 0.00023625448457306863, "loss": 0.0614, "step": 
4947 }, { "epoch": 0.6934828311142256, "grad_norm": 0.6148270964622498, "learning_rate": 0.00023624013393924896, "loss": 0.1167, "step": 4948 }, { "epoch": 0.6936229852838122, "grad_norm": 4.919109344482422, "learning_rate": 0.0002362257833054293, "loss": 0.224, "step": 4949 }, { "epoch": 0.6937631394533987, "grad_norm": 1.4899595975875854, "learning_rate": 0.00023621143267160965, "loss": 0.2789, "step": 4950 }, { "epoch": 0.6939032936229853, "grad_norm": 0.45124882459640503, "learning_rate": 0.00023619708203778997, "loss": 0.1184, "step": 4951 }, { "epoch": 0.6940434477925719, "grad_norm": 0.4265531003475189, "learning_rate": 0.0002361827314039703, "loss": 0.106, "step": 4952 }, { "epoch": 0.6941836019621583, "grad_norm": 0.6305046677589417, "learning_rate": 0.00023616838077015063, "loss": 0.1031, "step": 4953 }, { "epoch": 0.6943237561317449, "grad_norm": 0.32009822130203247, "learning_rate": 0.00023615403013633101, "loss": 0.1061, "step": 4954 }, { "epoch": 0.6944639103013315, "grad_norm": 0.6415496468544006, "learning_rate": 0.00023613967950251134, "loss": 0.0869, "step": 4955 }, { "epoch": 0.694604064470918, "grad_norm": 0.21832266449928284, "learning_rate": 0.00023612532886869167, "loss": 0.0788, "step": 4956 }, { "epoch": 0.6947442186405045, "grad_norm": 0.49858248233795166, "learning_rate": 0.00023611097823487203, "loss": 0.0797, "step": 4957 }, { "epoch": 0.6948843728100911, "grad_norm": 0.32161790132522583, "learning_rate": 0.00023609662760105236, "loss": 0.0563, "step": 4958 }, { "epoch": 0.6950245269796776, "grad_norm": 0.33466893434524536, "learning_rate": 0.00023608227696723268, "loss": 0.1185, "step": 4959 }, { "epoch": 0.6951646811492642, "grad_norm": 0.39162588119506836, "learning_rate": 0.00023606792633341304, "loss": 0.0955, "step": 4960 }, { "epoch": 0.6953048353188508, "grad_norm": 0.3379994332790375, "learning_rate": 0.00023605357569959337, "loss": 0.0731, "step": 4961 }, { "epoch": 0.6954449894884372, "grad_norm": 0.6227350831031799, 
"learning_rate": 0.0002360392250657737, "loss": 0.1651, "step": 4962 }, { "epoch": 0.6955851436580238, "grad_norm": 0.35392624139785767, "learning_rate": 0.00023602487443195408, "loss": 0.0525, "step": 4963 }, { "epoch": 0.6957252978276104, "grad_norm": 0.4256768226623535, "learning_rate": 0.0002360105237981344, "loss": 0.0613, "step": 4964 }, { "epoch": 0.695865451997197, "grad_norm": 0.7919667363166809, "learning_rate": 0.00023599617316431474, "loss": 0.0527, "step": 4965 }, { "epoch": 0.6960056061667834, "grad_norm": 0.30182307958602905, "learning_rate": 0.0002359818225304951, "loss": 0.104, "step": 4966 }, { "epoch": 0.69614576033637, "grad_norm": 0.40462568402290344, "learning_rate": 0.00023596747189667542, "loss": 0.1069, "step": 4967 }, { "epoch": 0.6962859145059566, "grad_norm": 0.23089641332626343, "learning_rate": 0.00023595312126285575, "loss": 0.0253, "step": 4968 }, { "epoch": 0.6964260686755431, "grad_norm": 0.5238849520683289, "learning_rate": 0.0002359387706290361, "loss": 0.1258, "step": 4969 }, { "epoch": 0.6965662228451296, "grad_norm": 0.37786635756492615, "learning_rate": 0.00023592441999521643, "loss": 0.1281, "step": 4970 }, { "epoch": 0.6967063770147162, "grad_norm": 0.36394861340522766, "learning_rate": 0.00023591006936139676, "loss": 0.0422, "step": 4971 }, { "epoch": 0.6968465311843027, "grad_norm": 0.4449184536933899, "learning_rate": 0.0002358957187275771, "loss": 0.0794, "step": 4972 }, { "epoch": 0.6969866853538893, "grad_norm": 0.20255087316036224, "learning_rate": 0.00023588136809375747, "loss": 0.0457, "step": 4973 }, { "epoch": 0.6971268395234759, "grad_norm": 0.36113888025283813, "learning_rate": 0.0002358670174599378, "loss": 0.0499, "step": 4974 }, { "epoch": 0.6972669936930623, "grad_norm": 0.5287073254585266, "learning_rate": 0.00023585266682611813, "loss": 0.0872, "step": 4975 }, { "epoch": 0.6974071478626489, "grad_norm": 0.3566041886806488, "learning_rate": 0.0002358383161922985, "loss": 0.0504, "step": 4976 }, { "epoch": 
0.6975473020322355, "grad_norm": 0.2616063356399536, "learning_rate": 0.00023582396555847882, "loss": 0.0506, "step": 4977 }, { "epoch": 0.697687456201822, "grad_norm": 0.22803056240081787, "learning_rate": 0.00023580961492465914, "loss": 0.0651, "step": 4978 }, { "epoch": 0.6978276103714085, "grad_norm": 0.4278799593448639, "learning_rate": 0.0002357952642908395, "loss": 0.0518, "step": 4979 }, { "epoch": 0.6979677645409951, "grad_norm": 0.43411722779273987, "learning_rate": 0.00023578091365701983, "loss": 0.0796, "step": 4980 }, { "epoch": 0.6981079187105816, "grad_norm": 0.7161135673522949, "learning_rate": 0.00023576656302320016, "loss": 0.1399, "step": 4981 }, { "epoch": 0.6982480728801682, "grad_norm": 0.2713644206523895, "learning_rate": 0.0002357522123893805, "loss": 0.0722, "step": 4982 }, { "epoch": 0.6983882270497548, "grad_norm": 0.6444911360740662, "learning_rate": 0.00023573786175556084, "loss": 0.0518, "step": 4983 }, { "epoch": 0.6985283812193412, "grad_norm": 1.2718766927719116, "learning_rate": 0.00023572351112174117, "loss": 0.1725, "step": 4984 }, { "epoch": 0.6986685353889278, "grad_norm": 0.6072478890419006, "learning_rate": 0.00023570916048792155, "loss": 0.0987, "step": 4985 }, { "epoch": 0.6988086895585144, "grad_norm": 0.34491369128227234, "learning_rate": 0.00023569480985410188, "loss": 0.1027, "step": 4986 }, { "epoch": 0.698948843728101, "grad_norm": 0.6384333372116089, "learning_rate": 0.0002356804592202822, "loss": 0.1637, "step": 4987 }, { "epoch": 0.6990889978976874, "grad_norm": 0.16771025955677032, "learning_rate": 0.00023566610858646254, "loss": 0.0764, "step": 4988 }, { "epoch": 0.699229152067274, "grad_norm": 0.28696170449256897, "learning_rate": 0.0002356517579526429, "loss": 0.0302, "step": 4989 }, { "epoch": 0.6993693062368606, "grad_norm": 0.20444737374782562, "learning_rate": 0.00023563740731882322, "loss": 0.0472, "step": 4990 }, { "epoch": 0.6995094604064471, "grad_norm": 0.2923547923564911, "learning_rate": 
0.00023562305668500355, "loss": 0.0642, "step": 4991 }, { "epoch": 0.6996496145760336, "grad_norm": 0.21817873418331146, "learning_rate": 0.0002356087060511839, "loss": 0.027, "step": 4992 }, { "epoch": 0.6997897687456202, "grad_norm": 0.6990349292755127, "learning_rate": 0.00023559435541736424, "loss": 0.1423, "step": 4993 }, { "epoch": 0.6999299229152067, "grad_norm": 0.4087603986263275, "learning_rate": 0.00023558000478354456, "loss": 0.0811, "step": 4994 }, { "epoch": 0.7000700770847933, "grad_norm": 0.8099572062492371, "learning_rate": 0.00023556565414972495, "loss": 0.1, "step": 4995 }, { "epoch": 0.7002102312543799, "grad_norm": 0.4728637933731079, "learning_rate": 0.00023555130351590527, "loss": 0.0905, "step": 4996 }, { "epoch": 0.7003503854239663, "grad_norm": 1.3188045024871826, "learning_rate": 0.0002355369528820856, "loss": 0.1761, "step": 4997 }, { "epoch": 0.7004905395935529, "grad_norm": 0.553395688533783, "learning_rate": 0.00023552260224826596, "loss": 0.1177, "step": 4998 }, { "epoch": 0.7006306937631395, "grad_norm": 0.948553204536438, "learning_rate": 0.0002355082516144463, "loss": 0.1856, "step": 4999 }, { "epoch": 0.700770847932726, "grad_norm": 2.8646082878112793, "learning_rate": 0.00023549390098062662, "loss": 0.0974, "step": 5000 }, { "epoch": 0.7009110021023125, "grad_norm": 0.45448389649391174, "learning_rate": 0.00023547955034680697, "loss": 0.1466, "step": 5001 }, { "epoch": 0.7010511562718991, "grad_norm": 0.3053547441959381, "learning_rate": 0.0002354651997129873, "loss": 0.0757, "step": 5002 }, { "epoch": 0.7011913104414856, "grad_norm": 0.4656880497932434, "learning_rate": 0.00023545084907916763, "loss": 0.0518, "step": 5003 }, { "epoch": 0.7013314646110722, "grad_norm": 0.259682297706604, "learning_rate": 0.000235436498445348, "loss": 0.0618, "step": 5004 }, { "epoch": 0.7014716187806588, "grad_norm": 0.23479433357715607, "learning_rate": 0.00023542214781152834, "loss": 0.0435, "step": 5005 }, { "epoch": 0.7016117729502452, 
"grad_norm": 0.4558795392513275, "learning_rate": 0.00023540779717770867, "loss": 0.1396, "step": 5006 }, { "epoch": 0.7017519271198318, "grad_norm": 0.2944003641605377, "learning_rate": 0.000235393446543889, "loss": 0.0601, "step": 5007 }, { "epoch": 0.7018920812894184, "grad_norm": 0.34318599104881287, "learning_rate": 0.00023537909591006935, "loss": 0.0883, "step": 5008 }, { "epoch": 0.702032235459005, "grad_norm": 0.29112017154693604, "learning_rate": 0.00023536474527624968, "loss": 0.075, "step": 5009 }, { "epoch": 0.7021723896285914, "grad_norm": 0.491881787776947, "learning_rate": 0.00023535039464243, "loss": 0.0856, "step": 5010 }, { "epoch": 0.702312543798178, "grad_norm": 0.6046550869941711, "learning_rate": 0.00023533604400861037, "loss": 0.0669, "step": 5011 }, { "epoch": 0.7024526979677645, "grad_norm": 0.23227007687091827, "learning_rate": 0.0002353216933747907, "loss": 0.0888, "step": 5012 }, { "epoch": 0.7025928521373511, "grad_norm": 0.431506872177124, "learning_rate": 0.00023530734274097102, "loss": 0.0754, "step": 5013 }, { "epoch": 0.7027330063069376, "grad_norm": 0.36266499757766724, "learning_rate": 0.00023529299210715138, "loss": 0.0808, "step": 5014 }, { "epoch": 0.7028731604765242, "grad_norm": 1.0934547185897827, "learning_rate": 0.0002352786414733317, "loss": 0.0843, "step": 5015 }, { "epoch": 0.7030133146461107, "grad_norm": 0.47969672083854675, "learning_rate": 0.00023526429083951204, "loss": 0.0544, "step": 5016 }, { "epoch": 0.7031534688156973, "grad_norm": 0.27149614691734314, "learning_rate": 0.00023524994020569242, "loss": 0.0763, "step": 5017 }, { "epoch": 0.7032936229852839, "grad_norm": 0.3926200270652771, "learning_rate": 0.00023523558957187275, "loss": 0.0546, "step": 5018 }, { "epoch": 0.7034337771548703, "grad_norm": 0.5547006726264954, "learning_rate": 0.00023522123893805308, "loss": 0.1948, "step": 5019 }, { "epoch": 0.7035739313244569, "grad_norm": 0.31557026505470276, "learning_rate": 0.00023520688830423343, "loss": 
0.0392, "step": 5020 }, { "epoch": 0.7037140854940435, "grad_norm": 0.7667475342750549, "learning_rate": 0.00023519253767041376, "loss": 0.1638, "step": 5021 }, { "epoch": 0.70385423966363, "grad_norm": 0.6007963418960571, "learning_rate": 0.0002351781870365941, "loss": 0.1742, "step": 5022 }, { "epoch": 0.7039943938332165, "grad_norm": 0.4213850796222687, "learning_rate": 0.00023516383640277442, "loss": 0.1667, "step": 5023 }, { "epoch": 0.7041345480028031, "grad_norm": 0.7641958594322205, "learning_rate": 0.00023514948576895477, "loss": 0.1815, "step": 5024 }, { "epoch": 0.7042747021723896, "grad_norm": 0.3817320764064789, "learning_rate": 0.0002351351351351351, "loss": 0.0594, "step": 5025 }, { "epoch": 0.7044148563419762, "grad_norm": 0.2560789883136749, "learning_rate": 0.00023512078450131543, "loss": 0.0742, "step": 5026 }, { "epoch": 0.7045550105115628, "grad_norm": 0.3542046844959259, "learning_rate": 0.0002351064338674958, "loss": 0.0963, "step": 5027 }, { "epoch": 0.7046951646811492, "grad_norm": 0.46601173281669617, "learning_rate": 0.00023509208323367614, "loss": 0.0895, "step": 5028 }, { "epoch": 0.7048353188507358, "grad_norm": 0.20284810662269592, "learning_rate": 0.00023507773259985647, "loss": 0.0612, "step": 5029 }, { "epoch": 0.7049754730203224, "grad_norm": 0.3436676263809204, "learning_rate": 0.00023506338196603683, "loss": 0.0413, "step": 5030 }, { "epoch": 0.705115627189909, "grad_norm": 0.2958710491657257, "learning_rate": 0.00023504903133221715, "loss": 0.0863, "step": 5031 }, { "epoch": 0.7052557813594954, "grad_norm": 0.2852146625518799, "learning_rate": 0.00023503468069839748, "loss": 0.035, "step": 5032 }, { "epoch": 0.705395935529082, "grad_norm": 0.3974468410015106, "learning_rate": 0.00023502033006457784, "loss": 0.0533, "step": 5033 }, { "epoch": 0.7055360896986685, "grad_norm": 0.24293284118175507, "learning_rate": 0.00023500597943075817, "loss": 0.0673, "step": 5034 }, { "epoch": 0.7056762438682551, "grad_norm": 
0.1357775181531906, "learning_rate": 0.0002349916287969385, "loss": 0.0368, "step": 5035 }, { "epoch": 0.7058163980378416, "grad_norm": 0.6605488657951355, "learning_rate": 0.00023497727816311888, "loss": 0.065, "step": 5036 }, { "epoch": 0.7059565522074281, "grad_norm": 0.4690210819244385, "learning_rate": 0.0002349629275292992, "loss": 0.1224, "step": 5037 }, { "epoch": 0.7060967063770147, "grad_norm": 0.35564124584198, "learning_rate": 0.00023494857689547954, "loss": 0.069, "step": 5038 }, { "epoch": 0.7062368605466013, "grad_norm": 0.5142439007759094, "learning_rate": 0.0002349342262616599, "loss": 0.0803, "step": 5039 }, { "epoch": 0.7063770147161879, "grad_norm": 1.554359793663025, "learning_rate": 0.00023491987562784022, "loss": 0.3452, "step": 5040 }, { "epoch": 0.7065171688857743, "grad_norm": 0.35481011867523193, "learning_rate": 0.00023490552499402055, "loss": 0.0686, "step": 5041 }, { "epoch": 0.7066573230553609, "grad_norm": 0.5887169241905212, "learning_rate": 0.00023489117436020088, "loss": 0.0713, "step": 5042 }, { "epoch": 0.7067974772249475, "grad_norm": 0.30480802059173584, "learning_rate": 0.00023487682372638123, "loss": 0.054, "step": 5043 }, { "epoch": 0.706937631394534, "grad_norm": 0.4054400324821472, "learning_rate": 0.00023486247309256156, "loss": 0.0867, "step": 5044 }, { "epoch": 0.7070777855641205, "grad_norm": 1.2069053649902344, "learning_rate": 0.0002348481224587419, "loss": 0.1405, "step": 5045 }, { "epoch": 0.7072179397337071, "grad_norm": 0.5618757009506226, "learning_rate": 0.00023483377182492225, "loss": 0.1636, "step": 5046 }, { "epoch": 0.7073580939032936, "grad_norm": 0.7865987420082092, "learning_rate": 0.00023481942119110257, "loss": 0.1725, "step": 5047 }, { "epoch": 0.7074982480728802, "grad_norm": 0.838504433631897, "learning_rate": 0.0002348050705572829, "loss": 0.1417, "step": 5048 }, { "epoch": 0.7076384022424668, "grad_norm": 0.31529176235198975, "learning_rate": 0.00023479071992346328, "loss": 0.0328, "step": 5049 
}, { "epoch": 0.7077785564120532, "grad_norm": 1.9494835138320923, "learning_rate": 0.0002347763692896436, "loss": 0.1889, "step": 5050 }, { "epoch": 0.7079187105816398, "grad_norm": 0.4203592538833618, "learning_rate": 0.00023476201865582394, "loss": 0.0623, "step": 5051 }, { "epoch": 0.7080588647512264, "grad_norm": 0.13881684839725494, "learning_rate": 0.0002347476680220043, "loss": 0.0333, "step": 5052 }, { "epoch": 0.7081990189208129, "grad_norm": 1.0938949584960938, "learning_rate": 0.00023473331738818463, "loss": 0.1527, "step": 5053 }, { "epoch": 0.7083391730903994, "grad_norm": 0.33289945125579834, "learning_rate": 0.00023471896675436495, "loss": 0.0307, "step": 5054 }, { "epoch": 0.708479327259986, "grad_norm": 0.4040643870830536, "learning_rate": 0.0002347046161205453, "loss": 0.0535, "step": 5055 }, { "epoch": 0.7086194814295725, "grad_norm": 0.45840442180633545, "learning_rate": 0.00023469026548672564, "loss": 0.036, "step": 5056 }, { "epoch": 0.7087596355991591, "grad_norm": 0.7804120182991028, "learning_rate": 0.00023467591485290597, "loss": 0.1064, "step": 5057 }, { "epoch": 0.7088997897687456, "grad_norm": 0.8684229850769043, "learning_rate": 0.0002346615642190863, "loss": 0.1196, "step": 5058 }, { "epoch": 0.7090399439383321, "grad_norm": 0.32349488139152527, "learning_rate": 0.00023464721358526668, "loss": 0.0824, "step": 5059 }, { "epoch": 0.7091800981079187, "grad_norm": 0.7354174852371216, "learning_rate": 0.000234632862951447, "loss": 0.0835, "step": 5060 }, { "epoch": 0.7093202522775053, "grad_norm": 0.3127652108669281, "learning_rate": 0.00023461851231762734, "loss": 0.0551, "step": 5061 }, { "epoch": 0.7094604064470919, "grad_norm": 0.4412612318992615, "learning_rate": 0.0002346041616838077, "loss": 0.0596, "step": 5062 }, { "epoch": 0.7096005606166783, "grad_norm": 0.24782206118106842, "learning_rate": 0.00023458981104998802, "loss": 0.0743, "step": 5063 }, { "epoch": 0.7097407147862649, "grad_norm": 0.23947393894195557, "learning_rate": 
0.00023457546041616835, "loss": 0.0633, "step": 5064 }, { "epoch": 0.7098808689558515, "grad_norm": 0.6765064597129822, "learning_rate": 0.0002345611097823487, "loss": 0.1379, "step": 5065 }, { "epoch": 0.710021023125438, "grad_norm": 0.5635762810707092, "learning_rate": 0.00023454675914852903, "loss": 0.0675, "step": 5066 }, { "epoch": 0.7101611772950245, "grad_norm": 0.3269973397254944, "learning_rate": 0.00023453240851470936, "loss": 0.0606, "step": 5067 }, { "epoch": 0.710301331464611, "grad_norm": 1.3347358703613281, "learning_rate": 0.00023451805788088974, "loss": 0.2575, "step": 5068 }, { "epoch": 0.7104414856341976, "grad_norm": 0.6788570284843445, "learning_rate": 0.00023450370724707007, "loss": 0.0829, "step": 5069 }, { "epoch": 0.7105816398037842, "grad_norm": 0.27341267466545105, "learning_rate": 0.0002344893566132504, "loss": 0.0454, "step": 5070 }, { "epoch": 0.7107217939733707, "grad_norm": 0.4122808873653412, "learning_rate": 0.00023447500597943076, "loss": 0.1219, "step": 5071 }, { "epoch": 0.7108619481429572, "grad_norm": 0.1932440847158432, "learning_rate": 0.00023446065534561109, "loss": 0.0332, "step": 5072 }, { "epoch": 0.7110021023125438, "grad_norm": 0.12211096286773682, "learning_rate": 0.00023444630471179141, "loss": 0.0307, "step": 5073 }, { "epoch": 0.7111422564821304, "grad_norm": 0.26029810309410095, "learning_rate": 0.00023443195407797177, "loss": 0.0327, "step": 5074 }, { "epoch": 0.7112824106517169, "grad_norm": 0.3763386011123657, "learning_rate": 0.0002344176034441521, "loss": 0.0847, "step": 5075 }, { "epoch": 0.7114225648213034, "grad_norm": 0.42975911498069763, "learning_rate": 0.00023440325281033243, "loss": 0.1221, "step": 5076 }, { "epoch": 0.71156271899089, "grad_norm": 0.40714436769485474, "learning_rate": 0.00023438890217651276, "loss": 0.1232, "step": 5077 }, { "epoch": 0.7117028731604765, "grad_norm": 0.5231318473815918, "learning_rate": 0.0002343745515426931, "loss": 0.0901, "step": 5078 }, { "epoch": 
0.7118430273300631, "grad_norm": 0.48449090123176575, "learning_rate": 0.00023436020090887344, "loss": 0.0566, "step": 5079 }, { "epoch": 0.7119831814996496, "grad_norm": 0.26782605051994324, "learning_rate": 0.00023434585027505377, "loss": 0.0951, "step": 5080 }, { "epoch": 0.7121233356692361, "grad_norm": 0.43498867750167847, "learning_rate": 0.00023433149964123415, "loss": 0.0453, "step": 5081 }, { "epoch": 0.7122634898388227, "grad_norm": 0.3748285472393036, "learning_rate": 0.00023431714900741448, "loss": 0.1466, "step": 5082 }, { "epoch": 0.7124036440084093, "grad_norm": 0.5433907508850098, "learning_rate": 0.0002343027983735948, "loss": 0.1037, "step": 5083 }, { "epoch": 0.7125437981779958, "grad_norm": 0.5655926465988159, "learning_rate": 0.00023428844773977516, "loss": 0.0599, "step": 5084 }, { "epoch": 0.7126839523475823, "grad_norm": 0.5901606678962708, "learning_rate": 0.0002342740971059555, "loss": 0.1123, "step": 5085 }, { "epoch": 0.7128241065171689, "grad_norm": 0.4195170998573303, "learning_rate": 0.00023425974647213582, "loss": 0.0329, "step": 5086 }, { "epoch": 0.7129642606867554, "grad_norm": 0.33596286177635193, "learning_rate": 0.00023424539583831618, "loss": 0.0846, "step": 5087 }, { "epoch": 0.713104414856342, "grad_norm": 0.3595917224884033, "learning_rate": 0.0002342310452044965, "loss": 0.1196, "step": 5088 }, { "epoch": 0.7132445690259285, "grad_norm": 0.39344146847724915, "learning_rate": 0.00023421669457067683, "loss": 0.1846, "step": 5089 }, { "epoch": 0.713384723195515, "grad_norm": 0.5979328751564026, "learning_rate": 0.00023420234393685722, "loss": 0.069, "step": 5090 }, { "epoch": 0.7135248773651016, "grad_norm": 0.5251967906951904, "learning_rate": 0.00023418799330303755, "loss": 0.0864, "step": 5091 }, { "epoch": 0.7136650315346882, "grad_norm": 0.3856375217437744, "learning_rate": 0.00023417364266921787, "loss": 0.0441, "step": 5092 }, { "epoch": 0.7138051857042746, "grad_norm": 0.6210995316505432, "learning_rate": 
0.0002341592920353982, "loss": 0.0538, "step": 5093 }, { "epoch": 0.7139453398738612, "grad_norm": 0.7057554125785828, "learning_rate": 0.00023414494140157856, "loss": 0.1129, "step": 5094 }, { "epoch": 0.7140854940434478, "grad_norm": 0.9471320509910583, "learning_rate": 0.0002341305907677589, "loss": 0.1692, "step": 5095 }, { "epoch": 0.7142256482130344, "grad_norm": 0.7931409478187561, "learning_rate": 0.00023411624013393922, "loss": 0.1032, "step": 5096 }, { "epoch": 0.7143658023826209, "grad_norm": 0.9453210830688477, "learning_rate": 0.00023410188950011957, "loss": 0.0951, "step": 5097 }, { "epoch": 0.7145059565522074, "grad_norm": 1.0313867330551147, "learning_rate": 0.0002340875388662999, "loss": 0.1764, "step": 5098 }, { "epoch": 0.714646110721794, "grad_norm": 4.681178569793701, "learning_rate": 0.00023407318823248023, "loss": 0.3094, "step": 5099 }, { "epoch": 0.7147862648913805, "grad_norm": 6.18013334274292, "learning_rate": 0.0002340588375986606, "loss": 0.3564, "step": 5100 }, { "epoch": 0.7149264190609671, "grad_norm": 0.24321196973323822, "learning_rate": 0.00023404448696484094, "loss": 0.0374, "step": 5101 }, { "epoch": 0.7150665732305536, "grad_norm": 0.2917323708534241, "learning_rate": 0.00023403013633102127, "loss": 0.0666, "step": 5102 }, { "epoch": 0.7152067274001401, "grad_norm": 0.5228332281112671, "learning_rate": 0.00023401578569720162, "loss": 0.0598, "step": 5103 }, { "epoch": 0.7153468815697267, "grad_norm": 0.34571489691734314, "learning_rate": 0.00023400143506338195, "loss": 0.0819, "step": 5104 }, { "epoch": 0.7154870357393133, "grad_norm": 0.3090171813964844, "learning_rate": 0.00023398708442956228, "loss": 0.0714, "step": 5105 }, { "epoch": 0.7156271899088998, "grad_norm": 0.31017976999282837, "learning_rate": 0.00023397273379574264, "loss": 0.0563, "step": 5106 }, { "epoch": 0.7157673440784863, "grad_norm": 0.39233192801475525, "learning_rate": 0.00023395838316192296, "loss": 0.0514, "step": 5107 }, { "epoch": 
0.7159074982480729, "grad_norm": 0.27979087829589844, "learning_rate": 0.0002339440325281033, "loss": 0.0648, "step": 5108 }, { "epoch": 0.7160476524176594, "grad_norm": 0.306318461894989, "learning_rate": 0.00023392968189428362, "loss": 0.0567, "step": 5109 }, { "epoch": 0.716187806587246, "grad_norm": 0.3515383303165436, "learning_rate": 0.00023391533126046398, "loss": 0.0696, "step": 5110 }, { "epoch": 0.7163279607568325, "grad_norm": 0.5803670883178711, "learning_rate": 0.0002339009806266443, "loss": 0.1081, "step": 5111 }, { "epoch": 0.716468114926419, "grad_norm": 0.3434430956840515, "learning_rate": 0.00023388662999282463, "loss": 0.1214, "step": 5112 }, { "epoch": 0.7166082690960056, "grad_norm": 0.3058526813983917, "learning_rate": 0.00023387227935900502, "loss": 0.1162, "step": 5113 }, { "epoch": 0.7167484232655922, "grad_norm": 0.46661266684532166, "learning_rate": 0.00023385792872518535, "loss": 0.0828, "step": 5114 }, { "epoch": 0.7168885774351786, "grad_norm": 0.2832973599433899, "learning_rate": 0.00023384357809136567, "loss": 0.0665, "step": 5115 }, { "epoch": 0.7170287316047652, "grad_norm": 0.23683227598667145, "learning_rate": 0.00023382922745754603, "loss": 0.0444, "step": 5116 }, { "epoch": 0.7171688857743518, "grad_norm": 0.5688052773475647, "learning_rate": 0.00023381487682372636, "loss": 0.1125, "step": 5117 }, { "epoch": 0.7173090399439384, "grad_norm": 0.46640726923942566, "learning_rate": 0.0002338005261899067, "loss": 0.0886, "step": 5118 }, { "epoch": 0.7174491941135249, "grad_norm": 0.3015604317188263, "learning_rate": 0.00023378617555608704, "loss": 0.1054, "step": 5119 }, { "epoch": 0.7175893482831114, "grad_norm": 0.3122844696044922, "learning_rate": 0.00023377182492226737, "loss": 0.0781, "step": 5120 }, { "epoch": 0.717729502452698, "grad_norm": 0.2697398066520691, "learning_rate": 0.0002337574742884477, "loss": 0.0436, "step": 5121 }, { "epoch": 0.7178696566222845, "grad_norm": 0.2917141318321228, "learning_rate": 
0.00023374312365462808, "loss": 0.0739, "step": 5122 }, { "epoch": 0.7180098107918711, "grad_norm": 0.39828020334243774, "learning_rate": 0.0002337287730208084, "loss": 0.0256, "step": 5123 }, { "epoch": 0.7181499649614576, "grad_norm": 0.46709543466567993, "learning_rate": 0.00023371442238698874, "loss": 0.0858, "step": 5124 }, { "epoch": 0.7182901191310441, "grad_norm": 0.5850250720977783, "learning_rate": 0.0002337000717531691, "loss": 0.1303, "step": 5125 }, { "epoch": 0.7184302733006307, "grad_norm": 0.819942057132721, "learning_rate": 0.00023368572111934942, "loss": 0.0603, "step": 5126 }, { "epoch": 0.7185704274702173, "grad_norm": 0.4037875533103943, "learning_rate": 0.00023367137048552975, "loss": 0.0698, "step": 5127 }, { "epoch": 0.7187105816398038, "grad_norm": 0.9957055449485779, "learning_rate": 0.00023365701985171008, "loss": 0.1372, "step": 5128 }, { "epoch": 0.7188507358093903, "grad_norm": 0.5481537580490112, "learning_rate": 0.00023364266921789044, "loss": 0.0973, "step": 5129 }, { "epoch": 0.7189908899789769, "grad_norm": 0.49370571970939636, "learning_rate": 0.00023362831858407077, "loss": 0.097, "step": 5130 }, { "epoch": 0.7191310441485634, "grad_norm": 0.3298933506011963, "learning_rate": 0.0002336139679502511, "loss": 0.0642, "step": 5131 }, { "epoch": 0.71927119831815, "grad_norm": 0.2864142060279846, "learning_rate": 0.00023359961731643148, "loss": 0.0354, "step": 5132 }, { "epoch": 0.7194113524877365, "grad_norm": 1.5379772186279297, "learning_rate": 0.0002335852666826118, "loss": 0.1454, "step": 5133 }, { "epoch": 0.719551506657323, "grad_norm": 0.28490015864372253, "learning_rate": 0.00023357091604879213, "loss": 0.0524, "step": 5134 }, { "epoch": 0.7196916608269096, "grad_norm": 0.3085574209690094, "learning_rate": 0.0002335565654149725, "loss": 0.0255, "step": 5135 }, { "epoch": 0.7198318149964962, "grad_norm": 0.35978496074676514, "learning_rate": 0.00023354221478115282, "loss": 0.0837, "step": 5136 }, { "epoch": 0.7199719691660826, 
"grad_norm": 0.35980701446533203, "learning_rate": 0.00023352786414733315, "loss": 0.0541, "step": 5137 }, { "epoch": 0.7201121233356692, "grad_norm": 0.38387787342071533, "learning_rate": 0.0002335135135135135, "loss": 0.0497, "step": 5138 }, { "epoch": 0.7202522775052558, "grad_norm": 0.7311360239982605, "learning_rate": 0.00023349916287969383, "loss": 0.1206, "step": 5139 }, { "epoch": 0.7203924316748423, "grad_norm": 0.41189852356910706, "learning_rate": 0.00023348481224587416, "loss": 0.0404, "step": 5140 }, { "epoch": 0.7205325858444289, "grad_norm": 0.7187962532043457, "learning_rate": 0.00023347046161205452, "loss": 0.1048, "step": 5141 }, { "epoch": 0.7206727400140154, "grad_norm": 0.2527622580528259, "learning_rate": 0.00023345611097823484, "loss": 0.0506, "step": 5142 }, { "epoch": 0.720812894183602, "grad_norm": 0.2305736094713211, "learning_rate": 0.00023344176034441517, "loss": 0.0769, "step": 5143 }, { "epoch": 0.7209530483531885, "grad_norm": 0.3243862986564636, "learning_rate": 0.0002334274097105955, "loss": 0.0439, "step": 5144 }, { "epoch": 0.7210932025227751, "grad_norm": 0.4699042737483978, "learning_rate": 0.00023341305907677588, "loss": 0.1077, "step": 5145 }, { "epoch": 0.7212333566923615, "grad_norm": 0.24194759130477905, "learning_rate": 0.0002333987084429562, "loss": 0.042, "step": 5146 }, { "epoch": 0.7213735108619481, "grad_norm": 0.802369236946106, "learning_rate": 0.00023338435780913654, "loss": 0.1197, "step": 5147 }, { "epoch": 0.7215136650315347, "grad_norm": 2.8443851470947266, "learning_rate": 0.0002333700071753169, "loss": 0.3762, "step": 5148 }, { "epoch": 0.7216538192011213, "grad_norm": 0.6536649465560913, "learning_rate": 0.00023335565654149723, "loss": 0.0937, "step": 5149 }, { "epoch": 0.7217939733707078, "grad_norm": 1.9151217937469482, "learning_rate": 0.00023334130590767755, "loss": 0.4609, "step": 5150 }, { "epoch": 0.7219341275402943, "grad_norm": 0.5031387805938721, "learning_rate": 0.0002333269552738579, "loss": 
0.1766, "step": 5151 }, { "epoch": 0.7220742817098809, "grad_norm": 0.641347348690033, "learning_rate": 0.00023331260464003824, "loss": 0.1777, "step": 5152 }, { "epoch": 0.7222144358794674, "grad_norm": 0.2836684286594391, "learning_rate": 0.00023329825400621857, "loss": 0.051, "step": 5153 }, { "epoch": 0.722354590049054, "grad_norm": 0.8412325978279114, "learning_rate": 0.00023328390337239895, "loss": 0.2053, "step": 5154 }, { "epoch": 0.7224947442186405, "grad_norm": 0.26968541741371155, "learning_rate": 0.00023326955273857928, "loss": 0.0308, "step": 5155 }, { "epoch": 0.722634898388227, "grad_norm": 0.2725106477737427, "learning_rate": 0.0002332552021047596, "loss": 0.0686, "step": 5156 }, { "epoch": 0.7227750525578136, "grad_norm": 0.17711910605430603, "learning_rate": 0.00023324085147093996, "loss": 0.035, "step": 5157 }, { "epoch": 0.7229152067274002, "grad_norm": 0.3863382041454315, "learning_rate": 0.0002332265008371203, "loss": 0.1225, "step": 5158 }, { "epoch": 0.7230553608969866, "grad_norm": 0.2858009338378906, "learning_rate": 0.00023321215020330062, "loss": 0.058, "step": 5159 }, { "epoch": 0.7231955150665732, "grad_norm": 0.22802037000656128, "learning_rate": 0.00023319779956948097, "loss": 0.0554, "step": 5160 }, { "epoch": 0.7233356692361598, "grad_norm": 0.18020832538604736, "learning_rate": 0.0002331834489356613, "loss": 0.0665, "step": 5161 }, { "epoch": 0.7234758234057463, "grad_norm": 0.4329358637332916, "learning_rate": 0.00023316909830184163, "loss": 0.0692, "step": 5162 }, { "epoch": 0.7236159775753329, "grad_norm": 0.6758047938346863, "learning_rate": 0.00023315474766802196, "loss": 0.1063, "step": 5163 }, { "epoch": 0.7237561317449194, "grad_norm": 0.3999052941799164, "learning_rate": 0.00023314039703420234, "loss": 0.0778, "step": 5164 }, { "epoch": 0.7238962859145059, "grad_norm": 0.29249122738838196, "learning_rate": 0.00023312604640038267, "loss": 0.071, "step": 5165 }, { "epoch": 0.7240364400840925, "grad_norm": 
0.3687693774700165, "learning_rate": 0.000233111695766563, "loss": 0.0963, "step": 5166 }, { "epoch": 0.7241765942536791, "grad_norm": 0.5186643600463867, "learning_rate": 0.00023309734513274336, "loss": 0.0781, "step": 5167 }, { "epoch": 0.7243167484232655, "grad_norm": 0.527142345905304, "learning_rate": 0.00023308299449892368, "loss": 0.2136, "step": 5168 }, { "epoch": 0.7244569025928521, "grad_norm": 0.265505313873291, "learning_rate": 0.000233068643865104, "loss": 0.0562, "step": 5169 }, { "epoch": 0.7245970567624387, "grad_norm": 0.6605541110038757, "learning_rate": 0.00023305429323128437, "loss": 0.087, "step": 5170 }, { "epoch": 0.7247372109320253, "grad_norm": 0.44356611371040344, "learning_rate": 0.0002330399425974647, "loss": 0.1449, "step": 5171 }, { "epoch": 0.7248773651016118, "grad_norm": 0.13862387835979462, "learning_rate": 0.00023302559196364503, "loss": 0.0244, "step": 5172 }, { "epoch": 0.7250175192711983, "grad_norm": 0.16604040563106537, "learning_rate": 0.00023301124132982538, "loss": 0.057, "step": 5173 }, { "epoch": 0.7251576734407849, "grad_norm": 0.8485308289527893, "learning_rate": 0.0002329968906960057, "loss": 0.1922, "step": 5174 }, { "epoch": 0.7252978276103714, "grad_norm": 0.45553362369537354, "learning_rate": 0.00023298254006218604, "loss": 0.0928, "step": 5175 }, { "epoch": 0.725437981779958, "grad_norm": 1.0841885805130005, "learning_rate": 0.00023296818942836642, "loss": 0.0822, "step": 5176 }, { "epoch": 0.7255781359495445, "grad_norm": 0.26723960041999817, "learning_rate": 0.00023295383879454675, "loss": 0.0839, "step": 5177 }, { "epoch": 0.725718290119131, "grad_norm": 0.21551693975925446, "learning_rate": 0.00023293948816072708, "loss": 0.0494, "step": 5178 }, { "epoch": 0.7258584442887176, "grad_norm": 0.5043818950653076, "learning_rate": 0.0002329251375269074, "loss": 0.1181, "step": 5179 }, { "epoch": 0.7259985984583042, "grad_norm": 0.3727339208126068, "learning_rate": 0.00023291078689308776, "loss": 0.0975, "step": 
5180 }, { "epoch": 0.7261387526278906, "grad_norm": 0.2844492197036743, "learning_rate": 0.0002328964362592681, "loss": 0.0666, "step": 5181 }, { "epoch": 0.7262789067974772, "grad_norm": 0.4735451936721802, "learning_rate": 0.00023288208562544842, "loss": 0.1129, "step": 5182 }, { "epoch": 0.7264190609670638, "grad_norm": 0.40668800473213196, "learning_rate": 0.00023286773499162878, "loss": 0.0897, "step": 5183 }, { "epoch": 0.7265592151366503, "grad_norm": 0.42613571882247925, "learning_rate": 0.0002328533843578091, "loss": 0.2022, "step": 5184 }, { "epoch": 0.7266993693062369, "grad_norm": 0.3661457598209381, "learning_rate": 0.00023283903372398943, "loss": 0.0436, "step": 5185 }, { "epoch": 0.7268395234758234, "grad_norm": 0.3932240307331085, "learning_rate": 0.00023282468309016982, "loss": 0.1641, "step": 5186 }, { "epoch": 0.7269796776454099, "grad_norm": 0.7341818809509277, "learning_rate": 0.00023281033245635014, "loss": 0.0786, "step": 5187 }, { "epoch": 0.7271198318149965, "grad_norm": 0.5837560296058655, "learning_rate": 0.00023279598182253047, "loss": 0.0862, "step": 5188 }, { "epoch": 0.7272599859845831, "grad_norm": 0.16939066350460052, "learning_rate": 0.00023278163118871083, "loss": 0.0389, "step": 5189 }, { "epoch": 0.7274001401541695, "grad_norm": 0.39616861939430237, "learning_rate": 0.00023276728055489116, "loss": 0.0592, "step": 5190 }, { "epoch": 0.7275402943237561, "grad_norm": 0.36377978324890137, "learning_rate": 0.00023275292992107149, "loss": 0.0481, "step": 5191 }, { "epoch": 0.7276804484933427, "grad_norm": 0.4941023290157318, "learning_rate": 0.00023273857928725184, "loss": 0.079, "step": 5192 }, { "epoch": 0.7278206026629293, "grad_norm": 0.5489917397499084, "learning_rate": 0.00023272422865343217, "loss": 0.0712, "step": 5193 }, { "epoch": 0.7279607568325158, "grad_norm": 0.45941728353500366, "learning_rate": 0.0002327098780196125, "loss": 0.0605, "step": 5194 }, { "epoch": 0.7281009110021023, "grad_norm": 1.1872835159301758, 
"learning_rate": 0.00023269552738579288, "loss": 0.1343, "step": 5195 }, { "epoch": 0.7282410651716889, "grad_norm": 1.267572283744812, "learning_rate": 0.0002326811767519732, "loss": 0.1438, "step": 5196 }, { "epoch": 0.7283812193412754, "grad_norm": 2.0092835426330566, "learning_rate": 0.00023266682611815354, "loss": 0.2441, "step": 5197 }, { "epoch": 0.728521373510862, "grad_norm": 1.744664192199707, "learning_rate": 0.00023265247548433387, "loss": 0.1901, "step": 5198 }, { "epoch": 0.7286615276804485, "grad_norm": 1.714289665222168, "learning_rate": 0.00023263812485051422, "loss": 0.1596, "step": 5199 }, { "epoch": 0.728801681850035, "grad_norm": 1.742221713066101, "learning_rate": 0.00023262377421669455, "loss": 0.0317, "step": 5200 }, { "epoch": 0.7289418360196216, "grad_norm": 0.38922059535980225, "learning_rate": 0.00023260942358287488, "loss": 0.1005, "step": 5201 }, { "epoch": 0.7290819901892082, "grad_norm": 0.3114355206489563, "learning_rate": 0.00023259507294905524, "loss": 0.0453, "step": 5202 }, { "epoch": 0.7292221443587946, "grad_norm": 0.14320124685764313, "learning_rate": 0.00023258072231523556, "loss": 0.0184, "step": 5203 }, { "epoch": 0.7293622985283812, "grad_norm": 0.2641476094722748, "learning_rate": 0.0002325663716814159, "loss": 0.0489, "step": 5204 }, { "epoch": 0.7295024526979678, "grad_norm": 0.3066056966781616, "learning_rate": 0.00023255202104759625, "loss": 0.0626, "step": 5205 }, { "epoch": 0.7296426068675543, "grad_norm": 0.2508927285671234, "learning_rate": 0.00023253767041377658, "loss": 0.0669, "step": 5206 }, { "epoch": 0.7297827610371409, "grad_norm": 0.4991329312324524, "learning_rate": 0.0002325233197799569, "loss": 0.1117, "step": 5207 }, { "epoch": 0.7299229152067274, "grad_norm": 0.4104723036289215, "learning_rate": 0.0002325089691461373, "loss": 0.0921, "step": 5208 }, { "epoch": 0.7300630693763139, "grad_norm": 0.4139060080051422, "learning_rate": 0.00023249461851231762, "loss": 0.0487, "step": 5209 }, { "epoch": 
0.7302032235459005, "grad_norm": 0.2513093054294586, "learning_rate": 0.00023248026787849794, "loss": 0.0474, "step": 5210 }, { "epoch": 0.7303433777154871, "grad_norm": 0.23586632311344147, "learning_rate": 0.0002324659172446783, "loss": 0.0431, "step": 5211 }, { "epoch": 0.7304835318850735, "grad_norm": 0.29323136806488037, "learning_rate": 0.00023245156661085863, "loss": 0.0724, "step": 5212 }, { "epoch": 0.7306236860546601, "grad_norm": 0.37320569157600403, "learning_rate": 0.00023243721597703896, "loss": 0.0551, "step": 5213 }, { "epoch": 0.7307638402242467, "grad_norm": 0.37209105491638184, "learning_rate": 0.00023242286534321929, "loss": 0.0716, "step": 5214 }, { "epoch": 0.7309039943938332, "grad_norm": 0.2746914029121399, "learning_rate": 0.00023240851470939964, "loss": 0.0581, "step": 5215 }, { "epoch": 0.7310441485634198, "grad_norm": 0.3433951437473297, "learning_rate": 0.00023239416407557997, "loss": 0.0885, "step": 5216 }, { "epoch": 0.7311843027330063, "grad_norm": 1.1792762279510498, "learning_rate": 0.0002323798134417603, "loss": 0.115, "step": 5217 }, { "epoch": 0.7313244569025928, "grad_norm": 0.339902400970459, "learning_rate": 0.00023236546280794068, "loss": 0.0783, "step": 5218 }, { "epoch": 0.7314646110721794, "grad_norm": 0.4927713871002197, "learning_rate": 0.000232351112174121, "loss": 0.069, "step": 5219 }, { "epoch": 0.731604765241766, "grad_norm": 0.6773391366004944, "learning_rate": 0.00023233676154030134, "loss": 0.1087, "step": 5220 }, { "epoch": 0.7317449194113524, "grad_norm": 0.37482526898384094, "learning_rate": 0.0002323224109064817, "loss": 0.0865, "step": 5221 }, { "epoch": 0.731885073580939, "grad_norm": 0.2873540222644806, "learning_rate": 0.00023230806027266202, "loss": 0.0574, "step": 5222 }, { "epoch": 0.7320252277505256, "grad_norm": 0.2761348783969879, "learning_rate": 0.00023229370963884235, "loss": 0.0577, "step": 5223 }, { "epoch": 0.7321653819201122, "grad_norm": 0.8928381204605103, "learning_rate": 
0.0002322793590050227, "loss": 0.2217, "step": 5224 }, { "epoch": 0.7323055360896986, "grad_norm": 0.3465178906917572, "learning_rate": 0.00023226500837120304, "loss": 0.1032, "step": 5225 }, { "epoch": 0.7324456902592852, "grad_norm": 0.3352315127849579, "learning_rate": 0.00023225065773738336, "loss": 0.1259, "step": 5226 }, { "epoch": 0.7325858444288718, "grad_norm": 0.17192241549491882, "learning_rate": 0.00023223630710356375, "loss": 0.0459, "step": 5227 }, { "epoch": 0.7327259985984583, "grad_norm": 0.22748470306396484, "learning_rate": 0.00023222195646974408, "loss": 0.0428, "step": 5228 }, { "epoch": 0.7328661527680449, "grad_norm": 0.4069025218486786, "learning_rate": 0.0002322076058359244, "loss": 0.1006, "step": 5229 }, { "epoch": 0.7330063069376314, "grad_norm": 0.24883797764778137, "learning_rate": 0.00023219325520210476, "loss": 0.0569, "step": 5230 }, { "epoch": 0.7331464611072179, "grad_norm": 0.26614904403686523, "learning_rate": 0.0002321789045682851, "loss": 0.0874, "step": 5231 }, { "epoch": 0.7332866152768045, "grad_norm": 0.30397799611091614, "learning_rate": 0.00023216455393446542, "loss": 0.0634, "step": 5232 }, { "epoch": 0.7334267694463911, "grad_norm": 0.5899048447608948, "learning_rate": 0.00023215020330064575, "loss": 0.0459, "step": 5233 }, { "epoch": 0.7335669236159775, "grad_norm": 0.26289013028144836, "learning_rate": 0.0002321358526668261, "loss": 0.0306, "step": 5234 }, { "epoch": 0.7337070777855641, "grad_norm": 0.3717977702617645, "learning_rate": 0.00023212150203300643, "loss": 0.0959, "step": 5235 }, { "epoch": 0.7338472319551507, "grad_norm": 0.13371019065380096, "learning_rate": 0.00023210715139918676, "loss": 0.0447, "step": 5236 }, { "epoch": 0.7339873861247372, "grad_norm": 0.1784622073173523, "learning_rate": 0.00023209280076536711, "loss": 0.0331, "step": 5237 }, { "epoch": 0.7341275402943238, "grad_norm": 0.40873488783836365, "learning_rate": 0.00023207845013154744, "loss": 0.0861, "step": 5238 }, { "epoch": 
0.7342676944639103, "grad_norm": 0.45802634954452515, "learning_rate": 0.00023206409949772777, "loss": 0.1016, "step": 5239 }, { "epoch": 0.7344078486334968, "grad_norm": 0.29189273715019226, "learning_rate": 0.00023204974886390815, "loss": 0.0551, "step": 5240 }, { "epoch": 0.7345480028030834, "grad_norm": 0.5538116097450256, "learning_rate": 0.00023203539823008848, "loss": 0.0858, "step": 5241 }, { "epoch": 0.73468815697267, "grad_norm": 1.4417089223861694, "learning_rate": 0.0002320210475962688, "loss": 0.3018, "step": 5242 }, { "epoch": 0.7348283111422564, "grad_norm": 0.3187911808490753, "learning_rate": 0.00023200669696244917, "loss": 0.0513, "step": 5243 }, { "epoch": 0.734968465311843, "grad_norm": 0.6681216359138489, "learning_rate": 0.0002319923463286295, "loss": 0.1883, "step": 5244 }, { "epoch": 0.7351086194814296, "grad_norm": 0.24455589056015015, "learning_rate": 0.00023197799569480982, "loss": 0.0391, "step": 5245 }, { "epoch": 0.7352487736510162, "grad_norm": 0.6566860675811768, "learning_rate": 0.00023196364506099018, "loss": 0.0577, "step": 5246 }, { "epoch": 0.7353889278206026, "grad_norm": 0.9834797382354736, "learning_rate": 0.0002319492944271705, "loss": 0.1566, "step": 5247 }, { "epoch": 0.7355290819901892, "grad_norm": 0.8370198011398315, "learning_rate": 0.00023193494379335084, "loss": 0.061, "step": 5248 }, { "epoch": 0.7356692361597758, "grad_norm": 1.8041143417358398, "learning_rate": 0.00023192059315953117, "loss": 0.1527, "step": 5249 }, { "epoch": 0.7358093903293623, "grad_norm": 3.4857828617095947, "learning_rate": 0.00023190624252571155, "loss": 0.2805, "step": 5250 }, { "epoch": 0.7359495444989489, "grad_norm": 0.11467088758945465, "learning_rate": 0.00023189189189189188, "loss": 0.0156, "step": 5251 }, { "epoch": 0.7360896986685354, "grad_norm": 0.4019100069999695, "learning_rate": 0.0002318775412580722, "loss": 0.0403, "step": 5252 }, { "epoch": 0.7362298528381219, "grad_norm": 0.46338367462158203, "learning_rate": 
0.00023186319062425256, "loss": 0.0979, "step": 5253 }, { "epoch": 0.7363700070077085, "grad_norm": 0.26703977584838867, "learning_rate": 0.0002318488399904329, "loss": 0.0542, "step": 5254 }, { "epoch": 0.7365101611772951, "grad_norm": 0.3099108040332794, "learning_rate": 0.00023183448935661322, "loss": 0.0848, "step": 5255 }, { "epoch": 0.7366503153468815, "grad_norm": 0.19513411819934845, "learning_rate": 0.00023182013872279357, "loss": 0.0199, "step": 5256 }, { "epoch": 0.7367904695164681, "grad_norm": 0.7094303965568542, "learning_rate": 0.0002318057880889739, "loss": 0.063, "step": 5257 }, { "epoch": 0.7369306236860547, "grad_norm": 0.3076759874820709, "learning_rate": 0.00023179143745515423, "loss": 0.0928, "step": 5258 }, { "epoch": 0.7370707778556412, "grad_norm": 0.1262865662574768, "learning_rate": 0.00023177708682133461, "loss": 0.0434, "step": 5259 }, { "epoch": 0.7372109320252278, "grad_norm": 0.4564856290817261, "learning_rate": 0.00023176273618751494, "loss": 0.0648, "step": 5260 }, { "epoch": 0.7373510861948143, "grad_norm": 0.42936354875564575, "learning_rate": 0.00023174838555369527, "loss": 0.0749, "step": 5261 }, { "epoch": 0.7374912403644008, "grad_norm": 0.1837492734193802, "learning_rate": 0.00023173403491987563, "loss": 0.0267, "step": 5262 }, { "epoch": 0.7376313945339874, "grad_norm": 0.5880001783370972, "learning_rate": 0.00023171968428605595, "loss": 0.1105, "step": 5263 }, { "epoch": 0.737771548703574, "grad_norm": 0.4614347815513611, "learning_rate": 0.00023170533365223628, "loss": 0.049, "step": 5264 }, { "epoch": 0.7379117028731604, "grad_norm": 0.4977606236934662, "learning_rate": 0.00023169098301841664, "loss": 0.0915, "step": 5265 }, { "epoch": 0.738051857042747, "grad_norm": 0.2489607334136963, "learning_rate": 0.00023167663238459697, "loss": 0.0689, "step": 5266 }, { "epoch": 0.7381920112123336, "grad_norm": 0.35282954573631287, "learning_rate": 0.0002316622817507773, "loss": 0.0656, "step": 5267 }, { "epoch": 
0.7383321653819201, "grad_norm": 0.4747740924358368, "learning_rate": 0.00023164793111695762, "loss": 0.1195, "step": 5268 }, { "epoch": 0.7384723195515066, "grad_norm": 0.5125481486320496, "learning_rate": 0.00023163358048313798, "loss": 0.1129, "step": 5269 }, { "epoch": 0.7386124737210932, "grad_norm": 0.3013273775577545, "learning_rate": 0.0002316192298493183, "loss": 0.0854, "step": 5270 }, { "epoch": 0.7387526278906797, "grad_norm": 0.4458228051662445, "learning_rate": 0.00023160487921549864, "loss": 0.0338, "step": 5271 }, { "epoch": 0.7388927820602663, "grad_norm": 0.8556686639785767, "learning_rate": 0.00023159052858167902, "loss": 0.1236, "step": 5272 }, { "epoch": 0.7390329362298529, "grad_norm": 0.5918168425559998, "learning_rate": 0.00023157617794785935, "loss": 0.1524, "step": 5273 }, { "epoch": 0.7391730903994393, "grad_norm": 0.45965802669525146, "learning_rate": 0.00023156182731403968, "loss": 0.1137, "step": 5274 }, { "epoch": 0.7393132445690259, "grad_norm": 0.42896077036857605, "learning_rate": 0.00023154747668022003, "loss": 0.064, "step": 5275 }, { "epoch": 0.7394533987386125, "grad_norm": 0.4948711395263672, "learning_rate": 0.00023153312604640036, "loss": 0.1047, "step": 5276 }, { "epoch": 0.7395935529081991, "grad_norm": 0.2017544060945511, "learning_rate": 0.0002315187754125807, "loss": 0.0255, "step": 5277 }, { "epoch": 0.7397337070777855, "grad_norm": 0.6741991639137268, "learning_rate": 0.00023150442477876105, "loss": 0.0827, "step": 5278 }, { "epoch": 0.7398738612473721, "grad_norm": 0.26342734694480896, "learning_rate": 0.00023149007414494137, "loss": 0.0558, "step": 5279 }, { "epoch": 0.7400140154169587, "grad_norm": 0.288428395986557, "learning_rate": 0.0002314757235111217, "loss": 0.054, "step": 5280 }, { "epoch": 0.7401541695865452, "grad_norm": 0.20877547562122345, "learning_rate": 0.00023146137287730209, "loss": 0.0288, "step": 5281 }, { "epoch": 0.7402943237561318, "grad_norm": 0.48135656118392944, "learning_rate": 
0.00023144702224348241, "loss": 0.0815, "step": 5282 }, { "epoch": 0.7404344779257183, "grad_norm": 0.23357640206813812, "learning_rate": 0.00023143267160966274, "loss": 0.1051, "step": 5283 }, { "epoch": 0.7405746320953048, "grad_norm": 0.46737703680992126, "learning_rate": 0.00023141832097584307, "loss": 0.1595, "step": 5284 }, { "epoch": 0.7407147862648914, "grad_norm": 0.13414525985717773, "learning_rate": 0.00023140397034202343, "loss": 0.0157, "step": 5285 }, { "epoch": 0.740854940434478, "grad_norm": 1.2838695049285889, "learning_rate": 0.00023138961970820376, "loss": 0.1122, "step": 5286 }, { "epoch": 0.7409950946040644, "grad_norm": 0.4931929409503937, "learning_rate": 0.00023137526907438408, "loss": 0.1286, "step": 5287 }, { "epoch": 0.741135248773651, "grad_norm": 0.6304449439048767, "learning_rate": 0.00023136091844056444, "loss": 0.1813, "step": 5288 }, { "epoch": 0.7412754029432376, "grad_norm": 0.3236653208732605, "learning_rate": 0.00023134656780674477, "loss": 0.0727, "step": 5289 }, { "epoch": 0.7414155571128241, "grad_norm": 0.41367143392562866, "learning_rate": 0.0002313322171729251, "loss": 0.1643, "step": 5290 }, { "epoch": 0.7415557112824106, "grad_norm": 0.5044955015182495, "learning_rate": 0.00023131786653910548, "loss": 0.0814, "step": 5291 }, { "epoch": 0.7416958654519972, "grad_norm": 0.36461788415908813, "learning_rate": 0.0002313035159052858, "loss": 0.0483, "step": 5292 }, { "epoch": 0.7418360196215837, "grad_norm": 1.1738500595092773, "learning_rate": 0.00023128916527146614, "loss": 0.1703, "step": 5293 }, { "epoch": 0.7419761737911703, "grad_norm": 0.4505654573440552, "learning_rate": 0.0002312748146376465, "loss": 0.041, "step": 5294 }, { "epoch": 0.7421163279607569, "grad_norm": 1.0805988311767578, "learning_rate": 0.00023126046400382682, "loss": 0.145, "step": 5295 }, { "epoch": 0.7422564821303433, "grad_norm": 0.17228636145591736, "learning_rate": 0.00023124611337000715, "loss": 0.0308, "step": 5296 }, { "epoch": 
0.7423966362999299, "grad_norm": 0.41310766339302063, "learning_rate": 0.0002312317627361875, "loss": 0.0923, "step": 5297 }, { "epoch": 0.7425367904695165, "grad_norm": 0.2606530785560608, "learning_rate": 0.00023121741210236783, "loss": 0.0339, "step": 5298 }, { "epoch": 0.742676944639103, "grad_norm": 1.6337460279464722, "learning_rate": 0.00023120306146854816, "loss": 0.4906, "step": 5299 }, { "epoch": 0.7428170988086895, "grad_norm": 0.632016122341156, "learning_rate": 0.00023118871083472852, "loss": 0.0742, "step": 5300 }, { "epoch": 0.7429572529782761, "grad_norm": 0.1908397525548935, "learning_rate": 0.00023117436020090885, "loss": 0.0519, "step": 5301 }, { "epoch": 0.7430974071478627, "grad_norm": 0.5935701727867126, "learning_rate": 0.00023116000956708918, "loss": 0.0747, "step": 5302 }, { "epoch": 0.7432375613174492, "grad_norm": 0.17464745044708252, "learning_rate": 0.0002311456589332695, "loss": 0.048, "step": 5303 }, { "epoch": 0.7433777154870357, "grad_norm": 0.3084712028503418, "learning_rate": 0.0002311313082994499, "loss": 0.112, "step": 5304 }, { "epoch": 0.7435178696566223, "grad_norm": 0.19376736879348755, "learning_rate": 0.00023111695766563022, "loss": 0.0314, "step": 5305 }, { "epoch": 0.7436580238262088, "grad_norm": 0.2405315488576889, "learning_rate": 0.00023110260703181054, "loss": 0.0623, "step": 5306 }, { "epoch": 0.7437981779957954, "grad_norm": 0.6986135244369507, "learning_rate": 0.0002310882563979909, "loss": 0.1593, "step": 5307 }, { "epoch": 0.743938332165382, "grad_norm": 0.4332660138607025, "learning_rate": 0.00023107390576417123, "loss": 0.0267, "step": 5308 }, { "epoch": 0.7440784863349684, "grad_norm": 0.14004847407341003, "learning_rate": 0.00023105955513035156, "loss": 0.0421, "step": 5309 }, { "epoch": 0.744218640504555, "grad_norm": 0.4322187304496765, "learning_rate": 0.0002310452044965319, "loss": 0.1039, "step": 5310 }, { "epoch": 0.7443587946741416, "grad_norm": 0.21246327459812164, "learning_rate": 
0.00023103085386271224, "loss": 0.0348, "step": 5311 }, { "epoch": 0.7444989488437281, "grad_norm": 0.428611695766449, "learning_rate": 0.00023101650322889257, "loss": 0.0697, "step": 5312 }, { "epoch": 0.7446391030133146, "grad_norm": 0.9361205697059631, "learning_rate": 0.00023100215259507295, "loss": 0.0534, "step": 5313 }, { "epoch": 0.7447792571829012, "grad_norm": 0.2423429787158966, "learning_rate": 0.00023098780196125328, "loss": 0.0695, "step": 5314 }, { "epoch": 0.7449194113524877, "grad_norm": 0.9934574365615845, "learning_rate": 0.0002309734513274336, "loss": 0.1319, "step": 5315 }, { "epoch": 0.7450595655220743, "grad_norm": 0.34104806184768677, "learning_rate": 0.00023095910069361396, "loss": 0.0749, "step": 5316 }, { "epoch": 0.7451997196916609, "grad_norm": 0.430503249168396, "learning_rate": 0.0002309447500597943, "loss": 0.058, "step": 5317 }, { "epoch": 0.7453398738612473, "grad_norm": 0.5390800833702087, "learning_rate": 0.00023093039942597462, "loss": 0.0757, "step": 5318 }, { "epoch": 0.7454800280308339, "grad_norm": 0.23335863649845123, "learning_rate": 0.00023091604879215495, "loss": 0.0495, "step": 5319 }, { "epoch": 0.7456201822004205, "grad_norm": 0.31612691283226013, "learning_rate": 0.0002309016981583353, "loss": 0.0575, "step": 5320 }, { "epoch": 0.745760336370007, "grad_norm": 0.9983062744140625, "learning_rate": 0.00023088734752451563, "loss": 0.0886, "step": 5321 }, { "epoch": 0.7459004905395935, "grad_norm": 1.0260056257247925, "learning_rate": 0.00023087299689069596, "loss": 0.1082, "step": 5322 }, { "epoch": 0.7460406447091801, "grad_norm": 0.3832590579986572, "learning_rate": 0.00023085864625687635, "loss": 0.0932, "step": 5323 }, { "epoch": 0.7461807988787666, "grad_norm": 0.3209840953350067, "learning_rate": 0.00023084429562305667, "loss": 0.0382, "step": 5324 }, { "epoch": 0.7463209530483532, "grad_norm": 0.616166353225708, "learning_rate": 0.000230829944989237, "loss": 0.1083, "step": 5325 }, { "epoch": 0.7464611072179397, 
"grad_norm": 0.26599881052970886, "learning_rate": 0.00023081559435541736, "loss": 0.0496, "step": 5326 }, { "epoch": 0.7466012613875262, "grad_norm": 0.27324193716049194, "learning_rate": 0.0002308012437215977, "loss": 0.0991, "step": 5327 }, { "epoch": 0.7467414155571128, "grad_norm": 0.4607287347316742, "learning_rate": 0.00023078689308777802, "loss": 0.0606, "step": 5328 }, { "epoch": 0.7468815697266994, "grad_norm": 0.3601208031177521, "learning_rate": 0.00023077254245395837, "loss": 0.0359, "step": 5329 }, { "epoch": 0.747021723896286, "grad_norm": 0.5417634844779968, "learning_rate": 0.0002307581918201387, "loss": 0.1147, "step": 5330 }, { "epoch": 0.7471618780658724, "grad_norm": 0.6748397946357727, "learning_rate": 0.00023074384118631903, "loss": 0.1519, "step": 5331 }, { "epoch": 0.747302032235459, "grad_norm": 0.5206419229507446, "learning_rate": 0.00023072949055249938, "loss": 0.1492, "step": 5332 }, { "epoch": 0.7474421864050456, "grad_norm": 0.29515206813812256, "learning_rate": 0.0002307151399186797, "loss": 0.0408, "step": 5333 }, { "epoch": 0.7475823405746321, "grad_norm": 0.5675029754638672, "learning_rate": 0.00023070078928486004, "loss": 0.0625, "step": 5334 }, { "epoch": 0.7477224947442186, "grad_norm": 0.37335205078125, "learning_rate": 0.00023068643865104042, "loss": 0.0673, "step": 5335 }, { "epoch": 0.7478626489138052, "grad_norm": 0.39389199018478394, "learning_rate": 0.00023067208801722075, "loss": 0.0939, "step": 5336 }, { "epoch": 0.7480028030833917, "grad_norm": 0.536999523639679, "learning_rate": 0.00023065773738340108, "loss": 0.1173, "step": 5337 }, { "epoch": 0.7481429572529783, "grad_norm": 0.23542311787605286, "learning_rate": 0.0002306433867495814, "loss": 0.0511, "step": 5338 }, { "epoch": 0.7482831114225649, "grad_norm": 0.45447593927383423, "learning_rate": 0.00023062903611576177, "loss": 0.1394, "step": 5339 }, { "epoch": 0.7484232655921513, "grad_norm": 0.35767289996147156, "learning_rate": 0.0002306146854819421, "loss": 
0.0605, "step": 5340 }, { "epoch": 0.7485634197617379, "grad_norm": 0.6906814575195312, "learning_rate": 0.00023060033484812242, "loss": 0.1471, "step": 5341 }, { "epoch": 0.7487035739313245, "grad_norm": 0.4207620918750763, "learning_rate": 0.00023058598421430278, "loss": 0.0527, "step": 5342 }, { "epoch": 0.748843728100911, "grad_norm": 0.6929364204406738, "learning_rate": 0.0002305716335804831, "loss": 0.1169, "step": 5343 }, { "epoch": 0.7489838822704975, "grad_norm": 0.3982695937156677, "learning_rate": 0.00023055728294666344, "loss": 0.0811, "step": 5344 }, { "epoch": 0.7491240364400841, "grad_norm": 0.3504420518875122, "learning_rate": 0.00023054293231284382, "loss": 0.0756, "step": 5345 }, { "epoch": 0.7492641906096706, "grad_norm": 0.4439573287963867, "learning_rate": 0.00023052858167902415, "loss": 0.0796, "step": 5346 }, { "epoch": 0.7494043447792572, "grad_norm": 0.6658779978752136, "learning_rate": 0.00023051423104520448, "loss": 0.1295, "step": 5347 }, { "epoch": 0.7495444989488437, "grad_norm": 1.8031822443008423, "learning_rate": 0.00023049988041138483, "loss": 0.2205, "step": 5348 }, { "epoch": 0.7496846531184302, "grad_norm": 1.138259768486023, "learning_rate": 0.00023048552977756516, "loss": 0.0508, "step": 5349 }, { "epoch": 0.7498248072880168, "grad_norm": 2.118530511856079, "learning_rate": 0.0002304711791437455, "loss": 0.3368, "step": 5350 }, { "epoch": 0.7499649614576034, "grad_norm": 0.713670551776886, "learning_rate": 0.00023045682850992584, "loss": 0.1128, "step": 5351 }, { "epoch": 0.75010511562719, "grad_norm": 0.5301555395126343, "learning_rate": 0.00023044247787610617, "loss": 0.0871, "step": 5352 }, { "epoch": 0.7502452697967764, "grad_norm": 0.4175166189670563, "learning_rate": 0.0002304281272422865, "loss": 0.0838, "step": 5353 }, { "epoch": 0.750385423966363, "grad_norm": 0.4329306185245514, "learning_rate": 0.00023041377660846683, "loss": 0.1172, "step": 5354 }, { "epoch": 0.7505255781359496, "grad_norm": 0.5823131203651428, 
"learning_rate": 0.0002303994259746472, "loss": 0.0829, "step": 5355 }, { "epoch": 0.7506657323055361, "grad_norm": 0.43705475330352783, "learning_rate": 0.00023038507534082754, "loss": 0.0978, "step": 5356 }, { "epoch": 0.7508058864751226, "grad_norm": 0.3040735721588135, "learning_rate": 0.00023037072470700787, "loss": 0.0573, "step": 5357 }, { "epoch": 0.7509460406447092, "grad_norm": 0.35153764486312866, "learning_rate": 0.00023035637407318823, "loss": 0.1054, "step": 5358 }, { "epoch": 0.7510861948142957, "grad_norm": 0.3424082398414612, "learning_rate": 0.00023034202343936855, "loss": 0.0516, "step": 5359 }, { "epoch": 0.7512263489838823, "grad_norm": 0.2389257550239563, "learning_rate": 0.00023032767280554888, "loss": 0.065, "step": 5360 }, { "epoch": 0.7513665031534689, "grad_norm": 0.3950711190700531, "learning_rate": 0.00023031332217172924, "loss": 0.1427, "step": 5361 }, { "epoch": 0.7515066573230553, "grad_norm": 0.4938294589519501, "learning_rate": 0.00023029897153790957, "loss": 0.121, "step": 5362 }, { "epoch": 0.7516468114926419, "grad_norm": 0.3829919099807739, "learning_rate": 0.0002302846209040899, "loss": 0.0583, "step": 5363 }, { "epoch": 0.7517869656622285, "grad_norm": 0.3149269223213196, "learning_rate": 0.00023027027027027025, "loss": 0.0519, "step": 5364 }, { "epoch": 0.751927119831815, "grad_norm": 0.5110300183296204, "learning_rate": 0.00023025591963645058, "loss": 0.0546, "step": 5365 }, { "epoch": 0.7520672740014015, "grad_norm": 0.1330219954252243, "learning_rate": 0.0002302415690026309, "loss": 0.0249, "step": 5366 }, { "epoch": 0.7522074281709881, "grad_norm": 0.3704589307308197, "learning_rate": 0.0002302272183688113, "loss": 0.0626, "step": 5367 }, { "epoch": 0.7523475823405746, "grad_norm": 0.3934774398803711, "learning_rate": 0.00023021286773499162, "loss": 0.0594, "step": 5368 }, { "epoch": 0.7524877365101612, "grad_norm": 1.109682321548462, "learning_rate": 0.00023019851710117195, "loss": 0.0687, "step": 5369 }, { "epoch": 
0.7526278906797477, "grad_norm": 0.37943321466445923, "learning_rate": 0.0002301841664673523, "loss": 0.0525, "step": 5370 }, { "epoch": 0.7527680448493342, "grad_norm": 1.3197342157363892, "learning_rate": 0.00023016981583353263, "loss": 0.1265, "step": 5371 }, { "epoch": 0.7529081990189208, "grad_norm": 0.31257906556129456, "learning_rate": 0.00023015546519971296, "loss": 0.0496, "step": 5372 }, { "epoch": 0.7530483531885074, "grad_norm": 0.23621530830860138, "learning_rate": 0.0002301411145658933, "loss": 0.0625, "step": 5373 }, { "epoch": 0.753188507358094, "grad_norm": 0.7305300235748291, "learning_rate": 0.00023012676393207364, "loss": 0.0657, "step": 5374 }, { "epoch": 0.7533286615276804, "grad_norm": 0.2700155973434448, "learning_rate": 0.00023011241329825397, "loss": 0.0505, "step": 5375 }, { "epoch": 0.753468815697267, "grad_norm": 0.597258985042572, "learning_rate": 0.0002300980626644343, "loss": 0.1141, "step": 5376 }, { "epoch": 0.7536089698668536, "grad_norm": 0.41933366656303406, "learning_rate": 0.00023008371203061468, "loss": 0.062, "step": 5377 }, { "epoch": 0.7537491240364401, "grad_norm": 0.49484869837760925, "learning_rate": 0.000230069361396795, "loss": 0.0897, "step": 5378 }, { "epoch": 0.7538892782060266, "grad_norm": 0.9962931871414185, "learning_rate": 0.00023005501076297534, "loss": 0.0775, "step": 5379 }, { "epoch": 0.7540294323756132, "grad_norm": 0.40183693170547485, "learning_rate": 0.0002300406601291557, "loss": 0.1059, "step": 5380 }, { "epoch": 0.7541695865451997, "grad_norm": 0.3148702085018158, "learning_rate": 0.00023002630949533603, "loss": 0.0391, "step": 5381 }, { "epoch": 0.7543097407147863, "grad_norm": 0.46502649784088135, "learning_rate": 0.00023001195886151635, "loss": 0.0662, "step": 5382 }, { "epoch": 0.7544498948843729, "grad_norm": 0.44700387120246887, "learning_rate": 0.0002299976082276967, "loss": 0.1109, "step": 5383 }, { "epoch": 0.7545900490539593, "grad_norm": 0.5130743384361267, "learning_rate": 
0.00022998325759387704, "loss": 0.1, "step": 5384 }, { "epoch": 0.7547302032235459, "grad_norm": 0.5566855669021606, "learning_rate": 0.00022996890696005737, "loss": 0.1291, "step": 5385 }, { "epoch": 0.7548703573931325, "grad_norm": 0.7589330673217773, "learning_rate": 0.00022995455632623775, "loss": 0.0861, "step": 5386 }, { "epoch": 0.755010511562719, "grad_norm": 0.7593021988868713, "learning_rate": 0.00022994020569241808, "loss": 0.1528, "step": 5387 }, { "epoch": 0.7551506657323055, "grad_norm": 0.2339348942041397, "learning_rate": 0.0002299258550585984, "loss": 0.0634, "step": 5388 }, { "epoch": 0.7552908199018921, "grad_norm": 0.5440716743469238, "learning_rate": 0.00022991150442477874, "loss": 0.0472, "step": 5389 }, { "epoch": 0.7554309740714786, "grad_norm": 1.0329618453979492, "learning_rate": 0.0002298971537909591, "loss": 0.1092, "step": 5390 }, { "epoch": 0.7555711282410652, "grad_norm": 0.4245467483997345, "learning_rate": 0.00022988280315713942, "loss": 0.0733, "step": 5391 }, { "epoch": 0.7557112824106517, "grad_norm": 0.5720781683921814, "learning_rate": 0.00022986845252331975, "loss": 0.1889, "step": 5392 }, { "epoch": 0.7558514365802382, "grad_norm": 0.22144603729248047, "learning_rate": 0.0002298541018895001, "loss": 0.0294, "step": 5393 }, { "epoch": 0.7559915907498248, "grad_norm": 2.2244887351989746, "learning_rate": 0.00022983975125568043, "loss": 0.1697, "step": 5394 }, { "epoch": 0.7561317449194114, "grad_norm": 0.7043256759643555, "learning_rate": 0.00022982540062186076, "loss": 0.0496, "step": 5395 }, { "epoch": 0.756271899088998, "grad_norm": 1.7605684995651245, "learning_rate": 0.00022981104998804112, "loss": 0.0966, "step": 5396 }, { "epoch": 0.7564120532585844, "grad_norm": 1.0889674425125122, "learning_rate": 0.00022979669935422145, "loss": 0.0912, "step": 5397 }, { "epoch": 0.756552207428171, "grad_norm": 2.1754627227783203, "learning_rate": 0.00022978234872040177, "loss": 0.2401, "step": 5398 }, { "epoch": 0.7566923615977575, 
"grad_norm": 1.9431825876235962, "learning_rate": 0.00022976799808658216, "loss": 0.2546, "step": 5399 }, { "epoch": 0.7568325157673441, "grad_norm": 3.2252488136291504, "learning_rate": 0.00022975364745276249, "loss": 0.2302, "step": 5400 }, { "epoch": 0.7569726699369306, "grad_norm": 0.22456753253936768, "learning_rate": 0.00022973929681894281, "loss": 0.0507, "step": 5401 }, { "epoch": 0.7571128241065171, "grad_norm": 0.2638319432735443, "learning_rate": 0.00022972494618512317, "loss": 0.0514, "step": 5402 }, { "epoch": 0.7572529782761037, "grad_norm": 0.5572202801704407, "learning_rate": 0.0002297105955513035, "loss": 0.0708, "step": 5403 }, { "epoch": 0.7573931324456903, "grad_norm": 0.18339067697525024, "learning_rate": 0.00022969624491748383, "loss": 0.0291, "step": 5404 }, { "epoch": 0.7575332866152769, "grad_norm": 0.3518141508102417, "learning_rate": 0.00022968189428366416, "loss": 0.067, "step": 5405 }, { "epoch": 0.7576734407848633, "grad_norm": 0.34705042839050293, "learning_rate": 0.0002296675436498445, "loss": 0.0908, "step": 5406 }, { "epoch": 0.7578135949544499, "grad_norm": 0.45325136184692383, "learning_rate": 0.00022965319301602484, "loss": 0.1322, "step": 5407 }, { "epoch": 0.7579537491240365, "grad_norm": 0.7047853469848633, "learning_rate": 0.00022963884238220517, "loss": 0.1176, "step": 5408 }, { "epoch": 0.758093903293623, "grad_norm": 0.33671072125434875, "learning_rate": 0.00022962449174838555, "loss": 0.1151, "step": 5409 }, { "epoch": 0.7582340574632095, "grad_norm": 0.42787134647369385, "learning_rate": 0.00022961014111456588, "loss": 0.0534, "step": 5410 }, { "epoch": 0.7583742116327961, "grad_norm": 0.44981077313423157, "learning_rate": 0.0002295957904807462, "loss": 0.0874, "step": 5411 }, { "epoch": 0.7585143658023826, "grad_norm": 0.41092604398727417, "learning_rate": 0.00022958143984692656, "loss": 0.0944, "step": 5412 }, { "epoch": 0.7586545199719692, "grad_norm": 0.2224346250295639, "learning_rate": 0.0002295670892131069, 
"loss": 0.0549, "step": 5413 }, { "epoch": 0.7587946741415557, "grad_norm": 0.37301105260849, "learning_rate": 0.00022955273857928722, "loss": 0.0744, "step": 5414 }, { "epoch": 0.7589348283111422, "grad_norm": 0.36424317955970764, "learning_rate": 0.00022953838794546758, "loss": 0.087, "step": 5415 }, { "epoch": 0.7590749824807288, "grad_norm": 0.45953139662742615, "learning_rate": 0.0002295240373116479, "loss": 0.118, "step": 5416 }, { "epoch": 0.7592151366503154, "grad_norm": 0.19470801949501038, "learning_rate": 0.00022950968667782823, "loss": 0.041, "step": 5417 }, { "epoch": 0.7593552908199019, "grad_norm": 0.2255251556634903, "learning_rate": 0.00022949533604400862, "loss": 0.0513, "step": 5418 }, { "epoch": 0.7594954449894884, "grad_norm": 0.3890061676502228, "learning_rate": 0.00022948098541018895, "loss": 0.1079, "step": 5419 }, { "epoch": 0.759635599159075, "grad_norm": 0.6219236850738525, "learning_rate": 0.00022946663477636927, "loss": 0.0885, "step": 5420 }, { "epoch": 0.7597757533286615, "grad_norm": 0.5062904953956604, "learning_rate": 0.00022945228414254963, "loss": 0.0696, "step": 5421 }, { "epoch": 0.7599159074982481, "grad_norm": 0.5156422853469849, "learning_rate": 0.00022943793350872996, "loss": 0.0962, "step": 5422 }, { "epoch": 0.7600560616678346, "grad_norm": 0.2950113117694855, "learning_rate": 0.00022942358287491029, "loss": 0.028, "step": 5423 }, { "epoch": 0.7601962158374211, "grad_norm": 0.503536581993103, "learning_rate": 0.00022940923224109061, "loss": 0.1453, "step": 5424 }, { "epoch": 0.7603363700070077, "grad_norm": 0.3930160105228424, "learning_rate": 0.00022939488160727097, "loss": 0.0938, "step": 5425 }, { "epoch": 0.7604765241765943, "grad_norm": 0.33009910583496094, "learning_rate": 0.0002293805309734513, "loss": 0.0731, "step": 5426 }, { "epoch": 0.7606166783461809, "grad_norm": 0.34729063510894775, "learning_rate": 0.00022936618033963163, "loss": 0.0412, "step": 5427 }, { "epoch": 0.7607568325157673, "grad_norm": 
0.6240994334220886, "learning_rate": 0.00022935182970581198, "loss": 0.0817, "step": 5428 }, { "epoch": 0.7608969866853539, "grad_norm": 0.25242751836776733, "learning_rate": 0.0002293374790719923, "loss": 0.0788, "step": 5429 }, { "epoch": 0.7610371408549405, "grad_norm": 0.26445019245147705, "learning_rate": 0.00022932312843817267, "loss": 0.0651, "step": 5430 }, { "epoch": 0.761177295024527, "grad_norm": 0.6258429884910583, "learning_rate": 0.00022930877780435302, "loss": 0.0684, "step": 5431 }, { "epoch": 0.7613174491941135, "grad_norm": 0.24849849939346313, "learning_rate": 0.00022929442717053335, "loss": 0.0277, "step": 5432 }, { "epoch": 0.7614576033637, "grad_norm": 0.7237057685852051, "learning_rate": 0.00022928007653671368, "loss": 0.0614, "step": 5433 }, { "epoch": 0.7615977575332866, "grad_norm": 0.7451324462890625, "learning_rate": 0.00022926572590289404, "loss": 0.1271, "step": 5434 }, { "epoch": 0.7617379117028732, "grad_norm": 0.7456595301628113, "learning_rate": 0.00022925137526907436, "loss": 0.1131, "step": 5435 }, { "epoch": 0.7618780658724597, "grad_norm": 0.844342827796936, "learning_rate": 0.0002292370246352547, "loss": 0.0393, "step": 5436 }, { "epoch": 0.7620182200420462, "grad_norm": 0.3638335168361664, "learning_rate": 0.00022922267400143505, "loss": 0.0494, "step": 5437 }, { "epoch": 0.7621583742116328, "grad_norm": 0.944242000579834, "learning_rate": 0.00022920832336761538, "loss": 0.1483, "step": 5438 }, { "epoch": 0.7622985283812194, "grad_norm": 0.68157958984375, "learning_rate": 0.0002291939727337957, "loss": 0.0743, "step": 5439 }, { "epoch": 0.7624386825508059, "grad_norm": 3.959632635116577, "learning_rate": 0.00022917962209997603, "loss": 0.1273, "step": 5440 }, { "epoch": 0.7625788367203924, "grad_norm": 0.7042611241340637, "learning_rate": 0.00022916527146615642, "loss": 0.0701, "step": 5441 }, { "epoch": 0.762718990889979, "grad_norm": 0.45904669165611267, "learning_rate": 0.00022915092083233675, "loss": 0.1252, "step": 5442 
}, { "epoch": 0.7628591450595655, "grad_norm": 0.4239557683467865, "learning_rate": 0.00022913657019851707, "loss": 0.0293, "step": 5443 }, { "epoch": 0.7629992992291521, "grad_norm": 0.7637688517570496, "learning_rate": 0.00022912221956469743, "loss": 0.0882, "step": 5444 }, { "epoch": 0.7631394533987386, "grad_norm": 0.35936346650123596, "learning_rate": 0.00022910786893087776, "loss": 0.0491, "step": 5445 }, { "epoch": 0.7632796075683251, "grad_norm": 0.38990676403045654, "learning_rate": 0.0002290935182970581, "loss": 0.0825, "step": 5446 }, { "epoch": 0.7634197617379117, "grad_norm": 0.8177030682563782, "learning_rate": 0.00022907916766323844, "loss": 0.323, "step": 5447 }, { "epoch": 0.7635599159074983, "grad_norm": 0.8256582617759705, "learning_rate": 0.00022906481702941877, "loss": 0.0806, "step": 5448 }, { "epoch": 0.7637000700770848, "grad_norm": 0.9747647047042847, "learning_rate": 0.0002290504663955991, "loss": 0.2827, "step": 5449 }, { "epoch": 0.7638402242466713, "grad_norm": 0.8744803667068481, "learning_rate": 0.00022903611576177948, "loss": 0.0365, "step": 5450 }, { "epoch": 0.7639803784162579, "grad_norm": 0.41220322251319885, "learning_rate": 0.0002290217651279598, "loss": 0.164, "step": 5451 }, { "epoch": 0.7641205325858444, "grad_norm": 0.4641821086406708, "learning_rate": 0.00022900741449414014, "loss": 0.0726, "step": 5452 }, { "epoch": 0.764260686755431, "grad_norm": 0.24509745836257935, "learning_rate": 0.0002289930638603205, "loss": 0.0591, "step": 5453 }, { "epoch": 0.7644008409250175, "grad_norm": 0.7141879200935364, "learning_rate": 0.00022897871322650082, "loss": 0.2115, "step": 5454 }, { "epoch": 0.764540995094604, "grad_norm": 0.22111722826957703, "learning_rate": 0.00022896436259268115, "loss": 0.0713, "step": 5455 }, { "epoch": 0.7646811492641906, "grad_norm": 0.488768607378006, "learning_rate": 0.0002289500119588615, "loss": 0.1028, "step": 5456 }, { "epoch": 0.7648213034337772, "grad_norm": 0.4556877017021179, "learning_rate": 
0.00022893566132504184, "loss": 0.1679, "step": 5457 }, { "epoch": 0.7649614576033636, "grad_norm": 0.37825000286102295, "learning_rate": 0.00022892131069122217, "loss": 0.0938, "step": 5458 }, { "epoch": 0.7651016117729502, "grad_norm": 0.3884667456150055, "learning_rate": 0.0002289069600574025, "loss": 0.1208, "step": 5459 }, { "epoch": 0.7652417659425368, "grad_norm": 0.30482277274131775, "learning_rate": 0.00022889260942358285, "loss": 0.1089, "step": 5460 }, { "epoch": 0.7653819201121234, "grad_norm": 0.27011653780937195, "learning_rate": 0.00022887825878976318, "loss": 0.0664, "step": 5461 }, { "epoch": 0.7655220742817099, "grad_norm": 0.30397647619247437, "learning_rate": 0.00022886390815594353, "loss": 0.1149, "step": 5462 }, { "epoch": 0.7656622284512964, "grad_norm": 0.26909390091896057, "learning_rate": 0.0002288495575221239, "loss": 0.0796, "step": 5463 }, { "epoch": 0.765802382620883, "grad_norm": 0.4857081174850464, "learning_rate": 0.00022883520688830422, "loss": 0.1152, "step": 5464 }, { "epoch": 0.7659425367904695, "grad_norm": 0.2117944061756134, "learning_rate": 0.00022882085625448455, "loss": 0.0783, "step": 5465 }, { "epoch": 0.7660826909600561, "grad_norm": 0.47209733724594116, "learning_rate": 0.0002288065056206649, "loss": 0.1347, "step": 5466 }, { "epoch": 0.7662228451296426, "grad_norm": 0.350393146276474, "learning_rate": 0.00022879215498684523, "loss": 0.1044, "step": 5467 }, { "epoch": 0.7663629992992291, "grad_norm": 0.42804351449012756, "learning_rate": 0.00022877780435302556, "loss": 0.1, "step": 5468 }, { "epoch": 0.7665031534688157, "grad_norm": 0.6100720763206482, "learning_rate": 0.00022876345371920592, "loss": 0.144, "step": 5469 }, { "epoch": 0.7666433076384023, "grad_norm": 0.3709159791469574, "learning_rate": 0.00022874910308538624, "loss": 0.0533, "step": 5470 }, { "epoch": 0.7667834618079888, "grad_norm": 0.22639378905296326, "learning_rate": 0.00022873475245156657, "loss": 0.0427, "step": 5471 }, { "epoch": 
0.7669236159775753, "grad_norm": 0.733083188533783, "learning_rate": 0.00022872040181774696, "loss": 0.0775, "step": 5472 }, { "epoch": 0.7670637701471619, "grad_norm": 0.40559929609298706, "learning_rate": 0.00022870605118392728, "loss": 0.056, "step": 5473 }, { "epoch": 0.7672039243167484, "grad_norm": 0.2793610990047455, "learning_rate": 0.0002286917005501076, "loss": 0.0879, "step": 5474 }, { "epoch": 0.767344078486335, "grad_norm": 0.42612722516059875, "learning_rate": 0.00022867734991628794, "loss": 0.0729, "step": 5475 }, { "epoch": 0.7674842326559215, "grad_norm": 0.3715907633304596, "learning_rate": 0.0002286629992824683, "loss": 0.0926, "step": 5476 }, { "epoch": 0.767624386825508, "grad_norm": 0.2092791050672531, "learning_rate": 0.00022864864864864862, "loss": 0.0438, "step": 5477 }, { "epoch": 0.7677645409950946, "grad_norm": 0.7470300197601318, "learning_rate": 0.00022863429801482895, "loss": 0.0986, "step": 5478 }, { "epoch": 0.7679046951646812, "grad_norm": 0.3440161347389221, "learning_rate": 0.0002286199473810093, "loss": 0.0833, "step": 5479 }, { "epoch": 0.7680448493342676, "grad_norm": 0.40457969903945923, "learning_rate": 0.00022860559674718964, "loss": 0.0823, "step": 5480 }, { "epoch": 0.7681850035038542, "grad_norm": 0.5405162572860718, "learning_rate": 0.00022859124611336997, "loss": 0.1184, "step": 5481 }, { "epoch": 0.7683251576734408, "grad_norm": 0.5273644328117371, "learning_rate": 0.00022857689547955035, "loss": 0.0734, "step": 5482 }, { "epoch": 0.7684653118430274, "grad_norm": 0.20312072336673737, "learning_rate": 0.00022856254484573068, "loss": 0.0216, "step": 5483 }, { "epoch": 0.7686054660126139, "grad_norm": 0.3540211319923401, "learning_rate": 0.000228548194211911, "loss": 0.0378, "step": 5484 }, { "epoch": 0.7687456201822004, "grad_norm": 0.18328943848609924, "learning_rate": 0.00022853384357809136, "loss": 0.0338, "step": 5485 }, { "epoch": 0.768885774351787, "grad_norm": 0.47405415773391724, "learning_rate": 
0.0002285194929442717, "loss": 0.0738, "step": 5486 }, { "epoch": 0.7690259285213735, "grad_norm": 0.23524805903434753, "learning_rate": 0.00022850514231045202, "loss": 0.0356, "step": 5487 }, { "epoch": 0.7691660826909601, "grad_norm": 1.4024724960327148, "learning_rate": 0.00022849079167663237, "loss": 0.1725, "step": 5488 }, { "epoch": 0.7693062368605466, "grad_norm": 0.9750211834907532, "learning_rate": 0.0002284764410428127, "loss": 0.1854, "step": 5489 }, { "epoch": 0.7694463910301331, "grad_norm": 0.40581846237182617, "learning_rate": 0.00022846209040899303, "loss": 0.0844, "step": 5490 }, { "epoch": 0.7695865451997197, "grad_norm": 0.8945640921592712, "learning_rate": 0.0002284477397751734, "loss": 0.0576, "step": 5491 }, { "epoch": 0.7697266993693063, "grad_norm": 0.4466705024242401, "learning_rate": 0.00022843338914135372, "loss": 0.0445, "step": 5492 }, { "epoch": 0.7698668535388928, "grad_norm": 0.17964980006217957, "learning_rate": 0.00022841903850753404, "loss": 0.0286, "step": 5493 }, { "epoch": 0.7700070077084793, "grad_norm": 0.6608170866966248, "learning_rate": 0.0002284046878737144, "loss": 0.0531, "step": 5494 }, { "epoch": 0.7701471618780659, "grad_norm": 0.42862468957901, "learning_rate": 0.00022839033723989476, "loss": 0.064, "step": 5495 }, { "epoch": 0.7702873160476524, "grad_norm": 1.3754823207855225, "learning_rate": 0.00022837598660607508, "loss": 0.2525, "step": 5496 }, { "epoch": 0.770427470217239, "grad_norm": 0.5401486754417419, "learning_rate": 0.0002283616359722554, "loss": 0.087, "step": 5497 }, { "epoch": 0.7705676243868255, "grad_norm": 1.3065012693405151, "learning_rate": 0.00022834728533843577, "loss": 0.192, "step": 5498 }, { "epoch": 0.770707778556412, "grad_norm": 1.142110824584961, "learning_rate": 0.0002283329347046161, "loss": 0.1133, "step": 5499 }, { "epoch": 0.7708479327259986, "grad_norm": 0.9058482050895691, "learning_rate": 0.00022831858407079643, "loss": 0.1584, "step": 5500 }, { "epoch": 0.7709880868955852, 
"grad_norm": 0.6781784892082214, "learning_rate": 0.00022830423343697678, "loss": 0.1607, "step": 5501 }, { "epoch": 0.7711282410651716, "grad_norm": 0.3875426650047302, "learning_rate": 0.0002282898828031571, "loss": 0.0908, "step": 5502 }, { "epoch": 0.7712683952347582, "grad_norm": 0.24912330508232117, "learning_rate": 0.00022827553216933744, "loss": 0.0497, "step": 5503 }, { "epoch": 0.7714085494043448, "grad_norm": 0.44237616658210754, "learning_rate": 0.00022826118153551782, "loss": 0.0606, "step": 5504 }, { "epoch": 0.7715487035739313, "grad_norm": 0.36543911695480347, "learning_rate": 0.00022824683090169815, "loss": 0.0768, "step": 5505 }, { "epoch": 0.7716888577435179, "grad_norm": 0.5312711596488953, "learning_rate": 0.00022823248026787848, "loss": 0.0506, "step": 5506 }, { "epoch": 0.7718290119131044, "grad_norm": 0.28648728132247925, "learning_rate": 0.00022821812963405883, "loss": 0.0689, "step": 5507 }, { "epoch": 0.771969166082691, "grad_norm": 0.6324368715286255, "learning_rate": 0.00022820377900023916, "loss": 0.1072, "step": 5508 }, { "epoch": 0.7721093202522775, "grad_norm": 0.5460099577903748, "learning_rate": 0.0002281894283664195, "loss": 0.0702, "step": 5509 }, { "epoch": 0.7722494744218641, "grad_norm": 0.9754613041877747, "learning_rate": 0.00022817507773259982, "loss": 0.0719, "step": 5510 }, { "epoch": 0.7723896285914505, "grad_norm": 0.4745437800884247, "learning_rate": 0.00022816072709878018, "loss": 0.0959, "step": 5511 }, { "epoch": 0.7725297827610371, "grad_norm": 0.39778056740760803, "learning_rate": 0.0002281463764649605, "loss": 0.055, "step": 5512 }, { "epoch": 0.7726699369306237, "grad_norm": 0.4247722029685974, "learning_rate": 0.00022813202583114083, "loss": 0.1013, "step": 5513 }, { "epoch": 0.7728100911002103, "grad_norm": 0.6100055575370789, "learning_rate": 0.00022811767519732122, "loss": 0.0998, "step": 5514 }, { "epoch": 0.7729502452697968, "grad_norm": 0.3268587291240692, "learning_rate": 0.00022810332456350154, "loss": 
0.1111, "step": 5515 }, { "epoch": 0.7730903994393833, "grad_norm": 0.33326730132102966, "learning_rate": 0.00022808897392968187, "loss": 0.0685, "step": 5516 }, { "epoch": 0.7732305536089699, "grad_norm": 0.648952305316925, "learning_rate": 0.00022807462329586223, "loss": 0.1035, "step": 5517 }, { "epoch": 0.7733707077785564, "grad_norm": 0.3474395275115967, "learning_rate": 0.00022806027266204256, "loss": 0.1074, "step": 5518 }, { "epoch": 0.773510861948143, "grad_norm": 0.35976314544677734, "learning_rate": 0.00022804592202822289, "loss": 0.1051, "step": 5519 }, { "epoch": 0.7736510161177295, "grad_norm": 0.5398932099342346, "learning_rate": 0.00022803157139440324, "loss": 0.074, "step": 5520 }, { "epoch": 0.773791170287316, "grad_norm": 0.6685708165168762, "learning_rate": 0.00022801722076058357, "loss": 0.125, "step": 5521 }, { "epoch": 0.7739313244569026, "grad_norm": 0.7419278621673584, "learning_rate": 0.0002280028701267639, "loss": 0.1251, "step": 5522 }, { "epoch": 0.7740714786264892, "grad_norm": 0.20199072360992432, "learning_rate": 0.00022798851949294425, "loss": 0.0231, "step": 5523 }, { "epoch": 0.7742116327960756, "grad_norm": 0.5185670852661133, "learning_rate": 0.00022797416885912458, "loss": 0.1321, "step": 5524 }, { "epoch": 0.7743517869656622, "grad_norm": 0.17992335557937622, "learning_rate": 0.0002279598182253049, "loss": 0.0463, "step": 5525 }, { "epoch": 0.7744919411352488, "grad_norm": 0.705573558807373, "learning_rate": 0.0002279454675914853, "loss": 0.0426, "step": 5526 }, { "epoch": 0.7746320953048353, "grad_norm": 0.7857452630996704, "learning_rate": 0.00022793111695766562, "loss": 0.0827, "step": 5527 }, { "epoch": 0.7747722494744219, "grad_norm": 0.5969287157058716, "learning_rate": 0.00022791676632384595, "loss": 0.0575, "step": 5528 }, { "epoch": 0.7749124036440084, "grad_norm": 0.18924486637115479, "learning_rate": 0.00022790241569002628, "loss": 0.038, "step": 5529 }, { "epoch": 0.775052557813595, "grad_norm": 0.4760206937789917, 
"learning_rate": 0.00022788806505620663, "loss": 0.1088, "step": 5530 }, { "epoch": 0.7751927119831815, "grad_norm": 0.518638551235199, "learning_rate": 0.00022787371442238696, "loss": 0.0353, "step": 5531 }, { "epoch": 0.7753328661527681, "grad_norm": 0.3066500425338745, "learning_rate": 0.0002278593637885673, "loss": 0.0466, "step": 5532 }, { "epoch": 0.7754730203223545, "grad_norm": 0.2970122992992401, "learning_rate": 0.00022784501315474765, "loss": 0.0913, "step": 5533 }, { "epoch": 0.7756131744919411, "grad_norm": 0.4344521760940552, "learning_rate": 0.00022783066252092798, "loss": 0.1538, "step": 5534 }, { "epoch": 0.7757533286615277, "grad_norm": 0.4884020686149597, "learning_rate": 0.0002278163118871083, "loss": 0.1396, "step": 5535 }, { "epoch": 0.7758934828311143, "grad_norm": 0.38114333152770996, "learning_rate": 0.0002278019612532887, "loss": 0.1576, "step": 5536 }, { "epoch": 0.7760336370007007, "grad_norm": 0.49452364444732666, "learning_rate": 0.00022778761061946902, "loss": 0.1974, "step": 5537 }, { "epoch": 0.7761737911702873, "grad_norm": 0.4518037438392639, "learning_rate": 0.00022777325998564934, "loss": 0.0757, "step": 5538 }, { "epoch": 0.7763139453398739, "grad_norm": 0.4614986181259155, "learning_rate": 0.0002277589093518297, "loss": 0.0693, "step": 5539 }, { "epoch": 0.7764540995094604, "grad_norm": 0.5054397583007812, "learning_rate": 0.00022774455871801003, "loss": 0.2056, "step": 5540 }, { "epoch": 0.776594253679047, "grad_norm": 0.4532138705253601, "learning_rate": 0.00022773020808419036, "loss": 0.164, "step": 5541 }, { "epoch": 0.7767344078486335, "grad_norm": 0.34185174107551575, "learning_rate": 0.0002277158574503707, "loss": 0.0416, "step": 5542 }, { "epoch": 0.77687456201822, "grad_norm": 0.4061909317970276, "learning_rate": 0.00022770150681655104, "loss": 0.0812, "step": 5543 }, { "epoch": 0.7770147161878066, "grad_norm": 1.1434621810913086, "learning_rate": 0.00022768715618273137, "loss": 0.0949, "step": 5544 }, { "epoch": 
0.7771548703573932, "grad_norm": 0.2393876314163208, "learning_rate": 0.0002276728055489117, "loss": 0.0683, "step": 5545 }, { "epoch": 0.7772950245269796, "grad_norm": 0.5975365042686462, "learning_rate": 0.00022765845491509208, "loss": 0.205, "step": 5546 }, { "epoch": 0.7774351786965662, "grad_norm": 0.5446653366088867, "learning_rate": 0.0002276441042812724, "loss": 0.0645, "step": 5547 }, { "epoch": 0.7775753328661528, "grad_norm": 4.504129886627197, "learning_rate": 0.00022762975364745274, "loss": 0.3726, "step": 5548 }, { "epoch": 0.7777154870357393, "grad_norm": 3.6292026042938232, "learning_rate": 0.0002276154030136331, "loss": 0.7406, "step": 5549 }, { "epoch": 0.7778556412053259, "grad_norm": 1.1210706233978271, "learning_rate": 0.00022760105237981342, "loss": 0.1189, "step": 5550 }, { "epoch": 0.7779957953749124, "grad_norm": 0.18803133070468903, "learning_rate": 0.00022758670174599375, "loss": 0.0493, "step": 5551 }, { "epoch": 0.7781359495444989, "grad_norm": 0.4657420217990875, "learning_rate": 0.0002275723511121741, "loss": 0.1338, "step": 5552 }, { "epoch": 0.7782761037140855, "grad_norm": 0.2918739318847656, "learning_rate": 0.00022755800047835444, "loss": 0.0375, "step": 5553 }, { "epoch": 0.7784162578836721, "grad_norm": 0.49253174662590027, "learning_rate": 0.00022754364984453476, "loss": 0.0814, "step": 5554 }, { "epoch": 0.7785564120532585, "grad_norm": 0.5060675740242004, "learning_rate": 0.00022752929921071512, "loss": 0.0947, "step": 5555 }, { "epoch": 0.7786965662228451, "grad_norm": 0.3380374014377594, "learning_rate": 0.00022751494857689545, "loss": 0.0475, "step": 5556 }, { "epoch": 0.7788367203924317, "grad_norm": 0.35931336879730225, "learning_rate": 0.0002275005979430758, "loss": 0.1079, "step": 5557 }, { "epoch": 0.7789768745620183, "grad_norm": 0.3204613924026489, "learning_rate": 0.00022748624730925616, "loss": 0.1118, "step": 5558 }, { "epoch": 0.7791170287316047, "grad_norm": 0.42459946870803833, "learning_rate": 
0.0002274718966754365, "loss": 0.0752, "step": 5559 }, { "epoch": 0.7792571829011913, "grad_norm": 0.5983796119689941, "learning_rate": 0.00022745754604161682, "loss": 0.0808, "step": 5560 }, { "epoch": 0.7793973370707779, "grad_norm": 0.6460114121437073, "learning_rate": 0.00022744319540779717, "loss": 0.1252, "step": 5561 }, { "epoch": 0.7795374912403644, "grad_norm": 0.39227792620658875, "learning_rate": 0.0002274288447739775, "loss": 0.1058, "step": 5562 }, { "epoch": 0.779677645409951, "grad_norm": 0.2218901365995407, "learning_rate": 0.00022741449414015783, "loss": 0.0308, "step": 5563 }, { "epoch": 0.7798177995795375, "grad_norm": 0.27046000957489014, "learning_rate": 0.00022740014350633816, "loss": 0.0665, "step": 5564 }, { "epoch": 0.779957953749124, "grad_norm": 0.41917768120765686, "learning_rate": 0.00022738579287251851, "loss": 0.1094, "step": 5565 }, { "epoch": 0.7800981079187106, "grad_norm": 0.3697170317173004, "learning_rate": 0.00022737144223869884, "loss": 0.0818, "step": 5566 }, { "epoch": 0.7802382620882972, "grad_norm": 0.37233036756515503, "learning_rate": 0.00022735709160487917, "loss": 0.02, "step": 5567 }, { "epoch": 0.7803784162578836, "grad_norm": 0.1780182421207428, "learning_rate": 0.00022734274097105955, "loss": 0.0445, "step": 5568 }, { "epoch": 0.7805185704274702, "grad_norm": 0.35164687037467957, "learning_rate": 0.00022732839033723988, "loss": 0.0635, "step": 5569 }, { "epoch": 0.7806587245970568, "grad_norm": 0.24476350843906403, "learning_rate": 0.0002273140397034202, "loss": 0.065, "step": 5570 }, { "epoch": 0.7807988787666433, "grad_norm": 0.39904242753982544, "learning_rate": 0.00022729968906960057, "loss": 0.0792, "step": 5571 }, { "epoch": 0.7809390329362299, "grad_norm": 0.41253888607025146, "learning_rate": 0.0002272853384357809, "loss": 0.1132, "step": 5572 }, { "epoch": 0.7810791871058164, "grad_norm": 0.3747071921825409, "learning_rate": 0.00022727098780196122, "loss": 0.0721, "step": 5573 }, { "epoch": 
0.7812193412754029, "grad_norm": 0.3455543518066406, "learning_rate": 0.00022725663716814158, "loss": 0.1271, "step": 5574 }, { "epoch": 0.7813594954449895, "grad_norm": 0.449190229177475, "learning_rate": 0.0002272422865343219, "loss": 0.0698, "step": 5575 }, { "epoch": 0.7814996496145761, "grad_norm": 0.3513028919696808, "learning_rate": 0.00022722793590050224, "loss": 0.1134, "step": 5576 }, { "epoch": 0.7816398037841625, "grad_norm": 0.3159481883049011, "learning_rate": 0.00022721358526668262, "loss": 0.0691, "step": 5577 }, { "epoch": 0.7817799579537491, "grad_norm": 0.49728885293006897, "learning_rate": 0.00022719923463286295, "loss": 0.2723, "step": 5578 }, { "epoch": 0.7819201121233357, "grad_norm": 0.570715069770813, "learning_rate": 0.00022718488399904328, "loss": 0.1158, "step": 5579 }, { "epoch": 0.7820602662929222, "grad_norm": 0.36925241351127625, "learning_rate": 0.0002271705333652236, "loss": 0.1393, "step": 5580 }, { "epoch": 0.7822004204625087, "grad_norm": 0.42141807079315186, "learning_rate": 0.00022715618273140396, "loss": 0.1219, "step": 5581 }, { "epoch": 0.7823405746320953, "grad_norm": 0.40359222888946533, "learning_rate": 0.0002271418320975843, "loss": 0.1132, "step": 5582 }, { "epoch": 0.7824807288016818, "grad_norm": 0.21122585237026215, "learning_rate": 0.00022712748146376462, "loss": 0.0418, "step": 5583 }, { "epoch": 0.7826208829712684, "grad_norm": 0.4212229549884796, "learning_rate": 0.00022711313082994497, "loss": 0.0647, "step": 5584 }, { "epoch": 0.782761037140855, "grad_norm": 0.20950578153133392, "learning_rate": 0.0002270987801961253, "loss": 0.0249, "step": 5585 }, { "epoch": 0.7829011913104414, "grad_norm": 0.23099462687969208, "learning_rate": 0.00022708442956230563, "loss": 0.0597, "step": 5586 }, { "epoch": 0.783041345480028, "grad_norm": 0.4331852197647095, "learning_rate": 0.00022707007892848599, "loss": 0.1412, "step": 5587 }, { "epoch": 0.7831814996496146, "grad_norm": 0.6406581401824951, "learning_rate": 
0.00022705572829466631, "loss": 0.1319, "step": 5588 }, { "epoch": 0.7833216538192012, "grad_norm": 0.6134215593338013, "learning_rate": 0.00022704137766084667, "loss": 0.1797, "step": 5589 }, { "epoch": 0.7834618079887876, "grad_norm": 0.3799291253089905, "learning_rate": 0.00022702702702702703, "loss": 0.0395, "step": 5590 }, { "epoch": 0.7836019621583742, "grad_norm": 0.3801416754722595, "learning_rate": 0.00022701267639320735, "loss": 0.0646, "step": 5591 }, { "epoch": 0.7837421163279608, "grad_norm": 0.8118061423301697, "learning_rate": 0.00022699832575938768, "loss": 0.317, "step": 5592 }, { "epoch": 0.7838822704975473, "grad_norm": 0.6364262700080872, "learning_rate": 0.00022698397512556804, "loss": 0.0468, "step": 5593 }, { "epoch": 0.7840224246671339, "grad_norm": 0.4522698223590851, "learning_rate": 0.00022696962449174837, "loss": 0.0702, "step": 5594 }, { "epoch": 0.7841625788367204, "grad_norm": 0.7264848351478577, "learning_rate": 0.0002269552738579287, "loss": 0.2152, "step": 5595 }, { "epoch": 0.7843027330063069, "grad_norm": 0.39555561542510986, "learning_rate": 0.00022694092322410905, "loss": 0.0501, "step": 5596 }, { "epoch": 0.7844428871758935, "grad_norm": 0.8773069381713867, "learning_rate": 0.00022692657259028938, "loss": 0.1856, "step": 5597 }, { "epoch": 0.7845830413454801, "grad_norm": 1.3970768451690674, "learning_rate": 0.0002269122219564697, "loss": 0.0983, "step": 5598 }, { "epoch": 0.7847231955150665, "grad_norm": 1.2268863916397095, "learning_rate": 0.00022689787132265004, "loss": 0.1516, "step": 5599 }, { "epoch": 0.7848633496846531, "grad_norm": 0.813997745513916, "learning_rate": 0.00022688352068883042, "loss": 0.0945, "step": 5600 }, { "epoch": 0.7850035038542397, "grad_norm": 0.19086599349975586, "learning_rate": 0.00022686917005501075, "loss": 0.0811, "step": 5601 }, { "epoch": 0.7851436580238262, "grad_norm": 0.13190264999866486, "learning_rate": 0.00022685481942119108, "loss": 0.0437, "step": 5602 }, { "epoch": 
0.7852838121934127, "grad_norm": 0.22725702822208405, "learning_rate": 0.00022684046878737143, "loss": 0.1023, "step": 5603 }, { "epoch": 0.7854239663629993, "grad_norm": 0.4065977931022644, "learning_rate": 0.00022682611815355176, "loss": 0.0552, "step": 5604 }, { "epoch": 0.7855641205325858, "grad_norm": 0.5338484048843384, "learning_rate": 0.0002268117675197321, "loss": 0.0617, "step": 5605 }, { "epoch": 0.7857042747021724, "grad_norm": 0.39808306097984314, "learning_rate": 0.00022679741688591245, "loss": 0.0964, "step": 5606 }, { "epoch": 0.785844428871759, "grad_norm": 0.21572478115558624, "learning_rate": 0.00022678306625209277, "loss": 0.034, "step": 5607 }, { "epoch": 0.7859845830413454, "grad_norm": 0.19689863920211792, "learning_rate": 0.0002267687156182731, "loss": 0.0424, "step": 5608 }, { "epoch": 0.786124737210932, "grad_norm": 0.2556608319282532, "learning_rate": 0.00022675436498445349, "loss": 0.0455, "step": 5609 }, { "epoch": 0.7862648913805186, "grad_norm": 0.46096962690353394, "learning_rate": 0.00022674001435063381, "loss": 0.1173, "step": 5610 }, { "epoch": 0.7864050455501052, "grad_norm": 0.5848700404167175, "learning_rate": 0.00022672566371681414, "loss": 0.086, "step": 5611 }, { "epoch": 0.7865451997196916, "grad_norm": 0.256839394569397, "learning_rate": 0.0002267113130829945, "loss": 0.1097, "step": 5612 }, { "epoch": 0.7866853538892782, "grad_norm": 0.5051348209381104, "learning_rate": 0.00022669696244917483, "loss": 0.1139, "step": 5613 }, { "epoch": 0.7868255080588648, "grad_norm": 0.23546195030212402, "learning_rate": 0.00022668261181535516, "loss": 0.06, "step": 5614 }, { "epoch": 0.7869656622284513, "grad_norm": 0.3419448435306549, "learning_rate": 0.00022666826118153548, "loss": 0.0564, "step": 5615 }, { "epoch": 0.7871058163980379, "grad_norm": 0.32146215438842773, "learning_rate": 0.00022665391054771584, "loss": 0.0697, "step": 5616 }, { "epoch": 0.7872459705676244, "grad_norm": 0.6872513890266418, "learning_rate": 
0.00022663955991389617, "loss": 0.1559, "step": 5617 }, { "epoch": 0.7873861247372109, "grad_norm": 0.4116996228694916, "learning_rate": 0.0002266252092800765, "loss": 0.0482, "step": 5618 }, { "epoch": 0.7875262789067975, "grad_norm": 0.27954569458961487, "learning_rate": 0.00022661085864625685, "loss": 0.0606, "step": 5619 }, { "epoch": 0.7876664330763841, "grad_norm": 0.46233686804771423, "learning_rate": 0.00022659650801243718, "loss": 0.0344, "step": 5620 }, { "epoch": 0.7878065872459705, "grad_norm": 0.267116516828537, "learning_rate": 0.00022658215737861754, "loss": 0.0802, "step": 5621 }, { "epoch": 0.7879467414155571, "grad_norm": 0.659602701663971, "learning_rate": 0.0002265678067447979, "loss": 0.0883, "step": 5622 }, { "epoch": 0.7880868955851437, "grad_norm": 0.4538685381412506, "learning_rate": 0.00022655345611097822, "loss": 0.0907, "step": 5623 }, { "epoch": 0.7882270497547302, "grad_norm": 1.5112347602844238, "learning_rate": 0.00022653910547715855, "loss": 0.1227, "step": 5624 }, { "epoch": 0.7883672039243167, "grad_norm": 0.16230528056621552, "learning_rate": 0.0002265247548433389, "loss": 0.0209, "step": 5625 }, { "epoch": 0.7885073580939033, "grad_norm": 0.6399327516555786, "learning_rate": 0.00022651040420951923, "loss": 0.0835, "step": 5626 }, { "epoch": 0.7886475122634898, "grad_norm": 0.4576042592525482, "learning_rate": 0.00022649605357569956, "loss": 0.0631, "step": 5627 }, { "epoch": 0.7887876664330764, "grad_norm": 0.8167616724967957, "learning_rate": 0.00022648170294187992, "loss": 0.0718, "step": 5628 }, { "epoch": 0.788927820602663, "grad_norm": 0.9858236908912659, "learning_rate": 0.00022646735230806025, "loss": 0.1232, "step": 5629 }, { "epoch": 0.7890679747722494, "grad_norm": 0.3055446147918701, "learning_rate": 0.00022645300167424058, "loss": 0.0735, "step": 5630 }, { "epoch": 0.789208128941836, "grad_norm": 0.3733578622341156, "learning_rate": 0.00022643865104042096, "loss": 0.0351, "step": 5631 }, { "epoch": 
0.7893482831114226, "grad_norm": 0.6542314887046814, "learning_rate": 0.0002264243004066013, "loss": 0.1698, "step": 5632 }, { "epoch": 0.7894884372810091, "grad_norm": 0.2872672379016876, "learning_rate": 0.00022640994977278162, "loss": 0.0698, "step": 5633 }, { "epoch": 0.7896285914505956, "grad_norm": 0.5845891237258911, "learning_rate": 0.00022639559913896194, "loss": 0.1136, "step": 5634 }, { "epoch": 0.7897687456201822, "grad_norm": 0.6551676988601685, "learning_rate": 0.0002263812485051423, "loss": 0.0572, "step": 5635 }, { "epoch": 0.7899088997897687, "grad_norm": 0.3795095980167389, "learning_rate": 0.00022636689787132263, "loss": 0.1343, "step": 5636 }, { "epoch": 0.7900490539593553, "grad_norm": 0.7125422358512878, "learning_rate": 0.00022635254723750296, "loss": 0.0723, "step": 5637 }, { "epoch": 0.7901892081289419, "grad_norm": 0.6917170882225037, "learning_rate": 0.0002263381966036833, "loss": 0.138, "step": 5638 }, { "epoch": 0.7903293622985283, "grad_norm": 0.3661969304084778, "learning_rate": 0.00022632384596986364, "loss": 0.1047, "step": 5639 }, { "epoch": 0.7904695164681149, "grad_norm": 0.749763548374176, "learning_rate": 0.00022630949533604397, "loss": 0.1705, "step": 5640 }, { "epoch": 0.7906096706377015, "grad_norm": 0.519035816192627, "learning_rate": 0.00022629514470222435, "loss": 0.0955, "step": 5641 }, { "epoch": 0.7907498248072881, "grad_norm": 0.27287420630455017, "learning_rate": 0.00022628079406840468, "loss": 0.0674, "step": 5642 }, { "epoch": 0.7908899789768745, "grad_norm": 0.9136955738067627, "learning_rate": 0.000226266443434585, "loss": 0.0895, "step": 5643 }, { "epoch": 0.7910301331464611, "grad_norm": 0.44580692052841187, "learning_rate": 0.00022625209280076536, "loss": 0.0711, "step": 5644 }, { "epoch": 0.7911702873160477, "grad_norm": 0.9792532324790955, "learning_rate": 0.0002262377421669457, "loss": 0.0996, "step": 5645 }, { "epoch": 0.7913104414856342, "grad_norm": 0.29885461926460266, "learning_rate": 
0.00022622339153312602, "loss": 0.0475, "step": 5646 }, { "epoch": 0.7914505956552207, "grad_norm": 0.30109041929244995, "learning_rate": 0.00022620904089930638, "loss": 0.0692, "step": 5647 }, { "epoch": 0.7915907498248073, "grad_norm": 1.1716101169586182, "learning_rate": 0.0002261946902654867, "loss": 0.3932, "step": 5648 }, { "epoch": 0.7917309039943938, "grad_norm": 1.0599429607391357, "learning_rate": 0.00022618033963166703, "loss": 0.2659, "step": 5649 }, { "epoch": 0.7918710581639804, "grad_norm": 1.750972867012024, "learning_rate": 0.00022616598899784736, "loss": 0.1428, "step": 5650 }, { "epoch": 0.792011212333567, "grad_norm": 0.5109270811080933, "learning_rate": 0.00022615163836402772, "loss": 0.0581, "step": 5651 }, { "epoch": 0.7921513665031534, "grad_norm": 0.22958607971668243, "learning_rate": 0.00022613728773020807, "loss": 0.0488, "step": 5652 }, { "epoch": 0.79229152067274, "grad_norm": 0.29007983207702637, "learning_rate": 0.0002261229370963884, "loss": 0.0509, "step": 5653 }, { "epoch": 0.7924316748423266, "grad_norm": 0.3183847963809967, "learning_rate": 0.00022610858646256876, "loss": 0.0606, "step": 5654 }, { "epoch": 0.7925718290119131, "grad_norm": 0.2664157450199127, "learning_rate": 0.0002260942358287491, "loss": 0.0413, "step": 5655 }, { "epoch": 0.7927119831814996, "grad_norm": 0.5876112580299377, "learning_rate": 0.00022607988519492942, "loss": 0.1074, "step": 5656 }, { "epoch": 0.7928521373510862, "grad_norm": 0.49838733673095703, "learning_rate": 0.00022606553456110977, "loss": 0.1187, "step": 5657 }, { "epoch": 0.7929922915206727, "grad_norm": 0.46227696537971497, "learning_rate": 0.0002260511839272901, "loss": 0.1082, "step": 5658 }, { "epoch": 0.7931324456902593, "grad_norm": 0.5369463562965393, "learning_rate": 0.00022603683329347043, "loss": 0.119, "step": 5659 }, { "epoch": 0.7932725998598459, "grad_norm": 0.4320513606071472, "learning_rate": 0.00022602248265965078, "loss": 0.0811, "step": 5660 }, { "epoch": 
0.7934127540294323, "grad_norm": 0.43080368638038635, "learning_rate": 0.0002260081320258311, "loss": 0.0968, "step": 5661 }, { "epoch": 0.7935529081990189, "grad_norm": 0.3401932120323181, "learning_rate": 0.00022599378139201144, "loss": 0.1087, "step": 5662 }, { "epoch": 0.7936930623686055, "grad_norm": 0.5740426778793335, "learning_rate": 0.00022597943075819182, "loss": 0.0921, "step": 5663 }, { "epoch": 0.793833216538192, "grad_norm": 0.3936419188976288, "learning_rate": 0.00022596508012437215, "loss": 0.1118, "step": 5664 }, { "epoch": 0.7939733707077785, "grad_norm": 1.1023430824279785, "learning_rate": 0.00022595072949055248, "loss": 0.0962, "step": 5665 }, { "epoch": 0.7941135248773651, "grad_norm": 0.2322964370250702, "learning_rate": 0.0002259363788567328, "loss": 0.0769, "step": 5666 }, { "epoch": 0.7942536790469517, "grad_norm": 0.33208635449409485, "learning_rate": 0.00022592202822291317, "loss": 0.0521, "step": 5667 }, { "epoch": 0.7943938332165382, "grad_norm": 0.7058702111244202, "learning_rate": 0.0002259076775890935, "loss": 0.1431, "step": 5668 }, { "epoch": 0.7945339873861247, "grad_norm": 0.4282469153404236, "learning_rate": 0.00022589332695527382, "loss": 0.0603, "step": 5669 }, { "epoch": 0.7946741415557113, "grad_norm": 0.7234341502189636, "learning_rate": 0.00022587897632145418, "loss": 0.0782, "step": 5670 }, { "epoch": 0.7948142957252978, "grad_norm": 0.3753330111503601, "learning_rate": 0.0002258646256876345, "loss": 0.109, "step": 5671 }, { "epoch": 0.7949544498948844, "grad_norm": 0.6509559750556946, "learning_rate": 0.00022585027505381484, "loss": 0.0855, "step": 5672 }, { "epoch": 0.795094604064471, "grad_norm": 0.11885274201631546, "learning_rate": 0.00022583592441999522, "loss": 0.0194, "step": 5673 }, { "epoch": 0.7952347582340574, "grad_norm": 0.32345446944236755, "learning_rate": 0.00022582157378617555, "loss": 0.071, "step": 5674 }, { "epoch": 0.795374912403644, "grad_norm": 0.440663605928421, "learning_rate": 
0.00022580722315235588, "loss": 0.0324, "step": 5675 }, { "epoch": 0.7955150665732306, "grad_norm": 0.4897230863571167, "learning_rate": 0.00022579287251853623, "loss": 0.0775, "step": 5676 }, { "epoch": 0.7956552207428171, "grad_norm": 0.24911180138587952, "learning_rate": 0.00022577852188471656, "loss": 0.0419, "step": 5677 }, { "epoch": 0.7957953749124036, "grad_norm": 0.3806205093860626, "learning_rate": 0.0002257641712508969, "loss": 0.111, "step": 5678 }, { "epoch": 0.7959355290819902, "grad_norm": 0.8356031179428101, "learning_rate": 0.00022574982061707724, "loss": 0.0537, "step": 5679 }, { "epoch": 0.7960756832515767, "grad_norm": 0.544108510017395, "learning_rate": 0.00022573546998325757, "loss": 0.0807, "step": 5680 }, { "epoch": 0.7962158374211633, "grad_norm": 0.32353276014328003, "learning_rate": 0.0002257211193494379, "loss": 0.0638, "step": 5681 }, { "epoch": 0.7963559915907499, "grad_norm": 0.6253033876419067, "learning_rate": 0.00022570676871561826, "loss": 0.0785, "step": 5682 }, { "epoch": 0.7964961457603363, "grad_norm": 0.6292871236801147, "learning_rate": 0.00022569241808179859, "loss": 0.1029, "step": 5683 }, { "epoch": 0.7966362999299229, "grad_norm": 0.670504629611969, "learning_rate": 0.00022567806744797894, "loss": 0.0557, "step": 5684 }, { "epoch": 0.7967764540995095, "grad_norm": 0.2368209958076477, "learning_rate": 0.00022566371681415927, "loss": 0.051, "step": 5685 }, { "epoch": 0.796916608269096, "grad_norm": 0.23010286688804626, "learning_rate": 0.00022564936618033963, "loss": 0.0573, "step": 5686 }, { "epoch": 0.7970567624386825, "grad_norm": 0.37756916880607605, "learning_rate": 0.00022563501554651995, "loss": 0.0656, "step": 5687 }, { "epoch": 0.7971969166082691, "grad_norm": 0.26231664419174194, "learning_rate": 0.00022562066491270028, "loss": 0.0291, "step": 5688 }, { "epoch": 0.7973370707778556, "grad_norm": 0.6982463002204895, "learning_rate": 0.00022560631427888064, "loss": 0.1424, "step": 5689 }, { "epoch": 
0.7974772249474422, "grad_norm": 0.4191700518131256, "learning_rate": 0.00022559196364506097, "loss": 0.0466, "step": 5690 }, { "epoch": 0.7976173791170287, "grad_norm": 0.928926408290863, "learning_rate": 0.0002255776130112413, "loss": 0.2481, "step": 5691 }, { "epoch": 0.7977575332866153, "grad_norm": 0.4801519811153412, "learning_rate": 0.00022556326237742165, "loss": 0.1646, "step": 5692 }, { "epoch": 0.7978976874562018, "grad_norm": 0.4181602895259857, "learning_rate": 0.00022554891174360198, "loss": 0.0868, "step": 5693 }, { "epoch": 0.7980378416257884, "grad_norm": 0.3504292666912079, "learning_rate": 0.0002255345611097823, "loss": 0.1661, "step": 5694 }, { "epoch": 0.798177995795375, "grad_norm": 1.0696107149124146, "learning_rate": 0.0002255202104759627, "loss": 0.1435, "step": 5695 }, { "epoch": 0.7983181499649614, "grad_norm": 0.4525793790817261, "learning_rate": 0.00022550585984214302, "loss": 0.0449, "step": 5696 }, { "epoch": 0.798458304134548, "grad_norm": 0.483450323343277, "learning_rate": 0.00022549150920832335, "loss": 0.0923, "step": 5697 }, { "epoch": 0.7985984583041346, "grad_norm": 1.3730570077896118, "learning_rate": 0.0002254771585745037, "loss": 0.2055, "step": 5698 }, { "epoch": 0.7987386124737211, "grad_norm": 2.2538154125213623, "learning_rate": 0.00022546280794068403, "loss": 0.1648, "step": 5699 }, { "epoch": 0.7988787666433076, "grad_norm": 2.04182505607605, "learning_rate": 0.00022544845730686436, "loss": 0.0512, "step": 5700 }, { "epoch": 0.7990189208128942, "grad_norm": 0.33751675486564636, "learning_rate": 0.0002254341066730447, "loss": 0.0894, "step": 5701 }, { "epoch": 0.7991590749824807, "grad_norm": 0.3281555771827698, "learning_rate": 0.00022541975603922504, "loss": 0.0623, "step": 5702 }, { "epoch": 0.7992992291520673, "grad_norm": 0.5163365006446838, "learning_rate": 0.00022540540540540537, "loss": 0.0995, "step": 5703 }, { "epoch": 0.7994393833216539, "grad_norm": 0.2273314893245697, "learning_rate": 
0.0002253910547715857, "loss": 0.0475, "step": 5704 }, { "epoch": 0.7995795374912403, "grad_norm": 0.20162153244018555, "learning_rate": 0.00022537670413776608, "loss": 0.0315, "step": 5705 }, { "epoch": 0.7997196916608269, "grad_norm": 0.23051901161670685, "learning_rate": 0.0002253623535039464, "loss": 0.0342, "step": 5706 }, { "epoch": 0.7998598458304135, "grad_norm": 0.3236226737499237, "learning_rate": 0.00022534800287012674, "loss": 0.1278, "step": 5707 }, { "epoch": 0.8, "grad_norm": 0.33776968717575073, "learning_rate": 0.0002253336522363071, "loss": 0.1005, "step": 5708 }, { "epoch": 0.8001401541695865, "grad_norm": 0.7175341844558716, "learning_rate": 0.00022531930160248743, "loss": 0.1846, "step": 5709 }, { "epoch": 0.8002803083391731, "grad_norm": 0.4873778223991394, "learning_rate": 0.00022530495096866775, "loss": 0.0326, "step": 5710 }, { "epoch": 0.8004204625087596, "grad_norm": 0.39647483825683594, "learning_rate": 0.0002252906003348481, "loss": 0.0768, "step": 5711 }, { "epoch": 0.8005606166783462, "grad_norm": 0.6795065402984619, "learning_rate": 0.00022527624970102844, "loss": 0.0995, "step": 5712 }, { "epoch": 0.8007007708479327, "grad_norm": 0.265045702457428, "learning_rate": 0.00022526189906720877, "loss": 0.0606, "step": 5713 }, { "epoch": 0.8008409250175192, "grad_norm": 0.33189502358436584, "learning_rate": 0.00022524754843338912, "loss": 0.0733, "step": 5714 }, { "epoch": 0.8009810791871058, "grad_norm": 0.19205160439014435, "learning_rate": 0.00022523319779956945, "loss": 0.0416, "step": 5715 }, { "epoch": 0.8011212333566924, "grad_norm": 0.45440179109573364, "learning_rate": 0.0002252188471657498, "loss": 0.0671, "step": 5716 }, { "epoch": 0.801261387526279, "grad_norm": 0.3498120605945587, "learning_rate": 0.00022520449653193016, "loss": 0.0775, "step": 5717 }, { "epoch": 0.8014015416958654, "grad_norm": 0.4218611419200897, "learning_rate": 0.0002251901458981105, "loss": 0.1143, "step": 5718 }, { "epoch": 0.801541695865452, 
"grad_norm": 0.6124241948127747, "learning_rate": 0.00022517579526429082, "loss": 0.0991, "step": 5719 }, { "epoch": 0.8016818500350386, "grad_norm": 0.6980561017990112, "learning_rate": 0.00022516144463047115, "loss": 0.0781, "step": 5720 }, { "epoch": 0.8018220042046251, "grad_norm": 0.5807704925537109, "learning_rate": 0.0002251470939966515, "loss": 0.0438, "step": 5721 }, { "epoch": 0.8019621583742116, "grad_norm": 0.7196307182312012, "learning_rate": 0.00022513274336283183, "loss": 0.1886, "step": 5722 }, { "epoch": 0.8021023125437982, "grad_norm": 0.23914958536624908, "learning_rate": 0.00022511839272901216, "loss": 0.0531, "step": 5723 }, { "epoch": 0.8022424667133847, "grad_norm": 0.32259708642959595, "learning_rate": 0.00022510404209519252, "loss": 0.0908, "step": 5724 }, { "epoch": 0.8023826208829713, "grad_norm": 0.5749737024307251, "learning_rate": 0.00022508969146137285, "loss": 0.0836, "step": 5725 }, { "epoch": 0.8025227750525579, "grad_norm": 0.19791029393672943, "learning_rate": 0.00022507534082755317, "loss": 0.0341, "step": 5726 }, { "epoch": 0.8026629292221443, "grad_norm": 0.3118803799152374, "learning_rate": 0.00022506099019373356, "loss": 0.0748, "step": 5727 }, { "epoch": 0.8028030833917309, "grad_norm": 0.40618568658828735, "learning_rate": 0.00022504663955991389, "loss": 0.1146, "step": 5728 }, { "epoch": 0.8029432375613175, "grad_norm": 0.47597694396972656, "learning_rate": 0.00022503228892609421, "loss": 0.1232, "step": 5729 }, { "epoch": 0.803083391730904, "grad_norm": 1.004313588142395, "learning_rate": 0.00022501793829227457, "loss": 0.1368, "step": 5730 }, { "epoch": 0.8032235459004905, "grad_norm": 0.4950355291366577, "learning_rate": 0.0002250035876584549, "loss": 0.0774, "step": 5731 }, { "epoch": 0.8033637000700771, "grad_norm": 0.3069462478160858, "learning_rate": 0.00022498923702463523, "loss": 0.0592, "step": 5732 }, { "epoch": 0.8035038542396636, "grad_norm": 0.8063193559646606, "learning_rate": 0.00022497488639081558, 
"loss": 0.1375, "step": 5733 }, { "epoch": 0.8036440084092502, "grad_norm": 0.34387367963790894, "learning_rate": 0.0002249605357569959, "loss": 0.0559, "step": 5734 }, { "epoch": 0.8037841625788367, "grad_norm": 0.1765451431274414, "learning_rate": 0.00022494618512317624, "loss": 0.0188, "step": 5735 }, { "epoch": 0.8039243167484232, "grad_norm": 0.5219098329544067, "learning_rate": 0.00022493183448935657, "loss": 0.062, "step": 5736 }, { "epoch": 0.8040644709180098, "grad_norm": 0.7034408450126648, "learning_rate": 0.00022491748385553695, "loss": 0.1295, "step": 5737 }, { "epoch": 0.8042046250875964, "grad_norm": 0.24827638268470764, "learning_rate": 0.00022490313322171728, "loss": 0.0754, "step": 5738 }, { "epoch": 0.804344779257183, "grad_norm": 0.5348634123802185, "learning_rate": 0.0002248887825878976, "loss": 0.1076, "step": 5739 }, { "epoch": 0.8044849334267694, "grad_norm": 0.61797696352005, "learning_rate": 0.00022487443195407796, "loss": 0.1031, "step": 5740 }, { "epoch": 0.804625087596356, "grad_norm": 0.9629631042480469, "learning_rate": 0.0002248600813202583, "loss": 0.1487, "step": 5741 }, { "epoch": 0.8047652417659426, "grad_norm": 0.8176407217979431, "learning_rate": 0.00022484573068643862, "loss": 0.0783, "step": 5742 }, { "epoch": 0.8049053959355291, "grad_norm": 0.38148871064186096, "learning_rate": 0.00022483138005261898, "loss": 0.0533, "step": 5743 }, { "epoch": 0.8050455501051156, "grad_norm": 1.0174052715301514, "learning_rate": 0.0002248170294187993, "loss": 0.2, "step": 5744 }, { "epoch": 0.8051857042747022, "grad_norm": 0.8366091847419739, "learning_rate": 0.00022480267878497963, "loss": 0.2349, "step": 5745 }, { "epoch": 0.8053258584442887, "grad_norm": 0.2741507589817047, "learning_rate": 0.00022478832815116, "loss": 0.0494, "step": 5746 }, { "epoch": 0.8054660126138753, "grad_norm": 0.4730146825313568, "learning_rate": 0.00022477397751734032, "loss": 0.1758, "step": 5747 }, { "epoch": 0.8056061667834619, "grad_norm": 
1.077645182609558, "learning_rate": 0.00022475962688352067, "loss": 0.1563, "step": 5748 }, { "epoch": 0.8057463209530483, "grad_norm": 1.776120662689209, "learning_rate": 0.00022474527624970103, "loss": 0.1765, "step": 5749 }, { "epoch": 0.8058864751226349, "grad_norm": 1.5276013612747192, "learning_rate": 0.00022473092561588136, "loss": 0.1371, "step": 5750 }, { "epoch": 0.8060266292922215, "grad_norm": 0.2624564468860626, "learning_rate": 0.00022471657498206169, "loss": 0.0297, "step": 5751 }, { "epoch": 0.806166783461808, "grad_norm": 0.5053154826164246, "learning_rate": 0.00022470222434824204, "loss": 0.0731, "step": 5752 }, { "epoch": 0.8063069376313945, "grad_norm": 0.4104936122894287, "learning_rate": 0.00022468787371442237, "loss": 0.0983, "step": 5753 }, { "epoch": 0.8064470918009811, "grad_norm": 0.30682364106178284, "learning_rate": 0.0002246735230806027, "loss": 0.0673, "step": 5754 }, { "epoch": 0.8065872459705676, "grad_norm": 0.2573780119419098, "learning_rate": 0.00022465917244678303, "loss": 0.0321, "step": 5755 }, { "epoch": 0.8067274001401542, "grad_norm": 0.20758210122585297, "learning_rate": 0.00022464482181296338, "loss": 0.0426, "step": 5756 }, { "epoch": 0.8068675543097407, "grad_norm": 0.7352272272109985, "learning_rate": 0.0002246304711791437, "loss": 0.1429, "step": 5757 }, { "epoch": 0.8070077084793272, "grad_norm": 0.5787365436553955, "learning_rate": 0.00022461612054532404, "loss": 0.0959, "step": 5758 }, { "epoch": 0.8071478626489138, "grad_norm": 0.20439884066581726, "learning_rate": 0.00022460176991150442, "loss": 0.0349, "step": 5759 }, { "epoch": 0.8072880168185004, "grad_norm": 0.18559855222702026, "learning_rate": 0.00022458741927768475, "loss": 0.0401, "step": 5760 }, { "epoch": 0.807428170988087, "grad_norm": 0.3092149794101715, "learning_rate": 0.00022457306864386508, "loss": 0.0593, "step": 5761 }, { "epoch": 0.8075683251576734, "grad_norm": 1.3547148704528809, "learning_rate": 0.00022455871801004544, "loss": 0.1303, 
"step": 5762 }, { "epoch": 0.80770847932726, "grad_norm": 0.2493240386247635, "learning_rate": 0.00022454436737622576, "loss": 0.0503, "step": 5763 }, { "epoch": 0.8078486334968465, "grad_norm": 0.2818904519081116, "learning_rate": 0.0002245300167424061, "loss": 0.0516, "step": 5764 }, { "epoch": 0.8079887876664331, "grad_norm": 0.38934430480003357, "learning_rate": 0.00022451566610858645, "loss": 0.1413, "step": 5765 }, { "epoch": 0.8081289418360196, "grad_norm": 0.48351791501045227, "learning_rate": 0.00022450131547476678, "loss": 0.0753, "step": 5766 }, { "epoch": 0.8082690960056061, "grad_norm": 0.3817238211631775, "learning_rate": 0.0002244869648409471, "loss": 0.0713, "step": 5767 }, { "epoch": 0.8084092501751927, "grad_norm": 0.3684089481830597, "learning_rate": 0.0002244726142071275, "loss": 0.1094, "step": 5768 }, { "epoch": 0.8085494043447793, "grad_norm": 0.9726632237434387, "learning_rate": 0.00022445826357330782, "loss": 0.1708, "step": 5769 }, { "epoch": 0.8086895585143657, "grad_norm": 1.1153467893600464, "learning_rate": 0.00022444391293948815, "loss": 0.2096, "step": 5770 }, { "epoch": 0.8088297126839523, "grad_norm": 0.49349766969680786, "learning_rate": 0.00022442956230566847, "loss": 0.1011, "step": 5771 }, { "epoch": 0.8089698668535389, "grad_norm": 0.5160754323005676, "learning_rate": 0.00022441521167184883, "loss": 0.1151, "step": 5772 }, { "epoch": 0.8091100210231255, "grad_norm": 0.2126186043024063, "learning_rate": 0.00022440086103802916, "loss": 0.0674, "step": 5773 }, { "epoch": 0.809250175192712, "grad_norm": 0.22338438034057617, "learning_rate": 0.0002243865104042095, "loss": 0.0767, "step": 5774 }, { "epoch": 0.8093903293622985, "grad_norm": 0.4069470763206482, "learning_rate": 0.00022437215977038984, "loss": 0.071, "step": 5775 }, { "epoch": 0.8095304835318851, "grad_norm": 0.5842556357383728, "learning_rate": 0.00022435780913657017, "loss": 0.1704, "step": 5776 }, { "epoch": 0.8096706377014716, "grad_norm": 1.6323877573013306, 
"learning_rate": 0.0002243434585027505, "loss": 0.0505, "step": 5777 }, { "epoch": 0.8098107918710582, "grad_norm": 0.4551006555557251, "learning_rate": 0.00022432910786893086, "loss": 0.1373, "step": 5778 }, { "epoch": 0.8099509460406447, "grad_norm": 0.5307178497314453, "learning_rate": 0.0002243147572351112, "loss": 0.0807, "step": 5779 }, { "epoch": 0.8100911002102312, "grad_norm": 0.5500494837760925, "learning_rate": 0.00022430040660129154, "loss": 0.0925, "step": 5780 }, { "epoch": 0.8102312543798178, "grad_norm": 0.47352516651153564, "learning_rate": 0.0002242860559674719, "loss": 0.1001, "step": 5781 }, { "epoch": 0.8103714085494044, "grad_norm": 0.3682084083557129, "learning_rate": 0.00022427170533365222, "loss": 0.0381, "step": 5782 }, { "epoch": 0.8105115627189909, "grad_norm": 0.7366754412651062, "learning_rate": 0.00022425735469983255, "loss": 0.1299, "step": 5783 }, { "epoch": 0.8106517168885774, "grad_norm": 0.4509728252887726, "learning_rate": 0.0002242430040660129, "loss": 0.0657, "step": 5784 }, { "epoch": 0.810791871058164, "grad_norm": 0.3227069079875946, "learning_rate": 0.00022422865343219324, "loss": 0.1501, "step": 5785 }, { "epoch": 0.8109320252277505, "grad_norm": 0.6010557413101196, "learning_rate": 0.00022421430279837357, "loss": 0.2439, "step": 5786 }, { "epoch": 0.8110721793973371, "grad_norm": 0.5877246260643005, "learning_rate": 0.00022419995216455392, "loss": 0.1577, "step": 5787 }, { "epoch": 0.8112123335669236, "grad_norm": 0.6178323030471802, "learning_rate": 0.00022418560153073425, "loss": 0.1568, "step": 5788 }, { "epoch": 0.8113524877365101, "grad_norm": 1.3562370538711548, "learning_rate": 0.00022417125089691458, "loss": 0.0626, "step": 5789 }, { "epoch": 0.8114926419060967, "grad_norm": 0.33482205867767334, "learning_rate": 0.0002241569002630949, "loss": 0.0561, "step": 5790 }, { "epoch": 0.8116327960756833, "grad_norm": 0.6903762221336365, "learning_rate": 0.0002241425496292753, "loss": 0.103, "step": 5791 }, { "epoch": 
0.8117729502452697, "grad_norm": 0.5988749265670776, "learning_rate": 0.00022412819899545562, "loss": 0.1001, "step": 5792 }, { "epoch": 0.8119131044148563, "grad_norm": 1.2338522672653198, "learning_rate": 0.00022411384836163595, "loss": 0.1074, "step": 5793 }, { "epoch": 0.8120532585844429, "grad_norm": 0.9725277423858643, "learning_rate": 0.0002240994977278163, "loss": 0.1568, "step": 5794 }, { "epoch": 0.8121934127540295, "grad_norm": 2.4673266410827637, "learning_rate": 0.00022408514709399663, "loss": 0.1338, "step": 5795 }, { "epoch": 0.812333566923616, "grad_norm": 1.4851458072662354, "learning_rate": 0.00022407079646017696, "loss": 0.0907, "step": 5796 }, { "epoch": 0.8124737210932025, "grad_norm": 0.7200006246566772, "learning_rate": 0.00022405644582635731, "loss": 0.0541, "step": 5797 }, { "epoch": 0.812613875262789, "grad_norm": 1.2098779678344727, "learning_rate": 0.00022404209519253764, "loss": 0.1066, "step": 5798 }, { "epoch": 0.8127540294323756, "grad_norm": 1.121396541595459, "learning_rate": 0.00022402774455871797, "loss": 0.0875, "step": 5799 }, { "epoch": 0.8128941836019622, "grad_norm": 6.2350687980651855, "learning_rate": 0.00022401339392489835, "loss": 0.8495, "step": 5800 }, { "epoch": 0.8130343377715487, "grad_norm": 0.1380949169397354, "learning_rate": 0.00022399904329107868, "loss": 0.0294, "step": 5801 }, { "epoch": 0.8131744919411352, "grad_norm": 0.5304329991340637, "learning_rate": 0.000223984692657259, "loss": 0.133, "step": 5802 }, { "epoch": 0.8133146461107218, "grad_norm": 0.31048327684402466, "learning_rate": 0.00022397034202343937, "loss": 0.0642, "step": 5803 }, { "epoch": 0.8134548002803084, "grad_norm": 0.4840710759162903, "learning_rate": 0.0002239559913896197, "loss": 0.057, "step": 5804 }, { "epoch": 0.8135949544498949, "grad_norm": 0.37240302562713623, "learning_rate": 0.00022394164075580002, "loss": 0.1105, "step": 5805 }, { "epoch": 0.8137351086194814, "grad_norm": 0.1933498978614807, "learning_rate": 
0.00022392729012198035, "loss": 0.0291, "step": 5806 }, { "epoch": 0.813875262789068, "grad_norm": 0.20238856971263885, "learning_rate": 0.0002239129394881607, "loss": 0.0394, "step": 5807 }, { "epoch": 0.8140154169586545, "grad_norm": 0.4747373163700104, "learning_rate": 0.00022389858885434104, "loss": 0.111, "step": 5808 }, { "epoch": 0.8141555711282411, "grad_norm": 0.3709792196750641, "learning_rate": 0.00022388423822052137, "loss": 0.1024, "step": 5809 }, { "epoch": 0.8142957252978276, "grad_norm": 0.48583874106407166, "learning_rate": 0.00022386988758670172, "loss": 0.1087, "step": 5810 }, { "epoch": 0.8144358794674141, "grad_norm": 0.14270281791687012, "learning_rate": 0.00022385553695288208, "loss": 0.0385, "step": 5811 }, { "epoch": 0.8145760336370007, "grad_norm": 0.6404475569725037, "learning_rate": 0.0002238411863190624, "loss": 0.1617, "step": 5812 }, { "epoch": 0.8147161878065873, "grad_norm": 0.2255086600780487, "learning_rate": 0.00022382683568524276, "loss": 0.0283, "step": 5813 }, { "epoch": 0.8148563419761737, "grad_norm": 0.27812886238098145, "learning_rate": 0.0002238124850514231, "loss": 0.0932, "step": 5814 }, { "epoch": 0.8149964961457603, "grad_norm": 0.28141188621520996, "learning_rate": 0.00022379813441760342, "loss": 0.0936, "step": 5815 }, { "epoch": 0.8151366503153469, "grad_norm": 0.2614344656467438, "learning_rate": 0.00022378378378378377, "loss": 0.0626, "step": 5816 }, { "epoch": 0.8152768044849334, "grad_norm": 0.3129107356071472, "learning_rate": 0.0002237694331499641, "loss": 0.0641, "step": 5817 }, { "epoch": 0.81541695865452, "grad_norm": 0.5294141173362732, "learning_rate": 0.00022375508251614443, "loss": 0.0578, "step": 5818 }, { "epoch": 0.8155571128241065, "grad_norm": 0.26989293098449707, "learning_rate": 0.0002237407318823248, "loss": 0.1115, "step": 5819 }, { "epoch": 0.815697266993693, "grad_norm": 0.3717700242996216, "learning_rate": 0.00022372638124850512, "loss": 0.0803, "step": 5820 }, { "epoch": 
0.8158374211632796, "grad_norm": 0.4185395836830139, "learning_rate": 0.00022371203061468544, "loss": 0.1166, "step": 5821 }, { "epoch": 0.8159775753328662, "grad_norm": 0.427578866481781, "learning_rate": 0.00022369767998086583, "loss": 0.0472, "step": 5822 }, { "epoch": 0.8161177295024526, "grad_norm": 0.2768086791038513, "learning_rate": 0.00022368332934704616, "loss": 0.0486, "step": 5823 }, { "epoch": 0.8162578836720392, "grad_norm": 0.16227155923843384, "learning_rate": 0.00022366897871322648, "loss": 0.0364, "step": 5824 }, { "epoch": 0.8163980378416258, "grad_norm": 0.6284443736076355, "learning_rate": 0.0002236546280794068, "loss": 0.0852, "step": 5825 }, { "epoch": 0.8165381920112124, "grad_norm": 0.46867087483406067, "learning_rate": 0.00022364027744558717, "loss": 0.0786, "step": 5826 }, { "epoch": 0.8166783461807989, "grad_norm": 0.2443040907382965, "learning_rate": 0.0002236259268117675, "loss": 0.0617, "step": 5827 }, { "epoch": 0.8168185003503854, "grad_norm": 0.3987634479999542, "learning_rate": 0.00022361157617794783, "loss": 0.1299, "step": 5828 }, { "epoch": 0.816958654519972, "grad_norm": 0.2775398790836334, "learning_rate": 0.00022359722554412818, "loss": 0.1027, "step": 5829 }, { "epoch": 0.8170988086895585, "grad_norm": 0.3505871295928955, "learning_rate": 0.0002235828749103085, "loss": 0.0662, "step": 5830 }, { "epoch": 0.8172389628591451, "grad_norm": 0.3641906678676605, "learning_rate": 0.00022356852427648884, "loss": 0.1043, "step": 5831 }, { "epoch": 0.8173791170287316, "grad_norm": 0.9073621034622192, "learning_rate": 0.00022355417364266922, "loss": 0.1224, "step": 5832 }, { "epoch": 0.8175192711983181, "grad_norm": 0.20240426063537598, "learning_rate": 0.00022353982300884955, "loss": 0.0455, "step": 5833 }, { "epoch": 0.8176594253679047, "grad_norm": 0.4446265697479248, "learning_rate": 0.00022352547237502988, "loss": 0.0858, "step": 5834 }, { "epoch": 0.8177995795374913, "grad_norm": 0.23124608397483826, "learning_rate": 
0.00022351112174121023, "loss": 0.109, "step": 5835 }, { "epoch": 0.8179397337070777, "grad_norm": 0.7142744660377502, "learning_rate": 0.00022349677110739056, "loss": 0.1285, "step": 5836 }, { "epoch": 0.8180798878766643, "grad_norm": 0.24515023827552795, "learning_rate": 0.0002234824204735709, "loss": 0.0459, "step": 5837 }, { "epoch": 0.8182200420462509, "grad_norm": 0.3710487484931946, "learning_rate": 0.00022346806983975125, "loss": 0.0945, "step": 5838 }, { "epoch": 0.8183601962158374, "grad_norm": 0.4483098089694977, "learning_rate": 0.00022345371920593158, "loss": 0.0514, "step": 5839 }, { "epoch": 0.818500350385424, "grad_norm": 0.2247656136751175, "learning_rate": 0.0002234393685721119, "loss": 0.0368, "step": 5840 }, { "epoch": 0.8186405045550105, "grad_norm": 0.5277408957481384, "learning_rate": 0.00022342501793829223, "loss": 0.0903, "step": 5841 }, { "epoch": 0.818780658724597, "grad_norm": 0.2889882028102875, "learning_rate": 0.0002234106673044726, "loss": 0.0817, "step": 5842 }, { "epoch": 0.8189208128941836, "grad_norm": 0.4435153305530548, "learning_rate": 0.00022339631667065294, "loss": 0.0444, "step": 5843 }, { "epoch": 0.8190609670637702, "grad_norm": 0.530709981918335, "learning_rate": 0.00022338196603683327, "loss": 0.0748, "step": 5844 }, { "epoch": 0.8192011212333566, "grad_norm": 0.3209708034992218, "learning_rate": 0.00022336761540301363, "loss": 0.0424, "step": 5845 }, { "epoch": 0.8193412754029432, "grad_norm": 1.7540130615234375, "learning_rate": 0.00022335326476919396, "loss": 0.1205, "step": 5846 }, { "epoch": 0.8194814295725298, "grad_norm": 1.0781437158584595, "learning_rate": 0.00022333891413537429, "loss": 0.0966, "step": 5847 }, { "epoch": 0.8196215837421164, "grad_norm": 0.09919463843107224, "learning_rate": 0.00022332456350155464, "loss": 0.0111, "step": 5848 }, { "epoch": 0.8197617379117029, "grad_norm": 2.7272090911865234, "learning_rate": 0.00022331021286773497, "loss": 0.351, "step": 5849 }, { "epoch": 0.8199018920812894, 
"grad_norm": 0.835983157157898, "learning_rate": 0.0002232958622339153, "loss": 0.0842, "step": 5850 }, { "epoch": 0.820042046250876, "grad_norm": 0.2904302775859833, "learning_rate": 0.00022328151160009565, "loss": 0.0398, "step": 5851 }, { "epoch": 0.8201822004204625, "grad_norm": 0.6469781994819641, "learning_rate": 0.00022326716096627598, "loss": 0.0915, "step": 5852 }, { "epoch": 0.8203223545900491, "grad_norm": 0.9117149114608765, "learning_rate": 0.0002232528103324563, "loss": 0.0832, "step": 5853 }, { "epoch": 0.8204625087596356, "grad_norm": 0.243239164352417, "learning_rate": 0.0002232384596986367, "loss": 0.0696, "step": 5854 }, { "epoch": 0.8206026629292221, "grad_norm": 0.3760521113872528, "learning_rate": 0.00022322410906481702, "loss": 0.0883, "step": 5855 }, { "epoch": 0.8207428170988087, "grad_norm": 0.4392783045768738, "learning_rate": 0.00022320975843099735, "loss": 0.0422, "step": 5856 }, { "epoch": 0.8208829712683953, "grad_norm": 0.3056473135948181, "learning_rate": 0.0002231954077971777, "loss": 0.0887, "step": 5857 }, { "epoch": 0.8210231254379817, "grad_norm": 0.5537986159324646, "learning_rate": 0.00022318105716335803, "loss": 0.0625, "step": 5858 }, { "epoch": 0.8211632796075683, "grad_norm": 0.672101616859436, "learning_rate": 0.00022316670652953836, "loss": 0.1328, "step": 5859 }, { "epoch": 0.8213034337771549, "grad_norm": 0.45674440264701843, "learning_rate": 0.0002231523558957187, "loss": 0.0582, "step": 5860 }, { "epoch": 0.8214435879467414, "grad_norm": 0.5701112747192383, "learning_rate": 0.00022313800526189905, "loss": 0.0796, "step": 5861 }, { "epoch": 0.821583742116328, "grad_norm": 0.38268429040908813, "learning_rate": 0.00022312365462807938, "loss": 0.0419, "step": 5862 }, { "epoch": 0.8217238962859145, "grad_norm": 0.31397050619125366, "learning_rate": 0.0002231093039942597, "loss": 0.0295, "step": 5863 }, { "epoch": 0.821864050455501, "grad_norm": 0.3330279290676117, "learning_rate": 0.0002230949533604401, "loss": 0.0268, 
"step": 5864 }, { "epoch": 0.8220042046250876, "grad_norm": 0.23322667181491852, "learning_rate": 0.00022308060272662042, "loss": 0.0697, "step": 5865 }, { "epoch": 0.8221443587946742, "grad_norm": 0.2703094482421875, "learning_rate": 0.00022306625209280074, "loss": 0.0683, "step": 5866 }, { "epoch": 0.8222845129642606, "grad_norm": 0.1767052859067917, "learning_rate": 0.0002230519014589811, "loss": 0.0383, "step": 5867 }, { "epoch": 0.8224246671338472, "grad_norm": 0.5386223793029785, "learning_rate": 0.00022303755082516143, "loss": 0.1271, "step": 5868 }, { "epoch": 0.8225648213034338, "grad_norm": 0.36625948548316956, "learning_rate": 0.00022302320019134176, "loss": 0.1122, "step": 5869 }, { "epoch": 0.8227049754730204, "grad_norm": 0.28537946939468384, "learning_rate": 0.0002230088495575221, "loss": 0.0513, "step": 5870 }, { "epoch": 0.8228451296426069, "grad_norm": 0.19931276142597198, "learning_rate": 0.00022299449892370244, "loss": 0.0289, "step": 5871 }, { "epoch": 0.8229852838121934, "grad_norm": 0.6263478398323059, "learning_rate": 0.00022298014828988277, "loss": 0.1233, "step": 5872 }, { "epoch": 0.82312543798178, "grad_norm": 0.29002660512924194, "learning_rate": 0.00022296579765606313, "loss": 0.0967, "step": 5873 }, { "epoch": 0.8232655921513665, "grad_norm": 0.31298041343688965, "learning_rate": 0.00022295144702224348, "loss": 0.058, "step": 5874 }, { "epoch": 0.8234057463209531, "grad_norm": 0.21126209199428558, "learning_rate": 0.0002229370963884238, "loss": 0.0486, "step": 5875 }, { "epoch": 0.8235459004905396, "grad_norm": 0.7859790921211243, "learning_rate": 0.00022292274575460414, "loss": 0.059, "step": 5876 }, { "epoch": 0.8236860546601261, "grad_norm": 0.33230945467948914, "learning_rate": 0.0002229083951207845, "loss": 0.0784, "step": 5877 }, { "epoch": 0.8238262088297127, "grad_norm": 0.3842691481113434, "learning_rate": 0.00022289404448696482, "loss": 0.0637, "step": 5878 }, { "epoch": 0.8239663629992993, "grad_norm": 0.41438397765159607, 
"learning_rate": 0.00022287969385314515, "loss": 0.1201, "step": 5879 }, { "epoch": 0.8241065171688857, "grad_norm": 0.45761585235595703, "learning_rate": 0.0002228653432193255, "loss": 0.1326, "step": 5880 }, { "epoch": 0.8242466713384723, "grad_norm": 0.32369065284729004, "learning_rate": 0.00022285099258550584, "loss": 0.062, "step": 5881 }, { "epoch": 0.8243868255080589, "grad_norm": 0.6559203863143921, "learning_rate": 0.00022283664195168616, "loss": 0.0779, "step": 5882 }, { "epoch": 0.8245269796776454, "grad_norm": 0.8068817257881165, "learning_rate": 0.00022282229131786652, "loss": 0.0845, "step": 5883 }, { "epoch": 0.824667133847232, "grad_norm": 0.2590385675430298, "learning_rate": 0.00022280794068404685, "loss": 0.0255, "step": 5884 }, { "epoch": 0.8248072880168185, "grad_norm": 0.5571833252906799, "learning_rate": 0.00022279359005022718, "loss": 0.0443, "step": 5885 }, { "epoch": 0.824947442186405, "grad_norm": 0.39487841725349426, "learning_rate": 0.00022277923941640756, "loss": 0.0643, "step": 5886 }, { "epoch": 0.8250875963559916, "grad_norm": 0.8468427062034607, "learning_rate": 0.0002227648887825879, "loss": 0.1607, "step": 5887 }, { "epoch": 0.8252277505255782, "grad_norm": 0.49531927704811096, "learning_rate": 0.00022275053814876822, "loss": 0.1023, "step": 5888 }, { "epoch": 0.8253679046951646, "grad_norm": 0.2164953500032425, "learning_rate": 0.00022273618751494857, "loss": 0.0302, "step": 5889 }, { "epoch": 0.8255080588647512, "grad_norm": 0.4535398781299591, "learning_rate": 0.0002227218368811289, "loss": 0.1252, "step": 5890 }, { "epoch": 0.8256482130343378, "grad_norm": 0.38749533891677856, "learning_rate": 0.00022270748624730923, "loss": 0.0642, "step": 5891 }, { "epoch": 0.8257883672039243, "grad_norm": 0.5470385551452637, "learning_rate": 0.00022269313561348959, "loss": 0.0984, "step": 5892 }, { "epoch": 0.8259285213735109, "grad_norm": 0.5258098244667053, "learning_rate": 0.00022267878497966991, "loss": 0.0558, "step": 5893 }, { 
"epoch": 0.8260686755430974, "grad_norm": 0.7119280695915222, "learning_rate": 0.00022266443434585024, "loss": 0.0993, "step": 5894 }, { "epoch": 0.826208829712684, "grad_norm": 1.5012911558151245, "learning_rate": 0.00022265008371203057, "loss": 0.0783, "step": 5895 }, { "epoch": 0.8263489838822705, "grad_norm": 0.911300539970398, "learning_rate": 0.00022263573307821095, "loss": 0.1116, "step": 5896 }, { "epoch": 0.8264891380518571, "grad_norm": 0.43797796964645386, "learning_rate": 0.00022262138244439128, "loss": 0.0453, "step": 5897 }, { "epoch": 0.8266292922214435, "grad_norm": 2.994680881500244, "learning_rate": 0.0002226070318105716, "loss": 0.2262, "step": 5898 }, { "epoch": 0.8267694463910301, "grad_norm": 1.300034761428833, "learning_rate": 0.00022259268117675197, "loss": 0.1085, "step": 5899 }, { "epoch": 0.8269096005606167, "grad_norm": 2.6300525665283203, "learning_rate": 0.0002225783305429323, "loss": 0.2636, "step": 5900 }, { "epoch": 0.8270497547302033, "grad_norm": 0.8500336408615112, "learning_rate": 0.00022256397990911262, "loss": 0.234, "step": 5901 }, { "epoch": 0.8271899088997897, "grad_norm": 0.2362862080335617, "learning_rate": 0.00022254962927529298, "loss": 0.0762, "step": 5902 }, { "epoch": 0.8273300630693763, "grad_norm": 0.2184583991765976, "learning_rate": 0.0002225352786414733, "loss": 0.0409, "step": 5903 }, { "epoch": 0.8274702172389629, "grad_norm": 0.33143872022628784, "learning_rate": 0.00022252092800765364, "loss": 0.1045, "step": 5904 }, { "epoch": 0.8276103714085494, "grad_norm": 0.580802857875824, "learning_rate": 0.000222506577373834, "loss": 0.2047, "step": 5905 }, { "epoch": 0.827750525578136, "grad_norm": 0.3154565095901489, "learning_rate": 0.00022249222674001435, "loss": 0.0866, "step": 5906 }, { "epoch": 0.8278906797477225, "grad_norm": 0.6071909070014954, "learning_rate": 0.00022247787610619468, "loss": 0.122, "step": 5907 }, { "epoch": 0.828030833917309, "grad_norm": 0.41419368982315063, "learning_rate": 
0.00022246352547237503, "loss": 0.0845, "step": 5908 }, { "epoch": 0.8281709880868956, "grad_norm": 0.45883846282958984, "learning_rate": 0.00022244917483855536, "loss": 0.0807, "step": 5909 }, { "epoch": 0.8283111422564822, "grad_norm": 0.454587459564209, "learning_rate": 0.0002224348242047357, "loss": 0.09, "step": 5910 }, { "epoch": 0.8284512964260686, "grad_norm": 0.489064484834671, "learning_rate": 0.00022242047357091602, "loss": 0.0775, "step": 5911 }, { "epoch": 0.8285914505956552, "grad_norm": 0.4669618308544159, "learning_rate": 0.00022240612293709637, "loss": 0.0519, "step": 5912 }, { "epoch": 0.8287316047652418, "grad_norm": 0.4833787977695465, "learning_rate": 0.0002223917723032767, "loss": 0.1109, "step": 5913 }, { "epoch": 0.8288717589348283, "grad_norm": 0.26459962129592896, "learning_rate": 0.00022237742166945703, "loss": 0.0363, "step": 5914 }, { "epoch": 0.8290119131044149, "grad_norm": 0.42832279205322266, "learning_rate": 0.00022236307103563739, "loss": 0.0988, "step": 5915 }, { "epoch": 0.8291520672740014, "grad_norm": 0.33144134283065796, "learning_rate": 0.00022234872040181771, "loss": 0.0597, "step": 5916 }, { "epoch": 0.8292922214435879, "grad_norm": 0.25004151463508606, "learning_rate": 0.00022233436976799804, "loss": 0.0602, "step": 5917 }, { "epoch": 0.8294323756131745, "grad_norm": 0.687878429889679, "learning_rate": 0.00022232001913417843, "loss": 0.0955, "step": 5918 }, { "epoch": 0.8295725297827611, "grad_norm": 0.13679423928260803, "learning_rate": 0.00022230566850035875, "loss": 0.0233, "step": 5919 }, { "epoch": 0.8297126839523475, "grad_norm": 0.42223963141441345, "learning_rate": 0.00022229131786653908, "loss": 0.1175, "step": 5920 }, { "epoch": 0.8298528381219341, "grad_norm": 0.3276427984237671, "learning_rate": 0.00022227696723271944, "loss": 0.0648, "step": 5921 }, { "epoch": 0.8299929922915207, "grad_norm": 0.5942558646202087, "learning_rate": 0.00022226261659889977, "loss": 0.0724, "step": 5922 }, { "epoch": 
0.8301331464611073, "grad_norm": 0.2597961723804474, "learning_rate": 0.0002222482659650801, "loss": 0.0618, "step": 5923 }, { "epoch": 0.8302733006306937, "grad_norm": 0.3672686815261841, "learning_rate": 0.00022223391533126045, "loss": 0.075, "step": 5924 }, { "epoch": 0.8304134548002803, "grad_norm": 0.3052956163883209, "learning_rate": 0.00022221956469744078, "loss": 0.0785, "step": 5925 }, { "epoch": 0.8305536089698669, "grad_norm": 0.29609790444374084, "learning_rate": 0.0002222052140636211, "loss": 0.0401, "step": 5926 }, { "epoch": 0.8306937631394534, "grad_norm": 0.48648586869239807, "learning_rate": 0.0002221908634298015, "loss": 0.076, "step": 5927 }, { "epoch": 0.83083391730904, "grad_norm": 0.6608290672302246, "learning_rate": 0.00022217651279598182, "loss": 0.1657, "step": 5928 }, { "epoch": 0.8309740714786265, "grad_norm": 0.24940335750579834, "learning_rate": 0.00022216216216216215, "loss": 0.048, "step": 5929 }, { "epoch": 0.831114225648213, "grad_norm": 0.4135406017303467, "learning_rate": 0.00022214781152834248, "loss": 0.0443, "step": 5930 }, { "epoch": 0.8312543798177996, "grad_norm": 0.5187567472457886, "learning_rate": 0.00022213346089452283, "loss": 0.0642, "step": 5931 }, { "epoch": 0.8313945339873862, "grad_norm": 0.25692304968833923, "learning_rate": 0.00022211911026070316, "loss": 0.0451, "step": 5932 }, { "epoch": 0.8315346881569726, "grad_norm": 0.3205924332141876, "learning_rate": 0.0002221047596268835, "loss": 0.0696, "step": 5933 }, { "epoch": 0.8316748423265592, "grad_norm": 0.33431941270828247, "learning_rate": 0.00022209040899306385, "loss": 0.0709, "step": 5934 }, { "epoch": 0.8318149964961458, "grad_norm": 0.43129605054855347, "learning_rate": 0.00022207605835924417, "loss": 0.0859, "step": 5935 }, { "epoch": 0.8319551506657323, "grad_norm": 0.9323621988296509, "learning_rate": 0.0002220617077254245, "loss": 0.0196, "step": 5936 }, { "epoch": 0.8320953048353189, "grad_norm": 0.24619603157043457, "learning_rate": 
0.00022204735709160486, "loss": 0.0929, "step": 5937 }, { "epoch": 0.8322354590049054, "grad_norm": 0.5866087675094604, "learning_rate": 0.00022203300645778521, "loss": 0.0252, "step": 5938 }, { "epoch": 0.8323756131744919, "grad_norm": 0.7721034288406372, "learning_rate": 0.00022201865582396554, "loss": 0.1732, "step": 5939 }, { "epoch": 0.8325157673440785, "grad_norm": 0.5209160447120667, "learning_rate": 0.0002220043051901459, "loss": 0.1002, "step": 5940 }, { "epoch": 0.8326559215136651, "grad_norm": 0.3384513556957245, "learning_rate": 0.00022198995455632623, "loss": 0.0477, "step": 5941 }, { "epoch": 0.8327960756832515, "grad_norm": 0.32500627636909485, "learning_rate": 0.00022197560392250656, "loss": 0.0853, "step": 5942 }, { "epoch": 0.8329362298528381, "grad_norm": 1.1235716342926025, "learning_rate": 0.0002219612532886869, "loss": 0.1431, "step": 5943 }, { "epoch": 0.8330763840224247, "grad_norm": 0.2482185959815979, "learning_rate": 0.00022194690265486724, "loss": 0.0261, "step": 5944 }, { "epoch": 0.8332165381920112, "grad_norm": 0.773568868637085, "learning_rate": 0.00022193255202104757, "loss": 0.1627, "step": 5945 }, { "epoch": 0.8333566923615977, "grad_norm": 0.737103283405304, "learning_rate": 0.0002219182013872279, "loss": 0.0852, "step": 5946 }, { "epoch": 0.8334968465311843, "grad_norm": 1.8099327087402344, "learning_rate": 0.00022190385075340825, "loss": 0.2878, "step": 5947 }, { "epoch": 0.8336370007007708, "grad_norm": 0.8018434047698975, "learning_rate": 0.00022188950011958858, "loss": 0.0873, "step": 5948 }, { "epoch": 0.8337771548703574, "grad_norm": 2.499140501022339, "learning_rate": 0.0002218751494857689, "loss": 0.3278, "step": 5949 }, { "epoch": 0.833917309039944, "grad_norm": 1.1449474096298218, "learning_rate": 0.0002218607988519493, "loss": 0.3077, "step": 5950 }, { "epoch": 0.8340574632095304, "grad_norm": 0.5713278651237488, "learning_rate": 0.00022184644821812962, "loss": 0.0957, "step": 5951 }, { "epoch": 0.834197617379117, 
"grad_norm": 0.47468826174736023, "learning_rate": 0.00022183209758430995, "loss": 0.0752, "step": 5952 }, { "epoch": 0.8343377715487036, "grad_norm": 0.1592639684677124, "learning_rate": 0.0002218177469504903, "loss": 0.0306, "step": 5953 }, { "epoch": 0.8344779257182902, "grad_norm": 0.3566886782646179, "learning_rate": 0.00022180339631667063, "loss": 0.0929, "step": 5954 }, { "epoch": 0.8346180798878766, "grad_norm": 0.21627689898014069, "learning_rate": 0.00022178904568285096, "loss": 0.0506, "step": 5955 }, { "epoch": 0.8347582340574632, "grad_norm": 0.6209261417388916, "learning_rate": 0.00022177469504903132, "loss": 0.0825, "step": 5956 }, { "epoch": 0.8348983882270498, "grad_norm": 0.5948876738548279, "learning_rate": 0.00022176034441521165, "loss": 0.1176, "step": 5957 }, { "epoch": 0.8350385423966363, "grad_norm": 0.20633652806282043, "learning_rate": 0.00022174599378139197, "loss": 0.0372, "step": 5958 }, { "epoch": 0.8351786965662229, "grad_norm": 0.6971942186355591, "learning_rate": 0.00022173164314757236, "loss": 0.1589, "step": 5959 }, { "epoch": 0.8353188507358094, "grad_norm": 0.22771628201007843, "learning_rate": 0.00022171729251375269, "loss": 0.0281, "step": 5960 }, { "epoch": 0.8354590049053959, "grad_norm": 1.0242925882339478, "learning_rate": 0.00022170294187993301, "loss": 0.1849, "step": 5961 }, { "epoch": 0.8355991590749825, "grad_norm": 0.5470095872879028, "learning_rate": 0.00022168859124611334, "loss": 0.0822, "step": 5962 }, { "epoch": 0.8357393132445691, "grad_norm": 0.8643908500671387, "learning_rate": 0.0002216742406122937, "loss": 0.0873, "step": 5963 }, { "epoch": 0.8358794674141555, "grad_norm": 0.21783781051635742, "learning_rate": 0.00022165988997847403, "loss": 0.0338, "step": 5964 }, { "epoch": 0.8360196215837421, "grad_norm": 0.4660545587539673, "learning_rate": 0.00022164553934465436, "loss": 0.0826, "step": 5965 }, { "epoch": 0.8361597757533287, "grad_norm": 0.2196199744939804, "learning_rate": 0.0002216311887108347, 
"loss": 0.0519, "step": 5966 }, { "epoch": 0.8362999299229152, "grad_norm": 0.16608960926532745, "learning_rate": 0.00022161683807701504, "loss": 0.0237, "step": 5967 }, { "epoch": 0.8364400840925017, "grad_norm": 0.2225629985332489, "learning_rate": 0.00022160248744319537, "loss": 0.0227, "step": 5968 }, { "epoch": 0.8365802382620883, "grad_norm": 0.4260781705379486, "learning_rate": 0.00022158813680937572, "loss": 0.1013, "step": 5969 }, { "epoch": 0.8367203924316748, "grad_norm": 0.48793455958366394, "learning_rate": 0.00022157378617555608, "loss": 0.0772, "step": 5970 }, { "epoch": 0.8368605466012614, "grad_norm": 0.3152455985546112, "learning_rate": 0.0002215594355417364, "loss": 0.0512, "step": 5971 }, { "epoch": 0.837000700770848, "grad_norm": 0.254903644323349, "learning_rate": 0.00022154508490791676, "loss": 0.11, "step": 5972 }, { "epoch": 0.8371408549404344, "grad_norm": 0.6641345024108887, "learning_rate": 0.0002215307342740971, "loss": 0.1697, "step": 5973 }, { "epoch": 0.837281009110021, "grad_norm": 0.31637808680534363, "learning_rate": 0.00022151638364027742, "loss": 0.0296, "step": 5974 }, { "epoch": 0.8374211632796076, "grad_norm": 0.7511847615242004, "learning_rate": 0.00022150203300645778, "loss": 0.151, "step": 5975 }, { "epoch": 0.8375613174491942, "grad_norm": 0.3790588974952698, "learning_rate": 0.0002214876823726381, "loss": 0.0578, "step": 5976 }, { "epoch": 0.8377014716187806, "grad_norm": 0.24922585487365723, "learning_rate": 0.00022147333173881843, "loss": 0.0792, "step": 5977 }, { "epoch": 0.8378416257883672, "grad_norm": 0.1837388575077057, "learning_rate": 0.0002214589811049988, "loss": 0.0597, "step": 5978 }, { "epoch": 0.8379817799579538, "grad_norm": 0.4960850477218628, "learning_rate": 0.00022144463047117912, "loss": 0.1342, "step": 5979 }, { "epoch": 0.8381219341275403, "grad_norm": 0.28974518179893494, "learning_rate": 0.00022143027983735945, "loss": 0.1104, "step": 5980 }, { "epoch": 0.8382620882971269, "grad_norm": 
0.8710290193557739, "learning_rate": 0.00022141592920353978, "loss": 0.0755, "step": 5981 }, { "epoch": 0.8384022424667134, "grad_norm": 0.3801742494106293, "learning_rate": 0.00022140157856972016, "loss": 0.057, "step": 5982 }, { "epoch": 0.8385423966362999, "grad_norm": 0.1014970988035202, "learning_rate": 0.0002213872279359005, "loss": 0.0114, "step": 5983 }, { "epoch": 0.8386825508058865, "grad_norm": 0.3004118502140045, "learning_rate": 0.00022137287730208082, "loss": 0.0459, "step": 5984 }, { "epoch": 0.8388227049754731, "grad_norm": 0.29902586340904236, "learning_rate": 0.00022135852666826117, "loss": 0.0579, "step": 5985 }, { "epoch": 0.8389628591450595, "grad_norm": 0.5611525177955627, "learning_rate": 0.0002213441760344415, "loss": 0.0704, "step": 5986 }, { "epoch": 0.8391030133146461, "grad_norm": 0.8856817483901978, "learning_rate": 0.00022132982540062183, "loss": 0.1368, "step": 5987 }, { "epoch": 0.8392431674842327, "grad_norm": 0.4001542627811432, "learning_rate": 0.00022131547476680218, "loss": 0.0501, "step": 5988 }, { "epoch": 0.8393833216538192, "grad_norm": 0.3253493905067444, "learning_rate": 0.0002213011241329825, "loss": 0.0343, "step": 5989 }, { "epoch": 0.8395234758234057, "grad_norm": 0.45205754041671753, "learning_rate": 0.00022128677349916284, "loss": 0.0853, "step": 5990 }, { "epoch": 0.8396636299929923, "grad_norm": 0.41299745440483093, "learning_rate": 0.00022127242286534322, "loss": 0.1086, "step": 5991 }, { "epoch": 0.8398037841625788, "grad_norm": 0.2580900192260742, "learning_rate": 0.00022125807223152355, "loss": 0.0811, "step": 5992 }, { "epoch": 0.8399439383321654, "grad_norm": 0.13802768290042877, "learning_rate": 0.00022124372159770388, "loss": 0.0367, "step": 5993 }, { "epoch": 0.840084092501752, "grad_norm": 0.414334774017334, "learning_rate": 0.00022122937096388424, "loss": 0.0713, "step": 5994 }, { "epoch": 0.8402242466713384, "grad_norm": 1.27986478805542, "learning_rate": 0.00022121502033006457, "loss": 0.2287, "step": 
5995 }, { "epoch": 0.840364400840925, "grad_norm": 0.42905113101005554, "learning_rate": 0.0002212006696962449, "loss": 0.0413, "step": 5996 }, { "epoch": 0.8405045550105116, "grad_norm": 0.44794151186943054, "learning_rate": 0.00022118631906242522, "loss": 0.0375, "step": 5997 }, { "epoch": 0.8406447091800981, "grad_norm": 0.883091390132904, "learning_rate": 0.00022117196842860558, "loss": 0.2403, "step": 5998 }, { "epoch": 0.8407848633496846, "grad_norm": 0.8485433459281921, "learning_rate": 0.0002211576177947859, "loss": 0.0415, "step": 5999 }, { "epoch": 0.8409250175192712, "grad_norm": 2.7033872604370117, "learning_rate": 0.00022114326716096624, "loss": 0.3868, "step": 6000 }, { "epoch": 0.8410651716888577, "grad_norm": 0.42621660232543945, "learning_rate": 0.00022112891652714662, "loss": 0.0569, "step": 6001 }, { "epoch": 0.8412053258584443, "grad_norm": 0.40759584307670593, "learning_rate": 0.00022111456589332695, "loss": 0.0953, "step": 6002 }, { "epoch": 0.8413454800280308, "grad_norm": 0.16722503304481506, "learning_rate": 0.00022110021525950728, "loss": 0.0444, "step": 6003 }, { "epoch": 0.8414856341976173, "grad_norm": 0.2938117980957031, "learning_rate": 0.00022108586462568763, "loss": 0.0992, "step": 6004 }, { "epoch": 0.8416257883672039, "grad_norm": 0.3903235197067261, "learning_rate": 0.00022107151399186796, "loss": 0.1352, "step": 6005 }, { "epoch": 0.8417659425367905, "grad_norm": 0.4225024878978729, "learning_rate": 0.0002210571633580483, "loss": 0.0702, "step": 6006 }, { "epoch": 0.8419060967063771, "grad_norm": 0.4744155704975128, "learning_rate": 0.00022104281272422864, "loss": 0.1148, "step": 6007 }, { "epoch": 0.8420462508759635, "grad_norm": 0.229496031999588, "learning_rate": 0.00022102846209040897, "loss": 0.0342, "step": 6008 }, { "epoch": 0.8421864050455501, "grad_norm": 0.24346406757831573, "learning_rate": 0.0002210141114565893, "loss": 0.0371, "step": 6009 }, { "epoch": 0.8423265592151367, "grad_norm": 0.5424032211303711, 
"learning_rate": 0.00022099976082276966, "loss": 0.1306, "step": 6010 }, { "epoch": 0.8424667133847232, "grad_norm": 0.4252154529094696, "learning_rate": 0.00022098541018894998, "loss": 0.0905, "step": 6011 }, { "epoch": 0.8426068675543097, "grad_norm": 0.2243867814540863, "learning_rate": 0.0002209710595551303, "loss": 0.0709, "step": 6012 }, { "epoch": 0.8427470217238963, "grad_norm": 0.5185843706130981, "learning_rate": 0.0002209567089213107, "loss": 0.0984, "step": 6013 }, { "epoch": 0.8428871758934828, "grad_norm": 0.27996328473091125, "learning_rate": 0.00022094235828749102, "loss": 0.0425, "step": 6014 }, { "epoch": 0.8430273300630694, "grad_norm": 0.4408407509326935, "learning_rate": 0.00022092800765367135, "loss": 0.1021, "step": 6015 }, { "epoch": 0.843167484232656, "grad_norm": 0.5334112644195557, "learning_rate": 0.00022091365701985168, "loss": 0.101, "step": 6016 }, { "epoch": 0.8433076384022424, "grad_norm": 0.3409167230129242, "learning_rate": 0.00022089930638603204, "loss": 0.0573, "step": 6017 }, { "epoch": 0.843447792571829, "grad_norm": 0.21492350101470947, "learning_rate": 0.00022088495575221237, "loss": 0.0426, "step": 6018 }, { "epoch": 0.8435879467414156, "grad_norm": 0.18287070095539093, "learning_rate": 0.0002208706051183927, "loss": 0.0541, "step": 6019 }, { "epoch": 0.8437281009110021, "grad_norm": 0.6449204683303833, "learning_rate": 0.00022085625448457305, "loss": 0.0998, "step": 6020 }, { "epoch": 0.8438682550805886, "grad_norm": 0.8856091499328613, "learning_rate": 0.00022084190385075338, "loss": 0.0529, "step": 6021 }, { "epoch": 0.8440084092501752, "grad_norm": 0.4567941427230835, "learning_rate": 0.0002208275532169337, "loss": 0.0735, "step": 6022 }, { "epoch": 0.8441485634197617, "grad_norm": 0.5005852580070496, "learning_rate": 0.0002208132025831141, "loss": 0.0553, "step": 6023 }, { "epoch": 0.8442887175893483, "grad_norm": 0.31614986062049866, "learning_rate": 0.00022079885194929442, "loss": 0.0892, "step": 6024 }, { "epoch": 
0.8444288717589348, "grad_norm": 0.4281679093837738, "learning_rate": 0.00022078450131547475, "loss": 0.0733, "step": 6025 }, { "epoch": 0.8445690259285213, "grad_norm": 0.18619506061077118, "learning_rate": 0.0002207701506816551, "loss": 0.0363, "step": 6026 }, { "epoch": 0.8447091800981079, "grad_norm": 0.29114648699760437, "learning_rate": 0.00022075580004783543, "loss": 0.0735, "step": 6027 }, { "epoch": 0.8448493342676945, "grad_norm": 0.29656782746315, "learning_rate": 0.00022074144941401576, "loss": 0.0536, "step": 6028 }, { "epoch": 0.844989488437281, "grad_norm": 0.3082650601863861, "learning_rate": 0.00022072709878019612, "loss": 0.065, "step": 6029 }, { "epoch": 0.8451296426068675, "grad_norm": 0.3323041796684265, "learning_rate": 0.00022071274814637644, "loss": 0.0422, "step": 6030 }, { "epoch": 0.8452697967764541, "grad_norm": 0.4899621903896332, "learning_rate": 0.00022069839751255677, "loss": 0.102, "step": 6031 }, { "epoch": 0.8454099509460407, "grad_norm": 0.6736263036727905, "learning_rate": 0.0002206840468787371, "loss": 0.1277, "step": 6032 }, { "epoch": 0.8455501051156272, "grad_norm": 0.18957358598709106, "learning_rate": 0.00022066969624491748, "loss": 0.0512, "step": 6033 }, { "epoch": 0.8456902592852137, "grad_norm": 0.32455477118492126, "learning_rate": 0.0002206553456110978, "loss": 0.0611, "step": 6034 }, { "epoch": 0.8458304134548003, "grad_norm": 0.7106560468673706, "learning_rate": 0.00022064099497727814, "loss": 0.0795, "step": 6035 }, { "epoch": 0.8459705676243868, "grad_norm": 0.36234986782073975, "learning_rate": 0.0002206266443434585, "loss": 0.0746, "step": 6036 }, { "epoch": 0.8461107217939734, "grad_norm": 0.4556647539138794, "learning_rate": 0.00022061229370963883, "loss": 0.0827, "step": 6037 }, { "epoch": 0.84625087596356, "grad_norm": 0.5617167353630066, "learning_rate": 0.00022059794307581915, "loss": 0.0394, "step": 6038 }, { "epoch": 0.8463910301331464, "grad_norm": 0.5563653111457825, "learning_rate": 
0.0002205835924419995, "loss": 0.1189, "step": 6039 }, { "epoch": 0.846531184302733, "grad_norm": 0.31766507029533386, "learning_rate": 0.00022056924180817984, "loss": 0.0726, "step": 6040 }, { "epoch": 0.8466713384723196, "grad_norm": 0.7167912125587463, "learning_rate": 0.00022055489117436017, "loss": 0.0684, "step": 6041 }, { "epoch": 0.8468114926419061, "grad_norm": 0.4309585690498352, "learning_rate": 0.00022054054054054052, "loss": 0.1414, "step": 6042 }, { "epoch": 0.8469516468114926, "grad_norm": 0.3490201532840729, "learning_rate": 0.00022052618990672085, "loss": 0.047, "step": 6043 }, { "epoch": 0.8470918009810792, "grad_norm": 0.30682438611984253, "learning_rate": 0.00022051183927290118, "loss": 0.0248, "step": 6044 }, { "epoch": 0.8472319551506657, "grad_norm": 0.2593974173069, "learning_rate": 0.00022049748863908156, "loss": 0.0635, "step": 6045 }, { "epoch": 0.8473721093202523, "grad_norm": 0.7133006453514099, "learning_rate": 0.0002204831380052619, "loss": 0.0758, "step": 6046 }, { "epoch": 0.8475122634898388, "grad_norm": 2.085472345352173, "learning_rate": 0.00022046878737144222, "loss": 0.1418, "step": 6047 }, { "epoch": 0.8476524176594253, "grad_norm": 0.7213760614395142, "learning_rate": 0.00022045443673762258, "loss": 0.0489, "step": 6048 }, { "epoch": 0.8477925718290119, "grad_norm": 1.4674125909805298, "learning_rate": 0.0002204400861038029, "loss": 0.0746, "step": 6049 }, { "epoch": 0.8479327259985985, "grad_norm": 4.065183639526367, "learning_rate": 0.00022042573546998323, "loss": 0.2873, "step": 6050 }, { "epoch": 0.848072880168185, "grad_norm": 0.3302542567253113, "learning_rate": 0.00022041138483616356, "loss": 0.0644, "step": 6051 }, { "epoch": 0.8482130343377715, "grad_norm": 0.3476284146308899, "learning_rate": 0.00022039703420234392, "loss": 0.0699, "step": 6052 }, { "epoch": 0.8483531885073581, "grad_norm": 0.6748327612876892, "learning_rate": 0.00022038268356852425, "loss": 0.0505, "step": 6053 }, { "epoch": 0.8484933426769447, 
"grad_norm": 0.32755163311958313, "learning_rate": 0.00022036833293470457, "loss": 0.0212, "step": 6054 }, { "epoch": 0.8486334968465312, "grad_norm": 0.4533473551273346, "learning_rate": 0.00022035398230088496, "loss": 0.0838, "step": 6055 }, { "epoch": 0.8487736510161177, "grad_norm": 0.9042192697525024, "learning_rate": 0.00022033963166706529, "loss": 0.0677, "step": 6056 }, { "epoch": 0.8489138051857043, "grad_norm": 0.5788269639015198, "learning_rate": 0.00022032528103324561, "loss": 0.055, "step": 6057 }, { "epoch": 0.8490539593552908, "grad_norm": 0.6356707811355591, "learning_rate": 0.00022031093039942597, "loss": 0.0863, "step": 6058 }, { "epoch": 0.8491941135248774, "grad_norm": 0.2752299904823303, "learning_rate": 0.0002202965797656063, "loss": 0.0533, "step": 6059 }, { "epoch": 0.849334267694464, "grad_norm": 0.701941728591919, "learning_rate": 0.00022028222913178663, "loss": 0.0734, "step": 6060 }, { "epoch": 0.8494744218640504, "grad_norm": 0.8902832269668579, "learning_rate": 0.00022026787849796698, "loss": 0.2098, "step": 6061 }, { "epoch": 0.849614576033637, "grad_norm": 0.6169689297676086, "learning_rate": 0.0002202535278641473, "loss": 0.0677, "step": 6062 }, { "epoch": 0.8497547302032236, "grad_norm": 0.5556852221488953, "learning_rate": 0.00022023917723032764, "loss": 0.103, "step": 6063 }, { "epoch": 0.8498948843728101, "grad_norm": 0.862409234046936, "learning_rate": 0.000220224826596508, "loss": 0.1048, "step": 6064 }, { "epoch": 0.8500350385423966, "grad_norm": 0.39001962542533875, "learning_rate": 0.00022021047596268835, "loss": 0.0542, "step": 6065 }, { "epoch": 0.8501751927119832, "grad_norm": 0.5409131050109863, "learning_rate": 0.00022019612532886868, "loss": 0.1172, "step": 6066 }, { "epoch": 0.8503153468815697, "grad_norm": 0.35064318776130676, "learning_rate": 0.000220181774695049, "loss": 0.0713, "step": 6067 }, { "epoch": 0.8504555010511563, "grad_norm": 0.43928372859954834, "learning_rate": 0.00022016742406122936, "loss": 0.0826, 
"step": 6068 }, { "epoch": 0.8505956552207428, "grad_norm": 0.38914406299591064, "learning_rate": 0.0002201530734274097, "loss": 0.0881, "step": 6069 }, { "epoch": 0.8507358093903293, "grad_norm": 0.5665842890739441, "learning_rate": 0.00022013872279359002, "loss": 0.081, "step": 6070 }, { "epoch": 0.8508759635599159, "grad_norm": 0.3907313048839569, "learning_rate": 0.00022012437215977038, "loss": 0.1094, "step": 6071 }, { "epoch": 0.8510161177295025, "grad_norm": 0.573666512966156, "learning_rate": 0.0002201100215259507, "loss": 0.0917, "step": 6072 }, { "epoch": 0.851156271899089, "grad_norm": 0.3204288184642792, "learning_rate": 0.00022009567089213103, "loss": 0.0816, "step": 6073 }, { "epoch": 0.8512964260686755, "grad_norm": 0.3869044780731201, "learning_rate": 0.0002200813202583114, "loss": 0.1327, "step": 6074 }, { "epoch": 0.8514365802382621, "grad_norm": 0.20069493353366852, "learning_rate": 0.00022006696962449172, "loss": 0.0352, "step": 6075 }, { "epoch": 0.8515767344078486, "grad_norm": 0.4491002857685089, "learning_rate": 0.00022005261899067205, "loss": 0.0912, "step": 6076 }, { "epoch": 0.8517168885774352, "grad_norm": 0.599244236946106, "learning_rate": 0.00022003826835685243, "loss": 0.0785, "step": 6077 }, { "epoch": 0.8518570427470217, "grad_norm": 0.47733041644096375, "learning_rate": 0.00022002391772303276, "loss": 0.0843, "step": 6078 }, { "epoch": 0.8519971969166082, "grad_norm": 0.2956453561782837, "learning_rate": 0.00022000956708921309, "loss": 0.0459, "step": 6079 }, { "epoch": 0.8521373510861948, "grad_norm": 0.5410683155059814, "learning_rate": 0.00021999521645539344, "loss": 0.1125, "step": 6080 }, { "epoch": 0.8522775052557814, "grad_norm": 0.5709959864616394, "learning_rate": 0.00021998086582157377, "loss": 0.0658, "step": 6081 }, { "epoch": 0.852417659425368, "grad_norm": 0.4829818904399872, "learning_rate": 0.0002199665151877541, "loss": 0.1498, "step": 6082 }, { "epoch": 0.8525578135949544, "grad_norm": 0.17665866017341614, 
"learning_rate": 0.00021995216455393445, "loss": 0.0187, "step": 6083 }, { "epoch": 0.852697967764541, "grad_norm": 0.37586143612861633, "learning_rate": 0.00021993781392011478, "loss": 0.0456, "step": 6084 }, { "epoch": 0.8528381219341276, "grad_norm": 0.7087547183036804, "learning_rate": 0.0002199234632862951, "loss": 0.0668, "step": 6085 }, { "epoch": 0.8529782761037141, "grad_norm": 0.6223840117454529, "learning_rate": 0.00021990911265247544, "loss": 0.0613, "step": 6086 }, { "epoch": 0.8531184302733006, "grad_norm": 0.26645681262016296, "learning_rate": 0.00021989476201865582, "loss": 0.0354, "step": 6087 }, { "epoch": 0.8532585844428872, "grad_norm": 0.35039687156677246, "learning_rate": 0.00021988041138483615, "loss": 0.0446, "step": 6088 }, { "epoch": 0.8533987386124737, "grad_norm": 0.19350412487983704, "learning_rate": 0.00021986606075101648, "loss": 0.026, "step": 6089 }, { "epoch": 0.8535388927820603, "grad_norm": 0.7810064554214478, "learning_rate": 0.00021985171011719684, "loss": 0.1607, "step": 6090 }, { "epoch": 0.8536790469516468, "grad_norm": 0.22369100153446198, "learning_rate": 0.00021983735948337716, "loss": 0.0633, "step": 6091 }, { "epoch": 0.8538192011212333, "grad_norm": 0.8078294396400452, "learning_rate": 0.0002198230088495575, "loss": 0.0968, "step": 6092 }, { "epoch": 0.8539593552908199, "grad_norm": 0.44355660676956177, "learning_rate": 0.00021980865821573785, "loss": 0.0661, "step": 6093 }, { "epoch": 0.8540995094604065, "grad_norm": 1.3358955383300781, "learning_rate": 0.00021979430758191818, "loss": 0.134, "step": 6094 }, { "epoch": 0.854239663629993, "grad_norm": 2.5596325397491455, "learning_rate": 0.0002197799569480985, "loss": 0.1636, "step": 6095 }, { "epoch": 0.8543798177995795, "grad_norm": 1.5910701751708984, "learning_rate": 0.00021976560631427886, "loss": 0.1689, "step": 6096 }, { "epoch": 0.8545199719691661, "grad_norm": 4.034012317657471, "learning_rate": 0.00021975125568045922, "loss": 0.1846, "step": 6097 }, { "epoch": 
0.8546601261387526, "grad_norm": 0.786284863948822, "learning_rate": 0.00021973690504663955, "loss": 0.0635, "step": 6098 }, { "epoch": 0.8548002803083392, "grad_norm": 2.224099636077881, "learning_rate": 0.0002197225544128199, "loss": 0.2622, "step": 6099 }, { "epoch": 0.8549404344779257, "grad_norm": 2.533465623855591, "learning_rate": 0.00021970820377900023, "loss": 0.4339, "step": 6100 }, { "epoch": 0.8550805886475122, "grad_norm": 0.2941974699497223, "learning_rate": 0.00021969385314518056, "loss": 0.0471, "step": 6101 }, { "epoch": 0.8552207428170988, "grad_norm": 0.47508567571640015, "learning_rate": 0.0002196795025113609, "loss": 0.0734, "step": 6102 }, { "epoch": 0.8553608969866854, "grad_norm": 0.4837630093097687, "learning_rate": 0.00021966515187754124, "loss": 0.122, "step": 6103 }, { "epoch": 0.855501051156272, "grad_norm": 0.4980991780757904, "learning_rate": 0.00021965080124372157, "loss": 0.0603, "step": 6104 }, { "epoch": 0.8556412053258584, "grad_norm": 0.38687628507614136, "learning_rate": 0.0002196364506099019, "loss": 0.0572, "step": 6105 }, { "epoch": 0.855781359495445, "grad_norm": 0.26383933424949646, "learning_rate": 0.00021962209997608226, "loss": 0.0326, "step": 6106 }, { "epoch": 0.8559215136650316, "grad_norm": 0.5728917717933655, "learning_rate": 0.00021960774934226258, "loss": 0.0654, "step": 6107 }, { "epoch": 0.8560616678346181, "grad_norm": 0.2652728259563446, "learning_rate": 0.0002195933987084429, "loss": 0.0438, "step": 6108 }, { "epoch": 0.8562018220042046, "grad_norm": 0.1694273203611374, "learning_rate": 0.0002195790480746233, "loss": 0.0324, "step": 6109 }, { "epoch": 0.8563419761737912, "grad_norm": 0.3238029479980469, "learning_rate": 0.00021956469744080362, "loss": 0.0713, "step": 6110 }, { "epoch": 0.8564821303433777, "grad_norm": 0.16493548452854156, "learning_rate": 0.00021955034680698395, "loss": 0.0187, "step": 6111 }, { "epoch": 0.8566222845129643, "grad_norm": 0.4229266941547394, "learning_rate": 
0.0002195359961731643, "loss": 0.0629, "step": 6112 }, { "epoch": 0.8567624386825508, "grad_norm": 0.7142085433006287, "learning_rate": 0.00021952164553934464, "loss": 0.1472, "step": 6113 }, { "epoch": 0.8569025928521373, "grad_norm": 0.17299939692020416, "learning_rate": 0.00021950729490552497, "loss": 0.0693, "step": 6114 }, { "epoch": 0.8570427470217239, "grad_norm": 0.5140867829322815, "learning_rate": 0.00021949294427170532, "loss": 0.1833, "step": 6115 }, { "epoch": 0.8571829011913105, "grad_norm": 0.33215197920799255, "learning_rate": 0.00021947859363788565, "loss": 0.033, "step": 6116 }, { "epoch": 0.857323055360897, "grad_norm": 0.31363826990127563, "learning_rate": 0.00021946424300406598, "loss": 0.08, "step": 6117 }, { "epoch": 0.8574632095304835, "grad_norm": 0.4356216788291931, "learning_rate": 0.00021944989237024636, "loss": 0.1139, "step": 6118 }, { "epoch": 0.8576033637000701, "grad_norm": 0.3813140392303467, "learning_rate": 0.0002194355417364267, "loss": 0.0852, "step": 6119 }, { "epoch": 0.8577435178696566, "grad_norm": 1.0481899976730347, "learning_rate": 0.00021942119110260702, "loss": 0.1134, "step": 6120 }, { "epoch": 0.8578836720392432, "grad_norm": 0.5337585210800171, "learning_rate": 0.00021940684046878735, "loss": 0.0556, "step": 6121 }, { "epoch": 0.8580238262088297, "grad_norm": 0.674825131893158, "learning_rate": 0.0002193924898349677, "loss": 0.073, "step": 6122 }, { "epoch": 0.8581639803784162, "grad_norm": 0.1671558916568756, "learning_rate": 0.00021937813920114803, "loss": 0.0301, "step": 6123 }, { "epoch": 0.8583041345480028, "grad_norm": 0.46822091937065125, "learning_rate": 0.00021936378856732836, "loss": 0.0696, "step": 6124 }, { "epoch": 0.8584442887175894, "grad_norm": 0.45676594972610474, "learning_rate": 0.00021934943793350871, "loss": 0.1004, "step": 6125 }, { "epoch": 0.858584442887176, "grad_norm": 0.24710460007190704, "learning_rate": 0.00021933508729968904, "loss": 0.0553, "step": 6126 }, { "epoch": 
0.8587245970567624, "grad_norm": 0.20501458644866943, "learning_rate": 0.00021932073666586937, "loss": 0.0247, "step": 6127 }, { "epoch": 0.858864751226349, "grad_norm": 0.6338192820549011, "learning_rate": 0.00021930638603204975, "loss": 0.1415, "step": 6128 }, { "epoch": 0.8590049053959355, "grad_norm": 0.3492337167263031, "learning_rate": 0.00021929203539823008, "loss": 0.1034, "step": 6129 }, { "epoch": 0.8591450595655221, "grad_norm": 0.46829754114151, "learning_rate": 0.0002192776847644104, "loss": 0.0914, "step": 6130 }, { "epoch": 0.8592852137351086, "grad_norm": 0.6821593642234802, "learning_rate": 0.00021926333413059077, "loss": 0.1096, "step": 6131 }, { "epoch": 0.8594253679046951, "grad_norm": 0.5158564448356628, "learning_rate": 0.0002192489834967711, "loss": 0.1258, "step": 6132 }, { "epoch": 0.8595655220742817, "grad_norm": 0.4335736930370331, "learning_rate": 0.00021923463286295142, "loss": 0.066, "step": 6133 }, { "epoch": 0.8597056762438683, "grad_norm": 0.21926026046276093, "learning_rate": 0.00021922028222913178, "loss": 0.0374, "step": 6134 }, { "epoch": 0.8598458304134547, "grad_norm": 0.764644980430603, "learning_rate": 0.0002192059315953121, "loss": 0.0572, "step": 6135 }, { "epoch": 0.8599859845830413, "grad_norm": 0.21171337366104126, "learning_rate": 0.00021919158096149244, "loss": 0.0577, "step": 6136 }, { "epoch": 0.8601261387526279, "grad_norm": 0.2932329475879669, "learning_rate": 0.00021917723032767277, "loss": 0.0548, "step": 6137 }, { "epoch": 0.8602662929222145, "grad_norm": 0.4444162845611572, "learning_rate": 0.00021916287969385312, "loss": 0.0578, "step": 6138 }, { "epoch": 0.860406447091801, "grad_norm": 0.9112905263900757, "learning_rate": 0.00021914852906003345, "loss": 0.0954, "step": 6139 }, { "epoch": 0.8605466012613875, "grad_norm": 0.05830321088433266, "learning_rate": 0.00021913417842621378, "loss": 0.0078, "step": 6140 }, { "epoch": 0.8606867554309741, "grad_norm": 0.5870422720909119, "learning_rate": 
0.00021911982779239416, "loss": 0.1373, "step": 6141 }, { "epoch": 0.8608269096005606, "grad_norm": 0.4302772879600525, "learning_rate": 0.0002191054771585745, "loss": 0.0466, "step": 6142 }, { "epoch": 0.8609670637701472, "grad_norm": 0.456406831741333, "learning_rate": 0.00021909112652475482, "loss": 0.1077, "step": 6143 }, { "epoch": 0.8611072179397337, "grad_norm": 0.5110020041465759, "learning_rate": 0.00021907677589093517, "loss": 0.121, "step": 6144 }, { "epoch": 0.8612473721093202, "grad_norm": 0.4321301579475403, "learning_rate": 0.0002190624252571155, "loss": 0.0874, "step": 6145 }, { "epoch": 0.8613875262789068, "grad_norm": 1.0213720798492432, "learning_rate": 0.00021904807462329583, "loss": 0.072, "step": 6146 }, { "epoch": 0.8615276804484934, "grad_norm": 0.35551443696022034, "learning_rate": 0.0002190337239894762, "loss": 0.0339, "step": 6147 }, { "epoch": 0.8616678346180799, "grad_norm": 0.24760396778583527, "learning_rate": 0.00021901937335565652, "loss": 0.0618, "step": 6148 }, { "epoch": 0.8618079887876664, "grad_norm": 0.8129737973213196, "learning_rate": 0.00021900502272183684, "loss": 0.1025, "step": 6149 }, { "epoch": 0.861948142957253, "grad_norm": 1.3780536651611328, "learning_rate": 0.00021899067208801723, "loss": 0.1216, "step": 6150 }, { "epoch": 0.8620882971268395, "grad_norm": 0.3356468975543976, "learning_rate": 0.00021897632145419756, "loss": 0.0794, "step": 6151 }, { "epoch": 0.8622284512964261, "grad_norm": 0.40836986899375916, "learning_rate": 0.00021896197082037788, "loss": 0.0582, "step": 6152 }, { "epoch": 0.8623686054660126, "grad_norm": 0.2814682126045227, "learning_rate": 0.00021894762018655824, "loss": 0.0612, "step": 6153 }, { "epoch": 0.8625087596355991, "grad_norm": 0.46097177267074585, "learning_rate": 0.00021893326955273857, "loss": 0.1014, "step": 6154 }, { "epoch": 0.8626489138051857, "grad_norm": 0.31306591629981995, "learning_rate": 0.0002189189189189189, "loss": 0.0446, "step": 6155 }, { "epoch": 
0.8627890679747723, "grad_norm": 0.38307279348373413, "learning_rate": 0.00021890456828509923, "loss": 0.0931, "step": 6156 }, { "epoch": 0.8629292221443587, "grad_norm": 0.5275318622589111, "learning_rate": 0.00021889021765127958, "loss": 0.058, "step": 6157 }, { "epoch": 0.8630693763139453, "grad_norm": 0.27580344676971436, "learning_rate": 0.0002188758670174599, "loss": 0.0564, "step": 6158 }, { "epoch": 0.8632095304835319, "grad_norm": 0.27724042534828186, "learning_rate": 0.00021886151638364024, "loss": 0.0384, "step": 6159 }, { "epoch": 0.8633496846531185, "grad_norm": 0.28988340497016907, "learning_rate": 0.00021884716574982062, "loss": 0.0564, "step": 6160 }, { "epoch": 0.863489838822705, "grad_norm": 0.8585079908370972, "learning_rate": 0.00021883281511600095, "loss": 0.0551, "step": 6161 }, { "epoch": 0.8636299929922915, "grad_norm": 0.1465761661529541, "learning_rate": 0.00021881846448218128, "loss": 0.0558, "step": 6162 }, { "epoch": 0.863770147161878, "grad_norm": 0.42087769508361816, "learning_rate": 0.00021880411384836163, "loss": 0.085, "step": 6163 }, { "epoch": 0.8639103013314646, "grad_norm": 0.3326455354690552, "learning_rate": 0.00021878976321454196, "loss": 0.0417, "step": 6164 }, { "epoch": 0.8640504555010512, "grad_norm": 0.46806639432907104, "learning_rate": 0.0002187754125807223, "loss": 0.0458, "step": 6165 }, { "epoch": 0.8641906096706377, "grad_norm": 0.43334513902664185, "learning_rate": 0.00021876106194690265, "loss": 0.1083, "step": 6166 }, { "epoch": 0.8643307638402242, "grad_norm": 0.4545452892780304, "learning_rate": 0.00021874671131308298, "loss": 0.1221, "step": 6167 }, { "epoch": 0.8644709180098108, "grad_norm": 0.21493001282215118, "learning_rate": 0.0002187323606792633, "loss": 0.0409, "step": 6168 }, { "epoch": 0.8646110721793974, "grad_norm": 0.1310833841562271, "learning_rate": 0.00021871801004544366, "loss": 0.0314, "step": 6169 }, { "epoch": 0.8647512263489839, "grad_norm": 0.47558698058128357, "learning_rate": 
0.000218703659411624, "loss": 0.0632, "step": 6170 }, { "epoch": 0.8648913805185704, "grad_norm": 0.12817169725894928, "learning_rate": 0.00021868930877780432, "loss": 0.0199, "step": 6171 }, { "epoch": 0.865031534688157, "grad_norm": 0.49068155884742737, "learning_rate": 0.00021867495814398464, "loss": 0.1007, "step": 6172 }, { "epoch": 0.8651716888577435, "grad_norm": 0.3276272416114807, "learning_rate": 0.00021866060751016503, "loss": 0.0747, "step": 6173 }, { "epoch": 0.8653118430273301, "grad_norm": 0.4701068699359894, "learning_rate": 0.00021864625687634536, "loss": 0.0868, "step": 6174 }, { "epoch": 0.8654519971969166, "grad_norm": 0.5828079581260681, "learning_rate": 0.00021863190624252568, "loss": 0.0779, "step": 6175 }, { "epoch": 0.8655921513665031, "grad_norm": 0.30745652318000793, "learning_rate": 0.00021861755560870604, "loss": 0.0384, "step": 6176 }, { "epoch": 0.8657323055360897, "grad_norm": 0.48013949394226074, "learning_rate": 0.00021860320497488637, "loss": 0.0954, "step": 6177 }, { "epoch": 0.8658724597056763, "grad_norm": 0.32466575503349304, "learning_rate": 0.0002185888543410667, "loss": 0.0511, "step": 6178 }, { "epoch": 0.8660126138752627, "grad_norm": 0.41292816400527954, "learning_rate": 0.00021857450370724705, "loss": 0.0662, "step": 6179 }, { "epoch": 0.8661527680448493, "grad_norm": 0.741558849811554, "learning_rate": 0.00021856015307342738, "loss": 0.0439, "step": 6180 }, { "epoch": 0.8662929222144359, "grad_norm": 0.5732241272926331, "learning_rate": 0.0002185458024396077, "loss": 0.1156, "step": 6181 }, { "epoch": 0.8664330763840224, "grad_norm": 0.7777132987976074, "learning_rate": 0.0002185314518057881, "loss": 0.1861, "step": 6182 }, { "epoch": 0.866573230553609, "grad_norm": 0.24981337785720825, "learning_rate": 0.00021851710117196842, "loss": 0.0569, "step": 6183 }, { "epoch": 0.8667133847231955, "grad_norm": 0.2616785168647766, "learning_rate": 0.00021850275053814875, "loss": 0.0694, "step": 6184 }, { "epoch": 
0.866853538892782, "grad_norm": 0.4602219760417938, "learning_rate": 0.0002184883999043291, "loss": 0.1396, "step": 6185 }, { "epoch": 0.8669936930623686, "grad_norm": 0.4059341549873352, "learning_rate": 0.00021847404927050943, "loss": 0.0954, "step": 6186 }, { "epoch": 0.8671338472319552, "grad_norm": 0.4319641888141632, "learning_rate": 0.00021845969863668976, "loss": 0.1626, "step": 6187 }, { "epoch": 0.8672740014015416, "grad_norm": 0.3092079162597656, "learning_rate": 0.00021844534800287012, "loss": 0.0861, "step": 6188 }, { "epoch": 0.8674141555711282, "grad_norm": 0.46257224678993225, "learning_rate": 0.00021843099736905045, "loss": 0.1117, "step": 6189 }, { "epoch": 0.8675543097407148, "grad_norm": 1.5813649892807007, "learning_rate": 0.00021841664673523078, "loss": 0.0719, "step": 6190 }, { "epoch": 0.8676944639103014, "grad_norm": 0.5169780254364014, "learning_rate": 0.0002184022961014111, "loss": 0.0668, "step": 6191 }, { "epoch": 0.8678346180798879, "grad_norm": 0.357990026473999, "learning_rate": 0.0002183879454675915, "loss": 0.0846, "step": 6192 }, { "epoch": 0.8679747722494744, "grad_norm": 0.227934330701828, "learning_rate": 0.00021837359483377182, "loss": 0.0458, "step": 6193 }, { "epoch": 0.868114926419061, "grad_norm": 0.8706230521202087, "learning_rate": 0.00021835924419995214, "loss": 0.1192, "step": 6194 }, { "epoch": 0.8682550805886475, "grad_norm": 0.7386736869812012, "learning_rate": 0.0002183448935661325, "loss": 0.0663, "step": 6195 }, { "epoch": 0.8683952347582341, "grad_norm": 0.4754023849964142, "learning_rate": 0.00021833054293231283, "loss": 0.1005, "step": 6196 }, { "epoch": 0.8685353889278206, "grad_norm": 0.2744595408439636, "learning_rate": 0.00021831619229849316, "loss": 0.0286, "step": 6197 }, { "epoch": 0.8686755430974071, "grad_norm": 0.8672484755516052, "learning_rate": 0.0002183018416646735, "loss": 0.1787, "step": 6198 }, { "epoch": 0.8688156972669937, "grad_norm": 4.3144097328186035, "learning_rate": 
0.00021828749103085384, "loss": 0.1939, "step": 6199 }, { "epoch": 0.8689558514365803, "grad_norm": 3.1200180053710938, "learning_rate": 0.00021827314039703417, "loss": 0.0956, "step": 6200 }, { "epoch": 0.8690960056061667, "grad_norm": 0.2564171850681305, "learning_rate": 0.00021825878976321453, "loss": 0.0764, "step": 6201 }, { "epoch": 0.8692361597757533, "grad_norm": 0.5688282251358032, "learning_rate": 0.00021824443912939485, "loss": 0.1047, "step": 6202 }, { "epoch": 0.8693763139453399, "grad_norm": 0.33884042501449585, "learning_rate": 0.00021823008849557518, "loss": 0.0617, "step": 6203 }, { "epoch": 0.8695164681149264, "grad_norm": 0.5766940116882324, "learning_rate": 0.00021821573786175557, "loss": 0.1023, "step": 6204 }, { "epoch": 0.869656622284513, "grad_norm": 0.3584175109863281, "learning_rate": 0.0002182013872279359, "loss": 0.1451, "step": 6205 }, { "epoch": 0.8697967764540995, "grad_norm": 0.12489283829927444, "learning_rate": 0.00021818703659411622, "loss": 0.0218, "step": 6206 }, { "epoch": 0.869936930623686, "grad_norm": 0.539472758769989, "learning_rate": 0.00021817268596029655, "loss": 0.1171, "step": 6207 }, { "epoch": 0.8700770847932726, "grad_norm": 0.2593488097190857, "learning_rate": 0.0002181583353264769, "loss": 0.0935, "step": 6208 }, { "epoch": 0.8702172389628592, "grad_norm": 0.683388352394104, "learning_rate": 0.00021814398469265724, "loss": 0.0976, "step": 6209 }, { "epoch": 0.8703573931324456, "grad_norm": 0.19402046501636505, "learning_rate": 0.00021812963405883756, "loss": 0.0381, "step": 6210 }, { "epoch": 0.8704975473020322, "grad_norm": 0.2791079580783844, "learning_rate": 0.00021811528342501792, "loss": 0.054, "step": 6211 }, { "epoch": 0.8706377014716188, "grad_norm": 0.5183849334716797, "learning_rate": 0.00021810093279119825, "loss": 0.0672, "step": 6212 }, { "epoch": 0.8707778556412054, "grad_norm": 0.579476535320282, "learning_rate": 0.00021808658215737858, "loss": 0.1057, "step": 6213 }, { "epoch": 0.8709180098107919, 
"grad_norm": 0.3718048334121704, "learning_rate": 0.00021807223152355896, "loss": 0.0475, "step": 6214 }, { "epoch": 0.8710581639803784, "grad_norm": 0.25893765687942505, "learning_rate": 0.0002180578808897393, "loss": 0.0448, "step": 6215 }, { "epoch": 0.871198318149965, "grad_norm": 0.2760825753211975, "learning_rate": 0.00021804353025591962, "loss": 0.0716, "step": 6216 }, { "epoch": 0.8713384723195515, "grad_norm": 0.47999587655067444, "learning_rate": 0.00021802917962209997, "loss": 0.1643, "step": 6217 }, { "epoch": 0.8714786264891381, "grad_norm": 0.5208139419555664, "learning_rate": 0.0002180148289882803, "loss": 0.0716, "step": 6218 }, { "epoch": 0.8716187806587246, "grad_norm": 0.4334179162979126, "learning_rate": 0.00021800047835446063, "loss": 0.0715, "step": 6219 }, { "epoch": 0.8717589348283111, "grad_norm": 0.18401111662387848, "learning_rate": 0.00021798612772064099, "loss": 0.0283, "step": 6220 }, { "epoch": 0.8718990889978977, "grad_norm": 0.6516224145889282, "learning_rate": 0.00021797177708682131, "loss": 0.1209, "step": 6221 }, { "epoch": 0.8720392431674843, "grad_norm": 0.29093581438064575, "learning_rate": 0.00021795742645300164, "loss": 0.0601, "step": 6222 }, { "epoch": 0.8721793973370707, "grad_norm": 0.6586615443229675, "learning_rate": 0.00021794307581918197, "loss": 0.1371, "step": 6223 }, { "epoch": 0.8723195515066573, "grad_norm": 0.20381219685077667, "learning_rate": 0.00021792872518536235, "loss": 0.0353, "step": 6224 }, { "epoch": 0.8724597056762439, "grad_norm": 0.2556091248989105, "learning_rate": 0.00021791437455154268, "loss": 0.0302, "step": 6225 }, { "epoch": 0.8725998598458304, "grad_norm": 0.320342093706131, "learning_rate": 0.000217900023917723, "loss": 0.0525, "step": 6226 }, { "epoch": 0.872740014015417, "grad_norm": 0.4603051543235779, "learning_rate": 0.00021788567328390337, "loss": 0.0499, "step": 6227 }, { "epoch": 0.8728801681850035, "grad_norm": 0.4017927646636963, "learning_rate": 0.0002178713226500837, "loss": 
0.0705, "step": 6228 }, { "epoch": 0.87302032235459, "grad_norm": 0.3869698941707611, "learning_rate": 0.00021785697201626402, "loss": 0.0737, "step": 6229 }, { "epoch": 0.8731604765241766, "grad_norm": 0.6222916841506958, "learning_rate": 0.00021784262138244438, "loss": 0.0762, "step": 6230 }, { "epoch": 0.8733006306937632, "grad_norm": 0.6190270185470581, "learning_rate": 0.0002178282707486247, "loss": 0.1041, "step": 6231 }, { "epoch": 0.8734407848633496, "grad_norm": 0.4681747853755951, "learning_rate": 0.00021781392011480504, "loss": 0.0545, "step": 6232 }, { "epoch": 0.8735809390329362, "grad_norm": 0.24020327627658844, "learning_rate": 0.0002177995694809854, "loss": 0.0371, "step": 6233 }, { "epoch": 0.8737210932025228, "grad_norm": 0.46187299489974976, "learning_rate": 0.00021778521884716572, "loss": 0.1199, "step": 6234 }, { "epoch": 0.8738612473721094, "grad_norm": 0.1926620900630951, "learning_rate": 0.00021777086821334605, "loss": 0.0254, "step": 6235 }, { "epoch": 0.8740014015416958, "grad_norm": 0.5401365160942078, "learning_rate": 0.00021775651757952643, "loss": 0.102, "step": 6236 }, { "epoch": 0.8741415557112824, "grad_norm": 0.676092267036438, "learning_rate": 0.00021774216694570676, "loss": 0.0826, "step": 6237 }, { "epoch": 0.874281709880869, "grad_norm": 0.34450727701187134, "learning_rate": 0.0002177278163118871, "loss": 0.0746, "step": 6238 }, { "epoch": 0.8744218640504555, "grad_norm": 0.37508848309516907, "learning_rate": 0.00021771346567806744, "loss": 0.0597, "step": 6239 }, { "epoch": 0.8745620182200421, "grad_norm": 0.4466211795806885, "learning_rate": 0.00021769911504424777, "loss": 0.068, "step": 6240 }, { "epoch": 0.8747021723896286, "grad_norm": 0.512169361114502, "learning_rate": 0.0002176847644104281, "loss": 0.0848, "step": 6241 }, { "epoch": 0.8748423265592151, "grad_norm": 0.18908928334712982, "learning_rate": 0.00021767041377660843, "loss": 0.0126, "step": 6242 }, { "epoch": 0.8749824807288017, "grad_norm": 
0.22945968806743622, "learning_rate": 0.00021765606314278879, "loss": 0.0202, "step": 6243 }, { "epoch": 0.8751226348983883, "grad_norm": 0.7049269676208496, "learning_rate": 0.00021764171250896911, "loss": 0.1779, "step": 6244 }, { "epoch": 0.8752627890679747, "grad_norm": 0.34868231415748596, "learning_rate": 0.00021762736187514944, "loss": 0.0205, "step": 6245 }, { "epoch": 0.8754029432375613, "grad_norm": 0.4349097013473511, "learning_rate": 0.00021761301124132983, "loss": 0.0432, "step": 6246 }, { "epoch": 0.8755430974071479, "grad_norm": 1.3755170106887817, "learning_rate": 0.00021759866060751015, "loss": 0.0657, "step": 6247 }, { "epoch": 0.8756832515767344, "grad_norm": 1.2230827808380127, "learning_rate": 0.00021758430997369048, "loss": 0.1141, "step": 6248 }, { "epoch": 0.875823405746321, "grad_norm": 0.8426931500434875, "learning_rate": 0.00021756995933987084, "loss": 0.1539, "step": 6249 }, { "epoch": 0.8759635599159075, "grad_norm": 1.4148598909378052, "learning_rate": 0.00021755560870605117, "loss": 0.2293, "step": 6250 }, { "epoch": 0.876103714085494, "grad_norm": 0.32737067341804504, "learning_rate": 0.0002175412580722315, "loss": 0.0927, "step": 6251 }, { "epoch": 0.8762438682550806, "grad_norm": 0.6828939318656921, "learning_rate": 0.00021752690743841185, "loss": 0.1564, "step": 6252 }, { "epoch": 0.8763840224246672, "grad_norm": 0.5200299620628357, "learning_rate": 0.00021751255680459218, "loss": 0.1048, "step": 6253 }, { "epoch": 0.8765241765942536, "grad_norm": 0.6152176856994629, "learning_rate": 0.0002174982061707725, "loss": 0.146, "step": 6254 }, { "epoch": 0.8766643307638402, "grad_norm": 0.5028912425041199, "learning_rate": 0.0002174838555369529, "loss": 0.0702, "step": 6255 }, { "epoch": 0.8768044849334268, "grad_norm": 0.260227769613266, "learning_rate": 0.00021746950490313322, "loss": 0.0394, "step": 6256 }, { "epoch": 0.8769446391030133, "grad_norm": 0.5616887807846069, "learning_rate": 0.00021745515426931355, "loss": 0.1816, "step": 
6257 }, { "epoch": 0.8770847932725998, "grad_norm": 0.20666776597499847, "learning_rate": 0.00021744080363549388, "loss": 0.0458, "step": 6258 }, { "epoch": 0.8772249474421864, "grad_norm": 0.2023920863866806, "learning_rate": 0.00021742645300167423, "loss": 0.0339, "step": 6259 }, { "epoch": 0.877365101611773, "grad_norm": 0.6767168045043945, "learning_rate": 0.00021741210236785456, "loss": 0.1386, "step": 6260 }, { "epoch": 0.8775052557813595, "grad_norm": 0.3841035068035126, "learning_rate": 0.0002173977517340349, "loss": 0.0836, "step": 6261 }, { "epoch": 0.8776454099509461, "grad_norm": 0.2366967350244522, "learning_rate": 0.00021738340110021525, "loss": 0.0452, "step": 6262 }, { "epoch": 0.8777855641205325, "grad_norm": 0.6210862994194031, "learning_rate": 0.00021736905046639557, "loss": 0.143, "step": 6263 }, { "epoch": 0.8779257182901191, "grad_norm": 0.5211275219917297, "learning_rate": 0.0002173546998325759, "loss": 0.159, "step": 6264 }, { "epoch": 0.8780658724597057, "grad_norm": 0.2777138352394104, "learning_rate": 0.00021734034919875626, "loss": 0.0962, "step": 6265 }, { "epoch": 0.8782060266292923, "grad_norm": 0.3432758152484894, "learning_rate": 0.0002173259985649366, "loss": 0.1401, "step": 6266 }, { "epoch": 0.8783461807988787, "grad_norm": 0.2281433790922165, "learning_rate": 0.00021731164793111692, "loss": 0.0452, "step": 6267 }, { "epoch": 0.8784863349684653, "grad_norm": 0.12181345373392105, "learning_rate": 0.0002172972972972973, "loss": 0.0238, "step": 6268 }, { "epoch": 0.8786264891380519, "grad_norm": 0.18437130749225616, "learning_rate": 0.00021728294666347763, "loss": 0.0315, "step": 6269 }, { "epoch": 0.8787666433076384, "grad_norm": 0.40676167607307434, "learning_rate": 0.00021726859602965796, "loss": 0.0694, "step": 6270 }, { "epoch": 0.878906797477225, "grad_norm": 0.6529611349105835, "learning_rate": 0.0002172542453958383, "loss": 0.0652, "step": 6271 }, { "epoch": 0.8790469516468115, "grad_norm": 0.3763105869293213, 
"learning_rate": 0.00021723989476201864, "loss": 0.0785, "step": 6272 }, { "epoch": 0.879187105816398, "grad_norm": 0.5702271461486816, "learning_rate": 0.00021722554412819897, "loss": 0.1388, "step": 6273 }, { "epoch": 0.8793272599859846, "grad_norm": 0.39969533681869507, "learning_rate": 0.00021721119349437932, "loss": 0.084, "step": 6274 }, { "epoch": 0.8794674141555712, "grad_norm": 0.3073981702327728, "learning_rate": 0.00021719684286055965, "loss": 0.045, "step": 6275 }, { "epoch": 0.8796075683251576, "grad_norm": 0.49991291761398315, "learning_rate": 0.00021718249222673998, "loss": 0.0778, "step": 6276 }, { "epoch": 0.8797477224947442, "grad_norm": 0.2920743525028229, "learning_rate": 0.0002171681415929203, "loss": 0.0828, "step": 6277 }, { "epoch": 0.8798878766643308, "grad_norm": 0.33542996644973755, "learning_rate": 0.0002171537909591007, "loss": 0.047, "step": 6278 }, { "epoch": 0.8800280308339173, "grad_norm": 0.4488348364830017, "learning_rate": 0.00021713944032528102, "loss": 0.0976, "step": 6279 }, { "epoch": 0.8801681850035038, "grad_norm": 0.2969159185886383, "learning_rate": 0.00021712508969146135, "loss": 0.0358, "step": 6280 }, { "epoch": 0.8803083391730904, "grad_norm": 0.3600703775882721, "learning_rate": 0.0002171107390576417, "loss": 0.1016, "step": 6281 }, { "epoch": 0.8804484933426769, "grad_norm": 0.6032580733299255, "learning_rate": 0.00021709638842382203, "loss": 0.0852, "step": 6282 }, { "epoch": 0.8805886475122635, "grad_norm": 0.4375476539134979, "learning_rate": 0.00021708203779000236, "loss": 0.0629, "step": 6283 }, { "epoch": 0.8807288016818501, "grad_norm": 0.5672846436500549, "learning_rate": 0.00021706768715618272, "loss": 0.0732, "step": 6284 }, { "epoch": 0.8808689558514365, "grad_norm": 0.32331809401512146, "learning_rate": 0.00021705333652236305, "loss": 0.0801, "step": 6285 }, { "epoch": 0.8810091100210231, "grad_norm": 0.335155189037323, "learning_rate": 0.00021703898588854337, "loss": 0.0378, "step": 6286 }, { "epoch": 
0.8811492641906097, "grad_norm": 0.29419201612472534, "learning_rate": 0.00021702463525472376, "loss": 0.0489, "step": 6287 }, { "epoch": 0.8812894183601963, "grad_norm": 0.6636062860488892, "learning_rate": 0.00021701028462090409, "loss": 0.1538, "step": 6288 }, { "epoch": 0.8814295725297827, "grad_norm": 0.45243704319000244, "learning_rate": 0.00021699593398708441, "loss": 0.0546, "step": 6289 }, { "epoch": 0.8815697266993693, "grad_norm": 0.7146409153938293, "learning_rate": 0.00021698158335326477, "loss": 0.0798, "step": 6290 }, { "epoch": 0.8817098808689559, "grad_norm": 0.5848540663719177, "learning_rate": 0.0002169672327194451, "loss": 0.0477, "step": 6291 }, { "epoch": 0.8818500350385424, "grad_norm": 0.32826846837997437, "learning_rate": 0.00021695288208562543, "loss": 0.0209, "step": 6292 }, { "epoch": 0.881990189208129, "grad_norm": 0.650705873966217, "learning_rate": 0.00021693853145180576, "loss": 0.0637, "step": 6293 }, { "epoch": 0.8821303433777155, "grad_norm": 0.804492712020874, "learning_rate": 0.0002169241808179861, "loss": 0.1327, "step": 6294 }, { "epoch": 0.882270497547302, "grad_norm": 0.38751909136772156, "learning_rate": 0.00021690983018416644, "loss": 0.0385, "step": 6295 }, { "epoch": 0.8824106517168886, "grad_norm": 0.6429628133773804, "learning_rate": 0.00021689547955034677, "loss": 0.0569, "step": 6296 }, { "epoch": 0.8825508058864752, "grad_norm": 0.49627959728240967, "learning_rate": 0.00021688112891652712, "loss": 0.0756, "step": 6297 }, { "epoch": 0.8826909600560616, "grad_norm": 2.548335552215576, "learning_rate": 0.00021686677828270745, "loss": 0.2245, "step": 6298 }, { "epoch": 0.8828311142256482, "grad_norm": 0.9784413576126099, "learning_rate": 0.00021685242764888778, "loss": 0.1249, "step": 6299 }, { "epoch": 0.8829712683952348, "grad_norm": 1.6628530025482178, "learning_rate": 0.00021683807701506816, "loss": 0.1621, "step": 6300 }, { "epoch": 0.8831114225648213, "grad_norm": 0.5426815748214722, "learning_rate": 
0.0002168237263812485, "loss": 0.1478, "step": 6301 }, { "epoch": 0.8832515767344078, "grad_norm": 0.31795185804367065, "learning_rate": 0.00021680937574742882, "loss": 0.0959, "step": 6302 }, { "epoch": 0.8833917309039944, "grad_norm": 0.23255884647369385, "learning_rate": 0.00021679502511360918, "loss": 0.0343, "step": 6303 }, { "epoch": 0.8835318850735809, "grad_norm": 0.37613925337791443, "learning_rate": 0.0002167806744797895, "loss": 0.1066, "step": 6304 }, { "epoch": 0.8836720392431675, "grad_norm": 0.4515974819660187, "learning_rate": 0.00021676632384596983, "loss": 0.0811, "step": 6305 }, { "epoch": 0.8838121934127541, "grad_norm": 0.24465088546276093, "learning_rate": 0.0002167519732121502, "loss": 0.0663, "step": 6306 }, { "epoch": 0.8839523475823405, "grad_norm": 0.3040670156478882, "learning_rate": 0.00021673762257833052, "loss": 0.0625, "step": 6307 }, { "epoch": 0.8840925017519271, "grad_norm": 0.4737105369567871, "learning_rate": 0.00021672327194451085, "loss": 0.1029, "step": 6308 }, { "epoch": 0.8842326559215137, "grad_norm": 0.6553725004196167, "learning_rate": 0.00021670892131069123, "loss": 0.1536, "step": 6309 }, { "epoch": 0.8843728100911002, "grad_norm": 0.4673299789428711, "learning_rate": 0.00021669457067687156, "loss": 0.0418, "step": 6310 }, { "epoch": 0.8845129642606867, "grad_norm": 0.3112906217575073, "learning_rate": 0.0002166802200430519, "loss": 0.0972, "step": 6311 }, { "epoch": 0.8846531184302733, "grad_norm": 0.3027682602405548, "learning_rate": 0.00021666586940923222, "loss": 0.1196, "step": 6312 }, { "epoch": 0.8847932725998598, "grad_norm": 0.48045089840888977, "learning_rate": 0.00021665151877541257, "loss": 0.059, "step": 6313 }, { "epoch": 0.8849334267694464, "grad_norm": 0.45420539379119873, "learning_rate": 0.0002166371681415929, "loss": 0.0886, "step": 6314 }, { "epoch": 0.885073580939033, "grad_norm": 0.2544019818305969, "learning_rate": 0.00021662281750777323, "loss": 0.0671, "step": 6315 }, { "epoch": 
0.8852137351086194, "grad_norm": 0.2321062833070755, "learning_rate": 0.00021660846687395358, "loss": 0.0608, "step": 6316 }, { "epoch": 0.885353889278206, "grad_norm": 0.5656206011772156, "learning_rate": 0.0002165941162401339, "loss": 0.0795, "step": 6317 }, { "epoch": 0.8854940434477926, "grad_norm": 0.19876818358898163, "learning_rate": 0.00021657976560631424, "loss": 0.0219, "step": 6318 }, { "epoch": 0.8856341976173792, "grad_norm": 0.1730661392211914, "learning_rate": 0.00021656541497249462, "loss": 0.0294, "step": 6319 }, { "epoch": 0.8857743517869656, "grad_norm": 0.1284327358007431, "learning_rate": 0.00021655106433867495, "loss": 0.0249, "step": 6320 }, { "epoch": 0.8859145059565522, "grad_norm": 0.34040510654449463, "learning_rate": 0.00021653671370485528, "loss": 0.0564, "step": 6321 }, { "epoch": 0.8860546601261388, "grad_norm": 0.372646689414978, "learning_rate": 0.00021652236307103564, "loss": 0.0531, "step": 6322 }, { "epoch": 0.8861948142957253, "grad_norm": 0.41042637825012207, "learning_rate": 0.00021650801243721597, "loss": 0.0355, "step": 6323 }, { "epoch": 0.8863349684653118, "grad_norm": 0.3855037987232208, "learning_rate": 0.0002164936618033963, "loss": 0.071, "step": 6324 }, { "epoch": 0.8864751226348984, "grad_norm": 0.37636715173721313, "learning_rate": 0.00021647931116957665, "loss": 0.0653, "step": 6325 }, { "epoch": 0.8866152768044849, "grad_norm": 0.46927669644355774, "learning_rate": 0.00021646496053575698, "loss": 0.1323, "step": 6326 }, { "epoch": 0.8867554309740715, "grad_norm": 0.37175631523132324, "learning_rate": 0.0002164506099019373, "loss": 0.0587, "step": 6327 }, { "epoch": 0.8868955851436581, "grad_norm": 0.21671995520591736, "learning_rate": 0.00021643625926811764, "loss": 0.0607, "step": 6328 }, { "epoch": 0.8870357393132445, "grad_norm": 0.4263468384742737, "learning_rate": 0.000216421908634298, "loss": 0.0625, "step": 6329 }, { "epoch": 0.8871758934828311, "grad_norm": 0.5397272706031799, "learning_rate": 
0.00021640755800047832, "loss": 0.0685, "step": 6330 }, { "epoch": 0.8873160476524177, "grad_norm": 0.35666197538375854, "learning_rate": 0.00021639320736665865, "loss": 0.0717, "step": 6331 }, { "epoch": 0.8874562018220042, "grad_norm": 0.5617634654045105, "learning_rate": 0.00021637885673283903, "loss": 0.1761, "step": 6332 }, { "epoch": 0.8875963559915907, "grad_norm": 0.21159803867340088, "learning_rate": 0.00021636450609901936, "loss": 0.0305, "step": 6333 }, { "epoch": 0.8877365101611773, "grad_norm": 0.40066707134246826, "learning_rate": 0.0002163501554651997, "loss": 0.0715, "step": 6334 }, { "epoch": 0.8878766643307638, "grad_norm": 0.6241999864578247, "learning_rate": 0.00021633580483138004, "loss": 0.0892, "step": 6335 }, { "epoch": 0.8880168185003504, "grad_norm": 0.37987348437309265, "learning_rate": 0.00021632145419756037, "loss": 0.0755, "step": 6336 }, { "epoch": 0.888156972669937, "grad_norm": 0.4611404836177826, "learning_rate": 0.0002163071035637407, "loss": 0.067, "step": 6337 }, { "epoch": 0.8882971268395234, "grad_norm": 0.33832624554634094, "learning_rate": 0.00021629275292992106, "loss": 0.0771, "step": 6338 }, { "epoch": 0.88843728100911, "grad_norm": 0.4897250235080719, "learning_rate": 0.00021627840229610138, "loss": 0.1297, "step": 6339 }, { "epoch": 0.8885774351786966, "grad_norm": 0.5794617533683777, "learning_rate": 0.0002162640516622817, "loss": 0.0511, "step": 6340 }, { "epoch": 0.8887175893482832, "grad_norm": 1.030187964439392, "learning_rate": 0.0002162497010284621, "loss": 0.1939, "step": 6341 }, { "epoch": 0.8888577435178696, "grad_norm": 0.46357640624046326, "learning_rate": 0.00021623535039464242, "loss": 0.0546, "step": 6342 }, { "epoch": 0.8889978976874562, "grad_norm": 0.49379438161849976, "learning_rate": 0.00021622099976082275, "loss": 0.0248, "step": 6343 }, { "epoch": 0.8891380518570428, "grad_norm": 0.34207683801651, "learning_rate": 0.0002162066491270031, "loss": 0.0262, "step": 6344 }, { "epoch": 0.8892782060266293, 
"grad_norm": 0.42017680406570435, "learning_rate": 0.00021619229849318344, "loss": 0.0757, "step": 6345 }, { "epoch": 0.8894183601962158, "grad_norm": 0.8866990804672241, "learning_rate": 0.00021617794785936377, "loss": 0.1058, "step": 6346 }, { "epoch": 0.8895585143658024, "grad_norm": 0.2930103838443756, "learning_rate": 0.0002161635972255441, "loss": 0.0172, "step": 6347 }, { "epoch": 0.8896986685353889, "grad_norm": 0.7346456050872803, "learning_rate": 0.00021614924659172445, "loss": 0.1232, "step": 6348 }, { "epoch": 0.8898388227049755, "grad_norm": 1.468702793121338, "learning_rate": 0.00021613489595790478, "loss": 0.1692, "step": 6349 }, { "epoch": 0.8899789768745621, "grad_norm": 1.1133503913879395, "learning_rate": 0.0002161205453240851, "loss": 0.4245, "step": 6350 }, { "epoch": 0.8901191310441485, "grad_norm": 0.47393229603767395, "learning_rate": 0.0002161061946902655, "loss": 0.0605, "step": 6351 }, { "epoch": 0.8902592852137351, "grad_norm": 0.35352733731269836, "learning_rate": 0.00021609184405644582, "loss": 0.0644, "step": 6352 }, { "epoch": 0.8903994393833217, "grad_norm": 0.29689136147499084, "learning_rate": 0.00021607749342262615, "loss": 0.0453, "step": 6353 }, { "epoch": 0.8905395935529082, "grad_norm": 0.3777115046977997, "learning_rate": 0.0002160631427888065, "loss": 0.0884, "step": 6354 }, { "epoch": 0.8906797477224947, "grad_norm": 0.281775563955307, "learning_rate": 0.00021604879215498683, "loss": 0.0236, "step": 6355 }, { "epoch": 0.8908199018920813, "grad_norm": 0.3897337019443512, "learning_rate": 0.00021603444152116716, "loss": 0.0967, "step": 6356 }, { "epoch": 0.8909600560616678, "grad_norm": 0.4497261643409729, "learning_rate": 0.00021602009088734752, "loss": 0.0868, "step": 6357 }, { "epoch": 0.8911002102312544, "grad_norm": 0.31632840633392334, "learning_rate": 0.00021600574025352784, "loss": 0.0594, "step": 6358 }, { "epoch": 0.891240364400841, "grad_norm": 0.5239577889442444, "learning_rate": 0.00021599138961970817, "loss": 
0.079, "step": 6359 }, { "epoch": 0.8913805185704274, "grad_norm": 0.45975571870803833, "learning_rate": 0.00021597703898588853, "loss": 0.09, "step": 6360 }, { "epoch": 0.891520672740014, "grad_norm": 1.152085781097412, "learning_rate": 0.00021596268835206886, "loss": 0.1501, "step": 6361 }, { "epoch": 0.8916608269096006, "grad_norm": 0.2481631636619568, "learning_rate": 0.00021594833771824919, "loss": 0.0623, "step": 6362 }, { "epoch": 0.8918009810791871, "grad_norm": 0.33675843477249146, "learning_rate": 0.00021593398708442951, "loss": 0.0868, "step": 6363 }, { "epoch": 0.8919411352487736, "grad_norm": 0.6384389400482178, "learning_rate": 0.0002159196364506099, "loss": 0.1124, "step": 6364 }, { "epoch": 0.8920812894183602, "grad_norm": 1.1308022737503052, "learning_rate": 0.00021590528581679023, "loss": 0.0278, "step": 6365 }, { "epoch": 0.8922214435879467, "grad_norm": 0.460615873336792, "learning_rate": 0.00021589093518297055, "loss": 0.1178, "step": 6366 }, { "epoch": 0.8923615977575333, "grad_norm": 0.18188369274139404, "learning_rate": 0.0002158765845491509, "loss": 0.0795, "step": 6367 }, { "epoch": 0.8925017519271198, "grad_norm": 0.29433348774909973, "learning_rate": 0.00021586223391533124, "loss": 0.0578, "step": 6368 }, { "epoch": 0.8926419060967064, "grad_norm": 0.1542975902557373, "learning_rate": 0.00021584788328151157, "loss": 0.0268, "step": 6369 }, { "epoch": 0.8927820602662929, "grad_norm": 0.37385544180870056, "learning_rate": 0.00021583353264769192, "loss": 0.0967, "step": 6370 }, { "epoch": 0.8929222144358795, "grad_norm": 0.1319681704044342, "learning_rate": 0.00021581918201387225, "loss": 0.0155, "step": 6371 }, { "epoch": 0.8930623686054661, "grad_norm": 0.48037686944007874, "learning_rate": 0.00021580483138005258, "loss": 0.0875, "step": 6372 }, { "epoch": 0.8932025227750525, "grad_norm": 0.18546746671199799, "learning_rate": 0.00021579048074623296, "loss": 0.0361, "step": 6373 }, { "epoch": 0.8933426769446391, "grad_norm": 
0.2430107295513153, "learning_rate": 0.0002157761301124133, "loss": 0.0502, "step": 6374 }, { "epoch": 0.8934828311142257, "grad_norm": 0.41412192583084106, "learning_rate": 0.00021576177947859362, "loss": 0.1078, "step": 6375 }, { "epoch": 0.8936229852838122, "grad_norm": 0.5208156108856201, "learning_rate": 0.00021574742884477398, "loss": 0.0711, "step": 6376 }, { "epoch": 0.8937631394533987, "grad_norm": 0.30979371070861816, "learning_rate": 0.0002157330782109543, "loss": 0.0524, "step": 6377 }, { "epoch": 0.8939032936229853, "grad_norm": 0.7480751872062683, "learning_rate": 0.00021571872757713463, "loss": 0.1444, "step": 6378 }, { "epoch": 0.8940434477925718, "grad_norm": 0.5892516374588013, "learning_rate": 0.000215704376943315, "loss": 0.1176, "step": 6379 }, { "epoch": 0.8941836019621584, "grad_norm": 0.3465835154056549, "learning_rate": 0.00021569002630949532, "loss": 0.0706, "step": 6380 }, { "epoch": 0.894323756131745, "grad_norm": 0.469944030046463, "learning_rate": 0.00021567567567567565, "loss": 0.1279, "step": 6381 }, { "epoch": 0.8944639103013314, "grad_norm": 0.6023955345153809, "learning_rate": 0.00021566132504185597, "loss": 0.0629, "step": 6382 }, { "epoch": 0.894604064470918, "grad_norm": 1.1703921556472778, "learning_rate": 0.00021564697440803636, "loss": 0.1073, "step": 6383 }, { "epoch": 0.8947442186405046, "grad_norm": 0.4299857020378113, "learning_rate": 0.00021563262377421668, "loss": 0.0607, "step": 6384 }, { "epoch": 0.8948843728100911, "grad_norm": 0.18168188631534576, "learning_rate": 0.000215618273140397, "loss": 0.0464, "step": 6385 }, { "epoch": 0.8950245269796776, "grad_norm": 0.26161521673202515, "learning_rate": 0.00021560392250657737, "loss": 0.0708, "step": 6386 }, { "epoch": 0.8951646811492642, "grad_norm": 0.37883812189102173, "learning_rate": 0.0002155895718727577, "loss": 0.0906, "step": 6387 }, { "epoch": 0.8953048353188507, "grad_norm": 0.2373247742652893, "learning_rate": 0.00021557522123893803, "loss": 0.0216, "step": 
6388 }, { "epoch": 0.8954449894884373, "grad_norm": 0.5080769658088684, "learning_rate": 0.00021556087060511838, "loss": 0.0593, "step": 6389 }, { "epoch": 0.8955851436580238, "grad_norm": 0.5784989595413208, "learning_rate": 0.0002155465199712987, "loss": 0.084, "step": 6390 }, { "epoch": 0.8957252978276103, "grad_norm": 0.8219420909881592, "learning_rate": 0.00021553216933747904, "loss": 0.0639, "step": 6391 }, { "epoch": 0.8958654519971969, "grad_norm": 0.28659573197364807, "learning_rate": 0.0002155178187036594, "loss": 0.0403, "step": 6392 }, { "epoch": 0.8960056061667835, "grad_norm": 0.20385098457336426, "learning_rate": 0.00021550346806983972, "loss": 0.0229, "step": 6393 }, { "epoch": 0.8961457603363701, "grad_norm": 0.3711751103401184, "learning_rate": 0.00021548911743602005, "loss": 0.0559, "step": 6394 }, { "epoch": 0.8962859145059565, "grad_norm": 0.9714674949645996, "learning_rate": 0.00021547476680220043, "loss": 0.3136, "step": 6395 }, { "epoch": 0.8964260686755431, "grad_norm": 1.0008267164230347, "learning_rate": 0.00021546041616838076, "loss": 0.076, "step": 6396 }, { "epoch": 0.8965662228451297, "grad_norm": 1.8442028760910034, "learning_rate": 0.0002154460655345611, "loss": 0.1535, "step": 6397 }, { "epoch": 0.8967063770147162, "grad_norm": 1.917199969291687, "learning_rate": 0.00021543171490074142, "loss": 0.2855, "step": 6398 }, { "epoch": 0.8968465311843027, "grad_norm": 1.8050737380981445, "learning_rate": 0.00021541736426692178, "loss": 0.2695, "step": 6399 }, { "epoch": 0.8969866853538893, "grad_norm": 1.1386626958847046, "learning_rate": 0.0002154030136331021, "loss": 0.1617, "step": 6400 }, { "epoch": 0.8971268395234758, "grad_norm": 0.4858371317386627, "learning_rate": 0.00021538866299928243, "loss": 0.0469, "step": 6401 }, { "epoch": 0.8972669936930624, "grad_norm": 0.2051311731338501, "learning_rate": 0.0002153743123654628, "loss": 0.0257, "step": 6402 }, { "epoch": 0.897407147862649, "grad_norm": 0.6071633696556091, "learning_rate": 
0.00021535996173164312, "loss": 0.0948, "step": 6403 }, { "epoch": 0.8975473020322354, "grad_norm": 0.4932337701320648, "learning_rate": 0.00021534561109782345, "loss": 0.0854, "step": 6404 }, { "epoch": 0.897687456201822, "grad_norm": 0.5392564535140991, "learning_rate": 0.00021533126046400383, "loss": 0.0818, "step": 6405 }, { "epoch": 0.8978276103714086, "grad_norm": 0.2504193186759949, "learning_rate": 0.00021531690983018416, "loss": 0.0906, "step": 6406 }, { "epoch": 0.8979677645409951, "grad_norm": 0.20426024496555328, "learning_rate": 0.00021530255919636449, "loss": 0.0632, "step": 6407 }, { "epoch": 0.8981079187105816, "grad_norm": 0.24049171805381775, "learning_rate": 0.00021528820856254484, "loss": 0.0541, "step": 6408 }, { "epoch": 0.8982480728801682, "grad_norm": 0.3818652033805847, "learning_rate": 0.00021527385792872517, "loss": 0.082, "step": 6409 }, { "epoch": 0.8983882270497547, "grad_norm": 0.41560080647468567, "learning_rate": 0.0002152595072949055, "loss": 0.063, "step": 6410 }, { "epoch": 0.8985283812193413, "grad_norm": 0.8477592468261719, "learning_rate": 0.00021524515666108585, "loss": 0.1278, "step": 6411 }, { "epoch": 0.8986685353889278, "grad_norm": 0.6891153454780579, "learning_rate": 0.00021523080602726618, "loss": 0.0605, "step": 6412 }, { "epoch": 0.8988086895585143, "grad_norm": 0.38674548268318176, "learning_rate": 0.0002152164553934465, "loss": 0.0761, "step": 6413 }, { "epoch": 0.8989488437281009, "grad_norm": 0.3405720591545105, "learning_rate": 0.0002152021047596269, "loss": 0.094, "step": 6414 }, { "epoch": 0.8990889978976875, "grad_norm": 0.4232446551322937, "learning_rate": 0.00021518775412580722, "loss": 0.0762, "step": 6415 }, { "epoch": 0.899229152067274, "grad_norm": 0.49405723810195923, "learning_rate": 0.00021517340349198755, "loss": 0.1253, "step": 6416 }, { "epoch": 0.8993693062368605, "grad_norm": 0.6300173997879028, "learning_rate": 0.00021515905285816788, "loss": 0.1427, "step": 6417 }, { "epoch": 
0.8995094604064471, "grad_norm": 0.36156022548675537, "learning_rate": 0.00021514470222434824, "loss": 0.0829, "step": 6418 }, { "epoch": 0.8996496145760337, "grad_norm": 0.3899405002593994, "learning_rate": 0.00021513035159052856, "loss": 0.063, "step": 6419 }, { "epoch": 0.8997897687456202, "grad_norm": 0.4408261775970459, "learning_rate": 0.0002151160009567089, "loss": 0.1094, "step": 6420 }, { "epoch": 0.8999299229152067, "grad_norm": 0.3543868362903595, "learning_rate": 0.00021510165032288925, "loss": 0.0594, "step": 6421 }, { "epoch": 0.9000700770847933, "grad_norm": 0.4986155331134796, "learning_rate": 0.00021508729968906958, "loss": 0.0548, "step": 6422 }, { "epoch": 0.9002102312543798, "grad_norm": 0.26845064759254456, "learning_rate": 0.0002150729490552499, "loss": 0.05, "step": 6423 }, { "epoch": 0.9003503854239664, "grad_norm": 0.20656314492225647, "learning_rate": 0.00021505859842143026, "loss": 0.0239, "step": 6424 }, { "epoch": 0.900490539593553, "grad_norm": 0.4421077370643616, "learning_rate": 0.0002150442477876106, "loss": 0.0364, "step": 6425 }, { "epoch": 0.9006306937631394, "grad_norm": 0.5424543023109436, "learning_rate": 0.00021502989715379092, "loss": 0.0692, "step": 6426 }, { "epoch": 0.900770847932726, "grad_norm": 0.4379134178161621, "learning_rate": 0.0002150155465199713, "loss": 0.1015, "step": 6427 }, { "epoch": 0.9009110021023126, "grad_norm": 0.2991301417350769, "learning_rate": 0.00021500119588615163, "loss": 0.0404, "step": 6428 }, { "epoch": 0.9010511562718991, "grad_norm": 0.5305184721946716, "learning_rate": 0.00021498684525233196, "loss": 0.0713, "step": 6429 }, { "epoch": 0.9011913104414856, "grad_norm": 0.286765456199646, "learning_rate": 0.00021497249461851231, "loss": 0.0351, "step": 6430 }, { "epoch": 0.9013314646110722, "grad_norm": 0.2445044219493866, "learning_rate": 0.00021495814398469264, "loss": 0.0795, "step": 6431 }, { "epoch": 0.9014716187806587, "grad_norm": 0.35519471764564514, "learning_rate": 
0.00021494379335087297, "loss": 0.0819, "step": 6432 }, { "epoch": 0.9016117729502453, "grad_norm": 0.2523954212665558, "learning_rate": 0.0002149294427170533, "loss": 0.0937, "step": 6433 }, { "epoch": 0.9017519271198318, "grad_norm": 0.1853904128074646, "learning_rate": 0.00021491509208323366, "loss": 0.025, "step": 6434 }, { "epoch": 0.9018920812894183, "grad_norm": 0.6573611497879028, "learning_rate": 0.00021490074144941398, "loss": 0.1004, "step": 6435 }, { "epoch": 0.9020322354590049, "grad_norm": 0.37836775183677673, "learning_rate": 0.0002148863908155943, "loss": 0.053, "step": 6436 }, { "epoch": 0.9021723896285915, "grad_norm": 0.31566399335861206, "learning_rate": 0.0002148720401817747, "loss": 0.049, "step": 6437 }, { "epoch": 0.902312543798178, "grad_norm": 0.8381720185279846, "learning_rate": 0.00021485768954795502, "loss": 0.1271, "step": 6438 }, { "epoch": 0.9024526979677645, "grad_norm": 0.15564432740211487, "learning_rate": 0.00021484333891413535, "loss": 0.0135, "step": 6439 }, { "epoch": 0.9025928521373511, "grad_norm": 0.6811659336090088, "learning_rate": 0.0002148289882803157, "loss": 0.1118, "step": 6440 }, { "epoch": 0.9027330063069376, "grad_norm": 0.4137628376483917, "learning_rate": 0.00021481463764649604, "loss": 0.0277, "step": 6441 }, { "epoch": 0.9028731604765242, "grad_norm": 0.40326789021492004, "learning_rate": 0.00021480028701267636, "loss": 0.0826, "step": 6442 }, { "epoch": 0.9030133146461107, "grad_norm": 0.3607103228569031, "learning_rate": 0.00021478593637885672, "loss": 0.071, "step": 6443 }, { "epoch": 0.9031534688156972, "grad_norm": 0.5023744106292725, "learning_rate": 0.00021477158574503705, "loss": 0.0638, "step": 6444 }, { "epoch": 0.9032936229852838, "grad_norm": 0.6061773300170898, "learning_rate": 0.00021475723511121738, "loss": 0.046, "step": 6445 }, { "epoch": 0.9034337771548704, "grad_norm": 0.23244105279445648, "learning_rate": 0.00021474288447739776, "loss": 0.0192, "step": 6446 }, { "epoch": 0.9035739313244568, 
"grad_norm": 1.5404292345046997, "learning_rate": 0.0002147285338435781, "loss": 0.0591, "step": 6447 }, { "epoch": 0.9037140854940434, "grad_norm": 0.9891332983970642, "learning_rate": 0.00021471418320975842, "loss": 0.0598, "step": 6448 }, { "epoch": 0.90385423966363, "grad_norm": 1.560807228088379, "learning_rate": 0.00021469983257593877, "loss": 0.1885, "step": 6449 }, { "epoch": 0.9039943938332166, "grad_norm": 0.8083940744400024, "learning_rate": 0.0002146854819421191, "loss": 0.1647, "step": 6450 }, { "epoch": 0.9041345480028031, "grad_norm": 0.27475714683532715, "learning_rate": 0.00021467113130829943, "loss": 0.045, "step": 6451 }, { "epoch": 0.9042747021723896, "grad_norm": 0.5286942720413208, "learning_rate": 0.00021465678067447976, "loss": 0.0777, "step": 6452 }, { "epoch": 0.9044148563419762, "grad_norm": 0.3517037630081177, "learning_rate": 0.00021464243004066011, "loss": 0.0309, "step": 6453 }, { "epoch": 0.9045550105115627, "grad_norm": 0.38065117597579956, "learning_rate": 0.00021462807940684044, "loss": 0.053, "step": 6454 }, { "epoch": 0.9046951646811493, "grad_norm": 0.5434138178825378, "learning_rate": 0.00021461372877302077, "loss": 0.0563, "step": 6455 }, { "epoch": 0.9048353188507358, "grad_norm": 0.7512357234954834, "learning_rate": 0.00021459937813920113, "loss": 0.0961, "step": 6456 }, { "epoch": 0.9049754730203223, "grad_norm": 0.4269784390926361, "learning_rate": 0.00021458502750538146, "loss": 0.0501, "step": 6457 }, { "epoch": 0.9051156271899089, "grad_norm": 0.23223674297332764, "learning_rate": 0.00021457067687156178, "loss": 0.0357, "step": 6458 }, { "epoch": 0.9052557813594955, "grad_norm": 0.2480769008398056, "learning_rate": 0.00021455632623774217, "loss": 0.0333, "step": 6459 }, { "epoch": 0.905395935529082, "grad_norm": 0.18888044357299805, "learning_rate": 0.0002145419756039225, "loss": 0.0504, "step": 6460 }, { "epoch": 0.9055360896986685, "grad_norm": 0.493127703666687, "learning_rate": 0.00021452762497010282, "loss": 
0.1079, "step": 6461 }, { "epoch": 0.9056762438682551, "grad_norm": 0.32565072178840637, "learning_rate": 0.00021451327433628318, "loss": 0.0711, "step": 6462 }, { "epoch": 0.9058163980378416, "grad_norm": 0.48333826661109924, "learning_rate": 0.0002144989237024635, "loss": 0.0641, "step": 6463 }, { "epoch": 0.9059565522074282, "grad_norm": 0.40877336263656616, "learning_rate": 0.00021448457306864384, "loss": 0.0314, "step": 6464 }, { "epoch": 0.9060967063770147, "grad_norm": 0.2629973292350769, "learning_rate": 0.0002144702224348242, "loss": 0.0586, "step": 6465 }, { "epoch": 0.9062368605466012, "grad_norm": 0.37494751811027527, "learning_rate": 0.00021445587180100452, "loss": 0.0958, "step": 6466 }, { "epoch": 0.9063770147161878, "grad_norm": 0.7391054034233093, "learning_rate": 0.00021444152116718485, "loss": 0.121, "step": 6467 }, { "epoch": 0.9065171688857744, "grad_norm": 0.281841903924942, "learning_rate": 0.00021442717053336518, "loss": 0.04, "step": 6468 }, { "epoch": 0.9066573230553608, "grad_norm": 0.3329413831233978, "learning_rate": 0.00021441281989954556, "loss": 0.0551, "step": 6469 }, { "epoch": 0.9067974772249474, "grad_norm": 0.3830040991306305, "learning_rate": 0.0002143984692657259, "loss": 0.0683, "step": 6470 }, { "epoch": 0.906937631394534, "grad_norm": 0.6144102811813354, "learning_rate": 0.00021438411863190622, "loss": 0.1029, "step": 6471 }, { "epoch": 0.9070777855641206, "grad_norm": 0.6852854490280151, "learning_rate": 0.00021436976799808657, "loss": 0.1522, "step": 6472 }, { "epoch": 0.9072179397337071, "grad_norm": 0.3438425064086914, "learning_rate": 0.0002143554173642669, "loss": 0.0777, "step": 6473 }, { "epoch": 0.9073580939032936, "grad_norm": 0.17858552932739258, "learning_rate": 0.00021434106673044723, "loss": 0.0314, "step": 6474 }, { "epoch": 0.9074982480728802, "grad_norm": 0.5745185017585754, "learning_rate": 0.0002143267160966276, "loss": 0.0891, "step": 6475 }, { "epoch": 0.9076384022424667, "grad_norm": 
0.3353009819984436, "learning_rate": 0.00021431236546280792, "loss": 0.0706, "step": 6476 }, { "epoch": 0.9077785564120533, "grad_norm": 0.9571337699890137, "learning_rate": 0.00021429801482898824, "loss": 0.0873, "step": 6477 }, { "epoch": 0.9079187105816398, "grad_norm": 0.31498396396636963, "learning_rate": 0.00021428366419516863, "loss": 0.0399, "step": 6478 }, { "epoch": 0.9080588647512263, "grad_norm": 0.8018702268600464, "learning_rate": 0.00021426931356134896, "loss": 0.127, "step": 6479 }, { "epoch": 0.9081990189208129, "grad_norm": 0.46543970704078674, "learning_rate": 0.00021425496292752928, "loss": 0.0792, "step": 6480 }, { "epoch": 0.9083391730903995, "grad_norm": 0.2422894537448883, "learning_rate": 0.00021424061229370964, "loss": 0.0352, "step": 6481 }, { "epoch": 0.908479327259986, "grad_norm": 0.33804574608802795, "learning_rate": 0.00021422626165988997, "loss": 0.0426, "step": 6482 }, { "epoch": 0.9086194814295725, "grad_norm": 0.22067680954933167, "learning_rate": 0.0002142119110260703, "loss": 0.0618, "step": 6483 }, { "epoch": 0.9087596355991591, "grad_norm": 0.2905866801738739, "learning_rate": 0.00021419756039225065, "loss": 0.0563, "step": 6484 }, { "epoch": 0.9088997897687456, "grad_norm": 0.1399889439344406, "learning_rate": 0.00021418320975843098, "loss": 0.0185, "step": 6485 }, { "epoch": 0.9090399439383322, "grad_norm": 0.6650295853614807, "learning_rate": 0.0002141688591246113, "loss": 0.0672, "step": 6486 }, { "epoch": 0.9091800981079187, "grad_norm": 0.4174114465713501, "learning_rate": 0.00021415450849079164, "loss": 0.0686, "step": 6487 }, { "epoch": 0.9093202522775052, "grad_norm": 0.5293183326721191, "learning_rate": 0.000214140157856972, "loss": 0.0523, "step": 6488 }, { "epoch": 0.9094604064470918, "grad_norm": 0.9272147417068481, "learning_rate": 0.00021412580722315232, "loss": 0.1536, "step": 6489 }, { "epoch": 0.9096005606166784, "grad_norm": 0.19976091384887695, "learning_rate": 0.00021411145658933265, "loss": 0.0331, 
"step": 6490 }, { "epoch": 0.9097407147862648, "grad_norm": 0.2879762649536133, "learning_rate": 0.00021409710595551303, "loss": 0.0335, "step": 6491 }, { "epoch": 0.9098808689558514, "grad_norm": 1.6049736738204956, "learning_rate": 0.00021408275532169336, "loss": 0.2334, "step": 6492 }, { "epoch": 0.910021023125438, "grad_norm": 0.4523888826370239, "learning_rate": 0.0002140684046878737, "loss": 0.1455, "step": 6493 }, { "epoch": 0.9101611772950245, "grad_norm": 0.29907873272895813, "learning_rate": 0.00021405405405405405, "loss": 0.0357, "step": 6494 }, { "epoch": 0.9103013314646111, "grad_norm": 0.7394307255744934, "learning_rate": 0.00021403970342023437, "loss": 0.0545, "step": 6495 }, { "epoch": 0.9104414856341976, "grad_norm": 0.5330753922462463, "learning_rate": 0.0002140253527864147, "loss": 0.2582, "step": 6496 }, { "epoch": 0.9105816398037841, "grad_norm": 1.8949971199035645, "learning_rate": 0.00021401100215259506, "loss": 0.1642, "step": 6497 }, { "epoch": 0.9107217939733707, "grad_norm": 0.9914364218711853, "learning_rate": 0.0002139966515187754, "loss": 0.0872, "step": 6498 }, { "epoch": 0.9108619481429573, "grad_norm": 0.4456580877304077, "learning_rate": 0.00021398230088495572, "loss": 0.041, "step": 6499 }, { "epoch": 0.9110021023125437, "grad_norm": 2.9075493812561035, "learning_rate": 0.0002139679502511361, "loss": 0.1986, "step": 6500 }, { "epoch": 0.9111422564821303, "grad_norm": 0.4145531952381134, "learning_rate": 0.00021395359961731643, "loss": 0.1075, "step": 6501 }, { "epoch": 0.9112824106517169, "grad_norm": 0.8073126673698425, "learning_rate": 0.00021393924898349676, "loss": 0.1935, "step": 6502 }, { "epoch": 0.9114225648213035, "grad_norm": 0.218635693192482, "learning_rate": 0.00021392489834967708, "loss": 0.0496, "step": 6503 }, { "epoch": 0.91156271899089, "grad_norm": 0.538770318031311, "learning_rate": 0.00021391054771585744, "loss": 0.0942, "step": 6504 }, { "epoch": 0.9117028731604765, "grad_norm": 0.38798198103904724, 
"learning_rate": 0.00021389619708203777, "loss": 0.0702, "step": 6505 }, { "epoch": 0.9118430273300631, "grad_norm": 0.30979520082473755, "learning_rate": 0.0002138818464482181, "loss": 0.082, "step": 6506 }, { "epoch": 0.9119831814996496, "grad_norm": 0.3060492277145386, "learning_rate": 0.00021386749581439845, "loss": 0.0514, "step": 6507 }, { "epoch": 0.9121233356692362, "grad_norm": 0.48615023493766785, "learning_rate": 0.00021385314518057878, "loss": 0.0858, "step": 6508 }, { "epoch": 0.9122634898388227, "grad_norm": 0.5969953536987305, "learning_rate": 0.0002138387945467591, "loss": 0.1424, "step": 6509 }, { "epoch": 0.9124036440084092, "grad_norm": 0.28630274534225464, "learning_rate": 0.0002138244439129395, "loss": 0.0789, "step": 6510 }, { "epoch": 0.9125437981779958, "grad_norm": 0.6329449415206909, "learning_rate": 0.00021381009327911982, "loss": 0.0966, "step": 6511 }, { "epoch": 0.9126839523475824, "grad_norm": 0.3260738253593445, "learning_rate": 0.00021379574264530015, "loss": 0.0361, "step": 6512 }, { "epoch": 0.9128241065171688, "grad_norm": 0.31602317094802856, "learning_rate": 0.0002137813920114805, "loss": 0.0509, "step": 6513 }, { "epoch": 0.9129642606867554, "grad_norm": 0.3736482262611389, "learning_rate": 0.00021376704137766083, "loss": 0.0621, "step": 6514 }, { "epoch": 0.913104414856342, "grad_norm": 0.2639308273792267, "learning_rate": 0.00021375269074384116, "loss": 0.0571, "step": 6515 }, { "epoch": 0.9132445690259285, "grad_norm": 0.4399544894695282, "learning_rate": 0.00021373834011002152, "loss": 0.1004, "step": 6516 }, { "epoch": 0.9133847231955151, "grad_norm": 0.6382057070732117, "learning_rate": 0.00021372398947620185, "loss": 0.0881, "step": 6517 }, { "epoch": 0.9135248773651016, "grad_norm": 0.3910166919231415, "learning_rate": 0.00021370963884238218, "loss": 0.0522, "step": 6518 }, { "epoch": 0.9136650315346881, "grad_norm": 0.1977645456790924, "learning_rate": 0.0002136952882085625, "loss": 0.0637, "step": 6519 }, { "epoch": 
0.9138051857042747, "grad_norm": 0.44781142473220825, "learning_rate": 0.00021368093757474286, "loss": 0.1074, "step": 6520 }, { "epoch": 0.9139453398738613, "grad_norm": 0.7284477353096008, "learning_rate": 0.0002136665869409232, "loss": 0.0971, "step": 6521 }, { "epoch": 0.9140854940434477, "grad_norm": 0.2804543972015381, "learning_rate": 0.00021365223630710352, "loss": 0.024, "step": 6522 }, { "epoch": 0.9142256482130343, "grad_norm": 0.1625552773475647, "learning_rate": 0.0002136378856732839, "loss": 0.0401, "step": 6523 }, { "epoch": 0.9143658023826209, "grad_norm": 0.4540673494338989, "learning_rate": 0.00021362353503946423, "loss": 0.1122, "step": 6524 }, { "epoch": 0.9145059565522075, "grad_norm": 0.4620015025138855, "learning_rate": 0.00021360918440564456, "loss": 0.1412, "step": 6525 }, { "epoch": 0.914646110721794, "grad_norm": 0.25984036922454834, "learning_rate": 0.0002135948337718249, "loss": 0.0607, "step": 6526 }, { "epoch": 0.9147862648913805, "grad_norm": 0.11482558399438858, "learning_rate": 0.00021358048313800524, "loss": 0.0398, "step": 6527 }, { "epoch": 0.914926419060967, "grad_norm": 0.7788105010986328, "learning_rate": 0.00021356613250418557, "loss": 0.1029, "step": 6528 }, { "epoch": 0.9150665732305536, "grad_norm": 0.5086371302604675, "learning_rate": 0.00021355178187036593, "loss": 0.161, "step": 6529 }, { "epoch": 0.9152067274001402, "grad_norm": 0.4723289906978607, "learning_rate": 0.00021353743123654625, "loss": 0.1568, "step": 6530 }, { "epoch": 0.9153468815697267, "grad_norm": 0.4866516590118408, "learning_rate": 0.00021352308060272658, "loss": 0.0446, "step": 6531 }, { "epoch": 0.9154870357393132, "grad_norm": 0.48494449257850647, "learning_rate": 0.00021350872996890697, "loss": 0.0985, "step": 6532 }, { "epoch": 0.9156271899088998, "grad_norm": 0.13357658684253693, "learning_rate": 0.0002134943793350873, "loss": 0.0123, "step": 6533 }, { "epoch": 0.9157673440784864, "grad_norm": 0.4283812940120697, "learning_rate": 
0.00021348002870126762, "loss": 0.1017, "step": 6534 }, { "epoch": 0.9159074982480728, "grad_norm": 0.6210263967514038, "learning_rate": 0.00021346567806744798, "loss": 0.0429, "step": 6535 }, { "epoch": 0.9160476524176594, "grad_norm": 0.5816081762313843, "learning_rate": 0.0002134513274336283, "loss": 0.1058, "step": 6536 }, { "epoch": 0.916187806587246, "grad_norm": 0.1629910171031952, "learning_rate": 0.00021343697679980864, "loss": 0.0339, "step": 6537 }, { "epoch": 0.9163279607568325, "grad_norm": 0.5754914879798889, "learning_rate": 0.00021342262616598896, "loss": 0.1801, "step": 6538 }, { "epoch": 0.9164681149264191, "grad_norm": 0.570254921913147, "learning_rate": 0.00021340827553216932, "loss": 0.0449, "step": 6539 }, { "epoch": 0.9166082690960056, "grad_norm": 0.9843643307685852, "learning_rate": 0.00021339392489834965, "loss": 0.221, "step": 6540 }, { "epoch": 0.9167484232655921, "grad_norm": 0.17277079820632935, "learning_rate": 0.00021337957426452998, "loss": 0.0217, "step": 6541 }, { "epoch": 0.9168885774351787, "grad_norm": 0.7701627612113953, "learning_rate": 0.00021336522363071036, "loss": 0.0457, "step": 6542 }, { "epoch": 0.9170287316047653, "grad_norm": 1.2003687620162964, "learning_rate": 0.0002133508729968907, "loss": 0.078, "step": 6543 }, { "epoch": 0.9171688857743517, "grad_norm": 3.0578112602233887, "learning_rate": 0.00021333652236307102, "loss": 0.2153, "step": 6544 }, { "epoch": 0.9173090399439383, "grad_norm": 2.0003116130828857, "learning_rate": 0.00021332217172925137, "loss": 0.1878, "step": 6545 }, { "epoch": 0.9174491941135249, "grad_norm": 0.39463701844215393, "learning_rate": 0.0002133078210954317, "loss": 0.1965, "step": 6546 }, { "epoch": 0.9175893482831115, "grad_norm": 4.3486104011535645, "learning_rate": 0.00021329347046161203, "loss": 0.0856, "step": 6547 }, { "epoch": 0.917729502452698, "grad_norm": 0.974236249923706, "learning_rate": 0.00021327911982779238, "loss": 0.2314, "step": 6548 }, { "epoch": 0.9178696566222845, 
"grad_norm": 1.3744697570800781, "learning_rate": 0.0002132647691939727, "loss": 0.2313, "step": 6549 }, { "epoch": 0.918009810791871, "grad_norm": 0.6843933463096619, "learning_rate": 0.00021325041856015304, "loss": 0.0861, "step": 6550 }, { "epoch": 0.9181499649614576, "grad_norm": 0.4104370176792145, "learning_rate": 0.0002132360679263334, "loss": 0.1074, "step": 6551 }, { "epoch": 0.9182901191310442, "grad_norm": 0.2638469636440277, "learning_rate": 0.00021322171729251373, "loss": 0.0888, "step": 6552 }, { "epoch": 0.9184302733006307, "grad_norm": 0.3258189260959625, "learning_rate": 0.00021320736665869405, "loss": 0.0733, "step": 6553 }, { "epoch": 0.9185704274702172, "grad_norm": 0.2016063779592514, "learning_rate": 0.00021319301602487438, "loss": 0.0333, "step": 6554 }, { "epoch": 0.9187105816398038, "grad_norm": 0.276944100856781, "learning_rate": 0.00021317866539105477, "loss": 0.0857, "step": 6555 }, { "epoch": 0.9188507358093904, "grad_norm": 0.4742622673511505, "learning_rate": 0.0002131643147572351, "loss": 0.1611, "step": 6556 }, { "epoch": 0.9189908899789768, "grad_norm": 0.433238685131073, "learning_rate": 0.00021314996412341542, "loss": 0.1017, "step": 6557 }, { "epoch": 0.9191310441485634, "grad_norm": 0.362604558467865, "learning_rate": 0.00021313561348959578, "loss": 0.057, "step": 6558 }, { "epoch": 0.91927119831815, "grad_norm": 0.7808148264884949, "learning_rate": 0.0002131212628557761, "loss": 0.1487, "step": 6559 }, { "epoch": 0.9194113524877365, "grad_norm": 0.33569496870040894, "learning_rate": 0.00021310691222195644, "loss": 0.0945, "step": 6560 }, { "epoch": 0.9195515066573231, "grad_norm": 0.6150591969490051, "learning_rate": 0.0002130925615881368, "loss": 0.1404, "step": 6561 }, { "epoch": 0.9196916608269096, "grad_norm": 0.36744436621665955, "learning_rate": 0.00021307821095431712, "loss": 0.0664, "step": 6562 }, { "epoch": 0.9198318149964961, "grad_norm": 0.7724167704582214, "learning_rate": 0.00021306386032049745, "loss": 0.137, 
"step": 6563 }, { "epoch": 0.9199719691660827, "grad_norm": 0.4401775598526001, "learning_rate": 0.00021304950968667783, "loss": 0.0615, "step": 6564 }, { "epoch": 0.9201121233356693, "grad_norm": 0.3308345675468445, "learning_rate": 0.00021303515905285816, "loss": 0.0381, "step": 6565 }, { "epoch": 0.9202522775052557, "grad_norm": 0.6252098679542542, "learning_rate": 0.0002130208084190385, "loss": 0.0447, "step": 6566 }, { "epoch": 0.9203924316748423, "grad_norm": 0.22807785868644714, "learning_rate": 0.00021300645778521884, "loss": 0.0379, "step": 6567 }, { "epoch": 0.9205325858444289, "grad_norm": 0.8972699642181396, "learning_rate": 0.00021299210715139917, "loss": 0.0766, "step": 6568 }, { "epoch": 0.9206727400140154, "grad_norm": 0.26445111632347107, "learning_rate": 0.0002129777565175795, "loss": 0.0681, "step": 6569 }, { "epoch": 0.920812894183602, "grad_norm": 0.28286445140838623, "learning_rate": 0.00021296340588375986, "loss": 0.0585, "step": 6570 }, { "epoch": 0.9209530483531885, "grad_norm": 0.583223283290863, "learning_rate": 0.00021294905524994019, "loss": 0.0862, "step": 6571 }, { "epoch": 0.921093202522775, "grad_norm": 0.14293135702610016, "learning_rate": 0.00021293470461612051, "loss": 0.0293, "step": 6572 }, { "epoch": 0.9212333566923616, "grad_norm": 0.4340853989124298, "learning_rate": 0.00021292035398230084, "loss": 0.0415, "step": 6573 }, { "epoch": 0.9213735108619482, "grad_norm": 0.6256099343299866, "learning_rate": 0.00021290600334848123, "loss": 0.0629, "step": 6574 }, { "epoch": 0.9215136650315346, "grad_norm": 0.321030855178833, "learning_rate": 0.00021289165271466155, "loss": 0.0604, "step": 6575 }, { "epoch": 0.9216538192011212, "grad_norm": 0.34738689661026, "learning_rate": 0.00021287730208084188, "loss": 0.052, "step": 6576 }, { "epoch": 0.9217939733707078, "grad_norm": 0.3290344774723053, "learning_rate": 0.00021286295144702224, "loss": 0.0591, "step": 6577 }, { "epoch": 0.9219341275402944, "grad_norm": 0.18225766718387604, 
"learning_rate": 0.00021284860081320257, "loss": 0.0362, "step": 6578 }, { "epoch": 0.9220742817098808, "grad_norm": 0.2788483500480652, "learning_rate": 0.0002128342501793829, "loss": 0.0467, "step": 6579 }, { "epoch": 0.9222144358794674, "grad_norm": 0.6952465772628784, "learning_rate": 0.00021281989954556325, "loss": 0.1207, "step": 6580 }, { "epoch": 0.922354590049054, "grad_norm": 0.6459895968437195, "learning_rate": 0.00021280554891174358, "loss": 0.1193, "step": 6581 }, { "epoch": 0.9224947442186405, "grad_norm": 0.3681592345237732, "learning_rate": 0.0002127911982779239, "loss": 0.0461, "step": 6582 }, { "epoch": 0.9226348983882271, "grad_norm": 0.5143775939941406, "learning_rate": 0.00021277684764410426, "loss": 0.1356, "step": 6583 }, { "epoch": 0.9227750525578136, "grad_norm": 0.4886704683303833, "learning_rate": 0.0002127624970102846, "loss": 0.0705, "step": 6584 }, { "epoch": 0.9229152067274001, "grad_norm": 0.4671679139137268, "learning_rate": 0.00021274814637646492, "loss": 0.1072, "step": 6585 }, { "epoch": 0.9230553608969867, "grad_norm": 0.2895188331604004, "learning_rate": 0.0002127337957426453, "loss": 0.0279, "step": 6586 }, { "epoch": 0.9231955150665733, "grad_norm": 0.548511803150177, "learning_rate": 0.00021271944510882563, "loss": 0.0762, "step": 6587 }, { "epoch": 0.9233356692361597, "grad_norm": 0.20739451050758362, "learning_rate": 0.00021270509447500596, "loss": 0.0207, "step": 6588 }, { "epoch": 0.9234758234057463, "grad_norm": 0.6376757025718689, "learning_rate": 0.0002126907438411863, "loss": 0.1053, "step": 6589 }, { "epoch": 0.9236159775753329, "grad_norm": 0.6974294185638428, "learning_rate": 0.00021267639320736665, "loss": 0.1186, "step": 6590 }, { "epoch": 0.9237561317449194, "grad_norm": 0.18099217116832733, "learning_rate": 0.00021266204257354697, "loss": 0.0186, "step": 6591 }, { "epoch": 0.923896285914506, "grad_norm": 0.6088647842407227, "learning_rate": 0.0002126476919397273, "loss": 0.1091, "step": 6592 }, { "epoch": 
0.9240364400840925, "grad_norm": 0.3992898762226105, "learning_rate": 0.00021263334130590766, "loss": 0.1839, "step": 6593 }, { "epoch": 0.924176594253679, "grad_norm": 0.34131160378456116, "learning_rate": 0.000212618990672088, "loss": 0.0599, "step": 6594 }, { "epoch": 0.9243167484232656, "grad_norm": 0.38721826672554016, "learning_rate": 0.00021260464003826832, "loss": 0.0714, "step": 6595 }, { "epoch": 0.9244569025928522, "grad_norm": 0.04819757118821144, "learning_rate": 0.0002125902894044487, "loss": 0.0065, "step": 6596 }, { "epoch": 0.9245970567624386, "grad_norm": 0.573275625705719, "learning_rate": 0.00021257593877062903, "loss": 0.0939, "step": 6597 }, { "epoch": 0.9247372109320252, "grad_norm": 1.7822608947753906, "learning_rate": 0.00021256158813680935, "loss": 0.4323, "step": 6598 }, { "epoch": 0.9248773651016118, "grad_norm": 1.5164729356765747, "learning_rate": 0.0002125472375029897, "loss": 0.153, "step": 6599 }, { "epoch": 0.9250175192711984, "grad_norm": 2.6761419773101807, "learning_rate": 0.00021253288686917004, "loss": 0.3312, "step": 6600 }, { "epoch": 0.9251576734407848, "grad_norm": 0.35326775908470154, "learning_rate": 0.00021251853623535037, "loss": 0.1139, "step": 6601 }, { "epoch": 0.9252978276103714, "grad_norm": 0.23541373014450073, "learning_rate": 0.00021250418560153072, "loss": 0.041, "step": 6602 }, { "epoch": 0.925437981779958, "grad_norm": 0.15127332508563995, "learning_rate": 0.00021248983496771105, "loss": 0.0242, "step": 6603 }, { "epoch": 0.9255781359495445, "grad_norm": 0.8747516870498657, "learning_rate": 0.00021247548433389138, "loss": 0.0661, "step": 6604 }, { "epoch": 0.9257182901191311, "grad_norm": 0.9469834566116333, "learning_rate": 0.00021246113370007176, "loss": 0.1381, "step": 6605 }, { "epoch": 0.9258584442887176, "grad_norm": 0.37461382150650024, "learning_rate": 0.0002124467830662521, "loss": 0.0961, "step": 6606 }, { "epoch": 0.9259985984583041, "grad_norm": 0.337449848651886, "learning_rate": 
0.00021243243243243242, "loss": 0.0723, "step": 6607 }, { "epoch": 0.9261387526278907, "grad_norm": 0.6606259346008301, "learning_rate": 0.00021241808179861275, "loss": 0.1, "step": 6608 }, { "epoch": 0.9262789067974773, "grad_norm": 0.47763779759407043, "learning_rate": 0.0002124037311647931, "loss": 0.0817, "step": 6609 }, { "epoch": 0.9264190609670637, "grad_norm": 0.15813365578651428, "learning_rate": 0.00021238938053097343, "loss": 0.0211, "step": 6610 }, { "epoch": 0.9265592151366503, "grad_norm": 0.2945040464401245, "learning_rate": 0.00021237502989715376, "loss": 0.1142, "step": 6611 }, { "epoch": 0.9266993693062369, "grad_norm": 0.3160688579082489, "learning_rate": 0.00021236067926333412, "loss": 0.0596, "step": 6612 }, { "epoch": 0.9268395234758234, "grad_norm": 0.4523538649082184, "learning_rate": 0.00021234632862951445, "loss": 0.0591, "step": 6613 }, { "epoch": 0.92697967764541, "grad_norm": 0.3221744894981384, "learning_rate": 0.00021233197799569477, "loss": 0.0918, "step": 6614 }, { "epoch": 0.9271198318149965, "grad_norm": 0.8121134638786316, "learning_rate": 0.00021231762736187513, "loss": 0.1557, "step": 6615 }, { "epoch": 0.927259985984583, "grad_norm": 0.7170042395591736, "learning_rate": 0.00021230327672805546, "loss": 0.0939, "step": 6616 }, { "epoch": 0.9274001401541696, "grad_norm": 0.4265516698360443, "learning_rate": 0.0002122889260942358, "loss": 0.0566, "step": 6617 }, { "epoch": 0.9275402943237562, "grad_norm": 0.5969727039337158, "learning_rate": 0.00021227457546041617, "loss": 0.0828, "step": 6618 }, { "epoch": 0.9276804484933426, "grad_norm": 0.4594188928604126, "learning_rate": 0.0002122602248265965, "loss": 0.0586, "step": 6619 }, { "epoch": 0.9278206026629292, "grad_norm": 0.6752777099609375, "learning_rate": 0.00021224587419277683, "loss": 0.1821, "step": 6620 }, { "epoch": 0.9279607568325158, "grad_norm": 0.6409759521484375, "learning_rate": 0.00021223152355895718, "loss": 0.0458, "step": 6621 }, { "epoch": 0.9281009110021023, 
"grad_norm": 0.48027411103248596, "learning_rate": 0.0002122171729251375, "loss": 0.0774, "step": 6622 }, { "epoch": 0.9282410651716888, "grad_norm": 0.6678305864334106, "learning_rate": 0.00021220282229131784, "loss": 0.0797, "step": 6623 }, { "epoch": 0.9283812193412754, "grad_norm": 0.35657790303230286, "learning_rate": 0.00021218847165749817, "loss": 0.0676, "step": 6624 }, { "epoch": 0.928521373510862, "grad_norm": 0.49849340319633484, "learning_rate": 0.00021217412102367852, "loss": 0.0825, "step": 6625 }, { "epoch": 0.9286615276804485, "grad_norm": 0.48096442222595215, "learning_rate": 0.00021215977038985885, "loss": 0.0528, "step": 6626 }, { "epoch": 0.9288016818500351, "grad_norm": 0.31301823258399963, "learning_rate": 0.00021214541975603918, "loss": 0.0558, "step": 6627 }, { "epoch": 0.9289418360196215, "grad_norm": 0.5326740741729736, "learning_rate": 0.00021213106912221956, "loss": 0.027, "step": 6628 }, { "epoch": 0.9290819901892081, "grad_norm": 0.3684687912464142, "learning_rate": 0.0002121167184883999, "loss": 0.0578, "step": 6629 }, { "epoch": 0.9292221443587947, "grad_norm": 0.7051708102226257, "learning_rate": 0.00021210236785458022, "loss": 0.1289, "step": 6630 }, { "epoch": 0.9293622985283813, "grad_norm": 0.717178225517273, "learning_rate": 0.00021208801722076058, "loss": 0.1313, "step": 6631 }, { "epoch": 0.9295024526979677, "grad_norm": 0.6747783422470093, "learning_rate": 0.0002120736665869409, "loss": 0.2192, "step": 6632 }, { "epoch": 0.9296426068675543, "grad_norm": 0.5534868836402893, "learning_rate": 0.00021205931595312123, "loss": 0.1306, "step": 6633 }, { "epoch": 0.9297827610371409, "grad_norm": 0.6477826833724976, "learning_rate": 0.0002120449653193016, "loss": 0.1114, "step": 6634 }, { "epoch": 0.9299229152067274, "grad_norm": 0.7142448425292969, "learning_rate": 0.00021203061468548192, "loss": 0.1409, "step": 6635 }, { "epoch": 0.930063069376314, "grad_norm": 0.8179980516433716, "learning_rate": 0.00021201626405166225, "loss": 
0.0778, "step": 6636 }, { "epoch": 0.9302032235459005, "grad_norm": 0.4205307960510254, "learning_rate": 0.00021200191341784263, "loss": 0.0485, "step": 6637 }, { "epoch": 0.930343377715487, "grad_norm": 0.21393099427223206, "learning_rate": 0.00021198756278402296, "loss": 0.054, "step": 6638 }, { "epoch": 0.9304835318850736, "grad_norm": 0.4255819022655487, "learning_rate": 0.0002119732121502033, "loss": 0.0441, "step": 6639 }, { "epoch": 0.9306236860546602, "grad_norm": 0.4818372130393982, "learning_rate": 0.00021195886151638364, "loss": 0.058, "step": 6640 }, { "epoch": 0.9307638402242466, "grad_norm": 1.068794846534729, "learning_rate": 0.00021194451088256397, "loss": 0.0808, "step": 6641 }, { "epoch": 0.9309039943938332, "grad_norm": 0.49423131346702576, "learning_rate": 0.0002119301602487443, "loss": 0.2034, "step": 6642 }, { "epoch": 0.9310441485634198, "grad_norm": 0.7991312742233276, "learning_rate": 0.00021191580961492463, "loss": 0.2301, "step": 6643 }, { "epoch": 0.9311843027330063, "grad_norm": 0.8272519707679749, "learning_rate": 0.00021190145898110498, "loss": 0.0626, "step": 6644 }, { "epoch": 0.9313244569025928, "grad_norm": 0.7400698661804199, "learning_rate": 0.0002118871083472853, "loss": 0.1193, "step": 6645 }, { "epoch": 0.9314646110721794, "grad_norm": 1.1985710859298706, "learning_rate": 0.00021187275771346564, "loss": 0.1766, "step": 6646 }, { "epoch": 0.9316047652417659, "grad_norm": 0.9758482575416565, "learning_rate": 0.000211858407079646, "loss": 0.0627, "step": 6647 }, { "epoch": 0.9317449194113525, "grad_norm": 1.2363991737365723, "learning_rate": 0.00021184405644582633, "loss": 0.2157, "step": 6648 }, { "epoch": 0.9318850735809391, "grad_norm": 2.4562606811523438, "learning_rate": 0.00021182970581200665, "loss": 0.0921, "step": 6649 }, { "epoch": 0.9320252277505255, "grad_norm": 1.0566496849060059, "learning_rate": 0.00021181535517818704, "loss": 0.0508, "step": 6650 }, { "epoch": 0.9321653819201121, "grad_norm": 0.31610044836997986, 
"learning_rate": 0.00021180100454436736, "loss": 0.0617, "step": 6651 }, { "epoch": 0.9323055360896987, "grad_norm": 0.5460981726646423, "learning_rate": 0.0002117866539105477, "loss": 0.0563, "step": 6652 }, { "epoch": 0.9324456902592853, "grad_norm": 0.4073355197906494, "learning_rate": 0.00021177230327672805, "loss": 0.1002, "step": 6653 }, { "epoch": 0.9325858444288717, "grad_norm": 0.26478004455566406, "learning_rate": 0.00021175795264290838, "loss": 0.072, "step": 6654 }, { "epoch": 0.9327259985984583, "grad_norm": 0.24905385076999664, "learning_rate": 0.0002117436020090887, "loss": 0.0332, "step": 6655 }, { "epoch": 0.9328661527680449, "grad_norm": 0.5655146837234497, "learning_rate": 0.00021172925137526906, "loss": 0.0858, "step": 6656 }, { "epoch": 0.9330063069376314, "grad_norm": 0.2765238583087921, "learning_rate": 0.0002117149007414494, "loss": 0.0658, "step": 6657 }, { "epoch": 0.933146461107218, "grad_norm": 0.4552586078643799, "learning_rate": 0.00021170055010762972, "loss": 0.1167, "step": 6658 }, { "epoch": 0.9332866152768045, "grad_norm": 0.26092761754989624, "learning_rate": 0.00021168619947381005, "loss": 0.0586, "step": 6659 }, { "epoch": 0.933426769446391, "grad_norm": 0.15380969643592834, "learning_rate": 0.00021167184883999043, "loss": 0.0486, "step": 6660 }, { "epoch": 0.9335669236159776, "grad_norm": 0.5019110441207886, "learning_rate": 0.00021165749820617076, "loss": 0.0773, "step": 6661 }, { "epoch": 0.9337070777855642, "grad_norm": 0.36999794840812683, "learning_rate": 0.0002116431475723511, "loss": 0.0727, "step": 6662 }, { "epoch": 0.9338472319551506, "grad_norm": 0.3883402943611145, "learning_rate": 0.00021162879693853144, "loss": 0.1147, "step": 6663 }, { "epoch": 0.9339873861247372, "grad_norm": 0.3678326904773712, "learning_rate": 0.00021161444630471177, "loss": 0.0507, "step": 6664 }, { "epoch": 0.9341275402943238, "grad_norm": 0.2711755633354187, "learning_rate": 0.0002116000956708921, "loss": 0.0363, "step": 6665 }, { "epoch": 
0.9342676944639103, "grad_norm": 0.32458898425102234, "learning_rate": 0.00021158574503707246, "loss": 0.0271, "step": 6666 }, { "epoch": 0.9344078486334968, "grad_norm": 0.11644943803548813, "learning_rate": 0.00021157139440325278, "loss": 0.0227, "step": 6667 }, { "epoch": 0.9345480028030834, "grad_norm": 0.5873533487319946, "learning_rate": 0.0002115570437694331, "loss": 0.0803, "step": 6668 }, { "epoch": 0.9346881569726699, "grad_norm": 0.3252265751361847, "learning_rate": 0.0002115426931356135, "loss": 0.0817, "step": 6669 }, { "epoch": 0.9348283111422565, "grad_norm": 0.8226193785667419, "learning_rate": 0.00021152834250179382, "loss": 0.1717, "step": 6670 }, { "epoch": 0.9349684653118431, "grad_norm": 0.15587258338928223, "learning_rate": 0.00021151399186797415, "loss": 0.0202, "step": 6671 }, { "epoch": 0.9351086194814295, "grad_norm": 0.46227923035621643, "learning_rate": 0.0002114996412341545, "loss": 0.1086, "step": 6672 }, { "epoch": 0.9352487736510161, "grad_norm": 0.43766385316848755, "learning_rate": 0.00021148529060033484, "loss": 0.0928, "step": 6673 }, { "epoch": 0.9353889278206027, "grad_norm": 0.5527503490447998, "learning_rate": 0.00021147093996651517, "loss": 0.124, "step": 6674 }, { "epoch": 0.9355290819901892, "grad_norm": 0.3553295135498047, "learning_rate": 0.00021145658933269552, "loss": 0.0416, "step": 6675 }, { "epoch": 0.9356692361597757, "grad_norm": 0.4060499966144562, "learning_rate": 0.00021144223869887585, "loss": 0.0853, "step": 6676 }, { "epoch": 0.9358093903293623, "grad_norm": 0.5635578036308289, "learning_rate": 0.00021142788806505618, "loss": 0.1293, "step": 6677 }, { "epoch": 0.9359495444989488, "grad_norm": 0.4267987310886383, "learning_rate": 0.0002114135374312365, "loss": 0.0862, "step": 6678 }, { "epoch": 0.9360896986685354, "grad_norm": 0.26879268884658813, "learning_rate": 0.00021139918679741686, "loss": 0.0299, "step": 6679 }, { "epoch": 0.9362298528381219, "grad_norm": 0.13448967039585114, "learning_rate": 
0.0002113848361635972, "loss": 0.0116, "step": 6680 }, { "epoch": 0.9363700070077084, "grad_norm": 1.026279091835022, "learning_rate": 0.00021137048552977755, "loss": 0.1186, "step": 6681 }, { "epoch": 0.936510161177295, "grad_norm": 0.5111008286476135, "learning_rate": 0.0002113561348959579, "loss": 0.0694, "step": 6682 }, { "epoch": 0.9366503153468816, "grad_norm": 0.4464820921421051, "learning_rate": 0.00021134178426213823, "loss": 0.0767, "step": 6683 }, { "epoch": 0.9367904695164682, "grad_norm": 1.2222315073013306, "learning_rate": 0.00021132743362831856, "loss": 0.0854, "step": 6684 }, { "epoch": 0.9369306236860546, "grad_norm": 0.3423914313316345, "learning_rate": 0.00021131308299449892, "loss": 0.0517, "step": 6685 }, { "epoch": 0.9370707778556412, "grad_norm": 0.16706036031246185, "learning_rate": 0.00021129873236067924, "loss": 0.0438, "step": 6686 }, { "epoch": 0.9372109320252278, "grad_norm": 0.23028790950775146, "learning_rate": 0.00021128438172685957, "loss": 0.0841, "step": 6687 }, { "epoch": 0.9373510861948143, "grad_norm": 0.225498229265213, "learning_rate": 0.00021127003109303993, "loss": 0.0372, "step": 6688 }, { "epoch": 0.9374912403644008, "grad_norm": 0.2766595184803009, "learning_rate": 0.00021125568045922026, "loss": 0.0226, "step": 6689 }, { "epoch": 0.9376313945339874, "grad_norm": 0.3983423709869385, "learning_rate": 0.00021124132982540059, "loss": 0.0321, "step": 6690 }, { "epoch": 0.9377715487035739, "grad_norm": 0.5107288956642151, "learning_rate": 0.00021122697919158097, "loss": 0.1206, "step": 6691 }, { "epoch": 0.9379117028731605, "grad_norm": 0.28537335991859436, "learning_rate": 0.0002112126285577613, "loss": 0.0406, "step": 6692 }, { "epoch": 0.9380518570427471, "grad_norm": 1.2690504789352417, "learning_rate": 0.00021119827792394163, "loss": 0.2139, "step": 6693 }, { "epoch": 0.9381920112123335, "grad_norm": 0.6160872578620911, "learning_rate": 0.00021118392729012195, "loss": 0.0974, "step": 6694 }, { "epoch": 
0.9383321653819201, "grad_norm": 0.330723375082016, "learning_rate": 0.0002111695766563023, "loss": 0.0855, "step": 6695 }, { "epoch": 0.9384723195515067, "grad_norm": 0.7720799446105957, "learning_rate": 0.00021115522602248264, "loss": 0.0961, "step": 6696 }, { "epoch": 0.9386124737210932, "grad_norm": 0.29477834701538086, "learning_rate": 0.00021114087538866297, "loss": 0.0714, "step": 6697 }, { "epoch": 0.9387526278906797, "grad_norm": 0.24162638187408447, "learning_rate": 0.00021112652475484332, "loss": 0.0244, "step": 6698 }, { "epoch": 0.9388927820602663, "grad_norm": 2.0211286544799805, "learning_rate": 0.00021111217412102365, "loss": 0.0739, "step": 6699 }, { "epoch": 0.9390329362298528, "grad_norm": 1.0248448848724365, "learning_rate": 0.00021109782348720398, "loss": 0.1163, "step": 6700 }, { "epoch": 0.9391730903994394, "grad_norm": 1.0221667289733887, "learning_rate": 0.00021108347285338436, "loss": 0.1436, "step": 6701 }, { "epoch": 0.9393132445690259, "grad_norm": 0.3628167510032654, "learning_rate": 0.0002110691222195647, "loss": 0.1063, "step": 6702 }, { "epoch": 0.9394533987386124, "grad_norm": 0.33413881063461304, "learning_rate": 0.00021105477158574502, "loss": 0.0538, "step": 6703 }, { "epoch": 0.939593552908199, "grad_norm": 0.48904046416282654, "learning_rate": 0.00021104042095192537, "loss": 0.1099, "step": 6704 }, { "epoch": 0.9397337070777856, "grad_norm": 0.4129713773727417, "learning_rate": 0.0002110260703181057, "loss": 0.0506, "step": 6705 }, { "epoch": 0.9398738612473722, "grad_norm": 0.48088040947914124, "learning_rate": 0.00021101171968428603, "loss": 0.0319, "step": 6706 }, { "epoch": 0.9400140154169586, "grad_norm": 0.195185124874115, "learning_rate": 0.0002109973690504664, "loss": 0.0484, "step": 6707 }, { "epoch": 0.9401541695865452, "grad_norm": 0.25378546118736267, "learning_rate": 0.00021098301841664672, "loss": 0.0933, "step": 6708 }, { "epoch": 0.9402943237561318, "grad_norm": 0.48153361678123474, "learning_rate": 
0.00021096866778282704, "loss": 0.0718, "step": 6709 }, { "epoch": 0.9404344779257183, "grad_norm": 0.6445961594581604, "learning_rate": 0.0002109543171490074, "loss": 0.0807, "step": 6710 }, { "epoch": 0.9405746320953048, "grad_norm": 0.2878335416316986, "learning_rate": 0.00021093996651518773, "loss": 0.0604, "step": 6711 }, { "epoch": 0.9407147862648914, "grad_norm": 0.344350129365921, "learning_rate": 0.00021092561588136806, "loss": 0.1058, "step": 6712 }, { "epoch": 0.9408549404344779, "grad_norm": 0.30809956789016724, "learning_rate": 0.0002109112652475484, "loss": 0.0373, "step": 6713 }, { "epoch": 0.9409950946040645, "grad_norm": 0.5173668265342712, "learning_rate": 0.00021089691461372877, "loss": 0.0896, "step": 6714 }, { "epoch": 0.9411352487736511, "grad_norm": 0.22702008485794067, "learning_rate": 0.0002108825639799091, "loss": 0.0514, "step": 6715 }, { "epoch": 0.9412754029432375, "grad_norm": 0.20474450290203094, "learning_rate": 0.00021086821334608943, "loss": 0.0271, "step": 6716 }, { "epoch": 0.9414155571128241, "grad_norm": 0.21513846516609192, "learning_rate": 0.00021085386271226978, "loss": 0.0414, "step": 6717 }, { "epoch": 0.9415557112824107, "grad_norm": 0.23136316239833832, "learning_rate": 0.0002108395120784501, "loss": 0.0633, "step": 6718 }, { "epoch": 0.9416958654519972, "grad_norm": 0.5780016779899597, "learning_rate": 0.00021082516144463044, "loss": 0.1144, "step": 6719 }, { "epoch": 0.9418360196215837, "grad_norm": 0.407420814037323, "learning_rate": 0.0002108108108108108, "loss": 0.0837, "step": 6720 }, { "epoch": 0.9419761737911703, "grad_norm": 0.3637130558490753, "learning_rate": 0.00021079646017699112, "loss": 0.0695, "step": 6721 }, { "epoch": 0.9421163279607568, "grad_norm": 0.3148209750652313, "learning_rate": 0.00021078210954317145, "loss": 0.0671, "step": 6722 }, { "epoch": 0.9422564821303434, "grad_norm": 0.4187985062599182, "learning_rate": 0.00021076775890935183, "loss": 0.0698, "step": 6723 }, { "epoch": 
0.9423966362999299, "grad_norm": 0.3156029284000397, "learning_rate": 0.00021075340827553216, "loss": 0.0243, "step": 6724 }, { "epoch": 0.9425367904695164, "grad_norm": 0.32550543546676636, "learning_rate": 0.0002107390576417125, "loss": 0.087, "step": 6725 }, { "epoch": 0.942676944639103, "grad_norm": 0.4009532034397125, "learning_rate": 0.00021072470700789285, "loss": 0.0842, "step": 6726 }, { "epoch": 0.9428170988086896, "grad_norm": 0.28254517912864685, "learning_rate": 0.00021071035637407318, "loss": 0.0762, "step": 6727 }, { "epoch": 0.9429572529782762, "grad_norm": 0.4632325768470764, "learning_rate": 0.0002106960057402535, "loss": 0.1208, "step": 6728 }, { "epoch": 0.9430974071478626, "grad_norm": 0.21587690711021423, "learning_rate": 0.00021068165510643383, "loss": 0.0327, "step": 6729 }, { "epoch": 0.9432375613174492, "grad_norm": 0.23405754566192627, "learning_rate": 0.0002106673044726142, "loss": 0.0556, "step": 6730 }, { "epoch": 0.9433777154870358, "grad_norm": 0.5658929944038391, "learning_rate": 0.00021065295383879452, "loss": 0.1037, "step": 6731 }, { "epoch": 0.9435178696566223, "grad_norm": 0.3672569692134857, "learning_rate": 0.00021063860320497485, "loss": 0.0583, "step": 6732 }, { "epoch": 0.9436580238262088, "grad_norm": 0.7151066064834595, "learning_rate": 0.00021062425257115523, "loss": 0.0719, "step": 6733 }, { "epoch": 0.9437981779957954, "grad_norm": 0.3362157940864563, "learning_rate": 0.00021060990193733556, "loss": 0.0322, "step": 6734 }, { "epoch": 0.9439383321653819, "grad_norm": 1.0419331789016724, "learning_rate": 0.00021059555130351589, "loss": 0.1414, "step": 6735 }, { "epoch": 0.9440784863349685, "grad_norm": 0.9612257480621338, "learning_rate": 0.00021058120066969624, "loss": 0.0391, "step": 6736 }, { "epoch": 0.9442186405045551, "grad_norm": 0.4866899847984314, "learning_rate": 0.00021056685003587657, "loss": 0.157, "step": 6737 }, { "epoch": 0.9443587946741415, "grad_norm": 0.3909037411212921, "learning_rate": 
0.0002105524994020569, "loss": 0.0657, "step": 6738 }, { "epoch": 0.9444989488437281, "grad_norm": 0.4714002311229706, "learning_rate": 0.00021053814876823725, "loss": 0.0389, "step": 6739 }, { "epoch": 0.9446391030133147, "grad_norm": 0.6339796185493469, "learning_rate": 0.00021052379813441758, "loss": 0.0971, "step": 6740 }, { "epoch": 0.9447792571829012, "grad_norm": 0.7312930226325989, "learning_rate": 0.0002105094475005979, "loss": 0.0693, "step": 6741 }, { "epoch": 0.9449194113524877, "grad_norm": 0.5390293598175049, "learning_rate": 0.00021049509686677827, "loss": 0.0382, "step": 6742 }, { "epoch": 0.9450595655220743, "grad_norm": 0.2725268602371216, "learning_rate": 0.0002104807462329586, "loss": 0.0415, "step": 6743 }, { "epoch": 0.9451997196916608, "grad_norm": 0.3685208261013031, "learning_rate": 0.00021046639559913892, "loss": 0.0488, "step": 6744 }, { "epoch": 0.9453398738612474, "grad_norm": 0.45445573329925537, "learning_rate": 0.0002104520449653193, "loss": 0.1105, "step": 6745 }, { "epoch": 0.9454800280308339, "grad_norm": 0.541092038154602, "learning_rate": 0.00021043769433149964, "loss": 0.0806, "step": 6746 }, { "epoch": 0.9456201822004204, "grad_norm": 0.6137456893920898, "learning_rate": 0.00021042334369767996, "loss": 0.1166, "step": 6747 }, { "epoch": 0.945760336370007, "grad_norm": 0.811437726020813, "learning_rate": 0.0002104089930638603, "loss": 0.1063, "step": 6748 }, { "epoch": 0.9459004905395936, "grad_norm": 1.289063572883606, "learning_rate": 0.00021039464243004065, "loss": 0.252, "step": 6749 }, { "epoch": 0.9460406447091801, "grad_norm": 1.4864723682403564, "learning_rate": 0.00021038029179622098, "loss": 0.2891, "step": 6750 }, { "epoch": 0.9461807988787666, "grad_norm": 0.19582898914813995, "learning_rate": 0.0002103659411624013, "loss": 0.0377, "step": 6751 }, { "epoch": 0.9463209530483532, "grad_norm": 0.4129214882850647, "learning_rate": 0.00021035159052858166, "loss": 0.0297, "step": 6752 }, { "epoch": 0.9464611072179397, 
"grad_norm": 0.39841458201408386, "learning_rate": 0.000210337239894762, "loss": 0.104, "step": 6753 }, { "epoch": 0.9466012613875263, "grad_norm": 0.5760337114334106, "learning_rate": 0.00021032288926094232, "loss": 0.1442, "step": 6754 }, { "epoch": 0.9467414155571128, "grad_norm": 0.2270519733428955, "learning_rate": 0.0002103085386271227, "loss": 0.0937, "step": 6755 }, { "epoch": 0.9468815697266993, "grad_norm": 0.4276168942451477, "learning_rate": 0.00021029418799330303, "loss": 0.0626, "step": 6756 }, { "epoch": 0.9470217238962859, "grad_norm": 0.44169506430625916, "learning_rate": 0.00021027983735948336, "loss": 0.1065, "step": 6757 }, { "epoch": 0.9471618780658725, "grad_norm": 0.4154358208179474, "learning_rate": 0.0002102654867256637, "loss": 0.0761, "step": 6758 }, { "epoch": 0.9473020322354591, "grad_norm": 0.3619321584701538, "learning_rate": 0.00021025113609184404, "loss": 0.0821, "step": 6759 }, { "epoch": 0.9474421864050455, "grad_norm": 0.36187854409217834, "learning_rate": 0.00021023678545802437, "loss": 0.0576, "step": 6760 }, { "epoch": 0.9475823405746321, "grad_norm": 0.1873820424079895, "learning_rate": 0.00021022243482420473, "loss": 0.0428, "step": 6761 }, { "epoch": 0.9477224947442187, "grad_norm": 0.3125673234462738, "learning_rate": 0.00021020808419038505, "loss": 0.0613, "step": 6762 }, { "epoch": 0.9478626489138052, "grad_norm": 0.32027217745780945, "learning_rate": 0.00021019373355656538, "loss": 0.0556, "step": 6763 }, { "epoch": 0.9480028030833917, "grad_norm": 0.20015032589435577, "learning_rate": 0.0002101793829227457, "loss": 0.0572, "step": 6764 }, { "epoch": 0.9481429572529783, "grad_norm": 0.19977861642837524, "learning_rate": 0.0002101650322889261, "loss": 0.0312, "step": 6765 }, { "epoch": 0.9482831114225648, "grad_norm": 0.4260909855365753, "learning_rate": 0.00021015068165510642, "loss": 0.1323, "step": 6766 }, { "epoch": 0.9484232655921514, "grad_norm": 0.2937268316745758, "learning_rate": 0.00021013633102128675, "loss": 
0.0237, "step": 6767 }, { "epoch": 0.9485634197617379, "grad_norm": 0.34284707903862, "learning_rate": 0.0002101219803874671, "loss": 0.0858, "step": 6768 }, { "epoch": 0.9487035739313244, "grad_norm": 0.29405221343040466, "learning_rate": 0.00021010762975364744, "loss": 0.0968, "step": 6769 }, { "epoch": 0.948843728100911, "grad_norm": 0.343694806098938, "learning_rate": 0.00021009327911982776, "loss": 0.0822, "step": 6770 }, { "epoch": 0.9489838822704976, "grad_norm": 0.5976319909095764, "learning_rate": 0.00021007892848600812, "loss": 0.1108, "step": 6771 }, { "epoch": 0.9491240364400841, "grad_norm": 0.6340566873550415, "learning_rate": 0.00021006457785218845, "loss": 0.1635, "step": 6772 }, { "epoch": 0.9492641906096706, "grad_norm": 0.3809947669506073, "learning_rate": 0.00021005022721836878, "loss": 0.0672, "step": 6773 }, { "epoch": 0.9494043447792572, "grad_norm": 0.315130352973938, "learning_rate": 0.00021003587658454913, "loss": 0.0563, "step": 6774 }, { "epoch": 0.9495444989488437, "grad_norm": 0.19299724698066711, "learning_rate": 0.00021002152595072946, "loss": 0.0453, "step": 6775 }, { "epoch": 0.9496846531184303, "grad_norm": 0.536948025226593, "learning_rate": 0.0002100071753169098, "loss": 0.1265, "step": 6776 }, { "epoch": 0.9498248072880168, "grad_norm": 0.17534567415714264, "learning_rate": 0.00020999282468309017, "loss": 0.0404, "step": 6777 }, { "epoch": 0.9499649614576033, "grad_norm": 0.23549401760101318, "learning_rate": 0.0002099784740492705, "loss": 0.0315, "step": 6778 }, { "epoch": 0.9501051156271899, "grad_norm": 0.3954560160636902, "learning_rate": 0.00020996412341545083, "loss": 0.0896, "step": 6779 }, { "epoch": 0.9502452697967765, "grad_norm": 0.5285711884498596, "learning_rate": 0.00020994977278163116, "loss": 0.0474, "step": 6780 }, { "epoch": 0.950385423966363, "grad_norm": 0.245276540517807, "learning_rate": 0.00020993542214781151, "loss": 0.0411, "step": 6781 }, { "epoch": 0.9505255781359495, "grad_norm": 0.3574492335319519, 
"learning_rate": 0.00020992107151399184, "loss": 0.0355, "step": 6782 }, { "epoch": 0.9506657323055361, "grad_norm": 0.3773401975631714, "learning_rate": 0.00020990672088017217, "loss": 0.0869, "step": 6783 }, { "epoch": 0.9508058864751227, "grad_norm": 0.3458418548107147, "learning_rate": 0.00020989237024635253, "loss": 0.0535, "step": 6784 }, { "epoch": 0.9509460406447092, "grad_norm": 0.40093284845352173, "learning_rate": 0.00020987801961253286, "loss": 0.1455, "step": 6785 }, { "epoch": 0.9510861948142957, "grad_norm": 0.3073892891407013, "learning_rate": 0.00020986366897871318, "loss": 0.0501, "step": 6786 }, { "epoch": 0.9512263489838823, "grad_norm": 0.2885949909687042, "learning_rate": 0.00020984931834489357, "loss": 0.0425, "step": 6787 }, { "epoch": 0.9513665031534688, "grad_norm": 0.2747536301612854, "learning_rate": 0.0002098349677110739, "loss": 0.0363, "step": 6788 }, { "epoch": 0.9515066573230554, "grad_norm": 0.5417380332946777, "learning_rate": 0.00020982061707725422, "loss": 0.0995, "step": 6789 }, { "epoch": 0.9516468114926419, "grad_norm": 0.3912383019924164, "learning_rate": 0.00020980626644343458, "loss": 0.0453, "step": 6790 }, { "epoch": 0.9517869656622284, "grad_norm": 0.4273107647895813, "learning_rate": 0.0002097919158096149, "loss": 0.1067, "step": 6791 }, { "epoch": 0.951927119831815, "grad_norm": 0.44154760241508484, "learning_rate": 0.00020977756517579524, "loss": 0.0452, "step": 6792 }, { "epoch": 0.9520672740014016, "grad_norm": 0.35051122307777405, "learning_rate": 0.0002097632145419756, "loss": 0.0438, "step": 6793 }, { "epoch": 0.9522074281709881, "grad_norm": 0.7021182775497437, "learning_rate": 0.00020974886390815592, "loss": 0.2359, "step": 6794 }, { "epoch": 0.9523475823405746, "grad_norm": 0.7743340730667114, "learning_rate": 0.00020973451327433625, "loss": 0.2383, "step": 6795 }, { "epoch": 0.9524877365101612, "grad_norm": 0.9278534054756165, "learning_rate": 0.00020972016264051663, "loss": 0.1161, "step": 6796 }, { 
"epoch": 0.9526278906797477, "grad_norm": 0.9077466130256653, "learning_rate": 0.00020970581200669696, "loss": 0.0422, "step": 6797 }, { "epoch": 0.9527680448493343, "grad_norm": 3.7644705772399902, "learning_rate": 0.0002096914613728773, "loss": 0.1579, "step": 6798 }, { "epoch": 0.9529081990189208, "grad_norm": 0.3081324100494385, "learning_rate": 0.00020967711073905762, "loss": 0.0181, "step": 6799 }, { "epoch": 0.9530483531885073, "grad_norm": 3.044713020324707, "learning_rate": 0.00020966276010523797, "loss": 0.1534, "step": 6800 }, { "epoch": 0.9531885073580939, "grad_norm": 0.3266991674900055, "learning_rate": 0.0002096484094714183, "loss": 0.057, "step": 6801 }, { "epoch": 0.9533286615276805, "grad_norm": 0.32300278544425964, "learning_rate": 0.00020963405883759863, "loss": 0.0708, "step": 6802 }, { "epoch": 0.953468815697267, "grad_norm": 0.26398855447769165, "learning_rate": 0.000209619708203779, "loss": 0.0441, "step": 6803 }, { "epoch": 0.9536089698668535, "grad_norm": 0.6699583530426025, "learning_rate": 0.00020960535756995932, "loss": 0.1139, "step": 6804 }, { "epoch": 0.9537491240364401, "grad_norm": 0.6843666434288025, "learning_rate": 0.00020959100693613964, "loss": 0.1313, "step": 6805 }, { "epoch": 0.9538892782060266, "grad_norm": 0.2818840742111206, "learning_rate": 0.00020957665630232, "loss": 0.0511, "step": 6806 }, { "epoch": 0.9540294323756132, "grad_norm": 0.3320895731449127, "learning_rate": 0.00020956230566850033, "loss": 0.0384, "step": 6807 }, { "epoch": 0.9541695865451997, "grad_norm": 0.413745641708374, "learning_rate": 0.00020954795503468068, "loss": 0.0849, "step": 6808 }, { "epoch": 0.9543097407147862, "grad_norm": 0.1976531744003296, "learning_rate": 0.00020953360440086104, "loss": 0.0297, "step": 6809 }, { "epoch": 0.9544498948843728, "grad_norm": 0.4792803227901459, "learning_rate": 0.00020951925376704137, "loss": 0.076, "step": 6810 }, { "epoch": 0.9545900490539594, "grad_norm": 0.13696984946727753, "learning_rate": 
0.0002095049031332217, "loss": 0.0221, "step": 6811 }, { "epoch": 0.9547302032235458, "grad_norm": 0.19744239747524261, "learning_rate": 0.00020949055249940205, "loss": 0.0071, "step": 6812 }, { "epoch": 0.9548703573931324, "grad_norm": 0.2566864788532257, "learning_rate": 0.00020947620186558238, "loss": 0.0776, "step": 6813 }, { "epoch": 0.955010511562719, "grad_norm": 0.3636632561683655, "learning_rate": 0.0002094618512317627, "loss": 0.0733, "step": 6814 }, { "epoch": 0.9551506657323056, "grad_norm": 0.29802748560905457, "learning_rate": 0.00020944750059794304, "loss": 0.0434, "step": 6815 }, { "epoch": 0.9552908199018921, "grad_norm": 0.31809690594673157, "learning_rate": 0.0002094331499641234, "loss": 0.0355, "step": 6816 }, { "epoch": 0.9554309740714786, "grad_norm": 0.3092915415763855, "learning_rate": 0.00020941879933030372, "loss": 0.0675, "step": 6817 }, { "epoch": 0.9555711282410652, "grad_norm": 0.30978840589523315, "learning_rate": 0.00020940444869648405, "loss": 0.0416, "step": 6818 }, { "epoch": 0.9557112824106517, "grad_norm": 0.4784527122974396, "learning_rate": 0.00020939009806266443, "loss": 0.1491, "step": 6819 }, { "epoch": 0.9558514365802383, "grad_norm": 0.5266783237457275, "learning_rate": 0.00020937574742884476, "loss": 0.0664, "step": 6820 }, { "epoch": 0.9559915907498248, "grad_norm": 0.4265827238559723, "learning_rate": 0.0002093613967950251, "loss": 0.1297, "step": 6821 }, { "epoch": 0.9561317449194113, "grad_norm": 0.5543458461761475, "learning_rate": 0.00020934704616120545, "loss": 0.1197, "step": 6822 }, { "epoch": 0.9562718990889979, "grad_norm": 0.56550133228302, "learning_rate": 0.00020933269552738577, "loss": 0.0813, "step": 6823 }, { "epoch": 0.9564120532585845, "grad_norm": 0.3015633821487427, "learning_rate": 0.0002093183448935661, "loss": 0.0597, "step": 6824 }, { "epoch": 0.956552207428171, "grad_norm": 0.5544846057891846, "learning_rate": 0.00020930399425974646, "loss": 0.1174, "step": 6825 }, { "epoch": 0.9566923615977575, 
"grad_norm": 0.26918500661849976, "learning_rate": 0.0002092896436259268, "loss": 0.0497, "step": 6826 }, { "epoch": 0.9568325157673441, "grad_norm": 0.5477024912834167, "learning_rate": 0.00020927529299210712, "loss": 0.103, "step": 6827 }, { "epoch": 0.9569726699369306, "grad_norm": 0.5423645973205566, "learning_rate": 0.0002092609423582875, "loss": 0.0621, "step": 6828 }, { "epoch": 0.9571128241065172, "grad_norm": 0.2877272665500641, "learning_rate": 0.00020924659172446783, "loss": 0.0559, "step": 6829 }, { "epoch": 0.9572529782761037, "grad_norm": 0.16464126110076904, "learning_rate": 0.00020923224109064816, "loss": 0.0534, "step": 6830 }, { "epoch": 0.9573931324456902, "grad_norm": 0.296781063079834, "learning_rate": 0.0002092178904568285, "loss": 0.0739, "step": 6831 }, { "epoch": 0.9575332866152768, "grad_norm": 0.3618508279323578, "learning_rate": 0.00020920353982300884, "loss": 0.0714, "step": 6832 }, { "epoch": 0.9576734407848634, "grad_norm": 0.3697979152202606, "learning_rate": 0.00020918918918918917, "loss": 0.0571, "step": 6833 }, { "epoch": 0.9578135949544498, "grad_norm": 0.5758258700370789, "learning_rate": 0.0002091748385553695, "loss": 0.0572, "step": 6834 }, { "epoch": 0.9579537491240364, "grad_norm": 0.2583303451538086, "learning_rate": 0.00020916048792154985, "loss": 0.0311, "step": 6835 }, { "epoch": 0.958093903293623, "grad_norm": 0.24555346369743347, "learning_rate": 0.00020914613728773018, "loss": 0.0452, "step": 6836 }, { "epoch": 0.9582340574632096, "grad_norm": 0.3500046133995056, "learning_rate": 0.0002091317866539105, "loss": 0.1176, "step": 6837 }, { "epoch": 0.9583742116327961, "grad_norm": 0.4273541569709778, "learning_rate": 0.00020911743602009087, "loss": 0.0695, "step": 6838 }, { "epoch": 0.9585143658023826, "grad_norm": 1.1742777824401855, "learning_rate": 0.0002091030853862712, "loss": 0.0621, "step": 6839 }, { "epoch": 0.9586545199719692, "grad_norm": 0.23294374346733093, "learning_rate": 0.00020908873475245155, "loss": 
0.0285, "step": 6840 }, { "epoch": 0.9587946741415557, "grad_norm": 0.29251956939697266, "learning_rate": 0.0002090743841186319, "loss": 0.0383, "step": 6841 }, { "epoch": 0.9589348283111423, "grad_norm": 0.44774389266967773, "learning_rate": 0.00020906003348481223, "loss": 0.1078, "step": 6842 }, { "epoch": 0.9590749824807288, "grad_norm": 0.3983364403247833, "learning_rate": 0.00020904568285099256, "loss": 0.0816, "step": 6843 }, { "epoch": 0.9592151366503153, "grad_norm": 0.01704859547317028, "learning_rate": 0.00020903133221717292, "loss": 0.0023, "step": 6844 }, { "epoch": 0.9593552908199019, "grad_norm": 1.1474777460098267, "learning_rate": 0.00020901698158335325, "loss": 0.141, "step": 6845 }, { "epoch": 0.9594954449894885, "grad_norm": 0.4115254282951355, "learning_rate": 0.00020900263094953358, "loss": 0.0157, "step": 6846 }, { "epoch": 0.959635599159075, "grad_norm": 1.123226284980774, "learning_rate": 0.00020898828031571393, "loss": 0.2343, "step": 6847 }, { "epoch": 0.9597757533286615, "grad_norm": 0.9458621144294739, "learning_rate": 0.00020897392968189426, "loss": 0.2189, "step": 6848 }, { "epoch": 0.9599159074982481, "grad_norm": 1.3432241678237915, "learning_rate": 0.0002089595790480746, "loss": 0.2261, "step": 6849 }, { "epoch": 0.9600560616678346, "grad_norm": 2.4930717945098877, "learning_rate": 0.00020894522841425492, "loss": 0.2259, "step": 6850 }, { "epoch": 0.9601962158374212, "grad_norm": 0.2192278802394867, "learning_rate": 0.0002089308777804353, "loss": 0.0405, "step": 6851 }, { "epoch": 0.9603363700070077, "grad_norm": 0.5236284136772156, "learning_rate": 0.00020891652714661563, "loss": 0.0782, "step": 6852 }, { "epoch": 0.9604765241765942, "grad_norm": 0.36357614398002625, "learning_rate": 0.00020890217651279596, "loss": 0.0332, "step": 6853 }, { "epoch": 0.9606166783461808, "grad_norm": 0.35396990180015564, "learning_rate": 0.0002088878258789763, "loss": 0.0362, "step": 6854 }, { "epoch": 0.9607568325157674, "grad_norm": 
0.3969132602214813, "learning_rate": 0.00020887347524515664, "loss": 0.0615, "step": 6855 }, { "epoch": 0.9608969866853538, "grad_norm": 0.15618836879730225, "learning_rate": 0.00020885912461133697, "loss": 0.0484, "step": 6856 }, { "epoch": 0.9610371408549404, "grad_norm": 0.3552684783935547, "learning_rate": 0.00020884477397751733, "loss": 0.0893, "step": 6857 }, { "epoch": 0.961177295024527, "grad_norm": 0.26791292428970337, "learning_rate": 0.00020883042334369765, "loss": 0.0746, "step": 6858 }, { "epoch": 0.9613174491941135, "grad_norm": 0.8597043752670288, "learning_rate": 0.00020881607270987798, "loss": 0.0963, "step": 6859 }, { "epoch": 0.9614576033637001, "grad_norm": 0.4311549663543701, "learning_rate": 0.00020880172207605837, "loss": 0.1231, "step": 6860 }, { "epoch": 0.9615977575332866, "grad_norm": 0.3141905665397644, "learning_rate": 0.0002087873714422387, "loss": 0.0977, "step": 6861 }, { "epoch": 0.9617379117028731, "grad_norm": 0.3051708936691284, "learning_rate": 0.00020877302080841902, "loss": 0.1264, "step": 6862 }, { "epoch": 0.9618780658724597, "grad_norm": 0.32538965344429016, "learning_rate": 0.00020875867017459938, "loss": 0.062, "step": 6863 }, { "epoch": 0.9620182200420463, "grad_norm": 0.4162488877773285, "learning_rate": 0.0002087443195407797, "loss": 0.1089, "step": 6864 }, { "epoch": 0.9621583742116327, "grad_norm": 0.3207227885723114, "learning_rate": 0.00020872996890696003, "loss": 0.0537, "step": 6865 }, { "epoch": 0.9622985283812193, "grad_norm": 0.14528274536132812, "learning_rate": 0.0002087156182731404, "loss": 0.0201, "step": 6866 }, { "epoch": 0.9624386825508059, "grad_norm": 0.29829683899879456, "learning_rate": 0.00020870126763932072, "loss": 0.1148, "step": 6867 }, { "epoch": 0.9625788367203925, "grad_norm": 0.08368442207574844, "learning_rate": 0.00020868691700550105, "loss": 0.0174, "step": 6868 }, { "epoch": 0.962718990889979, "grad_norm": 0.14285236597061157, "learning_rate": 0.00020867256637168138, "loss": 0.0246, 
"step": 6869 }, { "epoch": 0.9628591450595655, "grad_norm": 3.1134514808654785, "learning_rate": 0.00020865821573786173, "loss": 0.0723, "step": 6870 }, { "epoch": 0.9629992992291521, "grad_norm": 0.38169342279434204, "learning_rate": 0.00020864386510404206, "loss": 0.0984, "step": 6871 }, { "epoch": 0.9631394533987386, "grad_norm": 0.4693698585033417, "learning_rate": 0.00020862951447022242, "loss": 0.1521, "step": 6872 }, { "epoch": 0.9632796075683252, "grad_norm": 0.14318130910396576, "learning_rate": 0.00020861516383640277, "loss": 0.0497, "step": 6873 }, { "epoch": 0.9634197617379117, "grad_norm": 0.21699407696723938, "learning_rate": 0.0002086008132025831, "loss": 0.0452, "step": 6874 }, { "epoch": 0.9635599159074982, "grad_norm": 0.36788904666900635, "learning_rate": 0.00020858646256876343, "loss": 0.0697, "step": 6875 }, { "epoch": 0.9637000700770848, "grad_norm": 0.8227933049201965, "learning_rate": 0.00020857211193494378, "loss": 0.1259, "step": 6876 }, { "epoch": 0.9638402242466714, "grad_norm": 0.4492322504520416, "learning_rate": 0.0002085577613011241, "loss": 0.1111, "step": 6877 }, { "epoch": 0.9639803784162578, "grad_norm": 1.0661616325378418, "learning_rate": 0.00020854341066730444, "loss": 0.0717, "step": 6878 }, { "epoch": 0.9641205325858444, "grad_norm": 0.3309056758880615, "learning_rate": 0.0002085290600334848, "loss": 0.0449, "step": 6879 }, { "epoch": 0.964260686755431, "grad_norm": 0.33337658643722534, "learning_rate": 0.00020851470939966513, "loss": 0.0858, "step": 6880 }, { "epoch": 0.9644008409250175, "grad_norm": 0.9851006269454956, "learning_rate": 0.00020850035876584545, "loss": 0.0643, "step": 6881 }, { "epoch": 0.9645409950946041, "grad_norm": 0.24895556271076202, "learning_rate": 0.00020848600813202584, "loss": 0.0363, "step": 6882 }, { "epoch": 0.9646811492641906, "grad_norm": 0.48956048488616943, "learning_rate": 0.00020847165749820617, "loss": 0.0932, "step": 6883 }, { "epoch": 0.9648213034337771, "grad_norm": 
0.35453397035598755, "learning_rate": 0.0002084573068643865, "loss": 0.1305, "step": 6884 }, { "epoch": 0.9649614576033637, "grad_norm": 0.2098037451505661, "learning_rate": 0.00020844295623056682, "loss": 0.077, "step": 6885 }, { "epoch": 0.9651016117729503, "grad_norm": 0.23574477434158325, "learning_rate": 0.00020842860559674718, "loss": 0.0197, "step": 6886 }, { "epoch": 0.9652417659425367, "grad_norm": 0.36492350697517395, "learning_rate": 0.0002084142549629275, "loss": 0.0628, "step": 6887 }, { "epoch": 0.9653819201121233, "grad_norm": 0.7939193844795227, "learning_rate": 0.00020839990432910784, "loss": 0.059, "step": 6888 }, { "epoch": 0.9655220742817099, "grad_norm": 1.3341991901397705, "learning_rate": 0.0002083855536952882, "loss": 0.2687, "step": 6889 }, { "epoch": 0.9656622284512965, "grad_norm": 0.09365924447774887, "learning_rate": 0.00020837120306146852, "loss": 0.0157, "step": 6890 }, { "epoch": 0.965802382620883, "grad_norm": 0.4536382257938385, "learning_rate": 0.00020835685242764885, "loss": 0.0679, "step": 6891 }, { "epoch": 0.9659425367904695, "grad_norm": 0.6109830141067505, "learning_rate": 0.00020834250179382923, "loss": 0.088, "step": 6892 }, { "epoch": 0.9660826909600561, "grad_norm": 0.7137888669967651, "learning_rate": 0.00020832815116000956, "loss": 0.0421, "step": 6893 }, { "epoch": 0.9662228451296426, "grad_norm": 0.883314311504364, "learning_rate": 0.0002083138005261899, "loss": 0.0589, "step": 6894 }, { "epoch": 0.9663629992992292, "grad_norm": 0.6361221671104431, "learning_rate": 0.00020829944989237024, "loss": 0.062, "step": 6895 }, { "epoch": 0.9665031534688157, "grad_norm": 2.891608238220215, "learning_rate": 0.00020828509925855057, "loss": 0.4207, "step": 6896 }, { "epoch": 0.9666433076384022, "grad_norm": 0.7212381362915039, "learning_rate": 0.0002082707486247309, "loss": 0.0716, "step": 6897 }, { "epoch": 0.9667834618079888, "grad_norm": 1.5999410152435303, "learning_rate": 0.00020825639799091126, "loss": 0.2749, "step": 6898 
}, { "epoch": 0.9669236159775754, "grad_norm": 1.2265528440475464, "learning_rate": 0.00020824204735709159, "loss": 0.2263, "step": 6899 }, { "epoch": 0.9670637701471618, "grad_norm": 0.8691014051437378, "learning_rate": 0.00020822769672327191, "loss": 0.0846, "step": 6900 }, { "epoch": 0.9672039243167484, "grad_norm": 0.22890891134738922, "learning_rate": 0.00020821334608945227, "loss": 0.0474, "step": 6901 }, { "epoch": 0.967344078486335, "grad_norm": 0.2901453375816345, "learning_rate": 0.0002081989954556326, "loss": 0.0563, "step": 6902 }, { "epoch": 0.9674842326559215, "grad_norm": 0.5743530988693237, "learning_rate": 0.00020818464482181293, "loss": 0.1326, "step": 6903 }, { "epoch": 0.9676243868255081, "grad_norm": 0.45388081669807434, "learning_rate": 0.00020817029418799328, "loss": 0.0881, "step": 6904 }, { "epoch": 0.9677645409950946, "grad_norm": 0.5331664085388184, "learning_rate": 0.00020815594355417364, "loss": 0.095, "step": 6905 }, { "epoch": 0.9679046951646811, "grad_norm": 0.17647810280323029, "learning_rate": 0.00020814159292035397, "loss": 0.0285, "step": 6906 }, { "epoch": 0.9680448493342677, "grad_norm": 0.41952958703041077, "learning_rate": 0.0002081272422865343, "loss": 0.0615, "step": 6907 }, { "epoch": 0.9681850035038543, "grad_norm": 0.4461071193218231, "learning_rate": 0.00020811289165271465, "loss": 0.1123, "step": 6908 }, { "epoch": 0.9683251576734407, "grad_norm": 0.6692301630973816, "learning_rate": 0.00020809854101889498, "loss": 0.0817, "step": 6909 }, { "epoch": 0.9684653118430273, "grad_norm": 0.36530736088752747, "learning_rate": 0.0002080841903850753, "loss": 0.0506, "step": 6910 }, { "epoch": 0.9686054660126139, "grad_norm": 0.4973869025707245, "learning_rate": 0.00020806983975125566, "loss": 0.0826, "step": 6911 }, { "epoch": 0.9687456201822005, "grad_norm": 0.5696591138839722, "learning_rate": 0.000208055489117436, "loss": 0.0816, "step": 6912 }, { "epoch": 0.9688857743517869, "grad_norm": 0.3018736243247986, "learning_rate": 
0.00020804113848361632, "loss": 0.0721, "step": 6913 }, { "epoch": 0.9690259285213735, "grad_norm": 0.33308374881744385, "learning_rate": 0.0002080267878497967, "loss": 0.0417, "step": 6914 }, { "epoch": 0.96916608269096, "grad_norm": 0.3336891233921051, "learning_rate": 0.00020801243721597703, "loss": 0.0741, "step": 6915 }, { "epoch": 0.9693062368605466, "grad_norm": 0.41221198439598083, "learning_rate": 0.00020799808658215736, "loss": 0.0593, "step": 6916 }, { "epoch": 0.9694463910301332, "grad_norm": 0.47218501567840576, "learning_rate": 0.00020798373594833772, "loss": 0.1557, "step": 6917 }, { "epoch": 0.9695865451997197, "grad_norm": 0.26762154698371887, "learning_rate": 0.00020796938531451804, "loss": 0.0625, "step": 6918 }, { "epoch": 0.9697266993693062, "grad_norm": 0.44407474994659424, "learning_rate": 0.00020795503468069837, "loss": 0.0765, "step": 6919 }, { "epoch": 0.9698668535388928, "grad_norm": 0.30590900778770447, "learning_rate": 0.0002079406840468787, "loss": 0.03, "step": 6920 }, { "epoch": 0.9700070077084794, "grad_norm": 0.41272279620170593, "learning_rate": 0.00020792633341305906, "loss": 0.0692, "step": 6921 }, { "epoch": 0.9701471618780658, "grad_norm": 0.6494072079658508, "learning_rate": 0.00020791198277923939, "loss": 0.1739, "step": 6922 }, { "epoch": 0.9702873160476524, "grad_norm": 0.315132200717926, "learning_rate": 0.00020789763214541971, "loss": 0.0839, "step": 6923 }, { "epoch": 0.970427470217239, "grad_norm": 0.3787939250469208, "learning_rate": 0.0002078832815116001, "loss": 0.0286, "step": 6924 }, { "epoch": 0.9705676243868255, "grad_norm": 1.0519497394561768, "learning_rate": 0.00020786893087778043, "loss": 0.1371, "step": 6925 }, { "epoch": 0.9707077785564121, "grad_norm": 0.5610406994819641, "learning_rate": 0.00020785458024396075, "loss": 0.1178, "step": 6926 }, { "epoch": 0.9708479327259986, "grad_norm": 0.35256150364875793, "learning_rate": 0.0002078402296101411, "loss": 0.0652, "step": 6927 }, { "epoch": 
0.9709880868955851, "grad_norm": 0.13512149453163147, "learning_rate": 0.00020782587897632144, "loss": 0.0286, "step": 6928 }, { "epoch": 0.9711282410651717, "grad_norm": 0.2617959678173065, "learning_rate": 0.00020781152834250177, "loss": 0.0413, "step": 6929 }, { "epoch": 0.9712683952347583, "grad_norm": 0.1958327442407608, "learning_rate": 0.00020779717770868212, "loss": 0.0375, "step": 6930 }, { "epoch": 0.9714085494043447, "grad_norm": 0.4769895374774933, "learning_rate": 0.00020778282707486245, "loss": 0.07, "step": 6931 }, { "epoch": 0.9715487035739313, "grad_norm": 0.4198013246059418, "learning_rate": 0.00020776847644104278, "loss": 0.0905, "step": 6932 }, { "epoch": 0.9716888577435179, "grad_norm": 0.5336285829544067, "learning_rate": 0.00020775412580722314, "loss": 0.0755, "step": 6933 }, { "epoch": 0.9718290119131044, "grad_norm": 0.323056697845459, "learning_rate": 0.00020773977517340346, "loss": 0.0963, "step": 6934 }, { "epoch": 0.9719691660826909, "grad_norm": 0.9526858329772949, "learning_rate": 0.00020772542453958382, "loss": 0.0577, "step": 6935 }, { "epoch": 0.9721093202522775, "grad_norm": 0.3790183961391449, "learning_rate": 0.00020771107390576418, "loss": 0.059, "step": 6936 }, { "epoch": 0.972249474421864, "grad_norm": 0.8899807333946228, "learning_rate": 0.0002076967232719445, "loss": 0.1189, "step": 6937 }, { "epoch": 0.9723896285914506, "grad_norm": 1.0275369882583618, "learning_rate": 0.00020768237263812483, "loss": 0.185, "step": 6938 }, { "epoch": 0.9725297827610372, "grad_norm": 0.2496979981660843, "learning_rate": 0.00020766802200430516, "loss": 0.0688, "step": 6939 }, { "epoch": 0.9726699369306236, "grad_norm": 0.3520813286304474, "learning_rate": 0.00020765367137048552, "loss": 0.0375, "step": 6940 }, { "epoch": 0.9728100911002102, "grad_norm": 0.6201832890510559, "learning_rate": 0.00020763932073666585, "loss": 0.0721, "step": 6941 }, { "epoch": 0.9729502452697968, "grad_norm": 0.6734105944633484, "learning_rate": 
0.00020762497010284617, "loss": 0.0732, "step": 6942 }, { "epoch": 0.9730903994393834, "grad_norm": 0.5790233016014099, "learning_rate": 0.00020761061946902653, "loss": 0.0253, "step": 6943 }, { "epoch": 0.9732305536089698, "grad_norm": 0.7184246182441711, "learning_rate": 0.00020759626883520686, "loss": 0.0483, "step": 6944 }, { "epoch": 0.9733707077785564, "grad_norm": 0.6030838489532471, "learning_rate": 0.0002075819182013872, "loss": 0.0599, "step": 6945 }, { "epoch": 0.973510861948143, "grad_norm": 0.9266127347946167, "learning_rate": 0.00020756756756756757, "loss": 0.1178, "step": 6946 }, { "epoch": 0.9736510161177295, "grad_norm": 2.7538599967956543, "learning_rate": 0.0002075532169337479, "loss": 0.1661, "step": 6947 }, { "epoch": 0.9737911702873161, "grad_norm": 0.5794621109962463, "learning_rate": 0.00020753886629992823, "loss": 0.1154, "step": 6948 }, { "epoch": 0.9739313244569026, "grad_norm": 1.5444072484970093, "learning_rate": 0.00020752451566610858, "loss": 0.173, "step": 6949 }, { "epoch": 0.9740714786264891, "grad_norm": 1.1836755275726318, "learning_rate": 0.0002075101650322889, "loss": 0.1901, "step": 6950 }, { "epoch": 0.9742116327960757, "grad_norm": 0.27281269431114197, "learning_rate": 0.00020749581439846924, "loss": 0.0366, "step": 6951 }, { "epoch": 0.9743517869656623, "grad_norm": 0.2458818256855011, "learning_rate": 0.0002074814637646496, "loss": 0.0381, "step": 6952 }, { "epoch": 0.9744919411352487, "grad_norm": 0.4554118812084198, "learning_rate": 0.00020746711313082992, "loss": 0.102, "step": 6953 }, { "epoch": 0.9746320953048353, "grad_norm": 0.5654162764549255, "learning_rate": 0.00020745276249701025, "loss": 0.099, "step": 6954 }, { "epoch": 0.9747722494744219, "grad_norm": 0.31239786744117737, "learning_rate": 0.00020743841186319058, "loss": 0.0437, "step": 6955 }, { "epoch": 0.9749124036440084, "grad_norm": 0.2250964492559433, "learning_rate": 0.00020742406122937096, "loss": 0.0627, "step": 6956 }, { "epoch": 0.9750525578135949, 
"grad_norm": 0.4371689260005951, "learning_rate": 0.0002074097105955513, "loss": 0.1013, "step": 6957 }, { "epoch": 0.9751927119831815, "grad_norm": 0.27315187454223633, "learning_rate": 0.00020739535996173162, "loss": 0.044, "step": 6958 }, { "epoch": 0.975332866152768, "grad_norm": 0.5150197744369507, "learning_rate": 0.00020738100932791198, "loss": 0.0893, "step": 6959 }, { "epoch": 0.9754730203223546, "grad_norm": 0.4323754906654358, "learning_rate": 0.0002073666586940923, "loss": 0.1044, "step": 6960 }, { "epoch": 0.9756131744919412, "grad_norm": 0.22092288732528687, "learning_rate": 0.00020735230806027263, "loss": 0.0403, "step": 6961 }, { "epoch": 0.9757533286615276, "grad_norm": 0.20307494699954987, "learning_rate": 0.000207337957426453, "loss": 0.0573, "step": 6962 }, { "epoch": 0.9758934828311142, "grad_norm": 0.41007351875305176, "learning_rate": 0.00020732360679263332, "loss": 0.1052, "step": 6963 }, { "epoch": 0.9760336370007008, "grad_norm": 0.2851572036743164, "learning_rate": 0.00020730925615881365, "loss": 0.0552, "step": 6964 }, { "epoch": 0.9761737911702874, "grad_norm": 0.961644172668457, "learning_rate": 0.000207294905524994, "loss": 0.132, "step": 6965 }, { "epoch": 0.9763139453398738, "grad_norm": 0.21831761300563812, "learning_rate": 0.00020728055489117433, "loss": 0.031, "step": 6966 }, { "epoch": 0.9764540995094604, "grad_norm": 0.4800322353839874, "learning_rate": 0.0002072662042573547, "loss": 0.0603, "step": 6967 }, { "epoch": 0.976594253679047, "grad_norm": 0.56587815284729, "learning_rate": 0.00020725185362353504, "loss": 0.0589, "step": 6968 }, { "epoch": 0.9767344078486335, "grad_norm": 0.34528833627700806, "learning_rate": 0.00020723750298971537, "loss": 0.0437, "step": 6969 }, { "epoch": 0.9768745620182201, "grad_norm": 0.34439995884895325, "learning_rate": 0.0002072231523558957, "loss": 0.058, "step": 6970 }, { "epoch": 0.9770147161878066, "grad_norm": 0.1708739697933197, "learning_rate": 0.00020720880172207605, "loss": 0.0311, 
"step": 6971 }, { "epoch": 0.9771548703573931, "grad_norm": 0.7745507955551147, "learning_rate": 0.00020719445108825638, "loss": 0.1903, "step": 6972 }, { "epoch": 0.9772950245269797, "grad_norm": 0.19944646954536438, "learning_rate": 0.0002071801004544367, "loss": 0.0424, "step": 6973 }, { "epoch": 0.9774351786965663, "grad_norm": 0.29700997471809387, "learning_rate": 0.00020716574982061704, "loss": 0.0321, "step": 6974 }, { "epoch": 0.9775753328661527, "grad_norm": 0.30736637115478516, "learning_rate": 0.0002071513991867974, "loss": 0.1211, "step": 6975 }, { "epoch": 0.9777154870357393, "grad_norm": 0.1292431801557541, "learning_rate": 0.00020713704855297772, "loss": 0.0226, "step": 6976 }, { "epoch": 0.9778556412053259, "grad_norm": 0.4061063230037689, "learning_rate": 0.00020712269791915805, "loss": 0.0728, "step": 6977 }, { "epoch": 0.9779957953749124, "grad_norm": 0.5302044153213501, "learning_rate": 0.00020710834728533844, "loss": 0.0895, "step": 6978 }, { "epoch": 0.9781359495444989, "grad_norm": 0.2492242306470871, "learning_rate": 0.00020709399665151876, "loss": 0.0291, "step": 6979 }, { "epoch": 0.9782761037140855, "grad_norm": 0.5246913433074951, "learning_rate": 0.0002070796460176991, "loss": 0.1142, "step": 6980 }, { "epoch": 0.978416257883672, "grad_norm": 0.3783250153064728, "learning_rate": 0.00020706529538387945, "loss": 0.041, "step": 6981 }, { "epoch": 0.9785564120532586, "grad_norm": 0.9292603731155396, "learning_rate": 0.00020705094475005978, "loss": 0.1006, "step": 6982 }, { "epoch": 0.9786965662228452, "grad_norm": 0.16517627239227295, "learning_rate": 0.0002070365941162401, "loss": 0.0327, "step": 6983 }, { "epoch": 0.9788367203924316, "grad_norm": 0.40302616357803345, "learning_rate": 0.00020702224348242046, "loss": 0.0198, "step": 6984 }, { "epoch": 0.9789768745620182, "grad_norm": 0.282459020614624, "learning_rate": 0.0002070078928486008, "loss": 0.0566, "step": 6985 }, { "epoch": 0.9791170287316048, "grad_norm": 0.129927396774292, 
"learning_rate": 0.00020699354221478112, "loss": 0.0082, "step": 6986 }, { "epoch": 0.9792571829011913, "grad_norm": 0.33250609040260315, "learning_rate": 0.0002069791915809615, "loss": 0.049, "step": 6987 }, { "epoch": 0.9793973370707778, "grad_norm": 0.41602328419685364, "learning_rate": 0.00020696484094714183, "loss": 0.0519, "step": 6988 }, { "epoch": 0.9795374912403644, "grad_norm": 0.5409842729568481, "learning_rate": 0.00020695049031332216, "loss": 0.054, "step": 6989 }, { "epoch": 0.979677645409951, "grad_norm": 0.5139724612236023, "learning_rate": 0.0002069361396795025, "loss": 0.0821, "step": 6990 }, { "epoch": 0.9798177995795375, "grad_norm": 0.5147519111633301, "learning_rate": 0.00020692178904568284, "loss": 0.0543, "step": 6991 }, { "epoch": 0.9799579537491241, "grad_norm": 0.6792997717857361, "learning_rate": 0.00020690743841186317, "loss": 0.1143, "step": 6992 }, { "epoch": 0.9800981079187105, "grad_norm": 0.5357325673103333, "learning_rate": 0.0002068930877780435, "loss": 0.1133, "step": 6993 }, { "epoch": 0.9802382620882971, "grad_norm": 0.9429832100868225, "learning_rate": 0.00020687873714422386, "loss": 0.1229, "step": 6994 }, { "epoch": 0.9803784162578837, "grad_norm": 0.30730298161506653, "learning_rate": 0.00020686438651040418, "loss": 0.0658, "step": 6995 }, { "epoch": 0.9805185704274703, "grad_norm": 0.9290444254875183, "learning_rate": 0.0002068500358765845, "loss": 0.1383, "step": 6996 }, { "epoch": 0.9806587245970567, "grad_norm": 1.5623475313186646, "learning_rate": 0.00020683568524276487, "loss": 0.0622, "step": 6997 }, { "epoch": 0.9807988787666433, "grad_norm": 2.994185209274292, "learning_rate": 0.0002068213346089452, "loss": 0.1936, "step": 6998 }, { "epoch": 0.9809390329362299, "grad_norm": 0.5363599061965942, "learning_rate": 0.00020680698397512555, "loss": 0.053, "step": 6999 }, { "epoch": 0.9810791871058164, "grad_norm": 0.4846128225326538, "learning_rate": 0.0002067926333413059, "loss": 0.0322, "step": 7000 }, { "epoch": 
0.9812193412754029, "grad_norm": 0.30691367387771606, "learning_rate": 0.00020677828270748624, "loss": 0.0575, "step": 7001 }, { "epoch": 0.9813594954449895, "grad_norm": 0.2341775894165039, "learning_rate": 0.00020676393207366657, "loss": 0.0369, "step": 7002 }, { "epoch": 0.981499649614576, "grad_norm": 0.45710182189941406, "learning_rate": 0.00020674958143984692, "loss": 0.1012, "step": 7003 }, { "epoch": 0.9816398037841626, "grad_norm": 0.36037296056747437, "learning_rate": 0.00020673523080602725, "loss": 0.0721, "step": 7004 }, { "epoch": 0.9817799579537492, "grad_norm": 0.3422500789165497, "learning_rate": 0.00020672088017220758, "loss": 0.0141, "step": 7005 }, { "epoch": 0.9819201121233356, "grad_norm": 0.31993263959884644, "learning_rate": 0.00020670652953838793, "loss": 0.0453, "step": 7006 }, { "epoch": 0.9820602662929222, "grad_norm": 0.2883129119873047, "learning_rate": 0.00020669217890456826, "loss": 0.0328, "step": 7007 }, { "epoch": 0.9822004204625088, "grad_norm": 0.26729217171669006, "learning_rate": 0.0002066778282707486, "loss": 0.0564, "step": 7008 }, { "epoch": 0.9823405746320953, "grad_norm": 0.3590710163116455, "learning_rate": 0.00020666347763692892, "loss": 0.0599, "step": 7009 }, { "epoch": 0.9824807288016818, "grad_norm": 0.47798070311546326, "learning_rate": 0.0002066491270031093, "loss": 0.0468, "step": 7010 }, { "epoch": 0.9826208829712684, "grad_norm": 0.46382319927215576, "learning_rate": 0.00020663477636928963, "loss": 0.1042, "step": 7011 }, { "epoch": 0.9827610371408549, "grad_norm": 0.37648844718933105, "learning_rate": 0.00020662042573546996, "loss": 0.0452, "step": 7012 }, { "epoch": 0.9829011913104415, "grad_norm": 0.5499049425125122, "learning_rate": 0.00020660607510165032, "loss": 0.1431, "step": 7013 }, { "epoch": 0.9830413454800281, "grad_norm": 0.7389718294143677, "learning_rate": 0.00020659172446783064, "loss": 0.0712, "step": 7014 }, { "epoch": 0.9831814996496145, "grad_norm": 0.1977791041135788, "learning_rate": 
0.00020657737383401097, "loss": 0.0544, "step": 7015 }, { "epoch": 0.9833216538192011, "grad_norm": 0.5178593993186951, "learning_rate": 0.00020656302320019133, "loss": 0.073, "step": 7016 }, { "epoch": 0.9834618079887877, "grad_norm": 0.9876886010169983, "learning_rate": 0.00020654867256637166, "loss": 0.07, "step": 7017 }, { "epoch": 0.9836019621583743, "grad_norm": 0.3890713155269623, "learning_rate": 0.00020653432193255199, "loss": 0.0954, "step": 7018 }, { "epoch": 0.9837421163279607, "grad_norm": 0.5815483331680298, "learning_rate": 0.00020651997129873237, "loss": 0.0768, "step": 7019 }, { "epoch": 0.9838822704975473, "grad_norm": 0.4890800416469574, "learning_rate": 0.0002065056206649127, "loss": 0.1239, "step": 7020 }, { "epoch": 0.9840224246671339, "grad_norm": 0.5563451647758484, "learning_rate": 0.00020649127003109303, "loss": 0.0492, "step": 7021 }, { "epoch": 0.9841625788367204, "grad_norm": 0.5135653018951416, "learning_rate": 0.00020647691939727338, "loss": 0.1072, "step": 7022 }, { "epoch": 0.9843027330063069, "grad_norm": 0.8146799206733704, "learning_rate": 0.0002064625687634537, "loss": 0.0791, "step": 7023 }, { "epoch": 0.9844428871758935, "grad_norm": 0.3026643991470337, "learning_rate": 0.00020644821812963404, "loss": 0.0611, "step": 7024 }, { "epoch": 0.98458304134548, "grad_norm": 0.43137699365615845, "learning_rate": 0.00020643386749581437, "loss": 0.1023, "step": 7025 }, { "epoch": 0.9847231955150666, "grad_norm": 0.299809992313385, "learning_rate": 0.00020641951686199472, "loss": 0.0829, "step": 7026 }, { "epoch": 0.9848633496846532, "grad_norm": 0.5949607491493225, "learning_rate": 0.00020640516622817505, "loss": 0.0851, "step": 7027 }, { "epoch": 0.9850035038542396, "grad_norm": 0.38227760791778564, "learning_rate": 0.00020639081559435538, "loss": 0.0794, "step": 7028 }, { "epoch": 0.9851436580238262, "grad_norm": 0.540929913520813, "learning_rate": 0.00020637646496053573, "loss": 0.0371, "step": 7029 }, { "epoch": 0.9852838121934128, 
"grad_norm": 0.3091493844985962, "learning_rate": 0.0002063621143267161, "loss": 0.0339, "step": 7030 }, { "epoch": 0.9854239663629993, "grad_norm": 0.4328194856643677, "learning_rate": 0.00020634776369289642, "loss": 0.0772, "step": 7031 }, { "epoch": 0.9855641205325858, "grad_norm": 0.5255628824234009, "learning_rate": 0.00020633341305907677, "loss": 0.1025, "step": 7032 }, { "epoch": 0.9857042747021724, "grad_norm": 0.3415969908237457, "learning_rate": 0.0002063190624252571, "loss": 0.0847, "step": 7033 }, { "epoch": 0.9858444288717589, "grad_norm": 0.30125463008880615, "learning_rate": 0.00020630471179143743, "loss": 0.0998, "step": 7034 }, { "epoch": 0.9859845830413455, "grad_norm": 0.1967931091785431, "learning_rate": 0.0002062903611576178, "loss": 0.0239, "step": 7035 }, { "epoch": 0.9861247372109321, "grad_norm": 0.6195602416992188, "learning_rate": 0.00020627601052379812, "loss": 0.0901, "step": 7036 }, { "epoch": 0.9862648913805185, "grad_norm": 0.2731892168521881, "learning_rate": 0.00020626165988997844, "loss": 0.063, "step": 7037 }, { "epoch": 0.9864050455501051, "grad_norm": 0.3098612129688263, "learning_rate": 0.0002062473092561588, "loss": 0.0926, "step": 7038 }, { "epoch": 0.9865451997196917, "grad_norm": 0.2808132767677307, "learning_rate": 0.00020623295862233913, "loss": 0.0421, "step": 7039 }, { "epoch": 0.9866853538892782, "grad_norm": 0.34991103410720825, "learning_rate": 0.00020621860798851946, "loss": 0.1143, "step": 7040 }, { "epoch": 0.9868255080588647, "grad_norm": 1.4301457405090332, "learning_rate": 0.00020620425735469984, "loss": 0.0788, "step": 7041 }, { "epoch": 0.9869656622284513, "grad_norm": 0.26166096329689026, "learning_rate": 0.00020618990672088017, "loss": 0.0323, "step": 7042 }, { "epoch": 0.9871058163980378, "grad_norm": 0.8020908832550049, "learning_rate": 0.0002061755560870605, "loss": 0.162, "step": 7043 }, { "epoch": 0.9872459705676244, "grad_norm": 0.8709140419960022, "learning_rate": 0.00020616120545324083, "loss": 
0.1758, "step": 7044 }, { "epoch": 0.9873861247372109, "grad_norm": 0.8525486588478088, "learning_rate": 0.00020614685481942118, "loss": 0.0917, "step": 7045 }, { "epoch": 0.9875262789067975, "grad_norm": 1.0763194561004639, "learning_rate": 0.0002061325041856015, "loss": 0.1291, "step": 7046 }, { "epoch": 0.987666433076384, "grad_norm": 0.530787467956543, "learning_rate": 0.00020611815355178184, "loss": 0.0735, "step": 7047 }, { "epoch": 0.9878065872459706, "grad_norm": 1.07344388961792, "learning_rate": 0.0002061038029179622, "loss": 0.1167, "step": 7048 }, { "epoch": 0.9879467414155572, "grad_norm": 1.048150658607483, "learning_rate": 0.00020608945228414252, "loss": 0.1266, "step": 7049 }, { "epoch": 0.9880868955851436, "grad_norm": 4.462587833404541, "learning_rate": 0.00020607510165032285, "loss": 0.3827, "step": 7050 }, { "epoch": 0.9882270497547302, "grad_norm": 0.37979450821876526, "learning_rate": 0.00020606075101650323, "loss": 0.12, "step": 7051 }, { "epoch": 0.9883672039243168, "grad_norm": 0.26517969369888306, "learning_rate": 0.00020604640038268356, "loss": 0.0923, "step": 7052 }, { "epoch": 0.9885073580939033, "grad_norm": 0.2702295780181885, "learning_rate": 0.0002060320497488639, "loss": 0.0633, "step": 7053 }, { "epoch": 0.9886475122634898, "grad_norm": 0.45654866099357605, "learning_rate": 0.00020601769911504425, "loss": 0.0955, "step": 7054 }, { "epoch": 0.9887876664330764, "grad_norm": 0.5021839737892151, "learning_rate": 0.00020600334848122458, "loss": 0.0971, "step": 7055 }, { "epoch": 0.9889278206026629, "grad_norm": 0.3004218637943268, "learning_rate": 0.0002059889978474049, "loss": 0.12, "step": 7056 }, { "epoch": 0.9890679747722495, "grad_norm": 0.33169451355934143, "learning_rate": 0.00020597464721358526, "loss": 0.0886, "step": 7057 }, { "epoch": 0.9892081289418361, "grad_norm": 0.2871354818344116, "learning_rate": 0.0002059602965797656, "loss": 0.0313, "step": 7058 }, { "epoch": 0.9893482831114225, "grad_norm": 0.9618695378303528, 
"learning_rate": 0.00020594594594594592, "loss": 0.0979, "step": 7059 }, { "epoch": 0.9894884372810091, "grad_norm": 0.25487926602363586, "learning_rate": 0.00020593159531212625, "loss": 0.0388, "step": 7060 }, { "epoch": 0.9896285914505957, "grad_norm": 0.2643638551235199, "learning_rate": 0.0002059172446783066, "loss": 0.0427, "step": 7061 }, { "epoch": 0.9897687456201822, "grad_norm": 0.22066166996955872, "learning_rate": 0.00020590289404448696, "loss": 0.0677, "step": 7062 }, { "epoch": 0.9899088997897687, "grad_norm": 0.10679704695940018, "learning_rate": 0.00020588854341066729, "loss": 0.0187, "step": 7063 }, { "epoch": 0.9900490539593553, "grad_norm": 0.2386174350976944, "learning_rate": 0.00020587419277684764, "loss": 0.0562, "step": 7064 }, { "epoch": 0.9901892081289418, "grad_norm": 0.44058892130851746, "learning_rate": 0.00020585984214302797, "loss": 0.0825, "step": 7065 }, { "epoch": 0.9903293622985284, "grad_norm": 0.7175392508506775, "learning_rate": 0.0002058454915092083, "loss": 0.0641, "step": 7066 }, { "epoch": 0.9904695164681149, "grad_norm": 0.5369006991386414, "learning_rate": 0.00020583114087538865, "loss": 0.1013, "step": 7067 }, { "epoch": 0.9906096706377014, "grad_norm": 0.14437179267406464, "learning_rate": 0.00020581679024156898, "loss": 0.0151, "step": 7068 }, { "epoch": 0.990749824807288, "grad_norm": 0.3580746352672577, "learning_rate": 0.0002058024396077493, "loss": 0.0425, "step": 7069 }, { "epoch": 0.9908899789768746, "grad_norm": 0.20528078079223633, "learning_rate": 0.00020578808897392967, "loss": 0.028, "step": 7070 }, { "epoch": 0.9910301331464612, "grad_norm": 0.27812063694000244, "learning_rate": 0.00020577373834011, "loss": 0.0691, "step": 7071 }, { "epoch": 0.9911702873160476, "grad_norm": 0.2630431652069092, "learning_rate": 0.00020575938770629032, "loss": 0.0486, "step": 7072 }, { "epoch": 0.9913104414856342, "grad_norm": 0.20029127597808838, "learning_rate": 0.0002057450370724707, "loss": 0.0328, "step": 7073 }, { 
"epoch": 0.9914505956552208, "grad_norm": 0.8080753087997437, "learning_rate": 0.00020573068643865104, "loss": 0.1783, "step": 7074 }, { "epoch": 0.9915907498248073, "grad_norm": 0.3186112940311432, "learning_rate": 0.00020571633580483136, "loss": 0.0359, "step": 7075 }, { "epoch": 0.9917309039943938, "grad_norm": 0.18976891040802002, "learning_rate": 0.0002057019851710117, "loss": 0.0334, "step": 7076 }, { "epoch": 0.9918710581639804, "grad_norm": 0.3050951659679413, "learning_rate": 0.00020568763453719205, "loss": 0.0769, "step": 7077 }, { "epoch": 0.9920112123335669, "grad_norm": 0.42824047803878784, "learning_rate": 0.00020567328390337238, "loss": 0.0829, "step": 7078 }, { "epoch": 0.9921513665031535, "grad_norm": 0.6803753972053528, "learning_rate": 0.0002056589332695527, "loss": 0.1113, "step": 7079 }, { "epoch": 0.9922915206727401, "grad_norm": 0.38260748982429504, "learning_rate": 0.00020564458263573306, "loss": 0.1025, "step": 7080 }, { "epoch": 0.9924316748423265, "grad_norm": 0.20176684856414795, "learning_rate": 0.0002056302320019134, "loss": 0.0237, "step": 7081 }, { "epoch": 0.9925718290119131, "grad_norm": 0.3585328757762909, "learning_rate": 0.00020561588136809372, "loss": 0.0389, "step": 7082 }, { "epoch": 0.9927119831814997, "grad_norm": 0.5209757089614868, "learning_rate": 0.0002056015307342741, "loss": 0.1041, "step": 7083 }, { "epoch": 0.9928521373510862, "grad_norm": 0.45334312319755554, "learning_rate": 0.00020558718010045443, "loss": 0.0695, "step": 7084 }, { "epoch": 0.9929922915206727, "grad_norm": 0.5760122537612915, "learning_rate": 0.00020557282946663476, "loss": 0.1058, "step": 7085 }, { "epoch": 0.9931324456902593, "grad_norm": 0.5242316722869873, "learning_rate": 0.0002055584788328151, "loss": 0.0604, "step": 7086 }, { "epoch": 0.9932725998598458, "grad_norm": 0.382184773683548, "learning_rate": 0.00020554412819899544, "loss": 0.103, "step": 7087 }, { "epoch": 0.9934127540294324, "grad_norm": 0.6734559535980225, "learning_rate": 
0.00020552977756517577, "loss": 0.049, "step": 7088 }, { "epoch": 0.9935529081990189, "grad_norm": 0.2894585430622101, "learning_rate": 0.00020551542693135613, "loss": 0.0576, "step": 7089 }, { "epoch": 0.9936930623686054, "grad_norm": 0.6706838607788086, "learning_rate": 0.00020550107629753645, "loss": 0.1325, "step": 7090 }, { "epoch": 0.993833216538192, "grad_norm": 0.33583569526672363, "learning_rate": 0.00020548672566371678, "loss": 0.0576, "step": 7091 }, { "epoch": 0.9939733707077786, "grad_norm": 0.3077923357486725, "learning_rate": 0.00020547237502989714, "loss": 0.0593, "step": 7092 }, { "epoch": 0.9941135248773652, "grad_norm": 0.18344588577747345, "learning_rate": 0.00020545802439607747, "loss": 0.0261, "step": 7093 }, { "epoch": 0.9942536790469516, "grad_norm": 0.7783629894256592, "learning_rate": 0.00020544367376225782, "loss": 0.069, "step": 7094 }, { "epoch": 0.9943938332165382, "grad_norm": 0.23166191577911377, "learning_rate": 0.00020542932312843815, "loss": 0.0495, "step": 7095 }, { "epoch": 0.9945339873861248, "grad_norm": 0.4915904402732849, "learning_rate": 0.0002054149724946185, "loss": 0.1026, "step": 7096 }, { "epoch": 0.9946741415557113, "grad_norm": 0.2845335900783539, "learning_rate": 0.00020540062186079884, "loss": 0.032, "step": 7097 }, { "epoch": 0.9948142957252978, "grad_norm": 1.0460699796676636, "learning_rate": 0.00020538627122697916, "loss": 0.1769, "step": 7098 }, { "epoch": 0.9949544498948844, "grad_norm": 1.540544867515564, "learning_rate": 0.00020537192059315952, "loss": 0.1013, "step": 7099 }, { "epoch": 0.9950946040644709, "grad_norm": 2.2109038829803467, "learning_rate": 0.00020535756995933985, "loss": 0.0651, "step": 7100 }, { "epoch": 0.9952347582340575, "grad_norm": 0.474007785320282, "learning_rate": 0.00020534321932552018, "loss": 0.0673, "step": 7101 }, { "epoch": 0.9953749124036441, "grad_norm": 0.5390128493309021, "learning_rate": 0.00020532886869170053, "loss": 0.1015, "step": 7102 }, { "epoch": 
0.9955150665732305, "grad_norm": 0.3510839343070984, "learning_rate": 0.00020531451805788086, "loss": 0.0503, "step": 7103 }, { "epoch": 0.9956552207428171, "grad_norm": 0.755711555480957, "learning_rate": 0.0002053001674240612, "loss": 0.0858, "step": 7104 }, { "epoch": 0.9957953749124037, "grad_norm": 0.8366712927818298, "learning_rate": 0.00020528581679024157, "loss": 0.1422, "step": 7105 }, { "epoch": 0.9959355290819902, "grad_norm": 0.5402758717536926, "learning_rate": 0.0002052714661564219, "loss": 0.0571, "step": 7106 }, { "epoch": 0.9960756832515767, "grad_norm": 0.2994421720504761, "learning_rate": 0.00020525711552260223, "loss": 0.0333, "step": 7107 }, { "epoch": 0.9962158374211633, "grad_norm": 0.31326714158058167, "learning_rate": 0.00020524276488878259, "loss": 0.0439, "step": 7108 }, { "epoch": 0.9963559915907498, "grad_norm": 0.6625999808311462, "learning_rate": 0.00020522841425496291, "loss": 0.0824, "step": 7109 }, { "epoch": 0.9964961457603364, "grad_norm": 0.3427512049674988, "learning_rate": 0.00020521406362114324, "loss": 0.0581, "step": 7110 }, { "epoch": 0.9966362999299229, "grad_norm": 0.48263347148895264, "learning_rate": 0.00020519971298732357, "loss": 0.0888, "step": 7111 }, { "epoch": 0.9967764540995094, "grad_norm": 0.2981701195240021, "learning_rate": 0.00020518536235350393, "loss": 0.099, "step": 7112 }, { "epoch": 0.996916608269096, "grad_norm": 0.4356768727302551, "learning_rate": 0.00020517101171968426, "loss": 0.0602, "step": 7113 }, { "epoch": 0.9970567624386826, "grad_norm": 0.46916916966438293, "learning_rate": 0.00020515666108586458, "loss": 0.0668, "step": 7114 }, { "epoch": 0.9971969166082691, "grad_norm": 0.36121895909309387, "learning_rate": 0.00020514231045204497, "loss": 0.0555, "step": 7115 }, { "epoch": 0.9973370707778556, "grad_norm": 0.38666602969169617, "learning_rate": 0.0002051279598182253, "loss": 0.0372, "step": 7116 }, { "epoch": 0.9974772249474422, "grad_norm": 0.7229727506637573, "learning_rate": 
0.00020511360918440562, "loss": 0.1263, "step": 7117 }, { "epoch": 0.9976173791170287, "grad_norm": 0.4332045316696167, "learning_rate": 0.00020509925855058598, "loss": 0.0761, "step": 7118 }, { "epoch": 0.9977575332866153, "grad_norm": 0.3635750412940979, "learning_rate": 0.0002050849079167663, "loss": 0.0424, "step": 7119 }, { "epoch": 0.9978976874562018, "grad_norm": 0.36419880390167236, "learning_rate": 0.00020507055728294664, "loss": 0.0807, "step": 7120 }, { "epoch": 0.9980378416257883, "grad_norm": 0.3582659065723419, "learning_rate": 0.000205056206649127, "loss": 0.0689, "step": 7121 }, { "epoch": 0.9981779957953749, "grad_norm": 0.14960364997386932, "learning_rate": 0.00020504185601530732, "loss": 0.0314, "step": 7122 }, { "epoch": 0.9983181499649615, "grad_norm": 0.4342552721500397, "learning_rate": 0.00020502750538148765, "loss": 0.046, "step": 7123 }, { "epoch": 0.9984583041345481, "grad_norm": 0.37765437364578247, "learning_rate": 0.000205013154747668, "loss": 0.1352, "step": 7124 }, { "epoch": 0.9985984583041345, "grad_norm": 0.8114736676216125, "learning_rate": 0.00020499880411384833, "loss": 0.1503, "step": 7125 }, { "epoch": 0.9987386124737211, "grad_norm": 0.24331362545490265, "learning_rate": 0.0002049844534800287, "loss": 0.0537, "step": 7126 }, { "epoch": 0.9988787666433077, "grad_norm": 0.3869306147098541, "learning_rate": 0.00020497010284620905, "loss": 0.1128, "step": 7127 }, { "epoch": 0.9990189208128942, "grad_norm": 0.7088903784751892, "learning_rate": 0.00020495575221238937, "loss": 0.1281, "step": 7128 }, { "epoch": 0.9991590749824807, "grad_norm": 0.6007456183433533, "learning_rate": 0.0002049414015785697, "loss": 0.1023, "step": 7129 }, { "epoch": 0.9992992291520673, "grad_norm": 0.3305444121360779, "learning_rate": 0.00020492705094475003, "loss": 0.0294, "step": 7130 }, { "epoch": 0.9994393833216538, "grad_norm": 0.21787963807582855, "learning_rate": 0.00020491270031093039, "loss": 0.0501, "step": 7131 }, { "epoch": 
0.9995795374912404, "grad_norm": 0.3425024151802063, "learning_rate": 0.00020489834967711071, "loss": 0.0938, "step": 7132 }, { "epoch": 0.9997196916608269, "grad_norm": 1.214779019355774, "learning_rate": 0.00020488399904329104, "loss": 0.1061, "step": 7133 }, { "epoch": 0.9998598458304134, "grad_norm": 0.9670930504798889, "learning_rate": 0.0002048696484094714, "loss": 0.121, "step": 7134 }, { "epoch": 1.0, "grad_norm": 2.462139368057251, "learning_rate": 0.00020485529777565173, "loss": 0.1329, "step": 7135 }, { "epoch": 1.0001401541695865, "grad_norm": 0.2860074043273926, "learning_rate": 0.00020484094714183206, "loss": 0.067, "step": 7136 }, { "epoch": 1.0002803083391731, "grad_norm": 0.5792491436004639, "learning_rate": 0.00020482659650801244, "loss": 0.1233, "step": 7137 }, { "epoch": 1.0004204625087596, "grad_norm": 0.23094545304775238, "learning_rate": 0.00020481224587419277, "loss": 0.0709, "step": 7138 }, { "epoch": 1.0005606166783463, "grad_norm": 0.14148405194282532, "learning_rate": 0.0002047978952403731, "loss": 0.0276, "step": 7139 }, { "epoch": 1.0007007708479327, "grad_norm": 0.20996734499931335, "learning_rate": 0.00020478354460655345, "loss": 0.0311, "step": 7140 }, { "epoch": 1.0008409250175192, "grad_norm": 0.24814346432685852, "learning_rate": 0.00020476919397273378, "loss": 0.0463, "step": 7141 }, { "epoch": 1.0009810791871059, "grad_norm": 0.27665844559669495, "learning_rate": 0.0002047548433389141, "loss": 0.0838, "step": 7142 }, { "epoch": 1.0011212333566923, "grad_norm": 0.3728223145008087, "learning_rate": 0.00020474049270509446, "loss": 0.0543, "step": 7143 }, { "epoch": 1.0012613875262788, "grad_norm": 0.29025015234947205, "learning_rate": 0.0002047261420712748, "loss": 0.0834, "step": 7144 }, { "epoch": 1.0014015416958655, "grad_norm": 0.26596614718437195, "learning_rate": 0.00020471179143745512, "loss": 0.0405, "step": 7145 }, { "epoch": 1.001541695865452, "grad_norm": 0.22693288326263428, "learning_rate": 0.00020469744080363545, 
"loss": 0.0321, "step": 7146 }, { "epoch": 1.0016818500350386, "grad_norm": 0.46044325828552246, "learning_rate": 0.00020468309016981583, "loss": 0.0746, "step": 7147 }, { "epoch": 1.001822004204625, "grad_norm": 0.28197091817855835, "learning_rate": 0.00020466873953599616, "loss": 0.0357, "step": 7148 }, { "epoch": 1.0019621583742115, "grad_norm": 0.13461196422576904, "learning_rate": 0.0002046543889021765, "loss": 0.0276, "step": 7149 }, { "epoch": 1.0021023125437982, "grad_norm": 0.33018240332603455, "learning_rate": 0.00020464003826835685, "loss": 0.0626, "step": 7150 }, { "epoch": 1.0022424667133847, "grad_norm": 1.112799882888794, "learning_rate": 0.00020462568763453717, "loss": 0.0505, "step": 7151 }, { "epoch": 1.0023826208829714, "grad_norm": 0.3565979301929474, "learning_rate": 0.0002046113370007175, "loss": 0.0413, "step": 7152 }, { "epoch": 1.0025227750525578, "grad_norm": 0.40753549337387085, "learning_rate": 0.00020459698636689786, "loss": 0.0382, "step": 7153 }, { "epoch": 1.0026629292221443, "grad_norm": 0.5862252116203308, "learning_rate": 0.0002045826357330782, "loss": 0.1047, "step": 7154 }, { "epoch": 1.002803083391731, "grad_norm": 0.2554514408111572, "learning_rate": 0.00020456828509925852, "loss": 0.051, "step": 7155 }, { "epoch": 1.0029432375613174, "grad_norm": 0.5357496738433838, "learning_rate": 0.00020455393446543887, "loss": 0.0814, "step": 7156 }, { "epoch": 1.0030833917309039, "grad_norm": 0.2656848728656769, "learning_rate": 0.00020453958383161923, "loss": 0.0194, "step": 7157 }, { "epoch": 1.0032235459004906, "grad_norm": 0.5183130502700806, "learning_rate": 0.00020452523319779956, "loss": 0.0368, "step": 7158 }, { "epoch": 1.003363700070077, "grad_norm": 0.3824182152748108, "learning_rate": 0.0002045108825639799, "loss": 0.1042, "step": 7159 }, { "epoch": 1.0035038542396637, "grad_norm": 0.5910611748695374, "learning_rate": 0.00020449653193016024, "loss": 0.0571, "step": 7160 }, { "epoch": 1.0036440084092502, "grad_norm": 
0.3872907757759094, "learning_rate": 0.00020448218129634057, "loss": 0.1088, "step": 7161 }, { "epoch": 1.0037841625788366, "grad_norm": 0.6107721924781799, "learning_rate": 0.00020446783066252092, "loss": 0.0857, "step": 7162 }, { "epoch": 1.0039243167484233, "grad_norm": 0.42903822660446167, "learning_rate": 0.00020445348002870125, "loss": 0.074, "step": 7163 }, { "epoch": 1.0040644709180098, "grad_norm": 0.4483715891838074, "learning_rate": 0.00020443912939488158, "loss": 0.0566, "step": 7164 }, { "epoch": 1.0042046250875964, "grad_norm": 0.25098010897636414, "learning_rate": 0.0002044247787610619, "loss": 0.0272, "step": 7165 }, { "epoch": 1.004344779257183, "grad_norm": 1.1176609992980957, "learning_rate": 0.00020441042812724227, "loss": 0.0824, "step": 7166 }, { "epoch": 1.0044849334267694, "grad_norm": 0.49698013067245483, "learning_rate": 0.0002043960774934226, "loss": 0.0643, "step": 7167 }, { "epoch": 1.004625087596356, "grad_norm": 0.4594554901123047, "learning_rate": 0.00020438172685960292, "loss": 0.0838, "step": 7168 }, { "epoch": 1.0047652417659425, "grad_norm": 0.41447314620018005, "learning_rate": 0.0002043673762257833, "loss": 0.0737, "step": 7169 }, { "epoch": 1.0049053959355292, "grad_norm": 0.46276921033859253, "learning_rate": 0.00020435302559196363, "loss": 0.0711, "step": 7170 }, { "epoch": 1.0050455501051156, "grad_norm": 0.5921447277069092, "learning_rate": 0.00020433867495814396, "loss": 0.0717, "step": 7171 }, { "epoch": 1.005185704274702, "grad_norm": 0.3937142491340637, "learning_rate": 0.00020432432432432432, "loss": 0.0452, "step": 7172 }, { "epoch": 1.0053258584442888, "grad_norm": 0.11307530105113983, "learning_rate": 0.00020430997369050465, "loss": 0.0168, "step": 7173 }, { "epoch": 1.0054660126138752, "grad_norm": 0.7994346618652344, "learning_rate": 0.00020429562305668498, "loss": 0.0775, "step": 7174 }, { "epoch": 1.0056061667834617, "grad_norm": 0.4385305643081665, "learning_rate": 0.00020428127242286533, "loss": 0.115, 
"step": 7175 }, { "epoch": 1.0057463209530484, "grad_norm": 0.3255448043346405, "learning_rate": 0.00020426692178904566, "loss": 0.0223, "step": 7176 }, { "epoch": 1.0058864751226348, "grad_norm": 0.5701867341995239, "learning_rate": 0.000204252571155226, "loss": 0.0666, "step": 7177 }, { "epoch": 1.0060266292922215, "grad_norm": 0.6423361897468567, "learning_rate": 0.00020423822052140637, "loss": 0.1026, "step": 7178 }, { "epoch": 1.006166783461808, "grad_norm": 0.38494694232940674, "learning_rate": 0.0002042238698875867, "loss": 0.0334, "step": 7179 }, { "epoch": 1.0063069376313944, "grad_norm": 0.6565921902656555, "learning_rate": 0.00020420951925376703, "loss": 0.0849, "step": 7180 }, { "epoch": 1.0064470918009811, "grad_norm": 0.43672704696655273, "learning_rate": 0.00020419516861994736, "loss": 0.1401, "step": 7181 }, { "epoch": 1.0065872459705676, "grad_norm": 0.4526292681694031, "learning_rate": 0.0002041808179861277, "loss": 0.0385, "step": 7182 }, { "epoch": 1.0067274001401543, "grad_norm": 0.22365699708461761, "learning_rate": 0.00020416646735230804, "loss": 0.037, "step": 7183 }, { "epoch": 1.0068675543097407, "grad_norm": 1.9282917976379395, "learning_rate": 0.00020415211671848837, "loss": 0.1219, "step": 7184 }, { "epoch": 1.0070077084793272, "grad_norm": 2.3473336696624756, "learning_rate": 0.00020413776608466872, "loss": 0.3394, "step": 7185 }, { "epoch": 1.0071478626489139, "grad_norm": 0.1598985493183136, "learning_rate": 0.00020412341545084905, "loss": 0.0296, "step": 7186 }, { "epoch": 1.0072880168185003, "grad_norm": 0.2726248800754547, "learning_rate": 0.00020410906481702938, "loss": 0.0383, "step": 7187 }, { "epoch": 1.0074281709880868, "grad_norm": 0.18712691962718964, "learning_rate": 0.00020409471418320974, "loss": 0.0425, "step": 7188 }, { "epoch": 1.0075683251576735, "grad_norm": 0.4840964674949646, "learning_rate": 0.0002040803635493901, "loss": 0.0896, "step": 7189 }, { "epoch": 1.00770847932726, "grad_norm": 0.3585018217563629, 
"learning_rate": 0.00020406601291557042, "loss": 0.0662, "step": 7190 }, { "epoch": 1.0078486334968466, "grad_norm": 0.3046760559082031, "learning_rate": 0.00020405166228175078, "loss": 0.0494, "step": 7191 }, { "epoch": 1.007988787666433, "grad_norm": 0.2689339816570282, "learning_rate": 0.0002040373116479311, "loss": 0.0578, "step": 7192 }, { "epoch": 1.0081289418360195, "grad_norm": 0.47273269295692444, "learning_rate": 0.00020402296101411143, "loss": 0.124, "step": 7193 }, { "epoch": 1.0082690960056062, "grad_norm": 0.6697574853897095, "learning_rate": 0.0002040086103802918, "loss": 0.0386, "step": 7194 }, { "epoch": 1.0084092501751927, "grad_norm": 0.5874694585800171, "learning_rate": 0.00020399425974647212, "loss": 0.109, "step": 7195 }, { "epoch": 1.0085494043447794, "grad_norm": 0.2584046423435211, "learning_rate": 0.00020397990911265245, "loss": 0.0254, "step": 7196 }, { "epoch": 1.0086895585143658, "grad_norm": 0.2864406704902649, "learning_rate": 0.0002039655584788328, "loss": 0.0323, "step": 7197 }, { "epoch": 1.0088297126839523, "grad_norm": 0.20431135594844818, "learning_rate": 0.00020395120784501313, "loss": 0.0292, "step": 7198 }, { "epoch": 1.008969866853539, "grad_norm": 0.5439653992652893, "learning_rate": 0.00020393685721119346, "loss": 0.057, "step": 7199 }, { "epoch": 1.0091100210231254, "grad_norm": 0.29200273752212524, "learning_rate": 0.0002039225065773738, "loss": 0.0341, "step": 7200 }, { "epoch": 1.0092501751927119, "grad_norm": 0.31157270073890686, "learning_rate": 0.00020390815594355417, "loss": 0.0485, "step": 7201 }, { "epoch": 1.0093903293622986, "grad_norm": 0.17556287348270416, "learning_rate": 0.0002038938053097345, "loss": 0.0326, "step": 7202 }, { "epoch": 1.009530483531885, "grad_norm": 0.510563850402832, "learning_rate": 0.00020387945467591483, "loss": 0.039, "step": 7203 }, { "epoch": 1.0096706377014717, "grad_norm": 0.3272097706794739, "learning_rate": 0.00020386510404209518, "loss": 0.0279, "step": 7204 }, { "epoch": 
1.0098107918710582, "grad_norm": 0.694811999797821, "learning_rate": 0.0002038507534082755, "loss": 0.1093, "step": 7205 }, { "epoch": 1.0099509460406446, "grad_norm": 0.16517947614192963, "learning_rate": 0.00020383640277445584, "loss": 0.0274, "step": 7206 }, { "epoch": 1.0100911002102313, "grad_norm": 0.24342204630374908, "learning_rate": 0.0002038220521406362, "loss": 0.0234, "step": 7207 }, { "epoch": 1.0102312543798178, "grad_norm": 0.2100415825843811, "learning_rate": 0.00020380770150681653, "loss": 0.0424, "step": 7208 }, { "epoch": 1.0103714085494044, "grad_norm": 0.2612631916999817, "learning_rate": 0.00020379335087299685, "loss": 0.0428, "step": 7209 }, { "epoch": 1.010511562718991, "grad_norm": 0.15079949796199799, "learning_rate": 0.00020377900023917724, "loss": 0.0111, "step": 7210 }, { "epoch": 1.0106517168885774, "grad_norm": 0.3396081030368805, "learning_rate": 0.00020376464960535757, "loss": 0.0989, "step": 7211 }, { "epoch": 1.010791871058164, "grad_norm": 0.4676767587661743, "learning_rate": 0.0002037502989715379, "loss": 0.0604, "step": 7212 }, { "epoch": 1.0109320252277505, "grad_norm": 0.24942511320114136, "learning_rate": 0.00020373594833771825, "loss": 0.056, "step": 7213 }, { "epoch": 1.0110721793973372, "grad_norm": 0.40445375442504883, "learning_rate": 0.00020372159770389858, "loss": 0.1008, "step": 7214 }, { "epoch": 1.0112123335669236, "grad_norm": 0.3608073890209198, "learning_rate": 0.0002037072470700789, "loss": 0.0372, "step": 7215 }, { "epoch": 1.01135248773651, "grad_norm": 0.15398520231246948, "learning_rate": 0.00020369289643625924, "loss": 0.0171, "step": 7216 }, { "epoch": 1.0114926419060968, "grad_norm": 0.45473670959472656, "learning_rate": 0.0002036785458024396, "loss": 0.0668, "step": 7217 }, { "epoch": 1.0116327960756832, "grad_norm": 0.5309837460517883, "learning_rate": 0.00020366419516861992, "loss": 0.0651, "step": 7218 }, { "epoch": 1.0117729502452697, "grad_norm": 0.7329338192939758, "learning_rate": 
0.00020364984453480025, "loss": 0.0932, "step": 7219 }, { "epoch": 1.0119131044148564, "grad_norm": 0.36260029673576355, "learning_rate": 0.0002036354939009806, "loss": 0.0656, "step": 7220 }, { "epoch": 1.0120532585844428, "grad_norm": 0.35101327300071716, "learning_rate": 0.00020362114326716096, "loss": 0.0616, "step": 7221 }, { "epoch": 1.0121934127540295, "grad_norm": 0.43613189458847046, "learning_rate": 0.0002036067926333413, "loss": 0.0398, "step": 7222 }, { "epoch": 1.012333566923616, "grad_norm": 0.478753924369812, "learning_rate": 0.00020359244199952164, "loss": 0.0605, "step": 7223 }, { "epoch": 1.0124737210932024, "grad_norm": 0.07942412048578262, "learning_rate": 0.00020357809136570197, "loss": 0.0104, "step": 7224 }, { "epoch": 1.0126138752627891, "grad_norm": 0.5202214121818542, "learning_rate": 0.0002035637407318823, "loss": 0.0941, "step": 7225 }, { "epoch": 1.0127540294323756, "grad_norm": 0.27747607231140137, "learning_rate": 0.00020354939009806266, "loss": 0.0459, "step": 7226 }, { "epoch": 1.0128941836019623, "grad_norm": 0.27870842814445496, "learning_rate": 0.00020353503946424299, "loss": 0.0239, "step": 7227 }, { "epoch": 1.0130343377715487, "grad_norm": 0.30336645245552063, "learning_rate": 0.00020352068883042331, "loss": 0.01, "step": 7228 }, { "epoch": 1.0131744919411352, "grad_norm": 0.8620482683181763, "learning_rate": 0.00020350633819660367, "loss": 0.0779, "step": 7229 }, { "epoch": 1.0133146461107219, "grad_norm": 0.48967793583869934, "learning_rate": 0.000203491987562784, "loss": 0.0746, "step": 7230 }, { "epoch": 1.0134548002803083, "grad_norm": 0.22333772480487823, "learning_rate": 0.00020347763692896433, "loss": 0.0359, "step": 7231 }, { "epoch": 1.0135949544498948, "grad_norm": 0.35928311944007874, "learning_rate": 0.0002034632862951447, "loss": 0.045, "step": 7232 }, { "epoch": 1.0137351086194815, "grad_norm": 1.6389645338058472, "learning_rate": 0.00020344893566132504, "loss": 0.3991, "step": 7233 }, { "epoch": 
1.013875262789068, "grad_norm": 1.3229844570159912, "learning_rate": 0.00020343458502750537, "loss": 0.0578, "step": 7234 }, { "epoch": 1.0140154169586546, "grad_norm": 0.017571115866303444, "learning_rate": 0.0002034202343936857, "loss": 0.002, "step": 7235 }, { "epoch": 1.014155571128241, "grad_norm": 0.42118310928344727, "learning_rate": 0.00020340588375986605, "loss": 0.0795, "step": 7236 }, { "epoch": 1.0142957252978275, "grad_norm": 0.326107919216156, "learning_rate": 0.00020339153312604638, "loss": 0.1243, "step": 7237 }, { "epoch": 1.0144358794674142, "grad_norm": 0.3536466956138611, "learning_rate": 0.0002033771824922267, "loss": 0.0651, "step": 7238 }, { "epoch": 1.0145760336370007, "grad_norm": 0.5534151792526245, "learning_rate": 0.00020336283185840706, "loss": 0.0845, "step": 7239 }, { "epoch": 1.0147161878065873, "grad_norm": 0.2486460953950882, "learning_rate": 0.0002033484812245874, "loss": 0.0369, "step": 7240 }, { "epoch": 1.0148563419761738, "grad_norm": 0.44978901743888855, "learning_rate": 0.00020333413059076772, "loss": 0.0351, "step": 7241 }, { "epoch": 1.0149964961457603, "grad_norm": 0.29931503534317017, "learning_rate": 0.0002033197799569481, "loss": 0.0464, "step": 7242 }, { "epoch": 1.015136650315347, "grad_norm": 0.36510804295539856, "learning_rate": 0.00020330542932312843, "loss": 0.0792, "step": 7243 }, { "epoch": 1.0152768044849334, "grad_norm": 0.4880961775779724, "learning_rate": 0.00020329107868930876, "loss": 0.0824, "step": 7244 }, { "epoch": 1.0154169586545199, "grad_norm": 0.16730909049510956, "learning_rate": 0.00020327672805548912, "loss": 0.031, "step": 7245 }, { "epoch": 1.0155571128241065, "grad_norm": 0.2527766227722168, "learning_rate": 0.00020326237742166944, "loss": 0.0623, "step": 7246 }, { "epoch": 1.015697266993693, "grad_norm": 0.4247931241989136, "learning_rate": 0.00020324802678784977, "loss": 0.0325, "step": 7247 }, { "epoch": 1.0158374211632797, "grad_norm": 0.17874020338058472, "learning_rate": 
0.00020323367615403013, "loss": 0.0741, "step": 7248 }, { "epoch": 1.0159775753328661, "grad_norm": 0.22037763893604279, "learning_rate": 0.00020321932552021046, "loss": 0.0345, "step": 7249 }, { "epoch": 1.0161177295024526, "grad_norm": 0.18538211286067963, "learning_rate": 0.00020320497488639079, "loss": 0.0505, "step": 7250 }, { "epoch": 1.0162578836720393, "grad_norm": 0.32006552815437317, "learning_rate": 0.00020319062425257111, "loss": 0.0355, "step": 7251 }, { "epoch": 1.0163980378416257, "grad_norm": 0.20736652612686157, "learning_rate": 0.0002031762736187515, "loss": 0.0389, "step": 7252 }, { "epoch": 1.0165381920112124, "grad_norm": 0.23939959704875946, "learning_rate": 0.00020316192298493183, "loss": 0.0317, "step": 7253 }, { "epoch": 1.0166783461807989, "grad_norm": 0.4261023998260498, "learning_rate": 0.00020314757235111215, "loss": 0.0491, "step": 7254 }, { "epoch": 1.0168185003503853, "grad_norm": 0.320877343416214, "learning_rate": 0.0002031332217172925, "loss": 0.0254, "step": 7255 }, { "epoch": 1.016958654519972, "grad_norm": 0.4134514033794403, "learning_rate": 0.00020311887108347284, "loss": 0.0541, "step": 7256 }, { "epoch": 1.0170988086895585, "grad_norm": 0.1331988424062729, "learning_rate": 0.00020310452044965317, "loss": 0.0114, "step": 7257 }, { "epoch": 1.0172389628591452, "grad_norm": 0.31431692838668823, "learning_rate": 0.00020309016981583352, "loss": 0.0419, "step": 7258 }, { "epoch": 1.0173791170287316, "grad_norm": 0.45364242792129517, "learning_rate": 0.00020307581918201385, "loss": 0.0526, "step": 7259 }, { "epoch": 1.017519271198318, "grad_norm": 0.32927805185317993, "learning_rate": 0.00020306146854819418, "loss": 0.0374, "step": 7260 }, { "epoch": 1.0176594253679048, "grad_norm": 0.23745539784431458, "learning_rate": 0.00020304711791437454, "loss": 0.0373, "step": 7261 }, { "epoch": 1.0177995795374912, "grad_norm": 0.23946061730384827, "learning_rate": 0.00020303276728055486, "loss": 0.0396, "step": 7262 }, { "epoch": 
1.0179397337070777, "grad_norm": 0.2508072555065155, "learning_rate": 0.0002030184166467352, "loss": 0.023, "step": 7263 }, { "epoch": 1.0180798878766644, "grad_norm": 0.25330471992492676, "learning_rate": 0.00020300406601291558, "loss": 0.0226, "step": 7264 }, { "epoch": 1.0182200420462508, "grad_norm": 0.6070547103881836, "learning_rate": 0.0002029897153790959, "loss": 0.053, "step": 7265 }, { "epoch": 1.0183601962158375, "grad_norm": 0.4620213508605957, "learning_rate": 0.00020297536474527623, "loss": 0.1009, "step": 7266 }, { "epoch": 1.018500350385424, "grad_norm": 1.4101574420928955, "learning_rate": 0.0002029610141114566, "loss": 0.0963, "step": 7267 }, { "epoch": 1.0186405045550104, "grad_norm": 0.28356462717056274, "learning_rate": 0.00020294666347763692, "loss": 0.033, "step": 7268 }, { "epoch": 1.018780658724597, "grad_norm": 0.09166912734508514, "learning_rate": 0.00020293231284381725, "loss": 0.0133, "step": 7269 }, { "epoch": 1.0189208128941836, "grad_norm": 0.44415149092674255, "learning_rate": 0.00020291796220999757, "loss": 0.074, "step": 7270 }, { "epoch": 1.0190609670637703, "grad_norm": 0.6255483031272888, "learning_rate": 0.00020290361157617793, "loss": 0.1408, "step": 7271 }, { "epoch": 1.0192011212333567, "grad_norm": 0.6920514702796936, "learning_rate": 0.00020288926094235826, "loss": 0.0303, "step": 7272 }, { "epoch": 1.0193412754029432, "grad_norm": 0.35557299852371216, "learning_rate": 0.0002028749103085386, "loss": 0.0479, "step": 7273 }, { "epoch": 1.0194814295725299, "grad_norm": 0.3282272219657898, "learning_rate": 0.00020286055967471897, "loss": 0.0594, "step": 7274 }, { "epoch": 1.0196215837421163, "grad_norm": 0.33518266677856445, "learning_rate": 0.0002028462090408993, "loss": 0.0269, "step": 7275 }, { "epoch": 1.0197617379117028, "grad_norm": 0.24703074991703033, "learning_rate": 0.00020283185840707963, "loss": 0.039, "step": 7276 }, { "epoch": 1.0199018920812895, "grad_norm": 0.48286372423171997, "learning_rate": 
0.00020281750777325998, "loss": 0.1049, "step": 7277 }, { "epoch": 1.020042046250876, "grad_norm": 0.6484137773513794, "learning_rate": 0.0002028031571394403, "loss": 0.05, "step": 7278 }, { "epoch": 1.0201822004204626, "grad_norm": 0.8242290616035461, "learning_rate": 0.00020278880650562064, "loss": 0.0527, "step": 7279 }, { "epoch": 1.020322354590049, "grad_norm": 0.30259793996810913, "learning_rate": 0.000202774455871801, "loss": 0.007, "step": 7280 }, { "epoch": 1.0204625087596355, "grad_norm": 0.6221671104431152, "learning_rate": 0.00020276010523798132, "loss": 0.0438, "step": 7281 }, { "epoch": 1.0206026629292222, "grad_norm": 0.5225541591644287, "learning_rate": 0.00020274575460416165, "loss": 0.0313, "step": 7282 }, { "epoch": 1.0207428170988087, "grad_norm": 1.2069557905197144, "learning_rate": 0.000202731403970342, "loss": 0.3316, "step": 7283 }, { "epoch": 1.0208829712683953, "grad_norm": 1.5490013360977173, "learning_rate": 0.00020271705333652236, "loss": 0.0932, "step": 7284 }, { "epoch": 1.0210231254379818, "grad_norm": 2.697406053543091, "learning_rate": 0.0002027027027027027, "loss": 0.3199, "step": 7285 }, { "epoch": 1.0211632796075683, "grad_norm": 0.6773514151573181, "learning_rate": 0.00020268835206888302, "loss": 0.098, "step": 7286 }, { "epoch": 1.021303433777155, "grad_norm": 0.2825588881969452, "learning_rate": 0.00020267400143506338, "loss": 0.055, "step": 7287 }, { "epoch": 1.0214435879467414, "grad_norm": 0.2414739429950714, "learning_rate": 0.0002026596508012437, "loss": 0.0772, "step": 7288 }, { "epoch": 1.0215837421163279, "grad_norm": 0.30025947093963623, "learning_rate": 0.00020264530016742403, "loss": 0.1154, "step": 7289 }, { "epoch": 1.0217238962859145, "grad_norm": 0.2881932854652405, "learning_rate": 0.0002026309495336044, "loss": 0.0444, "step": 7290 }, { "epoch": 1.021864050455501, "grad_norm": 0.2947738766670227, "learning_rate": 0.00020261659889978472, "loss": 0.0352, "step": 7291 }, { "epoch": 1.0220042046250877, 
"grad_norm": 0.30118581652641296, "learning_rate": 0.00020260224826596505, "loss": 0.0903, "step": 7292 }, { "epoch": 1.0221443587946741, "grad_norm": 0.13735143840312958, "learning_rate": 0.0002025878976321454, "loss": 0.0329, "step": 7293 }, { "epoch": 1.0222845129642606, "grad_norm": 0.31322962045669556, "learning_rate": 0.00020257354699832573, "loss": 0.0957, "step": 7294 }, { "epoch": 1.0224246671338473, "grad_norm": 0.4684290885925293, "learning_rate": 0.00020255919636450606, "loss": 0.1097, "step": 7295 }, { "epoch": 1.0225648213034337, "grad_norm": 0.37178564071655273, "learning_rate": 0.00020254484573068644, "loss": 0.0806, "step": 7296 }, { "epoch": 1.0227049754730204, "grad_norm": 0.3892574906349182, "learning_rate": 0.00020253049509686677, "loss": 0.066, "step": 7297 }, { "epoch": 1.0228451296426069, "grad_norm": 0.3241652846336365, "learning_rate": 0.0002025161444630471, "loss": 0.0801, "step": 7298 }, { "epoch": 1.0229852838121933, "grad_norm": 0.17515529692173004, "learning_rate": 0.00020250179382922745, "loss": 0.0252, "step": 7299 }, { "epoch": 1.02312543798178, "grad_norm": 0.32863670587539673, "learning_rate": 0.00020248744319540778, "loss": 0.0683, "step": 7300 }, { "epoch": 1.0232655921513665, "grad_norm": 0.3024722635746002, "learning_rate": 0.0002024730925615881, "loss": 0.0206, "step": 7301 }, { "epoch": 1.023405746320953, "grad_norm": 0.1940188854932785, "learning_rate": 0.00020245874192776847, "loss": 0.052, "step": 7302 }, { "epoch": 1.0235459004905396, "grad_norm": 0.2718174159526825, "learning_rate": 0.0002024443912939488, "loss": 0.0584, "step": 7303 }, { "epoch": 1.023686054660126, "grad_norm": 0.6966903209686279, "learning_rate": 0.00020243004066012912, "loss": 0.0565, "step": 7304 }, { "epoch": 1.0238262088297128, "grad_norm": 0.45669588446617126, "learning_rate": 0.00020241569002630945, "loss": 0.0405, "step": 7305 }, { "epoch": 1.0239663629992992, "grad_norm": 0.4384971857070923, "learning_rate": 0.00020240133939248984, "loss": 
0.1242, "step": 7306 }, { "epoch": 1.0241065171688857, "grad_norm": 0.2861323654651642, "learning_rate": 0.00020238698875867016, "loss": 0.0825, "step": 7307 }, { "epoch": 1.0242466713384724, "grad_norm": 0.29005348682403564, "learning_rate": 0.0002023726381248505, "loss": 0.0257, "step": 7308 }, { "epoch": 1.0243868255080588, "grad_norm": 0.5171800851821899, "learning_rate": 0.00020235828749103085, "loss": 0.0315, "step": 7309 }, { "epoch": 1.0245269796776455, "grad_norm": 0.29901498556137085, "learning_rate": 0.00020234393685721118, "loss": 0.0795, "step": 7310 }, { "epoch": 1.024667133847232, "grad_norm": 0.3842817544937134, "learning_rate": 0.0002023295862233915, "loss": 0.071, "step": 7311 }, { "epoch": 1.0248072880168184, "grad_norm": 0.3723081648349762, "learning_rate": 0.00020231523558957186, "loss": 0.0676, "step": 7312 }, { "epoch": 1.024947442186405, "grad_norm": 0.4442616105079651, "learning_rate": 0.0002023008849557522, "loss": 0.0299, "step": 7313 }, { "epoch": 1.0250875963559916, "grad_norm": 0.22426271438598633, "learning_rate": 0.00020228653432193252, "loss": 0.0996, "step": 7314 }, { "epoch": 1.0252277505255782, "grad_norm": 0.5538390874862671, "learning_rate": 0.00020227218368811287, "loss": 0.073, "step": 7315 }, { "epoch": 1.0253679046951647, "grad_norm": 0.29517579078674316, "learning_rate": 0.00020225783305429323, "loss": 0.0324, "step": 7316 }, { "epoch": 1.0255080588647512, "grad_norm": 0.09712222218513489, "learning_rate": 0.00020224348242047356, "loss": 0.0083, "step": 7317 }, { "epoch": 1.0256482130343378, "grad_norm": 0.25485730171203613, "learning_rate": 0.00020222913178665391, "loss": 0.0437, "step": 7318 }, { "epoch": 1.0257883672039243, "grad_norm": 0.34867939352989197, "learning_rate": 0.00020221478115283424, "loss": 0.0956, "step": 7319 }, { "epoch": 1.0259285213735108, "grad_norm": 0.1582653522491455, "learning_rate": 0.00020220043051901457, "loss": 0.028, "step": 7320 }, { "epoch": 1.0260686755430974, "grad_norm": 
0.6382333040237427, "learning_rate": 0.0002021860798851949, "loss": 0.1231, "step": 7321 }, { "epoch": 1.026208829712684, "grad_norm": 0.21647104620933533, "learning_rate": 0.00020217172925137526, "loss": 0.0448, "step": 7322 }, { "epoch": 1.0263489838822706, "grad_norm": 0.2099693864583969, "learning_rate": 0.00020215737861755558, "loss": 0.0378, "step": 7323 }, { "epoch": 1.026489138051857, "grad_norm": 0.09777022153139114, "learning_rate": 0.0002021430279837359, "loss": 0.01, "step": 7324 }, { "epoch": 1.0266292922214435, "grad_norm": 0.40246543288230896, "learning_rate": 0.00020212867734991627, "loss": 0.0796, "step": 7325 }, { "epoch": 1.0267694463910302, "grad_norm": 0.6252508163452148, "learning_rate": 0.0002021143267160966, "loss": 0.0522, "step": 7326 }, { "epoch": 1.0269096005606166, "grad_norm": 0.4237343370914459, "learning_rate": 0.00020209997608227693, "loss": 0.0805, "step": 7327 }, { "epoch": 1.0270497547302033, "grad_norm": 0.3577063977718353, "learning_rate": 0.0002020856254484573, "loss": 0.0744, "step": 7328 }, { "epoch": 1.0271899088997898, "grad_norm": 0.33011043071746826, "learning_rate": 0.00020207127481463764, "loss": 0.0332, "step": 7329 }, { "epoch": 1.0273300630693762, "grad_norm": 0.24332870543003082, "learning_rate": 0.00020205692418081797, "loss": 0.1144, "step": 7330 }, { "epoch": 1.027470217238963, "grad_norm": 0.35399290919303894, "learning_rate": 0.00020204257354699832, "loss": 0.0396, "step": 7331 }, { "epoch": 1.0276103714085494, "grad_norm": 0.8833919763565063, "learning_rate": 0.00020202822291317865, "loss": 0.2596, "step": 7332 }, { "epoch": 1.0277505255781358, "grad_norm": 0.24139781296253204, "learning_rate": 0.00020201387227935898, "loss": 0.0489, "step": 7333 }, { "epoch": 1.0278906797477225, "grad_norm": 0.7945934534072876, "learning_rate": 0.00020199952164553933, "loss": 0.1718, "step": 7334 }, { "epoch": 1.028030833917309, "grad_norm": 0.1805168241262436, "learning_rate": 0.00020198517101171966, "loss": 0.0098, "step": 
7335 }, { "epoch": 1.0281709880868957, "grad_norm": 0.36715954542160034, "learning_rate": 0.0002019708203779, "loss": 0.0773, "step": 7336 }, { "epoch": 1.0283111422564821, "grad_norm": 0.3491787016391754, "learning_rate": 0.00020195646974408032, "loss": 0.0584, "step": 7337 }, { "epoch": 1.0284512964260686, "grad_norm": 0.31847110390663147, "learning_rate": 0.0002019421191102607, "loss": 0.0277, "step": 7338 }, { "epoch": 1.0285914505956553, "grad_norm": 0.28226909041404724, "learning_rate": 0.00020192776847644103, "loss": 0.0372, "step": 7339 }, { "epoch": 1.0287316047652417, "grad_norm": 0.16133855283260345, "learning_rate": 0.00020191341784262136, "loss": 0.0221, "step": 7340 }, { "epoch": 1.0288717589348284, "grad_norm": 0.5553686022758484, "learning_rate": 0.00020189906720880172, "loss": 0.0458, "step": 7341 }, { "epoch": 1.0290119131044149, "grad_norm": 0.1868080198764801, "learning_rate": 0.00020188471657498204, "loss": 0.042, "step": 7342 }, { "epoch": 1.0291520672740013, "grad_norm": 0.21590116620063782, "learning_rate": 0.00020187036594116237, "loss": 0.0342, "step": 7343 }, { "epoch": 1.029292221443588, "grad_norm": 0.24588674306869507, "learning_rate": 0.00020185601530734273, "loss": 0.0461, "step": 7344 }, { "epoch": 1.0294323756131745, "grad_norm": 0.16774991154670715, "learning_rate": 0.00020184166467352306, "loss": 0.0422, "step": 7345 }, { "epoch": 1.0295725297827611, "grad_norm": 0.7636731863021851, "learning_rate": 0.00020182731403970338, "loss": 0.0929, "step": 7346 }, { "epoch": 1.0297126839523476, "grad_norm": 0.2616930305957794, "learning_rate": 0.00020181296340588374, "loss": 0.0664, "step": 7347 }, { "epoch": 1.029852838121934, "grad_norm": 0.2908239960670471, "learning_rate": 0.0002017986127720641, "loss": 0.0414, "step": 7348 }, { "epoch": 1.0299929922915207, "grad_norm": 0.33808261156082153, "learning_rate": 0.00020178426213824442, "loss": 0.0454, "step": 7349 }, { "epoch": 1.0301331464611072, "grad_norm": 0.18225722014904022, 
"learning_rate": 0.00020176991150442478, "loss": 0.0377, "step": 7350 }, { "epoch": 1.0302733006306937, "grad_norm": 0.48944491147994995, "learning_rate": 0.0002017555608706051, "loss": 0.0623, "step": 7351 }, { "epoch": 1.0304134548002803, "grad_norm": 0.5488277673721313, "learning_rate": 0.00020174121023678544, "loss": 0.0939, "step": 7352 }, { "epoch": 1.0305536089698668, "grad_norm": 0.24379444122314453, "learning_rate": 0.0002017268596029658, "loss": 0.0596, "step": 7353 }, { "epoch": 1.0306937631394535, "grad_norm": 0.1473238617181778, "learning_rate": 0.00020171250896914612, "loss": 0.0227, "step": 7354 }, { "epoch": 1.03083391730904, "grad_norm": 0.5108892321586609, "learning_rate": 0.00020169815833532645, "loss": 0.0812, "step": 7355 }, { "epoch": 1.0309740714786264, "grad_norm": 0.7706103324890137, "learning_rate": 0.00020168380770150678, "loss": 0.0395, "step": 7356 }, { "epoch": 1.031114225648213, "grad_norm": 0.1537996381521225, "learning_rate": 0.00020166945706768713, "loss": 0.0344, "step": 7357 }, { "epoch": 1.0312543798177995, "grad_norm": 0.4848909378051758, "learning_rate": 0.00020165510643386746, "loss": 0.0572, "step": 7358 }, { "epoch": 1.0313945339873862, "grad_norm": 0.14417120814323425, "learning_rate": 0.0002016407558000478, "loss": 0.014, "step": 7359 }, { "epoch": 1.0315346881569727, "grad_norm": 0.16109015047550201, "learning_rate": 0.00020162640516622817, "loss": 0.0184, "step": 7360 }, { "epoch": 1.0316748423265591, "grad_norm": 0.39149004220962524, "learning_rate": 0.0002016120545324085, "loss": 0.0537, "step": 7361 }, { "epoch": 1.0318149964961458, "grad_norm": 0.47483861446380615, "learning_rate": 0.00020159770389858883, "loss": 0.0803, "step": 7362 }, { "epoch": 1.0319551506657323, "grad_norm": 1.0141165256500244, "learning_rate": 0.0002015833532647692, "loss": 0.0359, "step": 7363 }, { "epoch": 1.0320953048353187, "grad_norm": 0.19929462671279907, "learning_rate": 0.00020156900263094952, "loss": 0.019, "step": 7364 }, { "epoch": 
1.0322354590049054, "grad_norm": 0.24317246675491333, "learning_rate": 0.00020155465199712984, "loss": 0.0098, "step": 7365 }, { "epoch": 1.032375613174492, "grad_norm": 0.222087100148201, "learning_rate": 0.0002015403013633102, "loss": 0.0527, "step": 7366 }, { "epoch": 1.0325157673440786, "grad_norm": 0.2788246273994446, "learning_rate": 0.00020152595072949053, "loss": 0.0519, "step": 7367 }, { "epoch": 1.032655921513665, "grad_norm": 0.585625410079956, "learning_rate": 0.00020151160009567086, "loss": 0.0424, "step": 7368 }, { "epoch": 1.0327960756832515, "grad_norm": 0.24283930659294128, "learning_rate": 0.00020149724946185124, "loss": 0.0479, "step": 7369 }, { "epoch": 1.0329362298528382, "grad_norm": 0.10255005955696106, "learning_rate": 0.00020148289882803157, "loss": 0.0116, "step": 7370 }, { "epoch": 1.0330763840224246, "grad_norm": 0.30152273178100586, "learning_rate": 0.0002014685481942119, "loss": 0.0322, "step": 7371 }, { "epoch": 1.0332165381920113, "grad_norm": 0.20395119488239288, "learning_rate": 0.00020145419756039223, "loss": 0.0074, "step": 7372 }, { "epoch": 1.0333566923615978, "grad_norm": 0.15507175028324127, "learning_rate": 0.00020143984692657258, "loss": 0.052, "step": 7373 }, { "epoch": 1.0334968465311842, "grad_norm": 0.280568927526474, "learning_rate": 0.0002014254962927529, "loss": 0.0194, "step": 7374 }, { "epoch": 1.033637000700771, "grad_norm": 0.3317860960960388, "learning_rate": 0.00020141114565893324, "loss": 0.0712, "step": 7375 }, { "epoch": 1.0337771548703574, "grad_norm": 0.8176124691963196, "learning_rate": 0.0002013967950251136, "loss": 0.0963, "step": 7376 }, { "epoch": 1.0339173090399438, "grad_norm": 0.567148745059967, "learning_rate": 0.00020138244439129392, "loss": 0.0518, "step": 7377 }, { "epoch": 1.0340574632095305, "grad_norm": 1.0496882200241089, "learning_rate": 0.00020136809375747425, "loss": 0.1916, "step": 7378 }, { "epoch": 1.034197617379117, "grad_norm": 0.4502559304237366, "learning_rate": 
0.00020135374312365463, "loss": 0.0674, "step": 7379 }, { "epoch": 1.0343377715487037, "grad_norm": 0.3167615234851837, "learning_rate": 0.00020133939248983496, "loss": 0.0424, "step": 7380 }, { "epoch": 1.0344779257182901, "grad_norm": 0.6294723749160767, "learning_rate": 0.0002013250418560153, "loss": 0.1329, "step": 7381 }, { "epoch": 1.0346180798878766, "grad_norm": 0.5803903937339783, "learning_rate": 0.00020131069122219565, "loss": 0.1641, "step": 7382 }, { "epoch": 1.0347582340574633, "grad_norm": 0.2547217905521393, "learning_rate": 0.00020129634058837598, "loss": 0.0563, "step": 7383 }, { "epoch": 1.0348983882270497, "grad_norm": 1.5837944746017456, "learning_rate": 0.0002012819899545563, "loss": 0.1091, "step": 7384 }, { "epoch": 1.0350385423966364, "grad_norm": 0.710404098033905, "learning_rate": 0.00020126763932073666, "loss": 0.0234, "step": 7385 }, { "epoch": 1.0351786965662229, "grad_norm": 0.17221421003341675, "learning_rate": 0.000201253288686917, "loss": 0.036, "step": 7386 }, { "epoch": 1.0353188507358093, "grad_norm": 0.25372564792633057, "learning_rate": 0.00020123893805309732, "loss": 0.0753, "step": 7387 }, { "epoch": 1.035459004905396, "grad_norm": 0.23701223731040955, "learning_rate": 0.00020122458741927767, "loss": 0.0665, "step": 7388 }, { "epoch": 1.0355991590749825, "grad_norm": 0.6436792016029358, "learning_rate": 0.000201210236785458, "loss": 0.0431, "step": 7389 }, { "epoch": 1.035739313244569, "grad_norm": 0.5409477949142456, "learning_rate": 0.00020119588615163833, "loss": 0.0562, "step": 7390 }, { "epoch": 1.0358794674141556, "grad_norm": 0.30708518624305725, "learning_rate": 0.00020118153551781866, "loss": 0.0699, "step": 7391 }, { "epoch": 1.036019621583742, "grad_norm": 0.6640605926513672, "learning_rate": 0.00020116718488399904, "loss": 0.0452, "step": 7392 }, { "epoch": 1.0361597757533287, "grad_norm": 0.2592013478279114, "learning_rate": 0.00020115283425017937, "loss": 0.0622, "step": 7393 }, { "epoch": 1.0362999299229152, 
"grad_norm": 0.24011889100074768, "learning_rate": 0.0002011384836163597, "loss": 0.0615, "step": 7394 }, { "epoch": 1.0364400840925017, "grad_norm": 0.5006332993507385, "learning_rate": 0.00020112413298254005, "loss": 0.0984, "step": 7395 }, { "epoch": 1.0365802382620883, "grad_norm": 0.3818579614162445, "learning_rate": 0.00020110978234872038, "loss": 0.035, "step": 7396 }, { "epoch": 1.0367203924316748, "grad_norm": 0.1948472559452057, "learning_rate": 0.0002010954317149007, "loss": 0.0164, "step": 7397 }, { "epoch": 1.0368605466012615, "grad_norm": 0.13681766390800476, "learning_rate": 0.00020108108108108107, "loss": 0.0277, "step": 7398 }, { "epoch": 1.037000700770848, "grad_norm": 0.8447391986846924, "learning_rate": 0.0002010667304472614, "loss": 0.1062, "step": 7399 }, { "epoch": 1.0371408549404344, "grad_norm": 0.27277272939682007, "learning_rate": 0.00020105237981344172, "loss": 0.0447, "step": 7400 }, { "epoch": 1.037281009110021, "grad_norm": 0.3555588126182556, "learning_rate": 0.0002010380291796221, "loss": 0.0409, "step": 7401 }, { "epoch": 1.0374211632796075, "grad_norm": 0.3866293430328369, "learning_rate": 0.00020102367854580243, "loss": 0.0483, "step": 7402 }, { "epoch": 1.0375613174491942, "grad_norm": 0.5190638899803162, "learning_rate": 0.00020100932791198276, "loss": 0.1141, "step": 7403 }, { "epoch": 1.0377014716187807, "grad_norm": 0.22311319410800934, "learning_rate": 0.00020099497727816312, "loss": 0.0418, "step": 7404 }, { "epoch": 1.0378416257883671, "grad_norm": 0.27752485871315, "learning_rate": 0.00020098062664434345, "loss": 0.0393, "step": 7405 }, { "epoch": 1.0379817799579538, "grad_norm": 0.4159800112247467, "learning_rate": 0.00020096627601052378, "loss": 0.0438, "step": 7406 }, { "epoch": 1.0381219341275403, "grad_norm": 0.4560358226299286, "learning_rate": 0.0002009519253767041, "loss": 0.0693, "step": 7407 }, { "epoch": 1.0382620882971267, "grad_norm": 0.287677139043808, "learning_rate": 0.00020093757474288446, "loss": 
0.0357, "step": 7408 }, { "epoch": 1.0384022424667134, "grad_norm": 0.4302176833152771, "learning_rate": 0.0002009232241090648, "loss": 0.0584, "step": 7409 }, { "epoch": 1.0385423966362999, "grad_norm": 0.2120569497346878, "learning_rate": 0.00020090887347524512, "loss": 0.0721, "step": 7410 }, { "epoch": 1.0386825508058866, "grad_norm": 0.4541984498500824, "learning_rate": 0.0002008945228414255, "loss": 0.0481, "step": 7411 }, { "epoch": 1.038822704975473, "grad_norm": 0.5624955296516418, "learning_rate": 0.00020088017220760583, "loss": 0.0586, "step": 7412 }, { "epoch": 1.0389628591450595, "grad_norm": 0.3735843896865845, "learning_rate": 0.00020086582157378616, "loss": 0.0749, "step": 7413 }, { "epoch": 1.0391030133146462, "grad_norm": 0.2377900779247284, "learning_rate": 0.0002008514709399665, "loss": 0.0463, "step": 7414 }, { "epoch": 1.0392431674842326, "grad_norm": 0.3517459034919739, "learning_rate": 0.00020083712030614684, "loss": 0.031, "step": 7415 }, { "epoch": 1.0393833216538193, "grad_norm": 0.3057696521282196, "learning_rate": 0.00020082276967232717, "loss": 0.0302, "step": 7416 }, { "epoch": 1.0395234758234058, "grad_norm": 0.2454061508178711, "learning_rate": 0.00020080841903850753, "loss": 0.0635, "step": 7417 }, { "epoch": 1.0396636299929922, "grad_norm": 0.15972313284873962, "learning_rate": 0.00020079406840468785, "loss": 0.041, "step": 7418 }, { "epoch": 1.039803784162579, "grad_norm": 0.6719050407409668, "learning_rate": 0.00020077971777086818, "loss": 0.0479, "step": 7419 }, { "epoch": 1.0399439383321654, "grad_norm": 0.14499905705451965, "learning_rate": 0.00020076536713704854, "loss": 0.0142, "step": 7420 }, { "epoch": 1.0400840925017518, "grad_norm": 0.50604248046875, "learning_rate": 0.00020075101650322887, "loss": 0.1007, "step": 7421 }, { "epoch": 1.0402242466713385, "grad_norm": 0.11758874356746674, "learning_rate": 0.0002007366658694092, "loss": 0.0109, "step": 7422 }, { "epoch": 1.040364400840925, "grad_norm": 0.40569648146629333, 
"learning_rate": 0.00020072231523558958, "loss": 0.0868, "step": 7423 }, { "epoch": 1.0405045550105116, "grad_norm": 0.6281216144561768, "learning_rate": 0.0002007079646017699, "loss": 0.0727, "step": 7424 }, { "epoch": 1.040644709180098, "grad_norm": 0.7099785804748535, "learning_rate": 0.00020069361396795024, "loss": 0.0389, "step": 7425 }, { "epoch": 1.0407848633496846, "grad_norm": 0.23996074497699738, "learning_rate": 0.00020067926333413056, "loss": 0.0507, "step": 7426 }, { "epoch": 1.0409250175192712, "grad_norm": 0.8768213987350464, "learning_rate": 0.00020066491270031092, "loss": 0.0368, "step": 7427 }, { "epoch": 1.0410651716888577, "grad_norm": 0.3884669244289398, "learning_rate": 0.00020065056206649125, "loss": 0.0371, "step": 7428 }, { "epoch": 1.0412053258584444, "grad_norm": 0.20927593111991882, "learning_rate": 0.00020063621143267158, "loss": 0.0215, "step": 7429 }, { "epoch": 1.0413454800280308, "grad_norm": 1.5984781980514526, "learning_rate": 0.00020062186079885193, "loss": 0.1222, "step": 7430 }, { "epoch": 1.0414856341976173, "grad_norm": 0.46438372135162354, "learning_rate": 0.00020060751016503226, "loss": 0.0608, "step": 7431 }, { "epoch": 1.041625788367204, "grad_norm": 1.4514050483703613, "learning_rate": 0.0002005931595312126, "loss": 0.3426, "step": 7432 }, { "epoch": 1.0417659425367904, "grad_norm": 0.8924177885055542, "learning_rate": 0.00020057880889739297, "loss": 0.1756, "step": 7433 }, { "epoch": 1.0419060967063771, "grad_norm": 0.2706003189086914, "learning_rate": 0.0002005644582635733, "loss": 0.0963, "step": 7434 }, { "epoch": 1.0420462508759636, "grad_norm": 0.4359434247016907, "learning_rate": 0.00020055010762975363, "loss": 0.0721, "step": 7435 }, { "epoch": 1.04218640504555, "grad_norm": 0.3326599895954132, "learning_rate": 0.00020053575699593399, "loss": 0.062, "step": 7436 }, { "epoch": 1.0423265592151367, "grad_norm": 0.3369787931442261, "learning_rate": 0.00020052140636211431, "loss": 0.057, "step": 7437 }, { "epoch": 
1.0424667133847232, "grad_norm": 0.3704361021518707, "learning_rate": 0.00020050705572829464, "loss": 0.0996, "step": 7438 }, { "epoch": 1.0426068675543096, "grad_norm": 0.41580623388290405, "learning_rate": 0.000200492705094475, "loss": 0.0671, "step": 7439 }, { "epoch": 1.0427470217238963, "grad_norm": 0.1372150480747223, "learning_rate": 0.00020047835446065533, "loss": 0.0289, "step": 7440 }, { "epoch": 1.0428871758934828, "grad_norm": 0.26816439628601074, "learning_rate": 0.00020046400382683566, "loss": 0.0315, "step": 7441 }, { "epoch": 1.0430273300630695, "grad_norm": 1.0764726400375366, "learning_rate": 0.00020044965319301598, "loss": 0.0418, "step": 7442 }, { "epoch": 1.043167484232656, "grad_norm": 0.1662338525056839, "learning_rate": 0.00020043530255919637, "loss": 0.0161, "step": 7443 }, { "epoch": 1.0433076384022424, "grad_norm": 0.4620538651943207, "learning_rate": 0.0002004209519253767, "loss": 0.0826, "step": 7444 }, { "epoch": 1.043447792571829, "grad_norm": 0.3773845136165619, "learning_rate": 0.00020040660129155702, "loss": 0.0507, "step": 7445 }, { "epoch": 1.0435879467414155, "grad_norm": 0.20116060972213745, "learning_rate": 0.00020039225065773738, "loss": 0.0558, "step": 7446 }, { "epoch": 1.043728100911002, "grad_norm": 0.5692968964576721, "learning_rate": 0.0002003779000239177, "loss": 0.0814, "step": 7447 }, { "epoch": 1.0438682550805887, "grad_norm": 0.44019076228141785, "learning_rate": 0.00020036354939009804, "loss": 0.0509, "step": 7448 }, { "epoch": 1.0440084092501751, "grad_norm": 0.9188428521156311, "learning_rate": 0.0002003491987562784, "loss": 0.1231, "step": 7449 }, { "epoch": 1.0441485634197618, "grad_norm": 0.4651030898094177, "learning_rate": 0.00020033484812245872, "loss": 0.1281, "step": 7450 }, { "epoch": 1.0442887175893483, "grad_norm": 0.3135788142681122, "learning_rate": 0.00020032049748863905, "loss": 0.0541, "step": 7451 }, { "epoch": 1.0444288717589347, "grad_norm": 0.7074897289276123, "learning_rate": 
0.0002003061468548194, "loss": 0.0506, "step": 7452 }, { "epoch": 1.0445690259285214, "grad_norm": 0.625097930431366, "learning_rate": 0.00020029179622099973, "loss": 0.044, "step": 7453 }, { "epoch": 1.0447091800981079, "grad_norm": 0.18113072216510773, "learning_rate": 0.00020027744558718006, "loss": 0.0126, "step": 7454 }, { "epoch": 1.0448493342676946, "grad_norm": 0.20055146515369415, "learning_rate": 0.00020026309495336044, "loss": 0.0405, "step": 7455 }, { "epoch": 1.044989488437281, "grad_norm": 0.3123377859592438, "learning_rate": 0.00020024874431954077, "loss": 0.0427, "step": 7456 }, { "epoch": 1.0451296426068675, "grad_norm": 0.16642262041568756, "learning_rate": 0.0002002343936857211, "loss": 0.0177, "step": 7457 }, { "epoch": 1.0452697967764542, "grad_norm": 0.46752092242240906, "learning_rate": 0.00020022004305190146, "loss": 0.0661, "step": 7458 }, { "epoch": 1.0454099509460406, "grad_norm": 0.1605590581893921, "learning_rate": 0.00020020569241808179, "loss": 0.0173, "step": 7459 }, { "epoch": 1.0455501051156273, "grad_norm": 0.3142889738082886, "learning_rate": 0.00020019134178426211, "loss": 0.0781, "step": 7460 }, { "epoch": 1.0456902592852138, "grad_norm": 0.33982208371162415, "learning_rate": 0.00020017699115044244, "loss": 0.0686, "step": 7461 }, { "epoch": 1.0458304134548002, "grad_norm": 0.3422391414642334, "learning_rate": 0.0002001626405166228, "loss": 0.0231, "step": 7462 }, { "epoch": 1.045970567624387, "grad_norm": 0.3262426257133484, "learning_rate": 0.00020014828988280313, "loss": 0.0301, "step": 7463 }, { "epoch": 1.0461107217939734, "grad_norm": 0.3322065770626068, "learning_rate": 0.00020013393924898346, "loss": 0.0195, "step": 7464 }, { "epoch": 1.0462508759635598, "grad_norm": 0.2396252453327179, "learning_rate": 0.00020011958861516384, "loss": 0.037, "step": 7465 }, { "epoch": 1.0463910301331465, "grad_norm": 0.43130582571029663, "learning_rate": 0.00020010523798134417, "loss": 0.1024, "step": 7466 }, { "epoch": 
1.046531184302733, "grad_norm": 0.25824931263923645, "learning_rate": 0.0002000908873475245, "loss": 0.038, "step": 7467 }, { "epoch": 1.0466713384723196, "grad_norm": 0.3817158639431, "learning_rate": 0.00020007653671370485, "loss": 0.0726, "step": 7468 }, { "epoch": 1.046811492641906, "grad_norm": 0.343127578496933, "learning_rate": 0.00020006218607988518, "loss": 0.0389, "step": 7469 }, { "epoch": 1.0469516468114926, "grad_norm": 0.2909266948699951, "learning_rate": 0.0002000478354460655, "loss": 0.0344, "step": 7470 }, { "epoch": 1.0470918009810792, "grad_norm": 0.5257437825202942, "learning_rate": 0.00020003348481224586, "loss": 0.0377, "step": 7471 }, { "epoch": 1.0472319551506657, "grad_norm": 0.2117527425289154, "learning_rate": 0.0002000191341784262, "loss": 0.0222, "step": 7472 }, { "epoch": 1.0473721093202524, "grad_norm": 0.5073584318161011, "learning_rate": 0.00020000478354460652, "loss": 0.0679, "step": 7473 }, { "epoch": 1.0475122634898388, "grad_norm": 0.38086339831352234, "learning_rate": 0.0001999904329107869, "loss": 0.0707, "step": 7474 }, { "epoch": 1.0476524176594253, "grad_norm": 0.49791890382766724, "learning_rate": 0.00019997608227696723, "loss": 0.0547, "step": 7475 }, { "epoch": 1.047792571829012, "grad_norm": 0.6414896845817566, "learning_rate": 0.00019996173164314756, "loss": 0.0596, "step": 7476 }, { "epoch": 1.0479327259985984, "grad_norm": 0.4338603913784027, "learning_rate": 0.0001999473810093279, "loss": 0.0865, "step": 7477 }, { "epoch": 1.048072880168185, "grad_norm": 0.27509358525276184, "learning_rate": 0.00019993303037550825, "loss": 0.0128, "step": 7478 }, { "epoch": 1.0482130343377716, "grad_norm": 0.3782193660736084, "learning_rate": 0.00019991867974168857, "loss": 0.0641, "step": 7479 }, { "epoch": 1.048353188507358, "grad_norm": 0.2564517557621002, "learning_rate": 0.0001999043291078689, "loss": 0.0288, "step": 7480 }, { "epoch": 1.0484933426769447, "grad_norm": 1.3186359405517578, "learning_rate": 0.00019988997847404926, 
"loss": 0.2088, "step": 7481 }, { "epoch": 1.0486334968465312, "grad_norm": 0.845447301864624, "learning_rate": 0.0001998756278402296, "loss": 0.2101, "step": 7482 }, { "epoch": 1.0487736510161176, "grad_norm": 0.6468475461006165, "learning_rate": 0.00019986127720640992, "loss": 0.1327, "step": 7483 }, { "epoch": 1.0489138051857043, "grad_norm": 0.4572700262069702, "learning_rate": 0.00019984692657259027, "loss": 0.1116, "step": 7484 }, { "epoch": 1.0490539593552908, "grad_norm": 1.019961953163147, "learning_rate": 0.0001998325759387706, "loss": 0.1179, "step": 7485 }, { "epoch": 1.0491941135248775, "grad_norm": 0.49574682116508484, "learning_rate": 0.00019981822530495093, "loss": 0.0721, "step": 7486 }, { "epoch": 1.049334267694464, "grad_norm": 0.23519475758075714, "learning_rate": 0.0001998038746711313, "loss": 0.0593, "step": 7487 }, { "epoch": 1.0494744218640504, "grad_norm": 0.3164590299129486, "learning_rate": 0.00019978952403731164, "loss": 0.0543, "step": 7488 }, { "epoch": 1.049614576033637, "grad_norm": 0.46372711658477783, "learning_rate": 0.00019977517340349197, "loss": 0.0745, "step": 7489 }, { "epoch": 1.0497547302032235, "grad_norm": 0.26876315474510193, "learning_rate": 0.00019976082276967232, "loss": 0.0449, "step": 7490 }, { "epoch": 1.0498948843728102, "grad_norm": 0.2763366997241974, "learning_rate": 0.00019974647213585265, "loss": 0.0854, "step": 7491 }, { "epoch": 1.0500350385423967, "grad_norm": 0.4557197093963623, "learning_rate": 0.00019973212150203298, "loss": 0.0333, "step": 7492 }, { "epoch": 1.0501751927119831, "grad_norm": 0.41645553708076477, "learning_rate": 0.00019971777086821334, "loss": 0.0405, "step": 7493 }, { "epoch": 1.0503153468815698, "grad_norm": 0.43728384375572205, "learning_rate": 0.00019970342023439367, "loss": 0.1349, "step": 7494 }, { "epoch": 1.0504555010511563, "grad_norm": 0.48241326212882996, "learning_rate": 0.000199689069600574, "loss": 0.0746, "step": 7495 }, { "epoch": 1.0505956552207427, "grad_norm": 
0.5028722286224365, "learning_rate": 0.00019967471896675432, "loss": 0.1016, "step": 7496 }, { "epoch": 1.0507358093903294, "grad_norm": 0.22248029708862305, "learning_rate": 0.0001996603683329347, "loss": 0.0455, "step": 7497 }, { "epoch": 1.0508759635599159, "grad_norm": 0.8588648438453674, "learning_rate": 0.00019964601769911503, "loss": 0.0895, "step": 7498 }, { "epoch": 1.0510161177295025, "grad_norm": 0.23754124343395233, "learning_rate": 0.00019963166706529536, "loss": 0.0526, "step": 7499 }, { "epoch": 1.051156271899089, "grad_norm": 0.7451086640357971, "learning_rate": 0.00019961731643147572, "loss": 0.1106, "step": 7500 }, { "epoch": 1.0512964260686755, "grad_norm": 0.2748192548751831, "learning_rate": 0.00019960296579765605, "loss": 0.0362, "step": 7501 }, { "epoch": 1.0514365802382621, "grad_norm": 0.21557137370109558, "learning_rate": 0.00019958861516383638, "loss": 0.0248, "step": 7502 }, { "epoch": 1.0515767344078486, "grad_norm": 0.4813844859600067, "learning_rate": 0.00019957426453001673, "loss": 0.0718, "step": 7503 }, { "epoch": 1.0517168885774353, "grad_norm": 0.32053476572036743, "learning_rate": 0.00019955991389619706, "loss": 0.0413, "step": 7504 }, { "epoch": 1.0518570427470217, "grad_norm": 0.40857240557670593, "learning_rate": 0.0001995455632623774, "loss": 0.0341, "step": 7505 }, { "epoch": 1.0519971969166082, "grad_norm": 0.3459523916244507, "learning_rate": 0.00019953121262855777, "loss": 0.0734, "step": 7506 }, { "epoch": 1.0521373510861949, "grad_norm": 0.4750889241695404, "learning_rate": 0.0001995168619947381, "loss": 0.1062, "step": 7507 }, { "epoch": 1.0522775052557813, "grad_norm": 0.6610357165336609, "learning_rate": 0.00019950251136091843, "loss": 0.0614, "step": 7508 }, { "epoch": 1.0524176594253678, "grad_norm": 0.27166077494621277, "learning_rate": 0.00019948816072709878, "loss": 0.0519, "step": 7509 }, { "epoch": 1.0525578135949545, "grad_norm": 0.4206123948097229, "learning_rate": 0.0001994738100932791, "loss": 0.0409, 
"step": 7510 }, { "epoch": 1.052697967764541, "grad_norm": 0.6885419487953186, "learning_rate": 0.00019945945945945944, "loss": 0.0312, "step": 7511 }, { "epoch": 1.0528381219341276, "grad_norm": 0.6111764907836914, "learning_rate": 0.00019944510882563977, "loss": 0.1358, "step": 7512 }, { "epoch": 1.052978276103714, "grad_norm": 0.6657649874687195, "learning_rate": 0.00019943075819182012, "loss": 0.0707, "step": 7513 }, { "epoch": 1.0531184302733005, "grad_norm": 0.2693290710449219, "learning_rate": 0.00019941640755800045, "loss": 0.0492, "step": 7514 }, { "epoch": 1.0532585844428872, "grad_norm": 0.7509901523590088, "learning_rate": 0.00019940205692418078, "loss": 0.0916, "step": 7515 }, { "epoch": 1.0533987386124737, "grad_norm": 0.30057698488235474, "learning_rate": 0.00019938770629036114, "loss": 0.0445, "step": 7516 }, { "epoch": 1.0535388927820604, "grad_norm": 0.22511519491672516, "learning_rate": 0.00019937335565654147, "loss": 0.0421, "step": 7517 }, { "epoch": 1.0536790469516468, "grad_norm": 0.6416477560997009, "learning_rate": 0.0001993590050227218, "loss": 0.136, "step": 7518 }, { "epoch": 1.0538192011212333, "grad_norm": 0.5774193406105042, "learning_rate": 0.00019934465438890218, "loss": 0.0618, "step": 7519 }, { "epoch": 1.05395935529082, "grad_norm": 0.7175807356834412, "learning_rate": 0.0001993303037550825, "loss": 0.0705, "step": 7520 }, { "epoch": 1.0540995094604064, "grad_norm": 0.3576360046863556, "learning_rate": 0.00019931595312126283, "loss": 0.047, "step": 7521 }, { "epoch": 1.054239663629993, "grad_norm": 0.08990313112735748, "learning_rate": 0.0001993016024874432, "loss": 0.0137, "step": 7522 }, { "epoch": 1.0543798177995796, "grad_norm": 0.27882933616638184, "learning_rate": 0.00019928725185362352, "loss": 0.0481, "step": 7523 }, { "epoch": 1.054519971969166, "grad_norm": 0.39496538043022156, "learning_rate": 0.00019927290121980385, "loss": 0.0291, "step": 7524 }, { "epoch": 1.0546601261387527, "grad_norm": 0.5221301913261414, 
"learning_rate": 0.0001992585505859842, "loss": 0.0382, "step": 7525 }, { "epoch": 1.0548002803083392, "grad_norm": 0.1998799443244934, "learning_rate": 0.00019924419995216453, "loss": 0.0313, "step": 7526 }, { "epoch": 1.0549404344779256, "grad_norm": 0.4970436692237854, "learning_rate": 0.00019922984931834486, "loss": 0.0562, "step": 7527 }, { "epoch": 1.0550805886475123, "grad_norm": 0.8129107356071472, "learning_rate": 0.00019921549868452524, "loss": 0.0532, "step": 7528 }, { "epoch": 1.0552207428170988, "grad_norm": 0.2570417523384094, "learning_rate": 0.00019920114805070557, "loss": 0.0115, "step": 7529 }, { "epoch": 1.0553608969866854, "grad_norm": 0.9175505638122559, "learning_rate": 0.0001991867974168859, "loss": 0.0298, "step": 7530 }, { "epoch": 1.055501051156272, "grad_norm": 0.515227198600769, "learning_rate": 0.00019917244678306623, "loss": 0.0461, "step": 7531 }, { "epoch": 1.0556412053258584, "grad_norm": 0.5605230331420898, "learning_rate": 0.00019915809614924658, "loss": 0.0309, "step": 7532 }, { "epoch": 1.055781359495445, "grad_norm": 0.16247159242630005, "learning_rate": 0.0001991437455154269, "loss": 0.0262, "step": 7533 }, { "epoch": 1.0559215136650315, "grad_norm": 0.09054696559906006, "learning_rate": 0.00019912939488160724, "loss": 0.0047, "step": 7534 }, { "epoch": 1.056061667834618, "grad_norm": 2.6213529109954834, "learning_rate": 0.0001991150442477876, "loss": 0.1639, "step": 7535 }, { "epoch": 1.0562018220042046, "grad_norm": 0.3176807463169098, "learning_rate": 0.00019910069361396793, "loss": 0.0372, "step": 7536 }, { "epoch": 1.056341976173791, "grad_norm": 0.5382075905799866, "learning_rate": 0.00019908634298014825, "loss": 0.0535, "step": 7537 }, { "epoch": 1.0564821303433778, "grad_norm": 0.3053722381591797, "learning_rate": 0.00019907199234632864, "loss": 0.0845, "step": 7538 }, { "epoch": 1.0566222845129642, "grad_norm": 0.29730188846588135, "learning_rate": 0.00019905764171250897, "loss": 0.038, "step": 7539 }, { "epoch": 
1.0567624386825507, "grad_norm": 0.526772141456604, "learning_rate": 0.0001990432910786893, "loss": 0.1425, "step": 7540 }, { "epoch": 1.0569025928521374, "grad_norm": 0.32177266478538513, "learning_rate": 0.00019902894044486965, "loss": 0.0505, "step": 7541 }, { "epoch": 1.0570427470217238, "grad_norm": 0.47809478640556335, "learning_rate": 0.00019901458981104998, "loss": 0.0524, "step": 7542 }, { "epoch": 1.0571829011913105, "grad_norm": 0.4227425754070282, "learning_rate": 0.0001990002391772303, "loss": 0.1108, "step": 7543 }, { "epoch": 1.057323055360897, "grad_norm": 0.4794295132160187, "learning_rate": 0.00019898588854341066, "loss": 0.028, "step": 7544 }, { "epoch": 1.0574632095304835, "grad_norm": 0.820785403251648, "learning_rate": 0.000198971537909591, "loss": 0.0414, "step": 7545 }, { "epoch": 1.0576033637000701, "grad_norm": 0.5381726026535034, "learning_rate": 0.00019895718727577132, "loss": 0.0589, "step": 7546 }, { "epoch": 1.0577435178696566, "grad_norm": 0.27617841958999634, "learning_rate": 0.00019894283664195165, "loss": 0.0799, "step": 7547 }, { "epoch": 1.0578836720392433, "grad_norm": 0.5296957492828369, "learning_rate": 0.000198928486008132, "loss": 0.0759, "step": 7548 }, { "epoch": 1.0580238262088297, "grad_norm": 0.2712271213531494, "learning_rate": 0.00019891413537431233, "loss": 0.0332, "step": 7549 }, { "epoch": 1.0581639803784162, "grad_norm": 0.3844560980796814, "learning_rate": 0.00019889978474049266, "loss": 0.0684, "step": 7550 }, { "epoch": 1.0583041345480029, "grad_norm": 0.3186555802822113, "learning_rate": 0.00019888543410667304, "loss": 0.0272, "step": 7551 }, { "epoch": 1.0584442887175893, "grad_norm": 0.31765928864479065, "learning_rate": 0.00019887108347285337, "loss": 0.0446, "step": 7552 }, { "epoch": 1.0585844428871758, "grad_norm": 0.32476410269737244, "learning_rate": 0.0001988567328390337, "loss": 0.1209, "step": 7553 }, { "epoch": 1.0587245970567625, "grad_norm": 0.32868948578834534, "learning_rate": 
0.00019884238220521406, "loss": 0.1, "step": 7554 }, { "epoch": 1.058864751226349, "grad_norm": 0.4197101593017578, "learning_rate": 0.00019882803157139439, "loss": 0.0482, "step": 7555 }, { "epoch": 1.0590049053959356, "grad_norm": 0.1931259036064148, "learning_rate": 0.00019881368093757471, "loss": 0.0225, "step": 7556 }, { "epoch": 1.059145059565522, "grad_norm": 0.34077540040016174, "learning_rate": 0.00019879933030375507, "loss": 0.0584, "step": 7557 }, { "epoch": 1.0592852137351085, "grad_norm": 0.43230900168418884, "learning_rate": 0.0001987849796699354, "loss": 0.0663, "step": 7558 }, { "epoch": 1.0594253679046952, "grad_norm": 0.22732526063919067, "learning_rate": 0.00019877062903611573, "loss": 0.0348, "step": 7559 }, { "epoch": 1.0595655220742817, "grad_norm": 0.1841212511062622, "learning_rate": 0.0001987562784022961, "loss": 0.0544, "step": 7560 }, { "epoch": 1.0597056762438684, "grad_norm": 0.37000972032546997, "learning_rate": 0.00019874192776847644, "loss": 0.0283, "step": 7561 }, { "epoch": 1.0598458304134548, "grad_norm": 0.48312994837760925, "learning_rate": 0.00019872757713465677, "loss": 0.0841, "step": 7562 }, { "epoch": 1.0599859845830413, "grad_norm": 0.34391459822654724, "learning_rate": 0.00019871322650083712, "loss": 0.0462, "step": 7563 }, { "epoch": 1.060126138752628, "grad_norm": 0.15284235775470734, "learning_rate": 0.00019869887586701745, "loss": 0.0268, "step": 7564 }, { "epoch": 1.0602662929222144, "grad_norm": 0.19502505660057068, "learning_rate": 0.00019868452523319778, "loss": 0.0452, "step": 7565 }, { "epoch": 1.0604064470918009, "grad_norm": 0.2463318258523941, "learning_rate": 0.0001986701745993781, "loss": 0.0377, "step": 7566 }, { "epoch": 1.0605466012613876, "grad_norm": 0.21984611451625824, "learning_rate": 0.00019865582396555846, "loss": 0.0315, "step": 7567 }, { "epoch": 1.060686755430974, "grad_norm": 0.8172303438186646, "learning_rate": 0.0001986414733317388, "loss": 0.0665, "step": 7568 }, { "epoch": 
1.0608269096005607, "grad_norm": 0.37418031692504883, "learning_rate": 0.00019862712269791912, "loss": 0.0839, "step": 7569 }, { "epoch": 1.0609670637701472, "grad_norm": 0.4457027018070221, "learning_rate": 0.0001986127720640995, "loss": 0.0736, "step": 7570 }, { "epoch": 1.0611072179397336, "grad_norm": 0.13124918937683105, "learning_rate": 0.00019859842143027983, "loss": 0.023, "step": 7571 }, { "epoch": 1.0612473721093203, "grad_norm": 0.33823636174201965, "learning_rate": 0.00019858407079646016, "loss": 0.0567, "step": 7572 }, { "epoch": 1.0613875262789068, "grad_norm": 0.1908726990222931, "learning_rate": 0.00019856972016264052, "loss": 0.0155, "step": 7573 }, { "epoch": 1.0615276804484934, "grad_norm": 0.3268665671348572, "learning_rate": 0.00019855536952882084, "loss": 0.0683, "step": 7574 }, { "epoch": 1.06166783461808, "grad_norm": 0.20980527997016907, "learning_rate": 0.00019854101889500117, "loss": 0.0563, "step": 7575 }, { "epoch": 1.0618079887876664, "grad_norm": 0.30090105533599854, "learning_rate": 0.00019852666826118153, "loss": 0.0627, "step": 7576 }, { "epoch": 1.061948142957253, "grad_norm": 0.09535747021436691, "learning_rate": 0.00019851231762736186, "loss": 0.0103, "step": 7577 }, { "epoch": 1.0620882971268395, "grad_norm": 0.40273454785346985, "learning_rate": 0.00019849796699354219, "loss": 0.0397, "step": 7578 }, { "epoch": 1.0622284512964262, "grad_norm": 0.7150191068649292, "learning_rate": 0.00019848361635972254, "loss": 0.0584, "step": 7579 }, { "epoch": 1.0623686054660126, "grad_norm": 0.4053855538368225, "learning_rate": 0.00019846926572590287, "loss": 0.0281, "step": 7580 }, { "epoch": 1.062508759635599, "grad_norm": 2.127323865890503, "learning_rate": 0.0001984549150920832, "loss": 0.396, "step": 7581 }, { "epoch": 1.0626489138051858, "grad_norm": 0.8696365356445312, "learning_rate": 0.00019844056445826353, "loss": 0.0997, "step": 7582 }, { "epoch": 1.0627890679747722, "grad_norm": 3.740506410598755, "learning_rate": 
0.0001984262138244439, "loss": 0.0472, "step": 7583 }, { "epoch": 1.0629292221443587, "grad_norm": 0.8195126056671143, "learning_rate": 0.00019841186319062424, "loss": 0.0616, "step": 7584 }, { "epoch": 1.0630693763139454, "grad_norm": 1.1001120805740356, "learning_rate": 0.00019839751255680457, "loss": 0.0576, "step": 7585 }, { "epoch": 1.0632095304835318, "grad_norm": 0.44824814796447754, "learning_rate": 0.00019838316192298492, "loss": 0.0456, "step": 7586 }, { "epoch": 1.0633496846531185, "grad_norm": 0.3747692108154297, "learning_rate": 0.00019836881128916525, "loss": 0.0553, "step": 7587 }, { "epoch": 1.063489838822705, "grad_norm": 0.26847440004348755, "learning_rate": 0.00019835446065534558, "loss": 0.0653, "step": 7588 }, { "epoch": 1.0636299929922914, "grad_norm": 0.42787301540374756, "learning_rate": 0.00019834011002152594, "loss": 0.032, "step": 7589 }, { "epoch": 1.0637701471618781, "grad_norm": 0.374841570854187, "learning_rate": 0.00019832575938770626, "loss": 0.1007, "step": 7590 }, { "epoch": 1.0639103013314646, "grad_norm": 0.1404145509004593, "learning_rate": 0.0001983114087538866, "loss": 0.0213, "step": 7591 }, { "epoch": 1.064050455501051, "grad_norm": 0.2259984165430069, "learning_rate": 0.00019829705812006698, "loss": 0.0411, "step": 7592 }, { "epoch": 1.0641906096706377, "grad_norm": 0.2627427279949188, "learning_rate": 0.0001982827074862473, "loss": 0.1163, "step": 7593 }, { "epoch": 1.0643307638402242, "grad_norm": 0.3677605092525482, "learning_rate": 0.00019826835685242763, "loss": 0.0646, "step": 7594 }, { "epoch": 1.0644709180098109, "grad_norm": 0.17421786487102509, "learning_rate": 0.000198254006218608, "loss": 0.0329, "step": 7595 }, { "epoch": 1.0646110721793973, "grad_norm": 0.6553385257720947, "learning_rate": 0.00019823965558478832, "loss": 0.0739, "step": 7596 }, { "epoch": 1.0647512263489838, "grad_norm": 0.3015502989292145, "learning_rate": 0.00019822530495096865, "loss": 0.0246, "step": 7597 }, { "epoch": 1.0648913805185705, 
"grad_norm": 0.3092505931854248, "learning_rate": 0.000198210954317149, "loss": 0.0493, "step": 7598 }, { "epoch": 1.065031534688157, "grad_norm": 0.5699924230575562, "learning_rate": 0.00019819660368332933, "loss": 0.0277, "step": 7599 }, { "epoch": 1.0651716888577436, "grad_norm": 0.15097761154174805, "learning_rate": 0.00019818225304950966, "loss": 0.0257, "step": 7600 }, { "epoch": 1.06531184302733, "grad_norm": 0.19576691091060638, "learning_rate": 0.00019816790241569, "loss": 0.0409, "step": 7601 }, { "epoch": 1.0654519971969165, "grad_norm": 0.7171152830123901, "learning_rate": 0.00019815355178187037, "loss": 0.1016, "step": 7602 }, { "epoch": 1.0655921513665032, "grad_norm": 0.2911851108074188, "learning_rate": 0.0001981392011480507, "loss": 0.0496, "step": 7603 }, { "epoch": 1.0657323055360897, "grad_norm": 0.358695924282074, "learning_rate": 0.00019812485051423103, "loss": 0.0968, "step": 7604 }, { "epoch": 1.0658724597056763, "grad_norm": 0.29031676054000854, "learning_rate": 0.00019811049988041138, "loss": 0.1149, "step": 7605 }, { "epoch": 1.0660126138752628, "grad_norm": 0.12392277270555496, "learning_rate": 0.0001980961492465917, "loss": 0.0092, "step": 7606 }, { "epoch": 1.0661527680448493, "grad_norm": 0.25931262969970703, "learning_rate": 0.00019808179861277204, "loss": 0.0324, "step": 7607 }, { "epoch": 1.066292922214436, "grad_norm": 0.8556021451950073, "learning_rate": 0.0001980674479789524, "loss": 0.1004, "step": 7608 }, { "epoch": 1.0664330763840224, "grad_norm": 1.0676041841506958, "learning_rate": 0.00019805309734513272, "loss": 0.0768, "step": 7609 }, { "epoch": 1.066573230553609, "grad_norm": 0.23332707583904266, "learning_rate": 0.00019803874671131305, "loss": 0.0287, "step": 7610 }, { "epoch": 1.0667133847231955, "grad_norm": 0.39812928438186646, "learning_rate": 0.0001980243960774934, "loss": 0.0494, "step": 7611 }, { "epoch": 1.066853538892782, "grad_norm": 0.3797602653503418, "learning_rate": 0.00019801004544367374, "loss": 0.0758, 
"step": 7612 }, { "epoch": 1.0669936930623687, "grad_norm": 0.4300213158130646, "learning_rate": 0.00019799569480985406, "loss": 0.0419, "step": 7613 }, { "epoch": 1.0671338472319551, "grad_norm": 0.21468813717365265, "learning_rate": 0.00019798134417603445, "loss": 0.0265, "step": 7614 }, { "epoch": 1.0672740014015416, "grad_norm": 0.20800012350082397, "learning_rate": 0.00019796699354221478, "loss": 0.0183, "step": 7615 }, { "epoch": 1.0674141555711283, "grad_norm": 0.6823247671127319, "learning_rate": 0.0001979526429083951, "loss": 0.0838, "step": 7616 }, { "epoch": 1.0675543097407147, "grad_norm": 0.19142048060894012, "learning_rate": 0.00019793829227457543, "loss": 0.0333, "step": 7617 }, { "epoch": 1.0676944639103014, "grad_norm": 0.1455041915178299, "learning_rate": 0.0001979239416407558, "loss": 0.0817, "step": 7618 }, { "epoch": 1.0678346180798879, "grad_norm": 0.3570077419281006, "learning_rate": 0.00019790959100693612, "loss": 0.1126, "step": 7619 }, { "epoch": 1.0679747722494743, "grad_norm": 0.42099517583847046, "learning_rate": 0.00019789524037311645, "loss": 0.1019, "step": 7620 }, { "epoch": 1.068114926419061, "grad_norm": 0.5669971704483032, "learning_rate": 0.0001978808897392968, "loss": 0.0572, "step": 7621 }, { "epoch": 1.0682550805886475, "grad_norm": 1.097241997718811, "learning_rate": 0.00019786653910547713, "loss": 0.138, "step": 7622 }, { "epoch": 1.068395234758234, "grad_norm": 0.4841913878917694, "learning_rate": 0.00019785218847165746, "loss": 0.0641, "step": 7623 }, { "epoch": 1.0685353889278206, "grad_norm": 0.36787909269332886, "learning_rate": 0.00019783783783783784, "loss": 0.0671, "step": 7624 }, { "epoch": 1.068675543097407, "grad_norm": 0.13150766491889954, "learning_rate": 0.00019782348720401817, "loss": 0.016, "step": 7625 }, { "epoch": 1.0688156972669938, "grad_norm": 0.7408350110054016, "learning_rate": 0.0001978091365701985, "loss": 0.0767, "step": 7626 }, { "epoch": 1.0689558514365802, "grad_norm": 0.5474209189414978, 
"learning_rate": 0.00019779478593637885, "loss": 0.1179, "step": 7627 }, { "epoch": 1.0690960056061667, "grad_norm": 0.7485910654067993, "learning_rate": 0.00019778043530255918, "loss": 0.0969, "step": 7628 }, { "epoch": 1.0692361597757534, "grad_norm": 0.3601694107055664, "learning_rate": 0.0001977660846687395, "loss": 0.0306, "step": 7629 }, { "epoch": 1.0693763139453398, "grad_norm": 0.25154393911361694, "learning_rate": 0.00019775173403491987, "loss": 0.0272, "step": 7630 }, { "epoch": 1.0695164681149265, "grad_norm": 1.0999586582183838, "learning_rate": 0.0001977373834011002, "loss": 0.0459, "step": 7631 }, { "epoch": 1.069656622284513, "grad_norm": 0.3171786367893219, "learning_rate": 0.00019772303276728052, "loss": 0.0382, "step": 7632 }, { "epoch": 1.0697967764540994, "grad_norm": 0.4888435900211334, "learning_rate": 0.00019770868213346085, "loss": 0.0423, "step": 7633 }, { "epoch": 1.069936930623686, "grad_norm": 0.5235338807106018, "learning_rate": 0.00019769433149964124, "loss": 0.1306, "step": 7634 }, { "epoch": 1.0700770847932726, "grad_norm": 2.3171980381011963, "learning_rate": 0.00019767998086582156, "loss": 0.5611, "step": 7635 }, { "epoch": 1.0702172389628593, "grad_norm": 0.20193396508693695, "learning_rate": 0.0001976656302320019, "loss": 0.0258, "step": 7636 }, { "epoch": 1.0703573931324457, "grad_norm": 0.2289632111787796, "learning_rate": 0.00019765127959818225, "loss": 0.0456, "step": 7637 }, { "epoch": 1.0704975473020322, "grad_norm": 0.22437684237957, "learning_rate": 0.00019763692896436258, "loss": 0.0272, "step": 7638 }, { "epoch": 1.0706377014716189, "grad_norm": 0.21627455949783325, "learning_rate": 0.0001976225783305429, "loss": 0.0282, "step": 7639 }, { "epoch": 1.0707778556412053, "grad_norm": 0.08288053423166275, "learning_rate": 0.00019760822769672326, "loss": 0.013, "step": 7640 }, { "epoch": 1.0709180098107918, "grad_norm": 0.3561341464519501, "learning_rate": 0.0001975938770629036, "loss": 0.0621, "step": 7641 }, { "epoch": 
1.0710581639803785, "grad_norm": 0.31880494952201843, "learning_rate": 0.00019757952642908392, "loss": 0.0349, "step": 7642 }, { "epoch": 1.071198318149965, "grad_norm": 0.3106725513935089, "learning_rate": 0.00019756517579526427, "loss": 0.0388, "step": 7643 }, { "epoch": 1.0713384723195516, "grad_norm": 0.4200275242328644, "learning_rate": 0.0001975508251614446, "loss": 0.0316, "step": 7644 }, { "epoch": 1.071478626489138, "grad_norm": 0.29874706268310547, "learning_rate": 0.00019753647452762493, "loss": 0.0868, "step": 7645 }, { "epoch": 1.0716187806587245, "grad_norm": 0.3218427896499634, "learning_rate": 0.00019752212389380531, "loss": 0.0502, "step": 7646 }, { "epoch": 1.0717589348283112, "grad_norm": 0.16986088454723358, "learning_rate": 0.00019750777325998564, "loss": 0.0656, "step": 7647 }, { "epoch": 1.0718990889978977, "grad_norm": 0.26179036498069763, "learning_rate": 0.00019749342262616597, "loss": 0.0381, "step": 7648 }, { "epoch": 1.0720392431674843, "grad_norm": 0.2574778199195862, "learning_rate": 0.00019747907199234633, "loss": 0.0806, "step": 7649 }, { "epoch": 1.0721793973370708, "grad_norm": 0.43426260352134705, "learning_rate": 0.00019746472135852666, "loss": 0.0273, "step": 7650 }, { "epoch": 1.0723195515066573, "grad_norm": 0.3414384424686432, "learning_rate": 0.00019745037072470698, "loss": 0.0833, "step": 7651 }, { "epoch": 1.072459705676244, "grad_norm": 0.6683964133262634, "learning_rate": 0.0001974360200908873, "loss": 0.0194, "step": 7652 }, { "epoch": 1.0725998598458304, "grad_norm": 0.083673395216465, "learning_rate": 0.00019742166945706767, "loss": 0.0047, "step": 7653 }, { "epoch": 1.0727400140154169, "grad_norm": 0.33309340476989746, "learning_rate": 0.000197407318823248, "loss": 0.0496, "step": 7654 }, { "epoch": 1.0728801681850035, "grad_norm": 0.20718157291412354, "learning_rate": 0.00019739296818942833, "loss": 0.056, "step": 7655 }, { "epoch": 1.07302032235459, "grad_norm": 0.4584257900714874, "learning_rate": 
0.0001973786175556087, "loss": 0.052, "step": 7656 }, { "epoch": 1.0731604765241767, "grad_norm": 0.4084530472755432, "learning_rate": 0.00019736426692178904, "loss": 0.0861, "step": 7657 }, { "epoch": 1.0733006306937631, "grad_norm": 0.3616637885570526, "learning_rate": 0.00019734991628796937, "loss": 0.0548, "step": 7658 }, { "epoch": 1.0734407848633496, "grad_norm": 0.4114520847797394, "learning_rate": 0.00019733556565414972, "loss": 0.1039, "step": 7659 }, { "epoch": 1.0735809390329363, "grad_norm": 0.1908227503299713, "learning_rate": 0.00019732121502033005, "loss": 0.0641, "step": 7660 }, { "epoch": 1.0737210932025227, "grad_norm": 0.4970937967300415, "learning_rate": 0.00019730686438651038, "loss": 0.0416, "step": 7661 }, { "epoch": 1.0738612473721094, "grad_norm": 0.5051129460334778, "learning_rate": 0.00019729251375269073, "loss": 0.1104, "step": 7662 }, { "epoch": 1.0740014015416959, "grad_norm": 0.6620528101921082, "learning_rate": 0.00019727816311887106, "loss": 0.1118, "step": 7663 }, { "epoch": 1.0741415557112823, "grad_norm": 0.34434640407562256, "learning_rate": 0.0001972638124850514, "loss": 0.0755, "step": 7664 }, { "epoch": 1.074281709880869, "grad_norm": 0.22941954433918, "learning_rate": 0.00019724946185123177, "loss": 0.0314, "step": 7665 }, { "epoch": 1.0744218640504555, "grad_norm": 0.15672756731510162, "learning_rate": 0.0001972351112174121, "loss": 0.0237, "step": 7666 }, { "epoch": 1.0745620182200422, "grad_norm": 0.377066433429718, "learning_rate": 0.00019722076058359243, "loss": 0.0608, "step": 7667 }, { "epoch": 1.0747021723896286, "grad_norm": 0.4135933816432953, "learning_rate": 0.00019720640994977276, "loss": 0.0582, "step": 7668 }, { "epoch": 1.074842326559215, "grad_norm": 0.4028638005256653, "learning_rate": 0.00019719205931595311, "loss": 0.1164, "step": 7669 }, { "epoch": 1.0749824807288018, "grad_norm": 0.512250542640686, "learning_rate": 0.00019717770868213344, "loss": 0.0757, "step": 7670 }, { "epoch": 1.0751226348983882, 
"grad_norm": 0.2980792224407196, "learning_rate": 0.00019716335804831377, "loss": 0.0346, "step": 7671 }, { "epoch": 1.0752627890679747, "grad_norm": 0.17340050637722015, "learning_rate": 0.00019714900741449413, "loss": 0.0503, "step": 7672 }, { "epoch": 1.0754029432375614, "grad_norm": 0.20955890417099, "learning_rate": 0.00019713465678067446, "loss": 0.0172, "step": 7673 }, { "epoch": 1.0755430974071478, "grad_norm": 0.19779478013515472, "learning_rate": 0.00019712030614685478, "loss": 0.0434, "step": 7674 }, { "epoch": 1.0756832515767345, "grad_norm": 0.6693371534347534, "learning_rate": 0.00019710595551303514, "loss": 0.0626, "step": 7675 }, { "epoch": 1.075823405746321, "grad_norm": 0.18301236629486084, "learning_rate": 0.00019709160487921547, "loss": 0.0267, "step": 7676 }, { "epoch": 1.0759635599159074, "grad_norm": 0.42358770966529846, "learning_rate": 0.0001970772542453958, "loss": 0.0521, "step": 7677 }, { "epoch": 1.076103714085494, "grad_norm": 0.6924291253089905, "learning_rate": 0.00019706290361157618, "loss": 0.1468, "step": 7678 }, { "epoch": 1.0762438682550806, "grad_norm": 0.45006266236305237, "learning_rate": 0.0001970485529777565, "loss": 0.0456, "step": 7679 }, { "epoch": 1.076384022424667, "grad_norm": 0.25100284814834595, "learning_rate": 0.00019703420234393684, "loss": 0.0472, "step": 7680 }, { "epoch": 1.0765241765942537, "grad_norm": 0.32928958535194397, "learning_rate": 0.0001970198517101172, "loss": 0.0582, "step": 7681 }, { "epoch": 1.0766643307638402, "grad_norm": 0.5857794880867004, "learning_rate": 0.00019700550107629752, "loss": 0.0667, "step": 7682 }, { "epoch": 1.0768044849334268, "grad_norm": 0.5003965497016907, "learning_rate": 0.00019699115044247785, "loss": 0.0284, "step": 7683 }, { "epoch": 1.0769446391030133, "grad_norm": 1.3723543882369995, "learning_rate": 0.0001969767998086582, "loss": 0.2033, "step": 7684 }, { "epoch": 1.0770847932725998, "grad_norm": 1.1034142971038818, "learning_rate": 0.00019696244917483853, "loss": 
0.2817, "step": 7685 }, { "epoch": 1.0772249474421864, "grad_norm": 0.365693062543869, "learning_rate": 0.00019694809854101886, "loss": 0.0403, "step": 7686 }, { "epoch": 1.077365101611773, "grad_norm": 0.3229908049106598, "learning_rate": 0.0001969337479071992, "loss": 0.0405, "step": 7687 }, { "epoch": 1.0775052557813596, "grad_norm": 0.18721123039722443, "learning_rate": 0.00019691939727337957, "loss": 0.0457, "step": 7688 }, { "epoch": 1.077645409950946, "grad_norm": 0.32023507356643677, "learning_rate": 0.0001969050466395599, "loss": 0.0677, "step": 7689 }, { "epoch": 1.0777855641205325, "grad_norm": 0.41388630867004395, "learning_rate": 0.00019689069600574023, "loss": 0.0711, "step": 7690 }, { "epoch": 1.0779257182901192, "grad_norm": 0.3030456006526947, "learning_rate": 0.0001968763453719206, "loss": 0.0592, "step": 7691 }, { "epoch": 1.0780658724597056, "grad_norm": 0.21968378126621246, "learning_rate": 0.00019686199473810092, "loss": 0.0574, "step": 7692 }, { "epoch": 1.0782060266292923, "grad_norm": 0.22468826174736023, "learning_rate": 0.00019684764410428124, "loss": 0.0322, "step": 7693 }, { "epoch": 1.0783461807988788, "grad_norm": 0.10458522289991379, "learning_rate": 0.0001968332934704616, "loss": 0.026, "step": 7694 }, { "epoch": 1.0784863349684652, "grad_norm": 0.3012867867946625, "learning_rate": 0.00019681894283664193, "loss": 0.0701, "step": 7695 }, { "epoch": 1.078626489138052, "grad_norm": 0.2631513178348541, "learning_rate": 0.00019680459220282226, "loss": 0.0667, "step": 7696 }, { "epoch": 1.0787666433076384, "grad_norm": 0.10674197971820831, "learning_rate": 0.00019679024156900264, "loss": 0.0202, "step": 7697 }, { "epoch": 1.0789067974772248, "grad_norm": 0.4610738158226013, "learning_rate": 0.00019677589093518297, "loss": 0.1531, "step": 7698 }, { "epoch": 1.0790469516468115, "grad_norm": 0.3037176728248596, "learning_rate": 0.0001967615403013633, "loss": 0.071, "step": 7699 }, { "epoch": 1.079187105816398, "grad_norm": 
0.5380765199661255, "learning_rate": 0.00019674718966754365, "loss": 0.1136, "step": 7700 }, { "epoch": 1.0793272599859847, "grad_norm": 0.22383515536785126, "learning_rate": 0.00019673283903372398, "loss": 0.042, "step": 7701 }, { "epoch": 1.0794674141555711, "grad_norm": 0.2810443937778473, "learning_rate": 0.0001967184883999043, "loss": 0.0217, "step": 7702 }, { "epoch": 1.0796075683251576, "grad_norm": 0.25943776965141296, "learning_rate": 0.00019670413776608464, "loss": 0.0357, "step": 7703 }, { "epoch": 1.0797477224947443, "grad_norm": 0.22084593772888184, "learning_rate": 0.000196689787132265, "loss": 0.0358, "step": 7704 }, { "epoch": 1.0798878766643307, "grad_norm": 0.16578315198421478, "learning_rate": 0.00019667543649844532, "loss": 0.0247, "step": 7705 }, { "epoch": 1.0800280308339174, "grad_norm": 0.3578493595123291, "learning_rate": 0.00019666108586462565, "loss": 0.0651, "step": 7706 }, { "epoch": 1.0801681850035039, "grad_norm": 0.20319566130638123, "learning_rate": 0.000196646735230806, "loss": 0.0118, "step": 7707 }, { "epoch": 1.0803083391730903, "grad_norm": 0.2713966369628906, "learning_rate": 0.00019663238459698634, "loss": 0.0499, "step": 7708 }, { "epoch": 1.080448493342677, "grad_norm": 0.25646093487739563, "learning_rate": 0.00019661803396316666, "loss": 0.032, "step": 7709 }, { "epoch": 1.0805886475122635, "grad_norm": 0.2926417887210846, "learning_rate": 0.00019660368332934705, "loss": 0.1006, "step": 7710 }, { "epoch": 1.08072880168185, "grad_norm": 0.2752074897289276, "learning_rate": 0.00019658933269552738, "loss": 0.016, "step": 7711 }, { "epoch": 1.0808689558514366, "grad_norm": 0.5022668838500977, "learning_rate": 0.0001965749820617077, "loss": 0.0525, "step": 7712 }, { "epoch": 1.081009110021023, "grad_norm": 0.19097664952278137, "learning_rate": 0.00019656063142788806, "loss": 0.0542, "step": 7713 }, { "epoch": 1.0811492641906097, "grad_norm": 0.2498018890619278, "learning_rate": 0.0001965462807940684, "loss": 0.0966, "step": 
7714 }, { "epoch": 1.0812894183601962, "grad_norm": 0.13516223430633545, "learning_rate": 0.00019653193016024872, "loss": 0.0223, "step": 7715 }, { "epoch": 1.0814295725297827, "grad_norm": 0.15037253499031067, "learning_rate": 0.00019651757952642907, "loss": 0.0085, "step": 7716 }, { "epoch": 1.0815697266993693, "grad_norm": 0.9176134467124939, "learning_rate": 0.0001965032288926094, "loss": 0.0726, "step": 7717 }, { "epoch": 1.0817098808689558, "grad_norm": 0.12279194593429565, "learning_rate": 0.00019648887825878973, "loss": 0.0105, "step": 7718 }, { "epoch": 1.0818500350385425, "grad_norm": 0.38189974427223206, "learning_rate": 0.0001964745276249701, "loss": 0.0754, "step": 7719 }, { "epoch": 1.081990189208129, "grad_norm": 0.4557897448539734, "learning_rate": 0.00019646017699115044, "loss": 0.0729, "step": 7720 }, { "epoch": 1.0821303433777154, "grad_norm": 0.3637048304080963, "learning_rate": 0.00019644582635733077, "loss": 0.0796, "step": 7721 }, { "epoch": 1.082270497547302, "grad_norm": 0.7959785461425781, "learning_rate": 0.0001964314757235111, "loss": 0.0657, "step": 7722 }, { "epoch": 1.0824106517168885, "grad_norm": 0.7466673254966736, "learning_rate": 0.00019641712508969145, "loss": 0.184, "step": 7723 }, { "epoch": 1.0825508058864752, "grad_norm": 0.2976071834564209, "learning_rate": 0.00019640277445587178, "loss": 0.0381, "step": 7724 }, { "epoch": 1.0826909600560617, "grad_norm": 0.9213812947273254, "learning_rate": 0.0001963884238220521, "loss": 0.0828, "step": 7725 }, { "epoch": 1.0828311142256482, "grad_norm": 0.6381317973136902, "learning_rate": 0.00019637407318823247, "loss": 0.0963, "step": 7726 }, { "epoch": 1.0829712683952348, "grad_norm": 0.7232535481452942, "learning_rate": 0.0001963597225544128, "loss": 0.0343, "step": 7727 }, { "epoch": 1.0831114225648213, "grad_norm": 0.4582957625389099, "learning_rate": 0.00019634537192059312, "loss": 0.0791, "step": 7728 }, { "epoch": 1.0832515767344078, "grad_norm": 0.3746282458305359, 
"learning_rate": 0.0001963310212867735, "loss": 0.0533, "step": 7729 }, { "epoch": 1.0833917309039944, "grad_norm": 0.4384494125843048, "learning_rate": 0.00019631667065295383, "loss": 0.0516, "step": 7730 }, { "epoch": 1.083531885073581, "grad_norm": 0.3319662809371948, "learning_rate": 0.00019630232001913416, "loss": 0.029, "step": 7731 }, { "epoch": 1.0836720392431676, "grad_norm": 0.7180753350257874, "learning_rate": 0.00019628796938531452, "loss": 0.1695, "step": 7732 }, { "epoch": 1.083812193412754, "grad_norm": 1.7276380062103271, "learning_rate": 0.00019627361875149485, "loss": 0.1131, "step": 7733 }, { "epoch": 1.0839523475823405, "grad_norm": 0.6530478000640869, "learning_rate": 0.00019625926811767518, "loss": 0.1136, "step": 7734 }, { "epoch": 1.0840925017519272, "grad_norm": 0.9716345071792603, "learning_rate": 0.00019624491748385553, "loss": 0.1691, "step": 7735 }, { "epoch": 1.0842326559215136, "grad_norm": 0.5784167051315308, "learning_rate": 0.00019623056685003586, "loss": 0.0718, "step": 7736 }, { "epoch": 1.0843728100911003, "grad_norm": 0.21612343192100525, "learning_rate": 0.0001962162162162162, "loss": 0.0363, "step": 7737 }, { "epoch": 1.0845129642606868, "grad_norm": 0.1700899749994278, "learning_rate": 0.00019620186558239652, "loss": 0.0255, "step": 7738 }, { "epoch": 1.0846531184302732, "grad_norm": 0.17835497856140137, "learning_rate": 0.00019618751494857687, "loss": 0.0176, "step": 7739 }, { "epoch": 1.08479327259986, "grad_norm": 0.3192072808742523, "learning_rate": 0.0001961731643147572, "loss": 0.0844, "step": 7740 }, { "epoch": 1.0849334267694464, "grad_norm": 0.20011286437511444, "learning_rate": 0.00019615881368093753, "loss": 0.035, "step": 7741 }, { "epoch": 1.0850735809390328, "grad_norm": 0.257863312959671, "learning_rate": 0.0001961444630471179, "loss": 0.0545, "step": 7742 }, { "epoch": 1.0852137351086195, "grad_norm": 0.2425508350133896, "learning_rate": 0.00019613011241329824, "loss": 0.0618, "step": 7743 }, { "epoch": 
1.085353889278206, "grad_norm": 0.0386434942483902, "learning_rate": 0.00019611576177947857, "loss": 0.0057, "step": 7744 }, { "epoch": 1.0854940434477927, "grad_norm": 0.37644556164741516, "learning_rate": 0.00019610141114565893, "loss": 0.0628, "step": 7745 }, { "epoch": 1.0856341976173791, "grad_norm": 0.2679113745689392, "learning_rate": 0.00019608706051183925, "loss": 0.0394, "step": 7746 }, { "epoch": 1.0857743517869656, "grad_norm": 0.3189074397087097, "learning_rate": 0.00019607270987801958, "loss": 0.0614, "step": 7747 }, { "epoch": 1.0859145059565523, "grad_norm": 0.3783037066459656, "learning_rate": 0.00019605835924419994, "loss": 0.0644, "step": 7748 }, { "epoch": 1.0860546601261387, "grad_norm": 0.29174134135246277, "learning_rate": 0.00019604400861038027, "loss": 0.0538, "step": 7749 }, { "epoch": 1.0861948142957254, "grad_norm": 0.4524042010307312, "learning_rate": 0.0001960296579765606, "loss": 0.0855, "step": 7750 }, { "epoch": 1.0863349684653119, "grad_norm": 0.18859432637691498, "learning_rate": 0.00019601530734274098, "loss": 0.0343, "step": 7751 }, { "epoch": 1.0864751226348983, "grad_norm": 0.42731592059135437, "learning_rate": 0.0001960009567089213, "loss": 0.0333, "step": 7752 }, { "epoch": 1.086615276804485, "grad_norm": 0.292182058095932, "learning_rate": 0.00019598660607510164, "loss": 0.0481, "step": 7753 }, { "epoch": 1.0867554309740715, "grad_norm": 0.267025351524353, "learning_rate": 0.000195972255441282, "loss": 0.0493, "step": 7754 }, { "epoch": 1.0868955851436581, "grad_norm": 0.23354393243789673, "learning_rate": 0.00019595790480746232, "loss": 0.0209, "step": 7755 }, { "epoch": 1.0870357393132446, "grad_norm": 0.46044641733169556, "learning_rate": 0.00019594355417364265, "loss": 0.0509, "step": 7756 }, { "epoch": 1.087175893482831, "grad_norm": 0.29496869444847107, "learning_rate": 0.00019592920353982298, "loss": 0.0704, "step": 7757 }, { "epoch": 1.0873160476524177, "grad_norm": 0.16183353960514069, "learning_rate": 
0.00019591485290600333, "loss": 0.0588, "step": 7758 }, { "epoch": 1.0874562018220042, "grad_norm": 0.17188377678394318, "learning_rate": 0.00019590050227218366, "loss": 0.0265, "step": 7759 }, { "epoch": 1.0875963559915907, "grad_norm": 0.5238999128341675, "learning_rate": 0.000195886151638364, "loss": 0.0577, "step": 7760 }, { "epoch": 1.0877365101611773, "grad_norm": 0.35187384486198425, "learning_rate": 0.00019587180100454437, "loss": 0.077, "step": 7761 }, { "epoch": 1.0878766643307638, "grad_norm": 0.5250095725059509, "learning_rate": 0.0001958574503707247, "loss": 0.0401, "step": 7762 }, { "epoch": 1.0880168185003505, "grad_norm": 0.28186026215553284, "learning_rate": 0.00019584309973690503, "loss": 0.0677, "step": 7763 }, { "epoch": 1.088156972669937, "grad_norm": 0.4049895405769348, "learning_rate": 0.00019582874910308539, "loss": 0.0458, "step": 7764 }, { "epoch": 1.0882971268395234, "grad_norm": 0.15566539764404297, "learning_rate": 0.00019581439846926571, "loss": 0.0241, "step": 7765 }, { "epoch": 1.08843728100911, "grad_norm": 0.4208690822124481, "learning_rate": 0.00019580004783544604, "loss": 0.0654, "step": 7766 }, { "epoch": 1.0885774351786965, "grad_norm": 0.34259408712387085, "learning_rate": 0.0001957856972016264, "loss": 0.0704, "step": 7767 }, { "epoch": 1.088717589348283, "grad_norm": 0.21498975157737732, "learning_rate": 0.00019577134656780673, "loss": 0.0431, "step": 7768 }, { "epoch": 1.0888577435178697, "grad_norm": 0.1400747001171112, "learning_rate": 0.00019575699593398706, "loss": 0.0317, "step": 7769 }, { "epoch": 1.0889978976874561, "grad_norm": 0.24307838082313538, "learning_rate": 0.0001957426453001674, "loss": 0.0196, "step": 7770 }, { "epoch": 1.0891380518570428, "grad_norm": 0.2923641800880432, "learning_rate": 0.00019572829466634774, "loss": 0.0406, "step": 7771 }, { "epoch": 1.0892782060266293, "grad_norm": 0.1953319013118744, "learning_rate": 0.00019571394403252807, "loss": 0.036, "step": 7772 }, { "epoch": 
1.0894183601962157, "grad_norm": 0.8618971705436707, "learning_rate": 0.0001956995933987084, "loss": 0.0673, "step": 7773 }, { "epoch": 1.0895585143658024, "grad_norm": 0.4419419467449188, "learning_rate": 0.00019568524276488878, "loss": 0.0836, "step": 7774 }, { "epoch": 1.0896986685353889, "grad_norm": 0.08663631975650787, "learning_rate": 0.0001956708921310691, "loss": 0.0101, "step": 7775 }, { "epoch": 1.0898388227049756, "grad_norm": 0.4783826768398285, "learning_rate": 0.00019565654149724944, "loss": 0.1525, "step": 7776 }, { "epoch": 1.089978976874562, "grad_norm": 0.4597350060939789, "learning_rate": 0.0001956421908634298, "loss": 0.0287, "step": 7777 }, { "epoch": 1.0901191310441485, "grad_norm": 0.6316526532173157, "learning_rate": 0.00019562784022961012, "loss": 0.0976, "step": 7778 }, { "epoch": 1.0902592852137352, "grad_norm": 0.042646557092666626, "learning_rate": 0.00019561348959579045, "loss": 0.0051, "step": 7779 }, { "epoch": 1.0903994393833216, "grad_norm": 0.428884893655777, "learning_rate": 0.0001955991389619708, "loss": 0.1054, "step": 7780 }, { "epoch": 1.0905395935529083, "grad_norm": 0.7622066140174866, "learning_rate": 0.00019558478832815113, "loss": 0.0609, "step": 7781 }, { "epoch": 1.0906797477224948, "grad_norm": 2.064103364944458, "learning_rate": 0.00019557043769433146, "loss": 0.1038, "step": 7782 }, { "epoch": 1.0908199018920812, "grad_norm": 0.23380877077579498, "learning_rate": 0.00019555608706051184, "loss": 0.0207, "step": 7783 }, { "epoch": 1.090960056061668, "grad_norm": 1.3618347644805908, "learning_rate": 0.00019554173642669217, "loss": 0.0982, "step": 7784 }, { "epoch": 1.0911002102312544, "grad_norm": 1.470130443572998, "learning_rate": 0.0001955273857928725, "loss": 0.0433, "step": 7785 }, { "epoch": 1.0912403644008408, "grad_norm": 0.39666977524757385, "learning_rate": 0.00019551303515905286, "loss": 0.075, "step": 7786 }, { "epoch": 1.0913805185704275, "grad_norm": 0.361255407333374, "learning_rate": 
0.00019549868452523319, "loss": 0.0794, "step": 7787 }, { "epoch": 1.091520672740014, "grad_norm": 0.19289886951446533, "learning_rate": 0.00019548433389141351, "loss": 0.0383, "step": 7788 }, { "epoch": 1.0916608269096006, "grad_norm": 0.2669353187084198, "learning_rate": 0.00019546998325759387, "loss": 0.0654, "step": 7789 }, { "epoch": 1.091800981079187, "grad_norm": 0.41004490852355957, "learning_rate": 0.0001954556326237742, "loss": 0.0621, "step": 7790 }, { "epoch": 1.0919411352487736, "grad_norm": 0.3903586268424988, "learning_rate": 0.00019544128198995453, "loss": 0.0591, "step": 7791 }, { "epoch": 1.0920812894183602, "grad_norm": 1.1857318878173828, "learning_rate": 0.00019542693135613486, "loss": 0.0433, "step": 7792 }, { "epoch": 1.0922214435879467, "grad_norm": 0.34951847791671753, "learning_rate": 0.00019541258072231524, "loss": 0.0382, "step": 7793 }, { "epoch": 1.0923615977575334, "grad_norm": 0.4225844740867615, "learning_rate": 0.00019539823008849557, "loss": 0.0547, "step": 7794 }, { "epoch": 1.0925017519271198, "grad_norm": 0.4104465842247009, "learning_rate": 0.0001953838794546759, "loss": 0.0685, "step": 7795 }, { "epoch": 1.0926419060967063, "grad_norm": 0.26032739877700806, "learning_rate": 0.00019536952882085625, "loss": 0.0495, "step": 7796 }, { "epoch": 1.092782060266293, "grad_norm": 0.3200533092021942, "learning_rate": 0.00019535517818703658, "loss": 0.0552, "step": 7797 }, { "epoch": 1.0929222144358794, "grad_norm": 0.28809860348701477, "learning_rate": 0.0001953408275532169, "loss": 0.0291, "step": 7798 }, { "epoch": 1.093062368605466, "grad_norm": 0.6311838030815125, "learning_rate": 0.00019532647691939726, "loss": 0.0824, "step": 7799 }, { "epoch": 1.0932025227750526, "grad_norm": 0.13497351109981537, "learning_rate": 0.0001953121262855776, "loss": 0.0185, "step": 7800 }, { "epoch": 1.093342676944639, "grad_norm": 0.538755476474762, "learning_rate": 0.00019529777565175792, "loss": 0.1242, "step": 7801 }, { "epoch": 
1.0934828311142257, "grad_norm": 0.12650761008262634, "learning_rate": 0.00019528342501793828, "loss": 0.0085, "step": 7802 }, { "epoch": 1.0936229852838122, "grad_norm": 0.3782743215560913, "learning_rate": 0.0001952690743841186, "loss": 0.0395, "step": 7803 }, { "epoch": 1.0937631394533986, "grad_norm": 0.3198573887348175, "learning_rate": 0.00019525472375029893, "loss": 0.0527, "step": 7804 }, { "epoch": 1.0939032936229853, "grad_norm": 0.35259202122688293, "learning_rate": 0.00019524037311647932, "loss": 0.0415, "step": 7805 }, { "epoch": 1.0940434477925718, "grad_norm": 0.3345377743244171, "learning_rate": 0.00019522602248265965, "loss": 0.0512, "step": 7806 }, { "epoch": 1.0941836019621585, "grad_norm": 0.11956437677145004, "learning_rate": 0.00019521167184883997, "loss": 0.0085, "step": 7807 }, { "epoch": 1.094323756131745, "grad_norm": 0.2903411388397217, "learning_rate": 0.0001951973212150203, "loss": 0.0612, "step": 7808 }, { "epoch": 1.0944639103013314, "grad_norm": 0.35868003964424133, "learning_rate": 0.00019518297058120066, "loss": 0.0244, "step": 7809 }, { "epoch": 1.094604064470918, "grad_norm": 0.19841048121452332, "learning_rate": 0.000195168619947381, "loss": 0.0485, "step": 7810 }, { "epoch": 1.0947442186405045, "grad_norm": 0.5561944842338562, "learning_rate": 0.00019515426931356132, "loss": 0.0864, "step": 7811 }, { "epoch": 1.0948843728100912, "grad_norm": 0.37782612442970276, "learning_rate": 0.00019513991867974167, "loss": 0.059, "step": 7812 }, { "epoch": 1.0950245269796777, "grad_norm": 0.33996427059173584, "learning_rate": 0.000195125568045922, "loss": 0.0857, "step": 7813 }, { "epoch": 1.0951646811492641, "grad_norm": 0.460025429725647, "learning_rate": 0.00019511121741210233, "loss": 0.0561, "step": 7814 }, { "epoch": 1.0953048353188508, "grad_norm": 0.11655977368354797, "learning_rate": 0.0001950968667782827, "loss": 0.0262, "step": 7815 }, { "epoch": 1.0954449894884373, "grad_norm": 0.2663809359073639, "learning_rate": 
0.00019508251614446304, "loss": 0.0381, "step": 7816 }, { "epoch": 1.0955851436580237, "grad_norm": 0.48363324999809265, "learning_rate": 0.00019506816551064337, "loss": 0.0624, "step": 7817 }, { "epoch": 1.0957252978276104, "grad_norm": 1.2686469554901123, "learning_rate": 0.00019505381487682372, "loss": 0.0867, "step": 7818 }, { "epoch": 1.0958654519971969, "grad_norm": 0.3008817136287689, "learning_rate": 0.00019503946424300405, "loss": 0.0313, "step": 7819 }, { "epoch": 1.0960056061667836, "grad_norm": 0.6708664298057556, "learning_rate": 0.00019502511360918438, "loss": 0.1013, "step": 7820 }, { "epoch": 1.09614576033637, "grad_norm": 0.5214825868606567, "learning_rate": 0.00019501076297536474, "loss": 0.0689, "step": 7821 }, { "epoch": 1.0962859145059565, "grad_norm": 0.3507421314716339, "learning_rate": 0.00019499641234154507, "loss": 0.0655, "step": 7822 }, { "epoch": 1.0964260686755432, "grad_norm": 0.2852841317653656, "learning_rate": 0.0001949820617077254, "loss": 0.0591, "step": 7823 }, { "epoch": 1.0965662228451296, "grad_norm": 0.13078312575817108, "learning_rate": 0.00019496771107390578, "loss": 0.0128, "step": 7824 }, { "epoch": 1.096706377014716, "grad_norm": 0.43701568245887756, "learning_rate": 0.0001949533604400861, "loss": 0.0255, "step": 7825 }, { "epoch": 1.0968465311843028, "grad_norm": 0.7900677919387817, "learning_rate": 0.00019493900980626643, "loss": 0.1769, "step": 7826 }, { "epoch": 1.0969866853538892, "grad_norm": 0.0890909880399704, "learning_rate": 0.00019492465917244676, "loss": 0.0095, "step": 7827 }, { "epoch": 1.097126839523476, "grad_norm": 0.9859515428543091, "learning_rate": 0.00019491030853862712, "loss": 0.1583, "step": 7828 }, { "epoch": 1.0972669936930624, "grad_norm": 0.23981811106204987, "learning_rate": 0.00019489595790480745, "loss": 0.0145, "step": 7829 }, { "epoch": 1.0974071478626488, "grad_norm": 1.265547513961792, "learning_rate": 0.00019488160727098777, "loss": 0.2236, "step": 7830 }, { "epoch": 
1.0975473020322355, "grad_norm": 0.7176455855369568, "learning_rate": 0.00019486725663716813, "loss": 0.0666, "step": 7831 }, { "epoch": 1.097687456201822, "grad_norm": 0.6914899349212646, "learning_rate": 0.00019485290600334846, "loss": 0.0834, "step": 7832 }, { "epoch": 1.0978276103714086, "grad_norm": 0.7753621935844421, "learning_rate": 0.0001948385553695288, "loss": 0.1039, "step": 7833 }, { "epoch": 1.097967764540995, "grad_norm": 0.7670572996139526, "learning_rate": 0.00019482420473570914, "loss": 0.0345, "step": 7834 }, { "epoch": 1.0981079187105816, "grad_norm": 0.8203732967376709, "learning_rate": 0.00019480985410188947, "loss": 0.0438, "step": 7835 }, { "epoch": 1.0982480728801682, "grad_norm": 0.294342577457428, "learning_rate": 0.0001947955034680698, "loss": 0.0478, "step": 7836 }, { "epoch": 1.0983882270497547, "grad_norm": 0.3223698139190674, "learning_rate": 0.00019478115283425018, "loss": 0.0473, "step": 7837 }, { "epoch": 1.0985283812193414, "grad_norm": 0.4793541729450226, "learning_rate": 0.0001947668022004305, "loss": 0.0868, "step": 7838 }, { "epoch": 1.0986685353889278, "grad_norm": 0.29572466015815735, "learning_rate": 0.00019475245156661084, "loss": 0.0739, "step": 7839 }, { "epoch": 1.0988086895585143, "grad_norm": 0.18741720914840698, "learning_rate": 0.0001947381009327912, "loss": 0.0351, "step": 7840 }, { "epoch": 1.098948843728101, "grad_norm": 0.3839675784111023, "learning_rate": 0.00019472375029897152, "loss": 0.0686, "step": 7841 }, { "epoch": 1.0990889978976874, "grad_norm": 0.29997843503952026, "learning_rate": 0.00019470939966515185, "loss": 0.0819, "step": 7842 }, { "epoch": 1.0992291520672741, "grad_norm": 0.3279254138469696, "learning_rate": 0.00019469504903133218, "loss": 0.046, "step": 7843 }, { "epoch": 1.0993693062368606, "grad_norm": 0.47016963362693787, "learning_rate": 0.00019468069839751254, "loss": 0.0352, "step": 7844 }, { "epoch": 1.099509460406447, "grad_norm": 0.2271425575017929, "learning_rate": 
0.00019466634776369287, "loss": 0.0291, "step": 7845 }, { "epoch": 1.0996496145760337, "grad_norm": 0.1966121643781662, "learning_rate": 0.0001946519971298732, "loss": 0.0528, "step": 7846 }, { "epoch": 1.0997897687456202, "grad_norm": 0.168510302901268, "learning_rate": 0.00019463764649605358, "loss": 0.0165, "step": 7847 }, { "epoch": 1.0999299229152066, "grad_norm": 0.4008927047252655, "learning_rate": 0.0001946232958622339, "loss": 0.0504, "step": 7848 }, { "epoch": 1.1000700770847933, "grad_norm": 0.21007902920246124, "learning_rate": 0.00019460894522841423, "loss": 0.0661, "step": 7849 }, { "epoch": 1.1002102312543798, "grad_norm": 0.4335603713989258, "learning_rate": 0.0001945945945945946, "loss": 0.0645, "step": 7850 }, { "epoch": 1.1003503854239665, "grad_norm": 0.11960671842098236, "learning_rate": 0.00019458024396077492, "loss": 0.0088, "step": 7851 }, { "epoch": 1.100490539593553, "grad_norm": 0.2624993324279785, "learning_rate": 0.00019456589332695525, "loss": 0.0466, "step": 7852 }, { "epoch": 1.1006306937631394, "grad_norm": 0.26366522908210754, "learning_rate": 0.0001945515426931356, "loss": 0.0244, "step": 7853 }, { "epoch": 1.100770847932726, "grad_norm": 0.19341231882572174, "learning_rate": 0.00019453719205931593, "loss": 0.0441, "step": 7854 }, { "epoch": 1.1009110021023125, "grad_norm": 0.16881239414215088, "learning_rate": 0.00019452284142549626, "loss": 0.0164, "step": 7855 }, { "epoch": 1.101051156271899, "grad_norm": 0.13859426975250244, "learning_rate": 0.00019450849079167664, "loss": 0.0238, "step": 7856 }, { "epoch": 1.1011913104414857, "grad_norm": 0.4294334053993225, "learning_rate": 0.00019449414015785697, "loss": 0.1345, "step": 7857 }, { "epoch": 1.1013314646110721, "grad_norm": 0.8766242861747742, "learning_rate": 0.0001944797895240373, "loss": 0.0764, "step": 7858 }, { "epoch": 1.1014716187806588, "grad_norm": 0.3806171119213104, "learning_rate": 0.00019446543889021766, "loss": 0.057, "step": 7859 }, { "epoch": 
1.1016117729502453, "grad_norm": 0.7184103727340698, "learning_rate": 0.00019445108825639798, "loss": 0.0152, "step": 7860 }, { "epoch": 1.1017519271198317, "grad_norm": 0.41738948225975037, "learning_rate": 0.0001944367376225783, "loss": 0.0465, "step": 7861 }, { "epoch": 1.1018920812894184, "grad_norm": 0.3431372046470642, "learning_rate": 0.00019442238698875864, "loss": 0.0433, "step": 7862 }, { "epoch": 1.1020322354590049, "grad_norm": 0.1914280205965042, "learning_rate": 0.000194408036354939, "loss": 0.024, "step": 7863 }, { "epoch": 1.1021723896285915, "grad_norm": 0.2511115074157715, "learning_rate": 0.00019439368572111933, "loss": 0.0319, "step": 7864 }, { "epoch": 1.102312543798178, "grad_norm": 0.7820152640342712, "learning_rate": 0.00019437933508729965, "loss": 0.0716, "step": 7865 }, { "epoch": 1.1024526979677645, "grad_norm": 0.6725873351097107, "learning_rate": 0.00019436498445348, "loss": 0.0646, "step": 7866 }, { "epoch": 1.1025928521373511, "grad_norm": 0.2968751788139343, "learning_rate": 0.00019435063381966034, "loss": 0.1052, "step": 7867 }, { "epoch": 1.1027330063069376, "grad_norm": 0.5666132569313049, "learning_rate": 0.00019433628318584067, "loss": 0.0496, "step": 7868 }, { "epoch": 1.1028731604765243, "grad_norm": 0.698685348033905, "learning_rate": 0.00019432193255202105, "loss": 0.0556, "step": 7869 }, { "epoch": 1.1030133146461107, "grad_norm": 0.3263324797153473, "learning_rate": 0.00019430758191820138, "loss": 0.0543, "step": 7870 }, { "epoch": 1.1031534688156972, "grad_norm": 0.36369356513023376, "learning_rate": 0.0001942932312843817, "loss": 0.0692, "step": 7871 }, { "epoch": 1.1032936229852839, "grad_norm": 0.2374008446931839, "learning_rate": 0.00019427888065056206, "loss": 0.022, "step": 7872 }, { "epoch": 1.1034337771548703, "grad_norm": 0.4797550439834595, "learning_rate": 0.0001942645300167424, "loss": 0.0587, "step": 7873 }, { "epoch": 1.1035739313244568, "grad_norm": 0.27886685729026794, "learning_rate": 
0.00019425017938292272, "loss": 0.0417, "step": 7874 }, { "epoch": 1.1037140854940435, "grad_norm": 0.17206478118896484, "learning_rate": 0.00019423582874910308, "loss": 0.0143, "step": 7875 }, { "epoch": 1.10385423966363, "grad_norm": 0.15134984254837036, "learning_rate": 0.0001942214781152834, "loss": 0.0127, "step": 7876 }, { "epoch": 1.1039943938332166, "grad_norm": 0.6880931854248047, "learning_rate": 0.00019420712748146373, "loss": 0.0744, "step": 7877 }, { "epoch": 1.104134548002803, "grad_norm": 0.12138552963733673, "learning_rate": 0.00019419277684764406, "loss": 0.0127, "step": 7878 }, { "epoch": 1.1042747021723895, "grad_norm": 0.052256833761930466, "learning_rate": 0.00019417842621382444, "loss": 0.0037, "step": 7879 }, { "epoch": 1.1044148563419762, "grad_norm": 0.8468590974807739, "learning_rate": 0.00019416407558000477, "loss": 0.1146, "step": 7880 }, { "epoch": 1.1045550105115627, "grad_norm": 0.3638436496257782, "learning_rate": 0.0001941497249461851, "loss": 0.0854, "step": 7881 }, { "epoch": 1.1046951646811494, "grad_norm": 0.3517143428325653, "learning_rate": 0.00019413537431236546, "loss": 0.0199, "step": 7882 }, { "epoch": 1.1048353188507358, "grad_norm": 1.5876296758651733, "learning_rate": 0.00019412102367854578, "loss": 0.1463, "step": 7883 }, { "epoch": 1.1049754730203223, "grad_norm": 1.5338149070739746, "learning_rate": 0.0001941066730447261, "loss": 0.0748, "step": 7884 }, { "epoch": 1.105115627189909, "grad_norm": 3.5674824714660645, "learning_rate": 0.00019409232241090647, "loss": 0.3716, "step": 7885 }, { "epoch": 1.1052557813594954, "grad_norm": 0.20747163891792297, "learning_rate": 0.0001940779717770868, "loss": 0.0271, "step": 7886 }, { "epoch": 1.1053959355290819, "grad_norm": 0.28685444593429565, "learning_rate": 0.00019406362114326713, "loss": 0.0294, "step": 7887 }, { "epoch": 1.1055360896986686, "grad_norm": 0.2830279469490051, "learning_rate": 0.0001940492705094475, "loss": 0.0578, "step": 7888 }, { "epoch": 
1.105676243868255, "grad_norm": 0.5834911465644836, "learning_rate": 0.00019403491987562784, "loss": 0.0955, "step": 7889 }, { "epoch": 1.1058163980378417, "grad_norm": 0.35689404606819153, "learning_rate": 0.00019402056924180817, "loss": 0.0254, "step": 7890 }, { "epoch": 1.1059565522074282, "grad_norm": 0.15401045978069305, "learning_rate": 0.00019400621860798852, "loss": 0.0338, "step": 7891 }, { "epoch": 1.1060967063770146, "grad_norm": 0.3949108123779297, "learning_rate": 0.00019399186797416885, "loss": 0.066, "step": 7892 }, { "epoch": 1.1062368605466013, "grad_norm": 0.5664955973625183, "learning_rate": 0.00019397751734034918, "loss": 0.0946, "step": 7893 }, { "epoch": 1.1063770147161878, "grad_norm": 0.1789037585258484, "learning_rate": 0.0001939631667065295, "loss": 0.043, "step": 7894 }, { "epoch": 1.1065171688857744, "grad_norm": 0.18390746414661407, "learning_rate": 0.00019394881607270986, "loss": 0.0346, "step": 7895 }, { "epoch": 1.106657323055361, "grad_norm": 0.8283275365829468, "learning_rate": 0.0001939344654388902, "loss": 0.1155, "step": 7896 }, { "epoch": 1.1067974772249474, "grad_norm": 0.17174625396728516, "learning_rate": 0.00019392011480507052, "loss": 0.024, "step": 7897 }, { "epoch": 1.106937631394534, "grad_norm": 0.7722519636154175, "learning_rate": 0.00019390576417125088, "loss": 0.0368, "step": 7898 }, { "epoch": 1.1070777855641205, "grad_norm": 0.5638456344604492, "learning_rate": 0.0001938914135374312, "loss": 0.0785, "step": 7899 }, { "epoch": 1.1072179397337072, "grad_norm": 0.2936611473560333, "learning_rate": 0.00019387706290361153, "loss": 0.0744, "step": 7900 }, { "epoch": 1.1073580939032936, "grad_norm": 0.23863860964775085, "learning_rate": 0.00019386271226979192, "loss": 0.0716, "step": 7901 }, { "epoch": 1.10749824807288, "grad_norm": 0.8755089044570923, "learning_rate": 0.00019384836163597224, "loss": 0.1022, "step": 7902 }, { "epoch": 1.1076384022424668, "grad_norm": 0.2535272538661957, "learning_rate": 
0.00019383401100215257, "loss": 0.0487, "step": 7903 }, { "epoch": 1.1077785564120533, "grad_norm": 0.23830698430538177, "learning_rate": 0.00019381966036833293, "loss": 0.023, "step": 7904 }, { "epoch": 1.1079187105816397, "grad_norm": 0.37590092420578003, "learning_rate": 0.00019380530973451326, "loss": 0.0313, "step": 7905 }, { "epoch": 1.1080588647512264, "grad_norm": 0.509615957736969, "learning_rate": 0.00019379095910069359, "loss": 0.0831, "step": 7906 }, { "epoch": 1.1081990189208129, "grad_norm": 0.6688640117645264, "learning_rate": 0.00019377660846687394, "loss": 0.1068, "step": 7907 }, { "epoch": 1.1083391730903995, "grad_norm": 0.45249491930007935, "learning_rate": 0.00019376225783305427, "loss": 0.0703, "step": 7908 }, { "epoch": 1.108479327259986, "grad_norm": 0.4076036810874939, "learning_rate": 0.0001937479071992346, "loss": 0.0682, "step": 7909 }, { "epoch": 1.1086194814295725, "grad_norm": 0.30717846751213074, "learning_rate": 0.00019373355656541498, "loss": 0.0207, "step": 7910 }, { "epoch": 1.1087596355991591, "grad_norm": 0.638920783996582, "learning_rate": 0.0001937192059315953, "loss": 0.0714, "step": 7911 }, { "epoch": 1.1088997897687456, "grad_norm": 0.2553742527961731, "learning_rate": 0.00019370485529777564, "loss": 0.0499, "step": 7912 }, { "epoch": 1.109039943938332, "grad_norm": 0.3501196801662445, "learning_rate": 0.00019369050466395597, "loss": 0.0474, "step": 7913 }, { "epoch": 1.1091800981079187, "grad_norm": 0.2916560769081116, "learning_rate": 0.00019367615403013632, "loss": 0.0264, "step": 7914 }, { "epoch": 1.1093202522775052, "grad_norm": 0.5131843686103821, "learning_rate": 0.00019366180339631665, "loss": 0.0734, "step": 7915 }, { "epoch": 1.1094604064470919, "grad_norm": 0.5559123754501343, "learning_rate": 0.00019364745276249698, "loss": 0.0641, "step": 7916 }, { "epoch": 1.1096005606166783, "grad_norm": 0.29364800453186035, "learning_rate": 0.00019363310212867734, "loss": 0.0599, "step": 7917 }, { "epoch": 
1.1097407147862648, "grad_norm": 0.3228829801082611, "learning_rate": 0.00019361875149485766, "loss": 0.0461, "step": 7918 }, { "epoch": 1.1098808689558515, "grad_norm": 0.17589092254638672, "learning_rate": 0.000193604400861038, "loss": 0.0328, "step": 7919 }, { "epoch": 1.110021023125438, "grad_norm": 0.500698447227478, "learning_rate": 0.00019359005022721838, "loss": 0.0575, "step": 7920 }, { "epoch": 1.1101611772950246, "grad_norm": 0.17207537591457367, "learning_rate": 0.0001935756995933987, "loss": 0.0301, "step": 7921 }, { "epoch": 1.110301331464611, "grad_norm": 0.3140786588191986, "learning_rate": 0.00019356134895957903, "loss": 0.035, "step": 7922 }, { "epoch": 1.1104414856341975, "grad_norm": 0.12867513298988342, "learning_rate": 0.0001935469983257594, "loss": 0.0225, "step": 7923 }, { "epoch": 1.1105816398037842, "grad_norm": 0.2635933756828308, "learning_rate": 0.00019353264769193972, "loss": 0.0463, "step": 7924 }, { "epoch": 1.1107217939733707, "grad_norm": 0.24610888957977295, "learning_rate": 0.00019351829705812005, "loss": 0.0354, "step": 7925 }, { "epoch": 1.1108619481429574, "grad_norm": 0.3536587357521057, "learning_rate": 0.0001935039464243004, "loss": 0.0819, "step": 7926 }, { "epoch": 1.1110021023125438, "grad_norm": 1.0215599536895752, "learning_rate": 0.00019348959579048073, "loss": 0.0996, "step": 7927 }, { "epoch": 1.1111422564821303, "grad_norm": 0.6937496066093445, "learning_rate": 0.00019347524515666106, "loss": 0.1081, "step": 7928 }, { "epoch": 1.111282410651717, "grad_norm": 0.6525269746780396, "learning_rate": 0.0001934608945228414, "loss": 0.0953, "step": 7929 }, { "epoch": 1.1114225648213034, "grad_norm": 0.6573078632354736, "learning_rate": 0.00019344654388902174, "loss": 0.0793, "step": 7930 }, { "epoch": 1.1115627189908899, "grad_norm": 0.887570858001709, "learning_rate": 0.00019343219325520207, "loss": 0.158, "step": 7931 }, { "epoch": 1.1117028731604766, "grad_norm": 0.6558364033699036, "learning_rate": 
0.0001934178426213824, "loss": 0.0635, "step": 7932 }, { "epoch": 1.111843027330063, "grad_norm": 0.5542738437652588, "learning_rate": 0.00019340349198756278, "loss": 0.0804, "step": 7933 }, { "epoch": 1.1119831814996497, "grad_norm": 4.551071643829346, "learning_rate": 0.0001933891413537431, "loss": 0.1388, "step": 7934 }, { "epoch": 1.1121233356692362, "grad_norm": 0.28740426898002625, "learning_rate": 0.00019337479071992344, "loss": 0.0198, "step": 7935 }, { "epoch": 1.1122634898388226, "grad_norm": 0.18654921650886536, "learning_rate": 0.0001933604400861038, "loss": 0.0505, "step": 7936 }, { "epoch": 1.1124036440084093, "grad_norm": 0.22372834384441376, "learning_rate": 0.00019334608945228412, "loss": 0.0326, "step": 7937 }, { "epoch": 1.1125437981779958, "grad_norm": 0.45720720291137695, "learning_rate": 0.00019333173881846445, "loss": 0.1098, "step": 7938 }, { "epoch": 1.1126839523475824, "grad_norm": 0.454596608877182, "learning_rate": 0.0001933173881846448, "loss": 0.0723, "step": 7939 }, { "epoch": 1.112824106517169, "grad_norm": 0.4282210171222687, "learning_rate": 0.00019330303755082514, "loss": 0.1123, "step": 7940 }, { "epoch": 1.1129642606867554, "grad_norm": 0.16677014529705048, "learning_rate": 0.00019328868691700546, "loss": 0.0358, "step": 7941 }, { "epoch": 1.113104414856342, "grad_norm": 0.23977968096733093, "learning_rate": 0.00019327433628318585, "loss": 0.0483, "step": 7942 }, { "epoch": 1.1132445690259285, "grad_norm": 0.13637958467006683, "learning_rate": 0.00019325998564936618, "loss": 0.0165, "step": 7943 }, { "epoch": 1.113384723195515, "grad_norm": 0.29663246870040894, "learning_rate": 0.0001932456350155465, "loss": 0.0327, "step": 7944 }, { "epoch": 1.1135248773651016, "grad_norm": 0.1970030665397644, "learning_rate": 0.00019323128438172686, "loss": 0.0436, "step": 7945 }, { "epoch": 1.113665031534688, "grad_norm": 0.3472856283187866, "learning_rate": 0.0001932169337479072, "loss": 0.0412, "step": 7946 }, { "epoch": 1.1138051857042748, 
"grad_norm": 0.18037283420562744, "learning_rate": 0.00019320258311408752, "loss": 0.047, "step": 7947 }, { "epoch": 1.1139453398738612, "grad_norm": 0.275040864944458, "learning_rate": 0.00019318823248026785, "loss": 0.0368, "step": 7948 }, { "epoch": 1.1140854940434477, "grad_norm": 0.30739516019821167, "learning_rate": 0.0001931738818464482, "loss": 0.0384, "step": 7949 }, { "epoch": 1.1142256482130344, "grad_norm": 0.5042182803153992, "learning_rate": 0.00019315953121262853, "loss": 0.1226, "step": 7950 }, { "epoch": 1.1143658023826208, "grad_norm": 0.22179976105690002, "learning_rate": 0.00019314518057880886, "loss": 0.0448, "step": 7951 }, { "epoch": 1.1145059565522075, "grad_norm": 0.5325523018836975, "learning_rate": 0.00019313082994498924, "loss": 0.0851, "step": 7952 }, { "epoch": 1.114646110721794, "grad_norm": 0.36900416016578674, "learning_rate": 0.00019311647931116957, "loss": 0.0827, "step": 7953 }, { "epoch": 1.1147862648913804, "grad_norm": 0.406634658575058, "learning_rate": 0.0001931021286773499, "loss": 0.0681, "step": 7954 }, { "epoch": 1.1149264190609671, "grad_norm": 0.4363675117492676, "learning_rate": 0.00019308777804353025, "loss": 0.0941, "step": 7955 }, { "epoch": 1.1150665732305536, "grad_norm": 0.29236674308776855, "learning_rate": 0.00019307342740971058, "loss": 0.0554, "step": 7956 }, { "epoch": 1.1152067274001403, "grad_norm": 0.1862139105796814, "learning_rate": 0.0001930590767758909, "loss": 0.0119, "step": 7957 }, { "epoch": 1.1153468815697267, "grad_norm": 0.31448566913604736, "learning_rate": 0.00019304472614207127, "loss": 0.0381, "step": 7958 }, { "epoch": 1.1154870357393132, "grad_norm": 0.47054511308670044, "learning_rate": 0.0001930303755082516, "loss": 0.0672, "step": 7959 }, { "epoch": 1.1156271899088999, "grad_norm": 0.35249945521354675, "learning_rate": 0.00019301602487443192, "loss": 0.0507, "step": 7960 }, { "epoch": 1.1157673440784863, "grad_norm": 0.44620150327682495, "learning_rate": 0.00019300167424061228, 
"loss": 0.1127, "step": 7961 }, { "epoch": 1.1159074982480728, "grad_norm": 0.2217017561197281, "learning_rate": 0.0001929873236067926, "loss": 0.0537, "step": 7962 }, { "epoch": 1.1160476524176595, "grad_norm": 0.6119962930679321, "learning_rate": 0.00019297297297297294, "loss": 0.0887, "step": 7963 }, { "epoch": 1.116187806587246, "grad_norm": 0.8834139108657837, "learning_rate": 0.0001929586223391533, "loss": 0.097, "step": 7964 }, { "epoch": 1.1163279607568326, "grad_norm": 0.3024270832538605, "learning_rate": 0.00019294427170533365, "loss": 0.0527, "step": 7965 }, { "epoch": 1.116468114926419, "grad_norm": 0.14427095651626587, "learning_rate": 0.00019292992107151398, "loss": 0.038, "step": 7966 }, { "epoch": 1.1166082690960055, "grad_norm": 0.17836223542690277, "learning_rate": 0.0001929155704376943, "loss": 0.0412, "step": 7967 }, { "epoch": 1.1167484232655922, "grad_norm": 0.19944356381893158, "learning_rate": 0.00019290121980387466, "loss": 0.0431, "step": 7968 }, { "epoch": 1.1168885774351787, "grad_norm": 0.34181687235832214, "learning_rate": 0.000192886869170055, "loss": 0.0626, "step": 7969 }, { "epoch": 1.1170287316047651, "grad_norm": 0.27578863501548767, "learning_rate": 0.00019287251853623532, "loss": 0.0718, "step": 7970 }, { "epoch": 1.1171688857743518, "grad_norm": 0.4260280728340149, "learning_rate": 0.00019285816790241567, "loss": 0.1472, "step": 7971 }, { "epoch": 1.1173090399439383, "grad_norm": 0.9106219410896301, "learning_rate": 0.000192843817268596, "loss": 0.1447, "step": 7972 }, { "epoch": 1.117449194113525, "grad_norm": 0.27554360032081604, "learning_rate": 0.00019282946663477633, "loss": 0.0625, "step": 7973 }, { "epoch": 1.1175893482831114, "grad_norm": 0.1750875562429428, "learning_rate": 0.00019281511600095671, "loss": 0.0558, "step": 7974 }, { "epoch": 1.1177295024526979, "grad_norm": 0.24339143931865692, "learning_rate": 0.00019280076536713704, "loss": 0.0211, "step": 7975 }, { "epoch": 1.1178696566222845, "grad_norm": 
0.4519350826740265, "learning_rate": 0.00019278641473331737, "loss": 0.0541, "step": 7976 }, { "epoch": 1.118009810791871, "grad_norm": 0.7721365094184875, "learning_rate": 0.00019277206409949773, "loss": 0.0833, "step": 7977 }, { "epoch": 1.1181499649614577, "grad_norm": 0.7545231580734253, "learning_rate": 0.00019275771346567806, "loss": 0.1039, "step": 7978 }, { "epoch": 1.1182901191310441, "grad_norm": 0.9306256771087646, "learning_rate": 0.00019274336283185838, "loss": 0.0362, "step": 7979 }, { "epoch": 1.1184302733006306, "grad_norm": 1.333509087562561, "learning_rate": 0.00019272901219803874, "loss": 0.2061, "step": 7980 }, { "epoch": 1.1185704274702173, "grad_norm": 0.5061262845993042, "learning_rate": 0.00019271466156421907, "loss": 0.0393, "step": 7981 }, { "epoch": 1.1187105816398037, "grad_norm": 0.589859127998352, "learning_rate": 0.0001927003109303994, "loss": 0.0785, "step": 7982 }, { "epoch": 1.1188507358093904, "grad_norm": 0.5165272355079651, "learning_rate": 0.00019268596029657973, "loss": 0.0717, "step": 7983 }, { "epoch": 1.1189908899789769, "grad_norm": 0.7961622476577759, "learning_rate": 0.0001926716096627601, "loss": 0.0349, "step": 7984 }, { "epoch": 1.1191310441485633, "grad_norm": 3.2725977897644043, "learning_rate": 0.00019265725902894044, "loss": 0.2943, "step": 7985 }, { "epoch": 1.11927119831815, "grad_norm": 0.44366204738616943, "learning_rate": 0.00019264290839512076, "loss": 0.0958, "step": 7986 }, { "epoch": 1.1194113524877365, "grad_norm": 0.5367948412895203, "learning_rate": 0.00019262855776130112, "loss": 0.0859, "step": 7987 }, { "epoch": 1.1195515066573232, "grad_norm": 0.18503437936306, "learning_rate": 0.00019261420712748145, "loss": 0.0378, "step": 7988 }, { "epoch": 1.1196916608269096, "grad_norm": 0.2635434567928314, "learning_rate": 0.00019259985649366178, "loss": 0.044, "step": 7989 }, { "epoch": 1.119831814996496, "grad_norm": 0.5049416422843933, "learning_rate": 0.00019258550585984213, "loss": 0.071, "step": 7990 }, 
{ "epoch": 1.1199719691660828, "grad_norm": 0.5209174156188965, "learning_rate": 0.00019257115522602246, "loss": 0.0555, "step": 7991 }, { "epoch": 1.1201121233356692, "grad_norm": 0.2113497406244278, "learning_rate": 0.0001925568045922028, "loss": 0.0416, "step": 7992 }, { "epoch": 1.1202522775052557, "grad_norm": 0.28367385268211365, "learning_rate": 0.00019254245395838315, "loss": 0.0612, "step": 7993 }, { "epoch": 1.1203924316748424, "grad_norm": 0.40710994601249695, "learning_rate": 0.00019252810332456347, "loss": 0.0761, "step": 7994 }, { "epoch": 1.1205325858444288, "grad_norm": 0.15141141414642334, "learning_rate": 0.0001925137526907438, "loss": 0.0289, "step": 7995 }, { "epoch": 1.1206727400140155, "grad_norm": 0.45504826307296753, "learning_rate": 0.00019249940205692419, "loss": 0.103, "step": 7996 }, { "epoch": 1.120812894183602, "grad_norm": 0.17744044959545135, "learning_rate": 0.00019248505142310451, "loss": 0.0408, "step": 7997 }, { "epoch": 1.1209530483531884, "grad_norm": 0.5326001644134521, "learning_rate": 0.00019247070078928484, "loss": 0.0352, "step": 7998 }, { "epoch": 1.1210932025227751, "grad_norm": 0.3257143795490265, "learning_rate": 0.00019245635015546517, "loss": 0.0911, "step": 7999 }, { "epoch": 1.1212333566923616, "grad_norm": 0.46379753947257996, "learning_rate": 0.00019244199952164553, "loss": 0.0607, "step": 8000 }, { "epoch": 1.121373510861948, "grad_norm": 0.2637377083301544, "learning_rate": 0.00019242764888782586, "loss": 0.0261, "step": 8001 }, { "epoch": 1.1215136650315347, "grad_norm": 0.32528358697891235, "learning_rate": 0.00019241329825400618, "loss": 0.0957, "step": 8002 }, { "epoch": 1.1216538192011212, "grad_norm": 0.44165828824043274, "learning_rate": 0.00019239894762018654, "loss": 0.051, "step": 8003 }, { "epoch": 1.1217939733707079, "grad_norm": 0.1660914570093155, "learning_rate": 0.00019238459698636687, "loss": 0.043, "step": 8004 }, { "epoch": 1.1219341275402943, "grad_norm": 0.48632216453552246, 
"learning_rate": 0.0001923702463525472, "loss": 0.113, "step": 8005 }, { "epoch": 1.1220742817098808, "grad_norm": 0.2467777580022812, "learning_rate": 0.00019235589571872758, "loss": 0.0516, "step": 8006 }, { "epoch": 1.1222144358794675, "grad_norm": 0.24975550174713135, "learning_rate": 0.0001923415450849079, "loss": 0.0632, "step": 8007 }, { "epoch": 1.122354590049054, "grad_norm": 0.6065748929977417, "learning_rate": 0.00019232719445108824, "loss": 0.0513, "step": 8008 }, { "epoch": 1.1224947442186406, "grad_norm": 0.44722145795822144, "learning_rate": 0.0001923128438172686, "loss": 0.0666, "step": 8009 }, { "epoch": 1.122634898388227, "grad_norm": 0.509182333946228, "learning_rate": 0.00019229849318344892, "loss": 0.0876, "step": 8010 }, { "epoch": 1.1227750525578135, "grad_norm": 0.38631001114845276, "learning_rate": 0.00019228414254962925, "loss": 0.0518, "step": 8011 }, { "epoch": 1.1229152067274002, "grad_norm": 0.20089133083820343, "learning_rate": 0.0001922697919158096, "loss": 0.0443, "step": 8012 }, { "epoch": 1.1230553608969867, "grad_norm": 0.22899366915225983, "learning_rate": 0.00019225544128198993, "loss": 0.0357, "step": 8013 }, { "epoch": 1.1231955150665733, "grad_norm": 0.5111515522003174, "learning_rate": 0.00019224109064817026, "loss": 0.0426, "step": 8014 }, { "epoch": 1.1233356692361598, "grad_norm": 0.42809003591537476, "learning_rate": 0.00019222674001435065, "loss": 0.0551, "step": 8015 }, { "epoch": 1.1234758234057463, "grad_norm": 0.17355439066886902, "learning_rate": 0.00019221238938053097, "loss": 0.0069, "step": 8016 }, { "epoch": 1.123615977575333, "grad_norm": 0.3530888259410858, "learning_rate": 0.0001921980387467113, "loss": 0.0597, "step": 8017 }, { "epoch": 1.1237561317449194, "grad_norm": 0.781139075756073, "learning_rate": 0.00019218368811289163, "loss": 0.0634, "step": 8018 }, { "epoch": 1.1238962859145059, "grad_norm": 0.14461249113082886, "learning_rate": 0.000192169337479072, "loss": 0.018, "step": 8019 }, { "epoch": 
1.1240364400840925, "grad_norm": 0.5108649730682373, "learning_rate": 0.00019215498684525232, "loss": 0.066, "step": 8020 }, { "epoch": 1.124176594253679, "grad_norm": 0.2528609037399292, "learning_rate": 0.00019214063621143264, "loss": 0.0335, "step": 8021 }, { "epoch": 1.1243167484232657, "grad_norm": 0.6112862229347229, "learning_rate": 0.000192126285577613, "loss": 0.1046, "step": 8022 }, { "epoch": 1.1244569025928521, "grad_norm": 0.8291991353034973, "learning_rate": 0.00019211193494379333, "loss": 0.1284, "step": 8023 }, { "epoch": 1.1245970567624386, "grad_norm": 0.23615464568138123, "learning_rate": 0.00019209758430997366, "loss": 0.0133, "step": 8024 }, { "epoch": 1.1247372109320253, "grad_norm": 0.3060218393802643, "learning_rate": 0.000192083233676154, "loss": 0.0492, "step": 8025 }, { "epoch": 1.1248773651016117, "grad_norm": 0.657585859298706, "learning_rate": 0.00019206888304233434, "loss": 0.0412, "step": 8026 }, { "epoch": 1.1250175192711982, "grad_norm": 0.2563890218734741, "learning_rate": 0.00019205453240851467, "loss": 0.1012, "step": 8027 }, { "epoch": 1.1251576734407849, "grad_norm": 0.14099502563476562, "learning_rate": 0.00019204018177469505, "loss": 0.0127, "step": 8028 }, { "epoch": 1.1252978276103713, "grad_norm": 0.21189579367637634, "learning_rate": 0.00019202583114087538, "loss": 0.0688, "step": 8029 }, { "epoch": 1.125437981779958, "grad_norm": 0.4861416220664978, "learning_rate": 0.0001920114805070557, "loss": 0.0504, "step": 8030 }, { "epoch": 1.1255781359495445, "grad_norm": 2.024507522583008, "learning_rate": 0.00019199712987323607, "loss": 0.2907, "step": 8031 }, { "epoch": 1.125718290119131, "grad_norm": 0.5642987489700317, "learning_rate": 0.0001919827792394164, "loss": 0.0506, "step": 8032 }, { "epoch": 1.1258584442887176, "grad_norm": 0.8888179063796997, "learning_rate": 0.00019196842860559672, "loss": 0.0248, "step": 8033 }, { "epoch": 1.125998598458304, "grad_norm": 1.257251262664795, "learning_rate": 
0.00019195407797177705, "loss": 0.1059, "step": 8034 }, { "epoch": 1.1261387526278908, "grad_norm": 1.0725864171981812, "learning_rate": 0.0001919397273379574, "loss": 0.043, "step": 8035 }, { "epoch": 1.1262789067974772, "grad_norm": 0.442417711019516, "learning_rate": 0.00019192537670413774, "loss": 0.0393, "step": 8036 }, { "epoch": 1.1264190609670637, "grad_norm": 0.3742845058441162, "learning_rate": 0.00019191102607031806, "loss": 0.0757, "step": 8037 }, { "epoch": 1.1265592151366504, "grad_norm": 0.08734431862831116, "learning_rate": 0.00019189667543649845, "loss": 0.0102, "step": 8038 }, { "epoch": 1.1266993693062368, "grad_norm": 0.22829271852970123, "learning_rate": 0.00019188232480267877, "loss": 0.0508, "step": 8039 }, { "epoch": 1.1268395234758235, "grad_norm": 0.19670654833316803, "learning_rate": 0.0001918679741688591, "loss": 0.0405, "step": 8040 }, { "epoch": 1.12697967764541, "grad_norm": 0.5248310565948486, "learning_rate": 0.00019185362353503946, "loss": 0.0467, "step": 8041 }, { "epoch": 1.1271198318149964, "grad_norm": 0.29423487186431885, "learning_rate": 0.0001918392729012198, "loss": 0.0381, "step": 8042 }, { "epoch": 1.127259985984583, "grad_norm": 1.0417219400405884, "learning_rate": 0.00019182492226740012, "loss": 0.1391, "step": 8043 }, { "epoch": 1.1274001401541696, "grad_norm": 0.16028207540512085, "learning_rate": 0.00019181057163358047, "loss": 0.0337, "step": 8044 }, { "epoch": 1.1275402943237562, "grad_norm": 0.5439640879631042, "learning_rate": 0.0001917962209997608, "loss": 0.0855, "step": 8045 }, { "epoch": 1.1276804484933427, "grad_norm": 0.16845403611660004, "learning_rate": 0.00019178187036594113, "loss": 0.0128, "step": 8046 }, { "epoch": 1.1278206026629292, "grad_norm": 0.36725983023643494, "learning_rate": 0.0001917675197321215, "loss": 0.1029, "step": 8047 }, { "epoch": 1.1279607568325158, "grad_norm": 1.0634560585021973, "learning_rate": 0.00019175316909830184, "loss": 0.0718, "step": 8048 }, { "epoch": 
1.1281009110021023, "grad_norm": 0.21387803554534912, "learning_rate": 0.00019173881846448217, "loss": 0.035, "step": 8049 }, { "epoch": 1.1282410651716888, "grad_norm": 0.19767367839813232, "learning_rate": 0.00019172446783066252, "loss": 0.0196, "step": 8050 }, { "epoch": 1.1283812193412754, "grad_norm": 1.1467914581298828, "learning_rate": 0.00019171011719684285, "loss": 0.08, "step": 8051 }, { "epoch": 1.128521373510862, "grad_norm": 0.3003193140029907, "learning_rate": 0.00019169576656302318, "loss": 0.0917, "step": 8052 }, { "epoch": 1.1286615276804486, "grad_norm": 0.225721538066864, "learning_rate": 0.0001916814159292035, "loss": 0.0278, "step": 8053 }, { "epoch": 1.128801681850035, "grad_norm": 0.23106655478477478, "learning_rate": 0.00019166706529538387, "loss": 0.0658, "step": 8054 }, { "epoch": 1.1289418360196215, "grad_norm": 0.25449538230895996, "learning_rate": 0.0001916527146615642, "loss": 0.0753, "step": 8055 }, { "epoch": 1.1290819901892082, "grad_norm": 0.15561680495738983, "learning_rate": 0.00019163836402774452, "loss": 0.0262, "step": 8056 }, { "epoch": 1.1292221443587946, "grad_norm": 0.652627170085907, "learning_rate": 0.00019162401339392488, "loss": 0.0833, "step": 8057 }, { "epoch": 1.129362298528381, "grad_norm": 0.3154574930667877, "learning_rate": 0.0001916096627601052, "loss": 0.0731, "step": 8058 }, { "epoch": 1.1295024526979678, "grad_norm": 0.24196955561637878, "learning_rate": 0.00019159531212628556, "loss": 0.0379, "step": 8059 }, { "epoch": 1.1296426068675542, "grad_norm": 0.17953385412693024, "learning_rate": 0.00019158096149246592, "loss": 0.0229, "step": 8060 }, { "epoch": 1.129782761037141, "grad_norm": 0.18981239199638367, "learning_rate": 0.00019156661085864625, "loss": 0.0461, "step": 8061 }, { "epoch": 1.1299229152067274, "grad_norm": 0.2708427608013153, "learning_rate": 0.00019155226022482658, "loss": 0.0288, "step": 8062 }, { "epoch": 1.1300630693763138, "grad_norm": 0.31338393688201904, "learning_rate": 
0.00019153790959100693, "loss": 0.0517, "step": 8063 }, { "epoch": 1.1302032235459005, "grad_norm": 0.5928025841712952, "learning_rate": 0.00019152355895718726, "loss": 0.0772, "step": 8064 }, { "epoch": 1.130343377715487, "grad_norm": 0.23111417889595032, "learning_rate": 0.0001915092083233676, "loss": 0.0423, "step": 8065 }, { "epoch": 1.1304835318850737, "grad_norm": 0.2650432586669922, "learning_rate": 0.00019149485768954794, "loss": 0.0617, "step": 8066 }, { "epoch": 1.1306236860546601, "grad_norm": 0.15775763988494873, "learning_rate": 0.00019148050705572827, "loss": 0.0336, "step": 8067 }, { "epoch": 1.1307638402242466, "grad_norm": 0.09262767434120178, "learning_rate": 0.0001914661564219086, "loss": 0.0148, "step": 8068 }, { "epoch": 1.1309039943938333, "grad_norm": 0.2704620957374573, "learning_rate": 0.00019145180578808893, "loss": 0.0375, "step": 8069 }, { "epoch": 1.1310441485634197, "grad_norm": 0.8019313216209412, "learning_rate": 0.0001914374551542693, "loss": 0.0685, "step": 8070 }, { "epoch": 1.1311843027330064, "grad_norm": 0.30213186144828796, "learning_rate": 0.00019142310452044964, "loss": 0.0569, "step": 8071 }, { "epoch": 1.1313244569025929, "grad_norm": 0.226015105843544, "learning_rate": 0.00019140875388662997, "loss": 0.059, "step": 8072 }, { "epoch": 1.1314646110721793, "grad_norm": 0.395021915435791, "learning_rate": 0.00019139440325281033, "loss": 0.0757, "step": 8073 }, { "epoch": 1.131604765241766, "grad_norm": 0.41723376512527466, "learning_rate": 0.00019138005261899065, "loss": 0.0577, "step": 8074 }, { "epoch": 1.1317449194113525, "grad_norm": 0.334320992231369, "learning_rate": 0.00019136570198517098, "loss": 0.0782, "step": 8075 }, { "epoch": 1.1318850735809391, "grad_norm": 2.3480494022369385, "learning_rate": 0.00019135135135135134, "loss": 0.0793, "step": 8076 }, { "epoch": 1.1320252277505256, "grad_norm": 0.09011318534612656, "learning_rate": 0.00019133700071753167, "loss": 0.0053, "step": 8077 }, { "epoch": 
1.132165381920112, "grad_norm": 0.6127850413322449, "learning_rate": 0.000191322650083712, "loss": 0.0646, "step": 8078 }, { "epoch": 1.1323055360896987, "grad_norm": 0.39642518758773804, "learning_rate": 0.00019130829944989238, "loss": 0.0551, "step": 8079 }, { "epoch": 1.1324456902592852, "grad_norm": 0.4842112362384796, "learning_rate": 0.0001912939488160727, "loss": 0.0541, "step": 8080 }, { "epoch": 1.1325858444288717, "grad_norm": 0.46259811520576477, "learning_rate": 0.00019127959818225304, "loss": 0.0708, "step": 8081 }, { "epoch": 1.1327259985984584, "grad_norm": 0.3893522024154663, "learning_rate": 0.0001912652475484334, "loss": 0.048, "step": 8082 }, { "epoch": 1.1328661527680448, "grad_norm": 0.1922399252653122, "learning_rate": 0.00019125089691461372, "loss": 0.0164, "step": 8083 }, { "epoch": 1.1330063069376315, "grad_norm": 0.7262095212936401, "learning_rate": 0.00019123654628079405, "loss": 0.0681, "step": 8084 }, { "epoch": 1.133146461107218, "grad_norm": 0.9022505283355713, "learning_rate": 0.0001912221956469744, "loss": 0.0686, "step": 8085 }, { "epoch": 1.1332866152768044, "grad_norm": 0.26339420676231384, "learning_rate": 0.00019120784501315473, "loss": 0.0581, "step": 8086 }, { "epoch": 1.133426769446391, "grad_norm": 0.32881686091423035, "learning_rate": 0.00019119349437933506, "loss": 0.0758, "step": 8087 }, { "epoch": 1.1335669236159776, "grad_norm": 0.2719397246837616, "learning_rate": 0.0001911791437455154, "loss": 0.0706, "step": 8088 }, { "epoch": 1.133707077785564, "grad_norm": 0.5079146027565002, "learning_rate": 0.00019116479311169575, "loss": 0.0606, "step": 8089 }, { "epoch": 1.1338472319551507, "grad_norm": 0.26316285133361816, "learning_rate": 0.00019115044247787607, "loss": 0.0551, "step": 8090 }, { "epoch": 1.1339873861247372, "grad_norm": 0.4014352560043335, "learning_rate": 0.00019113609184405643, "loss": 0.0522, "step": 8091 }, { "epoch": 1.1341275402943238, "grad_norm": 0.23115786910057068, "learning_rate": 
0.00019112174121023678, "loss": 0.0373, "step": 8092 }, { "epoch": 1.1342676944639103, "grad_norm": 0.503904402256012, "learning_rate": 0.0001911073905764171, "loss": 0.0575, "step": 8093 }, { "epoch": 1.1344078486334968, "grad_norm": 0.47989892959594727, "learning_rate": 0.00019109303994259744, "loss": 0.1067, "step": 8094 }, { "epoch": 1.1345480028030834, "grad_norm": 0.481784850358963, "learning_rate": 0.0001910786893087778, "loss": 0.0952, "step": 8095 }, { "epoch": 1.13468815697267, "grad_norm": 0.4192765951156616, "learning_rate": 0.00019106433867495813, "loss": 0.0654, "step": 8096 }, { "epoch": 1.1348283111422566, "grad_norm": 0.6675271987915039, "learning_rate": 0.00019104998804113845, "loss": 0.0704, "step": 8097 }, { "epoch": 1.134968465311843, "grad_norm": 0.412428081035614, "learning_rate": 0.0001910356374073188, "loss": 0.0439, "step": 8098 }, { "epoch": 1.1351086194814295, "grad_norm": 0.37966421246528625, "learning_rate": 0.00019102128677349914, "loss": 0.0687, "step": 8099 }, { "epoch": 1.1352487736510162, "grad_norm": 0.1752105951309204, "learning_rate": 0.00019100693613967947, "loss": 0.0225, "step": 8100 }, { "epoch": 1.1353889278206026, "grad_norm": 0.5338507294654846, "learning_rate": 0.00019099258550585985, "loss": 0.1215, "step": 8101 }, { "epoch": 1.1355290819901893, "grad_norm": 0.21000151336193085, "learning_rate": 0.00019097823487204018, "loss": 0.0234, "step": 8102 }, { "epoch": 1.1356692361597758, "grad_norm": 0.27362069487571716, "learning_rate": 0.0001909638842382205, "loss": 0.0452, "step": 8103 }, { "epoch": 1.1358093903293622, "grad_norm": 0.2722977101802826, "learning_rate": 0.00019094953360440084, "loss": 0.0671, "step": 8104 }, { "epoch": 1.135949544498949, "grad_norm": 0.27142515778541565, "learning_rate": 0.0001909351829705812, "loss": 0.0696, "step": 8105 }, { "epoch": 1.1360896986685354, "grad_norm": 0.22372335195541382, "learning_rate": 0.00019092083233676152, "loss": 0.0917, "step": 8106 }, { "epoch": 1.136229852838122, 
"grad_norm": 0.4673496186733246, "learning_rate": 0.00019090648170294185, "loss": 0.033, "step": 8107 }, { "epoch": 1.1363700070077085, "grad_norm": 0.5011157989501953, "learning_rate": 0.0001908921310691222, "loss": 0.1164, "step": 8108 }, { "epoch": 1.136510161177295, "grad_norm": 0.45621436834335327, "learning_rate": 0.00019087778043530253, "loss": 0.0963, "step": 8109 }, { "epoch": 1.1366503153468817, "grad_norm": 0.5107046961784363, "learning_rate": 0.00019086342980148286, "loss": 0.0454, "step": 8110 }, { "epoch": 1.1367904695164681, "grad_norm": 0.5959237217903137, "learning_rate": 0.00019084907916766324, "loss": 0.0414, "step": 8111 }, { "epoch": 1.1369306236860546, "grad_norm": 0.16816528141498566, "learning_rate": 0.00019083472853384357, "loss": 0.0741, "step": 8112 }, { "epoch": 1.1370707778556413, "grad_norm": 0.8550013899803162, "learning_rate": 0.0001908203779000239, "loss": 0.0755, "step": 8113 }, { "epoch": 1.1372109320252277, "grad_norm": 0.15414147078990936, "learning_rate": 0.00019080602726620426, "loss": 0.0201, "step": 8114 }, { "epoch": 1.1373510861948142, "grad_norm": 0.3081057667732239, "learning_rate": 0.00019079167663238459, "loss": 0.1005, "step": 8115 }, { "epoch": 1.1374912403644009, "grad_norm": 0.4586223065853119, "learning_rate": 0.00019077732599856491, "loss": 0.0672, "step": 8116 }, { "epoch": 1.1376313945339873, "grad_norm": 0.2728267312049866, "learning_rate": 0.00019076297536474527, "loss": 0.0363, "step": 8117 }, { "epoch": 1.137771548703574, "grad_norm": 0.5043738484382629, "learning_rate": 0.0001907486247309256, "loss": 0.0628, "step": 8118 }, { "epoch": 1.1379117028731605, "grad_norm": 0.5502161383628845, "learning_rate": 0.00019073427409710593, "loss": 0.0788, "step": 8119 }, { "epoch": 1.138051857042747, "grad_norm": 0.43950599431991577, "learning_rate": 0.00019071992346328628, "loss": 0.0771, "step": 8120 }, { "epoch": 1.1381920112123336, "grad_norm": 0.6934971809387207, "learning_rate": 0.0001907055728294666, "loss": 
0.0444, "step": 8121 }, { "epoch": 1.13833216538192, "grad_norm": 0.5587944388389587, "learning_rate": 0.00019069122219564694, "loss": 0.0785, "step": 8122 }, { "epoch": 1.1384723195515067, "grad_norm": 0.47870492935180664, "learning_rate": 0.0001906768715618273, "loss": 0.144, "step": 8123 }, { "epoch": 1.1386124737210932, "grad_norm": 0.6334477066993713, "learning_rate": 0.00019066252092800765, "loss": 0.0308, "step": 8124 }, { "epoch": 1.1387526278906797, "grad_norm": 0.5581763386726379, "learning_rate": 0.00019064817029418798, "loss": 0.1099, "step": 8125 }, { "epoch": 1.1388927820602663, "grad_norm": 0.23852522671222687, "learning_rate": 0.0001906338196603683, "loss": 0.0163, "step": 8126 }, { "epoch": 1.1390329362298528, "grad_norm": 0.6680789589881897, "learning_rate": 0.00019061946902654866, "loss": 0.139, "step": 8127 }, { "epoch": 1.1391730903994395, "grad_norm": 0.33995699882507324, "learning_rate": 0.000190605118392729, "loss": 0.0485, "step": 8128 }, { "epoch": 1.139313244569026, "grad_norm": 1.069642186164856, "learning_rate": 0.00019059076775890932, "loss": 0.0834, "step": 8129 }, { "epoch": 1.1394533987386124, "grad_norm": 0.2828660309314728, "learning_rate": 0.00019057641712508968, "loss": 0.0181, "step": 8130 }, { "epoch": 1.139593552908199, "grad_norm": 0.4177997410297394, "learning_rate": 0.00019056206649127, "loss": 0.0594, "step": 8131 }, { "epoch": 1.1397337070777855, "grad_norm": 0.9500121474266052, "learning_rate": 0.00019054771585745033, "loss": 0.1209, "step": 8132 }, { "epoch": 1.1398738612473722, "grad_norm": 0.3758149743080139, "learning_rate": 0.00019053336522363072, "loss": 0.0555, "step": 8133 }, { "epoch": 1.1400140154169587, "grad_norm": 1.231550693511963, "learning_rate": 0.00019051901458981105, "loss": 0.2025, "step": 8134 }, { "epoch": 1.1401541695865451, "grad_norm": 2.562213659286499, "learning_rate": 0.00019050466395599137, "loss": 0.236, "step": 8135 }, { "epoch": 1.1402943237561318, "grad_norm": 0.251033753156662, 
"learning_rate": 0.00019049031332217173, "loss": 0.0665, "step": 8136 }, { "epoch": 1.1404344779257183, "grad_norm": 0.34847134351730347, "learning_rate": 0.00019047596268835206, "loss": 0.0961, "step": 8137 }, { "epoch": 1.1405746320953047, "grad_norm": 0.13728486001491547, "learning_rate": 0.0001904616120545324, "loss": 0.0156, "step": 8138 }, { "epoch": 1.1407147862648914, "grad_norm": 0.30178892612457275, "learning_rate": 0.00019044726142071272, "loss": 0.0441, "step": 8139 }, { "epoch": 1.1408549404344779, "grad_norm": 0.19602464139461517, "learning_rate": 0.00019043291078689307, "loss": 0.077, "step": 8140 }, { "epoch": 1.1409950946040646, "grad_norm": 0.35875406861305237, "learning_rate": 0.0001904185601530734, "loss": 0.1188, "step": 8141 }, { "epoch": 1.141135248773651, "grad_norm": 0.21018241345882416, "learning_rate": 0.00019040420951925373, "loss": 0.047, "step": 8142 }, { "epoch": 1.1412754029432375, "grad_norm": 0.23383651673793793, "learning_rate": 0.0001903898588854341, "loss": 0.059, "step": 8143 }, { "epoch": 1.1414155571128242, "grad_norm": 1.4485735893249512, "learning_rate": 0.00019037550825161444, "loss": 0.0811, "step": 8144 }, { "epoch": 1.1415557112824106, "grad_norm": 0.28622177243232727, "learning_rate": 0.00019036115761779477, "loss": 0.0326, "step": 8145 }, { "epoch": 1.141695865451997, "grad_norm": 0.33646684885025024, "learning_rate": 0.00019034680698397512, "loss": 0.1203, "step": 8146 }, { "epoch": 1.1418360196215838, "grad_norm": 0.3636622428894043, "learning_rate": 0.00019033245635015545, "loss": 0.0671, "step": 8147 }, { "epoch": 1.1419761737911702, "grad_norm": 0.3842368721961975, "learning_rate": 0.00019031810571633578, "loss": 0.0555, "step": 8148 }, { "epoch": 1.142116327960757, "grad_norm": 0.24999134242534637, "learning_rate": 0.00019030375508251614, "loss": 0.0442, "step": 8149 }, { "epoch": 1.1422564821303434, "grad_norm": 0.2663787007331848, "learning_rate": 0.00019028940444869646, "loss": 0.059, "step": 8150 }, { 
"epoch": 1.1423966362999298, "grad_norm": 0.4734601378440857, "learning_rate": 0.0001902750538148768, "loss": 0.0942, "step": 8151 }, { "epoch": 1.1425367904695165, "grad_norm": 0.22639179229736328, "learning_rate": 0.00019026070318105715, "loss": 0.0732, "step": 8152 }, { "epoch": 1.142676944639103, "grad_norm": 0.24627842009067535, "learning_rate": 0.00019024635254723748, "loss": 0.0638, "step": 8153 }, { "epoch": 1.1428170988086896, "grad_norm": 0.4628268778324127, "learning_rate": 0.0001902320019134178, "loss": 0.0695, "step": 8154 }, { "epoch": 1.142957252978276, "grad_norm": 0.3266357183456421, "learning_rate": 0.0001902176512795982, "loss": 0.03, "step": 8155 }, { "epoch": 1.1430974071478626, "grad_norm": 0.14849551022052765, "learning_rate": 0.00019020330064577852, "loss": 0.0157, "step": 8156 }, { "epoch": 1.1432375613174492, "grad_norm": 0.2778032422065735, "learning_rate": 0.00019018895001195885, "loss": 0.0326, "step": 8157 }, { "epoch": 1.1433777154870357, "grad_norm": 0.306439608335495, "learning_rate": 0.00019017459937813917, "loss": 0.1054, "step": 8158 }, { "epoch": 1.1435178696566224, "grad_norm": 0.5741012096405029, "learning_rate": 0.00019016024874431953, "loss": 0.03, "step": 8159 }, { "epoch": 1.1436580238262088, "grad_norm": 0.15136685967445374, "learning_rate": 0.00019014589811049986, "loss": 0.0376, "step": 8160 }, { "epoch": 1.1437981779957953, "grad_norm": 0.3661526143550873, "learning_rate": 0.0001901315474766802, "loss": 0.097, "step": 8161 }, { "epoch": 1.143938332165382, "grad_norm": 0.18994368612766266, "learning_rate": 0.00019011719684286054, "loss": 0.0457, "step": 8162 }, { "epoch": 1.1440784863349684, "grad_norm": 0.23135820031166077, "learning_rate": 0.00019010284620904087, "loss": 0.0941, "step": 8163 }, { "epoch": 1.1442186405045551, "grad_norm": 0.37540772557258606, "learning_rate": 0.0001900884955752212, "loss": 0.0744, "step": 8164 }, { "epoch": 1.1443587946741416, "grad_norm": 0.3024245500564575, "learning_rate": 
0.00019007414494140158, "loss": 0.0838, "step": 8165 }, { "epoch": 1.144498948843728, "grad_norm": 0.41323837637901306, "learning_rate": 0.0001900597943075819, "loss": 0.0772, "step": 8166 }, { "epoch": 1.1446391030133147, "grad_norm": 0.4566798806190491, "learning_rate": 0.00019004544367376224, "loss": 0.1013, "step": 8167 }, { "epoch": 1.1447792571829012, "grad_norm": 0.26838138699531555, "learning_rate": 0.0001900310930399426, "loss": 0.0465, "step": 8168 }, { "epoch": 1.1449194113524876, "grad_norm": 0.2764984965324402, "learning_rate": 0.00019001674240612292, "loss": 0.0377, "step": 8169 }, { "epoch": 1.1450595655220743, "grad_norm": 0.08905649185180664, "learning_rate": 0.00019000239177230325, "loss": 0.0074, "step": 8170 }, { "epoch": 1.1451997196916608, "grad_norm": 0.67715984582901, "learning_rate": 0.0001899880411384836, "loss": 0.1194, "step": 8171 }, { "epoch": 1.1453398738612472, "grad_norm": 0.3427574038505554, "learning_rate": 0.00018997369050466394, "loss": 0.0222, "step": 8172 }, { "epoch": 1.145480028030834, "grad_norm": 0.9477463364601135, "learning_rate": 0.00018995933987084427, "loss": 0.0771, "step": 8173 }, { "epoch": 1.1456201822004204, "grad_norm": 0.26231104135513306, "learning_rate": 0.0001899449892370246, "loss": 0.0653, "step": 8174 }, { "epoch": 1.145760336370007, "grad_norm": 0.23415350914001465, "learning_rate": 0.00018993063860320498, "loss": 0.0207, "step": 8175 }, { "epoch": 1.1459004905395935, "grad_norm": 0.5579625964164734, "learning_rate": 0.0001899162879693853, "loss": 0.0907, "step": 8176 }, { "epoch": 1.14604064470918, "grad_norm": 0.3386024832725525, "learning_rate": 0.00018990193733556563, "loss": 0.0437, "step": 8177 }, { "epoch": 1.1461807988787667, "grad_norm": 0.8833440542221069, "learning_rate": 0.000189887586701746, "loss": 0.0743, "step": 8178 }, { "epoch": 1.1463209530483531, "grad_norm": 0.18453925848007202, "learning_rate": 0.00018987323606792632, "loss": 0.0448, "step": 8179 }, { "epoch": 1.1464611072179398, 
"grad_norm": 0.8642560243606567, "learning_rate": 0.00018985888543410665, "loss": 0.0703, "step": 8180 }, { "epoch": 1.1466012613875263, "grad_norm": 0.07260432094335556, "learning_rate": 0.000189844534800287, "loss": 0.005, "step": 8181 }, { "epoch": 1.1467414155571127, "grad_norm": 0.3032330572605133, "learning_rate": 0.00018983018416646733, "loss": 0.0254, "step": 8182 }, { "epoch": 1.1468815697266994, "grad_norm": 0.4182407557964325, "learning_rate": 0.00018981583353264766, "loss": 0.0774, "step": 8183 }, { "epoch": 1.1470217238962859, "grad_norm": 1.464682936668396, "learning_rate": 0.00018980148289882802, "loss": 0.3925, "step": 8184 }, { "epoch": 1.1471618780658726, "grad_norm": 0.05969484522938728, "learning_rate": 0.00018978713226500834, "loss": 0.0052, "step": 8185 }, { "epoch": 1.147302032235459, "grad_norm": 0.3523111939430237, "learning_rate": 0.0001897727816311887, "loss": 0.0266, "step": 8186 }, { "epoch": 1.1474421864050455, "grad_norm": 0.15141288936138153, "learning_rate": 0.00018975843099736906, "loss": 0.046, "step": 8187 }, { "epoch": 1.1475823405746322, "grad_norm": 0.5671917200088501, "learning_rate": 0.00018974408036354938, "loss": 0.0723, "step": 8188 }, { "epoch": 1.1477224947442186, "grad_norm": 0.18358641862869263, "learning_rate": 0.0001897297297297297, "loss": 0.0496, "step": 8189 }, { "epoch": 1.1478626489138053, "grad_norm": 0.2935771644115448, "learning_rate": 0.00018971537909591004, "loss": 0.0712, "step": 8190 }, { "epoch": 1.1480028030833918, "grad_norm": 0.31260305643081665, "learning_rate": 0.0001897010284620904, "loss": 0.0713, "step": 8191 }, { "epoch": 1.1481429572529782, "grad_norm": 0.1767255663871765, "learning_rate": 0.00018968667782827073, "loss": 0.0674, "step": 8192 }, { "epoch": 1.148283111422565, "grad_norm": 0.39744818210601807, "learning_rate": 0.00018967232719445105, "loss": 0.03, "step": 8193 }, { "epoch": 1.1484232655921514, "grad_norm": 0.2499329000711441, "learning_rate": 0.0001896579765606314, "loss": 
0.0623, "step": 8194 }, { "epoch": 1.148563419761738, "grad_norm": 0.2834676504135132, "learning_rate": 0.00018964362592681174, "loss": 0.0832, "step": 8195 }, { "epoch": 1.1487035739313245, "grad_norm": 0.5464600920677185, "learning_rate": 0.00018962927529299207, "loss": 0.0887, "step": 8196 }, { "epoch": 1.148843728100911, "grad_norm": 0.3822512924671173, "learning_rate": 0.00018961492465917245, "loss": 0.0716, "step": 8197 }, { "epoch": 1.1489838822704976, "grad_norm": 0.5086199641227722, "learning_rate": 0.00018960057402535278, "loss": 0.0486, "step": 8198 }, { "epoch": 1.149124036440084, "grad_norm": 0.4189874529838562, "learning_rate": 0.0001895862233915331, "loss": 0.1297, "step": 8199 }, { "epoch": 1.1492641906096706, "grad_norm": 0.28184911608695984, "learning_rate": 0.00018957187275771346, "loss": 0.0563, "step": 8200 }, { "epoch": 1.1494043447792572, "grad_norm": 0.5149058103561401, "learning_rate": 0.0001895575221238938, "loss": 0.0231, "step": 8201 }, { "epoch": 1.1495444989488437, "grad_norm": 0.08912201225757599, "learning_rate": 0.00018954317149007412, "loss": 0.0138, "step": 8202 }, { "epoch": 1.1496846531184302, "grad_norm": 0.10554853826761246, "learning_rate": 0.00018952882085625447, "loss": 0.0151, "step": 8203 }, { "epoch": 1.1498248072880168, "grad_norm": 0.5522861480712891, "learning_rate": 0.0001895144702224348, "loss": 0.0921, "step": 8204 }, { "epoch": 1.1499649614576033, "grad_norm": 0.2820811867713928, "learning_rate": 0.00018950011958861513, "loss": 0.0671, "step": 8205 }, { "epoch": 1.15010511562719, "grad_norm": 0.5302562713623047, "learning_rate": 0.00018948576895479551, "loss": 0.077, "step": 8206 }, { "epoch": 1.1502452697967764, "grad_norm": 1.1579928398132324, "learning_rate": 0.00018947141832097584, "loss": 0.0676, "step": 8207 }, { "epoch": 1.150385423966363, "grad_norm": 0.27457666397094727, "learning_rate": 0.00018945706768715617, "loss": 0.0567, "step": 8208 }, { "epoch": 1.1505255781359496, "grad_norm": 
0.23893170058727264, "learning_rate": 0.0001894427170533365, "loss": 0.0504, "step": 8209 }, { "epoch": 1.150665732305536, "grad_norm": 0.33306390047073364, "learning_rate": 0.00018942836641951686, "loss": 0.017, "step": 8210 }, { "epoch": 1.1508058864751227, "grad_norm": 0.23821240663528442, "learning_rate": 0.00018941401578569718, "loss": 0.0285, "step": 8211 }, { "epoch": 1.1509460406447092, "grad_norm": 0.3634144067764282, "learning_rate": 0.0001893996651518775, "loss": 0.1039, "step": 8212 }, { "epoch": 1.1510861948142956, "grad_norm": 0.1651085466146469, "learning_rate": 0.00018938531451805787, "loss": 0.0247, "step": 8213 }, { "epoch": 1.1512263489838823, "grad_norm": 1.0651239156723022, "learning_rate": 0.0001893709638842382, "loss": 0.139, "step": 8214 }, { "epoch": 1.1513665031534688, "grad_norm": 0.4140488803386688, "learning_rate": 0.00018935661325041853, "loss": 0.1118, "step": 8215 }, { "epoch": 1.1515066573230555, "grad_norm": 0.18126502633094788, "learning_rate": 0.00018934226261659888, "loss": 0.0246, "step": 8216 }, { "epoch": 1.151646811492642, "grad_norm": 0.13416564464569092, "learning_rate": 0.0001893279119827792, "loss": 0.0128, "step": 8217 }, { "epoch": 1.1517869656622284, "grad_norm": 0.3202526569366455, "learning_rate": 0.00018931356134895957, "loss": 0.0589, "step": 8218 }, { "epoch": 1.151927119831815, "grad_norm": 0.6767866015434265, "learning_rate": 0.00018929921071513992, "loss": 0.0451, "step": 8219 }, { "epoch": 1.1520672740014015, "grad_norm": 0.2485506534576416, "learning_rate": 0.00018928486008132025, "loss": 0.0775, "step": 8220 }, { "epoch": 1.1522074281709882, "grad_norm": 0.4857812821865082, "learning_rate": 0.00018927050944750058, "loss": 0.0518, "step": 8221 }, { "epoch": 1.1523475823405747, "grad_norm": 0.19365179538726807, "learning_rate": 0.00018925615881368093, "loss": 0.0312, "step": 8222 }, { "epoch": 1.1524877365101611, "grad_norm": 0.3440621495246887, "learning_rate": 0.00018924180817986126, "loss": 0.081, "step": 
8223 }, { "epoch": 1.1526278906797478, "grad_norm": 0.12351280450820923, "learning_rate": 0.0001892274575460416, "loss": 0.0102, "step": 8224 }, { "epoch": 1.1527680448493343, "grad_norm": 0.4448149800300598, "learning_rate": 0.00018921310691222192, "loss": 0.0713, "step": 8225 }, { "epoch": 1.1529081990189207, "grad_norm": 2.7275681495666504, "learning_rate": 0.00018919875627840228, "loss": 0.0143, "step": 8226 }, { "epoch": 1.1530483531885074, "grad_norm": 0.2122349590063095, "learning_rate": 0.0001891844056445826, "loss": 0.044, "step": 8227 }, { "epoch": 1.1531885073580939, "grad_norm": 0.30623915791511536, "learning_rate": 0.00018917005501076293, "loss": 0.0596, "step": 8228 }, { "epoch": 1.1533286615276805, "grad_norm": 0.6875832676887512, "learning_rate": 0.00018915570437694332, "loss": 0.0323, "step": 8229 }, { "epoch": 1.153468815697267, "grad_norm": 1.5201548337936401, "learning_rate": 0.00018914135374312364, "loss": 0.0834, "step": 8230 }, { "epoch": 1.1536089698668535, "grad_norm": 0.23452061414718628, "learning_rate": 0.00018912700310930397, "loss": 0.011, "step": 8231 }, { "epoch": 1.1537491240364401, "grad_norm": 0.5297860503196716, "learning_rate": 0.00018911265247548433, "loss": 0.1174, "step": 8232 }, { "epoch": 1.1538892782060266, "grad_norm": 1.8183493614196777, "learning_rate": 0.00018909830184166466, "loss": 0.1645, "step": 8233 }, { "epoch": 1.154029432375613, "grad_norm": 1.5141992568969727, "learning_rate": 0.00018908395120784499, "loss": 0.1216, "step": 8234 }, { "epoch": 1.1541695865451997, "grad_norm": 4.588138103485107, "learning_rate": 0.00018906960057402534, "loss": 0.1987, "step": 8235 }, { "epoch": 1.1543097407147862, "grad_norm": 0.18887551128864288, "learning_rate": 0.00018905524994020567, "loss": 0.032, "step": 8236 }, { "epoch": 1.1544498948843729, "grad_norm": 0.3860364556312561, "learning_rate": 0.000189040899306386, "loss": 0.0982, "step": 8237 }, { "epoch": 1.1545900490539593, "grad_norm": 0.25927311182022095, 
"learning_rate": 0.00018902654867256638, "loss": 0.0407, "step": 8238 }, { "epoch": 1.1547302032235458, "grad_norm": 0.283090740442276, "learning_rate": 0.0001890121980387467, "loss": 0.0431, "step": 8239 }, { "epoch": 1.1548703573931325, "grad_norm": 0.14458709955215454, "learning_rate": 0.00018899784740492704, "loss": 0.029, "step": 8240 }, { "epoch": 1.155010511562719, "grad_norm": 0.2459026426076889, "learning_rate": 0.0001889834967711074, "loss": 0.0367, "step": 8241 }, { "epoch": 1.1551506657323056, "grad_norm": 0.4028928875923157, "learning_rate": 0.00018896914613728772, "loss": 0.083, "step": 8242 }, { "epoch": 1.155290819901892, "grad_norm": 0.2877585291862488, "learning_rate": 0.00018895479550346805, "loss": 0.0807, "step": 8243 }, { "epoch": 1.1554309740714785, "grad_norm": 0.31825652718544006, "learning_rate": 0.00018894044486964838, "loss": 0.0599, "step": 8244 }, { "epoch": 1.1555711282410652, "grad_norm": 0.456478089094162, "learning_rate": 0.00018892609423582874, "loss": 0.1627, "step": 8245 }, { "epoch": 1.1557112824106517, "grad_norm": 0.41934704780578613, "learning_rate": 0.00018891174360200906, "loss": 0.1298, "step": 8246 }, { "epoch": 1.1558514365802384, "grad_norm": 0.38486745953559875, "learning_rate": 0.0001888973929681894, "loss": 0.0391, "step": 8247 }, { "epoch": 1.1559915907498248, "grad_norm": 0.3698129653930664, "learning_rate": 0.00018888304233436975, "loss": 0.1231, "step": 8248 }, { "epoch": 1.1561317449194113, "grad_norm": 0.08605755865573883, "learning_rate": 0.00018886869170055008, "loss": 0.0105, "step": 8249 }, { "epoch": 1.156271899088998, "grad_norm": 0.2706579864025116, "learning_rate": 0.00018885434106673043, "loss": 0.051, "step": 8250 }, { "epoch": 1.1564120532585844, "grad_norm": 0.5422663688659668, "learning_rate": 0.0001888399904329108, "loss": 0.0802, "step": 8251 }, { "epoch": 1.156552207428171, "grad_norm": 0.29781830310821533, "learning_rate": 0.00018882563979909112, "loss": 0.0758, "step": 8252 }, { "epoch": 
1.1566923615977576, "grad_norm": 0.15586911141872406, "learning_rate": 0.00018881128916527144, "loss": 0.0263, "step": 8253 }, { "epoch": 1.156832515767344, "grad_norm": 0.2306089550256729, "learning_rate": 0.0001887969385314518, "loss": 0.0436, "step": 8254 }, { "epoch": 1.1569726699369307, "grad_norm": 0.44029954075813293, "learning_rate": 0.00018878258789763213, "loss": 0.0375, "step": 8255 }, { "epoch": 1.1571128241065172, "grad_norm": 0.2494993805885315, "learning_rate": 0.00018876823726381246, "loss": 0.0483, "step": 8256 }, { "epoch": 1.1572529782761036, "grad_norm": 0.3793202042579651, "learning_rate": 0.0001887538866299928, "loss": 0.0335, "step": 8257 }, { "epoch": 1.1573931324456903, "grad_norm": 0.3121330440044403, "learning_rate": 0.00018873953599617314, "loss": 0.0555, "step": 8258 }, { "epoch": 1.1575332866152768, "grad_norm": 0.45819664001464844, "learning_rate": 0.00018872518536235347, "loss": 0.0836, "step": 8259 }, { "epoch": 1.1576734407848632, "grad_norm": 0.1707604080438614, "learning_rate": 0.0001887108347285338, "loss": 0.0171, "step": 8260 }, { "epoch": 1.15781359495445, "grad_norm": 0.3753352463245392, "learning_rate": 0.00018869648409471418, "loss": 0.0768, "step": 8261 }, { "epoch": 1.1579537491240364, "grad_norm": 0.31892135739326477, "learning_rate": 0.0001886821334608945, "loss": 0.0795, "step": 8262 }, { "epoch": 1.158093903293623, "grad_norm": 0.1381869912147522, "learning_rate": 0.00018866778282707484, "loss": 0.0153, "step": 8263 }, { "epoch": 1.1582340574632095, "grad_norm": 0.32289040088653564, "learning_rate": 0.0001886534321932552, "loss": 0.0499, "step": 8264 }, { "epoch": 1.158374211632796, "grad_norm": 0.15772201120853424, "learning_rate": 0.00018863908155943552, "loss": 0.0117, "step": 8265 }, { "epoch": 1.1585143658023827, "grad_norm": 0.4177266061306, "learning_rate": 0.00018862473092561585, "loss": 0.0557, "step": 8266 }, { "epoch": 1.158654519971969, "grad_norm": 0.3156067430973053, "learning_rate": 
0.0001886103802917962, "loss": 0.0222, "step": 8267 }, { "epoch": 1.1587946741415558, "grad_norm": 0.2939659059047699, "learning_rate": 0.00018859602965797654, "loss": 0.065, "step": 8268 }, { "epoch": 1.1589348283111423, "grad_norm": 0.6546409726142883, "learning_rate": 0.00018858167902415686, "loss": 0.0761, "step": 8269 }, { "epoch": 1.1590749824807287, "grad_norm": 0.29267361760139465, "learning_rate": 0.00018856732839033725, "loss": 0.0435, "step": 8270 }, { "epoch": 1.1592151366503154, "grad_norm": 0.250063955783844, "learning_rate": 0.00018855297775651758, "loss": 0.0232, "step": 8271 }, { "epoch": 1.1593552908199019, "grad_norm": 0.37106165289878845, "learning_rate": 0.0001885386271226979, "loss": 0.1344, "step": 8272 }, { "epoch": 1.1594954449894885, "grad_norm": 0.7054430842399597, "learning_rate": 0.00018852427648887826, "loss": 0.241, "step": 8273 }, { "epoch": 1.159635599159075, "grad_norm": 0.653161346912384, "learning_rate": 0.0001885099258550586, "loss": 0.0986, "step": 8274 }, { "epoch": 1.1597757533286615, "grad_norm": 0.5201935768127441, "learning_rate": 0.00018849557522123892, "loss": 0.0774, "step": 8275 }, { "epoch": 1.1599159074982481, "grad_norm": 0.33321377635002136, "learning_rate": 0.00018848122458741927, "loss": 0.0748, "step": 8276 }, { "epoch": 1.1600560616678346, "grad_norm": 0.8500821590423584, "learning_rate": 0.0001884668739535996, "loss": 0.0301, "step": 8277 }, { "epoch": 1.1601962158374213, "grad_norm": 0.8910528421401978, "learning_rate": 0.00018845252331977993, "loss": 0.3395, "step": 8278 }, { "epoch": 1.1603363700070077, "grad_norm": 0.37445971369743347, "learning_rate": 0.00018843817268596026, "loss": 0.0631, "step": 8279 }, { "epoch": 1.1604765241765942, "grad_norm": 0.6641671657562256, "learning_rate": 0.00018842382205214061, "loss": 0.081, "step": 8280 }, { "epoch": 1.1606166783461809, "grad_norm": 0.955694317817688, "learning_rate": 0.00018840947141832097, "loss": 0.0942, "step": 8281 }, { "epoch": 1.1607568325157673, 
"grad_norm": 1.1868804693222046, "learning_rate": 0.0001883951207845013, "loss": 0.1858, "step": 8282 }, { "epoch": 1.1608969866853538, "grad_norm": 1.3917365074157715, "learning_rate": 0.00018838077015068165, "loss": 0.0717, "step": 8283 }, { "epoch": 1.1610371408549405, "grad_norm": 1.6709376573562622, "learning_rate": 0.00018836641951686198, "loss": 0.0724, "step": 8284 }, { "epoch": 1.161177295024527, "grad_norm": 8.957598686218262, "learning_rate": 0.0001883520688830423, "loss": 0.202, "step": 8285 }, { "epoch": 1.1613174491941136, "grad_norm": 0.15242385864257812, "learning_rate": 0.00018833771824922267, "loss": 0.0228, "step": 8286 }, { "epoch": 1.1614576033637, "grad_norm": 0.34917619824409485, "learning_rate": 0.000188323367615403, "loss": 0.0438, "step": 8287 }, { "epoch": 1.1615977575332865, "grad_norm": 0.578653872013092, "learning_rate": 0.00018830901698158332, "loss": 0.0637, "step": 8288 }, { "epoch": 1.1617379117028732, "grad_norm": 0.7125800848007202, "learning_rate": 0.00018829466634776368, "loss": 0.1002, "step": 8289 }, { "epoch": 1.1618780658724597, "grad_norm": 0.3307386636734009, "learning_rate": 0.000188280315713944, "loss": 0.0591, "step": 8290 }, { "epoch": 1.1620182200420461, "grad_norm": 0.24913068115711212, "learning_rate": 0.00018826596508012434, "loss": 0.057, "step": 8291 }, { "epoch": 1.1621583742116328, "grad_norm": 0.4080732464790344, "learning_rate": 0.00018825161444630472, "loss": 0.0486, "step": 8292 }, { "epoch": 1.1622985283812193, "grad_norm": 0.31905221939086914, "learning_rate": 0.00018823726381248505, "loss": 0.0509, "step": 8293 }, { "epoch": 1.162438682550806, "grad_norm": 0.2527974843978882, "learning_rate": 0.00018822291317866538, "loss": 0.0619, "step": 8294 }, { "epoch": 1.1625788367203924, "grad_norm": 0.24787454307079315, "learning_rate": 0.0001882085625448457, "loss": 0.0331, "step": 8295 }, { "epoch": 1.1627189908899789, "grad_norm": 0.27092069387435913, "learning_rate": 0.00018819421191102606, "loss": 0.0689, 
"step": 8296 }, { "epoch": 1.1628591450595656, "grad_norm": 0.18213745951652527, "learning_rate": 0.0001881798612772064, "loss": 0.0494, "step": 8297 }, { "epoch": 1.162999299229152, "grad_norm": 0.43573054671287537, "learning_rate": 0.00018816551064338672, "loss": 0.0777, "step": 8298 }, { "epoch": 1.1631394533987387, "grad_norm": 0.42315080761909485, "learning_rate": 0.00018815116000956707, "loss": 0.057, "step": 8299 }, { "epoch": 1.1632796075683252, "grad_norm": 1.2821844816207886, "learning_rate": 0.0001881368093757474, "loss": 0.0505, "step": 8300 }, { "epoch": 1.1634197617379116, "grad_norm": 0.3277112543582916, "learning_rate": 0.00018812245874192773, "loss": 0.0574, "step": 8301 }, { "epoch": 1.1635599159074983, "grad_norm": 0.41321098804473877, "learning_rate": 0.00018810810810810811, "loss": 0.113, "step": 8302 }, { "epoch": 1.1637000700770848, "grad_norm": 0.2844173014163971, "learning_rate": 0.00018809375747428844, "loss": 0.038, "step": 8303 }, { "epoch": 1.1638402242466714, "grad_norm": 0.31971997022628784, "learning_rate": 0.00018807940684046877, "loss": 0.0555, "step": 8304 }, { "epoch": 1.163980378416258, "grad_norm": 0.3507886826992035, "learning_rate": 0.00018806505620664913, "loss": 0.0465, "step": 8305 }, { "epoch": 1.1641205325858444, "grad_norm": 0.20255932211875916, "learning_rate": 0.00018805070557282945, "loss": 0.0162, "step": 8306 }, { "epoch": 1.164260686755431, "grad_norm": 0.33217859268188477, "learning_rate": 0.00018803635493900978, "loss": 0.0386, "step": 8307 }, { "epoch": 1.1644008409250175, "grad_norm": 0.19839653372764587, "learning_rate": 0.00018802200430519014, "loss": 0.0474, "step": 8308 }, { "epoch": 1.1645409950946042, "grad_norm": 0.19316379725933075, "learning_rate": 0.00018800765367137047, "loss": 0.0393, "step": 8309 }, { "epoch": 1.1646811492641906, "grad_norm": 0.19684062898159027, "learning_rate": 0.0001879933030375508, "loss": 0.0252, "step": 8310 }, { "epoch": 1.164821303433777, "grad_norm": 0.2286255657672882, 
"learning_rate": 0.00018797895240373115, "loss": 0.0473, "step": 8311 }, { "epoch": 1.1649614576033638, "grad_norm": 0.18859437108039856, "learning_rate": 0.00018796460176991148, "loss": 0.0309, "step": 8312 }, { "epoch": 1.1651016117729502, "grad_norm": 0.1314937174320221, "learning_rate": 0.00018795025113609184, "loss": 0.021, "step": 8313 }, { "epoch": 1.1652417659425367, "grad_norm": 0.369870662689209, "learning_rate": 0.00018793590050227216, "loss": 0.0622, "step": 8314 }, { "epoch": 1.1653819201121234, "grad_norm": 0.7222849726676941, "learning_rate": 0.00018792154986845252, "loss": 0.0494, "step": 8315 }, { "epoch": 1.1655220742817098, "grad_norm": 0.3878611624240875, "learning_rate": 0.00018790719923463285, "loss": 0.0789, "step": 8316 }, { "epoch": 1.1656622284512965, "grad_norm": 0.22645603120326996, "learning_rate": 0.00018789284860081318, "loss": 0.0144, "step": 8317 }, { "epoch": 1.165802382620883, "grad_norm": 0.21751196682453156, "learning_rate": 0.00018787849796699353, "loss": 0.0375, "step": 8318 }, { "epoch": 1.1659425367904694, "grad_norm": 0.2437049299478531, "learning_rate": 0.00018786414733317386, "loss": 0.0217, "step": 8319 }, { "epoch": 1.1660826909600561, "grad_norm": 0.14584419131278992, "learning_rate": 0.0001878497966993542, "loss": 0.0241, "step": 8320 }, { "epoch": 1.1662228451296426, "grad_norm": 0.15523898601531982, "learning_rate": 0.00018783544606553455, "loss": 0.0232, "step": 8321 }, { "epoch": 1.166362999299229, "grad_norm": 0.2595839202404022, "learning_rate": 0.00018782109543171487, "loss": 0.047, "step": 8322 }, { "epoch": 1.1665031534688157, "grad_norm": 0.5731723308563232, "learning_rate": 0.0001878067447978952, "loss": 0.0375, "step": 8323 }, { "epoch": 1.1666433076384022, "grad_norm": 0.7627415657043457, "learning_rate": 0.00018779239416407559, "loss": 0.0688, "step": 8324 }, { "epoch": 1.1667834618079889, "grad_norm": 1.139765977859497, "learning_rate": 0.00018777804353025591, "loss": 0.0877, "step": 8325 }, { "epoch": 
1.1669236159775753, "grad_norm": 0.40040647983551025, "learning_rate": 0.00018776369289643624, "loss": 0.0584, "step": 8326 }, { "epoch": 1.1670637701471618, "grad_norm": 0.5922824740409851, "learning_rate": 0.0001877493422626166, "loss": 0.0681, "step": 8327 }, { "epoch": 1.1672039243167485, "grad_norm": 0.2685219347476959, "learning_rate": 0.00018773499162879693, "loss": 0.0293, "step": 8328 }, { "epoch": 1.167344078486335, "grad_norm": 1.1396485567092896, "learning_rate": 0.00018772064099497726, "loss": 0.0492, "step": 8329 }, { "epoch": 1.1674842326559216, "grad_norm": 0.43605372309684753, "learning_rate": 0.00018770629036115758, "loss": 0.0354, "step": 8330 }, { "epoch": 1.167624386825508, "grad_norm": 0.26172560453414917, "learning_rate": 0.00018769193972733794, "loss": 0.0257, "step": 8331 }, { "epoch": 1.1677645409950945, "grad_norm": 0.2245074361562729, "learning_rate": 0.00018767758909351827, "loss": 0.0195, "step": 8332 }, { "epoch": 1.1679046951646812, "grad_norm": 1.2434290647506714, "learning_rate": 0.0001876632384596986, "loss": 0.0706, "step": 8333 }, { "epoch": 1.1680448493342677, "grad_norm": 1.3934446573257446, "learning_rate": 0.00018764888782587898, "loss": 0.2143, "step": 8334 }, { "epoch": 1.1681850035038543, "grad_norm": 1.9912471771240234, "learning_rate": 0.0001876345371920593, "loss": 0.1416, "step": 8335 }, { "epoch": 1.1683251576734408, "grad_norm": 0.16852135956287384, "learning_rate": 0.00018762018655823964, "loss": 0.0163, "step": 8336 }, { "epoch": 1.1684653118430273, "grad_norm": 0.4382365047931671, "learning_rate": 0.00018760583592442, "loss": 0.0735, "step": 8337 }, { "epoch": 1.168605466012614, "grad_norm": 0.34233397245407104, "learning_rate": 0.00018759148529060032, "loss": 0.0371, "step": 8338 }, { "epoch": 1.1687456201822004, "grad_norm": 0.36186596751213074, "learning_rate": 0.00018757713465678065, "loss": 0.0356, "step": 8339 }, { "epoch": 1.168885774351787, "grad_norm": 0.27241790294647217, "learning_rate": 
0.000187562784022961, "loss": 0.0855, "step": 8340 }, { "epoch": 1.1690259285213735, "grad_norm": 0.24945935606956482, "learning_rate": 0.00018754843338914133, "loss": 0.0484, "step": 8341 }, { "epoch": 1.16916608269096, "grad_norm": 0.2609570324420929, "learning_rate": 0.00018753408275532166, "loss": 0.044, "step": 8342 }, { "epoch": 1.1693062368605467, "grad_norm": 0.548702597618103, "learning_rate": 0.00018751973212150202, "loss": 0.0445, "step": 8343 }, { "epoch": 1.1694463910301331, "grad_norm": 0.3048005700111389, "learning_rate": 0.00018750538148768235, "loss": 0.0204, "step": 8344 }, { "epoch": 1.1695865451997196, "grad_norm": 0.7119125127792358, "learning_rate": 0.0001874910308538627, "loss": 0.0389, "step": 8345 }, { "epoch": 1.1697266993693063, "grad_norm": 0.31920552253723145, "learning_rate": 0.00018747668022004306, "loss": 0.073, "step": 8346 }, { "epoch": 1.1698668535388927, "grad_norm": 0.8130448460578918, "learning_rate": 0.0001874623295862234, "loss": 0.08, "step": 8347 }, { "epoch": 1.1700070077084792, "grad_norm": 0.2658419609069824, "learning_rate": 0.00018744797895240372, "loss": 0.0444, "step": 8348 }, { "epoch": 1.170147161878066, "grad_norm": 0.11541197448968887, "learning_rate": 0.00018743362831858404, "loss": 0.0073, "step": 8349 }, { "epoch": 1.1702873160476523, "grad_norm": 0.41286951303482056, "learning_rate": 0.0001874192776847644, "loss": 0.0716, "step": 8350 }, { "epoch": 1.170427470217239, "grad_norm": 0.38991743326187134, "learning_rate": 0.00018740492705094473, "loss": 0.0308, "step": 8351 }, { "epoch": 1.1705676243868255, "grad_norm": 0.13846030831336975, "learning_rate": 0.00018739057641712506, "loss": 0.0212, "step": 8352 }, { "epoch": 1.170707778556412, "grad_norm": 0.6411053538322449, "learning_rate": 0.0001873762257833054, "loss": 0.1269, "step": 8353 }, { "epoch": 1.1708479327259986, "grad_norm": 0.25690263509750366, "learning_rate": 0.00018736187514948574, "loss": 0.0428, "step": 8354 }, { "epoch": 1.170988086895585, 
"grad_norm": 0.18725794553756714, "learning_rate": 0.00018734752451566607, "loss": 0.0313, "step": 8355 }, { "epoch": 1.1711282410651718, "grad_norm": 0.21428105235099792, "learning_rate": 0.00018733317388184645, "loss": 0.0185, "step": 8356 }, { "epoch": 1.1712683952347582, "grad_norm": 0.25514960289001465, "learning_rate": 0.00018731882324802678, "loss": 0.0334, "step": 8357 }, { "epoch": 1.1714085494043447, "grad_norm": 0.43981075286865234, "learning_rate": 0.0001873044726142071, "loss": 0.0703, "step": 8358 }, { "epoch": 1.1715487035739314, "grad_norm": 0.4248395562171936, "learning_rate": 0.00018729012198038746, "loss": 0.1012, "step": 8359 }, { "epoch": 1.1716888577435178, "grad_norm": 0.604216456413269, "learning_rate": 0.0001872757713465678, "loss": 0.0676, "step": 8360 }, { "epoch": 1.1718290119131045, "grad_norm": 0.08519989252090454, "learning_rate": 0.00018726142071274812, "loss": 0.0073, "step": 8361 }, { "epoch": 1.171969166082691, "grad_norm": 0.4951140880584717, "learning_rate": 0.00018724707007892848, "loss": 0.1452, "step": 8362 }, { "epoch": 1.1721093202522774, "grad_norm": 0.3014000654220581, "learning_rate": 0.0001872327194451088, "loss": 0.0519, "step": 8363 }, { "epoch": 1.1722494744218641, "grad_norm": 0.291948527097702, "learning_rate": 0.00018721836881128913, "loss": 0.0306, "step": 8364 }, { "epoch": 1.1723896285914506, "grad_norm": 0.21021240949630737, "learning_rate": 0.00018720401817746946, "loss": 0.0423, "step": 8365 }, { "epoch": 1.1725297827610373, "grad_norm": 1.003395438194275, "learning_rate": 0.00018718966754364985, "loss": 0.1596, "step": 8366 }, { "epoch": 1.1726699369306237, "grad_norm": 1.0240802764892578, "learning_rate": 0.00018717531690983017, "loss": 0.0725, "step": 8367 }, { "epoch": 1.1728100911002102, "grad_norm": 0.6086975932121277, "learning_rate": 0.0001871609662760105, "loss": 0.0853, "step": 8368 }, { "epoch": 1.1729502452697969, "grad_norm": 0.4363154172897339, "learning_rate": 0.00018714661564219086, "loss": 
0.0451, "step": 8369 }, { "epoch": 1.1730903994393833, "grad_norm": 0.28637152910232544, "learning_rate": 0.0001871322650083712, "loss": 0.0507, "step": 8370 }, { "epoch": 1.1732305536089698, "grad_norm": 0.33646801114082336, "learning_rate": 0.00018711791437455152, "loss": 0.0356, "step": 8371 }, { "epoch": 1.1733707077785565, "grad_norm": 0.1964593231678009, "learning_rate": 0.00018710356374073187, "loss": 0.0188, "step": 8372 }, { "epoch": 1.173510861948143, "grad_norm": 0.6283341646194458, "learning_rate": 0.0001870892131069122, "loss": 0.059, "step": 8373 }, { "epoch": 1.1736510161177296, "grad_norm": 0.6729354858398438, "learning_rate": 0.00018707486247309253, "loss": 0.0466, "step": 8374 }, { "epoch": 1.173791170287316, "grad_norm": 0.3486192226409912, "learning_rate": 0.00018706051183927288, "loss": 0.0163, "step": 8375 }, { "epoch": 1.1739313244569025, "grad_norm": 0.14129824936389923, "learning_rate": 0.0001870461612054532, "loss": 0.0325, "step": 8376 }, { "epoch": 1.1740714786264892, "grad_norm": 0.34836235642433167, "learning_rate": 0.00018703181057163357, "loss": 0.0172, "step": 8377 }, { "epoch": 1.1742116327960757, "grad_norm": 0.6925960183143616, "learning_rate": 0.00018701745993781392, "loss": 0.1112, "step": 8378 }, { "epoch": 1.1743517869656621, "grad_norm": 0.3958178162574768, "learning_rate": 0.00018700310930399425, "loss": 0.1184, "step": 8379 }, { "epoch": 1.1744919411352488, "grad_norm": 0.13694526255130768, "learning_rate": 0.00018698875867017458, "loss": 0.0094, "step": 8380 }, { "epoch": 1.1746320953048353, "grad_norm": 0.7940552830696106, "learning_rate": 0.00018697440803635494, "loss": 0.0294, "step": 8381 }, { "epoch": 1.174772249474422, "grad_norm": 0.8132178783416748, "learning_rate": 0.00018696005740253527, "loss": 0.1169, "step": 8382 }, { "epoch": 1.1749124036440084, "grad_norm": 0.23389962315559387, "learning_rate": 0.0001869457067687156, "loss": 0.055, "step": 8383 }, { "epoch": 1.1750525578135949, "grad_norm": 
0.90741366147995, "learning_rate": 0.00018693135613489592, "loss": 0.2185, "step": 8384 }, { "epoch": 1.1751927119831815, "grad_norm": 3.7466557025909424, "learning_rate": 0.00018691700550107628, "loss": 0.2281, "step": 8385 }, { "epoch": 1.175332866152768, "grad_norm": 0.19903984665870667, "learning_rate": 0.0001869026548672566, "loss": 0.0249, "step": 8386 }, { "epoch": 1.1754730203223547, "grad_norm": 0.26661404967308044, "learning_rate": 0.00018688830423343694, "loss": 0.1125, "step": 8387 }, { "epoch": 1.1756131744919411, "grad_norm": 0.4420032799243927, "learning_rate": 0.00018687395359961732, "loss": 0.0712, "step": 8388 }, { "epoch": 1.1757533286615276, "grad_norm": 0.19613388180732727, "learning_rate": 0.00018685960296579765, "loss": 0.02, "step": 8389 }, { "epoch": 1.1758934828311143, "grad_norm": 0.1935284435749054, "learning_rate": 0.00018684525233197798, "loss": 0.0478, "step": 8390 }, { "epoch": 1.1760336370007007, "grad_norm": 0.3692995309829712, "learning_rate": 0.00018683090169815833, "loss": 0.0551, "step": 8391 }, { "epoch": 1.1761737911702874, "grad_norm": 0.20738908648490906, "learning_rate": 0.00018681655106433866, "loss": 0.0446, "step": 8392 }, { "epoch": 1.1763139453398739, "grad_norm": 1.241309404373169, "learning_rate": 0.000186802200430519, "loss": 0.1116, "step": 8393 }, { "epoch": 1.1764540995094603, "grad_norm": 0.5874578952789307, "learning_rate": 0.00018678784979669934, "loss": 0.079, "step": 8394 }, { "epoch": 1.176594253679047, "grad_norm": 0.19788005948066711, "learning_rate": 0.00018677349916287967, "loss": 0.0494, "step": 8395 }, { "epoch": 1.1767344078486335, "grad_norm": 0.31705984473228455, "learning_rate": 0.00018675914852906, "loss": 0.0703, "step": 8396 }, { "epoch": 1.1768745620182202, "grad_norm": 0.2618476152420044, "learning_rate": 0.00018674479789524038, "loss": 0.0282, "step": 8397 }, { "epoch": 1.1770147161878066, "grad_norm": 0.3796525001525879, "learning_rate": 0.0001867304472614207, "loss": 0.0952, "step": 8398 
}, { "epoch": 1.177154870357393, "grad_norm": 0.2857600450515747, "learning_rate": 0.00018671609662760104, "loss": 0.0259, "step": 8399 }, { "epoch": 1.1772950245269798, "grad_norm": 0.4316628575325012, "learning_rate": 0.00018670174599378137, "loss": 0.0495, "step": 8400 }, { "epoch": 1.1774351786965662, "grad_norm": 0.3237719237804413, "learning_rate": 0.00018668739535996173, "loss": 0.0424, "step": 8401 }, { "epoch": 1.1775753328661527, "grad_norm": 0.5039601922035217, "learning_rate": 0.00018667304472614205, "loss": 0.1105, "step": 8402 }, { "epoch": 1.1777154870357394, "grad_norm": 0.44629162549972534, "learning_rate": 0.00018665869409232238, "loss": 0.0719, "step": 8403 }, { "epoch": 1.1778556412053258, "grad_norm": 0.878272533416748, "learning_rate": 0.00018664434345850274, "loss": 0.1124, "step": 8404 }, { "epoch": 1.1779957953749123, "grad_norm": 0.35363179445266724, "learning_rate": 0.00018662999282468307, "loss": 0.0515, "step": 8405 }, { "epoch": 1.178135949544499, "grad_norm": 0.205809086561203, "learning_rate": 0.0001866156421908634, "loss": 0.0342, "step": 8406 }, { "epoch": 1.1782761037140854, "grad_norm": 0.2189025729894638, "learning_rate": 0.00018660129155704375, "loss": 0.0411, "step": 8407 }, { "epoch": 1.178416257883672, "grad_norm": 0.4029066264629364, "learning_rate": 0.0001865869409232241, "loss": 0.0777, "step": 8408 }, { "epoch": 1.1785564120532586, "grad_norm": 0.3204864263534546, "learning_rate": 0.00018657259028940444, "loss": 0.0622, "step": 8409 }, { "epoch": 1.178696566222845, "grad_norm": 0.1740313470363617, "learning_rate": 0.0001865582396555848, "loss": 0.0365, "step": 8410 }, { "epoch": 1.1788367203924317, "grad_norm": 0.2346617728471756, "learning_rate": 0.00018654388902176512, "loss": 0.0189, "step": 8411 }, { "epoch": 1.1789768745620182, "grad_norm": 0.37026694416999817, "learning_rate": 0.00018652953838794545, "loss": 0.0437, "step": 8412 }, { "epoch": 1.1791170287316048, "grad_norm": 0.25204741954803467, "learning_rate": 
0.0001865151877541258, "loss": 0.0261, "step": 8413 }, { "epoch": 1.1792571829011913, "grad_norm": 0.26381635665893555, "learning_rate": 0.00018650083712030613, "loss": 0.0279, "step": 8414 }, { "epoch": 1.1793973370707778, "grad_norm": 0.34110718965530396, "learning_rate": 0.00018648648648648646, "loss": 0.0529, "step": 8415 }, { "epoch": 1.1795374912403644, "grad_norm": 0.38720956444740295, "learning_rate": 0.00018647213585266682, "loss": 0.08, "step": 8416 }, { "epoch": 1.179677645409951, "grad_norm": 0.1541094034910202, "learning_rate": 0.00018645778521884714, "loss": 0.0334, "step": 8417 }, { "epoch": 1.1798177995795376, "grad_norm": 0.3178797960281372, "learning_rate": 0.00018644343458502747, "loss": 0.0846, "step": 8418 }, { "epoch": 1.179957953749124, "grad_norm": 0.33582767844200134, "learning_rate": 0.0001864290839512078, "loss": 0.0617, "step": 8419 }, { "epoch": 1.1800981079187105, "grad_norm": 1.152430534362793, "learning_rate": 0.00018641473331738818, "loss": 0.0881, "step": 8420 }, { "epoch": 1.1802382620882972, "grad_norm": 0.2558451294898987, "learning_rate": 0.0001864003826835685, "loss": 0.0279, "step": 8421 }, { "epoch": 1.1803784162578836, "grad_norm": 0.6255281567573547, "learning_rate": 0.00018638603204974884, "loss": 0.0413, "step": 8422 }, { "epoch": 1.1805185704274703, "grad_norm": 0.2184334248304367, "learning_rate": 0.0001863716814159292, "loss": 0.0407, "step": 8423 }, { "epoch": 1.1806587245970568, "grad_norm": 0.30584102869033813, "learning_rate": 0.00018635733078210953, "loss": 0.0305, "step": 8424 }, { "epoch": 1.1807988787666432, "grad_norm": 1.3026423454284668, "learning_rate": 0.00018634298014828985, "loss": 0.0955, "step": 8425 }, { "epoch": 1.18093903293623, "grad_norm": 0.41045674681663513, "learning_rate": 0.0001863286295144702, "loss": 0.0263, "step": 8426 }, { "epoch": 1.1810791871058164, "grad_norm": 0.592070460319519, "learning_rate": 0.00018631427888065054, "loss": 0.0917, "step": 8427 }, { "epoch": 1.181219341275403, 
"grad_norm": 0.34400054812431335, "learning_rate": 0.00018629992824683087, "loss": 0.0533, "step": 8428 }, { "epoch": 1.1813594954449895, "grad_norm": 0.4891303777694702, "learning_rate": 0.00018628557761301125, "loss": 0.0814, "step": 8429 }, { "epoch": 1.181499649614576, "grad_norm": 0.49518412351608276, "learning_rate": 0.00018627122697919158, "loss": 0.1097, "step": 8430 }, { "epoch": 1.1816398037841627, "grad_norm": 1.4780369997024536, "learning_rate": 0.0001862568763453719, "loss": 0.2873, "step": 8431 }, { "epoch": 1.1817799579537491, "grad_norm": 1.0688928365707397, "learning_rate": 0.00018624252571155226, "loss": 0.0758, "step": 8432 }, { "epoch": 1.1819201121233356, "grad_norm": 2.4572336673736572, "learning_rate": 0.0001862281750777326, "loss": 0.0754, "step": 8433 }, { "epoch": 1.1820602662929223, "grad_norm": 0.5639296770095825, "learning_rate": 0.00018621382444391292, "loss": 0.1319, "step": 8434 }, { "epoch": 1.1822004204625087, "grad_norm": 1.2623234987258911, "learning_rate": 0.00018619947381009325, "loss": 0.3551, "step": 8435 }, { "epoch": 1.1823405746320952, "grad_norm": 0.4160192310810089, "learning_rate": 0.0001861851231762736, "loss": 0.0809, "step": 8436 }, { "epoch": 1.1824807288016819, "grad_norm": 0.4047568142414093, "learning_rate": 0.00018617077254245393, "loss": 0.0623, "step": 8437 }, { "epoch": 1.1826208829712683, "grad_norm": 0.5796579122543335, "learning_rate": 0.00018615642190863426, "loss": 0.0833, "step": 8438 }, { "epoch": 1.182761037140855, "grad_norm": 0.2815111577510834, "learning_rate": 0.00018614207127481462, "loss": 0.0456, "step": 8439 }, { "epoch": 1.1829011913104415, "grad_norm": 0.4378122091293335, "learning_rate": 0.00018612772064099497, "loss": 0.1344, "step": 8440 }, { "epoch": 1.183041345480028, "grad_norm": 0.22896511852741241, "learning_rate": 0.0001861133700071753, "loss": 0.0526, "step": 8441 }, { "epoch": 1.1831814996496146, "grad_norm": 0.1659672111272812, "learning_rate": 0.00018609901937335566, "loss": 
0.0403, "step": 8442 }, { "epoch": 1.183321653819201, "grad_norm": 0.42920631170272827, "learning_rate": 0.00018608466873953599, "loss": 0.1015, "step": 8443 }, { "epoch": 1.1834618079887878, "grad_norm": 1.0234919786453247, "learning_rate": 0.00018607031810571631, "loss": 0.1248, "step": 8444 }, { "epoch": 1.1836019621583742, "grad_norm": 0.18251116573810577, "learning_rate": 0.00018605596747189667, "loss": 0.0376, "step": 8445 }, { "epoch": 1.1837421163279607, "grad_norm": 0.1405799686908722, "learning_rate": 0.000186041616838077, "loss": 0.0383, "step": 8446 }, { "epoch": 1.1838822704975474, "grad_norm": 0.5530314445495605, "learning_rate": 0.00018602726620425733, "loss": 0.1039, "step": 8447 }, { "epoch": 1.1840224246671338, "grad_norm": 0.6715810894966125, "learning_rate": 0.00018601291557043768, "loss": 0.0951, "step": 8448 }, { "epoch": 1.1841625788367205, "grad_norm": 0.2891225516796112, "learning_rate": 0.000185998564936618, "loss": 0.0608, "step": 8449 }, { "epoch": 1.184302733006307, "grad_norm": 0.4518788456916809, "learning_rate": 0.00018598421430279834, "loss": 0.0631, "step": 8450 }, { "epoch": 1.1844428871758934, "grad_norm": 0.1502719223499298, "learning_rate": 0.00018596986366897867, "loss": 0.0279, "step": 8451 }, { "epoch": 1.18458304134548, "grad_norm": 0.44686177372932434, "learning_rate": 0.00018595551303515905, "loss": 0.0337, "step": 8452 }, { "epoch": 1.1847231955150666, "grad_norm": 0.33555567264556885, "learning_rate": 0.00018594116240133938, "loss": 0.0704, "step": 8453 }, { "epoch": 1.1848633496846532, "grad_norm": 0.34301823377609253, "learning_rate": 0.0001859268117675197, "loss": 0.0785, "step": 8454 }, { "epoch": 1.1850035038542397, "grad_norm": 0.3517075181007385, "learning_rate": 0.00018591246113370006, "loss": 0.0901, "step": 8455 }, { "epoch": 1.1851436580238262, "grad_norm": 0.2612588703632355, "learning_rate": 0.0001858981104998804, "loss": 0.0465, "step": 8456 }, { "epoch": 1.1852838121934128, "grad_norm": 
0.18803571164608002, "learning_rate": 0.00018588375986606072, "loss": 0.0575, "step": 8457 }, { "epoch": 1.1854239663629993, "grad_norm": 0.29370224475860596, "learning_rate": 0.00018586940923224108, "loss": 0.076, "step": 8458 }, { "epoch": 1.1855641205325858, "grad_norm": 0.6146436929702759, "learning_rate": 0.0001858550585984214, "loss": 0.092, "step": 8459 }, { "epoch": 1.1857042747021724, "grad_norm": 0.43006423115730286, "learning_rate": 0.00018584070796460173, "loss": 0.1221, "step": 8460 }, { "epoch": 1.185844428871759, "grad_norm": 0.3662368953227997, "learning_rate": 0.00018582635733078212, "loss": 0.0991, "step": 8461 }, { "epoch": 1.1859845830413456, "grad_norm": 0.3738935589790344, "learning_rate": 0.00018581200669696245, "loss": 0.0412, "step": 8462 }, { "epoch": 1.186124737210932, "grad_norm": 0.4793298542499542, "learning_rate": 0.00018579765606314277, "loss": 0.0792, "step": 8463 }, { "epoch": 1.1862648913805185, "grad_norm": 1.356055736541748, "learning_rate": 0.00018578330542932313, "loss": 0.0612, "step": 8464 }, { "epoch": 1.1864050455501052, "grad_norm": 0.4037184417247772, "learning_rate": 0.00018576895479550346, "loss": 0.0798, "step": 8465 }, { "epoch": 1.1865451997196916, "grad_norm": 0.17769403755664825, "learning_rate": 0.00018575460416168379, "loss": 0.0253, "step": 8466 }, { "epoch": 1.186685353889278, "grad_norm": 0.39587733149528503, "learning_rate": 0.00018574025352786414, "loss": 0.0369, "step": 8467 }, { "epoch": 1.1868255080588648, "grad_norm": 1.1012765169143677, "learning_rate": 0.00018572590289404447, "loss": 0.0902, "step": 8468 }, { "epoch": 1.1869656622284512, "grad_norm": 0.3209918141365051, "learning_rate": 0.0001857115522602248, "loss": 0.0731, "step": 8469 }, { "epoch": 1.187105816398038, "grad_norm": 0.3216061294078827, "learning_rate": 0.00018569720162640513, "loss": 0.0393, "step": 8470 }, { "epoch": 1.1872459705676244, "grad_norm": 0.22348041832447052, "learning_rate": 0.00018568285099258548, "loss": 0.019, "step": 
8471 }, { "epoch": 1.1873861247372108, "grad_norm": 0.7250317931175232, "learning_rate": 0.00018566850035876584, "loss": 0.0225, "step": 8472 }, { "epoch": 1.1875262789067975, "grad_norm": 0.2691590487957001, "learning_rate": 0.00018565414972494617, "loss": 0.026, "step": 8473 }, { "epoch": 1.187666433076384, "grad_norm": 0.521014928817749, "learning_rate": 0.00018563979909112652, "loss": 0.0303, "step": 8474 }, { "epoch": 1.1878065872459707, "grad_norm": 0.789585292339325, "learning_rate": 0.00018562544845730685, "loss": 0.132, "step": 8475 }, { "epoch": 1.1879467414155571, "grad_norm": 0.7490088939666748, "learning_rate": 0.00018561109782348718, "loss": 0.203, "step": 8476 }, { "epoch": 1.1880868955851436, "grad_norm": 0.23827721178531647, "learning_rate": 0.00018559674718966754, "loss": 0.0175, "step": 8477 }, { "epoch": 1.1882270497547303, "grad_norm": 0.3921199440956116, "learning_rate": 0.00018558239655584786, "loss": 0.0483, "step": 8478 }, { "epoch": 1.1883672039243167, "grad_norm": 0.28024962544441223, "learning_rate": 0.0001855680459220282, "loss": 0.0404, "step": 8479 }, { "epoch": 1.1885073580939034, "grad_norm": 0.9474804997444153, "learning_rate": 0.00018555369528820855, "loss": 0.0999, "step": 8480 }, { "epoch": 1.1886475122634899, "grad_norm": 0.2688314914703369, "learning_rate": 0.00018553934465438888, "loss": 0.0143, "step": 8481 }, { "epoch": 1.1887876664330763, "grad_norm": 0.7894964218139648, "learning_rate": 0.0001855249940205692, "loss": 0.1318, "step": 8482 }, { "epoch": 1.188927820602663, "grad_norm": 0.8433345556259155, "learning_rate": 0.0001855106433867496, "loss": 0.1358, "step": 8483 }, { "epoch": 1.1890679747722495, "grad_norm": 2.712667465209961, "learning_rate": 0.00018549629275292992, "loss": 0.0983, "step": 8484 }, { "epoch": 1.1892081289418361, "grad_norm": 4.782211780548096, "learning_rate": 0.00018548194211911025, "loss": 0.1831, "step": 8485 }, { "epoch": 1.1893482831114226, "grad_norm": 0.3351871967315674, "learning_rate": 
0.00018546759148529057, "loss": 0.1222, "step": 8486 }, { "epoch": 1.189488437281009, "grad_norm": 0.45225581526756287, "learning_rate": 0.00018545324085147093, "loss": 0.1117, "step": 8487 }, { "epoch": 1.1896285914505957, "grad_norm": 0.3638037443161011, "learning_rate": 0.00018543889021765126, "loss": 0.0836, "step": 8488 }, { "epoch": 1.1897687456201822, "grad_norm": 0.2246861606836319, "learning_rate": 0.0001854245395838316, "loss": 0.0247, "step": 8489 }, { "epoch": 1.1899088997897687, "grad_norm": 0.48111027479171753, "learning_rate": 0.00018541018895001194, "loss": 0.0955, "step": 8490 }, { "epoch": 1.1900490539593553, "grad_norm": 0.24523086845874786, "learning_rate": 0.00018539583831619227, "loss": 0.0215, "step": 8491 }, { "epoch": 1.1901892081289418, "grad_norm": 0.22513946890830994, "learning_rate": 0.0001853814876823726, "loss": 0.0175, "step": 8492 }, { "epoch": 1.1903293622985283, "grad_norm": 0.1555069535970688, "learning_rate": 0.00018536713704855298, "loss": 0.0578, "step": 8493 }, { "epoch": 1.190469516468115, "grad_norm": 0.44876307249069214, "learning_rate": 0.0001853527864147333, "loss": 0.0658, "step": 8494 }, { "epoch": 1.1906096706377014, "grad_norm": 0.4169052839279175, "learning_rate": 0.00018533843578091364, "loss": 0.0635, "step": 8495 }, { "epoch": 1.190749824807288, "grad_norm": 0.38028332591056824, "learning_rate": 0.000185324085147094, "loss": 0.0667, "step": 8496 }, { "epoch": 1.1908899789768745, "grad_norm": 0.21552136540412903, "learning_rate": 0.00018530973451327432, "loss": 0.0427, "step": 8497 }, { "epoch": 1.191030133146461, "grad_norm": 0.2731516659259796, "learning_rate": 0.00018529538387945465, "loss": 0.0326, "step": 8498 }, { "epoch": 1.1911702873160477, "grad_norm": 0.4314786493778229, "learning_rate": 0.000185281033245635, "loss": 0.0403, "step": 8499 }, { "epoch": 1.1913104414856341, "grad_norm": 0.32413384318351746, "learning_rate": 0.00018526668261181534, "loss": 0.0761, "step": 8500 }, { "epoch": 
1.1914505956552208, "grad_norm": 0.48024535179138184, "learning_rate": 0.00018525233197799567, "loss": 0.0367, "step": 8501 }, { "epoch": 1.1915907498248073, "grad_norm": 0.2946139872074127, "learning_rate": 0.00018523798134417602, "loss": 0.0575, "step": 8502 }, { "epoch": 1.1917309039943937, "grad_norm": 0.5404660105705261, "learning_rate": 0.00018522363071035638, "loss": 0.1124, "step": 8503 }, { "epoch": 1.1918710581639804, "grad_norm": 0.0834084302186966, "learning_rate": 0.0001852092800765367, "loss": 0.0112, "step": 8504 }, { "epoch": 1.1920112123335669, "grad_norm": 0.44430938363075256, "learning_rate": 0.00018519492944271703, "loss": 0.1216, "step": 8505 }, { "epoch": 1.1921513665031536, "grad_norm": 0.4010743498802185, "learning_rate": 0.0001851805788088974, "loss": 0.0565, "step": 8506 }, { "epoch": 1.19229152067274, "grad_norm": 0.7161933183670044, "learning_rate": 0.00018516622817507772, "loss": 0.1479, "step": 8507 }, { "epoch": 1.1924316748423265, "grad_norm": 0.38962656259536743, "learning_rate": 0.00018515187754125805, "loss": 0.1049, "step": 8508 }, { "epoch": 1.1925718290119132, "grad_norm": 0.7673728466033936, "learning_rate": 0.0001851375269074384, "loss": 0.1296, "step": 8509 }, { "epoch": 1.1927119831814996, "grad_norm": 0.2714522182941437, "learning_rate": 0.00018512317627361873, "loss": 0.0388, "step": 8510 }, { "epoch": 1.1928521373510863, "grad_norm": 0.34488505125045776, "learning_rate": 0.00018510882563979906, "loss": 0.0458, "step": 8511 }, { "epoch": 1.1929922915206728, "grad_norm": 0.19427450001239777, "learning_rate": 0.00018509447500597942, "loss": 0.0691, "step": 8512 }, { "epoch": 1.1931324456902592, "grad_norm": 0.1503702700138092, "learning_rate": 0.00018508012437215974, "loss": 0.0245, "step": 8513 }, { "epoch": 1.193272599859846, "grad_norm": 0.39883527159690857, "learning_rate": 0.00018506577373834007, "loss": 0.0724, "step": 8514 }, { "epoch": 1.1934127540294324, "grad_norm": 0.5355033278465271, "learning_rate": 
0.00018505142310452046, "loss": 0.0616, "step": 8515 }, { "epoch": 1.1935529081990188, "grad_norm": 0.28657329082489014, "learning_rate": 0.00018503707247070078, "loss": 0.0659, "step": 8516 }, { "epoch": 1.1936930623686055, "grad_norm": 1.1676677465438843, "learning_rate": 0.0001850227218368811, "loss": 0.1472, "step": 8517 }, { "epoch": 1.193833216538192, "grad_norm": 0.2774539291858673, "learning_rate": 0.00018500837120306147, "loss": 0.0504, "step": 8518 }, { "epoch": 1.1939733707077786, "grad_norm": 0.39250561594963074, "learning_rate": 0.0001849940205692418, "loss": 0.0723, "step": 8519 }, { "epoch": 1.194113524877365, "grad_norm": 0.4060557782649994, "learning_rate": 0.00018497966993542212, "loss": 0.0913, "step": 8520 }, { "epoch": 1.1942536790469516, "grad_norm": 0.32384955883026123, "learning_rate": 0.00018496531930160245, "loss": 0.0929, "step": 8521 }, { "epoch": 1.1943938332165382, "grad_norm": 0.24844740331172943, "learning_rate": 0.0001849509686677828, "loss": 0.0717, "step": 8522 }, { "epoch": 1.1945339873861247, "grad_norm": 0.3311687409877777, "learning_rate": 0.00018493661803396314, "loss": 0.0908, "step": 8523 }, { "epoch": 1.1946741415557112, "grad_norm": 0.09977242350578308, "learning_rate": 0.00018492226740014347, "loss": 0.0139, "step": 8524 }, { "epoch": 1.1948142957252978, "grad_norm": 0.28865495324134827, "learning_rate": 0.00018490791676632385, "loss": 0.0388, "step": 8525 }, { "epoch": 1.1949544498948843, "grad_norm": 0.3077395558357239, "learning_rate": 0.00018489356613250418, "loss": 0.0196, "step": 8526 }, { "epoch": 1.195094604064471, "grad_norm": 0.3865760564804077, "learning_rate": 0.0001848792154986845, "loss": 0.066, "step": 8527 }, { "epoch": 1.1952347582340574, "grad_norm": 0.5432672500610352, "learning_rate": 0.00018486486486486486, "loss": 0.1328, "step": 8528 }, { "epoch": 1.195374912403644, "grad_norm": 0.35212069749832153, "learning_rate": 0.0001848505142310452, "loss": 0.1249, "step": 8529 }, { "epoch": 
1.1955150665732306, "grad_norm": 0.19249378144741058, "learning_rate": 0.00018483616359722552, "loss": 0.0123, "step": 8530 }, { "epoch": 1.195655220742817, "grad_norm": 0.5189899802207947, "learning_rate": 0.00018482181296340587, "loss": 0.1196, "step": 8531 }, { "epoch": 1.1957953749124037, "grad_norm": 0.4813257157802582, "learning_rate": 0.0001848074623295862, "loss": 0.1401, "step": 8532 }, { "epoch": 1.1959355290819902, "grad_norm": 1.005050539970398, "learning_rate": 0.00018479311169576653, "loss": 0.1695, "step": 8533 }, { "epoch": 1.1960756832515766, "grad_norm": 0.3431726098060608, "learning_rate": 0.0001847787610619469, "loss": 0.0902, "step": 8534 }, { "epoch": 1.1962158374211633, "grad_norm": 1.6111829280853271, "learning_rate": 0.00018476441042812724, "loss": 0.1166, "step": 8535 }, { "epoch": 1.1963559915907498, "grad_norm": 0.4205247461795807, "learning_rate": 0.00018475005979430757, "loss": 0.0779, "step": 8536 }, { "epoch": 1.1964961457603365, "grad_norm": 0.2543555498123169, "learning_rate": 0.00018473570916048793, "loss": 0.0734, "step": 8537 }, { "epoch": 1.196636299929923, "grad_norm": 0.3554800748825073, "learning_rate": 0.00018472135852666826, "loss": 0.0883, "step": 8538 }, { "epoch": 1.1967764540995094, "grad_norm": 0.18363718688488007, "learning_rate": 0.00018470700789284858, "loss": 0.0463, "step": 8539 }, { "epoch": 1.196916608269096, "grad_norm": 0.15836459398269653, "learning_rate": 0.0001846926572590289, "loss": 0.0536, "step": 8540 }, { "epoch": 1.1970567624386825, "grad_norm": 0.31539079546928406, "learning_rate": 0.00018467830662520927, "loss": 0.0786, "step": 8541 }, { "epoch": 1.1971969166082692, "grad_norm": 0.23454636335372925, "learning_rate": 0.0001846639559913896, "loss": 0.0512, "step": 8542 }, { "epoch": 1.1973370707778557, "grad_norm": 0.3851999342441559, "learning_rate": 0.00018464960535756993, "loss": 0.1011, "step": 8543 }, { "epoch": 1.1974772249474421, "grad_norm": 0.3170848786830902, "learning_rate": 
0.00018463525472375028, "loss": 0.038, "step": 8544 }, { "epoch": 1.1976173791170288, "grad_norm": 0.34854915738105774, "learning_rate": 0.0001846209040899306, "loss": 0.0681, "step": 8545 }, { "epoch": 1.1977575332866153, "grad_norm": 0.21731990575790405, "learning_rate": 0.00018460655345611094, "loss": 0.0468, "step": 8546 }, { "epoch": 1.1978976874562017, "grad_norm": 0.3786073625087738, "learning_rate": 0.00018459220282229132, "loss": 0.0663, "step": 8547 }, { "epoch": 1.1980378416257884, "grad_norm": 0.1978093832731247, "learning_rate": 0.00018457785218847165, "loss": 0.0482, "step": 8548 }, { "epoch": 1.1981779957953749, "grad_norm": 0.23888733983039856, "learning_rate": 0.00018456350155465198, "loss": 0.0472, "step": 8549 }, { "epoch": 1.1983181499649616, "grad_norm": 0.45601579546928406, "learning_rate": 0.00018454915092083233, "loss": 0.0751, "step": 8550 }, { "epoch": 1.198458304134548, "grad_norm": 0.49066832661628723, "learning_rate": 0.00018453480028701266, "loss": 0.0865, "step": 8551 }, { "epoch": 1.1985984583041345, "grad_norm": 0.1663818657398224, "learning_rate": 0.000184520449653193, "loss": 0.0206, "step": 8552 }, { "epoch": 1.1987386124737212, "grad_norm": 0.7273907661437988, "learning_rate": 0.00018450609901937335, "loss": 0.0451, "step": 8553 }, { "epoch": 1.1988787666433076, "grad_norm": 0.16610193252563477, "learning_rate": 0.00018449174838555368, "loss": 0.0394, "step": 8554 }, { "epoch": 1.199018920812894, "grad_norm": 0.3918784260749817, "learning_rate": 0.000184477397751734, "loss": 0.0634, "step": 8555 }, { "epoch": 1.1991590749824808, "grad_norm": 0.09749762713909149, "learning_rate": 0.00018446304711791433, "loss": 0.0283, "step": 8556 }, { "epoch": 1.1992992291520672, "grad_norm": 0.19365651905536652, "learning_rate": 0.00018444869648409472, "loss": 0.0109, "step": 8557 }, { "epoch": 1.199439383321654, "grad_norm": 0.6888707280158997, "learning_rate": 0.00018443434585027504, "loss": 0.0822, "step": 8558 }, { "epoch": 
1.1995795374912404, "grad_norm": 0.3675529658794403, "learning_rate": 0.00018441999521645537, "loss": 0.0381, "step": 8559 }, { "epoch": 1.1997196916608268, "grad_norm": 0.285459965467453, "learning_rate": 0.00018440564458263573, "loss": 0.0694, "step": 8560 }, { "epoch": 1.1998598458304135, "grad_norm": 0.3074505925178528, "learning_rate": 0.00018439129394881606, "loss": 0.0825, "step": 8561 }, { "epoch": 1.2, "grad_norm": 0.4485792815685272, "learning_rate": 0.00018437694331499639, "loss": 0.077, "step": 8562 }, { "epoch": 1.2001401541695866, "grad_norm": 0.6382713913917542, "learning_rate": 0.00018436259268117674, "loss": 0.1037, "step": 8563 }, { "epoch": 1.200280308339173, "grad_norm": 0.7484086751937866, "learning_rate": 0.00018434824204735707, "loss": 0.1019, "step": 8564 }, { "epoch": 1.2004204625087596, "grad_norm": 0.6526056528091431, "learning_rate": 0.0001843338914135374, "loss": 0.0725, "step": 8565 }, { "epoch": 1.2005606166783462, "grad_norm": 0.4769851565361023, "learning_rate": 0.00018431954077971775, "loss": 0.1073, "step": 8566 }, { "epoch": 1.2007007708479327, "grad_norm": 0.3522621691226959, "learning_rate": 0.0001843051901458981, "loss": 0.0949, "step": 8567 }, { "epoch": 1.2008409250175194, "grad_norm": 0.11805888265371323, "learning_rate": 0.00018429083951207844, "loss": 0.0192, "step": 8568 }, { "epoch": 1.2009810791871058, "grad_norm": 0.32674145698547363, "learning_rate": 0.0001842764888782588, "loss": 0.0906, "step": 8569 }, { "epoch": 1.2011212333566923, "grad_norm": 0.39941170811653137, "learning_rate": 0.00018426213824443912, "loss": 0.0395, "step": 8570 }, { "epoch": 1.201261387526279, "grad_norm": 0.3969572186470032, "learning_rate": 0.00018424778761061945, "loss": 0.0927, "step": 8571 }, { "epoch": 1.2014015416958654, "grad_norm": 0.5477501749992371, "learning_rate": 0.0001842334369767998, "loss": 0.0626, "step": 8572 }, { "epoch": 1.2015416958654521, "grad_norm": 0.43994542956352234, "learning_rate": 0.00018421908634298013, 
"loss": 0.1254, "step": 8573 }, { "epoch": 1.2016818500350386, "grad_norm": 0.3910580277442932, "learning_rate": 0.00018420473570916046, "loss": 0.032, "step": 8574 }, { "epoch": 1.201822004204625, "grad_norm": 0.5842429995536804, "learning_rate": 0.0001841903850753408, "loss": 0.0464, "step": 8575 }, { "epoch": 1.2019621583742117, "grad_norm": 0.1773810237646103, "learning_rate": 0.00018417603444152115, "loss": 0.0329, "step": 8576 }, { "epoch": 1.2021023125437982, "grad_norm": 0.4404746890068054, "learning_rate": 0.00018416168380770148, "loss": 0.058, "step": 8577 }, { "epoch": 1.2022424667133846, "grad_norm": 0.7999029755592346, "learning_rate": 0.0001841473331738818, "loss": 0.2951, "step": 8578 }, { "epoch": 1.2023826208829713, "grad_norm": 0.30653145909309387, "learning_rate": 0.0001841329825400622, "loss": 0.0962, "step": 8579 }, { "epoch": 1.2025227750525578, "grad_norm": 0.2000439316034317, "learning_rate": 0.00018411863190624252, "loss": 0.0672, "step": 8580 }, { "epoch": 1.2026629292221442, "grad_norm": 0.5974790453910828, "learning_rate": 0.00018410428127242284, "loss": 0.1004, "step": 8581 }, { "epoch": 1.202803083391731, "grad_norm": 0.6584644913673401, "learning_rate": 0.0001840899306386032, "loss": 0.0443, "step": 8582 }, { "epoch": 1.2029432375613174, "grad_norm": 0.09968796372413635, "learning_rate": 0.00018407558000478353, "loss": 0.019, "step": 8583 }, { "epoch": 1.203083391730904, "grad_norm": 0.8579974174499512, "learning_rate": 0.00018406122937096386, "loss": 0.1454, "step": 8584 }, { "epoch": 1.2032235459004905, "grad_norm": 1.0567073822021484, "learning_rate": 0.0001840468787371442, "loss": 0.1917, "step": 8585 }, { "epoch": 1.203363700070077, "grad_norm": 0.20587460696697235, "learning_rate": 0.00018403252810332454, "loss": 0.0284, "step": 8586 }, { "epoch": 1.2035038542396637, "grad_norm": 0.21338842809200287, "learning_rate": 0.00018401817746950487, "loss": 0.0574, "step": 8587 }, { "epoch": 1.2036440084092501, "grad_norm": 
0.2831384241580963, "learning_rate": 0.00018400382683568525, "loss": 0.0335, "step": 8588 }, { "epoch": 1.2037841625788368, "grad_norm": 0.24759230017662048, "learning_rate": 0.00018398947620186558, "loss": 0.0482, "step": 8589 }, { "epoch": 1.2039243167484233, "grad_norm": 0.18353457748889923, "learning_rate": 0.0001839751255680459, "loss": 0.0466, "step": 8590 }, { "epoch": 1.2040644709180097, "grad_norm": 0.2520129084587097, "learning_rate": 0.00018396077493422624, "loss": 0.0859, "step": 8591 }, { "epoch": 1.2042046250875964, "grad_norm": 0.4092220664024353, "learning_rate": 0.0001839464243004066, "loss": 0.0803, "step": 8592 }, { "epoch": 1.2043447792571829, "grad_norm": 0.20996958017349243, "learning_rate": 0.00018393207366658692, "loss": 0.0617, "step": 8593 }, { "epoch": 1.2044849334267695, "grad_norm": 0.25399351119995117, "learning_rate": 0.00018391772303276725, "loss": 0.0463, "step": 8594 }, { "epoch": 1.204625087596356, "grad_norm": 0.2289029359817505, "learning_rate": 0.0001839033723989476, "loss": 0.0401, "step": 8595 }, { "epoch": 1.2047652417659425, "grad_norm": 0.18452684581279755, "learning_rate": 0.00018388902176512794, "loss": 0.029, "step": 8596 }, { "epoch": 1.2049053959355291, "grad_norm": 0.24377408623695374, "learning_rate": 0.00018387467113130826, "loss": 0.0401, "step": 8597 }, { "epoch": 1.2050455501051156, "grad_norm": 0.3590956926345825, "learning_rate": 0.00018386032049748862, "loss": 0.0608, "step": 8598 }, { "epoch": 1.2051857042747023, "grad_norm": 0.44609805941581726, "learning_rate": 0.00018384596986366898, "loss": 0.0478, "step": 8599 }, { "epoch": 1.2053258584442887, "grad_norm": 0.4489005506038666, "learning_rate": 0.0001838316192298493, "loss": 0.0228, "step": 8600 }, { "epoch": 1.2054660126138752, "grad_norm": 0.275067538022995, "learning_rate": 0.00018381726859602966, "loss": 0.047, "step": 8601 }, { "epoch": 1.2056061667834619, "grad_norm": 0.4546056091785431, "learning_rate": 0.00018380291796221, "loss": 0.0335, "step": 
8602 }, { "epoch": 1.2057463209530483, "grad_norm": 0.6824021339416504, "learning_rate": 0.00018378856732839032, "loss": 0.074, "step": 8603 }, { "epoch": 1.2058864751226348, "grad_norm": 0.38738110661506653, "learning_rate": 0.00018377421669457067, "loss": 0.0656, "step": 8604 }, { "epoch": 1.2060266292922215, "grad_norm": 0.6033779978752136, "learning_rate": 0.000183759866060751, "loss": 0.0683, "step": 8605 }, { "epoch": 1.206166783461808, "grad_norm": 0.18526332080364227, "learning_rate": 0.00018374551542693133, "loss": 0.0207, "step": 8606 }, { "epoch": 1.2063069376313946, "grad_norm": 0.4811161756515503, "learning_rate": 0.00018373116479311169, "loss": 0.0477, "step": 8607 }, { "epoch": 1.206447091800981, "grad_norm": 0.2796858251094818, "learning_rate": 0.00018371681415929201, "loss": 0.0683, "step": 8608 }, { "epoch": 1.2065872459705675, "grad_norm": 0.5424320101737976, "learning_rate": 0.00018370246352547234, "loss": 0.0892, "step": 8609 }, { "epoch": 1.2067274001401542, "grad_norm": 0.712868332862854, "learning_rate": 0.00018368811289165267, "loss": 0.1111, "step": 8610 }, { "epoch": 1.2068675543097407, "grad_norm": 0.2624228596687317, "learning_rate": 0.00018367376225783305, "loss": 0.05, "step": 8611 }, { "epoch": 1.2070077084793271, "grad_norm": 0.3732443153858185, "learning_rate": 0.00018365941162401338, "loss": 0.0601, "step": 8612 }, { "epoch": 1.2071478626489138, "grad_norm": 0.47797560691833496, "learning_rate": 0.0001836450609901937, "loss": 0.0772, "step": 8613 }, { "epoch": 1.2072880168185003, "grad_norm": 1.25688636302948, "learning_rate": 0.00018363071035637407, "loss": 0.0382, "step": 8614 }, { "epoch": 1.207428170988087, "grad_norm": 0.3314021825790405, "learning_rate": 0.0001836163597225544, "loss": 0.0912, "step": 8615 }, { "epoch": 1.2075683251576734, "grad_norm": 0.6147961616516113, "learning_rate": 0.00018360200908873472, "loss": 0.2393, "step": 8616 }, { "epoch": 1.2077084793272599, "grad_norm": 0.2781028151512146, "learning_rate": 
0.00018358765845491508, "loss": 0.0307, "step": 8617 }, { "epoch": 1.2078486334968466, "grad_norm": 0.285336971282959, "learning_rate": 0.0001835733078210954, "loss": 0.035, "step": 8618 }, { "epoch": 1.207988787666433, "grad_norm": 0.33116811513900757, "learning_rate": 0.00018355895718727574, "loss": 0.0207, "step": 8619 }, { "epoch": 1.2081289418360197, "grad_norm": 0.3539915680885315, "learning_rate": 0.00018354460655345612, "loss": 0.0645, "step": 8620 }, { "epoch": 1.2082690960056062, "grad_norm": 0.233585923910141, "learning_rate": 0.00018353025591963645, "loss": 0.0165, "step": 8621 }, { "epoch": 1.2084092501751926, "grad_norm": 0.28798067569732666, "learning_rate": 0.00018351590528581678, "loss": 0.1027, "step": 8622 }, { "epoch": 1.2085494043447793, "grad_norm": 0.41744330525398254, "learning_rate": 0.00018350155465199713, "loss": 0.0484, "step": 8623 }, { "epoch": 1.2086895585143658, "grad_norm": 0.21721336245536804, "learning_rate": 0.00018348720401817746, "loss": 0.0511, "step": 8624 }, { "epoch": 1.2088297126839525, "grad_norm": 0.156241774559021, "learning_rate": 0.0001834728533843578, "loss": 0.0305, "step": 8625 }, { "epoch": 1.208969866853539, "grad_norm": 0.34080907702445984, "learning_rate": 0.00018345850275053812, "loss": 0.0402, "step": 8626 }, { "epoch": 1.2091100210231254, "grad_norm": 0.7670702338218689, "learning_rate": 0.00018344415211671847, "loss": 0.1638, "step": 8627 }, { "epoch": 1.209250175192712, "grad_norm": 1.6789828538894653, "learning_rate": 0.0001834298014828988, "loss": 0.1111, "step": 8628 }, { "epoch": 1.2093903293622985, "grad_norm": 0.5524570941925049, "learning_rate": 0.00018341545084907913, "loss": 0.0938, "step": 8629 }, { "epoch": 1.2095304835318852, "grad_norm": 0.8256645202636719, "learning_rate": 0.0001834011002152595, "loss": 0.0885, "step": 8630 }, { "epoch": 1.2096706377014717, "grad_norm": 1.3773860931396484, "learning_rate": 0.00018338674958143984, "loss": 0.1457, "step": 8631 }, { "epoch": 1.209810791871058, 
"grad_norm": 1.313921570777893, "learning_rate": 0.00018337239894762017, "loss": 0.2281, "step": 8632 }, { "epoch": 1.2099509460406448, "grad_norm": 1.0833861827850342, "learning_rate": 0.00018335804831380053, "loss": 0.0567, "step": 8633 }, { "epoch": 1.2100911002102313, "grad_norm": 3.6338839530944824, "learning_rate": 0.00018334369767998085, "loss": 0.12, "step": 8634 }, { "epoch": 1.2102312543798177, "grad_norm": 0.7777293920516968, "learning_rate": 0.00018332934704616118, "loss": 0.1219, "step": 8635 }, { "epoch": 1.2103714085494044, "grad_norm": 0.2840065062046051, "learning_rate": 0.00018331499641234154, "loss": 0.0825, "step": 8636 }, { "epoch": 1.2105115627189909, "grad_norm": 0.3750748038291931, "learning_rate": 0.00018330064577852187, "loss": 0.061, "step": 8637 }, { "epoch": 1.2106517168885773, "grad_norm": 0.45787206292152405, "learning_rate": 0.0001832862951447022, "loss": 0.1067, "step": 8638 }, { "epoch": 1.210791871058164, "grad_norm": 0.18101774156093597, "learning_rate": 0.00018327194451088255, "loss": 0.0281, "step": 8639 }, { "epoch": 1.2109320252277505, "grad_norm": 0.45391911268234253, "learning_rate": 0.00018325759387706288, "loss": 0.0682, "step": 8640 }, { "epoch": 1.2110721793973371, "grad_norm": 0.2658536732196808, "learning_rate": 0.0001832432432432432, "loss": 0.0229, "step": 8641 }, { "epoch": 1.2112123335669236, "grad_norm": 0.4262392818927765, "learning_rate": 0.0001832288926094236, "loss": 0.073, "step": 8642 }, { "epoch": 1.21135248773651, "grad_norm": 0.4265669882297516, "learning_rate": 0.00018321454197560392, "loss": 0.0621, "step": 8643 }, { "epoch": 1.2114926419060967, "grad_norm": 0.3030928075313568, "learning_rate": 0.00018320019134178425, "loss": 0.049, "step": 8644 }, { "epoch": 1.2116327960756832, "grad_norm": 0.23425814509391785, "learning_rate": 0.00018318584070796458, "loss": 0.0282, "step": 8645 }, { "epoch": 1.2117729502452699, "grad_norm": 0.42376530170440674, "learning_rate": 0.00018317149007414493, "loss": 
0.0784, "step": 8646 }, { "epoch": 1.2119131044148563, "grad_norm": 0.11200292408466339, "learning_rate": 0.00018315713944032526, "loss": 0.0149, "step": 8647 }, { "epoch": 1.2120532585844428, "grad_norm": 0.29357296228408813, "learning_rate": 0.0001831427888065056, "loss": 0.0387, "step": 8648 }, { "epoch": 1.2121934127540295, "grad_norm": 0.3336953818798065, "learning_rate": 0.00018312843817268595, "loss": 0.1397, "step": 8649 }, { "epoch": 1.212333566923616, "grad_norm": 0.3192681670188904, "learning_rate": 0.00018311408753886627, "loss": 0.0746, "step": 8650 }, { "epoch": 1.2124737210932026, "grad_norm": 0.20528243482112885, "learning_rate": 0.0001830997369050466, "loss": 0.0257, "step": 8651 }, { "epoch": 1.212613875262789, "grad_norm": 0.16346094012260437, "learning_rate": 0.00018308538627122699, "loss": 0.0247, "step": 8652 }, { "epoch": 1.2127540294323755, "grad_norm": 0.21894079446792603, "learning_rate": 0.00018307103563740731, "loss": 0.0609, "step": 8653 }, { "epoch": 1.2128941836019622, "grad_norm": 0.26416367292404175, "learning_rate": 0.00018305668500358764, "loss": 0.0317, "step": 8654 }, { "epoch": 1.2130343377715487, "grad_norm": 0.4925996959209442, "learning_rate": 0.000183042334369768, "loss": 0.071, "step": 8655 }, { "epoch": 1.2131744919411354, "grad_norm": 0.20673345029354095, "learning_rate": 0.00018302798373594833, "loss": 0.0171, "step": 8656 }, { "epoch": 1.2133146461107218, "grad_norm": 0.43811532855033875, "learning_rate": 0.00018301363310212866, "loss": 0.0478, "step": 8657 }, { "epoch": 1.2134548002803083, "grad_norm": 0.2935405671596527, "learning_rate": 0.000182999282468309, "loss": 0.0571, "step": 8658 }, { "epoch": 1.213594954449895, "grad_norm": 0.2003026008605957, "learning_rate": 0.00018298493183448934, "loss": 0.028, "step": 8659 }, { "epoch": 1.2137351086194814, "grad_norm": 0.20540444552898407, "learning_rate": 0.00018297058120066967, "loss": 0.0426, "step": 8660 }, { "epoch": 1.213875262789068, "grad_norm": 
0.34198060631752014, "learning_rate": 0.00018295623056685, "loss": 0.0591, "step": 8661 }, { "epoch": 1.2140154169586546, "grad_norm": 0.5858361124992371, "learning_rate": 0.00018294187993303038, "loss": 0.0414, "step": 8662 }, { "epoch": 1.214155571128241, "grad_norm": 0.33844229578971863, "learning_rate": 0.0001829275292992107, "loss": 0.0306, "step": 8663 }, { "epoch": 1.2142957252978277, "grad_norm": 0.43569090962409973, "learning_rate": 0.00018291317866539104, "loss": 0.0271, "step": 8664 }, { "epoch": 1.2144358794674142, "grad_norm": 0.36916837096214294, "learning_rate": 0.0001828988280315714, "loss": 0.0828, "step": 8665 }, { "epoch": 1.2145760336370006, "grad_norm": 0.33154740929603577, "learning_rate": 0.00018288447739775172, "loss": 0.039, "step": 8666 }, { "epoch": 1.2147161878065873, "grad_norm": 0.24683552980422974, "learning_rate": 0.00018287012676393205, "loss": 0.0844, "step": 8667 }, { "epoch": 1.2148563419761738, "grad_norm": 0.09857519716024399, "learning_rate": 0.0001828557761301124, "loss": 0.0132, "step": 8668 }, { "epoch": 1.2149964961457602, "grad_norm": 0.6895578503608704, "learning_rate": 0.00018284142549629273, "loss": 0.0485, "step": 8669 }, { "epoch": 1.215136650315347, "grad_norm": 0.36764654517173767, "learning_rate": 0.00018282707486247306, "loss": 0.0796, "step": 8670 }, { "epoch": 1.2152768044849334, "grad_norm": 0.16565701365470886, "learning_rate": 0.00018281272422865342, "loss": 0.0278, "step": 8671 }, { "epoch": 1.21541695865452, "grad_norm": 0.5505766868591309, "learning_rate": 0.00018279837359483375, "loss": 0.0809, "step": 8672 }, { "epoch": 1.2155571128241065, "grad_norm": 0.4263438582420349, "learning_rate": 0.00018278402296101408, "loss": 0.0585, "step": 8673 }, { "epoch": 1.215697266993693, "grad_norm": 0.5488643050193787, "learning_rate": 0.00018276967232719446, "loss": 0.0342, "step": 8674 }, { "epoch": 1.2158374211632796, "grad_norm": 0.38896697759628296, "learning_rate": 0.0001827553216933748, "loss": 0.0321, "step": 
8675 }, { "epoch": 1.215977575332866, "grad_norm": 0.17850738763809204, "learning_rate": 0.00018274097105955512, "loss": 0.016, "step": 8676 }, { "epoch": 1.2161177295024528, "grad_norm": 0.3042354881763458, "learning_rate": 0.00018272662042573547, "loss": 0.0294, "step": 8677 }, { "epoch": 1.2162578836720392, "grad_norm": 0.5600208044052124, "learning_rate": 0.0001827122697919158, "loss": 0.1301, "step": 8678 }, { "epoch": 1.2163980378416257, "grad_norm": 0.7561913132667542, "learning_rate": 0.00018269791915809613, "loss": 0.1005, "step": 8679 }, { "epoch": 1.2165381920112124, "grad_norm": 0.34700363874435425, "learning_rate": 0.00018268356852427646, "loss": 0.0491, "step": 8680 }, { "epoch": 1.2166783461807988, "grad_norm": 0.8128876686096191, "learning_rate": 0.0001826692178904568, "loss": 0.1313, "step": 8681 }, { "epoch": 1.2168185003503855, "grad_norm": 0.40929701924324036, "learning_rate": 0.00018265486725663714, "loss": 0.0911, "step": 8682 }, { "epoch": 1.216958654519972, "grad_norm": 1.4489890336990356, "learning_rate": 0.00018264051662281747, "loss": 0.2669, "step": 8683 }, { "epoch": 1.2170988086895584, "grad_norm": 0.5744010210037231, "learning_rate": 0.00018262616598899785, "loss": 0.0576, "step": 8684 }, { "epoch": 1.2172389628591451, "grad_norm": 1.720779538154602, "learning_rate": 0.00018261181535517818, "loss": 0.2815, "step": 8685 }, { "epoch": 1.2173791170287316, "grad_norm": 0.3878129720687866, "learning_rate": 0.0001825974647213585, "loss": 0.0638, "step": 8686 }, { "epoch": 1.2175192711983183, "grad_norm": 0.344102144241333, "learning_rate": 0.00018258311408753886, "loss": 0.065, "step": 8687 }, { "epoch": 1.2176594253679047, "grad_norm": 0.2713230550289154, "learning_rate": 0.0001825687634537192, "loss": 0.0451, "step": 8688 }, { "epoch": 1.2177995795374912, "grad_norm": 0.4024549424648285, "learning_rate": 0.00018255441281989952, "loss": 0.0866, "step": 8689 }, { "epoch": 1.2179397337070779, "grad_norm": 0.32044848799705505, 
"learning_rate": 0.00018254006218607988, "loss": 0.0768, "step": 8690 }, { "epoch": 1.2180798878766643, "grad_norm": 0.2760136127471924, "learning_rate": 0.0001825257115522602, "loss": 0.0335, "step": 8691 }, { "epoch": 1.2182200420462508, "grad_norm": 0.20203445851802826, "learning_rate": 0.00018251136091844053, "loss": 0.0445, "step": 8692 }, { "epoch": 1.2183601962158375, "grad_norm": 0.2641170024871826, "learning_rate": 0.0001824970102846209, "loss": 0.0391, "step": 8693 }, { "epoch": 1.218500350385424, "grad_norm": 0.40196365118026733, "learning_rate": 0.00018248265965080125, "loss": 0.0575, "step": 8694 }, { "epoch": 1.2186405045550106, "grad_norm": 0.29076626896858215, "learning_rate": 0.00018246830901698157, "loss": 0.0579, "step": 8695 }, { "epoch": 1.218780658724597, "grad_norm": 0.10989604145288467, "learning_rate": 0.0001824539583831619, "loss": 0.012, "step": 8696 }, { "epoch": 1.2189208128941835, "grad_norm": 0.2224172055721283, "learning_rate": 0.00018243960774934226, "loss": 0.0659, "step": 8697 }, { "epoch": 1.2190609670637702, "grad_norm": 0.2968049943447113, "learning_rate": 0.0001824252571155226, "loss": 0.0881, "step": 8698 }, { "epoch": 1.2192011212333567, "grad_norm": 0.27378183603286743, "learning_rate": 0.00018241090648170292, "loss": 0.0732, "step": 8699 }, { "epoch": 1.2193412754029431, "grad_norm": 0.2966006398200989, "learning_rate": 0.00018239655584788327, "loss": 0.0758, "step": 8700 }, { "epoch": 1.2194814295725298, "grad_norm": 0.27214285731315613, "learning_rate": 0.0001823822052140636, "loss": 0.0476, "step": 8701 }, { "epoch": 1.2196215837421163, "grad_norm": 0.4161134362220764, "learning_rate": 0.00018236785458024393, "loss": 0.0863, "step": 8702 }, { "epoch": 1.219761737911703, "grad_norm": 0.2782464921474457, "learning_rate": 0.00018235350394642428, "loss": 0.0292, "step": 8703 }, { "epoch": 1.2199018920812894, "grad_norm": 0.2341078370809555, "learning_rate": 0.0001823391533126046, "loss": 0.0389, "step": 8704 }, { "epoch": 
1.2200420462508759, "grad_norm": 0.2888844311237335, "learning_rate": 0.00018232480267878494, "loss": 0.092, "step": 8705 }, { "epoch": 1.2201822004204625, "grad_norm": 0.3119038939476013, "learning_rate": 0.00018231045204496532, "loss": 0.0402, "step": 8706 }, { "epoch": 1.220322354590049, "grad_norm": 0.2826389670372009, "learning_rate": 0.00018229610141114565, "loss": 0.0375, "step": 8707 }, { "epoch": 1.2204625087596357, "grad_norm": 0.5185393691062927, "learning_rate": 0.00018228175077732598, "loss": 0.0844, "step": 8708 }, { "epoch": 1.2206026629292221, "grad_norm": 0.3574616014957428, "learning_rate": 0.00018226740014350634, "loss": 0.0656, "step": 8709 }, { "epoch": 1.2207428170988086, "grad_norm": 0.6013672947883606, "learning_rate": 0.00018225304950968667, "loss": 0.0359, "step": 8710 }, { "epoch": 1.2208829712683953, "grad_norm": 0.4368615746498108, "learning_rate": 0.000182238698875867, "loss": 0.0323, "step": 8711 }, { "epoch": 1.2210231254379817, "grad_norm": 1.1246472597122192, "learning_rate": 0.00018222434824204735, "loss": 0.0858, "step": 8712 }, { "epoch": 1.2211632796075684, "grad_norm": 0.1949842870235443, "learning_rate": 0.00018220999760822768, "loss": 0.0154, "step": 8713 }, { "epoch": 1.221303433777155, "grad_norm": 0.14524754881858826, "learning_rate": 0.000182195646974408, "loss": 0.0326, "step": 8714 }, { "epoch": 1.2214435879467413, "grad_norm": 0.22695517539978027, "learning_rate": 0.00018218129634058834, "loss": 0.0218, "step": 8715 }, { "epoch": 1.221583742116328, "grad_norm": 0.09179423749446869, "learning_rate": 0.00018216694570676872, "loss": 0.0189, "step": 8716 }, { "epoch": 1.2217238962859145, "grad_norm": 0.2989479601383209, "learning_rate": 0.00018215259507294905, "loss": 0.057, "step": 8717 }, { "epoch": 1.2218640504555012, "grad_norm": 0.3001638650894165, "learning_rate": 0.00018213824443912938, "loss": 0.0588, "step": 8718 }, { "epoch": 1.2220042046250876, "grad_norm": 0.3052922487258911, "learning_rate": 
0.00018212389380530973, "loss": 0.0769, "step": 8719 }, { "epoch": 1.222144358794674, "grad_norm": 0.30948060750961304, "learning_rate": 0.00018210954317149006, "loss": 0.0604, "step": 8720 }, { "epoch": 1.2222845129642608, "grad_norm": 0.3422660827636719, "learning_rate": 0.0001820951925376704, "loss": 0.0605, "step": 8721 }, { "epoch": 1.2224246671338472, "grad_norm": 0.43985825777053833, "learning_rate": 0.00018208084190385074, "loss": 0.0545, "step": 8722 }, { "epoch": 1.2225648213034337, "grad_norm": 0.1907966583967209, "learning_rate": 0.00018206649127003107, "loss": 0.045, "step": 8723 }, { "epoch": 1.2227049754730204, "grad_norm": 0.27309638261795044, "learning_rate": 0.0001820521406362114, "loss": 0.0443, "step": 8724 }, { "epoch": 1.2228451296426068, "grad_norm": 0.3246774673461914, "learning_rate": 0.00018203779000239176, "loss": 0.0429, "step": 8725 }, { "epoch": 1.2229852838121933, "grad_norm": 0.0974561870098114, "learning_rate": 0.0001820234393685721, "loss": 0.015, "step": 8726 }, { "epoch": 1.22312543798178, "grad_norm": 0.40235310792922974, "learning_rate": 0.00018200908873475244, "loss": 0.0406, "step": 8727 }, { "epoch": 1.2232655921513664, "grad_norm": 0.6393806338310242, "learning_rate": 0.0001819947381009328, "loss": 0.2022, "step": 8728 }, { "epoch": 1.2234057463209531, "grad_norm": 1.2388378381729126, "learning_rate": 0.00018198038746711313, "loss": 0.1056, "step": 8729 }, { "epoch": 1.2235459004905396, "grad_norm": 0.5445995330810547, "learning_rate": 0.00018196603683329345, "loss": 0.0872, "step": 8730 }, { "epoch": 1.223686054660126, "grad_norm": 0.8528308272361755, "learning_rate": 0.00018195168619947378, "loss": 0.0245, "step": 8731 }, { "epoch": 1.2238262088297127, "grad_norm": 1.624738335609436, "learning_rate": 0.00018193733556565414, "loss": 0.1435, "step": 8732 }, { "epoch": 1.2239663629992992, "grad_norm": 1.2054100036621094, "learning_rate": 0.00018192298493183447, "loss": 0.0566, "step": 8733 }, { "epoch": 1.2241065171688859, 
"grad_norm": 0.6819858551025391, "learning_rate": 0.0001819086342980148, "loss": 0.0381, "step": 8734 }, { "epoch": 1.2242466713384723, "grad_norm": 0.5789105296134949, "learning_rate": 0.00018189428366419515, "loss": 0.0779, "step": 8735 }, { "epoch": 1.2243868255080588, "grad_norm": 0.24732036888599396, "learning_rate": 0.00018187993303037548, "loss": 0.0607, "step": 8736 }, { "epoch": 1.2245269796776455, "grad_norm": 0.421773761510849, "learning_rate": 0.0001818655823965558, "loss": 0.0439, "step": 8737 }, { "epoch": 1.224667133847232, "grad_norm": 0.24831874668598175, "learning_rate": 0.0001818512317627362, "loss": 0.0493, "step": 8738 }, { "epoch": 1.2248072880168186, "grad_norm": 0.42275428771972656, "learning_rate": 0.00018183688112891652, "loss": 0.0665, "step": 8739 }, { "epoch": 1.224947442186405, "grad_norm": 0.2681684195995331, "learning_rate": 0.00018182253049509685, "loss": 0.0267, "step": 8740 }, { "epoch": 1.2250875963559915, "grad_norm": 0.30987536907196045, "learning_rate": 0.0001818081798612772, "loss": 0.0793, "step": 8741 }, { "epoch": 1.2252277505255782, "grad_norm": 0.319633811712265, "learning_rate": 0.00018179382922745753, "loss": 0.076, "step": 8742 }, { "epoch": 1.2253679046951647, "grad_norm": 0.8478633165359497, "learning_rate": 0.00018177947859363786, "loss": 0.1082, "step": 8743 }, { "epoch": 1.2255080588647513, "grad_norm": 0.3469730019569397, "learning_rate": 0.00018176512795981822, "loss": 0.0437, "step": 8744 }, { "epoch": 1.2256482130343378, "grad_norm": 0.3907775282859802, "learning_rate": 0.00018175077732599854, "loss": 0.0584, "step": 8745 }, { "epoch": 1.2257883672039243, "grad_norm": 0.47700566053390503, "learning_rate": 0.00018173642669217887, "loss": 0.1019, "step": 8746 }, { "epoch": 1.225928521373511, "grad_norm": 0.7770333886146545, "learning_rate": 0.0001817220760583592, "loss": 0.0189, "step": 8747 }, { "epoch": 1.2260686755430974, "grad_norm": 0.42544013261795044, "learning_rate": 0.00018170772542453958, "loss": 
0.0513, "step": 8748 }, { "epoch": 1.2262088297126839, "grad_norm": 0.11113908886909485, "learning_rate": 0.0001816933747907199, "loss": 0.0144, "step": 8749 }, { "epoch": 1.2263489838822705, "grad_norm": 0.2252221405506134, "learning_rate": 0.00018167902415690024, "loss": 0.0115, "step": 8750 }, { "epoch": 1.226489138051857, "grad_norm": 0.27279195189476013, "learning_rate": 0.0001816646735230806, "loss": 0.0405, "step": 8751 }, { "epoch": 1.2266292922214437, "grad_norm": 0.16972167789936066, "learning_rate": 0.00018165032288926093, "loss": 0.0201, "step": 8752 }, { "epoch": 1.2267694463910301, "grad_norm": 0.15438127517700195, "learning_rate": 0.00018163597225544125, "loss": 0.0072, "step": 8753 }, { "epoch": 1.2269096005606166, "grad_norm": 0.26638391613960266, "learning_rate": 0.0001816216216216216, "loss": 0.0326, "step": 8754 }, { "epoch": 1.2270497547302033, "grad_norm": 0.47062039375305176, "learning_rate": 0.00018160727098780194, "loss": 0.1566, "step": 8755 }, { "epoch": 1.2271899088997897, "grad_norm": 0.25272372364997864, "learning_rate": 0.00018159292035398227, "loss": 0.0971, "step": 8756 }, { "epoch": 1.2273300630693762, "grad_norm": 0.1324554979801178, "learning_rate": 0.00018157856972016265, "loss": 0.0201, "step": 8757 }, { "epoch": 1.2274702172389629, "grad_norm": 0.27917635440826416, "learning_rate": 0.00018156421908634298, "loss": 0.0989, "step": 8758 }, { "epoch": 1.2276103714085493, "grad_norm": 0.17215456068515778, "learning_rate": 0.0001815498684525233, "loss": 0.0357, "step": 8759 }, { "epoch": 1.227750525578136, "grad_norm": 0.350581556558609, "learning_rate": 0.00018153551781870366, "loss": 0.063, "step": 8760 }, { "epoch": 1.2278906797477225, "grad_norm": 0.293480783700943, "learning_rate": 0.000181521167184884, "loss": 0.0486, "step": 8761 }, { "epoch": 1.228030833917309, "grad_norm": 0.4810590147972107, "learning_rate": 0.00018150681655106432, "loss": 0.0756, "step": 8762 }, { "epoch": 1.2281709880868956, "grad_norm": 
0.8719367384910583, "learning_rate": 0.00018149246591724468, "loss": 0.0653, "step": 8763 }, { "epoch": 1.228311142256482, "grad_norm": 0.15654395520687103, "learning_rate": 0.000181478115283425, "loss": 0.0231, "step": 8764 }, { "epoch": 1.2284512964260688, "grad_norm": 0.20144544541835785, "learning_rate": 0.00018146376464960533, "loss": 0.0468, "step": 8765 }, { "epoch": 1.2285914505956552, "grad_norm": 0.08369973301887512, "learning_rate": 0.00018144941401578566, "loss": 0.0218, "step": 8766 }, { "epoch": 1.2287316047652417, "grad_norm": 0.21827556192874908, "learning_rate": 0.00018143506338196602, "loss": 0.0369, "step": 8767 }, { "epoch": 1.2288717589348284, "grad_norm": 0.1105455607175827, "learning_rate": 0.00018142071274814635, "loss": 0.0202, "step": 8768 }, { "epoch": 1.2290119131044148, "grad_norm": 0.20448343455791473, "learning_rate": 0.00018140636211432667, "loss": 0.0279, "step": 8769 }, { "epoch": 1.2291520672740015, "grad_norm": 0.41001424193382263, "learning_rate": 0.00018139201148050706, "loss": 0.0954, "step": 8770 }, { "epoch": 1.229292221443588, "grad_norm": 0.4907464385032654, "learning_rate": 0.00018137766084668739, "loss": 0.0768, "step": 8771 }, { "epoch": 1.2294323756131744, "grad_norm": 0.5235586166381836, "learning_rate": 0.00018136331021286771, "loss": 0.0687, "step": 8772 }, { "epoch": 1.229572529782761, "grad_norm": 0.6149177551269531, "learning_rate": 0.00018134895957904807, "loss": 0.0506, "step": 8773 }, { "epoch": 1.2297126839523476, "grad_norm": 0.944739043712616, "learning_rate": 0.0001813346089452284, "loss": 0.0479, "step": 8774 }, { "epoch": 1.2298528381219342, "grad_norm": 0.18646426498889923, "learning_rate": 0.00018132025831140873, "loss": 0.0195, "step": 8775 }, { "epoch": 1.2299929922915207, "grad_norm": 0.4853493571281433, "learning_rate": 0.00018130590767758908, "loss": 0.0403, "step": 8776 }, { "epoch": 1.2301331464611072, "grad_norm": 0.43826955556869507, "learning_rate": 0.0001812915570437694, "loss": 0.0586, 
"step": 8777 }, { "epoch": 1.2302733006306938, "grad_norm": 0.39421191811561584, "learning_rate": 0.00018127720640994974, "loss": 0.0364, "step": 8778 }, { "epoch": 1.2304134548002803, "grad_norm": 0.3942345678806305, "learning_rate": 0.00018126285577613012, "loss": 0.0823, "step": 8779 }, { "epoch": 1.2305536089698668, "grad_norm": 0.463264524936676, "learning_rate": 0.00018124850514231045, "loss": 0.0513, "step": 8780 }, { "epoch": 1.2306937631394534, "grad_norm": 0.5555284023284912, "learning_rate": 0.00018123415450849078, "loss": 0.0493, "step": 8781 }, { "epoch": 1.23083391730904, "grad_norm": 0.30923137068748474, "learning_rate": 0.0001812198038746711, "loss": 0.0875, "step": 8782 }, { "epoch": 1.2309740714786266, "grad_norm": 0.3791423439979553, "learning_rate": 0.00018120545324085146, "loss": 0.0399, "step": 8783 }, { "epoch": 1.231114225648213, "grad_norm": 0.5037143230438232, "learning_rate": 0.0001811911026070318, "loss": 0.0781, "step": 8784 }, { "epoch": 1.2312543798177995, "grad_norm": 0.27099910378456116, "learning_rate": 0.00018117675197321212, "loss": 0.0147, "step": 8785 }, { "epoch": 1.2313945339873862, "grad_norm": 0.3733114004135132, "learning_rate": 0.00018116240133939248, "loss": 0.0733, "step": 8786 }, { "epoch": 1.2315346881569726, "grad_norm": 0.43500620126724243, "learning_rate": 0.0001811480507055728, "loss": 0.0609, "step": 8787 }, { "epoch": 1.231674842326559, "grad_norm": 0.4153698980808258, "learning_rate": 0.00018113370007175313, "loss": 0.0469, "step": 8788 }, { "epoch": 1.2318149964961458, "grad_norm": 0.14749528467655182, "learning_rate": 0.00018111934943793352, "loss": 0.0226, "step": 8789 }, { "epoch": 1.2319551506657322, "grad_norm": 0.18205280601978302, "learning_rate": 0.00018110499880411384, "loss": 0.0201, "step": 8790 }, { "epoch": 1.232095304835319, "grad_norm": 0.21400584280490875, "learning_rate": 0.00018109064817029417, "loss": 0.0303, "step": 8791 }, { "epoch": 1.2322354590049054, "grad_norm": 0.31804898381233215, 
"learning_rate": 0.00018107629753647453, "loss": 0.042, "step": 8792 }, { "epoch": 1.2323756131744918, "grad_norm": 0.2616739273071289, "learning_rate": 0.00018106194690265486, "loss": 0.0702, "step": 8793 }, { "epoch": 1.2325157673440785, "grad_norm": 0.07070483267307281, "learning_rate": 0.00018104759626883519, "loss": 0.0205, "step": 8794 }, { "epoch": 1.232655921513665, "grad_norm": 0.3609790503978729, "learning_rate": 0.00018103324563501554, "loss": 0.0565, "step": 8795 }, { "epoch": 1.2327960756832517, "grad_norm": 0.3016926646232605, "learning_rate": 0.00018101889500119587, "loss": 0.0397, "step": 8796 }, { "epoch": 1.2329362298528381, "grad_norm": 0.5391256213188171, "learning_rate": 0.0001810045443673762, "loss": 0.1211, "step": 8797 }, { "epoch": 1.2330763840224246, "grad_norm": 0.13770680129528046, "learning_rate": 0.00018099019373355655, "loss": 0.0268, "step": 8798 }, { "epoch": 1.2332165381920113, "grad_norm": 0.8050645589828491, "learning_rate": 0.00018097584309973688, "loss": 0.0787, "step": 8799 }, { "epoch": 1.2333566923615977, "grad_norm": 0.5566849112510681, "learning_rate": 0.0001809614924659172, "loss": 0.0691, "step": 8800 }, { "epoch": 1.2334968465311844, "grad_norm": 0.22305220365524292, "learning_rate": 0.00018094714183209754, "loss": 0.0175, "step": 8801 }, { "epoch": 1.2336370007007709, "grad_norm": 0.5556771755218506, "learning_rate": 0.00018093279119827792, "loss": 0.0589, "step": 8802 }, { "epoch": 1.2337771548703573, "grad_norm": 0.24585343897342682, "learning_rate": 0.00018091844056445825, "loss": 0.0519, "step": 8803 }, { "epoch": 1.233917309039944, "grad_norm": 0.1927025318145752, "learning_rate": 0.00018090408993063858, "loss": 0.018, "step": 8804 }, { "epoch": 1.2340574632095305, "grad_norm": 0.18265752494335175, "learning_rate": 0.00018088973929681894, "loss": 0.0278, "step": 8805 }, { "epoch": 1.2341976173791172, "grad_norm": 0.2701992094516754, "learning_rate": 0.00018087538866299926, "loss": 0.0372, "step": 8806 }, { 
"epoch": 1.2343377715487036, "grad_norm": 0.2678571343421936, "learning_rate": 0.0001808610380291796, "loss": 0.0504, "step": 8807 }, { "epoch": 1.23447792571829, "grad_norm": 0.4660065472126007, "learning_rate": 0.00018084668739535995, "loss": 0.0378, "step": 8808 }, { "epoch": 1.2346180798878768, "grad_norm": 0.39514169096946716, "learning_rate": 0.00018083233676154028, "loss": 0.106, "step": 8809 }, { "epoch": 1.2347582340574632, "grad_norm": 0.8927792310714722, "learning_rate": 0.0001808179861277206, "loss": 0.0298, "step": 8810 }, { "epoch": 1.2348983882270497, "grad_norm": 0.27887874841690063, "learning_rate": 0.000180803635493901, "loss": 0.0624, "step": 8811 }, { "epoch": 1.2350385423966364, "grad_norm": 0.1413101702928543, "learning_rate": 0.00018078928486008132, "loss": 0.021, "step": 8812 }, { "epoch": 1.2351786965662228, "grad_norm": 0.2123580425977707, "learning_rate": 0.00018077493422626165, "loss": 0.0188, "step": 8813 }, { "epoch": 1.2353188507358093, "grad_norm": 1.3969950675964355, "learning_rate": 0.000180760583592442, "loss": 0.0644, "step": 8814 }, { "epoch": 1.235459004905396, "grad_norm": 0.17703315615653992, "learning_rate": 0.00018074623295862233, "loss": 0.0335, "step": 8815 }, { "epoch": 1.2355991590749824, "grad_norm": 0.3511470556259155, "learning_rate": 0.00018073188232480266, "loss": 0.082, "step": 8816 }, { "epoch": 1.235739313244569, "grad_norm": 0.22825947403907776, "learning_rate": 0.000180717531690983, "loss": 0.0512, "step": 8817 }, { "epoch": 1.2358794674141556, "grad_norm": 0.324421763420105, "learning_rate": 0.00018070318105716334, "loss": 0.061, "step": 8818 }, { "epoch": 1.236019621583742, "grad_norm": 0.17748211324214935, "learning_rate": 0.00018068883042334367, "loss": 0.0192, "step": 8819 }, { "epoch": 1.2361597757533287, "grad_norm": 0.25499194860458374, "learning_rate": 0.000180674479789524, "loss": 0.0622, "step": 8820 }, { "epoch": 1.2362999299229152, "grad_norm": 0.48264193534851074, "learning_rate": 
0.00018066012915570438, "loss": 0.098, "step": 8821 }, { "epoch": 1.2364400840925018, "grad_norm": 0.7179067134857178, "learning_rate": 0.0001806457785218847, "loss": 0.1388, "step": 8822 }, { "epoch": 1.2365802382620883, "grad_norm": 0.3876977264881134, "learning_rate": 0.00018063142788806504, "loss": 0.1089, "step": 8823 }, { "epoch": 1.2367203924316748, "grad_norm": 0.07642313838005066, "learning_rate": 0.0001806170772542454, "loss": 0.0135, "step": 8824 }, { "epoch": 1.2368605466012614, "grad_norm": 0.18005669116973877, "learning_rate": 0.00018060272662042572, "loss": 0.0513, "step": 8825 }, { "epoch": 1.237000700770848, "grad_norm": 0.1359214037656784, "learning_rate": 0.00018058837598660605, "loss": 0.0118, "step": 8826 }, { "epoch": 1.2371408549404346, "grad_norm": 0.2316037267446518, "learning_rate": 0.0001805740253527864, "loss": 0.034, "step": 8827 }, { "epoch": 1.237281009110021, "grad_norm": 0.27992984652519226, "learning_rate": 0.00018055967471896674, "loss": 0.0773, "step": 8828 }, { "epoch": 1.2374211632796075, "grad_norm": 0.6376937031745911, "learning_rate": 0.00018054532408514707, "loss": 0.0573, "step": 8829 }, { "epoch": 1.2375613174491942, "grad_norm": 0.2907851040363312, "learning_rate": 0.00018053097345132742, "loss": 0.028, "step": 8830 }, { "epoch": 1.2377014716187806, "grad_norm": 1.39239501953125, "learning_rate": 0.00018051662281750775, "loss": 0.1777, "step": 8831 }, { "epoch": 1.2378416257883673, "grad_norm": 0.2886229455471039, "learning_rate": 0.00018050227218368808, "loss": 0.0526, "step": 8832 }, { "epoch": 1.2379817799579538, "grad_norm": 0.3110648989677429, "learning_rate": 0.00018048792154986846, "loss": 0.0639, "step": 8833 }, { "epoch": 1.2381219341275402, "grad_norm": 0.8068387508392334, "learning_rate": 0.0001804735709160488, "loss": 0.0846, "step": 8834 }, { "epoch": 1.238262088297127, "grad_norm": 0.8859265446662903, "learning_rate": 0.00018045922028222912, "loss": 0.0605, "step": 8835 }, { "epoch": 1.2384022424667134, 
"grad_norm": 0.15297624468803406, "learning_rate": 0.00018044486964840945, "loss": 0.0231, "step": 8836 }, { "epoch": 1.2385423966362998, "grad_norm": 1.0882353782653809, "learning_rate": 0.0001804305190145898, "loss": 0.0669, "step": 8837 }, { "epoch": 1.2386825508058865, "grad_norm": 0.2530595660209656, "learning_rate": 0.00018041616838077013, "loss": 0.0414, "step": 8838 }, { "epoch": 1.238822704975473, "grad_norm": 0.267008513212204, "learning_rate": 0.00018040181774695046, "loss": 0.0713, "step": 8839 }, { "epoch": 1.2389628591450597, "grad_norm": 0.2525866627693176, "learning_rate": 0.00018038746711313081, "loss": 0.0406, "step": 8840 }, { "epoch": 1.2391030133146461, "grad_norm": 0.20825164020061493, "learning_rate": 0.00018037311647931114, "loss": 0.0176, "step": 8841 }, { "epoch": 1.2392431674842326, "grad_norm": 1.2770270109176636, "learning_rate": 0.00018035876584549147, "loss": 0.147, "step": 8842 }, { "epoch": 1.2393833216538193, "grad_norm": 0.5578776597976685, "learning_rate": 0.00018034441521167185, "loss": 0.063, "step": 8843 }, { "epoch": 1.2395234758234057, "grad_norm": 0.382220596075058, "learning_rate": 0.00018033006457785218, "loss": 0.0606, "step": 8844 }, { "epoch": 1.2396636299929922, "grad_norm": 0.39881905913352966, "learning_rate": 0.0001803157139440325, "loss": 0.0883, "step": 8845 }, { "epoch": 1.2398037841625789, "grad_norm": 0.484540194272995, "learning_rate": 0.00018030136331021287, "loss": 0.0612, "step": 8846 }, { "epoch": 1.2399439383321653, "grad_norm": 0.20494407415390015, "learning_rate": 0.0001802870126763932, "loss": 0.05, "step": 8847 }, { "epoch": 1.240084092501752, "grad_norm": 0.36396971344947815, "learning_rate": 0.00018027266204257352, "loss": 0.0245, "step": 8848 }, { "epoch": 1.2402242466713385, "grad_norm": 0.2862990200519562, "learning_rate": 0.00018025831140875388, "loss": 0.0446, "step": 8849 }, { "epoch": 1.240364400840925, "grad_norm": 0.25687506794929504, "learning_rate": 0.0001802439607749342, "loss": 0.0289, 
"step": 8850 }, { "epoch": 1.2405045550105116, "grad_norm": 1.0851212739944458, "learning_rate": 0.00018022961014111454, "loss": 0.0603, "step": 8851 }, { "epoch": 1.240644709180098, "grad_norm": 0.35455313324928284, "learning_rate": 0.00018021525950729487, "loss": 0.0599, "step": 8852 }, { "epoch": 1.2407848633496847, "grad_norm": 0.3976340889930725, "learning_rate": 0.00018020090887347525, "loss": 0.0301, "step": 8853 }, { "epoch": 1.2409250175192712, "grad_norm": 0.2038656622171402, "learning_rate": 0.00018018655823965558, "loss": 0.0315, "step": 8854 }, { "epoch": 1.2410651716888577, "grad_norm": 0.4963422417640686, "learning_rate": 0.0001801722076058359, "loss": 0.0247, "step": 8855 }, { "epoch": 1.2412053258584443, "grad_norm": 0.5993987321853638, "learning_rate": 0.00018015785697201626, "loss": 0.1168, "step": 8856 }, { "epoch": 1.2413454800280308, "grad_norm": 0.5678585171699524, "learning_rate": 0.0001801435063381966, "loss": 0.0911, "step": 8857 }, { "epoch": 1.2414856341976175, "grad_norm": 0.526900053024292, "learning_rate": 0.00018012915570437692, "loss": 0.0335, "step": 8858 }, { "epoch": 1.241625788367204, "grad_norm": 0.11840403079986572, "learning_rate": 0.00018011480507055727, "loss": 0.0114, "step": 8859 }, { "epoch": 1.2417659425367904, "grad_norm": 0.8445833921432495, "learning_rate": 0.0001801004544367376, "loss": 0.0814, "step": 8860 }, { "epoch": 1.241906096706377, "grad_norm": 0.42567649483680725, "learning_rate": 0.00018008610380291793, "loss": 0.0342, "step": 8861 }, { "epoch": 1.2420462508759635, "grad_norm": 0.26783329248428345, "learning_rate": 0.0001800717531690983, "loss": 0.0235, "step": 8862 }, { "epoch": 1.2421864050455502, "grad_norm": 0.21952690184116364, "learning_rate": 0.00018005740253527862, "loss": 0.0294, "step": 8863 }, { "epoch": 1.2423265592151367, "grad_norm": 0.4130716919898987, "learning_rate": 0.00018004305190145894, "loss": 0.0625, "step": 8864 }, { "epoch": 1.2424667133847231, "grad_norm": 0.6658200025558472, 
"learning_rate": 0.00018002870126763933, "loss": 0.0734, "step": 8865 }, { "epoch": 1.2426068675543098, "grad_norm": 0.45264697074890137, "learning_rate": 0.00018001435063381966, "loss": 0.074, "step": 8866 }, { "epoch": 1.2427470217238963, "grad_norm": 0.39087289571762085, "learning_rate": 0.00017999999999999998, "loss": 0.0637, "step": 8867 }, { "epoch": 1.2428871758934827, "grad_norm": 0.06972356140613556, "learning_rate": 0.00017998564936618034, "loss": 0.008, "step": 8868 }, { "epoch": 1.2430273300630694, "grad_norm": 0.226225346326828, "learning_rate": 0.00017997129873236067, "loss": 0.0189, "step": 8869 }, { "epoch": 1.2431674842326559, "grad_norm": 0.4632932245731354, "learning_rate": 0.000179956948098541, "loss": 0.1249, "step": 8870 }, { "epoch": 1.2433076384022423, "grad_norm": 0.22286300361156464, "learning_rate": 0.00017994259746472133, "loss": 0.0312, "step": 8871 }, { "epoch": 1.243447792571829, "grad_norm": 0.3477002680301666, "learning_rate": 0.00017992824683090168, "loss": 0.0499, "step": 8872 }, { "epoch": 1.2435879467414155, "grad_norm": 0.07345622032880783, "learning_rate": 0.000179913896197082, "loss": 0.0045, "step": 8873 }, { "epoch": 1.2437281009110022, "grad_norm": 0.15686769783496857, "learning_rate": 0.00017989954556326234, "loss": 0.0168, "step": 8874 }, { "epoch": 1.2438682550805886, "grad_norm": 0.23002737760543823, "learning_rate": 0.00017988519492944272, "loss": 0.0397, "step": 8875 }, { "epoch": 1.244008409250175, "grad_norm": 0.45163851976394653, "learning_rate": 0.00017987084429562305, "loss": 0.0591, "step": 8876 }, { "epoch": 1.2441485634197618, "grad_norm": 0.42541858553886414, "learning_rate": 0.00017985649366180338, "loss": 0.0628, "step": 8877 }, { "epoch": 1.2442887175893482, "grad_norm": 0.24296577274799347, "learning_rate": 0.00017984214302798373, "loss": 0.0329, "step": 8878 }, { "epoch": 1.244428871758935, "grad_norm": 0.39703062176704407, "learning_rate": 0.00017982779239416406, "loss": 0.1151, "step": 8879 }, { 
"epoch": 1.2445690259285214, "grad_norm": 0.5747311115264893, "learning_rate": 0.0001798134417603444, "loss": 0.1525, "step": 8880 }, { "epoch": 1.2447091800981078, "grad_norm": 0.883503258228302, "learning_rate": 0.00017979909112652475, "loss": 0.0701, "step": 8881 }, { "epoch": 1.2448493342676945, "grad_norm": 1.8566811084747314, "learning_rate": 0.00017978474049270508, "loss": 0.3588, "step": 8882 }, { "epoch": 1.244989488437281, "grad_norm": 1.2332398891448975, "learning_rate": 0.0001797703898588854, "loss": 0.1362, "step": 8883 }, { "epoch": 1.2451296426068676, "grad_norm": 1.036044955253601, "learning_rate": 0.0001797560392250658, "loss": 0.0943, "step": 8884 }, { "epoch": 1.245269796776454, "grad_norm": 1.9353384971618652, "learning_rate": 0.00017974168859124612, "loss": 0.2039, "step": 8885 }, { "epoch": 1.2454099509460406, "grad_norm": 0.17843778431415558, "learning_rate": 0.00017972733795742644, "loss": 0.0542, "step": 8886 }, { "epoch": 1.2455501051156272, "grad_norm": 0.26558083295822144, "learning_rate": 0.00017971298732360677, "loss": 0.0735, "step": 8887 }, { "epoch": 1.2456902592852137, "grad_norm": 0.6807878017425537, "learning_rate": 0.00017969863668978713, "loss": 0.0464, "step": 8888 }, { "epoch": 1.2458304134548004, "grad_norm": 0.08774295449256897, "learning_rate": 0.00017968428605596746, "loss": 0.0097, "step": 8889 }, { "epoch": 1.2459705676243868, "grad_norm": 0.2471190243959427, "learning_rate": 0.00017966993542214779, "loss": 0.0883, "step": 8890 }, { "epoch": 1.2461107217939733, "grad_norm": 0.3901362419128418, "learning_rate": 0.00017965558478832814, "loss": 0.0822, "step": 8891 }, { "epoch": 1.24625087596356, "grad_norm": 0.40733176469802856, "learning_rate": 0.00017964123415450847, "loss": 0.0521, "step": 8892 }, { "epoch": 1.2463910301331464, "grad_norm": 0.27577829360961914, "learning_rate": 0.0001796268835206888, "loss": 0.1004, "step": 8893 }, { "epoch": 1.2465311843027331, "grad_norm": 0.28781938552856445, "learning_rate": 
0.00017961253288686915, "loss": 0.0328, "step": 8894 }, { "epoch": 1.2466713384723196, "grad_norm": 0.40174397826194763, "learning_rate": 0.00017959818225304948, "loss": 0.096, "step": 8895 }, { "epoch": 1.246811492641906, "grad_norm": 0.3050895035266876, "learning_rate": 0.0001795838316192298, "loss": 0.0639, "step": 8896 }, { "epoch": 1.2469516468114927, "grad_norm": 0.47422388195991516, "learning_rate": 0.0001795694809854102, "loss": 0.0352, "step": 8897 }, { "epoch": 1.2470918009810792, "grad_norm": 0.30920857191085815, "learning_rate": 0.00017955513035159052, "loss": 0.0835, "step": 8898 }, { "epoch": 1.2472319551506656, "grad_norm": 0.158649280667305, "learning_rate": 0.00017954077971777085, "loss": 0.0686, "step": 8899 }, { "epoch": 1.2473721093202523, "grad_norm": 0.4309064447879791, "learning_rate": 0.0001795264290839512, "loss": 0.0483, "step": 8900 }, { "epoch": 1.2475122634898388, "grad_norm": 0.17623929679393768, "learning_rate": 0.00017951207845013153, "loss": 0.0278, "step": 8901 }, { "epoch": 1.2476524176594253, "grad_norm": 0.35155707597732544, "learning_rate": 0.00017949772781631186, "loss": 0.06, "step": 8902 }, { "epoch": 1.247792571829012, "grad_norm": 0.3256426453590393, "learning_rate": 0.00017948337718249222, "loss": 0.0959, "step": 8903 }, { "epoch": 1.2479327259985984, "grad_norm": 0.23376812040805817, "learning_rate": 0.00017946902654867255, "loss": 0.049, "step": 8904 }, { "epoch": 1.248072880168185, "grad_norm": 0.27109384536743164, "learning_rate": 0.00017945467591485288, "loss": 0.029, "step": 8905 }, { "epoch": 1.2482130343377715, "grad_norm": 0.4933944642543793, "learning_rate": 0.0001794403252810332, "loss": 0.025, "step": 8906 }, { "epoch": 1.248353188507358, "grad_norm": 0.23296017944812775, "learning_rate": 0.0001794259746472136, "loss": 0.0127, "step": 8907 }, { "epoch": 1.2484933426769447, "grad_norm": 0.19686123728752136, "learning_rate": 0.00017941162401339392, "loss": 0.0456, "step": 8908 }, { "epoch": 1.2486334968465311, 
"grad_norm": 0.38507455587387085, "learning_rate": 0.00017939727337957424, "loss": 0.094, "step": 8909 }, { "epoch": 1.2487736510161178, "grad_norm": 0.6581763029098511, "learning_rate": 0.0001793829227457546, "loss": 0.1131, "step": 8910 }, { "epoch": 1.2489138051857043, "grad_norm": 0.88583904504776, "learning_rate": 0.00017936857211193493, "loss": 0.1287, "step": 8911 }, { "epoch": 1.2490539593552907, "grad_norm": 0.47335192561149597, "learning_rate": 0.00017935422147811526, "loss": 0.0606, "step": 8912 }, { "epoch": 1.2491941135248774, "grad_norm": 0.2666143774986267, "learning_rate": 0.0001793398708442956, "loss": 0.0301, "step": 8913 }, { "epoch": 1.2493342676944639, "grad_norm": 1.089593529701233, "learning_rate": 0.00017932552021047594, "loss": 0.1099, "step": 8914 }, { "epoch": 1.2494744218640506, "grad_norm": 0.5098910927772522, "learning_rate": 0.00017931116957665627, "loss": 0.0365, "step": 8915 }, { "epoch": 1.249614576033637, "grad_norm": 0.4775100648403168, "learning_rate": 0.00017929681894283665, "loss": 0.089, "step": 8916 }, { "epoch": 1.2497547302032235, "grad_norm": 0.2448941022157669, "learning_rate": 0.00017928246830901698, "loss": 0.0157, "step": 8917 }, { "epoch": 1.2498948843728102, "grad_norm": 0.20178437232971191, "learning_rate": 0.0001792681176751973, "loss": 0.0281, "step": 8918 }, { "epoch": 1.2500350385423966, "grad_norm": 0.34960028529167175, "learning_rate": 0.00017925376704137767, "loss": 0.0958, "step": 8919 }, { "epoch": 1.2501751927119833, "grad_norm": 0.40998774766921997, "learning_rate": 0.000179239416407558, "loss": 0.0343, "step": 8920 }, { "epoch": 1.2503153468815698, "grad_norm": 0.19427341222763062, "learning_rate": 0.00017922506577373832, "loss": 0.0281, "step": 8921 }, { "epoch": 1.2504555010511562, "grad_norm": 0.34853291511535645, "learning_rate": 0.00017921071513991865, "loss": 0.0499, "step": 8922 }, { "epoch": 1.250595655220743, "grad_norm": 0.4753531217575073, "learning_rate": 0.000179196364506099, "loss": 
0.0781, "step": 8923 }, { "epoch": 1.2507358093903294, "grad_norm": 0.1523970067501068, "learning_rate": 0.00017918201387227934, "loss": 0.0227, "step": 8924 }, { "epoch": 1.250875963559916, "grad_norm": 0.9778902530670166, "learning_rate": 0.00017916766323845966, "loss": 0.091, "step": 8925 }, { "epoch": 1.2510161177295025, "grad_norm": 0.3031432628631592, "learning_rate": 0.00017915331260464002, "loss": 0.0337, "step": 8926 }, { "epoch": 1.251156271899089, "grad_norm": 0.3461599051952362, "learning_rate": 0.00017913896197082035, "loss": 0.0534, "step": 8927 }, { "epoch": 1.2512964260686754, "grad_norm": 0.31594088673591614, "learning_rate": 0.00017912461133700068, "loss": 0.1118, "step": 8928 }, { "epoch": 1.251436580238262, "grad_norm": 0.6019078493118286, "learning_rate": 0.00017911026070318106, "loss": 0.0644, "step": 8929 }, { "epoch": 1.2515767344078486, "grad_norm": 0.4162902534008026, "learning_rate": 0.0001790959100693614, "loss": 0.0463, "step": 8930 }, { "epoch": 1.2517168885774352, "grad_norm": 0.5430227518081665, "learning_rate": 0.00017908155943554172, "loss": 0.0466, "step": 8931 }, { "epoch": 1.2518570427470217, "grad_norm": 0.23210391402244568, "learning_rate": 0.00017906720880172207, "loss": 0.0348, "step": 8932 }, { "epoch": 1.2519971969166082, "grad_norm": 0.2866744101047516, "learning_rate": 0.0001790528581679024, "loss": 0.0144, "step": 8933 }, { "epoch": 1.2521373510861948, "grad_norm": 0.8020691871643066, "learning_rate": 0.00017903850753408273, "loss": 0.0412, "step": 8934 }, { "epoch": 1.2522775052557813, "grad_norm": 0.8091253638267517, "learning_rate": 0.00017902415690026309, "loss": 0.0223, "step": 8935 }, { "epoch": 1.252417659425368, "grad_norm": 0.4563453793525696, "learning_rate": 0.00017900980626644341, "loss": 0.1206, "step": 8936 }, { "epoch": 1.2525578135949544, "grad_norm": 0.2362118512392044, "learning_rate": 0.00017899545563262374, "loss": 0.0311, "step": 8937 }, { "epoch": 1.252697967764541, "grad_norm": 0.6482368111610413, 
"learning_rate": 0.00017898110499880413, "loss": 0.081, "step": 8938 }, { "epoch": 1.2528381219341276, "grad_norm": 0.6267337799072266, "learning_rate": 0.00017896675436498445, "loss": 0.0566, "step": 8939 }, { "epoch": 1.252978276103714, "grad_norm": 0.24068769812583923, "learning_rate": 0.00017895240373116478, "loss": 0.0264, "step": 8940 }, { "epoch": 1.2531184302733007, "grad_norm": 0.24023745954036713, "learning_rate": 0.0001789380530973451, "loss": 0.0366, "step": 8941 }, { "epoch": 1.2532585844428872, "grad_norm": 0.21197065711021423, "learning_rate": 0.00017892370246352547, "loss": 0.03, "step": 8942 }, { "epoch": 1.2533987386124736, "grad_norm": 0.6333866715431213, "learning_rate": 0.0001789093518297058, "loss": 0.0926, "step": 8943 }, { "epoch": 1.2535388927820603, "grad_norm": 0.48493653535842896, "learning_rate": 0.00017889500119588612, "loss": 0.0632, "step": 8944 }, { "epoch": 1.2536790469516468, "grad_norm": 0.5835952162742615, "learning_rate": 0.00017888065056206648, "loss": 0.0293, "step": 8945 }, { "epoch": 1.2538192011212335, "grad_norm": 0.4014591872692108, "learning_rate": 0.0001788662999282468, "loss": 0.0583, "step": 8946 }, { "epoch": 1.25395935529082, "grad_norm": 0.6542590856552124, "learning_rate": 0.00017885194929442714, "loss": 0.0743, "step": 8947 }, { "epoch": 1.2540995094604064, "grad_norm": 0.3809979259967804, "learning_rate": 0.00017883759866060752, "loss": 0.0439, "step": 8948 }, { "epoch": 1.254239663629993, "grad_norm": 0.24661223590373993, "learning_rate": 0.00017882324802678785, "loss": 0.0381, "step": 8949 }, { "epoch": 1.2543798177995795, "grad_norm": 0.3975275456905365, "learning_rate": 0.00017880889739296818, "loss": 0.0631, "step": 8950 }, { "epoch": 1.2545199719691662, "grad_norm": 0.5094953775405884, "learning_rate": 0.00017879454675914853, "loss": 0.0352, "step": 8951 }, { "epoch": 1.2546601261387527, "grad_norm": 0.26044341921806335, "learning_rate": 0.00017878019612532886, "loss": 0.1066, "step": 8952 }, { "epoch": 
1.2548002803083391, "grad_norm": 0.4691629707813263, "learning_rate": 0.0001787658454915092, "loss": 0.0721, "step": 8953 }, { "epoch": 1.2549404344779256, "grad_norm": 0.520023763179779, "learning_rate": 0.00017875149485768954, "loss": 0.079, "step": 8954 }, { "epoch": 1.2550805886475123, "grad_norm": 0.3155807852745056, "learning_rate": 0.00017873714422386987, "loss": 0.0304, "step": 8955 }, { "epoch": 1.255220742817099, "grad_norm": 0.3651512563228607, "learning_rate": 0.0001787227935900502, "loss": 0.0376, "step": 8956 }, { "epoch": 1.2553608969866854, "grad_norm": 0.44446155428886414, "learning_rate": 0.00017870844295623053, "loss": 0.0644, "step": 8957 }, { "epoch": 1.2555010511562719, "grad_norm": 0.4956394135951996, "learning_rate": 0.00017869409232241089, "loss": 0.0369, "step": 8958 }, { "epoch": 1.2556412053258583, "grad_norm": 0.13711056113243103, "learning_rate": 0.00017867974168859121, "loss": 0.0345, "step": 8959 }, { "epoch": 1.255781359495445, "grad_norm": 0.47741255164146423, "learning_rate": 0.00017866539105477154, "loss": 0.0522, "step": 8960 }, { "epoch": 1.2559215136650315, "grad_norm": 0.10002173483371735, "learning_rate": 0.00017865104042095193, "loss": 0.019, "step": 8961 }, { "epoch": 1.2560616678346181, "grad_norm": 1.0469988584518433, "learning_rate": 0.00017863668978713225, "loss": 0.126, "step": 8962 }, { "epoch": 1.2562018220042046, "grad_norm": 0.2660384178161621, "learning_rate": 0.00017862233915331258, "loss": 0.0703, "step": 8963 }, { "epoch": 1.256341976173791, "grad_norm": 0.40686583518981934, "learning_rate": 0.00017860798851949294, "loss": 0.035, "step": 8964 }, { "epoch": 1.2564821303433777, "grad_norm": 0.439657062292099, "learning_rate": 0.00017859363788567327, "loss": 0.0597, "step": 8965 }, { "epoch": 1.2566222845129642, "grad_norm": 0.5489710569381714, "learning_rate": 0.0001785792872518536, "loss": 0.0567, "step": 8966 }, { "epoch": 1.2567624386825509, "grad_norm": 0.5363398194313049, "learning_rate": 
0.00017856493661803395, "loss": 0.1139, "step": 8967 }, { "epoch": 1.2569025928521373, "grad_norm": 0.44276735186576843, "learning_rate": 0.00017855058598421428, "loss": 0.0638, "step": 8968 }, { "epoch": 1.2570427470217238, "grad_norm": 0.3831653296947479, "learning_rate": 0.0001785362353503946, "loss": 0.035, "step": 8969 }, { "epoch": 1.2571829011913105, "grad_norm": 0.5723667740821838, "learning_rate": 0.000178521884716575, "loss": 0.091, "step": 8970 }, { "epoch": 1.257323055360897, "grad_norm": 0.47155535221099854, "learning_rate": 0.00017850753408275532, "loss": 0.0504, "step": 8971 }, { "epoch": 1.2574632095304836, "grad_norm": 0.43353211879730225, "learning_rate": 0.00017849318344893565, "loss": 0.0905, "step": 8972 }, { "epoch": 1.25760336370007, "grad_norm": 0.20206907391548157, "learning_rate": 0.000178478832815116, "loss": 0.0852, "step": 8973 }, { "epoch": 1.2577435178696565, "grad_norm": 0.27788031101226807, "learning_rate": 0.00017846448218129633, "loss": 0.0211, "step": 8974 }, { "epoch": 1.2578836720392432, "grad_norm": 0.7037582993507385, "learning_rate": 0.00017845013154747666, "loss": 0.091, "step": 8975 }, { "epoch": 1.2580238262088297, "grad_norm": 0.30100011825561523, "learning_rate": 0.000178435780913657, "loss": 0.067, "step": 8976 }, { "epoch": 1.2581639803784164, "grad_norm": 0.5694476366043091, "learning_rate": 0.00017842143027983735, "loss": 0.0646, "step": 8977 }, { "epoch": 1.2583041345480028, "grad_norm": 0.2185674011707306, "learning_rate": 0.00017840707964601767, "loss": 0.0534, "step": 8978 }, { "epoch": 1.2584442887175893, "grad_norm": 0.3084188401699066, "learning_rate": 0.000178392729012198, "loss": 0.0594, "step": 8979 }, { "epoch": 1.258584442887176, "grad_norm": 0.708634078502655, "learning_rate": 0.00017837837837837839, "loss": 0.0991, "step": 8980 }, { "epoch": 1.2587245970567624, "grad_norm": 0.6217760443687439, "learning_rate": 0.00017836402774455871, "loss": 0.0239, "step": 8981 }, { "epoch": 1.258864751226349, 
"grad_norm": 1.0362032651901245, "learning_rate": 0.00017834967711073904, "loss": 0.2871, "step": 8982 }, { "epoch": 1.2590049053959356, "grad_norm": 0.7619487047195435, "learning_rate": 0.0001783353264769194, "loss": 0.0939, "step": 8983 }, { "epoch": 1.259145059565522, "grad_norm": 1.9713295698165894, "learning_rate": 0.00017832097584309973, "loss": 0.1286, "step": 8984 }, { "epoch": 1.2592852137351085, "grad_norm": 0.7887514233589172, "learning_rate": 0.00017830662520928006, "loss": 0.0936, "step": 8985 }, { "epoch": 1.2594253679046952, "grad_norm": 0.27064570784568787, "learning_rate": 0.0001782922745754604, "loss": 0.0668, "step": 8986 }, { "epoch": 1.2595655220742816, "grad_norm": 1.0867239236831665, "learning_rate": 0.00017827792394164074, "loss": 0.0941, "step": 8987 }, { "epoch": 1.2597056762438683, "grad_norm": 0.1598159819841385, "learning_rate": 0.00017826357330782107, "loss": 0.045, "step": 8988 }, { "epoch": 1.2598458304134548, "grad_norm": 0.2231038361787796, "learning_rate": 0.00017824922267400142, "loss": 0.0298, "step": 8989 }, { "epoch": 1.2599859845830412, "grad_norm": 0.38892167806625366, "learning_rate": 0.00017823487204018175, "loss": 0.0586, "step": 8990 }, { "epoch": 1.260126138752628, "grad_norm": 0.2902325391769409, "learning_rate": 0.00017822052140636208, "loss": 0.0555, "step": 8991 }, { "epoch": 1.2602662929222144, "grad_norm": 0.2863599956035614, "learning_rate": 0.0001782061707725424, "loss": 0.0544, "step": 8992 }, { "epoch": 1.260406447091801, "grad_norm": 0.32938963174819946, "learning_rate": 0.0001781918201387228, "loss": 0.0974, "step": 8993 }, { "epoch": 1.2605466012613875, "grad_norm": 0.45764076709747314, "learning_rate": 0.00017817746950490312, "loss": 0.069, "step": 8994 }, { "epoch": 1.260686755430974, "grad_norm": 0.19576597213745117, "learning_rate": 0.00017816311887108345, "loss": 0.042, "step": 8995 }, { "epoch": 1.2608269096005607, "grad_norm": 0.3495793640613556, "learning_rate": 0.0001781487682372638, "loss": 
0.0578, "step": 8996 }, { "epoch": 1.2609670637701471, "grad_norm": 0.46037957072257996, "learning_rate": 0.00017813441760344413, "loss": 0.0744, "step": 8997 }, { "epoch": 1.2611072179397338, "grad_norm": 0.19668155908584595, "learning_rate": 0.00017812006696962446, "loss": 0.0536, "step": 8998 }, { "epoch": 1.2612473721093203, "grad_norm": 0.22465582191944122, "learning_rate": 0.00017810571633580482, "loss": 0.0525, "step": 8999 }, { "epoch": 1.2613875262789067, "grad_norm": 0.1850176453590393, "learning_rate": 0.00017809136570198515, "loss": 0.0195, "step": 9000 }, { "epoch": 1.2615276804484934, "grad_norm": 0.2853897511959076, "learning_rate": 0.00017807701506816547, "loss": 0.0391, "step": 9001 }, { "epoch": 1.2616678346180799, "grad_norm": 0.30048874020576477, "learning_rate": 0.00017806266443434586, "loss": 0.0907, "step": 9002 }, { "epoch": 1.2618079887876665, "grad_norm": 0.23137569427490234, "learning_rate": 0.00017804831380052619, "loss": 0.0342, "step": 9003 }, { "epoch": 1.261948142957253, "grad_norm": 0.3478965163230896, "learning_rate": 0.00017803396316670651, "loss": 0.0954, "step": 9004 }, { "epoch": 1.2620882971268395, "grad_norm": 0.3756159543991089, "learning_rate": 0.00017801961253288687, "loss": 0.0826, "step": 9005 }, { "epoch": 1.2622284512964261, "grad_norm": 0.5070664882659912, "learning_rate": 0.0001780052618990672, "loss": 0.0602, "step": 9006 }, { "epoch": 1.2623686054660126, "grad_norm": 0.2849693298339844, "learning_rate": 0.00017799091126524753, "loss": 0.035, "step": 9007 }, { "epoch": 1.2625087596355993, "grad_norm": 0.32464128732681274, "learning_rate": 0.00017797656063142786, "loss": 0.011, "step": 9008 }, { "epoch": 1.2626489138051857, "grad_norm": 0.2038242667913437, "learning_rate": 0.0001779622099976082, "loss": 0.0303, "step": 9009 }, { "epoch": 1.2627890679747722, "grad_norm": 0.3782430589199066, "learning_rate": 0.00017794785936378854, "loss": 0.0796, "step": 9010 }, { "epoch": 1.2629292221443589, "grad_norm": 
0.18381468951702118, "learning_rate": 0.00017793350872996887, "loss": 0.0697, "step": 9011 }, { "epoch": 1.2630693763139453, "grad_norm": 0.4218471944332123, "learning_rate": 0.00017791915809614925, "loss": 0.0878, "step": 9012 }, { "epoch": 1.263209530483532, "grad_norm": 0.3217445909976959, "learning_rate": 0.00017790480746232958, "loss": 0.0379, "step": 9013 }, { "epoch": 1.2633496846531185, "grad_norm": 0.3653494417667389, "learning_rate": 0.0001778904568285099, "loss": 0.0773, "step": 9014 }, { "epoch": 1.263489838822705, "grad_norm": 0.42345574498176575, "learning_rate": 0.00017787610619469026, "loss": 0.0591, "step": 9015 }, { "epoch": 1.2636299929922914, "grad_norm": 0.6523240804672241, "learning_rate": 0.0001778617555608706, "loss": 0.0694, "step": 9016 }, { "epoch": 1.263770147161878, "grad_norm": 0.6912359595298767, "learning_rate": 0.00017784740492705092, "loss": 0.0513, "step": 9017 }, { "epoch": 1.2639103013314645, "grad_norm": 0.16744333505630493, "learning_rate": 0.00017783305429323128, "loss": 0.0167, "step": 9018 }, { "epoch": 1.2640504555010512, "grad_norm": 0.7581879496574402, "learning_rate": 0.0001778187036594116, "loss": 0.0681, "step": 9019 }, { "epoch": 1.2641906096706377, "grad_norm": 0.25457116961479187, "learning_rate": 0.00017780435302559193, "loss": 0.0421, "step": 9020 }, { "epoch": 1.2643307638402241, "grad_norm": 0.3509018123149872, "learning_rate": 0.0001777900023917723, "loss": 0.026, "step": 9021 }, { "epoch": 1.2644709180098108, "grad_norm": 0.604300320148468, "learning_rate": 0.00017777565175795262, "loss": 0.0687, "step": 9022 }, { "epoch": 1.2646110721793973, "grad_norm": 0.27976885437965393, "learning_rate": 0.00017776130112413295, "loss": 0.0617, "step": 9023 }, { "epoch": 1.264751226348984, "grad_norm": 0.4046292006969452, "learning_rate": 0.00017774695049031333, "loss": 0.0935, "step": 9024 }, { "epoch": 1.2648913805185704, "grad_norm": 0.5800681114196777, "learning_rate": 0.00017773259985649366, "loss": 0.147, "step": 
9025 }, { "epoch": 1.2650315346881569, "grad_norm": 0.4359778165817261, "learning_rate": 0.000177718249222674, "loss": 0.1021, "step": 9026 }, { "epoch": 1.2651716888577436, "grad_norm": 0.13012881577014923, "learning_rate": 0.00017770389858885432, "loss": 0.0177, "step": 9027 }, { "epoch": 1.26531184302733, "grad_norm": 0.8135332465171814, "learning_rate": 0.00017768954795503467, "loss": 0.1369, "step": 9028 }, { "epoch": 1.2654519971969167, "grad_norm": 0.22240281105041504, "learning_rate": 0.000177675197321215, "loss": 0.0297, "step": 9029 }, { "epoch": 1.2655921513665032, "grad_norm": 0.5183936357498169, "learning_rate": 0.00017766084668739533, "loss": 0.0612, "step": 9030 }, { "epoch": 1.2657323055360896, "grad_norm": 0.5014948844909668, "learning_rate": 0.00017764649605357568, "loss": 0.1044, "step": 9031 }, { "epoch": 1.2658724597056763, "grad_norm": 0.7427463531494141, "learning_rate": 0.000177632145419756, "loss": 0.0269, "step": 9032 }, { "epoch": 1.2660126138752628, "grad_norm": 1.4680558443069458, "learning_rate": 0.00017761779478593634, "loss": 0.227, "step": 9033 }, { "epoch": 1.2661527680448494, "grad_norm": 1.5702149868011475, "learning_rate": 0.00017760344415211672, "loss": 0.1232, "step": 9034 }, { "epoch": 1.266292922214436, "grad_norm": 4.549380302429199, "learning_rate": 0.00017758909351829705, "loss": 0.1444, "step": 9035 }, { "epoch": 1.2664330763840224, "grad_norm": 0.14719705283641815, "learning_rate": 0.00017757474288447738, "loss": 0.0359, "step": 9036 }, { "epoch": 1.266573230553609, "grad_norm": 0.27009081840515137, "learning_rate": 0.00017756039225065774, "loss": 0.0562, "step": 9037 }, { "epoch": 1.2667133847231955, "grad_norm": 0.14267082512378693, "learning_rate": 0.00017754604161683807, "loss": 0.0223, "step": 9038 }, { "epoch": 1.2668535388927822, "grad_norm": 0.4080493748188019, "learning_rate": 0.0001775316909830184, "loss": 0.076, "step": 9039 }, { "epoch": 1.2669936930623686, "grad_norm": 0.48317191004753113, "learning_rate": 
0.00017751734034919875, "loss": 0.0924, "step": 9040 }, { "epoch": 1.267133847231955, "grad_norm": 0.22761987149715424, "learning_rate": 0.00017750298971537908, "loss": 0.0284, "step": 9041 }, { "epoch": 1.2672740014015416, "grad_norm": 0.41949746012687683, "learning_rate": 0.0001774886390815594, "loss": 0.0448, "step": 9042 }, { "epoch": 1.2674141555711282, "grad_norm": 0.2655579149723053, "learning_rate": 0.00017747428844773974, "loss": 0.0431, "step": 9043 }, { "epoch": 1.267554309740715, "grad_norm": 0.4393776059150696, "learning_rate": 0.00017745993781392012, "loss": 0.1271, "step": 9044 }, { "epoch": 1.2676944639103014, "grad_norm": 0.29652729630470276, "learning_rate": 0.00017744558718010045, "loss": 0.0562, "step": 9045 }, { "epoch": 1.2678346180798878, "grad_norm": 0.36156249046325684, "learning_rate": 0.00017743123654628078, "loss": 0.0772, "step": 9046 }, { "epoch": 1.2679747722494743, "grad_norm": 0.3355903923511505, "learning_rate": 0.00017741688591246113, "loss": 0.0456, "step": 9047 }, { "epoch": 1.268114926419061, "grad_norm": 0.8490182757377625, "learning_rate": 0.00017740253527864146, "loss": 0.0624, "step": 9048 }, { "epoch": 1.2682550805886474, "grad_norm": 0.22248008847236633, "learning_rate": 0.0001773881846448218, "loss": 0.0286, "step": 9049 }, { "epoch": 1.2683952347582341, "grad_norm": 0.27648764848709106, "learning_rate": 0.00017737383401100214, "loss": 0.0326, "step": 9050 }, { "epoch": 1.2685353889278206, "grad_norm": 0.3741937577724457, "learning_rate": 0.00017735948337718247, "loss": 0.0478, "step": 9051 }, { "epoch": 1.268675543097407, "grad_norm": 0.3212883472442627, "learning_rate": 0.0001773451327433628, "loss": 0.0714, "step": 9052 }, { "epoch": 1.2688156972669937, "grad_norm": 0.3490413725376129, "learning_rate": 0.00017733078210954316, "loss": 0.0613, "step": 9053 }, { "epoch": 1.2689558514365802, "grad_norm": 0.21237251162528992, "learning_rate": 0.00017731643147572348, "loss": 0.0509, "step": 9054 }, { "epoch": 
1.2690960056061669, "grad_norm": 0.15484821796417236, "learning_rate": 0.0001773020808419038, "loss": 0.0231, "step": 9055 }, { "epoch": 1.2692361597757533, "grad_norm": 0.15340827405452728, "learning_rate": 0.0001772877302080842, "loss": 0.0151, "step": 9056 }, { "epoch": 1.2693763139453398, "grad_norm": 0.40345245599746704, "learning_rate": 0.00017727337957426452, "loss": 0.0866, "step": 9057 }, { "epoch": 1.2695164681149265, "grad_norm": 0.2492057830095291, "learning_rate": 0.00017725902894044485, "loss": 0.0636, "step": 9058 }, { "epoch": 1.269656622284513, "grad_norm": 0.326826274394989, "learning_rate": 0.0001772446783066252, "loss": 0.0612, "step": 9059 }, { "epoch": 1.2697967764540996, "grad_norm": 0.30686521530151367, "learning_rate": 0.00017723032767280554, "loss": 0.068, "step": 9060 }, { "epoch": 1.269936930623686, "grad_norm": 0.38144758343696594, "learning_rate": 0.00017721597703898587, "loss": 0.086, "step": 9061 }, { "epoch": 1.2700770847932725, "grad_norm": 0.18938730657100677, "learning_rate": 0.0001772016264051662, "loss": 0.0568, "step": 9062 }, { "epoch": 1.2702172389628592, "grad_norm": 0.29055100679397583, "learning_rate": 0.00017718727577134655, "loss": 0.0422, "step": 9063 }, { "epoch": 1.2703573931324457, "grad_norm": 0.3560505509376526, "learning_rate": 0.00017717292513752688, "loss": 0.0544, "step": 9064 }, { "epoch": 1.2704975473020323, "grad_norm": 0.302933007478714, "learning_rate": 0.0001771585745037072, "loss": 0.0456, "step": 9065 }, { "epoch": 1.2706377014716188, "grad_norm": 0.34537091851234436, "learning_rate": 0.0001771442238698876, "loss": 0.0314, "step": 9066 }, { "epoch": 1.2707778556412053, "grad_norm": 0.25632351636886597, "learning_rate": 0.00017712987323606792, "loss": 0.0123, "step": 9067 }, { "epoch": 1.270918009810792, "grad_norm": 0.2705901861190796, "learning_rate": 0.00017711552260224825, "loss": 0.0402, "step": 9068 }, { "epoch": 1.2710581639803784, "grad_norm": 1.1916329860687256, "learning_rate": 
0.0001771011719684286, "loss": 0.1206, "step": 9069 }, { "epoch": 1.271198318149965, "grad_norm": 0.12040171772241592, "learning_rate": 0.00017708682133460893, "loss": 0.0273, "step": 9070 }, { "epoch": 1.2713384723195515, "grad_norm": 0.24024905264377594, "learning_rate": 0.00017707247070078926, "loss": 0.0351, "step": 9071 }, { "epoch": 1.271478626489138, "grad_norm": 0.3056226074695587, "learning_rate": 0.00017705812006696962, "loss": 0.0986, "step": 9072 }, { "epoch": 1.2716187806587245, "grad_norm": 0.24302147328853607, "learning_rate": 0.00017704376943314994, "loss": 0.0483, "step": 9073 }, { "epoch": 1.2717589348283111, "grad_norm": 0.2792523503303528, "learning_rate": 0.00017702941879933027, "loss": 0.0354, "step": 9074 }, { "epoch": 1.2718990889978976, "grad_norm": 0.2571404278278351, "learning_rate": 0.00017701506816551066, "loss": 0.0522, "step": 9075 }, { "epoch": 1.2720392431674843, "grad_norm": 0.41163089871406555, "learning_rate": 0.00017700071753169098, "loss": 0.0511, "step": 9076 }, { "epoch": 1.2721793973370707, "grad_norm": 0.708112359046936, "learning_rate": 0.0001769863668978713, "loss": 0.0556, "step": 9077 }, { "epoch": 1.2723195515066572, "grad_norm": 0.09369780123233795, "learning_rate": 0.00017697201626405164, "loss": 0.0086, "step": 9078 }, { "epoch": 1.272459705676244, "grad_norm": 0.19607171416282654, "learning_rate": 0.000176957665630232, "loss": 0.0269, "step": 9079 }, { "epoch": 1.2725998598458304, "grad_norm": 0.2334619015455246, "learning_rate": 0.00017694331499641233, "loss": 0.0163, "step": 9080 }, { "epoch": 1.272740014015417, "grad_norm": 0.2039363533258438, "learning_rate": 0.00017692896436259265, "loss": 0.0094, "step": 9081 }, { "epoch": 1.2728801681850035, "grad_norm": 0.16530577838420868, "learning_rate": 0.000176914613728773, "loss": 0.0213, "step": 9082 }, { "epoch": 1.27302032235459, "grad_norm": 2.212944984436035, "learning_rate": 0.00017690026309495334, "loss": 0.1571, "step": 9083 }, { "epoch": 1.2731604765241766, 
"grad_norm": 0.7792435884475708, "learning_rate": 0.00017688591246113367, "loss": 0.1337, "step": 9084 }, { "epoch": 1.273300630693763, "grad_norm": 1.1687673330307007, "learning_rate": 0.00017687156182731402, "loss": 0.0895, "step": 9085 }, { "epoch": 1.2734407848633498, "grad_norm": 0.17413710057735443, "learning_rate": 0.00017685721119349435, "loss": 0.0375, "step": 9086 }, { "epoch": 1.2735809390329362, "grad_norm": 0.14838114380836487, "learning_rate": 0.00017684286055967468, "loss": 0.0299, "step": 9087 }, { "epoch": 1.2737210932025227, "grad_norm": 0.28675714135169983, "learning_rate": 0.00017682850992585506, "loss": 0.1071, "step": 9088 }, { "epoch": 1.2738612473721094, "grad_norm": 0.5509903430938721, "learning_rate": 0.0001768141592920354, "loss": 0.1003, "step": 9089 }, { "epoch": 1.2740014015416958, "grad_norm": 0.21225732564926147, "learning_rate": 0.00017679980865821572, "loss": 0.0217, "step": 9090 }, { "epoch": 1.2741415557112825, "grad_norm": 0.2884884774684906, "learning_rate": 0.00017678545802439608, "loss": 0.0405, "step": 9091 }, { "epoch": 1.274281709880869, "grad_norm": 0.5166020393371582, "learning_rate": 0.0001767711073905764, "loss": 0.0833, "step": 9092 }, { "epoch": 1.2744218640504554, "grad_norm": 0.4160119891166687, "learning_rate": 0.00017675675675675673, "loss": 0.0854, "step": 9093 }, { "epoch": 1.2745620182200421, "grad_norm": 0.3326933979988098, "learning_rate": 0.0001767424061229371, "loss": 0.0609, "step": 9094 }, { "epoch": 1.2747021723896286, "grad_norm": 0.29613587260246277, "learning_rate": 0.00017672805548911742, "loss": 0.0371, "step": 9095 }, { "epoch": 1.2748423265592153, "grad_norm": 0.3050192892551422, "learning_rate": 0.00017671370485529775, "loss": 0.0442, "step": 9096 }, { "epoch": 1.2749824807288017, "grad_norm": 0.3882840573787689, "learning_rate": 0.00017669935422147807, "loss": 0.0739, "step": 9097 }, { "epoch": 1.2751226348983882, "grad_norm": 0.21806761622428894, "learning_rate": 0.00017668500358765846, 
"loss": 0.0278, "step": 9098 }, { "epoch": 1.2752627890679746, "grad_norm": 0.34788092970848083, "learning_rate": 0.00017667065295383879, "loss": 0.0481, "step": 9099 }, { "epoch": 1.2754029432375613, "grad_norm": 0.2520679533481598, "learning_rate": 0.00017665630232001911, "loss": 0.0453, "step": 9100 }, { "epoch": 1.275543097407148, "grad_norm": 0.31644484400749207, "learning_rate": 0.00017664195168619947, "loss": 0.0336, "step": 9101 }, { "epoch": 1.2756832515767345, "grad_norm": 0.33437755703926086, "learning_rate": 0.0001766276010523798, "loss": 0.0343, "step": 9102 }, { "epoch": 1.275823405746321, "grad_norm": 0.14972250163555145, "learning_rate": 0.00017661325041856013, "loss": 0.0179, "step": 9103 }, { "epoch": 1.2759635599159074, "grad_norm": 0.3177717924118042, "learning_rate": 0.00017659889978474048, "loss": 0.0708, "step": 9104 }, { "epoch": 1.276103714085494, "grad_norm": 0.7146325707435608, "learning_rate": 0.0001765845491509208, "loss": 0.0684, "step": 9105 }, { "epoch": 1.2762438682550805, "grad_norm": 0.6792933344841003, "learning_rate": 0.00017657019851710114, "loss": 0.0834, "step": 9106 }, { "epoch": 1.2763840224246672, "grad_norm": 0.23644766211509705, "learning_rate": 0.00017655584788328152, "loss": 0.0456, "step": 9107 }, { "epoch": 1.2765241765942537, "grad_norm": 0.4507300853729248, "learning_rate": 0.00017654149724946185, "loss": 0.0484, "step": 9108 }, { "epoch": 1.2766643307638401, "grad_norm": 0.26117345690727234, "learning_rate": 0.00017652714661564218, "loss": 0.0419, "step": 9109 }, { "epoch": 1.2768044849334268, "grad_norm": 0.22464138269424438, "learning_rate": 0.00017651279598182253, "loss": 0.0266, "step": 9110 }, { "epoch": 1.2769446391030133, "grad_norm": 0.4495616853237152, "learning_rate": 0.00017649844534800286, "loss": 0.0827, "step": 9111 }, { "epoch": 1.2770847932726, "grad_norm": 0.7609719634056091, "learning_rate": 0.0001764840947141832, "loss": 0.0628, "step": 9112 }, { "epoch": 1.2772249474421864, "grad_norm": 
0.3974210023880005, "learning_rate": 0.00017646974408036352, "loss": 0.0351, "step": 9113 }, { "epoch": 1.2773651016117729, "grad_norm": 0.39843010902404785, "learning_rate": 0.00017645539344654388, "loss": 0.0332, "step": 9114 }, { "epoch": 1.2775052557813595, "grad_norm": 0.4331094026565552, "learning_rate": 0.0001764410428127242, "loss": 0.0464, "step": 9115 }, { "epoch": 1.277645409950946, "grad_norm": 0.9654332399368286, "learning_rate": 0.00017642669217890453, "loss": 0.0503, "step": 9116 }, { "epoch": 1.2777855641205327, "grad_norm": 0.3390435576438904, "learning_rate": 0.0001764123415450849, "loss": 0.0803, "step": 9117 }, { "epoch": 1.2779257182901191, "grad_norm": 0.45222267508506775, "learning_rate": 0.00017639799091126522, "loss": 0.0597, "step": 9118 }, { "epoch": 1.2780658724597056, "grad_norm": 0.32691147923469543, "learning_rate": 0.00017638364027744555, "loss": 0.0307, "step": 9119 }, { "epoch": 1.2782060266292923, "grad_norm": 0.29305174946784973, "learning_rate": 0.00017636928964362593, "loss": 0.0408, "step": 9120 }, { "epoch": 1.2783461807988787, "grad_norm": 0.1483353227376938, "learning_rate": 0.00017635493900980626, "loss": 0.0304, "step": 9121 }, { "epoch": 1.2784863349684654, "grad_norm": 0.15840518474578857, "learning_rate": 0.00017634058837598659, "loss": 0.0413, "step": 9122 }, { "epoch": 1.2786264891380519, "grad_norm": 0.33043789863586426, "learning_rate": 0.00017632623774216694, "loss": 0.0444, "step": 9123 }, { "epoch": 1.2787666433076383, "grad_norm": 0.4495549499988556, "learning_rate": 0.00017631188710834727, "loss": 0.0679, "step": 9124 }, { "epoch": 1.278906797477225, "grad_norm": 0.5658689737319946, "learning_rate": 0.0001762975364745276, "loss": 0.024, "step": 9125 }, { "epoch": 1.2790469516468115, "grad_norm": 0.1985495537519455, "learning_rate": 0.00017628318584070795, "loss": 0.0353, "step": 9126 }, { "epoch": 1.2791871058163982, "grad_norm": 0.06834873557090759, "learning_rate": 0.00017626883520688828, "loss": 0.0133, 
"step": 9127 }, { "epoch": 1.2793272599859846, "grad_norm": 0.30182531476020813, "learning_rate": 0.0001762544845730686, "loss": 0.024, "step": 9128 }, { "epoch": 1.279467414155571, "grad_norm": 1.2703973054885864, "learning_rate": 0.000176240133939249, "loss": 0.0592, "step": 9129 }, { "epoch": 1.2796075683251575, "grad_norm": 0.5980146527290344, "learning_rate": 0.00017622578330542932, "loss": 0.0245, "step": 9130 }, { "epoch": 1.2797477224947442, "grad_norm": 0.4103550314903259, "learning_rate": 0.00017621143267160965, "loss": 0.0558, "step": 9131 }, { "epoch": 1.2798878766643307, "grad_norm": 0.5899176597595215, "learning_rate": 0.00017619708203778998, "loss": 0.0378, "step": 9132 }, { "epoch": 1.2800280308339174, "grad_norm": 1.5419152975082397, "learning_rate": 0.00017618273140397034, "loss": 0.1064, "step": 9133 }, { "epoch": 1.2801681850035038, "grad_norm": 0.9648589491844177, "learning_rate": 0.00017616838077015066, "loss": 0.0694, "step": 9134 }, { "epoch": 1.2803083391730903, "grad_norm": 1.6360570192337036, "learning_rate": 0.000176154030136331, "loss": 0.0801, "step": 9135 }, { "epoch": 1.280448493342677, "grad_norm": 0.4212769567966461, "learning_rate": 0.00017613967950251135, "loss": 0.07, "step": 9136 }, { "epoch": 1.2805886475122634, "grad_norm": 0.22926774621009827, "learning_rate": 0.00017612532886869168, "loss": 0.0312, "step": 9137 }, { "epoch": 1.28072880168185, "grad_norm": 0.33715131878852844, "learning_rate": 0.000176110978234872, "loss": 0.0498, "step": 9138 }, { "epoch": 1.2808689558514366, "grad_norm": 0.45437347888946533, "learning_rate": 0.0001760966276010524, "loss": 0.0494, "step": 9139 }, { "epoch": 1.281009110021023, "grad_norm": 0.21203671395778656, "learning_rate": 0.00017608227696723272, "loss": 0.032, "step": 9140 }, { "epoch": 1.2811492641906097, "grad_norm": 0.8790839314460754, "learning_rate": 0.00017606792633341305, "loss": 0.0819, "step": 9141 }, { "epoch": 1.2812894183601962, "grad_norm": 0.3380790054798126, 
"learning_rate": 0.0001760535756995934, "loss": 0.0404, "step": 9142 }, { "epoch": 1.2814295725297828, "grad_norm": 0.585423469543457, "learning_rate": 0.00017603922506577373, "loss": 0.0592, "step": 9143 }, { "epoch": 1.2815697266993693, "grad_norm": 0.7210609912872314, "learning_rate": 0.00017602487443195406, "loss": 0.1065, "step": 9144 }, { "epoch": 1.2817098808689558, "grad_norm": 0.35747218132019043, "learning_rate": 0.00017601052379813441, "loss": 0.0332, "step": 9145 }, { "epoch": 1.2818500350385424, "grad_norm": 0.1638728231191635, "learning_rate": 0.00017599617316431474, "loss": 0.0191, "step": 9146 }, { "epoch": 1.281990189208129, "grad_norm": 0.3132542669773102, "learning_rate": 0.00017598182253049507, "loss": 0.057, "step": 9147 }, { "epoch": 1.2821303433777156, "grad_norm": 0.1341615915298462, "learning_rate": 0.0001759674718966754, "loss": 0.0125, "step": 9148 }, { "epoch": 1.282270497547302, "grad_norm": 0.39787402749061584, "learning_rate": 0.00017595312126285576, "loss": 0.0773, "step": 9149 }, { "epoch": 1.2824106517168885, "grad_norm": 0.5609546303749084, "learning_rate": 0.00017593877062903608, "loss": 0.1328, "step": 9150 }, { "epoch": 1.2825508058864752, "grad_norm": 0.47899529337882996, "learning_rate": 0.0001759244199952164, "loss": 0.1613, "step": 9151 }, { "epoch": 1.2826909600560616, "grad_norm": 0.3914549946784973, "learning_rate": 0.0001759100693613968, "loss": 0.0673, "step": 9152 }, { "epoch": 1.2828311142256483, "grad_norm": 0.20205208659172058, "learning_rate": 0.00017589571872757712, "loss": 0.0469, "step": 9153 }, { "epoch": 1.2829712683952348, "grad_norm": 0.2930958867073059, "learning_rate": 0.00017588136809375745, "loss": 0.0814, "step": 9154 }, { "epoch": 1.2831114225648212, "grad_norm": 0.35807353258132935, "learning_rate": 0.0001758670174599378, "loss": 0.0589, "step": 9155 }, { "epoch": 1.283251576734408, "grad_norm": 0.4420841336250305, "learning_rate": 0.00017585266682611814, "loss": 0.0514, "step": 9156 }, { "epoch": 
1.2833917309039944, "grad_norm": 0.3788946568965912, "learning_rate": 0.00017583831619229847, "loss": 0.0669, "step": 9157 }, { "epoch": 1.283531885073581, "grad_norm": 0.21619819104671478, "learning_rate": 0.00017582396555847882, "loss": 0.0446, "step": 9158 }, { "epoch": 1.2836720392431675, "grad_norm": 0.14575551450252533, "learning_rate": 0.00017580961492465915, "loss": 0.0228, "step": 9159 }, { "epoch": 1.283812193412754, "grad_norm": 0.15900026261806488, "learning_rate": 0.00017579526429083948, "loss": 0.0121, "step": 9160 }, { "epoch": 1.2839523475823404, "grad_norm": 0.26260024309158325, "learning_rate": 0.00017578091365701986, "loss": 0.0175, "step": 9161 }, { "epoch": 1.2840925017519271, "grad_norm": 0.3468899726867676, "learning_rate": 0.0001757665630232002, "loss": 0.107, "step": 9162 }, { "epoch": 1.2842326559215136, "grad_norm": 0.317746102809906, "learning_rate": 0.00017575221238938052, "loss": 0.0493, "step": 9163 }, { "epoch": 1.2843728100911003, "grad_norm": 0.21704652905464172, "learning_rate": 0.00017573786175556087, "loss": 0.0405, "step": 9164 }, { "epoch": 1.2845129642606867, "grad_norm": 0.35822945833206177, "learning_rate": 0.0001757235111217412, "loss": 0.0307, "step": 9165 }, { "epoch": 1.2846531184302732, "grad_norm": 0.40714696049690247, "learning_rate": 0.00017570916048792153, "loss": 0.0754, "step": 9166 }, { "epoch": 1.2847932725998599, "grad_norm": 0.2953345775604248, "learning_rate": 0.00017569480985410186, "loss": 0.0485, "step": 9167 }, { "epoch": 1.2849334267694463, "grad_norm": 0.5718219876289368, "learning_rate": 0.00017568045922028221, "loss": 0.1046, "step": 9168 }, { "epoch": 1.285073580939033, "grad_norm": 0.34174638986587524, "learning_rate": 0.00017566610858646254, "loss": 0.0475, "step": 9169 }, { "epoch": 1.2852137351086195, "grad_norm": 0.42920982837677, "learning_rate": 0.00017565175795264287, "loss": 0.043, "step": 9170 }, { "epoch": 1.285353889278206, "grad_norm": 0.8799381256103516, "learning_rate": 
0.00017563740731882325, "loss": 0.1526, "step": 9171 }, { "epoch": 1.2854940434477926, "grad_norm": 0.4349725842475891, "learning_rate": 0.00017562305668500358, "loss": 0.0549, "step": 9172 }, { "epoch": 1.285634197617379, "grad_norm": 0.21843469142913818, "learning_rate": 0.0001756087060511839, "loss": 0.0349, "step": 9173 }, { "epoch": 1.2857743517869658, "grad_norm": 0.47788068652153015, "learning_rate": 0.00017559435541736427, "loss": 0.0503, "step": 9174 }, { "epoch": 1.2859145059565522, "grad_norm": 0.48978129029273987, "learning_rate": 0.0001755800047835446, "loss": 0.1091, "step": 9175 }, { "epoch": 1.2860546601261387, "grad_norm": 0.5065164566040039, "learning_rate": 0.00017556565414972492, "loss": 0.1335, "step": 9176 }, { "epoch": 1.2861948142957254, "grad_norm": 0.47257286310195923, "learning_rate": 0.00017555130351590528, "loss": 0.0516, "step": 9177 }, { "epoch": 1.2863349684653118, "grad_norm": 0.3913591504096985, "learning_rate": 0.0001755369528820856, "loss": 0.0577, "step": 9178 }, { "epoch": 1.2864751226348985, "grad_norm": 0.4789082407951355, "learning_rate": 0.00017552260224826594, "loss": 0.0395, "step": 9179 }, { "epoch": 1.286615276804485, "grad_norm": 0.44679173827171326, "learning_rate": 0.0001755082516144463, "loss": 0.1381, "step": 9180 }, { "epoch": 1.2867554309740714, "grad_norm": 0.039233651012182236, "learning_rate": 0.00017549390098062662, "loss": 0.0037, "step": 9181 }, { "epoch": 1.286895585143658, "grad_norm": 1.6388367414474487, "learning_rate": 0.00017547955034680695, "loss": 0.068, "step": 9182 }, { "epoch": 1.2870357393132446, "grad_norm": 1.3405311107635498, "learning_rate": 0.00017546519971298728, "loss": 0.0605, "step": 9183 }, { "epoch": 1.2871758934828312, "grad_norm": 0.23422621190547943, "learning_rate": 0.00017545084907916766, "loss": 0.0248, "step": 9184 }, { "epoch": 1.2873160476524177, "grad_norm": 0.5003237724304199, "learning_rate": 0.000175436498445348, "loss": 0.0528, "step": 9185 }, { "epoch": 
1.2874562018220042, "grad_norm": 0.3197651505470276, "learning_rate": 0.00017542214781152832, "loss": 0.0804, "step": 9186 }, { "epoch": 1.2875963559915906, "grad_norm": 0.18552614748477936, "learning_rate": 0.00017540779717770867, "loss": 0.038, "step": 9187 }, { "epoch": 1.2877365101611773, "grad_norm": 0.23271670937538147, "learning_rate": 0.000175393446543889, "loss": 0.044, "step": 9188 }, { "epoch": 1.287876664330764, "grad_norm": 0.11801839619874954, "learning_rate": 0.00017537909591006933, "loss": 0.0087, "step": 9189 }, { "epoch": 1.2880168185003504, "grad_norm": 0.3330506980419159, "learning_rate": 0.0001753647452762497, "loss": 0.0335, "step": 9190 }, { "epoch": 1.288156972669937, "grad_norm": 0.20191267132759094, "learning_rate": 0.00017535039464243002, "loss": 0.0497, "step": 9191 }, { "epoch": 1.2882971268395234, "grad_norm": 1.042723536491394, "learning_rate": 0.00017533604400861034, "loss": 0.0761, "step": 9192 }, { "epoch": 1.28843728100911, "grad_norm": 0.36740317940711975, "learning_rate": 0.00017532169337479073, "loss": 0.0447, "step": 9193 }, { "epoch": 1.2885774351786965, "grad_norm": 0.19223883748054504, "learning_rate": 0.00017530734274097106, "loss": 0.0348, "step": 9194 }, { "epoch": 1.2887175893482832, "grad_norm": 0.24793177843093872, "learning_rate": 0.00017529299210715138, "loss": 0.0127, "step": 9195 }, { "epoch": 1.2888577435178696, "grad_norm": 0.27648964524269104, "learning_rate": 0.00017527864147333174, "loss": 0.0588, "step": 9196 }, { "epoch": 1.288997897687456, "grad_norm": 0.3581071197986603, "learning_rate": 0.00017526429083951207, "loss": 0.0507, "step": 9197 }, { "epoch": 1.2891380518570428, "grad_norm": 0.36401063203811646, "learning_rate": 0.0001752499402056924, "loss": 0.0628, "step": 9198 }, { "epoch": 1.2892782060266292, "grad_norm": 0.27278509736061096, "learning_rate": 0.00017523558957187275, "loss": 0.0541, "step": 9199 }, { "epoch": 1.289418360196216, "grad_norm": 0.6484878063201904, "learning_rate": 
0.00017522123893805308, "loss": 0.0563, "step": 9200 }, { "epoch": 1.2895585143658024, "grad_norm": 0.2905210852622986, "learning_rate": 0.0001752068883042334, "loss": 0.0792, "step": 9201 }, { "epoch": 1.2896986685353888, "grad_norm": 0.20685267448425293, "learning_rate": 0.00017519253767041374, "loss": 0.0491, "step": 9202 }, { "epoch": 1.2898388227049755, "grad_norm": 0.6980092525482178, "learning_rate": 0.00017517818703659412, "loss": 0.0819, "step": 9203 }, { "epoch": 1.289978976874562, "grad_norm": 0.358024001121521, "learning_rate": 0.00017516383640277445, "loss": 0.0815, "step": 9204 }, { "epoch": 1.2901191310441487, "grad_norm": 0.2620103359222412, "learning_rate": 0.00017514948576895478, "loss": 0.0393, "step": 9205 }, { "epoch": 1.2902592852137351, "grad_norm": 0.3297736346721649, "learning_rate": 0.00017513513513513513, "loss": 0.0794, "step": 9206 }, { "epoch": 1.2903994393833216, "grad_norm": 0.295920193195343, "learning_rate": 0.00017512078450131546, "loss": 0.0254, "step": 9207 }, { "epoch": 1.2905395935529083, "grad_norm": 0.25152695178985596, "learning_rate": 0.0001751064338674958, "loss": 0.0389, "step": 9208 }, { "epoch": 1.2906797477224947, "grad_norm": 0.35447749495506287, "learning_rate": 0.00017509208323367615, "loss": 0.0752, "step": 9209 }, { "epoch": 1.2908199018920814, "grad_norm": 0.16341543197631836, "learning_rate": 0.00017507773259985648, "loss": 0.03, "step": 9210 }, { "epoch": 1.2909600560616679, "grad_norm": 0.4578576385974884, "learning_rate": 0.0001750633819660368, "loss": 0.0982, "step": 9211 }, { "epoch": 1.2911002102312543, "grad_norm": 0.3087027370929718, "learning_rate": 0.00017504903133221716, "loss": 0.041, "step": 9212 }, { "epoch": 1.291240364400841, "grad_norm": 0.6771711707115173, "learning_rate": 0.0001750346806983975, "loss": 0.0637, "step": 9213 }, { "epoch": 1.2913805185704275, "grad_norm": 0.47814592719078064, "learning_rate": 0.00017502033006457782, "loss": 0.0763, "step": 9214 }, { "epoch": 1.2915206727400141, 
"grad_norm": 0.17093922197818756, "learning_rate": 0.0001750059794307582, "loss": 0.0561, "step": 9215 }, { "epoch": 1.2916608269096006, "grad_norm": 0.49321088194847107, "learning_rate": 0.00017499162879693853, "loss": 0.0625, "step": 9216 }, { "epoch": 1.291800981079187, "grad_norm": 0.3502731919288635, "learning_rate": 0.00017497727816311886, "loss": 0.0487, "step": 9217 }, { "epoch": 1.2919411352487735, "grad_norm": 0.2070801556110382, "learning_rate": 0.00017496292752929918, "loss": 0.036, "step": 9218 }, { "epoch": 1.2920812894183602, "grad_norm": 0.5243269205093384, "learning_rate": 0.00017494857689547954, "loss": 0.0737, "step": 9219 }, { "epoch": 1.2922214435879467, "grad_norm": 0.2569962739944458, "learning_rate": 0.00017493422626165987, "loss": 0.0407, "step": 9220 }, { "epoch": 1.2923615977575333, "grad_norm": 0.49715694785118103, "learning_rate": 0.0001749198756278402, "loss": 0.0649, "step": 9221 }, { "epoch": 1.2925017519271198, "grad_norm": 0.15350471436977386, "learning_rate": 0.00017490552499402055, "loss": 0.0219, "step": 9222 }, { "epoch": 1.2926419060967063, "grad_norm": 0.5645371675491333, "learning_rate": 0.00017489117436020088, "loss": 0.0426, "step": 9223 }, { "epoch": 1.292782060266293, "grad_norm": 0.4345232844352722, "learning_rate": 0.0001748768237263812, "loss": 0.0817, "step": 9224 }, { "epoch": 1.2929222144358794, "grad_norm": 0.3802756369113922, "learning_rate": 0.0001748624730925616, "loss": 0.0667, "step": 9225 }, { "epoch": 1.293062368605466, "grad_norm": 0.4165038764476776, "learning_rate": 0.00017484812245874192, "loss": 0.1399, "step": 9226 }, { "epoch": 1.2932025227750525, "grad_norm": 0.5550238490104675, "learning_rate": 0.00017483377182492225, "loss": 0.089, "step": 9227 }, { "epoch": 1.293342676944639, "grad_norm": 0.4495529532432556, "learning_rate": 0.0001748194211911026, "loss": 0.0636, "step": 9228 }, { "epoch": 1.2934828311142257, "grad_norm": 0.7648413181304932, "learning_rate": 0.00017480507055728293, "loss": 
0.1098, "step": 9229 }, { "epoch": 1.2936229852838121, "grad_norm": 0.2721000015735626, "learning_rate": 0.00017479071992346326, "loss": 0.0282, "step": 9230 }, { "epoch": 1.2937631394533988, "grad_norm": 0.3553127646446228, "learning_rate": 0.00017477636928964362, "loss": 0.0311, "step": 9231 }, { "epoch": 1.2939032936229853, "grad_norm": 0.7984032034873962, "learning_rate": 0.00017476201865582395, "loss": 0.1414, "step": 9232 }, { "epoch": 1.2940434477925717, "grad_norm": 1.4642761945724487, "learning_rate": 0.00017474766802200428, "loss": 0.2559, "step": 9233 }, { "epoch": 1.2941836019621584, "grad_norm": 2.0226428508758545, "learning_rate": 0.00017473331738818466, "loss": 0.3245, "step": 9234 }, { "epoch": 1.2943237561317449, "grad_norm": 1.8149771690368652, "learning_rate": 0.000174718966754365, "loss": 0.4662, "step": 9235 }, { "epoch": 1.2944639103013316, "grad_norm": 0.16745662689208984, "learning_rate": 0.00017470461612054532, "loss": 0.0176, "step": 9236 }, { "epoch": 1.294604064470918, "grad_norm": 0.3042994737625122, "learning_rate": 0.00017469026548672564, "loss": 0.0775, "step": 9237 }, { "epoch": 1.2947442186405045, "grad_norm": 0.3558884263038635, "learning_rate": 0.000174675914852906, "loss": 0.1116, "step": 9238 }, { "epoch": 1.2948843728100912, "grad_norm": 0.2747805118560791, "learning_rate": 0.00017466156421908633, "loss": 0.0519, "step": 9239 }, { "epoch": 1.2950245269796776, "grad_norm": 0.4262479543685913, "learning_rate": 0.00017464721358526666, "loss": 0.058, "step": 9240 }, { "epoch": 1.2951646811492643, "grad_norm": 0.5840110182762146, "learning_rate": 0.000174632862951447, "loss": 0.0629, "step": 9241 }, { "epoch": 1.2953048353188508, "grad_norm": 0.2950681746006012, "learning_rate": 0.00017461851231762734, "loss": 0.0772, "step": 9242 }, { "epoch": 1.2954449894884372, "grad_norm": 0.41101083159446716, "learning_rate": 0.00017460416168380767, "loss": 0.1177, "step": 9243 }, { "epoch": 1.295585143658024, "grad_norm": 0.26323914527893066, 
"learning_rate": 0.00017458981104998803, "loss": 0.0484, "step": 9244 }, { "epoch": 1.2957252978276104, "grad_norm": 0.2590402066707611, "learning_rate": 0.00017457546041616835, "loss": 0.0828, "step": 9245 }, { "epoch": 1.295865451997197, "grad_norm": 0.5093362927436829, "learning_rate": 0.00017456110978234868, "loss": 0.0439, "step": 9246 }, { "epoch": 1.2960056061667835, "grad_norm": 0.2033235728740692, "learning_rate": 0.00017454675914852907, "loss": 0.0439, "step": 9247 }, { "epoch": 1.29614576033637, "grad_norm": 0.19060920178890228, "learning_rate": 0.0001745324085147094, "loss": 0.0564, "step": 9248 }, { "epoch": 1.2962859145059564, "grad_norm": 0.4060218930244446, "learning_rate": 0.00017451805788088972, "loss": 0.0381, "step": 9249 }, { "epoch": 1.296426068675543, "grad_norm": 0.14771825075149536, "learning_rate": 0.00017450370724707008, "loss": 0.0491, "step": 9250 }, { "epoch": 1.2965662228451296, "grad_norm": 0.19069533050060272, "learning_rate": 0.0001744893566132504, "loss": 0.0386, "step": 9251 }, { "epoch": 1.2967063770147162, "grad_norm": 0.2760177254676819, "learning_rate": 0.00017447500597943074, "loss": 0.1309, "step": 9252 }, { "epoch": 1.2968465311843027, "grad_norm": 0.11279609799385071, "learning_rate": 0.00017446065534561106, "loss": 0.0091, "step": 9253 }, { "epoch": 1.2969866853538892, "grad_norm": 0.7452822923660278, "learning_rate": 0.00017444630471179142, "loss": 0.1099, "step": 9254 }, { "epoch": 1.2971268395234758, "grad_norm": 0.2421618402004242, "learning_rate": 0.00017443195407797175, "loss": 0.0931, "step": 9255 }, { "epoch": 1.2972669936930623, "grad_norm": 0.19737926125526428, "learning_rate": 0.00017441760344415208, "loss": 0.0291, "step": 9256 }, { "epoch": 1.297407147862649, "grad_norm": 0.3796572685241699, "learning_rate": 0.00017440325281033246, "loss": 0.078, "step": 9257 }, { "epoch": 1.2975473020322355, "grad_norm": 0.19226792454719543, "learning_rate": 0.0001743889021765128, "loss": 0.0564, "step": 9258 }, { "epoch": 
1.297687456201822, "grad_norm": 0.41613495349884033, "learning_rate": 0.00017437455154269312, "loss": 0.0576, "step": 9259 }, { "epoch": 1.2978276103714086, "grad_norm": 0.21988633275032043, "learning_rate": 0.00017436020090887347, "loss": 0.0351, "step": 9260 }, { "epoch": 1.297967764540995, "grad_norm": 0.26572853326797485, "learning_rate": 0.0001743458502750538, "loss": 0.0575, "step": 9261 }, { "epoch": 1.2981079187105817, "grad_norm": 0.2526540458202362, "learning_rate": 0.00017433149964123413, "loss": 0.0363, "step": 9262 }, { "epoch": 1.2982480728801682, "grad_norm": 0.18137133121490479, "learning_rate": 0.00017431714900741449, "loss": 0.0285, "step": 9263 }, { "epoch": 1.2983882270497547, "grad_norm": 0.7921618819236755, "learning_rate": 0.00017430279837359481, "loss": 0.0348, "step": 9264 }, { "epoch": 1.2985283812193413, "grad_norm": 0.40944787859916687, "learning_rate": 0.00017428844773977514, "loss": 0.0195, "step": 9265 }, { "epoch": 1.2986685353889278, "grad_norm": 0.7854244709014893, "learning_rate": 0.00017427409710595552, "loss": 0.1102, "step": 9266 }, { "epoch": 1.2988086895585145, "grad_norm": 0.23199813067913055, "learning_rate": 0.00017425974647213585, "loss": 0.0287, "step": 9267 }, { "epoch": 1.298948843728101, "grad_norm": 0.34447017312049866, "learning_rate": 0.00017424539583831618, "loss": 0.0176, "step": 9268 }, { "epoch": 1.2990889978976874, "grad_norm": 0.43597227334976196, "learning_rate": 0.00017423104520449654, "loss": 0.0332, "step": 9269 }, { "epoch": 1.299229152067274, "grad_norm": 0.21176619827747345, "learning_rate": 0.00017421669457067687, "loss": 0.0409, "step": 9270 }, { "epoch": 1.2993693062368605, "grad_norm": 0.6289152503013611, "learning_rate": 0.0001742023439368572, "loss": 0.0835, "step": 9271 }, { "epoch": 1.2995094604064472, "grad_norm": 0.48130685091018677, "learning_rate": 0.00017418799330303752, "loss": 0.0497, "step": 9272 }, { "epoch": 1.2996496145760337, "grad_norm": 0.9445815682411194, "learning_rate": 
0.00017417364266921788, "loss": 0.0896, "step": 9273 }, { "epoch": 1.2997897687456201, "grad_norm": 0.12780983746051788, "learning_rate": 0.0001741592920353982, "loss": 0.014, "step": 9274 }, { "epoch": 1.2999299229152066, "grad_norm": 0.5552884340286255, "learning_rate": 0.00017414494140157854, "loss": 0.0891, "step": 9275 }, { "epoch": 1.3000700770847933, "grad_norm": 0.45527178049087524, "learning_rate": 0.0001741305907677589, "loss": 0.0324, "step": 9276 }, { "epoch": 1.30021023125438, "grad_norm": 1.0368541479110718, "learning_rate": 0.00017411624013393922, "loss": 0.0302, "step": 9277 }, { "epoch": 1.3003503854239664, "grad_norm": 1.3127162456512451, "learning_rate": 0.00017410188950011955, "loss": 0.1259, "step": 9278 }, { "epoch": 1.3004905395935529, "grad_norm": 0.40234002470970154, "learning_rate": 0.00017408753886629993, "loss": 0.1897, "step": 9279 }, { "epoch": 1.3006306937631393, "grad_norm": 0.031851742416620255, "learning_rate": 0.00017407318823248026, "loss": 0.0026, "step": 9280 }, { "epoch": 1.300770847932726, "grad_norm": 0.3209799528121948, "learning_rate": 0.0001740588375986606, "loss": 0.0263, "step": 9281 }, { "epoch": 1.3009110021023125, "grad_norm": 0.7005457878112793, "learning_rate": 0.00017404448696484094, "loss": 0.0917, "step": 9282 }, { "epoch": 1.3010511562718992, "grad_norm": 1.1309617757797241, "learning_rate": 0.00017403013633102127, "loss": 0.0651, "step": 9283 }, { "epoch": 1.3011913104414856, "grad_norm": 0.5017295479774475, "learning_rate": 0.0001740157856972016, "loss": 0.0602, "step": 9284 }, { "epoch": 1.301331464611072, "grad_norm": 2.6762759685516357, "learning_rate": 0.00017400143506338196, "loss": 0.0997, "step": 9285 }, { "epoch": 1.3014716187806588, "grad_norm": 0.2774607241153717, "learning_rate": 0.00017398708442956229, "loss": 0.0562, "step": 9286 }, { "epoch": 1.3016117729502452, "grad_norm": 0.5338559150695801, "learning_rate": 0.00017397273379574261, "loss": 0.1086, "step": 9287 }, { "epoch": 1.301751927119832, 
"grad_norm": 0.18976879119873047, "learning_rate": 0.00017395838316192294, "loss": 0.0413, "step": 9288 }, { "epoch": 1.3018920812894184, "grad_norm": 0.4055008292198181, "learning_rate": 0.00017394403252810333, "loss": 0.0592, "step": 9289 }, { "epoch": 1.3020322354590048, "grad_norm": 0.17511360347270966, "learning_rate": 0.00017392968189428365, "loss": 0.0443, "step": 9290 }, { "epoch": 1.3021723896285915, "grad_norm": 0.2881506383419037, "learning_rate": 0.00017391533126046398, "loss": 0.0295, "step": 9291 }, { "epoch": 1.302312543798178, "grad_norm": 0.3340437710285187, "learning_rate": 0.00017390098062664434, "loss": 0.0528, "step": 9292 }, { "epoch": 1.3024526979677646, "grad_norm": 0.27340471744537354, "learning_rate": 0.00017388662999282467, "loss": 0.0684, "step": 9293 }, { "epoch": 1.302592852137351, "grad_norm": 0.20512999594211578, "learning_rate": 0.000173872279359005, "loss": 0.0392, "step": 9294 }, { "epoch": 1.3027330063069376, "grad_norm": 0.4061545729637146, "learning_rate": 0.00017385792872518535, "loss": 0.0418, "step": 9295 }, { "epoch": 1.3028731604765242, "grad_norm": 0.3768601417541504, "learning_rate": 0.00017384357809136568, "loss": 0.0646, "step": 9296 }, { "epoch": 1.3030133146461107, "grad_norm": 0.5274515151977539, "learning_rate": 0.000173829227457546, "loss": 0.0754, "step": 9297 }, { "epoch": 1.3031534688156974, "grad_norm": 0.38769614696502686, "learning_rate": 0.0001738148768237264, "loss": 0.078, "step": 9298 }, { "epoch": 1.3032936229852838, "grad_norm": 0.3846321403980255, "learning_rate": 0.00017380052618990672, "loss": 0.1279, "step": 9299 }, { "epoch": 1.3034337771548703, "grad_norm": 0.19241230189800262, "learning_rate": 0.00017378617555608705, "loss": 0.0142, "step": 9300 }, { "epoch": 1.303573931324457, "grad_norm": 0.4274437427520752, "learning_rate": 0.0001737718249222674, "loss": 0.0581, "step": 9301 }, { "epoch": 1.3037140854940434, "grad_norm": 0.2873820662498474, "learning_rate": 0.00017375747428844773, "loss": 
0.0161, "step": 9302 }, { "epoch": 1.3038542396636301, "grad_norm": 0.2634715437889099, "learning_rate": 0.00017374312365462806, "loss": 0.0654, "step": 9303 }, { "epoch": 1.3039943938332166, "grad_norm": 0.3628040850162506, "learning_rate": 0.0001737287730208084, "loss": 0.0415, "step": 9304 }, { "epoch": 1.304134548002803, "grad_norm": 0.3178967237472534, "learning_rate": 0.00017371442238698875, "loss": 0.0853, "step": 9305 }, { "epoch": 1.3042747021723895, "grad_norm": 0.6008825898170471, "learning_rate": 0.00017370007175316907, "loss": 0.0477, "step": 9306 }, { "epoch": 1.3044148563419762, "grad_norm": 0.23644877970218658, "learning_rate": 0.0001736857211193494, "loss": 0.0708, "step": 9307 }, { "epoch": 1.3045550105115626, "grad_norm": 0.26526695489883423, "learning_rate": 0.00017367137048552976, "loss": 0.0462, "step": 9308 }, { "epoch": 1.3046951646811493, "grad_norm": 0.70341956615448, "learning_rate": 0.0001736570198517101, "loss": 0.0793, "step": 9309 }, { "epoch": 1.3048353188507358, "grad_norm": 0.26267218589782715, "learning_rate": 0.00017364266921789044, "loss": 0.025, "step": 9310 }, { "epoch": 1.3049754730203222, "grad_norm": 0.5026963949203491, "learning_rate": 0.0001736283185840708, "loss": 0.0813, "step": 9311 }, { "epoch": 1.305115627189909, "grad_norm": 0.21843931078910828, "learning_rate": 0.00017361396795025113, "loss": 0.032, "step": 9312 }, { "epoch": 1.3052557813594954, "grad_norm": 1.003778338432312, "learning_rate": 0.00017359961731643146, "loss": 0.0737, "step": 9313 }, { "epoch": 1.305395935529082, "grad_norm": 0.2915496230125427, "learning_rate": 0.0001735852666826118, "loss": 0.0226, "step": 9314 }, { "epoch": 1.3055360896986685, "grad_norm": 0.15265758335590363, "learning_rate": 0.00017357091604879214, "loss": 0.0205, "step": 9315 }, { "epoch": 1.305676243868255, "grad_norm": 0.3716699182987213, "learning_rate": 0.00017355656541497247, "loss": 0.0347, "step": 9316 }, { "epoch": 1.3058163980378417, "grad_norm": 0.2023908793926239, 
"learning_rate": 0.00017354221478115282, "loss": 0.0311, "step": 9317 }, { "epoch": 1.3059565522074281, "grad_norm": 0.21397031843662262, "learning_rate": 0.00017352786414733315, "loss": 0.0318, "step": 9318 }, { "epoch": 1.3060967063770148, "grad_norm": 0.6200441122055054, "learning_rate": 0.00017351351351351348, "loss": 0.0923, "step": 9319 }, { "epoch": 1.3062368605466013, "grad_norm": 1.9982049465179443, "learning_rate": 0.00017349916287969386, "loss": 0.0432, "step": 9320 }, { "epoch": 1.3063770147161877, "grad_norm": 0.3658328056335449, "learning_rate": 0.0001734848122458742, "loss": 0.0497, "step": 9321 }, { "epoch": 1.3065171688857744, "grad_norm": 0.8792640566825867, "learning_rate": 0.00017347046161205452, "loss": 0.032, "step": 9322 }, { "epoch": 1.3066573230553609, "grad_norm": 0.2585294544696808, "learning_rate": 0.00017345611097823485, "loss": 0.0229, "step": 9323 }, { "epoch": 1.3067974772249475, "grad_norm": 0.647038459777832, "learning_rate": 0.0001734417603444152, "loss": 0.1344, "step": 9324 }, { "epoch": 1.306937631394534, "grad_norm": 0.3910023868083954, "learning_rate": 0.00017342740971059553, "loss": 0.0489, "step": 9325 }, { "epoch": 1.3070777855641205, "grad_norm": 0.37376490235328674, "learning_rate": 0.00017341305907677586, "loss": 0.1398, "step": 9326 }, { "epoch": 1.3072179397337071, "grad_norm": 0.2519770562648773, "learning_rate": 0.00017339870844295622, "loss": 0.052, "step": 9327 }, { "epoch": 1.3073580939032936, "grad_norm": 0.5523326992988586, "learning_rate": 0.00017338435780913655, "loss": 0.0797, "step": 9328 }, { "epoch": 1.3074982480728803, "grad_norm": 0.7268182635307312, "learning_rate": 0.00017337000717531687, "loss": 0.1285, "step": 9329 }, { "epoch": 1.3076384022424667, "grad_norm": 0.4166392385959625, "learning_rate": 0.00017335565654149726, "loss": 0.0693, "step": 9330 }, { "epoch": 1.3077785564120532, "grad_norm": 0.22859403491020203, "learning_rate": 0.00017334130590767759, "loss": 0.0323, "step": 9331 }, { "epoch": 
1.3079187105816397, "grad_norm": 6.437104225158691, "learning_rate": 0.00017332695527385791, "loss": 0.061, "step": 9332 }, { "epoch": 1.3080588647512263, "grad_norm": 0.2819992005825043, "learning_rate": 0.00017331260464003827, "loss": 0.018, "step": 9333 }, { "epoch": 1.308199018920813, "grad_norm": 0.5006887316703796, "learning_rate": 0.0001732982540062186, "loss": 0.1007, "step": 9334 }, { "epoch": 1.3083391730903995, "grad_norm": 0.27398598194122314, "learning_rate": 0.00017328390337239893, "loss": 0.0159, "step": 9335 }, { "epoch": 1.308479327259986, "grad_norm": 0.5111422538757324, "learning_rate": 0.00017326955273857928, "loss": 0.0787, "step": 9336 }, { "epoch": 1.3086194814295724, "grad_norm": 0.3599962294101715, "learning_rate": 0.0001732552021047596, "loss": 0.0345, "step": 9337 }, { "epoch": 1.308759635599159, "grad_norm": 0.14783678948879242, "learning_rate": 0.00017324085147093994, "loss": 0.042, "step": 9338 }, { "epoch": 1.3088997897687455, "grad_norm": 0.37984761595726013, "learning_rate": 0.00017322650083712027, "loss": 0.0576, "step": 9339 }, { "epoch": 1.3090399439383322, "grad_norm": 0.40532270073890686, "learning_rate": 0.00017321215020330062, "loss": 0.1169, "step": 9340 }, { "epoch": 1.3091800981079187, "grad_norm": 0.3553960621356964, "learning_rate": 0.00017319779956948095, "loss": 0.0624, "step": 9341 }, { "epoch": 1.3093202522775051, "grad_norm": 0.32200583815574646, "learning_rate": 0.0001731834489356613, "loss": 0.0386, "step": 9342 }, { "epoch": 1.3094604064470918, "grad_norm": 0.5956253409385681, "learning_rate": 0.00017316909830184166, "loss": 0.0726, "step": 9343 }, { "epoch": 1.3096005606166783, "grad_norm": 0.4274877607822418, "learning_rate": 0.000173154747668022, "loss": 0.051, "step": 9344 }, { "epoch": 1.309740714786265, "grad_norm": 0.20143482089042664, "learning_rate": 0.00017314039703420232, "loss": 0.0419, "step": 9345 }, { "epoch": 1.3098808689558514, "grad_norm": 0.4330682158470154, "learning_rate": 
0.00017312604640038268, "loss": 0.0895, "step": 9346 }, { "epoch": 1.310021023125438, "grad_norm": 0.37343457341194153, "learning_rate": 0.000173111695766563, "loss": 0.0459, "step": 9347 }, { "epoch": 1.3101611772950246, "grad_norm": 0.20155425369739532, "learning_rate": 0.00017309734513274333, "loss": 0.037, "step": 9348 }, { "epoch": 1.310301331464611, "grad_norm": 0.29298028349876404, "learning_rate": 0.0001730829944989237, "loss": 0.0571, "step": 9349 }, { "epoch": 1.3104414856341977, "grad_norm": 0.22942262887954712, "learning_rate": 0.00017306864386510402, "loss": 0.0369, "step": 9350 }, { "epoch": 1.3105816398037842, "grad_norm": 0.2129916250705719, "learning_rate": 0.00017305429323128435, "loss": 0.0381, "step": 9351 }, { "epoch": 1.3107217939733706, "grad_norm": 0.38374507427215576, "learning_rate": 0.00017303994259746473, "loss": 0.0769, "step": 9352 }, { "epoch": 1.3108619481429573, "grad_norm": 0.4300970733165741, "learning_rate": 0.00017302559196364506, "loss": 0.0656, "step": 9353 }, { "epoch": 1.3110021023125438, "grad_norm": 0.12158835679292679, "learning_rate": 0.0001730112413298254, "loss": 0.0125, "step": 9354 }, { "epoch": 1.3111422564821305, "grad_norm": 0.4713352918624878, "learning_rate": 0.00017299689069600574, "loss": 0.0296, "step": 9355 }, { "epoch": 1.311282410651717, "grad_norm": 0.5789030194282532, "learning_rate": 0.00017298254006218607, "loss": 0.0608, "step": 9356 }, { "epoch": 1.3114225648213034, "grad_norm": 0.3154323697090149, "learning_rate": 0.0001729681894283664, "loss": 0.0494, "step": 9357 }, { "epoch": 1.31156271899089, "grad_norm": 0.35313957929611206, "learning_rate": 0.00017295383879454673, "loss": 0.0409, "step": 9358 }, { "epoch": 1.3117028731604765, "grad_norm": 0.4066758453845978, "learning_rate": 0.00017293948816072708, "loss": 0.0956, "step": 9359 }, { "epoch": 1.3118430273300632, "grad_norm": 0.2841693162918091, "learning_rate": 0.0001729251375269074, "loss": 0.0455, "step": 9360 }, { "epoch": 1.3119831814996497, 
"grad_norm": 0.33462291955947876, "learning_rate": 0.00017291078689308774, "loss": 0.0682, "step": 9361 }, { "epoch": 1.3121233356692361, "grad_norm": 0.6383495330810547, "learning_rate": 0.00017289643625926812, "loss": 0.0635, "step": 9362 }, { "epoch": 1.3122634898388226, "grad_norm": 0.6227530241012573, "learning_rate": 0.00017288208562544845, "loss": 0.0355, "step": 9363 }, { "epoch": 1.3124036440084093, "grad_norm": 0.8222858309745789, "learning_rate": 0.00017286773499162878, "loss": 0.1387, "step": 9364 }, { "epoch": 1.3125437981779957, "grad_norm": 0.43814709782600403, "learning_rate": 0.00017285338435780914, "loss": 0.0944, "step": 9365 }, { "epoch": 1.3126839523475824, "grad_norm": 0.2018711268901825, "learning_rate": 0.00017283903372398947, "loss": 0.0633, "step": 9366 }, { "epoch": 1.3128241065171689, "grad_norm": 0.17528922855854034, "learning_rate": 0.0001728246830901698, "loss": 0.0257, "step": 9367 }, { "epoch": 1.3129642606867553, "grad_norm": 0.47659605741500854, "learning_rate": 0.00017281033245635015, "loss": 0.0438, "step": 9368 }, { "epoch": 1.313104414856342, "grad_norm": 0.3596767783164978, "learning_rate": 0.00017279598182253048, "loss": 0.0288, "step": 9369 }, { "epoch": 1.3132445690259285, "grad_norm": 0.263205885887146, "learning_rate": 0.0001727816311887108, "loss": 0.0661, "step": 9370 }, { "epoch": 1.3133847231955151, "grad_norm": 0.38372817635536194, "learning_rate": 0.00017276728055489116, "loss": 0.0879, "step": 9371 }, { "epoch": 1.3135248773651016, "grad_norm": 0.41303446888923645, "learning_rate": 0.0001727529299210715, "loss": 0.0586, "step": 9372 }, { "epoch": 1.313665031534688, "grad_norm": 0.3223862648010254, "learning_rate": 0.00017273857928725182, "loss": 0.0484, "step": 9373 }, { "epoch": 1.3138051857042747, "grad_norm": 0.15607298910617828, "learning_rate": 0.00017272422865343217, "loss": 0.0321, "step": 9374 }, { "epoch": 1.3139453398738612, "grad_norm": 0.3347414433956146, "learning_rate": 0.00017270987801961253, 
"loss": 0.0268, "step": 9375 }, { "epoch": 1.3140854940434479, "grad_norm": 0.5157453417778015, "learning_rate": 0.00017269552738579286, "loss": 0.1011, "step": 9376 }, { "epoch": 1.3142256482130343, "grad_norm": 0.4231446087360382, "learning_rate": 0.0001726811767519732, "loss": 0.0207, "step": 9377 }, { "epoch": 1.3143658023826208, "grad_norm": 1.3078078031539917, "learning_rate": 0.00017266682611815354, "loss": 0.192, "step": 9378 }, { "epoch": 1.3145059565522075, "grad_norm": 0.11999952048063278, "learning_rate": 0.00017265247548433387, "loss": 0.0187, "step": 9379 }, { "epoch": 1.314646110721794, "grad_norm": 0.8334781527519226, "learning_rate": 0.0001726381248505142, "loss": 0.0311, "step": 9380 }, { "epoch": 1.3147862648913806, "grad_norm": 0.40437230467796326, "learning_rate": 0.00017262377421669456, "loss": 0.0528, "step": 9381 }, { "epoch": 1.314926419060967, "grad_norm": 0.5065985918045044, "learning_rate": 0.00017260942358287488, "loss": 0.0933, "step": 9382 }, { "epoch": 1.3150665732305535, "grad_norm": 0.3498857915401459, "learning_rate": 0.0001725950729490552, "loss": 0.015, "step": 9383 }, { "epoch": 1.3152067274001402, "grad_norm": 0.6446489095687866, "learning_rate": 0.0001725807223152356, "loss": 0.0514, "step": 9384 }, { "epoch": 1.3153468815697267, "grad_norm": 0.7263616323471069, "learning_rate": 0.00017256637168141592, "loss": 0.0958, "step": 9385 }, { "epoch": 1.3154870357393134, "grad_norm": 0.5223122835159302, "learning_rate": 0.00017255202104759625, "loss": 0.0604, "step": 9386 }, { "epoch": 1.3156271899088998, "grad_norm": 0.19896076619625092, "learning_rate": 0.0001725376704137766, "loss": 0.022, "step": 9387 }, { "epoch": 1.3157673440784863, "grad_norm": 0.5604138970375061, "learning_rate": 0.00017252331977995694, "loss": 0.0839, "step": 9388 }, { "epoch": 1.315907498248073, "grad_norm": 0.26151901483535767, "learning_rate": 0.00017250896914613727, "loss": 0.0645, "step": 9389 }, { "epoch": 1.3160476524176594, "grad_norm": 
0.6319864988327026, "learning_rate": 0.00017249461851231762, "loss": 0.0483, "step": 9390 }, { "epoch": 1.316187806587246, "grad_norm": 0.34077081084251404, "learning_rate": 0.00017248026787849795, "loss": 0.028, "step": 9391 }, { "epoch": 1.3163279607568326, "grad_norm": 0.6036697030067444, "learning_rate": 0.00017246591724467828, "loss": 0.0897, "step": 9392 }, { "epoch": 1.316468114926419, "grad_norm": 0.197063609957695, "learning_rate": 0.0001724515666108586, "loss": 0.0399, "step": 9393 }, { "epoch": 1.3166082690960055, "grad_norm": 0.30906596779823303, "learning_rate": 0.000172437215977039, "loss": 0.0508, "step": 9394 }, { "epoch": 1.3167484232655922, "grad_norm": 0.32832279801368713, "learning_rate": 0.00017242286534321932, "loss": 0.0637, "step": 9395 }, { "epoch": 1.3168885774351786, "grad_norm": 0.4837797284126282, "learning_rate": 0.00017240851470939965, "loss": 0.1044, "step": 9396 }, { "epoch": 1.3170287316047653, "grad_norm": 0.7901841998100281, "learning_rate": 0.00017239416407558, "loss": 0.1214, "step": 9397 }, { "epoch": 1.3171688857743518, "grad_norm": 0.6718924641609192, "learning_rate": 0.00017237981344176033, "loss": 0.0555, "step": 9398 }, { "epoch": 1.3173090399439382, "grad_norm": 0.2511550784111023, "learning_rate": 0.00017236546280794066, "loss": 0.0536, "step": 9399 }, { "epoch": 1.317449194113525, "grad_norm": 0.4304676949977875, "learning_rate": 0.00017235111217412102, "loss": 0.0559, "step": 9400 }, { "epoch": 1.3175893482831114, "grad_norm": 0.2692815661430359, "learning_rate": 0.00017233676154030134, "loss": 0.0447, "step": 9401 }, { "epoch": 1.317729502452698, "grad_norm": 0.3264482319355011, "learning_rate": 0.00017232241090648167, "loss": 0.0345, "step": 9402 }, { "epoch": 1.3178696566222845, "grad_norm": 0.19648206233978271, "learning_rate": 0.00017230806027266203, "loss": 0.0398, "step": 9403 }, { "epoch": 1.318009810791871, "grad_norm": 0.3099084198474884, "learning_rate": 0.00017229370963884236, "loss": 0.0385, "step": 9404 
}, { "epoch": 1.3181499649614576, "grad_norm": 0.1876903474330902, "learning_rate": 0.00017227935900502269, "loss": 0.0285, "step": 9405 }, { "epoch": 1.318290119131044, "grad_norm": 0.50505131483078, "learning_rate": 0.00017226500837120307, "loss": 0.0322, "step": 9406 }, { "epoch": 1.3184302733006308, "grad_norm": 0.4616032838821411, "learning_rate": 0.0001722506577373834, "loss": 0.0424, "step": 9407 }, { "epoch": 1.3185704274702172, "grad_norm": 0.3054984211921692, "learning_rate": 0.00017223630710356373, "loss": 0.0521, "step": 9408 }, { "epoch": 1.3187105816398037, "grad_norm": 0.45004233717918396, "learning_rate": 0.00017222195646974405, "loss": 0.0632, "step": 9409 }, { "epoch": 1.3188507358093904, "grad_norm": 0.7899589538574219, "learning_rate": 0.0001722076058359244, "loss": 0.054, "step": 9410 }, { "epoch": 1.3189908899789768, "grad_norm": 0.41488322615623474, "learning_rate": 0.00017219325520210474, "loss": 0.0583, "step": 9411 }, { "epoch": 1.3191310441485635, "grad_norm": 0.364869624376297, "learning_rate": 0.00017217890456828507, "loss": 0.1081, "step": 9412 }, { "epoch": 1.31927119831815, "grad_norm": 0.0936540812253952, "learning_rate": 0.00017216455393446542, "loss": 0.0076, "step": 9413 }, { "epoch": 1.3194113524877364, "grad_norm": 0.24324995279312134, "learning_rate": 0.00017215020330064575, "loss": 0.0191, "step": 9414 }, { "epoch": 1.3195515066573231, "grad_norm": 0.5959106087684631, "learning_rate": 0.00017213585266682608, "loss": 0.0402, "step": 9415 }, { "epoch": 1.3196916608269096, "grad_norm": 0.22388191521167755, "learning_rate": 0.00017212150203300646, "loss": 0.0106, "step": 9416 }, { "epoch": 1.3198318149964963, "grad_norm": 0.6405531764030457, "learning_rate": 0.0001721071513991868, "loss": 0.0582, "step": 9417 }, { "epoch": 1.3199719691660827, "grad_norm": 0.33234524726867676, "learning_rate": 0.00017209280076536712, "loss": 0.0258, "step": 9418 }, { "epoch": 1.3201121233356692, "grad_norm": 0.5107434988021851, "learning_rate": 
0.00017207845013154748, "loss": 0.0705, "step": 9419 }, { "epoch": 1.3202522775052556, "grad_norm": 0.519616961479187, "learning_rate": 0.0001720640994977278, "loss": 0.1073, "step": 9420 }, { "epoch": 1.3203924316748423, "grad_norm": 0.22811037302017212, "learning_rate": 0.00017204974886390813, "loss": 0.0211, "step": 9421 }, { "epoch": 1.320532585844429, "grad_norm": 0.3887975215911865, "learning_rate": 0.0001720353982300885, "loss": 0.0743, "step": 9422 }, { "epoch": 1.3206727400140155, "grad_norm": 0.36133256554603577, "learning_rate": 0.00017202104759626882, "loss": 0.0566, "step": 9423 }, { "epoch": 1.320812894183602, "grad_norm": 0.1473684459924698, "learning_rate": 0.00017200669696244915, "loss": 0.0187, "step": 9424 }, { "epoch": 1.3209530483531884, "grad_norm": 0.18020454049110413, "learning_rate": 0.00017199234632862953, "loss": 0.0216, "step": 9425 }, { "epoch": 1.321093202522775, "grad_norm": 0.3773222863674164, "learning_rate": 0.00017197799569480986, "loss": 0.0402, "step": 9426 }, { "epoch": 1.3212333566923615, "grad_norm": 0.5045652389526367, "learning_rate": 0.00017196364506099018, "loss": 0.0258, "step": 9427 }, { "epoch": 1.3213735108619482, "grad_norm": 2.0684447288513184, "learning_rate": 0.0001719492944271705, "loss": 0.0577, "step": 9428 }, { "epoch": 1.3215136650315347, "grad_norm": 0.4795096516609192, "learning_rate": 0.00017193494379335087, "loss": 0.0619, "step": 9429 }, { "epoch": 1.3216538192011211, "grad_norm": 0.32334205508232117, "learning_rate": 0.0001719205931595312, "loss": 0.0442, "step": 9430 }, { "epoch": 1.3217939733707078, "grad_norm": 0.5212321281433105, "learning_rate": 0.00017190624252571153, "loss": 0.0549, "step": 9431 }, { "epoch": 1.3219341275402943, "grad_norm": 0.26268699765205383, "learning_rate": 0.00017189189189189188, "loss": 0.0056, "step": 9432 }, { "epoch": 1.322074281709881, "grad_norm": 1.6961874961853027, "learning_rate": 0.0001718775412580722, "loss": 0.1777, "step": 9433 }, { "epoch": 1.3222144358794674, 
"grad_norm": 6.129197120666504, "learning_rate": 0.00017186319062425254, "loss": 0.5777, "step": 9434 }, { "epoch": 1.3223545900490539, "grad_norm": 1.4504064321517944, "learning_rate": 0.0001718488399904329, "loss": 0.1988, "step": 9435 }, { "epoch": 1.3224947442186405, "grad_norm": 0.37830254435539246, "learning_rate": 0.00017183448935661322, "loss": 0.0386, "step": 9436 }, { "epoch": 1.322634898388227, "grad_norm": 0.29685354232788086, "learning_rate": 0.00017182013872279358, "loss": 0.0433, "step": 9437 }, { "epoch": 1.3227750525578137, "grad_norm": 0.3068963289260864, "learning_rate": 0.00017180578808897393, "loss": 0.0559, "step": 9438 }, { "epoch": 1.3229152067274002, "grad_norm": 0.36144188046455383, "learning_rate": 0.00017179143745515426, "loss": 0.1003, "step": 9439 }, { "epoch": 1.3230553608969866, "grad_norm": 0.23329022526741028, "learning_rate": 0.0001717770868213346, "loss": 0.0662, "step": 9440 }, { "epoch": 1.3231955150665733, "grad_norm": 0.31613385677337646, "learning_rate": 0.00017176273618751495, "loss": 0.0467, "step": 9441 }, { "epoch": 1.3233356692361598, "grad_norm": 0.7425344586372375, "learning_rate": 0.00017174838555369528, "loss": 0.1065, "step": 9442 }, { "epoch": 1.3234758234057464, "grad_norm": 0.2765350639820099, "learning_rate": 0.0001717340349198756, "loss": 0.0658, "step": 9443 }, { "epoch": 1.323615977575333, "grad_norm": 0.35815995931625366, "learning_rate": 0.00017171968428605593, "loss": 0.0613, "step": 9444 }, { "epoch": 1.3237561317449194, "grad_norm": 0.4471622705459595, "learning_rate": 0.0001717053336522363, "loss": 0.0337, "step": 9445 }, { "epoch": 1.323896285914506, "grad_norm": 0.27890393137931824, "learning_rate": 0.00017169098301841662, "loss": 0.0386, "step": 9446 }, { "epoch": 1.3240364400840925, "grad_norm": 0.3145810067653656, "learning_rate": 0.00017167663238459695, "loss": 0.0507, "step": 9447 }, { "epoch": 1.3241765942536792, "grad_norm": 0.11248306185007095, "learning_rate": 0.00017166228175077733, "loss": 
0.0114, "step": 9448 }, { "epoch": 1.3243167484232656, "grad_norm": 0.4057770073413849, "learning_rate": 0.00017164793111695766, "loss": 0.0405, "step": 9449 }, { "epoch": 1.324456902592852, "grad_norm": 0.4622826874256134, "learning_rate": 0.00017163358048313799, "loss": 0.0638, "step": 9450 }, { "epoch": 1.3245970567624386, "grad_norm": 0.3345355689525604, "learning_rate": 0.00017161922984931834, "loss": 0.0568, "step": 9451 }, { "epoch": 1.3247372109320252, "grad_norm": 0.11428558826446533, "learning_rate": 0.00017160487921549867, "loss": 0.014, "step": 9452 }, { "epoch": 1.3248773651016117, "grad_norm": 0.218642920255661, "learning_rate": 0.000171590528581679, "loss": 0.0353, "step": 9453 }, { "epoch": 1.3250175192711984, "grad_norm": 0.7660614848136902, "learning_rate": 0.00017157617794785935, "loss": 0.0524, "step": 9454 }, { "epoch": 1.3251576734407848, "grad_norm": 0.38175347447395325, "learning_rate": 0.00017156182731403968, "loss": 0.0702, "step": 9455 }, { "epoch": 1.3252978276103713, "grad_norm": 0.49460795521736145, "learning_rate": 0.00017154747668022, "loss": 0.0506, "step": 9456 }, { "epoch": 1.325437981779958, "grad_norm": 0.3104495108127594, "learning_rate": 0.0001715331260464004, "loss": 0.062, "step": 9457 }, { "epoch": 1.3255781359495444, "grad_norm": 0.241384819149971, "learning_rate": 0.00017151877541258072, "loss": 0.0708, "step": 9458 }, { "epoch": 1.3257182901191311, "grad_norm": 0.6427384614944458, "learning_rate": 0.00017150442477876105, "loss": 0.0274, "step": 9459 }, { "epoch": 1.3258584442887176, "grad_norm": 0.38693928718566895, "learning_rate": 0.0001714900741449414, "loss": 0.0201, "step": 9460 }, { "epoch": 1.325998598458304, "grad_norm": 0.3339323401451111, "learning_rate": 0.00017147572351112174, "loss": 0.0551, "step": 9461 }, { "epoch": 1.3261387526278907, "grad_norm": 0.2760277986526489, "learning_rate": 0.00017146137287730206, "loss": 0.0368, "step": 9462 }, { "epoch": 1.3262789067974772, "grad_norm": 0.8217750191688538, 
"learning_rate": 0.0001714470222434824, "loss": 0.0486, "step": 9463 }, { "epoch": 1.3264190609670639, "grad_norm": 0.3545800447463989, "learning_rate": 0.00017143267160966275, "loss": 0.0621, "step": 9464 }, { "epoch": 1.3265592151366503, "grad_norm": 0.5317981839179993, "learning_rate": 0.00017141832097584308, "loss": 0.0639, "step": 9465 }, { "epoch": 1.3266993693062368, "grad_norm": 0.10901476442813873, "learning_rate": 0.0001714039703420234, "loss": 0.0089, "step": 9466 }, { "epoch": 1.3268395234758235, "grad_norm": 0.20464099943637848, "learning_rate": 0.00017138961970820376, "loss": 0.0529, "step": 9467 }, { "epoch": 1.32697967764541, "grad_norm": 0.41743746399879456, "learning_rate": 0.0001713752690743841, "loss": 0.088, "step": 9468 }, { "epoch": 1.3271198318149966, "grad_norm": 0.4790174961090088, "learning_rate": 0.00017136091844056445, "loss": 0.0516, "step": 9469 }, { "epoch": 1.327259985984583, "grad_norm": 0.6115158796310425, "learning_rate": 0.0001713465678067448, "loss": 0.0496, "step": 9470 }, { "epoch": 1.3274001401541695, "grad_norm": 0.16216257214546204, "learning_rate": 0.00017133221717292513, "loss": 0.0192, "step": 9471 }, { "epoch": 1.3275402943237562, "grad_norm": 0.33258384466171265, "learning_rate": 0.00017131786653910546, "loss": 0.0803, "step": 9472 }, { "epoch": 1.3276804484933427, "grad_norm": 0.09029435366392136, "learning_rate": 0.00017130351590528581, "loss": 0.0125, "step": 9473 }, { "epoch": 1.3278206026629293, "grad_norm": 0.7412534952163696, "learning_rate": 0.00017128916527146614, "loss": 0.0242, "step": 9474 }, { "epoch": 1.3279607568325158, "grad_norm": 0.20665088295936584, "learning_rate": 0.00017127481463764647, "loss": 0.0202, "step": 9475 }, { "epoch": 1.3281009110021023, "grad_norm": 0.29844722151756287, "learning_rate": 0.00017126046400382683, "loss": 0.0499, "step": 9476 }, { "epoch": 1.328241065171689, "grad_norm": 0.1486787647008896, "learning_rate": 0.00017124611337000716, "loss": 0.0088, "step": 9477 }, { 
"epoch": 1.3283812193412754, "grad_norm": 0.5192716121673584, "learning_rate": 0.00017123176273618748, "loss": 0.107, "step": 9478 }, { "epoch": 1.328521373510862, "grad_norm": 0.21467429399490356, "learning_rate": 0.0001712174121023678, "loss": 0.0087, "step": 9479 }, { "epoch": 1.3286615276804485, "grad_norm": 0.5761073231697083, "learning_rate": 0.0001712030614685482, "loss": 0.0783, "step": 9480 }, { "epoch": 1.328801681850035, "grad_norm": 0.28764209151268005, "learning_rate": 0.00017118871083472852, "loss": 0.0407, "step": 9481 }, { "epoch": 1.3289418360196215, "grad_norm": 0.5905888676643372, "learning_rate": 0.00017117436020090885, "loss": 0.0257, "step": 9482 }, { "epoch": 1.3290819901892081, "grad_norm": 1.063347578048706, "learning_rate": 0.0001711600095670892, "loss": 0.1806, "step": 9483 }, { "epoch": 1.3292221443587946, "grad_norm": 0.8173499703407288, "learning_rate": 0.00017114565893326954, "loss": 0.3561, "step": 9484 }, { "epoch": 1.3293622985283813, "grad_norm": 2.738908052444458, "learning_rate": 0.00017113130829944986, "loss": 0.3224, "step": 9485 }, { "epoch": 1.3295024526979677, "grad_norm": 0.24104203283786774, "learning_rate": 0.00017111695766563022, "loss": 0.0421, "step": 9486 }, { "epoch": 1.3296426068675542, "grad_norm": 0.3442029058933258, "learning_rate": 0.00017110260703181055, "loss": 0.1006, "step": 9487 }, { "epoch": 1.3297827610371409, "grad_norm": 0.30067411065101624, "learning_rate": 0.00017108825639799088, "loss": 0.0636, "step": 9488 }, { "epoch": 1.3299229152067273, "grad_norm": 0.853289008140564, "learning_rate": 0.00017107390576417126, "loss": 0.1017, "step": 9489 }, { "epoch": 1.330063069376314, "grad_norm": 0.2919696271419525, "learning_rate": 0.0001710595551303516, "loss": 0.0579, "step": 9490 }, { "epoch": 1.3302032235459005, "grad_norm": 0.28185316920280457, "learning_rate": 0.00017104520449653192, "loss": 0.056, "step": 9491 }, { "epoch": 1.330343377715487, "grad_norm": 0.3327508866786957, "learning_rate": 
0.00017103085386271227, "loss": 0.0525, "step": 9492 }, { "epoch": 1.3304835318850736, "grad_norm": 0.5602075457572937, "learning_rate": 0.0001710165032288926, "loss": 0.0495, "step": 9493 }, { "epoch": 1.33062368605466, "grad_norm": 0.7398636341094971, "learning_rate": 0.00017100215259507293, "loss": 0.1476, "step": 9494 }, { "epoch": 1.3307638402242468, "grad_norm": 0.1316189467906952, "learning_rate": 0.00017098780196125329, "loss": 0.0228, "step": 9495 }, { "epoch": 1.3309039943938332, "grad_norm": 0.3175063729286194, "learning_rate": 0.00017097345132743361, "loss": 0.0711, "step": 9496 }, { "epoch": 1.3310441485634197, "grad_norm": 0.24527721107006073, "learning_rate": 0.00017095910069361394, "loss": 0.0232, "step": 9497 }, { "epoch": 1.3311843027330064, "grad_norm": 0.21846212446689606, "learning_rate": 0.00017094475005979427, "loss": 0.0843, "step": 9498 }, { "epoch": 1.3313244569025928, "grad_norm": 0.22878167033195496, "learning_rate": 0.00017093039942597463, "loss": 0.0572, "step": 9499 }, { "epoch": 1.3314646110721795, "grad_norm": 0.2170042097568512, "learning_rate": 0.00017091604879215496, "loss": 0.05, "step": 9500 }, { "epoch": 1.331604765241766, "grad_norm": 0.18254008889198303, "learning_rate": 0.0001709016981583353, "loss": 0.0362, "step": 9501 }, { "epoch": 1.3317449194113524, "grad_norm": 0.2711474299430847, "learning_rate": 0.00017088734752451567, "loss": 0.0601, "step": 9502 }, { "epoch": 1.331885073580939, "grad_norm": 0.4067468047142029, "learning_rate": 0.000170872996890696, "loss": 0.0947, "step": 9503 }, { "epoch": 1.3320252277505256, "grad_norm": 0.3171539306640625, "learning_rate": 0.00017085864625687632, "loss": 0.0742, "step": 9504 }, { "epoch": 1.3321653819201122, "grad_norm": 0.15590724349021912, "learning_rate": 0.00017084429562305668, "loss": 0.0241, "step": 9505 }, { "epoch": 1.3323055360896987, "grad_norm": 0.17302237451076508, "learning_rate": 0.000170829944989237, "loss": 0.0267, "step": 9506 }, { "epoch": 1.3324456902592852, 
"grad_norm": 0.3936539590358734, "learning_rate": 0.00017081559435541734, "loss": 0.0611, "step": 9507 }, { "epoch": 1.3325858444288716, "grad_norm": 0.22893404960632324, "learning_rate": 0.0001708012437215977, "loss": 0.0368, "step": 9508 }, { "epoch": 1.3327259985984583, "grad_norm": 0.3247547447681427, "learning_rate": 0.00017078689308777802, "loss": 0.0392, "step": 9509 }, { "epoch": 1.332866152768045, "grad_norm": 0.17838488519191742, "learning_rate": 0.00017077254245395835, "loss": 0.0448, "step": 9510 }, { "epoch": 1.3330063069376314, "grad_norm": 0.5286068320274353, "learning_rate": 0.00017075819182013873, "loss": 0.0499, "step": 9511 }, { "epoch": 1.333146461107218, "grad_norm": 0.4125949740409851, "learning_rate": 0.00017074384118631906, "loss": 0.0862, "step": 9512 }, { "epoch": 1.3332866152768044, "grad_norm": 0.3173750340938568, "learning_rate": 0.0001707294905524994, "loss": 0.0523, "step": 9513 }, { "epoch": 1.333426769446391, "grad_norm": 0.48030805587768555, "learning_rate": 0.00017071513991867972, "loss": 0.0916, "step": 9514 }, { "epoch": 1.3335669236159775, "grad_norm": 0.38122883439064026, "learning_rate": 0.00017070078928486007, "loss": 0.0586, "step": 9515 }, { "epoch": 1.3337070777855642, "grad_norm": 0.20957478880882263, "learning_rate": 0.0001706864386510404, "loss": 0.0224, "step": 9516 }, { "epoch": 1.3338472319551506, "grad_norm": 0.27692320942878723, "learning_rate": 0.00017067208801722073, "loss": 0.038, "step": 9517 }, { "epoch": 1.333987386124737, "grad_norm": 0.5911213159561157, "learning_rate": 0.0001706577373834011, "loss": 0.084, "step": 9518 }, { "epoch": 1.3341275402943238, "grad_norm": 0.35676783323287964, "learning_rate": 0.00017064338674958142, "loss": 0.0397, "step": 9519 }, { "epoch": 1.3342676944639102, "grad_norm": 0.17341038584709167, "learning_rate": 0.00017062903611576174, "loss": 0.0556, "step": 9520 }, { "epoch": 1.334407848633497, "grad_norm": 0.302238404750824, "learning_rate": 0.00017061468548194213, "loss": 
0.0491, "step": 9521 }, { "epoch": 1.3345480028030834, "grad_norm": 0.12353895604610443, "learning_rate": 0.00017060033484812246, "loss": 0.0214, "step": 9522 }, { "epoch": 1.3346881569726698, "grad_norm": 0.3894589841365814, "learning_rate": 0.00017058598421430278, "loss": 0.0612, "step": 9523 }, { "epoch": 1.3348283111422565, "grad_norm": 0.2525026500225067, "learning_rate": 0.00017057163358048314, "loss": 0.0859, "step": 9524 }, { "epoch": 1.334968465311843, "grad_norm": 0.21902169287204742, "learning_rate": 0.00017055728294666347, "loss": 0.0526, "step": 9525 }, { "epoch": 1.3351086194814297, "grad_norm": 0.5129676461219788, "learning_rate": 0.0001705429323128438, "loss": 0.0578, "step": 9526 }, { "epoch": 1.3352487736510161, "grad_norm": 0.678501546382904, "learning_rate": 0.00017052858167902415, "loss": 0.1178, "step": 9527 }, { "epoch": 1.3353889278206026, "grad_norm": 0.07673218101263046, "learning_rate": 0.00017051423104520448, "loss": 0.0053, "step": 9528 }, { "epoch": 1.3355290819901893, "grad_norm": 1.247165560722351, "learning_rate": 0.0001704998804113848, "loss": 0.0709, "step": 9529 }, { "epoch": 1.3356692361597757, "grad_norm": 0.24425484240055084, "learning_rate": 0.00017048552977756517, "loss": 0.0269, "step": 9530 }, { "epoch": 1.3358093903293624, "grad_norm": 0.6372262239456177, "learning_rate": 0.0001704711791437455, "loss": 0.0474, "step": 9531 }, { "epoch": 1.3359495444989489, "grad_norm": 0.1417616456747055, "learning_rate": 0.00017045682850992585, "loss": 0.0147, "step": 9532 }, { "epoch": 1.3360896986685353, "grad_norm": 1.049975872039795, "learning_rate": 0.00017044247787610618, "loss": 0.1155, "step": 9533 }, { "epoch": 1.336229852838122, "grad_norm": 1.076125979423523, "learning_rate": 0.00017042812724228653, "loss": 0.0353, "step": 9534 }, { "epoch": 1.3363700070077085, "grad_norm": 1.0211845636367798, "learning_rate": 0.00017041377660846686, "loss": 0.0917, "step": 9535 }, { "epoch": 1.3365101611772952, "grad_norm": 
0.3120881915092468, "learning_rate": 0.0001703994259746472, "loss": 0.0389, "step": 9536 }, { "epoch": 1.3366503153468816, "grad_norm": 0.3107713460922241, "learning_rate": 0.00017038507534082755, "loss": 0.027, "step": 9537 }, { "epoch": 1.336790469516468, "grad_norm": 0.15866436064243317, "learning_rate": 0.00017037072470700787, "loss": 0.0428, "step": 9538 }, { "epoch": 1.3369306236860545, "grad_norm": 0.23351190984249115, "learning_rate": 0.0001703563740731882, "loss": 0.0182, "step": 9539 }, { "epoch": 1.3370707778556412, "grad_norm": 0.09756828844547272, "learning_rate": 0.00017034202343936856, "loss": 0.0112, "step": 9540 }, { "epoch": 1.3372109320252277, "grad_norm": 0.32933470606803894, "learning_rate": 0.0001703276728055489, "loss": 0.0468, "step": 9541 }, { "epoch": 1.3373510861948144, "grad_norm": 0.2580755650997162, "learning_rate": 0.00017031332217172922, "loss": 0.0423, "step": 9542 }, { "epoch": 1.3374912403644008, "grad_norm": 0.26410555839538574, "learning_rate": 0.0001702989715379096, "loss": 0.0472, "step": 9543 }, { "epoch": 1.3376313945339873, "grad_norm": 0.2901782989501953, "learning_rate": 0.00017028462090408993, "loss": 0.0623, "step": 9544 }, { "epoch": 1.337771548703574, "grad_norm": 0.4845621585845947, "learning_rate": 0.00017027027027027026, "loss": 0.0593, "step": 9545 }, { "epoch": 1.3379117028731604, "grad_norm": 0.3676447868347168, "learning_rate": 0.0001702559196364506, "loss": 0.0767, "step": 9546 }, { "epoch": 1.338051857042747, "grad_norm": 0.1000380590558052, "learning_rate": 0.00017024156900263094, "loss": 0.0067, "step": 9547 }, { "epoch": 1.3381920112123336, "grad_norm": 0.11539237946271896, "learning_rate": 0.00017022721836881127, "loss": 0.0357, "step": 9548 }, { "epoch": 1.33833216538192, "grad_norm": 0.22616511583328247, "learning_rate": 0.0001702128677349916, "loss": 0.0238, "step": 9549 }, { "epoch": 1.3384723195515067, "grad_norm": 0.2731891870498657, "learning_rate": 0.00017019851710117195, "loss": 0.0368, "step": 
9550 }, { "epoch": 1.3386124737210932, "grad_norm": 0.42878878116607666, "learning_rate": 0.00017018416646735228, "loss": 0.0822, "step": 9551 }, { "epoch": 1.3387526278906798, "grad_norm": 0.2010854333639145, "learning_rate": 0.0001701698158335326, "loss": 0.0476, "step": 9552 }, { "epoch": 1.3388927820602663, "grad_norm": 0.32286015152931213, "learning_rate": 0.000170155465199713, "loss": 0.0668, "step": 9553 }, { "epoch": 1.3390329362298528, "grad_norm": 0.33523309230804443, "learning_rate": 0.00017014111456589332, "loss": 0.0572, "step": 9554 }, { "epoch": 1.3391730903994394, "grad_norm": 0.22790703177452087, "learning_rate": 0.00017012676393207365, "loss": 0.0397, "step": 9555 }, { "epoch": 1.339313244569026, "grad_norm": 0.42198801040649414, "learning_rate": 0.000170112413298254, "loss": 0.0962, "step": 9556 }, { "epoch": 1.3394533987386126, "grad_norm": 0.22848308086395264, "learning_rate": 0.00017009806266443433, "loss": 0.0249, "step": 9557 }, { "epoch": 1.339593552908199, "grad_norm": 0.348146915435791, "learning_rate": 0.00017008371203061466, "loss": 0.0558, "step": 9558 }, { "epoch": 1.3397337070777855, "grad_norm": 0.558443009853363, "learning_rate": 0.00017006936139679502, "loss": 0.0966, "step": 9559 }, { "epoch": 1.3398738612473722, "grad_norm": 0.5223244428634644, "learning_rate": 0.00017005501076297535, "loss": 0.0451, "step": 9560 }, { "epoch": 1.3400140154169586, "grad_norm": 0.6537792086601257, "learning_rate": 0.00017004066012915568, "loss": 0.0655, "step": 9561 }, { "epoch": 1.3401541695865453, "grad_norm": 0.1561972051858902, "learning_rate": 0.00017002630949533603, "loss": 0.0194, "step": 9562 }, { "epoch": 1.3402943237561318, "grad_norm": 0.4724835753440857, "learning_rate": 0.00017001195886151636, "loss": 0.095, "step": 9563 }, { "epoch": 1.3404344779257182, "grad_norm": 0.3880367577075958, "learning_rate": 0.00016999760822769672, "loss": 0.0566, "step": 9564 }, { "epoch": 1.3405746320953047, "grad_norm": 0.1931467354297638, 
"learning_rate": 0.00016998325759387704, "loss": 0.0285, "step": 9565 }, { "epoch": 1.3407147862648914, "grad_norm": 0.745561420917511, "learning_rate": 0.0001699689069600574, "loss": 0.0736, "step": 9566 }, { "epoch": 1.340854940434478, "grad_norm": 0.14206565916538239, "learning_rate": 0.00016995455632623773, "loss": 0.0353, "step": 9567 }, { "epoch": 1.3409950946040645, "grad_norm": 0.6110050678253174, "learning_rate": 0.00016994020569241806, "loss": 0.1482, "step": 9568 }, { "epoch": 1.341135248773651, "grad_norm": 1.2706602811813354, "learning_rate": 0.0001699258550585984, "loss": 0.1146, "step": 9569 }, { "epoch": 1.3412754029432374, "grad_norm": 0.3216162323951721, "learning_rate": 0.00016991150442477874, "loss": 0.0456, "step": 9570 }, { "epoch": 1.3414155571128241, "grad_norm": 0.2087244987487793, "learning_rate": 0.00016989715379095907, "loss": 0.0293, "step": 9571 }, { "epoch": 1.3415557112824106, "grad_norm": 0.4775845408439636, "learning_rate": 0.00016988280315713943, "loss": 0.1145, "step": 9572 }, { "epoch": 1.3416958654519973, "grad_norm": 0.24230755865573883, "learning_rate": 0.00016986845252331975, "loss": 0.0869, "step": 9573 }, { "epoch": 1.3418360196215837, "grad_norm": 0.12217220664024353, "learning_rate": 0.00016985410188950008, "loss": 0.018, "step": 9574 }, { "epoch": 1.3419761737911702, "grad_norm": 0.2012275755405426, "learning_rate": 0.00016983975125568047, "loss": 0.072, "step": 9575 }, { "epoch": 1.3421163279607569, "grad_norm": 0.2923319935798645, "learning_rate": 0.0001698254006218608, "loss": 0.0555, "step": 9576 }, { "epoch": 1.3422564821303433, "grad_norm": 0.5228692293167114, "learning_rate": 0.00016981104998804112, "loss": 0.0849, "step": 9577 }, { "epoch": 1.34239663629993, "grad_norm": 0.3818853795528412, "learning_rate": 0.00016979669935422148, "loss": 0.0816, "step": 9578 }, { "epoch": 1.3425367904695165, "grad_norm": 0.9247800707817078, "learning_rate": 0.0001697823487204018, "loss": 0.23, "step": 9579 }, { "epoch": 
1.342676944639103, "grad_norm": 0.39113649725914, "learning_rate": 0.00016976799808658214, "loss": 0.0807, "step": 9580 }, { "epoch": 1.3428170988086896, "grad_norm": 1.0975922346115112, "learning_rate": 0.0001697536474527625, "loss": 0.0949, "step": 9581 }, { "epoch": 1.342957252978276, "grad_norm": 0.3754643499851227, "learning_rate": 0.00016973929681894282, "loss": 0.1034, "step": 9582 }, { "epoch": 1.3430974071478627, "grad_norm": 0.7964534163475037, "learning_rate": 0.00016972494618512315, "loss": 0.1745, "step": 9583 }, { "epoch": 1.3432375613174492, "grad_norm": 2.2981514930725098, "learning_rate": 0.00016971059555130348, "loss": 0.069, "step": 9584 }, { "epoch": 1.3433777154870357, "grad_norm": 0.1188306137919426, "learning_rate": 0.00016969624491748386, "loss": 0.011, "step": 9585 }, { "epoch": 1.3435178696566223, "grad_norm": 0.32813698053359985, "learning_rate": 0.0001696818942836642, "loss": 0.0672, "step": 9586 }, { "epoch": 1.3436580238262088, "grad_norm": 0.20178063213825226, "learning_rate": 0.00016966754364984452, "loss": 0.0295, "step": 9587 }, { "epoch": 1.3437981779957955, "grad_norm": 0.38333839178085327, "learning_rate": 0.00016965319301602487, "loss": 0.0726, "step": 9588 }, { "epoch": 1.343938332165382, "grad_norm": 0.3554108738899231, "learning_rate": 0.0001696388423822052, "loss": 0.1224, "step": 9589 }, { "epoch": 1.3440784863349684, "grad_norm": 0.3435667157173157, "learning_rate": 0.00016962449174838553, "loss": 0.1305, "step": 9590 }, { "epoch": 1.344218640504555, "grad_norm": 0.20762506127357483, "learning_rate": 0.00016961014111456588, "loss": 0.0471, "step": 9591 }, { "epoch": 1.3443587946741415, "grad_norm": 0.30831727385520935, "learning_rate": 0.0001695957904807462, "loss": 0.1059, "step": 9592 }, { "epoch": 1.3444989488437282, "grad_norm": 0.15234048664569855, "learning_rate": 0.00016958143984692654, "loss": 0.0501, "step": 9593 }, { "epoch": 1.3446391030133147, "grad_norm": 0.24389781057834625, "learning_rate": 
0.0001695670892131069, "loss": 0.0305, "step": 9594 }, { "epoch": 1.3447792571829011, "grad_norm": 0.5057260990142822, "learning_rate": 0.00016955273857928723, "loss": 0.1259, "step": 9595 }, { "epoch": 1.3449194113524876, "grad_norm": 0.1339229792356491, "learning_rate": 0.00016953838794546758, "loss": 0.0202, "step": 9596 }, { "epoch": 1.3450595655220743, "grad_norm": 0.40921786427497864, "learning_rate": 0.00016952403731164794, "loss": 0.0575, "step": 9597 }, { "epoch": 1.3451997196916607, "grad_norm": 0.17376172542572021, "learning_rate": 0.00016950968667782827, "loss": 0.0258, "step": 9598 }, { "epoch": 1.3453398738612474, "grad_norm": 0.6082354187965393, "learning_rate": 0.0001694953360440086, "loss": 0.0709, "step": 9599 }, { "epoch": 1.3454800280308339, "grad_norm": 0.22580136358737946, "learning_rate": 0.00016948098541018892, "loss": 0.0528, "step": 9600 }, { "epoch": 1.3456201822004203, "grad_norm": 0.27200090885162354, "learning_rate": 0.00016946663477636928, "loss": 0.0267, "step": 9601 }, { "epoch": 1.345760336370007, "grad_norm": 0.16164040565490723, "learning_rate": 0.0001694522841425496, "loss": 0.023, "step": 9602 }, { "epoch": 1.3459004905395935, "grad_norm": 0.1931738257408142, "learning_rate": 0.00016943793350872994, "loss": 0.0335, "step": 9603 }, { "epoch": 1.3460406447091802, "grad_norm": 0.36990222334861755, "learning_rate": 0.0001694235828749103, "loss": 0.0676, "step": 9604 }, { "epoch": 1.3461807988787666, "grad_norm": 0.24738983809947968, "learning_rate": 0.00016940923224109062, "loss": 0.0445, "step": 9605 }, { "epoch": 1.346320953048353, "grad_norm": 0.24724404513835907, "learning_rate": 0.00016939488160727095, "loss": 0.0306, "step": 9606 }, { "epoch": 1.3464611072179398, "grad_norm": 0.5514512062072754, "learning_rate": 0.00016938053097345133, "loss": 0.065, "step": 9607 }, { "epoch": 1.3466012613875262, "grad_norm": 0.5415422916412354, "learning_rate": 0.00016936618033963166, "loss": 0.1003, "step": 9608 }, { "epoch": 
1.346741415557113, "grad_norm": 0.5476313829421997, "learning_rate": 0.000169351829705812, "loss": 0.0622, "step": 9609 }, { "epoch": 1.3468815697266994, "grad_norm": 1.0372077226638794, "learning_rate": 0.00016933747907199234, "loss": 0.1601, "step": 9610 }, { "epoch": 1.3470217238962858, "grad_norm": 0.5090930461883545, "learning_rate": 0.00016932312843817267, "loss": 0.0855, "step": 9611 }, { "epoch": 1.3471618780658725, "grad_norm": 0.5956155061721802, "learning_rate": 0.000169308777804353, "loss": 0.1916, "step": 9612 }, { "epoch": 1.347302032235459, "grad_norm": 0.3450208604335785, "learning_rate": 0.00016929442717053336, "loss": 0.0636, "step": 9613 }, { "epoch": 1.3474421864050456, "grad_norm": 0.4558791518211365, "learning_rate": 0.00016928007653671369, "loss": 0.0869, "step": 9614 }, { "epoch": 1.347582340574632, "grad_norm": 0.0827002078294754, "learning_rate": 0.00016926572590289401, "loss": 0.0131, "step": 9615 }, { "epoch": 1.3477224947442186, "grad_norm": 0.16938596963882446, "learning_rate": 0.0001692513752690744, "loss": 0.0172, "step": 9616 }, { "epoch": 1.3478626489138053, "grad_norm": 0.1471003144979477, "learning_rate": 0.00016923702463525473, "loss": 0.0142, "step": 9617 }, { "epoch": 1.3480028030833917, "grad_norm": 0.2193594127893448, "learning_rate": 0.00016922267400143505, "loss": 0.0533, "step": 9618 }, { "epoch": 1.3481429572529784, "grad_norm": 0.5552753210067749, "learning_rate": 0.00016920832336761538, "loss": 0.041, "step": 9619 }, { "epoch": 1.3482831114225649, "grad_norm": 1.0127137899398804, "learning_rate": 0.00016919397273379574, "loss": 0.1012, "step": 9620 }, { "epoch": 1.3484232655921513, "grad_norm": 0.7789947986602783, "learning_rate": 0.00016917962209997607, "loss": 0.0374, "step": 9621 }, { "epoch": 1.348563419761738, "grad_norm": 0.48137718439102173, "learning_rate": 0.0001691652714661564, "loss": 0.0399, "step": 9622 }, { "epoch": 1.3487035739313245, "grad_norm": 0.2113160938024521, "learning_rate": 
0.00016915092083233675, "loss": 0.0217, "step": 9623 }, { "epoch": 1.3488437281009111, "grad_norm": 0.3934256434440613, "learning_rate": 0.00016913657019851708, "loss": 0.0178, "step": 9624 }, { "epoch": 1.3489838822704976, "grad_norm": 0.6783877015113831, "learning_rate": 0.0001691222195646974, "loss": 0.06, "step": 9625 }, { "epoch": 1.349124036440084, "grad_norm": 0.24311049282550812, "learning_rate": 0.00016910786893087776, "loss": 0.105, "step": 9626 }, { "epoch": 1.3492641906096705, "grad_norm": 0.6261190176010132, "learning_rate": 0.0001690935182970581, "loss": 0.0486, "step": 9627 }, { "epoch": 1.3494043447792572, "grad_norm": 0.37981170415878296, "learning_rate": 0.00016907916766323845, "loss": 0.1636, "step": 9628 }, { "epoch": 1.3495444989488437, "grad_norm": 0.5377947688102722, "learning_rate": 0.0001690648170294188, "loss": 0.1592, "step": 9629 }, { "epoch": 1.3496846531184303, "grad_norm": 0.5082780122756958, "learning_rate": 0.00016905046639559913, "loss": 0.1175, "step": 9630 }, { "epoch": 1.3498248072880168, "grad_norm": 0.33813199400901794, "learning_rate": 0.00016903611576177946, "loss": 0.0213, "step": 9631 }, { "epoch": 1.3499649614576033, "grad_norm": 0.9066349864006042, "learning_rate": 0.00016902176512795982, "loss": 0.1986, "step": 9632 }, { "epoch": 1.35010511562719, "grad_norm": 0.3174242377281189, "learning_rate": 0.00016900741449414015, "loss": 0.0225, "step": 9633 }, { "epoch": 1.3502452697967764, "grad_norm": 1.0678372383117676, "learning_rate": 0.00016899306386032047, "loss": 0.0772, "step": 9634 }, { "epoch": 1.350385423966363, "grad_norm": 1.488322138786316, "learning_rate": 0.0001689787132265008, "loss": 0.0465, "step": 9635 }, { "epoch": 1.3505255781359495, "grad_norm": 0.46247369050979614, "learning_rate": 0.00016896436259268116, "loss": 0.1368, "step": 9636 }, { "epoch": 1.350665732305536, "grad_norm": 0.4396011233329773, "learning_rate": 0.0001689500119588615, "loss": 0.1015, "step": 9637 }, { "epoch": 1.3508058864751227, 
"grad_norm": 0.6034500598907471, "learning_rate": 0.00016893566132504182, "loss": 0.1036, "step": 9638 }, { "epoch": 1.3509460406447091, "grad_norm": 0.15007194876670837, "learning_rate": 0.0001689213106912222, "loss": 0.0313, "step": 9639 }, { "epoch": 1.3510861948142958, "grad_norm": 0.39873605966567993, "learning_rate": 0.00016890696005740253, "loss": 0.0894, "step": 9640 }, { "epoch": 1.3512263489838823, "grad_norm": 0.3135083019733429, "learning_rate": 0.00016889260942358285, "loss": 0.0641, "step": 9641 }, { "epoch": 1.3513665031534687, "grad_norm": 0.7267391681671143, "learning_rate": 0.0001688782587897632, "loss": 0.1094, "step": 9642 }, { "epoch": 1.3515066573230554, "grad_norm": 1.3700371980667114, "learning_rate": 0.00016886390815594354, "loss": 0.0352, "step": 9643 }, { "epoch": 1.3516468114926419, "grad_norm": 0.3135605454444885, "learning_rate": 0.00016884955752212387, "loss": 0.0435, "step": 9644 }, { "epoch": 1.3517869656622286, "grad_norm": 0.3437100946903229, "learning_rate": 0.00016883520688830422, "loss": 0.0539, "step": 9645 }, { "epoch": 1.351927119831815, "grad_norm": 0.4274687170982361, "learning_rate": 0.00016882085625448455, "loss": 0.0755, "step": 9646 }, { "epoch": 1.3520672740014015, "grad_norm": 0.5987633466720581, "learning_rate": 0.00016880650562066488, "loss": 0.0565, "step": 9647 }, { "epoch": 1.3522074281709882, "grad_norm": 0.104233518242836, "learning_rate": 0.00016879215498684526, "loss": 0.0149, "step": 9648 }, { "epoch": 1.3523475823405746, "grad_norm": 0.2899114787578583, "learning_rate": 0.0001687778043530256, "loss": 0.0314, "step": 9649 }, { "epoch": 1.3524877365101613, "grad_norm": 0.19484783709049225, "learning_rate": 0.00016876345371920592, "loss": 0.0443, "step": 9650 }, { "epoch": 1.3526278906797478, "grad_norm": 0.28061410784721375, "learning_rate": 0.00016874910308538628, "loss": 0.0706, "step": 9651 }, { "epoch": 1.3527680448493342, "grad_norm": 0.8848469257354736, "learning_rate": 0.0001687347524515666, "loss": 
0.0875, "step": 9652 }, { "epoch": 1.3529081990189207, "grad_norm": 0.37647897005081177, "learning_rate": 0.00016872040181774693, "loss": 0.0836, "step": 9653 }, { "epoch": 1.3530483531885074, "grad_norm": 0.5196483731269836, "learning_rate": 0.00016870605118392726, "loss": 0.0894, "step": 9654 }, { "epoch": 1.353188507358094, "grad_norm": 0.14152616262435913, "learning_rate": 0.00016869170055010762, "loss": 0.0191, "step": 9655 }, { "epoch": 1.3533286615276805, "grad_norm": 0.1502651423215866, "learning_rate": 0.00016867734991628795, "loss": 0.0092, "step": 9656 }, { "epoch": 1.353468815697267, "grad_norm": 0.7941946387290955, "learning_rate": 0.00016866299928246827, "loss": 0.0511, "step": 9657 }, { "epoch": 1.3536089698668534, "grad_norm": 0.30657556653022766, "learning_rate": 0.00016864864864864863, "loss": 0.0404, "step": 9658 }, { "epoch": 1.35374912403644, "grad_norm": 0.717262864112854, "learning_rate": 0.00016863429801482899, "loss": 0.0424, "step": 9659 }, { "epoch": 1.3538892782060266, "grad_norm": 0.18987800180912018, "learning_rate": 0.00016861994738100931, "loss": 0.0432, "step": 9660 }, { "epoch": 1.3540294323756132, "grad_norm": 0.27932873368263245, "learning_rate": 0.00016860559674718967, "loss": 0.0353, "step": 9661 }, { "epoch": 1.3541695865451997, "grad_norm": 0.5111303925514221, "learning_rate": 0.00016859124611337, "loss": 0.0534, "step": 9662 }, { "epoch": 1.3543097407147862, "grad_norm": 0.6268123388290405, "learning_rate": 0.00016857689547955033, "loss": 0.0836, "step": 9663 }, { "epoch": 1.3544498948843728, "grad_norm": 0.6795238256454468, "learning_rate": 0.00016856254484573068, "loss": 0.1216, "step": 9664 }, { "epoch": 1.3545900490539593, "grad_norm": 0.2711315453052521, "learning_rate": 0.000168548194211911, "loss": 0.0408, "step": 9665 }, { "epoch": 1.354730203223546, "grad_norm": 0.24131660163402557, "learning_rate": 0.00016853384357809134, "loss": 0.0272, "step": 9666 }, { "epoch": 1.3548703573931324, "grad_norm": 
0.3637286424636841, "learning_rate": 0.0001685194929442717, "loss": 0.0411, "step": 9667 }, { "epoch": 1.355010511562719, "grad_norm": 0.06975194811820984, "learning_rate": 0.00016850514231045202, "loss": 0.0122, "step": 9668 }, { "epoch": 1.3551506657323056, "grad_norm": 0.42182624340057373, "learning_rate": 0.00016849079167663235, "loss": 0.0614, "step": 9669 }, { "epoch": 1.355290819901892, "grad_norm": 0.4486479163169861, "learning_rate": 0.00016847644104281268, "loss": 0.032, "step": 9670 }, { "epoch": 1.3554309740714787, "grad_norm": 1.331338882446289, "learning_rate": 0.00016846209040899306, "loss": 0.0871, "step": 9671 }, { "epoch": 1.3555711282410652, "grad_norm": 0.5530375242233276, "learning_rate": 0.0001684477397751734, "loss": 0.1335, "step": 9672 }, { "epoch": 1.3557112824106516, "grad_norm": 0.4682348966598511, "learning_rate": 0.00016843338914135372, "loss": 0.0687, "step": 9673 }, { "epoch": 1.3558514365802383, "grad_norm": 0.11995410174131393, "learning_rate": 0.00016841903850753408, "loss": 0.0102, "step": 9674 }, { "epoch": 1.3559915907498248, "grad_norm": 0.25365790724754333, "learning_rate": 0.0001684046878737144, "loss": 0.0423, "step": 9675 }, { "epoch": 1.3561317449194115, "grad_norm": 0.20284157991409302, "learning_rate": 0.00016839033723989473, "loss": 0.0325, "step": 9676 }, { "epoch": 1.356271899088998, "grad_norm": 0.280926913022995, "learning_rate": 0.0001683759866060751, "loss": 0.06, "step": 9677 }, { "epoch": 1.3564120532585844, "grad_norm": 0.10474584251642227, "learning_rate": 0.00016836163597225542, "loss": 0.0074, "step": 9678 }, { "epoch": 1.356552207428171, "grad_norm": 0.2583116888999939, "learning_rate": 0.00016834728533843575, "loss": 0.0372, "step": 9679 }, { "epoch": 1.3566923615977575, "grad_norm": 0.2200823277235031, "learning_rate": 0.00016833293470461613, "loss": 0.0332, "step": 9680 }, { "epoch": 1.3568325157673442, "grad_norm": 0.45323115587234497, "learning_rate": 0.00016831858407079646, "loss": 0.0572, "step": 
9681 }, { "epoch": 1.3569726699369307, "grad_norm": 0.6593964099884033, "learning_rate": 0.0001683042334369768, "loss": 0.0793, "step": 9682 }, { "epoch": 1.3571128241065171, "grad_norm": 0.8121048808097839, "learning_rate": 0.00016828988280315714, "loss": 0.0619, "step": 9683 }, { "epoch": 1.3572529782761036, "grad_norm": 0.3453994393348694, "learning_rate": 0.00016827553216933747, "loss": 0.0287, "step": 9684 }, { "epoch": 1.3573931324456903, "grad_norm": 0.7168417572975159, "learning_rate": 0.0001682611815355178, "loss": 0.0737, "step": 9685 }, { "epoch": 1.3575332866152767, "grad_norm": 0.31652066111564636, "learning_rate": 0.00016824683090169816, "loss": 0.0318, "step": 9686 }, { "epoch": 1.3576734407848634, "grad_norm": 0.19063116610050201, "learning_rate": 0.00016823248026787848, "loss": 0.0337, "step": 9687 }, { "epoch": 1.3578135949544499, "grad_norm": 0.17053866386413574, "learning_rate": 0.0001682181296340588, "loss": 0.0387, "step": 9688 }, { "epoch": 1.3579537491240363, "grad_norm": 0.41927045583724976, "learning_rate": 0.00016820377900023914, "loss": 0.0469, "step": 9689 }, { "epoch": 1.358093903293623, "grad_norm": 0.29581496119499207, "learning_rate": 0.0001681894283664195, "loss": 0.0751, "step": 9690 }, { "epoch": 1.3582340574632095, "grad_norm": 0.3370482623577118, "learning_rate": 0.00016817507773259985, "loss": 0.0487, "step": 9691 }, { "epoch": 1.3583742116327961, "grad_norm": 0.4582037627696991, "learning_rate": 0.00016816072709878018, "loss": 0.0708, "step": 9692 }, { "epoch": 1.3585143658023826, "grad_norm": 0.5835514664649963, "learning_rate": 0.00016814637646496054, "loss": 0.0346, "step": 9693 }, { "epoch": 1.358654519971969, "grad_norm": 0.6042092442512512, "learning_rate": 0.00016813202583114086, "loss": 0.0285, "step": 9694 }, { "epoch": 1.3587946741415557, "grad_norm": 0.11981189250946045, "learning_rate": 0.0001681176751973212, "loss": 0.0159, "step": 9695 }, { "epoch": 1.3589348283111422, "grad_norm": 0.591228723526001, 
"learning_rate": 0.00016810332456350155, "loss": 0.0609, "step": 9696 }, { "epoch": 1.3590749824807289, "grad_norm": 0.13521863520145416, "learning_rate": 0.00016808897392968188, "loss": 0.0286, "step": 9697 }, { "epoch": 1.3592151366503153, "grad_norm": 0.1536429226398468, "learning_rate": 0.0001680746232958622, "loss": 0.0376, "step": 9698 }, { "epoch": 1.3593552908199018, "grad_norm": 0.2578083872795105, "learning_rate": 0.00016806027266204256, "loss": 0.0647, "step": 9699 }, { "epoch": 1.3594954449894885, "grad_norm": 0.3452412486076355, "learning_rate": 0.0001680459220282229, "loss": 0.0451, "step": 9700 }, { "epoch": 1.359635599159075, "grad_norm": 0.7062780261039734, "learning_rate": 0.00016803157139440322, "loss": 0.1317, "step": 9701 }, { "epoch": 1.3597757533286616, "grad_norm": 0.17084822058677673, "learning_rate": 0.0001680172207605836, "loss": 0.0431, "step": 9702 }, { "epoch": 1.359915907498248, "grad_norm": 0.4236963391304016, "learning_rate": 0.00016800287012676393, "loss": 0.0515, "step": 9703 }, { "epoch": 1.3600560616678345, "grad_norm": 0.21950465440750122, "learning_rate": 0.00016798851949294426, "loss": 0.0528, "step": 9704 }, { "epoch": 1.3601962158374212, "grad_norm": 0.27367982268333435, "learning_rate": 0.0001679741688591246, "loss": 0.0519, "step": 9705 }, { "epoch": 1.3603363700070077, "grad_norm": 0.15314653515815735, "learning_rate": 0.00016795981822530494, "loss": 0.0347, "step": 9706 }, { "epoch": 1.3604765241765944, "grad_norm": 0.22668534517288208, "learning_rate": 0.00016794546759148527, "loss": 0.0152, "step": 9707 }, { "epoch": 1.3606166783461808, "grad_norm": 0.35102778673171997, "learning_rate": 0.0001679311169576656, "loss": 0.0439, "step": 9708 }, { "epoch": 1.3607568325157673, "grad_norm": 0.35799992084503174, "learning_rate": 0.00016791676632384596, "loss": 0.0365, "step": 9709 }, { "epoch": 1.360896986685354, "grad_norm": 0.5184187889099121, "learning_rate": 0.00016790241569002628, "loss": 0.0266, "step": 9710 }, { 
"epoch": 1.3610371408549404, "grad_norm": 0.36529093980789185, "learning_rate": 0.0001678880650562066, "loss": 0.0603, "step": 9711 }, { "epoch": 1.3611772950245271, "grad_norm": 0.38146328926086426, "learning_rate": 0.000167873714422387, "loss": 0.0575, "step": 9712 }, { "epoch": 1.3613174491941136, "grad_norm": 0.24646399915218353, "learning_rate": 0.00016785936378856732, "loss": 0.0218, "step": 9713 }, { "epoch": 1.3614576033637, "grad_norm": 0.35222500562667847, "learning_rate": 0.00016784501315474765, "loss": 0.0281, "step": 9714 }, { "epoch": 1.3615977575332865, "grad_norm": 0.22733935713768005, "learning_rate": 0.000167830662520928, "loss": 0.0369, "step": 9715 }, { "epoch": 1.3617379117028732, "grad_norm": 0.1621370017528534, "learning_rate": 0.00016781631188710834, "loss": 0.018, "step": 9716 }, { "epoch": 1.3618780658724596, "grad_norm": 0.38500919938087463, "learning_rate": 0.00016780196125328867, "loss": 0.0183, "step": 9717 }, { "epoch": 1.3620182200420463, "grad_norm": 0.2516532242298126, "learning_rate": 0.00016778761061946902, "loss": 0.0363, "step": 9718 }, { "epoch": 1.3621583742116328, "grad_norm": 0.2872370779514313, "learning_rate": 0.00016777325998564935, "loss": 0.0326, "step": 9719 }, { "epoch": 1.3622985283812192, "grad_norm": 0.1611098349094391, "learning_rate": 0.00016775890935182968, "loss": 0.0179, "step": 9720 }, { "epoch": 1.362438682550806, "grad_norm": 0.28133001923561096, "learning_rate": 0.00016774455871801003, "loss": 0.051, "step": 9721 }, { "epoch": 1.3625788367203924, "grad_norm": 0.29716742038726807, "learning_rate": 0.00016773020808419036, "loss": 0.0278, "step": 9722 }, { "epoch": 1.362718990889979, "grad_norm": 0.43527963757514954, "learning_rate": 0.00016771585745037072, "loss": 0.1277, "step": 9723 }, { "epoch": 1.3628591450595655, "grad_norm": 0.6623085141181946, "learning_rate": 0.00016770150681655105, "loss": 0.045, "step": 9724 }, { "epoch": 1.362999299229152, "grad_norm": 0.13986140489578247, "learning_rate": 
0.0001676871561827314, "loss": 0.0301, "step": 9725 }, { "epoch": 1.3631394533987387, "grad_norm": 0.34659212827682495, "learning_rate": 0.00016767280554891173, "loss": 0.0346, "step": 9726 }, { "epoch": 1.3632796075683251, "grad_norm": 0.5105130672454834, "learning_rate": 0.00016765845491509206, "loss": 0.132, "step": 9727 }, { "epoch": 1.3634197617379118, "grad_norm": 0.4852428138256073, "learning_rate": 0.00016764410428127242, "loss": 0.0433, "step": 9728 }, { "epoch": 1.3635599159074983, "grad_norm": 0.8634260296821594, "learning_rate": 0.00016762975364745274, "loss": 0.0182, "step": 9729 }, { "epoch": 1.3637000700770847, "grad_norm": 0.5627794861793518, "learning_rate": 0.00016761540301363307, "loss": 0.0501, "step": 9730 }, { "epoch": 1.3638402242466714, "grad_norm": 0.3760053217411041, "learning_rate": 0.00016760105237981343, "loss": 0.0509, "step": 9731 }, { "epoch": 1.3639803784162579, "grad_norm": 4.839693069458008, "learning_rate": 0.00016758670174599376, "loss": 0.0624, "step": 9732 }, { "epoch": 1.3641205325858445, "grad_norm": 1.5461735725402832, "learning_rate": 0.00016757235111217409, "loss": 0.109, "step": 9733 }, { "epoch": 1.364260686755431, "grad_norm": 2.0265204906463623, "learning_rate": 0.00016755800047835447, "loss": 0.1984, "step": 9734 }, { "epoch": 1.3644008409250175, "grad_norm": 0.41636842489242554, "learning_rate": 0.0001675436498445348, "loss": 0.0301, "step": 9735 }, { "epoch": 1.3645409950946041, "grad_norm": 0.42520642280578613, "learning_rate": 0.00016752929921071513, "loss": 0.0854, "step": 9736 }, { "epoch": 1.3646811492641906, "grad_norm": 0.22899377346038818, "learning_rate": 0.00016751494857689548, "loss": 0.0684, "step": 9737 }, { "epoch": 1.3648213034337773, "grad_norm": 0.498642235994339, "learning_rate": 0.0001675005979430758, "loss": 0.0625, "step": 9738 }, { "epoch": 1.3649614576033637, "grad_norm": 0.2687016725540161, "learning_rate": 0.00016748624730925614, "loss": 0.0373, "step": 9739 }, { "epoch": 
1.3651016117729502, "grad_norm": 0.19468122720718384, "learning_rate": 0.00016747189667543647, "loss": 0.0979, "step": 9740 }, { "epoch": 1.3652417659425367, "grad_norm": 0.7452189922332764, "learning_rate": 0.00016745754604161682, "loss": 0.0324, "step": 9741 }, { "epoch": 1.3653819201121233, "grad_norm": 0.16044935584068298, "learning_rate": 0.00016744319540779715, "loss": 0.0264, "step": 9742 }, { "epoch": 1.36552207428171, "grad_norm": 0.3225463628768921, "learning_rate": 0.00016742884477397748, "loss": 0.03, "step": 9743 }, { "epoch": 1.3656622284512965, "grad_norm": 0.3160260021686554, "learning_rate": 0.00016741449414015786, "loss": 0.0295, "step": 9744 }, { "epoch": 1.365802382620883, "grad_norm": 0.390919953584671, "learning_rate": 0.0001674001435063382, "loss": 0.06, "step": 9745 }, { "epoch": 1.3659425367904694, "grad_norm": 0.20790278911590576, "learning_rate": 0.00016738579287251852, "loss": 0.0275, "step": 9746 }, { "epoch": 1.366082690960056, "grad_norm": 0.2458115816116333, "learning_rate": 0.00016737144223869887, "loss": 0.0697, "step": 9747 }, { "epoch": 1.3662228451296425, "grad_norm": 0.2854403555393219, "learning_rate": 0.0001673570916048792, "loss": 0.0216, "step": 9748 }, { "epoch": 1.3663629992992292, "grad_norm": 0.12739400565624237, "learning_rate": 0.00016734274097105953, "loss": 0.0182, "step": 9749 }, { "epoch": 1.3665031534688157, "grad_norm": 0.3343561589717865, "learning_rate": 0.0001673283903372399, "loss": 0.0578, "step": 9750 }, { "epoch": 1.3666433076384021, "grad_norm": 0.24275031685829163, "learning_rate": 0.00016731403970342022, "loss": 0.0452, "step": 9751 }, { "epoch": 1.3667834618079888, "grad_norm": 0.3544938564300537, "learning_rate": 0.00016729968906960054, "loss": 0.07, "step": 9752 }, { "epoch": 1.3669236159775753, "grad_norm": 0.3955884575843811, "learning_rate": 0.0001672853384357809, "loss": 0.045, "step": 9753 }, { "epoch": 1.367063770147162, "grad_norm": 0.3393678069114685, "learning_rate": 0.00016727098780196123, 
"loss": 0.0738, "step": 9754 }, { "epoch": 1.3672039243167484, "grad_norm": 0.33253100514411926, "learning_rate": 0.00016725663716814158, "loss": 0.0884, "step": 9755 }, { "epoch": 1.3673440784863349, "grad_norm": 0.30154991149902344, "learning_rate": 0.00016724228653432194, "loss": 0.0529, "step": 9756 }, { "epoch": 1.3674842326559216, "grad_norm": 0.40125954151153564, "learning_rate": 0.00016722793590050227, "loss": 0.048, "step": 9757 }, { "epoch": 1.367624386825508, "grad_norm": 0.5775707960128784, "learning_rate": 0.0001672135852666826, "loss": 0.1117, "step": 9758 }, { "epoch": 1.3677645409950947, "grad_norm": 0.14290858805179596, "learning_rate": 0.00016719923463286293, "loss": 0.0172, "step": 9759 }, { "epoch": 1.3679046951646812, "grad_norm": 0.382387638092041, "learning_rate": 0.00016718488399904328, "loss": 0.0541, "step": 9760 }, { "epoch": 1.3680448493342676, "grad_norm": 0.299716055393219, "learning_rate": 0.0001671705333652236, "loss": 0.0337, "step": 9761 }, { "epoch": 1.3681850035038543, "grad_norm": 0.3183436095714569, "learning_rate": 0.00016715618273140394, "loss": 0.0455, "step": 9762 }, { "epoch": 1.3683251576734408, "grad_norm": 0.12458598613739014, "learning_rate": 0.0001671418320975843, "loss": 0.0093, "step": 9763 }, { "epoch": 1.3684653118430274, "grad_norm": 0.23761752247810364, "learning_rate": 0.00016712748146376462, "loss": 0.0259, "step": 9764 }, { "epoch": 1.368605466012614, "grad_norm": 0.28191614151000977, "learning_rate": 0.00016711313082994495, "loss": 0.0312, "step": 9765 }, { "epoch": 1.3687456201822004, "grad_norm": 0.28340035676956177, "learning_rate": 0.00016709878019612533, "loss": 0.0232, "step": 9766 }, { "epoch": 1.368885774351787, "grad_norm": 0.47565537691116333, "learning_rate": 0.00016708442956230566, "loss": 0.1049, "step": 9767 }, { "epoch": 1.3690259285213735, "grad_norm": 0.6011717915534973, "learning_rate": 0.000167070078928486, "loss": 0.048, "step": 9768 }, { "epoch": 1.3691660826909602, "grad_norm": 
0.34251531958580017, "learning_rate": 0.00016705572829466635, "loss": 0.0474, "step": 9769 }, { "epoch": 1.3693062368605466, "grad_norm": 0.27362003922462463, "learning_rate": 0.00016704137766084668, "loss": 0.0451, "step": 9770 }, { "epoch": 1.369446391030133, "grad_norm": 0.6736848950386047, "learning_rate": 0.000167027027027027, "loss": 0.0273, "step": 9771 }, { "epoch": 1.3695865451997196, "grad_norm": 0.4624515771865845, "learning_rate": 0.00016701267639320736, "loss": 0.0355, "step": 9772 }, { "epoch": 1.3697266993693062, "grad_norm": 0.19825811684131622, "learning_rate": 0.0001669983257593877, "loss": 0.0336, "step": 9773 }, { "epoch": 1.3698668535388927, "grad_norm": 0.21776919066905975, "learning_rate": 0.00016698397512556802, "loss": 0.0371, "step": 9774 }, { "epoch": 1.3700070077084794, "grad_norm": 0.33590784668922424, "learning_rate": 0.00016696962449174835, "loss": 0.037, "step": 9775 }, { "epoch": 1.3701471618780658, "grad_norm": 0.3645128011703491, "learning_rate": 0.00016695527385792873, "loss": 0.0422, "step": 9776 }, { "epoch": 1.3702873160476523, "grad_norm": 0.13569369912147522, "learning_rate": 0.00016694092322410906, "loss": 0.0374, "step": 9777 }, { "epoch": 1.370427470217239, "grad_norm": 0.4902980625629425, "learning_rate": 0.00016692657259028939, "loss": 0.0287, "step": 9778 }, { "epoch": 1.3705676243868254, "grad_norm": 0.2802170217037201, "learning_rate": 0.00016691222195646974, "loss": 0.04, "step": 9779 }, { "epoch": 1.3707077785564121, "grad_norm": 0.30555084347724915, "learning_rate": 0.00016689787132265007, "loss": 0.038, "step": 9780 }, { "epoch": 1.3708479327259986, "grad_norm": 0.40659022331237793, "learning_rate": 0.0001668835206888304, "loss": 0.2067, "step": 9781 }, { "epoch": 1.370988086895585, "grad_norm": 0.7139325737953186, "learning_rate": 0.00016686917005501075, "loss": 0.0651, "step": 9782 }, { "epoch": 1.3711282410651717, "grad_norm": 0.8820049166679382, "learning_rate": 0.00016685481942119108, "loss": 0.0697, "step": 
9783 }, { "epoch": 1.3712683952347582, "grad_norm": 0.4157586097717285, "learning_rate": 0.0001668404687873714, "loss": 0.084, "step": 9784 }, { "epoch": 1.3714085494043449, "grad_norm": 1.549342155456543, "learning_rate": 0.00016682611815355177, "loss": 0.2265, "step": 9785 }, { "epoch": 1.3715487035739313, "grad_norm": 0.35396257042884827, "learning_rate": 0.00016681176751973212, "loss": 0.0516, "step": 9786 }, { "epoch": 1.3716888577435178, "grad_norm": 0.4627709686756134, "learning_rate": 0.00016679741688591245, "loss": 0.0891, "step": 9787 }, { "epoch": 1.3718290119131045, "grad_norm": 0.5053613781929016, "learning_rate": 0.0001667830662520928, "loss": 0.0904, "step": 9788 }, { "epoch": 1.371969166082691, "grad_norm": 1.0574511289596558, "learning_rate": 0.00016676871561827314, "loss": 0.065, "step": 9789 }, { "epoch": 1.3721093202522776, "grad_norm": 0.4142410457134247, "learning_rate": 0.00016675436498445346, "loss": 0.0352, "step": 9790 }, { "epoch": 1.372249474421864, "grad_norm": 0.4920435845851898, "learning_rate": 0.00016674001435063382, "loss": 0.1189, "step": 9791 }, { "epoch": 1.3723896285914505, "grad_norm": 0.3084256052970886, "learning_rate": 0.00016672566371681415, "loss": 0.0465, "step": 9792 }, { "epoch": 1.3725297827610372, "grad_norm": 0.4969826340675354, "learning_rate": 0.00016671131308299448, "loss": 0.0856, "step": 9793 }, { "epoch": 1.3726699369306237, "grad_norm": 0.6154130697250366, "learning_rate": 0.0001666969624491748, "loss": 0.0839, "step": 9794 }, { "epoch": 1.3728100911002104, "grad_norm": 0.20868386328220367, "learning_rate": 0.00016668261181535516, "loss": 0.0437, "step": 9795 }, { "epoch": 1.3729502452697968, "grad_norm": 0.23858319222927094, "learning_rate": 0.0001666682611815355, "loss": 0.041, "step": 9796 }, { "epoch": 1.3730903994393833, "grad_norm": 0.8451023697853088, "learning_rate": 0.00016665391054771582, "loss": 0.0611, "step": 9797 }, { "epoch": 1.3732305536089697, "grad_norm": 0.19176678359508514, 
"learning_rate": 0.0001666395599138962, "loss": 0.0279, "step": 9798 }, { "epoch": 1.3733707077785564, "grad_norm": 0.4042290449142456, "learning_rate": 0.00016662520928007653, "loss": 0.16, "step": 9799 }, { "epoch": 1.373510861948143, "grad_norm": 0.18294411897659302, "learning_rate": 0.00016661085864625686, "loss": 0.0354, "step": 9800 }, { "epoch": 1.3736510161177296, "grad_norm": 0.28249847888946533, "learning_rate": 0.0001665965080124372, "loss": 0.1256, "step": 9801 }, { "epoch": 1.373791170287316, "grad_norm": 0.46365299820899963, "learning_rate": 0.00016658215737861754, "loss": 0.057, "step": 9802 }, { "epoch": 1.3739313244569025, "grad_norm": 0.3091248571872711, "learning_rate": 0.00016656780674479787, "loss": 0.0879, "step": 9803 }, { "epoch": 1.3740714786264892, "grad_norm": 0.11044400185346603, "learning_rate": 0.00016655345611097823, "loss": 0.0212, "step": 9804 }, { "epoch": 1.3742116327960756, "grad_norm": 0.26344338059425354, "learning_rate": 0.00016653910547715855, "loss": 0.0507, "step": 9805 }, { "epoch": 1.3743517869656623, "grad_norm": 0.16674789786338806, "learning_rate": 0.00016652475484333888, "loss": 0.0872, "step": 9806 }, { "epoch": 1.3744919411352488, "grad_norm": 0.37788036465644836, "learning_rate": 0.00016651040420951927, "loss": 0.0991, "step": 9807 }, { "epoch": 1.3746320953048352, "grad_norm": 0.8262220621109009, "learning_rate": 0.0001664960535756996, "loss": 0.0394, "step": 9808 }, { "epoch": 1.374772249474422, "grad_norm": 0.5292328596115112, "learning_rate": 0.00016648170294187992, "loss": 0.088, "step": 9809 }, { "epoch": 1.3749124036440084, "grad_norm": 0.23596534132957458, "learning_rate": 0.00016646735230806025, "loss": 0.0852, "step": 9810 }, { "epoch": 1.375052557813595, "grad_norm": 0.27616414427757263, "learning_rate": 0.0001664530016742406, "loss": 0.0427, "step": 9811 }, { "epoch": 1.3751927119831815, "grad_norm": 0.25596749782562256, "learning_rate": 0.00016643865104042094, "loss": 0.0307, "step": 9812 }, { "epoch": 
1.375332866152768, "grad_norm": 0.6160295605659485, "learning_rate": 0.00016642430040660126, "loss": 0.101, "step": 9813 }, { "epoch": 1.3754730203223546, "grad_norm": 0.3630438446998596, "learning_rate": 0.00016640994977278162, "loss": 0.0438, "step": 9814 }, { "epoch": 1.375613174491941, "grad_norm": 0.15699683129787445, "learning_rate": 0.00016639559913896195, "loss": 0.0289, "step": 9815 }, { "epoch": 1.3757533286615278, "grad_norm": 0.2664378583431244, "learning_rate": 0.00016638124850514228, "loss": 0.0529, "step": 9816 }, { "epoch": 1.3758934828311142, "grad_norm": 0.6827977299690247, "learning_rate": 0.00016636689787132263, "loss": 0.0913, "step": 9817 }, { "epoch": 1.3760336370007007, "grad_norm": 0.1987086832523346, "learning_rate": 0.000166352547237503, "loss": 0.0255, "step": 9818 }, { "epoch": 1.3761737911702874, "grad_norm": 0.20452755689620972, "learning_rate": 0.00016633819660368332, "loss": 0.0173, "step": 9819 }, { "epoch": 1.3763139453398738, "grad_norm": 0.2806962728500366, "learning_rate": 0.00016632384596986367, "loss": 0.0281, "step": 9820 }, { "epoch": 1.3764540995094605, "grad_norm": 0.738853931427002, "learning_rate": 0.000166309495336044, "loss": 0.0816, "step": 9821 }, { "epoch": 1.376594253679047, "grad_norm": 0.3580811023712158, "learning_rate": 0.00016629514470222433, "loss": 0.077, "step": 9822 }, { "epoch": 1.3767344078486334, "grad_norm": 0.273605614900589, "learning_rate": 0.00016628079406840469, "loss": 0.0192, "step": 9823 }, { "epoch": 1.3768745620182201, "grad_norm": 0.3490077257156372, "learning_rate": 0.00016626644343458501, "loss": 0.0666, "step": 9824 }, { "epoch": 1.3770147161878066, "grad_norm": 0.5520818829536438, "learning_rate": 0.00016625209280076534, "loss": 0.0824, "step": 9825 }, { "epoch": 1.3771548703573933, "grad_norm": 0.3373028635978699, "learning_rate": 0.0001662377421669457, "loss": 0.0363, "step": 9826 }, { "epoch": 1.3772950245269797, "grad_norm": 0.3937041759490967, "learning_rate": 
0.00016622339153312603, "loss": 0.0299, "step": 9827 }, { "epoch": 1.3774351786965662, "grad_norm": 0.8915595412254333, "learning_rate": 0.00016620904089930636, "loss": 0.0975, "step": 9828 }, { "epoch": 1.3775753328661526, "grad_norm": 0.4106620252132416, "learning_rate": 0.00016619469026548668, "loss": 0.1457, "step": 9829 }, { "epoch": 1.3777154870357393, "grad_norm": 0.5025816559791565, "learning_rate": 0.00016618033963166707, "loss": 0.0226, "step": 9830 }, { "epoch": 1.3778556412053258, "grad_norm": 1.7612367868423462, "learning_rate": 0.0001661659889978474, "loss": 0.0644, "step": 9831 }, { "epoch": 1.3779957953749125, "grad_norm": 1.447753667831421, "learning_rate": 0.00016615163836402772, "loss": 0.1274, "step": 9832 }, { "epoch": 1.378135949544499, "grad_norm": 3.34128737449646, "learning_rate": 0.00016613728773020808, "loss": 0.2345, "step": 9833 }, { "epoch": 1.3782761037140854, "grad_norm": 1.8495852947235107, "learning_rate": 0.0001661229370963884, "loss": 0.2408, "step": 9834 }, { "epoch": 1.378416257883672, "grad_norm": 0.27958783507347107, "learning_rate": 0.00016610858646256874, "loss": 0.0305, "step": 9835 }, { "epoch": 1.3785564120532585, "grad_norm": 0.20330193638801575, "learning_rate": 0.0001660942358287491, "loss": 0.0303, "step": 9836 }, { "epoch": 1.3786965662228452, "grad_norm": 0.18050719797611237, "learning_rate": 0.00016607988519492942, "loss": 0.0406, "step": 9837 }, { "epoch": 1.3788367203924317, "grad_norm": 0.12246309220790863, "learning_rate": 0.00016606553456110975, "loss": 0.0438, "step": 9838 }, { "epoch": 1.3789768745620181, "grad_norm": 0.1493966430425644, "learning_rate": 0.00016605118392729013, "loss": 0.0226, "step": 9839 }, { "epoch": 1.3791170287316048, "grad_norm": 0.3623847961425781, "learning_rate": 0.00016603683329347046, "loss": 0.0421, "step": 9840 }, { "epoch": 1.3792571829011913, "grad_norm": 0.29569578170776367, "learning_rate": 0.0001660224826596508, "loss": 0.0472, "step": 9841 }, { "epoch": 1.379397337070778, 
"grad_norm": 0.3124988377094269, "learning_rate": 0.00016600813202583115, "loss": 0.0834, "step": 9842 }, { "epoch": 1.3795374912403644, "grad_norm": 0.41684073209762573, "learning_rate": 0.00016599378139201147, "loss": 0.0954, "step": 9843 }, { "epoch": 1.3796776454099509, "grad_norm": 0.3260037899017334, "learning_rate": 0.0001659794307581918, "loss": 0.0386, "step": 9844 }, { "epoch": 1.3798177995795375, "grad_norm": 0.7677415609359741, "learning_rate": 0.00016596508012437213, "loss": 0.0595, "step": 9845 }, { "epoch": 1.379957953749124, "grad_norm": 0.2738877236843109, "learning_rate": 0.0001659507294905525, "loss": 0.0729, "step": 9846 }, { "epoch": 1.3800981079187107, "grad_norm": 0.21391017735004425, "learning_rate": 0.00016593637885673282, "loss": 0.0178, "step": 9847 }, { "epoch": 1.3802382620882971, "grad_norm": 0.3169548213481903, "learning_rate": 0.00016592202822291314, "loss": 0.0596, "step": 9848 }, { "epoch": 1.3803784162578836, "grad_norm": 0.34406572580337524, "learning_rate": 0.0001659076775890935, "loss": 0.1025, "step": 9849 }, { "epoch": 1.3805185704274703, "grad_norm": 0.2814118564128876, "learning_rate": 0.00016589332695527386, "loss": 0.0707, "step": 9850 }, { "epoch": 1.3806587245970567, "grad_norm": 0.2682029902935028, "learning_rate": 0.00016587897632145418, "loss": 0.0863, "step": 9851 }, { "epoch": 1.3807988787666434, "grad_norm": 0.543023943901062, "learning_rate": 0.00016586462568763454, "loss": 0.0801, "step": 9852 }, { "epoch": 1.3809390329362299, "grad_norm": 0.22751758992671967, "learning_rate": 0.00016585027505381487, "loss": 0.0338, "step": 9853 }, { "epoch": 1.3810791871058163, "grad_norm": 0.12478634715080261, "learning_rate": 0.0001658359244199952, "loss": 0.0157, "step": 9854 }, { "epoch": 1.381219341275403, "grad_norm": 0.3831522762775421, "learning_rate": 0.00016582157378617555, "loss": 0.074, "step": 9855 }, { "epoch": 1.3813594954449895, "grad_norm": 0.3365626037120819, "learning_rate": 0.00016580722315235588, "loss": 
0.056, "step": 9856 }, { "epoch": 1.3814996496145762, "grad_norm": 0.452978253364563, "learning_rate": 0.0001657928725185362, "loss": 0.0729, "step": 9857 }, { "epoch": 1.3816398037841626, "grad_norm": 0.11168167740106583, "learning_rate": 0.00016577852188471656, "loss": 0.0219, "step": 9858 }, { "epoch": 1.381779957953749, "grad_norm": 0.38828563690185547, "learning_rate": 0.0001657641712508969, "loss": 0.1007, "step": 9859 }, { "epoch": 1.3819201121233355, "grad_norm": 0.49741503596305847, "learning_rate": 0.00016574982061707722, "loss": 0.0553, "step": 9860 }, { "epoch": 1.3820602662929222, "grad_norm": 0.39293739199638367, "learning_rate": 0.00016573546998325755, "loss": 0.0283, "step": 9861 }, { "epoch": 1.3822004204625087, "grad_norm": 0.7633966207504272, "learning_rate": 0.00016572111934943793, "loss": 0.063, "step": 9862 }, { "epoch": 1.3823405746320954, "grad_norm": 0.18341323733329773, "learning_rate": 0.00016570676871561826, "loss": 0.0399, "step": 9863 }, { "epoch": 1.3824807288016818, "grad_norm": 0.16444343328475952, "learning_rate": 0.0001656924180817986, "loss": 0.059, "step": 9864 }, { "epoch": 1.3826208829712683, "grad_norm": 0.6269795298576355, "learning_rate": 0.00016567806744797895, "loss": 0.1135, "step": 9865 }, { "epoch": 1.382761037140855, "grad_norm": 1.1129467487335205, "learning_rate": 0.00016566371681415927, "loss": 0.069, "step": 9866 }, { "epoch": 1.3829011913104414, "grad_norm": 0.3726169168949127, "learning_rate": 0.0001656493661803396, "loss": 0.0789, "step": 9867 }, { "epoch": 1.383041345480028, "grad_norm": 0.3132324814796448, "learning_rate": 0.00016563501554651996, "loss": 0.1302, "step": 9868 }, { "epoch": 1.3831814996496146, "grad_norm": 0.4091932773590088, "learning_rate": 0.0001656206649127003, "loss": 0.0997, "step": 9869 }, { "epoch": 1.383321653819201, "grad_norm": 0.34533199667930603, "learning_rate": 0.00016560631427888062, "loss": 0.0516, "step": 9870 }, { "epoch": 1.3834618079887877, "grad_norm": 0.3048110902309418, 
"learning_rate": 0.000165591963645061, "loss": 0.0337, "step": 9871 }, { "epoch": 1.3836019621583742, "grad_norm": 0.46481406688690186, "learning_rate": 0.00016557761301124133, "loss": 0.0624, "step": 9872 }, { "epoch": 1.3837421163279608, "grad_norm": 0.3779941201210022, "learning_rate": 0.00016556326237742166, "loss": 0.0622, "step": 9873 }, { "epoch": 1.3838822704975473, "grad_norm": 0.3981720209121704, "learning_rate": 0.000165548911743602, "loss": 0.0451, "step": 9874 }, { "epoch": 1.3840224246671338, "grad_norm": 0.4253818690776825, "learning_rate": 0.00016553456110978234, "loss": 0.0589, "step": 9875 }, { "epoch": 1.3841625788367204, "grad_norm": 0.1879155933856964, "learning_rate": 0.00016552021047596267, "loss": 0.0242, "step": 9876 }, { "epoch": 1.384302733006307, "grad_norm": 0.4387842118740082, "learning_rate": 0.00016550585984214302, "loss": 0.0472, "step": 9877 }, { "epoch": 1.3844428871758936, "grad_norm": 0.6361129879951477, "learning_rate": 0.00016549150920832335, "loss": 0.0558, "step": 9878 }, { "epoch": 1.38458304134548, "grad_norm": 0.6603955626487732, "learning_rate": 0.00016547715857450368, "loss": 0.0671, "step": 9879 }, { "epoch": 1.3847231955150665, "grad_norm": 0.5385135412216187, "learning_rate": 0.000165462807940684, "loss": 0.1675, "step": 9880 }, { "epoch": 1.3848633496846532, "grad_norm": 0.7110384106636047, "learning_rate": 0.0001654484573068644, "loss": 0.0427, "step": 9881 }, { "epoch": 1.3850035038542396, "grad_norm": 0.4004608690738678, "learning_rate": 0.00016543410667304472, "loss": 0.0692, "step": 9882 }, { "epoch": 1.3851436580238263, "grad_norm": 0.5768249034881592, "learning_rate": 0.00016541975603922505, "loss": 0.0422, "step": 9883 }, { "epoch": 1.3852838121934128, "grad_norm": 2.5120275020599365, "learning_rate": 0.0001654054054054054, "loss": 0.2715, "step": 9884 }, { "epoch": 1.3854239663629992, "grad_norm": 1.6612597703933716, "learning_rate": 0.00016539105477158573, "loss": 0.1563, "step": 9885 }, { "epoch": 
1.3855641205325857, "grad_norm": 0.13905218243598938, "learning_rate": 0.00016537670413776606, "loss": 0.0287, "step": 9886 }, { "epoch": 1.3857042747021724, "grad_norm": 0.26785966753959656, "learning_rate": 0.00016536235350394642, "loss": 0.0991, "step": 9887 }, { "epoch": 1.385844428871759, "grad_norm": 0.30881965160369873, "learning_rate": 0.00016534800287012675, "loss": 0.1044, "step": 9888 }, { "epoch": 1.3859845830413455, "grad_norm": 0.769124448299408, "learning_rate": 0.00016533365223630708, "loss": 0.1158, "step": 9889 }, { "epoch": 1.386124737210932, "grad_norm": 0.34987568855285645, "learning_rate": 0.00016531930160248743, "loss": 0.0613, "step": 9890 }, { "epoch": 1.3862648913805184, "grad_norm": 0.17850933969020844, "learning_rate": 0.00016530495096866776, "loss": 0.0396, "step": 9891 }, { "epoch": 1.3864050455501051, "grad_norm": 0.13544943928718567, "learning_rate": 0.0001652906003348481, "loss": 0.0431, "step": 9892 }, { "epoch": 1.3865451997196916, "grad_norm": 0.1979440599679947, "learning_rate": 0.00016527624970102847, "loss": 0.0396, "step": 9893 }, { "epoch": 1.3866853538892783, "grad_norm": 0.1560634821653366, "learning_rate": 0.0001652618990672088, "loss": 0.0227, "step": 9894 }, { "epoch": 1.3868255080588647, "grad_norm": 0.1894182562828064, "learning_rate": 0.00016524754843338913, "loss": 0.0325, "step": 9895 }, { "epoch": 1.3869656622284512, "grad_norm": 0.22193019092082977, "learning_rate": 0.00016523319779956946, "loss": 0.056, "step": 9896 }, { "epoch": 1.3871058163980379, "grad_norm": 0.1935570389032364, "learning_rate": 0.0001652188471657498, "loss": 0.0541, "step": 9897 }, { "epoch": 1.3872459705676243, "grad_norm": 0.1711646318435669, "learning_rate": 0.00016520449653193014, "loss": 0.023, "step": 9898 }, { "epoch": 1.387386124737211, "grad_norm": 0.19555114209651947, "learning_rate": 0.00016519014589811047, "loss": 0.0131, "step": 9899 }, { "epoch": 1.3875262789067975, "grad_norm": 0.4711863398551941, "learning_rate": 
0.00016517579526429083, "loss": 0.0373, "step": 9900 }, { "epoch": 1.387666433076384, "grad_norm": 0.22323556244373322, "learning_rate": 0.00016516144463047115, "loss": 0.0476, "step": 9901 }, { "epoch": 1.3878065872459706, "grad_norm": 0.4431634247303009, "learning_rate": 0.00016514709399665148, "loss": 0.0562, "step": 9902 }, { "epoch": 1.387946741415557, "grad_norm": 0.26687121391296387, "learning_rate": 0.00016513274336283187, "loss": 0.1016, "step": 9903 }, { "epoch": 1.3880868955851438, "grad_norm": 0.09746091067790985, "learning_rate": 0.0001651183927290122, "loss": 0.0162, "step": 9904 }, { "epoch": 1.3882270497547302, "grad_norm": 0.30017802119255066, "learning_rate": 0.00016510404209519252, "loss": 0.0566, "step": 9905 }, { "epoch": 1.3883672039243167, "grad_norm": 0.2457084208726883, "learning_rate": 0.00016508969146137288, "loss": 0.0323, "step": 9906 }, { "epoch": 1.3885073580939034, "grad_norm": 0.42852237820625305, "learning_rate": 0.0001650753408275532, "loss": 0.0782, "step": 9907 }, { "epoch": 1.3886475122634898, "grad_norm": 0.19700708985328674, "learning_rate": 0.00016506099019373353, "loss": 0.0125, "step": 9908 }, { "epoch": 1.3887876664330765, "grad_norm": 0.3940353989601135, "learning_rate": 0.0001650466395599139, "loss": 0.0567, "step": 9909 }, { "epoch": 1.388927820602663, "grad_norm": 0.5056464076042175, "learning_rate": 0.00016503228892609422, "loss": 0.0503, "step": 9910 }, { "epoch": 1.3890679747722494, "grad_norm": 0.1690114587545395, "learning_rate": 0.00016501793829227455, "loss": 0.0135, "step": 9911 }, { "epoch": 1.389208128941836, "grad_norm": 0.058540597558021545, "learning_rate": 0.0001650035876584549, "loss": 0.0053, "step": 9912 }, { "epoch": 1.3893482831114226, "grad_norm": 0.17253154516220093, "learning_rate": 0.00016498923702463526, "loss": 0.0354, "step": 9913 }, { "epoch": 1.3894884372810092, "grad_norm": 0.6110184192657471, "learning_rate": 0.0001649748863908156, "loss": 0.0412, "step": 9914 }, { "epoch": 
1.3896285914505957, "grad_norm": 1.0733479261398315, "learning_rate": 0.00016496053575699592, "loss": 0.0678, "step": 9915 }, { "epoch": 1.3897687456201822, "grad_norm": 0.37653848528862, "learning_rate": 0.00016494618512317627, "loss": 0.0479, "step": 9916 }, { "epoch": 1.3899088997897686, "grad_norm": 0.4403993487358093, "learning_rate": 0.0001649318344893566, "loss": 0.0755, "step": 9917 }, { "epoch": 1.3900490539593553, "grad_norm": 0.11352758854627609, "learning_rate": 0.00016491748385553693, "loss": 0.048, "step": 9918 }, { "epoch": 1.3901892081289418, "grad_norm": 0.3408459424972534, "learning_rate": 0.00016490313322171728, "loss": 0.0239, "step": 9919 }, { "epoch": 1.3903293622985284, "grad_norm": 0.31039145588874817, "learning_rate": 0.0001648887825878976, "loss": 0.0714, "step": 9920 }, { "epoch": 1.390469516468115, "grad_norm": 0.5289487838745117, "learning_rate": 0.00016487443195407794, "loss": 0.0429, "step": 9921 }, { "epoch": 1.3906096706377014, "grad_norm": 0.258137971162796, "learning_rate": 0.0001648600813202583, "loss": 0.033, "step": 9922 }, { "epoch": 1.390749824807288, "grad_norm": 0.3090880215167999, "learning_rate": 0.00016484573068643863, "loss": 0.0334, "step": 9923 }, { "epoch": 1.3908899789768745, "grad_norm": 0.21065670251846313, "learning_rate": 0.00016483138005261895, "loss": 0.0153, "step": 9924 }, { "epoch": 1.3910301331464612, "grad_norm": 1.2045516967773438, "learning_rate": 0.00016481702941879934, "loss": 0.1021, "step": 9925 }, { "epoch": 1.3911702873160476, "grad_norm": 0.22760462760925293, "learning_rate": 0.00016480267878497967, "loss": 0.0816, "step": 9926 }, { "epoch": 1.391310441485634, "grad_norm": 0.8216742873191833, "learning_rate": 0.00016478832815116, "loss": 0.0496, "step": 9927 }, { "epoch": 1.3914505956552208, "grad_norm": 1.1841371059417725, "learning_rate": 0.00016477397751734035, "loss": 0.1763, "step": 9928 }, { "epoch": 1.3915907498248072, "grad_norm": 0.44436272978782654, "learning_rate": 
0.00016475962688352068, "loss": 0.1548, "step": 9929 }, { "epoch": 1.391730903994394, "grad_norm": 0.4918588399887085, "learning_rate": 0.000164745276249701, "loss": 0.0922, "step": 9930 }, { "epoch": 1.3918710581639804, "grad_norm": 0.34305915236473083, "learning_rate": 0.00016473092561588134, "loss": 0.0327, "step": 9931 }, { "epoch": 1.3920112123335668, "grad_norm": 0.19560088217258453, "learning_rate": 0.0001647165749820617, "loss": 0.0179, "step": 9932 }, { "epoch": 1.3921513665031535, "grad_norm": 1.6035668849945068, "learning_rate": 0.00016470222434824202, "loss": 0.1782, "step": 9933 }, { "epoch": 1.39229152067274, "grad_norm": 0.9499816298484802, "learning_rate": 0.00016468787371442235, "loss": 0.0385, "step": 9934 }, { "epoch": 1.3924316748423267, "grad_norm": 1.1299548149108887, "learning_rate": 0.00016467352308060273, "loss": 0.0798, "step": 9935 }, { "epoch": 1.3925718290119131, "grad_norm": 0.16624300181865692, "learning_rate": 0.00016465917244678306, "loss": 0.0426, "step": 9936 }, { "epoch": 1.3927119831814996, "grad_norm": 0.2523902356624603, "learning_rate": 0.0001646448218129634, "loss": 0.0387, "step": 9937 }, { "epoch": 1.3928521373510863, "grad_norm": 0.11820819228887558, "learning_rate": 0.00016463047117914374, "loss": 0.0239, "step": 9938 }, { "epoch": 1.3929922915206727, "grad_norm": 0.45123013854026794, "learning_rate": 0.00016461612054532407, "loss": 0.0516, "step": 9939 }, { "epoch": 1.3931324456902594, "grad_norm": 0.3315385580062866, "learning_rate": 0.0001646017699115044, "loss": 0.0537, "step": 9940 }, { "epoch": 1.3932725998598459, "grad_norm": 0.2899155914783478, "learning_rate": 0.00016458741927768476, "loss": 0.0333, "step": 9941 }, { "epoch": 1.3934127540294323, "grad_norm": 0.503903329372406, "learning_rate": 0.00016457306864386509, "loss": 0.0195, "step": 9942 }, { "epoch": 1.393552908199019, "grad_norm": 0.3337000906467438, "learning_rate": 0.00016455871801004541, "loss": 0.0668, "step": 9943 }, { "epoch": 1.3936930623686055, 
"grad_norm": 0.28747814893722534, "learning_rate": 0.00016454436737622577, "loss": 0.0257, "step": 9944 }, { "epoch": 1.3938332165381921, "grad_norm": 0.3226398527622223, "learning_rate": 0.00016453001674240613, "loss": 0.053, "step": 9945 }, { "epoch": 1.3939733707077786, "grad_norm": 0.08775278180837631, "learning_rate": 0.00016451566610858645, "loss": 0.0169, "step": 9946 }, { "epoch": 1.394113524877365, "grad_norm": 0.40490493178367615, "learning_rate": 0.0001645013154747668, "loss": 0.0403, "step": 9947 }, { "epoch": 1.3942536790469515, "grad_norm": 0.2999856472015381, "learning_rate": 0.00016448696484094714, "loss": 0.0298, "step": 9948 }, { "epoch": 1.3943938332165382, "grad_norm": 0.32002151012420654, "learning_rate": 0.00016447261420712747, "loss": 0.0349, "step": 9949 }, { "epoch": 1.3945339873861247, "grad_norm": 0.3044469356536865, "learning_rate": 0.0001644582635733078, "loss": 0.0575, "step": 9950 }, { "epoch": 1.3946741415557113, "grad_norm": 0.3892817199230194, "learning_rate": 0.00016444391293948815, "loss": 0.0743, "step": 9951 }, { "epoch": 1.3948142957252978, "grad_norm": 0.16460630297660828, "learning_rate": 0.00016442956230566848, "loss": 0.0112, "step": 9952 }, { "epoch": 1.3949544498948843, "grad_norm": 0.2842039167881012, "learning_rate": 0.0001644152116718488, "loss": 0.0815, "step": 9953 }, { "epoch": 1.395094604064471, "grad_norm": 0.29906436800956726, "learning_rate": 0.00016440086103802916, "loss": 0.0288, "step": 9954 }, { "epoch": 1.3952347582340574, "grad_norm": 0.23643511533737183, "learning_rate": 0.0001643865104042095, "loss": 0.0418, "step": 9955 }, { "epoch": 1.395374912403644, "grad_norm": 0.28124886751174927, "learning_rate": 0.00016437215977038982, "loss": 0.0631, "step": 9956 }, { "epoch": 1.3955150665732305, "grad_norm": 0.16888119280338287, "learning_rate": 0.0001643578091365702, "loss": 0.0201, "step": 9957 }, { "epoch": 1.395655220742817, "grad_norm": 0.7826975584030151, "learning_rate": 0.00016434345850275053, "loss": 
0.0539, "step": 9958 }, { "epoch": 1.3957953749124037, "grad_norm": 0.7242889404296875, "learning_rate": 0.00016432910786893086, "loss": 0.1006, "step": 9959 }, { "epoch": 1.3959355290819901, "grad_norm": 0.2995014190673828, "learning_rate": 0.00016431475723511122, "loss": 0.0963, "step": 9960 }, { "epoch": 1.3960756832515768, "grad_norm": 0.18630144000053406, "learning_rate": 0.00016430040660129154, "loss": 0.0276, "step": 9961 }, { "epoch": 1.3962158374211633, "grad_norm": 0.9215060472488403, "learning_rate": 0.00016428605596747187, "loss": 0.0494, "step": 9962 }, { "epoch": 1.3963559915907497, "grad_norm": 0.4581790566444397, "learning_rate": 0.00016427170533365223, "loss": 0.0782, "step": 9963 }, { "epoch": 1.3964961457603364, "grad_norm": 0.6915099024772644, "learning_rate": 0.00016425735469983256, "loss": 0.0374, "step": 9964 }, { "epoch": 1.3966362999299229, "grad_norm": 0.4167921245098114, "learning_rate": 0.00016424300406601289, "loss": 0.0862, "step": 9965 }, { "epoch": 1.3967764540995096, "grad_norm": 0.21781854331493378, "learning_rate": 0.00016422865343219321, "loss": 0.0152, "step": 9966 }, { "epoch": 1.396916608269096, "grad_norm": 0.18379028141498566, "learning_rate": 0.0001642143027983736, "loss": 0.0421, "step": 9967 }, { "epoch": 1.3970567624386825, "grad_norm": 0.2734959125518799, "learning_rate": 0.00016419995216455393, "loss": 0.0414, "step": 9968 }, { "epoch": 1.3971969166082692, "grad_norm": 0.5780998468399048, "learning_rate": 0.00016418560153073425, "loss": 0.0978, "step": 9969 }, { "epoch": 1.3973370707778556, "grad_norm": 0.41190165281295776, "learning_rate": 0.0001641712508969146, "loss": 0.0499, "step": 9970 }, { "epoch": 1.3974772249474423, "grad_norm": 0.36172229051589966, "learning_rate": 0.00016415690026309494, "loss": 0.0414, "step": 9971 }, { "epoch": 1.3976173791170288, "grad_norm": 0.2094537913799286, "learning_rate": 0.00016414254962927527, "loss": 0.0272, "step": 9972 }, { "epoch": 1.3977575332866152, "grad_norm": 
0.1513061672449112, "learning_rate": 0.00016412819899545562, "loss": 0.0111, "step": 9973 }, { "epoch": 1.3978976874562017, "grad_norm": 0.40582114458084106, "learning_rate": 0.00016411384836163595, "loss": 0.0763, "step": 9974 }, { "epoch": 1.3980378416257884, "grad_norm": 0.456534206867218, "learning_rate": 0.00016409949772781628, "loss": 0.0761, "step": 9975 }, { "epoch": 1.398177995795375, "grad_norm": 0.2686010003089905, "learning_rate": 0.00016408514709399664, "loss": 0.0342, "step": 9976 }, { "epoch": 1.3983181499649615, "grad_norm": 0.4605688452720642, "learning_rate": 0.000164070796460177, "loss": 0.0509, "step": 9977 }, { "epoch": 1.398458304134548, "grad_norm": 0.26086583733558655, "learning_rate": 0.00016405644582635732, "loss": 0.043, "step": 9978 }, { "epoch": 1.3985984583041344, "grad_norm": 0.1798989325761795, "learning_rate": 0.00016404209519253768, "loss": 0.0291, "step": 9979 }, { "epoch": 1.398738612473721, "grad_norm": 0.27406424283981323, "learning_rate": 0.000164027744558718, "loss": 0.0292, "step": 9980 }, { "epoch": 1.3988787666433076, "grad_norm": 0.6544710993766785, "learning_rate": 0.00016401339392489833, "loss": 0.0131, "step": 9981 }, { "epoch": 1.3990189208128943, "grad_norm": 1.0445932149887085, "learning_rate": 0.0001639990432910787, "loss": 0.1203, "step": 9982 }, { "epoch": 1.3991590749824807, "grad_norm": 0.4544623792171478, "learning_rate": 0.00016398469265725902, "loss": 0.0503, "step": 9983 }, { "epoch": 1.3992992291520672, "grad_norm": 0.6019479036331177, "learning_rate": 0.00016397034202343935, "loss": 0.032, "step": 9984 }, { "epoch": 1.3994393833216539, "grad_norm": 1.3350580930709839, "learning_rate": 0.00016395599138961967, "loss": 0.1664, "step": 9985 }, { "epoch": 1.3995795374912403, "grad_norm": 0.1536942422389984, "learning_rate": 0.00016394164075580003, "loss": 0.0228, "step": 9986 }, { "epoch": 1.399719691660827, "grad_norm": 0.35316482186317444, "learning_rate": 0.00016392729012198036, "loss": 0.0256, "step": 9987 
}, { "epoch": 1.3998598458304135, "grad_norm": 0.1834089756011963, "learning_rate": 0.0001639129394881607, "loss": 0.0161, "step": 9988 }, { "epoch": 1.4, "grad_norm": 0.4867200553417206, "learning_rate": 0.00016389858885434107, "loss": 0.0732, "step": 9989 }, { "epoch": 1.4001401541695866, "grad_norm": 0.26918044686317444, "learning_rate": 0.0001638842382205214, "loss": 0.0245, "step": 9990 }, { "epoch": 1.400280308339173, "grad_norm": 0.1925928294658661, "learning_rate": 0.00016386988758670173, "loss": 0.0199, "step": 9991 }, { "epoch": 1.4004204625087597, "grad_norm": 0.5353230237960815, "learning_rate": 0.00016385553695288208, "loss": 0.0599, "step": 9992 }, { "epoch": 1.4005606166783462, "grad_norm": 1.0651512145996094, "learning_rate": 0.0001638411863190624, "loss": 0.053, "step": 9993 }, { "epoch": 1.4007007708479327, "grad_norm": 0.31378835439682007, "learning_rate": 0.00016382683568524274, "loss": 0.0547, "step": 9994 }, { "epoch": 1.4008409250175193, "grad_norm": 0.23990264534950256, "learning_rate": 0.0001638124850514231, "loss": 0.0441, "step": 9995 }, { "epoch": 1.4009810791871058, "grad_norm": 0.538273811340332, "learning_rate": 0.00016379813441760342, "loss": 0.0667, "step": 9996 }, { "epoch": 1.4011212333566925, "grad_norm": 0.4067722260951996, "learning_rate": 0.00016378378378378375, "loss": 0.0829, "step": 9997 }, { "epoch": 1.401261387526279, "grad_norm": 0.26510345935821533, "learning_rate": 0.00016376943314996414, "loss": 0.0667, "step": 9998 }, { "epoch": 1.4014015416958654, "grad_norm": 0.3448517918586731, "learning_rate": 0.00016375508251614446, "loss": 0.0188, "step": 9999 }, { "epoch": 1.401541695865452, "grad_norm": 0.6394071578979492, "learning_rate": 0.0001637407318823248, "loss": 0.0767, "step": 10000 }, { "epoch": 1.4016818500350385, "grad_norm": 0.36078816652297974, "learning_rate": 0.00016372638124850512, "loss": 0.0373, "step": 10001 }, { "epoch": 1.4018220042046252, "grad_norm": 0.5789733529090881, "learning_rate": 
0.00016371203061468548, "loss": 0.0424, "step": 10002 }, { "epoch": 1.4019621583742117, "grad_norm": 0.308187335729599, "learning_rate": 0.0001636976799808658, "loss": 0.0769, "step": 10003 }, { "epoch": 1.4021023125437981, "grad_norm": 0.21963247656822205, "learning_rate": 0.00016368332934704613, "loss": 0.0153, "step": 10004 }, { "epoch": 1.4022424667133846, "grad_norm": 0.13228927552700043, "learning_rate": 0.0001636689787132265, "loss": 0.0064, "step": 10005 }, { "epoch": 1.4023826208829713, "grad_norm": 0.20588622987270355, "learning_rate": 0.00016365462807940682, "loss": 0.0135, "step": 10006 }, { "epoch": 1.4025227750525577, "grad_norm": 0.49017664790153503, "learning_rate": 0.00016364027744558715, "loss": 0.0428, "step": 10007 }, { "epoch": 1.4026629292221444, "grad_norm": 0.22865651547908783, "learning_rate": 0.00016362592681176753, "loss": 0.0161, "step": 10008 }, { "epoch": 1.4028030833917309, "grad_norm": 0.17678236961364746, "learning_rate": 0.00016361157617794786, "loss": 0.0231, "step": 10009 }, { "epoch": 1.4029432375613173, "grad_norm": 0.24384216964244843, "learning_rate": 0.0001635972255441282, "loss": 0.0195, "step": 10010 }, { "epoch": 1.403083391730904, "grad_norm": 0.3762162923812866, "learning_rate": 0.00016358287491030854, "loss": 0.0213, "step": 10011 }, { "epoch": 1.4032235459004905, "grad_norm": 0.4076460003852844, "learning_rate": 0.00016356852427648887, "loss": 0.0413, "step": 10012 }, { "epoch": 1.4033637000700772, "grad_norm": 0.5227800011634827, "learning_rate": 0.0001635541736426692, "loss": 0.054, "step": 10013 }, { "epoch": 1.4035038542396636, "grad_norm": 0.3644075393676758, "learning_rate": 0.00016353982300884955, "loss": 0.042, "step": 10014 }, { "epoch": 1.40364400840925, "grad_norm": 0.3115498423576355, "learning_rate": 0.00016352547237502988, "loss": 0.0642, "step": 10015 }, { "epoch": 1.4037841625788368, "grad_norm": 0.3390819728374481, "learning_rate": 0.0001635111217412102, "loss": 0.0405, "step": 10016 }, { "epoch": 
1.4039243167484232, "grad_norm": 0.3525443971157074, "learning_rate": 0.00016349677110739057, "loss": 0.0478, "step": 10017 }, { "epoch": 1.40406447091801, "grad_norm": 0.3489852547645569, "learning_rate": 0.0001634824204735709, "loss": 0.0905, "step": 10018 }, { "epoch": 1.4042046250875964, "grad_norm": 0.4071124196052551, "learning_rate": 0.00016346806983975122, "loss": 0.0609, "step": 10019 }, { "epoch": 1.4043447792571828, "grad_norm": 0.15567345917224884, "learning_rate": 0.00016345371920593155, "loss": 0.0509, "step": 10020 }, { "epoch": 1.4044849334267695, "grad_norm": 0.062040187418460846, "learning_rate": 0.00016343936857211194, "loss": 0.0079, "step": 10021 }, { "epoch": 1.404625087596356, "grad_norm": 0.3235335350036621, "learning_rate": 0.00016342501793829226, "loss": 0.0341, "step": 10022 }, { "epoch": 1.4047652417659426, "grad_norm": 0.5629802346229553, "learning_rate": 0.0001634106673044726, "loss": 0.0875, "step": 10023 }, { "epoch": 1.404905395935529, "grad_norm": 0.42549726366996765, "learning_rate": 0.00016339631667065295, "loss": 0.0545, "step": 10024 }, { "epoch": 1.4050455501051156, "grad_norm": 0.4296789765357971, "learning_rate": 0.00016338196603683328, "loss": 0.1332, "step": 10025 }, { "epoch": 1.4051857042747022, "grad_norm": 0.4154438376426697, "learning_rate": 0.0001633676154030136, "loss": 0.066, "step": 10026 }, { "epoch": 1.4053258584442887, "grad_norm": 0.8033463358879089, "learning_rate": 0.00016335326476919396, "loss": 0.0661, "step": 10027 }, { "epoch": 1.4054660126138754, "grad_norm": 0.23800277709960938, "learning_rate": 0.0001633389141353743, "loss": 0.0463, "step": 10028 }, { "epoch": 1.4056061667834618, "grad_norm": 1.1812933683395386, "learning_rate": 0.00016332456350155462, "loss": 0.0678, "step": 10029 }, { "epoch": 1.4057463209530483, "grad_norm": 1.805396556854248, "learning_rate": 0.000163310212867735, "loss": 0.073, "step": 10030 }, { "epoch": 1.4058864751226348, "grad_norm": 0.525574266910553, "learning_rate": 
0.00016329586223391533, "loss": 0.0885, "step": 10031 }, { "epoch": 1.4060266292922214, "grad_norm": 0.24835549294948578, "learning_rate": 0.00016328151160009566, "loss": 0.0584, "step": 10032 }, { "epoch": 1.4061667834618081, "grad_norm": 0.3491349220275879, "learning_rate": 0.00016326716096627601, "loss": 0.0255, "step": 10033 }, { "epoch": 1.4063069376313946, "grad_norm": 0.9566276669502258, "learning_rate": 0.00016325281033245634, "loss": 0.1137, "step": 10034 }, { "epoch": 1.406447091800981, "grad_norm": 1.8127440214157104, "learning_rate": 0.00016323845969863667, "loss": 0.1942, "step": 10035 }, { "epoch": 1.4065872459705675, "grad_norm": 0.40884366631507874, "learning_rate": 0.000163224109064817, "loss": 0.0542, "step": 10036 }, { "epoch": 1.4067274001401542, "grad_norm": 0.45456331968307495, "learning_rate": 0.00016320975843099736, "loss": 0.0824, "step": 10037 }, { "epoch": 1.4068675543097406, "grad_norm": 0.5281078219413757, "learning_rate": 0.00016319540779717768, "loss": 0.0659, "step": 10038 }, { "epoch": 1.4070077084793273, "grad_norm": 0.07686958461999893, "learning_rate": 0.000163181057163358, "loss": 0.0109, "step": 10039 }, { "epoch": 1.4071478626489138, "grad_norm": 0.2552245855331421, "learning_rate": 0.0001631667065295384, "loss": 0.0527, "step": 10040 }, { "epoch": 1.4072880168185002, "grad_norm": 0.31303641200065613, "learning_rate": 0.00016315235589571872, "loss": 0.057, "step": 10041 }, { "epoch": 1.407428170988087, "grad_norm": 0.26873889565467834, "learning_rate": 0.00016313800526189905, "loss": 0.0371, "step": 10042 }, { "epoch": 1.4075683251576734, "grad_norm": 0.22019919753074646, "learning_rate": 0.0001631236546280794, "loss": 0.0303, "step": 10043 }, { "epoch": 1.40770847932726, "grad_norm": 0.49361923336982727, "learning_rate": 0.00016310930399425974, "loss": 0.0486, "step": 10044 }, { "epoch": 1.4078486334968465, "grad_norm": 0.19308622181415558, "learning_rate": 0.00016309495336044007, "loss": 0.0395, "step": 10045 }, { "epoch": 
1.407988787666433, "grad_norm": 0.09779738634824753, "learning_rate": 0.00016308060272662042, "loss": 0.0134, "step": 10046 }, { "epoch": 1.4081289418360197, "grad_norm": 0.3088248372077942, "learning_rate": 0.00016306625209280075, "loss": 0.0259, "step": 10047 }, { "epoch": 1.4082690960056061, "grad_norm": 0.17860445380210876, "learning_rate": 0.00016305190145898108, "loss": 0.0301, "step": 10048 }, { "epoch": 1.4084092501751928, "grad_norm": 0.359048455953598, "learning_rate": 0.00016303755082516143, "loss": 0.0317, "step": 10049 }, { "epoch": 1.4085494043447793, "grad_norm": 0.33246049284935, "learning_rate": 0.00016302320019134176, "loss": 0.0673, "step": 10050 }, { "epoch": 1.4086895585143657, "grad_norm": 0.11317368596792221, "learning_rate": 0.0001630088495575221, "loss": 0.014, "step": 10051 }, { "epoch": 1.4088297126839524, "grad_norm": 0.530755341053009, "learning_rate": 0.00016299449892370247, "loss": 0.0703, "step": 10052 }, { "epoch": 1.4089698668535389, "grad_norm": 0.3518270254135132, "learning_rate": 0.0001629801482898828, "loss": 0.0581, "step": 10053 }, { "epoch": 1.4091100210231255, "grad_norm": 0.13017380237579346, "learning_rate": 0.00016296579765606313, "loss": 0.0124, "step": 10054 }, { "epoch": 1.409250175192712, "grad_norm": 0.3444695472717285, "learning_rate": 0.00016295144702224346, "loss": 0.0429, "step": 10055 }, { "epoch": 1.4093903293622985, "grad_norm": 0.4072650969028473, "learning_rate": 0.00016293709638842382, "loss": 0.0347, "step": 10056 }, { "epoch": 1.4095304835318851, "grad_norm": 0.04853210225701332, "learning_rate": 0.00016292274575460414, "loss": 0.0043, "step": 10057 }, { "epoch": 1.4096706377014716, "grad_norm": 0.1448264718055725, "learning_rate": 0.00016290839512078447, "loss": 0.0177, "step": 10058 }, { "epoch": 1.4098107918710583, "grad_norm": 0.3599625825881958, "learning_rate": 0.00016289404448696483, "loss": 0.0445, "step": 10059 }, { "epoch": 1.4099509460406447, "grad_norm": 0.29080915451049805, "learning_rate": 
0.00016287969385314516, "loss": 0.0864, "step": 10060 }, { "epoch": 1.4100911002102312, "grad_norm": 0.1319163590669632, "learning_rate": 0.00016286534321932549, "loss": 0.0293, "step": 10061 }, { "epoch": 1.4102312543798177, "grad_norm": 0.21962696313858032, "learning_rate": 0.00016285099258550587, "loss": 0.0247, "step": 10062 }, { "epoch": 1.4103714085494043, "grad_norm": 0.25393569469451904, "learning_rate": 0.0001628366419516862, "loss": 0.024, "step": 10063 }, { "epoch": 1.4105115627189908, "grad_norm": 0.732363224029541, "learning_rate": 0.00016282229131786653, "loss": 0.0867, "step": 10064 }, { "epoch": 1.4106517168885775, "grad_norm": 0.31279635429382324, "learning_rate": 0.00016280794068404688, "loss": 0.0456, "step": 10065 }, { "epoch": 1.410791871058164, "grad_norm": 0.45657625794410706, "learning_rate": 0.0001627935900502272, "loss": 0.1078, "step": 10066 }, { "epoch": 1.4109320252277504, "grad_norm": 0.48153361678123474, "learning_rate": 0.00016277923941640754, "loss": 0.1648, "step": 10067 }, { "epoch": 1.411072179397337, "grad_norm": 0.38867172598838806, "learning_rate": 0.0001627648887825879, "loss": 0.0593, "step": 10068 }, { "epoch": 1.4112123335669235, "grad_norm": 0.4148240089416504, "learning_rate": 0.00016275053814876822, "loss": 0.087, "step": 10069 }, { "epoch": 1.4113524877365102, "grad_norm": 0.35816872119903564, "learning_rate": 0.00016273618751494855, "loss": 0.0121, "step": 10070 }, { "epoch": 1.4114926419060967, "grad_norm": 0.28633856773376465, "learning_rate": 0.00016272183688112888, "loss": 0.0375, "step": 10071 }, { "epoch": 1.4116327960756831, "grad_norm": 0.4820577800273895, "learning_rate": 0.00016270748624730926, "loss": 0.0477, "step": 10072 }, { "epoch": 1.4117729502452698, "grad_norm": 0.42785048484802246, "learning_rate": 0.0001626931356134896, "loss": 0.0428, "step": 10073 }, { "epoch": 1.4119131044148563, "grad_norm": 0.15436211228370667, "learning_rate": 0.00016267878497966992, "loss": 0.0342, "step": 10074 }, { 
"epoch": 1.412053258584443, "grad_norm": 0.42595529556274414, "learning_rate": 0.00016266443434585027, "loss": 0.0294, "step": 10075 }, { "epoch": 1.4121934127540294, "grad_norm": 0.13147082924842834, "learning_rate": 0.0001626500837120306, "loss": 0.0143, "step": 10076 }, { "epoch": 1.412333566923616, "grad_norm": 0.35984864830970764, "learning_rate": 0.00016263573307821093, "loss": 0.0338, "step": 10077 }, { "epoch": 1.4124737210932026, "grad_norm": 0.5684127807617188, "learning_rate": 0.0001626213824443913, "loss": 0.0789, "step": 10078 }, { "epoch": 1.412613875262789, "grad_norm": 0.44349196553230286, "learning_rate": 0.00016260703181057162, "loss": 0.0422, "step": 10079 }, { "epoch": 1.4127540294323757, "grad_norm": 0.3814984858036041, "learning_rate": 0.00016259268117675194, "loss": 0.0343, "step": 10080 }, { "epoch": 1.4128941836019622, "grad_norm": 0.45309409499168396, "learning_rate": 0.0001625783305429323, "loss": 0.0307, "step": 10081 }, { "epoch": 1.4130343377715486, "grad_norm": 1.0413120985031128, "learning_rate": 0.00016256397990911263, "loss": 0.1161, "step": 10082 }, { "epoch": 1.4131744919411353, "grad_norm": 0.6609210968017578, "learning_rate": 0.00016254962927529296, "loss": 0.0809, "step": 10083 }, { "epoch": 1.4133146461107218, "grad_norm": 0.49330639839172363, "learning_rate": 0.00016253527864147334, "loss": 0.03, "step": 10084 }, { "epoch": 1.4134548002803085, "grad_norm": 2.1410765647888184, "learning_rate": 0.00016252092800765367, "loss": 0.2454, "step": 10085 }, { "epoch": 1.413594954449895, "grad_norm": 0.5847886204719543, "learning_rate": 0.000162506577373834, "loss": 0.059, "step": 10086 }, { "epoch": 1.4137351086194814, "grad_norm": 0.16128098964691162, "learning_rate": 0.00016249222674001435, "loss": 0.0109, "step": 10087 }, { "epoch": 1.413875262789068, "grad_norm": 0.18059886991977692, "learning_rate": 0.00016247787610619468, "loss": 0.0153, "step": 10088 }, { "epoch": 1.4140154169586545, "grad_norm": 0.17081362009048462, 
"learning_rate": 0.000162463525472375, "loss": 0.0429, "step": 10089 }, { "epoch": 1.4141555711282412, "grad_norm": 0.546936571598053, "learning_rate": 0.00016244917483855534, "loss": 0.0825, "step": 10090 }, { "epoch": 1.4142957252978277, "grad_norm": 0.3452431261539459, "learning_rate": 0.0001624348242047357, "loss": 0.0504, "step": 10091 }, { "epoch": 1.4144358794674141, "grad_norm": 0.30322957038879395, "learning_rate": 0.00016242047357091602, "loss": 0.0403, "step": 10092 }, { "epoch": 1.4145760336370006, "grad_norm": 0.27789390087127686, "learning_rate": 0.00016240612293709635, "loss": 0.0334, "step": 10093 }, { "epoch": 1.4147161878065873, "grad_norm": 0.23856988549232483, "learning_rate": 0.00016239177230327673, "loss": 0.0295, "step": 10094 }, { "epoch": 1.4148563419761737, "grad_norm": 0.32311302423477173, "learning_rate": 0.00016237742166945706, "loss": 0.0573, "step": 10095 }, { "epoch": 1.4149964961457604, "grad_norm": 0.22626614570617676, "learning_rate": 0.0001623630710356374, "loss": 0.0451, "step": 10096 }, { "epoch": 1.4151366503153469, "grad_norm": 1.0516315698623657, "learning_rate": 0.00016234872040181775, "loss": 0.0646, "step": 10097 }, { "epoch": 1.4152768044849333, "grad_norm": 0.243377223610878, "learning_rate": 0.00016233436976799808, "loss": 0.0306, "step": 10098 }, { "epoch": 1.41541695865452, "grad_norm": 0.2931329011917114, "learning_rate": 0.0001623200191341784, "loss": 0.0579, "step": 10099 }, { "epoch": 1.4155571128241065, "grad_norm": 0.30589163303375244, "learning_rate": 0.00016230566850035876, "loss": 0.0263, "step": 10100 }, { "epoch": 1.4156972669936931, "grad_norm": 0.15806232392787933, "learning_rate": 0.0001622913178665391, "loss": 0.0233, "step": 10101 }, { "epoch": 1.4158374211632796, "grad_norm": 0.4234723746776581, "learning_rate": 0.00016227696723271942, "loss": 0.0417, "step": 10102 }, { "epoch": 1.415977575332866, "grad_norm": 0.5165902376174927, "learning_rate": 0.0001622626165988998, "loss": 0.0536, "step": 10103 
}, { "epoch": 1.4161177295024527, "grad_norm": 0.16471479833126068, "learning_rate": 0.00016224826596508013, "loss": 0.0225, "step": 10104 }, { "epoch": 1.4162578836720392, "grad_norm": 0.6931836009025574, "learning_rate": 0.00016223391533126046, "loss": 0.112, "step": 10105 }, { "epoch": 1.4163980378416259, "grad_norm": 0.15529198944568634, "learning_rate": 0.00016221956469744079, "loss": 0.024, "step": 10106 }, { "epoch": 1.4165381920112123, "grad_norm": 0.19974516332149506, "learning_rate": 0.00016220521406362114, "loss": 0.0262, "step": 10107 }, { "epoch": 1.4166783461807988, "grad_norm": 0.13752387464046478, "learning_rate": 0.00016219086342980147, "loss": 0.0169, "step": 10108 }, { "epoch": 1.4168185003503855, "grad_norm": 0.5949627161026001, "learning_rate": 0.0001621765127959818, "loss": 0.0533, "step": 10109 }, { "epoch": 1.416958654519972, "grad_norm": 0.17016081511974335, "learning_rate": 0.00016216216216216215, "loss": 0.0118, "step": 10110 }, { "epoch": 1.4170988086895586, "grad_norm": 0.22535526752471924, "learning_rate": 0.00016214781152834248, "loss": 0.0573, "step": 10111 }, { "epoch": 1.417238962859145, "grad_norm": 0.88917076587677, "learning_rate": 0.0001621334608945228, "loss": 0.0894, "step": 10112 }, { "epoch": 1.4173791170287315, "grad_norm": 0.6522378921508789, "learning_rate": 0.00016211911026070317, "loss": 0.0809, "step": 10113 }, { "epoch": 1.4175192711983182, "grad_norm": 0.4226674437522888, "learning_rate": 0.0001621047596268835, "loss": 0.0965, "step": 10114 }, { "epoch": 1.4176594253679047, "grad_norm": 0.09856314957141876, "learning_rate": 0.00016209040899306382, "loss": 0.0118, "step": 10115 }, { "epoch": 1.4177995795374914, "grad_norm": 0.7640262842178345, "learning_rate": 0.0001620760583592442, "loss": 0.069, "step": 10116 }, { "epoch": 1.4179397337070778, "grad_norm": 0.35214075446128845, "learning_rate": 0.00016206170772542454, "loss": 0.0344, "step": 10117 }, { "epoch": 1.4180798878766643, "grad_norm": 0.6715491414070129, 
"learning_rate": 0.00016204735709160486, "loss": 0.0335, "step": 10118 }, { "epoch": 1.4182200420462507, "grad_norm": 1.1098538637161255, "learning_rate": 0.00016203300645778522, "loss": 0.0823, "step": 10119 }, { "epoch": 1.4183601962158374, "grad_norm": 0.15266643464565277, "learning_rate": 0.00016201865582396555, "loss": 0.029, "step": 10120 }, { "epoch": 1.418500350385424, "grad_norm": 0.290271133184433, "learning_rate": 0.00016200430519014588, "loss": 0.0309, "step": 10121 }, { "epoch": 1.4186405045550106, "grad_norm": 1.9331448078155518, "learning_rate": 0.0001619899545563262, "loss": 0.0382, "step": 10122 }, { "epoch": 1.418780658724597, "grad_norm": 0.13446784019470215, "learning_rate": 0.00016197560392250656, "loss": 0.0404, "step": 10123 }, { "epoch": 1.4189208128941835, "grad_norm": 0.46477991342544556, "learning_rate": 0.0001619612532886869, "loss": 0.0375, "step": 10124 }, { "epoch": 1.4190609670637702, "grad_norm": 0.23926281929016113, "learning_rate": 0.00016194690265486722, "loss": 0.0317, "step": 10125 }, { "epoch": 1.4192011212333566, "grad_norm": 1.2273597717285156, "learning_rate": 0.0001619325520210476, "loss": 0.0739, "step": 10126 }, { "epoch": 1.4193412754029433, "grad_norm": 0.3223104774951935, "learning_rate": 0.00016191820138722793, "loss": 0.0295, "step": 10127 }, { "epoch": 1.4194814295725298, "grad_norm": 0.7063602805137634, "learning_rate": 0.00016190385075340826, "loss": 0.1277, "step": 10128 }, { "epoch": 1.4196215837421162, "grad_norm": 0.31807786226272583, "learning_rate": 0.0001618895001195886, "loss": 0.0715, "step": 10129 }, { "epoch": 1.419761737911703, "grad_norm": 0.4765188694000244, "learning_rate": 0.00016187514948576894, "loss": 0.0871, "step": 10130 }, { "epoch": 1.4199018920812894, "grad_norm": 0.3904831111431122, "learning_rate": 0.00016186079885194927, "loss": 0.0461, "step": 10131 }, { "epoch": 1.420042046250876, "grad_norm": 0.40957337617874146, "learning_rate": 0.00016184644821812963, "loss": 0.0222, "step": 10132 
}, { "epoch": 1.4201822004204625, "grad_norm": 2.021178722381592, "learning_rate": 0.00016183209758430995, "loss": 0.225, "step": 10133 }, { "epoch": 1.420322354590049, "grad_norm": 1.0561689138412476, "learning_rate": 0.00016181774695049028, "loss": 0.013, "step": 10134 }, { "epoch": 1.4204625087596356, "grad_norm": 3.7962841987609863, "learning_rate": 0.00016180339631667067, "loss": 0.3033, "step": 10135 }, { "epoch": 1.420602662929222, "grad_norm": 0.2517479956150055, "learning_rate": 0.000161789045682851, "loss": 0.0532, "step": 10136 }, { "epoch": 1.4207428170988088, "grad_norm": 0.23859789967536926, "learning_rate": 0.00016177469504903132, "loss": 0.0729, "step": 10137 }, { "epoch": 1.4208829712683952, "grad_norm": 0.7044009566307068, "learning_rate": 0.00016176034441521168, "loss": 0.102, "step": 10138 }, { "epoch": 1.4210231254379817, "grad_norm": 0.431848406791687, "learning_rate": 0.000161745993781392, "loss": 0.0387, "step": 10139 }, { "epoch": 1.4211632796075684, "grad_norm": 0.1918434500694275, "learning_rate": 0.00016173164314757234, "loss": 0.0216, "step": 10140 }, { "epoch": 1.4213034337771548, "grad_norm": 0.48974621295928955, "learning_rate": 0.00016171729251375266, "loss": 0.0809, "step": 10141 }, { "epoch": 1.4214435879467415, "grad_norm": 0.54285728931427, "learning_rate": 0.00016170294187993302, "loss": 0.1183, "step": 10142 }, { "epoch": 1.421583742116328, "grad_norm": 0.39793580770492554, "learning_rate": 0.00016168859124611335, "loss": 0.0733, "step": 10143 }, { "epoch": 1.4217238962859144, "grad_norm": 0.30271995067596436, "learning_rate": 0.00016167424061229368, "loss": 0.0588, "step": 10144 }, { "epoch": 1.4218640504555011, "grad_norm": 0.15205909311771393, "learning_rate": 0.00016165988997847403, "loss": 0.0176, "step": 10145 }, { "epoch": 1.4220042046250876, "grad_norm": 0.30136293172836304, "learning_rate": 0.00016164553934465436, "loss": 0.1326, "step": 10146 }, { "epoch": 1.4221443587946743, "grad_norm": 0.1649971604347229, 
"learning_rate": 0.0001616311887108347, "loss": 0.0231, "step": 10147 }, { "epoch": 1.4222845129642607, "grad_norm": 0.08201868832111359, "learning_rate": 0.00016161683807701507, "loss": 0.0066, "step": 10148 }, { "epoch": 1.4224246671338472, "grad_norm": 0.3914889991283417, "learning_rate": 0.0001616024874431954, "loss": 0.0584, "step": 10149 }, { "epoch": 1.4225648213034336, "grad_norm": 0.5152369737625122, "learning_rate": 0.00016158813680937573, "loss": 0.0676, "step": 10150 }, { "epoch": 1.4227049754730203, "grad_norm": 0.14055362343788147, "learning_rate": 0.00016157378617555609, "loss": 0.0076, "step": 10151 }, { "epoch": 1.4228451296426068, "grad_norm": 0.1619841605424881, "learning_rate": 0.00016155943554173641, "loss": 0.0176, "step": 10152 }, { "epoch": 1.4229852838121935, "grad_norm": 0.5846623778343201, "learning_rate": 0.00016154508490791674, "loss": 0.0905, "step": 10153 }, { "epoch": 1.42312543798178, "grad_norm": 0.30887091159820557, "learning_rate": 0.0001615307342740971, "loss": 0.0803, "step": 10154 }, { "epoch": 1.4232655921513664, "grad_norm": 0.6513183116912842, "learning_rate": 0.00016151638364027743, "loss": 0.0666, "step": 10155 }, { "epoch": 1.423405746320953, "grad_norm": 0.3283507227897644, "learning_rate": 0.00016150203300645776, "loss": 0.0524, "step": 10156 }, { "epoch": 1.4235459004905395, "grad_norm": 0.2082063853740692, "learning_rate": 0.00016148768237263808, "loss": 0.0385, "step": 10157 }, { "epoch": 1.4236860546601262, "grad_norm": 0.32522910833358765, "learning_rate": 0.00016147333173881847, "loss": 0.0565, "step": 10158 }, { "epoch": 1.4238262088297127, "grad_norm": 0.20435641705989838, "learning_rate": 0.0001614589811049988, "loss": 0.0221, "step": 10159 }, { "epoch": 1.4239663629992991, "grad_norm": 0.43393731117248535, "learning_rate": 0.00016144463047117912, "loss": 0.0561, "step": 10160 }, { "epoch": 1.4241065171688858, "grad_norm": 0.24540644884109497, "learning_rate": 0.00016143027983735948, "loss": 0.0645, "step": 
10161 }, { "epoch": 1.4242466713384723, "grad_norm": 0.3852110207080841, "learning_rate": 0.0001614159292035398, "loss": 0.0388, "step": 10162 }, { "epoch": 1.424386825508059, "grad_norm": 0.33503320813179016, "learning_rate": 0.00016140157856972014, "loss": 0.0181, "step": 10163 }, { "epoch": 1.4245269796776454, "grad_norm": 0.2334572970867157, "learning_rate": 0.0001613872279359005, "loss": 0.0445, "step": 10164 }, { "epoch": 1.4246671338472319, "grad_norm": 0.6089301109313965, "learning_rate": 0.00016137287730208082, "loss": 0.0205, "step": 10165 }, { "epoch": 1.4248072880168186, "grad_norm": 0.8390827178955078, "learning_rate": 0.00016135852666826115, "loss": 0.142, "step": 10166 }, { "epoch": 1.424947442186405, "grad_norm": 1.045295238494873, "learning_rate": 0.00016134417603444153, "loss": 0.0549, "step": 10167 }, { "epoch": 1.4250875963559917, "grad_norm": 0.4901297986507416, "learning_rate": 0.00016132982540062186, "loss": 0.0639, "step": 10168 }, { "epoch": 1.4252277505255782, "grad_norm": 0.33060768246650696, "learning_rate": 0.0001613154747668022, "loss": 0.0331, "step": 10169 }, { "epoch": 1.4253679046951646, "grad_norm": 0.43889865279197693, "learning_rate": 0.00016130112413298255, "loss": 0.1722, "step": 10170 }, { "epoch": 1.4255080588647513, "grad_norm": 0.15537147223949432, "learning_rate": 0.00016128677349916287, "loss": 0.0192, "step": 10171 }, { "epoch": 1.4256482130343378, "grad_norm": 0.1894560307264328, "learning_rate": 0.0001612724228653432, "loss": 0.0583, "step": 10172 }, { "epoch": 1.4257883672039244, "grad_norm": 0.42838916182518005, "learning_rate": 0.00016125807223152356, "loss": 0.0956, "step": 10173 }, { "epoch": 1.425928521373511, "grad_norm": 0.16536885499954224, "learning_rate": 0.00016124372159770389, "loss": 0.0147, "step": 10174 }, { "epoch": 1.4260686755430974, "grad_norm": 0.4123830795288086, "learning_rate": 0.00016122937096388421, "loss": 0.0813, "step": 10175 }, { "epoch": 1.426208829712684, "grad_norm": 
0.29722410440444946, "learning_rate": 0.00016121502033006454, "loss": 0.0314, "step": 10176 }, { "epoch": 1.4263489838822705, "grad_norm": 0.24730896949768066, "learning_rate": 0.0001612006696962449, "loss": 0.049, "step": 10177 }, { "epoch": 1.4264891380518572, "grad_norm": 0.5634830594062805, "learning_rate": 0.00016118631906242523, "loss": 0.0523, "step": 10178 }, { "epoch": 1.4266292922214436, "grad_norm": 0.26038607954978943, "learning_rate": 0.00016117196842860556, "loss": 0.0206, "step": 10179 }, { "epoch": 1.42676944639103, "grad_norm": 0.5175793170928955, "learning_rate": 0.00016115761779478594, "loss": 0.0481, "step": 10180 }, { "epoch": 1.4269096005606166, "grad_norm": 1.546864628791809, "learning_rate": 0.00016114326716096627, "loss": 0.1191, "step": 10181 }, { "epoch": 1.4270497547302032, "grad_norm": 0.6556147933006287, "learning_rate": 0.0001611289165271466, "loss": 0.1567, "step": 10182 }, { "epoch": 1.4271899088997897, "grad_norm": 0.30948296189308167, "learning_rate": 0.00016111456589332695, "loss": 0.0592, "step": 10183 }, { "epoch": 1.4273300630693764, "grad_norm": 0.600836992263794, "learning_rate": 0.00016110021525950728, "loss": 0.0468, "step": 10184 }, { "epoch": 1.4274702172389628, "grad_norm": 3.3685903549194336, "learning_rate": 0.0001610858646256876, "loss": 0.259, "step": 10185 }, { "epoch": 1.4276103714085493, "grad_norm": 0.3439197838306427, "learning_rate": 0.00016107151399186796, "loss": 0.0749, "step": 10186 }, { "epoch": 1.427750525578136, "grad_norm": 0.5156503319740295, "learning_rate": 0.0001610571633580483, "loss": 0.0469, "step": 10187 }, { "epoch": 1.4278906797477224, "grad_norm": 0.3198818266391754, "learning_rate": 0.00016104281272422862, "loss": 0.0388, "step": 10188 }, { "epoch": 1.4280308339173091, "grad_norm": 0.2796226441860199, "learning_rate": 0.000161028462090409, "loss": 0.0505, "step": 10189 }, { "epoch": 1.4281709880868956, "grad_norm": 0.1790735125541687, "learning_rate": 0.00016101411145658933, "loss": 0.037, 
"step": 10190 }, { "epoch": 1.428311142256482, "grad_norm": 0.5123526453971863, "learning_rate": 0.00016099976082276966, "loss": 0.0428, "step": 10191 }, { "epoch": 1.4284512964260687, "grad_norm": 0.07718978822231293, "learning_rate": 0.00016098541018895, "loss": 0.0078, "step": 10192 }, { "epoch": 1.4285914505956552, "grad_norm": 0.24048765003681183, "learning_rate": 0.00016097105955513035, "loss": 0.0541, "step": 10193 }, { "epoch": 1.4287316047652419, "grad_norm": 0.35560065507888794, "learning_rate": 0.00016095670892131067, "loss": 0.0487, "step": 10194 }, { "epoch": 1.4288717589348283, "grad_norm": 0.32569611072540283, "learning_rate": 0.000160942358287491, "loss": 0.071, "step": 10195 }, { "epoch": 1.4290119131044148, "grad_norm": 0.44907891750335693, "learning_rate": 0.00016092800765367136, "loss": 0.0574, "step": 10196 }, { "epoch": 1.4291520672740015, "grad_norm": 0.1444093883037567, "learning_rate": 0.0001609136570198517, "loss": 0.03, "step": 10197 }, { "epoch": 1.429292221443588, "grad_norm": 0.58797287940979, "learning_rate": 0.00016089930638603202, "loss": 0.0758, "step": 10198 }, { "epoch": 1.4294323756131746, "grad_norm": 0.16892100870609283, "learning_rate": 0.0001608849557522124, "loss": 0.027, "step": 10199 }, { "epoch": 1.429572529782761, "grad_norm": 0.3921225666999817, "learning_rate": 0.00016087060511839273, "loss": 0.0509, "step": 10200 }, { "epoch": 1.4297126839523475, "grad_norm": 0.42925089597702026, "learning_rate": 0.00016085625448457306, "loss": 0.0476, "step": 10201 }, { "epoch": 1.4298528381219342, "grad_norm": 0.27148038148880005, "learning_rate": 0.0001608419038507534, "loss": 0.0805, "step": 10202 }, { "epoch": 1.4299929922915207, "grad_norm": 0.24084679782390594, "learning_rate": 0.00016082755321693374, "loss": 0.0587, "step": 10203 }, { "epoch": 1.4301331464611073, "grad_norm": 0.22835497558116913, "learning_rate": 0.00016081320258311407, "loss": 0.0357, "step": 10204 }, { "epoch": 1.4302733006306938, "grad_norm": 
0.14377571642398834, "learning_rate": 0.00016079885194929442, "loss": 0.0173, "step": 10205 }, { "epoch": 1.4304134548002803, "grad_norm": 0.22109854221343994, "learning_rate": 0.00016078450131547475, "loss": 0.0388, "step": 10206 }, { "epoch": 1.4305536089698667, "grad_norm": 0.7799450159072876, "learning_rate": 0.00016077015068165508, "loss": 0.0655, "step": 10207 }, { "epoch": 1.4306937631394534, "grad_norm": 0.4504595696926117, "learning_rate": 0.00016075580004783544, "loss": 0.0733, "step": 10208 }, { "epoch": 1.43083391730904, "grad_norm": 0.3658653497695923, "learning_rate": 0.00016074144941401577, "loss": 0.0576, "step": 10209 }, { "epoch": 1.4309740714786265, "grad_norm": 0.27942484617233276, "learning_rate": 0.0001607270987801961, "loss": 0.0511, "step": 10210 }, { "epoch": 1.431114225648213, "grad_norm": 0.27003490924835205, "learning_rate": 0.00016071274814637642, "loss": 0.0474, "step": 10211 }, { "epoch": 1.4312543798177995, "grad_norm": 0.7827368378639221, "learning_rate": 0.0001606983975125568, "loss": 0.0849, "step": 10212 }, { "epoch": 1.4313945339873861, "grad_norm": 0.5065380930900574, "learning_rate": 0.00016068404687873713, "loss": 0.0445, "step": 10213 }, { "epoch": 1.4315346881569726, "grad_norm": 0.32973483204841614, "learning_rate": 0.00016066969624491746, "loss": 0.0447, "step": 10214 }, { "epoch": 1.4316748423265593, "grad_norm": 0.3289881646633148, "learning_rate": 0.00016065534561109782, "loss": 0.0523, "step": 10215 }, { "epoch": 1.4318149964961457, "grad_norm": 0.15622155368328094, "learning_rate": 0.00016064099497727815, "loss": 0.0146, "step": 10216 }, { "epoch": 1.4319551506657322, "grad_norm": 0.42556485533714294, "learning_rate": 0.00016062664434345848, "loss": 0.0521, "step": 10217 }, { "epoch": 1.4320953048353189, "grad_norm": 0.20520903170108795, "learning_rate": 0.00016061229370963883, "loss": 0.0499, "step": 10218 }, { "epoch": 1.4322354590049053, "grad_norm": 0.6761796474456787, "learning_rate": 0.00016059794307581916, 
"loss": 0.0754, "step": 10219 }, { "epoch": 1.432375613174492, "grad_norm": 0.40153732895851135, "learning_rate": 0.0001605835924419995, "loss": 0.1202, "step": 10220 }, { "epoch": 1.4325157673440785, "grad_norm": 0.19425968825817108, "learning_rate": 0.00016056924180817987, "loss": 0.0117, "step": 10221 }, { "epoch": 1.432655921513665, "grad_norm": 0.18818245828151703, "learning_rate": 0.0001605548911743602, "loss": 0.0225, "step": 10222 }, { "epoch": 1.4327960756832516, "grad_norm": 0.2861733138561249, "learning_rate": 0.00016054054054054053, "loss": 0.0745, "step": 10223 }, { "epoch": 1.432936229852838, "grad_norm": 0.24111218750476837, "learning_rate": 0.00016052618990672088, "loss": 0.0581, "step": 10224 }, { "epoch": 1.4330763840224248, "grad_norm": 0.19972725212574005, "learning_rate": 0.0001605118392729012, "loss": 0.0332, "step": 10225 }, { "epoch": 1.4332165381920112, "grad_norm": 0.36768510937690735, "learning_rate": 0.00016049748863908154, "loss": 0.0513, "step": 10226 }, { "epoch": 1.4333566923615977, "grad_norm": 0.2722630500793457, "learning_rate": 0.00016048313800526187, "loss": 0.0424, "step": 10227 }, { "epoch": 1.4334968465311844, "grad_norm": 0.47722113132476807, "learning_rate": 0.00016046878737144222, "loss": 0.0725, "step": 10228 }, { "epoch": 1.4336370007007708, "grad_norm": 0.19668526947498322, "learning_rate": 0.00016045443673762255, "loss": 0.0081, "step": 10229 }, { "epoch": 1.4337771548703575, "grad_norm": 1.039320945739746, "learning_rate": 0.00016044008610380288, "loss": 0.1781, "step": 10230 }, { "epoch": 1.433917309039944, "grad_norm": 0.8073946833610535, "learning_rate": 0.00016042573546998326, "loss": 0.0684, "step": 10231 }, { "epoch": 1.4340574632095304, "grad_norm": 0.968450129032135, "learning_rate": 0.0001604113848361636, "loss": 0.1085, "step": 10232 }, { "epoch": 1.434197617379117, "grad_norm": 1.12123703956604, "learning_rate": 0.00016039703420234392, "loss": 0.0541, "step": 10233 }, { "epoch": 1.4343377715487036, 
"grad_norm": 0.35450249910354614, "learning_rate": 0.00016038268356852428, "loss": 0.0568, "step": 10234 }, { "epoch": 1.4344779257182902, "grad_norm": 3.7186460494995117, "learning_rate": 0.0001603683329347046, "loss": 0.2854, "step": 10235 }, { "epoch": 1.4346180798878767, "grad_norm": 0.21409717202186584, "learning_rate": 0.00016035398230088493, "loss": 0.0305, "step": 10236 }, { "epoch": 1.4347582340574632, "grad_norm": 0.19324368238449097, "learning_rate": 0.0001603396316670653, "loss": 0.0328, "step": 10237 }, { "epoch": 1.4348983882270496, "grad_norm": 0.36743223667144775, "learning_rate": 0.00016032528103324562, "loss": 0.0417, "step": 10238 }, { "epoch": 1.4350385423966363, "grad_norm": 0.13218557834625244, "learning_rate": 0.00016031093039942595, "loss": 0.0211, "step": 10239 }, { "epoch": 1.4351786965662228, "grad_norm": 0.420714795589447, "learning_rate": 0.0001602965797656063, "loss": 0.0311, "step": 10240 }, { "epoch": 1.4353188507358094, "grad_norm": 0.7833425402641296, "learning_rate": 0.00016028222913178663, "loss": 0.0581, "step": 10241 }, { "epoch": 1.435459004905396, "grad_norm": 0.22733062505722046, "learning_rate": 0.00016026787849796696, "loss": 0.0504, "step": 10242 }, { "epoch": 1.4355991590749824, "grad_norm": 0.5046831369400024, "learning_rate": 0.00016025352786414734, "loss": 0.0469, "step": 10243 }, { "epoch": 1.435739313244569, "grad_norm": 0.339307963848114, "learning_rate": 0.00016023917723032767, "loss": 0.0447, "step": 10244 }, { "epoch": 1.4358794674141555, "grad_norm": 0.15975286066532135, "learning_rate": 0.000160224826596508, "loss": 0.0304, "step": 10245 }, { "epoch": 1.4360196215837422, "grad_norm": 0.23918233811855316, "learning_rate": 0.00016021047596268833, "loss": 0.0392, "step": 10246 }, { "epoch": 1.4361597757533286, "grad_norm": 0.3375759720802307, "learning_rate": 0.00016019612532886868, "loss": 0.0472, "step": 10247 }, { "epoch": 1.436299929922915, "grad_norm": 0.2116953283548355, "learning_rate": 
0.000160181774695049, "loss": 0.0705, "step": 10248 }, { "epoch": 1.4364400840925018, "grad_norm": 0.4924832582473755, "learning_rate": 0.00016016742406122934, "loss": 0.059, "step": 10249 }, { "epoch": 1.4365802382620882, "grad_norm": 0.36272770166397095, "learning_rate": 0.0001601530734274097, "loss": 0.0531, "step": 10250 }, { "epoch": 1.436720392431675, "grad_norm": 0.4529755413532257, "learning_rate": 0.00016013872279359003, "loss": 0.0467, "step": 10251 }, { "epoch": 1.4368605466012614, "grad_norm": 0.19947658479213715, "learning_rate": 0.00016012437215977035, "loss": 0.0187, "step": 10252 }, { "epoch": 1.4370007007708478, "grad_norm": 0.3391959071159363, "learning_rate": 0.00016011002152595074, "loss": 0.0426, "step": 10253 }, { "epoch": 1.4371408549404345, "grad_norm": 0.15439580380916595, "learning_rate": 0.00016009567089213107, "loss": 0.024, "step": 10254 }, { "epoch": 1.437281009110021, "grad_norm": 0.2740303575992584, "learning_rate": 0.0001600813202583114, "loss": 0.0407, "step": 10255 }, { "epoch": 1.4374211632796077, "grad_norm": 0.7851037979125977, "learning_rate": 0.00016006696962449175, "loss": 0.0723, "step": 10256 }, { "epoch": 1.4375613174491941, "grad_norm": 0.2592664957046509, "learning_rate": 0.00016005261899067208, "loss": 0.034, "step": 10257 }, { "epoch": 1.4377014716187806, "grad_norm": 0.46411001682281494, "learning_rate": 0.0001600382683568524, "loss": 0.0795, "step": 10258 }, { "epoch": 1.4378416257883673, "grad_norm": 0.35372135043144226, "learning_rate": 0.00016002391772303276, "loss": 0.0381, "step": 10259 }, { "epoch": 1.4379817799579537, "grad_norm": 0.8923017382621765, "learning_rate": 0.0001600095670892131, "loss": 0.0697, "step": 10260 }, { "epoch": 1.4381219341275404, "grad_norm": 0.5511233806610107, "learning_rate": 0.00015999521645539342, "loss": 0.0731, "step": 10261 }, { "epoch": 1.4382620882971269, "grad_norm": 0.26875755190849304, "learning_rate": 0.00015998086582157375, "loss": 0.0505, "step": 10262 }, { "epoch": 
1.4384022424667133, "grad_norm": 0.18504968285560608, "learning_rate": 0.00015996651518775413, "loss": 0.0425, "step": 10263 }, { "epoch": 1.4385423966362998, "grad_norm": 0.33882051706314087, "learning_rate": 0.00015995216455393446, "loss": 0.0431, "step": 10264 }, { "epoch": 1.4386825508058865, "grad_norm": 0.2804034948348999, "learning_rate": 0.0001599378139201148, "loss": 0.0667, "step": 10265 }, { "epoch": 1.4388227049754732, "grad_norm": 0.464950829744339, "learning_rate": 0.00015992346328629514, "loss": 0.0471, "step": 10266 }, { "epoch": 1.4389628591450596, "grad_norm": 0.3928641676902771, "learning_rate": 0.00015990911265247547, "loss": 0.0296, "step": 10267 }, { "epoch": 1.439103013314646, "grad_norm": 0.4964993894100189, "learning_rate": 0.0001598947620186558, "loss": 0.0936, "step": 10268 }, { "epoch": 1.4392431674842325, "grad_norm": 0.4022435247898102, "learning_rate": 0.00015988041138483616, "loss": 0.0665, "step": 10269 }, { "epoch": 1.4393833216538192, "grad_norm": 0.23466211557388306, "learning_rate": 0.00015986606075101649, "loss": 0.0175, "step": 10270 }, { "epoch": 1.4395234758234057, "grad_norm": 0.6732150912284851, "learning_rate": 0.00015985171011719681, "loss": 0.093, "step": 10271 }, { "epoch": 1.4396636299929924, "grad_norm": 0.3779064118862152, "learning_rate": 0.00015983735948337717, "loss": 0.0837, "step": 10272 }, { "epoch": 1.4398037841625788, "grad_norm": 0.1630546897649765, "learning_rate": 0.0001598230088495575, "loss": 0.0573, "step": 10273 }, { "epoch": 1.4399439383321653, "grad_norm": 0.28557801246643066, "learning_rate": 0.00015980865821573783, "loss": 0.0527, "step": 10274 }, { "epoch": 1.440084092501752, "grad_norm": 0.3272715210914612, "learning_rate": 0.0001597943075819182, "loss": 0.0572, "step": 10275 }, { "epoch": 1.4402242466713384, "grad_norm": 0.22885917127132416, "learning_rate": 0.00015977995694809854, "loss": 0.0343, "step": 10276 }, { "epoch": 1.440364400840925, "grad_norm": 0.6123156547546387, "learning_rate": 
0.00015976560631427887, "loss": 0.0458, "step": 10277 }, { "epoch": 1.4405045550105116, "grad_norm": 0.3223572373390198, "learning_rate": 0.00015975125568045922, "loss": 0.0722, "step": 10278 }, { "epoch": 1.440644709180098, "grad_norm": 2.047304391860962, "learning_rate": 0.00015973690504663955, "loss": 0.0588, "step": 10279 }, { "epoch": 1.4407848633496847, "grad_norm": 0.45392248034477234, "learning_rate": 0.00015972255441281988, "loss": 0.0263, "step": 10280 }, { "epoch": 1.4409250175192712, "grad_norm": 0.5961271524429321, "learning_rate": 0.0001597082037790002, "loss": 0.0662, "step": 10281 }, { "epoch": 1.4410651716888578, "grad_norm": 0.09243643283843994, "learning_rate": 0.00015969385314518056, "loss": 0.0055, "step": 10282 }, { "epoch": 1.4412053258584443, "grad_norm": 0.39269953966140747, "learning_rate": 0.0001596795025113609, "loss": 0.0994, "step": 10283 }, { "epoch": 1.4413454800280308, "grad_norm": 1.7446138858795166, "learning_rate": 0.00015966515187754122, "loss": 0.0865, "step": 10284 }, { "epoch": 1.4414856341976174, "grad_norm": 3.0109221935272217, "learning_rate": 0.0001596508012437216, "loss": 0.1716, "step": 10285 }, { "epoch": 1.441625788367204, "grad_norm": 0.37838804721832275, "learning_rate": 0.00015963645060990193, "loss": 0.0869, "step": 10286 }, { "epoch": 1.4417659425367906, "grad_norm": 0.19698014855384827, "learning_rate": 0.00015962209997608226, "loss": 0.0659, "step": 10287 }, { "epoch": 1.441906096706377, "grad_norm": 0.25391724705696106, "learning_rate": 0.00015960774934226262, "loss": 0.0746, "step": 10288 }, { "epoch": 1.4420462508759635, "grad_norm": 0.23755589127540588, "learning_rate": 0.00015959339870844294, "loss": 0.0994, "step": 10289 }, { "epoch": 1.4421864050455502, "grad_norm": 0.34438541531562805, "learning_rate": 0.00015957904807462327, "loss": 0.0622, "step": 10290 }, { "epoch": 1.4423265592151366, "grad_norm": 0.2734167277812958, "learning_rate": 0.00015956469744080363, "loss": 0.0401, "step": 10291 }, { 
"epoch": 1.4424667133847233, "grad_norm": 0.4822908639907837, "learning_rate": 0.00015955034680698396, "loss": 0.0699, "step": 10292 }, { "epoch": 1.4426068675543098, "grad_norm": 0.2431921809911728, "learning_rate": 0.00015953599617316429, "loss": 0.1038, "step": 10293 }, { "epoch": 1.4427470217238962, "grad_norm": 0.5435600280761719, "learning_rate": 0.00015952164553934467, "loss": 0.0649, "step": 10294 }, { "epoch": 1.4428871758934827, "grad_norm": 0.26899123191833496, "learning_rate": 0.000159507294905525, "loss": 0.0295, "step": 10295 }, { "epoch": 1.4430273300630694, "grad_norm": 0.07756052166223526, "learning_rate": 0.00015949294427170533, "loss": 0.0074, "step": 10296 }, { "epoch": 1.4431674842326558, "grad_norm": 0.3434891700744629, "learning_rate": 0.00015947859363788565, "loss": 0.0951, "step": 10297 }, { "epoch": 1.4433076384022425, "grad_norm": 0.2591734826564789, "learning_rate": 0.000159464243004066, "loss": 0.033, "step": 10298 }, { "epoch": 1.443447792571829, "grad_norm": 0.21404829621315002, "learning_rate": 0.00015944989237024634, "loss": 0.0431, "step": 10299 }, { "epoch": 1.4435879467414154, "grad_norm": 0.2508086860179901, "learning_rate": 0.00015943554173642667, "loss": 0.0157, "step": 10300 }, { "epoch": 1.4437281009110021, "grad_norm": 0.22778648138046265, "learning_rate": 0.00015942119110260702, "loss": 0.045, "step": 10301 }, { "epoch": 1.4438682550805886, "grad_norm": 0.38248202204704285, "learning_rate": 0.00015940684046878735, "loss": 0.0341, "step": 10302 }, { "epoch": 1.4440084092501753, "grad_norm": 0.280750572681427, "learning_rate": 0.00015939248983496768, "loss": 0.0449, "step": 10303 }, { "epoch": 1.4441485634197617, "grad_norm": 0.6564945578575134, "learning_rate": 0.00015937813920114804, "loss": 0.0844, "step": 10304 }, { "epoch": 1.4442887175893482, "grad_norm": 1.5890581607818604, "learning_rate": 0.00015936378856732836, "loss": 0.1157, "step": 10305 }, { "epoch": 1.4444288717589349, "grad_norm": 0.26139146089553833, 
"learning_rate": 0.0001593494379335087, "loss": 0.0295, "step": 10306 }, { "epoch": 1.4445690259285213, "grad_norm": 0.17264211177825928, "learning_rate": 0.00015933508729968908, "loss": 0.0106, "step": 10307 }, { "epoch": 1.444709180098108, "grad_norm": 0.306117445230484, "learning_rate": 0.0001593207366658694, "loss": 0.0773, "step": 10308 }, { "epoch": 1.4448493342676945, "grad_norm": 0.264789342880249, "learning_rate": 0.00015930638603204973, "loss": 0.04, "step": 10309 }, { "epoch": 1.444989488437281, "grad_norm": 0.2679969370365143, "learning_rate": 0.0001592920353982301, "loss": 0.0653, "step": 10310 }, { "epoch": 1.4451296426068676, "grad_norm": 0.2131221741437912, "learning_rate": 0.00015927768476441042, "loss": 0.0447, "step": 10311 }, { "epoch": 1.445269796776454, "grad_norm": 0.7403803467750549, "learning_rate": 0.00015926333413059075, "loss": 0.0653, "step": 10312 }, { "epoch": 1.4454099509460407, "grad_norm": 0.24347679316997528, "learning_rate": 0.0001592489834967711, "loss": 0.0387, "step": 10313 }, { "epoch": 1.4455501051156272, "grad_norm": 0.41113975644111633, "learning_rate": 0.00015923463286295143, "loss": 0.0369, "step": 10314 }, { "epoch": 1.4456902592852137, "grad_norm": 0.29645198583602905, "learning_rate": 0.00015922028222913176, "loss": 0.0139, "step": 10315 }, { "epoch": 1.4458304134548003, "grad_norm": 0.18170084059238434, "learning_rate": 0.0001592059315953121, "loss": 0.0214, "step": 10316 }, { "epoch": 1.4459705676243868, "grad_norm": 0.33017316460609436, "learning_rate": 0.00015919158096149247, "loss": 0.1124, "step": 10317 }, { "epoch": 1.4461107217939735, "grad_norm": 0.3603140413761139, "learning_rate": 0.0001591772303276728, "loss": 0.0619, "step": 10318 }, { "epoch": 1.44625087596356, "grad_norm": 0.2632472813129425, "learning_rate": 0.00015916287969385313, "loss": 0.0273, "step": 10319 }, { "epoch": 1.4463910301331464, "grad_norm": 0.5321194529533386, "learning_rate": 0.00015914852906003348, "loss": 0.089, "step": 10320 }, { 
"epoch": 1.446531184302733, "grad_norm": 0.35040658712387085, "learning_rate": 0.0001591341784262138, "loss": 0.0554, "step": 10321 }, { "epoch": 1.4466713384723195, "grad_norm": 0.3102070987224579, "learning_rate": 0.00015911982779239414, "loss": 0.0221, "step": 10322 }, { "epoch": 1.4468114926419062, "grad_norm": 0.22028884291648865, "learning_rate": 0.0001591054771585745, "loss": 0.0277, "step": 10323 }, { "epoch": 1.4469516468114927, "grad_norm": 0.7164686918258667, "learning_rate": 0.00015909112652475482, "loss": 0.0654, "step": 10324 }, { "epoch": 1.4470918009810791, "grad_norm": 0.10332869738340378, "learning_rate": 0.00015907677589093515, "loss": 0.0177, "step": 10325 }, { "epoch": 1.4472319551506656, "grad_norm": 0.27902406454086304, "learning_rate": 0.00015906242525711554, "loss": 0.088, "step": 10326 }, { "epoch": 1.4473721093202523, "grad_norm": 0.32615530490875244, "learning_rate": 0.00015904807462329586, "loss": 0.0531, "step": 10327 }, { "epoch": 1.4475122634898387, "grad_norm": 0.49578773975372314, "learning_rate": 0.0001590337239894762, "loss": 0.1529, "step": 10328 }, { "epoch": 1.4476524176594254, "grad_norm": 0.5705392360687256, "learning_rate": 0.00015901937335565655, "loss": 0.0756, "step": 10329 }, { "epoch": 1.4477925718290119, "grad_norm": 0.4772118031978607, "learning_rate": 0.00015900502272183688, "loss": 0.0606, "step": 10330 }, { "epoch": 1.4479327259985983, "grad_norm": 0.19996708631515503, "learning_rate": 0.0001589906720880172, "loss": 0.0458, "step": 10331 }, { "epoch": 1.448072880168185, "grad_norm": 1.5132197141647339, "learning_rate": 0.00015897632145419753, "loss": 0.0828, "step": 10332 }, { "epoch": 1.4482130343377715, "grad_norm": 2.096529006958008, "learning_rate": 0.0001589619708203779, "loss": 0.0249, "step": 10333 }, { "epoch": 1.4483531885073582, "grad_norm": 1.0235874652862549, "learning_rate": 0.00015894762018655822, "loss": 0.2314, "step": 10334 }, { "epoch": 1.4484933426769446, "grad_norm": 1.823887825012207, 
"learning_rate": 0.00015893326955273855, "loss": 0.3277, "step": 10335 }, { "epoch": 1.448633496846531, "grad_norm": 0.23627851903438568, "learning_rate": 0.0001589189189189189, "loss": 0.0189, "step": 10336 }, { "epoch": 1.4487736510161178, "grad_norm": 0.22111201286315918, "learning_rate": 0.00015890456828509923, "loss": 0.0207, "step": 10337 }, { "epoch": 1.4489138051857042, "grad_norm": 0.34417077898979187, "learning_rate": 0.00015889021765127956, "loss": 0.1092, "step": 10338 }, { "epoch": 1.449053959355291, "grad_norm": 0.310786634683609, "learning_rate": 0.00015887586701745994, "loss": 0.0572, "step": 10339 }, { "epoch": 1.4491941135248774, "grad_norm": 0.08493835479021072, "learning_rate": 0.00015886151638364027, "loss": 0.0122, "step": 10340 }, { "epoch": 1.4493342676944638, "grad_norm": 0.37680280208587646, "learning_rate": 0.0001588471657498206, "loss": 0.0364, "step": 10341 }, { "epoch": 1.4494744218640505, "grad_norm": 0.3844102621078491, "learning_rate": 0.00015883281511600095, "loss": 0.059, "step": 10342 }, { "epoch": 1.449614576033637, "grad_norm": 0.35041797161102295, "learning_rate": 0.00015881846448218128, "loss": 0.0681, "step": 10343 }, { "epoch": 1.4497547302032237, "grad_norm": 0.36123186349868774, "learning_rate": 0.0001588041138483616, "loss": 0.0505, "step": 10344 }, { "epoch": 1.44989488437281, "grad_norm": 0.3794997036457062, "learning_rate": 0.00015878976321454197, "loss": 0.1217, "step": 10345 }, { "epoch": 1.4500350385423966, "grad_norm": 0.3944834768772125, "learning_rate": 0.0001587754125807223, "loss": 0.0257, "step": 10346 }, { "epoch": 1.4501751927119833, "grad_norm": 0.20971815288066864, "learning_rate": 0.00015876106194690262, "loss": 0.0204, "step": 10347 }, { "epoch": 1.4503153468815697, "grad_norm": 0.2663668394088745, "learning_rate": 0.000158746711313083, "loss": 0.087, "step": 10348 }, { "epoch": 1.4504555010511564, "grad_norm": 0.24102389812469482, "learning_rate": 0.00015873236067926334, "loss": 0.057, "step": 10349 }, 
{ "epoch": 1.4505956552207429, "grad_norm": 0.2945449650287628, "learning_rate": 0.00015871801004544366, "loss": 0.0267, "step": 10350 }, { "epoch": 1.4507358093903293, "grad_norm": 0.33592602610588074, "learning_rate": 0.000158703659411624, "loss": 0.0756, "step": 10351 }, { "epoch": 1.4508759635599158, "grad_norm": 0.40410202741622925, "learning_rate": 0.00015868930877780435, "loss": 0.0605, "step": 10352 }, { "epoch": 1.4510161177295025, "grad_norm": 0.7900621891021729, "learning_rate": 0.00015867495814398468, "loss": 0.0556, "step": 10353 }, { "epoch": 1.4511562718990891, "grad_norm": 0.5548974275588989, "learning_rate": 0.000158660607510165, "loss": 0.0866, "step": 10354 }, { "epoch": 1.4512964260686756, "grad_norm": 0.4954400360584259, "learning_rate": 0.00015864625687634536, "loss": 0.073, "step": 10355 }, { "epoch": 1.451436580238262, "grad_norm": 0.5850778222084045, "learning_rate": 0.0001586319062425257, "loss": 0.112, "step": 10356 }, { "epoch": 1.4515767344078485, "grad_norm": 0.3048843741416931, "learning_rate": 0.00015861755560870602, "loss": 0.062, "step": 10357 }, { "epoch": 1.4517168885774352, "grad_norm": 0.3343328535556793, "learning_rate": 0.0001586032049748864, "loss": 0.0482, "step": 10358 }, { "epoch": 1.4518570427470217, "grad_norm": 0.35425207018852234, "learning_rate": 0.00015858885434106673, "loss": 0.0632, "step": 10359 }, { "epoch": 1.4519971969166083, "grad_norm": 0.1927790492773056, "learning_rate": 0.00015857450370724706, "loss": 0.0349, "step": 10360 }, { "epoch": 1.4521373510861948, "grad_norm": 0.8330217599868774, "learning_rate": 0.00015856015307342741, "loss": 0.051, "step": 10361 }, { "epoch": 1.4522775052557813, "grad_norm": 0.5046206116676331, "learning_rate": 0.00015854580243960774, "loss": 0.0319, "step": 10362 }, { "epoch": 1.452417659425368, "grad_norm": 0.21573689579963684, "learning_rate": 0.00015853145180578807, "loss": 0.0404, "step": 10363 }, { "epoch": 1.4525578135949544, "grad_norm": 0.29211878776550293, 
"learning_rate": 0.00015851710117196843, "loss": 0.0507, "step": 10364 }, { "epoch": 1.452697967764541, "grad_norm": 0.47367075085639954, "learning_rate": 0.00015850275053814876, "loss": 0.0756, "step": 10365 }, { "epoch": 1.4528381219341275, "grad_norm": 0.659792423248291, "learning_rate": 0.00015848839990432908, "loss": 0.0919, "step": 10366 }, { "epoch": 1.452978276103714, "grad_norm": 0.40301090478897095, "learning_rate": 0.0001584740492705094, "loss": 0.0542, "step": 10367 }, { "epoch": 1.4531184302733007, "grad_norm": 0.244008406996727, "learning_rate": 0.00015845969863668977, "loss": 0.0225, "step": 10368 }, { "epoch": 1.4532585844428871, "grad_norm": 0.2853996455669403, "learning_rate": 0.0001584453480028701, "loss": 0.0291, "step": 10369 }, { "epoch": 1.4533987386124738, "grad_norm": 0.860802948474884, "learning_rate": 0.00015843099736905043, "loss": 0.0752, "step": 10370 }, { "epoch": 1.4535388927820603, "grad_norm": 0.1470966935157776, "learning_rate": 0.0001584166467352308, "loss": 0.0134, "step": 10371 }, { "epoch": 1.4536790469516467, "grad_norm": 0.5490665435791016, "learning_rate": 0.00015840229610141114, "loss": 0.0723, "step": 10372 }, { "epoch": 1.4538192011212334, "grad_norm": 0.14277786016464233, "learning_rate": 0.00015838794546759147, "loss": 0.022, "step": 10373 }, { "epoch": 1.4539593552908199, "grad_norm": 0.49025362730026245, "learning_rate": 0.00015837359483377182, "loss": 0.0677, "step": 10374 }, { "epoch": 1.4540995094604066, "grad_norm": 0.33080458641052246, "learning_rate": 0.00015835924419995215, "loss": 0.0516, "step": 10375 }, { "epoch": 1.454239663629993, "grad_norm": 0.3922242522239685, "learning_rate": 0.00015834489356613248, "loss": 0.0579, "step": 10376 }, { "epoch": 1.4543798177995795, "grad_norm": 0.3718421757221222, "learning_rate": 0.00015833054293231283, "loss": 0.0363, "step": 10377 }, { "epoch": 1.4545199719691662, "grad_norm": 0.4780240058898926, "learning_rate": 0.00015831619229849316, "loss": 0.0502, "step": 10378 
}, { "epoch": 1.4546601261387526, "grad_norm": 0.1006823256611824, "learning_rate": 0.0001583018416646735, "loss": 0.005, "step": 10379 }, { "epoch": 1.4548002803083393, "grad_norm": 0.14853745698928833, "learning_rate": 0.00015828749103085387, "loss": 0.022, "step": 10380 }, { "epoch": 1.4549404344779258, "grad_norm": 0.1390381008386612, "learning_rate": 0.0001582731403970342, "loss": 0.0095, "step": 10381 }, { "epoch": 1.4550805886475122, "grad_norm": 0.20819009840488434, "learning_rate": 0.00015825878976321453, "loss": 0.0088, "step": 10382 }, { "epoch": 1.4552207428170987, "grad_norm": 4.472097873687744, "learning_rate": 0.0001582444391293949, "loss": 0.2471, "step": 10383 }, { "epoch": 1.4553608969866854, "grad_norm": 2.834503173828125, "learning_rate": 0.00015823008849557522, "loss": 0.1322, "step": 10384 }, { "epoch": 1.4555010511562718, "grad_norm": 0.6564981341362, "learning_rate": 0.00015821573786175554, "loss": 0.0367, "step": 10385 }, { "epoch": 1.4556412053258585, "grad_norm": 0.24525196850299835, "learning_rate": 0.00015820138722793587, "loss": 0.0492, "step": 10386 }, { "epoch": 1.455781359495445, "grad_norm": 0.15929551422595978, "learning_rate": 0.00015818703659411623, "loss": 0.019, "step": 10387 }, { "epoch": 1.4559215136650314, "grad_norm": 0.3377452790737152, "learning_rate": 0.00015817268596029656, "loss": 0.0712, "step": 10388 }, { "epoch": 1.456061667834618, "grad_norm": 0.33260834217071533, "learning_rate": 0.00015815833532647688, "loss": 0.0618, "step": 10389 }, { "epoch": 1.4562018220042046, "grad_norm": 0.5088858008384705, "learning_rate": 0.00015814398469265727, "loss": 0.1161, "step": 10390 }, { "epoch": 1.4563419761737912, "grad_norm": 0.22829212248325348, "learning_rate": 0.0001581296340588376, "loss": 0.0555, "step": 10391 }, { "epoch": 1.4564821303433777, "grad_norm": 0.4645979106426239, "learning_rate": 0.00015811528342501792, "loss": 0.0758, "step": 10392 }, { "epoch": 1.4566222845129642, "grad_norm": 0.2573792636394501, 
"learning_rate": 0.00015810093279119828, "loss": 0.0529, "step": 10393 }, { "epoch": 1.4567624386825508, "grad_norm": 0.44291824102401733, "learning_rate": 0.0001580865821573786, "loss": 0.0286, "step": 10394 }, { "epoch": 1.4569025928521373, "grad_norm": 0.5993772745132446, "learning_rate": 0.00015807223152355894, "loss": 0.0686, "step": 10395 }, { "epoch": 1.457042747021724, "grad_norm": 0.21350601315498352, "learning_rate": 0.0001580578808897393, "loss": 0.0298, "step": 10396 }, { "epoch": 1.4571829011913104, "grad_norm": 0.2096581906080246, "learning_rate": 0.00015804353025591962, "loss": 0.0128, "step": 10397 }, { "epoch": 1.457323055360897, "grad_norm": 0.21883641183376312, "learning_rate": 0.00015802917962209995, "loss": 0.0215, "step": 10398 }, { "epoch": 1.4574632095304836, "grad_norm": 0.2923535704612732, "learning_rate": 0.0001580148289882803, "loss": 0.0213, "step": 10399 }, { "epoch": 1.45760336370007, "grad_norm": 0.47698137164115906, "learning_rate": 0.00015800047835446063, "loss": 0.0337, "step": 10400 }, { "epoch": 1.4577435178696567, "grad_norm": 0.44751036167144775, "learning_rate": 0.00015798612772064096, "loss": 0.0503, "step": 10401 }, { "epoch": 1.4578836720392432, "grad_norm": 0.5426173806190491, "learning_rate": 0.0001579717770868213, "loss": 0.0256, "step": 10402 }, { "epoch": 1.4580238262088296, "grad_norm": 0.3416033387184143, "learning_rate": 0.00015795742645300167, "loss": 0.0426, "step": 10403 }, { "epoch": 1.4581639803784163, "grad_norm": 0.3211739957332611, "learning_rate": 0.000157943075819182, "loss": 0.0505, "step": 10404 }, { "epoch": 1.4583041345480028, "grad_norm": 0.594524085521698, "learning_rate": 0.00015792872518536233, "loss": 0.0409, "step": 10405 }, { "epoch": 1.4584442887175895, "grad_norm": 0.6529772281646729, "learning_rate": 0.0001579143745515427, "loss": 0.0733, "step": 10406 }, { "epoch": 1.458584442887176, "grad_norm": 0.24155311286449432, "learning_rate": 0.00015790002391772302, "loss": 0.0244, "step": 10407 }, 
{ "epoch": 1.4587245970567624, "grad_norm": 0.21188224852085114, "learning_rate": 0.00015788567328390334, "loss": 0.0339, "step": 10408 }, { "epoch": 1.458864751226349, "grad_norm": 0.3168196380138397, "learning_rate": 0.0001578713226500837, "loss": 0.0299, "step": 10409 }, { "epoch": 1.4590049053959355, "grad_norm": 0.2256992906332016, "learning_rate": 0.00015785697201626403, "loss": 0.0503, "step": 10410 }, { "epoch": 1.4591450595655222, "grad_norm": 0.29772457480430603, "learning_rate": 0.00015784262138244436, "loss": 0.0256, "step": 10411 }, { "epoch": 1.4592852137351087, "grad_norm": 0.3221682608127594, "learning_rate": 0.00015782827074862474, "loss": 0.0621, "step": 10412 }, { "epoch": 1.4594253679046951, "grad_norm": 0.27890974283218384, "learning_rate": 0.00015781392011480507, "loss": 0.0379, "step": 10413 }, { "epoch": 1.4595655220742816, "grad_norm": 0.42896613478660583, "learning_rate": 0.0001577995694809854, "loss": 0.044, "step": 10414 }, { "epoch": 1.4597056762438683, "grad_norm": 0.3510957360267639, "learning_rate": 0.00015778521884716575, "loss": 0.0963, "step": 10415 }, { "epoch": 1.4598458304134547, "grad_norm": 0.15724126994609833, "learning_rate": 0.00015777086821334608, "loss": 0.0132, "step": 10416 }, { "epoch": 1.4599859845830414, "grad_norm": 0.2976750135421753, "learning_rate": 0.0001577565175795264, "loss": 0.0678, "step": 10417 }, { "epoch": 1.4601261387526279, "grad_norm": 0.2269676774740219, "learning_rate": 0.00015774216694570674, "loss": 0.02, "step": 10418 }, { "epoch": 1.4602662929222143, "grad_norm": 0.5498712658882141, "learning_rate": 0.0001577278163118871, "loss": 0.0503, "step": 10419 }, { "epoch": 1.460406447091801, "grad_norm": 0.343868613243103, "learning_rate": 0.00015771346567806742, "loss": 0.0541, "step": 10420 }, { "epoch": 1.4605466012613875, "grad_norm": 0.62278151512146, "learning_rate": 0.00015769911504424775, "loss": 0.0407, "step": 10421 }, { "epoch": 1.4606867554309741, "grad_norm": 0.310596764087677, 
"learning_rate": 0.00015768476441042813, "loss": 0.0705, "step": 10422 }, { "epoch": 1.4608269096005606, "grad_norm": 0.3266717791557312, "learning_rate": 0.00015767041377660846, "loss": 0.0482, "step": 10423 }, { "epoch": 1.460967063770147, "grad_norm": 0.3247486352920532, "learning_rate": 0.0001576560631427888, "loss": 0.0247, "step": 10424 }, { "epoch": 1.4611072179397337, "grad_norm": 0.2839023768901825, "learning_rate": 0.00015764171250896915, "loss": 0.0856, "step": 10425 }, { "epoch": 1.4612473721093202, "grad_norm": 0.3268580734729767, "learning_rate": 0.00015762736187514948, "loss": 0.0378, "step": 10426 }, { "epoch": 1.461387526278907, "grad_norm": 0.4395720660686493, "learning_rate": 0.0001576130112413298, "loss": 0.0887, "step": 10427 }, { "epoch": 1.4615276804484933, "grad_norm": 0.6974881291389465, "learning_rate": 0.00015759866060751016, "loss": 0.039, "step": 10428 }, { "epoch": 1.4616678346180798, "grad_norm": 0.35591039061546326, "learning_rate": 0.0001575843099736905, "loss": 0.0917, "step": 10429 }, { "epoch": 1.4618079887876665, "grad_norm": 0.10858973115682602, "learning_rate": 0.00015756995933987082, "loss": 0.0143, "step": 10430 }, { "epoch": 1.461948142957253, "grad_norm": 0.619379460811615, "learning_rate": 0.00015755560870605117, "loss": 0.0626, "step": 10431 }, { "epoch": 1.4620882971268396, "grad_norm": 0.48327958583831787, "learning_rate": 0.0001575412580722315, "loss": 0.0443, "step": 10432 }, { "epoch": 1.462228451296426, "grad_norm": 0.2702605128288269, "learning_rate": 0.00015752690743841183, "loss": 0.015, "step": 10433 }, { "epoch": 1.4623686054660125, "grad_norm": 3.299253463745117, "learning_rate": 0.0001575125568045922, "loss": 0.0335, "step": 10434 }, { "epoch": 1.4625087596355992, "grad_norm": 0.647927463054657, "learning_rate": 0.00015749820617077254, "loss": 0.07, "step": 10435 }, { "epoch": 1.4626489138051857, "grad_norm": 0.15951493382453918, "learning_rate": 0.00015748385553695287, "loss": 0.054, "step": 10436 }, { 
"epoch": 1.4627890679747724, "grad_norm": 0.3459722697734833, "learning_rate": 0.0001574695049031332, "loss": 0.042, "step": 10437 }, { "epoch": 1.4629292221443588, "grad_norm": 0.6201842427253723, "learning_rate": 0.00015745515426931355, "loss": 0.0899, "step": 10438 }, { "epoch": 1.4630693763139453, "grad_norm": 0.19347669184207916, "learning_rate": 0.00015744080363549388, "loss": 0.0294, "step": 10439 }, { "epoch": 1.4632095304835318, "grad_norm": 0.2756344974040985, "learning_rate": 0.0001574264530016742, "loss": 0.0197, "step": 10440 }, { "epoch": 1.4633496846531184, "grad_norm": 0.3071753680706024, "learning_rate": 0.00015741210236785457, "loss": 0.0473, "step": 10441 }, { "epoch": 1.4634898388227051, "grad_norm": 0.3743177056312561, "learning_rate": 0.0001573977517340349, "loss": 0.0454, "step": 10442 }, { "epoch": 1.4636299929922916, "grad_norm": 0.32004883885383606, "learning_rate": 0.00015738340110021522, "loss": 0.0682, "step": 10443 }, { "epoch": 1.463770147161878, "grad_norm": 0.19337202608585358, "learning_rate": 0.0001573690504663956, "loss": 0.0245, "step": 10444 }, { "epoch": 1.4639103013314645, "grad_norm": 0.31909459829330444, "learning_rate": 0.00015735469983257593, "loss": 0.0309, "step": 10445 }, { "epoch": 1.4640504555010512, "grad_norm": 0.4476185142993927, "learning_rate": 0.00015734034919875626, "loss": 0.1003, "step": 10446 }, { "epoch": 1.4641906096706376, "grad_norm": 0.3821708858013153, "learning_rate": 0.00015732599856493662, "loss": 0.0352, "step": 10447 }, { "epoch": 1.4643307638402243, "grad_norm": 0.2570171058177948, "learning_rate": 0.00015731164793111695, "loss": 0.0483, "step": 10448 }, { "epoch": 1.4644709180098108, "grad_norm": 0.24343939125537872, "learning_rate": 0.00015729729729729728, "loss": 0.0264, "step": 10449 }, { "epoch": 1.4646110721793972, "grad_norm": 0.46883058547973633, "learning_rate": 0.00015728294666347763, "loss": 0.0583, "step": 10450 }, { "epoch": 1.464751226348984, "grad_norm": 0.32669922709465027, 
"learning_rate": 0.00015726859602965796, "loss": 0.0705, "step": 10451 }, { "epoch": 1.4648913805185704, "grad_norm": 0.40174418687820435, "learning_rate": 0.0001572542453958383, "loss": 0.0691, "step": 10452 }, { "epoch": 1.465031534688157, "grad_norm": 0.5688478946685791, "learning_rate": 0.00015723989476201862, "loss": 0.0425, "step": 10453 }, { "epoch": 1.4651716888577435, "grad_norm": 0.17448224127292633, "learning_rate": 0.000157225544128199, "loss": 0.0474, "step": 10454 }, { "epoch": 1.46531184302733, "grad_norm": 0.1570330262184143, "learning_rate": 0.00015721119349437933, "loss": 0.0184, "step": 10455 }, { "epoch": 1.4654519971969167, "grad_norm": 0.23003444075584412, "learning_rate": 0.00015719684286055966, "loss": 0.014, "step": 10456 }, { "epoch": 1.4655921513665031, "grad_norm": 0.5143041014671326, "learning_rate": 0.00015718249222674, "loss": 0.0349, "step": 10457 }, { "epoch": 1.4657323055360898, "grad_norm": 0.4303840398788452, "learning_rate": 0.00015716814159292034, "loss": 0.0207, "step": 10458 }, { "epoch": 1.4658724597056763, "grad_norm": 0.23060907423496246, "learning_rate": 0.00015715379095910067, "loss": 0.0208, "step": 10459 }, { "epoch": 1.4660126138752627, "grad_norm": 0.1794975996017456, "learning_rate": 0.00015713944032528103, "loss": 0.0654, "step": 10460 }, { "epoch": 1.4661527680448494, "grad_norm": 0.32689523696899414, "learning_rate": 0.00015712508969146135, "loss": 0.061, "step": 10461 }, { "epoch": 1.4662929222144359, "grad_norm": 1.0868775844573975, "learning_rate": 0.00015711073905764168, "loss": 0.0271, "step": 10462 }, { "epoch": 1.4664330763840225, "grad_norm": 0.2191668152809143, "learning_rate": 0.00015709638842382204, "loss": 0.0294, "step": 10463 }, { "epoch": 1.466573230553609, "grad_norm": 0.48042985796928406, "learning_rate": 0.00015708203779000237, "loss": 0.0621, "step": 10464 }, { "epoch": 1.4667133847231955, "grad_norm": 0.26325538754463196, "learning_rate": 0.0001570676871561827, "loss": 0.037, "step": 10465 }, 
{ "epoch": 1.4668535388927821, "grad_norm": 0.335306316614151, "learning_rate": 0.00015705333652236308, "loss": 0.0897, "step": 10466 }, { "epoch": 1.4669936930623686, "grad_norm": 0.10870976746082306, "learning_rate": 0.0001570389858885434, "loss": 0.0156, "step": 10467 }, { "epoch": 1.4671338472319553, "grad_norm": 0.8795617818832397, "learning_rate": 0.00015702463525472374, "loss": 0.0769, "step": 10468 }, { "epoch": 1.4672740014015417, "grad_norm": 0.7768855690956116, "learning_rate": 0.0001570102846209041, "loss": 0.1468, "step": 10469 }, { "epoch": 1.4674141555711282, "grad_norm": 0.14433500170707703, "learning_rate": 0.00015699593398708442, "loss": 0.0232, "step": 10470 }, { "epoch": 1.4675543097407147, "grad_norm": 0.22287726402282715, "learning_rate": 0.00015698158335326475, "loss": 0.0287, "step": 10471 }, { "epoch": 1.4676944639103013, "grad_norm": 0.2655218541622162, "learning_rate": 0.00015696723271944508, "loss": 0.0282, "step": 10472 }, { "epoch": 1.4678346180798878, "grad_norm": 0.22424133121967316, "learning_rate": 0.00015695288208562543, "loss": 0.0382, "step": 10473 }, { "epoch": 1.4679747722494745, "grad_norm": 0.2551825940608978, "learning_rate": 0.00015693853145180576, "loss": 0.0517, "step": 10474 }, { "epoch": 1.468114926419061, "grad_norm": 0.8367336988449097, "learning_rate": 0.0001569241808179861, "loss": 0.0403, "step": 10475 }, { "epoch": 1.4682550805886474, "grad_norm": 0.1715218424797058, "learning_rate": 0.00015690983018416647, "loss": 0.0274, "step": 10476 }, { "epoch": 1.468395234758234, "grad_norm": 0.3328858017921448, "learning_rate": 0.0001568954795503468, "loss": 0.0433, "step": 10477 }, { "epoch": 1.4685353889278205, "grad_norm": 0.47546708583831787, "learning_rate": 0.00015688112891652713, "loss": 0.1093, "step": 10478 }, { "epoch": 1.4686755430974072, "grad_norm": 0.880604088306427, "learning_rate": 0.00015686677828270749, "loss": 0.1032, "step": 10479 }, { "epoch": 1.4688156972669937, "grad_norm": 0.532997727394104, 
"learning_rate": 0.00015685242764888781, "loss": 0.0608, "step": 10480 }, { "epoch": 1.4689558514365801, "grad_norm": 0.9230180978775024, "learning_rate": 0.00015683807701506814, "loss": 0.163, "step": 10481 }, { "epoch": 1.4690960056061668, "grad_norm": 1.7509791851043701, "learning_rate": 0.0001568237263812485, "loss": 0.0869, "step": 10482 }, { "epoch": 1.4692361597757533, "grad_norm": 1.7276511192321777, "learning_rate": 0.00015680937574742883, "loss": 0.2058, "step": 10483 }, { "epoch": 1.46937631394534, "grad_norm": 0.4504629969596863, "learning_rate": 0.00015679502511360916, "loss": 0.0833, "step": 10484 }, { "epoch": 1.4695164681149264, "grad_norm": 1.767192006111145, "learning_rate": 0.00015678067447978954, "loss": 0.0561, "step": 10485 }, { "epoch": 1.4696566222845129, "grad_norm": 0.046178188174963, "learning_rate": 0.00015676632384596987, "loss": 0.0058, "step": 10486 }, { "epoch": 1.4697967764540996, "grad_norm": 0.3052062392234802, "learning_rate": 0.0001567519732121502, "loss": 0.0601, "step": 10487 }, { "epoch": 1.469936930623686, "grad_norm": 0.2043287605047226, "learning_rate": 0.00015673762257833052, "loss": 0.0344, "step": 10488 }, { "epoch": 1.4700770847932727, "grad_norm": 0.5523789525032043, "learning_rate": 0.00015672327194451088, "loss": 0.068, "step": 10489 }, { "epoch": 1.4702172389628592, "grad_norm": 0.3881550133228302, "learning_rate": 0.0001567089213106912, "loss": 0.058, "step": 10490 }, { "epoch": 1.4703573931324456, "grad_norm": 0.2595837116241455, "learning_rate": 0.00015669457067687154, "loss": 0.0419, "step": 10491 }, { "epoch": 1.4704975473020323, "grad_norm": 0.3189378082752228, "learning_rate": 0.0001566802200430519, "loss": 0.043, "step": 10492 }, { "epoch": 1.4706377014716188, "grad_norm": 0.25390636920928955, "learning_rate": 0.00015666586940923222, "loss": 0.0901, "step": 10493 }, { "epoch": 1.4707778556412054, "grad_norm": 0.5380606651306152, "learning_rate": 0.00015665151877541255, "loss": 0.04, "step": 10494 }, { 
"epoch": 1.470918009810792, "grad_norm": 0.4755038619041443, "learning_rate": 0.0001566371681415929, "loss": 0.0411, "step": 10495 }, { "epoch": 1.4710581639803784, "grad_norm": 0.568804144859314, "learning_rate": 0.00015662281750777323, "loss": 0.0364, "step": 10496 }, { "epoch": 1.4711983181499648, "grad_norm": 0.2592986226081848, "learning_rate": 0.00015660846687395356, "loss": 0.0242, "step": 10497 }, { "epoch": 1.4713384723195515, "grad_norm": 0.4103192090988159, "learning_rate": 0.00015659411624013394, "loss": 0.1018, "step": 10498 }, { "epoch": 1.4714786264891382, "grad_norm": 0.467492014169693, "learning_rate": 0.00015657976560631427, "loss": 0.0579, "step": 10499 }, { "epoch": 1.4716187806587246, "grad_norm": 0.4619954526424408, "learning_rate": 0.0001565654149724946, "loss": 0.0639, "step": 10500 }, { "epoch": 1.471758934828311, "grad_norm": 0.2719810903072357, "learning_rate": 0.00015655106433867496, "loss": 0.0702, "step": 10501 }, { "epoch": 1.4718990889978976, "grad_norm": 0.5512811541557312, "learning_rate": 0.00015653671370485529, "loss": 0.1299, "step": 10502 }, { "epoch": 1.4720392431674842, "grad_norm": 0.29279544949531555, "learning_rate": 0.00015652236307103561, "loss": 0.0356, "step": 10503 }, { "epoch": 1.4721793973370707, "grad_norm": 0.2552359998226166, "learning_rate": 0.00015650801243721597, "loss": 0.0255, "step": 10504 }, { "epoch": 1.4723195515066574, "grad_norm": 0.5418764352798462, "learning_rate": 0.0001564936618033963, "loss": 0.0428, "step": 10505 }, { "epoch": 1.4724597056762438, "grad_norm": 0.5578295588493347, "learning_rate": 0.00015647931116957663, "loss": 0.0752, "step": 10506 }, { "epoch": 1.4725998598458303, "grad_norm": 0.5077351927757263, "learning_rate": 0.00015646496053575696, "loss": 0.0532, "step": 10507 }, { "epoch": 1.472740014015417, "grad_norm": 0.2052440345287323, "learning_rate": 0.00015645060990193734, "loss": 0.0491, "step": 10508 }, { "epoch": 1.4728801681850034, "grad_norm": 0.24515457451343536, 
"learning_rate": 0.00015643625926811767, "loss": 0.0389, "step": 10509 }, { "epoch": 1.4730203223545901, "grad_norm": 0.23881974816322327, "learning_rate": 0.000156421908634298, "loss": 0.0235, "step": 10510 }, { "epoch": 1.4731604765241766, "grad_norm": 0.5546131730079651, "learning_rate": 0.00015640755800047835, "loss": 0.0257, "step": 10511 }, { "epoch": 1.473300630693763, "grad_norm": 0.46937084197998047, "learning_rate": 0.00015639320736665868, "loss": 0.0659, "step": 10512 }, { "epoch": 1.4734407848633497, "grad_norm": 1.763956904411316, "learning_rate": 0.000156378856732839, "loss": 0.0982, "step": 10513 }, { "epoch": 1.4735809390329362, "grad_norm": 0.5380129218101501, "learning_rate": 0.00015636450609901936, "loss": 0.0778, "step": 10514 }, { "epoch": 1.4737210932025229, "grad_norm": 0.1430983990430832, "learning_rate": 0.0001563501554651997, "loss": 0.0276, "step": 10515 }, { "epoch": 1.4738612473721093, "grad_norm": 0.21112503111362457, "learning_rate": 0.00015633580483138002, "loss": 0.0199, "step": 10516 }, { "epoch": 1.4740014015416958, "grad_norm": 0.31134650111198425, "learning_rate": 0.0001563214541975604, "loss": 0.1121, "step": 10517 }, { "epoch": 1.4741415557112825, "grad_norm": 0.5994790196418762, "learning_rate": 0.00015630710356374073, "loss": 0.0658, "step": 10518 }, { "epoch": 1.474281709880869, "grad_norm": 0.44840505719184875, "learning_rate": 0.00015629275292992106, "loss": 0.1078, "step": 10519 }, { "epoch": 1.4744218640504556, "grad_norm": 1.0635029077529907, "learning_rate": 0.00015627840229610142, "loss": 0.0454, "step": 10520 }, { "epoch": 1.474562018220042, "grad_norm": 0.43159785866737366, "learning_rate": 0.00015626405166228175, "loss": 0.1139, "step": 10521 }, { "epoch": 1.4747021723896285, "grad_norm": 0.38016262650489807, "learning_rate": 0.00015624970102846207, "loss": 0.0541, "step": 10522 }, { "epoch": 1.4748423265592152, "grad_norm": 0.407825767993927, "learning_rate": 0.0001562353503946424, "loss": 0.0539, "step": 10523 
}, { "epoch": 1.4749824807288017, "grad_norm": 0.22972871363162994, "learning_rate": 0.00015622099976082276, "loss": 0.0633, "step": 10524 }, { "epoch": 1.4751226348983884, "grad_norm": 0.5041607618331909, "learning_rate": 0.0001562066491270031, "loss": 0.0726, "step": 10525 }, { "epoch": 1.4752627890679748, "grad_norm": 0.7107272744178772, "learning_rate": 0.00015619229849318342, "loss": 0.1012, "step": 10526 }, { "epoch": 1.4754029432375613, "grad_norm": 0.33229541778564453, "learning_rate": 0.00015617794785936377, "loss": 0.092, "step": 10527 }, { "epoch": 1.4755430974071477, "grad_norm": 0.4425903260707855, "learning_rate": 0.0001561635972255441, "loss": 0.0369, "step": 10528 }, { "epoch": 1.4756832515767344, "grad_norm": 0.2784324884414673, "learning_rate": 0.00015614924659172443, "loss": 0.0898, "step": 10529 }, { "epoch": 1.4758234057463209, "grad_norm": 0.8509828448295593, "learning_rate": 0.0001561348959579048, "loss": 0.0758, "step": 10530 }, { "epoch": 1.4759635599159076, "grad_norm": 1.1231894493103027, "learning_rate": 0.00015612054532408514, "loss": 0.1082, "step": 10531 }, { "epoch": 1.476103714085494, "grad_norm": 2.6428587436676025, "learning_rate": 0.00015610619469026547, "loss": 0.1552, "step": 10532 }, { "epoch": 1.4762438682550805, "grad_norm": 0.774156391620636, "learning_rate": 0.00015609184405644582, "loss": 0.2044, "step": 10533 }, { "epoch": 1.4763840224246672, "grad_norm": 2.5511696338653564, "learning_rate": 0.00015607749342262615, "loss": 0.1108, "step": 10534 }, { "epoch": 1.4765241765942536, "grad_norm": 0.06326726824045181, "learning_rate": 0.00015606314278880648, "loss": 0.0042, "step": 10535 }, { "epoch": 1.4766643307638403, "grad_norm": 0.34364280104637146, "learning_rate": 0.00015604879215498684, "loss": 0.0563, "step": 10536 }, { "epoch": 1.4768044849334268, "grad_norm": 0.4228704273700714, "learning_rate": 0.00015603444152116717, "loss": 0.145, "step": 10537 }, { "epoch": 1.4769446391030132, "grad_norm": 0.189705029129982, 
"learning_rate": 0.0001560200908873475, "loss": 0.0464, "step": 10538 }, { "epoch": 1.4770847932726, "grad_norm": 0.3275066614151001, "learning_rate": 0.00015600574025352788, "loss": 0.0474, "step": 10539 }, { "epoch": 1.4772249474421864, "grad_norm": 0.270628422498703, "learning_rate": 0.0001559913896197082, "loss": 0.0459, "step": 10540 }, { "epoch": 1.477365101611773, "grad_norm": 0.3875422775745392, "learning_rate": 0.00015597703898588853, "loss": 0.0499, "step": 10541 }, { "epoch": 1.4775052557813595, "grad_norm": 0.47175467014312744, "learning_rate": 0.00015596268835206886, "loss": 0.079, "step": 10542 }, { "epoch": 1.477645409950946, "grad_norm": 0.32115423679351807, "learning_rate": 0.00015594833771824922, "loss": 0.0659, "step": 10543 }, { "epoch": 1.4777855641205326, "grad_norm": 0.27516838908195496, "learning_rate": 0.00015593398708442955, "loss": 0.0395, "step": 10544 }, { "epoch": 1.477925718290119, "grad_norm": 0.20578263700008392, "learning_rate": 0.00015591963645060988, "loss": 0.0386, "step": 10545 }, { "epoch": 1.4780658724597058, "grad_norm": 0.2117847353219986, "learning_rate": 0.00015590528581679023, "loss": 0.041, "step": 10546 }, { "epoch": 1.4782060266292922, "grad_norm": 0.2477148473262787, "learning_rate": 0.00015589093518297056, "loss": 0.0395, "step": 10547 }, { "epoch": 1.4783461807988787, "grad_norm": 0.3784487545490265, "learning_rate": 0.0001558765845491509, "loss": 0.0628, "step": 10548 }, { "epoch": 1.4784863349684654, "grad_norm": 0.4848484992980957, "learning_rate": 0.00015586223391533127, "loss": 0.0628, "step": 10549 }, { "epoch": 1.4786264891380518, "grad_norm": 0.3362005352973938, "learning_rate": 0.0001558478832815116, "loss": 0.0238, "step": 10550 }, { "epoch": 1.4787666433076385, "grad_norm": 0.11578638106584549, "learning_rate": 0.00015583353264769193, "loss": 0.0105, "step": 10551 }, { "epoch": 1.478906797477225, "grad_norm": 0.7384300827980042, "learning_rate": 0.00015581918201387228, "loss": 0.128, "step": 10552 }, { 
"epoch": 1.4790469516468114, "grad_norm": 0.22978708148002625, "learning_rate": 0.0001558048313800526, "loss": 0.0636, "step": 10553 }, { "epoch": 1.4791871058163981, "grad_norm": 0.22337229549884796, "learning_rate": 0.00015579048074623294, "loss": 0.0355, "step": 10554 }, { "epoch": 1.4793272599859846, "grad_norm": 0.41107049584388733, "learning_rate": 0.0001557761301124133, "loss": 0.0485, "step": 10555 }, { "epoch": 1.4794674141555713, "grad_norm": 0.31415748596191406, "learning_rate": 0.00015576177947859362, "loss": 0.0492, "step": 10556 }, { "epoch": 1.4796075683251577, "grad_norm": 0.260538786649704, "learning_rate": 0.00015574742884477395, "loss": 0.0405, "step": 10557 }, { "epoch": 1.4797477224947442, "grad_norm": 0.5792301893234253, "learning_rate": 0.00015573307821095428, "loss": 0.0634, "step": 10558 }, { "epoch": 1.4798878766643306, "grad_norm": 0.6849362254142761, "learning_rate": 0.00015571872757713464, "loss": 0.0525, "step": 10559 }, { "epoch": 1.4800280308339173, "grad_norm": 0.3748149275779724, "learning_rate": 0.00015570437694331497, "loss": 0.0718, "step": 10560 }, { "epoch": 1.4801681850035038, "grad_norm": 0.2086973637342453, "learning_rate": 0.00015569002630949532, "loss": 0.0432, "step": 10561 }, { "epoch": 1.4803083391730905, "grad_norm": 0.24434258043766022, "learning_rate": 0.00015567567567567568, "loss": 0.0574, "step": 10562 }, { "epoch": 1.480448493342677, "grad_norm": 0.7663661241531372, "learning_rate": 0.000155661325041856, "loss": 0.1086, "step": 10563 }, { "epoch": 1.4805886475122634, "grad_norm": 0.07997685670852661, "learning_rate": 0.00015564697440803633, "loss": 0.0144, "step": 10564 }, { "epoch": 1.48072880168185, "grad_norm": 0.908627986907959, "learning_rate": 0.0001556326237742167, "loss": 0.0469, "step": 10565 }, { "epoch": 1.4808689558514365, "grad_norm": 0.13131190836429596, "learning_rate": 0.00015561827314039702, "loss": 0.0218, "step": 10566 }, { "epoch": 1.4810091100210232, "grad_norm": 0.369503915309906, 
"learning_rate": 0.00015560392250657735, "loss": 0.0392, "step": 10567 }, { "epoch": 1.4811492641906097, "grad_norm": 0.3099110722541809, "learning_rate": 0.0001555895718727577, "loss": 0.0371, "step": 10568 }, { "epoch": 1.4812894183601961, "grad_norm": 0.2589000165462494, "learning_rate": 0.00015557522123893803, "loss": 0.0303, "step": 10569 }, { "epoch": 1.4814295725297828, "grad_norm": 0.2184293270111084, "learning_rate": 0.00015556087060511836, "loss": 0.0462, "step": 10570 }, { "epoch": 1.4815697266993693, "grad_norm": 0.75166255235672, "learning_rate": 0.00015554651997129874, "loss": 0.1352, "step": 10571 }, { "epoch": 1.481709880868956, "grad_norm": 0.32692375779151917, "learning_rate": 0.00015553216933747907, "loss": 0.0537, "step": 10572 }, { "epoch": 1.4818500350385424, "grad_norm": 0.15704116225242615, "learning_rate": 0.0001555178187036594, "loss": 0.0399, "step": 10573 }, { "epoch": 1.4819901892081289, "grad_norm": 0.10007571429014206, "learning_rate": 0.00015550346806983976, "loss": 0.0127, "step": 10574 }, { "epoch": 1.4821303433777155, "grad_norm": 0.3893049955368042, "learning_rate": 0.00015548911743602008, "loss": 0.0661, "step": 10575 }, { "epoch": 1.482270497547302, "grad_norm": 0.6022841334342957, "learning_rate": 0.0001554747668022004, "loss": 0.0562, "step": 10576 }, { "epoch": 1.4824106517168887, "grad_norm": 0.42999178171157837, "learning_rate": 0.00015546041616838074, "loss": 0.0729, "step": 10577 }, { "epoch": 1.4825508058864751, "grad_norm": 0.3609732389450073, "learning_rate": 0.0001554460655345611, "loss": 0.0416, "step": 10578 }, { "epoch": 1.4826909600560616, "grad_norm": 0.3163759112358093, "learning_rate": 0.00015543171490074143, "loss": 0.0788, "step": 10579 }, { "epoch": 1.4828311142256483, "grad_norm": 1.285809874534607, "learning_rate": 0.00015541736426692175, "loss": 0.1231, "step": 10580 }, { "epoch": 1.4829712683952347, "grad_norm": 0.40848323702812195, "learning_rate": 0.00015540301363310214, "loss": 0.0812, "step": 10581 
}, { "epoch": 1.4831114225648214, "grad_norm": 0.8626853227615356, "learning_rate": 0.00015538866299928247, "loss": 0.1236, "step": 10582 }, { "epoch": 1.4832515767344079, "grad_norm": 0.6446532607078552, "learning_rate": 0.0001553743123654628, "loss": 0.0262, "step": 10583 }, { "epoch": 1.4833917309039943, "grad_norm": 1.0703104734420776, "learning_rate": 0.00015535996173164315, "loss": 0.0841, "step": 10584 }, { "epoch": 1.4835318850735808, "grad_norm": 1.4783800840377808, "learning_rate": 0.00015534561109782348, "loss": 0.1207, "step": 10585 }, { "epoch": 1.4836720392431675, "grad_norm": 0.5006522536277771, "learning_rate": 0.0001553312604640038, "loss": 0.0654, "step": 10586 }, { "epoch": 1.4838121934127542, "grad_norm": 0.11923687905073166, "learning_rate": 0.00015531690983018416, "loss": 0.0365, "step": 10587 }, { "epoch": 1.4839523475823406, "grad_norm": 0.2262464463710785, "learning_rate": 0.0001553025591963645, "loss": 0.0572, "step": 10588 }, { "epoch": 1.484092501751927, "grad_norm": 0.2834444046020508, "learning_rate": 0.00015528820856254482, "loss": 0.0361, "step": 10589 }, { "epoch": 1.4842326559215135, "grad_norm": 0.25554704666137695, "learning_rate": 0.00015527385792872518, "loss": 0.0301, "step": 10590 }, { "epoch": 1.4843728100911002, "grad_norm": 0.2724181115627289, "learning_rate": 0.0001552595072949055, "loss": 0.038, "step": 10591 }, { "epoch": 1.4845129642606867, "grad_norm": 0.42190948128700256, "learning_rate": 0.00015524515666108583, "loss": 0.0221, "step": 10592 }, { "epoch": 1.4846531184302734, "grad_norm": 0.35933244228363037, "learning_rate": 0.0001552308060272662, "loss": 0.0471, "step": 10593 }, { "epoch": 1.4847932725998598, "grad_norm": 0.14831778407096863, "learning_rate": 0.00015521645539344654, "loss": 0.0156, "step": 10594 }, { "epoch": 1.4849334267694463, "grad_norm": 0.2693512737751007, "learning_rate": 0.00015520210475962687, "loss": 0.0338, "step": 10595 }, { "epoch": 1.485073580939033, "grad_norm": 0.06814882904291153, 
"learning_rate": 0.0001551877541258072, "loss": 0.013, "step": 10596 }, { "epoch": 1.4852137351086194, "grad_norm": 0.18754149973392487, "learning_rate": 0.00015517340349198756, "loss": 0.0462, "step": 10597 }, { "epoch": 1.485353889278206, "grad_norm": 0.16206228733062744, "learning_rate": 0.00015515905285816789, "loss": 0.0342, "step": 10598 }, { "epoch": 1.4854940434477926, "grad_norm": 0.3485026955604553, "learning_rate": 0.00015514470222434821, "loss": 0.0319, "step": 10599 }, { "epoch": 1.485634197617379, "grad_norm": 0.37309691309928894, "learning_rate": 0.00015513035159052857, "loss": 0.0223, "step": 10600 }, { "epoch": 1.4857743517869657, "grad_norm": 0.5247063040733337, "learning_rate": 0.0001551160009567089, "loss": 0.0712, "step": 10601 }, { "epoch": 1.4859145059565522, "grad_norm": 0.14952580630779266, "learning_rate": 0.00015510165032288923, "loss": 0.0207, "step": 10602 }, { "epoch": 1.4860546601261388, "grad_norm": 0.24514086544513702, "learning_rate": 0.0001550872996890696, "loss": 0.0491, "step": 10603 }, { "epoch": 1.4861948142957253, "grad_norm": 0.12210065126419067, "learning_rate": 0.00015507294905524994, "loss": 0.0136, "step": 10604 }, { "epoch": 1.4863349684653118, "grad_norm": 0.5551408529281616, "learning_rate": 0.00015505859842143027, "loss": 0.086, "step": 10605 }, { "epoch": 1.4864751226348984, "grad_norm": 0.38685253262519836, "learning_rate": 0.00015504424778761062, "loss": 0.0317, "step": 10606 }, { "epoch": 1.486615276804485, "grad_norm": 0.5873581767082214, "learning_rate": 0.00015502989715379095, "loss": 0.0603, "step": 10607 }, { "epoch": 1.4867554309740716, "grad_norm": 0.5957799553871155, "learning_rate": 0.00015501554651997128, "loss": 0.0337, "step": 10608 }, { "epoch": 1.486895585143658, "grad_norm": 0.6263166666030884, "learning_rate": 0.00015500119588615163, "loss": 0.0336, "step": 10609 }, { "epoch": 1.4870357393132445, "grad_norm": 0.6674920916557312, "learning_rate": 0.00015498684525233196, "loss": 0.0749, "step": 
10610 }, { "epoch": 1.4871758934828312, "grad_norm": 0.2550250291824341, "learning_rate": 0.0001549724946185123, "loss": 0.0387, "step": 10611 }, { "epoch": 1.4873160476524176, "grad_norm": 0.4844677448272705, "learning_rate": 0.00015495814398469262, "loss": 0.0853, "step": 10612 }, { "epoch": 1.4874562018220043, "grad_norm": 0.2599497139453888, "learning_rate": 0.000154943793350873, "loss": 0.0417, "step": 10613 }, { "epoch": 1.4875963559915908, "grad_norm": 0.2552608847618103, "learning_rate": 0.00015492944271705333, "loss": 0.0423, "step": 10614 }, { "epoch": 1.4877365101611773, "grad_norm": 0.1310550719499588, "learning_rate": 0.00015491509208323366, "loss": 0.0112, "step": 10615 }, { "epoch": 1.4878766643307637, "grad_norm": 1.0396101474761963, "learning_rate": 0.00015490074144941402, "loss": 0.0906, "step": 10616 }, { "epoch": 1.4880168185003504, "grad_norm": 0.2367522120475769, "learning_rate": 0.00015488639081559434, "loss": 0.0442, "step": 10617 }, { "epoch": 1.4881569726699369, "grad_norm": 0.12786652147769928, "learning_rate": 0.00015487204018177467, "loss": 0.0171, "step": 10618 }, { "epoch": 1.4882971268395235, "grad_norm": 0.6618436574935913, "learning_rate": 0.00015485768954795503, "loss": 0.1401, "step": 10619 }, { "epoch": 1.48843728100911, "grad_norm": 0.16624286770820618, "learning_rate": 0.00015484333891413536, "loss": 0.0286, "step": 10620 }, { "epoch": 1.4885774351786965, "grad_norm": 0.3128873407840729, "learning_rate": 0.00015482898828031569, "loss": 0.0716, "step": 10621 }, { "epoch": 1.4887175893482831, "grad_norm": 0.10659144073724747, "learning_rate": 0.00015481463764649604, "loss": 0.0108, "step": 10622 }, { "epoch": 1.4888577435178696, "grad_norm": 0.43128708004951477, "learning_rate": 0.00015480028701267637, "loss": 0.1436, "step": 10623 }, { "epoch": 1.4889978976874563, "grad_norm": 0.3703378736972809, "learning_rate": 0.0001547859363788567, "loss": 0.0388, "step": 10624 }, { "epoch": 1.4891380518570427, "grad_norm": 
0.5118187665939331, "learning_rate": 0.00015477158574503708, "loss": 0.0707, "step": 10625 }, { "epoch": 1.4892782060266292, "grad_norm": 0.3890518546104431, "learning_rate": 0.0001547572351112174, "loss": 0.0756, "step": 10626 }, { "epoch": 1.4894183601962159, "grad_norm": 0.4319985508918762, "learning_rate": 0.00015474288447739774, "loss": 0.0447, "step": 10627 }, { "epoch": 1.4895585143658023, "grad_norm": 0.34239524602890015, "learning_rate": 0.00015472853384357807, "loss": 0.0637, "step": 10628 }, { "epoch": 1.489698668535389, "grad_norm": 0.33493366837501526, "learning_rate": 0.00015471418320975842, "loss": 0.0213, "step": 10629 }, { "epoch": 1.4898388227049755, "grad_norm": 0.7749230265617371, "learning_rate": 0.00015469983257593875, "loss": 0.1294, "step": 10630 }, { "epoch": 1.489978976874562, "grad_norm": 0.31648990511894226, "learning_rate": 0.00015468548194211908, "loss": 0.0118, "step": 10631 }, { "epoch": 1.4901191310441486, "grad_norm": 0.9662306308746338, "learning_rate": 0.00015467113130829944, "loss": 0.0986, "step": 10632 }, { "epoch": 1.490259285213735, "grad_norm": 1.3401962518692017, "learning_rate": 0.00015465678067447976, "loss": 0.0693, "step": 10633 }, { "epoch": 1.4903994393833218, "grad_norm": 0.11344845592975616, "learning_rate": 0.0001546424300406601, "loss": 0.0053, "step": 10634 }, { "epoch": 1.4905395935529082, "grad_norm": 0.4984247386455536, "learning_rate": 0.00015462807940684048, "loss": 0.0767, "step": 10635 }, { "epoch": 1.4906797477224947, "grad_norm": 0.28609466552734375, "learning_rate": 0.0001546137287730208, "loss": 0.0538, "step": 10636 }, { "epoch": 1.4908199018920814, "grad_norm": 0.2780989110469818, "learning_rate": 0.00015459937813920113, "loss": 0.0409, "step": 10637 }, { "epoch": 1.4909600560616678, "grad_norm": 0.18578656017780304, "learning_rate": 0.0001545850275053815, "loss": 0.028, "step": 10638 }, { "epoch": 1.4911002102312545, "grad_norm": 0.3846004903316498, "learning_rate": 0.00015457067687156182, "loss": 
0.0976, "step": 10639 }, { "epoch": 1.491240364400841, "grad_norm": 0.34138429164886475, "learning_rate": 0.00015455632623774215, "loss": 0.0298, "step": 10640 }, { "epoch": 1.4913805185704274, "grad_norm": 0.26356932520866394, "learning_rate": 0.0001545419756039225, "loss": 0.0921, "step": 10641 }, { "epoch": 1.491520672740014, "grad_norm": 0.1698404848575592, "learning_rate": 0.00015452762497010283, "loss": 0.0222, "step": 10642 }, { "epoch": 1.4916608269096006, "grad_norm": 0.3921966254711151, "learning_rate": 0.00015451327433628316, "loss": 0.0329, "step": 10643 }, { "epoch": 1.4918009810791872, "grad_norm": 0.21553845703601837, "learning_rate": 0.00015449892370246354, "loss": 0.0544, "step": 10644 }, { "epoch": 1.4919411352487737, "grad_norm": 0.3352382183074951, "learning_rate": 0.00015448457306864387, "loss": 0.064, "step": 10645 }, { "epoch": 1.4920812894183602, "grad_norm": 0.5347132682800293, "learning_rate": 0.0001544702224348242, "loss": 0.0665, "step": 10646 }, { "epoch": 1.4922214435879466, "grad_norm": 0.4259601831436157, "learning_rate": 0.00015445587180100453, "loss": 0.0677, "step": 10647 }, { "epoch": 1.4923615977575333, "grad_norm": 0.2352869212627411, "learning_rate": 0.00015444152116718488, "loss": 0.0419, "step": 10648 }, { "epoch": 1.4925017519271198, "grad_norm": 0.1949128806591034, "learning_rate": 0.0001544271705333652, "loss": 0.04, "step": 10649 }, { "epoch": 1.4926419060967064, "grad_norm": 0.309375137090683, "learning_rate": 0.00015441281989954554, "loss": 0.046, "step": 10650 }, { "epoch": 1.492782060266293, "grad_norm": 0.3392605483531952, "learning_rate": 0.0001543984692657259, "loss": 0.0211, "step": 10651 }, { "epoch": 1.4929222144358794, "grad_norm": 0.45508208870887756, "learning_rate": 0.00015438411863190622, "loss": 0.0603, "step": 10652 }, { "epoch": 1.493062368605466, "grad_norm": 0.3773292303085327, "learning_rate": 0.00015436976799808655, "loss": 0.0283, "step": 10653 }, { "epoch": 1.4932025227750525, "grad_norm": 
0.2173190861940384, "learning_rate": 0.0001543554173642669, "loss": 0.0343, "step": 10654 }, { "epoch": 1.4933426769446392, "grad_norm": 0.22467036545276642, "learning_rate": 0.00015434106673044724, "loss": 0.0396, "step": 10655 }, { "epoch": 1.4934828311142256, "grad_norm": 0.1732553392648697, "learning_rate": 0.00015432671609662756, "loss": 0.0277, "step": 10656 }, { "epoch": 1.493622985283812, "grad_norm": 0.6231887936592102, "learning_rate": 0.00015431236546280795, "loss": 0.0861, "step": 10657 }, { "epoch": 1.4937631394533988, "grad_norm": 0.1981246918439865, "learning_rate": 0.00015429801482898828, "loss": 0.0458, "step": 10658 }, { "epoch": 1.4939032936229852, "grad_norm": 0.2730039954185486, "learning_rate": 0.0001542836641951686, "loss": 0.012, "step": 10659 }, { "epoch": 1.494043447792572, "grad_norm": 0.3582146167755127, "learning_rate": 0.00015426931356134896, "loss": 0.068, "step": 10660 }, { "epoch": 1.4941836019621584, "grad_norm": 0.18658442795276642, "learning_rate": 0.0001542549629275293, "loss": 0.0344, "step": 10661 }, { "epoch": 1.4943237561317448, "grad_norm": 0.2621755003929138, "learning_rate": 0.00015424061229370962, "loss": 0.0432, "step": 10662 }, { "epoch": 1.4944639103013315, "grad_norm": 0.2550070881843567, "learning_rate": 0.00015422626165988995, "loss": 0.0263, "step": 10663 }, { "epoch": 1.494604064470918, "grad_norm": 0.7072445750236511, "learning_rate": 0.0001542119110260703, "loss": 0.0659, "step": 10664 }, { "epoch": 1.4947442186405047, "grad_norm": 0.2643151581287384, "learning_rate": 0.00015419756039225063, "loss": 0.0316, "step": 10665 }, { "epoch": 1.4948843728100911, "grad_norm": 0.20512066781520844, "learning_rate": 0.00015418320975843096, "loss": 0.03, "step": 10666 }, { "epoch": 1.4950245269796776, "grad_norm": 0.5069049596786499, "learning_rate": 0.00015416885912461134, "loss": 0.0823, "step": 10667 }, { "epoch": 1.4951646811492643, "grad_norm": 0.1641375720500946, "learning_rate": 0.00015415450849079167, "loss": 
0.0158, "step": 10668 }, { "epoch": 1.4953048353188507, "grad_norm": 0.4637381434440613, "learning_rate": 0.000154140157856972, "loss": 0.043, "step": 10669 }, { "epoch": 1.4954449894884374, "grad_norm": 0.15042731165885925, "learning_rate": 0.00015412580722315235, "loss": 0.0128, "step": 10670 }, { "epoch": 1.4955851436580239, "grad_norm": 0.2220517247915268, "learning_rate": 0.00015411145658933268, "loss": 0.0565, "step": 10671 }, { "epoch": 1.4957252978276103, "grad_norm": 0.45739394426345825, "learning_rate": 0.000154097105955513, "loss": 0.0992, "step": 10672 }, { "epoch": 1.4958654519971968, "grad_norm": 0.310108482837677, "learning_rate": 0.00015408275532169337, "loss": 0.0532, "step": 10673 }, { "epoch": 1.4960056061667835, "grad_norm": 0.8512653112411499, "learning_rate": 0.0001540684046878737, "loss": 0.1773, "step": 10674 }, { "epoch": 1.4961457603363701, "grad_norm": 0.6950824856758118, "learning_rate": 0.00015405405405405402, "loss": 0.0884, "step": 10675 }, { "epoch": 1.4962859145059566, "grad_norm": 0.2746245265007019, "learning_rate": 0.0001540397034202344, "loss": 0.0432, "step": 10676 }, { "epoch": 1.496426068675543, "grad_norm": 0.3589267432689667, "learning_rate": 0.00015402535278641474, "loss": 0.0726, "step": 10677 }, { "epoch": 1.4965662228451295, "grad_norm": 0.39547809958457947, "learning_rate": 0.00015401100215259506, "loss": 0.033, "step": 10678 }, { "epoch": 1.4967063770147162, "grad_norm": 0.34382662177085876, "learning_rate": 0.0001539966515187754, "loss": 0.0869, "step": 10679 }, { "epoch": 1.4968465311843027, "grad_norm": 1.4440655708312988, "learning_rate": 0.00015398230088495575, "loss": 0.0987, "step": 10680 }, { "epoch": 1.4969866853538893, "grad_norm": 0.47064104676246643, "learning_rate": 0.00015396795025113608, "loss": 0.056, "step": 10681 }, { "epoch": 1.4971268395234758, "grad_norm": 0.26009801030158997, "learning_rate": 0.0001539535996173164, "loss": 0.0278, "step": 10682 }, { "epoch": 1.4972669936930623, "grad_norm": 
0.20467530190944672, "learning_rate": 0.00015393924898349676, "loss": 0.038, "step": 10683 }, { "epoch": 1.497407147862649, "grad_norm": 0.24109293520450592, "learning_rate": 0.0001539248983496771, "loss": 0.0267, "step": 10684 }, { "epoch": 1.4975473020322354, "grad_norm": 1.0723930597305298, "learning_rate": 0.00015391054771585742, "loss": 0.1705, "step": 10685 }, { "epoch": 1.497687456201822, "grad_norm": 0.5197061896324158, "learning_rate": 0.00015389619708203777, "loss": 0.1189, "step": 10686 }, { "epoch": 1.4978276103714085, "grad_norm": 0.3153019845485687, "learning_rate": 0.0001538818464482181, "loss": 0.0434, "step": 10687 }, { "epoch": 1.497967764540995, "grad_norm": 0.20601604878902435, "learning_rate": 0.00015386749581439846, "loss": 0.0264, "step": 10688 }, { "epoch": 1.4981079187105817, "grad_norm": 0.21115687489509583, "learning_rate": 0.00015385314518057881, "loss": 0.1023, "step": 10689 }, { "epoch": 1.4982480728801681, "grad_norm": 0.4875928461551666, "learning_rate": 0.00015383879454675914, "loss": 0.0847, "step": 10690 }, { "epoch": 1.4983882270497548, "grad_norm": 0.4417114555835724, "learning_rate": 0.00015382444391293947, "loss": 0.0699, "step": 10691 }, { "epoch": 1.4985283812193413, "grad_norm": 0.5027158856391907, "learning_rate": 0.00015381009327911983, "loss": 0.0909, "step": 10692 }, { "epoch": 1.4986685353889277, "grad_norm": 0.4453561305999756, "learning_rate": 0.00015379574264530016, "loss": 0.0523, "step": 10693 }, { "epoch": 1.4988086895585144, "grad_norm": 0.19902105629444122, "learning_rate": 0.00015378139201148048, "loss": 0.0413, "step": 10694 }, { "epoch": 1.4989488437281009, "grad_norm": 0.36840540170669556, "learning_rate": 0.00015376704137766084, "loss": 0.0525, "step": 10695 }, { "epoch": 1.4990889978976876, "grad_norm": 0.32745835185050964, "learning_rate": 0.00015375269074384117, "loss": 0.059, "step": 10696 }, { "epoch": 1.499229152067274, "grad_norm": 0.6460821032524109, "learning_rate": 0.0001537383401100215, "loss": 
0.0995, "step": 10697 }, { "epoch": 1.4993693062368605, "grad_norm": 0.4269315302371979, "learning_rate": 0.00015372398947620183, "loss": 0.0592, "step": 10698 }, { "epoch": 1.4995094604064472, "grad_norm": 0.20190581679344177, "learning_rate": 0.0001537096388423822, "loss": 0.0204, "step": 10699 }, { "epoch": 1.4996496145760336, "grad_norm": 0.40271300077438354, "learning_rate": 0.00015369528820856254, "loss": 0.0768, "step": 10700 }, { "epoch": 1.4997897687456203, "grad_norm": 0.25591036677360535, "learning_rate": 0.00015368093757474287, "loss": 0.0386, "step": 10701 }, { "epoch": 1.4999299229152068, "grad_norm": 0.2654736042022705, "learning_rate": 0.00015366658694092322, "loss": 0.0404, "step": 10702 }, { "epoch": 1.5000700770847932, "grad_norm": 0.17650796473026276, "learning_rate": 0.00015365223630710355, "loss": 0.0421, "step": 10703 }, { "epoch": 1.5002102312543797, "grad_norm": 0.26553329825401306, "learning_rate": 0.00015363788567328388, "loss": 0.0405, "step": 10704 }, { "epoch": 1.5003503854239664, "grad_norm": 0.15579751133918762, "learning_rate": 0.00015362353503946423, "loss": 0.0829, "step": 10705 }, { "epoch": 1.500490539593553, "grad_norm": 0.3489481806755066, "learning_rate": 0.00015360918440564456, "loss": 0.0312, "step": 10706 }, { "epoch": 1.5006306937631395, "grad_norm": 0.299667090177536, "learning_rate": 0.0001535948337718249, "loss": 0.0478, "step": 10707 }, { "epoch": 1.500770847932726, "grad_norm": 0.09261930733919144, "learning_rate": 0.00015358048313800527, "loss": 0.0141, "step": 10708 }, { "epoch": 1.5009110021023124, "grad_norm": 0.5096018314361572, "learning_rate": 0.0001535661325041856, "loss": 0.0773, "step": 10709 }, { "epoch": 1.5010511562718991, "grad_norm": 0.09015260636806488, "learning_rate": 0.00015355178187036593, "loss": 0.0113, "step": 10710 }, { "epoch": 1.5011913104414858, "grad_norm": 0.8212284445762634, "learning_rate": 0.00015353743123654629, "loss": 0.0666, "step": 10711 }, { "epoch": 1.5013314646110723, 
"grad_norm": 0.519081711769104, "learning_rate": 0.00015352308060272661, "loss": 0.1139, "step": 10712 }, { "epoch": 1.5014716187806587, "grad_norm": 0.2797171473503113, "learning_rate": 0.00015350872996890694, "loss": 0.0302, "step": 10713 }, { "epoch": 1.5016117729502452, "grad_norm": 0.3629531264305115, "learning_rate": 0.00015349437933508727, "loss": 0.0469, "step": 10714 }, { "epoch": 1.5017519271198319, "grad_norm": 0.28467485308647156, "learning_rate": 0.00015348002870126763, "loss": 0.06, "step": 10715 }, { "epoch": 1.5018920812894183, "grad_norm": 0.28747886419296265, "learning_rate": 0.00015346567806744796, "loss": 0.0233, "step": 10716 }, { "epoch": 1.502032235459005, "grad_norm": 0.4266621172428131, "learning_rate": 0.00015345132743362828, "loss": 0.0316, "step": 10717 }, { "epoch": 1.5021723896285915, "grad_norm": 0.13075195252895355, "learning_rate": 0.00015343697679980864, "loss": 0.0147, "step": 10718 }, { "epoch": 1.502312543798178, "grad_norm": 0.12602153420448303, "learning_rate": 0.00015342262616598897, "loss": 0.0114, "step": 10719 }, { "epoch": 1.5024526979677646, "grad_norm": 0.1261730194091797, "learning_rate": 0.00015340827553216932, "loss": 0.0088, "step": 10720 }, { "epoch": 1.502592852137351, "grad_norm": 1.1034353971481323, "learning_rate": 0.00015339392489834968, "loss": 0.0737, "step": 10721 }, { "epoch": 1.5027330063069377, "grad_norm": 0.32052773237228394, "learning_rate": 0.00015337957426453, "loss": 0.0283, "step": 10722 }, { "epoch": 1.5028731604765242, "grad_norm": 0.4729958772659302, "learning_rate": 0.00015336522363071034, "loss": 0.0362, "step": 10723 }, { "epoch": 1.5030133146461107, "grad_norm": 0.4405868649482727, "learning_rate": 0.0001533508729968907, "loss": 0.0416, "step": 10724 }, { "epoch": 1.5031534688156971, "grad_norm": 0.9098767042160034, "learning_rate": 0.00015333652236307102, "loss": 0.1684, "step": 10725 }, { "epoch": 1.5032936229852838, "grad_norm": 0.16408763825893402, "learning_rate": 
0.00015332217172925135, "loss": 0.0327, "step": 10726 }, { "epoch": 1.5034337771548705, "grad_norm": 0.7781208753585815, "learning_rate": 0.0001533078210954317, "loss": 0.1479, "step": 10727 }, { "epoch": 1.503573931324457, "grad_norm": 0.22220692038536072, "learning_rate": 0.00015329347046161203, "loss": 0.024, "step": 10728 }, { "epoch": 1.5037140854940434, "grad_norm": 0.16805599629878998, "learning_rate": 0.00015327911982779236, "loss": 0.0081, "step": 10729 }, { "epoch": 1.5038542396636299, "grad_norm": 0.23774494230747223, "learning_rate": 0.00015326476919397275, "loss": 0.025, "step": 10730 }, { "epoch": 1.5039943938332165, "grad_norm": 1.0608748197555542, "learning_rate": 0.00015325041856015307, "loss": 0.0874, "step": 10731 }, { "epoch": 1.5041345480028032, "grad_norm": 1.797458291053772, "learning_rate": 0.0001532360679263334, "loss": 0.2008, "step": 10732 }, { "epoch": 1.5042747021723897, "grad_norm": 1.239945411682129, "learning_rate": 0.00015322171729251373, "loss": 0.0383, "step": 10733 }, { "epoch": 1.5044148563419761, "grad_norm": 1.1340250968933105, "learning_rate": 0.0001532073666586941, "loss": 0.0418, "step": 10734 }, { "epoch": 1.5045550105115626, "grad_norm": 0.3491586744785309, "learning_rate": 0.00015319301602487442, "loss": 0.0213, "step": 10735 }, { "epoch": 1.5046951646811493, "grad_norm": 0.32779473066329956, "learning_rate": 0.00015317866539105474, "loss": 0.0553, "step": 10736 }, { "epoch": 1.504835318850736, "grad_norm": 1.9793720245361328, "learning_rate": 0.0001531643147572351, "loss": 0.0548, "step": 10737 }, { "epoch": 1.5049754730203224, "grad_norm": 0.9499906301498413, "learning_rate": 0.00015314996412341543, "loss": 0.0463, "step": 10738 }, { "epoch": 1.5051156271899089, "grad_norm": 0.2471480667591095, "learning_rate": 0.00015313561348959576, "loss": 0.0105, "step": 10739 }, { "epoch": 1.5052557813594953, "grad_norm": 0.621599018573761, "learning_rate": 0.00015312126285577614, "loss": 0.0712, "step": 10740 }, { "epoch": 
1.505395935529082, "grad_norm": 0.15189777314662933, "learning_rate": 0.00015310691222195647, "loss": 0.0313, "step": 10741 }, { "epoch": 1.5055360896986687, "grad_norm": 0.32547056674957275, "learning_rate": 0.0001530925615881368, "loss": 0.0655, "step": 10742 }, { "epoch": 1.5056762438682552, "grad_norm": 0.31570181250572205, "learning_rate": 0.00015307821095431715, "loss": 0.0808, "step": 10743 }, { "epoch": 1.5058163980378416, "grad_norm": 0.35404089093208313, "learning_rate": 0.00015306386032049748, "loss": 0.0281, "step": 10744 }, { "epoch": 1.505956552207428, "grad_norm": 0.16628332436084747, "learning_rate": 0.0001530495096866778, "loss": 0.0346, "step": 10745 }, { "epoch": 1.5060967063770148, "grad_norm": 0.36109432578086853, "learning_rate": 0.00015303515905285817, "loss": 0.0804, "step": 10746 }, { "epoch": 1.5062368605466012, "grad_norm": 0.5275200009346008, "learning_rate": 0.0001530208084190385, "loss": 0.088, "step": 10747 }, { "epoch": 1.506377014716188, "grad_norm": 0.06977388262748718, "learning_rate": 0.00015300645778521882, "loss": 0.0089, "step": 10748 }, { "epoch": 1.5065171688857744, "grad_norm": 0.352504163980484, "learning_rate": 0.00015299210715139915, "loss": 0.0482, "step": 10749 }, { "epoch": 1.5066573230553608, "grad_norm": 0.3667183518409729, "learning_rate": 0.0001529777565175795, "loss": 0.0917, "step": 10750 }, { "epoch": 1.5067974772249473, "grad_norm": 0.14459437131881714, "learning_rate": 0.00015296340588375984, "loss": 0.0097, "step": 10751 }, { "epoch": 1.506937631394534, "grad_norm": 0.308759868144989, "learning_rate": 0.0001529490552499402, "loss": 0.0657, "step": 10752 }, { "epoch": 1.5070777855641206, "grad_norm": 0.29015445709228516, "learning_rate": 0.00015293470461612055, "loss": 0.0647, "step": 10753 }, { "epoch": 1.507217939733707, "grad_norm": 0.3704620599746704, "learning_rate": 0.00015292035398230088, "loss": 0.0681, "step": 10754 }, { "epoch": 1.5073580939032936, "grad_norm": 0.2279195338487625, "learning_rate": 
0.0001529060033484812, "loss": 0.0319, "step": 10755 }, { "epoch": 1.50749824807288, "grad_norm": 0.12060660868883133, "learning_rate": 0.00015289165271466156, "loss": 0.0147, "step": 10756 }, { "epoch": 1.5076384022424667, "grad_norm": 0.5635783076286316, "learning_rate": 0.0001528773020808419, "loss": 0.0523, "step": 10757 }, { "epoch": 1.5077785564120534, "grad_norm": 0.2604323923587799, "learning_rate": 0.00015286295144702222, "loss": 0.0332, "step": 10758 }, { "epoch": 1.5079187105816398, "grad_norm": 0.29043710231781006, "learning_rate": 0.00015284860081320257, "loss": 0.0611, "step": 10759 }, { "epoch": 1.5080588647512263, "grad_norm": 0.28768956661224365, "learning_rate": 0.0001528342501793829, "loss": 0.0515, "step": 10760 }, { "epoch": 1.5081990189208128, "grad_norm": 0.21576617658138275, "learning_rate": 0.00015281989954556323, "loss": 0.0358, "step": 10761 }, { "epoch": 1.5083391730903994, "grad_norm": 0.08806242793798447, "learning_rate": 0.0001528055489117436, "loss": 0.0063, "step": 10762 }, { "epoch": 1.5084793272599861, "grad_norm": 0.25208035111427307, "learning_rate": 0.00015279119827792394, "loss": 0.0182, "step": 10763 }, { "epoch": 1.5086194814295726, "grad_norm": 0.6288411617279053, "learning_rate": 0.00015277684764410427, "loss": 0.0524, "step": 10764 }, { "epoch": 1.508759635599159, "grad_norm": 0.5118399858474731, "learning_rate": 0.00015276249701028462, "loss": 0.0639, "step": 10765 }, { "epoch": 1.5088997897687455, "grad_norm": 0.5515400171279907, "learning_rate": 0.00015274814637646495, "loss": 0.1634, "step": 10766 }, { "epoch": 1.5090399439383322, "grad_norm": 0.4751434028148651, "learning_rate": 0.00015273379574264528, "loss": 0.0782, "step": 10767 }, { "epoch": 1.5091800981079189, "grad_norm": 0.5392279624938965, "learning_rate": 0.0001527194451088256, "loss": 0.1296, "step": 10768 }, { "epoch": 1.5093202522775053, "grad_norm": 0.40058988332748413, "learning_rate": 0.00015270509447500597, "loss": 0.0742, "step": 10769 }, { "epoch": 
1.5094604064470918, "grad_norm": 0.36171796917915344, "learning_rate": 0.0001526907438411863, "loss": 0.0234, "step": 10770 }, { "epoch": 1.5096005606166782, "grad_norm": 0.2082325667142868, "learning_rate": 0.00015267639320736662, "loss": 0.0382, "step": 10771 }, { "epoch": 1.509740714786265, "grad_norm": 0.18789510428905487, "learning_rate": 0.000152662042573547, "loss": 0.0295, "step": 10772 }, { "epoch": 1.5098808689558514, "grad_norm": 0.5805644392967224, "learning_rate": 0.00015264769193972733, "loss": 0.0232, "step": 10773 }, { "epoch": 1.510021023125438, "grad_norm": 0.4467480182647705, "learning_rate": 0.00015263334130590766, "loss": 0.0176, "step": 10774 }, { "epoch": 1.5101611772950245, "grad_norm": 0.3863131105899811, "learning_rate": 0.00015261899067208802, "loss": 0.0422, "step": 10775 }, { "epoch": 1.510301331464611, "grad_norm": 0.0412088967859745, "learning_rate": 0.00015260464003826835, "loss": 0.0041, "step": 10776 }, { "epoch": 1.5104414856341977, "grad_norm": 0.4299469292163849, "learning_rate": 0.00015259028940444868, "loss": 0.1029, "step": 10777 }, { "epoch": 1.5105816398037841, "grad_norm": 0.6618829965591431, "learning_rate": 0.00015257593877062903, "loss": 0.0983, "step": 10778 }, { "epoch": 1.5107217939733708, "grad_norm": 0.6282081007957458, "learning_rate": 0.00015256158813680936, "loss": 0.0961, "step": 10779 }, { "epoch": 1.5108619481429573, "grad_norm": 0.028919318690896034, "learning_rate": 0.0001525472375029897, "loss": 0.0023, "step": 10780 }, { "epoch": 1.5110021023125437, "grad_norm": 0.24769586324691772, "learning_rate": 0.00015253288686917004, "loss": 0.0297, "step": 10781 }, { "epoch": 1.5111422564821302, "grad_norm": 0.1989944875240326, "learning_rate": 0.00015251853623535037, "loss": 0.0208, "step": 10782 }, { "epoch": 1.5112824106517169, "grad_norm": 0.196117103099823, "learning_rate": 0.0001525041856015307, "loss": 0.0237, "step": 10783 }, { "epoch": 1.5114225648213035, "grad_norm": 1.185658574104309, "learning_rate": 
0.00015248983496771106, "loss": 0.0879, "step": 10784 }, { "epoch": 1.51156271899089, "grad_norm": 2.8151087760925293, "learning_rate": 0.0001524754843338914, "loss": 0.4963, "step": 10785 }, { "epoch": 1.5117028731604765, "grad_norm": 0.08544868230819702, "learning_rate": 0.00015246113370007174, "loss": 0.0125, "step": 10786 }, { "epoch": 1.511843027330063, "grad_norm": 0.25065869092941284, "learning_rate": 0.00015244678306625207, "loss": 0.0456, "step": 10787 }, { "epoch": 1.5119831814996496, "grad_norm": 0.32535651326179504, "learning_rate": 0.00015243243243243243, "loss": 0.0737, "step": 10788 }, { "epoch": 1.5121233356692363, "grad_norm": 0.11083528399467468, "learning_rate": 0.00015241808179861275, "loss": 0.0047, "step": 10789 }, { "epoch": 1.5122634898388227, "grad_norm": 0.5659446716308594, "learning_rate": 0.00015240373116479308, "loss": 0.1077, "step": 10790 }, { "epoch": 1.5124036440084092, "grad_norm": 0.2669200897216797, "learning_rate": 0.00015238938053097344, "loss": 0.0359, "step": 10791 }, { "epoch": 1.5125437981779957, "grad_norm": 0.3054511249065399, "learning_rate": 0.00015237502989715377, "loss": 0.0374, "step": 10792 }, { "epoch": 1.5126839523475824, "grad_norm": 0.1364801675081253, "learning_rate": 0.0001523606792633341, "loss": 0.0271, "step": 10793 }, { "epoch": 1.512824106517169, "grad_norm": 0.2876091003417969, "learning_rate": 0.00015234632862951448, "loss": 0.0248, "step": 10794 }, { "epoch": 1.5129642606867555, "grad_norm": 1.205754280090332, "learning_rate": 0.0001523319779956948, "loss": 0.054, "step": 10795 }, { "epoch": 1.513104414856342, "grad_norm": 0.3665226101875305, "learning_rate": 0.00015231762736187514, "loss": 0.069, "step": 10796 }, { "epoch": 1.5132445690259284, "grad_norm": 0.4154714047908783, "learning_rate": 0.0001523032767280555, "loss": 0.0285, "step": 10797 }, { "epoch": 1.513384723195515, "grad_norm": 0.9619696140289307, "learning_rate": 0.00015228892609423582, "loss": 0.0872, "step": 10798 }, { "epoch": 
1.5135248773651018, "grad_norm": 0.28615206480026245, "learning_rate": 0.00015227457546041615, "loss": 0.0306, "step": 10799 }, { "epoch": 1.5136650315346882, "grad_norm": 0.33452072739601135, "learning_rate": 0.0001522602248265965, "loss": 0.0382, "step": 10800 }, { "epoch": 1.5138051857042747, "grad_norm": 0.26759278774261475, "learning_rate": 0.00015224587419277683, "loss": 0.0381, "step": 10801 }, { "epoch": 1.5139453398738612, "grad_norm": 0.2677653729915619, "learning_rate": 0.00015223152355895716, "loss": 0.0263, "step": 10802 }, { "epoch": 1.5140854940434478, "grad_norm": 0.6581940650939941, "learning_rate": 0.0001522171729251375, "loss": 0.0385, "step": 10803 }, { "epoch": 1.5142256482130343, "grad_norm": 0.31521183252334595, "learning_rate": 0.00015220282229131787, "loss": 0.0744, "step": 10804 }, { "epoch": 1.514365802382621, "grad_norm": 0.21869181096553802, "learning_rate": 0.0001521884716574982, "loss": 0.0468, "step": 10805 }, { "epoch": 1.5145059565522074, "grad_norm": 0.5995174050331116, "learning_rate": 0.00015217412102367853, "loss": 0.0663, "step": 10806 }, { "epoch": 1.514646110721794, "grad_norm": 0.3601396977901459, "learning_rate": 0.00015215977038985889, "loss": 0.0319, "step": 10807 }, { "epoch": 1.5147862648913806, "grad_norm": 0.368913471698761, "learning_rate": 0.00015214541975603921, "loss": 0.0474, "step": 10808 }, { "epoch": 1.514926419060967, "grad_norm": 0.44042468070983887, "learning_rate": 0.00015213106912221954, "loss": 0.0466, "step": 10809 }, { "epoch": 1.5150665732305537, "grad_norm": 0.24894589185714722, "learning_rate": 0.0001521167184883999, "loss": 0.0533, "step": 10810 }, { "epoch": 1.5152067274001402, "grad_norm": 0.3458297550678253, "learning_rate": 0.00015210236785458023, "loss": 0.0454, "step": 10811 }, { "epoch": 1.5153468815697266, "grad_norm": 0.5747541189193726, "learning_rate": 0.00015208801722076056, "loss": 0.0553, "step": 10812 }, { "epoch": 1.515487035739313, "grad_norm": 0.6571207046508789, "learning_rate": 
0.0001520736665869409, "loss": 0.1397, "step": 10813 }, { "epoch": 1.5156271899088998, "grad_norm": 0.16182446479797363, "learning_rate": 0.00015205931595312124, "loss": 0.0122, "step": 10814 }, { "epoch": 1.5157673440784865, "grad_norm": 0.7133028507232666, "learning_rate": 0.0001520449653193016, "loss": 0.0396, "step": 10815 }, { "epoch": 1.515907498248073, "grad_norm": 0.468172162771225, "learning_rate": 0.00015203061468548195, "loss": 0.0347, "step": 10816 }, { "epoch": 1.5160476524176594, "grad_norm": 0.19602705538272858, "learning_rate": 0.00015201626405166228, "loss": 0.0264, "step": 10817 }, { "epoch": 1.5161878065872458, "grad_norm": 0.25425300002098083, "learning_rate": 0.0001520019134178426, "loss": 0.0159, "step": 10818 }, { "epoch": 1.5163279607568325, "grad_norm": 0.30720216035842896, "learning_rate": 0.00015198756278402294, "loss": 0.0195, "step": 10819 }, { "epoch": 1.5164681149264192, "grad_norm": 0.47727319598197937, "learning_rate": 0.0001519732121502033, "loss": 0.0581, "step": 10820 }, { "epoch": 1.5166082690960057, "grad_norm": 0.2693765163421631, "learning_rate": 0.00015195886151638362, "loss": 0.0435, "step": 10821 }, { "epoch": 1.5167484232655921, "grad_norm": 0.20326267182826996, "learning_rate": 0.00015194451088256395, "loss": 0.018, "step": 10822 }, { "epoch": 1.5168885774351786, "grad_norm": 0.13905519247055054, "learning_rate": 0.0001519301602487443, "loss": 0.025, "step": 10823 }, { "epoch": 1.5170287316047653, "grad_norm": 0.32441219687461853, "learning_rate": 0.00015191580961492463, "loss": 0.062, "step": 10824 }, { "epoch": 1.517168885774352, "grad_norm": 0.25833946466445923, "learning_rate": 0.00015190145898110496, "loss": 0.0551, "step": 10825 }, { "epoch": 1.5173090399439384, "grad_norm": 0.38717103004455566, "learning_rate": 0.00015188710834728534, "loss": 0.0379, "step": 10826 }, { "epoch": 1.5174491941135249, "grad_norm": 0.17563162744045258, "learning_rate": 0.00015187275771346567, "loss": 0.0163, "step": 10827 }, { "epoch": 
1.5175893482831113, "grad_norm": 1.8284509181976318, "learning_rate": 0.000151858407079646, "loss": 0.0683, "step": 10828 }, { "epoch": 1.517729502452698, "grad_norm": 0.7358977794647217, "learning_rate": 0.00015184405644582636, "loss": 0.0851, "step": 10829 }, { "epoch": 1.5178696566222845, "grad_norm": 0.20001107454299927, "learning_rate": 0.00015182970581200669, "loss": 0.0427, "step": 10830 }, { "epoch": 1.5180098107918711, "grad_norm": 0.22446341812610626, "learning_rate": 0.00015181535517818701, "loss": 0.0157, "step": 10831 }, { "epoch": 1.5181499649614576, "grad_norm": 0.9096955060958862, "learning_rate": 0.00015180100454436737, "loss": 0.2369, "step": 10832 }, { "epoch": 1.518290119131044, "grad_norm": 1.0665274858474731, "learning_rate": 0.0001517866539105477, "loss": 0.1144, "step": 10833 }, { "epoch": 1.5184302733006307, "grad_norm": 2.253868341445923, "learning_rate": 0.00015177230327672803, "loss": 0.308, "step": 10834 }, { "epoch": 1.5185704274702172, "grad_norm": 1.3804970979690552, "learning_rate": 0.0001517579526429084, "loss": 0.3864, "step": 10835 }, { "epoch": 1.5187105816398039, "grad_norm": 0.4434795081615448, "learning_rate": 0.00015174360200908874, "loss": 0.0527, "step": 10836 }, { "epoch": 1.5188507358093903, "grad_norm": 0.27656427025794983, "learning_rate": 0.00015172925137526907, "loss": 0.0571, "step": 10837 }, { "epoch": 1.5189908899789768, "grad_norm": 0.2262948751449585, "learning_rate": 0.0001517149007414494, "loss": 0.0714, "step": 10838 }, { "epoch": 1.5191310441485633, "grad_norm": 0.15831328928470612, "learning_rate": 0.00015170055010762975, "loss": 0.0553, "step": 10839 }, { "epoch": 1.51927119831815, "grad_norm": 0.3244056701660156, "learning_rate": 0.00015168619947381008, "loss": 0.1105, "step": 10840 }, { "epoch": 1.5194113524877366, "grad_norm": 0.2566559910774231, "learning_rate": 0.0001516718488399904, "loss": 0.0942, "step": 10841 }, { "epoch": 1.519551506657323, "grad_norm": 0.29002147912979126, "learning_rate": 
0.00015165749820617076, "loss": 0.0483, "step": 10842 }, { "epoch": 1.5196916608269095, "grad_norm": 0.7608566284179688, "learning_rate": 0.0001516431475723511, "loss": 0.0785, "step": 10843 }, { "epoch": 1.519831814996496, "grad_norm": 0.24261343479156494, "learning_rate": 0.00015162879693853142, "loss": 0.0563, "step": 10844 }, { "epoch": 1.5199719691660827, "grad_norm": 0.11511098593473434, "learning_rate": 0.00015161444630471178, "loss": 0.0286, "step": 10845 }, { "epoch": 1.5201121233356694, "grad_norm": 0.30866873264312744, "learning_rate": 0.0001516000956708921, "loss": 0.051, "step": 10846 }, { "epoch": 1.5202522775052558, "grad_norm": 0.23170363903045654, "learning_rate": 0.00015158574503707246, "loss": 0.0397, "step": 10847 }, { "epoch": 1.5203924316748423, "grad_norm": 0.21092519164085388, "learning_rate": 0.00015157139440325282, "loss": 0.0439, "step": 10848 }, { "epoch": 1.5205325858444287, "grad_norm": 0.14955882728099823, "learning_rate": 0.00015155704376943315, "loss": 0.0403, "step": 10849 }, { "epoch": 1.5206727400140154, "grad_norm": 0.3115289807319641, "learning_rate": 0.00015154269313561347, "loss": 0.0484, "step": 10850 }, { "epoch": 1.520812894183602, "grad_norm": 0.46107035875320435, "learning_rate": 0.00015152834250179383, "loss": 0.0468, "step": 10851 }, { "epoch": 1.5209530483531886, "grad_norm": 0.13584738969802856, "learning_rate": 0.00015151399186797416, "loss": 0.0282, "step": 10852 }, { "epoch": 1.521093202522775, "grad_norm": 0.3119492828845978, "learning_rate": 0.0001514996412341545, "loss": 0.0657, "step": 10853 }, { "epoch": 1.5212333566923615, "grad_norm": 0.35627949237823486, "learning_rate": 0.00015148529060033482, "loss": 0.0848, "step": 10854 }, { "epoch": 1.5213735108619482, "grad_norm": 0.10402132570743561, "learning_rate": 0.00015147093996651517, "loss": 0.0234, "step": 10855 }, { "epoch": 1.5215136650315348, "grad_norm": 0.42414355278015137, "learning_rate": 0.0001514565893326955, "loss": 0.042, "step": 10856 }, { 
"epoch": 1.5216538192011213, "grad_norm": 0.3917308747768402, "learning_rate": 0.00015144223869887583, "loss": 0.1049, "step": 10857 }, { "epoch": 1.5217939733707078, "grad_norm": 0.2178390771150589, "learning_rate": 0.0001514278880650562, "loss": 0.016, "step": 10858 }, { "epoch": 1.5219341275402942, "grad_norm": 0.1455674022436142, "learning_rate": 0.00015141353743123654, "loss": 0.0617, "step": 10859 }, { "epoch": 1.522074281709881, "grad_norm": 0.2771393358707428, "learning_rate": 0.00015139918679741687, "loss": 0.0659, "step": 10860 }, { "epoch": 1.5222144358794674, "grad_norm": 0.1687132567167282, "learning_rate": 0.00015138483616359722, "loss": 0.0249, "step": 10861 }, { "epoch": 1.522354590049054, "grad_norm": 0.5420262217521667, "learning_rate": 0.00015137048552977755, "loss": 0.0626, "step": 10862 }, { "epoch": 1.5224947442186405, "grad_norm": 0.20648440718650818, "learning_rate": 0.00015135613489595788, "loss": 0.0362, "step": 10863 }, { "epoch": 1.522634898388227, "grad_norm": 0.5218943953514099, "learning_rate": 0.00015134178426213824, "loss": 0.1662, "step": 10864 }, { "epoch": 1.5227750525578136, "grad_norm": 0.12385623902082443, "learning_rate": 0.00015132743362831857, "loss": 0.014, "step": 10865 }, { "epoch": 1.5229152067274, "grad_norm": 0.9721292853355408, "learning_rate": 0.0001513130829944989, "loss": 0.1214, "step": 10866 }, { "epoch": 1.5230553608969868, "grad_norm": 0.4468240439891815, "learning_rate": 0.00015129873236067928, "loss": 0.1219, "step": 10867 }, { "epoch": 1.5231955150665732, "grad_norm": 0.7129539847373962, "learning_rate": 0.0001512843817268596, "loss": 0.0506, "step": 10868 }, { "epoch": 1.5233356692361597, "grad_norm": 0.256916344165802, "learning_rate": 0.00015127003109303993, "loss": 0.0275, "step": 10869 }, { "epoch": 1.5234758234057462, "grad_norm": 0.434985876083374, "learning_rate": 0.0001512556804592203, "loss": 0.0929, "step": 10870 }, { "epoch": 1.5236159775753328, "grad_norm": 0.40868932008743286, "learning_rate": 
0.00015124132982540062, "loss": 0.0319, "step": 10871 }, { "epoch": 1.5237561317449195, "grad_norm": 0.3004898130893707, "learning_rate": 0.00015122697919158095, "loss": 0.0213, "step": 10872 }, { "epoch": 1.523896285914506, "grad_norm": 0.5182176828384399, "learning_rate": 0.00015121262855776127, "loss": 0.0371, "step": 10873 }, { "epoch": 1.5240364400840924, "grad_norm": 0.10452108085155487, "learning_rate": 0.00015119827792394163, "loss": 0.0145, "step": 10874 }, { "epoch": 1.524176594253679, "grad_norm": 0.2919504642486572, "learning_rate": 0.00015118392729012196, "loss": 0.0318, "step": 10875 }, { "epoch": 1.5243167484232656, "grad_norm": 0.2737484276294708, "learning_rate": 0.0001511695766563023, "loss": 0.0642, "step": 10876 }, { "epoch": 1.5244569025928523, "grad_norm": 0.8553444147109985, "learning_rate": 0.00015115522602248264, "loss": 0.133, "step": 10877 }, { "epoch": 1.5245970567624387, "grad_norm": 0.3205641806125641, "learning_rate": 0.00015114087538866297, "loss": 0.0666, "step": 10878 }, { "epoch": 1.5247372109320252, "grad_norm": 0.5432232618331909, "learning_rate": 0.00015112652475484333, "loss": 0.0667, "step": 10879 }, { "epoch": 1.5248773651016116, "grad_norm": 0.12649619579315186, "learning_rate": 0.00015111217412102368, "loss": 0.0124, "step": 10880 }, { "epoch": 1.5250175192711983, "grad_norm": 0.1093301922082901, "learning_rate": 0.000151097823487204, "loss": 0.0086, "step": 10881 }, { "epoch": 1.525157673440785, "grad_norm": 0.6656155586242676, "learning_rate": 0.00015108347285338434, "loss": 0.1079, "step": 10882 }, { "epoch": 1.5252978276103715, "grad_norm": 0.6874668598175049, "learning_rate": 0.0001510691222195647, "loss": 0.1198, "step": 10883 }, { "epoch": 1.525437981779958, "grad_norm": 1.210294485092163, "learning_rate": 0.00015105477158574502, "loss": 0.1503, "step": 10884 }, { "epoch": 1.5255781359495444, "grad_norm": 3.4931607246398926, "learning_rate": 0.00015104042095192535, "loss": 0.7081, "step": 10885 }, { "epoch": 
1.525718290119131, "grad_norm": 0.17720280587673187, "learning_rate": 0.0001510260703181057, "loss": 0.032, "step": 10886 }, { "epoch": 1.5258584442887178, "grad_norm": 0.2815784811973572, "learning_rate": 0.00015101171968428604, "loss": 0.0438, "step": 10887 }, { "epoch": 1.5259985984583042, "grad_norm": 0.2656289339065552, "learning_rate": 0.00015099736905046637, "loss": 0.0493, "step": 10888 }, { "epoch": 1.5261387526278907, "grad_norm": 0.21136623620986938, "learning_rate": 0.0001509830184166467, "loss": 0.0575, "step": 10889 }, { "epoch": 1.5262789067974771, "grad_norm": 0.14760293066501617, "learning_rate": 0.00015096866778282708, "loss": 0.0136, "step": 10890 }, { "epoch": 1.5264190609670638, "grad_norm": 0.19249001145362854, "learning_rate": 0.0001509543171490074, "loss": 0.0212, "step": 10891 }, { "epoch": 1.5265592151366503, "grad_norm": 0.29600653052330017, "learning_rate": 0.00015093996651518773, "loss": 0.0235, "step": 10892 }, { "epoch": 1.526699369306237, "grad_norm": 0.17708593606948853, "learning_rate": 0.0001509256158813681, "loss": 0.0265, "step": 10893 }, { "epoch": 1.5268395234758234, "grad_norm": 1.0646584033966064, "learning_rate": 0.00015091126524754842, "loss": 0.0537, "step": 10894 }, { "epoch": 1.5269796776454099, "grad_norm": 0.5252156257629395, "learning_rate": 0.00015089691461372875, "loss": 0.062, "step": 10895 }, { "epoch": 1.5271198318149963, "grad_norm": 0.4272652864456177, "learning_rate": 0.0001508825639799091, "loss": 0.0485, "step": 10896 }, { "epoch": 1.527259985984583, "grad_norm": 0.3775762915611267, "learning_rate": 0.00015086821334608943, "loss": 0.0617, "step": 10897 }, { "epoch": 1.5274001401541697, "grad_norm": 0.9619895219802856, "learning_rate": 0.00015085386271226976, "loss": 0.0674, "step": 10898 }, { "epoch": 1.5275402943237562, "grad_norm": 0.18873095512390137, "learning_rate": 0.00015083951207845014, "loss": 0.0202, "step": 10899 }, { "epoch": 1.5276804484933426, "grad_norm": 0.4533403813838959, "learning_rate": 
0.00015082516144463047, "loss": 0.1591, "step": 10900 }, { "epoch": 1.527820602662929, "grad_norm": 0.47293156385421753, "learning_rate": 0.0001508108108108108, "loss": 0.0539, "step": 10901 }, { "epoch": 1.5279607568325158, "grad_norm": 0.5269839763641357, "learning_rate": 0.00015079646017699116, "loss": 0.108, "step": 10902 }, { "epoch": 1.5281009110021024, "grad_norm": 0.18391455709934235, "learning_rate": 0.00015078210954317148, "loss": 0.03, "step": 10903 }, { "epoch": 1.528241065171689, "grad_norm": 1.3996319770812988, "learning_rate": 0.0001507677589093518, "loss": 0.1065, "step": 10904 }, { "epoch": 1.5283812193412754, "grad_norm": 0.37039241194725037, "learning_rate": 0.00015075340827553217, "loss": 0.0438, "step": 10905 }, { "epoch": 1.5285213735108618, "grad_norm": 0.20693868398666382, "learning_rate": 0.0001507390576417125, "loss": 0.0159, "step": 10906 }, { "epoch": 1.5286615276804485, "grad_norm": 0.16598059237003326, "learning_rate": 0.00015072470700789283, "loss": 0.0344, "step": 10907 }, { "epoch": 1.5288016818500352, "grad_norm": 0.9544166326522827, "learning_rate": 0.00015071035637407315, "loss": 0.1074, "step": 10908 }, { "epoch": 1.5289418360196216, "grad_norm": 0.3196631669998169, "learning_rate": 0.0001506960057402535, "loss": 0.0422, "step": 10909 }, { "epoch": 1.529081990189208, "grad_norm": 0.34100693464279175, "learning_rate": 0.00015068165510643387, "loss": 0.0142, "step": 10910 }, { "epoch": 1.5292221443587946, "grad_norm": 0.6050891280174255, "learning_rate": 0.0001506673044726142, "loss": 0.0949, "step": 10911 }, { "epoch": 1.5293622985283812, "grad_norm": 0.19235040247440338, "learning_rate": 0.00015065295383879455, "loss": 0.0389, "step": 10912 }, { "epoch": 1.529502452697968, "grad_norm": 0.23279502987861633, "learning_rate": 0.00015063860320497488, "loss": 0.0352, "step": 10913 }, { "epoch": 1.5296426068675544, "grad_norm": 0.18740299344062805, "learning_rate": 0.0001506242525711552, "loss": 0.0273, "step": 10914 }, { "epoch": 
1.5297827610371408, "grad_norm": 0.15140323340892792, "learning_rate": 0.00015060990193733556, "loss": 0.0201, "step": 10915 }, { "epoch": 1.5299229152067273, "grad_norm": 0.21403227746486664, "learning_rate": 0.0001505955513035159, "loss": 0.0355, "step": 10916 }, { "epoch": 1.530063069376314, "grad_norm": 0.23515480756759644, "learning_rate": 0.00015058120066969622, "loss": 0.0498, "step": 10917 }, { "epoch": 1.5302032235459004, "grad_norm": 0.24206171929836273, "learning_rate": 0.00015056685003587658, "loss": 0.0551, "step": 10918 }, { "epoch": 1.5303433777154871, "grad_norm": 0.1398821622133255, "learning_rate": 0.0001505524994020569, "loss": 0.0338, "step": 10919 }, { "epoch": 1.5304835318850736, "grad_norm": 0.4756773114204407, "learning_rate": 0.00015053814876823723, "loss": 0.072, "step": 10920 }, { "epoch": 1.53062368605466, "grad_norm": 0.08770030736923218, "learning_rate": 0.00015052379813441761, "loss": 0.0072, "step": 10921 }, { "epoch": 1.5307638402242467, "grad_norm": 0.4261038899421692, "learning_rate": 0.00015050944750059794, "loss": 0.0904, "step": 10922 }, { "epoch": 1.5309039943938332, "grad_norm": 0.48148220777511597, "learning_rate": 0.00015049509686677827, "loss": 0.0984, "step": 10923 }, { "epoch": 1.5310441485634199, "grad_norm": 0.5076977014541626, "learning_rate": 0.0001504807462329586, "loss": 0.0799, "step": 10924 }, { "epoch": 1.5311843027330063, "grad_norm": 0.7440010905265808, "learning_rate": 0.00015046639559913896, "loss": 0.0977, "step": 10925 }, { "epoch": 1.5313244569025928, "grad_norm": 1.4425071477890015, "learning_rate": 0.00015045204496531928, "loss": 0.0555, "step": 10926 }, { "epoch": 1.5314646110721792, "grad_norm": 0.30451399087905884, "learning_rate": 0.0001504376943314996, "loss": 0.0227, "step": 10927 }, { "epoch": 1.531604765241766, "grad_norm": 0.5178651809692383, "learning_rate": 0.00015042334369767997, "loss": 0.0256, "step": 10928 }, { "epoch": 1.5317449194113526, "grad_norm": 0.26351237297058105, 
"learning_rate": 0.0001504089930638603, "loss": 0.015, "step": 10929 }, { "epoch": 1.531885073580939, "grad_norm": 0.7813514471054077, "learning_rate": 0.00015039464243004063, "loss": 0.0981, "step": 10930 }, { "epoch": 1.5320252277505255, "grad_norm": 0.5308046936988831, "learning_rate": 0.000150380291796221, "loss": 0.1238, "step": 10931 }, { "epoch": 1.532165381920112, "grad_norm": 0.8562813997268677, "learning_rate": 0.00015036594116240134, "loss": 0.0528, "step": 10932 }, { "epoch": 1.5323055360896987, "grad_norm": 0.872998833656311, "learning_rate": 0.00015035159052858167, "loss": 0.0439, "step": 10933 }, { "epoch": 1.5324456902592853, "grad_norm": 1.37801992893219, "learning_rate": 0.00015033723989476202, "loss": 0.1312, "step": 10934 }, { "epoch": 1.5325858444288718, "grad_norm": 0.14941856265068054, "learning_rate": 0.00015032288926094235, "loss": 0.0056, "step": 10935 }, { "epoch": 1.5327259985984583, "grad_norm": 0.20218883454799652, "learning_rate": 0.00015030853862712268, "loss": 0.0459, "step": 10936 }, { "epoch": 1.5328661527680447, "grad_norm": 0.2454679012298584, "learning_rate": 0.00015029418799330303, "loss": 0.0426, "step": 10937 }, { "epoch": 1.5330063069376314, "grad_norm": 0.14818303287029266, "learning_rate": 0.00015027983735948336, "loss": 0.0232, "step": 10938 }, { "epoch": 1.533146461107218, "grad_norm": 0.14970046281814575, "learning_rate": 0.0001502654867256637, "loss": 0.0354, "step": 10939 }, { "epoch": 1.5332866152768045, "grad_norm": 0.04809822142124176, "learning_rate": 0.00015025113609184405, "loss": 0.0047, "step": 10940 }, { "epoch": 1.533426769446391, "grad_norm": 0.20709531009197235, "learning_rate": 0.00015023678545802438, "loss": 0.0189, "step": 10941 }, { "epoch": 1.5335669236159775, "grad_norm": 0.29231974482536316, "learning_rate": 0.00015022243482420473, "loss": 0.0268, "step": 10942 }, { "epoch": 1.5337070777855641, "grad_norm": 0.12852610647678375, "learning_rate": 0.00015020808419038506, "loss": 0.0411, "step": 10943 
}, { "epoch": 1.5338472319551508, "grad_norm": 0.18194787204265594, "learning_rate": 0.00015019373355656542, "loss": 0.0241, "step": 10944 }, { "epoch": 1.5339873861247373, "grad_norm": 0.42633524537086487, "learning_rate": 0.00015017938292274574, "loss": 0.0329, "step": 10945 }, { "epoch": 1.5341275402943237, "grad_norm": 0.2647063434123993, "learning_rate": 0.00015016503228892607, "loss": 0.0262, "step": 10946 }, { "epoch": 1.5342676944639102, "grad_norm": 0.5387910604476929, "learning_rate": 0.00015015068165510643, "loss": 0.0938, "step": 10947 }, { "epoch": 1.5344078486334969, "grad_norm": 0.06391759216785431, "learning_rate": 0.00015013633102128676, "loss": 0.0049, "step": 10948 }, { "epoch": 1.5345480028030833, "grad_norm": 0.5705273747444153, "learning_rate": 0.00015012198038746709, "loss": 0.0952, "step": 10949 }, { "epoch": 1.53468815697267, "grad_norm": 0.20019106566905975, "learning_rate": 0.00015010762975364744, "loss": 0.0556, "step": 10950 }, { "epoch": 1.5348283111422565, "grad_norm": 0.23995529115200043, "learning_rate": 0.00015009327911982777, "loss": 0.0201, "step": 10951 }, { "epoch": 1.534968465311843, "grad_norm": 0.4369611144065857, "learning_rate": 0.0001500789284860081, "loss": 0.032, "step": 10952 }, { "epoch": 1.5351086194814296, "grad_norm": 0.3101497292518616, "learning_rate": 0.00015006457785218848, "loss": 0.0276, "step": 10953 }, { "epoch": 1.535248773651016, "grad_norm": 0.31244391202926636, "learning_rate": 0.0001500502272183688, "loss": 0.0565, "step": 10954 }, { "epoch": 1.5353889278206028, "grad_norm": 0.36449599266052246, "learning_rate": 0.00015003587658454914, "loss": 0.0419, "step": 10955 }, { "epoch": 1.5355290819901892, "grad_norm": 0.45624756813049316, "learning_rate": 0.0001500215259507295, "loss": 0.0983, "step": 10956 }, { "epoch": 1.5356692361597757, "grad_norm": 0.25939854979515076, "learning_rate": 0.00015000717531690982, "loss": 0.0284, "step": 10957 }, { "epoch": 1.5358093903293621, "grad_norm": 0.403261661529541, 
"learning_rate": 0.00014999282468309015, "loss": 0.0428, "step": 10958 }, { "epoch": 1.5359495444989488, "grad_norm": 0.23813696205615997, "learning_rate": 0.0001499784740492705, "loss": 0.0249, "step": 10959 }, { "epoch": 1.5360896986685355, "grad_norm": 0.4732477366924286, "learning_rate": 0.00014996412341545084, "loss": 0.0726, "step": 10960 }, { "epoch": 1.536229852838122, "grad_norm": 0.3605789542198181, "learning_rate": 0.00014994977278163116, "loss": 0.0336, "step": 10961 }, { "epoch": 1.5363700070077084, "grad_norm": 0.2642771899700165, "learning_rate": 0.00014993542214781152, "loss": 0.0246, "step": 10962 }, { "epoch": 1.5365101611772949, "grad_norm": 0.29140397906303406, "learning_rate": 0.00014992107151399185, "loss": 0.0484, "step": 10963 }, { "epoch": 1.5366503153468816, "grad_norm": 0.5289062261581421, "learning_rate": 0.0001499067208801722, "loss": 0.0566, "step": 10964 }, { "epoch": 1.5367904695164682, "grad_norm": 0.3317778706550598, "learning_rate": 0.00014989237024635253, "loss": 0.0438, "step": 10965 }, { "epoch": 1.5369306236860547, "grad_norm": 0.47614800930023193, "learning_rate": 0.00014987801961253286, "loss": 0.0643, "step": 10966 }, { "epoch": 1.5370707778556412, "grad_norm": 1.2505192756652832, "learning_rate": 0.00014986366897871322, "loss": 0.0503, "step": 10967 }, { "epoch": 1.5372109320252276, "grad_norm": 0.45235052704811096, "learning_rate": 0.00014984931834489357, "loss": 0.0564, "step": 10968 }, { "epoch": 1.5373510861948143, "grad_norm": 0.9001206159591675, "learning_rate": 0.0001498349677110739, "loss": 0.0311, "step": 10969 }, { "epoch": 1.537491240364401, "grad_norm": 1.357242226600647, "learning_rate": 0.00014982061707725423, "loss": 0.0363, "step": 10970 }, { "epoch": 1.5376313945339875, "grad_norm": 0.2559105455875397, "learning_rate": 0.00014980626644343456, "loss": 0.0981, "step": 10971 }, { "epoch": 1.537771548703574, "grad_norm": 0.5213108062744141, "learning_rate": 0.00014979191580961491, "loss": 0.0379, "step": 10972 
}, { "epoch": 1.5379117028731604, "grad_norm": 0.45825639367103577, "learning_rate": 0.00014977756517579524, "loss": 0.0381, "step": 10973 }, { "epoch": 1.538051857042747, "grad_norm": 0.20507089793682098, "learning_rate": 0.0001497632145419756, "loss": 0.0519, "step": 10974 }, { "epoch": 1.5381920112123337, "grad_norm": 0.3508005738258362, "learning_rate": 0.00014974886390815593, "loss": 0.0742, "step": 10975 }, { "epoch": 1.5383321653819202, "grad_norm": 0.44060322642326355, "learning_rate": 0.00014973451327433628, "loss": 0.0541, "step": 10976 }, { "epoch": 1.5384723195515067, "grad_norm": 0.36605215072631836, "learning_rate": 0.0001497201626405166, "loss": 0.0798, "step": 10977 }, { "epoch": 1.538612473721093, "grad_norm": 0.6941869854927063, "learning_rate": 0.00014970581200669694, "loss": 0.0341, "step": 10978 }, { "epoch": 1.5387526278906798, "grad_norm": 0.3290439248085022, "learning_rate": 0.0001496914613728773, "loss": 0.0454, "step": 10979 }, { "epoch": 1.5388927820602663, "grad_norm": 0.5251160264015198, "learning_rate": 0.00014967711073905762, "loss": 0.0303, "step": 10980 }, { "epoch": 1.539032936229853, "grad_norm": 0.3014562427997589, "learning_rate": 0.00014966276010523798, "loss": 0.0547, "step": 10981 }, { "epoch": 1.5391730903994394, "grad_norm": 0.35790368914604187, "learning_rate": 0.0001496484094714183, "loss": 0.0514, "step": 10982 }, { "epoch": 1.5393132445690259, "grad_norm": 0.8156243562698364, "learning_rate": 0.00014963405883759864, "loss": 0.0527, "step": 10983 }, { "epoch": 1.5394533987386123, "grad_norm": 0.16634903848171234, "learning_rate": 0.000149619708203779, "loss": 0.0087, "step": 10984 }, { "epoch": 1.539593552908199, "grad_norm": 2.464573860168457, "learning_rate": 0.00014960535756995932, "loss": 0.1041, "step": 10985 }, { "epoch": 1.5397337070777857, "grad_norm": 0.3118284344673157, "learning_rate": 0.00014959100693613968, "loss": 0.0843, "step": 10986 }, { "epoch": 1.5398738612473721, "grad_norm": 0.4030275046825409, 
"learning_rate": 0.00014957665630232, "loss": 0.0821, "step": 10987 }, { "epoch": 1.5400140154169586, "grad_norm": 0.39984509348869324, "learning_rate": 0.00014956230566850033, "loss": 0.0694, "step": 10988 }, { "epoch": 1.540154169586545, "grad_norm": 0.6093841195106506, "learning_rate": 0.0001495479550346807, "loss": 0.1557, "step": 10989 }, { "epoch": 1.5402943237561317, "grad_norm": 0.24070964753627777, "learning_rate": 0.00014953360440086102, "loss": 0.0324, "step": 10990 }, { "epoch": 1.5404344779257184, "grad_norm": 0.5339551568031311, "learning_rate": 0.00014951925376704137, "loss": 0.1013, "step": 10991 }, { "epoch": 1.5405746320953049, "grad_norm": 0.2245163917541504, "learning_rate": 0.0001495049031332217, "loss": 0.0719, "step": 10992 }, { "epoch": 1.5407147862648913, "grad_norm": 0.4870762228965759, "learning_rate": 0.00014949055249940203, "loss": 0.0248, "step": 10993 }, { "epoch": 1.5408549404344778, "grad_norm": 0.8021231293678284, "learning_rate": 0.00014947620186558239, "loss": 0.1202, "step": 10994 }, { "epoch": 1.5409950946040645, "grad_norm": 0.36878955364227295, "learning_rate": 0.00014946185123176274, "loss": 0.0681, "step": 10995 }, { "epoch": 1.5411352487736512, "grad_norm": 0.5719016194343567, "learning_rate": 0.00014944750059794307, "loss": 0.0737, "step": 10996 }, { "epoch": 1.5412754029432376, "grad_norm": 0.7299607992172241, "learning_rate": 0.0001494331499641234, "loss": 0.066, "step": 10997 }, { "epoch": 1.541415557112824, "grad_norm": 0.3183037042617798, "learning_rate": 0.00014941879933030373, "loss": 0.0471, "step": 10998 }, { "epoch": 1.5415557112824105, "grad_norm": 0.2559284567832947, "learning_rate": 0.00014940444869648408, "loss": 0.0462, "step": 10999 }, { "epoch": 1.5416958654519972, "grad_norm": 0.3757286071777344, "learning_rate": 0.00014939009806266444, "loss": 0.0456, "step": 11000 }, { "epoch": 1.541836019621584, "grad_norm": 0.2546945810317993, "learning_rate": 0.00014937574742884477, "loss": 0.0394, "step": 11001 }, 
{ "epoch": 1.5419761737911704, "grad_norm": 0.2045392394065857, "learning_rate": 0.0001493613967950251, "loss": 0.0487, "step": 11002 }, { "epoch": 1.5421163279607568, "grad_norm": 0.19313909113407135, "learning_rate": 0.00014934704616120545, "loss": 0.0505, "step": 11003 }, { "epoch": 1.5422564821303433, "grad_norm": 0.20614683628082275, "learning_rate": 0.00014933269552738578, "loss": 0.0261, "step": 11004 }, { "epoch": 1.54239663629993, "grad_norm": 0.20345093309879303, "learning_rate": 0.0001493183448935661, "loss": 0.05, "step": 11005 }, { "epoch": 1.5425367904695164, "grad_norm": 0.5298324227333069, "learning_rate": 0.00014930399425974646, "loss": 0.0238, "step": 11006 }, { "epoch": 1.542676944639103, "grad_norm": 0.11062458157539368, "learning_rate": 0.0001492896436259268, "loss": 0.0194, "step": 11007 }, { "epoch": 1.5428170988086896, "grad_norm": 0.14331156015396118, "learning_rate": 0.00014927529299210715, "loss": 0.0201, "step": 11008 }, { "epoch": 1.542957252978276, "grad_norm": 0.3429943919181824, "learning_rate": 0.00014926094235828748, "loss": 0.0498, "step": 11009 }, { "epoch": 1.5430974071478627, "grad_norm": 0.5337877869606018, "learning_rate": 0.0001492465917244678, "loss": 0.0872, "step": 11010 }, { "epoch": 1.5432375613174492, "grad_norm": 0.13501450419425964, "learning_rate": 0.00014923224109064816, "loss": 0.0256, "step": 11011 }, { "epoch": 1.5433777154870358, "grad_norm": 0.1723991483449936, "learning_rate": 0.0001492178904568285, "loss": 0.0253, "step": 11012 }, { "epoch": 1.5435178696566223, "grad_norm": 0.18672430515289307, "learning_rate": 0.00014920353982300885, "loss": 0.0677, "step": 11013 }, { "epoch": 1.5436580238262088, "grad_norm": 0.12765507400035858, "learning_rate": 0.00014918918918918917, "loss": 0.0415, "step": 11014 }, { "epoch": 1.5437981779957952, "grad_norm": 0.3994615972042084, "learning_rate": 0.0001491748385553695, "loss": 0.0277, "step": 11015 }, { "epoch": 1.543938332165382, "grad_norm": 0.1665220558643341, 
"learning_rate": 0.00014916048792154986, "loss": 0.0321, "step": 11016 }, { "epoch": 1.5440784863349686, "grad_norm": 0.19501011073589325, "learning_rate": 0.0001491461372877302, "loss": 0.0291, "step": 11017 }, { "epoch": 1.544218640504555, "grad_norm": 0.17355221509933472, "learning_rate": 0.00014913178665391054, "loss": 0.0289, "step": 11018 }, { "epoch": 1.5443587946741415, "grad_norm": 0.2866831421852112, "learning_rate": 0.00014911743602009087, "loss": 0.0527, "step": 11019 }, { "epoch": 1.544498948843728, "grad_norm": 0.17746466398239136, "learning_rate": 0.0001491030853862712, "loss": 0.0517, "step": 11020 }, { "epoch": 1.5446391030133146, "grad_norm": 0.8315578699111938, "learning_rate": 0.00014908873475245156, "loss": 0.0588, "step": 11021 }, { "epoch": 1.5447792571829013, "grad_norm": 0.17520473897457123, "learning_rate": 0.0001490743841186319, "loss": 0.0337, "step": 11022 }, { "epoch": 1.5449194113524878, "grad_norm": 0.24548864364624023, "learning_rate": 0.00014906003348481224, "loss": 0.0441, "step": 11023 }, { "epoch": 1.5450595655220742, "grad_norm": 0.4432048201560974, "learning_rate": 0.00014904568285099257, "loss": 0.0945, "step": 11024 }, { "epoch": 1.5451997196916607, "grad_norm": 0.17676718533039093, "learning_rate": 0.0001490313322171729, "loss": 0.0748, "step": 11025 }, { "epoch": 1.5453398738612474, "grad_norm": 0.22608895599842072, "learning_rate": 0.00014901698158335325, "loss": 0.0166, "step": 11026 }, { "epoch": 1.545480028030834, "grad_norm": 0.7016151547431946, "learning_rate": 0.0001490026309495336, "loss": 0.0454, "step": 11027 }, { "epoch": 1.5456201822004205, "grad_norm": 0.3259376883506775, "learning_rate": 0.00014898828031571394, "loss": 0.0618, "step": 11028 }, { "epoch": 1.545760336370007, "grad_norm": 0.33096277713775635, "learning_rate": 0.00014897392968189426, "loss": 0.0718, "step": 11029 }, { "epoch": 1.5459004905395934, "grad_norm": 0.4058026373386383, "learning_rate": 0.00014895957904807462, "loss": 0.0383, "step": 
11030 }, { "epoch": 1.5460406447091801, "grad_norm": 0.5410714149475098, "learning_rate": 0.00014894522841425495, "loss": 0.0787, "step": 11031 }, { "epoch": 1.5461807988787668, "grad_norm": 0.7713353037834167, "learning_rate": 0.0001489308777804353, "loss": 0.1656, "step": 11032 }, { "epoch": 1.5463209530483533, "grad_norm": 0.3726988732814789, "learning_rate": 0.00014891652714661563, "loss": 0.1695, "step": 11033 }, { "epoch": 1.5464611072179397, "grad_norm": 0.7880122661590576, "learning_rate": 0.00014890217651279596, "loss": 0.0568, "step": 11034 }, { "epoch": 1.5466012613875262, "grad_norm": 1.9528509378433228, "learning_rate": 0.00014888782587897632, "loss": 0.1263, "step": 11035 }, { "epoch": 1.5467414155571129, "grad_norm": 0.2528485953807831, "learning_rate": 0.00014887347524515665, "loss": 0.0463, "step": 11036 }, { "epoch": 1.5468815697266993, "grad_norm": 0.38555899262428284, "learning_rate": 0.000148859124611337, "loss": 0.0791, "step": 11037 }, { "epoch": 1.547021723896286, "grad_norm": 0.49913421273231506, "learning_rate": 0.00014884477397751733, "loss": 0.0519, "step": 11038 }, { "epoch": 1.5471618780658725, "grad_norm": 0.11820407956838608, "learning_rate": 0.00014883042334369766, "loss": 0.0167, "step": 11039 }, { "epoch": 1.547302032235459, "grad_norm": 0.25541189312934875, "learning_rate": 0.00014881607270987801, "loss": 0.0362, "step": 11040 }, { "epoch": 1.5474421864050456, "grad_norm": 0.31139102578163147, "learning_rate": 0.00014880172207605834, "loss": 0.0366, "step": 11041 }, { "epoch": 1.547582340574632, "grad_norm": 0.3352653980255127, "learning_rate": 0.00014878737144223867, "loss": 0.0451, "step": 11042 }, { "epoch": 1.5477224947442187, "grad_norm": 0.32105931639671326, "learning_rate": 0.00014877302080841903, "loss": 0.1157, "step": 11043 }, { "epoch": 1.5478626489138052, "grad_norm": 0.6846283078193665, "learning_rate": 0.00014875867017459936, "loss": 0.1519, "step": 11044 }, { "epoch": 1.5480028030833917, "grad_norm": 
0.33372262120246887, "learning_rate": 0.0001487443195407797, "loss": 0.0615, "step": 11045 }, { "epoch": 1.5481429572529781, "grad_norm": 0.32960978150367737, "learning_rate": 0.00014872996890696004, "loss": 0.0341, "step": 11046 }, { "epoch": 1.5482831114225648, "grad_norm": 0.22216135263442993, "learning_rate": 0.00014871561827314037, "loss": 0.0439, "step": 11047 }, { "epoch": 1.5484232655921515, "grad_norm": 0.4431505799293518, "learning_rate": 0.00014870126763932072, "loss": 0.0633, "step": 11048 }, { "epoch": 1.548563419761738, "grad_norm": 0.10634178668260574, "learning_rate": 0.00014868691700550105, "loss": 0.0128, "step": 11049 }, { "epoch": 1.5487035739313244, "grad_norm": 0.9513567686080933, "learning_rate": 0.0001486725663716814, "loss": 0.0531, "step": 11050 }, { "epoch": 1.5488437281009109, "grad_norm": 0.44933417439460754, "learning_rate": 0.00014865821573786174, "loss": 0.1208, "step": 11051 }, { "epoch": 1.5489838822704975, "grad_norm": 0.2294304519891739, "learning_rate": 0.00014864386510404207, "loss": 0.0387, "step": 11052 }, { "epoch": 1.5491240364400842, "grad_norm": 0.11340650916099548, "learning_rate": 0.00014862951447022242, "loss": 0.0179, "step": 11053 }, { "epoch": 1.5492641906096707, "grad_norm": 0.3369068503379822, "learning_rate": 0.00014861516383640278, "loss": 0.035, "step": 11054 }, { "epoch": 1.5494043447792571, "grad_norm": 0.3367752432823181, "learning_rate": 0.0001486008132025831, "loss": 0.068, "step": 11055 }, { "epoch": 1.5495444989488436, "grad_norm": 0.36260631680488586, "learning_rate": 0.00014858646256876343, "loss": 0.074, "step": 11056 }, { "epoch": 1.5496846531184303, "grad_norm": 0.05152274668216705, "learning_rate": 0.0001485721119349438, "loss": 0.0042, "step": 11057 }, { "epoch": 1.549824807288017, "grad_norm": 0.15546537935733795, "learning_rate": 0.00014855776130112412, "loss": 0.0362, "step": 11058 }, { "epoch": 1.5499649614576034, "grad_norm": 0.16330179572105408, "learning_rate": 0.00014854341066730447, 
"loss": 0.0298, "step": 11059 }, { "epoch": 1.55010511562719, "grad_norm": 0.18959923088550568, "learning_rate": 0.0001485290600334848, "loss": 0.0612, "step": 11060 }, { "epoch": 1.5502452697967763, "grad_norm": 0.16318963468074799, "learning_rate": 0.00014851470939966513, "loss": 0.0226, "step": 11061 }, { "epoch": 1.550385423966363, "grad_norm": 0.07888984680175781, "learning_rate": 0.0001485003587658455, "loss": 0.012, "step": 11062 }, { "epoch": 1.5505255781359495, "grad_norm": 0.19933873414993286, "learning_rate": 0.00014848600813202582, "loss": 0.0184, "step": 11063 }, { "epoch": 1.5506657323055362, "grad_norm": 0.4456675350666046, "learning_rate": 0.00014847165749820617, "loss": 0.0342, "step": 11064 }, { "epoch": 1.5508058864751226, "grad_norm": 0.6584606766700745, "learning_rate": 0.0001484573068643865, "loss": 0.1309, "step": 11065 }, { "epoch": 1.550946040644709, "grad_norm": 0.27750495076179504, "learning_rate": 0.00014844295623056683, "loss": 0.0299, "step": 11066 }, { "epoch": 1.5510861948142958, "grad_norm": 0.06212341785430908, "learning_rate": 0.00014842860559674718, "loss": 0.0063, "step": 11067 }, { "epoch": 1.5512263489838822, "grad_norm": 0.31536439061164856, "learning_rate": 0.0001484142549629275, "loss": 0.0298, "step": 11068 }, { "epoch": 1.551366503153469, "grad_norm": 0.559956431388855, "learning_rate": 0.00014839990432910787, "loss": 0.0525, "step": 11069 }, { "epoch": 1.5515066573230554, "grad_norm": 0.2821565568447113, "learning_rate": 0.0001483855536952882, "loss": 0.0393, "step": 11070 }, { "epoch": 1.5516468114926418, "grad_norm": 0.6767759919166565, "learning_rate": 0.00014837120306146853, "loss": 0.102, "step": 11071 }, { "epoch": 1.5517869656622283, "grad_norm": 0.37384364008903503, "learning_rate": 0.00014835685242764888, "loss": 0.0377, "step": 11072 }, { "epoch": 1.551927119831815, "grad_norm": 0.6912876963615417, "learning_rate": 0.0001483425017938292, "loss": 0.0718, "step": 11073 }, { "epoch": 1.5520672740014017, 
"grad_norm": 0.8117472529411316, "learning_rate": 0.00014832815116000954, "loss": 0.0694, "step": 11074 }, { "epoch": 1.5522074281709881, "grad_norm": 1.1233464479446411, "learning_rate": 0.0001483138005261899, "loss": 0.0599, "step": 11075 }, { "epoch": 1.5523475823405746, "grad_norm": 0.8551127910614014, "learning_rate": 0.00014829944989237022, "loss": 0.075, "step": 11076 }, { "epoch": 1.552487736510161, "grad_norm": 0.026999225839972496, "learning_rate": 0.00014828509925855058, "loss": 0.0029, "step": 11077 }, { "epoch": 1.5526278906797477, "grad_norm": 0.3826160728931427, "learning_rate": 0.0001482707486247309, "loss": 0.1069, "step": 11078 }, { "epoch": 1.5527680448493344, "grad_norm": 0.24017146229743958, "learning_rate": 0.00014825639799091124, "loss": 0.0721, "step": 11079 }, { "epoch": 1.5529081990189209, "grad_norm": 0.8382569551467896, "learning_rate": 0.0001482420473570916, "loss": 0.1573, "step": 11080 }, { "epoch": 1.5530483531885073, "grad_norm": 0.09319008141756058, "learning_rate": 0.00014822769672327195, "loss": 0.0122, "step": 11081 }, { "epoch": 1.5531885073580938, "grad_norm": 0.42329511046409607, "learning_rate": 0.00014821334608945227, "loss": 0.0565, "step": 11082 }, { "epoch": 1.5533286615276805, "grad_norm": 0.17139148712158203, "learning_rate": 0.0001481989954556326, "loss": 0.0169, "step": 11083 }, { "epoch": 1.5534688156972671, "grad_norm": 0.8504591584205627, "learning_rate": 0.00014818464482181293, "loss": 0.0654, "step": 11084 }, { "epoch": 1.5536089698668536, "grad_norm": 0.12272142618894577, "learning_rate": 0.0001481702941879933, "loss": 0.0042, "step": 11085 }, { "epoch": 1.55374912403644, "grad_norm": 0.5930334329605103, "learning_rate": 0.00014815594355417364, "loss": 0.0992, "step": 11086 }, { "epoch": 1.5538892782060265, "grad_norm": 0.15107546746730804, "learning_rate": 0.00014814159292035397, "loss": 0.0402, "step": 11087 }, { "epoch": 1.5540294323756132, "grad_norm": 0.23581773042678833, "learning_rate": 
0.0001481272422865343, "loss": 0.061, "step": 11088 }, { "epoch": 1.5541695865451999, "grad_norm": 0.24607306718826294, "learning_rate": 0.00014811289165271466, "loss": 0.0416, "step": 11089 }, { "epoch": 1.5543097407147863, "grad_norm": 1.1874651908874512, "learning_rate": 0.00014809854101889498, "loss": 0.0517, "step": 11090 }, { "epoch": 1.5544498948843728, "grad_norm": 0.18721534311771393, "learning_rate": 0.00014808419038507534, "loss": 0.0199, "step": 11091 }, { "epoch": 1.5545900490539593, "grad_norm": 0.26073938608169556, "learning_rate": 0.00014806983975125567, "loss": 0.0665, "step": 11092 }, { "epoch": 1.554730203223546, "grad_norm": 0.3165334165096283, "learning_rate": 0.000148055489117436, "loss": 0.0423, "step": 11093 }, { "epoch": 1.5548703573931324, "grad_norm": 0.17674729228019714, "learning_rate": 0.00014804113848361635, "loss": 0.0228, "step": 11094 }, { "epoch": 1.555010511562719, "grad_norm": 0.291576087474823, "learning_rate": 0.00014802678784979668, "loss": 0.0356, "step": 11095 }, { "epoch": 1.5551506657323055, "grad_norm": 0.3110606372356415, "learning_rate": 0.00014801243721597704, "loss": 0.0242, "step": 11096 }, { "epoch": 1.555290819901892, "grad_norm": 0.2618960440158844, "learning_rate": 0.00014799808658215737, "loss": 0.0326, "step": 11097 }, { "epoch": 1.5554309740714787, "grad_norm": 0.239740788936615, "learning_rate": 0.0001479837359483377, "loss": 0.0216, "step": 11098 }, { "epoch": 1.5555711282410651, "grad_norm": 0.5276740193367004, "learning_rate": 0.00014796938531451805, "loss": 0.0317, "step": 11099 }, { "epoch": 1.5557112824106518, "grad_norm": 0.14626654982566833, "learning_rate": 0.00014795503468069838, "loss": 0.0187, "step": 11100 }, { "epoch": 1.5558514365802383, "grad_norm": 0.191325843334198, "learning_rate": 0.00014794068404687873, "loss": 0.049, "step": 11101 }, { "epoch": 1.5559915907498247, "grad_norm": 0.3403986990451813, "learning_rate": 0.00014792633341305906, "loss": 0.0657, "step": 11102 }, { "epoch": 
1.5561317449194112, "grad_norm": 0.2034078687429428, "learning_rate": 0.0001479119827792394, "loss": 0.0199, "step": 11103 }, { "epoch": 1.5562718990889979, "grad_norm": 0.14513526856899261, "learning_rate": 0.00014789763214541975, "loss": 0.0315, "step": 11104 }, { "epoch": 1.5564120532585846, "grad_norm": 0.40946337580680847, "learning_rate": 0.00014788328151160008, "loss": 0.0247, "step": 11105 }, { "epoch": 1.556552207428171, "grad_norm": 0.11619977653026581, "learning_rate": 0.0001478689308777804, "loss": 0.0177, "step": 11106 }, { "epoch": 1.5566923615977575, "grad_norm": 0.4205186069011688, "learning_rate": 0.00014785458024396076, "loss": 0.0508, "step": 11107 }, { "epoch": 1.556832515767344, "grad_norm": 0.25226473808288574, "learning_rate": 0.00014784022961014112, "loss": 0.0748, "step": 11108 }, { "epoch": 1.5569726699369306, "grad_norm": 0.7032549977302551, "learning_rate": 0.00014782587897632144, "loss": 0.0992, "step": 11109 }, { "epoch": 1.5571128241065173, "grad_norm": 0.48030897974967957, "learning_rate": 0.00014781152834250177, "loss": 0.0727, "step": 11110 }, { "epoch": 1.5572529782761038, "grad_norm": 0.257094144821167, "learning_rate": 0.0001477971777086821, "loss": 0.0565, "step": 11111 }, { "epoch": 1.5573931324456902, "grad_norm": 0.21146482229232788, "learning_rate": 0.00014778282707486246, "loss": 0.0477, "step": 11112 }, { "epoch": 1.5575332866152767, "grad_norm": 0.1778813898563385, "learning_rate": 0.0001477684764410428, "loss": 0.0205, "step": 11113 }, { "epoch": 1.5576734407848634, "grad_norm": 0.21597367525100708, "learning_rate": 0.00014775412580722314, "loss": 0.0189, "step": 11114 }, { "epoch": 1.55781359495445, "grad_norm": 0.3993750810623169, "learning_rate": 0.00014773977517340347, "loss": 0.0398, "step": 11115 }, { "epoch": 1.5579537491240365, "grad_norm": 0.19708694517612457, "learning_rate": 0.00014772542453958383, "loss": 0.0112, "step": 11116 }, { "epoch": 1.558093903293623, "grad_norm": 0.3637322187423706, "learning_rate": 
0.00014771107390576415, "loss": 0.0437, "step": 11117 }, { "epoch": 1.5582340574632094, "grad_norm": 0.2887438237667084, "learning_rate": 0.0001476967232719445, "loss": 0.026, "step": 11118 }, { "epoch": 1.558374211632796, "grad_norm": 0.32681554555892944, "learning_rate": 0.00014768237263812484, "loss": 0.0196, "step": 11119 }, { "epoch": 1.5585143658023828, "grad_norm": 0.528824508190155, "learning_rate": 0.00014766802200430517, "loss": 0.0178, "step": 11120 }, { "epoch": 1.5586545199719692, "grad_norm": 0.211766317486763, "learning_rate": 0.00014765367137048552, "loss": 0.0308, "step": 11121 }, { "epoch": 1.5587946741415557, "grad_norm": 0.5120553970336914, "learning_rate": 0.00014763932073666585, "loss": 0.0844, "step": 11122 }, { "epoch": 1.5589348283111422, "grad_norm": 0.18291109800338745, "learning_rate": 0.0001476249701028462, "loss": 0.0474, "step": 11123 }, { "epoch": 1.5590749824807288, "grad_norm": 0.3133188486099243, "learning_rate": 0.00014761061946902654, "loss": 0.0164, "step": 11124 }, { "epoch": 1.5592151366503153, "grad_norm": 0.2677336633205414, "learning_rate": 0.00014759626883520686, "loss": 0.0247, "step": 11125 }, { "epoch": 1.559355290819902, "grad_norm": 0.3338314890861511, "learning_rate": 0.00014758191820138722, "loss": 0.0296, "step": 11126 }, { "epoch": 1.5594954449894884, "grad_norm": 0.5702020525932312, "learning_rate": 0.00014756756756756758, "loss": 0.0666, "step": 11127 }, { "epoch": 1.559635599159075, "grad_norm": 0.3197702467441559, "learning_rate": 0.0001475532169337479, "loss": 0.0347, "step": 11128 }, { "epoch": 1.5597757533286614, "grad_norm": 0.6559661626815796, "learning_rate": 0.00014753886629992823, "loss": 0.0724, "step": 11129 }, { "epoch": 1.559915907498248, "grad_norm": 1.1053879261016846, "learning_rate": 0.00014752451566610856, "loss": 0.0729, "step": 11130 }, { "epoch": 1.5600560616678347, "grad_norm": 1.476270318031311, "learning_rate": 0.00014751016503228892, "loss": 0.196, "step": 11131 }, { "epoch": 
1.5601962158374212, "grad_norm": 0.46585288643836975, "learning_rate": 0.00014749581439846927, "loss": 0.0949, "step": 11132 }, { "epoch": 1.5603363700070076, "grad_norm": 1.2716703414916992, "learning_rate": 0.0001474814637646496, "loss": 0.1641, "step": 11133 }, { "epoch": 1.560476524176594, "grad_norm": 0.8445846438407898, "learning_rate": 0.00014746711313082993, "loss": 0.1595, "step": 11134 }, { "epoch": 1.5606166783461808, "grad_norm": 0.9498838186264038, "learning_rate": 0.00014745276249701028, "loss": 0.1249, "step": 11135 }, { "epoch": 1.5607568325157675, "grad_norm": 0.6306179165840149, "learning_rate": 0.0001474384118631906, "loss": 0.0813, "step": 11136 }, { "epoch": 1.560896986685354, "grad_norm": 0.16381900012493134, "learning_rate": 0.00014742406122937094, "loss": 0.0303, "step": 11137 }, { "epoch": 1.5610371408549404, "grad_norm": 0.22249996662139893, "learning_rate": 0.00014740971059555127, "loss": 0.0302, "step": 11138 }, { "epoch": 1.5611772950245268, "grad_norm": 0.2671750783920288, "learning_rate": 0.00014739535996173163, "loss": 0.0264, "step": 11139 }, { "epoch": 1.5613174491941135, "grad_norm": 0.5948111414909363, "learning_rate": 0.00014738100932791198, "loss": 0.0585, "step": 11140 }, { "epoch": 1.5614576033637002, "grad_norm": 0.44250333309173584, "learning_rate": 0.0001473666586940923, "loss": 0.0634, "step": 11141 }, { "epoch": 1.5615977575332867, "grad_norm": 0.2616373598575592, "learning_rate": 0.00014735230806027264, "loss": 0.0485, "step": 11142 }, { "epoch": 1.5617379117028731, "grad_norm": 0.41670432686805725, "learning_rate": 0.000147337957426453, "loss": 0.0741, "step": 11143 }, { "epoch": 1.5618780658724596, "grad_norm": 0.2678121030330658, "learning_rate": 0.00014732360679263332, "loss": 0.0564, "step": 11144 }, { "epoch": 1.5620182200420463, "grad_norm": 0.3389662206172943, "learning_rate": 0.00014730925615881368, "loss": 0.0316, "step": 11145 }, { "epoch": 1.562158374211633, "grad_norm": 0.2639074921607971, "learning_rate": 
0.000147294905524994, "loss": 0.1061, "step": 11146 }, { "epoch": 1.5622985283812194, "grad_norm": 0.2882052958011627, "learning_rate": 0.00014728055489117434, "loss": 0.0311, "step": 11147 }, { "epoch": 1.5624386825508059, "grad_norm": 0.07320276647806168, "learning_rate": 0.0001472662042573547, "loss": 0.0073, "step": 11148 }, { "epoch": 1.5625788367203923, "grad_norm": 0.3554665148258209, "learning_rate": 0.00014725185362353502, "loss": 0.0559, "step": 11149 }, { "epoch": 1.562718990889979, "grad_norm": 0.772337794303894, "learning_rate": 0.00014723750298971538, "loss": 0.0684, "step": 11150 }, { "epoch": 1.5628591450595655, "grad_norm": 0.31310799717903137, "learning_rate": 0.0001472231523558957, "loss": 0.036, "step": 11151 }, { "epoch": 1.5629992992291522, "grad_norm": 0.3384800851345062, "learning_rate": 0.00014720880172207603, "loss": 0.0979, "step": 11152 }, { "epoch": 1.5631394533987386, "grad_norm": 0.15126965939998627, "learning_rate": 0.0001471944510882564, "loss": 0.0207, "step": 11153 }, { "epoch": 1.563279607568325, "grad_norm": 0.31631508469581604, "learning_rate": 0.00014718010045443672, "loss": 0.0315, "step": 11154 }, { "epoch": 1.5634197617379118, "grad_norm": 0.17821283638477325, "learning_rate": 0.00014716574982061707, "loss": 0.017, "step": 11155 }, { "epoch": 1.5635599159074982, "grad_norm": 0.42374420166015625, "learning_rate": 0.0001471513991867974, "loss": 0.0701, "step": 11156 }, { "epoch": 1.563700070077085, "grad_norm": 0.1802091896533966, "learning_rate": 0.00014713704855297773, "loss": 0.0241, "step": 11157 }, { "epoch": 1.5638402242466714, "grad_norm": 0.2635543942451477, "learning_rate": 0.00014712269791915809, "loss": 0.0346, "step": 11158 }, { "epoch": 1.5639803784162578, "grad_norm": 0.5130125284194946, "learning_rate": 0.00014710834728533844, "loss": 0.0417, "step": 11159 }, { "epoch": 1.5641205325858443, "grad_norm": 0.19770728051662445, "learning_rate": 0.00014709399665151877, "loss": 0.035, "step": 11160 }, { "epoch": 
1.564260686755431, "grad_norm": 0.09157402813434601, "learning_rate": 0.0001470796460176991, "loss": 0.007, "step": 11161 }, { "epoch": 1.5644008409250176, "grad_norm": 0.7800874710083008, "learning_rate": 0.00014706529538387945, "loss": 0.0495, "step": 11162 }, { "epoch": 1.564540995094604, "grad_norm": 0.1194981038570404, "learning_rate": 0.00014705094475005978, "loss": 0.0317, "step": 11163 }, { "epoch": 1.5646811492641906, "grad_norm": 0.08925904333591461, "learning_rate": 0.00014703659411624014, "loss": 0.0145, "step": 11164 }, { "epoch": 1.564821303433777, "grad_norm": 0.1607254594564438, "learning_rate": 0.00014702224348242047, "loss": 0.0222, "step": 11165 }, { "epoch": 1.5649614576033637, "grad_norm": 0.573932945728302, "learning_rate": 0.0001470078928486008, "loss": 0.1133, "step": 11166 }, { "epoch": 1.5651016117729504, "grad_norm": 0.16129440069198608, "learning_rate": 0.00014699354221478115, "loss": 0.0234, "step": 11167 }, { "epoch": 1.5652417659425368, "grad_norm": 0.1657620072364807, "learning_rate": 0.00014697919158096148, "loss": 0.0471, "step": 11168 }, { "epoch": 1.5653819201121233, "grad_norm": 0.1637999564409256, "learning_rate": 0.0001469648409471418, "loss": 0.0278, "step": 11169 }, { "epoch": 1.5655220742817098, "grad_norm": 0.13079184293746948, "learning_rate": 0.00014695049031332216, "loss": 0.0267, "step": 11170 }, { "epoch": 1.5656622284512964, "grad_norm": 0.24356278777122498, "learning_rate": 0.0001469361396795025, "loss": 0.0265, "step": 11171 }, { "epoch": 1.5658023826208831, "grad_norm": 0.1065502017736435, "learning_rate": 0.00014692178904568285, "loss": 0.0092, "step": 11172 }, { "epoch": 1.5659425367904696, "grad_norm": 0.14290915429592133, "learning_rate": 0.00014690743841186318, "loss": 0.0216, "step": 11173 }, { "epoch": 1.566082690960056, "grad_norm": 0.4905205965042114, "learning_rate": 0.0001468930877780435, "loss": 0.0779, "step": 11174 }, { "epoch": 1.5662228451296425, "grad_norm": 0.5665708780288696, "learning_rate": 
0.00014687873714422386, "loss": 0.022, "step": 11175 }, { "epoch": 1.5663629992992292, "grad_norm": 0.23964370787143707, "learning_rate": 0.0001468643865104042, "loss": 0.0225, "step": 11176 }, { "epoch": 1.5665031534688159, "grad_norm": 0.5382786989212036, "learning_rate": 0.00014685003587658455, "loss": 0.1165, "step": 11177 }, { "epoch": 1.5666433076384023, "grad_norm": 0.06651629507541656, "learning_rate": 0.00014683568524276487, "loss": 0.0085, "step": 11178 }, { "epoch": 1.5667834618079888, "grad_norm": 0.5134016871452332, "learning_rate": 0.0001468213346089452, "loss": 0.1353, "step": 11179 }, { "epoch": 1.5669236159775752, "grad_norm": 0.8455452919006348, "learning_rate": 0.00014680698397512556, "loss": 0.0917, "step": 11180 }, { "epoch": 1.567063770147162, "grad_norm": 0.5323025584220886, "learning_rate": 0.0001467926333413059, "loss": 0.077, "step": 11181 }, { "epoch": 1.5672039243167484, "grad_norm": 0.21210433542728424, "learning_rate": 0.00014677828270748624, "loss": 0.0134, "step": 11182 }, { "epoch": 1.567344078486335, "grad_norm": 0.1852567344903946, "learning_rate": 0.00014676393207366657, "loss": 0.0109, "step": 11183 }, { "epoch": 1.5674842326559215, "grad_norm": 1.3256570100784302, "learning_rate": 0.0001467495814398469, "loss": 0.0751, "step": 11184 }, { "epoch": 1.567624386825508, "grad_norm": 2.4334514141082764, "learning_rate": 0.00014673523080602726, "loss": 0.2014, "step": 11185 }, { "epoch": 1.5677645409950947, "grad_norm": 0.35006874799728394, "learning_rate": 0.0001467208801722076, "loss": 0.081, "step": 11186 }, { "epoch": 1.5679046951646811, "grad_norm": 0.37955722212791443, "learning_rate": 0.00014670652953838794, "loss": 0.0444, "step": 11187 }, { "epoch": 1.5680448493342678, "grad_norm": 0.08182496577501297, "learning_rate": 0.00014669217890456827, "loss": 0.0077, "step": 11188 }, { "epoch": 1.5681850035038543, "grad_norm": 0.1904602348804474, "learning_rate": 0.0001466778282707486, "loss": 0.0463, "step": 11189 }, { "epoch": 
1.5683251576734407, "grad_norm": 0.5992399454116821, "learning_rate": 0.00014666347763692895, "loss": 0.0424, "step": 11190 }, { "epoch": 1.5684653118430272, "grad_norm": 0.528991162776947, "learning_rate": 0.0001466491270031093, "loss": 0.0868, "step": 11191 }, { "epoch": 1.5686054660126139, "grad_norm": 0.2003084421157837, "learning_rate": 0.00014663477636928964, "loss": 0.016, "step": 11192 }, { "epoch": 1.5687456201822005, "grad_norm": 0.27936655282974243, "learning_rate": 0.00014662042573546996, "loss": 0.0697, "step": 11193 }, { "epoch": 1.568885774351787, "grad_norm": 0.39925622940063477, "learning_rate": 0.00014660607510165032, "loss": 0.0488, "step": 11194 }, { "epoch": 1.5690259285213735, "grad_norm": 0.37023505568504333, "learning_rate": 0.00014659172446783065, "loss": 0.0271, "step": 11195 }, { "epoch": 1.56916608269096, "grad_norm": 0.19889254868030548, "learning_rate": 0.000146577373834011, "loss": 0.0321, "step": 11196 }, { "epoch": 1.5693062368605466, "grad_norm": 0.18486392498016357, "learning_rate": 0.00014656302320019133, "loss": 0.0143, "step": 11197 }, { "epoch": 1.5694463910301333, "grad_norm": 0.1314917802810669, "learning_rate": 0.00014654867256637166, "loss": 0.0249, "step": 11198 }, { "epoch": 1.5695865451997197, "grad_norm": 0.24066641926765442, "learning_rate": 0.00014653432193255202, "loss": 0.0448, "step": 11199 }, { "epoch": 1.5697266993693062, "grad_norm": 0.20847086608409882, "learning_rate": 0.00014651997129873235, "loss": 0.0629, "step": 11200 }, { "epoch": 1.5698668535388927, "grad_norm": 0.18620292842388153, "learning_rate": 0.00014650562066491267, "loss": 0.0353, "step": 11201 }, { "epoch": 1.5700070077084793, "grad_norm": 0.2205834835767746, "learning_rate": 0.00014649127003109303, "loss": 0.0463, "step": 11202 }, { "epoch": 1.570147161878066, "grad_norm": 0.25545647740364075, "learning_rate": 0.00014647691939727336, "loss": 0.0771, "step": 11203 }, { "epoch": 1.5702873160476525, "grad_norm": 0.352230429649353, 
"learning_rate": 0.00014646256876345371, "loss": 0.0379, "step": 11204 }, { "epoch": 1.570427470217239, "grad_norm": 0.47847968339920044, "learning_rate": 0.00014644821812963404, "loss": 0.0801, "step": 11205 }, { "epoch": 1.5705676243868254, "grad_norm": 0.24556659162044525, "learning_rate": 0.00014643386749581437, "loss": 0.0526, "step": 11206 }, { "epoch": 1.570707778556412, "grad_norm": 0.42434048652648926, "learning_rate": 0.00014641951686199473, "loss": 0.101, "step": 11207 }, { "epoch": 1.5708479327259988, "grad_norm": 0.238752543926239, "learning_rate": 0.00014640516622817506, "loss": 0.0565, "step": 11208 }, { "epoch": 1.5709880868955852, "grad_norm": 0.1340286135673523, "learning_rate": 0.0001463908155943554, "loss": 0.0114, "step": 11209 }, { "epoch": 1.5711282410651717, "grad_norm": 0.6875956654548645, "learning_rate": 0.00014637646496053574, "loss": 0.0261, "step": 11210 }, { "epoch": 1.5712683952347581, "grad_norm": 0.3364204466342926, "learning_rate": 0.00014636211432671607, "loss": 0.0884, "step": 11211 }, { "epoch": 1.5714085494043448, "grad_norm": 0.09406965970993042, "learning_rate": 0.00014634776369289642, "loss": 0.0156, "step": 11212 }, { "epoch": 1.5715487035739313, "grad_norm": 0.06484595686197281, "learning_rate": 0.00014633341305907678, "loss": 0.0062, "step": 11213 }, { "epoch": 1.571688857743518, "grad_norm": 0.5709916353225708, "learning_rate": 0.0001463190624252571, "loss": 0.0534, "step": 11214 }, { "epoch": 1.5718290119131044, "grad_norm": 0.3590030074119568, "learning_rate": 0.00014630471179143744, "loss": 0.0435, "step": 11215 }, { "epoch": 1.5719691660826909, "grad_norm": 0.2900840938091278, "learning_rate": 0.00014629036115761777, "loss": 0.0423, "step": 11216 }, { "epoch": 1.5721093202522773, "grad_norm": 0.5910401940345764, "learning_rate": 0.00014627601052379812, "loss": 0.0852, "step": 11217 }, { "epoch": 1.572249474421864, "grad_norm": 2.503844976425171, "learning_rate": 0.00014626165988997848, "loss": 0.0583, "step": 11218 
}, { "epoch": 1.5723896285914507, "grad_norm": 0.07830420136451721, "learning_rate": 0.0001462473092561588, "loss": 0.006, "step": 11219 }, { "epoch": 1.5725297827610372, "grad_norm": 0.2498377561569214, "learning_rate": 0.00014623295862233913, "loss": 0.0825, "step": 11220 }, { "epoch": 1.5726699369306236, "grad_norm": 0.7405567765235901, "learning_rate": 0.0001462186079885195, "loss": 0.0192, "step": 11221 }, { "epoch": 1.57281009110021, "grad_norm": 0.18399973213672638, "learning_rate": 0.00014620425735469982, "loss": 0.032, "step": 11222 }, { "epoch": 1.5729502452697968, "grad_norm": 0.33848488330841064, "learning_rate": 0.00014618990672088017, "loss": 0.0429, "step": 11223 }, { "epoch": 1.5730903994393834, "grad_norm": 0.5522069334983826, "learning_rate": 0.0001461755560870605, "loss": 0.0577, "step": 11224 }, { "epoch": 1.57323055360897, "grad_norm": 0.5384527444839478, "learning_rate": 0.00014616120545324083, "loss": 0.1057, "step": 11225 }, { "epoch": 1.5733707077785564, "grad_norm": 0.4417308270931244, "learning_rate": 0.0001461468548194212, "loss": 0.017, "step": 11226 }, { "epoch": 1.5735108619481428, "grad_norm": 0.3732277452945709, "learning_rate": 0.00014613250418560152, "loss": 0.0544, "step": 11227 }, { "epoch": 1.5736510161177295, "grad_norm": 1.7595444917678833, "learning_rate": 0.00014611815355178187, "loss": 0.2268, "step": 11228 }, { "epoch": 1.5737911702873162, "grad_norm": 0.24302317202091217, "learning_rate": 0.0001461038029179622, "loss": 0.0359, "step": 11229 }, { "epoch": 1.5739313244569026, "grad_norm": 0.24950161576271057, "learning_rate": 0.00014608945228414253, "loss": 0.0433, "step": 11230 }, { "epoch": 1.574071478626489, "grad_norm": 1.941673994064331, "learning_rate": 0.00014607510165032288, "loss": 0.0538, "step": 11231 }, { "epoch": 1.5742116327960756, "grad_norm": 1.4476665258407593, "learning_rate": 0.0001460607510165032, "loss": 0.12, "step": 11232 }, { "epoch": 1.5743517869656622, "grad_norm": 0.214619442820549, 
"learning_rate": 0.00014604640038268354, "loss": 0.0121, "step": 11233 }, { "epoch": 1.574491941135249, "grad_norm": 0.3862488567829132, "learning_rate": 0.0001460320497488639, "loss": 0.0177, "step": 11234 }, { "epoch": 1.5746320953048354, "grad_norm": 1.9196295738220215, "learning_rate": 0.00014601769911504423, "loss": 0.2218, "step": 11235 }, { "epoch": 1.5747722494744218, "grad_norm": 0.2490377426147461, "learning_rate": 0.00014600334848122458, "loss": 0.0575, "step": 11236 }, { "epoch": 1.5749124036440083, "grad_norm": 0.14667293429374695, "learning_rate": 0.0001459889978474049, "loss": 0.043, "step": 11237 }, { "epoch": 1.575052557813595, "grad_norm": 0.20396338403224945, "learning_rate": 0.00014597464721358524, "loss": 0.056, "step": 11238 }, { "epoch": 1.5751927119831814, "grad_norm": 0.35358285903930664, "learning_rate": 0.0001459602965797656, "loss": 0.0539, "step": 11239 }, { "epoch": 1.5753328661527681, "grad_norm": 0.3170239329338074, "learning_rate": 0.00014594594594594595, "loss": 0.0527, "step": 11240 }, { "epoch": 1.5754730203223546, "grad_norm": 0.6362709403038025, "learning_rate": 0.00014593159531212628, "loss": 0.1129, "step": 11241 }, { "epoch": 1.575613174491941, "grad_norm": 0.22644935548305511, "learning_rate": 0.0001459172446783066, "loss": 0.0312, "step": 11242 }, { "epoch": 1.5757533286615277, "grad_norm": 0.3139689266681671, "learning_rate": 0.00014590289404448693, "loss": 0.0433, "step": 11243 }, { "epoch": 1.5758934828311142, "grad_norm": 0.4308244287967682, "learning_rate": 0.0001458885434106673, "loss": 0.0526, "step": 11244 }, { "epoch": 1.5760336370007009, "grad_norm": 0.2084711641073227, "learning_rate": 0.00014587419277684765, "loss": 0.0602, "step": 11245 }, { "epoch": 1.5761737911702873, "grad_norm": 0.32235977053642273, "learning_rate": 0.00014585984214302797, "loss": 0.0602, "step": 11246 }, { "epoch": 1.5763139453398738, "grad_norm": 0.4344325065612793, "learning_rate": 0.0001458454915092083, "loss": 0.0296, "step": 11247 }, 
{ "epoch": 1.5764540995094602, "grad_norm": 0.39458155632019043, "learning_rate": 0.00014583114087538866, "loss": 0.0616, "step": 11248 }, { "epoch": 1.576594253679047, "grad_norm": 0.5368797183036804, "learning_rate": 0.000145816790241569, "loss": 0.0771, "step": 11249 }, { "epoch": 1.5767344078486336, "grad_norm": 0.3535921573638916, "learning_rate": 0.00014580243960774934, "loss": 0.0309, "step": 11250 }, { "epoch": 1.57687456201822, "grad_norm": 0.12445615231990814, "learning_rate": 0.00014578808897392967, "loss": 0.0183, "step": 11251 }, { "epoch": 1.5770147161878065, "grad_norm": 0.11979249119758606, "learning_rate": 0.00014577373834011, "loss": 0.0185, "step": 11252 }, { "epoch": 1.577154870357393, "grad_norm": 0.38393333554267883, "learning_rate": 0.00014575938770629036, "loss": 0.0462, "step": 11253 }, { "epoch": 1.5772950245269797, "grad_norm": 0.5327111482620239, "learning_rate": 0.00014574503707247068, "loss": 0.0544, "step": 11254 }, { "epoch": 1.5774351786965664, "grad_norm": 0.5200344324111938, "learning_rate": 0.00014573068643865104, "loss": 0.0904, "step": 11255 }, { "epoch": 1.5775753328661528, "grad_norm": 0.4005781412124634, "learning_rate": 0.00014571633580483137, "loss": 0.0426, "step": 11256 }, { "epoch": 1.5777154870357393, "grad_norm": 0.4025375247001648, "learning_rate": 0.0001457019851710117, "loss": 0.0424, "step": 11257 }, { "epoch": 1.5778556412053257, "grad_norm": 0.34456416964530945, "learning_rate": 0.00014568763453719205, "loss": 0.0383, "step": 11258 }, { "epoch": 1.5779957953749124, "grad_norm": 0.2475968450307846, "learning_rate": 0.00014567328390337238, "loss": 0.0354, "step": 11259 }, { "epoch": 1.578135949544499, "grad_norm": 0.07748182862997055, "learning_rate": 0.00014565893326955274, "loss": 0.0098, "step": 11260 }, { "epoch": 1.5782761037140856, "grad_norm": 0.36885011196136475, "learning_rate": 0.00014564458263573307, "loss": 0.0618, "step": 11261 }, { "epoch": 1.578416257883672, "grad_norm": 0.258219450712204, 
"learning_rate": 0.0001456302320019134, "loss": 0.0794, "step": 11262 }, { "epoch": 1.5785564120532585, "grad_norm": 0.26139187812805176, "learning_rate": 0.00014561588136809375, "loss": 0.0181, "step": 11263 }, { "epoch": 1.5786965662228452, "grad_norm": 0.2537481486797333, "learning_rate": 0.00014560153073427408, "loss": 0.0884, "step": 11264 }, { "epoch": 1.5788367203924318, "grad_norm": 0.12628164887428284, "learning_rate": 0.0001455871801004544, "loss": 0.025, "step": 11265 }, { "epoch": 1.5789768745620183, "grad_norm": 0.34642693400382996, "learning_rate": 0.00014557282946663476, "loss": 0.0447, "step": 11266 }, { "epoch": 1.5791170287316048, "grad_norm": 0.32561546564102173, "learning_rate": 0.0001455584788328151, "loss": 0.0825, "step": 11267 }, { "epoch": 1.5792571829011912, "grad_norm": 0.42550963163375854, "learning_rate": 0.00014554412819899545, "loss": 0.0422, "step": 11268 }, { "epoch": 1.579397337070778, "grad_norm": 0.4909414052963257, "learning_rate": 0.00014552977756517578, "loss": 0.0591, "step": 11269 }, { "epoch": 1.5795374912403644, "grad_norm": 0.5342494249343872, "learning_rate": 0.0001455154269313561, "loss": 0.0544, "step": 11270 }, { "epoch": 1.579677645409951, "grad_norm": 0.44271159172058105, "learning_rate": 0.00014550107629753646, "loss": 0.0579, "step": 11271 }, { "epoch": 1.5798177995795375, "grad_norm": 0.678312361240387, "learning_rate": 0.00014548672566371682, "loss": 0.0333, "step": 11272 }, { "epoch": 1.579957953749124, "grad_norm": 0.32278138399124146, "learning_rate": 0.00014547237502989714, "loss": 0.0422, "step": 11273 }, { "epoch": 1.5800981079187106, "grad_norm": 0.551562488079071, "learning_rate": 0.00014545802439607747, "loss": 0.028, "step": 11274 }, { "epoch": 1.580238262088297, "grad_norm": 1.3201539516448975, "learning_rate": 0.00014544367376225783, "loss": 0.0868, "step": 11275 }, { "epoch": 1.5803784162578838, "grad_norm": 0.2706214189529419, "learning_rate": 0.00014542932312843816, "loss": 0.0395, "step": 11276 
}, { "epoch": 1.5805185704274702, "grad_norm": 0.2803541123867035, "learning_rate": 0.0001454149724946185, "loss": 0.0357, "step": 11277 }, { "epoch": 1.5806587245970567, "grad_norm": 0.9173793792724609, "learning_rate": 0.00014540062186079884, "loss": 0.1996, "step": 11278 }, { "epoch": 1.5807988787666432, "grad_norm": 0.2926436960697174, "learning_rate": 0.00014538627122697917, "loss": 0.0626, "step": 11279 }, { "epoch": 1.5809390329362298, "grad_norm": 0.9522509574890137, "learning_rate": 0.00014537192059315953, "loss": 0.0841, "step": 11280 }, { "epoch": 1.5810791871058165, "grad_norm": 0.5323994755744934, "learning_rate": 0.00014535756995933985, "loss": 0.0709, "step": 11281 }, { "epoch": 1.581219341275403, "grad_norm": 0.4189034700393677, "learning_rate": 0.0001453432193255202, "loss": 0.0764, "step": 11282 }, { "epoch": 1.5813594954449894, "grad_norm": 0.1715243011713028, "learning_rate": 0.00014532886869170054, "loss": 0.0139, "step": 11283 }, { "epoch": 1.581499649614576, "grad_norm": 0.7849209904670715, "learning_rate": 0.00014531451805788087, "loss": 0.0433, "step": 11284 }, { "epoch": 1.5816398037841626, "grad_norm": 0.7326869368553162, "learning_rate": 0.00014530016742406122, "loss": 0.0217, "step": 11285 }, { "epoch": 1.5817799579537493, "grad_norm": 0.20545487105846405, "learning_rate": 0.00014528581679024155, "loss": 0.0177, "step": 11286 }, { "epoch": 1.5819201121233357, "grad_norm": 0.28836357593536377, "learning_rate": 0.0001452714661564219, "loss": 0.0418, "step": 11287 }, { "epoch": 1.5820602662929222, "grad_norm": 0.38304904103279114, "learning_rate": 0.00014525711552260224, "loss": 0.0495, "step": 11288 }, { "epoch": 1.5822004204625086, "grad_norm": 0.3287407457828522, "learning_rate": 0.00014524276488878256, "loss": 0.0342, "step": 11289 }, { "epoch": 1.5823405746320953, "grad_norm": 0.358939528465271, "learning_rate": 0.00014522841425496292, "loss": 0.0388, "step": 11290 }, { "epoch": 1.582480728801682, "grad_norm": 0.42045775055885315, 
"learning_rate": 0.00014521406362114328, "loss": 0.0647, "step": 11291 }, { "epoch": 1.5826208829712685, "grad_norm": 0.6764708161354065, "learning_rate": 0.0001451997129873236, "loss": 0.1026, "step": 11292 }, { "epoch": 1.582761037140855, "grad_norm": 0.6854547262191772, "learning_rate": 0.00014518536235350393, "loss": 0.0509, "step": 11293 }, { "epoch": 1.5829011913104414, "grad_norm": 0.4909321665763855, "learning_rate": 0.00014517101171968426, "loss": 0.0379, "step": 11294 }, { "epoch": 1.583041345480028, "grad_norm": 0.4555673599243164, "learning_rate": 0.00014515666108586462, "loss": 0.0872, "step": 11295 }, { "epoch": 1.5831814996496145, "grad_norm": 0.08482465893030167, "learning_rate": 0.00014514231045204494, "loss": 0.0153, "step": 11296 }, { "epoch": 1.5833216538192012, "grad_norm": 0.5087714791297913, "learning_rate": 0.00014512795981822527, "loss": 0.1572, "step": 11297 }, { "epoch": 1.5834618079887877, "grad_norm": 0.5906250476837158, "learning_rate": 0.00014511360918440563, "loss": 0.0872, "step": 11298 }, { "epoch": 1.5836019621583741, "grad_norm": 0.2855149209499359, "learning_rate": 0.00014509925855058598, "loss": 0.031, "step": 11299 }, { "epoch": 1.5837421163279608, "grad_norm": 0.27233362197875977, "learning_rate": 0.0001450849079167663, "loss": 0.0327, "step": 11300 }, { "epoch": 1.5838822704975473, "grad_norm": 0.9900158643722534, "learning_rate": 0.00014507055728294664, "loss": 0.1225, "step": 11301 }, { "epoch": 1.584022424667134, "grad_norm": 0.27581602334976196, "learning_rate": 0.00014505620664912697, "loss": 0.0717, "step": 11302 }, { "epoch": 1.5841625788367204, "grad_norm": 0.4488271176815033, "learning_rate": 0.00014504185601530733, "loss": 0.1049, "step": 11303 }, { "epoch": 1.5843027330063069, "grad_norm": 0.24682290852069855, "learning_rate": 0.00014502750538148768, "loss": 0.0366, "step": 11304 }, { "epoch": 1.5844428871758933, "grad_norm": 0.46133553981781006, "learning_rate": 0.000145013154747668, "loss": 0.0565, "step": 11305 
}, { "epoch": 1.58458304134548, "grad_norm": 0.3794057071208954, "learning_rate": 0.00014499880411384834, "loss": 0.0938, "step": 11306 }, { "epoch": 1.5847231955150667, "grad_norm": 0.2560756504535675, "learning_rate": 0.0001449844534800287, "loss": 0.0549, "step": 11307 }, { "epoch": 1.5848633496846531, "grad_norm": 0.5859915614128113, "learning_rate": 0.00014497010284620902, "loss": 0.0743, "step": 11308 }, { "epoch": 1.5850035038542396, "grad_norm": 0.21086852252483368, "learning_rate": 0.00014495575221238938, "loss": 0.0447, "step": 11309 }, { "epoch": 1.585143658023826, "grad_norm": 0.36471033096313477, "learning_rate": 0.0001449414015785697, "loss": 0.0466, "step": 11310 }, { "epoch": 1.5852838121934127, "grad_norm": 0.20248322188854218, "learning_rate": 0.00014492705094475004, "loss": 0.0298, "step": 11311 }, { "epoch": 1.5854239663629994, "grad_norm": 0.42613497376441956, "learning_rate": 0.0001449127003109304, "loss": 0.0233, "step": 11312 }, { "epoch": 1.5855641205325859, "grad_norm": 0.1068175882101059, "learning_rate": 0.00014489834967711072, "loss": 0.0081, "step": 11313 }, { "epoch": 1.5857042747021723, "grad_norm": 0.3122786581516266, "learning_rate": 0.00014488399904329108, "loss": 0.0509, "step": 11314 }, { "epoch": 1.5858444288717588, "grad_norm": 0.2238869071006775, "learning_rate": 0.0001448696484094714, "loss": 0.0258, "step": 11315 }, { "epoch": 1.5859845830413455, "grad_norm": 0.307205468416214, "learning_rate": 0.00014485529777565173, "loss": 0.0514, "step": 11316 }, { "epoch": 1.5861247372109322, "grad_norm": 0.30395668745040894, "learning_rate": 0.0001448409471418321, "loss": 0.1037, "step": 11317 }, { "epoch": 1.5862648913805186, "grad_norm": 0.20317557454109192, "learning_rate": 0.00014482659650801244, "loss": 0.0464, "step": 11318 }, { "epoch": 1.586405045550105, "grad_norm": 0.47860780358314514, "learning_rate": 0.00014481224587419277, "loss": 0.0232, "step": 11319 }, { "epoch": 1.5865451997196915, "grad_norm": 0.05480014905333519, 
"learning_rate": 0.0001447978952403731, "loss": 0.025, "step": 11320 }, { "epoch": 1.5866853538892782, "grad_norm": 0.3540691137313843, "learning_rate": 0.00014478354460655343, "loss": 0.0494, "step": 11321 }, { "epoch": 1.586825508058865, "grad_norm": 0.7096140384674072, "learning_rate": 0.00014476919397273379, "loss": 0.0641, "step": 11322 }, { "epoch": 1.5869656622284514, "grad_norm": 0.2946372330188751, "learning_rate": 0.00014475484333891414, "loss": 0.0548, "step": 11323 }, { "epoch": 1.5871058163980378, "grad_norm": 0.08551746606826782, "learning_rate": 0.00014474049270509447, "loss": 0.0155, "step": 11324 }, { "epoch": 1.5872459705676243, "grad_norm": 0.1938301920890808, "learning_rate": 0.0001447261420712748, "loss": 0.0152, "step": 11325 }, { "epoch": 1.587386124737211, "grad_norm": 0.2958170771598816, "learning_rate": 0.00014471179143745515, "loss": 0.0535, "step": 11326 }, { "epoch": 1.5875262789067974, "grad_norm": 0.24976089596748352, "learning_rate": 0.00014469744080363548, "loss": 0.0353, "step": 11327 }, { "epoch": 1.587666433076384, "grad_norm": 0.485668420791626, "learning_rate": 0.0001446830901698158, "loss": 0.0589, "step": 11328 }, { "epoch": 1.5878065872459706, "grad_norm": 0.20511524379253387, "learning_rate": 0.00014466873953599614, "loss": 0.0521, "step": 11329 }, { "epoch": 1.587946741415557, "grad_norm": 0.18602831661701202, "learning_rate": 0.0001446543889021765, "loss": 0.0199, "step": 11330 }, { "epoch": 1.5880868955851437, "grad_norm": 0.1648767590522766, "learning_rate": 0.00014464003826835685, "loss": 0.011, "step": 11331 }, { "epoch": 1.5882270497547302, "grad_norm": 0.3389259874820709, "learning_rate": 0.00014462568763453718, "loss": 0.0772, "step": 11332 }, { "epoch": 1.5883672039243169, "grad_norm": 0.9060108065605164, "learning_rate": 0.0001446113370007175, "loss": 0.0892, "step": 11333 }, { "epoch": 1.5885073580939033, "grad_norm": 1.392243504524231, "learning_rate": 0.00014459698636689786, "loss": 0.3211, "step": 11334 }, { 
"epoch": 1.5886475122634898, "grad_norm": 0.4251784384250641, "learning_rate": 0.0001445826357330782, "loss": 0.0475, "step": 11335 }, { "epoch": 1.5887876664330762, "grad_norm": 0.13818101584911346, "learning_rate": 0.00014456828509925855, "loss": 0.0147, "step": 11336 }, { "epoch": 1.588927820602663, "grad_norm": 0.16225974261760712, "learning_rate": 0.00014455393446543888, "loss": 0.0272, "step": 11337 }, { "epoch": 1.5890679747722496, "grad_norm": 0.43955108523368835, "learning_rate": 0.0001445395838316192, "loss": 0.0828, "step": 11338 }, { "epoch": 1.589208128941836, "grad_norm": 0.2266266793012619, "learning_rate": 0.00014452523319779956, "loss": 0.0594, "step": 11339 }, { "epoch": 1.5893482831114225, "grad_norm": 0.27316924929618835, "learning_rate": 0.0001445108825639799, "loss": 0.0506, "step": 11340 }, { "epoch": 1.589488437281009, "grad_norm": 0.689016580581665, "learning_rate": 0.00014449653193016025, "loss": 0.0749, "step": 11341 }, { "epoch": 1.5896285914505957, "grad_norm": 0.21097469329833984, "learning_rate": 0.00014448218129634057, "loss": 0.0211, "step": 11342 }, { "epoch": 1.5897687456201823, "grad_norm": 0.1962958723306656, "learning_rate": 0.0001444678306625209, "loss": 0.0234, "step": 11343 }, { "epoch": 1.5899088997897688, "grad_norm": 0.22709308564662933, "learning_rate": 0.00014445348002870126, "loss": 0.0238, "step": 11344 }, { "epoch": 1.5900490539593553, "grad_norm": 0.14744724333286285, "learning_rate": 0.0001444391293948816, "loss": 0.0165, "step": 11345 }, { "epoch": 1.5901892081289417, "grad_norm": 0.22451026737689972, "learning_rate": 0.00014442477876106194, "loss": 0.0453, "step": 11346 }, { "epoch": 1.5903293622985284, "grad_norm": 0.39369678497314453, "learning_rate": 0.00014441042812724227, "loss": 0.147, "step": 11347 }, { "epoch": 1.590469516468115, "grad_norm": 0.24836881458759308, "learning_rate": 0.0001443960774934226, "loss": 0.0387, "step": 11348 }, { "epoch": 1.5906096706377015, "grad_norm": 0.21216589212417603, 
"learning_rate": 0.00014438172685960295, "loss": 0.0453, "step": 11349 }, { "epoch": 1.590749824807288, "grad_norm": 0.4266994893550873, "learning_rate": 0.0001443673762257833, "loss": 0.0256, "step": 11350 }, { "epoch": 1.5908899789768745, "grad_norm": 0.23873865604400635, "learning_rate": 0.00014435302559196364, "loss": 0.0622, "step": 11351 }, { "epoch": 1.5910301331464611, "grad_norm": 0.45349276065826416, "learning_rate": 0.00014433867495814397, "loss": 0.0346, "step": 11352 }, { "epoch": 1.5911702873160478, "grad_norm": 0.2463388890028, "learning_rate": 0.00014432432432432432, "loss": 0.0357, "step": 11353 }, { "epoch": 1.5913104414856343, "grad_norm": 0.3385111391544342, "learning_rate": 0.00014430997369050465, "loss": 0.0581, "step": 11354 }, { "epoch": 1.5914505956552207, "grad_norm": 0.2506190836429596, "learning_rate": 0.000144295623056685, "loss": 0.0685, "step": 11355 }, { "epoch": 1.5915907498248072, "grad_norm": 0.33793550729751587, "learning_rate": 0.00014428127242286534, "loss": 0.0494, "step": 11356 }, { "epoch": 1.5917309039943939, "grad_norm": 0.5122058987617493, "learning_rate": 0.00014426692178904566, "loss": 0.0508, "step": 11357 }, { "epoch": 1.5918710581639803, "grad_norm": 0.6740719079971313, "learning_rate": 0.00014425257115522602, "loss": 0.0599, "step": 11358 }, { "epoch": 1.592011212333567, "grad_norm": 0.1437353640794754, "learning_rate": 0.00014423822052140635, "loss": 0.0238, "step": 11359 }, { "epoch": 1.5921513665031535, "grad_norm": 0.16703295707702637, "learning_rate": 0.00014422386988758668, "loss": 0.0181, "step": 11360 }, { "epoch": 1.59229152067274, "grad_norm": 0.2900306284427643, "learning_rate": 0.00014420951925376703, "loss": 0.0286, "step": 11361 }, { "epoch": 1.5924316748423264, "grad_norm": 0.1914912313222885, "learning_rate": 0.00014419516861994736, "loss": 0.067, "step": 11362 }, { "epoch": 1.592571829011913, "grad_norm": 0.2010813057422638, "learning_rate": 0.00014418081798612772, "loss": 0.0431, "step": 11363 }, { 
"epoch": 1.5927119831814998, "grad_norm": 0.2772805392742157, "learning_rate": 0.00014416646735230805, "loss": 0.0377, "step": 11364 }, { "epoch": 1.5928521373510862, "grad_norm": 0.4188619554042816, "learning_rate": 0.00014415211671848837, "loss": 0.06, "step": 11365 }, { "epoch": 1.5929922915206727, "grad_norm": 0.05867023766040802, "learning_rate": 0.00014413776608466873, "loss": 0.009, "step": 11366 }, { "epoch": 1.5931324456902591, "grad_norm": 0.4740632474422455, "learning_rate": 0.00014412341545084906, "loss": 0.0425, "step": 11367 }, { "epoch": 1.5932725998598458, "grad_norm": 0.19765959680080414, "learning_rate": 0.00014410906481702941, "loss": 0.0319, "step": 11368 }, { "epoch": 1.5934127540294325, "grad_norm": 0.399566113948822, "learning_rate": 0.00014409471418320974, "loss": 0.0519, "step": 11369 }, { "epoch": 1.593552908199019, "grad_norm": 0.7658405900001526, "learning_rate": 0.00014408036354939007, "loss": 0.1309, "step": 11370 }, { "epoch": 1.5936930623686054, "grad_norm": 0.10469689965248108, "learning_rate": 0.00014406601291557043, "loss": 0.0142, "step": 11371 }, { "epoch": 1.5938332165381919, "grad_norm": 0.4584612250328064, "learning_rate": 0.00014405166228175076, "loss": 0.0664, "step": 11372 }, { "epoch": 1.5939733707077786, "grad_norm": 0.5687464475631714, "learning_rate": 0.0001440373116479311, "loss": 0.0527, "step": 11373 }, { "epoch": 1.5941135248773652, "grad_norm": 0.5980580449104309, "learning_rate": 0.00014402296101411144, "loss": 0.0738, "step": 11374 }, { "epoch": 1.5942536790469517, "grad_norm": 0.525931179523468, "learning_rate": 0.00014400861038029177, "loss": 0.0695, "step": 11375 }, { "epoch": 1.5943938332165382, "grad_norm": 0.5981613993644714, "learning_rate": 0.00014399425974647212, "loss": 0.0297, "step": 11376 }, { "epoch": 1.5945339873861246, "grad_norm": 0.32686084508895874, "learning_rate": 0.00014397990911265248, "loss": 0.0537, "step": 11377 }, { "epoch": 1.5946741415557113, "grad_norm": 0.36781010031700134, 
"learning_rate": 0.0001439655584788328, "loss": 0.1101, "step": 11378 }, { "epoch": 1.594814295725298, "grad_norm": 0.33023348450660706, "learning_rate": 0.00014395120784501314, "loss": 0.0192, "step": 11379 }, { "epoch": 1.5949544498948844, "grad_norm": 0.2836973965167999, "learning_rate": 0.00014393685721119347, "loss": 0.0745, "step": 11380 }, { "epoch": 1.595094604064471, "grad_norm": 0.08225195854902267, "learning_rate": 0.00014392250657737382, "loss": 0.005, "step": 11381 }, { "epoch": 1.5952347582340574, "grad_norm": 0.5242540836334229, "learning_rate": 0.00014390815594355418, "loss": 0.0197, "step": 11382 }, { "epoch": 1.595374912403644, "grad_norm": 0.1624547392129898, "learning_rate": 0.0001438938053097345, "loss": 0.01, "step": 11383 }, { "epoch": 1.5955150665732305, "grad_norm": 0.4167322814464569, "learning_rate": 0.00014387945467591483, "loss": 0.0788, "step": 11384 }, { "epoch": 1.5956552207428172, "grad_norm": 3.375951051712036, "learning_rate": 0.0001438651040420952, "loss": 0.1986, "step": 11385 }, { "epoch": 1.5957953749124036, "grad_norm": 0.23198918998241425, "learning_rate": 0.00014385075340827552, "loss": 0.022, "step": 11386 }, { "epoch": 1.59593552908199, "grad_norm": 0.26288193464279175, "learning_rate": 0.00014383640277445587, "loss": 0.0257, "step": 11387 }, { "epoch": 1.5960756832515768, "grad_norm": 0.19199809432029724, "learning_rate": 0.0001438220521406362, "loss": 0.038, "step": 11388 }, { "epoch": 1.5962158374211632, "grad_norm": 0.25046664476394653, "learning_rate": 0.00014380770150681653, "loss": 0.0625, "step": 11389 }, { "epoch": 1.59635599159075, "grad_norm": 0.3365858495235443, "learning_rate": 0.0001437933508729969, "loss": 0.0897, "step": 11390 }, { "epoch": 1.5964961457603364, "grad_norm": 0.13023974001407623, "learning_rate": 0.00014377900023917722, "loss": 0.031, "step": 11391 }, { "epoch": 1.5966362999299228, "grad_norm": 0.356695294380188, "learning_rate": 0.00014376464960535754, "loss": 0.0335, "step": 11392 }, { 
"epoch": 1.5967764540995093, "grad_norm": 0.2053287923336029, "learning_rate": 0.0001437502989715379, "loss": 0.0578, "step": 11393 }, { "epoch": 1.596916608269096, "grad_norm": 0.45393800735473633, "learning_rate": 0.00014373594833771823, "loss": 0.0796, "step": 11394 }, { "epoch": 1.5970567624386827, "grad_norm": 0.2853446304798126, "learning_rate": 0.00014372159770389858, "loss": 0.0357, "step": 11395 }, { "epoch": 1.5971969166082691, "grad_norm": 0.25550398230552673, "learning_rate": 0.0001437072470700789, "loss": 0.0839, "step": 11396 }, { "epoch": 1.5973370707778556, "grad_norm": 0.13601426780223846, "learning_rate": 0.00014369289643625924, "loss": 0.0149, "step": 11397 }, { "epoch": 1.597477224947442, "grad_norm": 0.1922236680984497, "learning_rate": 0.0001436785458024396, "loss": 0.028, "step": 11398 }, { "epoch": 1.5976173791170287, "grad_norm": 0.3007862865924835, "learning_rate": 0.00014366419516861993, "loss": 0.0397, "step": 11399 }, { "epoch": 1.5977575332866154, "grad_norm": 0.20555178821086884, "learning_rate": 0.00014364984453480028, "loss": 0.0096, "step": 11400 }, { "epoch": 1.5978976874562019, "grad_norm": 0.40918028354644775, "learning_rate": 0.0001436354939009806, "loss": 0.0468, "step": 11401 }, { "epoch": 1.5980378416257883, "grad_norm": 0.24805596470832825, "learning_rate": 0.00014362114326716094, "loss": 0.056, "step": 11402 }, { "epoch": 1.5981779957953748, "grad_norm": 0.2547355592250824, "learning_rate": 0.0001436067926333413, "loss": 0.0548, "step": 11403 }, { "epoch": 1.5983181499649615, "grad_norm": 0.47048670053482056, "learning_rate": 0.00014359244199952165, "loss": 0.0667, "step": 11404 }, { "epoch": 1.5984583041345481, "grad_norm": 0.48704493045806885, "learning_rate": 0.00014357809136570198, "loss": 0.0786, "step": 11405 }, { "epoch": 1.5985984583041346, "grad_norm": 0.14439354836940765, "learning_rate": 0.0001435637407318823, "loss": 0.042, "step": 11406 }, { "epoch": 1.598738612473721, "grad_norm": 0.13267837464809418, 
"learning_rate": 0.00014354939009806263, "loss": 0.0127, "step": 11407 }, { "epoch": 1.5988787666433075, "grad_norm": 0.772402286529541, "learning_rate": 0.000143535039464243, "loss": 0.0622, "step": 11408 }, { "epoch": 1.5990189208128942, "grad_norm": 0.12240627408027649, "learning_rate": 0.00014352068883042335, "loss": 0.0073, "step": 11409 }, { "epoch": 1.5991590749824809, "grad_norm": 0.32743585109710693, "learning_rate": 0.00014350633819660367, "loss": 0.0305, "step": 11410 }, { "epoch": 1.5992992291520673, "grad_norm": 0.19536246359348297, "learning_rate": 0.000143491987562784, "loss": 0.0206, "step": 11411 }, { "epoch": 1.5994393833216538, "grad_norm": 0.16025663912296295, "learning_rate": 0.00014347763692896436, "loss": 0.0292, "step": 11412 }, { "epoch": 1.5995795374912403, "grad_norm": 0.32042425870895386, "learning_rate": 0.0001434632862951447, "loss": 0.0394, "step": 11413 }, { "epoch": 1.599719691660827, "grad_norm": 0.2431601583957672, "learning_rate": 0.00014344893566132504, "loss": 0.0357, "step": 11414 }, { "epoch": 1.5998598458304134, "grad_norm": 0.4907341003417969, "learning_rate": 0.00014343458502750537, "loss": 0.042, "step": 11415 }, { "epoch": 1.6, "grad_norm": 0.7053388953208923, "learning_rate": 0.0001434202343936857, "loss": 0.0561, "step": 11416 }, { "epoch": 1.6001401541695865, "grad_norm": 0.3977697193622589, "learning_rate": 0.00014340588375986606, "loss": 0.0364, "step": 11417 }, { "epoch": 1.600280308339173, "grad_norm": 0.3205070495605469, "learning_rate": 0.00014339153312604638, "loss": 0.0907, "step": 11418 }, { "epoch": 1.6004204625087597, "grad_norm": 0.39762935042381287, "learning_rate": 0.00014337718249222674, "loss": 0.1301, "step": 11419 }, { "epoch": 1.6005606166783461, "grad_norm": 0.29576027393341064, "learning_rate": 0.00014336283185840707, "loss": 0.0572, "step": 11420 }, { "epoch": 1.6007007708479328, "grad_norm": 0.47650256752967834, "learning_rate": 0.0001433484812245874, "loss": 0.0958, "step": 11421 }, { "epoch": 
1.6008409250175193, "grad_norm": 0.3434876501560211, "learning_rate": 0.00014333413059076775, "loss": 0.0174, "step": 11422 }, { "epoch": 1.6009810791871057, "grad_norm": 0.8974648118019104, "learning_rate": 0.00014331977995694808, "loss": 0.0631, "step": 11423 }, { "epoch": 1.6011212333566922, "grad_norm": 0.5391984581947327, "learning_rate": 0.0001433054293231284, "loss": 0.0246, "step": 11424 }, { "epoch": 1.601261387526279, "grad_norm": 0.3260977864265442, "learning_rate": 0.00014329107868930877, "loss": 0.0336, "step": 11425 }, { "epoch": 1.6014015416958656, "grad_norm": 0.4213988780975342, "learning_rate": 0.0001432767280554891, "loss": 0.0524, "step": 11426 }, { "epoch": 1.601541695865452, "grad_norm": 0.4234372675418854, "learning_rate": 0.00014326237742166945, "loss": 0.1597, "step": 11427 }, { "epoch": 1.6016818500350385, "grad_norm": 0.30518388748168945, "learning_rate": 0.00014324802678784978, "loss": 0.0128, "step": 11428 }, { "epoch": 1.601822004204625, "grad_norm": 0.2911543548107147, "learning_rate": 0.0001432336761540301, "loss": 0.0417, "step": 11429 }, { "epoch": 1.6019621583742116, "grad_norm": 0.47299671173095703, "learning_rate": 0.00014321932552021046, "loss": 0.0709, "step": 11430 }, { "epoch": 1.6021023125437983, "grad_norm": 0.47777125239372253, "learning_rate": 0.00014320497488639082, "loss": 0.0754, "step": 11431 }, { "epoch": 1.6022424667133848, "grad_norm": 0.37924832105636597, "learning_rate": 0.00014319062425257115, "loss": 0.0497, "step": 11432 }, { "epoch": 1.6023826208829712, "grad_norm": 2.9140608310699463, "learning_rate": 0.00014317627361875148, "loss": 0.2088, "step": 11433 }, { "epoch": 1.6025227750525577, "grad_norm": 0.6861683130264282, "learning_rate": 0.0001431619229849318, "loss": 0.0866, "step": 11434 }, { "epoch": 1.6026629292221444, "grad_norm": 1.7493194341659546, "learning_rate": 0.00014314757235111216, "loss": 0.4043, "step": 11435 }, { "epoch": 1.602803083391731, "grad_norm": 0.43702423572540283, "learning_rate": 
0.00014313322171729252, "loss": 0.0553, "step": 11436 }, { "epoch": 1.6029432375613175, "grad_norm": 0.1277390569448471, "learning_rate": 0.00014311887108347284, "loss": 0.0148, "step": 11437 }, { "epoch": 1.603083391730904, "grad_norm": 0.28713348507881165, "learning_rate": 0.00014310452044965317, "loss": 0.066, "step": 11438 }, { "epoch": 1.6032235459004904, "grad_norm": 0.2675716280937195, "learning_rate": 0.00014309016981583353, "loss": 0.0452, "step": 11439 }, { "epoch": 1.6033637000700771, "grad_norm": 0.19364652037620544, "learning_rate": 0.00014307581918201386, "loss": 0.0447, "step": 11440 }, { "epoch": 1.6035038542396638, "grad_norm": 0.5300313234329224, "learning_rate": 0.0001430614685481942, "loss": 0.0489, "step": 11441 }, { "epoch": 1.6036440084092503, "grad_norm": 0.25991445779800415, "learning_rate": 0.00014304711791437454, "loss": 0.0835, "step": 11442 }, { "epoch": 1.6037841625788367, "grad_norm": 0.4901083707809448, "learning_rate": 0.00014303276728055487, "loss": 0.0352, "step": 11443 }, { "epoch": 1.6039243167484232, "grad_norm": 0.37696927785873413, "learning_rate": 0.00014301841664673523, "loss": 0.0397, "step": 11444 }, { "epoch": 1.6040644709180099, "grad_norm": 0.215261310338974, "learning_rate": 0.00014300406601291555, "loss": 0.0434, "step": 11445 }, { "epoch": 1.6042046250875963, "grad_norm": 0.5213586091995239, "learning_rate": 0.0001429897153790959, "loss": 0.0566, "step": 11446 }, { "epoch": 1.604344779257183, "grad_norm": 0.35481783747673035, "learning_rate": 0.00014297536474527624, "loss": 0.0821, "step": 11447 }, { "epoch": 1.6044849334267695, "grad_norm": 0.2919999659061432, "learning_rate": 0.00014296101411145657, "loss": 0.0557, "step": 11448 }, { "epoch": 1.604625087596356, "grad_norm": 0.18999779224395752, "learning_rate": 0.00014294666347763692, "loss": 0.0695, "step": 11449 }, { "epoch": 1.6047652417659424, "grad_norm": 0.5571599006652832, "learning_rate": 0.00014293231284381725, "loss": 0.1317, "step": 11450 }, { "epoch": 
1.604905395935529, "grad_norm": 0.13731858134269714, "learning_rate": 0.0001429179622099976, "loss": 0.0322, "step": 11451 }, { "epoch": 1.6050455501051157, "grad_norm": 0.4020020663738251, "learning_rate": 0.00014290361157617794, "loss": 0.0509, "step": 11452 }, { "epoch": 1.6051857042747022, "grad_norm": 0.7551411986351013, "learning_rate": 0.00014288926094235826, "loss": 0.1063, "step": 11453 }, { "epoch": 1.6053258584442887, "grad_norm": 0.4349111318588257, "learning_rate": 0.00014287491030853862, "loss": 0.1069, "step": 11454 }, { "epoch": 1.6054660126138751, "grad_norm": 0.23612400889396667, "learning_rate": 0.00014286055967471895, "loss": 0.0439, "step": 11455 }, { "epoch": 1.6056061667834618, "grad_norm": 0.2968144118785858, "learning_rate": 0.00014284620904089928, "loss": 0.0682, "step": 11456 }, { "epoch": 1.6057463209530485, "grad_norm": 0.37891146540641785, "learning_rate": 0.00014283185840707963, "loss": 0.0408, "step": 11457 }, { "epoch": 1.605886475122635, "grad_norm": 0.33037495613098145, "learning_rate": 0.00014281750777325996, "loss": 0.0387, "step": 11458 }, { "epoch": 1.6060266292922214, "grad_norm": 0.25307202339172363, "learning_rate": 0.00014280315713944032, "loss": 0.0655, "step": 11459 }, { "epoch": 1.6061667834618079, "grad_norm": 0.4955904483795166, "learning_rate": 0.00014278880650562064, "loss": 0.0944, "step": 11460 }, { "epoch": 1.6063069376313945, "grad_norm": 0.30982741713523865, "learning_rate": 0.00014277445587180097, "loss": 0.0551, "step": 11461 }, { "epoch": 1.6064470918009812, "grad_norm": 0.21471638977527618, "learning_rate": 0.00014276010523798133, "loss": 0.0248, "step": 11462 }, { "epoch": 1.6065872459705677, "grad_norm": 0.29256123304367065, "learning_rate": 0.00014274575460416168, "loss": 0.0197, "step": 11463 }, { "epoch": 1.6067274001401541, "grad_norm": 0.17885033786296844, "learning_rate": 0.000142731403970342, "loss": 0.0367, "step": 11464 }, { "epoch": 1.6068675543097406, "grad_norm": 0.3832833170890808, 
"learning_rate": 0.00014271705333652234, "loss": 0.1491, "step": 11465 }, { "epoch": 1.6070077084793273, "grad_norm": 0.12220731377601624, "learning_rate": 0.0001427027027027027, "loss": 0.0198, "step": 11466 }, { "epoch": 1.607147862648914, "grad_norm": 0.08932118117809296, "learning_rate": 0.00014268835206888303, "loss": 0.0096, "step": 11467 }, { "epoch": 1.6072880168185004, "grad_norm": 0.18182271718978882, "learning_rate": 0.00014267400143506338, "loss": 0.0164, "step": 11468 }, { "epoch": 1.6074281709880869, "grad_norm": 1.2724153995513916, "learning_rate": 0.0001426596508012437, "loss": 0.0993, "step": 11469 }, { "epoch": 1.6075683251576733, "grad_norm": 0.23609989881515503, "learning_rate": 0.00014264530016742404, "loss": 0.0403, "step": 11470 }, { "epoch": 1.60770847932726, "grad_norm": 0.3410497307777405, "learning_rate": 0.0001426309495336044, "loss": 0.0476, "step": 11471 }, { "epoch": 1.6078486334968465, "grad_norm": 0.6018911004066467, "learning_rate": 0.00014261659889978472, "loss": 0.0488, "step": 11472 }, { "epoch": 1.6079887876664332, "grad_norm": 0.09402860701084137, "learning_rate": 0.00014260224826596508, "loss": 0.0188, "step": 11473 }, { "epoch": 1.6081289418360196, "grad_norm": 0.17101004719734192, "learning_rate": 0.0001425878976321454, "loss": 0.0223, "step": 11474 }, { "epoch": 1.608269096005606, "grad_norm": 0.2607898712158203, "learning_rate": 0.00014257354699832574, "loss": 0.066, "step": 11475 }, { "epoch": 1.6084092501751928, "grad_norm": 0.16262494027614594, "learning_rate": 0.0001425591963645061, "loss": 0.0066, "step": 11476 }, { "epoch": 1.6085494043447792, "grad_norm": 0.24213643372058868, "learning_rate": 0.00014254484573068642, "loss": 0.0217, "step": 11477 }, { "epoch": 1.608689558514366, "grad_norm": 0.24582268297672272, "learning_rate": 0.00014253049509686678, "loss": 0.0229, "step": 11478 }, { "epoch": 1.6088297126839524, "grad_norm": 0.4782690107822418, "learning_rate": 0.0001425161444630471, "loss": 0.025, "step": 11479 
}, { "epoch": 1.6089698668535388, "grad_norm": 0.9680556058883667, "learning_rate": 0.00014250179382922743, "loss": 0.1242, "step": 11480 }, { "epoch": 1.6091100210231253, "grad_norm": 0.5895328521728516, "learning_rate": 0.0001424874431954078, "loss": 0.0631, "step": 11481 }, { "epoch": 1.609250175192712, "grad_norm": 0.6573855876922607, "learning_rate": 0.00014247309256158814, "loss": 0.1201, "step": 11482 }, { "epoch": 1.6093903293622986, "grad_norm": 0.572184681892395, "learning_rate": 0.00014245874192776847, "loss": 0.0553, "step": 11483 }, { "epoch": 1.609530483531885, "grad_norm": 1.0979609489440918, "learning_rate": 0.0001424443912939488, "loss": 0.0784, "step": 11484 }, { "epoch": 1.6096706377014716, "grad_norm": 0.2946031987667084, "learning_rate": 0.00014243004066012913, "loss": 0.0226, "step": 11485 }, { "epoch": 1.609810791871058, "grad_norm": 0.6670780181884766, "learning_rate": 0.00014241569002630949, "loss": 0.0615, "step": 11486 }, { "epoch": 1.6099509460406447, "grad_norm": 0.2716189920902252, "learning_rate": 0.00014240133939248981, "loss": 0.0078, "step": 11487 }, { "epoch": 1.6100911002102314, "grad_norm": 0.39290618896484375, "learning_rate": 0.00014238698875867017, "loss": 0.0371, "step": 11488 }, { "epoch": 1.6102312543798178, "grad_norm": 0.2137584388256073, "learning_rate": 0.0001423726381248505, "loss": 0.0389, "step": 11489 }, { "epoch": 1.6103714085494043, "grad_norm": 0.32838350534439087, "learning_rate": 0.00014235828749103085, "loss": 0.0337, "step": 11490 }, { "epoch": 1.6105115627189908, "grad_norm": 0.154434934258461, "learning_rate": 0.00014234393685721118, "loss": 0.0285, "step": 11491 }, { "epoch": 1.6106517168885774, "grad_norm": 0.2677808701992035, "learning_rate": 0.0001423295862233915, "loss": 0.0232, "step": 11492 }, { "epoch": 1.6107918710581641, "grad_norm": 0.3976641297340393, "learning_rate": 0.00014231523558957184, "loss": 0.0964, "step": 11493 }, { "epoch": 1.6109320252277506, "grad_norm": 0.30432915687561035, 
"learning_rate": 0.0001423008849557522, "loss": 0.0344, "step": 11494 }, { "epoch": 1.611072179397337, "grad_norm": 0.20437073707580566, "learning_rate": 0.00014228653432193255, "loss": 0.0462, "step": 11495 }, { "epoch": 1.6112123335669235, "grad_norm": 0.36792802810668945, "learning_rate": 0.00014227218368811288, "loss": 0.0842, "step": 11496 }, { "epoch": 1.6113524877365102, "grad_norm": 0.3269313871860504, "learning_rate": 0.0001422578330542932, "loss": 0.0627, "step": 11497 }, { "epoch": 1.6114926419060969, "grad_norm": 0.4229220747947693, "learning_rate": 0.00014224348242047356, "loss": 0.056, "step": 11498 }, { "epoch": 1.6116327960756833, "grad_norm": 0.45769762992858887, "learning_rate": 0.0001422291317866539, "loss": 0.0223, "step": 11499 }, { "epoch": 1.6117729502452698, "grad_norm": 0.4342239797115326, "learning_rate": 0.00014221478115283425, "loss": 0.0297, "step": 11500 }, { "epoch": 1.6119131044148562, "grad_norm": 0.49738943576812744, "learning_rate": 0.00014220043051901458, "loss": 0.0495, "step": 11501 }, { "epoch": 1.612053258584443, "grad_norm": 0.30668190121650696, "learning_rate": 0.0001421860798851949, "loss": 0.0145, "step": 11502 }, { "epoch": 1.6121934127540294, "grad_norm": 0.4818120002746582, "learning_rate": 0.00014217172925137526, "loss": 0.0381, "step": 11503 }, { "epoch": 1.612333566923616, "grad_norm": 0.28638604283332825, "learning_rate": 0.0001421573786175556, "loss": 0.0814, "step": 11504 }, { "epoch": 1.6124737210932025, "grad_norm": 0.12675456702709198, "learning_rate": 0.00014214302798373595, "loss": 0.0203, "step": 11505 }, { "epoch": 1.612613875262789, "grad_norm": 0.11540811508893967, "learning_rate": 0.00014212867734991627, "loss": 0.0265, "step": 11506 }, { "epoch": 1.6127540294323757, "grad_norm": 0.14152872562408447, "learning_rate": 0.0001421143267160966, "loss": 0.0198, "step": 11507 }, { "epoch": 1.6128941836019621, "grad_norm": 0.5622986555099487, "learning_rate": 0.00014209997608227696, "loss": 0.1093, "step": 
11508 }, { "epoch": 1.6130343377715488, "grad_norm": 0.1203148365020752, "learning_rate": 0.0001420856254484573, "loss": 0.0391, "step": 11509 }, { "epoch": 1.6131744919411353, "grad_norm": 0.4011740982532501, "learning_rate": 0.00014207127481463764, "loss": 0.031, "step": 11510 }, { "epoch": 1.6133146461107217, "grad_norm": 0.5486384034156799, "learning_rate": 0.00014205692418081797, "loss": 0.025, "step": 11511 }, { "epoch": 1.6134548002803082, "grad_norm": 0.32643720507621765, "learning_rate": 0.0001420425735469983, "loss": 0.0512, "step": 11512 }, { "epoch": 1.6135949544498949, "grad_norm": 0.3651643395423889, "learning_rate": 0.00014202822291317865, "loss": 0.0365, "step": 11513 }, { "epoch": 1.6137351086194816, "grad_norm": 0.06403113156557083, "learning_rate": 0.000142013872279359, "loss": 0.0043, "step": 11514 }, { "epoch": 1.613875262789068, "grad_norm": 0.283630907535553, "learning_rate": 0.00014199952164553934, "loss": 0.0445, "step": 11515 }, { "epoch": 1.6140154169586545, "grad_norm": 0.3547370433807373, "learning_rate": 0.00014198517101171967, "loss": 0.0424, "step": 11516 }, { "epoch": 1.614155571128241, "grad_norm": 0.234574094414711, "learning_rate": 0.00014197082037790002, "loss": 0.0191, "step": 11517 }, { "epoch": 1.6142957252978276, "grad_norm": 0.9759863018989563, "learning_rate": 0.00014195646974408035, "loss": 0.0529, "step": 11518 }, { "epoch": 1.6144358794674143, "grad_norm": 0.3272368907928467, "learning_rate": 0.00014194211911026068, "loss": 0.029, "step": 11519 }, { "epoch": 1.6145760336370008, "grad_norm": 0.3000497817993164, "learning_rate": 0.00014192776847644104, "loss": 0.0532, "step": 11520 }, { "epoch": 1.6147161878065872, "grad_norm": 0.806154727935791, "learning_rate": 0.00014191341784262136, "loss": 0.0596, "step": 11521 }, { "epoch": 1.6148563419761737, "grad_norm": 0.46560943126678467, "learning_rate": 0.00014189906720880172, "loss": 0.0722, "step": 11522 }, { "epoch": 1.6149964961457604, "grad_norm": 0.5738423466682434, 
"learning_rate": 0.00014188471657498205, "loss": 0.0774, "step": 11523 }, { "epoch": 1.615136650315347, "grad_norm": 0.228576198220253, "learning_rate": 0.00014187036594116238, "loss": 0.047, "step": 11524 }, { "epoch": 1.6152768044849335, "grad_norm": 1.1163264513015747, "learning_rate": 0.00014185601530734273, "loss": 0.0255, "step": 11525 }, { "epoch": 1.61541695865452, "grad_norm": 0.14057192206382751, "learning_rate": 0.00014184166467352306, "loss": 0.0209, "step": 11526 }, { "epoch": 1.6155571128241064, "grad_norm": 0.2490060031414032, "learning_rate": 0.00014182731403970342, "loss": 0.0436, "step": 11527 }, { "epoch": 1.615697266993693, "grad_norm": 0.4295102655887604, "learning_rate": 0.00014181296340588375, "loss": 0.0375, "step": 11528 }, { "epoch": 1.6158374211632796, "grad_norm": 0.22657571732997894, "learning_rate": 0.00014179861277206407, "loss": 0.0441, "step": 11529 }, { "epoch": 1.6159775753328662, "grad_norm": 1.6277847290039062, "learning_rate": 0.00014178426213824443, "loss": 0.0671, "step": 11530 }, { "epoch": 1.6161177295024527, "grad_norm": 0.7943706512451172, "learning_rate": 0.00014176991150442476, "loss": 0.1968, "step": 11531 }, { "epoch": 1.6162578836720392, "grad_norm": 0.3241390883922577, "learning_rate": 0.00014175556087060511, "loss": 0.0566, "step": 11532 }, { "epoch": 1.6163980378416258, "grad_norm": 1.4788482189178467, "learning_rate": 0.00014174121023678544, "loss": 0.1092, "step": 11533 }, { "epoch": 1.6165381920112123, "grad_norm": 0.40441399812698364, "learning_rate": 0.00014172685960296577, "loss": 0.02, "step": 11534 }, { "epoch": 1.616678346180799, "grad_norm": 0.21731466054916382, "learning_rate": 0.00014171250896914613, "loss": 0.0084, "step": 11535 }, { "epoch": 1.6168185003503854, "grad_norm": 0.3413139879703522, "learning_rate": 0.00014169815833532648, "loss": 0.0595, "step": 11536 }, { "epoch": 1.616958654519972, "grad_norm": 0.37197378277778625, "learning_rate": 0.0001416838077015068, "loss": 0.0697, "step": 11537 }, 
{ "epoch": 1.6170988086895584, "grad_norm": 0.1412816196680069, "learning_rate": 0.00014166945706768714, "loss": 0.022, "step": 11538 }, { "epoch": 1.617238962859145, "grad_norm": 0.7602114677429199, "learning_rate": 0.00014165510643386747, "loss": 0.0542, "step": 11539 }, { "epoch": 1.6173791170287317, "grad_norm": 0.7870051860809326, "learning_rate": 0.00014164075580004782, "loss": 0.1007, "step": 11540 }, { "epoch": 1.6175192711983182, "grad_norm": 0.0858910009264946, "learning_rate": 0.00014162640516622818, "loss": 0.0135, "step": 11541 }, { "epoch": 1.6176594253679046, "grad_norm": 0.20803296566009521, "learning_rate": 0.0001416120545324085, "loss": 0.0415, "step": 11542 }, { "epoch": 1.617799579537491, "grad_norm": 0.36449846625328064, "learning_rate": 0.00014159770389858884, "loss": 0.0302, "step": 11543 }, { "epoch": 1.6179397337070778, "grad_norm": 0.8823717832565308, "learning_rate": 0.0001415833532647692, "loss": 0.1131, "step": 11544 }, { "epoch": 1.6180798878766645, "grad_norm": 0.5474041104316711, "learning_rate": 0.00014156900263094952, "loss": 0.0492, "step": 11545 }, { "epoch": 1.618220042046251, "grad_norm": 0.25257402658462524, "learning_rate": 0.00014155465199712988, "loss": 0.0629, "step": 11546 }, { "epoch": 1.6183601962158374, "grad_norm": 0.32781097292900085, "learning_rate": 0.0001415403013633102, "loss": 0.043, "step": 11547 }, { "epoch": 1.6185003503854238, "grad_norm": 0.08208863437175751, "learning_rate": 0.00014152595072949053, "loss": 0.0084, "step": 11548 }, { "epoch": 1.6186405045550105, "grad_norm": 0.1464567482471466, "learning_rate": 0.0001415116000956709, "loss": 0.0362, "step": 11549 }, { "epoch": 1.6187806587245972, "grad_norm": 0.1557217538356781, "learning_rate": 0.00014149724946185122, "loss": 0.0392, "step": 11550 }, { "epoch": 1.6189208128941837, "grad_norm": 0.08954671025276184, "learning_rate": 0.00014148289882803155, "loss": 0.0144, "step": 11551 }, { "epoch": 1.6190609670637701, "grad_norm": 0.31153181195259094, 
"learning_rate": 0.0001414685481942119, "loss": 0.053, "step": 11552 }, { "epoch": 1.6192011212333566, "grad_norm": 0.26565760374069214, "learning_rate": 0.00014145419756039223, "loss": 0.04, "step": 11553 }, { "epoch": 1.6193412754029433, "grad_norm": 0.2863524258136749, "learning_rate": 0.0001414398469265726, "loss": 0.031, "step": 11554 }, { "epoch": 1.61948142957253, "grad_norm": 0.14655672013759613, "learning_rate": 0.00014142549629275292, "loss": 0.0374, "step": 11555 }, { "epoch": 1.6196215837421164, "grad_norm": 0.7252004742622375, "learning_rate": 0.00014141114565893324, "loss": 0.1329, "step": 11556 }, { "epoch": 1.6197617379117029, "grad_norm": 0.21816428005695343, "learning_rate": 0.0001413967950251136, "loss": 0.0117, "step": 11557 }, { "epoch": 1.6199018920812893, "grad_norm": 0.3231394290924072, "learning_rate": 0.00014138244439129393, "loss": 0.0427, "step": 11558 }, { "epoch": 1.620042046250876, "grad_norm": 0.2110644429922104, "learning_rate": 0.00014136809375747428, "loss": 0.0232, "step": 11559 }, { "epoch": 1.6201822004204625, "grad_norm": 0.3024694621562958, "learning_rate": 0.0001413537431236546, "loss": 0.0393, "step": 11560 }, { "epoch": 1.6203223545900491, "grad_norm": 0.29343223571777344, "learning_rate": 0.00014133939248983494, "loss": 0.071, "step": 11561 }, { "epoch": 1.6204625087596356, "grad_norm": 0.20576244592666626, "learning_rate": 0.0001413250418560153, "loss": 0.067, "step": 11562 }, { "epoch": 1.620602662929222, "grad_norm": 0.2312684804201126, "learning_rate": 0.00014131069122219562, "loss": 0.0889, "step": 11563 }, { "epoch": 1.6207428170988087, "grad_norm": 0.5816258788108826, "learning_rate": 0.00014129634058837598, "loss": 0.0621, "step": 11564 }, { "epoch": 1.6208829712683952, "grad_norm": 0.2639305293560028, "learning_rate": 0.0001412819899545563, "loss": 0.1137, "step": 11565 }, { "epoch": 1.6210231254379819, "grad_norm": 0.32290351390838623, "learning_rate": 0.00014126763932073664, "loss": 0.0773, "step": 11566 }, { 
"epoch": 1.6211632796075683, "grad_norm": 0.6009404063224792, "learning_rate": 0.000141253288686917, "loss": 0.0695, "step": 11567 }, { "epoch": 1.6213034337771548, "grad_norm": 0.3422173857688904, "learning_rate": 0.00014123893805309735, "loss": 0.0334, "step": 11568 }, { "epoch": 1.6214435879467413, "grad_norm": 0.3206561207771301, "learning_rate": 0.00014122458741927768, "loss": 0.0354, "step": 11569 }, { "epoch": 1.621583742116328, "grad_norm": 0.19543053209781647, "learning_rate": 0.000141210236785458, "loss": 0.0461, "step": 11570 }, { "epoch": 1.6217238962859146, "grad_norm": 0.704021155834198, "learning_rate": 0.00014119588615163836, "loss": 0.0912, "step": 11571 }, { "epoch": 1.621864050455501, "grad_norm": 0.2393180876970291, "learning_rate": 0.0001411815355178187, "loss": 0.0237, "step": 11572 }, { "epoch": 1.6220042046250875, "grad_norm": 0.20762500166893005, "learning_rate": 0.00014116718488399905, "loss": 0.0304, "step": 11573 }, { "epoch": 1.622144358794674, "grad_norm": 0.31861111521720886, "learning_rate": 0.00014115283425017937, "loss": 0.0595, "step": 11574 }, { "epoch": 1.6222845129642607, "grad_norm": 0.5564166903495789, "learning_rate": 0.0001411384836163597, "loss": 0.0657, "step": 11575 }, { "epoch": 1.6224246671338474, "grad_norm": 0.19453097879886627, "learning_rate": 0.00014112413298254006, "loss": 0.0418, "step": 11576 }, { "epoch": 1.6225648213034338, "grad_norm": 0.7942109704017639, "learning_rate": 0.0001411097823487204, "loss": 0.0209, "step": 11577 }, { "epoch": 1.6227049754730203, "grad_norm": 0.1301758885383606, "learning_rate": 0.00014109543171490074, "loss": 0.0116, "step": 11578 }, { "epoch": 1.6228451296426067, "grad_norm": 0.0691453069448471, "learning_rate": 0.00014108108108108107, "loss": 0.0071, "step": 11579 }, { "epoch": 1.6229852838121934, "grad_norm": 0.14758393168449402, "learning_rate": 0.0001410667304472614, "loss": 0.0136, "step": 11580 }, { "epoch": 1.62312543798178, "grad_norm": 0.27677419781684875, 
"learning_rate": 0.00014105237981344176, "loss": 0.0124, "step": 11581 }, { "epoch": 1.6232655921513666, "grad_norm": 0.3889691233634949, "learning_rate": 0.00014103802917962208, "loss": 0.0691, "step": 11582 }, { "epoch": 1.623405746320953, "grad_norm": 0.7682013511657715, "learning_rate": 0.0001410236785458024, "loss": 0.0568, "step": 11583 }, { "epoch": 1.6235459004905395, "grad_norm": 0.4609513282775879, "learning_rate": 0.00014100932791198277, "loss": 0.0177, "step": 11584 }, { "epoch": 1.6236860546601262, "grad_norm": 1.136196494102478, "learning_rate": 0.0001409949772781631, "loss": 0.1233, "step": 11585 }, { "epoch": 1.6238262088297128, "grad_norm": 0.25201964378356934, "learning_rate": 0.00014098062664434345, "loss": 0.0879, "step": 11586 }, { "epoch": 1.6239663629992993, "grad_norm": 0.5443011522293091, "learning_rate": 0.00014096627601052378, "loss": 0.0692, "step": 11587 }, { "epoch": 1.6241065171688858, "grad_norm": 0.9291744828224182, "learning_rate": 0.0001409519253767041, "loss": 0.0647, "step": 11588 }, { "epoch": 1.6242466713384722, "grad_norm": 0.4512588679790497, "learning_rate": 0.00014093757474288447, "loss": 0.0721, "step": 11589 }, { "epoch": 1.624386825508059, "grad_norm": 0.18535341322422028, "learning_rate": 0.0001409232241090648, "loss": 0.028, "step": 11590 }, { "epoch": 1.6245269796776454, "grad_norm": 0.4352816939353943, "learning_rate": 0.00014090887347524515, "loss": 0.0831, "step": 11591 }, { "epoch": 1.624667133847232, "grad_norm": 0.18647170066833496, "learning_rate": 0.00014089452284142548, "loss": 0.0391, "step": 11592 }, { "epoch": 1.6248072880168185, "grad_norm": 0.22422170639038086, "learning_rate": 0.0001408801722076058, "loss": 0.0118, "step": 11593 }, { "epoch": 1.624947442186405, "grad_norm": 0.2361518293619156, "learning_rate": 0.00014086582157378616, "loss": 0.0513, "step": 11594 }, { "epoch": 1.6250875963559914, "grad_norm": 0.2519594728946686, "learning_rate": 0.00014085147093996652, "loss": 0.0327, "step": 11595 }, 
{ "epoch": 1.625227750525578, "grad_norm": 0.2969636023044586, "learning_rate": 0.00014083712030614685, "loss": 0.0393, "step": 11596 }, { "epoch": 1.6253679046951648, "grad_norm": 0.5165436863899231, "learning_rate": 0.00014082276967232718, "loss": 0.0577, "step": 11597 }, { "epoch": 1.6255080588647512, "grad_norm": 0.47275036573410034, "learning_rate": 0.0001408084190385075, "loss": 0.051, "step": 11598 }, { "epoch": 1.6256482130343377, "grad_norm": 0.5552020072937012, "learning_rate": 0.00014079406840468786, "loss": 0.0511, "step": 11599 }, { "epoch": 1.6257883672039242, "grad_norm": 0.16615641117095947, "learning_rate": 0.00014077971777086822, "loss": 0.0243, "step": 11600 }, { "epoch": 1.6259285213735108, "grad_norm": 0.34352177381515503, "learning_rate": 0.00014076536713704854, "loss": 0.0217, "step": 11601 }, { "epoch": 1.6260686755430975, "grad_norm": 0.3269125521183014, "learning_rate": 0.00014075101650322887, "loss": 0.0482, "step": 11602 }, { "epoch": 1.626208829712684, "grad_norm": 0.5768211483955383, "learning_rate": 0.00014073666586940923, "loss": 0.0418, "step": 11603 }, { "epoch": 1.6263489838822704, "grad_norm": 0.49134424328804016, "learning_rate": 0.00014072231523558956, "loss": 0.0379, "step": 11604 }, { "epoch": 1.626489138051857, "grad_norm": 0.09402913600206375, "learning_rate": 0.0001407079646017699, "loss": 0.0131, "step": 11605 }, { "epoch": 1.6266292922214436, "grad_norm": 0.027240999042987823, "learning_rate": 0.00014069361396795024, "loss": 0.0022, "step": 11606 }, { "epoch": 1.6267694463910303, "grad_norm": 0.2057039737701416, "learning_rate": 0.00014067926333413057, "loss": 0.0263, "step": 11607 }, { "epoch": 1.6269096005606167, "grad_norm": 0.19102413952350616, "learning_rate": 0.00014066491270031093, "loss": 0.0294, "step": 11608 }, { "epoch": 1.6270497547302032, "grad_norm": 0.37462911009788513, "learning_rate": 0.00014065056206649125, "loss": 0.0914, "step": 11609 }, { "epoch": 1.6271899088997896, "grad_norm": 0.5419964790344238, 
"learning_rate": 0.0001406362114326716, "loss": 0.0386, "step": 11610 }, { "epoch": 1.6273300630693763, "grad_norm": 0.2506133019924164, "learning_rate": 0.00014062186079885194, "loss": 0.0086, "step": 11611 }, { "epoch": 1.627470217238963, "grad_norm": 0.19552595913410187, "learning_rate": 0.00014060751016503227, "loss": 0.0127, "step": 11612 }, { "epoch": 1.6276103714085495, "grad_norm": 0.32846346497535706, "learning_rate": 0.00014059315953121262, "loss": 0.0564, "step": 11613 }, { "epoch": 1.627750525578136, "grad_norm": 0.1583506464958191, "learning_rate": 0.00014057880889739295, "loss": 0.0267, "step": 11614 }, { "epoch": 1.6278906797477224, "grad_norm": 0.34699931740760803, "learning_rate": 0.0001405644582635733, "loss": 0.0401, "step": 11615 }, { "epoch": 1.628030833917309, "grad_norm": 0.23199664056301117, "learning_rate": 0.00014055010762975363, "loss": 0.0319, "step": 11616 }, { "epoch": 1.6281709880868955, "grad_norm": 0.1987570822238922, "learning_rate": 0.00014053575699593396, "loss": 0.0157, "step": 11617 }, { "epoch": 1.6283111422564822, "grad_norm": 0.43920180201530457, "learning_rate": 0.00014052140636211432, "loss": 0.0389, "step": 11618 }, { "epoch": 1.6284512964260687, "grad_norm": 0.2858230471611023, "learning_rate": 0.00014050705572829465, "loss": 0.0895, "step": 11619 }, { "epoch": 1.6285914505956551, "grad_norm": 0.2685343325138092, "learning_rate": 0.00014049270509447498, "loss": 0.0684, "step": 11620 }, { "epoch": 1.6287316047652418, "grad_norm": 0.36698684096336365, "learning_rate": 0.00014047835446065533, "loss": 0.0892, "step": 11621 }, { "epoch": 1.6288717589348283, "grad_norm": 0.18439428508281708, "learning_rate": 0.0001404640038268357, "loss": 0.0316, "step": 11622 }, { "epoch": 1.629011913104415, "grad_norm": 0.6981950402259827, "learning_rate": 0.00014044965319301602, "loss": 0.0594, "step": 11623 }, { "epoch": 1.6291520672740014, "grad_norm": 0.36755767464637756, "learning_rate": 0.00014043530255919634, "loss": 0.0308, "step": 
11624 }, { "epoch": 1.6292922214435879, "grad_norm": 0.5525476932525635, "learning_rate": 0.00014042095192537667, "loss": 0.0145, "step": 11625 }, { "epoch": 1.6294323756131743, "grad_norm": 1.8820995092391968, "learning_rate": 0.00014040660129155703, "loss": 0.0557, "step": 11626 }, { "epoch": 1.629572529782761, "grad_norm": 0.4943757951259613, "learning_rate": 0.00014039225065773738, "loss": 0.0555, "step": 11627 }, { "epoch": 1.6297126839523477, "grad_norm": 0.32236361503601074, "learning_rate": 0.0001403779000239177, "loss": 0.0863, "step": 11628 }, { "epoch": 1.6298528381219342, "grad_norm": 0.322846382856369, "learning_rate": 0.00014036354939009804, "loss": 0.013, "step": 11629 }, { "epoch": 1.6299929922915206, "grad_norm": 0.18453797698020935, "learning_rate": 0.0001403491987562784, "loss": 0.0214, "step": 11630 }, { "epoch": 1.630133146461107, "grad_norm": 0.3332267701625824, "learning_rate": 0.00014033484812245873, "loss": 0.031, "step": 11631 }, { "epoch": 1.6302733006306938, "grad_norm": 1.2403481006622314, "learning_rate": 0.00014032049748863908, "loss": 0.0477, "step": 11632 }, { "epoch": 1.6304134548002804, "grad_norm": 0.24606643617153168, "learning_rate": 0.0001403061468548194, "loss": 0.0931, "step": 11633 }, { "epoch": 1.630553608969867, "grad_norm": 1.2664353847503662, "learning_rate": 0.00014029179622099974, "loss": 0.1896, "step": 11634 }, { "epoch": 1.6306937631394534, "grad_norm": 0.3728208541870117, "learning_rate": 0.0001402774455871801, "loss": 0.0263, "step": 11635 }, { "epoch": 1.6308339173090398, "grad_norm": 0.11035755276679993, "learning_rate": 0.00014026309495336042, "loss": 0.0282, "step": 11636 }, { "epoch": 1.6309740714786265, "grad_norm": 0.4083704948425293, "learning_rate": 0.00014024874431954078, "loss": 0.0656, "step": 11637 }, { "epoch": 1.6311142256482132, "grad_norm": 0.4946696162223816, "learning_rate": 0.0001402343936857211, "loss": 0.0442, "step": 11638 }, { "epoch": 1.6312543798177996, "grad_norm": 0.38726767897605896, 
"learning_rate": 0.00014022004305190144, "loss": 0.0395, "step": 11639 }, { "epoch": 1.631394533987386, "grad_norm": 0.49798622727394104, "learning_rate": 0.0001402056924180818, "loss": 0.0341, "step": 11640 }, { "epoch": 1.6315346881569726, "grad_norm": 0.38486430048942566, "learning_rate": 0.00014019134178426212, "loss": 0.0531, "step": 11641 }, { "epoch": 1.6316748423265592, "grad_norm": 0.18969294428825378, "learning_rate": 0.00014017699115044248, "loss": 0.033, "step": 11642 }, { "epoch": 1.631814996496146, "grad_norm": 0.5594385266304016, "learning_rate": 0.0001401626405166228, "loss": 0.047, "step": 11643 }, { "epoch": 1.6319551506657324, "grad_norm": 0.28359031677246094, "learning_rate": 0.00014014828988280313, "loss": 0.0912, "step": 11644 }, { "epoch": 1.6320953048353188, "grad_norm": 0.21976250410079956, "learning_rate": 0.0001401339392489835, "loss": 0.0231, "step": 11645 }, { "epoch": 1.6322354590049053, "grad_norm": 0.13548928499221802, "learning_rate": 0.00014011958861516382, "loss": 0.0107, "step": 11646 }, { "epoch": 1.632375613174492, "grad_norm": 0.04017278179526329, "learning_rate": 0.00014010523798134417, "loss": 0.0033, "step": 11647 }, { "epoch": 1.6325157673440784, "grad_norm": 1.0490429401397705, "learning_rate": 0.0001400908873475245, "loss": 0.0202, "step": 11648 }, { "epoch": 1.6326559215136651, "grad_norm": 0.20822161436080933, "learning_rate": 0.00014007653671370486, "loss": 0.0426, "step": 11649 }, { "epoch": 1.6327960756832516, "grad_norm": 0.4806745648384094, "learning_rate": 0.00014006218607988519, "loss": 0.0906, "step": 11650 }, { "epoch": 1.632936229852838, "grad_norm": 0.16590049862861633, "learning_rate": 0.00014004783544606551, "loss": 0.0215, "step": 11651 }, { "epoch": 1.6330763840224247, "grad_norm": 0.43973755836486816, "learning_rate": 0.00014003348481224584, "loss": 0.0339, "step": 11652 }, { "epoch": 1.6332165381920112, "grad_norm": 0.23996147513389587, "learning_rate": 0.0001400191341784262, "loss": 0.0323, "step": 
11653 }, { "epoch": 1.6333566923615979, "grad_norm": 0.15574686229228973, "learning_rate": 0.00014000478354460655, "loss": 0.0609, "step": 11654 }, { "epoch": 1.6334968465311843, "grad_norm": 0.22125369310379028, "learning_rate": 0.00013999043291078688, "loss": 0.0497, "step": 11655 }, { "epoch": 1.6336370007007708, "grad_norm": 0.2110290676355362, "learning_rate": 0.0001399760822769672, "loss": 0.0192, "step": 11656 }, { "epoch": 1.6337771548703572, "grad_norm": 0.2032098025083542, "learning_rate": 0.00013996173164314757, "loss": 0.0357, "step": 11657 }, { "epoch": 1.633917309039944, "grad_norm": 0.25698280334472656, "learning_rate": 0.0001399473810093279, "loss": 0.078, "step": 11658 }, { "epoch": 1.6340574632095306, "grad_norm": 0.21493308246135712, "learning_rate": 0.00013993303037550825, "loss": 0.0296, "step": 11659 }, { "epoch": 1.634197617379117, "grad_norm": 0.23097802698612213, "learning_rate": 0.00013991867974168858, "loss": 0.0236, "step": 11660 }, { "epoch": 1.6343377715487035, "grad_norm": 0.24762603640556335, "learning_rate": 0.0001399043291078689, "loss": 0.0837, "step": 11661 }, { "epoch": 1.63447792571829, "grad_norm": 0.1455816775560379, "learning_rate": 0.00013988997847404926, "loss": 0.0156, "step": 11662 }, { "epoch": 1.6346180798878767, "grad_norm": 0.28169047832489014, "learning_rate": 0.0001398756278402296, "loss": 0.039, "step": 11663 }, { "epoch": 1.6347582340574633, "grad_norm": 0.2514907121658325, "learning_rate": 0.00013986127720640995, "loss": 0.0432, "step": 11664 }, { "epoch": 1.6348983882270498, "grad_norm": 0.22667475044727325, "learning_rate": 0.00013984692657259028, "loss": 0.0606, "step": 11665 }, { "epoch": 1.6350385423966363, "grad_norm": 0.13793246448040009, "learning_rate": 0.0001398325759387706, "loss": 0.0167, "step": 11666 }, { "epoch": 1.6351786965662227, "grad_norm": 0.45482996106147766, "learning_rate": 0.00013981822530495096, "loss": 0.0429, "step": 11667 }, { "epoch": 1.6353188507358094, "grad_norm": 
0.2539718449115753, "learning_rate": 0.0001398038746711313, "loss": 0.044, "step": 11668 }, { "epoch": 1.635459004905396, "grad_norm": 0.2680078148841858, "learning_rate": 0.00013978952403731164, "loss": 0.0223, "step": 11669 }, { "epoch": 1.6355991590749825, "grad_norm": 0.45410051941871643, "learning_rate": 0.00013977517340349197, "loss": 0.1116, "step": 11670 }, { "epoch": 1.635739313244569, "grad_norm": 0.632757842540741, "learning_rate": 0.0001397608227696723, "loss": 0.0835, "step": 11671 }, { "epoch": 1.6358794674141555, "grad_norm": 0.8625863194465637, "learning_rate": 0.00013974647213585266, "loss": 0.1734, "step": 11672 }, { "epoch": 1.6360196215837421, "grad_norm": 0.6289317011833191, "learning_rate": 0.000139732121502033, "loss": 0.0401, "step": 11673 }, { "epoch": 1.6361597757533288, "grad_norm": 0.4812743067741394, "learning_rate": 0.00013971777086821334, "loss": 0.0576, "step": 11674 }, { "epoch": 1.6362999299229153, "grad_norm": 0.1312282383441925, "learning_rate": 0.00013970342023439367, "loss": 0.0148, "step": 11675 }, { "epoch": 1.6364400840925017, "grad_norm": 0.1737457513809204, "learning_rate": 0.000139689069600574, "loss": 0.0222, "step": 11676 }, { "epoch": 1.6365802382620882, "grad_norm": 0.2768307626247406, "learning_rate": 0.00013967471896675435, "loss": 0.0503, "step": 11677 }, { "epoch": 1.6367203924316749, "grad_norm": 0.4666915833950043, "learning_rate": 0.00013966036833293468, "loss": 0.0482, "step": 11678 }, { "epoch": 1.6368605466012613, "grad_norm": 0.07669016718864441, "learning_rate": 0.00013964601769911504, "loss": 0.0087, "step": 11679 }, { "epoch": 1.637000700770848, "grad_norm": 0.7221842408180237, "learning_rate": 0.00013963166706529537, "loss": 0.1014, "step": 11680 }, { "epoch": 1.6371408549404345, "grad_norm": 0.562724769115448, "learning_rate": 0.00013961731643147572, "loss": 0.0569, "step": 11681 }, { "epoch": 1.637281009110021, "grad_norm": 1.501454472541809, "learning_rate": 0.00013960296579765605, "loss": 0.1748, 
"step": 11682 }, { "epoch": 1.6374211632796074, "grad_norm": 0.23456807434558868, "learning_rate": 0.00013958861516383638, "loss": 0.0124, "step": 11683 }, { "epoch": 1.637561317449194, "grad_norm": 0.37844786047935486, "learning_rate": 0.00013957426453001674, "loss": 0.0428, "step": 11684 }, { "epoch": 1.6377014716187808, "grad_norm": 1.2032986879348755, "learning_rate": 0.00013955991389619706, "loss": 0.2068, "step": 11685 }, { "epoch": 1.6378416257883672, "grad_norm": 0.11660397797822952, "learning_rate": 0.00013954556326237742, "loss": 0.016, "step": 11686 }, { "epoch": 1.6379817799579537, "grad_norm": 0.1083175390958786, "learning_rate": 0.00013953121262855775, "loss": 0.0191, "step": 11687 }, { "epoch": 1.6381219341275401, "grad_norm": 0.14667271077632904, "learning_rate": 0.00013951686199473808, "loss": 0.0284, "step": 11688 }, { "epoch": 1.6382620882971268, "grad_norm": 0.1866927444934845, "learning_rate": 0.00013950251136091843, "loss": 0.0308, "step": 11689 }, { "epoch": 1.6384022424667135, "grad_norm": 0.5327773690223694, "learning_rate": 0.00013948816072709876, "loss": 0.0612, "step": 11690 }, { "epoch": 1.6385423966363, "grad_norm": 0.23347429931163788, "learning_rate": 0.00013947381009327912, "loss": 0.0255, "step": 11691 }, { "epoch": 1.6386825508058864, "grad_norm": 0.22115172445774078, "learning_rate": 0.00013945945945945945, "loss": 0.0359, "step": 11692 }, { "epoch": 1.6388227049754729, "grad_norm": 0.35347816348075867, "learning_rate": 0.00013944510882563977, "loss": 0.0549, "step": 11693 }, { "epoch": 1.6389628591450596, "grad_norm": 0.28366562724113464, "learning_rate": 0.00013943075819182013, "loss": 0.0617, "step": 11694 }, { "epoch": 1.6391030133146463, "grad_norm": 0.26388657093048096, "learning_rate": 0.00013941640755800046, "loss": 0.0434, "step": 11695 }, { "epoch": 1.6392431674842327, "grad_norm": 0.4912094175815582, "learning_rate": 0.00013940205692418081, "loss": 0.0546, "step": 11696 }, { "epoch": 1.6393833216538192, "grad_norm": 
0.17095795273780823, "learning_rate": 0.00013938770629036114, "loss": 0.0315, "step": 11697 }, { "epoch": 1.6395234758234056, "grad_norm": 0.11469148099422455, "learning_rate": 0.00013937335565654147, "loss": 0.0103, "step": 11698 }, { "epoch": 1.6396636299929923, "grad_norm": 0.3439951539039612, "learning_rate": 0.00013935900502272183, "loss": 0.1323, "step": 11699 }, { "epoch": 1.639803784162579, "grad_norm": 0.3164527416229248, "learning_rate": 0.00013934465438890218, "loss": 0.0971, "step": 11700 }, { "epoch": 1.6399439383321655, "grad_norm": 0.3256116807460785, "learning_rate": 0.0001393303037550825, "loss": 0.0913, "step": 11701 }, { "epoch": 1.640084092501752, "grad_norm": 0.3805089592933655, "learning_rate": 0.00013931595312126284, "loss": 0.0242, "step": 11702 }, { "epoch": 1.6402242466713384, "grad_norm": 0.2286466658115387, "learning_rate": 0.00013930160248744317, "loss": 0.0555, "step": 11703 }, { "epoch": 1.640364400840925, "grad_norm": 0.7314069867134094, "learning_rate": 0.00013928725185362352, "loss": 0.0478, "step": 11704 }, { "epoch": 1.6405045550105115, "grad_norm": 0.3232010304927826, "learning_rate": 0.00013927290121980388, "loss": 0.052, "step": 11705 }, { "epoch": 1.6406447091800982, "grad_norm": 0.21542169153690338, "learning_rate": 0.0001392585505859842, "loss": 0.0467, "step": 11706 }, { "epoch": 1.6407848633496847, "grad_norm": 0.5921143889427185, "learning_rate": 0.00013924419995216454, "loss": 0.0877, "step": 11707 }, { "epoch": 1.6409250175192711, "grad_norm": 0.15161815285682678, "learning_rate": 0.0001392298493183449, "loss": 0.026, "step": 11708 }, { "epoch": 1.6410651716888578, "grad_norm": 0.12948666512966156, "learning_rate": 0.00013921549868452522, "loss": 0.0145, "step": 11709 }, { "epoch": 1.6412053258584443, "grad_norm": 1.161110520362854, "learning_rate": 0.00013920114805070558, "loss": 0.0703, "step": 11710 }, { "epoch": 1.641345480028031, "grad_norm": 0.5644869208335876, "learning_rate": 0.0001391867974168859, "loss": 
0.1123, "step": 11711 }, { "epoch": 1.6414856341976174, "grad_norm": 0.26063624024391174, "learning_rate": 0.00013917244678306623, "loss": 0.0202, "step": 11712 }, { "epoch": 1.6416257883672039, "grad_norm": 0.39383062720298767, "learning_rate": 0.0001391580961492466, "loss": 0.067, "step": 11713 }, { "epoch": 1.6417659425367903, "grad_norm": 0.14393767714500427, "learning_rate": 0.00013914374551542692, "loss": 0.0134, "step": 11714 }, { "epoch": 1.641906096706377, "grad_norm": 0.26287737488746643, "learning_rate": 0.00013912939488160725, "loss": 0.0734, "step": 11715 }, { "epoch": 1.6420462508759637, "grad_norm": 0.12290789932012558, "learning_rate": 0.0001391150442477876, "loss": 0.0478, "step": 11716 }, { "epoch": 1.6421864050455501, "grad_norm": 0.5133820176124573, "learning_rate": 0.00013910069361396793, "loss": 0.0639, "step": 11717 }, { "epoch": 1.6423265592151366, "grad_norm": 0.44815823435783386, "learning_rate": 0.0001390863429801483, "loss": 0.0964, "step": 11718 }, { "epoch": 1.642466713384723, "grad_norm": 0.07294411957263947, "learning_rate": 0.00013907199234632862, "loss": 0.0066, "step": 11719 }, { "epoch": 1.6426068675543097, "grad_norm": 0.333614706993103, "learning_rate": 0.00013905764171250894, "loss": 0.022, "step": 11720 }, { "epoch": 1.6427470217238964, "grad_norm": 0.16638225317001343, "learning_rate": 0.0001390432910786893, "loss": 0.0249, "step": 11721 }, { "epoch": 1.6428871758934829, "grad_norm": 0.4734067916870117, "learning_rate": 0.00013902894044486963, "loss": 0.0359, "step": 11722 }, { "epoch": 1.6430273300630693, "grad_norm": 0.8964606523513794, "learning_rate": 0.00013901458981104998, "loss": 0.0514, "step": 11723 }, { "epoch": 1.6431674842326558, "grad_norm": 0.20593541860580444, "learning_rate": 0.0001390002391772303, "loss": 0.0296, "step": 11724 }, { "epoch": 1.6433076384022425, "grad_norm": 0.28778791427612305, "learning_rate": 0.00013898588854341064, "loss": 0.0959, "step": 11725 }, { "epoch": 1.6434477925718292, 
"grad_norm": 0.4542524218559265, "learning_rate": 0.000138971537909591, "loss": 0.0763, "step": 11726 }, { "epoch": 1.6435879467414156, "grad_norm": 0.3051491677761078, "learning_rate": 0.00013895718727577135, "loss": 0.1164, "step": 11727 }, { "epoch": 1.643728100911002, "grad_norm": 0.24849773943424225, "learning_rate": 0.00013894283664195168, "loss": 0.0222, "step": 11728 }, { "epoch": 1.6438682550805885, "grad_norm": 0.7094378471374512, "learning_rate": 0.000138928486008132, "loss": 0.026, "step": 11729 }, { "epoch": 1.6440084092501752, "grad_norm": 0.47970616817474365, "learning_rate": 0.00013891413537431234, "loss": 0.1069, "step": 11730 }, { "epoch": 1.644148563419762, "grad_norm": 0.3720422685146332, "learning_rate": 0.0001388997847404927, "loss": 0.014, "step": 11731 }, { "epoch": 1.6442887175893484, "grad_norm": 0.21593424677848816, "learning_rate": 0.00013888543410667305, "loss": 0.0159, "step": 11732 }, { "epoch": 1.6444288717589348, "grad_norm": 0.13539251685142517, "learning_rate": 0.00013887108347285338, "loss": 0.0128, "step": 11733 }, { "epoch": 1.6445690259285213, "grad_norm": 1.8731926679611206, "learning_rate": 0.0001388567328390337, "loss": 0.2625, "step": 11734 }, { "epoch": 1.644709180098108, "grad_norm": 1.3780369758605957, "learning_rate": 0.00013884238220521406, "loss": 0.1894, "step": 11735 }, { "epoch": 1.6448493342676944, "grad_norm": 0.426658034324646, "learning_rate": 0.0001388280315713944, "loss": 0.0731, "step": 11736 }, { "epoch": 1.644989488437281, "grad_norm": 0.30276042222976685, "learning_rate": 0.00013881368093757475, "loss": 0.1117, "step": 11737 }, { "epoch": 1.6451296426068676, "grad_norm": 0.12564964592456818, "learning_rate": 0.00013879933030375507, "loss": 0.0104, "step": 11738 }, { "epoch": 1.645269796776454, "grad_norm": 0.16238072514533997, "learning_rate": 0.0001387849796699354, "loss": 0.0167, "step": 11739 }, { "epoch": 1.6454099509460405, "grad_norm": 0.19371038675308228, "learning_rate": 0.00013877062903611576, 
"loss": 0.0573, "step": 11740 }, { "epoch": 1.6455501051156272, "grad_norm": 0.36758118867874146, "learning_rate": 0.0001387562784022961, "loss": 0.0345, "step": 11741 }, { "epoch": 1.6456902592852138, "grad_norm": 0.44633328914642334, "learning_rate": 0.00013874192776847644, "loss": 0.0625, "step": 11742 }, { "epoch": 1.6458304134548003, "grad_norm": 0.2560858726501465, "learning_rate": 0.00013872757713465677, "loss": 0.0948, "step": 11743 }, { "epoch": 1.6459705676243868, "grad_norm": 0.17425720393657684, "learning_rate": 0.0001387132265008371, "loss": 0.0375, "step": 11744 }, { "epoch": 1.6461107217939732, "grad_norm": 0.3766055107116699, "learning_rate": 0.00013869887586701746, "loss": 0.0458, "step": 11745 }, { "epoch": 1.64625087596356, "grad_norm": 0.20229826867580414, "learning_rate": 0.00013868452523319778, "loss": 0.0518, "step": 11746 }, { "epoch": 1.6463910301331466, "grad_norm": 0.11625409126281738, "learning_rate": 0.0001386701745993781, "loss": 0.0187, "step": 11747 }, { "epoch": 1.646531184302733, "grad_norm": 0.297679603099823, "learning_rate": 0.00013865582396555847, "loss": 0.0385, "step": 11748 }, { "epoch": 1.6466713384723195, "grad_norm": 1.0538545846939087, "learning_rate": 0.0001386414733317388, "loss": 0.111, "step": 11749 }, { "epoch": 1.646811492641906, "grad_norm": 0.08853999525308609, "learning_rate": 0.00013862712269791915, "loss": 0.01, "step": 11750 }, { "epoch": 1.6469516468114926, "grad_norm": 0.3008106052875519, "learning_rate": 0.00013861277206409948, "loss": 0.0192, "step": 11751 }, { "epoch": 1.6470918009810793, "grad_norm": 0.14477674663066864, "learning_rate": 0.0001385984214302798, "loss": 0.0178, "step": 11752 }, { "epoch": 1.6472319551506658, "grad_norm": 0.13875508308410645, "learning_rate": 0.00013858407079646017, "loss": 0.042, "step": 11753 }, { "epoch": 1.6473721093202522, "grad_norm": 0.2620844542980194, "learning_rate": 0.0001385697201626405, "loss": 0.047, "step": 11754 }, { "epoch": 1.6475122634898387, 
"grad_norm": 0.282022625207901, "learning_rate": 0.00013855536952882085, "loss": 0.0736, "step": 11755 }, { "epoch": 1.6476524176594254, "grad_norm": 0.33656007051467896, "learning_rate": 0.00013854101889500118, "loss": 0.1235, "step": 11756 }, { "epoch": 1.647792571829012, "grad_norm": 0.4576495289802551, "learning_rate": 0.0001385266682611815, "loss": 0.0617, "step": 11757 }, { "epoch": 1.6479327259985985, "grad_norm": 0.5562580823898315, "learning_rate": 0.00013851231762736186, "loss": 0.0553, "step": 11758 }, { "epoch": 1.648072880168185, "grad_norm": 0.20411553978919983, "learning_rate": 0.00013849796699354222, "loss": 0.0394, "step": 11759 }, { "epoch": 1.6482130343377714, "grad_norm": 0.33069273829460144, "learning_rate": 0.00013848361635972255, "loss": 0.0474, "step": 11760 }, { "epoch": 1.6483531885073581, "grad_norm": 0.2694055736064911, "learning_rate": 0.00013846926572590288, "loss": 0.0675, "step": 11761 }, { "epoch": 1.6484933426769446, "grad_norm": 0.35911455750465393, "learning_rate": 0.00013845491509208323, "loss": 0.0581, "step": 11762 }, { "epoch": 1.6486334968465313, "grad_norm": 0.11525940150022507, "learning_rate": 0.00013844056445826356, "loss": 0.0086, "step": 11763 }, { "epoch": 1.6487736510161177, "grad_norm": 0.422592431306839, "learning_rate": 0.00013842621382444392, "loss": 0.0921, "step": 11764 }, { "epoch": 1.6489138051857042, "grad_norm": 0.11063863337039948, "learning_rate": 0.00013841186319062424, "loss": 0.0159, "step": 11765 }, { "epoch": 1.6490539593552909, "grad_norm": 0.09566666185855865, "learning_rate": 0.00013839751255680457, "loss": 0.0114, "step": 11766 }, { "epoch": 1.6491941135248773, "grad_norm": 0.7951347827911377, "learning_rate": 0.00013838316192298493, "loss": 0.0443, "step": 11767 }, { "epoch": 1.649334267694464, "grad_norm": 0.3839142918586731, "learning_rate": 0.00013836881128916526, "loss": 0.0532, "step": 11768 }, { "epoch": 1.6494744218640505, "grad_norm": 0.2822932004928589, "learning_rate": 
0.0001383544606553456, "loss": 0.0421, "step": 11769 }, { "epoch": 1.649614576033637, "grad_norm": 0.4533008635044098, "learning_rate": 0.00013834011002152594, "loss": 0.0844, "step": 11770 }, { "epoch": 1.6497547302032234, "grad_norm": 0.15165278315544128, "learning_rate": 0.00013832575938770627, "loss": 0.015, "step": 11771 }, { "epoch": 1.64989488437281, "grad_norm": 0.9360820651054382, "learning_rate": 0.00013831140875388663, "loss": 0.1712, "step": 11772 }, { "epoch": 1.6500350385423967, "grad_norm": 0.2267729789018631, "learning_rate": 0.00013829705812006695, "loss": 0.0155, "step": 11773 }, { "epoch": 1.6501751927119832, "grad_norm": 0.4546307921409607, "learning_rate": 0.0001382827074862473, "loss": 0.0392, "step": 11774 }, { "epoch": 1.6503153468815697, "grad_norm": 0.9851316213607788, "learning_rate": 0.00013826835685242764, "loss": 0.0519, "step": 11775 }, { "epoch": 1.6504555010511561, "grad_norm": 0.11964554339647293, "learning_rate": 0.00013825400621860797, "loss": 0.0162, "step": 11776 }, { "epoch": 1.6505956552207428, "grad_norm": 0.6148589253425598, "learning_rate": 0.00013823965558478832, "loss": 0.028, "step": 11777 }, { "epoch": 1.6507358093903295, "grad_norm": 0.5039327144622803, "learning_rate": 0.00013822530495096865, "loss": 0.0574, "step": 11778 }, { "epoch": 1.650875963559916, "grad_norm": 0.4020152986049652, "learning_rate": 0.00013821095431714898, "loss": 0.0416, "step": 11779 }, { "epoch": 1.6510161177295024, "grad_norm": 0.405556321144104, "learning_rate": 0.00013819660368332933, "loss": 0.084, "step": 11780 }, { "epoch": 1.6511562718990889, "grad_norm": 0.2820827066898346, "learning_rate": 0.00013818225304950966, "loss": 0.0692, "step": 11781 }, { "epoch": 1.6512964260686755, "grad_norm": 1.0139774084091187, "learning_rate": 0.00013816790241569002, "loss": 0.2055, "step": 11782 }, { "epoch": 1.6514365802382622, "grad_norm": 1.809464454650879, "learning_rate": 0.00013815355178187035, "loss": 0.2039, "step": 11783 }, { "epoch": 
1.6515767344078487, "grad_norm": 0.8287193179130554, "learning_rate": 0.00013813920114805068, "loss": 0.0421, "step": 11784 }, { "epoch": 1.6517168885774351, "grad_norm": 0.5953776836395264, "learning_rate": 0.00013812485051423103, "loss": 0.036, "step": 11785 }, { "epoch": 1.6518570427470216, "grad_norm": 0.2768326997756958, "learning_rate": 0.0001381104998804114, "loss": 0.0897, "step": 11786 }, { "epoch": 1.6519971969166083, "grad_norm": 0.14924399554729462, "learning_rate": 0.00013809614924659172, "loss": 0.014, "step": 11787 }, { "epoch": 1.652137351086195, "grad_norm": 0.19737350940704346, "learning_rate": 0.00013808179861277204, "loss": 0.0482, "step": 11788 }, { "epoch": 1.6522775052557814, "grad_norm": 0.184872105717659, "learning_rate": 0.00013806744797895237, "loss": 0.0499, "step": 11789 }, { "epoch": 1.652417659425368, "grad_norm": 0.17982976138591766, "learning_rate": 0.00013805309734513273, "loss": 0.0286, "step": 11790 }, { "epoch": 1.6525578135949544, "grad_norm": 0.3252630829811096, "learning_rate": 0.00013803874671131308, "loss": 0.0691, "step": 11791 }, { "epoch": 1.652697967764541, "grad_norm": 0.18803054094314575, "learning_rate": 0.0001380243960774934, "loss": 0.0475, "step": 11792 }, { "epoch": 1.6528381219341275, "grad_norm": 0.19127315282821655, "learning_rate": 0.00013801004544367374, "loss": 0.0394, "step": 11793 }, { "epoch": 1.6529782761037142, "grad_norm": 0.2496771216392517, "learning_rate": 0.0001379956948098541, "loss": 0.0535, "step": 11794 }, { "epoch": 1.6531184302733006, "grad_norm": 0.2117943912744522, "learning_rate": 0.00013798134417603443, "loss": 0.0747, "step": 11795 }, { "epoch": 1.653258584442887, "grad_norm": 0.2620314061641693, "learning_rate": 0.00013796699354221478, "loss": 0.0688, "step": 11796 }, { "epoch": 1.6533987386124738, "grad_norm": 0.18790596723556519, "learning_rate": 0.0001379526429083951, "loss": 0.0277, "step": 11797 }, { "epoch": 1.6535388927820602, "grad_norm": 0.10476590692996979, "learning_rate": 
0.00013793829227457544, "loss": 0.0215, "step": 11798 }, { "epoch": 1.653679046951647, "grad_norm": 0.4203190207481384, "learning_rate": 0.0001379239416407558, "loss": 0.0523, "step": 11799 }, { "epoch": 1.6538192011212334, "grad_norm": 0.320651113986969, "learning_rate": 0.00013790959100693612, "loss": 0.032, "step": 11800 }, { "epoch": 1.6539593552908198, "grad_norm": 0.13263949751853943, "learning_rate": 0.00013789524037311648, "loss": 0.0393, "step": 11801 }, { "epoch": 1.6540995094604063, "grad_norm": 0.1796700805425644, "learning_rate": 0.0001378808897392968, "loss": 0.031, "step": 11802 }, { "epoch": 1.654239663629993, "grad_norm": 0.21623770892620087, "learning_rate": 0.00013786653910547714, "loss": 0.052, "step": 11803 }, { "epoch": 1.6543798177995797, "grad_norm": 0.4184766411781311, "learning_rate": 0.0001378521884716575, "loss": 0.0553, "step": 11804 }, { "epoch": 1.6545199719691661, "grad_norm": 0.29444655776023865, "learning_rate": 0.00013783783783783782, "loss": 0.0381, "step": 11805 }, { "epoch": 1.6546601261387526, "grad_norm": 0.281023234128952, "learning_rate": 0.00013782348720401818, "loss": 0.0473, "step": 11806 }, { "epoch": 1.654800280308339, "grad_norm": 0.22504088282585144, "learning_rate": 0.0001378091365701985, "loss": 0.0356, "step": 11807 }, { "epoch": 1.6549404344779257, "grad_norm": 0.2260611206293106, "learning_rate": 0.00013779478593637883, "loss": 0.0595, "step": 11808 }, { "epoch": 1.6550805886475124, "grad_norm": 0.18638841807842255, "learning_rate": 0.0001377804353025592, "loss": 0.038, "step": 11809 }, { "epoch": 1.6552207428170989, "grad_norm": 0.153292715549469, "learning_rate": 0.00013776608466873952, "loss": 0.0173, "step": 11810 }, { "epoch": 1.6553608969866853, "grad_norm": 0.47503137588500977, "learning_rate": 0.00013775173403491985, "loss": 0.0236, "step": 11811 }, { "epoch": 1.6555010511562718, "grad_norm": 0.7097020745277405, "learning_rate": 0.0001377373834011002, "loss": 0.1166, "step": 11812 }, { "epoch": 
1.6556412053258585, "grad_norm": 0.9861078262329102, "learning_rate": 0.00013772303276728056, "loss": 0.1646, "step": 11813 }, { "epoch": 1.6557813594954451, "grad_norm": 0.08831826597452164, "learning_rate": 0.00013770868213346089, "loss": 0.0246, "step": 11814 }, { "epoch": 1.6559215136650316, "grad_norm": 0.3325512707233429, "learning_rate": 0.00013769433149964121, "loss": 0.0505, "step": 11815 }, { "epoch": 1.656061667834618, "grad_norm": 0.2166481912136078, "learning_rate": 0.00013767998086582154, "loss": 0.0195, "step": 11816 }, { "epoch": 1.6562018220042045, "grad_norm": 0.5999817252159119, "learning_rate": 0.0001376656302320019, "loss": 0.0572, "step": 11817 }, { "epoch": 1.6563419761737912, "grad_norm": 0.6211214065551758, "learning_rate": 0.00013765127959818225, "loss": 0.0394, "step": 11818 }, { "epoch": 1.6564821303433779, "grad_norm": 0.6740303039550781, "learning_rate": 0.00013763692896436258, "loss": 0.0365, "step": 11819 }, { "epoch": 1.6566222845129643, "grad_norm": 0.22392001748085022, "learning_rate": 0.0001376225783305429, "loss": 0.0422, "step": 11820 }, { "epoch": 1.6567624386825508, "grad_norm": 0.21644064784049988, "learning_rate": 0.00013760822769672327, "loss": 0.0176, "step": 11821 }, { "epoch": 1.6569025928521373, "grad_norm": 0.3878958523273468, "learning_rate": 0.0001375938770629036, "loss": 0.1003, "step": 11822 }, { "epoch": 1.657042747021724, "grad_norm": 0.0884343609213829, "learning_rate": 0.00013757952642908395, "loss": 0.0085, "step": 11823 }, { "epoch": 1.6571829011913104, "grad_norm": 0.3534504771232605, "learning_rate": 0.00013756517579526428, "loss": 0.0194, "step": 11824 }, { "epoch": 1.657323055360897, "grad_norm": 0.15540985763072968, "learning_rate": 0.0001375508251614446, "loss": 0.0319, "step": 11825 }, { "epoch": 1.6574632095304835, "grad_norm": 0.3599049746990204, "learning_rate": 0.00013753647452762496, "loss": 0.0392, "step": 11826 }, { "epoch": 1.65760336370007, "grad_norm": 0.6477541923522949, "learning_rate": 
0.0001375221238938053, "loss": 0.1121, "step": 11827 }, { "epoch": 1.6577435178696565, "grad_norm": 0.4080917239189148, "learning_rate": 0.00013750777325998565, "loss": 0.0615, "step": 11828 }, { "epoch": 1.6578836720392431, "grad_norm": 1.1862410306930542, "learning_rate": 0.00013749342262616598, "loss": 0.179, "step": 11829 }, { "epoch": 1.6580238262088298, "grad_norm": 0.1407335102558136, "learning_rate": 0.0001374790719923463, "loss": 0.0117, "step": 11830 }, { "epoch": 1.6581639803784163, "grad_norm": 0.6689997911453247, "learning_rate": 0.00013746472135852666, "loss": 0.1023, "step": 11831 }, { "epoch": 1.6583041345480027, "grad_norm": 0.37400004267692566, "learning_rate": 0.00013745037072470702, "loss": 0.0475, "step": 11832 }, { "epoch": 1.6584442887175892, "grad_norm": 0.5994638800621033, "learning_rate": 0.00013743602009088734, "loss": 0.2705, "step": 11833 }, { "epoch": 1.6585844428871759, "grad_norm": 3.1800317764282227, "learning_rate": 0.00013742166945706767, "loss": 0.1777, "step": 11834 }, { "epoch": 1.6587245970567626, "grad_norm": 1.0134509801864624, "learning_rate": 0.000137407318823248, "loss": 0.1518, "step": 11835 }, { "epoch": 1.658864751226349, "grad_norm": 0.3975442945957184, "learning_rate": 0.00013739296818942836, "loss": 0.0707, "step": 11836 }, { "epoch": 1.6590049053959355, "grad_norm": 0.286672979593277, "learning_rate": 0.0001373786175556087, "loss": 0.0592, "step": 11837 }, { "epoch": 1.659145059565522, "grad_norm": 0.46812012791633606, "learning_rate": 0.00013736426692178904, "loss": 0.0426, "step": 11838 }, { "epoch": 1.6592852137351086, "grad_norm": 0.42614510655403137, "learning_rate": 0.00013734991628796937, "loss": 0.0641, "step": 11839 }, { "epoch": 1.6594253679046953, "grad_norm": 0.3011021912097931, "learning_rate": 0.00013733556565414973, "loss": 0.0517, "step": 11840 }, { "epoch": 1.6595655220742818, "grad_norm": 0.6219186186790466, "learning_rate": 0.00013732121502033005, "loss": 0.0863, "step": 11841 }, { "epoch": 
1.6597056762438682, "grad_norm": 0.3697461187839508, "learning_rate": 0.00013730686438651038, "loss": 0.0448, "step": 11842 }, { "epoch": 1.6598458304134547, "grad_norm": 0.15110893547534943, "learning_rate": 0.0001372925137526907, "loss": 0.025, "step": 11843 }, { "epoch": 1.6599859845830414, "grad_norm": 0.13687993586063385, "learning_rate": 0.00013727816311887107, "loss": 0.0138, "step": 11844 }, { "epoch": 1.660126138752628, "grad_norm": 0.13671039044857025, "learning_rate": 0.00013726381248505142, "loss": 0.0147, "step": 11845 }, { "epoch": 1.6602662929222145, "grad_norm": 0.5719878077507019, "learning_rate": 0.00013724946185123175, "loss": 0.1206, "step": 11846 }, { "epoch": 1.660406447091801, "grad_norm": 0.38528135418891907, "learning_rate": 0.00013723511121741208, "loss": 0.0852, "step": 11847 }, { "epoch": 1.6605466012613874, "grad_norm": 0.5481377243995667, "learning_rate": 0.00013722076058359244, "loss": 0.0681, "step": 11848 }, { "epoch": 1.660686755430974, "grad_norm": 0.3499343693256378, "learning_rate": 0.00013720640994977276, "loss": 0.0914, "step": 11849 }, { "epoch": 1.6608269096005606, "grad_norm": 0.21624954044818878, "learning_rate": 0.00013719205931595312, "loss": 0.0174, "step": 11850 }, { "epoch": 1.6609670637701472, "grad_norm": 0.4076785445213318, "learning_rate": 0.00013717770868213345, "loss": 0.0426, "step": 11851 }, { "epoch": 1.6611072179397337, "grad_norm": 0.5568191409111023, "learning_rate": 0.00013716335804831378, "loss": 0.0278, "step": 11852 }, { "epoch": 1.6612473721093202, "grad_norm": 0.37976574897766113, "learning_rate": 0.00013714900741449413, "loss": 0.0342, "step": 11853 }, { "epoch": 1.6613875262789068, "grad_norm": 0.6761259436607361, "learning_rate": 0.00013713465678067446, "loss": 0.0534, "step": 11854 }, { "epoch": 1.6615276804484933, "grad_norm": 0.2505970597267151, "learning_rate": 0.00013712030614685482, "loss": 0.0396, "step": 11855 }, { "epoch": 1.66166783461808, "grad_norm": 0.26204952597618103, 
"learning_rate": 0.00013710595551303515, "loss": 0.0281, "step": 11856 }, { "epoch": 1.6618079887876664, "grad_norm": 0.2770780920982361, "learning_rate": 0.00013709160487921547, "loss": 0.0344, "step": 11857 }, { "epoch": 1.661948142957253, "grad_norm": 0.40327659249305725, "learning_rate": 0.00013707725424539583, "loss": 0.029, "step": 11858 }, { "epoch": 1.6620882971268394, "grad_norm": 0.11049145460128784, "learning_rate": 0.00013706290361157616, "loss": 0.0152, "step": 11859 }, { "epoch": 1.662228451296426, "grad_norm": 0.17752285301685333, "learning_rate": 0.00013704855297775651, "loss": 0.0462, "step": 11860 }, { "epoch": 1.6623686054660127, "grad_norm": 0.531157374382019, "learning_rate": 0.00013703420234393684, "loss": 0.0894, "step": 11861 }, { "epoch": 1.6625087596355992, "grad_norm": 0.4597202241420746, "learning_rate": 0.00013701985171011717, "loss": 0.0238, "step": 11862 }, { "epoch": 1.6626489138051856, "grad_norm": 0.38533341884613037, "learning_rate": 0.00013700550107629753, "loss": 0.0382, "step": 11863 }, { "epoch": 1.662789067974772, "grad_norm": 0.2667778730392456, "learning_rate": 0.00013699115044247788, "loss": 0.0123, "step": 11864 }, { "epoch": 1.6629292221443588, "grad_norm": 0.15610401332378387, "learning_rate": 0.0001369767998086582, "loss": 0.0502, "step": 11865 }, { "epoch": 1.6630693763139455, "grad_norm": 0.1264648586511612, "learning_rate": 0.00013696244917483854, "loss": 0.0272, "step": 11866 }, { "epoch": 1.663209530483532, "grad_norm": 0.5182936191558838, "learning_rate": 0.00013694809854101887, "loss": 0.0867, "step": 11867 }, { "epoch": 1.6633496846531184, "grad_norm": 0.2675636112689972, "learning_rate": 0.00013693374790719922, "loss": 0.037, "step": 11868 }, { "epoch": 1.6634898388227048, "grad_norm": 0.2956010699272156, "learning_rate": 0.00013691939727337958, "loss": 0.0814, "step": 11869 }, { "epoch": 1.6636299929922915, "grad_norm": 0.15889495611190796, "learning_rate": 0.0001369050466395599, "loss": 0.0207, "step": 11870 
}, { "epoch": 1.6637701471618782, "grad_norm": 0.32948222756385803, "learning_rate": 0.00013689069600574024, "loss": 0.063, "step": 11871 }, { "epoch": 1.6639103013314647, "grad_norm": 0.7254371643066406, "learning_rate": 0.0001368763453719206, "loss": 0.0651, "step": 11872 }, { "epoch": 1.6640504555010511, "grad_norm": 0.49235978722572327, "learning_rate": 0.00013686199473810092, "loss": 0.1158, "step": 11873 }, { "epoch": 1.6641906096706376, "grad_norm": 0.24491435289382935, "learning_rate": 0.00013684764410428125, "loss": 0.0519, "step": 11874 }, { "epoch": 1.6643307638402243, "grad_norm": 0.3322868347167969, "learning_rate": 0.0001368332934704616, "loss": 0.0933, "step": 11875 }, { "epoch": 1.664470918009811, "grad_norm": 0.26144763827323914, "learning_rate": 0.00013681894283664193, "loss": 0.0459, "step": 11876 }, { "epoch": 1.6646110721793974, "grad_norm": 0.18525584042072296, "learning_rate": 0.0001368045922028223, "loss": 0.0299, "step": 11877 }, { "epoch": 1.6647512263489839, "grad_norm": 0.8073279857635498, "learning_rate": 0.00013679024156900262, "loss": 0.0784, "step": 11878 }, { "epoch": 1.6648913805185703, "grad_norm": 0.30658721923828125, "learning_rate": 0.00013677589093518295, "loss": 0.0316, "step": 11879 }, { "epoch": 1.665031534688157, "grad_norm": 0.45898351073265076, "learning_rate": 0.0001367615403013633, "loss": 0.0467, "step": 11880 }, { "epoch": 1.6651716888577435, "grad_norm": 0.45159777998924255, "learning_rate": 0.00013674718966754363, "loss": 0.0329, "step": 11881 }, { "epoch": 1.6653118430273302, "grad_norm": 0.5975531339645386, "learning_rate": 0.00013673283903372399, "loss": 0.1153, "step": 11882 }, { "epoch": 1.6654519971969166, "grad_norm": 0.22012656927108765, "learning_rate": 0.00013671848839990431, "loss": 0.0084, "step": 11883 }, { "epoch": 1.665592151366503, "grad_norm": 0.6469978094100952, "learning_rate": 0.00013670413776608464, "loss": 0.0548, "step": 11884 }, { "epoch": 1.6657323055360898, "grad_norm": 1.1799291372299194, 
"learning_rate": 0.000136689787132265, "loss": 0.0779, "step": 11885 }, { "epoch": 1.6658724597056762, "grad_norm": 0.2911396324634552, "learning_rate": 0.00013667543649844533, "loss": 0.0301, "step": 11886 }, { "epoch": 1.666012613875263, "grad_norm": 0.932594895362854, "learning_rate": 0.00013666108586462568, "loss": 0.0368, "step": 11887 }, { "epoch": 1.6661527680448494, "grad_norm": 0.28875109553337097, "learning_rate": 0.000136646735230806, "loss": 0.0762, "step": 11888 }, { "epoch": 1.6662929222144358, "grad_norm": 0.2923699915409088, "learning_rate": 0.00013663238459698634, "loss": 0.0347, "step": 11889 }, { "epoch": 1.6664330763840223, "grad_norm": 0.15817172825336456, "learning_rate": 0.0001366180339631667, "loss": 0.0107, "step": 11890 }, { "epoch": 1.666573230553609, "grad_norm": 0.30916720628738403, "learning_rate": 0.00013660368332934705, "loss": 0.038, "step": 11891 }, { "epoch": 1.6667133847231956, "grad_norm": 0.4304237961769104, "learning_rate": 0.00013658933269552738, "loss": 0.0245, "step": 11892 }, { "epoch": 1.666853538892782, "grad_norm": 0.6282362341880798, "learning_rate": 0.0001365749820617077, "loss": 0.0945, "step": 11893 }, { "epoch": 1.6669936930623686, "grad_norm": 0.3297266364097595, "learning_rate": 0.00013656063142788804, "loss": 0.0446, "step": 11894 }, { "epoch": 1.667133847231955, "grad_norm": 0.28222498297691345, "learning_rate": 0.0001365462807940684, "loss": 0.0443, "step": 11895 }, { "epoch": 1.6672740014015417, "grad_norm": 0.26622357964515686, "learning_rate": 0.00013653193016024875, "loss": 0.061, "step": 11896 }, { "epoch": 1.6674141555711284, "grad_norm": 0.9223150610923767, "learning_rate": 0.00013651757952642908, "loss": 0.1193, "step": 11897 }, { "epoch": 1.6675543097407148, "grad_norm": 0.05580474063754082, "learning_rate": 0.0001365032288926094, "loss": 0.0056, "step": 11898 }, { "epoch": 1.6676944639103013, "grad_norm": 0.5558134913444519, "learning_rate": 0.00013648887825878976, "loss": 0.0852, "step": 11899 }, { 
"epoch": 1.6678346180798878, "grad_norm": 0.1503482460975647, "learning_rate": 0.0001364745276249701, "loss": 0.0373, "step": 11900 }, { "epoch": 1.6679747722494744, "grad_norm": 0.38089436292648315, "learning_rate": 0.00013646017699115045, "loss": 0.0735, "step": 11901 }, { "epoch": 1.6681149264190611, "grad_norm": 0.41297438740730286, "learning_rate": 0.00013644582635733077, "loss": 0.084, "step": 11902 }, { "epoch": 1.6682550805886476, "grad_norm": 0.3086937963962555, "learning_rate": 0.0001364314757235111, "loss": 0.0422, "step": 11903 }, { "epoch": 1.668395234758234, "grad_norm": 0.48585644364356995, "learning_rate": 0.00013641712508969146, "loss": 0.0449, "step": 11904 }, { "epoch": 1.6685353889278205, "grad_norm": 0.24671518802642822, "learning_rate": 0.0001364027744558718, "loss": 0.0385, "step": 11905 }, { "epoch": 1.6686755430974072, "grad_norm": 0.3495645523071289, "learning_rate": 0.00013638842382205212, "loss": 0.0385, "step": 11906 }, { "epoch": 1.6688156972669939, "grad_norm": 0.3558703362941742, "learning_rate": 0.00013637407318823247, "loss": 0.0238, "step": 11907 }, { "epoch": 1.6689558514365803, "grad_norm": 0.07500531524419785, "learning_rate": 0.0001363597225544128, "loss": 0.0084, "step": 11908 }, { "epoch": 1.6690960056061668, "grad_norm": 0.13769300282001495, "learning_rate": 0.00013634537192059316, "loss": 0.0841, "step": 11909 }, { "epoch": 1.6692361597757532, "grad_norm": 0.2148124724626541, "learning_rate": 0.00013633102128677348, "loss": 0.0503, "step": 11910 }, { "epoch": 1.66937631394534, "grad_norm": 0.09973196685314178, "learning_rate": 0.0001363166706529538, "loss": 0.0182, "step": 11911 }, { "epoch": 1.6695164681149264, "grad_norm": 0.31246715784072876, "learning_rate": 0.00013630232001913417, "loss": 0.0401, "step": 11912 }, { "epoch": 1.669656622284513, "grad_norm": 0.36144205927848816, "learning_rate": 0.0001362879693853145, "loss": 0.0355, "step": 11913 }, { "epoch": 1.6697967764540995, "grad_norm": 0.24027907848358154, 
"learning_rate": 0.00013627361875149485, "loss": 0.0226, "step": 11914 }, { "epoch": 1.669936930623686, "grad_norm": 0.224004328250885, "learning_rate": 0.00013625926811767518, "loss": 0.0171, "step": 11915 }, { "epoch": 1.6700770847932724, "grad_norm": 0.3434467613697052, "learning_rate": 0.0001362449174838555, "loss": 0.0405, "step": 11916 }, { "epoch": 1.6702172389628591, "grad_norm": 0.21136601269245148, "learning_rate": 0.00013623056685003587, "loss": 0.0368, "step": 11917 }, { "epoch": 1.6703573931324458, "grad_norm": 0.24941956996917725, "learning_rate": 0.00013621621621621622, "loss": 0.064, "step": 11918 }, { "epoch": 1.6704975473020323, "grad_norm": 0.4517078995704651, "learning_rate": 0.00013620186558239655, "loss": 0.0393, "step": 11919 }, { "epoch": 1.6706377014716187, "grad_norm": 0.5788573026657104, "learning_rate": 0.00013618751494857688, "loss": 0.0166, "step": 11920 }, { "epoch": 1.6707778556412052, "grad_norm": 0.53787761926651, "learning_rate": 0.0001361731643147572, "loss": 0.0436, "step": 11921 }, { "epoch": 1.6709180098107919, "grad_norm": 0.4707808494567871, "learning_rate": 0.00013615881368093756, "loss": 0.088, "step": 11922 }, { "epoch": 1.6710581639803785, "grad_norm": 0.08876273781061172, "learning_rate": 0.00013614446304711792, "loss": 0.0138, "step": 11923 }, { "epoch": 1.671198318149965, "grad_norm": 0.5747291445732117, "learning_rate": 0.00013613011241329825, "loss": 0.0347, "step": 11924 }, { "epoch": 1.6713384723195515, "grad_norm": 0.433500736951828, "learning_rate": 0.00013611576177947858, "loss": 0.0539, "step": 11925 }, { "epoch": 1.671478626489138, "grad_norm": 0.1903090626001358, "learning_rate": 0.00013610141114565893, "loss": 0.0279, "step": 11926 }, { "epoch": 1.6716187806587246, "grad_norm": 0.19112512469291687, "learning_rate": 0.00013608706051183926, "loss": 0.0166, "step": 11927 }, { "epoch": 1.6717589348283113, "grad_norm": 0.5589933395385742, "learning_rate": 0.00013607270987801962, "loss": 0.0586, "step": 11928 }, 
{ "epoch": 1.6718990889978977, "grad_norm": 0.559594988822937, "learning_rate": 0.00013605835924419994, "loss": 0.0152, "step": 11929 }, { "epoch": 1.6720392431674842, "grad_norm": 0.5386563539505005, "learning_rate": 0.00013604400861038027, "loss": 0.1003, "step": 11930 }, { "epoch": 1.6721793973370707, "grad_norm": 0.9070091843605042, "learning_rate": 0.00013602965797656063, "loss": 0.1477, "step": 11931 }, { "epoch": 1.6723195515066573, "grad_norm": 0.29393908381462097, "learning_rate": 0.00013601530734274096, "loss": 0.0238, "step": 11932 }, { "epoch": 1.672459705676244, "grad_norm": 0.36104804277420044, "learning_rate": 0.0001360009567089213, "loss": 0.0118, "step": 11933 }, { "epoch": 1.6725998598458305, "grad_norm": 0.4152359366416931, "learning_rate": 0.00013598660607510164, "loss": 0.0233, "step": 11934 }, { "epoch": 1.672740014015417, "grad_norm": 2.065208673477173, "learning_rate": 0.00013597225544128197, "loss": 0.1959, "step": 11935 }, { "epoch": 1.6728801681850034, "grad_norm": 0.22712886333465576, "learning_rate": 0.00013595790480746232, "loss": 0.0317, "step": 11936 }, { "epoch": 1.67302032235459, "grad_norm": 0.34828656911849976, "learning_rate": 0.00013594355417364265, "loss": 0.0311, "step": 11937 }, { "epoch": 1.6731604765241765, "grad_norm": 0.3816492259502411, "learning_rate": 0.00013592920353982298, "loss": 0.0815, "step": 11938 }, { "epoch": 1.6733006306937632, "grad_norm": 0.2463635355234146, "learning_rate": 0.00013591485290600334, "loss": 0.0484, "step": 11939 }, { "epoch": 1.6734407848633497, "grad_norm": 0.24040736258029938, "learning_rate": 0.00013590050227218367, "loss": 0.0465, "step": 11940 }, { "epoch": 1.6735809390329361, "grad_norm": 0.3465491235256195, "learning_rate": 0.00013588615163836402, "loss": 0.1006, "step": 11941 }, { "epoch": 1.6737210932025228, "grad_norm": 0.1676328033208847, "learning_rate": 0.00013587180100454435, "loss": 0.0321, "step": 11942 }, { "epoch": 1.6738612473721093, "grad_norm": 0.4115368127822876, 
"learning_rate": 0.00013585745037072468, "loss": 0.0318, "step": 11943 }, { "epoch": 1.674001401541696, "grad_norm": 0.12745307385921478, "learning_rate": 0.00013584309973690503, "loss": 0.0162, "step": 11944 }, { "epoch": 1.6741415557112824, "grad_norm": 0.35464954376220703, "learning_rate": 0.0001358287491030854, "loss": 0.0727, "step": 11945 }, { "epoch": 1.6742817098808689, "grad_norm": 0.182016059756279, "learning_rate": 0.00013581439846926572, "loss": 0.0164, "step": 11946 }, { "epoch": 1.6744218640504553, "grad_norm": 0.3788032531738281, "learning_rate": 0.00013580004783544605, "loss": 0.0537, "step": 11947 }, { "epoch": 1.674562018220042, "grad_norm": 0.45415472984313965, "learning_rate": 0.00013578569720162638, "loss": 0.0801, "step": 11948 }, { "epoch": 1.6747021723896287, "grad_norm": 0.19531764090061188, "learning_rate": 0.00013577134656780673, "loss": 0.0308, "step": 11949 }, { "epoch": 1.6748423265592152, "grad_norm": 0.220106840133667, "learning_rate": 0.0001357569959339871, "loss": 0.0343, "step": 11950 }, { "epoch": 1.6749824807288016, "grad_norm": 0.17745056748390198, "learning_rate": 0.00013574264530016742, "loss": 0.0344, "step": 11951 }, { "epoch": 1.675122634898388, "grad_norm": 0.3207990229129791, "learning_rate": 0.00013572829466634774, "loss": 0.0435, "step": 11952 }, { "epoch": 1.6752627890679748, "grad_norm": 0.38948920369148254, "learning_rate": 0.0001357139440325281, "loss": 0.0935, "step": 11953 }, { "epoch": 1.6754029432375614, "grad_norm": 0.16411563754081726, "learning_rate": 0.00013569959339870843, "loss": 0.014, "step": 11954 }, { "epoch": 1.675543097407148, "grad_norm": 0.15310940146446228, "learning_rate": 0.00013568524276488878, "loss": 0.0238, "step": 11955 }, { "epoch": 1.6756832515767344, "grad_norm": 0.9212971329689026, "learning_rate": 0.0001356708921310691, "loss": 0.1334, "step": 11956 }, { "epoch": 1.6758234057463208, "grad_norm": 0.13926981389522552, "learning_rate": 0.00013565654149724944, "loss": 0.0231, "step": 
11957 }, { "epoch": 1.6759635599159075, "grad_norm": 0.2559824585914612, "learning_rate": 0.0001356421908634298, "loss": 0.0457, "step": 11958 }, { "epoch": 1.6761037140854942, "grad_norm": 0.3078050911426544, "learning_rate": 0.00013562784022961013, "loss": 0.053, "step": 11959 }, { "epoch": 1.6762438682550806, "grad_norm": 0.22574682533740997, "learning_rate": 0.00013561348959579048, "loss": 0.0371, "step": 11960 }, { "epoch": 1.676384022424667, "grad_norm": 0.3852347135543823, "learning_rate": 0.0001355991389619708, "loss": 0.0914, "step": 11961 }, { "epoch": 1.6765241765942536, "grad_norm": 0.21751011908054352, "learning_rate": 0.00013558478832815114, "loss": 0.0635, "step": 11962 }, { "epoch": 1.6766643307638402, "grad_norm": 0.2538410723209381, "learning_rate": 0.0001355704376943315, "loss": 0.0095, "step": 11963 }, { "epoch": 1.676804484933427, "grad_norm": 1.156904935836792, "learning_rate": 0.00013555608706051182, "loss": 0.102, "step": 11964 }, { "epoch": 1.6769446391030134, "grad_norm": 0.21310707926750183, "learning_rate": 0.00013554173642669218, "loss": 0.0232, "step": 11965 }, { "epoch": 1.6770847932725998, "grad_norm": 0.19779279828071594, "learning_rate": 0.0001355273857928725, "loss": 0.0283, "step": 11966 }, { "epoch": 1.6772249474421863, "grad_norm": 0.30429092049598694, "learning_rate": 0.00013551303515905284, "loss": 0.0277, "step": 11967 }, { "epoch": 1.677365101611773, "grad_norm": 0.17025989294052124, "learning_rate": 0.0001354986845252332, "loss": 0.0144, "step": 11968 }, { "epoch": 1.6775052557813595, "grad_norm": 0.3282012641429901, "learning_rate": 0.00013548433389141352, "loss": 0.0386, "step": 11969 }, { "epoch": 1.6776454099509461, "grad_norm": 0.90810227394104, "learning_rate": 0.00013546998325759385, "loss": 0.1024, "step": 11970 }, { "epoch": 1.6777855641205326, "grad_norm": 0.2610308825969696, "learning_rate": 0.0001354556326237742, "loss": 0.0708, "step": 11971 }, { "epoch": 1.677925718290119, "grad_norm": 0.5114941000938416, 
"learning_rate": 0.00013544128198995453, "loss": 0.0414, "step": 11972 }, { "epoch": 1.6780658724597055, "grad_norm": 0.09413972496986389, "learning_rate": 0.0001354269313561349, "loss": 0.0164, "step": 11973 }, { "epoch": 1.6782060266292922, "grad_norm": 0.19549696147441864, "learning_rate": 0.00013541258072231522, "loss": 0.0225, "step": 11974 }, { "epoch": 1.6783461807988789, "grad_norm": 0.27949437499046326, "learning_rate": 0.00013539823008849555, "loss": 0.0467, "step": 11975 }, { "epoch": 1.6784863349684653, "grad_norm": 0.7189096212387085, "learning_rate": 0.0001353838794546759, "loss": 0.0456, "step": 11976 }, { "epoch": 1.6786264891380518, "grad_norm": 0.3418213129043579, "learning_rate": 0.00013536952882085626, "loss": 0.0724, "step": 11977 }, { "epoch": 1.6787666433076383, "grad_norm": 0.36146309971809387, "learning_rate": 0.00013535517818703659, "loss": 0.0733, "step": 11978 }, { "epoch": 1.678906797477225, "grad_norm": 0.5192902088165283, "learning_rate": 0.00013534082755321691, "loss": 0.0903, "step": 11979 }, { "epoch": 1.6790469516468116, "grad_norm": 0.015255287289619446, "learning_rate": 0.00013532647691939727, "loss": 0.0018, "step": 11980 }, { "epoch": 1.679187105816398, "grad_norm": 0.7141194343566895, "learning_rate": 0.0001353121262855776, "loss": 0.0434, "step": 11981 }, { "epoch": 1.6793272599859845, "grad_norm": 0.345865935087204, "learning_rate": 0.00013529777565175795, "loss": 0.0654, "step": 11982 }, { "epoch": 1.679467414155571, "grad_norm": 0.5288901329040527, "learning_rate": 0.00013528342501793828, "loss": 0.0869, "step": 11983 }, { "epoch": 1.6796075683251577, "grad_norm": 0.980785608291626, "learning_rate": 0.0001352690743841186, "loss": 0.084, "step": 11984 }, { "epoch": 1.6797477224947444, "grad_norm": 0.10756443440914154, "learning_rate": 0.00013525472375029897, "loss": 0.0123, "step": 11985 }, { "epoch": 1.6798878766643308, "grad_norm": 0.08367438614368439, "learning_rate": 0.0001352403731164793, "loss": 0.0065, "step": 11986 
}, { "epoch": 1.6800280308339173, "grad_norm": 0.31768864393234253, "learning_rate": 0.00013522602248265965, "loss": 0.0268, "step": 11987 }, { "epoch": 1.6801681850035037, "grad_norm": 0.44531092047691345, "learning_rate": 0.00013521167184883998, "loss": 0.0426, "step": 11988 }, { "epoch": 1.6803083391730904, "grad_norm": 0.12503424286842346, "learning_rate": 0.0001351973212150203, "loss": 0.0128, "step": 11989 }, { "epoch": 1.680448493342677, "grad_norm": 0.5373589992523193, "learning_rate": 0.00013518297058120066, "loss": 0.0679, "step": 11990 }, { "epoch": 1.6805886475122636, "grad_norm": 0.2363787293434143, "learning_rate": 0.000135168619947381, "loss": 0.027, "step": 11991 }, { "epoch": 1.68072880168185, "grad_norm": 0.29088252782821655, "learning_rate": 0.00013515426931356135, "loss": 0.025, "step": 11992 }, { "epoch": 1.6808689558514365, "grad_norm": 0.36560165882110596, "learning_rate": 0.00013513991867974168, "loss": 0.0442, "step": 11993 }, { "epoch": 1.6810091100210232, "grad_norm": 0.1881323754787445, "learning_rate": 0.000135125568045922, "loss": 0.0537, "step": 11994 }, { "epoch": 1.6811492641906096, "grad_norm": 0.2522684335708618, "learning_rate": 0.00013511121741210236, "loss": 0.0568, "step": 11995 }, { "epoch": 1.6812894183601963, "grad_norm": 0.2547159194946289, "learning_rate": 0.00013509686677828272, "loss": 0.0457, "step": 11996 }, { "epoch": 1.6814295725297828, "grad_norm": 0.12966875731945038, "learning_rate": 0.00013508251614446304, "loss": 0.0122, "step": 11997 }, { "epoch": 1.6815697266993692, "grad_norm": 0.22635617852210999, "learning_rate": 0.00013506816551064337, "loss": 0.0732, "step": 11998 }, { "epoch": 1.681709880868956, "grad_norm": 0.3793053925037384, "learning_rate": 0.0001350538148768237, "loss": 0.071, "step": 11999 }, { "epoch": 1.6818500350385424, "grad_norm": 0.17158959805965424, "learning_rate": 0.00013503946424300406, "loss": 0.0558, "step": 12000 }, { "epoch": 1.681990189208129, "grad_norm": 0.4342384934425354, 
"learning_rate": 0.00013502511360918439, "loss": 0.1214, "step": 12001 }, { "epoch": 1.6821303433777155, "grad_norm": 0.15962554514408112, "learning_rate": 0.00013501076297536471, "loss": 0.0177, "step": 12002 }, { "epoch": 1.682270497547302, "grad_norm": 0.25122955441474915, "learning_rate": 0.00013499641234154507, "loss": 0.0481, "step": 12003 }, { "epoch": 1.6824106517168884, "grad_norm": 0.9374120831489563, "learning_rate": 0.00013498206170772543, "loss": 0.0802, "step": 12004 }, { "epoch": 1.682550805886475, "grad_norm": 0.5195601582527161, "learning_rate": 0.00013496771107390575, "loss": 0.0588, "step": 12005 }, { "epoch": 1.6826909600560618, "grad_norm": 0.232902392745018, "learning_rate": 0.00013495336044008608, "loss": 0.0627, "step": 12006 }, { "epoch": 1.6828311142256482, "grad_norm": 0.3968820571899414, "learning_rate": 0.0001349390098062664, "loss": 0.0711, "step": 12007 }, { "epoch": 1.6829712683952347, "grad_norm": 0.415885865688324, "learning_rate": 0.00013492465917244677, "loss": 0.0662, "step": 12008 }, { "epoch": 1.6831114225648212, "grad_norm": 0.22248360514640808, "learning_rate": 0.00013491030853862712, "loss": 0.0297, "step": 12009 }, { "epoch": 1.6832515767344078, "grad_norm": 0.13106459379196167, "learning_rate": 0.00013489595790480745, "loss": 0.0233, "step": 12010 }, { "epoch": 1.6833917309039945, "grad_norm": 0.4800136685371399, "learning_rate": 0.00013488160727098778, "loss": 0.0604, "step": 12011 }, { "epoch": 1.683531885073581, "grad_norm": 0.5863081812858582, "learning_rate": 0.00013486725663716814, "loss": 0.0523, "step": 12012 }, { "epoch": 1.6836720392431674, "grad_norm": 0.2394528090953827, "learning_rate": 0.00013485290600334846, "loss": 0.1183, "step": 12013 }, { "epoch": 1.683812193412754, "grad_norm": 0.9481812119483948, "learning_rate": 0.00013483855536952882, "loss": 0.0397, "step": 12014 }, { "epoch": 1.6839523475823406, "grad_norm": 0.410428524017334, "learning_rate": 0.00013482420473570915, "loss": 0.0487, "step": 12015 
}, { "epoch": 1.6840925017519273, "grad_norm": 0.49864962697029114, "learning_rate": 0.00013480985410188948, "loss": 0.0247, "step": 12016 }, { "epoch": 1.6842326559215137, "grad_norm": 0.1838756948709488, "learning_rate": 0.00013479550346806983, "loss": 0.0294, "step": 12017 }, { "epoch": 1.6843728100911002, "grad_norm": 0.30036959052085876, "learning_rate": 0.00013478115283425016, "loss": 0.0465, "step": 12018 }, { "epoch": 1.6845129642606866, "grad_norm": 0.2414606660604477, "learning_rate": 0.00013476680220043052, "loss": 0.0271, "step": 12019 }, { "epoch": 1.6846531184302733, "grad_norm": 0.08831758052110672, "learning_rate": 0.00013475245156661085, "loss": 0.0126, "step": 12020 }, { "epoch": 1.68479327259986, "grad_norm": 0.6286929845809937, "learning_rate": 0.00013473810093279117, "loss": 0.0612, "step": 12021 }, { "epoch": 1.6849334267694465, "grad_norm": 0.21967561542987823, "learning_rate": 0.00013472375029897153, "loss": 0.0214, "step": 12022 }, { "epoch": 1.685073580939033, "grad_norm": 0.19949324429035187, "learning_rate": 0.00013470939966515189, "loss": 0.0152, "step": 12023 }, { "epoch": 1.6852137351086194, "grad_norm": 0.27890774607658386, "learning_rate": 0.00013469504903133221, "loss": 0.0303, "step": 12024 }, { "epoch": 1.685353889278206, "grad_norm": 0.3173663020133972, "learning_rate": 0.00013468069839751254, "loss": 0.0398, "step": 12025 }, { "epoch": 1.6854940434477925, "grad_norm": 0.23873330652713776, "learning_rate": 0.00013466634776369287, "loss": 0.0552, "step": 12026 }, { "epoch": 1.6856341976173792, "grad_norm": 0.27269285917282104, "learning_rate": 0.00013465199712987323, "loss": 0.1135, "step": 12027 }, { "epoch": 1.6857743517869657, "grad_norm": 0.425577312707901, "learning_rate": 0.00013463764649605358, "loss": 0.0232, "step": 12028 }, { "epoch": 1.6859145059565521, "grad_norm": 0.40970930457115173, "learning_rate": 0.0001346232958622339, "loss": 0.0219, "step": 12029 }, { "epoch": 1.6860546601261388, "grad_norm": 
0.5845174789428711, "learning_rate": 0.00013460894522841424, "loss": 0.0905, "step": 12030 }, { "epoch": 1.6861948142957253, "grad_norm": 0.5986014604568481, "learning_rate": 0.0001345945945945946, "loss": 0.0415, "step": 12031 }, { "epoch": 1.686334968465312, "grad_norm": 0.4809368848800659, "learning_rate": 0.00013458024396077492, "loss": 0.0374, "step": 12032 }, { "epoch": 1.6864751226348984, "grad_norm": 0.5704452991485596, "learning_rate": 0.00013456589332695525, "loss": 0.0962, "step": 12033 }, { "epoch": 1.6866152768044849, "grad_norm": 1.1903120279312134, "learning_rate": 0.00013455154269313558, "loss": 0.2128, "step": 12034 }, { "epoch": 1.6867554309740713, "grad_norm": 0.45623716711997986, "learning_rate": 0.00013453719205931594, "loss": 0.0673, "step": 12035 }, { "epoch": 1.686895585143658, "grad_norm": 0.1426125466823578, "learning_rate": 0.0001345228414254963, "loss": 0.0301, "step": 12036 }, { "epoch": 1.6870357393132447, "grad_norm": 0.2537117600440979, "learning_rate": 0.00013450849079167662, "loss": 0.0449, "step": 12037 }, { "epoch": 1.6871758934828311, "grad_norm": 0.365544855594635, "learning_rate": 0.00013449414015785695, "loss": 0.0585, "step": 12038 }, { "epoch": 1.6873160476524176, "grad_norm": 0.13807782530784607, "learning_rate": 0.0001344797895240373, "loss": 0.0296, "step": 12039 }, { "epoch": 1.687456201822004, "grad_norm": 0.9301925897598267, "learning_rate": 0.00013446543889021763, "loss": 0.0942, "step": 12040 }, { "epoch": 1.6875963559915907, "grad_norm": 0.1984456330537796, "learning_rate": 0.000134451088256398, "loss": 0.0462, "step": 12041 }, { "epoch": 1.6877365101611774, "grad_norm": 0.610122799873352, "learning_rate": 0.00013443673762257832, "loss": 0.0649, "step": 12042 }, { "epoch": 1.6878766643307639, "grad_norm": 0.6308504939079285, "learning_rate": 0.00013442238698875865, "loss": 0.0709, "step": 12043 }, { "epoch": 1.6880168185003503, "grad_norm": 0.2242647409439087, "learning_rate": 0.000134408036354939, "loss": 0.0362, 
"step": 12044 }, { "epoch": 1.6881569726699368, "grad_norm": 0.18173405528068542, "learning_rate": 0.00013439368572111933, "loss": 0.0323, "step": 12045 }, { "epoch": 1.6882971268395235, "grad_norm": 0.33567434549331665, "learning_rate": 0.00013437933508729969, "loss": 0.0517, "step": 12046 }, { "epoch": 1.6884372810091102, "grad_norm": 0.5238831043243408, "learning_rate": 0.00013436498445348001, "loss": 0.034, "step": 12047 }, { "epoch": 1.6885774351786966, "grad_norm": 0.9118971824645996, "learning_rate": 0.00013435063381966034, "loss": 0.0924, "step": 12048 }, { "epoch": 1.688717589348283, "grad_norm": 0.15738297998905182, "learning_rate": 0.0001343362831858407, "loss": 0.0116, "step": 12049 }, { "epoch": 1.6888577435178695, "grad_norm": 0.6067864894866943, "learning_rate": 0.00013432193255202103, "loss": 0.0649, "step": 12050 }, { "epoch": 1.6889978976874562, "grad_norm": 0.5457295179367065, "learning_rate": 0.00013430758191820138, "loss": 0.0477, "step": 12051 }, { "epoch": 1.689138051857043, "grad_norm": 1.9149231910705566, "learning_rate": 0.0001342932312843817, "loss": 0.0733, "step": 12052 }, { "epoch": 1.6892782060266294, "grad_norm": 0.2596793472766876, "learning_rate": 0.00013427888065056204, "loss": 0.0376, "step": 12053 }, { "epoch": 1.6894183601962158, "grad_norm": 0.2406296730041504, "learning_rate": 0.0001342645300167424, "loss": 0.0341, "step": 12054 }, { "epoch": 1.6895585143658023, "grad_norm": 0.14211568236351013, "learning_rate": 0.00013425017938292275, "loss": 0.0225, "step": 12055 }, { "epoch": 1.689698668535389, "grad_norm": 1.1997653245925903, "learning_rate": 0.00013423582874910308, "loss": 0.0957, "step": 12056 }, { "epoch": 1.6898388227049754, "grad_norm": 0.20580078661441803, "learning_rate": 0.0001342214781152834, "loss": 0.0561, "step": 12057 }, { "epoch": 1.689978976874562, "grad_norm": 0.16731882095336914, "learning_rate": 0.00013420712748146376, "loss": 0.0247, "step": 12058 }, { "epoch": 1.6901191310441486, "grad_norm": 
0.37701407074928284, "learning_rate": 0.0001341927768476441, "loss": 0.0487, "step": 12059 }, { "epoch": 1.690259285213735, "grad_norm": 0.15563316643238068, "learning_rate": 0.00013417842621382445, "loss": 0.0283, "step": 12060 }, { "epoch": 1.6903994393833215, "grad_norm": 0.3963906168937683, "learning_rate": 0.00013416407558000478, "loss": 0.0656, "step": 12061 }, { "epoch": 1.6905395935529082, "grad_norm": 0.3319525122642517, "learning_rate": 0.0001341497249461851, "loss": 0.0333, "step": 12062 }, { "epoch": 1.6906797477224949, "grad_norm": 0.4549751877784729, "learning_rate": 0.00013413537431236546, "loss": 0.0458, "step": 12063 }, { "epoch": 1.6908199018920813, "grad_norm": 0.16253913938999176, "learning_rate": 0.0001341210236785458, "loss": 0.0335, "step": 12064 }, { "epoch": 1.6909600560616678, "grad_norm": 0.9755932092666626, "learning_rate": 0.00013410667304472612, "loss": 0.0516, "step": 12065 }, { "epoch": 1.6911002102312542, "grad_norm": 0.08407410234212875, "learning_rate": 0.00013409232241090647, "loss": 0.0117, "step": 12066 }, { "epoch": 1.691240364400841, "grad_norm": 0.5107954740524292, "learning_rate": 0.0001340779717770868, "loss": 0.1058, "step": 12067 }, { "epoch": 1.6913805185704276, "grad_norm": 0.12123275548219681, "learning_rate": 0.00013406362114326716, "loss": 0.0214, "step": 12068 }, { "epoch": 1.691520672740014, "grad_norm": 0.2946118712425232, "learning_rate": 0.0001340492705094475, "loss": 0.0335, "step": 12069 }, { "epoch": 1.6916608269096005, "grad_norm": 0.49260231852531433, "learning_rate": 0.00013403491987562782, "loss": 0.0509, "step": 12070 }, { "epoch": 1.691800981079187, "grad_norm": 0.524235725402832, "learning_rate": 0.00013402056924180817, "loss": 0.1372, "step": 12071 }, { "epoch": 1.6919411352487737, "grad_norm": 0.3608382046222687, "learning_rate": 0.0001340062186079885, "loss": 0.0568, "step": 12072 }, { "epoch": 1.6920812894183603, "grad_norm": 0.5260193943977356, "learning_rate": 0.00013399186797416886, "loss": 
0.061, "step": 12073 }, { "epoch": 1.6922214435879468, "grad_norm": 0.4607023596763611, "learning_rate": 0.00013397751734034918, "loss": 0.0478, "step": 12074 }, { "epoch": 1.6923615977575333, "grad_norm": 0.19261333346366882, "learning_rate": 0.0001339631667065295, "loss": 0.0246, "step": 12075 }, { "epoch": 1.6925017519271197, "grad_norm": 0.6916095614433289, "learning_rate": 0.00013394881607270987, "loss": 0.0836, "step": 12076 }, { "epoch": 1.6926419060967064, "grad_norm": 0.379151850938797, "learning_rate": 0.0001339344654388902, "loss": 0.0368, "step": 12077 }, { "epoch": 1.692782060266293, "grad_norm": 0.40913838148117065, "learning_rate": 0.00013392011480507055, "loss": 0.0511, "step": 12078 }, { "epoch": 1.6929222144358795, "grad_norm": 0.45786041021347046, "learning_rate": 0.00013390576417125088, "loss": 0.0673, "step": 12079 }, { "epoch": 1.693062368605466, "grad_norm": 1.3063641786575317, "learning_rate": 0.0001338914135374312, "loss": 0.0423, "step": 12080 }, { "epoch": 1.6932025227750525, "grad_norm": 0.31361743807792664, "learning_rate": 0.00013387706290361157, "loss": 0.0708, "step": 12081 }, { "epoch": 1.6933426769446391, "grad_norm": 0.5006605386734009, "learning_rate": 0.00013386271226979192, "loss": 0.044, "step": 12082 }, { "epoch": 1.6934828311142256, "grad_norm": 0.31330978870391846, "learning_rate": 0.00013384836163597225, "loss": 0.0758, "step": 12083 }, { "epoch": 1.6936229852838123, "grad_norm": 0.5071033835411072, "learning_rate": 0.00013383401100215258, "loss": 0.0746, "step": 12084 }, { "epoch": 1.6937631394533987, "grad_norm": 1.7994585037231445, "learning_rate": 0.0001338196603683329, "loss": 0.1479, "step": 12085 }, { "epoch": 1.6939032936229852, "grad_norm": 0.04736752808094025, "learning_rate": 0.00013380530973451326, "loss": 0.0065, "step": 12086 }, { "epoch": 1.6940434477925719, "grad_norm": 0.14732757210731506, "learning_rate": 0.00013379095910069362, "loss": 0.0176, "step": 12087 }, { "epoch": 1.6941836019621583, "grad_norm": 
0.11965247243642807, "learning_rate": 0.00013377660846687395, "loss": 0.0264, "step": 12088 }, { "epoch": 1.694323756131745, "grad_norm": 0.3270857632160187, "learning_rate": 0.00013376225783305428, "loss": 0.0382, "step": 12089 }, { "epoch": 1.6944639103013315, "grad_norm": 0.2812128961086273, "learning_rate": 0.00013374790719923463, "loss": 0.0311, "step": 12090 }, { "epoch": 1.694604064470918, "grad_norm": 0.24704235792160034, "learning_rate": 0.00013373355656541496, "loss": 0.0595, "step": 12091 }, { "epoch": 1.6947442186405044, "grad_norm": 0.4344119727611542, "learning_rate": 0.00013371920593159532, "loss": 0.0698, "step": 12092 }, { "epoch": 1.694884372810091, "grad_norm": 0.11858907341957092, "learning_rate": 0.00013370485529777564, "loss": 0.0173, "step": 12093 }, { "epoch": 1.6950245269796778, "grad_norm": 0.14710776507854462, "learning_rate": 0.00013369050466395597, "loss": 0.0139, "step": 12094 }, { "epoch": 1.6951646811492642, "grad_norm": 0.16257426142692566, "learning_rate": 0.00013367615403013633, "loss": 0.015, "step": 12095 }, { "epoch": 1.6953048353188507, "grad_norm": 0.3220033645629883, "learning_rate": 0.00013366180339631666, "loss": 0.0327, "step": 12096 }, { "epoch": 1.6954449894884371, "grad_norm": 0.3824523091316223, "learning_rate": 0.00013364745276249698, "loss": 0.0514, "step": 12097 }, { "epoch": 1.6955851436580238, "grad_norm": 0.5479860305786133, "learning_rate": 0.00013363310212867734, "loss": 0.0653, "step": 12098 }, { "epoch": 1.6957252978276105, "grad_norm": 0.22309648990631104, "learning_rate": 0.00013361875149485767, "loss": 0.0644, "step": 12099 }, { "epoch": 1.695865451997197, "grad_norm": 0.314375638961792, "learning_rate": 0.00013360440086103802, "loss": 0.0634, "step": 12100 }, { "epoch": 1.6960056061667834, "grad_norm": 0.14579297602176666, "learning_rate": 0.00013359005022721835, "loss": 0.0251, "step": 12101 }, { "epoch": 1.6961457603363699, "grad_norm": 0.22480905055999756, "learning_rate": 0.00013357569959339868, 
"loss": 0.0156, "step": 12102 }, { "epoch": 1.6962859145059566, "grad_norm": 0.21081236004829407, "learning_rate": 0.00013356134895957904, "loss": 0.0454, "step": 12103 }, { "epoch": 1.6964260686755432, "grad_norm": 0.34576883912086487, "learning_rate": 0.00013354699832575937, "loss": 0.1142, "step": 12104 }, { "epoch": 1.6965662228451297, "grad_norm": 0.12170752882957458, "learning_rate": 0.00013353264769193972, "loss": 0.0082, "step": 12105 }, { "epoch": 1.6967063770147162, "grad_norm": 0.5246629118919373, "learning_rate": 0.00013351829705812005, "loss": 0.0463, "step": 12106 }, { "epoch": 1.6968465311843026, "grad_norm": 1.375457525253296, "learning_rate": 0.00013350394642430038, "loss": 0.0543, "step": 12107 }, { "epoch": 1.6969866853538893, "grad_norm": 0.6639530658721924, "learning_rate": 0.00013348959579048073, "loss": 0.0436, "step": 12108 }, { "epoch": 1.697126839523476, "grad_norm": 0.8654889464378357, "learning_rate": 0.0001334752451566611, "loss": 0.0966, "step": 12109 }, { "epoch": 1.6972669936930624, "grad_norm": 0.3038753271102905, "learning_rate": 0.00013346089452284142, "loss": 0.0818, "step": 12110 }, { "epoch": 1.697407147862649, "grad_norm": 0.8238230347633362, "learning_rate": 0.00013344654388902175, "loss": 0.1143, "step": 12111 }, { "epoch": 1.6975473020322354, "grad_norm": 0.30154138803482056, "learning_rate": 0.00013343219325520208, "loss": 0.0342, "step": 12112 }, { "epoch": 1.697687456201822, "grad_norm": 0.18004603683948517, "learning_rate": 0.00013341784262138243, "loss": 0.0174, "step": 12113 }, { "epoch": 1.6978276103714085, "grad_norm": 0.2873094975948334, "learning_rate": 0.0001334034919875628, "loss": 0.1025, "step": 12114 }, { "epoch": 1.6979677645409952, "grad_norm": 0.17505615949630737, "learning_rate": 0.00013338914135374312, "loss": 0.0705, "step": 12115 }, { "epoch": 1.6981079187105816, "grad_norm": 0.37265855073928833, "learning_rate": 0.00013337479071992344, "loss": 0.0374, "step": 12116 }, { "epoch": 1.698248072880168, 
"grad_norm": 0.2789835035800934, "learning_rate": 0.0001333604400861038, "loss": 0.0722, "step": 12117 }, { "epoch": 1.6983882270497548, "grad_norm": 0.4448922276496887, "learning_rate": 0.00013334608945228413, "loss": 0.0728, "step": 12118 }, { "epoch": 1.6985283812193412, "grad_norm": 0.4655461609363556, "learning_rate": 0.00013333173881846448, "loss": 0.0247, "step": 12119 }, { "epoch": 1.698668535388928, "grad_norm": 0.20748305320739746, "learning_rate": 0.0001333173881846448, "loss": 0.0115, "step": 12120 }, { "epoch": 1.6988086895585144, "grad_norm": 0.32459160685539246, "learning_rate": 0.00013330303755082514, "loss": 0.0642, "step": 12121 }, { "epoch": 1.6989488437281008, "grad_norm": 0.46259671449661255, "learning_rate": 0.0001332886869170055, "loss": 0.0867, "step": 12122 }, { "epoch": 1.6990889978976873, "grad_norm": 0.2355385571718216, "learning_rate": 0.00013327433628318583, "loss": 0.0339, "step": 12123 }, { "epoch": 1.699229152067274, "grad_norm": 0.31749945878982544, "learning_rate": 0.00013325998564936618, "loss": 0.076, "step": 12124 }, { "epoch": 1.6993693062368607, "grad_norm": 0.23552361130714417, "learning_rate": 0.0001332456350155465, "loss": 0.0266, "step": 12125 }, { "epoch": 1.6995094604064471, "grad_norm": 0.19038169085979462, "learning_rate": 0.00013323128438172684, "loss": 0.0676, "step": 12126 }, { "epoch": 1.6996496145760336, "grad_norm": 0.4941211938858032, "learning_rate": 0.0001332169337479072, "loss": 0.0839, "step": 12127 }, { "epoch": 1.69978976874562, "grad_norm": 0.32394328713417053, "learning_rate": 0.00013320258311408752, "loss": 0.0378, "step": 12128 }, { "epoch": 1.6999299229152067, "grad_norm": 0.11059322953224182, "learning_rate": 0.00013318823248026785, "loss": 0.0058, "step": 12129 }, { "epoch": 1.7000700770847934, "grad_norm": 0.8874800801277161, "learning_rate": 0.0001331738818464482, "loss": 0.0688, "step": 12130 }, { "epoch": 1.7002102312543799, "grad_norm": 0.2018873542547226, "learning_rate": 
0.00013315953121262854, "loss": 0.0138, "step": 12131 }, { "epoch": 1.7003503854239663, "grad_norm": 0.3651524782180786, "learning_rate": 0.0001331451805788089, "loss": 0.0537, "step": 12132 }, { "epoch": 1.7004905395935528, "grad_norm": 0.4067952036857605, "learning_rate": 0.00013313082994498922, "loss": 0.0213, "step": 12133 }, { "epoch": 1.7006306937631395, "grad_norm": 0.7026558518409729, "learning_rate": 0.00013311647931116955, "loss": 0.0631, "step": 12134 }, { "epoch": 1.7007708479327261, "grad_norm": 1.1000531911849976, "learning_rate": 0.0001331021286773499, "loss": 0.2094, "step": 12135 }, { "epoch": 1.7009110021023126, "grad_norm": 0.17267359793186188, "learning_rate": 0.00013308777804353026, "loss": 0.0398, "step": 12136 }, { "epoch": 1.701051156271899, "grad_norm": 0.18348808586597443, "learning_rate": 0.0001330734274097106, "loss": 0.0305, "step": 12137 }, { "epoch": 1.7011913104414855, "grad_norm": 0.10993392020463943, "learning_rate": 0.00013305907677589092, "loss": 0.0143, "step": 12138 }, { "epoch": 1.7013314646110722, "grad_norm": 0.2789594233036041, "learning_rate": 0.00013304472614207125, "loss": 0.0598, "step": 12139 }, { "epoch": 1.701471618780659, "grad_norm": 0.2597116529941559, "learning_rate": 0.0001330303755082516, "loss": 0.039, "step": 12140 }, { "epoch": 1.7016117729502453, "grad_norm": 0.18684706091880798, "learning_rate": 0.00013301602487443196, "loss": 0.0336, "step": 12141 }, { "epoch": 1.7017519271198318, "grad_norm": 0.3200320899486542, "learning_rate": 0.00013300167424061229, "loss": 0.0778, "step": 12142 }, { "epoch": 1.7018920812894183, "grad_norm": 0.14672788977622986, "learning_rate": 0.00013298732360679261, "loss": 0.027, "step": 12143 }, { "epoch": 1.702032235459005, "grad_norm": 0.5173974633216858, "learning_rate": 0.00013297297297297297, "loss": 0.1067, "step": 12144 }, { "epoch": 1.7021723896285914, "grad_norm": 0.4318787157535553, "learning_rate": 0.0001329586223391533, "loss": 0.0261, "step": 12145 }, { "epoch": 
1.702312543798178, "grad_norm": 0.28429049253463745, "learning_rate": 0.00013294427170533365, "loss": 0.0607, "step": 12146 }, { "epoch": 1.7024526979677645, "grad_norm": 0.2931499183177948, "learning_rate": 0.00013292992107151398, "loss": 0.0574, "step": 12147 }, { "epoch": 1.702592852137351, "grad_norm": 0.20604759454727173, "learning_rate": 0.0001329155704376943, "loss": 0.0437, "step": 12148 }, { "epoch": 1.7027330063069375, "grad_norm": 0.372398316860199, "learning_rate": 0.00013290121980387467, "loss": 0.0277, "step": 12149 }, { "epoch": 1.7028731604765242, "grad_norm": 0.0969495102763176, "learning_rate": 0.000132886869170055, "loss": 0.008, "step": 12150 }, { "epoch": 1.7030133146461108, "grad_norm": 0.08814334124326706, "learning_rate": 0.00013287251853623535, "loss": 0.0124, "step": 12151 }, { "epoch": 1.7031534688156973, "grad_norm": 0.25535672903060913, "learning_rate": 0.00013285816790241568, "loss": 0.0823, "step": 12152 }, { "epoch": 1.7032936229852838, "grad_norm": 0.17418822646141052, "learning_rate": 0.000132843817268596, "loss": 0.0089, "step": 12153 }, { "epoch": 1.7034337771548702, "grad_norm": 0.1341678351163864, "learning_rate": 0.00013282946663477636, "loss": 0.054, "step": 12154 }, { "epoch": 1.703573931324457, "grad_norm": 0.24280256032943726, "learning_rate": 0.0001328151160009567, "loss": 0.0277, "step": 12155 }, { "epoch": 1.7037140854940436, "grad_norm": 0.5688652992248535, "learning_rate": 0.00013280076536713705, "loss": 0.0478, "step": 12156 }, { "epoch": 1.70385423966363, "grad_norm": 0.20240440964698792, "learning_rate": 0.00013278641473331738, "loss": 0.0232, "step": 12157 }, { "epoch": 1.7039943938332165, "grad_norm": 0.5226565599441528, "learning_rate": 0.0001327720640994977, "loss": 0.077, "step": 12158 }, { "epoch": 1.704134548002803, "grad_norm": 0.27666181325912476, "learning_rate": 0.00013275771346567806, "loss": 0.0347, "step": 12159 }, { "epoch": 1.7042747021723896, "grad_norm": 0.17397163808345795, "learning_rate": 
0.0001327433628318584, "loss": 0.0748, "step": 12160 }, { "epoch": 1.7044148563419763, "grad_norm": 0.2695830464363098, "learning_rate": 0.00013272901219803874, "loss": 0.0285, "step": 12161 }, { "epoch": 1.7045550105115628, "grad_norm": 0.22675687074661255, "learning_rate": 0.00013271466156421907, "loss": 0.0253, "step": 12162 }, { "epoch": 1.7046951646811492, "grad_norm": 0.1300095021724701, "learning_rate": 0.0001327003109303994, "loss": 0.0138, "step": 12163 }, { "epoch": 1.7048353188507357, "grad_norm": 0.12879200279712677, "learning_rate": 0.00013268596029657976, "loss": 0.0354, "step": 12164 }, { "epoch": 1.7049754730203224, "grad_norm": 0.23569846153259277, "learning_rate": 0.00013267160966276009, "loss": 0.031, "step": 12165 }, { "epoch": 1.705115627189909, "grad_norm": 0.07775438576936722, "learning_rate": 0.00013265725902894041, "loss": 0.0122, "step": 12166 }, { "epoch": 1.7052557813594955, "grad_norm": 0.27824512124061584, "learning_rate": 0.00013264290839512077, "loss": 0.0183, "step": 12167 }, { "epoch": 1.705395935529082, "grad_norm": 0.20355655252933502, "learning_rate": 0.00013262855776130113, "loss": 0.0202, "step": 12168 }, { "epoch": 1.7055360896986684, "grad_norm": 0.12762348353862762, "learning_rate": 0.00013261420712748145, "loss": 0.0131, "step": 12169 }, { "epoch": 1.7056762438682551, "grad_norm": 0.33934980630874634, "learning_rate": 0.00013259985649366178, "loss": 0.0518, "step": 12170 }, { "epoch": 1.7058163980378416, "grad_norm": 0.27709197998046875, "learning_rate": 0.00013258550585984214, "loss": 0.047, "step": 12171 }, { "epoch": 1.7059565522074283, "grad_norm": 0.39742711186408997, "learning_rate": 0.00013257115522602247, "loss": 0.0687, "step": 12172 }, { "epoch": 1.7060967063770147, "grad_norm": 0.649774432182312, "learning_rate": 0.00013255680459220282, "loss": 0.0601, "step": 12173 }, { "epoch": 1.7062368605466012, "grad_norm": 0.5390459895133972, "learning_rate": 0.00013254245395838315, "loss": 0.0225, "step": 12174 }, { 
"epoch": 1.7063770147161879, "grad_norm": 0.5721665024757385, "learning_rate": 0.00013252810332456348, "loss": 0.1058, "step": 12175 }, { "epoch": 1.7065171688857743, "grad_norm": 1.3042230606079102, "learning_rate": 0.00013251375269074384, "loss": 0.0652, "step": 12176 }, { "epoch": 1.706657323055361, "grad_norm": 0.8957727551460266, "learning_rate": 0.00013249940205692416, "loss": 0.0337, "step": 12177 }, { "epoch": 1.7067974772249475, "grad_norm": 0.06076275557279587, "learning_rate": 0.00013248505142310452, "loss": 0.0046, "step": 12178 }, { "epoch": 1.706937631394534, "grad_norm": 0.5878314971923828, "learning_rate": 0.00013247070078928485, "loss": 0.0282, "step": 12179 }, { "epoch": 1.7070777855641204, "grad_norm": 0.22794058918952942, "learning_rate": 0.00013245635015546518, "loss": 0.0242, "step": 12180 }, { "epoch": 1.707217939733707, "grad_norm": 0.3879918158054352, "learning_rate": 0.00013244199952164553, "loss": 0.0516, "step": 12181 }, { "epoch": 1.7073580939032937, "grad_norm": 0.2316022366285324, "learning_rate": 0.00013242764888782586, "loss": 0.0895, "step": 12182 }, { "epoch": 1.7074982480728802, "grad_norm": 0.9732415676116943, "learning_rate": 0.00013241329825400622, "loss": 0.0567, "step": 12183 }, { "epoch": 1.7076384022424667, "grad_norm": 0.1015130802989006, "learning_rate": 0.00013239894762018655, "loss": 0.005, "step": 12184 }, { "epoch": 1.7077785564120531, "grad_norm": 1.2262755632400513, "learning_rate": 0.00013238459698636687, "loss": 0.0316, "step": 12185 }, { "epoch": 1.7079187105816398, "grad_norm": 0.2844727635383606, "learning_rate": 0.00013237024635254723, "loss": 0.0766, "step": 12186 }, { "epoch": 1.7080588647512265, "grad_norm": 0.9894722104072571, "learning_rate": 0.00013235589571872759, "loss": 0.1161, "step": 12187 }, { "epoch": 1.708199018920813, "grad_norm": 0.35616248846054077, "learning_rate": 0.00013234154508490791, "loss": 0.096, "step": 12188 }, { "epoch": 1.7083391730903994, "grad_norm": 0.2448006123304367, 
"learning_rate": 0.00013232719445108824, "loss": 0.0161, "step": 12189 }, { "epoch": 1.7084793272599859, "grad_norm": 0.41324755549430847, "learning_rate": 0.00013231284381726857, "loss": 0.09, "step": 12190 }, { "epoch": 1.7086194814295725, "grad_norm": 0.4179147481918335, "learning_rate": 0.00013229849318344893, "loss": 0.0459, "step": 12191 }, { "epoch": 1.7087596355991592, "grad_norm": 0.14356225728988647, "learning_rate": 0.00013228414254962926, "loss": 0.0494, "step": 12192 }, { "epoch": 1.7088997897687457, "grad_norm": 0.05739745497703552, "learning_rate": 0.0001322697919158096, "loss": 0.0096, "step": 12193 }, { "epoch": 1.7090399439383321, "grad_norm": 0.1322007030248642, "learning_rate": 0.00013225544128198994, "loss": 0.0158, "step": 12194 }, { "epoch": 1.7091800981079186, "grad_norm": 0.25732842087745667, "learning_rate": 0.0001322410906481703, "loss": 0.0288, "step": 12195 }, { "epoch": 1.7093202522775053, "grad_norm": 0.22450783848762512, "learning_rate": 0.00013222674001435062, "loss": 0.0429, "step": 12196 }, { "epoch": 1.709460406447092, "grad_norm": 0.5321676731109619, "learning_rate": 0.00013221238938053095, "loss": 0.0947, "step": 12197 }, { "epoch": 1.7096005606166784, "grad_norm": 0.5037558674812317, "learning_rate": 0.00013219803874671128, "loss": 0.0685, "step": 12198 }, { "epoch": 1.7097407147862649, "grad_norm": 0.4735731780529022, "learning_rate": 0.00013218368811289164, "loss": 0.0445, "step": 12199 }, { "epoch": 1.7098808689558513, "grad_norm": 0.2632163465023041, "learning_rate": 0.000132169337479072, "loss": 0.0321, "step": 12200 }, { "epoch": 1.710021023125438, "grad_norm": 0.5000625848770142, "learning_rate": 0.00013215498684525232, "loss": 0.029, "step": 12201 }, { "epoch": 1.7101611772950245, "grad_norm": 0.22909273207187653, "learning_rate": 0.00013214063621143265, "loss": 0.0413, "step": 12202 }, { "epoch": 1.7103013314646112, "grad_norm": 0.2071690708398819, "learning_rate": 0.000132126285577613, "loss": 0.0193, "step": 12203 
}, { "epoch": 1.7104414856341976, "grad_norm": 0.262101948261261, "learning_rate": 0.00013211193494379333, "loss": 0.0414, "step": 12204 }, { "epoch": 1.710581639803784, "grad_norm": 0.09843871742486954, "learning_rate": 0.0001320975843099737, "loss": 0.0139, "step": 12205 }, { "epoch": 1.7107217939733705, "grad_norm": 0.3094974160194397, "learning_rate": 0.00013208323367615402, "loss": 0.0611, "step": 12206 }, { "epoch": 1.7108619481429572, "grad_norm": 0.28097647428512573, "learning_rate": 0.00013206888304233435, "loss": 0.0511, "step": 12207 }, { "epoch": 1.711002102312544, "grad_norm": 0.366079717874527, "learning_rate": 0.0001320545324085147, "loss": 0.0388, "step": 12208 }, { "epoch": 1.7111422564821304, "grad_norm": 0.24471531808376312, "learning_rate": 0.00013204018177469503, "loss": 0.0487, "step": 12209 }, { "epoch": 1.7112824106517168, "grad_norm": 0.27892395853996277, "learning_rate": 0.00013202583114087539, "loss": 0.0428, "step": 12210 }, { "epoch": 1.7114225648213033, "grad_norm": 0.24249088764190674, "learning_rate": 0.00013201148050705571, "loss": 0.0806, "step": 12211 }, { "epoch": 1.71156271899089, "grad_norm": 0.1949528455734253, "learning_rate": 0.00013199712987323604, "loss": 0.0327, "step": 12212 }, { "epoch": 1.7117028731604766, "grad_norm": 0.5891454219818115, "learning_rate": 0.0001319827792394164, "loss": 0.0363, "step": 12213 }, { "epoch": 1.711843027330063, "grad_norm": 0.34219926595687866, "learning_rate": 0.00013196842860559675, "loss": 0.0498, "step": 12214 }, { "epoch": 1.7119831814996496, "grad_norm": 0.463893860578537, "learning_rate": 0.00013195407797177708, "loss": 0.039, "step": 12215 }, { "epoch": 1.712123335669236, "grad_norm": 0.21816730499267578, "learning_rate": 0.0001319397273379574, "loss": 0.0299, "step": 12216 }, { "epoch": 1.7122634898388227, "grad_norm": 0.2313939779996872, "learning_rate": 0.00013192537670413774, "loss": 0.0229, "step": 12217 }, { "epoch": 1.7124036440084094, "grad_norm": 0.37385743856430054, 
"learning_rate": 0.0001319110260703181, "loss": 0.0861, "step": 12218 }, { "epoch": 1.7125437981779958, "grad_norm": 0.09055466949939728, "learning_rate": 0.00013189667543649845, "loss": 0.0094, "step": 12219 }, { "epoch": 1.7126839523475823, "grad_norm": 0.39656758308410645, "learning_rate": 0.00013188232480267878, "loss": 0.0283, "step": 12220 }, { "epoch": 1.7128241065171688, "grad_norm": 0.9020304083824158, "learning_rate": 0.0001318679741688591, "loss": 0.0887, "step": 12221 }, { "epoch": 1.7129642606867554, "grad_norm": 0.869001030921936, "learning_rate": 0.00013185362353503946, "loss": 0.0993, "step": 12222 }, { "epoch": 1.7131044148563421, "grad_norm": 0.31396469473838806, "learning_rate": 0.0001318392729012198, "loss": 0.0577, "step": 12223 }, { "epoch": 1.7132445690259286, "grad_norm": 0.28609851002693176, "learning_rate": 0.00013182492226740012, "loss": 0.0629, "step": 12224 }, { "epoch": 1.713384723195515, "grad_norm": 0.18209898471832275, "learning_rate": 0.00013181057163358048, "loss": 0.0248, "step": 12225 }, { "epoch": 1.7135248773651015, "grad_norm": 0.4208248257637024, "learning_rate": 0.0001317962209997608, "loss": 0.0244, "step": 12226 }, { "epoch": 1.7136650315346882, "grad_norm": 0.15248838067054749, "learning_rate": 0.00013178187036594116, "loss": 0.0122, "step": 12227 }, { "epoch": 1.7138051857042746, "grad_norm": 0.16308845579624176, "learning_rate": 0.0001317675197321215, "loss": 0.0251, "step": 12228 }, { "epoch": 1.7139453398738613, "grad_norm": 1.4393203258514404, "learning_rate": 0.00013175316909830182, "loss": 0.2633, "step": 12229 }, { "epoch": 1.7140854940434478, "grad_norm": 0.49943456053733826, "learning_rate": 0.00013173881846448217, "loss": 0.0958, "step": 12230 }, { "epoch": 1.7142256482130342, "grad_norm": 0.3061561584472656, "learning_rate": 0.0001317244678306625, "loss": 0.036, "step": 12231 }, { "epoch": 1.714365802382621, "grad_norm": 0.6713414788246155, "learning_rate": 0.00013171011719684286, "loss": 0.0952, "step": 
12232 }, { "epoch": 1.7145059565522074, "grad_norm": 0.06964185833930969, "learning_rate": 0.0001316957665630232, "loss": 0.0071, "step": 12233 }, { "epoch": 1.714646110721794, "grad_norm": 0.7420777678489685, "learning_rate": 0.00013168141592920352, "loss": 0.1465, "step": 12234 }, { "epoch": 1.7147862648913805, "grad_norm": 0.8891483545303345, "learning_rate": 0.00013166706529538387, "loss": 0.1991, "step": 12235 }, { "epoch": 1.714926419060967, "grad_norm": 0.16871501505374908, "learning_rate": 0.0001316527146615642, "loss": 0.0497, "step": 12236 }, { "epoch": 1.7150665732305534, "grad_norm": 0.2156693935394287, "learning_rate": 0.00013163836402774456, "loss": 0.059, "step": 12237 }, { "epoch": 1.7152067274001401, "grad_norm": 0.31691744923591614, "learning_rate": 0.00013162401339392488, "loss": 0.0239, "step": 12238 }, { "epoch": 1.7153468815697268, "grad_norm": 0.45139381289482117, "learning_rate": 0.0001316096627601052, "loss": 0.0754, "step": 12239 }, { "epoch": 1.7154870357393133, "grad_norm": 0.34668833017349243, "learning_rate": 0.00013159531212628557, "loss": 0.1447, "step": 12240 }, { "epoch": 1.7156271899088997, "grad_norm": 0.43707892298698425, "learning_rate": 0.00013158096149246592, "loss": 0.0425, "step": 12241 }, { "epoch": 1.7157673440784862, "grad_norm": 0.3323785662651062, "learning_rate": 0.00013156661085864625, "loss": 0.0686, "step": 12242 }, { "epoch": 1.7159074982480729, "grad_norm": 0.24211536347866058, "learning_rate": 0.00013155226022482658, "loss": 0.0579, "step": 12243 }, { "epoch": 1.7160476524176596, "grad_norm": 0.5556278824806213, "learning_rate": 0.0001315379095910069, "loss": 0.0423, "step": 12244 }, { "epoch": 1.716187806587246, "grad_norm": 0.45714691281318665, "learning_rate": 0.00013152355895718727, "loss": 0.0294, "step": 12245 }, { "epoch": 1.7163279607568325, "grad_norm": 0.39925500750541687, "learning_rate": 0.00013150920832336762, "loss": 0.0571, "step": 12246 }, { "epoch": 1.716468114926419, "grad_norm": 
0.15344958007335663, "learning_rate": 0.00013149485768954795, "loss": 0.0304, "step": 12247 }, { "epoch": 1.7166082690960056, "grad_norm": 0.2583089768886566, "learning_rate": 0.00013148050705572828, "loss": 0.0322, "step": 12248 }, { "epoch": 1.7167484232655923, "grad_norm": 0.22716905176639557, "learning_rate": 0.00013146615642190863, "loss": 0.0525, "step": 12249 }, { "epoch": 1.7168885774351788, "grad_norm": 0.3833186626434326, "learning_rate": 0.00013145180578808896, "loss": 0.0286, "step": 12250 }, { "epoch": 1.7170287316047652, "grad_norm": 0.24826520681381226, "learning_rate": 0.00013143745515426932, "loss": 0.0217, "step": 12251 }, { "epoch": 1.7171688857743517, "grad_norm": 0.289717435836792, "learning_rate": 0.00013142310452044965, "loss": 0.0764, "step": 12252 }, { "epoch": 1.7173090399439384, "grad_norm": 0.2958870828151703, "learning_rate": 0.00013140875388662998, "loss": 0.0631, "step": 12253 }, { "epoch": 1.717449194113525, "grad_norm": 0.37041962146759033, "learning_rate": 0.00013139440325281033, "loss": 0.0559, "step": 12254 }, { "epoch": 1.7175893482831115, "grad_norm": 0.18747790157794952, "learning_rate": 0.00013138005261899066, "loss": 0.0479, "step": 12255 }, { "epoch": 1.717729502452698, "grad_norm": 1.0447160005569458, "learning_rate": 0.000131365701985171, "loss": 0.0373, "step": 12256 }, { "epoch": 1.7178696566222844, "grad_norm": 0.4998255968093872, "learning_rate": 0.00013135135135135134, "loss": 0.0722, "step": 12257 }, { "epoch": 1.718009810791871, "grad_norm": 0.2755364775657654, "learning_rate": 0.00013133700071753167, "loss": 0.0466, "step": 12258 }, { "epoch": 1.7181499649614576, "grad_norm": 0.27703437209129333, "learning_rate": 0.00013132265008371203, "loss": 0.058, "step": 12259 }, { "epoch": 1.7182901191310442, "grad_norm": 0.34295231103897095, "learning_rate": 0.00013130829944989236, "loss": 0.0421, "step": 12260 }, { "epoch": 1.7184302733006307, "grad_norm": 0.3623409867286682, "learning_rate": 0.00013129394881607268, 
"loss": 0.1082, "step": 12261 }, { "epoch": 1.7185704274702172, "grad_norm": 0.16262243688106537, "learning_rate": 0.00013127959818225304, "loss": 0.0292, "step": 12262 }, { "epoch": 1.7187105816398038, "grad_norm": 0.24371591210365295, "learning_rate": 0.00013126524754843337, "loss": 0.0196, "step": 12263 }, { "epoch": 1.7188507358093903, "grad_norm": 0.20839661359786987, "learning_rate": 0.00013125089691461372, "loss": 0.0124, "step": 12264 }, { "epoch": 1.718990889978977, "grad_norm": 0.26452597975730896, "learning_rate": 0.00013123654628079405, "loss": 0.0393, "step": 12265 }, { "epoch": 1.7191310441485634, "grad_norm": 0.22257104516029358, "learning_rate": 0.00013122219564697438, "loss": 0.0178, "step": 12266 }, { "epoch": 1.71927119831815, "grad_norm": 0.4206107258796692, "learning_rate": 0.00013120784501315474, "loss": 0.0825, "step": 12267 }, { "epoch": 1.7194113524877364, "grad_norm": 0.2450208216905594, "learning_rate": 0.00013119349437933507, "loss": 0.0632, "step": 12268 }, { "epoch": 1.719551506657323, "grad_norm": 0.238607719540596, "learning_rate": 0.00013117914374551542, "loss": 0.0551, "step": 12269 }, { "epoch": 1.7196916608269097, "grad_norm": 0.5203726887702942, "learning_rate": 0.00013116479311169575, "loss": 0.0445, "step": 12270 }, { "epoch": 1.7198318149964962, "grad_norm": 0.13038702309131622, "learning_rate": 0.00013115044247787608, "loss": 0.0114, "step": 12271 }, { "epoch": 1.7199719691660826, "grad_norm": 0.2569502890110016, "learning_rate": 0.00013113609184405643, "loss": 0.0567, "step": 12272 }, { "epoch": 1.720112123335669, "grad_norm": 0.6292478442192078, "learning_rate": 0.0001311217412102368, "loss": 0.0685, "step": 12273 }, { "epoch": 1.7202522775052558, "grad_norm": 0.13167506456375122, "learning_rate": 0.00013110739057641712, "loss": 0.0083, "step": 12274 }, { "epoch": 1.7203924316748425, "grad_norm": 0.36134523153305054, "learning_rate": 0.00013109303994259745, "loss": 0.0549, "step": 12275 }, { "epoch": 1.720532585844429, 
"grad_norm": 0.285233736038208, "learning_rate": 0.0001310786893087778, "loss": 0.0448, "step": 12276 }, { "epoch": 1.7206727400140154, "grad_norm": 0.21198295056819916, "learning_rate": 0.00013106433867495813, "loss": 0.0136, "step": 12277 }, { "epoch": 1.7208128941836018, "grad_norm": 0.12797924876213074, "learning_rate": 0.0001310499880411385, "loss": 0.0052, "step": 12278 }, { "epoch": 1.7209530483531885, "grad_norm": 0.34781551361083984, "learning_rate": 0.00013103563740731882, "loss": 0.0534, "step": 12279 }, { "epoch": 1.7210932025227752, "grad_norm": 2.350677967071533, "learning_rate": 0.00013102128677349914, "loss": 0.1168, "step": 12280 }, { "epoch": 1.7212333566923617, "grad_norm": 0.22225646674633026, "learning_rate": 0.0001310069361396795, "loss": 0.0242, "step": 12281 }, { "epoch": 1.7213735108619481, "grad_norm": 0.9159325957298279, "learning_rate": 0.00013099258550585983, "loss": 0.0751, "step": 12282 }, { "epoch": 1.7215136650315346, "grad_norm": 0.6331193447113037, "learning_rate": 0.00013097823487204018, "loss": 0.0395, "step": 12283 }, { "epoch": 1.7216538192011213, "grad_norm": 1.145039677619934, "learning_rate": 0.0001309638842382205, "loss": 0.0717, "step": 12284 }, { "epoch": 1.721793973370708, "grad_norm": 1.5468385219573975, "learning_rate": 0.00013094953360440084, "loss": 0.0691, "step": 12285 }, { "epoch": 1.7219341275402944, "grad_norm": 0.18451419472694397, "learning_rate": 0.0001309351829705812, "loss": 0.0217, "step": 12286 }, { "epoch": 1.7220742817098809, "grad_norm": 0.3441013693809509, "learning_rate": 0.00013092083233676153, "loss": 0.0656, "step": 12287 }, { "epoch": 1.7222144358794673, "grad_norm": 0.12094729393720627, "learning_rate": 0.00013090648170294188, "loss": 0.0167, "step": 12288 }, { "epoch": 1.722354590049054, "grad_norm": 0.1621893048286438, "learning_rate": 0.0001308921310691222, "loss": 0.0256, "step": 12289 }, { "epoch": 1.7224947442186405, "grad_norm": 0.15836484730243683, "learning_rate": 
0.00013087778043530254, "loss": 0.0198, "step": 12290 }, { "epoch": 1.7226348983882271, "grad_norm": 0.22203406691551208, "learning_rate": 0.0001308634298014829, "loss": 0.0115, "step": 12291 }, { "epoch": 1.7227750525578136, "grad_norm": 0.3655462861061096, "learning_rate": 0.00013084907916766322, "loss": 0.0419, "step": 12292 }, { "epoch": 1.7229152067274, "grad_norm": 0.17145375907421112, "learning_rate": 0.00013083472853384355, "loss": 0.0251, "step": 12293 }, { "epoch": 1.7230553608969865, "grad_norm": 0.2717093229293823, "learning_rate": 0.0001308203779000239, "loss": 0.0469, "step": 12294 }, { "epoch": 1.7231955150665732, "grad_norm": 0.05062802881002426, "learning_rate": 0.00013080602726620424, "loss": 0.0055, "step": 12295 }, { "epoch": 1.7233356692361599, "grad_norm": 0.24106605350971222, "learning_rate": 0.0001307916766323846, "loss": 0.041, "step": 12296 }, { "epoch": 1.7234758234057463, "grad_norm": 0.1237282082438469, "learning_rate": 0.00013077732599856492, "loss": 0.0229, "step": 12297 }, { "epoch": 1.7236159775753328, "grad_norm": 0.20759360492229462, "learning_rate": 0.00013076297536474525, "loss": 0.0456, "step": 12298 }, { "epoch": 1.7237561317449193, "grad_norm": 0.5264992117881775, "learning_rate": 0.0001307486247309256, "loss": 0.0601, "step": 12299 }, { "epoch": 1.723896285914506, "grad_norm": 0.4196541905403137, "learning_rate": 0.00013073427409710596, "loss": 0.0481, "step": 12300 }, { "epoch": 1.7240364400840926, "grad_norm": 0.33874571323394775, "learning_rate": 0.0001307199234632863, "loss": 0.033, "step": 12301 }, { "epoch": 1.724176594253679, "grad_norm": 0.30149513483047485, "learning_rate": 0.00013070557282946662, "loss": 0.0505, "step": 12302 }, { "epoch": 1.7243167484232655, "grad_norm": 0.18182025849819183, "learning_rate": 0.00013069122219564695, "loss": 0.03, "step": 12303 }, { "epoch": 1.724456902592852, "grad_norm": 0.45294544100761414, "learning_rate": 0.0001306768715618273, "loss": 0.0627, "step": 12304 }, { "epoch": 
1.7245970567624387, "grad_norm": 0.18393060564994812, "learning_rate": 0.00013066252092800766, "loss": 0.0215, "step": 12305 }, { "epoch": 1.7247372109320254, "grad_norm": 0.8840623497962952, "learning_rate": 0.00013064817029418799, "loss": 0.0389, "step": 12306 }, { "epoch": 1.7248773651016118, "grad_norm": 0.4182656705379486, "learning_rate": 0.00013063381966036831, "loss": 0.0559, "step": 12307 }, { "epoch": 1.7250175192711983, "grad_norm": 0.3140980303287506, "learning_rate": 0.00013061946902654867, "loss": 0.081, "step": 12308 }, { "epoch": 1.7251576734407847, "grad_norm": 0.24665966629981995, "learning_rate": 0.000130605118392729, "loss": 0.0196, "step": 12309 }, { "epoch": 1.7252978276103714, "grad_norm": 1.1210683584213257, "learning_rate": 0.00013059076775890935, "loss": 0.0796, "step": 12310 }, { "epoch": 1.725437981779958, "grad_norm": 0.28329533338546753, "learning_rate": 0.00013057641712508968, "loss": 0.053, "step": 12311 }, { "epoch": 1.7255781359495446, "grad_norm": 0.28426188230514526, "learning_rate": 0.00013056206649127, "loss": 0.0288, "step": 12312 }, { "epoch": 1.725718290119131, "grad_norm": 0.4659711420536041, "learning_rate": 0.00013054771585745037, "loss": 0.0547, "step": 12313 }, { "epoch": 1.7258584442887175, "grad_norm": 0.7289787530899048, "learning_rate": 0.0001305333652236307, "loss": 0.1183, "step": 12314 }, { "epoch": 1.7259985984583042, "grad_norm": 0.1230320855975151, "learning_rate": 0.00013051901458981105, "loss": 0.043, "step": 12315 }, { "epoch": 1.7261387526278906, "grad_norm": 0.3219144642353058, "learning_rate": 0.00013050466395599138, "loss": 0.0472, "step": 12316 }, { "epoch": 1.7262789067974773, "grad_norm": 0.20390263199806213, "learning_rate": 0.0001304903133221717, "loss": 0.0097, "step": 12317 }, { "epoch": 1.7264190609670638, "grad_norm": 0.17698785662651062, "learning_rate": 0.00013047596268835206, "loss": 0.0582, "step": 12318 }, { "epoch": 1.7265592151366502, "grad_norm": 0.7207682132720947, "learning_rate": 
0.0001304616120545324, "loss": 0.1119, "step": 12319 }, { "epoch": 1.726699369306237, "grad_norm": 0.4833196699619293, "learning_rate": 0.00013044726142071275, "loss": 0.0427, "step": 12320 }, { "epoch": 1.7268395234758234, "grad_norm": 0.3859612047672272, "learning_rate": 0.00013043291078689308, "loss": 0.0292, "step": 12321 }, { "epoch": 1.72697967764541, "grad_norm": 0.8018611073493958, "learning_rate": 0.0001304185601530734, "loss": 0.0267, "step": 12322 }, { "epoch": 1.7271198318149965, "grad_norm": 0.127554252743721, "learning_rate": 0.00013040420951925376, "loss": 0.0102, "step": 12323 }, { "epoch": 1.727259985984583, "grad_norm": 0.09663911908864975, "learning_rate": 0.0001303898588854341, "loss": 0.0144, "step": 12324 }, { "epoch": 1.7274001401541694, "grad_norm": 0.5925981402397156, "learning_rate": 0.00013037550825161442, "loss": 0.0556, "step": 12325 }, { "epoch": 1.727540294323756, "grad_norm": 0.18153047561645508, "learning_rate": 0.00013036115761779477, "loss": 0.018, "step": 12326 }, { "epoch": 1.7276804484933428, "grad_norm": 0.3498193025588989, "learning_rate": 0.00013034680698397513, "loss": 0.032, "step": 12327 }, { "epoch": 1.7278206026629293, "grad_norm": 0.11203636974096298, "learning_rate": 0.00013033245635015546, "loss": 0.0137, "step": 12328 }, { "epoch": 1.7279607568325157, "grad_norm": 0.44035327434539795, "learning_rate": 0.00013031810571633579, "loss": 0.0518, "step": 12329 }, { "epoch": 1.7281009110021022, "grad_norm": 0.42255085706710815, "learning_rate": 0.00013030375508251611, "loss": 0.0316, "step": 12330 }, { "epoch": 1.7282410651716889, "grad_norm": 1.9962173700332642, "learning_rate": 0.00013028940444869647, "loss": 0.1031, "step": 12331 }, { "epoch": 1.7283812193412755, "grad_norm": 0.9048944115638733, "learning_rate": 0.00013027505381487683, "loss": 0.0503, "step": 12332 }, { "epoch": 1.728521373510862, "grad_norm": 0.9001339077949524, "learning_rate": 0.00013026070318105715, "loss": 0.0656, "step": 12333 }, { "epoch": 
1.7286615276804485, "grad_norm": 0.5447548031806946, "learning_rate": 0.00013024635254723748, "loss": 0.0347, "step": 12334 }, { "epoch": 1.728801681850035, "grad_norm": 2.1652841567993164, "learning_rate": 0.00013023200191341784, "loss": 0.1731, "step": 12335 }, { "epoch": 1.7289418360196216, "grad_norm": 0.33333322405815125, "learning_rate": 0.00013021765127959817, "loss": 0.0396, "step": 12336 }, { "epoch": 1.7290819901892083, "grad_norm": 0.31905025243759155, "learning_rate": 0.00013020330064577852, "loss": 0.0305, "step": 12337 }, { "epoch": 1.7292221443587947, "grad_norm": 0.3603152632713318, "learning_rate": 0.00013018895001195885, "loss": 0.0798, "step": 12338 }, { "epoch": 1.7293622985283812, "grad_norm": 0.26587095856666565, "learning_rate": 0.00013017459937813918, "loss": 0.0537, "step": 12339 }, { "epoch": 1.7295024526979677, "grad_norm": 0.20560120046138763, "learning_rate": 0.00013016024874431954, "loss": 0.0293, "step": 12340 }, { "epoch": 1.7296426068675543, "grad_norm": 0.21184688806533813, "learning_rate": 0.00013014589811049986, "loss": 0.0583, "step": 12341 }, { "epoch": 1.729782761037141, "grad_norm": 0.2364475429058075, "learning_rate": 0.00013013154747668022, "loss": 0.0308, "step": 12342 }, { "epoch": 1.7299229152067275, "grad_norm": 0.2784612476825714, "learning_rate": 0.00013011719684286055, "loss": 0.0629, "step": 12343 }, { "epoch": 1.730063069376314, "grad_norm": 0.5540226101875305, "learning_rate": 0.00013010284620904088, "loss": 0.043, "step": 12344 }, { "epoch": 1.7302032235459004, "grad_norm": 0.34508460760116577, "learning_rate": 0.00013008849557522123, "loss": 0.0408, "step": 12345 }, { "epoch": 1.730343377715487, "grad_norm": 0.19782060384750366, "learning_rate": 0.00013007414494140156, "loss": 0.0138, "step": 12346 }, { "epoch": 1.7304835318850735, "grad_norm": 0.30429843068122864, "learning_rate": 0.00013005979430758192, "loss": 0.0624, "step": 12347 }, { "epoch": 1.7306236860546602, "grad_norm": 0.13465876877307892, 
"learning_rate": 0.00013004544367376225, "loss": 0.015, "step": 12348 }, { "epoch": 1.7307638402242467, "grad_norm": 0.4841441214084625, "learning_rate": 0.00013003109303994257, "loss": 0.096, "step": 12349 }, { "epoch": 1.7309039943938331, "grad_norm": 0.2868037521839142, "learning_rate": 0.00013001674240612293, "loss": 0.0316, "step": 12350 }, { "epoch": 1.7310441485634198, "grad_norm": 0.17500494420528412, "learning_rate": 0.00013000239177230326, "loss": 0.0513, "step": 12351 }, { "epoch": 1.7311843027330063, "grad_norm": 0.2766554057598114, "learning_rate": 0.00012998804113848361, "loss": 0.0488, "step": 12352 }, { "epoch": 1.731324456902593, "grad_norm": 0.9096641540527344, "learning_rate": 0.00012997369050466394, "loss": 0.074, "step": 12353 }, { "epoch": 1.7314646110721794, "grad_norm": 0.1750549077987671, "learning_rate": 0.0001299593398708443, "loss": 0.0231, "step": 12354 }, { "epoch": 1.7316047652417659, "grad_norm": 0.3142986297607422, "learning_rate": 0.00012994498923702463, "loss": 0.0318, "step": 12355 }, { "epoch": 1.7317449194113523, "grad_norm": 0.1438191831111908, "learning_rate": 0.00012993063860320496, "loss": 0.0259, "step": 12356 }, { "epoch": 1.731885073580939, "grad_norm": 0.2323543280363083, "learning_rate": 0.00012991628796938528, "loss": 0.022, "step": 12357 }, { "epoch": 1.7320252277505257, "grad_norm": 0.5318880081176758, "learning_rate": 0.00012990193733556564, "loss": 0.0236, "step": 12358 }, { "epoch": 1.7321653819201122, "grad_norm": 0.4395744204521179, "learning_rate": 0.000129887586701746, "loss": 0.0494, "step": 12359 }, { "epoch": 1.7323055360896986, "grad_norm": 0.3908523619174957, "learning_rate": 0.00012987323606792632, "loss": 0.0524, "step": 12360 }, { "epoch": 1.732445690259285, "grad_norm": 0.3383214771747589, "learning_rate": 0.00012985888543410665, "loss": 0.0327, "step": 12361 }, { "epoch": 1.7325858444288718, "grad_norm": 0.24420246481895447, "learning_rate": 0.000129844534800287, "loss": 0.122, "step": 12362 }, { 
"epoch": 1.7327259985984584, "grad_norm": 0.29437127709388733, "learning_rate": 0.00012983018416646734, "loss": 0.0965, "step": 12363 }, { "epoch": 1.732866152768045, "grad_norm": 0.1582673192024231, "learning_rate": 0.0001298158335326477, "loss": 0.0381, "step": 12364 }, { "epoch": 1.7330063069376314, "grad_norm": 0.490459680557251, "learning_rate": 0.00012980148289882802, "loss": 0.1023, "step": 12365 }, { "epoch": 1.7331464611072178, "grad_norm": 0.3984527289867401, "learning_rate": 0.00012978713226500835, "loss": 0.0569, "step": 12366 }, { "epoch": 1.7332866152768045, "grad_norm": 0.4091145396232605, "learning_rate": 0.0001297727816311887, "loss": 0.1168, "step": 12367 }, { "epoch": 1.7334267694463912, "grad_norm": 0.3573763966560364, "learning_rate": 0.00012975843099736903, "loss": 0.0488, "step": 12368 }, { "epoch": 1.7335669236159776, "grad_norm": 0.1730595976114273, "learning_rate": 0.0001297440803635494, "loss": 0.0092, "step": 12369 }, { "epoch": 1.733707077785564, "grad_norm": 0.26960843801498413, "learning_rate": 0.00012972972972972972, "loss": 0.0193, "step": 12370 }, { "epoch": 1.7338472319551506, "grad_norm": 0.30998438596725464, "learning_rate": 0.00012971537909591005, "loss": 0.0623, "step": 12371 }, { "epoch": 1.7339873861247372, "grad_norm": 0.21065787971019745, "learning_rate": 0.0001297010284620904, "loss": 0.0435, "step": 12372 }, { "epoch": 1.734127540294324, "grad_norm": 0.07457490265369415, "learning_rate": 0.00012968667782827073, "loss": 0.0076, "step": 12373 }, { "epoch": 1.7342676944639104, "grad_norm": 0.21273885667324066, "learning_rate": 0.00012967232719445109, "loss": 0.0296, "step": 12374 }, { "epoch": 1.7344078486334968, "grad_norm": 0.6699028611183167, "learning_rate": 0.00012965797656063141, "loss": 0.0864, "step": 12375 }, { "epoch": 1.7345480028030833, "grad_norm": 0.3473144471645355, "learning_rate": 0.00012964362592681174, "loss": 0.0371, "step": 12376 }, { "epoch": 1.73468815697267, "grad_norm": 0.49465423822402954, 
"learning_rate": 0.0001296292752929921, "loss": 0.0852, "step": 12377 }, { "epoch": 1.7348283111422564, "grad_norm": 0.4723498821258545, "learning_rate": 0.00012961492465917245, "loss": 0.012, "step": 12378 }, { "epoch": 1.7349684653118431, "grad_norm": 0.5831929445266724, "learning_rate": 0.00012960057402535278, "loss": 0.1802, "step": 12379 }, { "epoch": 1.7351086194814296, "grad_norm": 0.6744077801704407, "learning_rate": 0.0001295862233915331, "loss": 0.0776, "step": 12380 }, { "epoch": 1.735248773651016, "grad_norm": 0.029793143272399902, "learning_rate": 0.00012957187275771344, "loss": 0.0025, "step": 12381 }, { "epoch": 1.7353889278206025, "grad_norm": 0.6070799231529236, "learning_rate": 0.0001295575221238938, "loss": 0.165, "step": 12382 }, { "epoch": 1.7355290819901892, "grad_norm": 0.18198765814304352, "learning_rate": 0.00012954317149007412, "loss": 0.0123, "step": 12383 }, { "epoch": 1.7356692361597759, "grad_norm": 0.28456178307533264, "learning_rate": 0.00012952882085625448, "loss": 0.0297, "step": 12384 }, { "epoch": 1.7358093903293623, "grad_norm": 1.2448192834854126, "learning_rate": 0.0001295144702224348, "loss": 0.0986, "step": 12385 }, { "epoch": 1.7359495444989488, "grad_norm": 0.18204334378242493, "learning_rate": 0.00012950011958861516, "loss": 0.0382, "step": 12386 }, { "epoch": 1.7360896986685352, "grad_norm": 0.2056664228439331, "learning_rate": 0.0001294857689547955, "loss": 0.0467, "step": 12387 }, { "epoch": 1.736229852838122, "grad_norm": 0.5056012272834778, "learning_rate": 0.00012947141832097582, "loss": 0.0965, "step": 12388 }, { "epoch": 1.7363700070077086, "grad_norm": 0.21838964521884918, "learning_rate": 0.00012945706768715618, "loss": 0.0352, "step": 12389 }, { "epoch": 1.736510161177295, "grad_norm": 0.19418999552726746, "learning_rate": 0.0001294427170533365, "loss": 0.0207, "step": 12390 }, { "epoch": 1.7366503153468815, "grad_norm": 0.3644949197769165, "learning_rate": 0.00012942836641951686, "loss": 0.0532, "step": 12391 
}, { "epoch": 1.736790469516468, "grad_norm": 0.25886279344558716, "learning_rate": 0.0001294140157856972, "loss": 0.0347, "step": 12392 }, { "epoch": 1.7369306236860547, "grad_norm": 0.2497645914554596, "learning_rate": 0.00012939966515187752, "loss": 0.0593, "step": 12393 }, { "epoch": 1.7370707778556413, "grad_norm": 0.3633016049861908, "learning_rate": 0.00012938531451805787, "loss": 0.0514, "step": 12394 }, { "epoch": 1.7372109320252278, "grad_norm": 0.14847776293754578, "learning_rate": 0.0001293709638842382, "loss": 0.0262, "step": 12395 }, { "epoch": 1.7373510861948143, "grad_norm": 0.5249321460723877, "learning_rate": 0.00012935661325041856, "loss": 0.0642, "step": 12396 }, { "epoch": 1.7374912403644007, "grad_norm": 0.3293091654777527, "learning_rate": 0.0001293422626165989, "loss": 0.0949, "step": 12397 }, { "epoch": 1.7376313945339874, "grad_norm": 0.5687469840049744, "learning_rate": 0.00012932791198277922, "loss": 0.0358, "step": 12398 }, { "epoch": 1.737771548703574, "grad_norm": 0.15011931955814362, "learning_rate": 0.00012931356134895957, "loss": 0.0195, "step": 12399 }, { "epoch": 1.7379117028731605, "grad_norm": 0.07403405010700226, "learning_rate": 0.0001292992107151399, "loss": 0.01, "step": 12400 }, { "epoch": 1.738051857042747, "grad_norm": 0.432157963514328, "learning_rate": 0.00012928486008132026, "loss": 0.0603, "step": 12401 }, { "epoch": 1.7381920112123335, "grad_norm": 0.16148848831653595, "learning_rate": 0.00012927050944750058, "loss": 0.0218, "step": 12402 }, { "epoch": 1.7383321653819201, "grad_norm": 0.5359057188034058, "learning_rate": 0.0001292561588136809, "loss": 0.0738, "step": 12403 }, { "epoch": 1.7384723195515066, "grad_norm": 0.2861936092376709, "learning_rate": 0.00012924180817986127, "loss": 0.0551, "step": 12404 }, { "epoch": 1.7386124737210933, "grad_norm": 0.7421399354934692, "learning_rate": 0.00012922745754604162, "loss": 0.0626, "step": 12405 }, { "epoch": 1.7387526278906797, "grad_norm": 0.1533435434103012, 
"learning_rate": 0.00012921310691222195, "loss": 0.0152, "step": 12406 }, { "epoch": 1.7388927820602662, "grad_norm": 0.2357248067855835, "learning_rate": 0.00012919875627840228, "loss": 0.0423, "step": 12407 }, { "epoch": 1.7390329362298529, "grad_norm": 0.31651562452316284, "learning_rate": 0.0001291844056445826, "loss": 0.0935, "step": 12408 }, { "epoch": 1.7391730903994393, "grad_norm": 0.4581146538257599, "learning_rate": 0.00012917005501076297, "loss": 0.0553, "step": 12409 }, { "epoch": 1.739313244569026, "grad_norm": 0.24639490246772766, "learning_rate": 0.00012915570437694332, "loss": 0.0378, "step": 12410 }, { "epoch": 1.7394533987386125, "grad_norm": 0.14299839735031128, "learning_rate": 0.00012914135374312365, "loss": 0.0251, "step": 12411 }, { "epoch": 1.739593552908199, "grad_norm": 0.17846910655498505, "learning_rate": 0.00012912700310930398, "loss": 0.0197, "step": 12412 }, { "epoch": 1.7397337070777854, "grad_norm": 0.6507803201675415, "learning_rate": 0.00012911265247548433, "loss": 0.0966, "step": 12413 }, { "epoch": 1.739873861247372, "grad_norm": 0.6710510849952698, "learning_rate": 0.00012909830184166466, "loss": 0.1416, "step": 12414 }, { "epoch": 1.7400140154169588, "grad_norm": 0.1861286610364914, "learning_rate": 0.00012908395120784502, "loss": 0.0245, "step": 12415 }, { "epoch": 1.7401541695865452, "grad_norm": 0.3210747539997101, "learning_rate": 0.00012906960057402535, "loss": 0.0994, "step": 12416 }, { "epoch": 1.7402943237561317, "grad_norm": 0.20362047851085663, "learning_rate": 0.00012905524994020567, "loss": 0.0204, "step": 12417 }, { "epoch": 1.7404344779257181, "grad_norm": 0.22663837671279907, "learning_rate": 0.00012904089930638603, "loss": 0.0431, "step": 12418 }, { "epoch": 1.7405746320953048, "grad_norm": 0.4013076424598694, "learning_rate": 0.00012902654867256636, "loss": 0.06, "step": 12419 }, { "epoch": 1.7407147862648915, "grad_norm": 0.40635499358177185, "learning_rate": 0.0001290121980387467, "loss": 0.0574, "step": 
12420 }, { "epoch": 1.740854940434478, "grad_norm": 0.2186608910560608, "learning_rate": 0.00012899784740492704, "loss": 0.0215, "step": 12421 }, { "epoch": 1.7409950946040644, "grad_norm": 0.5750024318695068, "learning_rate": 0.00012898349677110737, "loss": 0.0252, "step": 12422 }, { "epoch": 1.741135248773651, "grad_norm": 0.1944301575422287, "learning_rate": 0.00012896914613728773, "loss": 0.0235, "step": 12423 }, { "epoch": 1.7412754029432376, "grad_norm": 0.6095818877220154, "learning_rate": 0.00012895479550346806, "loss": 0.0655, "step": 12424 }, { "epoch": 1.7414155571128243, "grad_norm": 0.6217092871665955, "learning_rate": 0.00012894044486964838, "loss": 0.019, "step": 12425 }, { "epoch": 1.7415557112824107, "grad_norm": 0.4872361421585083, "learning_rate": 0.00012892609423582874, "loss": 0.1592, "step": 12426 }, { "epoch": 1.7416958654519972, "grad_norm": 0.16807933151721954, "learning_rate": 0.00012891174360200907, "loss": 0.0339, "step": 12427 }, { "epoch": 1.7418360196215836, "grad_norm": 0.20999501645565033, "learning_rate": 0.00012889739296818942, "loss": 0.0601, "step": 12428 }, { "epoch": 1.7419761737911703, "grad_norm": 0.3214479982852936, "learning_rate": 0.00012888304233436975, "loss": 0.0455, "step": 12429 }, { "epoch": 1.742116327960757, "grad_norm": 1.0307008028030396, "learning_rate": 0.00012886869170055008, "loss": 0.0652, "step": 12430 }, { "epoch": 1.7422564821303435, "grad_norm": 0.3579634130001068, "learning_rate": 0.00012885434106673044, "loss": 0.0578, "step": 12431 }, { "epoch": 1.74239663629993, "grad_norm": 0.5004786849021912, "learning_rate": 0.0001288399904329108, "loss": 0.0156, "step": 12432 }, { "epoch": 1.7425367904695164, "grad_norm": 0.2483609914779663, "learning_rate": 0.00012882563979909112, "loss": 0.0115, "step": 12433 }, { "epoch": 1.742676944639103, "grad_norm": 0.5183850526809692, "learning_rate": 0.00012881128916527145, "loss": 0.0544, "step": 12434 }, { "epoch": 1.7428170988086895, "grad_norm": 4.178595542907715, 
"learning_rate": 0.00012879693853145178, "loss": 0.2263, "step": 12435 }, { "epoch": 1.7429572529782762, "grad_norm": 0.14125262200832367, "learning_rate": 0.00012878258789763213, "loss": 0.0267, "step": 12436 }, { "epoch": 1.7430974071478627, "grad_norm": 0.9474937915802002, "learning_rate": 0.0001287682372638125, "loss": 0.0879, "step": 12437 }, { "epoch": 1.7432375613174491, "grad_norm": 0.4898061454296112, "learning_rate": 0.00012875388662999282, "loss": 0.066, "step": 12438 }, { "epoch": 1.7433777154870356, "grad_norm": 0.22721178829669952, "learning_rate": 0.00012873953599617315, "loss": 0.0146, "step": 12439 }, { "epoch": 1.7435178696566223, "grad_norm": 0.47369828820228577, "learning_rate": 0.0001287251853623535, "loss": 0.0544, "step": 12440 }, { "epoch": 1.743658023826209, "grad_norm": 0.7142036557197571, "learning_rate": 0.00012871083472853383, "loss": 0.038, "step": 12441 }, { "epoch": 1.7437981779957954, "grad_norm": 0.49921169877052307, "learning_rate": 0.0001286964840947142, "loss": 0.1423, "step": 12442 }, { "epoch": 1.7439383321653819, "grad_norm": 0.34828388690948486, "learning_rate": 0.00012868213346089452, "loss": 0.0498, "step": 12443 }, { "epoch": 1.7440784863349683, "grad_norm": 0.48462727665901184, "learning_rate": 0.00012866778282707484, "loss": 0.0513, "step": 12444 }, { "epoch": 1.744218640504555, "grad_norm": 0.24147053062915802, "learning_rate": 0.0001286534321932552, "loss": 0.0516, "step": 12445 }, { "epoch": 1.7443587946741417, "grad_norm": 0.37030845880508423, "learning_rate": 0.00012863908155943553, "loss": 0.0279, "step": 12446 }, { "epoch": 1.7444989488437281, "grad_norm": 0.21156297624111176, "learning_rate": 0.00012862473092561588, "loss": 0.0402, "step": 12447 }, { "epoch": 1.7446391030133146, "grad_norm": 0.26465553045272827, "learning_rate": 0.0001286103802917962, "loss": 0.0219, "step": 12448 }, { "epoch": 1.744779257182901, "grad_norm": 0.43751251697540283, "learning_rate": 0.00012859602965797654, "loss": 0.0872, "step": 
12449 }, { "epoch": 1.7449194113524877, "grad_norm": 0.10660535097122192, "learning_rate": 0.0001285816790241569, "loss": 0.0097, "step": 12450 }, { "epoch": 1.7450595655220744, "grad_norm": 0.3799755573272705, "learning_rate": 0.00012856732839033723, "loss": 0.0468, "step": 12451 }, { "epoch": 1.7451997196916609, "grad_norm": 0.20369702577590942, "learning_rate": 0.00012855297775651755, "loss": 0.0537, "step": 12452 }, { "epoch": 1.7453398738612473, "grad_norm": 0.1461147665977478, "learning_rate": 0.0001285386271226979, "loss": 0.0142, "step": 12453 }, { "epoch": 1.7454800280308338, "grad_norm": 0.18199720978736877, "learning_rate": 0.00012852427648887824, "loss": 0.0504, "step": 12454 }, { "epoch": 1.7456201822004205, "grad_norm": 0.5083375573158264, "learning_rate": 0.0001285099258550586, "loss": 0.088, "step": 12455 }, { "epoch": 1.7457603363700072, "grad_norm": 0.17230340838432312, "learning_rate": 0.00012849557522123892, "loss": 0.0498, "step": 12456 }, { "epoch": 1.7459004905395936, "grad_norm": 0.36871016025543213, "learning_rate": 0.00012848122458741925, "loss": 0.0327, "step": 12457 }, { "epoch": 1.74604064470918, "grad_norm": 0.6613603234291077, "learning_rate": 0.0001284668739535996, "loss": 0.045, "step": 12458 }, { "epoch": 1.7461807988787665, "grad_norm": 0.27450990676879883, "learning_rate": 0.00012845252331977994, "loss": 0.0391, "step": 12459 }, { "epoch": 1.7463209530483532, "grad_norm": 0.2846451997756958, "learning_rate": 0.0001284381726859603, "loss": 0.0205, "step": 12460 }, { "epoch": 1.7464611072179397, "grad_norm": 0.22938039898872375, "learning_rate": 0.00012842382205214062, "loss": 0.0154, "step": 12461 }, { "epoch": 1.7466012613875264, "grad_norm": 0.11990543454885483, "learning_rate": 0.00012840947141832095, "loss": 0.0154, "step": 12462 }, { "epoch": 1.7467414155571128, "grad_norm": 0.6128911972045898, "learning_rate": 0.0001283951207845013, "loss": 0.066, "step": 12463 }, { "epoch": 1.7468815697266993, "grad_norm": 
0.5303910970687866, "learning_rate": 0.00012838077015068166, "loss": 0.0696, "step": 12464 }, { "epoch": 1.747021723896286, "grad_norm": 0.5912019610404968, "learning_rate": 0.000128366419516862, "loss": 0.1024, "step": 12465 }, { "epoch": 1.7471618780658724, "grad_norm": 0.12200074642896652, "learning_rate": 0.00012835206888304232, "loss": 0.0113, "step": 12466 }, { "epoch": 1.747302032235459, "grad_norm": 0.27069538831710815, "learning_rate": 0.00012833771824922267, "loss": 0.0235, "step": 12467 }, { "epoch": 1.7474421864050456, "grad_norm": 0.1351957768201828, "learning_rate": 0.000128323367615403, "loss": 0.0069, "step": 12468 }, { "epoch": 1.747582340574632, "grad_norm": 0.1599879115819931, "learning_rate": 0.00012830901698158336, "loss": 0.0342, "step": 12469 }, { "epoch": 1.7477224947442185, "grad_norm": 0.6961497664451599, "learning_rate": 0.00012829466634776368, "loss": 0.1281, "step": 12470 }, { "epoch": 1.7478626489138052, "grad_norm": 0.193365678191185, "learning_rate": 0.000128280315713944, "loss": 0.0128, "step": 12471 }, { "epoch": 1.7480028030833918, "grad_norm": 0.5707042217254639, "learning_rate": 0.00012826596508012437, "loss": 0.1072, "step": 12472 }, { "epoch": 1.7481429572529783, "grad_norm": 0.6319882273674011, "learning_rate": 0.0001282516144463047, "loss": 0.0486, "step": 12473 }, { "epoch": 1.7482831114225648, "grad_norm": 0.69849693775177, "learning_rate": 0.00012823726381248505, "loss": 0.0646, "step": 12474 }, { "epoch": 1.7484232655921512, "grad_norm": 0.28756093978881836, "learning_rate": 0.00012822291317866538, "loss": 0.0273, "step": 12475 }, { "epoch": 1.748563419761738, "grad_norm": 0.2201293706893921, "learning_rate": 0.0001282085625448457, "loss": 0.0484, "step": 12476 }, { "epoch": 1.7487035739313246, "grad_norm": 1.8964568376541138, "learning_rate": 0.00012819421191102607, "loss": 0.1547, "step": 12477 }, { "epoch": 1.748843728100911, "grad_norm": 0.2972404360771179, "learning_rate": 0.0001281798612772064, "loss": 0.023, 
"step": 12478 }, { "epoch": 1.7489838822704975, "grad_norm": 0.26198846101760864, "learning_rate": 0.00012816551064338675, "loss": 0.0605, "step": 12479 }, { "epoch": 1.749124036440084, "grad_norm": 0.29685354232788086, "learning_rate": 0.00012815116000956708, "loss": 0.0471, "step": 12480 }, { "epoch": 1.7492641906096706, "grad_norm": 1.3867894411087036, "learning_rate": 0.0001281368093757474, "loss": 0.0662, "step": 12481 }, { "epoch": 1.7494043447792573, "grad_norm": 0.6764283180236816, "learning_rate": 0.00012812245874192776, "loss": 0.1135, "step": 12482 }, { "epoch": 1.7495444989488438, "grad_norm": 0.7813418507575989, "learning_rate": 0.0001281081081081081, "loss": 0.0672, "step": 12483 }, { "epoch": 1.7496846531184302, "grad_norm": 2.208521842956543, "learning_rate": 0.00012809375747428842, "loss": 0.2374, "step": 12484 }, { "epoch": 1.7498248072880167, "grad_norm": 0.6415435671806335, "learning_rate": 0.00012807940684046878, "loss": 0.1071, "step": 12485 }, { "epoch": 1.7499649614576034, "grad_norm": 0.16769105195999146, "learning_rate": 0.0001280650562066491, "loss": 0.0225, "step": 12486 }, { "epoch": 1.75010511562719, "grad_norm": 0.09176711738109589, "learning_rate": 0.00012805070557282946, "loss": 0.014, "step": 12487 }, { "epoch": 1.7502452697967765, "grad_norm": 0.4044470489025116, "learning_rate": 0.0001280363549390098, "loss": 0.1298, "step": 12488 }, { "epoch": 1.750385423966363, "grad_norm": 0.41471362113952637, "learning_rate": 0.00012802200430519012, "loss": 0.0418, "step": 12489 }, { "epoch": 1.7505255781359494, "grad_norm": 0.36400818824768066, "learning_rate": 0.00012800765367137047, "loss": 0.0882, "step": 12490 }, { "epoch": 1.7506657323055361, "grad_norm": 0.18132257461547852, "learning_rate": 0.00012799330303755083, "loss": 0.0341, "step": 12491 }, { "epoch": 1.7508058864751226, "grad_norm": 0.2690911591053009, "learning_rate": 0.00012797895240373116, "loss": 0.0218, "step": 12492 }, { "epoch": 1.7509460406447093, "grad_norm": 
0.4310668706893921, "learning_rate": 0.00012796460176991149, "loss": 0.0644, "step": 12493 }, { "epoch": 1.7510861948142957, "grad_norm": 0.19673354923725128, "learning_rate": 0.00012795025113609181, "loss": 0.0251, "step": 12494 }, { "epoch": 1.7512263489838822, "grad_norm": 0.3696749806404114, "learning_rate": 0.00012793590050227217, "loss": 0.136, "step": 12495 }, { "epoch": 1.7513665031534689, "grad_norm": 0.43675506114959717, "learning_rate": 0.00012792154986845253, "loss": 0.0272, "step": 12496 }, { "epoch": 1.7515066573230553, "grad_norm": 0.1094985380768776, "learning_rate": 0.00012790719923463285, "loss": 0.0121, "step": 12497 }, { "epoch": 1.751646811492642, "grad_norm": 0.43299415707588196, "learning_rate": 0.00012789284860081318, "loss": 0.0416, "step": 12498 }, { "epoch": 1.7517869656622285, "grad_norm": 0.4543834626674652, "learning_rate": 0.00012787849796699354, "loss": 0.0433, "step": 12499 }, { "epoch": 1.751927119831815, "grad_norm": 0.16920851171016693, "learning_rate": 0.00012786414733317387, "loss": 0.0459, "step": 12500 }, { "epoch": 1.7520672740014014, "grad_norm": 0.36703088879585266, "learning_rate": 0.00012784979669935422, "loss": 0.0481, "step": 12501 }, { "epoch": 1.752207428170988, "grad_norm": 0.4576791822910309, "learning_rate": 0.00012783544606553455, "loss": 0.0442, "step": 12502 }, { "epoch": 1.7523475823405747, "grad_norm": 0.13480620086193085, "learning_rate": 0.00012782109543171488, "loss": 0.0216, "step": 12503 }, { "epoch": 1.7524877365101612, "grad_norm": 0.2754122316837311, "learning_rate": 0.00012780674479789524, "loss": 0.0856, "step": 12504 }, { "epoch": 1.7526278906797477, "grad_norm": 0.2122202068567276, "learning_rate": 0.00012779239416407556, "loss": 0.0199, "step": 12505 }, { "epoch": 1.7527680448493341, "grad_norm": 0.1711021065711975, "learning_rate": 0.00012777804353025592, "loss": 0.0402, "step": 12506 }, { "epoch": 1.7529081990189208, "grad_norm": 0.26081499457359314, "learning_rate": 0.00012776369289643625, 
"loss": 0.028, "step": 12507 }, { "epoch": 1.7530483531885075, "grad_norm": 0.41491949558258057, "learning_rate": 0.00012774934226261658, "loss": 0.0355, "step": 12508 }, { "epoch": 1.753188507358094, "grad_norm": 0.6401037573814392, "learning_rate": 0.00012773499162879693, "loss": 0.1195, "step": 12509 }, { "epoch": 1.7533286615276804, "grad_norm": 0.0715518668293953, "learning_rate": 0.0001277206409949773, "loss": 0.007, "step": 12510 }, { "epoch": 1.7534688156972669, "grad_norm": 0.48080724477767944, "learning_rate": 0.00012770629036115762, "loss": 0.0434, "step": 12511 }, { "epoch": 1.7536089698668536, "grad_norm": 0.1719878613948822, "learning_rate": 0.00012769193972733795, "loss": 0.0085, "step": 12512 }, { "epoch": 1.7537491240364402, "grad_norm": 0.29505372047424316, "learning_rate": 0.00012767758909351827, "loss": 0.0474, "step": 12513 }, { "epoch": 1.7538892782060267, "grad_norm": 0.15355399250984192, "learning_rate": 0.00012766323845969863, "loss": 0.0184, "step": 12514 }, { "epoch": 1.7540294323756132, "grad_norm": 0.5316483378410339, "learning_rate": 0.00012764888782587896, "loss": 0.0394, "step": 12515 }, { "epoch": 1.7541695865451996, "grad_norm": 0.4025880992412567, "learning_rate": 0.0001276345371920593, "loss": 0.0219, "step": 12516 }, { "epoch": 1.7543097407147863, "grad_norm": 0.2650052607059479, "learning_rate": 0.00012762018655823964, "loss": 0.0648, "step": 12517 }, { "epoch": 1.754449894884373, "grad_norm": 0.3519507944583893, "learning_rate": 0.00012760583592442, "loss": 0.0496, "step": 12518 }, { "epoch": 1.7545900490539594, "grad_norm": 0.10431823134422302, "learning_rate": 0.00012759148529060033, "loss": 0.0104, "step": 12519 }, { "epoch": 1.754730203223546, "grad_norm": 1.1775903701782227, "learning_rate": 0.00012757713465678066, "loss": 0.0481, "step": 12520 }, { "epoch": 1.7548703573931324, "grad_norm": 0.2676998972892761, "learning_rate": 0.00012756278402296098, "loss": 0.0701, "step": 12521 }, { "epoch": 1.755010511562719, 
"grad_norm": 0.4892907738685608, "learning_rate": 0.00012754843338914134, "loss": 0.0164, "step": 12522 }, { "epoch": 1.7551506657323055, "grad_norm": 0.1719372421503067, "learning_rate": 0.0001275340827553217, "loss": 0.0207, "step": 12523 }, { "epoch": 1.7552908199018922, "grad_norm": 0.4776909053325653, "learning_rate": 0.00012751973212150202, "loss": 0.0542, "step": 12524 }, { "epoch": 1.7554309740714786, "grad_norm": 0.5701606273651123, "learning_rate": 0.00012750538148768235, "loss": 0.1174, "step": 12525 }, { "epoch": 1.755571128241065, "grad_norm": 0.3333530128002167, "learning_rate": 0.0001274910308538627, "loss": 0.0624, "step": 12526 }, { "epoch": 1.7557112824106516, "grad_norm": 0.2036724090576172, "learning_rate": 0.00012747668022004304, "loss": 0.0242, "step": 12527 }, { "epoch": 1.7558514365802382, "grad_norm": 0.4787029027938843, "learning_rate": 0.0001274623295862234, "loss": 0.0878, "step": 12528 }, { "epoch": 1.755991590749825, "grad_norm": 0.49031561613082886, "learning_rate": 0.00012744797895240372, "loss": 0.0734, "step": 12529 }, { "epoch": 1.7561317449194114, "grad_norm": 0.6599199771881104, "learning_rate": 0.00012743362831858405, "loss": 0.0442, "step": 12530 }, { "epoch": 1.7562718990889978, "grad_norm": 0.4643518626689911, "learning_rate": 0.0001274192776847644, "loss": 0.0354, "step": 12531 }, { "epoch": 1.7564120532585843, "grad_norm": 0.5022676587104797, "learning_rate": 0.00012740492705094473, "loss": 0.0571, "step": 12532 }, { "epoch": 1.756552207428171, "grad_norm": 0.4180651605129242, "learning_rate": 0.0001273905764171251, "loss": 0.2197, "step": 12533 }, { "epoch": 1.7566923615977577, "grad_norm": 0.8144434094429016, "learning_rate": 0.00012737622578330542, "loss": 0.116, "step": 12534 }, { "epoch": 1.7568325157673441, "grad_norm": 1.8669880628585815, "learning_rate": 0.00012736187514948575, "loss": 0.353, "step": 12535 }, { "epoch": 1.7569726699369306, "grad_norm": 0.14623229205608368, "learning_rate": 0.0001273475245156661, 
"loss": 0.0201, "step": 12536 }, { "epoch": 1.757112824106517, "grad_norm": 0.23059025406837463, "learning_rate": 0.00012733317388184646, "loss": 0.0321, "step": 12537 }, { "epoch": 1.7572529782761037, "grad_norm": 0.2146468162536621, "learning_rate": 0.00012731882324802679, "loss": 0.0583, "step": 12538 }, { "epoch": 1.7573931324456904, "grad_norm": 0.09678258746862411, "learning_rate": 0.00012730447261420711, "loss": 0.0231, "step": 12539 }, { "epoch": 1.7575332866152769, "grad_norm": 0.34229788184165955, "learning_rate": 0.00012729012198038744, "loss": 0.0565, "step": 12540 }, { "epoch": 1.7576734407848633, "grad_norm": 0.3117468059062958, "learning_rate": 0.0001272757713465678, "loss": 0.0464, "step": 12541 }, { "epoch": 1.7578135949544498, "grad_norm": 0.11006274819374084, "learning_rate": 0.00012726142071274815, "loss": 0.01, "step": 12542 }, { "epoch": 1.7579537491240365, "grad_norm": 0.40979981422424316, "learning_rate": 0.00012724707007892848, "loss": 0.0505, "step": 12543 }, { "epoch": 1.7580939032936231, "grad_norm": 0.22459720075130463, "learning_rate": 0.0001272327194451088, "loss": 0.0254, "step": 12544 }, { "epoch": 1.7582340574632096, "grad_norm": 0.45098814368247986, "learning_rate": 0.00012721836881128917, "loss": 0.0477, "step": 12545 }, { "epoch": 1.758374211632796, "grad_norm": 0.13434384763240814, "learning_rate": 0.0001272040181774695, "loss": 0.0161, "step": 12546 }, { "epoch": 1.7585143658023825, "grad_norm": 0.3728131949901581, "learning_rate": 0.00012718966754364982, "loss": 0.0577, "step": 12547 }, { "epoch": 1.7586545199719692, "grad_norm": 0.43375343084335327, "learning_rate": 0.00012717531690983015, "loss": 0.0356, "step": 12548 }, { "epoch": 1.7587946741415557, "grad_norm": 0.2617349922657013, "learning_rate": 0.0001271609662760105, "loss": 0.0499, "step": 12549 }, { "epoch": 1.7589348283111423, "grad_norm": 0.1481235772371292, "learning_rate": 0.00012714661564219086, "loss": 0.0566, "step": 12550 }, { "epoch": 1.7590749824807288, 
"grad_norm": 0.4470871388912201, "learning_rate": 0.0001271322650083712, "loss": 0.0251, "step": 12551 }, { "epoch": 1.7592151366503153, "grad_norm": 0.2650703489780426, "learning_rate": 0.00012711791437455152, "loss": 0.0293, "step": 12552 }, { "epoch": 1.759355290819902, "grad_norm": 0.5229439735412598, "learning_rate": 0.00012710356374073188, "loss": 0.1084, "step": 12553 }, { "epoch": 1.7594954449894884, "grad_norm": 0.06834203004837036, "learning_rate": 0.0001270892131069122, "loss": 0.0108, "step": 12554 }, { "epoch": 1.759635599159075, "grad_norm": 0.5743789672851562, "learning_rate": 0.00012707486247309256, "loss": 0.0539, "step": 12555 }, { "epoch": 1.7597757533286615, "grad_norm": 0.24601314961910248, "learning_rate": 0.0001270605118392729, "loss": 0.0431, "step": 12556 }, { "epoch": 1.759915907498248, "grad_norm": 0.24687445163726807, "learning_rate": 0.00012704616120545322, "loss": 0.0428, "step": 12557 }, { "epoch": 1.7600560616678345, "grad_norm": 0.5515462160110474, "learning_rate": 0.00012703181057163357, "loss": 0.0263, "step": 12558 }, { "epoch": 1.7601962158374211, "grad_norm": 0.776067316532135, "learning_rate": 0.0001270174599378139, "loss": 0.091, "step": 12559 }, { "epoch": 1.7603363700070078, "grad_norm": 0.15359188616275787, "learning_rate": 0.00012700310930399426, "loss": 0.0412, "step": 12560 }, { "epoch": 1.7604765241765943, "grad_norm": 0.18060021102428436, "learning_rate": 0.0001269887586701746, "loss": 0.0277, "step": 12561 }, { "epoch": 1.7606166783461807, "grad_norm": 0.4621140956878662, "learning_rate": 0.00012697440803635492, "loss": 0.0487, "step": 12562 }, { "epoch": 1.7607568325157672, "grad_norm": 0.31673869490623474, "learning_rate": 0.00012696005740253527, "loss": 0.0607, "step": 12563 }, { "epoch": 1.7608969866853539, "grad_norm": 0.0712527483701706, "learning_rate": 0.0001269457067687156, "loss": 0.0134, "step": 12564 }, { "epoch": 1.7610371408549406, "grad_norm": 0.12757079303264618, "learning_rate": 
0.00012693135613489596, "loss": 0.013, "step": 12565 }, { "epoch": 1.761177295024527, "grad_norm": 0.19716481864452362, "learning_rate": 0.00012691700550107628, "loss": 0.0343, "step": 12566 }, { "epoch": 1.7613174491941135, "grad_norm": 0.431725412607193, "learning_rate": 0.0001269026548672566, "loss": 0.0711, "step": 12567 }, { "epoch": 1.7614576033637, "grad_norm": 0.3966410160064697, "learning_rate": 0.00012688830423343697, "loss": 0.05, "step": 12568 }, { "epoch": 1.7615977575332866, "grad_norm": 0.4786522090435028, "learning_rate": 0.00012687395359961732, "loss": 0.0343, "step": 12569 }, { "epoch": 1.7617379117028733, "grad_norm": 0.4577521085739136, "learning_rate": 0.00012685960296579765, "loss": 0.0753, "step": 12570 }, { "epoch": 1.7618780658724598, "grad_norm": 0.12372224777936935, "learning_rate": 0.00012684525233197798, "loss": 0.0058, "step": 12571 }, { "epoch": 1.7620182200420462, "grad_norm": 0.30933916568756104, "learning_rate": 0.0001268309016981583, "loss": 0.0389, "step": 12572 }, { "epoch": 1.7621583742116327, "grad_norm": 0.25998735427856445, "learning_rate": 0.00012681655106433867, "loss": 0.0213, "step": 12573 }, { "epoch": 1.7622985283812194, "grad_norm": 0.5214083194732666, "learning_rate": 0.00012680220043051902, "loss": 0.0998, "step": 12574 }, { "epoch": 1.762438682550806, "grad_norm": 0.4503547251224518, "learning_rate": 0.00012678784979669935, "loss": 0.0799, "step": 12575 }, { "epoch": 1.7625788367203925, "grad_norm": 0.5093770027160645, "learning_rate": 0.00012677349916287968, "loss": 0.1242, "step": 12576 }, { "epoch": 1.762718990889979, "grad_norm": 0.19426004588603973, "learning_rate": 0.00012675914852906003, "loss": 0.0219, "step": 12577 }, { "epoch": 1.7628591450595654, "grad_norm": 0.43884512782096863, "learning_rate": 0.00012674479789524036, "loss": 0.0539, "step": 12578 }, { "epoch": 1.762999299229152, "grad_norm": 0.7678201794624329, "learning_rate": 0.0001267304472614207, "loss": 0.0818, "step": 12579 }, { "epoch": 
1.7631394533987386, "grad_norm": 0.2963252365589142, "learning_rate": 0.00012671609662760105, "loss": 0.031, "step": 12580 }, { "epoch": 1.7632796075683252, "grad_norm": 1.0102722644805908, "learning_rate": 0.00012670174599378137, "loss": 0.125, "step": 12581 }, { "epoch": 1.7634197617379117, "grad_norm": 0.18581822514533997, "learning_rate": 0.00012668739535996173, "loss": 0.0174, "step": 12582 }, { "epoch": 1.7635599159074982, "grad_norm": 0.5525992512702942, "learning_rate": 0.00012667304472614206, "loss": 0.1618, "step": 12583 }, { "epoch": 1.7637000700770848, "grad_norm": 0.6855126023292542, "learning_rate": 0.0001266586940923224, "loss": 0.0905, "step": 12584 }, { "epoch": 1.7638402242466713, "grad_norm": 0.6343996524810791, "learning_rate": 0.00012664434345850274, "loss": 0.0606, "step": 12585 }, { "epoch": 1.763980378416258, "grad_norm": 0.1963779479265213, "learning_rate": 0.00012662999282468307, "loss": 0.0331, "step": 12586 }, { "epoch": 1.7641205325858444, "grad_norm": 0.4166298508644104, "learning_rate": 0.00012661564219086343, "loss": 0.0606, "step": 12587 }, { "epoch": 1.764260686755431, "grad_norm": 0.36177870631217957, "learning_rate": 0.00012660129155704376, "loss": 0.0648, "step": 12588 }, { "epoch": 1.7644008409250174, "grad_norm": 0.3219148516654968, "learning_rate": 0.00012658694092322408, "loss": 0.0499, "step": 12589 }, { "epoch": 1.764540995094604, "grad_norm": 0.5056019425392151, "learning_rate": 0.00012657259028940444, "loss": 0.0483, "step": 12590 }, { "epoch": 1.7646811492641907, "grad_norm": 0.2618730962276459, "learning_rate": 0.00012655823965558477, "loss": 0.0272, "step": 12591 }, { "epoch": 1.7648213034337772, "grad_norm": 0.18048636615276337, "learning_rate": 0.00012654388902176512, "loss": 0.0182, "step": 12592 }, { "epoch": 1.7649614576033636, "grad_norm": 0.1842491179704666, "learning_rate": 0.00012652953838794545, "loss": 0.0284, "step": 12593 }, { "epoch": 1.76510161177295, "grad_norm": 0.23825345933437347, "learning_rate": 
0.00012651518775412578, "loss": 0.0697, "step": 12594 }, { "epoch": 1.7652417659425368, "grad_norm": 0.37002116441726685, "learning_rate": 0.00012650083712030614, "loss": 0.057, "step": 12595 }, { "epoch": 1.7653819201121235, "grad_norm": 0.3460192084312439, "learning_rate": 0.0001264864864864865, "loss": 0.0852, "step": 12596 }, { "epoch": 1.76552207428171, "grad_norm": 0.505820631980896, "learning_rate": 0.00012647213585266682, "loss": 0.0672, "step": 12597 }, { "epoch": 1.7656622284512964, "grad_norm": 0.19242654740810394, "learning_rate": 0.00012645778521884715, "loss": 0.044, "step": 12598 }, { "epoch": 1.7658023826208828, "grad_norm": 0.3322543799877167, "learning_rate": 0.00012644343458502748, "loss": 0.051, "step": 12599 }, { "epoch": 1.7659425367904695, "grad_norm": 0.16603419184684753, "learning_rate": 0.00012642908395120783, "loss": 0.0436, "step": 12600 }, { "epoch": 1.7660826909600562, "grad_norm": 0.6716790199279785, "learning_rate": 0.0001264147333173882, "loss": 0.0463, "step": 12601 }, { "epoch": 1.7662228451296427, "grad_norm": 0.20524267852306366, "learning_rate": 0.00012640038268356852, "loss": 0.0679, "step": 12602 }, { "epoch": 1.7663629992992291, "grad_norm": 0.18807309865951538, "learning_rate": 0.00012638603204974885, "loss": 0.0268, "step": 12603 }, { "epoch": 1.7665031534688156, "grad_norm": 0.13584941625595093, "learning_rate": 0.0001263716814159292, "loss": 0.0225, "step": 12604 }, { "epoch": 1.7666433076384023, "grad_norm": 0.22762326896190643, "learning_rate": 0.00012635733078210953, "loss": 0.0196, "step": 12605 }, { "epoch": 1.766783461807989, "grad_norm": 0.37516525387763977, "learning_rate": 0.0001263429801482899, "loss": 0.0909, "step": 12606 }, { "epoch": 1.7669236159775754, "grad_norm": 0.4195827543735504, "learning_rate": 0.00012632862951447022, "loss": 0.0278, "step": 12607 }, { "epoch": 1.7670637701471619, "grad_norm": 0.21018537878990173, "learning_rate": 0.00012631427888065054, "loss": 0.0535, "step": 12608 }, { "epoch": 
1.7672039243167483, "grad_norm": 0.40081337094306946, "learning_rate": 0.0001262999282468309, "loss": 0.0629, "step": 12609 }, { "epoch": 1.767344078486335, "grad_norm": 0.2986059784889221, "learning_rate": 0.00012628557761301123, "loss": 0.0422, "step": 12610 }, { "epoch": 1.7674842326559215, "grad_norm": 0.3104020357131958, "learning_rate": 0.00012627122697919156, "loss": 0.0503, "step": 12611 }, { "epoch": 1.7676243868255082, "grad_norm": 0.33469510078430176, "learning_rate": 0.0001262568763453719, "loss": 0.0558, "step": 12612 }, { "epoch": 1.7677645409950946, "grad_norm": 0.11093602329492569, "learning_rate": 0.00012624252571155224, "loss": 0.0101, "step": 12613 }, { "epoch": 1.767904695164681, "grad_norm": 0.29873716831207275, "learning_rate": 0.0001262281750777326, "loss": 0.0479, "step": 12614 }, { "epoch": 1.7680448493342675, "grad_norm": 0.1035333201289177, "learning_rate": 0.00012621382444391293, "loss": 0.0159, "step": 12615 }, { "epoch": 1.7681850035038542, "grad_norm": 0.16268615424633026, "learning_rate": 0.00012619947381009325, "loss": 0.0242, "step": 12616 }, { "epoch": 1.768325157673441, "grad_norm": 0.19221974909305573, "learning_rate": 0.0001261851231762736, "loss": 0.0137, "step": 12617 }, { "epoch": 1.7684653118430274, "grad_norm": 0.15477271378040314, "learning_rate": 0.00012617077254245394, "loss": 0.0101, "step": 12618 }, { "epoch": 1.7686054660126138, "grad_norm": 0.370347797870636, "learning_rate": 0.0001261564219086343, "loss": 0.0382, "step": 12619 }, { "epoch": 1.7687456201822003, "grad_norm": 0.39101099967956543, "learning_rate": 0.00012614207127481462, "loss": 0.0528, "step": 12620 }, { "epoch": 1.768885774351787, "grad_norm": 0.27915164828300476, "learning_rate": 0.00012612772064099495, "loss": 0.0172, "step": 12621 }, { "epoch": 1.7690259285213736, "grad_norm": 0.13251233100891113, "learning_rate": 0.0001261133700071753, "loss": 0.0067, "step": 12622 }, { "epoch": 1.76916608269096, "grad_norm": 0.7182928323745728, "learning_rate": 
0.00012609901937335566, "loss": 0.072, "step": 12623 }, { "epoch": 1.7693062368605466, "grad_norm": 0.27497270703315735, "learning_rate": 0.000126084668739536, "loss": 0.0328, "step": 12624 }, { "epoch": 1.769446391030133, "grad_norm": 0.2314102053642273, "learning_rate": 0.00012607031810571632, "loss": 0.0492, "step": 12625 }, { "epoch": 1.7695865451997197, "grad_norm": 0.35429078340530396, "learning_rate": 0.00012605596747189665, "loss": 0.0135, "step": 12626 }, { "epoch": 1.7697266993693064, "grad_norm": 0.3357757031917572, "learning_rate": 0.000126041616838077, "loss": 0.0564, "step": 12627 }, { "epoch": 1.7698668535388928, "grad_norm": 0.29387056827545166, "learning_rate": 0.00012602726620425736, "loss": 0.0229, "step": 12628 }, { "epoch": 1.7700070077084793, "grad_norm": 0.13563747704029083, "learning_rate": 0.0001260129155704377, "loss": 0.0163, "step": 12629 }, { "epoch": 1.7701471618780658, "grad_norm": 0.39580798149108887, "learning_rate": 0.00012599856493661802, "loss": 0.0618, "step": 12630 }, { "epoch": 1.7702873160476524, "grad_norm": 0.3091544210910797, "learning_rate": 0.00012598421430279837, "loss": 0.0926, "step": 12631 }, { "epoch": 1.7704274702172391, "grad_norm": 0.18303082883358002, "learning_rate": 0.0001259698636689787, "loss": 0.0152, "step": 12632 }, { "epoch": 1.7705676243868256, "grad_norm": 0.37664055824279785, "learning_rate": 0.00012595551303515906, "loss": 0.0202, "step": 12633 }, { "epoch": 1.770707778556412, "grad_norm": 3.7317678928375244, "learning_rate": 0.00012594116240133938, "loss": 0.1951, "step": 12634 }, { "epoch": 1.7708479327259985, "grad_norm": 1.39244544506073, "learning_rate": 0.0001259268117675197, "loss": 0.1723, "step": 12635 }, { "epoch": 1.7709880868955852, "grad_norm": 0.5231479406356812, "learning_rate": 0.00012591246113370007, "loss": 0.0598, "step": 12636 }, { "epoch": 1.7711282410651716, "grad_norm": 1.100511908531189, "learning_rate": 0.0001258981104998804, "loss": 0.1009, "step": 12637 }, { "epoch": 
1.7712683952347583, "grad_norm": 0.0895213931798935, "learning_rate": 0.00012588375986606075, "loss": 0.0154, "step": 12638 }, { "epoch": 1.7714085494043448, "grad_norm": 0.06310956925153732, "learning_rate": 0.00012586940923224108, "loss": 0.0054, "step": 12639 }, { "epoch": 1.7715487035739312, "grad_norm": 0.350788414478302, "learning_rate": 0.0001258550585984214, "loss": 0.0839, "step": 12640 }, { "epoch": 1.771688857743518, "grad_norm": 0.22423112392425537, "learning_rate": 0.00012584070796460177, "loss": 0.0286, "step": 12641 }, { "epoch": 1.7718290119131044, "grad_norm": 0.0421469509601593, "learning_rate": 0.0001258263573307821, "loss": 0.0113, "step": 12642 }, { "epoch": 1.771969166082691, "grad_norm": 0.08583997935056686, "learning_rate": 0.00012581200669696242, "loss": 0.0183, "step": 12643 }, { "epoch": 1.7721093202522775, "grad_norm": 0.21854151785373688, "learning_rate": 0.00012579765606314278, "loss": 0.0289, "step": 12644 }, { "epoch": 1.772249474421864, "grad_norm": 0.5578779578208923, "learning_rate": 0.0001257833054293231, "loss": 0.0993, "step": 12645 }, { "epoch": 1.7723896285914504, "grad_norm": 0.1693013608455658, "learning_rate": 0.00012576895479550346, "loss": 0.0239, "step": 12646 }, { "epoch": 1.7725297827610371, "grad_norm": 0.3322499096393585, "learning_rate": 0.0001257546041616838, "loss": 0.0321, "step": 12647 }, { "epoch": 1.7726699369306238, "grad_norm": 0.6681264042854309, "learning_rate": 0.00012574025352786412, "loss": 0.1421, "step": 12648 }, { "epoch": 1.7728100911002103, "grad_norm": 0.16810016334056854, "learning_rate": 0.00012572590289404448, "loss": 0.0326, "step": 12649 }, { "epoch": 1.7729502452697967, "grad_norm": 0.14535851776599884, "learning_rate": 0.00012571155226022483, "loss": 0.0543, "step": 12650 }, { "epoch": 1.7730903994393832, "grad_norm": 0.15418435633182526, "learning_rate": 0.00012569720162640516, "loss": 0.0229, "step": 12651 }, { "epoch": 1.7732305536089699, "grad_norm": 0.218855082988739, "learning_rate": 
0.0001256828509925855, "loss": 0.0322, "step": 12652 }, { "epoch": 1.7733707077785565, "grad_norm": 0.21407152712345123, "learning_rate": 0.00012566850035876582, "loss": 0.0532, "step": 12653 }, { "epoch": 1.773510861948143, "grad_norm": 0.37991687655448914, "learning_rate": 0.00012565414972494617, "loss": 0.1043, "step": 12654 }, { "epoch": 1.7736510161177295, "grad_norm": 0.09540198743343353, "learning_rate": 0.00012563979909112653, "loss": 0.0082, "step": 12655 }, { "epoch": 1.773791170287316, "grad_norm": 0.2393234223127365, "learning_rate": 0.00012562544845730686, "loss": 0.0288, "step": 12656 }, { "epoch": 1.7739313244569026, "grad_norm": 0.41243407130241394, "learning_rate": 0.00012561109782348719, "loss": 0.1101, "step": 12657 }, { "epoch": 1.7740714786264893, "grad_norm": 0.20797981321811676, "learning_rate": 0.00012559674718966754, "loss": 0.0546, "step": 12658 }, { "epoch": 1.7742116327960757, "grad_norm": 0.14063552021980286, "learning_rate": 0.00012558239655584787, "loss": 0.0297, "step": 12659 }, { "epoch": 1.7743517869656622, "grad_norm": 0.2068670094013214, "learning_rate": 0.00012556804592202823, "loss": 0.037, "step": 12660 }, { "epoch": 1.7744919411352487, "grad_norm": 0.1441810578107834, "learning_rate": 0.00012555369528820855, "loss": 0.0214, "step": 12661 }, { "epoch": 1.7746320953048353, "grad_norm": 0.19155828654766083, "learning_rate": 0.00012553934465438888, "loss": 0.0416, "step": 12662 }, { "epoch": 1.774772249474422, "grad_norm": 0.2164948433637619, "learning_rate": 0.00012552499402056924, "loss": 0.0271, "step": 12663 }, { "epoch": 1.7749124036440085, "grad_norm": 0.4058765769004822, "learning_rate": 0.00012551064338674957, "loss": 0.0288, "step": 12664 }, { "epoch": 1.775052557813595, "grad_norm": 0.22426724433898926, "learning_rate": 0.00012549629275292992, "loss": 0.0408, "step": 12665 }, { "epoch": 1.7751927119831814, "grad_norm": 0.13831987977027893, "learning_rate": 0.00012548194211911025, "loss": 0.009, "step": 12666 }, { 
"epoch": 1.775332866152768, "grad_norm": 0.559334933757782, "learning_rate": 0.00012546759148529058, "loss": 0.084, "step": 12667 }, { "epoch": 1.7754730203223545, "grad_norm": 0.4013811945915222, "learning_rate": 0.00012545324085147094, "loss": 0.0505, "step": 12668 }, { "epoch": 1.7756131744919412, "grad_norm": 0.40574657917022705, "learning_rate": 0.00012543889021765126, "loss": 0.0552, "step": 12669 }, { "epoch": 1.7757533286615277, "grad_norm": 0.10503213107585907, "learning_rate": 0.00012542453958383162, "loss": 0.0253, "step": 12670 }, { "epoch": 1.7758934828311141, "grad_norm": 0.35956352949142456, "learning_rate": 0.00012541018895001195, "loss": 0.0913, "step": 12671 }, { "epoch": 1.7760336370007006, "grad_norm": 0.14425687491893768, "learning_rate": 0.00012539583831619228, "loss": 0.0203, "step": 12672 }, { "epoch": 1.7761737911702873, "grad_norm": 0.21116703748703003, "learning_rate": 0.00012538148768237263, "loss": 0.0404, "step": 12673 }, { "epoch": 1.776313945339874, "grad_norm": 0.33213019371032715, "learning_rate": 0.00012536713704855296, "loss": 0.0407, "step": 12674 }, { "epoch": 1.7764540995094604, "grad_norm": 0.6757363677024841, "learning_rate": 0.0001253527864147333, "loss": 0.0416, "step": 12675 }, { "epoch": 1.7765942536790469, "grad_norm": 0.5575529336929321, "learning_rate": 0.00012533843578091365, "loss": 0.0263, "step": 12676 }, { "epoch": 1.7767344078486333, "grad_norm": 0.40344178676605225, "learning_rate": 0.00012532408514709397, "loss": 0.0326, "step": 12677 }, { "epoch": 1.77687456201822, "grad_norm": 0.5858974456787109, "learning_rate": 0.00012530973451327433, "loss": 0.1061, "step": 12678 }, { "epoch": 1.7770147161878067, "grad_norm": 0.639039933681488, "learning_rate": 0.00012529538387945466, "loss": 0.0399, "step": 12679 }, { "epoch": 1.7771548703573932, "grad_norm": 0.5654316544532776, "learning_rate": 0.000125281033245635, "loss": 0.0478, "step": 12680 }, { "epoch": 1.7772950245269796, "grad_norm": 0.7055120468139648, 
"learning_rate": 0.00012526668261181534, "loss": 0.0757, "step": 12681 }, { "epoch": 1.777435178696566, "grad_norm": 0.36441636085510254, "learning_rate": 0.0001252523319779957, "loss": 0.0681, "step": 12682 }, { "epoch": 1.7775753328661528, "grad_norm": 1.6339479684829712, "learning_rate": 0.00012523798134417603, "loss": 0.1076, "step": 12683 }, { "epoch": 1.7777154870357395, "grad_norm": 1.3059263229370117, "learning_rate": 0.00012522363071035635, "loss": 0.0825, "step": 12684 }, { "epoch": 1.777855641205326, "grad_norm": 2.2003870010375977, "learning_rate": 0.0001252092800765367, "loss": 0.3255, "step": 12685 }, { "epoch": 1.7779957953749124, "grad_norm": 0.23578482866287231, "learning_rate": 0.00012519492944271704, "loss": 0.039, "step": 12686 }, { "epoch": 1.7781359495444988, "grad_norm": 0.29941314458847046, "learning_rate": 0.0001251805788088974, "loss": 0.0408, "step": 12687 }, { "epoch": 1.7782761037140855, "grad_norm": 1.0790528059005737, "learning_rate": 0.00012516622817507772, "loss": 0.0638, "step": 12688 }, { "epoch": 1.7784162578836722, "grad_norm": 0.1633813977241516, "learning_rate": 0.00012515187754125805, "loss": 0.0511, "step": 12689 }, { "epoch": 1.7785564120532587, "grad_norm": 0.26883625984191895, "learning_rate": 0.0001251375269074384, "loss": 0.0601, "step": 12690 }, { "epoch": 1.778696566222845, "grad_norm": 0.5229213237762451, "learning_rate": 0.00012512317627361874, "loss": 0.0897, "step": 12691 }, { "epoch": 1.7788367203924316, "grad_norm": 0.1665145605802536, "learning_rate": 0.0001251088256397991, "loss": 0.0149, "step": 12692 }, { "epoch": 1.7789768745620183, "grad_norm": 0.08214559406042099, "learning_rate": 0.00012509447500597942, "loss": 0.0091, "step": 12693 }, { "epoch": 1.7791170287316047, "grad_norm": 0.4949318468570709, "learning_rate": 0.00012508012437215975, "loss": 0.07, "step": 12694 }, { "epoch": 1.7792571829011914, "grad_norm": 0.10292713344097137, "learning_rate": 0.0001250657737383401, "loss": 0.0095, "step": 12695 }, 
{ "epoch": 1.7793973370707779, "grad_norm": 0.18902745842933655, "learning_rate": 0.00012505142310452043, "loss": 0.0321, "step": 12696 }, { "epoch": 1.7795374912403643, "grad_norm": 0.35434672236442566, "learning_rate": 0.0001250370724707008, "loss": 0.0309, "step": 12697 }, { "epoch": 1.779677645409951, "grad_norm": 0.24826212227344513, "learning_rate": 0.00012502272183688112, "loss": 0.0343, "step": 12698 }, { "epoch": 1.7798177995795375, "grad_norm": 0.2808452844619751, "learning_rate": 0.00012500837120306145, "loss": 0.0457, "step": 12699 }, { "epoch": 1.7799579537491241, "grad_norm": 0.35589727759361267, "learning_rate": 0.0001249940205692418, "loss": 0.0702, "step": 12700 }, { "epoch": 1.7800981079187106, "grad_norm": 0.5442264080047607, "learning_rate": 0.00012497966993542216, "loss": 0.0785, "step": 12701 }, { "epoch": 1.780238262088297, "grad_norm": 0.7384634613990784, "learning_rate": 0.00012496531930160249, "loss": 0.0845, "step": 12702 }, { "epoch": 1.7803784162578835, "grad_norm": 0.35767456889152527, "learning_rate": 0.00012495096866778281, "loss": 0.0625, "step": 12703 }, { "epoch": 1.7805185704274702, "grad_norm": 0.6801416873931885, "learning_rate": 0.00012493661803396314, "loss": 0.0882, "step": 12704 }, { "epoch": 1.7806587245970569, "grad_norm": 0.5508906841278076, "learning_rate": 0.0001249222674001435, "loss": 0.043, "step": 12705 }, { "epoch": 1.7807988787666433, "grad_norm": 0.4439547061920166, "learning_rate": 0.00012490791676632383, "loss": 0.0531, "step": 12706 }, { "epoch": 1.7809390329362298, "grad_norm": 0.4452202320098877, "learning_rate": 0.00012489356613250416, "loss": 0.0475, "step": 12707 }, { "epoch": 1.7810791871058163, "grad_norm": 0.1963074654340744, "learning_rate": 0.0001248792154986845, "loss": 0.0251, "step": 12708 }, { "epoch": 1.781219341275403, "grad_norm": 0.09595419466495514, "learning_rate": 0.00012486486486486487, "loss": 0.0177, "step": 12709 }, { "epoch": 1.7813594954449896, "grad_norm": 0.4474094808101654, 
"learning_rate": 0.0001248505142310452, "loss": 0.0499, "step": 12710 }, { "epoch": 1.781499649614576, "grad_norm": 0.20792241394519806, "learning_rate": 0.00012483616359722552, "loss": 0.0097, "step": 12711 }, { "epoch": 1.7816398037841625, "grad_norm": 0.44448330998420715, "learning_rate": 0.00012482181296340585, "loss": 0.0267, "step": 12712 }, { "epoch": 1.781779957953749, "grad_norm": 0.2350614219903946, "learning_rate": 0.0001248074623295862, "loss": 0.0746, "step": 12713 }, { "epoch": 1.7819201121233357, "grad_norm": 0.3534233272075653, "learning_rate": 0.00012479311169576656, "loss": 0.0385, "step": 12714 }, { "epoch": 1.7820602662929224, "grad_norm": 0.259888231754303, "learning_rate": 0.0001247787610619469, "loss": 0.0471, "step": 12715 }, { "epoch": 1.7822004204625088, "grad_norm": 0.39188113808631897, "learning_rate": 0.00012476441042812722, "loss": 0.0552, "step": 12716 }, { "epoch": 1.7823405746320953, "grad_norm": 0.451839804649353, "learning_rate": 0.00012475005979430758, "loss": 0.034, "step": 12717 }, { "epoch": 1.7824807288016817, "grad_norm": 0.4768160283565521, "learning_rate": 0.0001247357091604879, "loss": 0.0323, "step": 12718 }, { "epoch": 1.7826208829712684, "grad_norm": 0.426025927066803, "learning_rate": 0.00012472135852666826, "loss": 0.0702, "step": 12719 }, { "epoch": 1.782761037140855, "grad_norm": 0.10429257154464722, "learning_rate": 0.0001247070078928486, "loss": 0.0298, "step": 12720 }, { "epoch": 1.7829011913104416, "grad_norm": 0.0650818794965744, "learning_rate": 0.00012469265725902892, "loss": 0.0103, "step": 12721 }, { "epoch": 1.783041345480028, "grad_norm": 0.12811315059661865, "learning_rate": 0.00012467830662520927, "loss": 0.0182, "step": 12722 }, { "epoch": 1.7831814996496145, "grad_norm": 0.76716148853302, "learning_rate": 0.0001246639559913896, "loss": 0.0923, "step": 12723 }, { "epoch": 1.7833216538192012, "grad_norm": 0.16608989238739014, "learning_rate": 0.00012464960535756996, "loss": 0.0156, "step": 12724 }, { 
"epoch": 1.7834618079887876, "grad_norm": 0.7828342914581299, "learning_rate": 0.0001246352547237503, "loss": 0.0841, "step": 12725 }, { "epoch": 1.7836019621583743, "grad_norm": 0.20947137475013733, "learning_rate": 0.00012462090408993062, "loss": 0.0497, "step": 12726 }, { "epoch": 1.7837421163279608, "grad_norm": 0.11443881690502167, "learning_rate": 0.00012460655345611097, "loss": 0.0268, "step": 12727 }, { "epoch": 1.7838822704975472, "grad_norm": 0.3637041747570038, "learning_rate": 0.00012459220282229133, "loss": 0.0427, "step": 12728 }, { "epoch": 1.784022424667134, "grad_norm": 0.11641380190849304, "learning_rate": 0.00012457785218847166, "loss": 0.0318, "step": 12729 }, { "epoch": 1.7841625788367204, "grad_norm": 0.3642352819442749, "learning_rate": 0.00012456350155465198, "loss": 0.1057, "step": 12730 }, { "epoch": 1.784302733006307, "grad_norm": 0.4932324290275574, "learning_rate": 0.0001245491509208323, "loss": 0.0619, "step": 12731 }, { "epoch": 1.7844428871758935, "grad_norm": 1.1922752857208252, "learning_rate": 0.00012453480028701267, "loss": 0.0725, "step": 12732 }, { "epoch": 1.78458304134548, "grad_norm": 1.504634976387024, "learning_rate": 0.00012452044965319302, "loss": 0.0547, "step": 12733 }, { "epoch": 1.7847231955150664, "grad_norm": 0.5879935622215271, "learning_rate": 0.00012450609901937335, "loss": 0.0553, "step": 12734 }, { "epoch": 1.784863349684653, "grad_norm": 2.0564184188842773, "learning_rate": 0.00012449174838555368, "loss": 0.2254, "step": 12735 }, { "epoch": 1.7850035038542398, "grad_norm": 0.10723566263914108, "learning_rate": 0.00012447739775173404, "loss": 0.0177, "step": 12736 }, { "epoch": 1.7851436580238262, "grad_norm": 0.3034172058105469, "learning_rate": 0.00012446304711791436, "loss": 0.0379, "step": 12737 }, { "epoch": 1.7852838121934127, "grad_norm": 0.5171294808387756, "learning_rate": 0.0001244486964840947, "loss": 0.0432, "step": 12738 }, { "epoch": 1.7854239663629992, "grad_norm": 0.244834303855896, 
"learning_rate": 0.00012443434585027505, "loss": 0.0378, "step": 12739 }, { "epoch": 1.7855641205325858, "grad_norm": 0.1415349841117859, "learning_rate": 0.00012441999521645538, "loss": 0.0325, "step": 12740 }, { "epoch": 1.7857042747021725, "grad_norm": 0.14750425517559052, "learning_rate": 0.00012440564458263573, "loss": 0.0069, "step": 12741 }, { "epoch": 1.785844428871759, "grad_norm": 0.12571613490581512, "learning_rate": 0.00012439129394881606, "loss": 0.0087, "step": 12742 }, { "epoch": 1.7859845830413454, "grad_norm": 1.6872973442077637, "learning_rate": 0.0001243769433149964, "loss": 0.0459, "step": 12743 }, { "epoch": 1.786124737210932, "grad_norm": 0.7671909928321838, "learning_rate": 0.00012436259268117675, "loss": 0.0562, "step": 12744 }, { "epoch": 1.7862648913805186, "grad_norm": 0.09681243449449539, "learning_rate": 0.00012434824204735707, "loss": 0.009, "step": 12745 }, { "epoch": 1.7864050455501053, "grad_norm": 0.393929123878479, "learning_rate": 0.00012433389141353743, "loss": 0.0547, "step": 12746 }, { "epoch": 1.7865451997196917, "grad_norm": 0.18076857924461365, "learning_rate": 0.00012431954077971776, "loss": 0.0164, "step": 12747 }, { "epoch": 1.7866853538892782, "grad_norm": 0.3891890347003937, "learning_rate": 0.0001243051901458981, "loss": 0.0754, "step": 12748 }, { "epoch": 1.7868255080588646, "grad_norm": 0.8744583129882812, "learning_rate": 0.00012429083951207844, "loss": 0.0431, "step": 12749 }, { "epoch": 1.7869656622284513, "grad_norm": 0.5414014458656311, "learning_rate": 0.00012427648887825877, "loss": 0.0516, "step": 12750 }, { "epoch": 1.787105816398038, "grad_norm": 0.19303114712238312, "learning_rate": 0.00012426213824443913, "loss": 0.0125, "step": 12751 }, { "epoch": 1.7872459705676245, "grad_norm": 0.9245198369026184, "learning_rate": 0.00012424778761061946, "loss": 0.0468, "step": 12752 }, { "epoch": 1.787386124737211, "grad_norm": 0.13063810765743256, "learning_rate": 0.00012423343697679978, "loss": 0.0081, "step": 
12753 }, { "epoch": 1.7875262789067974, "grad_norm": 0.3303832709789276, "learning_rate": 0.00012421908634298014, "loss": 0.0576, "step": 12754 }, { "epoch": 1.787666433076384, "grad_norm": 0.27099576592445374, "learning_rate": 0.00012420473570916047, "loss": 0.0708, "step": 12755 }, { "epoch": 1.7878065872459705, "grad_norm": 0.30913522839546204, "learning_rate": 0.00012419038507534082, "loss": 0.0309, "step": 12756 }, { "epoch": 1.7879467414155572, "grad_norm": 0.34400373697280884, "learning_rate": 0.00012417603444152115, "loss": 0.1017, "step": 12757 }, { "epoch": 1.7880868955851437, "grad_norm": 0.3078020513057709, "learning_rate": 0.00012416168380770148, "loss": 0.0226, "step": 12758 }, { "epoch": 1.7882270497547301, "grad_norm": 0.19640223681926727, "learning_rate": 0.00012414733317388184, "loss": 0.0409, "step": 12759 }, { "epoch": 1.7883672039243166, "grad_norm": 0.40916523337364197, "learning_rate": 0.0001241329825400622, "loss": 0.0618, "step": 12760 }, { "epoch": 1.7885073580939033, "grad_norm": 0.3068006932735443, "learning_rate": 0.00012411863190624252, "loss": 0.0612, "step": 12761 }, { "epoch": 1.78864751226349, "grad_norm": 0.0761723741889, "learning_rate": 0.00012410428127242285, "loss": 0.0072, "step": 12762 }, { "epoch": 1.7887876664330764, "grad_norm": 0.14007216691970825, "learning_rate": 0.0001240899306386032, "loss": 0.0171, "step": 12763 }, { "epoch": 1.7889278206026629, "grad_norm": 0.24300192296504974, "learning_rate": 0.00012407558000478353, "loss": 0.0347, "step": 12764 }, { "epoch": 1.7890679747722493, "grad_norm": 0.4248351752758026, "learning_rate": 0.0001240612293709639, "loss": 0.0481, "step": 12765 }, { "epoch": 1.789208128941836, "grad_norm": 0.4369022250175476, "learning_rate": 0.00012404687873714422, "loss": 0.0442, "step": 12766 }, { "epoch": 1.7893482831114227, "grad_norm": 0.09725047647953033, "learning_rate": 0.00012403252810332455, "loss": 0.0128, "step": 12767 }, { "epoch": 1.7894884372810091, "grad_norm": 
0.25909674167633057, "learning_rate": 0.0001240181774695049, "loss": 0.0482, "step": 12768 }, { "epoch": 1.7896285914505956, "grad_norm": 0.7365142703056335, "learning_rate": 0.00012400382683568523, "loss": 0.0662, "step": 12769 }, { "epoch": 1.789768745620182, "grad_norm": 0.22100892663002014, "learning_rate": 0.00012398947620186556, "loss": 0.0367, "step": 12770 }, { "epoch": 1.7899088997897687, "grad_norm": 0.17569518089294434, "learning_rate": 0.00012397512556804592, "loss": 0.0145, "step": 12771 }, { "epoch": 1.7900490539593554, "grad_norm": 0.7322267293930054, "learning_rate": 0.00012396077493422624, "loss": 0.1292, "step": 12772 }, { "epoch": 1.790189208128942, "grad_norm": 0.1580660343170166, "learning_rate": 0.0001239464243004066, "loss": 0.0249, "step": 12773 }, { "epoch": 1.7903293622985283, "grad_norm": 0.5480430126190186, "learning_rate": 0.00012393207366658693, "loss": 0.078, "step": 12774 }, { "epoch": 1.7904695164681148, "grad_norm": 0.20228888094425201, "learning_rate": 0.00012391772303276726, "loss": 0.0221, "step": 12775 }, { "epoch": 1.7906096706377015, "grad_norm": 0.22830694913864136, "learning_rate": 0.0001239033723989476, "loss": 0.0397, "step": 12776 }, { "epoch": 1.7907498248072882, "grad_norm": 0.5371941328048706, "learning_rate": 0.00012388902176512794, "loss": 0.0343, "step": 12777 }, { "epoch": 1.7908899789768746, "grad_norm": 0.44322195649147034, "learning_rate": 0.0001238746711313083, "loss": 0.1148, "step": 12778 }, { "epoch": 1.791030133146461, "grad_norm": 0.08946286886930466, "learning_rate": 0.00012386032049748863, "loss": 0.0045, "step": 12779 }, { "epoch": 1.7911702873160475, "grad_norm": 0.24678777158260345, "learning_rate": 0.00012384596986366895, "loss": 0.0506, "step": 12780 }, { "epoch": 1.7913104414856342, "grad_norm": 0.1845226138830185, "learning_rate": 0.0001238316192298493, "loss": 0.0079, "step": 12781 }, { "epoch": 1.7914505956552207, "grad_norm": 0.7750501036643982, "learning_rate": 0.00012381726859602964, "loss": 
0.1014, "step": 12782 }, { "epoch": 1.7915907498248074, "grad_norm": 0.8726668357849121, "learning_rate": 0.00012380291796221, "loss": 0.1029, "step": 12783 }, { "epoch": 1.7917309039943938, "grad_norm": 0.4718799591064453, "learning_rate": 0.00012378856732839032, "loss": 0.0209, "step": 12784 }, { "epoch": 1.7918710581639803, "grad_norm": 1.7219072580337524, "learning_rate": 0.00012377421669457065, "loss": 0.3222, "step": 12785 }, { "epoch": 1.792011212333567, "grad_norm": 0.48471567034721375, "learning_rate": 0.000123759866060751, "loss": 0.1472, "step": 12786 }, { "epoch": 1.7921513665031534, "grad_norm": 0.6885779500007629, "learning_rate": 0.00012374551542693136, "loss": 0.0614, "step": 12787 }, { "epoch": 1.7922915206727401, "grad_norm": 0.15752582252025604, "learning_rate": 0.0001237311647931117, "loss": 0.022, "step": 12788 }, { "epoch": 1.7924316748423266, "grad_norm": 0.27776530385017395, "learning_rate": 0.00012371681415929202, "loss": 0.0356, "step": 12789 }, { "epoch": 1.792571829011913, "grad_norm": 0.34364476799964905, "learning_rate": 0.00012370246352547235, "loss": 0.0325, "step": 12790 }, { "epoch": 1.7927119831814995, "grad_norm": 0.13597267866134644, "learning_rate": 0.0001236881128916527, "loss": 0.0334, "step": 12791 }, { "epoch": 1.7928521373510862, "grad_norm": 0.27088266611099243, "learning_rate": 0.00012367376225783306, "loss": 0.0204, "step": 12792 }, { "epoch": 1.7929922915206729, "grad_norm": 0.14142204821109772, "learning_rate": 0.0001236594116240134, "loss": 0.0406, "step": 12793 }, { "epoch": 1.7931324456902593, "grad_norm": 0.355624258518219, "learning_rate": 0.00012364506099019372, "loss": 0.0506, "step": 12794 }, { "epoch": 1.7932725998598458, "grad_norm": 0.2310442328453064, "learning_rate": 0.00012363071035637407, "loss": 0.0446, "step": 12795 }, { "epoch": 1.7934127540294322, "grad_norm": 0.1650094985961914, "learning_rate": 0.0001236163597225544, "loss": 0.025, "step": 12796 }, { "epoch": 1.793552908199019, "grad_norm": 
0.17290274798870087, "learning_rate": 0.00012360200908873476, "loss": 0.0277, "step": 12797 }, { "epoch": 1.7936930623686056, "grad_norm": 0.18702958524227142, "learning_rate": 0.00012358765845491508, "loss": 0.0258, "step": 12798 }, { "epoch": 1.793833216538192, "grad_norm": 1.1847370862960815, "learning_rate": 0.0001235733078210954, "loss": 0.048, "step": 12799 }, { "epoch": 1.7939733707077785, "grad_norm": 0.22448472678661346, "learning_rate": 0.00012355895718727577, "loss": 0.0185, "step": 12800 }, { "epoch": 1.794113524877365, "grad_norm": 0.34777554869651794, "learning_rate": 0.0001235446065534561, "loss": 0.0607, "step": 12801 }, { "epoch": 1.7942536790469517, "grad_norm": 0.10799390822649002, "learning_rate": 0.00012353025591963643, "loss": 0.0132, "step": 12802 }, { "epoch": 1.7943938332165383, "grad_norm": 0.17135055363178253, "learning_rate": 0.00012351590528581678, "loss": 0.0442, "step": 12803 }, { "epoch": 1.7945339873861248, "grad_norm": 0.25680282711982727, "learning_rate": 0.0001235015546519971, "loss": 0.0561, "step": 12804 }, { "epoch": 1.7946741415557113, "grad_norm": 0.24472257494926453, "learning_rate": 0.00012348720401817747, "loss": 0.0595, "step": 12805 }, { "epoch": 1.7948142957252977, "grad_norm": 0.3981437683105469, "learning_rate": 0.0001234728533843578, "loss": 0.033, "step": 12806 }, { "epoch": 1.7949544498948844, "grad_norm": 0.33710384368896484, "learning_rate": 0.00012345850275053812, "loss": 0.0274, "step": 12807 }, { "epoch": 1.795094604064471, "grad_norm": 0.5404962301254272, "learning_rate": 0.00012344415211671848, "loss": 0.0724, "step": 12808 }, { "epoch": 1.7952347582340575, "grad_norm": 0.5258068442344666, "learning_rate": 0.0001234298014828988, "loss": 0.0278, "step": 12809 }, { "epoch": 1.795374912403644, "grad_norm": 0.23582759499549866, "learning_rate": 0.00012341545084907916, "loss": 0.0279, "step": 12810 }, { "epoch": 1.7955150665732305, "grad_norm": 0.3797152042388916, "learning_rate": 0.0001234011002152595, "loss": 
0.0589, "step": 12811 }, { "epoch": 1.7956552207428171, "grad_norm": 0.18421849608421326, "learning_rate": 0.00012338674958143982, "loss": 0.0319, "step": 12812 }, { "epoch": 1.7957953749124036, "grad_norm": 0.16365164518356323, "learning_rate": 0.00012337239894762018, "loss": 0.0205, "step": 12813 }, { "epoch": 1.7959355290819903, "grad_norm": 0.37158793210983276, "learning_rate": 0.00012335804831380053, "loss": 0.0319, "step": 12814 }, { "epoch": 1.7960756832515767, "grad_norm": 0.46859195828437805, "learning_rate": 0.00012334369767998086, "loss": 0.0657, "step": 12815 }, { "epoch": 1.7962158374211632, "grad_norm": 0.18334749341011047, "learning_rate": 0.0001233293470461612, "loss": 0.0262, "step": 12816 }, { "epoch": 1.7963559915907499, "grad_norm": 0.5150696635246277, "learning_rate": 0.00012331499641234152, "loss": 0.0284, "step": 12817 }, { "epoch": 1.7964961457603363, "grad_norm": 0.4401821494102478, "learning_rate": 0.00012330064577852187, "loss": 0.0935, "step": 12818 }, { "epoch": 1.796636299929923, "grad_norm": 0.4382910132408142, "learning_rate": 0.00012328629514470223, "loss": 0.0607, "step": 12819 }, { "epoch": 1.7967764540995095, "grad_norm": 0.47764065861701965, "learning_rate": 0.00012327194451088256, "loss": 0.0603, "step": 12820 }, { "epoch": 1.796916608269096, "grad_norm": 0.35685819387435913, "learning_rate": 0.00012325759387706289, "loss": 0.0474, "step": 12821 }, { "epoch": 1.7970567624386824, "grad_norm": 0.19071047008037567, "learning_rate": 0.00012324324324324324, "loss": 0.026, "step": 12822 }, { "epoch": 1.797196916608269, "grad_norm": 0.15680713951587677, "learning_rate": 0.00012322889260942357, "loss": 0.0159, "step": 12823 }, { "epoch": 1.7973370707778558, "grad_norm": 0.4171592891216278, "learning_rate": 0.00012321454197560393, "loss": 0.0324, "step": 12824 }, { "epoch": 1.7974772249474422, "grad_norm": 0.3295215964317322, "learning_rate": 0.00012320019134178425, "loss": 0.0288, "step": 12825 }, { "epoch": 1.7976173791170287, 
"grad_norm": 0.5781238675117493, "learning_rate": 0.00012318584070796458, "loss": 0.0533, "step": 12826 }, { "epoch": 1.7977575332866151, "grad_norm": 0.13872668147087097, "learning_rate": 0.00012317149007414494, "loss": 0.0139, "step": 12827 }, { "epoch": 1.7978976874562018, "grad_norm": 0.7919232249259949, "learning_rate": 0.00012315713944032527, "loss": 0.0213, "step": 12828 }, { "epoch": 1.7980378416257885, "grad_norm": 0.29356151819229126, "learning_rate": 0.00012314278880650562, "loss": 0.0295, "step": 12829 }, { "epoch": 1.798177995795375, "grad_norm": 0.5192530751228333, "learning_rate": 0.00012312843817268595, "loss": 0.0744, "step": 12830 }, { "epoch": 1.7983181499649614, "grad_norm": 0.4007788300514221, "learning_rate": 0.00012311408753886628, "loss": 0.0138, "step": 12831 }, { "epoch": 1.7984583041345479, "grad_norm": 0.6972614526748657, "learning_rate": 0.00012309973690504664, "loss": 0.1492, "step": 12832 }, { "epoch": 1.7985984583041346, "grad_norm": 1.7300293445587158, "learning_rate": 0.00012308538627122696, "loss": 0.2528, "step": 12833 }, { "epoch": 1.7987386124737212, "grad_norm": 1.1460438966751099, "learning_rate": 0.0001230710356374073, "loss": 0.082, "step": 12834 }, { "epoch": 1.7988787666433077, "grad_norm": 1.8754453659057617, "learning_rate": 0.00012305668500358765, "loss": 0.0555, "step": 12835 }, { "epoch": 1.7990189208128942, "grad_norm": 0.2733992636203766, "learning_rate": 0.00012304233436976798, "loss": 0.0723, "step": 12836 }, { "epoch": 1.7991590749824806, "grad_norm": 0.18559584021568298, "learning_rate": 0.00012302798373594833, "loss": 0.0822, "step": 12837 }, { "epoch": 1.7992992291520673, "grad_norm": 0.34309783577919006, "learning_rate": 0.00012301363310212866, "loss": 0.0942, "step": 12838 }, { "epoch": 1.799439383321654, "grad_norm": 0.200308695435524, "learning_rate": 0.000122999282468309, "loss": 0.0212, "step": 12839 }, { "epoch": 1.7995795374912404, "grad_norm": 0.6797464489936829, "learning_rate": 
0.00012298493183448935, "loss": 0.0895, "step": 12840 }, { "epoch": 1.799719691660827, "grad_norm": 0.3852284550666809, "learning_rate": 0.0001229705812006697, "loss": 0.0917, "step": 12841 }, { "epoch": 1.7998598458304134, "grad_norm": 0.5513192415237427, "learning_rate": 0.00012295623056685003, "loss": 0.0811, "step": 12842 }, { "epoch": 1.8, "grad_norm": 0.6143665909767151, "learning_rate": 0.00012294187993303036, "loss": 0.0854, "step": 12843 }, { "epoch": 1.8001401541695865, "grad_norm": 0.296474426984787, "learning_rate": 0.00012292752929921069, "loss": 0.0443, "step": 12844 }, { "epoch": 1.8002803083391732, "grad_norm": 0.5858607292175293, "learning_rate": 0.00012291317866539104, "loss": 0.0267, "step": 12845 }, { "epoch": 1.8004204625087596, "grad_norm": 0.50831538438797, "learning_rate": 0.0001228988280315714, "loss": 0.0776, "step": 12846 }, { "epoch": 1.800560616678346, "grad_norm": 0.2200281172990799, "learning_rate": 0.00012288447739775173, "loss": 0.0608, "step": 12847 }, { "epoch": 1.8007007708479326, "grad_norm": 0.4607313275337219, "learning_rate": 0.00012287012676393205, "loss": 0.0823, "step": 12848 }, { "epoch": 1.8008409250175192, "grad_norm": 0.4934716820716858, "learning_rate": 0.0001228557761301124, "loss": 0.0548, "step": 12849 }, { "epoch": 1.800981079187106, "grad_norm": 0.4305703341960907, "learning_rate": 0.00012284142549629274, "loss": 0.0893, "step": 12850 }, { "epoch": 1.8011212333566924, "grad_norm": 0.2147679477930069, "learning_rate": 0.0001228270748624731, "loss": 0.0237, "step": 12851 }, { "epoch": 1.8012613875262788, "grad_norm": 0.19156956672668457, "learning_rate": 0.00012281272422865342, "loss": 0.046, "step": 12852 }, { "epoch": 1.8014015416958653, "grad_norm": 0.2585931122303009, "learning_rate": 0.00012279837359483375, "loss": 0.049, "step": 12853 }, { "epoch": 1.801541695865452, "grad_norm": 0.24206191301345825, "learning_rate": 0.0001227840229610141, "loss": 0.029, "step": 12854 }, { "epoch": 1.8016818500350387, 
"grad_norm": 0.6593036651611328, "learning_rate": 0.00012276967232719444, "loss": 0.067, "step": 12855 }, { "epoch": 1.8018220042046251, "grad_norm": 0.17387881875038147, "learning_rate": 0.0001227553216933748, "loss": 0.0155, "step": 12856 }, { "epoch": 1.8019621583742116, "grad_norm": 0.20001249015331268, "learning_rate": 0.00012274097105955512, "loss": 0.0456, "step": 12857 }, { "epoch": 1.802102312543798, "grad_norm": 0.1928340494632721, "learning_rate": 0.00012272662042573545, "loss": 0.0272, "step": 12858 }, { "epoch": 1.8022424667133847, "grad_norm": 0.2522693872451782, "learning_rate": 0.0001227122697919158, "loss": 0.025, "step": 12859 }, { "epoch": 1.8023826208829714, "grad_norm": 0.24919652938842773, "learning_rate": 0.00012269791915809613, "loss": 0.058, "step": 12860 }, { "epoch": 1.8025227750525579, "grad_norm": 0.4658038914203644, "learning_rate": 0.0001226835685242765, "loss": 0.1087, "step": 12861 }, { "epoch": 1.8026629292221443, "grad_norm": 0.16299644112586975, "learning_rate": 0.00012266921789045682, "loss": 0.0226, "step": 12862 }, { "epoch": 1.8028030833917308, "grad_norm": 0.19520533084869385, "learning_rate": 0.00012265486725663715, "loss": 0.0299, "step": 12863 }, { "epoch": 1.8029432375613175, "grad_norm": 0.27884581685066223, "learning_rate": 0.0001226405166228175, "loss": 0.0269, "step": 12864 }, { "epoch": 1.8030833917309042, "grad_norm": 0.5366170406341553, "learning_rate": 0.00012262616598899783, "loss": 0.0769, "step": 12865 }, { "epoch": 1.8032235459004906, "grad_norm": 0.8271068334579468, "learning_rate": 0.00012261181535517819, "loss": 0.1031, "step": 12866 }, { "epoch": 1.803363700070077, "grad_norm": 0.609592616558075, "learning_rate": 0.00012259746472135851, "loss": 0.0556, "step": 12867 }, { "epoch": 1.8035038542396635, "grad_norm": 0.3008006811141968, "learning_rate": 0.00012258311408753884, "loss": 0.0526, "step": 12868 }, { "epoch": 1.8036440084092502, "grad_norm": 0.30220529437065125, "learning_rate": 
0.0001225687634537192, "loss": 0.0502, "step": 12869 }, { "epoch": 1.8037841625788367, "grad_norm": 0.22283318638801575, "learning_rate": 0.00012255441281989953, "loss": 0.0398, "step": 12870 }, { "epoch": 1.8039243167484234, "grad_norm": 0.21892574429512024, "learning_rate": 0.00012254006218607986, "loss": 0.0234, "step": 12871 }, { "epoch": 1.8040644709180098, "grad_norm": 0.17471952736377716, "learning_rate": 0.0001225257115522602, "loss": 0.0211, "step": 12872 }, { "epoch": 1.8042046250875963, "grad_norm": 0.32636162638664246, "learning_rate": 0.00012251136091844057, "loss": 0.087, "step": 12873 }, { "epoch": 1.804344779257183, "grad_norm": 0.29673856496810913, "learning_rate": 0.0001224970102846209, "loss": 0.0174, "step": 12874 }, { "epoch": 1.8044849334267694, "grad_norm": 0.34801968932151794, "learning_rate": 0.00012248265965080122, "loss": 0.0722, "step": 12875 }, { "epoch": 1.804625087596356, "grad_norm": 0.2269166111946106, "learning_rate": 0.00012246830901698158, "loss": 0.0395, "step": 12876 }, { "epoch": 1.8047652417659426, "grad_norm": 0.4518377184867859, "learning_rate": 0.0001224539583831619, "loss": 0.043, "step": 12877 }, { "epoch": 1.804905395935529, "grad_norm": 0.6025972962379456, "learning_rate": 0.00012243960774934226, "loss": 0.0827, "step": 12878 }, { "epoch": 1.8050455501051155, "grad_norm": 0.27791860699653625, "learning_rate": 0.0001224252571155226, "loss": 0.0517, "step": 12879 }, { "epoch": 1.8051857042747022, "grad_norm": 0.6230483651161194, "learning_rate": 0.00012241090648170292, "loss": 0.0594, "step": 12880 }, { "epoch": 1.8053258584442888, "grad_norm": 0.3419473469257355, "learning_rate": 0.00012239655584788328, "loss": 0.0554, "step": 12881 }, { "epoch": 1.8054660126138753, "grad_norm": 1.1800894737243652, "learning_rate": 0.0001223822052140636, "loss": 0.0809, "step": 12882 }, { "epoch": 1.8056061667834618, "grad_norm": 1.495298147201538, "learning_rate": 0.00012236785458024396, "loss": 0.155, "step": 12883 }, { "epoch": 
1.8057463209530482, "grad_norm": 1.0437233448028564, "learning_rate": 0.0001223535039464243, "loss": 0.1134, "step": 12884 }, { "epoch": 1.805886475122635, "grad_norm": 0.05267738923430443, "learning_rate": 0.00012233915331260462, "loss": 0.0039, "step": 12885 }, { "epoch": 1.8060266292922216, "grad_norm": 0.18792273104190826, "learning_rate": 0.00012232480267878497, "loss": 0.021, "step": 12886 }, { "epoch": 1.806166783461808, "grad_norm": 0.30840983986854553, "learning_rate": 0.0001223104520449653, "loss": 0.0551, "step": 12887 }, { "epoch": 1.8063069376313945, "grad_norm": 0.158649742603302, "learning_rate": 0.00012229610141114566, "loss": 0.0346, "step": 12888 }, { "epoch": 1.806447091800981, "grad_norm": 0.2195279598236084, "learning_rate": 0.000122281750777326, "loss": 0.0431, "step": 12889 }, { "epoch": 1.8065872459705676, "grad_norm": 0.18091900646686554, "learning_rate": 0.00012226740014350632, "loss": 0.0185, "step": 12890 }, { "epoch": 1.8067274001401543, "grad_norm": 0.17235256731510162, "learning_rate": 0.00012225304950968667, "loss": 0.0085, "step": 12891 }, { "epoch": 1.8068675543097408, "grad_norm": 0.4248929023742676, "learning_rate": 0.00012223869887586703, "loss": 0.0924, "step": 12892 }, { "epoch": 1.8070077084793272, "grad_norm": 0.1359269618988037, "learning_rate": 0.00012222434824204736, "loss": 0.0328, "step": 12893 }, { "epoch": 1.8071478626489137, "grad_norm": 0.3212231993675232, "learning_rate": 0.00012220999760822768, "loss": 0.0783, "step": 12894 }, { "epoch": 1.8072880168185004, "grad_norm": 0.38217827677726746, "learning_rate": 0.000122195646974408, "loss": 0.1037, "step": 12895 }, { "epoch": 1.807428170988087, "grad_norm": 0.2574213147163391, "learning_rate": 0.00012218129634058837, "loss": 0.0424, "step": 12896 }, { "epoch": 1.8075683251576735, "grad_norm": 1.117431879043579, "learning_rate": 0.0001221669457067687, "loss": 0.0647, "step": 12897 }, { "epoch": 1.80770847932726, "grad_norm": 0.14649498462677002, "learning_rate": 
0.00012215259507294905, "loss": 0.0179, "step": 12898 }, { "epoch": 1.8078486334968464, "grad_norm": 0.48996809124946594, "learning_rate": 0.00012213824443912938, "loss": 0.0621, "step": 12899 }, { "epoch": 1.8079887876664331, "grad_norm": 0.19080258905887604, "learning_rate": 0.00012212389380530974, "loss": 0.0466, "step": 12900 }, { "epoch": 1.8081289418360196, "grad_norm": 0.18082423508167267, "learning_rate": 0.00012210954317149006, "loss": 0.0171, "step": 12901 }, { "epoch": 1.8082690960056063, "grad_norm": 0.1817387491464615, "learning_rate": 0.0001220951925376704, "loss": 0.0219, "step": 12902 }, { "epoch": 1.8084092501751927, "grad_norm": 0.31144678592681885, "learning_rate": 0.00012208084190385072, "loss": 0.0164, "step": 12903 }, { "epoch": 1.8085494043447792, "grad_norm": 0.22523410618305206, "learning_rate": 0.00012206649127003108, "loss": 0.012, "step": 12904 }, { "epoch": 1.8086895585143656, "grad_norm": 0.3270754814147949, "learning_rate": 0.00012205214063621142, "loss": 0.1076, "step": 12905 }, { "epoch": 1.8088297126839523, "grad_norm": 0.3382010757923126, "learning_rate": 0.00012203779000239176, "loss": 0.0256, "step": 12906 }, { "epoch": 1.808969866853539, "grad_norm": 1.599377155303955, "learning_rate": 0.0001220234393685721, "loss": 0.1008, "step": 12907 }, { "epoch": 1.8091100210231255, "grad_norm": 0.4510815739631653, "learning_rate": 0.00012200908873475245, "loss": 0.0194, "step": 12908 }, { "epoch": 1.809250175192712, "grad_norm": 0.45227116346359253, "learning_rate": 0.00012199473810093277, "loss": 0.0672, "step": 12909 }, { "epoch": 1.8093903293622984, "grad_norm": 0.13850203156471252, "learning_rate": 0.00012198038746711312, "loss": 0.0136, "step": 12910 }, { "epoch": 1.809530483531885, "grad_norm": 0.12272889167070389, "learning_rate": 0.00012196603683329347, "loss": 0.0134, "step": 12911 }, { "epoch": 1.8096706377014717, "grad_norm": 0.2970481812953949, "learning_rate": 0.0001219516861994738, "loss": 0.0347, "step": 12912 }, { "epoch": 
1.8098107918710582, "grad_norm": 0.1268194317817688, "learning_rate": 0.00012193733556565414, "loss": 0.0148, "step": 12913 }, { "epoch": 1.8099509460406447, "grad_norm": 0.3441358208656311, "learning_rate": 0.00012192298493183447, "loss": 0.0932, "step": 12914 }, { "epoch": 1.8100911002102311, "grad_norm": 0.3065379559993744, "learning_rate": 0.00012190863429801481, "loss": 0.0302, "step": 12915 }, { "epoch": 1.8102312543798178, "grad_norm": 0.4870491027832031, "learning_rate": 0.00012189428366419517, "loss": 0.0886, "step": 12916 }, { "epoch": 1.8103714085494045, "grad_norm": 0.2083703726530075, "learning_rate": 0.0001218799330303755, "loss": 0.0246, "step": 12917 }, { "epoch": 1.810511562718991, "grad_norm": 0.2707286775112152, "learning_rate": 0.00012186558239655584, "loss": 0.0481, "step": 12918 }, { "epoch": 1.8106517168885774, "grad_norm": 0.5525835156440735, "learning_rate": 0.00012185123176273618, "loss": 0.0246, "step": 12919 }, { "epoch": 1.8107918710581639, "grad_norm": 0.19326196610927582, "learning_rate": 0.00012183688112891651, "loss": 0.0569, "step": 12920 }, { "epoch": 1.8109320252277505, "grad_norm": 0.4657191336154938, "learning_rate": 0.00012182253049509685, "loss": 0.0782, "step": 12921 }, { "epoch": 1.8110721793973372, "grad_norm": 0.07895074784755707, "learning_rate": 0.0001218081798612772, "loss": 0.0203, "step": 12922 }, { "epoch": 1.8112123335669237, "grad_norm": 0.4483071267604828, "learning_rate": 0.00012179382922745754, "loss": 0.0315, "step": 12923 }, { "epoch": 1.8113524877365101, "grad_norm": 0.4019063711166382, "learning_rate": 0.00012177947859363788, "loss": 0.0706, "step": 12924 }, { "epoch": 1.8114926419060966, "grad_norm": 0.314506471157074, "learning_rate": 0.00012176512795981821, "loss": 0.0625, "step": 12925 }, { "epoch": 1.8116327960756833, "grad_norm": 0.6163290143013, "learning_rate": 0.00012175077732599855, "loss": 0.1359, "step": 12926 }, { "epoch": 1.8117729502452697, "grad_norm": 1.1162049770355225, "learning_rate": 
0.0001217364266921789, "loss": 0.1204, "step": 12927 }, { "epoch": 1.8119131044148564, "grad_norm": 0.13889525830745697, "learning_rate": 0.00012172207605835923, "loss": 0.0369, "step": 12928 }, { "epoch": 1.8120532585844429, "grad_norm": 0.5765511989593506, "learning_rate": 0.00012170772542453958, "loss": 0.0563, "step": 12929 }, { "epoch": 1.8121934127540293, "grad_norm": 0.7851291298866272, "learning_rate": 0.0001216933747907199, "loss": 0.0659, "step": 12930 }, { "epoch": 1.812333566923616, "grad_norm": 0.20594029128551483, "learning_rate": 0.00012167902415690025, "loss": 0.0333, "step": 12931 }, { "epoch": 1.8124737210932025, "grad_norm": 0.5577102303504944, "learning_rate": 0.0001216646735230806, "loss": 0.0596, "step": 12932 }, { "epoch": 1.8126138752627892, "grad_norm": 0.18293729424476624, "learning_rate": 0.00012165032288926093, "loss": 0.0154, "step": 12933 }, { "epoch": 1.8127540294323756, "grad_norm": 4.890850067138672, "learning_rate": 0.00012163597225544127, "loss": 0.145, "step": 12934 }, { "epoch": 1.812894183601962, "grad_norm": 0.7400285005569458, "learning_rate": 0.00012162162162162162, "loss": 0.0453, "step": 12935 }, { "epoch": 1.8130343377715485, "grad_norm": 0.2621132731437683, "learning_rate": 0.00012160727098780194, "loss": 0.016, "step": 12936 }, { "epoch": 1.8131744919411352, "grad_norm": 0.39345619082450867, "learning_rate": 0.00012159292035398229, "loss": 0.0355, "step": 12937 }, { "epoch": 1.813314646110722, "grad_norm": 0.14441049098968506, "learning_rate": 0.00012157856972016263, "loss": 0.0184, "step": 12938 }, { "epoch": 1.8134548002803084, "grad_norm": 0.15110982954502106, "learning_rate": 0.00012156421908634297, "loss": 0.0097, "step": 12939 }, { "epoch": 1.8135949544498948, "grad_norm": 0.33224159479141235, "learning_rate": 0.00012154986845252331, "loss": 0.0579, "step": 12940 }, { "epoch": 1.8137351086194813, "grad_norm": 0.2408864051103592, "learning_rate": 0.00012153551781870364, "loss": 0.0449, "step": 12941 }, { "epoch": 
1.813875262789068, "grad_norm": 0.4061248302459717, "learning_rate": 0.00012152116718488398, "loss": 0.0769, "step": 12942 }, { "epoch": 1.8140154169586546, "grad_norm": 0.270393967628479, "learning_rate": 0.00012150681655106434, "loss": 0.0347, "step": 12943 }, { "epoch": 1.814155571128241, "grad_norm": 0.20464079082012177, "learning_rate": 0.00012149246591724467, "loss": 0.0385, "step": 12944 }, { "epoch": 1.8142957252978276, "grad_norm": 0.39568743109703064, "learning_rate": 0.00012147811528342501, "loss": 0.066, "step": 12945 }, { "epoch": 1.814435879467414, "grad_norm": 0.2845861613750458, "learning_rate": 0.00012146376464960535, "loss": 0.0206, "step": 12946 }, { "epoch": 1.8145760336370007, "grad_norm": 0.39516395330429077, "learning_rate": 0.00012144941401578568, "loss": 0.0989, "step": 12947 }, { "epoch": 1.8147161878065874, "grad_norm": 0.31977009773254395, "learning_rate": 0.00012143506338196604, "loss": 0.0327, "step": 12948 }, { "epoch": 1.8148563419761738, "grad_norm": 0.20780211687088013, "learning_rate": 0.00012142071274814636, "loss": 0.0263, "step": 12949 }, { "epoch": 1.8149964961457603, "grad_norm": 1.5113365650177002, "learning_rate": 0.0001214063621143267, "loss": 0.0165, "step": 12950 }, { "epoch": 1.8151366503153468, "grad_norm": 0.15647585690021515, "learning_rate": 0.00012139201148050705, "loss": 0.0255, "step": 12951 }, { "epoch": 1.8152768044849334, "grad_norm": 0.1031777411699295, "learning_rate": 0.00012137766084668738, "loss": 0.0304, "step": 12952 }, { "epoch": 1.8154169586545201, "grad_norm": 0.5230974555015564, "learning_rate": 0.00012136331021286772, "loss": 0.1189, "step": 12953 }, { "epoch": 1.8155571128241066, "grad_norm": 0.34056246280670166, "learning_rate": 0.00012134895957904807, "loss": 0.0957, "step": 12954 }, { "epoch": 1.815697266993693, "grad_norm": 0.2997777462005615, "learning_rate": 0.0001213346089452284, "loss": 0.044, "step": 12955 }, { "epoch": 1.8158374211632795, "grad_norm": 0.15355022251605988, 
"learning_rate": 0.00012132025831140875, "loss": 0.0153, "step": 12956 }, { "epoch": 1.8159775753328662, "grad_norm": 0.3450574576854706, "learning_rate": 0.00012130590767758907, "loss": 0.0336, "step": 12957 }, { "epoch": 1.8161177295024526, "grad_norm": 0.10439668595790863, "learning_rate": 0.00012129155704376942, "loss": 0.0146, "step": 12958 }, { "epoch": 1.8162578836720393, "grad_norm": 0.2745637893676758, "learning_rate": 0.00012127720640994977, "loss": 0.0187, "step": 12959 }, { "epoch": 1.8163980378416258, "grad_norm": 0.21283739805221558, "learning_rate": 0.0001212628557761301, "loss": 0.0284, "step": 12960 }, { "epoch": 1.8165381920112122, "grad_norm": 0.05814182385802269, "learning_rate": 0.00012124850514231044, "loss": 0.0125, "step": 12961 }, { "epoch": 1.816678346180799, "grad_norm": 0.10886926203966141, "learning_rate": 0.00012123415450849078, "loss": 0.0119, "step": 12962 }, { "epoch": 1.8168185003503854, "grad_norm": 0.06237927824258804, "learning_rate": 0.00012121980387467111, "loss": 0.0051, "step": 12963 }, { "epoch": 1.816958654519972, "grad_norm": 0.4345255196094513, "learning_rate": 0.00012120545324085147, "loss": 0.1363, "step": 12964 }, { "epoch": 1.8170988086895585, "grad_norm": 0.2464209944009781, "learning_rate": 0.0001211911026070318, "loss": 0.0291, "step": 12965 }, { "epoch": 1.817238962859145, "grad_norm": 0.05634305626153946, "learning_rate": 0.00012117675197321214, "loss": 0.0044, "step": 12966 }, { "epoch": 1.8173791170287315, "grad_norm": 0.41657713055610657, "learning_rate": 0.00012116240133939248, "loss": 0.0844, "step": 12967 }, { "epoch": 1.8175192711983181, "grad_norm": 0.20880872011184692, "learning_rate": 0.00012114805070557281, "loss": 0.0271, "step": 12968 }, { "epoch": 1.8176594253679048, "grad_norm": 0.20800060033798218, "learning_rate": 0.00012113370007175317, "loss": 0.0341, "step": 12969 }, { "epoch": 1.8177995795374913, "grad_norm": 0.6321690082550049, "learning_rate": 0.00012111934943793351, "loss": 0.1202, 
"step": 12970 }, { "epoch": 1.8179397337070777, "grad_norm": 0.3648313581943512, "learning_rate": 0.00012110499880411384, "loss": 0.0777, "step": 12971 }, { "epoch": 1.8180798878766642, "grad_norm": 0.1709165722131729, "learning_rate": 0.00012109064817029418, "loss": 0.0421, "step": 12972 }, { "epoch": 1.8182200420462509, "grad_norm": 0.4314466118812561, "learning_rate": 0.00012107629753647451, "loss": 0.0814, "step": 12973 }, { "epoch": 1.8183601962158376, "grad_norm": 0.3605118989944458, "learning_rate": 0.00012106194690265485, "loss": 0.038, "step": 12974 }, { "epoch": 1.818500350385424, "grad_norm": 0.19191895425319672, "learning_rate": 0.0001210475962688352, "loss": 0.0215, "step": 12975 }, { "epoch": 1.8186405045550105, "grad_norm": 0.3105887770652771, "learning_rate": 0.00012103324563501553, "loss": 0.056, "step": 12976 }, { "epoch": 1.818780658724597, "grad_norm": 0.7323061227798462, "learning_rate": 0.00012101889500119588, "loss": 0.0867, "step": 12977 }, { "epoch": 1.8189208128941836, "grad_norm": 0.38156676292419434, "learning_rate": 0.00012100454436737622, "loss": 0.0299, "step": 12978 }, { "epoch": 1.8190609670637703, "grad_norm": 1.1498442888259888, "learning_rate": 0.00012099019373355655, "loss": 0.1193, "step": 12979 }, { "epoch": 1.8192011212333568, "grad_norm": 0.37885749340057373, "learning_rate": 0.0001209758430997369, "loss": 0.0684, "step": 12980 }, { "epoch": 1.8193412754029432, "grad_norm": 1.0525306463241577, "learning_rate": 0.00012096149246591723, "loss": 0.0725, "step": 12981 }, { "epoch": 1.8194814295725297, "grad_norm": 0.6396930813789368, "learning_rate": 0.00012094714183209757, "loss": 0.231, "step": 12982 }, { "epoch": 1.8196215837421164, "grad_norm": 0.31932976841926575, "learning_rate": 0.00012093279119827791, "loss": 0.0281, "step": 12983 }, { "epoch": 1.819761737911703, "grad_norm": 2.0052361488342285, "learning_rate": 0.00012091844056445824, "loss": 0.1293, "step": 12984 }, { "epoch": 1.8199018920812895, "grad_norm": 
0.32648447155952454, "learning_rate": 0.0001209040899306386, "loss": 0.0603, "step": 12985 }, { "epoch": 1.820042046250876, "grad_norm": 0.2711527943611145, "learning_rate": 0.00012088973929681894, "loss": 0.0393, "step": 12986 }, { "epoch": 1.8201822004204624, "grad_norm": 0.2042703926563263, "learning_rate": 0.00012087538866299927, "loss": 0.0398, "step": 12987 }, { "epoch": 1.820322354590049, "grad_norm": 0.26983964443206787, "learning_rate": 0.00012086103802917961, "loss": 0.0357, "step": 12988 }, { "epoch": 1.8204625087596356, "grad_norm": 0.7581669688224792, "learning_rate": 0.00012084668739535995, "loss": 0.0962, "step": 12989 }, { "epoch": 1.8206026629292222, "grad_norm": 0.5755071640014648, "learning_rate": 0.00012083233676154028, "loss": 0.0493, "step": 12990 }, { "epoch": 1.8207428170988087, "grad_norm": 0.15472157299518585, "learning_rate": 0.00012081798612772064, "loss": 0.0256, "step": 12991 }, { "epoch": 1.8208829712683952, "grad_norm": 0.14568139612674713, "learning_rate": 0.00012080363549390097, "loss": 0.0365, "step": 12992 }, { "epoch": 1.8210231254379816, "grad_norm": 0.43495064973831177, "learning_rate": 0.00012078928486008131, "loss": 0.045, "step": 12993 }, { "epoch": 1.8211632796075683, "grad_norm": 0.3852024972438812, "learning_rate": 0.00012077493422626165, "loss": 0.0351, "step": 12994 }, { "epoch": 1.821303433777155, "grad_norm": 0.45781028270721436, "learning_rate": 0.00012076058359244198, "loss": 0.0785, "step": 12995 }, { "epoch": 1.8214435879467414, "grad_norm": 0.12383861839771271, "learning_rate": 0.00012074623295862234, "loss": 0.0258, "step": 12996 }, { "epoch": 1.821583742116328, "grad_norm": 0.10406908392906189, "learning_rate": 0.00012073188232480268, "loss": 0.0116, "step": 12997 }, { "epoch": 1.8217238962859144, "grad_norm": 0.1576005518436432, "learning_rate": 0.000120717531690983, "loss": 0.0116, "step": 12998 }, { "epoch": 1.821864050455501, "grad_norm": 0.17741477489471436, "learning_rate": 0.00012070318105716335, 
"loss": 0.019, "step": 12999 }, { "epoch": 1.8220042046250877, "grad_norm": 0.15079960227012634, "learning_rate": 0.00012068883042334368, "loss": 0.0087, "step": 13000 }, { "epoch": 1.8221443587946742, "grad_norm": 0.16749387979507446, "learning_rate": 0.00012067447978952403, "loss": 0.0267, "step": 13001 }, { "epoch": 1.8222845129642606, "grad_norm": 0.24852505326271057, "learning_rate": 0.00012066012915570437, "loss": 0.0528, "step": 13002 }, { "epoch": 1.822424667133847, "grad_norm": 0.4245162010192871, "learning_rate": 0.0001206457785218847, "loss": 0.0867, "step": 13003 }, { "epoch": 1.8225648213034338, "grad_norm": 0.1511314958333969, "learning_rate": 0.00012063142788806504, "loss": 0.0185, "step": 13004 }, { "epoch": 1.8227049754730205, "grad_norm": 0.14931067824363708, "learning_rate": 0.00012061707725424539, "loss": 0.026, "step": 13005 }, { "epoch": 1.822845129642607, "grad_norm": 0.2274581342935562, "learning_rate": 0.00012060272662042572, "loss": 0.0451, "step": 13006 }, { "epoch": 1.8229852838121934, "grad_norm": 0.24408139288425446, "learning_rate": 0.00012058837598660607, "loss": 0.0454, "step": 13007 }, { "epoch": 1.8231254379817798, "grad_norm": 0.12239329516887665, "learning_rate": 0.0001205740253527864, "loss": 0.0152, "step": 13008 }, { "epoch": 1.8232655921513665, "grad_norm": 0.18465597927570343, "learning_rate": 0.00012055967471896674, "loss": 0.0391, "step": 13009 }, { "epoch": 1.8234057463209532, "grad_norm": 0.27876588702201843, "learning_rate": 0.00012054532408514708, "loss": 0.0457, "step": 13010 }, { "epoch": 1.8235459004905397, "grad_norm": 0.24993014335632324, "learning_rate": 0.00012053097345132741, "loss": 0.054, "step": 13011 }, { "epoch": 1.8236860546601261, "grad_norm": 0.4324030578136444, "learning_rate": 0.00012051662281750777, "loss": 0.0357, "step": 13012 }, { "epoch": 1.8238262088297126, "grad_norm": 0.36240458488464355, "learning_rate": 0.00012050227218368811, "loss": 0.0658, "step": 13013 }, { "epoch": 1.8239663629992993, 
"grad_norm": 0.1839447319507599, "learning_rate": 0.00012048792154986844, "loss": 0.0501, "step": 13014 }, { "epoch": 1.8241065171688857, "grad_norm": 0.14985217154026031, "learning_rate": 0.00012047357091604878, "loss": 0.0355, "step": 13015 }, { "epoch": 1.8242466713384724, "grad_norm": 0.10741608589887619, "learning_rate": 0.00012045922028222911, "loss": 0.0101, "step": 13016 }, { "epoch": 1.8243868255080589, "grad_norm": 0.36963754892349243, "learning_rate": 0.00012044486964840947, "loss": 0.0444, "step": 13017 }, { "epoch": 1.8245269796776453, "grad_norm": 0.14281608164310455, "learning_rate": 0.00012043051901458981, "loss": 0.0195, "step": 13018 }, { "epoch": 1.824667133847232, "grad_norm": 0.49174901843070984, "learning_rate": 0.00012041616838077014, "loss": 0.0208, "step": 13019 }, { "epoch": 1.8248072880168185, "grad_norm": 0.3527078926563263, "learning_rate": 0.00012040181774695048, "loss": 0.0724, "step": 13020 }, { "epoch": 1.8249474421864051, "grad_norm": 0.11536964774131775, "learning_rate": 0.00012038746711313082, "loss": 0.0134, "step": 13021 }, { "epoch": 1.8250875963559916, "grad_norm": 0.19288687407970428, "learning_rate": 0.00012037311647931115, "loss": 0.0367, "step": 13022 }, { "epoch": 1.825227750525578, "grad_norm": 0.5221089720726013, "learning_rate": 0.0001203587658454915, "loss": 0.0236, "step": 13023 }, { "epoch": 1.8253679046951645, "grad_norm": 0.18207323551177979, "learning_rate": 0.00012034441521167185, "loss": 0.014, "step": 13024 }, { "epoch": 1.8255080588647512, "grad_norm": 0.2753743529319763, "learning_rate": 0.00012033006457785218, "loss": 0.0546, "step": 13025 }, { "epoch": 1.8256482130343379, "grad_norm": 0.528946578502655, "learning_rate": 0.00012031571394403252, "loss": 0.0622, "step": 13026 }, { "epoch": 1.8257883672039243, "grad_norm": 0.6245681643486023, "learning_rate": 0.00012030136331021285, "loss": 0.0868, "step": 13027 }, { "epoch": 1.8259285213735108, "grad_norm": 0.24446775019168854, "learning_rate": 
0.0001202870126763932, "loss": 0.021, "step": 13028 }, { "epoch": 1.8260686755430973, "grad_norm": 0.13140496611595154, "learning_rate": 0.00012027266204257354, "loss": 0.0085, "step": 13029 }, { "epoch": 1.826208829712684, "grad_norm": 0.3186090588569641, "learning_rate": 0.00012025831140875387, "loss": 0.0363, "step": 13030 }, { "epoch": 1.8263489838822706, "grad_norm": 0.5285914540290833, "learning_rate": 0.00012024396077493421, "loss": 0.0428, "step": 13031 }, { "epoch": 1.826489138051857, "grad_norm": 0.7337438464164734, "learning_rate": 0.00012022961014111456, "loss": 0.1334, "step": 13032 }, { "epoch": 1.8266292922214435, "grad_norm": 1.9644092321395874, "learning_rate": 0.0001202152595072949, "loss": 0.3768, "step": 13033 }, { "epoch": 1.82676944639103, "grad_norm": 0.9033927917480469, "learning_rate": 0.00012020090887347524, "loss": 0.0594, "step": 13034 }, { "epoch": 1.8269096005606167, "grad_norm": 0.8508265018463135, "learning_rate": 0.00012018655823965557, "loss": 0.0897, "step": 13035 }, { "epoch": 1.8270497547302034, "grad_norm": 0.3438679277896881, "learning_rate": 0.00012017220760583591, "loss": 0.0452, "step": 13036 }, { "epoch": 1.8271899088997898, "grad_norm": 0.1351635456085205, "learning_rate": 0.00012015785697201625, "loss": 0.0303, "step": 13037 }, { "epoch": 1.8273300630693763, "grad_norm": 0.16365383565425873, "learning_rate": 0.00012014350633819658, "loss": 0.0331, "step": 13038 }, { "epoch": 1.8274702172389627, "grad_norm": 0.1686399281024933, "learning_rate": 0.00012012915570437694, "loss": 0.0231, "step": 13039 }, { "epoch": 1.8276103714085494, "grad_norm": 0.3161714971065521, "learning_rate": 0.00012011480507055728, "loss": 0.0602, "step": 13040 }, { "epoch": 1.827750525578136, "grad_norm": 0.23480254411697388, "learning_rate": 0.00012010045443673761, "loss": 0.0287, "step": 13041 }, { "epoch": 1.8278906797477226, "grad_norm": 0.3092454969882965, "learning_rate": 0.00012008610380291795, "loss": 0.0774, "step": 13042 }, { "epoch": 
1.828030833917309, "grad_norm": 0.37827104330062866, "learning_rate": 0.00012007175316909828, "loss": 0.0558, "step": 13043 }, { "epoch": 1.8281709880868955, "grad_norm": 0.20225325226783752, "learning_rate": 0.00012005740253527863, "loss": 0.0205, "step": 13044 }, { "epoch": 1.8283111422564822, "grad_norm": 0.20479802787303925, "learning_rate": 0.00012004305190145898, "loss": 0.0885, "step": 13045 }, { "epoch": 1.8284512964260686, "grad_norm": 0.47692474722862244, "learning_rate": 0.0001200287012676393, "loss": 0.0658, "step": 13046 }, { "epoch": 1.8285914505956553, "grad_norm": 0.17206741869449615, "learning_rate": 0.00012001435063381965, "loss": 0.0229, "step": 13047 }, { "epoch": 1.8287316047652418, "grad_norm": 0.2441057413816452, "learning_rate": 0.00011999999999999999, "loss": 0.0624, "step": 13048 }, { "epoch": 1.8288717589348282, "grad_norm": 0.2826390564441681, "learning_rate": 0.00011998564936618033, "loss": 0.0657, "step": 13049 }, { "epoch": 1.829011913104415, "grad_norm": 0.4242059588432312, "learning_rate": 0.00011997129873236067, "loss": 0.076, "step": 13050 }, { "epoch": 1.8291520672740014, "grad_norm": 0.08126556873321533, "learning_rate": 0.000119956948098541, "loss": 0.0125, "step": 13051 }, { "epoch": 1.829292221443588, "grad_norm": 0.38126716017723083, "learning_rate": 0.00011994259746472134, "loss": 0.0705, "step": 13052 }, { "epoch": 1.8294323756131745, "grad_norm": 0.2914828658103943, "learning_rate": 0.00011992824683090169, "loss": 0.0447, "step": 13053 }, { "epoch": 1.829572529782761, "grad_norm": 0.2624664902687073, "learning_rate": 0.00011991389619708202, "loss": 0.0215, "step": 13054 }, { "epoch": 1.8297126839523474, "grad_norm": 0.1292945146560669, "learning_rate": 0.00011989954556326237, "loss": 0.0202, "step": 13055 }, { "epoch": 1.829852838121934, "grad_norm": 0.31137487292289734, "learning_rate": 0.00011988519492944271, "loss": 0.0487, "step": 13056 }, { "epoch": 1.8299929922915208, "grad_norm": 0.3116886019706726, 
"learning_rate": 0.00011987084429562304, "loss": 0.0722, "step": 13057 }, { "epoch": 1.8301331464611073, "grad_norm": 0.3327091336250305, "learning_rate": 0.00011985649366180338, "loss": 0.0785, "step": 13058 }, { "epoch": 1.8302733006306937, "grad_norm": 0.1800590604543686, "learning_rate": 0.00011984214302798374, "loss": 0.0343, "step": 13059 }, { "epoch": 1.8304134548002802, "grad_norm": 0.2878572642803192, "learning_rate": 0.00011982779239416407, "loss": 0.0333, "step": 13060 }, { "epoch": 1.8305536089698669, "grad_norm": 0.1222718209028244, "learning_rate": 0.00011981344176034441, "loss": 0.0126, "step": 13061 }, { "epoch": 1.8306937631394535, "grad_norm": 0.18908029794692993, "learning_rate": 0.00011979909112652474, "loss": 0.0199, "step": 13062 }, { "epoch": 1.83083391730904, "grad_norm": 0.41595008969306946, "learning_rate": 0.00011978474049270508, "loss": 0.0354, "step": 13063 }, { "epoch": 1.8309740714786265, "grad_norm": 0.09385739266872406, "learning_rate": 0.00011977038985888542, "loss": 0.011, "step": 13064 }, { "epoch": 1.831114225648213, "grad_norm": 0.1520908623933792, "learning_rate": 0.00011975603922506576, "loss": 0.0247, "step": 13065 }, { "epoch": 1.8312543798177996, "grad_norm": 0.34486567974090576, "learning_rate": 0.00011974168859124611, "loss": 0.0363, "step": 13066 }, { "epoch": 1.8313945339873863, "grad_norm": 0.08466723561286926, "learning_rate": 0.00011972733795742645, "loss": 0.0061, "step": 13067 }, { "epoch": 1.8315346881569727, "grad_norm": 0.2776062488555908, "learning_rate": 0.00011971298732360678, "loss": 0.0531, "step": 13068 }, { "epoch": 1.8316748423265592, "grad_norm": 0.3212437927722931, "learning_rate": 0.00011969863668978712, "loss": 0.0496, "step": 13069 }, { "epoch": 1.8318149964961457, "grad_norm": 0.835310161113739, "learning_rate": 0.00011968428605596745, "loss": 0.0871, "step": 13070 }, { "epoch": 1.8319551506657323, "grad_norm": 0.6809616088867188, "learning_rate": 0.0001196699354221478, "loss": 0.0445, "step": 
13071 }, { "epoch": 1.832095304835319, "grad_norm": 0.3452107012271881, "learning_rate": 0.00011965558478832815, "loss": 0.0174, "step": 13072 }, { "epoch": 1.8322354590049055, "grad_norm": 0.32924070954322815, "learning_rate": 0.00011964123415450847, "loss": 0.0418, "step": 13073 }, { "epoch": 1.832375613174492, "grad_norm": 0.4984101951122284, "learning_rate": 0.00011962688352068882, "loss": 0.1248, "step": 13074 }, { "epoch": 1.8325157673440784, "grad_norm": 0.23525528609752655, "learning_rate": 0.00011961253288686917, "loss": 0.0481, "step": 13075 }, { "epoch": 1.832655921513665, "grad_norm": 0.05106350779533386, "learning_rate": 0.0001195981822530495, "loss": 0.0056, "step": 13076 }, { "epoch": 1.8327960756832515, "grad_norm": 0.10208006948232651, "learning_rate": 0.00011958383161922984, "loss": 0.0093, "step": 13077 }, { "epoch": 1.8329362298528382, "grad_norm": 0.23808462917804718, "learning_rate": 0.00011956948098541017, "loss": 0.0092, "step": 13078 }, { "epoch": 1.8330763840224247, "grad_norm": 0.2991667091846466, "learning_rate": 0.00011955513035159051, "loss": 0.0297, "step": 13079 }, { "epoch": 1.8332165381920111, "grad_norm": 0.323784738779068, "learning_rate": 0.00011954077971777087, "loss": 0.0275, "step": 13080 }, { "epoch": 1.8333566923615976, "grad_norm": 0.25618141889572144, "learning_rate": 0.0001195264290839512, "loss": 0.0147, "step": 13081 }, { "epoch": 1.8334968465311843, "grad_norm": 0.8678697943687439, "learning_rate": 0.00011951207845013154, "loss": 0.0665, "step": 13082 }, { "epoch": 1.833637000700771, "grad_norm": 2.700031042098999, "learning_rate": 0.00011949772781631188, "loss": 0.313, "step": 13083 }, { "epoch": 1.8337771548703574, "grad_norm": 3.0952396392822266, "learning_rate": 0.00011948337718249221, "loss": 0.2181, "step": 13084 }, { "epoch": 1.8339173090399439, "grad_norm": 0.32221120595932007, "learning_rate": 0.00011946902654867255, "loss": 0.0144, "step": 13085 }, { "epoch": 1.8340574632095303, "grad_norm": 
0.14368171989917755, "learning_rate": 0.00011945467591485288, "loss": 0.0122, "step": 13086 }, { "epoch": 1.834197617379117, "grad_norm": 0.4254034161567688, "learning_rate": 0.00011944032528103324, "loss": 0.0475, "step": 13087 }, { "epoch": 1.8343377715487037, "grad_norm": 0.37998393177986145, "learning_rate": 0.00011942597464721358, "loss": 0.0426, "step": 13088 }, { "epoch": 1.8344779257182902, "grad_norm": 0.3881828486919403, "learning_rate": 0.00011941162401339391, "loss": 0.0797, "step": 13089 }, { "epoch": 1.8346180798878766, "grad_norm": 0.12122989445924759, "learning_rate": 0.00011939727337957425, "loss": 0.0085, "step": 13090 }, { "epoch": 1.834758234057463, "grad_norm": 0.2777736485004425, "learning_rate": 0.0001193829227457546, "loss": 0.0341, "step": 13091 }, { "epoch": 1.8348983882270498, "grad_norm": 0.31072843074798584, "learning_rate": 0.00011936857211193493, "loss": 0.026, "step": 13092 }, { "epoch": 1.8350385423966364, "grad_norm": 0.24548600614070892, "learning_rate": 0.00011935422147811528, "loss": 0.0229, "step": 13093 }, { "epoch": 1.835178696566223, "grad_norm": 0.15506403148174286, "learning_rate": 0.00011933987084429562, "loss": 0.0331, "step": 13094 }, { "epoch": 1.8353188507358094, "grad_norm": 0.4133717715740204, "learning_rate": 0.00011932552021047595, "loss": 0.0764, "step": 13095 }, { "epoch": 1.8354590049053958, "grad_norm": 0.10784870386123657, "learning_rate": 0.0001193111695766563, "loss": 0.0293, "step": 13096 }, { "epoch": 1.8355991590749825, "grad_norm": 0.32498976588249207, "learning_rate": 0.00011929681894283663, "loss": 0.0433, "step": 13097 }, { "epoch": 1.8357393132445692, "grad_norm": 0.4950914978981018, "learning_rate": 0.00011928246830901697, "loss": 0.055, "step": 13098 }, { "epoch": 1.8358794674141556, "grad_norm": 0.34596705436706543, "learning_rate": 0.00011926811767519732, "loss": 0.0179, "step": 13099 }, { "epoch": 1.836019621583742, "grad_norm": 0.22688059508800507, "learning_rate": 0.00011925376704137764, 
"loss": 0.0346, "step": 13100 }, { "epoch": 1.8361597757533286, "grad_norm": 0.295215368270874, "learning_rate": 0.00011923941640755799, "loss": 0.0324, "step": 13101 }, { "epoch": 1.8362999299229152, "grad_norm": 0.17926205694675446, "learning_rate": 0.00011922506577373834, "loss": 0.0205, "step": 13102 }, { "epoch": 1.8364400840925017, "grad_norm": 0.3611327111721039, "learning_rate": 0.00011921071513991867, "loss": 0.0884, "step": 13103 }, { "epoch": 1.8365802382620884, "grad_norm": 0.47480544447898865, "learning_rate": 0.00011919636450609901, "loss": 0.2169, "step": 13104 }, { "epoch": 1.8367203924316748, "grad_norm": 0.3176404535770416, "learning_rate": 0.00011918201387227934, "loss": 0.03, "step": 13105 }, { "epoch": 1.8368605466012613, "grad_norm": 0.23722144961357117, "learning_rate": 0.00011916766323845968, "loss": 0.0496, "step": 13106 }, { "epoch": 1.837000700770848, "grad_norm": 0.395290732383728, "learning_rate": 0.00011915331260464004, "loss": 0.1526, "step": 13107 }, { "epoch": 1.8371408549404344, "grad_norm": 0.38038206100463867, "learning_rate": 0.00011913896197082037, "loss": 0.0216, "step": 13108 }, { "epoch": 1.8372810091100211, "grad_norm": 0.3218480944633484, "learning_rate": 0.00011912461133700071, "loss": 0.0707, "step": 13109 }, { "epoch": 1.8374211632796076, "grad_norm": 0.12077045440673828, "learning_rate": 0.00011911026070318105, "loss": 0.0135, "step": 13110 }, { "epoch": 1.837561317449194, "grad_norm": 0.24675005674362183, "learning_rate": 0.00011909591006936138, "loss": 0.0237, "step": 13111 }, { "epoch": 1.8377014716187805, "grad_norm": 0.4440574645996094, "learning_rate": 0.00011908155943554174, "loss": 0.0926, "step": 13112 }, { "epoch": 1.8378416257883672, "grad_norm": 0.2965434193611145, "learning_rate": 0.00011906720880172206, "loss": 0.0222, "step": 13113 }, { "epoch": 1.8379817799579539, "grad_norm": 0.34264495968818665, "learning_rate": 0.0001190528581679024, "loss": 0.0372, "step": 13114 }, { "epoch": 1.8381219341275403, 
"grad_norm": 0.45088109374046326, "learning_rate": 0.00011903850753408275, "loss": 0.0431, "step": 13115 }, { "epoch": 1.8382620882971268, "grad_norm": 0.1931804120540619, "learning_rate": 0.00011902415690026308, "loss": 0.0196, "step": 13116 }, { "epoch": 1.8384022424667132, "grad_norm": 0.2493153214454651, "learning_rate": 0.00011900980626644342, "loss": 0.0377, "step": 13117 }, { "epoch": 1.8385423966363, "grad_norm": 0.22799772024154663, "learning_rate": 0.00011899545563262377, "loss": 0.0368, "step": 13118 }, { "epoch": 1.8386825508058866, "grad_norm": 0.20331008732318878, "learning_rate": 0.0001189811049988041, "loss": 0.0401, "step": 13119 }, { "epoch": 1.838822704975473, "grad_norm": 0.32094836235046387, "learning_rate": 0.00011896675436498445, "loss": 0.0812, "step": 13120 }, { "epoch": 1.8389628591450595, "grad_norm": 0.4043622612953186, "learning_rate": 0.00011895240373116477, "loss": 0.0286, "step": 13121 }, { "epoch": 1.839103013314646, "grad_norm": 0.18666526675224304, "learning_rate": 0.00011893805309734512, "loss": 0.0235, "step": 13122 }, { "epoch": 1.8392431674842327, "grad_norm": 0.2039700299501419, "learning_rate": 0.00011892370246352547, "loss": 0.0478, "step": 13123 }, { "epoch": 1.8393833216538193, "grad_norm": 0.44583046436309814, "learning_rate": 0.0001189093518297058, "loss": 0.0219, "step": 13124 }, { "epoch": 1.8395234758234058, "grad_norm": 0.2930704951286316, "learning_rate": 0.00011889500119588614, "loss": 0.0748, "step": 13125 }, { "epoch": 1.8396636299929923, "grad_norm": 0.4853978455066681, "learning_rate": 0.00011888065056206648, "loss": 0.075, "step": 13126 }, { "epoch": 1.8398037841625787, "grad_norm": 0.26997554302215576, "learning_rate": 0.00011886629992824681, "loss": 0.0337, "step": 13127 }, { "epoch": 1.8399439383321654, "grad_norm": 0.20511342585086823, "learning_rate": 0.00011885194929442717, "loss": 0.02, "step": 13128 }, { "epoch": 1.840084092501752, "grad_norm": 0.39991262555122375, "learning_rate": 
0.0001188375986606075, "loss": 0.073, "step": 13129 }, { "epoch": 1.8402242466713385, "grad_norm": 0.4542100131511688, "learning_rate": 0.00011882324802678784, "loss": 0.0787, "step": 13130 }, { "epoch": 1.840364400840925, "grad_norm": 0.8485069870948792, "learning_rate": 0.00011880889739296818, "loss": 0.0352, "step": 13131 }, { "epoch": 1.8405045550105115, "grad_norm": 2.933495283126831, "learning_rate": 0.00011879454675914851, "loss": 0.1002, "step": 13132 }, { "epoch": 1.8406447091800981, "grad_norm": 0.8393083214759827, "learning_rate": 0.00011878019612532885, "loss": 0.2263, "step": 13133 }, { "epoch": 1.8407848633496846, "grad_norm": 2.1371407508850098, "learning_rate": 0.00011876584549150921, "loss": 0.2124, "step": 13134 }, { "epoch": 1.8409250175192713, "grad_norm": 1.579941987991333, "learning_rate": 0.00011875149485768954, "loss": 0.2545, "step": 13135 }, { "epoch": 1.8410651716888577, "grad_norm": 0.1906174123287201, "learning_rate": 0.00011873714422386988, "loss": 0.0195, "step": 13136 }, { "epoch": 1.8412053258584442, "grad_norm": 0.24619857966899872, "learning_rate": 0.00011872279359005022, "loss": 0.08, "step": 13137 }, { "epoch": 1.8413454800280307, "grad_norm": 0.24982307851314545, "learning_rate": 0.00011870844295623055, "loss": 0.0417, "step": 13138 }, { "epoch": 1.8414856341976173, "grad_norm": 0.27078914642333984, "learning_rate": 0.0001186940923224109, "loss": 0.0725, "step": 13139 }, { "epoch": 1.841625788367204, "grad_norm": 0.08704311400651932, "learning_rate": 0.00011867974168859123, "loss": 0.0108, "step": 13140 }, { "epoch": 1.8417659425367905, "grad_norm": 0.3021286427974701, "learning_rate": 0.00011866539105477158, "loss": 0.0433, "step": 13141 }, { "epoch": 1.841906096706377, "grad_norm": 0.12301019579172134, "learning_rate": 0.00011865104042095192, "loss": 0.0335, "step": 13142 }, { "epoch": 1.8420462508759634, "grad_norm": 0.13123148679733276, "learning_rate": 0.00011863668978713225, "loss": 0.017, "step": 13143 }, { "epoch": 
1.84218640504555, "grad_norm": 0.633851945400238, "learning_rate": 0.0001186223391533126, "loss": 0.0919, "step": 13144 }, { "epoch": 1.8423265592151368, "grad_norm": 0.09904909133911133, "learning_rate": 0.00011860798851949294, "loss": 0.0171, "step": 13145 }, { "epoch": 1.8424667133847232, "grad_norm": 0.3920518755912781, "learning_rate": 0.00011859363788567327, "loss": 0.0883, "step": 13146 }, { "epoch": 1.8426068675543097, "grad_norm": 0.3847196102142334, "learning_rate": 0.00011857928725185361, "loss": 0.0804, "step": 13147 }, { "epoch": 1.8427470217238962, "grad_norm": 0.2734045088291168, "learning_rate": 0.00011856493661803394, "loss": 0.0622, "step": 13148 }, { "epoch": 1.8428871758934828, "grad_norm": 0.15843774378299713, "learning_rate": 0.00011855058598421429, "loss": 0.0267, "step": 13149 }, { "epoch": 1.8430273300630695, "grad_norm": 0.18920204043388367, "learning_rate": 0.00011853623535039464, "loss": 0.0146, "step": 13150 }, { "epoch": 1.843167484232656, "grad_norm": 0.32722994685173035, "learning_rate": 0.00011852188471657497, "loss": 0.0932, "step": 13151 }, { "epoch": 1.8433076384022424, "grad_norm": 0.3440050482749939, "learning_rate": 0.00011850753408275531, "loss": 0.0602, "step": 13152 }, { "epoch": 1.843447792571829, "grad_norm": 0.25374898314476013, "learning_rate": 0.00011849318344893565, "loss": 0.0258, "step": 13153 }, { "epoch": 1.8435879467414156, "grad_norm": 0.1849697381258011, "learning_rate": 0.00011847883281511598, "loss": 0.0389, "step": 13154 }, { "epoch": 1.8437281009110023, "grad_norm": 0.1742199957370758, "learning_rate": 0.00011846448218129634, "loss": 0.0489, "step": 13155 }, { "epoch": 1.8438682550805887, "grad_norm": 0.28863275051116943, "learning_rate": 0.00011845013154747667, "loss": 0.0616, "step": 13156 }, { "epoch": 1.8440084092501752, "grad_norm": 0.22597481310367584, "learning_rate": 0.00011843578091365701, "loss": 0.0513, "step": 13157 }, { "epoch": 1.8441485634197616, "grad_norm": 0.3605732023715973, 
"learning_rate": 0.00011842143027983735, "loss": 0.028, "step": 13158 }, { "epoch": 1.8442887175893483, "grad_norm": 0.14195258915424347, "learning_rate": 0.00011840707964601768, "loss": 0.0091, "step": 13159 }, { "epoch": 1.8444288717589348, "grad_norm": 0.43894365429878235, "learning_rate": 0.00011839272901219804, "loss": 0.0561, "step": 13160 }, { "epoch": 1.8445690259285215, "grad_norm": 0.1146656721830368, "learning_rate": 0.00011837837837837838, "loss": 0.0167, "step": 13161 }, { "epoch": 1.844709180098108, "grad_norm": 0.2076784074306488, "learning_rate": 0.0001183640277445587, "loss": 0.0139, "step": 13162 }, { "epoch": 1.8448493342676944, "grad_norm": 0.9799286723136902, "learning_rate": 0.00011834967711073905, "loss": 0.0545, "step": 13163 }, { "epoch": 1.844989488437281, "grad_norm": 0.31785622239112854, "learning_rate": 0.00011833532647691938, "loss": 0.0349, "step": 13164 }, { "epoch": 1.8451296426068675, "grad_norm": 0.5915439128875732, "learning_rate": 0.00011832097584309972, "loss": 0.05, "step": 13165 }, { "epoch": 1.8452697967764542, "grad_norm": 0.4077807664871216, "learning_rate": 0.00011830662520928007, "loss": 0.0764, "step": 13166 }, { "epoch": 1.8454099509460407, "grad_norm": 0.388322651386261, "learning_rate": 0.0001182922745754604, "loss": 0.0525, "step": 13167 }, { "epoch": 1.8455501051156271, "grad_norm": 0.21979929506778717, "learning_rate": 0.00011827792394164074, "loss": 0.0775, "step": 13168 }, { "epoch": 1.8456902592852136, "grad_norm": 0.6163580417633057, "learning_rate": 0.00011826357330782109, "loss": 0.0745, "step": 13169 }, { "epoch": 1.8458304134548003, "grad_norm": 0.20840896666049957, "learning_rate": 0.00011824922267400142, "loss": 0.0627, "step": 13170 }, { "epoch": 1.845970567624387, "grad_norm": 0.3919309079647064, "learning_rate": 0.00011823487204018177, "loss": 0.0567, "step": 13171 }, { "epoch": 1.8461107217939734, "grad_norm": 0.28132322430610657, "learning_rate": 0.00011822052140636211, "loss": 0.0637, "step": 13172 
}, { "epoch": 1.8462508759635599, "grad_norm": 0.17737574875354767, "learning_rate": 0.00011820617077254244, "loss": 0.0229, "step": 13173 }, { "epoch": 1.8463910301331463, "grad_norm": 0.41007745265960693, "learning_rate": 0.00011819182013872278, "loss": 0.0304, "step": 13174 }, { "epoch": 1.846531184302733, "grad_norm": 0.08945126086473465, "learning_rate": 0.00011817746950490311, "loss": 0.0093, "step": 13175 }, { "epoch": 1.8466713384723197, "grad_norm": 0.1429409682750702, "learning_rate": 0.00011816311887108347, "loss": 0.0187, "step": 13176 }, { "epoch": 1.8468114926419061, "grad_norm": 0.06563121825456619, "learning_rate": 0.00011814876823726381, "loss": 0.0043, "step": 13177 }, { "epoch": 1.8469516468114926, "grad_norm": 0.10587241500616074, "learning_rate": 0.00011813441760344414, "loss": 0.0162, "step": 13178 }, { "epoch": 1.847091800981079, "grad_norm": 0.6637721657752991, "learning_rate": 0.00011812006696962448, "loss": 0.1071, "step": 13179 }, { "epoch": 1.8472319551506657, "grad_norm": 0.9505621194839478, "learning_rate": 0.00011810571633580482, "loss": 0.0999, "step": 13180 }, { "epoch": 1.8473721093202524, "grad_norm": 0.7461830377578735, "learning_rate": 0.00011809136570198515, "loss": 0.1124, "step": 13181 }, { "epoch": 1.8475122634898389, "grad_norm": 0.9445149302482605, "learning_rate": 0.00011807701506816551, "loss": 0.0807, "step": 13182 }, { "epoch": 1.8476524176594253, "grad_norm": 0.1189340353012085, "learning_rate": 0.00011806266443434584, "loss": 0.0063, "step": 13183 }, { "epoch": 1.8477925718290118, "grad_norm": 0.2214917689561844, "learning_rate": 0.00011804831380052618, "loss": 0.0426, "step": 13184 }, { "epoch": 1.8479327259985985, "grad_norm": 4.704756259918213, "learning_rate": 0.00011803396316670652, "loss": 0.1249, "step": 13185 }, { "epoch": 1.8480728801681852, "grad_norm": 0.34630975127220154, "learning_rate": 0.00011801961253288685, "loss": 0.0417, "step": 13186 }, { "epoch": 1.8482130343377716, "grad_norm": 
0.6147617697715759, "learning_rate": 0.0001180052618990672, "loss": 0.0617, "step": 13187 }, { "epoch": 1.848353188507358, "grad_norm": 0.16629427671432495, "learning_rate": 0.00011799091126524755, "loss": 0.0322, "step": 13188 }, { "epoch": 1.8484933426769445, "grad_norm": 0.1443949043750763, "learning_rate": 0.00011797656063142788, "loss": 0.0194, "step": 13189 }, { "epoch": 1.8486334968465312, "grad_norm": 0.3427322804927826, "learning_rate": 0.00011796220999760822, "loss": 0.0437, "step": 13190 }, { "epoch": 1.8487736510161177, "grad_norm": 0.2636445164680481, "learning_rate": 0.00011794785936378855, "loss": 0.0466, "step": 13191 }, { "epoch": 1.8489138051857044, "grad_norm": 0.4148591458797455, "learning_rate": 0.0001179335087299689, "loss": 0.0606, "step": 13192 }, { "epoch": 1.8490539593552908, "grad_norm": 0.29563140869140625, "learning_rate": 0.00011791915809614924, "loss": 0.0468, "step": 13193 }, { "epoch": 1.8491941135248773, "grad_norm": 0.18425562977790833, "learning_rate": 0.00011790480746232957, "loss": 0.0187, "step": 13194 }, { "epoch": 1.849334267694464, "grad_norm": 0.08078820258378983, "learning_rate": 0.00011789045682850991, "loss": 0.0094, "step": 13195 }, { "epoch": 1.8494744218640504, "grad_norm": 0.17210640013217926, "learning_rate": 0.00011787610619469026, "loss": 0.0098, "step": 13196 }, { "epoch": 1.849614576033637, "grad_norm": 0.5614300966262817, "learning_rate": 0.00011786175556087058, "loss": 0.0292, "step": 13197 }, { "epoch": 1.8497547302032236, "grad_norm": 0.2318727821111679, "learning_rate": 0.00011784740492705094, "loss": 0.0152, "step": 13198 }, { "epoch": 1.84989488437281, "grad_norm": 0.34381574392318726, "learning_rate": 0.00011783305429323127, "loss": 0.0361, "step": 13199 }, { "epoch": 1.8500350385423965, "grad_norm": 0.6980080008506775, "learning_rate": 0.00011781870365941161, "loss": 0.0579, "step": 13200 }, { "epoch": 1.8501751927119832, "grad_norm": 0.6064243912696838, "learning_rate": 0.00011780435302559195, "loss": 
0.0627, "step": 13201 }, { "epoch": 1.8503153468815698, "grad_norm": 0.30239546298980713, "learning_rate": 0.00011779000239177228, "loss": 0.0353, "step": 13202 }, { "epoch": 1.8504555010511563, "grad_norm": 0.35314688086509705, "learning_rate": 0.00011777565175795264, "loss": 0.0942, "step": 13203 }, { "epoch": 1.8505956552207428, "grad_norm": 0.3479951024055481, "learning_rate": 0.00011776130112413298, "loss": 0.0804, "step": 13204 }, { "epoch": 1.8507358093903292, "grad_norm": 0.2140364795923233, "learning_rate": 0.00011774695049031331, "loss": 0.0383, "step": 13205 }, { "epoch": 1.850875963559916, "grad_norm": 0.284807413816452, "learning_rate": 0.00011773259985649365, "loss": 0.0318, "step": 13206 }, { "epoch": 1.8510161177295026, "grad_norm": 0.22400060296058655, "learning_rate": 0.000117718249222674, "loss": 0.0334, "step": 13207 }, { "epoch": 1.851156271899089, "grad_norm": 0.2021048367023468, "learning_rate": 0.00011770389858885433, "loss": 0.0387, "step": 13208 }, { "epoch": 1.8512964260686755, "grad_norm": 0.22780652344226837, "learning_rate": 0.00011768954795503468, "loss": 0.0232, "step": 13209 }, { "epoch": 1.851436580238262, "grad_norm": 0.34028196334838867, "learning_rate": 0.000117675197321215, "loss": 0.0531, "step": 13210 }, { "epoch": 1.8515767344078486, "grad_norm": 0.3480367362499237, "learning_rate": 0.00011766084668739535, "loss": 0.0864, "step": 13211 }, { "epoch": 1.8517168885774353, "grad_norm": 0.18290720880031586, "learning_rate": 0.00011764649605357569, "loss": 0.0512, "step": 13212 }, { "epoch": 1.8518570427470218, "grad_norm": 0.389321506023407, "learning_rate": 0.00011763214541975602, "loss": 0.0498, "step": 13213 }, { "epoch": 1.8519971969166082, "grad_norm": 0.2584618926048279, "learning_rate": 0.00011761779478593637, "loss": 0.049, "step": 13214 }, { "epoch": 1.8521373510861947, "grad_norm": 0.6915590763092041, "learning_rate": 0.00011760344415211672, "loss": 0.1029, "step": 13215 }, { "epoch": 1.8522775052557814, "grad_norm": 
0.10391825437545776, "learning_rate": 0.00011758909351829704, "loss": 0.0139, "step": 13216 }, { "epoch": 1.852417659425368, "grad_norm": 0.17216764390468597, "learning_rate": 0.00011757474288447739, "loss": 0.012, "step": 13217 }, { "epoch": 1.8525578135949545, "grad_norm": 0.9722307920455933, "learning_rate": 0.00011756039225065771, "loss": 0.0632, "step": 13218 }, { "epoch": 1.852697967764541, "grad_norm": 0.8123739361763, "learning_rate": 0.00011754604161683807, "loss": 0.027, "step": 13219 }, { "epoch": 1.8528381219341274, "grad_norm": 0.21997202932834625, "learning_rate": 0.00011753169098301841, "loss": 0.0396, "step": 13220 }, { "epoch": 1.8529782761037141, "grad_norm": 0.5752045512199402, "learning_rate": 0.00011751734034919874, "loss": 0.0561, "step": 13221 }, { "epoch": 1.8531184302733006, "grad_norm": 0.2269778996706009, "learning_rate": 0.00011750298971537908, "loss": 0.076, "step": 13222 }, { "epoch": 1.8532585844428873, "grad_norm": 0.45925530791282654, "learning_rate": 0.00011748863908155944, "loss": 0.0347, "step": 13223 }, { "epoch": 1.8533987386124737, "grad_norm": 0.4658682942390442, "learning_rate": 0.00011747428844773977, "loss": 0.0206, "step": 13224 }, { "epoch": 1.8535388927820602, "grad_norm": 0.20311835408210754, "learning_rate": 0.00011745993781392011, "loss": 0.0418, "step": 13225 }, { "epoch": 1.8536790469516466, "grad_norm": 0.12614500522613525, "learning_rate": 0.00011744558718010044, "loss": 0.0072, "step": 13226 }, { "epoch": 1.8538192011212333, "grad_norm": 0.2045486569404602, "learning_rate": 0.00011743123654628078, "loss": 0.0137, "step": 13227 }, { "epoch": 1.85395935529082, "grad_norm": 0.6752736568450928, "learning_rate": 0.00011741688591246112, "loss": 0.0345, "step": 13228 }, { "epoch": 1.8540995094604065, "grad_norm": 0.6215781569480896, "learning_rate": 0.00011740253527864145, "loss": 0.0481, "step": 13229 }, { "epoch": 1.854239663629993, "grad_norm": 0.716833233833313, "learning_rate": 0.0001173881846448218, "loss": 
0.2071, "step": 13230 }, { "epoch": 1.8543798177995794, "grad_norm": 1.8723562955856323, "learning_rate": 0.00011737383401100215, "loss": 0.1326, "step": 13231 }, { "epoch": 1.854519971969166, "grad_norm": 0.43276792764663696, "learning_rate": 0.00011735948337718248, "loss": 0.0206, "step": 13232 }, { "epoch": 1.8546601261387528, "grad_norm": 0.4123082756996155, "learning_rate": 0.00011734513274336282, "loss": 0.053, "step": 13233 }, { "epoch": 1.8548002803083392, "grad_norm": 0.42152661085128784, "learning_rate": 0.00011733078210954315, "loss": 0.023, "step": 13234 }, { "epoch": 1.8549404344779257, "grad_norm": 1.80049467086792, "learning_rate": 0.0001173164314757235, "loss": 0.1437, "step": 13235 }, { "epoch": 1.8550805886475121, "grad_norm": 0.161837637424469, "learning_rate": 0.00011730208084190385, "loss": 0.0256, "step": 13236 }, { "epoch": 1.8552207428170988, "grad_norm": 0.23803015053272247, "learning_rate": 0.00011728773020808417, "loss": 0.0207, "step": 13237 }, { "epoch": 1.8553608969866855, "grad_norm": 0.2239723652601242, "learning_rate": 0.00011727337957426452, "loss": 0.0276, "step": 13238 }, { "epoch": 1.855501051156272, "grad_norm": 0.376230388879776, "learning_rate": 0.00011725902894044487, "loss": 0.0379, "step": 13239 }, { "epoch": 1.8556412053258584, "grad_norm": 0.31867870688438416, "learning_rate": 0.0001172446783066252, "loss": 0.0396, "step": 13240 }, { "epoch": 1.8557813594954449, "grad_norm": 0.36151254177093506, "learning_rate": 0.00011723032767280554, "loss": 0.0346, "step": 13241 }, { "epoch": 1.8559215136650316, "grad_norm": 0.36161887645721436, "learning_rate": 0.00011721597703898589, "loss": 0.0416, "step": 13242 }, { "epoch": 1.8560616678346182, "grad_norm": 0.3505237102508545, "learning_rate": 0.00011720162640516621, "loss": 0.0314, "step": 13243 }, { "epoch": 1.8562018220042047, "grad_norm": 0.22928068041801453, "learning_rate": 0.00011718727577134656, "loss": 0.0257, "step": 13244 }, { "epoch": 1.8563419761737912, "grad_norm": 
0.17889830470085144, "learning_rate": 0.00011717292513752688, "loss": 0.0284, "step": 13245 }, { "epoch": 1.8564821303433776, "grad_norm": 0.09752125293016434, "learning_rate": 0.00011715857450370724, "loss": 0.014, "step": 13246 }, { "epoch": 1.8566222845129643, "grad_norm": 0.10232482850551605, "learning_rate": 0.00011714422386988758, "loss": 0.0078, "step": 13247 }, { "epoch": 1.8567624386825508, "grad_norm": 0.43635377287864685, "learning_rate": 0.00011712987323606791, "loss": 0.0474, "step": 13248 }, { "epoch": 1.8569025928521374, "grad_norm": 1.17537522315979, "learning_rate": 0.00011711552260224825, "loss": 0.0882, "step": 13249 }, { "epoch": 1.857042747021724, "grad_norm": 0.15354828536510468, "learning_rate": 0.00011710117196842861, "loss": 0.0452, "step": 13250 }, { "epoch": 1.8571829011913104, "grad_norm": 0.4120674729347229, "learning_rate": 0.00011708682133460894, "loss": 0.0814, "step": 13251 }, { "epoch": 1.857323055360897, "grad_norm": 0.37464696168899536, "learning_rate": 0.00011707247070078928, "loss": 0.081, "step": 13252 }, { "epoch": 1.8574632095304835, "grad_norm": 0.4091629385948181, "learning_rate": 0.00011705812006696961, "loss": 0.0498, "step": 13253 }, { "epoch": 1.8576033637000702, "grad_norm": 0.23688896000385284, "learning_rate": 0.00011704376943314995, "loss": 0.0269, "step": 13254 }, { "epoch": 1.8577435178696566, "grad_norm": 0.4084051549434662, "learning_rate": 0.0001170294187993303, "loss": 0.0582, "step": 13255 }, { "epoch": 1.857883672039243, "grad_norm": 0.2864155173301697, "learning_rate": 0.00011701506816551063, "loss": 0.0415, "step": 13256 }, { "epoch": 1.8580238262088296, "grad_norm": 0.0915154367685318, "learning_rate": 0.00011700071753169098, "loss": 0.0286, "step": 13257 }, { "epoch": 1.8581639803784162, "grad_norm": 0.8650240898132324, "learning_rate": 0.00011698636689787132, "loss": 0.11, "step": 13258 }, { "epoch": 1.858304134548003, "grad_norm": 0.3484264016151428, "learning_rate": 0.00011697201626405165, "loss": 
0.0299, "step": 13259 }, { "epoch": 1.8584442887175894, "grad_norm": 0.7601109147071838, "learning_rate": 0.00011695766563023199, "loss": 0.1608, "step": 13260 }, { "epoch": 1.8585844428871758, "grad_norm": 0.41416794061660767, "learning_rate": 0.00011694331499641232, "loss": 0.0376, "step": 13261 }, { "epoch": 1.8587245970567623, "grad_norm": 0.29231420159339905, "learning_rate": 0.00011692896436259267, "loss": 0.0314, "step": 13262 }, { "epoch": 1.858864751226349, "grad_norm": 0.281376451253891, "learning_rate": 0.00011691461372877302, "loss": 0.0776, "step": 13263 }, { "epoch": 1.8590049053959357, "grad_norm": 0.4767308831214905, "learning_rate": 0.00011690026309495334, "loss": 0.0943, "step": 13264 }, { "epoch": 1.8591450595655221, "grad_norm": 0.2560347020626068, "learning_rate": 0.00011688591246113369, "loss": 0.0667, "step": 13265 }, { "epoch": 1.8592852137351086, "grad_norm": 0.12228003889322281, "learning_rate": 0.00011687156182731404, "loss": 0.0162, "step": 13266 }, { "epoch": 1.859425367904695, "grad_norm": 0.48055019974708557, "learning_rate": 0.00011685721119349437, "loss": 0.0344, "step": 13267 }, { "epoch": 1.8595655220742817, "grad_norm": 0.0845029354095459, "learning_rate": 0.00011684286055967471, "loss": 0.0156, "step": 13268 }, { "epoch": 1.8597056762438684, "grad_norm": 0.1355154663324356, "learning_rate": 0.00011682850992585504, "loss": 0.0125, "step": 13269 }, { "epoch": 1.8598458304134549, "grad_norm": 0.5049238204956055, "learning_rate": 0.00011681415929203538, "loss": 0.0577, "step": 13270 }, { "epoch": 1.8599859845830413, "grad_norm": 0.7007641196250916, "learning_rate": 0.00011679980865821574, "loss": 0.1806, "step": 13271 }, { "epoch": 1.8601261387526278, "grad_norm": 0.46290716528892517, "learning_rate": 0.00011678545802439607, "loss": 0.0437, "step": 13272 }, { "epoch": 1.8602662929222145, "grad_norm": 0.249790221452713, "learning_rate": 0.00011677110739057641, "loss": 0.0219, "step": 13273 }, { "epoch": 1.8604064470918011, 
"grad_norm": 0.5716654658317566, "learning_rate": 0.00011675675675675675, "loss": 0.0768, "step": 13274 }, { "epoch": 1.8605466012613876, "grad_norm": 0.6225055456161499, "learning_rate": 0.00011674240612293708, "loss": 0.1216, "step": 13275 }, { "epoch": 1.860686755430974, "grad_norm": 0.5695887804031372, "learning_rate": 0.00011672805548911742, "loss": 0.1039, "step": 13276 }, { "epoch": 1.8608269096005605, "grad_norm": 0.3829694986343384, "learning_rate": 0.00011671370485529775, "loss": 0.0681, "step": 13277 }, { "epoch": 1.8609670637701472, "grad_norm": 1.835770606994629, "learning_rate": 0.0001166993542214781, "loss": 0.0846, "step": 13278 }, { "epoch": 1.8611072179397337, "grad_norm": 0.23133142292499542, "learning_rate": 0.00011668500358765845, "loss": 0.0208, "step": 13279 }, { "epoch": 1.8612473721093203, "grad_norm": 0.751875102519989, "learning_rate": 0.00011667065295383878, "loss": 0.073, "step": 13280 }, { "epoch": 1.8613875262789068, "grad_norm": 0.299029678106308, "learning_rate": 0.00011665630232001912, "loss": 0.0347, "step": 13281 }, { "epoch": 1.8615276804484933, "grad_norm": 0.642019510269165, "learning_rate": 0.00011664195168619947, "loss": 0.1066, "step": 13282 }, { "epoch": 1.86166783461808, "grad_norm": 0.9072075486183167, "learning_rate": 0.0001166276010523798, "loss": 0.03, "step": 13283 }, { "epoch": 1.8618079887876664, "grad_norm": 2.1316585540771484, "learning_rate": 0.00011661325041856015, "loss": 0.2839, "step": 13284 }, { "epoch": 1.861948142957253, "grad_norm": 0.6896312832832336, "learning_rate": 0.00011659889978474049, "loss": 0.2553, "step": 13285 }, { "epoch": 1.8620882971268395, "grad_norm": 0.23766830563545227, "learning_rate": 0.00011658454915092082, "loss": 0.0155, "step": 13286 }, { "epoch": 1.862228451296426, "grad_norm": 0.37232038378715515, "learning_rate": 0.00011657019851710117, "loss": 0.0761, "step": 13287 }, { "epoch": 1.8623686054660125, "grad_norm": 0.16449019312858582, "learning_rate": 0.0001165558478832815, 
"loss": 0.0179, "step": 13288 }, { "epoch": 1.8625087596355991, "grad_norm": 0.21596352756023407, "learning_rate": 0.00011654149724946184, "loss": 0.0231, "step": 13289 }, { "epoch": 1.8626489138051858, "grad_norm": 0.28764578700065613, "learning_rate": 0.00011652714661564218, "loss": 0.0565, "step": 13290 }, { "epoch": 1.8627890679747723, "grad_norm": 0.10288781672716141, "learning_rate": 0.00011651279598182251, "loss": 0.0227, "step": 13291 }, { "epoch": 1.8629292221443587, "grad_norm": 0.4076229929924011, "learning_rate": 0.00011649844534800286, "loss": 0.0459, "step": 13292 }, { "epoch": 1.8630693763139452, "grad_norm": 0.24998854100704193, "learning_rate": 0.00011648409471418321, "loss": 0.0396, "step": 13293 }, { "epoch": 1.8632095304835319, "grad_norm": 0.24248023331165314, "learning_rate": 0.00011646974408036354, "loss": 0.0404, "step": 13294 }, { "epoch": 1.8633496846531186, "grad_norm": 0.09918013215065002, "learning_rate": 0.00011645539344654388, "loss": 0.0191, "step": 13295 }, { "epoch": 1.863489838822705, "grad_norm": 0.2902102470397949, "learning_rate": 0.00011644104281272421, "loss": 0.0554, "step": 13296 }, { "epoch": 1.8636299929922915, "grad_norm": 0.26920413970947266, "learning_rate": 0.00011642669217890455, "loss": 0.0315, "step": 13297 }, { "epoch": 1.863770147161878, "grad_norm": 0.2771225869655609, "learning_rate": 0.00011641234154508491, "loss": 0.0781, "step": 13298 }, { "epoch": 1.8639103013314646, "grad_norm": 1.0889365673065186, "learning_rate": 0.00011639799091126524, "loss": 0.0228, "step": 13299 }, { "epoch": 1.8640504555010513, "grad_norm": 0.3517109453678131, "learning_rate": 0.00011638364027744558, "loss": 0.1046, "step": 13300 }, { "epoch": 1.8641906096706378, "grad_norm": 0.11087671667337418, "learning_rate": 0.00011636928964362592, "loss": 0.0099, "step": 13301 }, { "epoch": 1.8643307638402242, "grad_norm": 0.5309032201766968, "learning_rate": 0.00011635493900980625, "loss": 0.0606, "step": 13302 }, { "epoch": 
1.8644709180098107, "grad_norm": 0.14224471151828766, "learning_rate": 0.0001163405883759866, "loss": 0.0225, "step": 13303 }, { "epoch": 1.8646110721793974, "grad_norm": 0.20649072527885437, "learning_rate": 0.00011632623774216693, "loss": 0.0683, "step": 13304 }, { "epoch": 1.864751226348984, "grad_norm": 0.2530348300933838, "learning_rate": 0.00011631188710834728, "loss": 0.0499, "step": 13305 }, { "epoch": 1.8648913805185705, "grad_norm": 0.4077099561691284, "learning_rate": 0.00011629753647452762, "loss": 0.0448, "step": 13306 }, { "epoch": 1.865031534688157, "grad_norm": 0.34534546732902527, "learning_rate": 0.00011628318584070795, "loss": 0.0574, "step": 13307 }, { "epoch": 1.8651716888577434, "grad_norm": 0.24342651665210724, "learning_rate": 0.00011626883520688829, "loss": 0.0349, "step": 13308 }, { "epoch": 1.86531184302733, "grad_norm": 0.16251760721206665, "learning_rate": 0.00011625448457306864, "loss": 0.0151, "step": 13309 }, { "epoch": 1.8654519971969166, "grad_norm": 0.2958175241947174, "learning_rate": 0.00011624013393924897, "loss": 0.0408, "step": 13310 }, { "epoch": 1.8655921513665032, "grad_norm": 0.17052926123142242, "learning_rate": 0.00011622578330542931, "loss": 0.0445, "step": 13311 }, { "epoch": 1.8657323055360897, "grad_norm": 0.3912261426448822, "learning_rate": 0.00011621143267160964, "loss": 0.0982, "step": 13312 }, { "epoch": 1.8658724597056762, "grad_norm": 0.4915083050727844, "learning_rate": 0.00011619708203778999, "loss": 0.0347, "step": 13313 }, { "epoch": 1.8660126138752626, "grad_norm": 0.2878972887992859, "learning_rate": 0.00011618273140397034, "loss": 0.0283, "step": 13314 }, { "epoch": 1.8661527680448493, "grad_norm": 0.37967249751091003, "learning_rate": 0.00011616838077015067, "loss": 0.0611, "step": 13315 }, { "epoch": 1.866292922214436, "grad_norm": 0.07954419404268265, "learning_rate": 0.00011615403013633101, "loss": 0.017, "step": 13316 }, { "epoch": 1.8664330763840224, "grad_norm": 0.6028661727905273, 
"learning_rate": 0.00011613967950251135, "loss": 0.1259, "step": 13317 }, { "epoch": 1.866573230553609, "grad_norm": 0.14035560190677643, "learning_rate": 0.00011612532886869168, "loss": 0.0152, "step": 13318 }, { "epoch": 1.8667133847231954, "grad_norm": 0.4251324236392975, "learning_rate": 0.00011611097823487204, "loss": 0.0507, "step": 13319 }, { "epoch": 1.866853538892782, "grad_norm": 0.3765099346637726, "learning_rate": 0.00011609662760105238, "loss": 0.0396, "step": 13320 }, { "epoch": 1.8669936930623687, "grad_norm": 0.12300407886505127, "learning_rate": 0.00011608227696723271, "loss": 0.0098, "step": 13321 }, { "epoch": 1.8671338472319552, "grad_norm": 0.1625182330608368, "learning_rate": 0.00011606792633341305, "loss": 0.0064, "step": 13322 }, { "epoch": 1.8672740014015416, "grad_norm": 0.18274950981140137, "learning_rate": 0.00011605357569959338, "loss": 0.0375, "step": 13323 }, { "epoch": 1.867414155571128, "grad_norm": 0.15846355259418488, "learning_rate": 0.00011603922506577372, "loss": 0.0099, "step": 13324 }, { "epoch": 1.8675543097407148, "grad_norm": 0.6078698039054871, "learning_rate": 0.00011602487443195408, "loss": 0.0851, "step": 13325 }, { "epoch": 1.8676944639103015, "grad_norm": 0.2566559314727783, "learning_rate": 0.0001160105237981344, "loss": 0.0256, "step": 13326 }, { "epoch": 1.867834618079888, "grad_norm": 0.27305838465690613, "learning_rate": 0.00011599617316431475, "loss": 0.1789, "step": 13327 }, { "epoch": 1.8679747722494744, "grad_norm": 0.5287879109382629, "learning_rate": 0.00011598182253049509, "loss": 0.0471, "step": 13328 }, { "epoch": 1.8681149264190609, "grad_norm": 0.15730054676532745, "learning_rate": 0.00011596747189667542, "loss": 0.0125, "step": 13329 }, { "epoch": 1.8682550805886475, "grad_norm": 0.17922146618366241, "learning_rate": 0.00011595312126285577, "loss": 0.0462, "step": 13330 }, { "epoch": 1.8683952347582342, "grad_norm": 0.20706181228160858, "learning_rate": 0.0001159387706290361, "loss": 0.0158, "step": 
13331 }, { "epoch": 1.8685353889278207, "grad_norm": 0.19337230920791626, "learning_rate": 0.00011592441999521644, "loss": 0.0379, "step": 13332 }, { "epoch": 1.8686755430974071, "grad_norm": 0.5239232182502747, "learning_rate": 0.00011591006936139679, "loss": 0.0408, "step": 13333 }, { "epoch": 1.8688156972669936, "grad_norm": 0.5003645420074463, "learning_rate": 0.00011589571872757712, "loss": 0.1373, "step": 13334 }, { "epoch": 1.8689558514365803, "grad_norm": 5.551768779754639, "learning_rate": 0.00011588136809375747, "loss": 0.443, "step": 13335 }, { "epoch": 1.8690960056061667, "grad_norm": 0.20352433621883392, "learning_rate": 0.00011586701745993781, "loss": 0.0428, "step": 13336 }, { "epoch": 1.8692361597757534, "grad_norm": 0.720059871673584, "learning_rate": 0.00011585266682611814, "loss": 0.0797, "step": 13337 }, { "epoch": 1.8693763139453399, "grad_norm": 0.2068159431219101, "learning_rate": 0.00011583831619229848, "loss": 0.0173, "step": 13338 }, { "epoch": 1.8695164681149263, "grad_norm": 0.1925693154335022, "learning_rate": 0.00011582396555847881, "loss": 0.0415, "step": 13339 }, { "epoch": 1.869656622284513, "grad_norm": 0.1934211552143097, "learning_rate": 0.00011580961492465915, "loss": 0.0188, "step": 13340 }, { "epoch": 1.8697967764540995, "grad_norm": 0.8721495866775513, "learning_rate": 0.00011579526429083951, "loss": 0.0739, "step": 13341 }, { "epoch": 1.8699369306236862, "grad_norm": 0.24747636914253235, "learning_rate": 0.00011578091365701984, "loss": 0.0303, "step": 13342 }, { "epoch": 1.8700770847932726, "grad_norm": 0.08373694866895676, "learning_rate": 0.00011576656302320018, "loss": 0.0149, "step": 13343 }, { "epoch": 1.870217238962859, "grad_norm": 0.39172282814979553, "learning_rate": 0.00011575221238938052, "loss": 0.0743, "step": 13344 }, { "epoch": 1.8703573931324455, "grad_norm": 0.2763004004955292, "learning_rate": 0.00011573786175556085, "loss": 0.0218, "step": 13345 }, { "epoch": 1.8704975473020322, "grad_norm": 
0.5017855167388916, "learning_rate": 0.00011572351112174121, "loss": 0.0292, "step": 13346 }, { "epoch": 1.870637701471619, "grad_norm": 0.6171298027038574, "learning_rate": 0.00011570916048792154, "loss": 0.0437, "step": 13347 }, { "epoch": 1.8707778556412054, "grad_norm": 0.38493019342422485, "learning_rate": 0.00011569480985410188, "loss": 0.0366, "step": 13348 }, { "epoch": 1.8709180098107918, "grad_norm": 0.2922995686531067, "learning_rate": 0.00011568045922028222, "loss": 0.0563, "step": 13349 }, { "epoch": 1.8710581639803783, "grad_norm": 0.09337473660707474, "learning_rate": 0.00011566610858646255, "loss": 0.0097, "step": 13350 }, { "epoch": 1.871198318149965, "grad_norm": 1.8487067222595215, "learning_rate": 0.0001156517579526429, "loss": 0.0553, "step": 13351 }, { "epoch": 1.8713384723195516, "grad_norm": 0.9333266615867615, "learning_rate": 0.00011563740731882325, "loss": 0.0809, "step": 13352 }, { "epoch": 1.871478626489138, "grad_norm": 0.37149691581726074, "learning_rate": 0.00011562305668500357, "loss": 0.0361, "step": 13353 }, { "epoch": 1.8716187806587246, "grad_norm": 0.4017590284347534, "learning_rate": 0.00011560870605118392, "loss": 0.0731, "step": 13354 }, { "epoch": 1.871758934828311, "grad_norm": 0.211772620677948, "learning_rate": 0.00011559435541736426, "loss": 0.0626, "step": 13355 }, { "epoch": 1.8718990889978977, "grad_norm": 0.11263607442378998, "learning_rate": 0.00011558000478354459, "loss": 0.0204, "step": 13356 }, { "epoch": 1.8720392431674844, "grad_norm": 0.16496983170509338, "learning_rate": 0.00011556565414972494, "loss": 0.0404, "step": 13357 }, { "epoch": 1.8721793973370708, "grad_norm": 0.08869792520999908, "learning_rate": 0.00011555130351590527, "loss": 0.0078, "step": 13358 }, { "epoch": 1.8723195515066573, "grad_norm": 0.22677567601203918, "learning_rate": 0.00011553695288208561, "loss": 0.047, "step": 13359 }, { "epoch": 1.8724597056762438, "grad_norm": 0.23768877983093262, "learning_rate": 0.00011552260224826596, 
"loss": 0.0224, "step": 13360 }, { "epoch": 1.8725998598458304, "grad_norm": 0.1602317988872528, "learning_rate": 0.00011550825161444628, "loss": 0.0225, "step": 13361 }, { "epoch": 1.8727400140154171, "grad_norm": 0.21547986567020416, "learning_rate": 0.00011549390098062664, "loss": 0.0432, "step": 13362 }, { "epoch": 1.8728801681850036, "grad_norm": 0.3699752688407898, "learning_rate": 0.00011547955034680698, "loss": 0.0313, "step": 13363 }, { "epoch": 1.87302032235459, "grad_norm": 0.35498467087745667, "learning_rate": 0.00011546519971298731, "loss": 0.0423, "step": 13364 }, { "epoch": 1.8731604765241765, "grad_norm": 0.4760517179965973, "learning_rate": 0.00011545084907916765, "loss": 0.0793, "step": 13365 }, { "epoch": 1.8733006306937632, "grad_norm": 0.12012570351362228, "learning_rate": 0.00011543649844534798, "loss": 0.019, "step": 13366 }, { "epoch": 1.8734407848633496, "grad_norm": 0.5168591141700745, "learning_rate": 0.00011542214781152834, "loss": 0.0674, "step": 13367 }, { "epoch": 1.8735809390329363, "grad_norm": 0.22395719587802887, "learning_rate": 0.00011540779717770868, "loss": 0.0678, "step": 13368 }, { "epoch": 1.8737210932025228, "grad_norm": 0.19130194187164307, "learning_rate": 0.00011539344654388901, "loss": 0.0528, "step": 13369 }, { "epoch": 1.8738612473721092, "grad_norm": 0.5229623913764954, "learning_rate": 0.00011537909591006935, "loss": 0.0415, "step": 13370 }, { "epoch": 1.8740014015416957, "grad_norm": 0.265043169260025, "learning_rate": 0.00011536474527624969, "loss": 0.032, "step": 13371 }, { "epoch": 1.8741415557112824, "grad_norm": 0.2419590950012207, "learning_rate": 0.00011535039464243002, "loss": 0.0369, "step": 13372 }, { "epoch": 1.874281709880869, "grad_norm": 0.14468783140182495, "learning_rate": 0.00011533604400861038, "loss": 0.0214, "step": 13373 }, { "epoch": 1.8744218640504555, "grad_norm": 0.33427003026008606, "learning_rate": 0.0001153216933747907, "loss": 0.0239, "step": 13374 }, { "epoch": 1.874562018220042, 
"grad_norm": 0.4960840344429016, "learning_rate": 0.00011530734274097105, "loss": 0.0913, "step": 13375 }, { "epoch": 1.8747021723896284, "grad_norm": 0.475669801235199, "learning_rate": 0.00011529299210715139, "loss": 0.0624, "step": 13376 }, { "epoch": 1.8748423265592151, "grad_norm": 0.21983139216899872, "learning_rate": 0.00011527864147333172, "loss": 0.0396, "step": 13377 }, { "epoch": 1.8749824807288018, "grad_norm": 0.4598732888698578, "learning_rate": 0.00011526429083951207, "loss": 0.0301, "step": 13378 }, { "epoch": 1.8751226348983883, "grad_norm": 0.46474209427833557, "learning_rate": 0.00011524994020569242, "loss": 0.0325, "step": 13379 }, { "epoch": 1.8752627890679747, "grad_norm": 0.04625300318002701, "learning_rate": 0.00011523558957187274, "loss": 0.002, "step": 13380 }, { "epoch": 1.8754029432375612, "grad_norm": 0.9333197474479675, "learning_rate": 0.00011522123893805309, "loss": 0.2254, "step": 13381 }, { "epoch": 1.8755430974071479, "grad_norm": 1.1528195142745972, "learning_rate": 0.00011520688830423341, "loss": 0.1393, "step": 13382 }, { "epoch": 1.8756832515767345, "grad_norm": 1.5066992044448853, "learning_rate": 0.00011519253767041377, "loss": 0.0915, "step": 13383 }, { "epoch": 1.875823405746321, "grad_norm": 0.21653881669044495, "learning_rate": 0.00011517818703659411, "loss": 0.0076, "step": 13384 }, { "epoch": 1.8759635599159075, "grad_norm": 0.12956270575523376, "learning_rate": 0.00011516383640277444, "loss": 0.0055, "step": 13385 }, { "epoch": 1.876103714085494, "grad_norm": 0.2553445100784302, "learning_rate": 0.00011514948576895478, "loss": 0.0573, "step": 13386 }, { "epoch": 1.8762438682550806, "grad_norm": 0.32053250074386597, "learning_rate": 0.00011513513513513513, "loss": 0.013, "step": 13387 }, { "epoch": 1.8763840224246673, "grad_norm": 0.4977538287639618, "learning_rate": 0.00011512078450131545, "loss": 0.0439, "step": 13388 }, { "epoch": 1.8765241765942537, "grad_norm": 0.17351041734218597, "learning_rate": 
0.00011510643386749581, "loss": 0.0277, "step": 13389 }, { "epoch": 1.8766643307638402, "grad_norm": 0.23659555613994598, "learning_rate": 0.00011509208323367615, "loss": 0.0624, "step": 13390 }, { "epoch": 1.8768044849334267, "grad_norm": 0.3162183463573456, "learning_rate": 0.00011507773259985648, "loss": 0.0555, "step": 13391 }, { "epoch": 1.8769446391030133, "grad_norm": 0.1986086070537567, "learning_rate": 0.00011506338196603682, "loss": 0.0381, "step": 13392 }, { "epoch": 1.8770847932725998, "grad_norm": 0.24596472084522247, "learning_rate": 0.00011504903133221715, "loss": 0.034, "step": 13393 }, { "epoch": 1.8772249474421865, "grad_norm": 0.2564314901828766, "learning_rate": 0.0001150346806983975, "loss": 0.0227, "step": 13394 }, { "epoch": 1.877365101611773, "grad_norm": 0.08028414845466614, "learning_rate": 0.00011502033006457785, "loss": 0.0091, "step": 13395 }, { "epoch": 1.8775052557813594, "grad_norm": 0.17415869235992432, "learning_rate": 0.00011500597943075818, "loss": 0.037, "step": 13396 }, { "epoch": 1.877645409950946, "grad_norm": 0.3385166823863983, "learning_rate": 0.00011499162879693852, "loss": 0.0992, "step": 13397 }, { "epoch": 1.8777855641205325, "grad_norm": 0.19357110559940338, "learning_rate": 0.00011497727816311888, "loss": 0.0274, "step": 13398 }, { "epoch": 1.8779257182901192, "grad_norm": 0.2873781621456146, "learning_rate": 0.0001149629275292992, "loss": 0.0111, "step": 13399 }, { "epoch": 1.8780658724597057, "grad_norm": 0.3342207968235016, "learning_rate": 0.00011494857689547955, "loss": 0.0432, "step": 13400 }, { "epoch": 1.8782060266292921, "grad_norm": 0.32632869482040405, "learning_rate": 0.00011493422626165987, "loss": 0.0339, "step": 13401 }, { "epoch": 1.8783461807988786, "grad_norm": 0.15519414842128754, "learning_rate": 0.00011491987562784022, "loss": 0.0437, "step": 13402 }, { "epoch": 1.8784863349684653, "grad_norm": 0.297125905752182, "learning_rate": 0.00011490552499402056, "loss": 0.0413, "step": 13403 }, { "epoch": 
1.878626489138052, "grad_norm": 0.18203961849212646, "learning_rate": 0.00011489117436020089, "loss": 0.0426, "step": 13404 }, { "epoch": 1.8787666433076384, "grad_norm": 0.3117711842060089, "learning_rate": 0.00011487682372638124, "loss": 0.0311, "step": 13405 }, { "epoch": 1.8789067974772249, "grad_norm": 0.3444058299064636, "learning_rate": 0.00011486247309256158, "loss": 0.0363, "step": 13406 }, { "epoch": 1.8790469516468113, "grad_norm": 0.30491572618484497, "learning_rate": 0.00011484812245874191, "loss": 0.0298, "step": 13407 }, { "epoch": 1.879187105816398, "grad_norm": 0.29263925552368164, "learning_rate": 0.00011483377182492226, "loss": 0.0356, "step": 13408 }, { "epoch": 1.8793272599859847, "grad_norm": 0.32042330503463745, "learning_rate": 0.00011481942119110258, "loss": 0.0281, "step": 13409 }, { "epoch": 1.8794674141555712, "grad_norm": 0.05516209825873375, "learning_rate": 0.00011480507055728294, "loss": 0.0084, "step": 13410 }, { "epoch": 1.8796075683251576, "grad_norm": 0.2328115999698639, "learning_rate": 0.00011479071992346328, "loss": 0.0492, "step": 13411 }, { "epoch": 1.879747722494744, "grad_norm": 0.5882803797721863, "learning_rate": 0.00011477636928964361, "loss": 0.0607, "step": 13412 }, { "epoch": 1.8798878766643308, "grad_norm": 0.25868362188339233, "learning_rate": 0.00011476201865582395, "loss": 0.0269, "step": 13413 }, { "epoch": 1.8800280308339175, "grad_norm": 0.28339165449142456, "learning_rate": 0.00011474766802200431, "loss": 0.0343, "step": 13414 }, { "epoch": 1.880168185003504, "grad_norm": 0.27832159399986267, "learning_rate": 0.00011473331738818464, "loss": 0.0145, "step": 13415 }, { "epoch": 1.8803083391730904, "grad_norm": 0.4135292172431946, "learning_rate": 0.00011471896675436498, "loss": 0.1034, "step": 13416 }, { "epoch": 1.8804484933426768, "grad_norm": 0.3380087614059448, "learning_rate": 0.00011470461612054531, "loss": 0.0611, "step": 13417 }, { "epoch": 1.8805886475122635, "grad_norm": 0.5146254301071167, 
"learning_rate": 0.00011469026548672565, "loss": 0.0226, "step": 13418 }, { "epoch": 1.8807288016818502, "grad_norm": 0.45673611760139465, "learning_rate": 0.00011467591485290599, "loss": 0.0177, "step": 13419 }, { "epoch": 1.8808689558514367, "grad_norm": 0.455418199300766, "learning_rate": 0.00011466156421908633, "loss": 0.0729, "step": 13420 }, { "epoch": 1.8810091100210231, "grad_norm": 0.12285628914833069, "learning_rate": 0.00011464721358526668, "loss": 0.0156, "step": 13421 }, { "epoch": 1.8811492641906096, "grad_norm": 0.39038726687431335, "learning_rate": 0.00011463286295144702, "loss": 0.0572, "step": 13422 }, { "epoch": 1.8812894183601963, "grad_norm": 0.29715901613235474, "learning_rate": 0.00011461851231762735, "loss": 0.0723, "step": 13423 }, { "epoch": 1.8814295725297827, "grad_norm": 0.26968151330947876, "learning_rate": 0.00011460416168380769, "loss": 0.031, "step": 13424 }, { "epoch": 1.8815697266993694, "grad_norm": 0.1891375631093979, "learning_rate": 0.00011458981104998802, "loss": 0.0432, "step": 13425 }, { "epoch": 1.8817098808689559, "grad_norm": 0.17729321122169495, "learning_rate": 0.00011457546041616837, "loss": 0.0242, "step": 13426 }, { "epoch": 1.8818500350385423, "grad_norm": 0.04632085934281349, "learning_rate": 0.00011456110978234872, "loss": 0.004, "step": 13427 }, { "epoch": 1.881990189208129, "grad_norm": 0.24560733139514923, "learning_rate": 0.00011454675914852904, "loss": 0.0197, "step": 13428 }, { "epoch": 1.8821303433777155, "grad_norm": 1.5941994190216064, "learning_rate": 0.00011453240851470939, "loss": 0.0926, "step": 13429 }, { "epoch": 1.8822704975473021, "grad_norm": 1.6707119941711426, "learning_rate": 0.00011451805788088974, "loss": 0.0491, "step": 13430 }, { "epoch": 1.8824106517168886, "grad_norm": 0.49443477392196655, "learning_rate": 0.00011450370724707007, "loss": 0.0455, "step": 13431 }, { "epoch": 1.882550805886475, "grad_norm": 0.35893669724464417, "learning_rate": 0.00011448935661325041, "loss": 0.0324, 
"step": 13432 }, { "epoch": 1.8826909600560615, "grad_norm": 0.35242122411727905, "learning_rate": 0.00011447500597943075, "loss": 0.0444, "step": 13433 }, { "epoch": 1.8828311142256482, "grad_norm": 2.934565544128418, "learning_rate": 0.00011446065534561108, "loss": 0.1014, "step": 13434 }, { "epoch": 1.8829712683952349, "grad_norm": 4.257355690002441, "learning_rate": 0.00011444630471179142, "loss": 0.2638, "step": 13435 }, { "epoch": 1.8831114225648213, "grad_norm": 0.23855766654014587, "learning_rate": 0.00011443195407797177, "loss": 0.0191, "step": 13436 }, { "epoch": 1.8832515767344078, "grad_norm": 0.349558025598526, "learning_rate": 0.00011441760344415211, "loss": 0.089, "step": 13437 }, { "epoch": 1.8833917309039943, "grad_norm": 0.28910204768180847, "learning_rate": 0.00011440325281033245, "loss": 0.1031, "step": 13438 }, { "epoch": 1.883531885073581, "grad_norm": 0.7620351910591125, "learning_rate": 0.00011438890217651278, "loss": 0.0279, "step": 13439 }, { "epoch": 1.8836720392431676, "grad_norm": 0.2556101381778717, "learning_rate": 0.00011437455154269312, "loss": 0.0393, "step": 13440 }, { "epoch": 1.883812193412754, "grad_norm": 0.17038565874099731, "learning_rate": 0.00011436020090887348, "loss": 0.0565, "step": 13441 }, { "epoch": 1.8839523475823405, "grad_norm": 0.06055064499378204, "learning_rate": 0.0001143458502750538, "loss": 0.0137, "step": 13442 }, { "epoch": 1.884092501751927, "grad_norm": 0.44050848484039307, "learning_rate": 0.00011433149964123415, "loss": 0.0429, "step": 13443 }, { "epoch": 1.8842326559215137, "grad_norm": 0.5867749452590942, "learning_rate": 0.00011431714900741448, "loss": 0.1403, "step": 13444 }, { "epoch": 1.8843728100911004, "grad_norm": 0.1588454395532608, "learning_rate": 0.00011430279837359482, "loss": 0.0172, "step": 13445 }, { "epoch": 1.8845129642606868, "grad_norm": 0.3927476704120636, "learning_rate": 0.00011428844773977517, "loss": 0.0399, "step": 13446 }, { "epoch": 1.8846531184302733, "grad_norm": 
0.26721999049186707, "learning_rate": 0.0001142740971059555, "loss": 0.0679, "step": 13447 }, { "epoch": 1.8847932725998597, "grad_norm": 0.24815945327281952, "learning_rate": 0.00011425974647213585, "loss": 0.047, "step": 13448 }, { "epoch": 1.8849334267694464, "grad_norm": 0.3501009941101074, "learning_rate": 0.00011424539583831619, "loss": 0.1487, "step": 13449 }, { "epoch": 1.885073580939033, "grad_norm": 0.3207671046257019, "learning_rate": 0.00011423104520449652, "loss": 0.0566, "step": 13450 }, { "epoch": 1.8852137351086196, "grad_norm": 0.29027116298675537, "learning_rate": 0.00011421669457067686, "loss": 0.0892, "step": 13451 }, { "epoch": 1.885353889278206, "grad_norm": 0.12084417790174484, "learning_rate": 0.0001142023439368572, "loss": 0.0141, "step": 13452 }, { "epoch": 1.8854940434477925, "grad_norm": 0.555729329586029, "learning_rate": 0.00011418799330303754, "loss": 0.0427, "step": 13453 }, { "epoch": 1.8856341976173792, "grad_norm": 0.1798364371061325, "learning_rate": 0.00011417364266921788, "loss": 0.0707, "step": 13454 }, { "epoch": 1.8857743517869656, "grad_norm": 0.18271052837371826, "learning_rate": 0.00011415929203539821, "loss": 0.0276, "step": 13455 }, { "epoch": 1.8859145059565523, "grad_norm": 0.33206313848495483, "learning_rate": 0.00011414494140157856, "loss": 0.0507, "step": 13456 }, { "epoch": 1.8860546601261388, "grad_norm": 0.2393684685230255, "learning_rate": 0.00011413059076775891, "loss": 0.0369, "step": 13457 }, { "epoch": 1.8861948142957252, "grad_norm": 0.15933044254779816, "learning_rate": 0.00011411624013393924, "loss": 0.0447, "step": 13458 }, { "epoch": 1.8863349684653117, "grad_norm": 0.11679864674806595, "learning_rate": 0.00011410188950011958, "loss": 0.0239, "step": 13459 }, { "epoch": 1.8864751226348984, "grad_norm": 0.2846401035785675, "learning_rate": 0.00011408753886629991, "loss": 0.0446, "step": 13460 }, { "epoch": 1.886615276804485, "grad_norm": 0.33770138025283813, "learning_rate": 0.00011407318823248025, 
"loss": 0.0441, "step": 13461 }, { "epoch": 1.8867554309740715, "grad_norm": 0.13469266891479492, "learning_rate": 0.00011405883759866061, "loss": 0.0227, "step": 13462 }, { "epoch": 1.886895585143658, "grad_norm": 0.22757545113563538, "learning_rate": 0.00011404448696484094, "loss": 0.0615, "step": 13463 }, { "epoch": 1.8870357393132444, "grad_norm": 0.26594793796539307, "learning_rate": 0.00011403013633102128, "loss": 0.0311, "step": 13464 }, { "epoch": 1.887175893482831, "grad_norm": 0.1401015967130661, "learning_rate": 0.00011401578569720162, "loss": 0.0369, "step": 13465 }, { "epoch": 1.8873160476524178, "grad_norm": 0.40369248390197754, "learning_rate": 0.00011400143506338195, "loss": 0.0435, "step": 13466 }, { "epoch": 1.8874562018220042, "grad_norm": 0.315899133682251, "learning_rate": 0.00011398708442956229, "loss": 0.0347, "step": 13467 }, { "epoch": 1.8875963559915907, "grad_norm": 0.21384172141551971, "learning_rate": 0.00011397273379574265, "loss": 0.0241, "step": 13468 }, { "epoch": 1.8877365101611772, "grad_norm": 0.13499832153320312, "learning_rate": 0.00011395838316192298, "loss": 0.0485, "step": 13469 }, { "epoch": 1.8878766643307638, "grad_norm": 0.504688024520874, "learning_rate": 0.00011394403252810332, "loss": 0.0651, "step": 13470 }, { "epoch": 1.8880168185003505, "grad_norm": 0.2866809070110321, "learning_rate": 0.00011392968189428365, "loss": 0.056, "step": 13471 }, { "epoch": 1.888156972669937, "grad_norm": 0.3693518340587616, "learning_rate": 0.00011391533126046399, "loss": 0.0274, "step": 13472 }, { "epoch": 1.8882971268395234, "grad_norm": 0.3757310211658478, "learning_rate": 0.00011390098062664434, "loss": 0.042, "step": 13473 }, { "epoch": 1.88843728100911, "grad_norm": 0.4082602560520172, "learning_rate": 0.00011388662999282467, "loss": 0.0278, "step": 13474 }, { "epoch": 1.8885774351786966, "grad_norm": 0.1054476946592331, "learning_rate": 0.00011387227935900501, "loss": 0.0087, "step": 13475 }, { "epoch": 1.8887175893482833, 
"grad_norm": 0.22146852314472198, "learning_rate": 0.00011385792872518536, "loss": 0.0226, "step": 13476 }, { "epoch": 1.8888577435178697, "grad_norm": 0.11555270850658417, "learning_rate": 0.00011384357809136569, "loss": 0.0043, "step": 13477 }, { "epoch": 1.8889978976874562, "grad_norm": 0.27244818210601807, "learning_rate": 0.00011382922745754604, "loss": 0.0699, "step": 13478 }, { "epoch": 1.8891380518570426, "grad_norm": 0.2782879173755646, "learning_rate": 0.00011381487682372637, "loss": 0.026, "step": 13479 }, { "epoch": 1.8892782060266293, "grad_norm": 0.21919305622577667, "learning_rate": 0.00011380052618990671, "loss": 0.0188, "step": 13480 }, { "epoch": 1.8894183601962158, "grad_norm": 0.9740397930145264, "learning_rate": 0.00011378617555608705, "loss": 0.1612, "step": 13481 }, { "epoch": 1.8895585143658025, "grad_norm": 1.249995470046997, "learning_rate": 0.00011377182492226738, "loss": 0.0655, "step": 13482 }, { "epoch": 1.889698668535389, "grad_norm": 0.8391216993331909, "learning_rate": 0.00011375747428844772, "loss": 0.0429, "step": 13483 }, { "epoch": 1.8898388227049754, "grad_norm": 0.7672373652458191, "learning_rate": 0.00011374312365462808, "loss": 0.1755, "step": 13484 }, { "epoch": 1.889978976874562, "grad_norm": 0.7418503761291504, "learning_rate": 0.00011372877302080841, "loss": 0.1489, "step": 13485 }, { "epoch": 1.8901191310441485, "grad_norm": 0.393179327249527, "learning_rate": 0.00011371442238698875, "loss": 0.0773, "step": 13486 }, { "epoch": 1.8902592852137352, "grad_norm": 0.17001232504844666, "learning_rate": 0.00011370007175316908, "loss": 0.019, "step": 13487 }, { "epoch": 1.8903994393833217, "grad_norm": 0.1701052486896515, "learning_rate": 0.00011368572111934942, "loss": 0.0182, "step": 13488 }, { "epoch": 1.8905395935529081, "grad_norm": 0.4775990843772888, "learning_rate": 0.00011367137048552978, "loss": 0.0445, "step": 13489 }, { "epoch": 1.8906797477224946, "grad_norm": 0.21463489532470703, "learning_rate": 
0.0001136570198517101, "loss": 0.029, "step": 13490 }, { "epoch": 1.8908199018920813, "grad_norm": 0.31704986095428467, "learning_rate": 0.00011364266921789045, "loss": 0.0432, "step": 13491 }, { "epoch": 1.890960056061668, "grad_norm": 0.21317845582962036, "learning_rate": 0.00011362831858407079, "loss": 0.0328, "step": 13492 }, { "epoch": 1.8911002102312544, "grad_norm": 0.6330044269561768, "learning_rate": 0.00011361396795025112, "loss": 0.0722, "step": 13493 }, { "epoch": 1.8912403644008409, "grad_norm": 0.41525593400001526, "learning_rate": 0.00011359961731643147, "loss": 0.1009, "step": 13494 }, { "epoch": 1.8913805185704273, "grad_norm": 0.3246195614337921, "learning_rate": 0.0001135852666826118, "loss": 0.0593, "step": 13495 }, { "epoch": 1.891520672740014, "grad_norm": 0.4455593526363373, "learning_rate": 0.00011357091604879214, "loss": 0.052, "step": 13496 }, { "epoch": 1.8916608269096007, "grad_norm": 0.4324365556240082, "learning_rate": 0.00011355656541497249, "loss": 0.0558, "step": 13497 }, { "epoch": 1.8918009810791871, "grad_norm": 0.2009086161851883, "learning_rate": 0.00011354221478115282, "loss": 0.0356, "step": 13498 }, { "epoch": 1.8919411352487736, "grad_norm": 0.29308587312698364, "learning_rate": 0.00011352786414733316, "loss": 0.0215, "step": 13499 }, { "epoch": 1.89208128941836, "grad_norm": 0.23277276754379272, "learning_rate": 0.00011351351351351351, "loss": 0.0932, "step": 13500 }, { "epoch": 1.8922214435879467, "grad_norm": 0.40751221776008606, "learning_rate": 0.00011349916287969384, "loss": 0.1203, "step": 13501 }, { "epoch": 1.8923615977575334, "grad_norm": 0.4604926109313965, "learning_rate": 0.00011348481224587418, "loss": 0.0525, "step": 13502 }, { "epoch": 1.89250175192712, "grad_norm": 0.29643258452415466, "learning_rate": 0.00011347046161205453, "loss": 0.019, "step": 13503 }, { "epoch": 1.8926419060967064, "grad_norm": 0.1702430248260498, "learning_rate": 0.00011345611097823485, "loss": 0.0286, "step": 13504 }, { "epoch": 
1.8927820602662928, "grad_norm": 0.21273918449878693, "learning_rate": 0.00011344176034441521, "loss": 0.0557, "step": 13505 }, { "epoch": 1.8929222144358795, "grad_norm": 0.3799642026424408, "learning_rate": 0.00011342740971059554, "loss": 0.0455, "step": 13506 }, { "epoch": 1.8930623686054662, "grad_norm": 0.18924139440059662, "learning_rate": 0.00011341305907677588, "loss": 0.0492, "step": 13507 }, { "epoch": 1.8932025227750526, "grad_norm": 0.2973273992538452, "learning_rate": 0.00011339870844295622, "loss": 0.046, "step": 13508 }, { "epoch": 1.893342676944639, "grad_norm": 0.22490397095680237, "learning_rate": 0.00011338435780913655, "loss": 0.0166, "step": 13509 }, { "epoch": 1.8934828311142256, "grad_norm": 0.13594576716423035, "learning_rate": 0.00011337000717531691, "loss": 0.0196, "step": 13510 }, { "epoch": 1.8936229852838122, "grad_norm": 0.2492450475692749, "learning_rate": 0.00011335565654149725, "loss": 0.0521, "step": 13511 }, { "epoch": 1.8937631394533987, "grad_norm": 0.17327338457107544, "learning_rate": 0.00011334130590767758, "loss": 0.023, "step": 13512 }, { "epoch": 1.8939032936229854, "grad_norm": 0.49359628558158875, "learning_rate": 0.00011332695527385792, "loss": 0.0612, "step": 13513 }, { "epoch": 1.8940434477925718, "grad_norm": 0.2705613672733307, "learning_rate": 0.00011331260464003825, "loss": 0.0758, "step": 13514 }, { "epoch": 1.8941836019621583, "grad_norm": 0.4242211878299713, "learning_rate": 0.00011329825400621859, "loss": 0.018, "step": 13515 }, { "epoch": 1.894323756131745, "grad_norm": 0.4368789792060852, "learning_rate": 0.00011328390337239895, "loss": 0.066, "step": 13516 }, { "epoch": 1.8944639103013314, "grad_norm": 0.16248489916324615, "learning_rate": 0.00011326955273857927, "loss": 0.0131, "step": 13517 }, { "epoch": 1.8946040644709181, "grad_norm": 0.04456055909395218, "learning_rate": 0.00011325520210475962, "loss": 0.0053, "step": 13518 }, { "epoch": 1.8947442186405046, "grad_norm": 0.33838513493537903, 
"learning_rate": 0.00011324085147093996, "loss": 0.0891, "step": 13519 }, { "epoch": 1.894884372810091, "grad_norm": 0.1119290366768837, "learning_rate": 0.00011322650083712029, "loss": 0.0262, "step": 13520 }, { "epoch": 1.8950245269796775, "grad_norm": 0.5778630375862122, "learning_rate": 0.00011321215020330064, "loss": 0.058, "step": 13521 }, { "epoch": 1.8951646811492642, "grad_norm": 0.309861958026886, "learning_rate": 0.00011319779956948097, "loss": 0.036, "step": 13522 }, { "epoch": 1.8953048353188509, "grad_norm": 0.19732381403446198, "learning_rate": 0.00011318344893566131, "loss": 0.0314, "step": 13523 }, { "epoch": 1.8954449894884373, "grad_norm": 0.2120002657175064, "learning_rate": 0.00011316909830184166, "loss": 0.0237, "step": 13524 }, { "epoch": 1.8955851436580238, "grad_norm": 0.11605087667703629, "learning_rate": 0.00011315474766802198, "loss": 0.0248, "step": 13525 }, { "epoch": 1.8957252978276102, "grad_norm": 0.24174977838993073, "learning_rate": 0.00011314039703420234, "loss": 0.0679, "step": 13526 }, { "epoch": 1.895865451997197, "grad_norm": 0.26389169692993164, "learning_rate": 0.00011312604640038268, "loss": 0.0431, "step": 13527 }, { "epoch": 1.8960056061667836, "grad_norm": 0.37816429138183594, "learning_rate": 0.00011311169576656301, "loss": 0.0553, "step": 13528 }, { "epoch": 1.89614576033637, "grad_norm": 0.23505020141601562, "learning_rate": 0.00011309734513274335, "loss": 0.0955, "step": 13529 }, { "epoch": 1.8962859145059565, "grad_norm": 0.5466634035110474, "learning_rate": 0.00011308299449892368, "loss": 0.1658, "step": 13530 }, { "epoch": 1.896426068675543, "grad_norm": 0.4026920199394226, "learning_rate": 0.00011306864386510404, "loss": 0.0173, "step": 13531 }, { "epoch": 1.8965662228451297, "grad_norm": 0.3126298785209656, "learning_rate": 0.00011305429323128438, "loss": 0.0472, "step": 13532 }, { "epoch": 1.8967063770147163, "grad_norm": 1.9589697122573853, "learning_rate": 0.00011303994259746471, "loss": 0.0255, "step": 
13533 }, { "epoch": 1.8968465311843028, "grad_norm": 0.16841769218444824, "learning_rate": 0.00011302559196364505, "loss": 0.0101, "step": 13534 }, { "epoch": 1.8969866853538893, "grad_norm": 1.3485615253448486, "learning_rate": 0.00011301124132982539, "loss": 0.1446, "step": 13535 }, { "epoch": 1.8971268395234757, "grad_norm": 0.12767793238162994, "learning_rate": 0.00011299689069600572, "loss": 0.0145, "step": 13536 }, { "epoch": 1.8972669936930624, "grad_norm": 0.33895644545555115, "learning_rate": 0.00011298254006218608, "loss": 0.0339, "step": 13537 }, { "epoch": 1.897407147862649, "grad_norm": 0.26316457986831665, "learning_rate": 0.0001129681894283664, "loss": 0.0388, "step": 13538 }, { "epoch": 1.8975473020322355, "grad_norm": 0.3911347985267639, "learning_rate": 0.00011295383879454675, "loss": 0.0338, "step": 13539 }, { "epoch": 1.897687456201822, "grad_norm": 0.1672990620136261, "learning_rate": 0.00011293948816072709, "loss": 0.0158, "step": 13540 }, { "epoch": 1.8978276103714085, "grad_norm": 0.9989466667175293, "learning_rate": 0.00011292513752690742, "loss": 0.0723, "step": 13541 }, { "epoch": 1.8979677645409951, "grad_norm": 0.2344716340303421, "learning_rate": 0.00011291078689308777, "loss": 0.0172, "step": 13542 }, { "epoch": 1.8981079187105816, "grad_norm": 0.5946636199951172, "learning_rate": 0.00011289643625926812, "loss": 0.1419, "step": 13543 }, { "epoch": 1.8982480728801683, "grad_norm": 0.21360154449939728, "learning_rate": 0.00011288208562544844, "loss": 0.0428, "step": 13544 }, { "epoch": 1.8983882270497547, "grad_norm": 0.2801905572414398, "learning_rate": 0.00011286773499162879, "loss": 0.0217, "step": 13545 }, { "epoch": 1.8985283812193412, "grad_norm": 0.3123037815093994, "learning_rate": 0.00011285338435780913, "loss": 0.0437, "step": 13546 }, { "epoch": 1.8986685353889277, "grad_norm": 0.2466706484556198, "learning_rate": 0.00011283903372398947, "loss": 0.0712, "step": 13547 }, { "epoch": 1.8988086895585143, "grad_norm": 
0.25357887148857117, "learning_rate": 0.00011282468309016981, "loss": 0.0434, "step": 13548 }, { "epoch": 1.898948843728101, "grad_norm": 0.4434691071510315, "learning_rate": 0.00011281033245635014, "loss": 0.0526, "step": 13549 }, { "epoch": 1.8990889978976875, "grad_norm": 0.22153626382350922, "learning_rate": 0.00011279598182253048, "loss": 0.0434, "step": 13550 }, { "epoch": 1.899229152067274, "grad_norm": 0.24759724736213684, "learning_rate": 0.00011278163118871083, "loss": 0.043, "step": 13551 }, { "epoch": 1.8993693062368604, "grad_norm": 0.21771380305290222, "learning_rate": 0.00011276728055489115, "loss": 0.0605, "step": 13552 }, { "epoch": 1.899509460406447, "grad_norm": 0.11064166575670242, "learning_rate": 0.00011275292992107151, "loss": 0.011, "step": 13553 }, { "epoch": 1.8996496145760338, "grad_norm": 0.48809659481048584, "learning_rate": 0.00011273857928725185, "loss": 0.0526, "step": 13554 }, { "epoch": 1.8997897687456202, "grad_norm": 0.8010419607162476, "learning_rate": 0.00011272422865343218, "loss": 0.0903, "step": 13555 }, { "epoch": 1.8999299229152067, "grad_norm": 0.1474427729845047, "learning_rate": 0.00011270987801961252, "loss": 0.0155, "step": 13556 }, { "epoch": 1.9000700770847931, "grad_norm": 0.25691649317741394, "learning_rate": 0.00011269552738579285, "loss": 0.0327, "step": 13557 }, { "epoch": 1.9002102312543798, "grad_norm": 0.2793200612068176, "learning_rate": 0.0001126811767519732, "loss": 0.0297, "step": 13558 }, { "epoch": 1.9003503854239665, "grad_norm": 0.28630396723747253, "learning_rate": 0.00011266682611815355, "loss": 0.043, "step": 13559 }, { "epoch": 1.900490539593553, "grad_norm": 0.4067237675189972, "learning_rate": 0.00011265247548433388, "loss": 0.0815, "step": 13560 }, { "epoch": 1.9006306937631394, "grad_norm": 0.0867854431271553, "learning_rate": 0.00011263812485051422, "loss": 0.0218, "step": 13561 }, { "epoch": 1.9007708479327259, "grad_norm": 0.40777266025543213, "learning_rate": 0.00011262377421669456, 
"loss": 0.0615, "step": 13562 }, { "epoch": 1.9009110021023126, "grad_norm": 0.3923787474632263, "learning_rate": 0.0001126094235828749, "loss": 0.0323, "step": 13563 }, { "epoch": 1.9010511562718992, "grad_norm": 0.2128615528345108, "learning_rate": 0.00011259507294905525, "loss": 0.0179, "step": 13564 }, { "epoch": 1.9011913104414857, "grad_norm": 0.2645399570465088, "learning_rate": 0.00011258072231523557, "loss": 0.0585, "step": 13565 }, { "epoch": 1.9013314646110722, "grad_norm": 0.20695510506629944, "learning_rate": 0.00011256637168141592, "loss": 0.0175, "step": 13566 }, { "epoch": 1.9014716187806586, "grad_norm": 0.22354115545749664, "learning_rate": 0.00011255202104759626, "loss": 0.0359, "step": 13567 }, { "epoch": 1.9016117729502453, "grad_norm": 0.2357221394777298, "learning_rate": 0.00011253767041377659, "loss": 0.0274, "step": 13568 }, { "epoch": 1.9017519271198318, "grad_norm": 0.2637893855571747, "learning_rate": 0.00011252331977995694, "loss": 0.0269, "step": 13569 }, { "epoch": 1.9018920812894184, "grad_norm": 0.19776253402233124, "learning_rate": 0.00011250896914613728, "loss": 0.0238, "step": 13570 }, { "epoch": 1.902032235459005, "grad_norm": 0.18680135905742645, "learning_rate": 0.00011249461851231761, "loss": 0.0204, "step": 13571 }, { "epoch": 1.9021723896285914, "grad_norm": 0.3026014566421509, "learning_rate": 0.00011248026787849796, "loss": 0.0148, "step": 13572 }, { "epoch": 1.902312543798178, "grad_norm": 0.32537463307380676, "learning_rate": 0.00011246591724467828, "loss": 0.0351, "step": 13573 }, { "epoch": 1.9024526979677645, "grad_norm": 0.9825728535652161, "learning_rate": 0.00011245156661085864, "loss": 0.1462, "step": 13574 }, { "epoch": 1.9025928521373512, "grad_norm": 0.9745528697967529, "learning_rate": 0.00011243721597703898, "loss": 0.0425, "step": 13575 }, { "epoch": 1.9027330063069376, "grad_norm": 0.5819140076637268, "learning_rate": 0.00011242286534321931, "loss": 0.0507, "step": 13576 }, { "epoch": 1.902873160476524, 
"grad_norm": 0.3542453944683075, "learning_rate": 0.00011240851470939965, "loss": 0.0719, "step": 13577 }, { "epoch": 1.9030133146461106, "grad_norm": 0.3804064393043518, "learning_rate": 0.00011239416407558, "loss": 0.0441, "step": 13578 }, { "epoch": 1.9031534688156972, "grad_norm": 0.6226317882537842, "learning_rate": 0.00011237981344176034, "loss": 0.1228, "step": 13579 }, { "epoch": 1.903293622985284, "grad_norm": 0.2324400395154953, "learning_rate": 0.00011236546280794068, "loss": 0.0431, "step": 13580 }, { "epoch": 1.9034337771548704, "grad_norm": 0.9894178509712219, "learning_rate": 0.00011235111217412102, "loss": 0.0332, "step": 13581 }, { "epoch": 1.9035739313244568, "grad_norm": 1.1990857124328613, "learning_rate": 0.00011233676154030135, "loss": 0.2815, "step": 13582 }, { "epoch": 1.9037140854940433, "grad_norm": 0.1801864057779312, "learning_rate": 0.00011232241090648169, "loss": 0.01, "step": 13583 }, { "epoch": 1.90385423966363, "grad_norm": 0.24877943098545074, "learning_rate": 0.00011230806027266202, "loss": 0.0165, "step": 13584 }, { "epoch": 1.9039943938332167, "grad_norm": 0.37404167652130127, "learning_rate": 0.00011229370963884238, "loss": 0.0305, "step": 13585 }, { "epoch": 1.9041345480028031, "grad_norm": 0.41422197222709656, "learning_rate": 0.00011227935900502272, "loss": 0.0691, "step": 13586 }, { "epoch": 1.9042747021723896, "grad_norm": 0.20202675461769104, "learning_rate": 0.00011226500837120305, "loss": 0.0298, "step": 13587 }, { "epoch": 1.904414856341976, "grad_norm": 0.17427481710910797, "learning_rate": 0.00011225065773738339, "loss": 0.0199, "step": 13588 }, { "epoch": 1.9045550105115627, "grad_norm": 0.36209309101104736, "learning_rate": 0.00011223630710356374, "loss": 0.0555, "step": 13589 }, { "epoch": 1.9046951646811494, "grad_norm": 0.1634976714849472, "learning_rate": 0.00011222195646974407, "loss": 0.0311, "step": 13590 }, { "epoch": 1.9048353188507359, "grad_norm": 0.358043372631073, "learning_rate": 
0.00011220760583592441, "loss": 0.0421, "step": 13591 }, { "epoch": 1.9049754730203223, "grad_norm": 0.5043150782585144, "learning_rate": 0.00011219325520210474, "loss": 0.0298, "step": 13592 }, { "epoch": 1.9051156271899088, "grad_norm": 0.1375071406364441, "learning_rate": 0.00011217890456828509, "loss": 0.023, "step": 13593 }, { "epoch": 1.9052557813594955, "grad_norm": 0.17262771725654602, "learning_rate": 0.00011216455393446543, "loss": 0.0472, "step": 13594 }, { "epoch": 1.9053959355290822, "grad_norm": 0.6882639527320862, "learning_rate": 0.00011215020330064577, "loss": 0.0569, "step": 13595 }, { "epoch": 1.9055360896986686, "grad_norm": 0.23287679255008698, "learning_rate": 0.00011213585266682611, "loss": 0.04, "step": 13596 }, { "epoch": 1.905676243868255, "grad_norm": 0.19731824100017548, "learning_rate": 0.00011212150203300645, "loss": 0.0376, "step": 13597 }, { "epoch": 1.9058163980378415, "grad_norm": 0.5704525113105774, "learning_rate": 0.00011210715139918678, "loss": 0.0499, "step": 13598 }, { "epoch": 1.9059565522074282, "grad_norm": 0.23459932208061218, "learning_rate": 0.00011209280076536712, "loss": 0.0594, "step": 13599 }, { "epoch": 1.9060967063770147, "grad_norm": 0.08259747177362442, "learning_rate": 0.00011207845013154745, "loss": 0.0108, "step": 13600 }, { "epoch": 1.9062368605466014, "grad_norm": 0.2248906046152115, "learning_rate": 0.00011206409949772781, "loss": 0.0234, "step": 13601 }, { "epoch": 1.9063770147161878, "grad_norm": 0.2571329176425934, "learning_rate": 0.00011204974886390815, "loss": 0.0389, "step": 13602 }, { "epoch": 1.9065171688857743, "grad_norm": 0.33607983589172363, "learning_rate": 0.00011203539823008848, "loss": 0.0716, "step": 13603 }, { "epoch": 1.9066573230553607, "grad_norm": 0.06593748182058334, "learning_rate": 0.00011202104759626882, "loss": 0.0383, "step": 13604 }, { "epoch": 1.9067974772249474, "grad_norm": 0.21620528399944305, "learning_rate": 0.00011200669696244918, "loss": 0.021, "step": 13605 }, { 
"epoch": 1.906937631394534, "grad_norm": 0.4865710735321045, "learning_rate": 0.0001119923463286295, "loss": 0.1057, "step": 13606 }, { "epoch": 1.9070777855641206, "grad_norm": 0.44378310441970825, "learning_rate": 0.00011197799569480985, "loss": 0.0803, "step": 13607 }, { "epoch": 1.907217939733707, "grad_norm": 0.18585270643234253, "learning_rate": 0.00011196364506099018, "loss": 0.0467, "step": 13608 }, { "epoch": 1.9073580939032935, "grad_norm": 0.2271266132593155, "learning_rate": 0.00011194929442717052, "loss": 0.0272, "step": 13609 }, { "epoch": 1.9074982480728802, "grad_norm": 0.14281751215457916, "learning_rate": 0.00011193494379335086, "loss": 0.0116, "step": 13610 }, { "epoch": 1.9076384022424668, "grad_norm": 0.2914571166038513, "learning_rate": 0.0001119205931595312, "loss": 0.0499, "step": 13611 }, { "epoch": 1.9077785564120533, "grad_norm": 0.21913349628448486, "learning_rate": 0.00011190624252571155, "loss": 0.0742, "step": 13612 }, { "epoch": 1.9079187105816398, "grad_norm": 0.264310359954834, "learning_rate": 0.00011189189189189189, "loss": 0.0588, "step": 13613 }, { "epoch": 1.9080588647512262, "grad_norm": 0.3651709258556366, "learning_rate": 0.00011187754125807222, "loss": 0.0341, "step": 13614 }, { "epoch": 1.908199018920813, "grad_norm": 0.32444310188293457, "learning_rate": 0.00011186319062425256, "loss": 0.0593, "step": 13615 }, { "epoch": 1.9083391730903996, "grad_norm": 0.09272079169750214, "learning_rate": 0.00011184883999043291, "loss": 0.0112, "step": 13616 }, { "epoch": 1.908479327259986, "grad_norm": 0.21247078478336334, "learning_rate": 0.00011183448935661324, "loss": 0.0378, "step": 13617 }, { "epoch": 1.9086194814295725, "grad_norm": 0.27204954624176025, "learning_rate": 0.00011182013872279358, "loss": 0.0559, "step": 13618 }, { "epoch": 1.908759635599159, "grad_norm": 0.5331076383590698, "learning_rate": 0.00011180578808897391, "loss": 0.0873, "step": 13619 }, { "epoch": 1.9088997897687456, "grad_norm": 0.20795750617980957, 
"learning_rate": 0.00011179143745515425, "loss": 0.0291, "step": 13620 }, { "epoch": 1.9090399439383323, "grad_norm": 0.3403012156486511, "learning_rate": 0.00011177708682133461, "loss": 0.041, "step": 13621 }, { "epoch": 1.9091800981079188, "grad_norm": 0.3852470815181732, "learning_rate": 0.00011176273618751494, "loss": 0.0225, "step": 13622 }, { "epoch": 1.9093202522775052, "grad_norm": 0.46531111001968384, "learning_rate": 0.00011174838555369528, "loss": 0.0907, "step": 13623 }, { "epoch": 1.9094604064470917, "grad_norm": 0.28256070613861084, "learning_rate": 0.00011173403491987562, "loss": 0.0856, "step": 13624 }, { "epoch": 1.9096005606166784, "grad_norm": 0.2915250062942505, "learning_rate": 0.00011171968428605595, "loss": 0.0394, "step": 13625 }, { "epoch": 1.9097407147862648, "grad_norm": 0.3901909589767456, "learning_rate": 0.0001117053336522363, "loss": 0.0321, "step": 13626 }, { "epoch": 1.9098808689558515, "grad_norm": 0.1341322511434555, "learning_rate": 0.00011169098301841664, "loss": 0.0363, "step": 13627 }, { "epoch": 1.910021023125438, "grad_norm": 0.6977105736732483, "learning_rate": 0.00011167663238459698, "loss": 0.0528, "step": 13628 }, { "epoch": 1.9101611772950244, "grad_norm": 0.4452248215675354, "learning_rate": 0.00011166228175077732, "loss": 0.0687, "step": 13629 }, { "epoch": 1.9103013314646111, "grad_norm": 1.505567193031311, "learning_rate": 0.00011164793111695765, "loss": 0.0767, "step": 13630 }, { "epoch": 1.9104414856341976, "grad_norm": 1.6543866395950317, "learning_rate": 0.00011163358048313799, "loss": 0.0266, "step": 13631 }, { "epoch": 1.9105816398037843, "grad_norm": 0.6628902554512024, "learning_rate": 0.00011161922984931835, "loss": 0.0967, "step": 13632 }, { "epoch": 1.9107217939733707, "grad_norm": 1.0607616901397705, "learning_rate": 0.00011160487921549868, "loss": 0.0571, "step": 13633 }, { "epoch": 1.9108619481429572, "grad_norm": 0.14026914536952972, "learning_rate": 0.00011159052858167902, "loss": 0.0088, "step": 
13634 }, { "epoch": 1.9110021023125436, "grad_norm": 0.04002803936600685, "learning_rate": 0.00011157617794785935, "loss": 0.003, "step": 13635 }, { "epoch": 1.9111422564821303, "grad_norm": 0.2342625856399536, "learning_rate": 0.00011156182731403969, "loss": 0.0309, "step": 13636 }, { "epoch": 1.911282410651717, "grad_norm": 0.11075586080551147, "learning_rate": 0.00011154747668022004, "loss": 0.0248, "step": 13637 }, { "epoch": 1.9114225648213035, "grad_norm": 0.44621115922927856, "learning_rate": 0.00011153312604640037, "loss": 0.0383, "step": 13638 }, { "epoch": 1.91156271899089, "grad_norm": 0.1257069706916809, "learning_rate": 0.00011151877541258071, "loss": 0.0061, "step": 13639 }, { "epoch": 1.9117028731604764, "grad_norm": 0.4938739240169525, "learning_rate": 0.00011150442477876106, "loss": 0.031, "step": 13640 }, { "epoch": 1.911843027330063, "grad_norm": 0.21167363226413727, "learning_rate": 0.00011149007414494139, "loss": 0.0149, "step": 13641 }, { "epoch": 1.9119831814996497, "grad_norm": 0.22203145921230316, "learning_rate": 0.00011147572351112174, "loss": 0.0533, "step": 13642 }, { "epoch": 1.9121233356692362, "grad_norm": 0.24504150450229645, "learning_rate": 0.00011146137287730207, "loss": 0.0374, "step": 13643 }, { "epoch": 1.9122634898388227, "grad_norm": 0.1304401010274887, "learning_rate": 0.00011144702224348241, "loss": 0.0117, "step": 13644 }, { "epoch": 1.9124036440084091, "grad_norm": 0.34737083315849304, "learning_rate": 0.00011143267160966275, "loss": 0.0373, "step": 13645 }, { "epoch": 1.9125437981779958, "grad_norm": 0.2828051745891571, "learning_rate": 0.00011141832097584308, "loss": 0.0247, "step": 13646 }, { "epoch": 1.9126839523475825, "grad_norm": 0.3984811305999756, "learning_rate": 0.00011140397034202342, "loss": 0.0302, "step": 13647 }, { "epoch": 1.912824106517169, "grad_norm": 0.2794347405433655, "learning_rate": 0.00011138961970820378, "loss": 0.0946, "step": 13648 }, { "epoch": 1.9129642606867554, "grad_norm": 
0.2458021193742752, "learning_rate": 0.00011137526907438411, "loss": 0.0353, "step": 13649 }, { "epoch": 1.9131044148563419, "grad_norm": 0.24065767228603363, "learning_rate": 0.00011136091844056445, "loss": 0.0148, "step": 13650 }, { "epoch": 1.9132445690259285, "grad_norm": 0.20720089972019196, "learning_rate": 0.00011134656780674479, "loss": 0.0524, "step": 13651 }, { "epoch": 1.9133847231955152, "grad_norm": 0.36210453510284424, "learning_rate": 0.00011133221717292512, "loss": 0.0519, "step": 13652 }, { "epoch": 1.9135248773651017, "grad_norm": 0.23472703993320465, "learning_rate": 0.00011131786653910548, "loss": 0.0205, "step": 13653 }, { "epoch": 1.9136650315346881, "grad_norm": 0.17534111440181732, "learning_rate": 0.0001113035159052858, "loss": 0.0204, "step": 13654 }, { "epoch": 1.9138051857042746, "grad_norm": 0.1530742198228836, "learning_rate": 0.00011128916527146615, "loss": 0.0175, "step": 13655 }, { "epoch": 1.9139453398738613, "grad_norm": 0.24701647460460663, "learning_rate": 0.00011127481463764649, "loss": 0.016, "step": 13656 }, { "epoch": 1.9140854940434477, "grad_norm": 0.2545458972454071, "learning_rate": 0.00011126046400382682, "loss": 0.0806, "step": 13657 }, { "epoch": 1.9142256482130344, "grad_norm": 0.4718727767467499, "learning_rate": 0.00011124611337000717, "loss": 0.0489, "step": 13658 }, { "epoch": 1.9143658023826209, "grad_norm": 0.7278096675872803, "learning_rate": 0.00011123176273618752, "loss": 0.0858, "step": 13659 }, { "epoch": 1.9145059565522073, "grad_norm": 0.08965340256690979, "learning_rate": 0.00011121741210236784, "loss": 0.0098, "step": 13660 }, { "epoch": 1.914646110721794, "grad_norm": 0.211187481880188, "learning_rate": 0.00011120306146854819, "loss": 0.014, "step": 13661 }, { "epoch": 1.9147862648913805, "grad_norm": 0.520647406578064, "learning_rate": 0.00011118871083472852, "loss": 0.1383, "step": 13662 }, { "epoch": 1.9149264190609672, "grad_norm": 0.7984415292739868, "learning_rate": 0.00011117436020090886, 
"loss": 0.0993, "step": 13663 }, { "epoch": 1.9150665732305536, "grad_norm": 0.07336806505918503, "learning_rate": 0.00011116000956708921, "loss": 0.0068, "step": 13664 }, { "epoch": 1.91520672740014, "grad_norm": 0.25393521785736084, "learning_rate": 0.00011114565893326954, "loss": 0.0699, "step": 13665 }, { "epoch": 1.9153468815697265, "grad_norm": 0.10439901798963547, "learning_rate": 0.00011113130829944988, "loss": 0.0105, "step": 13666 }, { "epoch": 1.9154870357393132, "grad_norm": 0.32841578125953674, "learning_rate": 0.00011111695766563023, "loss": 0.0249, "step": 13667 }, { "epoch": 1.9156271899089, "grad_norm": 0.6405636668205261, "learning_rate": 0.00011110260703181055, "loss": 0.0366, "step": 13668 }, { "epoch": 1.9157673440784864, "grad_norm": 0.6857269406318665, "learning_rate": 0.00011108825639799091, "loss": 0.0974, "step": 13669 }, { "epoch": 1.9159074982480728, "grad_norm": 0.28608301281929016, "learning_rate": 0.00011107390576417124, "loss": 0.0925, "step": 13670 }, { "epoch": 1.9160476524176593, "grad_norm": 0.862321138381958, "learning_rate": 0.00011105955513035158, "loss": 0.0852, "step": 13671 }, { "epoch": 1.916187806587246, "grad_norm": 0.3136264979839325, "learning_rate": 0.00011104520449653192, "loss": 0.0695, "step": 13672 }, { "epoch": 1.9163279607568326, "grad_norm": 0.5510244965553284, "learning_rate": 0.00011103085386271225, "loss": 0.0729, "step": 13673 }, { "epoch": 1.916468114926419, "grad_norm": 0.38377177715301514, "learning_rate": 0.00011101650322889261, "loss": 0.0411, "step": 13674 }, { "epoch": 1.9166082690960056, "grad_norm": 0.4233558773994446, "learning_rate": 0.00011100215259507295, "loss": 0.0535, "step": 13675 }, { "epoch": 1.916748423265592, "grad_norm": 0.2139747440814972, "learning_rate": 0.00011098780196125328, "loss": 0.051, "step": 13676 }, { "epoch": 1.9168885774351787, "grad_norm": 0.2551210820674896, "learning_rate": 0.00011097345132743362, "loss": 0.0227, "step": 13677 }, { "epoch": 1.9170287316047654, 
"grad_norm": 0.45006346702575684, "learning_rate": 0.00011095910069361395, "loss": 0.0527, "step": 13678 }, { "epoch": 1.9171688857743518, "grad_norm": 0.6150220036506653, "learning_rate": 0.00011094475005979429, "loss": 0.0294, "step": 13679 }, { "epoch": 1.9173090399439383, "grad_norm": 0.6374920606613159, "learning_rate": 0.00011093039942597465, "loss": 0.0715, "step": 13680 }, { "epoch": 1.9174491941135248, "grad_norm": 0.2077314257621765, "learning_rate": 0.00011091604879215497, "loss": 0.0164, "step": 13681 }, { "epoch": 1.9175893482831115, "grad_norm": 0.20097874104976654, "learning_rate": 0.00011090169815833532, "loss": 0.0449, "step": 13682 }, { "epoch": 1.9177295024526981, "grad_norm": 0.6967077851295471, "learning_rate": 0.00011088734752451566, "loss": 0.0787, "step": 13683 }, { "epoch": 1.9178696566222846, "grad_norm": 0.2360992133617401, "learning_rate": 0.00011087299689069599, "loss": 0.0237, "step": 13684 }, { "epoch": 1.918009810791871, "grad_norm": 0.39261049032211304, "learning_rate": 0.00011085864625687634, "loss": 0.0359, "step": 13685 }, { "epoch": 1.9181499649614575, "grad_norm": 0.08414167165756226, "learning_rate": 0.00011084429562305667, "loss": 0.0109, "step": 13686 }, { "epoch": 1.9182901191310442, "grad_norm": 0.20710989832878113, "learning_rate": 0.00011082994498923701, "loss": 0.0197, "step": 13687 }, { "epoch": 1.9184302733006307, "grad_norm": 0.38900843262672424, "learning_rate": 0.00011081559435541736, "loss": 0.0294, "step": 13688 }, { "epoch": 1.9185704274702173, "grad_norm": 0.38893353939056396, "learning_rate": 0.00011080124372159768, "loss": 0.0262, "step": 13689 }, { "epoch": 1.9187105816398038, "grad_norm": 0.09258971363306046, "learning_rate": 0.00011078689308777804, "loss": 0.019, "step": 13690 }, { "epoch": 1.9188507358093903, "grad_norm": 0.20317251980304718, "learning_rate": 0.00011077254245395838, "loss": 0.026, "step": 13691 }, { "epoch": 1.9189908899789767, "grad_norm": 0.36524906754493713, "learning_rate": 
0.00011075819182013871, "loss": 0.0991, "step": 13692 }, { "epoch": 1.9191310441485634, "grad_norm": 0.08668703585863113, "learning_rate": 0.00011074384118631905, "loss": 0.0144, "step": 13693 }, { "epoch": 1.91927119831815, "grad_norm": 0.15800528228282928, "learning_rate": 0.0001107294905524994, "loss": 0.0204, "step": 13694 }, { "epoch": 1.9194113524877365, "grad_norm": 0.5117565393447876, "learning_rate": 0.00011071513991867972, "loss": 0.1666, "step": 13695 }, { "epoch": 1.919551506657323, "grad_norm": 0.3248744606971741, "learning_rate": 0.00011070078928486008, "loss": 0.0638, "step": 13696 }, { "epoch": 1.9196916608269095, "grad_norm": 0.3702656030654907, "learning_rate": 0.00011068643865104041, "loss": 0.1141, "step": 13697 }, { "epoch": 1.9198318149964961, "grad_norm": 0.2507188320159912, "learning_rate": 0.00011067208801722075, "loss": 0.0379, "step": 13698 }, { "epoch": 1.9199719691660828, "grad_norm": 0.2232222855091095, "learning_rate": 0.00011065773738340109, "loss": 0.0753, "step": 13699 }, { "epoch": 1.9201121233356693, "grad_norm": 0.553311824798584, "learning_rate": 0.00011064338674958142, "loss": 0.0721, "step": 13700 }, { "epoch": 1.9202522775052557, "grad_norm": 0.37850871682167053, "learning_rate": 0.00011062903611576178, "loss": 0.0804, "step": 13701 }, { "epoch": 1.9203924316748422, "grad_norm": 0.16052445769309998, "learning_rate": 0.00011061468548194212, "loss": 0.0137, "step": 13702 }, { "epoch": 1.9205325858444289, "grad_norm": 0.21905580163002014, "learning_rate": 0.00011060033484812245, "loss": 0.0399, "step": 13703 }, { "epoch": 1.9206727400140156, "grad_norm": 0.29196351766586304, "learning_rate": 0.00011058598421430279, "loss": 0.0295, "step": 13704 }, { "epoch": 1.920812894183602, "grad_norm": 0.2661282420158386, "learning_rate": 0.00011057163358048312, "loss": 0.0186, "step": 13705 }, { "epoch": 1.9209530483531885, "grad_norm": 0.43304118514060974, "learning_rate": 0.00011055728294666347, "loss": 0.0624, "step": 13706 }, { 
"epoch": 1.921093202522775, "grad_norm": 0.146284818649292, "learning_rate": 0.00011054293231284382, "loss": 0.0194, "step": 13707 }, { "epoch": 1.9212333566923616, "grad_norm": 0.3591146767139435, "learning_rate": 0.00011052858167902414, "loss": 0.0316, "step": 13708 }, { "epoch": 1.9213735108619483, "grad_norm": 0.4111204445362091, "learning_rate": 0.00011051423104520449, "loss": 0.064, "step": 13709 }, { "epoch": 1.9215136650315348, "grad_norm": 0.21412111818790436, "learning_rate": 0.00011049988041138483, "loss": 0.0172, "step": 13710 }, { "epoch": 1.9216538192011212, "grad_norm": 0.12354470789432526, "learning_rate": 0.00011048552977756516, "loss": 0.0276, "step": 13711 }, { "epoch": 1.9217939733707077, "grad_norm": 0.137582466006279, "learning_rate": 0.00011047117914374551, "loss": 0.0318, "step": 13712 }, { "epoch": 1.9219341275402944, "grad_norm": 0.5220590233802795, "learning_rate": 0.00011045682850992584, "loss": 0.1214, "step": 13713 }, { "epoch": 1.9220742817098808, "grad_norm": 0.6386471390724182, "learning_rate": 0.00011044247787610618, "loss": 0.0373, "step": 13714 }, { "epoch": 1.9222144358794675, "grad_norm": 0.04840155318379402, "learning_rate": 0.00011042812724228653, "loss": 0.0047, "step": 13715 }, { "epoch": 1.922354590049054, "grad_norm": 0.27740180492401123, "learning_rate": 0.00011041377660846685, "loss": 0.04, "step": 13716 }, { "epoch": 1.9224947442186404, "grad_norm": 0.5502803921699524, "learning_rate": 0.00011039942597464721, "loss": 0.0292, "step": 13717 }, { "epoch": 1.922634898388227, "grad_norm": 0.27440428733825684, "learning_rate": 0.00011038507534082755, "loss": 0.0786, "step": 13718 }, { "epoch": 1.9227750525578136, "grad_norm": 1.0464221239089966, "learning_rate": 0.00011037072470700788, "loss": 0.0929, "step": 13719 }, { "epoch": 1.9229152067274002, "grad_norm": 0.11623378098011017, "learning_rate": 0.00011035637407318822, "loss": 0.0163, "step": 13720 }, { "epoch": 1.9230553608969867, "grad_norm": 0.15276794135570526, 
"learning_rate": 0.00011034202343936855, "loss": 0.0137, "step": 13721 }, { "epoch": 1.9231955150665732, "grad_norm": 0.5811693072319031, "learning_rate": 0.0001103276728055489, "loss": 0.0824, "step": 13722 }, { "epoch": 1.9233356692361596, "grad_norm": 0.416728675365448, "learning_rate": 0.00011031332217172925, "loss": 0.0526, "step": 13723 }, { "epoch": 1.9234758234057463, "grad_norm": 0.33003339171409607, "learning_rate": 0.00011029897153790958, "loss": 0.0795, "step": 13724 }, { "epoch": 1.923615977575333, "grad_norm": 0.4260284900665283, "learning_rate": 0.00011028462090408992, "loss": 0.0505, "step": 13725 }, { "epoch": 1.9237561317449194, "grad_norm": 0.491764098405838, "learning_rate": 0.00011027027027027026, "loss": 0.071, "step": 13726 }, { "epoch": 1.923896285914506, "grad_norm": 0.6291949152946472, "learning_rate": 0.00011025591963645059, "loss": 0.0306, "step": 13727 }, { "epoch": 1.9240364400840924, "grad_norm": 0.26882901787757874, "learning_rate": 0.00011024156900263095, "loss": 0.0195, "step": 13728 }, { "epoch": 1.924176594253679, "grad_norm": 0.4803031086921692, "learning_rate": 0.00011022721836881129, "loss": 0.0484, "step": 13729 }, { "epoch": 1.9243167484232657, "grad_norm": 0.6390761733055115, "learning_rate": 0.00011021286773499162, "loss": 0.0549, "step": 13730 }, { "epoch": 1.9244569025928522, "grad_norm": 0.4836127460002899, "learning_rate": 0.00011019851710117196, "loss": 0.0135, "step": 13731 }, { "epoch": 1.9245970567624386, "grad_norm": 0.30497288703918457, "learning_rate": 0.00011018416646735229, "loss": 0.0222, "step": 13732 }, { "epoch": 1.924737210932025, "grad_norm": 0.22047258913516998, "learning_rate": 0.00011016981583353264, "loss": 0.0325, "step": 13733 }, { "epoch": 1.9248773651016118, "grad_norm": 0.43384021520614624, "learning_rate": 0.00011015546519971298, "loss": 0.021, "step": 13734 }, { "epoch": 1.9250175192711985, "grad_norm": 0.6336798667907715, "learning_rate": 0.00011014111456589331, "loss": 0.0415, "step": 13735 
}, { "epoch": 1.925157673440785, "grad_norm": 0.06143474578857422, "learning_rate": 0.00011012676393207366, "loss": 0.0135, "step": 13736 }, { "epoch": 1.9252978276103714, "grad_norm": 0.33726197481155396, "learning_rate": 0.000110112413298254, "loss": 0.0394, "step": 13737 }, { "epoch": 1.9254379817799578, "grad_norm": 0.22072440385818481, "learning_rate": 0.00011009806266443434, "loss": 0.0475, "step": 13738 }, { "epoch": 1.9255781359495445, "grad_norm": 0.8299367427825928, "learning_rate": 0.00011008371203061468, "loss": 0.0529, "step": 13739 }, { "epoch": 1.9257182901191312, "grad_norm": 0.1965607851743698, "learning_rate": 0.00011006936139679501, "loss": 0.0118, "step": 13740 }, { "epoch": 1.9258584442887177, "grad_norm": 0.24947310984134674, "learning_rate": 0.00011005501076297535, "loss": 0.0304, "step": 13741 }, { "epoch": 1.9259985984583041, "grad_norm": 0.3248518705368042, "learning_rate": 0.0001100406601291557, "loss": 0.074, "step": 13742 }, { "epoch": 1.9261387526278906, "grad_norm": 0.4450487494468689, "learning_rate": 0.00011002630949533602, "loss": 0.0623, "step": 13743 }, { "epoch": 1.9262789067974773, "grad_norm": 0.5626828074455261, "learning_rate": 0.00011001195886151638, "loss": 0.0394, "step": 13744 }, { "epoch": 1.9264190609670637, "grad_norm": 0.3531736135482788, "learning_rate": 0.00010999760822769672, "loss": 0.064, "step": 13745 }, { "epoch": 1.9265592151366504, "grad_norm": 0.08108167350292206, "learning_rate": 0.00010998325759387705, "loss": 0.0097, "step": 13746 }, { "epoch": 1.9266993693062369, "grad_norm": 0.11380761116743088, "learning_rate": 0.00010996890696005739, "loss": 0.0176, "step": 13747 }, { "epoch": 1.9268395234758233, "grad_norm": 0.15786680579185486, "learning_rate": 0.00010995455632623772, "loss": 0.0164, "step": 13748 }, { "epoch": 1.92697967764541, "grad_norm": 0.22282624244689941, "learning_rate": 0.00010994020569241808, "loss": 0.0802, "step": 13749 }, { "epoch": 1.9271198318149965, "grad_norm": 0.34158846735954285, 
"learning_rate": 0.00010992585505859842, "loss": 0.0749, "step": 13750 }, { "epoch": 1.9272599859845831, "grad_norm": 0.23161712288856506, "learning_rate": 0.00010991150442477875, "loss": 0.0402, "step": 13751 }, { "epoch": 1.9274001401541696, "grad_norm": 0.19699764251708984, "learning_rate": 0.00010989715379095909, "loss": 0.0222, "step": 13752 }, { "epoch": 1.927540294323756, "grad_norm": 0.3704795241355896, "learning_rate": 0.00010988280315713943, "loss": 0.013, "step": 13753 }, { "epoch": 1.9276804484933425, "grad_norm": 0.38436684012413025, "learning_rate": 0.00010986845252331977, "loss": 0.013, "step": 13754 }, { "epoch": 1.9278206026629292, "grad_norm": 0.22594177722930908, "learning_rate": 0.00010985410188950011, "loss": 0.031, "step": 13755 }, { "epoch": 1.9279607568325159, "grad_norm": 0.09729012101888657, "learning_rate": 0.00010983975125568044, "loss": 0.0152, "step": 13756 }, { "epoch": 1.9281009110021023, "grad_norm": 0.3148127496242523, "learning_rate": 0.00010982540062186079, "loss": 0.0314, "step": 13757 }, { "epoch": 1.9282410651716888, "grad_norm": 0.26471808552742004, "learning_rate": 0.00010981104998804113, "loss": 0.0418, "step": 13758 }, { "epoch": 1.9283812193412753, "grad_norm": 0.09002430737018585, "learning_rate": 0.00010979669935422146, "loss": 0.0071, "step": 13759 }, { "epoch": 1.928521373510862, "grad_norm": 0.2948862910270691, "learning_rate": 0.00010978234872040181, "loss": 0.0539, "step": 13760 }, { "epoch": 1.9286615276804486, "grad_norm": 0.1825835108757019, "learning_rate": 0.00010976799808658215, "loss": 0.0193, "step": 13761 }, { "epoch": 1.928801681850035, "grad_norm": 0.1326032280921936, "learning_rate": 0.00010975364745276248, "loss": 0.0272, "step": 13762 }, { "epoch": 1.9289418360196215, "grad_norm": 0.7449473142623901, "learning_rate": 0.00010973929681894282, "loss": 0.1853, "step": 13763 }, { "epoch": 1.929081990189208, "grad_norm": 0.21338985860347748, "learning_rate": 0.00010972494618512318, "loss": 0.056, "step": 
13764 }, { "epoch": 1.9292221443587947, "grad_norm": 0.6736727356910706, "learning_rate": 0.00010971059555130351, "loss": 0.0164, "step": 13765 }, { "epoch": 1.9293622985283814, "grad_norm": 0.36937427520751953, "learning_rate": 0.00010969624491748385, "loss": 0.024, "step": 13766 }, { "epoch": 1.9295024526979678, "grad_norm": 0.14926302433013916, "learning_rate": 0.00010968189428366418, "loss": 0.0137, "step": 13767 }, { "epoch": 1.9296426068675543, "grad_norm": 0.3490966856479645, "learning_rate": 0.00010966754364984452, "loss": 0.0288, "step": 13768 }, { "epoch": 1.9297827610371407, "grad_norm": 0.4471503794193268, "learning_rate": 0.00010965319301602488, "loss": 0.0268, "step": 13769 }, { "epoch": 1.9299229152067274, "grad_norm": 0.22939524054527283, "learning_rate": 0.0001096388423822052, "loss": 0.0694, "step": 13770 }, { "epoch": 1.9300630693763141, "grad_norm": 0.5468003749847412, "learning_rate": 0.00010962449174838555, "loss": 0.0599, "step": 13771 }, { "epoch": 1.9302032235459006, "grad_norm": 0.2246396392583847, "learning_rate": 0.00010961014111456589, "loss": 0.0338, "step": 13772 }, { "epoch": 1.930343377715487, "grad_norm": 0.3756606876850128, "learning_rate": 0.00010959579048074622, "loss": 0.0501, "step": 13773 }, { "epoch": 1.9304835318850735, "grad_norm": 0.2372695356607437, "learning_rate": 0.00010958143984692656, "loss": 0.0467, "step": 13774 }, { "epoch": 1.9306236860546602, "grad_norm": 0.20574170351028442, "learning_rate": 0.00010956708921310689, "loss": 0.0221, "step": 13775 }, { "epoch": 1.9307638402242466, "grad_norm": 0.45478641986846924, "learning_rate": 0.00010955273857928725, "loss": 0.0319, "step": 13776 }, { "epoch": 1.9309039943938333, "grad_norm": 0.39161354303359985, "learning_rate": 0.00010953838794546759, "loss": 0.0298, "step": 13777 }, { "epoch": 1.9310441485634198, "grad_norm": 0.3739454746246338, "learning_rate": 0.00010952403731164792, "loss": 0.0221, "step": 13778 }, { "epoch": 1.9311843027330062, "grad_norm": 
0.315547376871109, "learning_rate": 0.00010950968667782826, "loss": 0.0253, "step": 13779 }, { "epoch": 1.9313244569025927, "grad_norm": 0.4347879886627197, "learning_rate": 0.00010949533604400861, "loss": 0.0678, "step": 13780 }, { "epoch": 1.9314646110721794, "grad_norm": 0.319716215133667, "learning_rate": 0.00010948098541018894, "loss": 0.0601, "step": 13781 }, { "epoch": 1.931604765241766, "grad_norm": 0.46149200201034546, "learning_rate": 0.00010946663477636928, "loss": 0.0187, "step": 13782 }, { "epoch": 1.9317449194113525, "grad_norm": 0.6473413109779358, "learning_rate": 0.00010945228414254961, "loss": 0.0762, "step": 13783 }, { "epoch": 1.931885073580939, "grad_norm": 0.6601626873016357, "learning_rate": 0.00010943793350872995, "loss": 0.0351, "step": 13784 }, { "epoch": 1.9320252277505254, "grad_norm": 0.7963165640830994, "learning_rate": 0.00010942358287491031, "loss": 0.0157, "step": 13785 }, { "epoch": 1.9321653819201121, "grad_norm": 0.4127991795539856, "learning_rate": 0.00010940923224109064, "loss": 0.0388, "step": 13786 }, { "epoch": 1.9323055360896988, "grad_norm": 0.6259059309959412, "learning_rate": 0.00010939488160727098, "loss": 0.0548, "step": 13787 }, { "epoch": 1.9324456902592853, "grad_norm": 0.31176838278770447, "learning_rate": 0.00010938053097345132, "loss": 0.0493, "step": 13788 }, { "epoch": 1.9325858444288717, "grad_norm": 0.4198831617832184, "learning_rate": 0.00010936618033963165, "loss": 0.0446, "step": 13789 }, { "epoch": 1.9327259985984582, "grad_norm": 0.2061156928539276, "learning_rate": 0.000109351829705812, "loss": 0.0413, "step": 13790 }, { "epoch": 1.9328661527680449, "grad_norm": 0.3667908012866974, "learning_rate": 0.00010933747907199232, "loss": 0.0732, "step": 13791 }, { "epoch": 1.9330063069376315, "grad_norm": 0.15379749238491058, "learning_rate": 0.00010932312843817268, "loss": 0.0379, "step": 13792 }, { "epoch": 1.933146461107218, "grad_norm": 0.4355453550815582, "learning_rate": 0.00010930877780435302, "loss": 
0.0285, "step": 13793 }, { "epoch": 1.9332866152768045, "grad_norm": 0.6018511652946472, "learning_rate": 0.00010929442717053335, "loss": 0.1155, "step": 13794 }, { "epoch": 1.933426769446391, "grad_norm": 0.35737666487693787, "learning_rate": 0.00010928007653671369, "loss": 0.0854, "step": 13795 }, { "epoch": 1.9335669236159776, "grad_norm": 0.27383071184158325, "learning_rate": 0.00010926572590289405, "loss": 0.0535, "step": 13796 }, { "epoch": 1.9337070777855643, "grad_norm": 0.46503695845603943, "learning_rate": 0.00010925137526907438, "loss": 0.1117, "step": 13797 }, { "epoch": 1.9338472319551507, "grad_norm": 0.2172660380601883, "learning_rate": 0.00010923702463525472, "loss": 0.0359, "step": 13798 }, { "epoch": 1.9339873861247372, "grad_norm": 0.4931662082672119, "learning_rate": 0.00010922267400143506, "loss": 0.048, "step": 13799 }, { "epoch": 1.9341275402943237, "grad_norm": 0.505435585975647, "learning_rate": 0.00010920832336761539, "loss": 0.059, "step": 13800 }, { "epoch": 1.9342676944639103, "grad_norm": 0.11063232272863388, "learning_rate": 0.00010919397273379574, "loss": 0.0099, "step": 13801 }, { "epoch": 1.9344078486334968, "grad_norm": 0.2406204640865326, "learning_rate": 0.00010917962209997607, "loss": 0.089, "step": 13802 }, { "epoch": 1.9345480028030835, "grad_norm": 0.25254330039024353, "learning_rate": 0.00010916527146615641, "loss": 0.0476, "step": 13803 }, { "epoch": 1.93468815697267, "grad_norm": 0.11822552978992462, "learning_rate": 0.00010915092083233676, "loss": 0.0471, "step": 13804 }, { "epoch": 1.9348283111422564, "grad_norm": 0.2360810488462448, "learning_rate": 0.00010913657019851708, "loss": 0.0337, "step": 13805 }, { "epoch": 1.934968465311843, "grad_norm": 0.11549448221921921, "learning_rate": 0.00010912221956469743, "loss": 0.014, "step": 13806 }, { "epoch": 1.9351086194814295, "grad_norm": 0.2151952087879181, "learning_rate": 0.00010910786893087778, "loss": 0.0172, "step": 13807 }, { "epoch": 1.9352487736510162, "grad_norm": 
0.1471911370754242, "learning_rate": 0.00010909351829705811, "loss": 0.0178, "step": 13808 }, { "epoch": 1.9353889278206027, "grad_norm": 0.22184059023857117, "learning_rate": 0.00010907916766323845, "loss": 0.0379, "step": 13809 }, { "epoch": 1.9355290819901891, "grad_norm": 0.21752691268920898, "learning_rate": 0.00010906481702941878, "loss": 0.0628, "step": 13810 }, { "epoch": 1.9356692361597756, "grad_norm": 0.17189821600914001, "learning_rate": 0.00010905046639559912, "loss": 0.0191, "step": 13811 }, { "epoch": 1.9358093903293623, "grad_norm": 0.5695016980171204, "learning_rate": 0.00010903611576177948, "loss": 0.071, "step": 13812 }, { "epoch": 1.935949544498949, "grad_norm": 0.15487004816532135, "learning_rate": 0.00010902176512795981, "loss": 0.0103, "step": 13813 }, { "epoch": 1.9360896986685354, "grad_norm": 0.08888904750347137, "learning_rate": 0.00010900741449414015, "loss": 0.0265, "step": 13814 }, { "epoch": 1.9362298528381219, "grad_norm": 0.2852155566215515, "learning_rate": 0.00010899306386032049, "loss": 0.0424, "step": 13815 }, { "epoch": 1.9363700070077083, "grad_norm": 0.355835497379303, "learning_rate": 0.00010897871322650082, "loss": 0.0324, "step": 13816 }, { "epoch": 1.936510161177295, "grad_norm": 0.23602645099163055, "learning_rate": 0.00010896436259268118, "loss": 0.0827, "step": 13817 }, { "epoch": 1.9366503153468817, "grad_norm": 0.08828553557395935, "learning_rate": 0.0001089500119588615, "loss": 0.0128, "step": 13818 }, { "epoch": 1.9367904695164682, "grad_norm": 0.48711979389190674, "learning_rate": 0.00010893566132504185, "loss": 0.1301, "step": 13819 }, { "epoch": 1.9369306236860546, "grad_norm": 0.33350110054016113, "learning_rate": 0.00010892131069122219, "loss": 0.0712, "step": 13820 }, { "epoch": 1.937070777855641, "grad_norm": 0.17934256792068481, "learning_rate": 0.00010890696005740252, "loss": 0.0381, "step": 13821 }, { "epoch": 1.9372109320252278, "grad_norm": 0.48867979645729065, "learning_rate": 0.00010889260942358286, 
"loss": 0.0469, "step": 13822 }, { "epoch": 1.9373510861948144, "grad_norm": 1.0411688089370728, "learning_rate": 0.00010887825878976322, "loss": 0.1012, "step": 13823 }, { "epoch": 1.937491240364401, "grad_norm": 0.2419024258852005, "learning_rate": 0.00010886390815594354, "loss": 0.0169, "step": 13824 }, { "epoch": 1.9376313945339874, "grad_norm": 0.6210082173347473, "learning_rate": 0.00010884955752212389, "loss": 0.1389, "step": 13825 }, { "epoch": 1.9377715487035738, "grad_norm": 0.2601433992385864, "learning_rate": 0.00010883520688830422, "loss": 0.0431, "step": 13826 }, { "epoch": 1.9379117028731605, "grad_norm": 0.19944724440574646, "learning_rate": 0.00010882085625448456, "loss": 0.0165, "step": 13827 }, { "epoch": 1.9380518570427472, "grad_norm": 0.3502379357814789, "learning_rate": 0.00010880650562066491, "loss": 0.0274, "step": 13828 }, { "epoch": 1.9381920112123336, "grad_norm": 0.1394815593957901, "learning_rate": 0.00010879215498684524, "loss": 0.0237, "step": 13829 }, { "epoch": 1.93833216538192, "grad_norm": 0.6292027831077576, "learning_rate": 0.00010877780435302558, "loss": 0.1062, "step": 13830 }, { "epoch": 1.9384723195515066, "grad_norm": 0.5791199207305908, "learning_rate": 0.00010876345371920593, "loss": 0.1374, "step": 13831 }, { "epoch": 1.9386124737210932, "grad_norm": 0.2543894648551941, "learning_rate": 0.00010874910308538625, "loss": 0.0341, "step": 13832 }, { "epoch": 1.9387526278906797, "grad_norm": 0.43626680970191956, "learning_rate": 0.00010873475245156661, "loss": 0.069, "step": 13833 }, { "epoch": 1.9388927820602664, "grad_norm": 0.8026769161224365, "learning_rate": 0.00010872040181774694, "loss": 0.1655, "step": 13834 }, { "epoch": 1.9390329362298528, "grad_norm": 1.4881922006607056, "learning_rate": 0.00010870605118392728, "loss": 0.3615, "step": 13835 }, { "epoch": 1.9391730903994393, "grad_norm": 0.2582043707370758, "learning_rate": 0.00010869170055010762, "loss": 0.0547, "step": 13836 }, { "epoch": 1.9393132445690258, 
"grad_norm": 0.1846296638250351, "learning_rate": 0.00010867734991628795, "loss": 0.0165, "step": 13837 }, { "epoch": 1.9394533987386124, "grad_norm": 0.3504028916358948, "learning_rate": 0.0001086629992824683, "loss": 0.0551, "step": 13838 }, { "epoch": 1.9395935529081991, "grad_norm": 0.3042311668395996, "learning_rate": 0.00010864864864864865, "loss": 0.0574, "step": 13839 }, { "epoch": 1.9397337070777856, "grad_norm": 0.15060807764530182, "learning_rate": 0.00010863429801482898, "loss": 0.0237, "step": 13840 }, { "epoch": 1.939873861247372, "grad_norm": 0.3377339243888855, "learning_rate": 0.00010861994738100932, "loss": 0.0243, "step": 13841 }, { "epoch": 1.9400140154169585, "grad_norm": 0.47248032689094543, "learning_rate": 0.00010860559674718966, "loss": 0.0276, "step": 13842 }, { "epoch": 1.9401541695865452, "grad_norm": 0.2184755951166153, "learning_rate": 0.00010859124611336999, "loss": 0.0159, "step": 13843 }, { "epoch": 1.9402943237561319, "grad_norm": 0.1103387251496315, "learning_rate": 0.00010857689547955035, "loss": 0.0278, "step": 13844 }, { "epoch": 1.9404344779257183, "grad_norm": 0.3250039517879486, "learning_rate": 0.00010856254484573067, "loss": 0.0296, "step": 13845 }, { "epoch": 1.9405746320953048, "grad_norm": 0.30050671100616455, "learning_rate": 0.00010854819421191102, "loss": 0.0476, "step": 13846 }, { "epoch": 1.9407147862648912, "grad_norm": 0.3676130771636963, "learning_rate": 0.00010853384357809136, "loss": 0.0519, "step": 13847 }, { "epoch": 1.940854940434478, "grad_norm": 0.5416865944862366, "learning_rate": 0.00010851949294427169, "loss": 0.0487, "step": 13848 }, { "epoch": 1.9409950946040646, "grad_norm": 0.3596961200237274, "learning_rate": 0.00010850514231045204, "loss": 0.0525, "step": 13849 }, { "epoch": 1.941135248773651, "grad_norm": 0.1657838225364685, "learning_rate": 0.00010849079167663239, "loss": 0.0116, "step": 13850 }, { "epoch": 1.9412754029432375, "grad_norm": 0.11259095370769501, "learning_rate": 
0.00010847644104281271, "loss": 0.0181, "step": 13851 }, { "epoch": 1.941415557112824, "grad_norm": 0.11800874024629593, "learning_rate": 0.00010846209040899306, "loss": 0.011, "step": 13852 }, { "epoch": 1.9415557112824107, "grad_norm": 0.9125251173973083, "learning_rate": 0.00010844773977517338, "loss": 0.0403, "step": 13853 }, { "epoch": 1.9416958654519973, "grad_norm": 0.6194085478782654, "learning_rate": 0.00010843338914135373, "loss": 0.0887, "step": 13854 }, { "epoch": 1.9418360196215838, "grad_norm": 0.20562759041786194, "learning_rate": 0.00010841903850753408, "loss": 0.0334, "step": 13855 }, { "epoch": 1.9419761737911703, "grad_norm": 0.381644070148468, "learning_rate": 0.00010840468787371441, "loss": 0.1484, "step": 13856 }, { "epoch": 1.9421163279607567, "grad_norm": 0.188826784491539, "learning_rate": 0.00010839033723989475, "loss": 0.0472, "step": 13857 }, { "epoch": 1.9422564821303434, "grad_norm": 0.16431394219398499, "learning_rate": 0.0001083759866060751, "loss": 0.0337, "step": 13858 }, { "epoch": 1.9423966362999299, "grad_norm": 0.9686247110366821, "learning_rate": 0.00010836163597225542, "loss": 0.0764, "step": 13859 }, { "epoch": 1.9425367904695165, "grad_norm": 0.3906697928905487, "learning_rate": 0.00010834728533843578, "loss": 0.0448, "step": 13860 }, { "epoch": 1.942676944639103, "grad_norm": 0.5008538961410522, "learning_rate": 0.00010833293470461611, "loss": 0.0672, "step": 13861 }, { "epoch": 1.9428170988086895, "grad_norm": 0.3694528639316559, "learning_rate": 0.00010831858407079645, "loss": 0.054, "step": 13862 }, { "epoch": 1.9429572529782762, "grad_norm": 0.29367485642433167, "learning_rate": 0.00010830423343697679, "loss": 0.0182, "step": 13863 }, { "epoch": 1.9430974071478626, "grad_norm": 0.18175604939460754, "learning_rate": 0.00010828988280315712, "loss": 0.0274, "step": 13864 }, { "epoch": 1.9432375613174493, "grad_norm": 0.16520561277866364, "learning_rate": 0.00010827553216933748, "loss": 0.0208, "step": 13865 }, { "epoch": 
1.9433777154870358, "grad_norm": 0.5899354219436646, "learning_rate": 0.00010826118153551782, "loss": 0.0953, "step": 13866 }, { "epoch": 1.9435178696566222, "grad_norm": 0.194908007979393, "learning_rate": 0.00010824683090169815, "loss": 0.0356, "step": 13867 }, { "epoch": 1.9436580238262087, "grad_norm": 0.08533184975385666, "learning_rate": 0.00010823248026787849, "loss": 0.0317, "step": 13868 }, { "epoch": 1.9437981779957954, "grad_norm": 0.22282400727272034, "learning_rate": 0.00010821812963405882, "loss": 0.0406, "step": 13869 }, { "epoch": 1.943938332165382, "grad_norm": 0.179804265499115, "learning_rate": 0.00010820377900023916, "loss": 0.0153, "step": 13870 }, { "epoch": 1.9440784863349685, "grad_norm": 0.1297025829553604, "learning_rate": 0.00010818942836641952, "loss": 0.0187, "step": 13871 }, { "epoch": 1.944218640504555, "grad_norm": 0.11439014971256256, "learning_rate": 0.00010817507773259984, "loss": 0.0148, "step": 13872 }, { "epoch": 1.9443587946741414, "grad_norm": 0.7564060688018799, "learning_rate": 0.00010816072709878019, "loss": 0.067, "step": 13873 }, { "epoch": 1.944498948843728, "grad_norm": 0.17752495408058167, "learning_rate": 0.00010814637646496053, "loss": 0.0152, "step": 13874 }, { "epoch": 1.9446391030133148, "grad_norm": 0.18348746001720428, "learning_rate": 0.00010813202583114086, "loss": 0.0124, "step": 13875 }, { "epoch": 1.9447792571829012, "grad_norm": 0.37344425916671753, "learning_rate": 0.00010811767519732121, "loss": 0.0463, "step": 13876 }, { "epoch": 1.9449194113524877, "grad_norm": 0.2197548747062683, "learning_rate": 0.00010810332456350155, "loss": 0.0185, "step": 13877 }, { "epoch": 1.9450595655220742, "grad_norm": 0.1182716116309166, "learning_rate": 0.00010808897392968188, "loss": 0.0099, "step": 13878 }, { "epoch": 1.9451997196916608, "grad_norm": 0.8102246522903442, "learning_rate": 0.00010807462329586223, "loss": 0.0724, "step": 13879 }, { "epoch": 1.9453398738612475, "grad_norm": 0.48709583282470703, 
"learning_rate": 0.00010806027266204255, "loss": 0.0202, "step": 13880 }, { "epoch": 1.945480028030834, "grad_norm": 0.5163368582725525, "learning_rate": 0.00010804592202822291, "loss": 0.0427, "step": 13881 }, { "epoch": 1.9456201822004204, "grad_norm": 1.0289500951766968, "learning_rate": 0.00010803157139440325, "loss": 0.0403, "step": 13882 }, { "epoch": 1.945760336370007, "grad_norm": 1.6088106632232666, "learning_rate": 0.00010801722076058358, "loss": 0.2601, "step": 13883 }, { "epoch": 1.9459004905395936, "grad_norm": 0.749770998954773, "learning_rate": 0.00010800287012676392, "loss": 0.0889, "step": 13884 }, { "epoch": 1.9460406447091803, "grad_norm": 1.7017935514450073, "learning_rate": 0.00010798851949294426, "loss": 0.2107, "step": 13885 }, { "epoch": 1.9461807988787667, "grad_norm": 0.5391717553138733, "learning_rate": 0.00010797416885912459, "loss": 0.0717, "step": 13886 }, { "epoch": 1.9463209530483532, "grad_norm": 0.21586287021636963, "learning_rate": 0.00010795981822530495, "loss": 0.0241, "step": 13887 }, { "epoch": 1.9464611072179396, "grad_norm": 0.23221690952777863, "learning_rate": 0.00010794546759148528, "loss": 0.0254, "step": 13888 }, { "epoch": 1.9466012613875263, "grad_norm": 0.6500512957572937, "learning_rate": 0.00010793111695766562, "loss": 0.0439, "step": 13889 }, { "epoch": 1.9467414155571128, "grad_norm": 0.25360721349716187, "learning_rate": 0.00010791676632384596, "loss": 0.0482, "step": 13890 }, { "epoch": 1.9468815697266995, "grad_norm": 0.46393564343452454, "learning_rate": 0.00010790241569002629, "loss": 0.0297, "step": 13891 }, { "epoch": 1.947021723896286, "grad_norm": 0.22118176519870758, "learning_rate": 0.00010788806505620665, "loss": 0.018, "step": 13892 }, { "epoch": 1.9471618780658724, "grad_norm": 0.6282756924629211, "learning_rate": 0.00010787371442238699, "loss": 0.0809, "step": 13893 }, { "epoch": 1.947302032235459, "grad_norm": 0.3335075080394745, "learning_rate": 0.00010785936378856732, "loss": 0.0357, "step": 
13894 }, { "epoch": 1.9474421864050455, "grad_norm": 0.09476706385612488, "learning_rate": 0.00010784501315474766, "loss": 0.0141, "step": 13895 }, { "epoch": 1.9475823405746322, "grad_norm": 0.24879112839698792, "learning_rate": 0.00010783066252092799, "loss": 0.0437, "step": 13896 }, { "epoch": 1.9477224947442187, "grad_norm": 0.2767227590084076, "learning_rate": 0.00010781631188710834, "loss": 0.0426, "step": 13897 }, { "epoch": 1.9478626489138051, "grad_norm": 0.3350938856601715, "learning_rate": 0.00010780196125328868, "loss": 0.0444, "step": 13898 }, { "epoch": 1.9480028030833916, "grad_norm": 0.2501748204231262, "learning_rate": 0.00010778761061946901, "loss": 0.0387, "step": 13899 }, { "epoch": 1.9481429572529783, "grad_norm": 0.1859581470489502, "learning_rate": 0.00010777325998564936, "loss": 0.0202, "step": 13900 }, { "epoch": 1.948283111422565, "grad_norm": 0.5712987780570984, "learning_rate": 0.0001077589093518297, "loss": 0.049, "step": 13901 }, { "epoch": 1.9484232655921514, "grad_norm": 0.3544265925884247, "learning_rate": 0.00010774455871801003, "loss": 0.0345, "step": 13902 }, { "epoch": 1.9485634197617379, "grad_norm": 0.21755433082580566, "learning_rate": 0.00010773020808419038, "loss": 0.0378, "step": 13903 }, { "epoch": 1.9487035739313243, "grad_norm": 0.29317066073417664, "learning_rate": 0.00010771585745037071, "loss": 0.0343, "step": 13904 }, { "epoch": 1.948843728100911, "grad_norm": 0.36446142196655273, "learning_rate": 0.00010770150681655105, "loss": 0.0468, "step": 13905 }, { "epoch": 1.9489838822704977, "grad_norm": 0.29610416293144226, "learning_rate": 0.0001076871561827314, "loss": 0.0319, "step": 13906 }, { "epoch": 1.9491240364400841, "grad_norm": 0.4245496988296509, "learning_rate": 0.00010767280554891172, "loss": 0.0499, "step": 13907 }, { "epoch": 1.9492641906096706, "grad_norm": 0.6212261319160461, "learning_rate": 0.00010765845491509208, "loss": 0.063, "step": 13908 }, { "epoch": 1.949404344779257, "grad_norm": 
0.19561579823493958, "learning_rate": 0.00010764410428127242, "loss": 0.0469, "step": 13909 }, { "epoch": 1.9495444989488437, "grad_norm": 0.24210990965366364, "learning_rate": 0.00010762975364745275, "loss": 0.0574, "step": 13910 }, { "epoch": 1.9496846531184304, "grad_norm": 0.4476587474346161, "learning_rate": 0.00010761540301363309, "loss": 0.026, "step": 13911 }, { "epoch": 1.9498248072880169, "grad_norm": 0.2300807535648346, "learning_rate": 0.00010760105237981345, "loss": 0.0641, "step": 13912 }, { "epoch": 1.9499649614576033, "grad_norm": 0.18532782793045044, "learning_rate": 0.00010758670174599378, "loss": 0.049, "step": 13913 }, { "epoch": 1.9501051156271898, "grad_norm": 0.17468349635601044, "learning_rate": 0.00010757235111217412, "loss": 0.0349, "step": 13914 }, { "epoch": 1.9502452697967765, "grad_norm": 0.22240892052650452, "learning_rate": 0.00010755800047835445, "loss": 0.0599, "step": 13915 }, { "epoch": 1.9503854239663632, "grad_norm": 0.2816687822341919, "learning_rate": 0.00010754364984453479, "loss": 0.0578, "step": 13916 }, { "epoch": 1.9505255781359496, "grad_norm": 0.3398972749710083, "learning_rate": 0.00010752929921071513, "loss": 0.1, "step": 13917 }, { "epoch": 1.950665732305536, "grad_norm": 0.25512760877609253, "learning_rate": 0.00010751494857689546, "loss": 0.016, "step": 13918 }, { "epoch": 1.9508058864751225, "grad_norm": 0.18065519630908966, "learning_rate": 0.00010750059794307581, "loss": 0.0249, "step": 13919 }, { "epoch": 1.9509460406447092, "grad_norm": 0.36691734194755554, "learning_rate": 0.00010748624730925616, "loss": 0.0273, "step": 13920 }, { "epoch": 1.9510861948142957, "grad_norm": 0.061061400920152664, "learning_rate": 0.00010747189667543649, "loss": 0.0049, "step": 13921 }, { "epoch": 1.9512263489838824, "grad_norm": 0.31019407510757446, "learning_rate": 0.00010745754604161683, "loss": 0.0385, "step": 13922 }, { "epoch": 1.9513665031534688, "grad_norm": 0.2908509075641632, "learning_rate": 0.00010744319540779716, 
"loss": 0.0921, "step": 13923 }, { "epoch": 1.9515066573230553, "grad_norm": 0.30684125423431396, "learning_rate": 0.00010742884477397751, "loss": 0.0318, "step": 13924 }, { "epoch": 1.9516468114926417, "grad_norm": 0.15897582471370697, "learning_rate": 0.00010741449414015785, "loss": 0.0238, "step": 13925 }, { "epoch": 1.9517869656622284, "grad_norm": 0.2699630558490753, "learning_rate": 0.00010740014350633818, "loss": 0.069, "step": 13926 }, { "epoch": 1.951927119831815, "grad_norm": 2.184220790863037, "learning_rate": 0.00010738579287251852, "loss": 0.0643, "step": 13927 }, { "epoch": 1.9520672740014016, "grad_norm": 0.27847978472709656, "learning_rate": 0.00010737144223869888, "loss": 0.0424, "step": 13928 }, { "epoch": 1.952207428170988, "grad_norm": 0.5898178219795227, "learning_rate": 0.00010735709160487921, "loss": 0.0426, "step": 13929 }, { "epoch": 1.9523475823405745, "grad_norm": 0.9590038657188416, "learning_rate": 0.00010734274097105955, "loss": 0.1187, "step": 13930 }, { "epoch": 1.9524877365101612, "grad_norm": 0.3945847153663635, "learning_rate": 0.00010732839033723988, "loss": 0.0351, "step": 13931 }, { "epoch": 1.9526278906797478, "grad_norm": 1.1869257688522339, "learning_rate": 0.00010731403970342022, "loss": 0.179, "step": 13932 }, { "epoch": 1.9527680448493343, "grad_norm": 2.1791369915008545, "learning_rate": 0.00010729968906960056, "loss": 0.1583, "step": 13933 }, { "epoch": 1.9529081990189208, "grad_norm": 0.1371636986732483, "learning_rate": 0.00010728533843578089, "loss": 0.0067, "step": 13934 }, { "epoch": 1.9530483531885072, "grad_norm": 2.6841821670532227, "learning_rate": 0.00010727098780196125, "loss": 0.3381, "step": 13935 }, { "epoch": 1.953188507358094, "grad_norm": 0.11485984176397324, "learning_rate": 0.00010725663716814159, "loss": 0.0203, "step": 13936 }, { "epoch": 1.9533286615276806, "grad_norm": 0.7508218288421631, "learning_rate": 0.00010724228653432192, "loss": 0.0697, "step": 13937 }, { "epoch": 1.953468815697267, 
"grad_norm": 0.211881622672081, "learning_rate": 0.00010722793590050226, "loss": 0.0559, "step": 13938 }, { "epoch": 1.9536089698668535, "grad_norm": 0.12432222813367844, "learning_rate": 0.00010721358526668259, "loss": 0.0316, "step": 13939 }, { "epoch": 1.95374912403644, "grad_norm": 0.2746027112007141, "learning_rate": 0.00010719923463286294, "loss": 0.0131, "step": 13940 }, { "epoch": 1.9538892782060266, "grad_norm": 0.3552861511707306, "learning_rate": 0.00010718488399904329, "loss": 0.0408, "step": 13941 }, { "epoch": 1.9540294323756133, "grad_norm": 0.4911704957485199, "learning_rate": 0.00010717053336522362, "loss": 0.0735, "step": 13942 }, { "epoch": 1.9541695865451998, "grad_norm": 0.5915058851242065, "learning_rate": 0.00010715618273140396, "loss": 0.0907, "step": 13943 }, { "epoch": 1.9543097407147862, "grad_norm": 0.08755096048116684, "learning_rate": 0.00010714183209758431, "loss": 0.0105, "step": 13944 }, { "epoch": 1.9544498948843727, "grad_norm": 0.4453171491622925, "learning_rate": 0.00010712748146376464, "loss": 0.0501, "step": 13945 }, { "epoch": 1.9545900490539594, "grad_norm": 0.4702605903148651, "learning_rate": 0.00010711313082994498, "loss": 0.0249, "step": 13946 }, { "epoch": 1.9547302032235458, "grad_norm": 0.2845425605773926, "learning_rate": 0.00010709878019612533, "loss": 0.0333, "step": 13947 }, { "epoch": 1.9548703573931325, "grad_norm": 0.09130755811929703, "learning_rate": 0.00010708442956230565, "loss": 0.0128, "step": 13948 }, { "epoch": 1.955010511562719, "grad_norm": 0.12655296921730042, "learning_rate": 0.000107070078928486, "loss": 0.0131, "step": 13949 }, { "epoch": 1.9551506657323054, "grad_norm": 0.4547109603881836, "learning_rate": 0.00010705572829466633, "loss": 0.0392, "step": 13950 }, { "epoch": 1.9552908199018921, "grad_norm": 0.1656562089920044, "learning_rate": 0.00010704137766084668, "loss": 0.0682, "step": 13951 }, { "epoch": 1.9554309740714786, "grad_norm": 0.3922971487045288, "learning_rate": 
0.00010702702702702702, "loss": 0.0484, "step": 13952 }, { "epoch": 1.9555711282410653, "grad_norm": 0.23750469088554382, "learning_rate": 0.00010701267639320735, "loss": 0.0517, "step": 13953 }, { "epoch": 1.9557112824106517, "grad_norm": 0.25085195899009705, "learning_rate": 0.0001069983257593877, "loss": 0.049, "step": 13954 }, { "epoch": 1.9558514365802382, "grad_norm": 0.49034756422042847, "learning_rate": 0.00010698397512556805, "loss": 0.0161, "step": 13955 }, { "epoch": 1.9559915907498246, "grad_norm": 0.15123486518859863, "learning_rate": 0.00010696962449174838, "loss": 0.0427, "step": 13956 }, { "epoch": 1.9561317449194113, "grad_norm": 0.43760210275650024, "learning_rate": 0.00010695527385792872, "loss": 0.0825, "step": 13957 }, { "epoch": 1.956271899088998, "grad_norm": 0.11341866850852966, "learning_rate": 0.00010694092322410905, "loss": 0.0199, "step": 13958 }, { "epoch": 1.9564120532585845, "grad_norm": 0.48485666513442993, "learning_rate": 0.00010692657259028939, "loss": 0.0445, "step": 13959 }, { "epoch": 1.956552207428171, "grad_norm": 0.19133444130420685, "learning_rate": 0.00010691222195646975, "loss": 0.0452, "step": 13960 }, { "epoch": 1.9566923615977574, "grad_norm": 0.18332277238368988, "learning_rate": 0.00010689787132265008, "loss": 0.0379, "step": 13961 }, { "epoch": 1.956832515767344, "grad_norm": 0.5678544640541077, "learning_rate": 0.00010688352068883042, "loss": 0.12, "step": 13962 }, { "epoch": 1.9569726699369308, "grad_norm": 0.17859381437301636, "learning_rate": 0.00010686917005501076, "loss": 0.0176, "step": 13963 }, { "epoch": 1.9571128241065172, "grad_norm": 0.12010553479194641, "learning_rate": 0.00010685481942119109, "loss": 0.0151, "step": 13964 }, { "epoch": 1.9572529782761037, "grad_norm": 0.22215235233306885, "learning_rate": 0.00010684046878737143, "loss": 0.0355, "step": 13965 }, { "epoch": 1.9573931324456901, "grad_norm": 0.42693641781806946, "learning_rate": 0.00010682611815355176, "loss": 0.0793, "step": 13966 }, { 
"epoch": 1.9575332866152768, "grad_norm": 0.3330225646495819, "learning_rate": 0.00010681176751973211, "loss": 0.0533, "step": 13967 }, { "epoch": 1.9576734407848635, "grad_norm": 0.27477332949638367, "learning_rate": 0.00010679741688591246, "loss": 0.0213, "step": 13968 }, { "epoch": 1.95781359495445, "grad_norm": 0.22648563981056213, "learning_rate": 0.00010678306625209278, "loss": 0.042, "step": 13969 }, { "epoch": 1.9579537491240364, "grad_norm": 0.23525995016098022, "learning_rate": 0.00010676871561827313, "loss": 0.0189, "step": 13970 }, { "epoch": 1.9580939032936229, "grad_norm": 0.22855354845523834, "learning_rate": 0.00010675436498445348, "loss": 0.0298, "step": 13971 }, { "epoch": 1.9582340574632096, "grad_norm": 0.7233769297599792, "learning_rate": 0.00010674001435063381, "loss": 0.1841, "step": 13972 }, { "epoch": 1.9583742116327962, "grad_norm": 0.12312700599431992, "learning_rate": 0.00010672566371681415, "loss": 0.0065, "step": 13973 }, { "epoch": 1.9585143658023827, "grad_norm": 0.7623341679573059, "learning_rate": 0.00010671131308299448, "loss": 0.0476, "step": 13974 }, { "epoch": 1.9586545199719692, "grad_norm": 0.484224796295166, "learning_rate": 0.00010669696244917482, "loss": 0.0998, "step": 13975 }, { "epoch": 1.9587946741415556, "grad_norm": 0.8296681642532349, "learning_rate": 0.00010668261181535518, "loss": 0.0459, "step": 13976 }, { "epoch": 1.9589348283111423, "grad_norm": 0.12233518064022064, "learning_rate": 0.00010666826118153551, "loss": 0.0157, "step": 13977 }, { "epoch": 1.9590749824807288, "grad_norm": 0.029155075550079346, "learning_rate": 0.00010665391054771585, "loss": 0.0019, "step": 13978 }, { "epoch": 1.9592151366503154, "grad_norm": 0.20978274941444397, "learning_rate": 0.00010663955991389619, "loss": 0.0165, "step": 13979 }, { "epoch": 1.959355290819902, "grad_norm": 0.5570623874664307, "learning_rate": 0.00010662520928007652, "loss": 0.188, "step": 13980 }, { "epoch": 1.9594954449894884, "grad_norm": 0.4756470322608948, 
"learning_rate": 0.00010661085864625686, "loss": 0.042, "step": 13981 }, { "epoch": 1.959635599159075, "grad_norm": 0.17194361984729767, "learning_rate": 0.00010659650801243719, "loss": 0.0291, "step": 13982 }, { "epoch": 1.9597757533286615, "grad_norm": 1.010820746421814, "learning_rate": 0.00010658215737861755, "loss": 0.1147, "step": 13983 }, { "epoch": 1.9599159074982482, "grad_norm": 0.3724798262119293, "learning_rate": 0.00010656780674479789, "loss": 0.041, "step": 13984 }, { "epoch": 1.9600560616678346, "grad_norm": 0.9576537013053894, "learning_rate": 0.00010655345611097822, "loss": 0.0944, "step": 13985 }, { "epoch": 1.960196215837421, "grad_norm": 0.1325252801179886, "learning_rate": 0.00010653910547715856, "loss": 0.0217, "step": 13986 }, { "epoch": 1.9603363700070076, "grad_norm": 0.2720743417739868, "learning_rate": 0.00010652475484333892, "loss": 0.0436, "step": 13987 }, { "epoch": 1.9604765241765942, "grad_norm": 0.6176046133041382, "learning_rate": 0.00010651040420951924, "loss": 0.0949, "step": 13988 }, { "epoch": 1.960616678346181, "grad_norm": 0.311335027217865, "learning_rate": 0.00010649605357569959, "loss": 0.0793, "step": 13989 }, { "epoch": 1.9607568325157674, "grad_norm": 0.3555140495300293, "learning_rate": 0.00010648170294187993, "loss": 0.0415, "step": 13990 }, { "epoch": 1.9608969866853538, "grad_norm": 0.4476960301399231, "learning_rate": 0.00010646735230806026, "loss": 0.0423, "step": 13991 }, { "epoch": 1.9610371408549403, "grad_norm": 0.49478137493133545, "learning_rate": 0.00010645300167424061, "loss": 0.0595, "step": 13992 }, { "epoch": 1.961177295024527, "grad_norm": 0.43799328804016113, "learning_rate": 0.00010643865104042094, "loss": 0.1071, "step": 13993 }, { "epoch": 1.9613174491941137, "grad_norm": 0.4966183304786682, "learning_rate": 0.00010642430040660128, "loss": 0.1129, "step": 13994 }, { "epoch": 1.9614576033637001, "grad_norm": 0.33611536026000977, "learning_rate": 0.00010640994977278163, "loss": 0.0442, "step": 13995 
}, { "epoch": 1.9615977575332866, "grad_norm": 0.27683529257774353, "learning_rate": 0.00010639559913896195, "loss": 0.0546, "step": 13996 }, { "epoch": 1.961737911702873, "grad_norm": 0.15661093592643738, "learning_rate": 0.0001063812485051423, "loss": 0.0289, "step": 13997 }, { "epoch": 1.9618780658724597, "grad_norm": 0.16219112277030945, "learning_rate": 0.00010636689787132265, "loss": 0.039, "step": 13998 }, { "epoch": 1.9620182200420464, "grad_norm": 0.22393590211868286, "learning_rate": 0.00010635254723750298, "loss": 0.0355, "step": 13999 }, { "epoch": 1.9621583742116329, "grad_norm": 0.4806915819644928, "learning_rate": 0.00010633819660368332, "loss": 0.0757, "step": 14000 }, { "epoch": 1.9622985283812193, "grad_norm": 0.2986256182193756, "learning_rate": 0.00010632384596986365, "loss": 0.0324, "step": 14001 }, { "epoch": 1.9624386825508058, "grad_norm": 0.20005184412002563, "learning_rate": 0.000106309495336044, "loss": 0.0834, "step": 14002 }, { "epoch": 1.9625788367203925, "grad_norm": 0.19226433336734772, "learning_rate": 0.00010629514470222435, "loss": 0.0302, "step": 14003 }, { "epoch": 1.9627189908899791, "grad_norm": 0.15790244936943054, "learning_rate": 0.00010628079406840468, "loss": 0.0353, "step": 14004 }, { "epoch": 1.9628591450595656, "grad_norm": 0.25119757652282715, "learning_rate": 0.00010626644343458502, "loss": 0.0287, "step": 14005 }, { "epoch": 1.962999299229152, "grad_norm": 0.20292729139328003, "learning_rate": 0.00010625209280076536, "loss": 0.0289, "step": 14006 }, { "epoch": 1.9631394533987385, "grad_norm": 0.19640420377254486, "learning_rate": 0.00010623774216694569, "loss": 0.0327, "step": 14007 }, { "epoch": 1.9632796075683252, "grad_norm": 0.20290902256965637, "learning_rate": 0.00010622339153312605, "loss": 0.0412, "step": 14008 }, { "epoch": 1.9634197617379117, "grad_norm": 0.3083365857601166, "learning_rate": 0.00010620904089930637, "loss": 0.0434, "step": 14009 }, { "epoch": 1.9635599159074983, "grad_norm": 
0.38759079575538635, "learning_rate": 0.00010619469026548672, "loss": 0.0354, "step": 14010 }, { "epoch": 1.9637000700770848, "grad_norm": 0.20216983556747437, "learning_rate": 0.00010618033963166706, "loss": 0.0542, "step": 14011 }, { "epoch": 1.9638402242466713, "grad_norm": 0.20141395926475525, "learning_rate": 0.00010616598899784739, "loss": 0.0393, "step": 14012 }, { "epoch": 1.9639803784162577, "grad_norm": 0.20983704924583435, "learning_rate": 0.00010615163836402773, "loss": 0.0142, "step": 14013 }, { "epoch": 1.9641205325858444, "grad_norm": 0.26330599188804626, "learning_rate": 0.00010613728773020809, "loss": 0.0542, "step": 14014 }, { "epoch": 1.964260686755431, "grad_norm": 0.1540800929069519, "learning_rate": 0.00010612293709638841, "loss": 0.0411, "step": 14015 }, { "epoch": 1.9644008409250175, "grad_norm": 0.3687482178211212, "learning_rate": 0.00010610858646256876, "loss": 0.0716, "step": 14016 }, { "epoch": 1.964540995094604, "grad_norm": 0.24748332798480988, "learning_rate": 0.00010609423582874908, "loss": 0.0309, "step": 14017 }, { "epoch": 1.9646811492641905, "grad_norm": 0.3238883316516876, "learning_rate": 0.00010607988519492943, "loss": 0.088, "step": 14018 }, { "epoch": 1.9648213034337771, "grad_norm": 0.3410910665988922, "learning_rate": 0.00010606553456110978, "loss": 0.0896, "step": 14019 }, { "epoch": 1.9649614576033638, "grad_norm": 0.1531667560338974, "learning_rate": 0.00010605118392729011, "loss": 0.023, "step": 14020 }, { "epoch": 1.9651016117729503, "grad_norm": 0.24498090147972107, "learning_rate": 0.00010603683329347045, "loss": 0.0586, "step": 14021 }, { "epoch": 1.9652417659425367, "grad_norm": 0.11039438843727112, "learning_rate": 0.0001060224826596508, "loss": 0.0131, "step": 14022 }, { "epoch": 1.9653819201121232, "grad_norm": 0.4878736734390259, "learning_rate": 0.00010600813202583112, "loss": 0.0508, "step": 14023 }, { "epoch": 1.9655220742817099, "grad_norm": 0.270585298538208, "learning_rate": 0.00010599378139201148, 
"loss": 0.0605, "step": 14024 }, { "epoch": 1.9656622284512966, "grad_norm": 0.4651467800140381, "learning_rate": 0.00010597943075819182, "loss": 0.0542, "step": 14025 }, { "epoch": 1.965802382620883, "grad_norm": 0.4888472259044647, "learning_rate": 0.00010596508012437215, "loss": 0.0455, "step": 14026 }, { "epoch": 1.9659425367904695, "grad_norm": 0.23348116874694824, "learning_rate": 0.00010595072949055249, "loss": 0.0358, "step": 14027 }, { "epoch": 1.966082690960056, "grad_norm": 0.505035400390625, "learning_rate": 0.00010593637885673282, "loss": 0.0683, "step": 14028 }, { "epoch": 1.9662228451296426, "grad_norm": 0.5941540598869324, "learning_rate": 0.00010592202822291316, "loss": 0.0607, "step": 14029 }, { "epoch": 1.9663629992992293, "grad_norm": 1.0807098150253296, "learning_rate": 0.00010590767758909352, "loss": 0.055, "step": 14030 }, { "epoch": 1.9665031534688158, "grad_norm": 0.4120032787322998, "learning_rate": 0.00010589332695527385, "loss": 0.0313, "step": 14031 }, { "epoch": 1.9666433076384022, "grad_norm": 0.519198477268219, "learning_rate": 0.00010587897632145419, "loss": 0.0519, "step": 14032 }, { "epoch": 1.9667834618079887, "grad_norm": 0.6556109189987183, "learning_rate": 0.00010586462568763453, "loss": 0.0692, "step": 14033 }, { "epoch": 1.9669236159775754, "grad_norm": 1.7057074308395386, "learning_rate": 0.00010585027505381486, "loss": 0.2809, "step": 14034 }, { "epoch": 1.9670637701471618, "grad_norm": 1.1197335720062256, "learning_rate": 0.00010583592441999522, "loss": 0.0688, "step": 14035 }, { "epoch": 1.9672039243167485, "grad_norm": 0.16459771990776062, "learning_rate": 0.00010582157378617554, "loss": 0.0272, "step": 14036 }, { "epoch": 1.967344078486335, "grad_norm": 0.20394645631313324, "learning_rate": 0.00010580722315235589, "loss": 0.0428, "step": 14037 }, { "epoch": 1.9674842326559214, "grad_norm": 0.31463733315467834, "learning_rate": 0.00010579287251853623, "loss": 0.1215, "step": 14038 }, { "epoch": 1.967624386825508, 
"grad_norm": 0.3070388436317444, "learning_rate": 0.00010577852188471656, "loss": 0.1286, "step": 14039 }, { "epoch": 1.9677645409950946, "grad_norm": 0.34140148758888245, "learning_rate": 0.00010576417125089691, "loss": 0.0406, "step": 14040 }, { "epoch": 1.9679046951646813, "grad_norm": 0.16881799697875977, "learning_rate": 0.00010574982061707725, "loss": 0.0208, "step": 14041 }, { "epoch": 1.9680448493342677, "grad_norm": 0.1559797078371048, "learning_rate": 0.00010573546998325758, "loss": 0.0175, "step": 14042 }, { "epoch": 1.9681850035038542, "grad_norm": 0.567139208316803, "learning_rate": 0.00010572111934943793, "loss": 0.0233, "step": 14043 }, { "epoch": 1.9683251576734406, "grad_norm": 0.08762841671705246, "learning_rate": 0.00010570676871561825, "loss": 0.014, "step": 14044 }, { "epoch": 1.9684653118430273, "grad_norm": 0.1431482434272766, "learning_rate": 0.0001056924180817986, "loss": 0.0149, "step": 14045 }, { "epoch": 1.968605466012614, "grad_norm": 0.20340314507484436, "learning_rate": 0.00010567806744797895, "loss": 0.0328, "step": 14046 }, { "epoch": 1.9687456201822005, "grad_norm": 0.504148542881012, "learning_rate": 0.00010566371681415928, "loss": 0.0497, "step": 14047 }, { "epoch": 1.968885774351787, "grad_norm": 0.1360362470149994, "learning_rate": 0.00010564936618033962, "loss": 0.0257, "step": 14048 }, { "epoch": 1.9690259285213734, "grad_norm": 0.5313668251037598, "learning_rate": 0.00010563501554651996, "loss": 0.0756, "step": 14049 }, { "epoch": 1.96916608269096, "grad_norm": 0.2683827877044678, "learning_rate": 0.00010562066491270029, "loss": 0.0511, "step": 14050 }, { "epoch": 1.9693062368605467, "grad_norm": 0.32181647419929504, "learning_rate": 0.00010560631427888065, "loss": 0.0396, "step": 14051 }, { "epoch": 1.9694463910301332, "grad_norm": 0.2120240032672882, "learning_rate": 0.00010559196364506098, "loss": 0.0154, "step": 14052 }, { "epoch": 1.9695865451997197, "grad_norm": 0.4073394238948822, "learning_rate": 
0.00010557761301124132, "loss": 0.0753, "step": 14053 }, { "epoch": 1.969726699369306, "grad_norm": 0.16407668590545654, "learning_rate": 0.00010556326237742166, "loss": 0.0392, "step": 14054 }, { "epoch": 1.9698668535388928, "grad_norm": 0.4192562401294708, "learning_rate": 0.00010554891174360199, "loss": 0.0356, "step": 14055 }, { "epoch": 1.9700070077084795, "grad_norm": 0.16742397844791412, "learning_rate": 0.00010553456110978235, "loss": 0.0675, "step": 14056 }, { "epoch": 1.970147161878066, "grad_norm": 0.14298264682292938, "learning_rate": 0.00010552021047596269, "loss": 0.0501, "step": 14057 }, { "epoch": 1.9702873160476524, "grad_norm": 0.6647476553916931, "learning_rate": 0.00010550585984214302, "loss": 0.0715, "step": 14058 }, { "epoch": 1.9704274702172389, "grad_norm": 0.2449726015329361, "learning_rate": 0.00010549150920832336, "loss": 0.0394, "step": 14059 }, { "epoch": 1.9705676243868255, "grad_norm": 0.3318365514278412, "learning_rate": 0.0001054771585745037, "loss": 0.0409, "step": 14060 }, { "epoch": 1.9707077785564122, "grad_norm": 0.09152483940124512, "learning_rate": 0.00010546280794068403, "loss": 0.0149, "step": 14061 }, { "epoch": 1.9708479327259987, "grad_norm": 0.29178494215011597, "learning_rate": 0.00010544845730686438, "loss": 0.0379, "step": 14062 }, { "epoch": 1.9709880868955851, "grad_norm": 0.4171029031276703, "learning_rate": 0.00010543410667304471, "loss": 0.1133, "step": 14063 }, { "epoch": 1.9711282410651716, "grad_norm": 0.18766044080257416, "learning_rate": 0.00010541975603922506, "loss": 0.0386, "step": 14064 }, { "epoch": 1.9712683952347583, "grad_norm": 0.1554962396621704, "learning_rate": 0.0001054054054054054, "loss": 0.036, "step": 14065 }, { "epoch": 1.9714085494043447, "grad_norm": 0.24288548529148102, "learning_rate": 0.00010539105477158573, "loss": 0.0656, "step": 14066 }, { "epoch": 1.9715487035739314, "grad_norm": 0.9484646320343018, "learning_rate": 0.00010537670413776608, "loss": 0.1434, "step": 14067 }, { 
"epoch": 1.9716888577435179, "grad_norm": 0.26609301567077637, "learning_rate": 0.00010536235350394642, "loss": 0.0392, "step": 14068 }, { "epoch": 1.9718290119131043, "grad_norm": 0.29580777883529663, "learning_rate": 0.00010534800287012675, "loss": 0.0468, "step": 14069 }, { "epoch": 1.9719691660826908, "grad_norm": 0.23241661489009857, "learning_rate": 0.0001053336522363071, "loss": 0.0368, "step": 14070 }, { "epoch": 1.9721093202522775, "grad_norm": 0.4995517432689667, "learning_rate": 0.00010531930160248742, "loss": 0.081, "step": 14071 }, { "epoch": 1.9722494744218642, "grad_norm": 0.37326884269714355, "learning_rate": 0.00010530495096866778, "loss": 0.0372, "step": 14072 }, { "epoch": 1.9723896285914506, "grad_norm": 0.3576323390007019, "learning_rate": 0.00010529060033484812, "loss": 0.0441, "step": 14073 }, { "epoch": 1.972529782761037, "grad_norm": 0.6952236890792847, "learning_rate": 0.00010527624970102845, "loss": 0.033, "step": 14074 }, { "epoch": 1.9726699369306235, "grad_norm": 0.23206666111946106, "learning_rate": 0.00010526189906720879, "loss": 0.0346, "step": 14075 }, { "epoch": 1.9728100911002102, "grad_norm": 0.2733270823955536, "learning_rate": 0.00010524754843338913, "loss": 0.0595, "step": 14076 }, { "epoch": 1.972950245269797, "grad_norm": 0.2883847653865814, "learning_rate": 0.00010523319779956946, "loss": 0.0419, "step": 14077 }, { "epoch": 1.9730903994393834, "grad_norm": 0.4231919050216675, "learning_rate": 0.00010521884716574982, "loss": 0.0554, "step": 14078 }, { "epoch": 1.9732305536089698, "grad_norm": 0.30283451080322266, "learning_rate": 0.00010520449653193015, "loss": 0.0238, "step": 14079 }, { "epoch": 1.9733707077785563, "grad_norm": 0.07608125358819962, "learning_rate": 0.00010519014589811049, "loss": 0.0061, "step": 14080 }, { "epoch": 1.973510861948143, "grad_norm": 2.808887481689453, "learning_rate": 0.00010517579526429083, "loss": 0.0529, "step": 14081 }, { "epoch": 1.9736510161177296, "grad_norm": 0.5500365495681763, 
"learning_rate": 0.00010516144463047116, "loss": 0.0316, "step": 14082 }, { "epoch": 1.973791170287316, "grad_norm": 0.5552535653114319, "learning_rate": 0.00010514709399665151, "loss": 0.1317, "step": 14083 }, { "epoch": 1.9739313244569026, "grad_norm": 5.861975193023682, "learning_rate": 0.00010513274336283186, "loss": 0.3374, "step": 14084 }, { "epoch": 1.974071478626489, "grad_norm": 0.9901712536811829, "learning_rate": 0.00010511839272901219, "loss": 0.1485, "step": 14085 }, { "epoch": 1.9742116327960757, "grad_norm": 0.118989959359169, "learning_rate": 0.00010510404209519253, "loss": 0.0317, "step": 14086 }, { "epoch": 1.9743517869656624, "grad_norm": 0.25722867250442505, "learning_rate": 0.00010508969146137286, "loss": 0.0414, "step": 14087 }, { "epoch": 1.9744919411352488, "grad_norm": 0.19089168310165405, "learning_rate": 0.00010507534082755321, "loss": 0.0287, "step": 14088 }, { "epoch": 1.9746320953048353, "grad_norm": 0.3456478714942932, "learning_rate": 0.00010506099019373355, "loss": 0.0849, "step": 14089 }, { "epoch": 1.9747722494744218, "grad_norm": 0.331366628408432, "learning_rate": 0.00010504663955991388, "loss": 0.0549, "step": 14090 }, { "epoch": 1.9749124036440084, "grad_norm": 0.3205987513065338, "learning_rate": 0.00010503228892609422, "loss": 0.0355, "step": 14091 }, { "epoch": 1.975052557813595, "grad_norm": 0.6546750068664551, "learning_rate": 0.00010501793829227457, "loss": 0.0468, "step": 14092 }, { "epoch": 1.9751927119831816, "grad_norm": 0.1530638486146927, "learning_rate": 0.0001050035876584549, "loss": 0.0262, "step": 14093 }, { "epoch": 1.975332866152768, "grad_norm": 0.22343894839286804, "learning_rate": 0.00010498923702463525, "loss": 0.0401, "step": 14094 }, { "epoch": 1.9754730203223545, "grad_norm": 0.3264727294445038, "learning_rate": 0.00010497488639081558, "loss": 0.0321, "step": 14095 }, { "epoch": 1.9756131744919412, "grad_norm": 0.29887378215789795, "learning_rate": 0.00010496053575699592, "loss": 0.084, "step": 14096 
}, { "epoch": 1.9757533286615276, "grad_norm": 0.19343771040439606, "learning_rate": 0.00010494618512317626, "loss": 0.0257, "step": 14097 }, { "epoch": 1.9758934828311143, "grad_norm": 0.07619180530309677, "learning_rate": 0.00010493183448935659, "loss": 0.008, "step": 14098 }, { "epoch": 1.9760336370007008, "grad_norm": 0.4226943552494049, "learning_rate": 0.00010491748385553695, "loss": 0.0454, "step": 14099 }, { "epoch": 1.9761737911702872, "grad_norm": 0.5277911424636841, "learning_rate": 0.00010490313322171729, "loss": 0.0566, "step": 14100 }, { "epoch": 1.9763139453398737, "grad_norm": 0.6614856123924255, "learning_rate": 0.00010488878258789762, "loss": 0.0116, "step": 14101 }, { "epoch": 1.9764540995094604, "grad_norm": 0.33292704820632935, "learning_rate": 0.00010487443195407796, "loss": 0.0419, "step": 14102 }, { "epoch": 1.976594253679047, "grad_norm": 0.7209381461143494, "learning_rate": 0.00010486008132025832, "loss": 0.0298, "step": 14103 }, { "epoch": 1.9767344078486335, "grad_norm": 0.17458872497081757, "learning_rate": 0.00010484573068643864, "loss": 0.0318, "step": 14104 }, { "epoch": 1.97687456201822, "grad_norm": 0.35672834515571594, "learning_rate": 0.00010483138005261899, "loss": 0.0539, "step": 14105 }, { "epoch": 1.9770147161878064, "grad_norm": 0.5295037627220154, "learning_rate": 0.00010481702941879932, "loss": 0.0458, "step": 14106 }, { "epoch": 1.9771548703573931, "grad_norm": 0.05617177113890648, "learning_rate": 0.00010480267878497966, "loss": 0.013, "step": 14107 }, { "epoch": 1.9772950245269798, "grad_norm": 0.28012824058532715, "learning_rate": 0.00010478832815116, "loss": 0.039, "step": 14108 }, { "epoch": 1.9774351786965663, "grad_norm": 0.23234710097312927, "learning_rate": 0.00010477397751734034, "loss": 0.041, "step": 14109 }, { "epoch": 1.9775753328661527, "grad_norm": 0.5331157445907593, "learning_rate": 0.00010475962688352068, "loss": 0.0196, "step": 14110 }, { "epoch": 1.9777154870357392, "grad_norm": 0.40266895294189453, 
"learning_rate": 0.00010474527624970103, "loss": 0.0819, "step": 14111 }, { "epoch": 1.9778556412053259, "grad_norm": 0.3187224864959717, "learning_rate": 0.00010473092561588135, "loss": 0.0358, "step": 14112 }, { "epoch": 1.9779957953749125, "grad_norm": 0.6463882327079773, "learning_rate": 0.0001047165749820617, "loss": 0.0945, "step": 14113 }, { "epoch": 1.978135949544499, "grad_norm": 0.22417816519737244, "learning_rate": 0.00010470222434824203, "loss": 0.0319, "step": 14114 }, { "epoch": 1.9782761037140855, "grad_norm": 0.7379664182662964, "learning_rate": 0.00010468787371442238, "loss": 0.0565, "step": 14115 }, { "epoch": 1.978416257883672, "grad_norm": 0.17094933986663818, "learning_rate": 0.00010467352308060272, "loss": 0.0134, "step": 14116 }, { "epoch": 1.9785564120532586, "grad_norm": 0.0786542072892189, "learning_rate": 0.00010465917244678305, "loss": 0.0116, "step": 14117 }, { "epoch": 1.9786965662228453, "grad_norm": 0.41802674531936646, "learning_rate": 0.0001046448218129634, "loss": 0.0422, "step": 14118 }, { "epoch": 1.9788367203924317, "grad_norm": 0.29712462425231934, "learning_rate": 0.00010463047117914375, "loss": 0.0981, "step": 14119 }, { "epoch": 1.9789768745620182, "grad_norm": 0.1856394112110138, "learning_rate": 0.00010461612054532408, "loss": 0.0453, "step": 14120 }, { "epoch": 1.9791170287316047, "grad_norm": 0.23553521931171417, "learning_rate": 0.00010460176991150442, "loss": 0.0364, "step": 14121 }, { "epoch": 1.9792571829011913, "grad_norm": 0.6078153848648071, "learning_rate": 0.00010458741927768475, "loss": 0.0384, "step": 14122 }, { "epoch": 1.9793973370707778, "grad_norm": 0.5511221885681152, "learning_rate": 0.00010457306864386509, "loss": 0.04, "step": 14123 }, { "epoch": 1.9795374912403645, "grad_norm": 0.9764310121536255, "learning_rate": 0.00010455871801004543, "loss": 0.081, "step": 14124 }, { "epoch": 1.979677645409951, "grad_norm": 0.9053546786308289, "learning_rate": 0.00010454436737622577, "loss": 0.09, "step": 14125 
}, { "epoch": 1.9798177995795374, "grad_norm": 0.22252118587493896, "learning_rate": 0.00010453001674240612, "loss": 0.0271, "step": 14126 }, { "epoch": 1.979957953749124, "grad_norm": 0.1598093956708908, "learning_rate": 0.00010451566610858646, "loss": 0.0133, "step": 14127 }, { "epoch": 1.9800981079187105, "grad_norm": 0.4796890914440155, "learning_rate": 0.00010450131547476679, "loss": 0.0758, "step": 14128 }, { "epoch": 1.9802382620882972, "grad_norm": 0.3614867329597473, "learning_rate": 0.00010448696484094713, "loss": 0.0299, "step": 14129 }, { "epoch": 1.9803784162578837, "grad_norm": 0.13194267451763153, "learning_rate": 0.00010447261420712746, "loss": 0.0171, "step": 14130 }, { "epoch": 1.9805185704274701, "grad_norm": 0.598003089427948, "learning_rate": 0.00010445826357330781, "loss": 0.1502, "step": 14131 }, { "epoch": 1.9806587245970566, "grad_norm": 0.7155991196632385, "learning_rate": 0.00010444391293948816, "loss": 0.0321, "step": 14132 }, { "epoch": 1.9807988787666433, "grad_norm": 0.490131676197052, "learning_rate": 0.00010442956230566848, "loss": 0.0418, "step": 14133 }, { "epoch": 1.98093903293623, "grad_norm": 0.47156602144241333, "learning_rate": 0.00010441521167184883, "loss": 0.0303, "step": 14134 }, { "epoch": 1.9810791871058164, "grad_norm": 1.3970472812652588, "learning_rate": 0.00010440086103802918, "loss": 0.0927, "step": 14135 }, { "epoch": 1.981219341275403, "grad_norm": 0.4822499454021454, "learning_rate": 0.00010438651040420951, "loss": 0.0566, "step": 14136 }, { "epoch": 1.9813594954449893, "grad_norm": 0.20475226640701294, "learning_rate": 0.00010437215977038985, "loss": 0.0389, "step": 14137 }, { "epoch": 1.981499649614576, "grad_norm": 0.551353931427002, "learning_rate": 0.0001043578091365702, "loss": 0.0355, "step": 14138 }, { "epoch": 1.9816398037841627, "grad_norm": 0.24566631019115448, "learning_rate": 0.00010434345850275052, "loss": 0.0694, "step": 14139 }, { "epoch": 1.9817799579537492, "grad_norm": 0.49406033754348755, 
"learning_rate": 0.00010432910786893087, "loss": 0.0594, "step": 14140 }, { "epoch": 1.9819201121233356, "grad_norm": 0.20493844151496887, "learning_rate": 0.00010431475723511121, "loss": 0.0938, "step": 14141 }, { "epoch": 1.982060266292922, "grad_norm": 0.4261718690395355, "learning_rate": 0.00010430040660129155, "loss": 0.0726, "step": 14142 }, { "epoch": 1.9822004204625088, "grad_norm": 0.22277750074863434, "learning_rate": 0.00010428605596747189, "loss": 0.0169, "step": 14143 }, { "epoch": 1.9823405746320955, "grad_norm": 0.21987660229206085, "learning_rate": 0.00010427170533365222, "loss": 0.0213, "step": 14144 }, { "epoch": 1.982480728801682, "grad_norm": 0.46280214190483093, "learning_rate": 0.00010425735469983256, "loss": 0.0525, "step": 14145 }, { "epoch": 1.9826208829712684, "grad_norm": 0.3055768609046936, "learning_rate": 0.00010424300406601292, "loss": 0.0706, "step": 14146 }, { "epoch": 1.9827610371408548, "grad_norm": 0.2504782974720001, "learning_rate": 0.00010422865343219325, "loss": 0.0256, "step": 14147 }, { "epoch": 1.9829011913104415, "grad_norm": 0.09164685755968094, "learning_rate": 0.00010421430279837359, "loss": 0.0091, "step": 14148 }, { "epoch": 1.9830413454800282, "grad_norm": 0.643365740776062, "learning_rate": 0.00010419995216455392, "loss": 0.0428, "step": 14149 }, { "epoch": 1.9831814996496147, "grad_norm": 0.2546672523021698, "learning_rate": 0.00010418560153073426, "loss": 0.0175, "step": 14150 }, { "epoch": 1.9833216538192011, "grad_norm": 0.18915030360221863, "learning_rate": 0.00010417125089691462, "loss": 0.012, "step": 14151 }, { "epoch": 1.9834618079887876, "grad_norm": 0.24554553627967834, "learning_rate": 0.00010415690026309494, "loss": 0.0397, "step": 14152 }, { "epoch": 1.9836019621583743, "grad_norm": 0.9851786494255066, "learning_rate": 0.00010414254962927529, "loss": 0.1146, "step": 14153 }, { "epoch": 1.9837421163279607, "grad_norm": 0.1032838299870491, "learning_rate": 0.00010412819899545563, "loss": 0.0091, "step": 
14154 }, { "epoch": 1.9838822704975474, "grad_norm": 0.24289239943027496, "learning_rate": 0.00010411384836163596, "loss": 0.0494, "step": 14155 }, { "epoch": 1.9840224246671339, "grad_norm": 0.27520713210105896, "learning_rate": 0.0001040994977278163, "loss": 0.0189, "step": 14156 }, { "epoch": 1.9841625788367203, "grad_norm": 0.1735120564699173, "learning_rate": 0.00010408514709399664, "loss": 0.0124, "step": 14157 }, { "epoch": 1.9843027330063068, "grad_norm": 0.273334264755249, "learning_rate": 0.00010407079646017698, "loss": 0.0555, "step": 14158 }, { "epoch": 1.9844428871758935, "grad_norm": 0.18390794098377228, "learning_rate": 0.00010405644582635733, "loss": 0.0344, "step": 14159 }, { "epoch": 1.9845830413454801, "grad_norm": 0.36073118448257446, "learning_rate": 0.00010404209519253765, "loss": 0.0507, "step": 14160 }, { "epoch": 1.9847231955150666, "grad_norm": 0.21442502737045288, "learning_rate": 0.000104027744558718, "loss": 0.0286, "step": 14161 }, { "epoch": 1.984863349684653, "grad_norm": 0.26301631331443787, "learning_rate": 0.00010401339392489835, "loss": 0.0713, "step": 14162 }, { "epoch": 1.9850035038542395, "grad_norm": 0.4175613820552826, "learning_rate": 0.00010399904329107868, "loss": 0.0496, "step": 14163 }, { "epoch": 1.9851436580238262, "grad_norm": 0.24470297992229462, "learning_rate": 0.00010398469265725902, "loss": 0.0578, "step": 14164 }, { "epoch": 1.9852838121934129, "grad_norm": 0.32175448536872864, "learning_rate": 0.00010397034202343935, "loss": 0.0808, "step": 14165 }, { "epoch": 1.9854239663629993, "grad_norm": 0.9071288108825684, "learning_rate": 0.00010395599138961969, "loss": 0.0697, "step": 14166 }, { "epoch": 1.9855641205325858, "grad_norm": 0.35176801681518555, "learning_rate": 0.00010394164075580005, "loss": 0.0163, "step": 14167 }, { "epoch": 1.9857042747021723, "grad_norm": 0.15793417394161224, "learning_rate": 0.00010392729012198038, "loss": 0.0281, "step": 14168 }, { "epoch": 1.985844428871759, "grad_norm": 
0.3313787281513214, "learning_rate": 0.00010391293948816072, "loss": 0.0496, "step": 14169 }, { "epoch": 1.9859845830413456, "grad_norm": 0.5487057566642761, "learning_rate": 0.00010389858885434106, "loss": 0.0862, "step": 14170 }, { "epoch": 1.986124737210932, "grad_norm": 0.14403605461120605, "learning_rate": 0.00010388423822052139, "loss": 0.0097, "step": 14171 }, { "epoch": 1.9862648913805185, "grad_norm": 0.2174629420042038, "learning_rate": 0.00010386988758670173, "loss": 0.0222, "step": 14172 }, { "epoch": 1.986405045550105, "grad_norm": 0.8246282339096069, "learning_rate": 0.00010385553695288209, "loss": 0.0449, "step": 14173 }, { "epoch": 1.9865451997196917, "grad_norm": 0.5739886164665222, "learning_rate": 0.00010384118631906242, "loss": 0.022, "step": 14174 }, { "epoch": 1.9866853538892784, "grad_norm": 1.486023187637329, "learning_rate": 0.00010382683568524276, "loss": 0.063, "step": 14175 }, { "epoch": 1.9868255080588648, "grad_norm": 0.362218976020813, "learning_rate": 0.00010381248505142309, "loss": 0.069, "step": 14176 }, { "epoch": 1.9869656622284513, "grad_norm": 0.4330422282218933, "learning_rate": 0.00010379813441760343, "loss": 0.0433, "step": 14177 }, { "epoch": 1.9871058163980377, "grad_norm": 0.4412398636341095, "learning_rate": 0.00010378378378378378, "loss": 0.018, "step": 14178 }, { "epoch": 1.9872459705676244, "grad_norm": 0.1058543473482132, "learning_rate": 0.00010376943314996411, "loss": 0.0073, "step": 14179 }, { "epoch": 1.9873861247372109, "grad_norm": 0.1804308146238327, "learning_rate": 0.00010375508251614446, "loss": 0.0422, "step": 14180 }, { "epoch": 1.9875262789067976, "grad_norm": 0.2622183561325073, "learning_rate": 0.0001037407318823248, "loss": 0.032, "step": 14181 }, { "epoch": 1.987666433076384, "grad_norm": 0.3941948413848877, "learning_rate": 0.00010372638124850513, "loss": 0.0447, "step": 14182 }, { "epoch": 1.9878065872459705, "grad_norm": 1.0045377016067505, "learning_rate": 0.00010371203061468548, "loss": 0.0499, 
"step": 14183 }, { "epoch": 1.9879467414155572, "grad_norm": 0.19761860370635986, "learning_rate": 0.00010369767998086581, "loss": 0.0295, "step": 14184 }, { "epoch": 1.9880868955851436, "grad_norm": 2.321589469909668, "learning_rate": 0.00010368332934704615, "loss": 0.1747, "step": 14185 }, { "epoch": 1.9882270497547303, "grad_norm": 0.28479689359664917, "learning_rate": 0.0001036689787132265, "loss": 0.0238, "step": 14186 }, { "epoch": 1.9883672039243168, "grad_norm": 0.24905124306678772, "learning_rate": 0.00010365462807940682, "loss": 0.0152, "step": 14187 }, { "epoch": 1.9885073580939032, "grad_norm": 0.16113144159317017, "learning_rate": 0.00010364027744558717, "loss": 0.0241, "step": 14188 }, { "epoch": 1.9886475122634897, "grad_norm": 0.2999199628829956, "learning_rate": 0.00010362592681176752, "loss": 0.0387, "step": 14189 }, { "epoch": 1.9887876664330764, "grad_norm": 0.301959365606308, "learning_rate": 0.00010361157617794785, "loss": 0.0583, "step": 14190 }, { "epoch": 1.988927820602663, "grad_norm": 0.21860761940479279, "learning_rate": 0.00010359722554412819, "loss": 0.0207, "step": 14191 }, { "epoch": 1.9890679747722495, "grad_norm": 0.260749489068985, "learning_rate": 0.00010358287491030852, "loss": 0.0166, "step": 14192 }, { "epoch": 1.989208128941836, "grad_norm": 0.3328326940536499, "learning_rate": 0.00010356852427648886, "loss": 0.0972, "step": 14193 }, { "epoch": 1.9893482831114224, "grad_norm": 0.2738496661186218, "learning_rate": 0.00010355417364266922, "loss": 0.0228, "step": 14194 }, { "epoch": 1.989488437281009, "grad_norm": 0.20044095814228058, "learning_rate": 0.00010353982300884955, "loss": 0.0264, "step": 14195 }, { "epoch": 1.9896285914505958, "grad_norm": 0.322775661945343, "learning_rate": 0.00010352547237502989, "loss": 0.0439, "step": 14196 }, { "epoch": 1.9897687456201822, "grad_norm": 0.24739785492420197, "learning_rate": 0.00010351112174121023, "loss": 0.0369, "step": 14197 }, { "epoch": 1.9899088997897687, "grad_norm": 
0.11036693304777145, "learning_rate": 0.00010349677110739056, "loss": 0.0128, "step": 14198 }, { "epoch": 1.9900490539593552, "grad_norm": 0.4505460858345032, "learning_rate": 0.00010348242047357092, "loss": 0.0628, "step": 14199 }, { "epoch": 1.9901892081289418, "grad_norm": 0.28146296739578247, "learning_rate": 0.00010346806983975124, "loss": 0.0522, "step": 14200 }, { "epoch": 1.9903293622985285, "grad_norm": 0.23998992145061493, "learning_rate": 0.00010345371920593159, "loss": 0.0427, "step": 14201 }, { "epoch": 1.990469516468115, "grad_norm": 0.19448859989643097, "learning_rate": 0.00010343936857211193, "loss": 0.0192, "step": 14202 }, { "epoch": 1.9906096706377014, "grad_norm": 0.22926148772239685, "learning_rate": 0.00010342501793829226, "loss": 0.0447, "step": 14203 }, { "epoch": 1.990749824807288, "grad_norm": 0.2581430673599243, "learning_rate": 0.0001034106673044726, "loss": 0.0284, "step": 14204 }, { "epoch": 1.9908899789768746, "grad_norm": 0.19479742646217346, "learning_rate": 0.00010339631667065295, "loss": 0.0281, "step": 14205 }, { "epoch": 1.9910301331464613, "grad_norm": 0.3242775499820709, "learning_rate": 0.00010338196603683328, "loss": 0.0247, "step": 14206 }, { "epoch": 1.9911702873160477, "grad_norm": 0.33048728108406067, "learning_rate": 0.00010336761540301362, "loss": 0.1099, "step": 14207 }, { "epoch": 1.9913104414856342, "grad_norm": 0.08701975643634796, "learning_rate": 0.00010335326476919397, "loss": 0.0075, "step": 14208 }, { "epoch": 1.9914505956552206, "grad_norm": 0.42110908031463623, "learning_rate": 0.0001033389141353743, "loss": 0.0247, "step": 14209 }, { "epoch": 1.9915907498248073, "grad_norm": 0.2061358094215393, "learning_rate": 0.00010332456350155465, "loss": 0.02, "step": 14210 }, { "epoch": 1.9917309039943938, "grad_norm": 0.7582136988639832, "learning_rate": 0.00010331021286773498, "loss": 0.0274, "step": 14211 }, { "epoch": 1.9918710581639805, "grad_norm": 0.18745380640029907, "learning_rate": 0.00010329586223391532, 
"loss": 0.0276, "step": 14212 }, { "epoch": 1.992011212333567, "grad_norm": 0.26254788041114807, "learning_rate": 0.00010328151160009566, "loss": 0.0372, "step": 14213 }, { "epoch": 1.9921513665031534, "grad_norm": 0.4268210828304291, "learning_rate": 0.00010326716096627599, "loss": 0.026, "step": 14214 }, { "epoch": 1.99229152067274, "grad_norm": 0.8060597777366638, "learning_rate": 0.00010325281033245635, "loss": 0.0357, "step": 14215 }, { "epoch": 1.9924316748423265, "grad_norm": 0.411082923412323, "learning_rate": 0.00010323845969863669, "loss": 0.0911, "step": 14216 }, { "epoch": 1.9925718290119132, "grad_norm": 0.17483334243297577, "learning_rate": 0.00010322410906481702, "loss": 0.0162, "step": 14217 }, { "epoch": 1.9927119831814997, "grad_norm": 0.998288631439209, "learning_rate": 0.00010320975843099736, "loss": 0.0714, "step": 14218 }, { "epoch": 1.9928521373510861, "grad_norm": 0.4254349172115326, "learning_rate": 0.00010319540779717769, "loss": 0.0398, "step": 14219 }, { "epoch": 1.9929922915206726, "grad_norm": 0.6960335969924927, "learning_rate": 0.00010318105716335805, "loss": 0.1023, "step": 14220 }, { "epoch": 1.9931324456902593, "grad_norm": 0.2798539102077484, "learning_rate": 0.00010316670652953839, "loss": 0.0415, "step": 14221 }, { "epoch": 1.993272599859846, "grad_norm": 0.09360259026288986, "learning_rate": 0.00010315235589571872, "loss": 0.0237, "step": 14222 }, { "epoch": 1.9934127540294324, "grad_norm": 0.2600257098674774, "learning_rate": 0.00010313800526189906, "loss": 0.0222, "step": 14223 }, { "epoch": 1.9935529081990189, "grad_norm": 0.5302936434745789, "learning_rate": 0.0001031236546280794, "loss": 0.0556, "step": 14224 }, { "epoch": 1.9936930623686053, "grad_norm": 0.3347436487674713, "learning_rate": 0.00010310930399425973, "loss": 0.0258, "step": 14225 }, { "epoch": 1.993833216538192, "grad_norm": 0.24498754739761353, "learning_rate": 0.00010309495336044008, "loss": 0.0334, "step": 14226 }, { "epoch": 1.9939733707077787, 
"grad_norm": 0.16731178760528564, "learning_rate": 0.00010308060272662041, "loss": 0.0148, "step": 14227 }, { "epoch": 1.9941135248773652, "grad_norm": 0.6915204524993896, "learning_rate": 0.00010306625209280076, "loss": 0.0776, "step": 14228 }, { "epoch": 1.9942536790469516, "grad_norm": 0.09304513782262802, "learning_rate": 0.0001030519014589811, "loss": 0.0082, "step": 14229 }, { "epoch": 1.994393833216538, "grad_norm": 0.056376006454229355, "learning_rate": 0.00010303755082516143, "loss": 0.004, "step": 14230 }, { "epoch": 1.9945339873861248, "grad_norm": 0.6417027711868286, "learning_rate": 0.00010302320019134178, "loss": 0.0605, "step": 14231 }, { "epoch": 1.9946741415557114, "grad_norm": 0.5420274138450623, "learning_rate": 0.00010300884955752212, "loss": 0.0969, "step": 14232 }, { "epoch": 1.994814295725298, "grad_norm": 0.8372558951377869, "learning_rate": 0.00010299449892370245, "loss": 0.0789, "step": 14233 }, { "epoch": 1.9949544498948844, "grad_norm": 1.303813099861145, "learning_rate": 0.0001029801482898828, "loss": 0.0694, "step": 14234 }, { "epoch": 1.9950946040644708, "grad_norm": 0.2677852213382721, "learning_rate": 0.00010296579765606312, "loss": 0.0538, "step": 14235 }, { "epoch": 1.9952347582340575, "grad_norm": 0.2184418886899948, "learning_rate": 0.00010295144702224348, "loss": 0.0224, "step": 14236 }, { "epoch": 1.9953749124036442, "grad_norm": 0.4506930410861969, "learning_rate": 0.00010293709638842382, "loss": 0.0767, "step": 14237 }, { "epoch": 1.9955150665732306, "grad_norm": 0.31031256914138794, "learning_rate": 0.00010292274575460415, "loss": 0.0573, "step": 14238 }, { "epoch": 1.995655220742817, "grad_norm": 0.16498108208179474, "learning_rate": 0.00010290839512078449, "loss": 0.0117, "step": 14239 }, { "epoch": 1.9957953749124036, "grad_norm": 0.5877910256385803, "learning_rate": 0.00010289404448696483, "loss": 0.0762, "step": 14240 }, { "epoch": 1.9959355290819902, "grad_norm": 0.19379130005836487, "learning_rate": 
0.00010287969385314516, "loss": 0.0345, "step": 14241 }, { "epoch": 1.9960756832515767, "grad_norm": 0.13378974795341492, "learning_rate": 0.00010286534321932552, "loss": 0.0227, "step": 14242 }, { "epoch": 1.9962158374211634, "grad_norm": 0.35973796248435974, "learning_rate": 0.00010285099258550585, "loss": 0.0734, "step": 14243 }, { "epoch": 1.9963559915907498, "grad_norm": 0.13812102377414703, "learning_rate": 0.00010283664195168619, "loss": 0.0127, "step": 14244 }, { "epoch": 1.9964961457603363, "grad_norm": 0.3664568364620209, "learning_rate": 0.00010282229131786653, "loss": 0.0692, "step": 14245 }, { "epoch": 1.9966362999299228, "grad_norm": 0.26672065258026123, "learning_rate": 0.00010280794068404686, "loss": 0.0718, "step": 14246 }, { "epoch": 1.9967764540995094, "grad_norm": 0.375955730676651, "learning_rate": 0.00010279359005022721, "loss": 0.0571, "step": 14247 }, { "epoch": 1.9969166082690961, "grad_norm": 0.8177602291107178, "learning_rate": 0.00010277923941640756, "loss": 0.0375, "step": 14248 }, { "epoch": 1.9970567624386826, "grad_norm": 0.31063029170036316, "learning_rate": 0.00010276488878258789, "loss": 0.1102, "step": 14249 }, { "epoch": 1.997196916608269, "grad_norm": 0.6209132075309753, "learning_rate": 0.00010275053814876823, "loss": 0.1039, "step": 14250 }, { "epoch": 1.9973370707778555, "grad_norm": 0.22595059871673584, "learning_rate": 0.00010273618751494857, "loss": 0.0312, "step": 14251 }, { "epoch": 1.9974772249474422, "grad_norm": 0.05967971310019493, "learning_rate": 0.00010272183688112891, "loss": 0.0067, "step": 14252 }, { "epoch": 1.9976173791170289, "grad_norm": 0.2264532446861267, "learning_rate": 0.00010270748624730925, "loss": 0.0409, "step": 14253 }, { "epoch": 1.9977575332866153, "grad_norm": 0.4629777669906616, "learning_rate": 0.00010269313561348958, "loss": 0.0735, "step": 14254 }, { "epoch": 1.9978976874562018, "grad_norm": 0.27865928411483765, "learning_rate": 0.00010267878497966992, "loss": 0.0429, "step": 14255 }, { 
"epoch": 1.9980378416257882, "grad_norm": 0.21995767951011658, "learning_rate": 0.00010266443434585027, "loss": 0.0286, "step": 14256 }, { "epoch": 1.998177995795375, "grad_norm": 0.2889644503593445, "learning_rate": 0.0001026500837120306, "loss": 0.0552, "step": 14257 }, { "epoch": 1.9983181499649616, "grad_norm": 0.2833872437477112, "learning_rate": 0.00010263573307821095, "loss": 0.0241, "step": 14258 }, { "epoch": 1.998458304134548, "grad_norm": 0.1358339488506317, "learning_rate": 0.00010262138244439129, "loss": 0.0151, "step": 14259 }, { "epoch": 1.9985984583041345, "grad_norm": 0.7646512389183044, "learning_rate": 0.00010260703181057162, "loss": 0.0738, "step": 14260 }, { "epoch": 1.998738612473721, "grad_norm": 0.3783266544342041, "learning_rate": 0.00010259268117675196, "loss": 0.0595, "step": 14261 }, { "epoch": 1.9988787666433077, "grad_norm": 0.6213785409927368, "learning_rate": 0.00010257833054293229, "loss": 0.1944, "step": 14262 }, { "epoch": 1.9990189208128943, "grad_norm": 0.3035310208797455, "learning_rate": 0.00010256397990911265, "loss": 0.013, "step": 14263 }, { "epoch": 1.9991590749824808, "grad_norm": 0.22235266864299774, "learning_rate": 0.00010254962927529299, "loss": 0.0399, "step": 14264 }, { "epoch": 1.9992992291520673, "grad_norm": 0.2720561921596527, "learning_rate": 0.00010253527864147332, "loss": 0.0261, "step": 14265 }, { "epoch": 1.9994393833216537, "grad_norm": 0.2022603452205658, "learning_rate": 0.00010252092800765366, "loss": 0.019, "step": 14266 }, { "epoch": 1.9995795374912404, "grad_norm": 0.4893212616443634, "learning_rate": 0.000102506577373834, "loss": 0.0503, "step": 14267 }, { "epoch": 1.9997196916608269, "grad_norm": 0.283733606338501, "learning_rate": 0.00010249222674001434, "loss": 0.0101, "step": 14268 }, { "epoch": 1.9998598458304135, "grad_norm": 3.0472166538238525, "learning_rate": 0.00010247787610619469, "loss": 0.2659, "step": 14269 }, { "epoch": 2.0, "grad_norm": 3.6867873668670654, "learning_rate": 
0.00010246352547237502, "loss": 0.3818, "step": 14270 }, { "epoch": 2.0001401541695865, "grad_norm": 0.23136372864246368, "learning_rate": 0.00010244917483855536, "loss": 0.0377, "step": 14271 }, { "epoch": 2.000280308339173, "grad_norm": 0.3771960735321045, "learning_rate": 0.0001024348242047357, "loss": 0.0314, "step": 14272 }, { "epoch": 2.00042046250876, "grad_norm": 0.18012498319149017, "learning_rate": 0.00010242047357091603, "loss": 0.0263, "step": 14273 }, { "epoch": 2.0005606166783463, "grad_norm": 0.48979452252388, "learning_rate": 0.00010240612293709638, "loss": 0.0296, "step": 14274 }, { "epoch": 2.0007007708479327, "grad_norm": 0.5342593789100647, "learning_rate": 0.00010239177230327673, "loss": 0.0347, "step": 14275 }, { "epoch": 2.000840925017519, "grad_norm": 0.3313027024269104, "learning_rate": 0.00010237742166945705, "loss": 0.0557, "step": 14276 }, { "epoch": 2.0009810791871057, "grad_norm": 0.4729999899864197, "learning_rate": 0.0001023630710356374, "loss": 0.0338, "step": 14277 }, { "epoch": 2.0011212333566926, "grad_norm": 0.18270646035671234, "learning_rate": 0.00010234872040181773, "loss": 0.0404, "step": 14278 }, { "epoch": 2.001261387526279, "grad_norm": 0.2502637803554535, "learning_rate": 0.00010233436976799808, "loss": 0.0435, "step": 14279 }, { "epoch": 2.0014015416958655, "grad_norm": 0.23573294281959534, "learning_rate": 0.00010232001913417842, "loss": 0.0401, "step": 14280 }, { "epoch": 2.001541695865452, "grad_norm": 0.21577972173690796, "learning_rate": 0.00010230566850035875, "loss": 0.0194, "step": 14281 }, { "epoch": 2.0016818500350384, "grad_norm": 0.3586162328720093, "learning_rate": 0.0001022913178665391, "loss": 0.0583, "step": 14282 }, { "epoch": 2.0018220042046253, "grad_norm": 0.35905852913856506, "learning_rate": 0.00010227696723271944, "loss": 0.0536, "step": 14283 }, { "epoch": 2.0019621583742118, "grad_norm": 0.1588849574327469, "learning_rate": 0.00010226261659889978, "loss": 0.014, "step": 14284 }, { "epoch": 
2.0021023125437982, "grad_norm": 0.13074587285518646, "learning_rate": 0.00010224826596508012, "loss": 0.0092, "step": 14285 }, { "epoch": 2.0022424667133847, "grad_norm": 0.2219579666852951, "learning_rate": 0.00010223391533126046, "loss": 0.0183, "step": 14286 }, { "epoch": 2.002382620882971, "grad_norm": 0.29415473341941833, "learning_rate": 0.00010221956469744079, "loss": 0.0787, "step": 14287 }, { "epoch": 2.0025227750525576, "grad_norm": 0.4591987133026123, "learning_rate": 0.00010220521406362113, "loss": 0.0667, "step": 14288 }, { "epoch": 2.0026629292221445, "grad_norm": 0.4064667522907257, "learning_rate": 0.00010219086342980146, "loss": 0.0423, "step": 14289 }, { "epoch": 2.002803083391731, "grad_norm": 0.22674909234046936, "learning_rate": 0.00010217651279598182, "loss": 0.0452, "step": 14290 }, { "epoch": 2.0029432375613174, "grad_norm": 0.10505981743335724, "learning_rate": 0.00010216216216216216, "loss": 0.0111, "step": 14291 }, { "epoch": 2.003083391730904, "grad_norm": 0.3039432168006897, "learning_rate": 0.00010214781152834249, "loss": 0.098, "step": 14292 }, { "epoch": 2.0032235459004903, "grad_norm": 0.11148194968700409, "learning_rate": 0.00010213346089452283, "loss": 0.0048, "step": 14293 }, { "epoch": 2.0033637000700772, "grad_norm": 0.11342445015907288, "learning_rate": 0.00010211911026070319, "loss": 0.0086, "step": 14294 }, { "epoch": 2.0035038542396637, "grad_norm": 0.19621862471103668, "learning_rate": 0.00010210475962688351, "loss": 0.0227, "step": 14295 }, { "epoch": 2.00364400840925, "grad_norm": 0.20716015994548798, "learning_rate": 0.00010209040899306386, "loss": 0.0496, "step": 14296 }, { "epoch": 2.0037841625788366, "grad_norm": 0.5497312545776367, "learning_rate": 0.00010207605835924418, "loss": 0.0781, "step": 14297 }, { "epoch": 2.003924316748423, "grad_norm": 0.22204463183879852, "learning_rate": 0.00010206170772542453, "loss": 0.0081, "step": 14298 }, { "epoch": 2.00406447091801, "grad_norm": 0.1989804357290268, 
"learning_rate": 0.00010204735709160487, "loss": 0.0221, "step": 14299 }, { "epoch": 2.0042046250875964, "grad_norm": 0.26693519949913025, "learning_rate": 0.00010203300645778521, "loss": 0.0461, "step": 14300 }, { "epoch": 2.004344779257183, "grad_norm": 0.11593149602413177, "learning_rate": 0.00010201865582396555, "loss": 0.0402, "step": 14301 }, { "epoch": 2.0044849334267694, "grad_norm": 0.255950927734375, "learning_rate": 0.0001020043051901459, "loss": 0.0129, "step": 14302 }, { "epoch": 2.004625087596356, "grad_norm": 0.1897752434015274, "learning_rate": 0.00010198995455632622, "loss": 0.015, "step": 14303 }, { "epoch": 2.0047652417659427, "grad_norm": 0.8016980886459351, "learning_rate": 0.00010197560392250657, "loss": 0.1002, "step": 14304 }, { "epoch": 2.004905395935529, "grad_norm": 0.0957736074924469, "learning_rate": 0.0001019612532886869, "loss": 0.0129, "step": 14305 }, { "epoch": 2.0050455501051156, "grad_norm": 0.1342475414276123, "learning_rate": 0.00010194690265486725, "loss": 0.0053, "step": 14306 }, { "epoch": 2.005185704274702, "grad_norm": 0.10899920761585236, "learning_rate": 0.00010193255202104759, "loss": 0.0058, "step": 14307 }, { "epoch": 2.0053258584442886, "grad_norm": 0.03692729398608208, "learning_rate": 0.00010191820138722792, "loss": 0.0022, "step": 14308 }, { "epoch": 2.0054660126138755, "grad_norm": 0.08919638395309448, "learning_rate": 0.00010190385075340826, "loss": 0.0059, "step": 14309 }, { "epoch": 2.005606166783462, "grad_norm": 0.23241807520389557, "learning_rate": 0.00010188950011958862, "loss": 0.0122, "step": 14310 }, { "epoch": 2.0057463209530484, "grad_norm": 0.06809527426958084, "learning_rate": 0.00010187514948576895, "loss": 0.0053, "step": 14311 }, { "epoch": 2.005886475122635, "grad_norm": 0.23260357975959778, "learning_rate": 0.00010186079885194929, "loss": 0.0394, "step": 14312 }, { "epoch": 2.0060266292922213, "grad_norm": 0.45001575350761414, "learning_rate": 0.00010184644821812962, "loss": 0.0242, "step": 
14313 }, { "epoch": 2.0061667834618078, "grad_norm": 0.46052512526512146, "learning_rate": 0.00010183209758430996, "loss": 0.0139, "step": 14314 }, { "epoch": 2.0063069376313947, "grad_norm": 0.45768025517463684, "learning_rate": 0.0001018177469504903, "loss": 0.0123, "step": 14315 }, { "epoch": 2.006447091800981, "grad_norm": 0.3107963800430298, "learning_rate": 0.00010180339631667064, "loss": 0.059, "step": 14316 }, { "epoch": 2.0065872459705676, "grad_norm": 0.7314419150352478, "learning_rate": 0.00010178904568285099, "loss": 0.0566, "step": 14317 }, { "epoch": 2.006727400140154, "grad_norm": 0.29502925276756287, "learning_rate": 0.00010177469504903133, "loss": 0.0108, "step": 14318 }, { "epoch": 2.0068675543097405, "grad_norm": 0.2972310781478882, "learning_rate": 0.00010176034441521166, "loss": 0.0128, "step": 14319 }, { "epoch": 2.0070077084793274, "grad_norm": 0.7865574955940247, "learning_rate": 0.000101745993781392, "loss": 0.0275, "step": 14320 }, { "epoch": 2.007147862648914, "grad_norm": 0.20951853692531586, "learning_rate": 0.00010173164314757235, "loss": 0.0219, "step": 14321 }, { "epoch": 2.0072880168185003, "grad_norm": 0.3417162299156189, "learning_rate": 0.00010171729251375268, "loss": 0.1068, "step": 14322 }, { "epoch": 2.007428170988087, "grad_norm": 0.18037255108356476, "learning_rate": 0.00010170294187993303, "loss": 0.073, "step": 14323 }, { "epoch": 2.0075683251576733, "grad_norm": 0.5002044439315796, "learning_rate": 0.00010168859124611335, "loss": 0.0334, "step": 14324 }, { "epoch": 2.00770847932726, "grad_norm": 0.24219584465026855, "learning_rate": 0.0001016742406122937, "loss": 0.0197, "step": 14325 }, { "epoch": 2.0078486334968466, "grad_norm": 0.028247453272342682, "learning_rate": 0.00010165988997847405, "loss": 0.0021, "step": 14326 }, { "epoch": 2.007988787666433, "grad_norm": 1.0345940589904785, "learning_rate": 0.00010164553934465438, "loss": 0.0626, "step": 14327 }, { "epoch": 2.0081289418360195, "grad_norm": 0.2338320016860962, 
"learning_rate": 0.00010163118871083472, "loss": 0.0308, "step": 14328 }, { "epoch": 2.008269096005606, "grad_norm": 0.5210989117622375, "learning_rate": 0.00010161683807701506, "loss": 0.0639, "step": 14329 }, { "epoch": 2.008409250175193, "grad_norm": 0.3688915967941284, "learning_rate": 0.00010160248744319539, "loss": 0.0263, "step": 14330 }, { "epoch": 2.0085494043447794, "grad_norm": 0.08547557145357132, "learning_rate": 0.00010158813680937575, "loss": 0.0035, "step": 14331 }, { "epoch": 2.008689558514366, "grad_norm": 0.12328723818063736, "learning_rate": 0.00010157378617555608, "loss": 0.0229, "step": 14332 }, { "epoch": 2.0088297126839523, "grad_norm": 0.3097442090511322, "learning_rate": 0.00010155943554173642, "loss": 0.0425, "step": 14333 }, { "epoch": 2.0089698668535387, "grad_norm": 0.04403432458639145, "learning_rate": 0.00010154508490791676, "loss": 0.0048, "step": 14334 }, { "epoch": 2.0091100210231256, "grad_norm": 0.21485382318496704, "learning_rate": 0.00010153073427409709, "loss": 0.0421, "step": 14335 }, { "epoch": 2.009250175192712, "grad_norm": 0.15131321549415588, "learning_rate": 0.00010151638364027743, "loss": 0.0168, "step": 14336 }, { "epoch": 2.0093903293622986, "grad_norm": 0.1158590093255043, "learning_rate": 0.00010150203300645779, "loss": 0.0193, "step": 14337 }, { "epoch": 2.009530483531885, "grad_norm": 0.126034677028656, "learning_rate": 0.00010148768237263812, "loss": 0.0239, "step": 14338 }, { "epoch": 2.0096706377014715, "grad_norm": 1.0763146877288818, "learning_rate": 0.00010147333173881846, "loss": 0.0428, "step": 14339 }, { "epoch": 2.0098107918710584, "grad_norm": 0.24444782733917236, "learning_rate": 0.00010145898110499879, "loss": 0.0231, "step": 14340 }, { "epoch": 2.009950946040645, "grad_norm": 0.3235735297203064, "learning_rate": 0.00010144463047117913, "loss": 0.0782, "step": 14341 }, { "epoch": 2.0100911002102313, "grad_norm": 0.3248410224914551, "learning_rate": 0.00010143027983735948, "loss": 0.0172, "step": 
14342 }, { "epoch": 2.0102312543798178, "grad_norm": 0.3081238269805908, "learning_rate": 0.00010141592920353981, "loss": 0.0245, "step": 14343 }, { "epoch": 2.010371408549404, "grad_norm": 0.16979824006557465, "learning_rate": 0.00010140157856972016, "loss": 0.0427, "step": 14344 }, { "epoch": 2.0105115627189907, "grad_norm": 0.06817808747291565, "learning_rate": 0.0001013872279359005, "loss": 0.0035, "step": 14345 }, { "epoch": 2.0106517168885776, "grad_norm": 0.21799680590629578, "learning_rate": 0.00010137287730208083, "loss": 0.0292, "step": 14346 }, { "epoch": 2.010791871058164, "grad_norm": 0.17592798173427582, "learning_rate": 0.00010135852666826118, "loss": 0.0151, "step": 14347 }, { "epoch": 2.0109320252277505, "grad_norm": 0.12155366688966751, "learning_rate": 0.00010134417603444151, "loss": 0.0215, "step": 14348 }, { "epoch": 2.011072179397337, "grad_norm": 0.46024125814437866, "learning_rate": 0.00010132982540062185, "loss": 0.0284, "step": 14349 }, { "epoch": 2.0112123335669234, "grad_norm": 0.015230483375489712, "learning_rate": 0.0001013154747668022, "loss": 0.0008, "step": 14350 }, { "epoch": 2.0113524877365103, "grad_norm": 0.03840211033821106, "learning_rate": 0.00010130112413298252, "loss": 0.0016, "step": 14351 }, { "epoch": 2.011492641906097, "grad_norm": 0.2641845643520355, "learning_rate": 0.00010128677349916287, "loss": 0.0515, "step": 14352 }, { "epoch": 2.0116327960756832, "grad_norm": 0.31310683488845825, "learning_rate": 0.00010127242286534322, "loss": 0.0128, "step": 14353 }, { "epoch": 2.0117729502452697, "grad_norm": 0.39936432242393494, "learning_rate": 0.00010125807223152355, "loss": 0.0242, "step": 14354 }, { "epoch": 2.011913104414856, "grad_norm": 0.09522376954555511, "learning_rate": 0.00010124372159770389, "loss": 0.0044, "step": 14355 }, { "epoch": 2.012053258584443, "grad_norm": 0.2599260210990906, "learning_rate": 0.00010122937096388423, "loss": 0.0094, "step": 14356 }, { "epoch": 2.0121934127540295, "grad_norm": 
0.1651524305343628, "learning_rate": 0.00010121502033006456, "loss": 0.0195, "step": 14357 }, { "epoch": 2.012333566923616, "grad_norm": 0.6188377737998962, "learning_rate": 0.00010120066969624492, "loss": 0.118, "step": 14358 }, { "epoch": 2.0124737210932024, "grad_norm": 0.11369770765304565, "learning_rate": 0.00010118631906242525, "loss": 0.0056, "step": 14359 }, { "epoch": 2.012613875262789, "grad_norm": 0.2961896061897278, "learning_rate": 0.00010117196842860559, "loss": 0.0234, "step": 14360 }, { "epoch": 2.012754029432376, "grad_norm": 0.3469317555427551, "learning_rate": 0.00010115761779478593, "loss": 0.0916, "step": 14361 }, { "epoch": 2.0128941836019623, "grad_norm": 0.37067559361457825, "learning_rate": 0.00010114326716096626, "loss": 0.0402, "step": 14362 }, { "epoch": 2.0130343377715487, "grad_norm": 0.4539516866207123, "learning_rate": 0.00010112891652714662, "loss": 0.0261, "step": 14363 }, { "epoch": 2.013174491941135, "grad_norm": 0.32531091570854187, "learning_rate": 0.00010111456589332696, "loss": 0.0446, "step": 14364 }, { "epoch": 2.0133146461107216, "grad_norm": 0.262358158826828, "learning_rate": 0.00010110021525950729, "loss": 0.0312, "step": 14365 }, { "epoch": 2.0134548002803085, "grad_norm": 0.3419927954673767, "learning_rate": 0.00010108586462568763, "loss": 0.0404, "step": 14366 }, { "epoch": 2.013594954449895, "grad_norm": 0.9540210962295532, "learning_rate": 0.00010107151399186796, "loss": 0.0316, "step": 14367 }, { "epoch": 2.0137351086194815, "grad_norm": 0.7896899580955505, "learning_rate": 0.0001010571633580483, "loss": 0.1242, "step": 14368 }, { "epoch": 2.013875262789068, "grad_norm": 0.6743713021278381, "learning_rate": 0.00010104281272422865, "loss": 0.0419, "step": 14369 }, { "epoch": 2.0140154169586544, "grad_norm": 2.9574239253997803, "learning_rate": 0.00010102846209040898, "loss": 0.0469, "step": 14370 }, { "epoch": 2.0141555711282413, "grad_norm": 0.11079802364110947, "learning_rate": 0.00010101411145658932, "loss": 
0.0077, "step": 14371 }, { "epoch": 2.0142957252978277, "grad_norm": 0.7975444197654724, "learning_rate": 0.00010099976082276967, "loss": 0.0245, "step": 14372 }, { "epoch": 2.014435879467414, "grad_norm": 0.30389925837516785, "learning_rate": 0.00010098541018895, "loss": 0.0119, "step": 14373 }, { "epoch": 2.0145760336370007, "grad_norm": 0.09679289162158966, "learning_rate": 0.00010097105955513035, "loss": 0.0197, "step": 14374 }, { "epoch": 2.014716187806587, "grad_norm": 0.4335888922214508, "learning_rate": 0.00010095670892131068, "loss": 0.0384, "step": 14375 }, { "epoch": 2.0148563419761736, "grad_norm": 0.38530582189559937, "learning_rate": 0.00010094235828749102, "loss": 0.0594, "step": 14376 }, { "epoch": 2.0149964961457605, "grad_norm": 0.13853639364242554, "learning_rate": 0.00010092800765367136, "loss": 0.0287, "step": 14377 }, { "epoch": 2.015136650315347, "grad_norm": 0.23372463881969452, "learning_rate": 0.00010091365701985169, "loss": 0.0307, "step": 14378 }, { "epoch": 2.0152768044849334, "grad_norm": 0.4094722270965576, "learning_rate": 0.00010089930638603205, "loss": 0.0145, "step": 14379 }, { "epoch": 2.01541695865452, "grad_norm": 0.3784569203853607, "learning_rate": 0.00010088495575221239, "loss": 0.0108, "step": 14380 }, { "epoch": 2.0155571128241063, "grad_norm": 0.1826721429824829, "learning_rate": 0.00010087060511839272, "loss": 0.026, "step": 14381 }, { "epoch": 2.0156972669936932, "grad_norm": 0.3056948781013489, "learning_rate": 0.00010085625448457306, "loss": 0.0313, "step": 14382 }, { "epoch": 2.0158374211632797, "grad_norm": 0.3778788149356842, "learning_rate": 0.00010084190385075339, "loss": 0.0329, "step": 14383 }, { "epoch": 2.015977575332866, "grad_norm": 0.15764489769935608, "learning_rate": 0.00010082755321693373, "loss": 0.0331, "step": 14384 }, { "epoch": 2.0161177295024526, "grad_norm": 0.2387782484292984, "learning_rate": 0.00010081320258311409, "loss": 0.0142, "step": 14385 }, { "epoch": 2.016257883672039, "grad_norm": 
0.12807060778141022, "learning_rate": 0.00010079885194929442, "loss": 0.0148, "step": 14386 }, { "epoch": 2.016398037841626, "grad_norm": 0.12051426619291306, "learning_rate": 0.00010078450131547476, "loss": 0.0179, "step": 14387 }, { "epoch": 2.0165381920112124, "grad_norm": 0.20849700272083282, "learning_rate": 0.0001007701506816551, "loss": 0.0101, "step": 14388 }, { "epoch": 2.016678346180799, "grad_norm": 0.20255093276500702, "learning_rate": 0.00010075580004783543, "loss": 0.0095, "step": 14389 }, { "epoch": 2.0168185003503853, "grad_norm": 0.1897495687007904, "learning_rate": 0.00010074144941401578, "loss": 0.0295, "step": 14390 }, { "epoch": 2.016958654519972, "grad_norm": 0.3409312963485718, "learning_rate": 0.00010072709878019611, "loss": 0.0255, "step": 14391 }, { "epoch": 2.0170988086895587, "grad_norm": 0.29313597083091736, "learning_rate": 0.00010071274814637645, "loss": 0.0359, "step": 14392 }, { "epoch": 2.017238962859145, "grad_norm": 0.2857000529766083, "learning_rate": 0.0001006983975125568, "loss": 0.0463, "step": 14393 }, { "epoch": 2.0173791170287316, "grad_norm": 0.31748104095458984, "learning_rate": 0.00010068404687873713, "loss": 0.0403, "step": 14394 }, { "epoch": 2.017519271198318, "grad_norm": 0.36438286304473877, "learning_rate": 0.00010066969624491748, "loss": 0.0432, "step": 14395 }, { "epoch": 2.0176594253679045, "grad_norm": 0.12800566852092743, "learning_rate": 0.00010065534561109782, "loss": 0.0337, "step": 14396 }, { "epoch": 2.0177995795374915, "grad_norm": 0.2623644173145294, "learning_rate": 0.00010064099497727815, "loss": 0.0314, "step": 14397 }, { "epoch": 2.017939733707078, "grad_norm": 0.5782424211502075, "learning_rate": 0.0001006266443434585, "loss": 0.0472, "step": 14398 }, { "epoch": 2.0180798878766644, "grad_norm": 0.14538604021072388, "learning_rate": 0.00010061229370963884, "loss": 0.0162, "step": 14399 }, { "epoch": 2.018220042046251, "grad_norm": 0.4025847613811493, "learning_rate": 0.00010059794307581916, "loss": 
0.0559, "step": 14400 }, { "epoch": 2.0183601962158373, "grad_norm": 0.31601834297180176, "learning_rate": 0.00010058359244199952, "loss": 0.018, "step": 14401 }, { "epoch": 2.0185003503854237, "grad_norm": 0.06269515305757523, "learning_rate": 0.00010056924180817985, "loss": 0.0067, "step": 14402 }, { "epoch": 2.0186405045550107, "grad_norm": 0.18927812576293945, "learning_rate": 0.00010055489117436019, "loss": 0.0229, "step": 14403 }, { "epoch": 2.018780658724597, "grad_norm": 0.5609269142150879, "learning_rate": 0.00010054054054054053, "loss": 0.0519, "step": 14404 }, { "epoch": 2.0189208128941836, "grad_norm": 0.18753154575824738, "learning_rate": 0.00010052618990672086, "loss": 0.0349, "step": 14405 }, { "epoch": 2.01906096706377, "grad_norm": 0.3656015396118164, "learning_rate": 0.00010051183927290122, "loss": 0.0285, "step": 14406 }, { "epoch": 2.0192011212333565, "grad_norm": 0.7377094626426697, "learning_rate": 0.00010049748863908156, "loss": 0.0314, "step": 14407 }, { "epoch": 2.0193412754029434, "grad_norm": 0.29533520340919495, "learning_rate": 0.00010048313800526189, "loss": 0.0856, "step": 14408 }, { "epoch": 2.01948142957253, "grad_norm": 0.12153347581624985, "learning_rate": 0.00010046878737144223, "loss": 0.0308, "step": 14409 }, { "epoch": 2.0196215837421163, "grad_norm": 0.2440887838602066, "learning_rate": 0.00010045443673762256, "loss": 0.0115, "step": 14410 }, { "epoch": 2.0197617379117028, "grad_norm": 0.09669745713472366, "learning_rate": 0.00010044008610380291, "loss": 0.0383, "step": 14411 }, { "epoch": 2.0199018920812892, "grad_norm": 0.14960606396198273, "learning_rate": 0.00010042573546998326, "loss": 0.0096, "step": 14412 }, { "epoch": 2.020042046250876, "grad_norm": 0.5913904905319214, "learning_rate": 0.00010041138483616359, "loss": 0.0589, "step": 14413 }, { "epoch": 2.0201822004204626, "grad_norm": 0.26117458939552307, "learning_rate": 0.00010039703420234393, "loss": 0.0338, "step": 14414 }, { "epoch": 2.020322354590049, 
"grad_norm": 0.986532986164093, "learning_rate": 0.00010038268356852427, "loss": 0.1555, "step": 14415 }, { "epoch": 2.0204625087596355, "grad_norm": 0.24483701586723328, "learning_rate": 0.0001003683329347046, "loss": 0.0344, "step": 14416 }, { "epoch": 2.020602662929222, "grad_norm": 0.28337931632995605, "learning_rate": 0.00010035398230088495, "loss": 0.0268, "step": 14417 }, { "epoch": 2.020742817098809, "grad_norm": 0.7726407647132874, "learning_rate": 0.00010033963166706528, "loss": 0.038, "step": 14418 }, { "epoch": 2.0208829712683953, "grad_norm": 0.8181468844413757, "learning_rate": 0.00010032528103324562, "loss": 0.1987, "step": 14419 }, { "epoch": 2.021023125437982, "grad_norm": 1.5980054140090942, "learning_rate": 0.00010031093039942597, "loss": 0.2307, "step": 14420 }, { "epoch": 2.0211632796075683, "grad_norm": 0.11903064697980881, "learning_rate": 0.0001002965797656063, "loss": 0.0301, "step": 14421 }, { "epoch": 2.0213034337771547, "grad_norm": 0.11184151470661163, "learning_rate": 0.00010028222913178665, "loss": 0.0115, "step": 14422 }, { "epoch": 2.0214435879467416, "grad_norm": 0.182241752743721, "learning_rate": 0.00010026787849796699, "loss": 0.0173, "step": 14423 }, { "epoch": 2.021583742116328, "grad_norm": 0.22382840514183044, "learning_rate": 0.00010025352786414732, "loss": 0.065, "step": 14424 }, { "epoch": 2.0217238962859145, "grad_norm": 0.22317323088645935, "learning_rate": 0.00010023917723032766, "loss": 0.0296, "step": 14425 }, { "epoch": 2.021864050455501, "grad_norm": 0.3675713539123535, "learning_rate": 0.00010022482659650799, "loss": 0.0302, "step": 14426 }, { "epoch": 2.0220042046250875, "grad_norm": 0.22817359864711761, "learning_rate": 0.00010021047596268835, "loss": 0.0716, "step": 14427 }, { "epoch": 2.0221443587946744, "grad_norm": 0.2380255162715912, "learning_rate": 0.00010019612532886869, "loss": 0.0425, "step": 14428 }, { "epoch": 2.022284512964261, "grad_norm": 0.10376962274312973, "learning_rate": 
0.00010018177469504902, "loss": 0.0111, "step": 14429 }, { "epoch": 2.0224246671338473, "grad_norm": 0.12821175158023834, "learning_rate": 0.00010016742406122936, "loss": 0.0331, "step": 14430 }, { "epoch": 2.0225648213034337, "grad_norm": 0.10619758814573288, "learning_rate": 0.0001001530734274097, "loss": 0.0196, "step": 14431 }, { "epoch": 2.02270497547302, "grad_norm": 0.29811498522758484, "learning_rate": 0.00010013872279359003, "loss": 0.0524, "step": 14432 }, { "epoch": 2.0228451296426067, "grad_norm": 0.28642040491104126, "learning_rate": 0.00010012437215977039, "loss": 0.0438, "step": 14433 }, { "epoch": 2.0229852838121936, "grad_norm": 0.36200645565986633, "learning_rate": 0.00010011002152595073, "loss": 0.0585, "step": 14434 }, { "epoch": 2.02312543798178, "grad_norm": 0.12036851048469543, "learning_rate": 0.00010009567089213106, "loss": 0.0101, "step": 14435 }, { "epoch": 2.0232655921513665, "grad_norm": 0.3903593420982361, "learning_rate": 0.0001000813202583114, "loss": 0.0319, "step": 14436 }, { "epoch": 2.023405746320953, "grad_norm": 0.21410390734672546, "learning_rate": 0.00010006696962449173, "loss": 0.0153, "step": 14437 }, { "epoch": 2.0235459004905394, "grad_norm": 0.1886194944381714, "learning_rate": 0.00010005261899067208, "loss": 0.017, "step": 14438 }, { "epoch": 2.0236860546601263, "grad_norm": 0.13052061200141907, "learning_rate": 0.00010003826835685243, "loss": 0.012, "step": 14439 }, { "epoch": 2.0238262088297128, "grad_norm": 0.09371787309646606, "learning_rate": 0.00010002391772303275, "loss": 0.0111, "step": 14440 }, { "epoch": 2.023966362999299, "grad_norm": 0.18195025622844696, "learning_rate": 0.0001000095670892131, "loss": 0.034, "step": 14441 }, { "epoch": 2.0241065171688857, "grad_norm": 0.0965365394949913, "learning_rate": 9.999521645539345e-05, "loss": 0.0111, "step": 14442 }, { "epoch": 2.024246671338472, "grad_norm": 0.09762502461671829, "learning_rate": 9.998086582157378e-05, "loss": 0.0125, "step": 14443 }, { "epoch": 
2.024386825508059, "grad_norm": 0.1168842762708664, "learning_rate": 9.996651518775412e-05, "loss": 0.0132, "step": 14444 }, { "epoch": 2.0245269796776455, "grad_norm": 0.2688584327697754, "learning_rate": 9.995216455393445e-05, "loss": 0.0656, "step": 14445 }, { "epoch": 2.024667133847232, "grad_norm": 0.15135279297828674, "learning_rate": 9.99378139201148e-05, "loss": 0.0393, "step": 14446 }, { "epoch": 2.0248072880168184, "grad_norm": 0.18845799565315247, "learning_rate": 9.992346328629514e-05, "loss": 0.0143, "step": 14447 }, { "epoch": 2.024947442186405, "grad_norm": 0.20573319494724274, "learning_rate": 9.990911265247546e-05, "loss": 0.0325, "step": 14448 }, { "epoch": 2.025087596355992, "grad_norm": 0.16403938829898834, "learning_rate": 9.989476201865582e-05, "loss": 0.0239, "step": 14449 }, { "epoch": 2.0252277505255782, "grad_norm": 0.1833772212266922, "learning_rate": 9.988041138483616e-05, "loss": 0.0604, "step": 14450 }, { "epoch": 2.0253679046951647, "grad_norm": 0.07425392419099808, "learning_rate": 9.986606075101649e-05, "loss": 0.0049, "step": 14451 }, { "epoch": 2.025508058864751, "grad_norm": 0.4004087746143341, "learning_rate": 9.985171011719683e-05, "loss": 0.029, "step": 14452 }, { "epoch": 2.0256482130343376, "grad_norm": 1.3883413076400757, "learning_rate": 9.983735948337716e-05, "loss": 0.0555, "step": 14453 }, { "epoch": 2.0257883672039245, "grad_norm": 0.2745436131954193, "learning_rate": 9.982300884955752e-05, "loss": 0.0208, "step": 14454 }, { "epoch": 2.025928521373511, "grad_norm": 0.2581547796726227, "learning_rate": 9.980865821573786e-05, "loss": 0.047, "step": 14455 }, { "epoch": 2.0260686755430974, "grad_norm": 0.1561075747013092, "learning_rate": 9.979430758191819e-05, "loss": 0.0206, "step": 14456 }, { "epoch": 2.026208829712684, "grad_norm": 0.026349002495408058, "learning_rate": 9.977995694809853e-05, "loss": 0.0036, "step": 14457 }, { "epoch": 2.0263489838822704, "grad_norm": 0.1585310995578766, "learning_rate": 
9.976560631427889e-05, "loss": 0.0176, "step": 14458 }, { "epoch": 2.026489138051857, "grad_norm": 0.1944943368434906, "learning_rate": 9.975125568045921e-05, "loss": 0.0181, "step": 14459 }, { "epoch": 2.0266292922214437, "grad_norm": 0.12261159718036652, "learning_rate": 9.973690504663956e-05, "loss": 0.0143, "step": 14460 }, { "epoch": 2.02676944639103, "grad_norm": 0.5750231742858887, "learning_rate": 9.972255441281988e-05, "loss": 0.0414, "step": 14461 }, { "epoch": 2.0269096005606166, "grad_norm": 0.3503555357456207, "learning_rate": 9.970820377900023e-05, "loss": 0.0275, "step": 14462 }, { "epoch": 2.027049754730203, "grad_norm": 0.5148195028305054, "learning_rate": 9.969385314518057e-05, "loss": 0.0185, "step": 14463 }, { "epoch": 2.0271899088997896, "grad_norm": 0.486939400434494, "learning_rate": 9.96795025113609e-05, "loss": 0.0452, "step": 14464 }, { "epoch": 2.0273300630693765, "grad_norm": 0.6523065567016602, "learning_rate": 9.966515187754125e-05, "loss": 0.0463, "step": 14465 }, { "epoch": 2.027470217238963, "grad_norm": 0.22304333746433258, "learning_rate": 9.96508012437216e-05, "loss": 0.0146, "step": 14466 }, { "epoch": 2.0276103714085494, "grad_norm": 0.3470122516155243, "learning_rate": 9.963645060990192e-05, "loss": 0.0391, "step": 14467 }, { "epoch": 2.027750525578136, "grad_norm": 0.20208820700645447, "learning_rate": 9.962209997608227e-05, "loss": 0.0185, "step": 14468 }, { "epoch": 2.0278906797477223, "grad_norm": 0.33971959352493286, "learning_rate": 9.960774934226262e-05, "loss": 0.0732, "step": 14469 }, { "epoch": 2.028030833917309, "grad_norm": 0.6886947751045227, "learning_rate": 9.959339870844295e-05, "loss": 0.0474, "step": 14470 }, { "epoch": 2.0281709880868957, "grad_norm": 0.23506073653697968, "learning_rate": 9.957904807462329e-05, "loss": 0.0516, "step": 14471 }, { "epoch": 2.028311142256482, "grad_norm": 0.18277232348918915, "learning_rate": 9.956469744080362e-05, "loss": 0.025, "step": 14472 }, { "epoch": 2.0284512964260686, 
"grad_norm": 0.08887793868780136, "learning_rate": 9.955034680698396e-05, "loss": 0.0083, "step": 14473 }, { "epoch": 2.028591450595655, "grad_norm": 0.29537466168403625, "learning_rate": 9.953599617316432e-05, "loss": 0.0232, "step": 14474 }, { "epoch": 2.028731604765242, "grad_norm": 0.1345331370830536, "learning_rate": 9.952164553934465e-05, "loss": 0.0137, "step": 14475 }, { "epoch": 2.0288717589348284, "grad_norm": 0.07308512926101685, "learning_rate": 9.950729490552499e-05, "loss": 0.0054, "step": 14476 }, { "epoch": 2.029011913104415, "grad_norm": 0.12065040320158005, "learning_rate": 9.949294427170533e-05, "loss": 0.0064, "step": 14477 }, { "epoch": 2.0291520672740013, "grad_norm": 0.183810755610466, "learning_rate": 9.947859363788566e-05, "loss": 0.0334, "step": 14478 }, { "epoch": 2.029292221443588, "grad_norm": 0.16208213567733765, "learning_rate": 9.9464243004066e-05, "loss": 0.0267, "step": 14479 }, { "epoch": 2.0294323756131747, "grad_norm": 0.5596699118614197, "learning_rate": 9.944989237024633e-05, "loss": 0.0951, "step": 14480 }, { "epoch": 2.029572529782761, "grad_norm": 0.23863954842090607, "learning_rate": 9.943554173642669e-05, "loss": 0.0454, "step": 14481 }, { "epoch": 2.0297126839523476, "grad_norm": 0.2928112745285034, "learning_rate": 9.942119110260703e-05, "loss": 0.0346, "step": 14482 }, { "epoch": 2.029852838121934, "grad_norm": 0.11333935707807541, "learning_rate": 9.940684046878736e-05, "loss": 0.0129, "step": 14483 }, { "epoch": 2.0299929922915205, "grad_norm": 0.3229149281978607, "learning_rate": 9.93924898349677e-05, "loss": 0.0386, "step": 14484 }, { "epoch": 2.0301331464611074, "grad_norm": 0.1360381543636322, "learning_rate": 9.937813920114805e-05, "loss": 0.021, "step": 14485 }, { "epoch": 2.030273300630694, "grad_norm": 0.08666659891605377, "learning_rate": 9.936378856732838e-05, "loss": 0.0065, "step": 14486 }, { "epoch": 2.0304134548002803, "grad_norm": 0.17681150138378143, "learning_rate": 9.934943793350873e-05, "loss": 
0.0363, "step": 14487 }, { "epoch": 2.030553608969867, "grad_norm": 0.18216165900230408, "learning_rate": 9.933508729968905e-05, "loss": 0.0176, "step": 14488 }, { "epoch": 2.0306937631394533, "grad_norm": 0.12447208911180496, "learning_rate": 9.93207366658694e-05, "loss": 0.0142, "step": 14489 }, { "epoch": 2.0308339173090397, "grad_norm": 0.2375001162290573, "learning_rate": 9.930638603204975e-05, "loss": 0.032, "step": 14490 }, { "epoch": 2.0309740714786266, "grad_norm": 0.2855256497859955, "learning_rate": 9.929203539823008e-05, "loss": 0.037, "step": 14491 }, { "epoch": 2.031114225648213, "grad_norm": 0.15849518775939941, "learning_rate": 9.927768476441042e-05, "loss": 0.0345, "step": 14492 }, { "epoch": 2.0312543798177995, "grad_norm": 0.3493972718715668, "learning_rate": 9.926333413059076e-05, "loss": 0.0625, "step": 14493 }, { "epoch": 2.031394533987386, "grad_norm": 0.21555230021476746, "learning_rate": 9.924898349677109e-05, "loss": 0.037, "step": 14494 }, { "epoch": 2.0315346881569725, "grad_norm": 0.1917514204978943, "learning_rate": 9.923463286295144e-05, "loss": 0.0128, "step": 14495 }, { "epoch": 2.0316748423265594, "grad_norm": 0.11909850686788559, "learning_rate": 9.922028222913176e-05, "loss": 0.0242, "step": 14496 }, { "epoch": 2.031814996496146, "grad_norm": 0.39441365003585815, "learning_rate": 9.920593159531212e-05, "loss": 0.0448, "step": 14497 }, { "epoch": 2.0319551506657323, "grad_norm": 0.4820905327796936, "learning_rate": 9.919158096149246e-05, "loss": 0.0665, "step": 14498 }, { "epoch": 2.0320953048353187, "grad_norm": 0.22301989793777466, "learning_rate": 9.917723032767279e-05, "loss": 0.0143, "step": 14499 }, { "epoch": 2.032235459004905, "grad_norm": 0.23312315344810486, "learning_rate": 9.916287969385313e-05, "loss": 0.0121, "step": 14500 }, { "epoch": 2.032375613174492, "grad_norm": 0.24394792318344116, "learning_rate": 9.914852906003349e-05, "loss": 0.0134, "step": 14501 }, { "epoch": 2.0325157673440786, "grad_norm": 
0.15797056257724762, "learning_rate": 9.913417842621382e-05, "loss": 0.0204, "step": 14502 }, { "epoch": 2.032655921513665, "grad_norm": 0.6717702150344849, "learning_rate": 9.911982779239416e-05, "loss": 0.0599, "step": 14503 }, { "epoch": 2.0327960756832515, "grad_norm": 0.21511223912239075, "learning_rate": 9.91054771585745e-05, "loss": 0.0228, "step": 14504 }, { "epoch": 2.032936229852838, "grad_norm": 0.3279041051864624, "learning_rate": 9.909112652475483e-05, "loss": 0.0383, "step": 14505 }, { "epoch": 2.033076384022425, "grad_norm": 0.035527553409338, "learning_rate": 9.907677589093518e-05, "loss": 0.0031, "step": 14506 }, { "epoch": 2.0332165381920113, "grad_norm": 0.36504456400871277, "learning_rate": 9.906242525711551e-05, "loss": 0.0546, "step": 14507 }, { "epoch": 2.0333566923615978, "grad_norm": 0.23326686024665833, "learning_rate": 9.904807462329586e-05, "loss": 0.0155, "step": 14508 }, { "epoch": 2.0334968465311842, "grad_norm": 1.6420146226882935, "learning_rate": 9.90337239894762e-05, "loss": 0.0572, "step": 14509 }, { "epoch": 2.0336370007007707, "grad_norm": 0.4718138873577118, "learning_rate": 9.901937335565653e-05, "loss": 0.0493, "step": 14510 }, { "epoch": 2.0337771548703576, "grad_norm": 0.26314330101013184, "learning_rate": 9.900502272183687e-05, "loss": 0.0593, "step": 14511 }, { "epoch": 2.033917309039944, "grad_norm": 0.24281050264835358, "learning_rate": 9.899067208801722e-05, "loss": 0.0139, "step": 14512 }, { "epoch": 2.0340574632095305, "grad_norm": 0.32817041873931885, "learning_rate": 9.897632145419755e-05, "loss": 0.0469, "step": 14513 }, { "epoch": 2.034197617379117, "grad_norm": 0.08950285613536835, "learning_rate": 9.89619708203779e-05, "loss": 0.0203, "step": 14514 }, { "epoch": 2.0343377715487034, "grad_norm": 0.38477644324302673, "learning_rate": 9.894762018655822e-05, "loss": 0.0386, "step": 14515 }, { "epoch": 2.0344779257182903, "grad_norm": 0.21968553960323334, "learning_rate": 9.893326955273857e-05, "loss": 0.06, 
"step": 14516 }, { "epoch": 2.034618079887877, "grad_norm": 0.15917927026748657, "learning_rate": 9.891891891891892e-05, "loss": 0.017, "step": 14517 }, { "epoch": 2.0347582340574633, "grad_norm": 0.03450698405504227, "learning_rate": 9.890456828509925e-05, "loss": 0.0027, "step": 14518 }, { "epoch": 2.0348983882270497, "grad_norm": 0.36688032746315, "learning_rate": 9.889021765127959e-05, "loss": 0.0706, "step": 14519 }, { "epoch": 2.035038542396636, "grad_norm": NaN, "learning_rate": 9.887586701745993e-05, "loss": 0.3196, "step": 14520 }, { "epoch": 2.0351786965662226, "grad_norm": 0.1534242033958435, "learning_rate": 9.887586701745993e-05, "loss": 0.0225, "step": 14521 }, { "epoch": 2.0353188507358095, "grad_norm": 0.17387579381465912, "learning_rate": 9.886151638364026e-05, "loss": 0.0303, "step": 14522 }, { "epoch": 2.035459004905396, "grad_norm": 0.15925176441669464, "learning_rate": 9.884716574982062e-05, "loss": 0.0145, "step": 14523 }, { "epoch": 2.0355991590749825, "grad_norm": 0.42877814173698425, "learning_rate": 9.883281511600095e-05, "loss": 0.0641, "step": 14524 }, { "epoch": 2.035739313244569, "grad_norm": 0.28471511602401733, "learning_rate": 9.881846448218129e-05, "loss": 0.0323, "step": 14525 }, { "epoch": 2.0358794674141554, "grad_norm": 0.09378957748413086, "learning_rate": 9.880411384836163e-05, "loss": 0.0102, "step": 14526 }, { "epoch": 2.0360196215837423, "grad_norm": 0.5060093998908997, "learning_rate": 9.878976321454196e-05, "loss": 0.0363, "step": 14527 }, { "epoch": 2.0361597757533287, "grad_norm": 0.13294915854930878, "learning_rate": 9.87754125807223e-05, "loss": 0.0117, "step": 14528 }, { "epoch": 2.036299929922915, "grad_norm": 1.1636812686920166, "learning_rate": 9.876106194690266e-05, "loss": 0.071, "step": 14529 }, { "epoch": 2.0364400840925017, "grad_norm": 0.08930788934230804, "learning_rate": 9.874671131308299e-05, "loss": 0.0055, "step": 14530 }, { "epoch": 2.036580238262088, "grad_norm": 0.5023823380470276, "learning_rate": 
9.873236067926333e-05, "loss": 0.0447, "step": 14531 }, { "epoch": 2.036720392431675, "grad_norm": 0.2278163582086563, "learning_rate": 9.871801004544366e-05, "loss": 0.0267, "step": 14532 }, { "epoch": 2.0368605466012615, "grad_norm": 0.24701005220413208, "learning_rate": 9.8703659411624e-05, "loss": 0.0464, "step": 14533 }, { "epoch": 2.037000700770848, "grad_norm": 0.09374483674764633, "learning_rate": 9.868930877780435e-05, "loss": 0.0067, "step": 14534 }, { "epoch": 2.0371408549404344, "grad_norm": 0.19721589982509613, "learning_rate": 9.867495814398468e-05, "loss": 0.0455, "step": 14535 }, { "epoch": 2.037281009110021, "grad_norm": 0.20399923622608185, "learning_rate": 9.866060751016502e-05, "loss": 0.0069, "step": 14536 }, { "epoch": 2.0374211632796078, "grad_norm": 0.21088121831417084, "learning_rate": 9.864625687634537e-05, "loss": 0.0422, "step": 14537 }, { "epoch": 2.037561317449194, "grad_norm": 0.06076661869883537, "learning_rate": 9.86319062425257e-05, "loss": 0.0071, "step": 14538 }, { "epoch": 2.0377014716187807, "grad_norm": 0.1837327778339386, "learning_rate": 9.861755560870605e-05, "loss": 0.0062, "step": 14539 }, { "epoch": 2.037841625788367, "grad_norm": 0.365121066570282, "learning_rate": 9.860320497488638e-05, "loss": 0.0499, "step": 14540 }, { "epoch": 2.0379817799579536, "grad_norm": 0.13977009057998657, "learning_rate": 9.858885434106672e-05, "loss": 0.0083, "step": 14541 }, { "epoch": 2.0381219341275405, "grad_norm": 0.6147711873054504, "learning_rate": 9.857450370724706e-05, "loss": 0.0314, "step": 14542 }, { "epoch": 2.038262088297127, "grad_norm": 0.5251495838165283, "learning_rate": 9.856015307342739e-05, "loss": 0.0663, "step": 14543 }, { "epoch": 2.0384022424667134, "grad_norm": 0.12395856529474258, "learning_rate": 9.854580243960773e-05, "loss": 0.0226, "step": 14544 }, { "epoch": 2.0385423966363, "grad_norm": 0.02359222248196602, "learning_rate": 9.853145180578809e-05, "loss": 0.002, "step": 14545 }, { "epoch": 2.0386825508058863, 
"grad_norm": 0.18385474383831024, "learning_rate": 9.851710117196842e-05, "loss": 0.009, "step": 14546 }, { "epoch": 2.0388227049754732, "grad_norm": 0.0622151754796505, "learning_rate": 9.850275053814876e-05, "loss": 0.0042, "step": 14547 }, { "epoch": 2.0389628591450597, "grad_norm": 0.7372965812683105, "learning_rate": 9.84883999043291e-05, "loss": 0.0404, "step": 14548 }, { "epoch": 2.039103013314646, "grad_norm": 0.23335157334804535, "learning_rate": 9.847404927050943e-05, "loss": 0.0477, "step": 14549 }, { "epoch": 2.0392431674842326, "grad_norm": 0.06278901547193527, "learning_rate": 9.845969863668979e-05, "loss": 0.0051, "step": 14550 }, { "epoch": 2.039383321653819, "grad_norm": 0.09418494254350662, "learning_rate": 9.844534800287012e-05, "loss": 0.0128, "step": 14551 }, { "epoch": 2.0395234758234055, "grad_norm": 0.43041256070137024, "learning_rate": 9.843099736905046e-05, "loss": 0.0575, "step": 14552 }, { "epoch": 2.0396636299929924, "grad_norm": 0.4484850764274597, "learning_rate": 9.84166467352308e-05, "loss": 0.0583, "step": 14553 }, { "epoch": 2.039803784162579, "grad_norm": 0.25465598702430725, "learning_rate": 9.840229610141113e-05, "loss": 0.015, "step": 14554 }, { "epoch": 2.0399439383321654, "grad_norm": 0.17130139470100403, "learning_rate": 9.838794546759148e-05, "loss": 0.0269, "step": 14555 }, { "epoch": 2.040084092501752, "grad_norm": 0.29218029975891113, "learning_rate": 9.837359483377183e-05, "loss": 0.0145, "step": 14556 }, { "epoch": 2.0402242466713383, "grad_norm": 0.24774783849716187, "learning_rate": 9.835924419995215e-05, "loss": 0.0219, "step": 14557 }, { "epoch": 2.040364400840925, "grad_norm": 0.19440512359142303, "learning_rate": 9.83448935661325e-05, "loss": 0.013, "step": 14558 }, { "epoch": 2.0405045550105116, "grad_norm": 0.14187614619731903, "learning_rate": 9.833054293231283e-05, "loss": 0.0455, "step": 14559 }, { "epoch": 2.040644709180098, "grad_norm": 0.20071375370025635, "learning_rate": 9.831619229849317e-05, "loss": 
0.0171, "step": 14560 }, { "epoch": 2.0407848633496846, "grad_norm": 0.5312033295631409, "learning_rate": 9.830184166467352e-05, "loss": 0.0422, "step": 14561 }, { "epoch": 2.040925017519271, "grad_norm": 0.40411654114723206, "learning_rate": 9.828749103085385e-05, "loss": 0.037, "step": 14562 }, { "epoch": 2.041065171688858, "grad_norm": 0.26766201853752136, "learning_rate": 9.82731403970342e-05, "loss": 0.0547, "step": 14563 }, { "epoch": 2.0412053258584444, "grad_norm": 0.7721509337425232, "learning_rate": 9.825878976321454e-05, "loss": 0.0823, "step": 14564 }, { "epoch": 2.041345480028031, "grad_norm": 0.7094186544418335, "learning_rate": 9.824443912939486e-05, "loss": 0.0415, "step": 14565 }, { "epoch": 2.0414856341976173, "grad_norm": 0.21951128542423248, "learning_rate": 9.823008849557522e-05, "loss": 0.0223, "step": 14566 }, { "epoch": 2.0416257883672038, "grad_norm": 0.13689817488193512, "learning_rate": 9.821573786175555e-05, "loss": 0.0049, "step": 14567 }, { "epoch": 2.0417659425367907, "grad_norm": 1.0043683052062988, "learning_rate": 9.820138722793589e-05, "loss": 0.0343, "step": 14568 }, { "epoch": 2.041906096706377, "grad_norm": 3.0330190658569336, "learning_rate": 9.818703659411623e-05, "loss": 0.3474, "step": 14569 }, { "epoch": 2.0420462508759636, "grad_norm": 0.044684477150440216, "learning_rate": 9.817268596029656e-05, "loss": 0.0023, "step": 14570 }, { "epoch": 2.04218640504555, "grad_norm": 0.29785287380218506, "learning_rate": 9.815833532647692e-05, "loss": 0.05, "step": 14571 }, { "epoch": 2.0423265592151365, "grad_norm": 0.14133098721504211, "learning_rate": 9.814398469265726e-05, "loss": 0.0239, "step": 14572 }, { "epoch": 2.0424667133847234, "grad_norm": 0.3275611102581024, "learning_rate": 9.812963405883759e-05, "loss": 0.0534, "step": 14573 }, { "epoch": 2.04260686755431, "grad_norm": 0.12047803401947021, "learning_rate": 9.811528342501793e-05, "loss": 0.0126, "step": 14574 }, { "epoch": 2.0427470217238963, "grad_norm": 
0.09081151336431503, "learning_rate": 9.810093279119826e-05, "loss": 0.0106, "step": 14575 }, { "epoch": 2.042887175893483, "grad_norm": 0.24666573107242584, "learning_rate": 9.80865821573786e-05, "loss": 0.0419, "step": 14576 }, { "epoch": 2.0430273300630692, "grad_norm": 0.13887514173984528, "learning_rate": 9.807223152355896e-05, "loss": 0.0259, "step": 14577 }, { "epoch": 2.0431674842326557, "grad_norm": 0.3158944845199585, "learning_rate": 9.805788088973929e-05, "loss": 0.0288, "step": 14578 }, { "epoch": 2.0433076384022426, "grad_norm": 0.4252224266529083, "learning_rate": 9.804353025591963e-05, "loss": 0.0826, "step": 14579 }, { "epoch": 2.043447792571829, "grad_norm": 0.5137424468994141, "learning_rate": 9.802917962209997e-05, "loss": 0.0507, "step": 14580 }, { "epoch": 2.0435879467414155, "grad_norm": 0.20665203034877777, "learning_rate": 9.80148289882803e-05, "loss": 0.0206, "step": 14581 }, { "epoch": 2.043728100911002, "grad_norm": 0.48795753717422485, "learning_rate": 9.800047835446065e-05, "loss": 0.0782, "step": 14582 }, { "epoch": 2.0438682550805884, "grad_norm": 0.296686053276062, "learning_rate": 9.7986127720641e-05, "loss": 0.0736, "step": 14583 }, { "epoch": 2.0440084092501754, "grad_norm": 0.24494190514087677, "learning_rate": 9.797177708682132e-05, "loss": 0.0491, "step": 14584 }, { "epoch": 2.044148563419762, "grad_norm": 0.0630914568901062, "learning_rate": 9.795742645300167e-05, "loss": 0.0074, "step": 14585 }, { "epoch": 2.0442887175893483, "grad_norm": 0.423389196395874, "learning_rate": 9.7943075819182e-05, "loss": 0.1202, "step": 14586 }, { "epoch": 2.0444288717589347, "grad_norm": 0.23362980782985687, "learning_rate": 9.792872518536235e-05, "loss": 0.013, "step": 14587 }, { "epoch": 2.044569025928521, "grad_norm": 0.21208184957504272, "learning_rate": 9.791437455154269e-05, "loss": 0.0604, "step": 14588 }, { "epoch": 2.044709180098108, "grad_norm": 0.20589450001716614, "learning_rate": 9.790002391772302e-05, "loss": 0.0353, "step": 
14589 }, { "epoch": 2.0448493342676946, "grad_norm": 0.3093615472316742, "learning_rate": 9.788567328390336e-05, "loss": 0.0306, "step": 14590 }, { "epoch": 2.044989488437281, "grad_norm": 0.11499712616205215, "learning_rate": 9.78713226500837e-05, "loss": 0.0156, "step": 14591 }, { "epoch": 2.0451296426068675, "grad_norm": 0.09062822163105011, "learning_rate": 9.785697201626403e-05, "loss": 0.0083, "step": 14592 }, { "epoch": 2.045269796776454, "grad_norm": 0.12232232838869095, "learning_rate": 9.784262138244439e-05, "loss": 0.0186, "step": 14593 }, { "epoch": 2.045409950946041, "grad_norm": 0.5004503130912781, "learning_rate": 9.782827074862472e-05, "loss": 0.0187, "step": 14594 }, { "epoch": 2.0455501051156273, "grad_norm": 0.05065590888261795, "learning_rate": 9.781392011480506e-05, "loss": 0.0036, "step": 14595 }, { "epoch": 2.0456902592852138, "grad_norm": 0.10655776411294937, "learning_rate": 9.77995694809854e-05, "loss": 0.0093, "step": 14596 }, { "epoch": 2.0458304134548, "grad_norm": 0.062482330948114395, "learning_rate": 9.778521884716573e-05, "loss": 0.0064, "step": 14597 }, { "epoch": 2.0459705676243867, "grad_norm": 0.4218922257423401, "learning_rate": 9.777086821334609e-05, "loss": 0.0551, "step": 14598 }, { "epoch": 2.0461107217939736, "grad_norm": 0.13488878309726715, "learning_rate": 9.775651757952643e-05, "loss": 0.011, "step": 14599 }, { "epoch": 2.04625087596356, "grad_norm": 0.2483682930469513, "learning_rate": 9.774216694570676e-05, "loss": 0.0135, "step": 14600 }, { "epoch": 2.0463910301331465, "grad_norm": 0.45724424719810486, "learning_rate": 9.77278163118871e-05, "loss": 0.0619, "step": 14601 }, { "epoch": 2.046531184302733, "grad_norm": 0.23024897277355194, "learning_rate": 9.771346567806743e-05, "loss": 0.0601, "step": 14602 }, { "epoch": 2.0466713384723194, "grad_norm": 0.39379122853279114, "learning_rate": 9.769911504424778e-05, "loss": 0.0251, "step": 14603 }, { "epoch": 2.046811492641906, "grad_norm": 0.4059712588787079, 
"learning_rate": 9.768476441042813e-05, "loss": 0.0268, "step": 14604 }, { "epoch": 2.0469516468114928, "grad_norm": 0.034140221774578094, "learning_rate": 9.767041377660845e-05, "loss": 0.0037, "step": 14605 }, { "epoch": 2.0470918009810792, "grad_norm": 0.17910324037075043, "learning_rate": 9.76560631427888e-05, "loss": 0.0092, "step": 14606 }, { "epoch": 2.0472319551506657, "grad_norm": 0.26883307099342346, "learning_rate": 9.764171250896914e-05, "loss": 0.0336, "step": 14607 }, { "epoch": 2.047372109320252, "grad_norm": 0.08821037411689758, "learning_rate": 9.762736187514947e-05, "loss": 0.0079, "step": 14608 }, { "epoch": 2.0475122634898386, "grad_norm": 0.27331221103668213, "learning_rate": 9.761301124132982e-05, "loss": 0.0581, "step": 14609 }, { "epoch": 2.0476524176594255, "grad_norm": 0.2804799973964691, "learning_rate": 9.759866060751015e-05, "loss": 0.0194, "step": 14610 }, { "epoch": 2.047792571829012, "grad_norm": 0.6483967900276184, "learning_rate": 9.75843099736905e-05, "loss": 0.1039, "step": 14611 }, { "epoch": 2.0479327259985984, "grad_norm": 0.31753939390182495, "learning_rate": 9.756995933987084e-05, "loss": 0.0273, "step": 14612 }, { "epoch": 2.048072880168185, "grad_norm": 0.5934249758720398, "learning_rate": 9.755560870605116e-05, "loss": 0.0464, "step": 14613 }, { "epoch": 2.0482130343377714, "grad_norm": 0.21168068051338196, "learning_rate": 9.754125807223152e-05, "loss": 0.0133, "step": 14614 }, { "epoch": 2.0483531885073583, "grad_norm": 0.3751913905143738, "learning_rate": 9.752690743841186e-05, "loss": 0.0291, "step": 14615 }, { "epoch": 2.0484933426769447, "grad_norm": 0.1160811111330986, "learning_rate": 9.751255680459219e-05, "loss": 0.011, "step": 14616 }, { "epoch": 2.048633496846531, "grad_norm": 0.6843565702438354, "learning_rate": 9.749820617077253e-05, "loss": 0.0332, "step": 14617 }, { "epoch": 2.0487736510161176, "grad_norm": 2.3835079669952393, "learning_rate": 9.748385553695289e-05, "loss": 0.2327, "step": 14618 }, { 
"epoch": 2.048913805185704, "grad_norm": 0.4588441252708435, "learning_rate": 9.746950490313322e-05, "loss": 0.0399, "step": 14619 }, { "epoch": 2.049053959355291, "grad_norm": 3.7697811126708984, "learning_rate": 9.745515426931356e-05, "loss": 0.0705, "step": 14620 }, { "epoch": 2.0491941135248775, "grad_norm": 0.2121492326259613, "learning_rate": 9.744080363549389e-05, "loss": 0.0215, "step": 14621 }, { "epoch": 2.049334267694464, "grad_norm": 0.09743131697177887, "learning_rate": 9.742645300167423e-05, "loss": 0.01, "step": 14622 }, { "epoch": 2.0494744218640504, "grad_norm": 0.09635625034570694, "learning_rate": 9.741210236785457e-05, "loss": 0.0075, "step": 14623 }, { "epoch": 2.049614576033637, "grad_norm": 0.7265884876251221, "learning_rate": 9.73977517340349e-05, "loss": 0.0291, "step": 14624 }, { "epoch": 2.0497547302032237, "grad_norm": 0.250246524810791, "learning_rate": 9.738340110021526e-05, "loss": 0.0466, "step": 14625 }, { "epoch": 2.04989488437281, "grad_norm": 0.051374293863773346, "learning_rate": 9.73690504663956e-05, "loss": 0.0041, "step": 14626 }, { "epoch": 2.0500350385423967, "grad_norm": 0.31546446681022644, "learning_rate": 9.735469983257593e-05, "loss": 0.0131, "step": 14627 }, { "epoch": 2.050175192711983, "grad_norm": 0.4238879680633545, "learning_rate": 9.734034919875627e-05, "loss": 0.0822, "step": 14628 }, { "epoch": 2.0503153468815696, "grad_norm": 0.1808554232120514, "learning_rate": 9.73259985649366e-05, "loss": 0.0124, "step": 14629 }, { "epoch": 2.0504555010511565, "grad_norm": 0.7445446252822876, "learning_rate": 9.731164793111695e-05, "loss": 0.0444, "step": 14630 }, { "epoch": 2.050595655220743, "grad_norm": 0.20793062448501587, "learning_rate": 9.72972972972973e-05, "loss": 0.0297, "step": 14631 }, { "epoch": 2.0507358093903294, "grad_norm": 0.2349175065755844, "learning_rate": 9.728294666347762e-05, "loss": 0.01, "step": 14632 }, { "epoch": 2.050875963559916, "grad_norm": 0.19442962110042572, "learning_rate": 
9.726859602965797e-05, "loss": 0.0374, "step": 14633 }, { "epoch": 2.0510161177295023, "grad_norm": 0.23417486250400543, "learning_rate": 9.725424539583832e-05, "loss": 0.0326, "step": 14634 }, { "epoch": 2.051156271899089, "grad_norm": 0.791458785533905, "learning_rate": 9.723989476201865e-05, "loss": 0.0718, "step": 14635 }, { "epoch": 2.0512964260686757, "grad_norm": 0.15302234888076782, "learning_rate": 9.722554412819899e-05, "loss": 0.0272, "step": 14636 }, { "epoch": 2.051436580238262, "grad_norm": 0.07960764318704605, "learning_rate": 9.721119349437932e-05, "loss": 0.0049, "step": 14637 }, { "epoch": 2.0515767344078486, "grad_norm": 0.2141430675983429, "learning_rate": 9.719684286055966e-05, "loss": 0.0377, "step": 14638 }, { "epoch": 2.051716888577435, "grad_norm": 0.3910596966743469, "learning_rate": 9.718249222674e-05, "loss": 0.0565, "step": 14639 }, { "epoch": 2.0518570427470215, "grad_norm": 0.03645210340619087, "learning_rate": 9.716814159292033e-05, "loss": 0.0019, "step": 14640 }, { "epoch": 2.0519971969166084, "grad_norm": 1.0703109502792358, "learning_rate": 9.715379095910069e-05, "loss": 0.0793, "step": 14641 }, { "epoch": 2.052137351086195, "grad_norm": 0.07595302164554596, "learning_rate": 9.713944032528103e-05, "loss": 0.0142, "step": 14642 }, { "epoch": 2.0522775052557813, "grad_norm": 0.1973145455121994, "learning_rate": 9.712508969146136e-05, "loss": 0.0187, "step": 14643 }, { "epoch": 2.052417659425368, "grad_norm": 0.08789122849702835, "learning_rate": 9.71107390576417e-05, "loss": 0.0229, "step": 14644 }, { "epoch": 2.0525578135949543, "grad_norm": 0.29548490047454834, "learning_rate": 9.709638842382203e-05, "loss": 0.0267, "step": 14645 }, { "epoch": 2.052697967764541, "grad_norm": 0.24903497099876404, "learning_rate": 9.708203779000239e-05, "loss": 0.0304, "step": 14646 }, { "epoch": 2.0528381219341276, "grad_norm": 0.08784700185060501, "learning_rate": 9.706768715618273e-05, "loss": 0.0175, "step": 14647 }, { "epoch": 
2.052978276103714, "grad_norm": 0.47082197666168213, "learning_rate": 9.705333652236306e-05, "loss": 0.0583, "step": 14648 }, { "epoch": 2.0531184302733005, "grad_norm": 0.2877300977706909, "learning_rate": 9.70389858885434e-05, "loss": 0.0363, "step": 14649 }, { "epoch": 2.053258584442887, "grad_norm": 0.5941381454467773, "learning_rate": 9.702463525472375e-05, "loss": 0.1078, "step": 14650 }, { "epoch": 2.053398738612474, "grad_norm": 0.5060108304023743, "learning_rate": 9.701028462090408e-05, "loss": 0.0312, "step": 14651 }, { "epoch": 2.0535388927820604, "grad_norm": 1.0659782886505127, "learning_rate": 9.699593398708443e-05, "loss": 0.172, "step": 14652 }, { "epoch": 2.053679046951647, "grad_norm": 0.25115126371383667, "learning_rate": 9.698158335326475e-05, "loss": 0.0192, "step": 14653 }, { "epoch": 2.0538192011212333, "grad_norm": 0.317066490650177, "learning_rate": 9.69672327194451e-05, "loss": 0.0852, "step": 14654 }, { "epoch": 2.0539593552908197, "grad_norm": 0.48709696531295776, "learning_rate": 9.695288208562544e-05, "loss": 0.0518, "step": 14655 }, { "epoch": 2.0540995094604066, "grad_norm": 0.4695848524570465, "learning_rate": 9.693853145180577e-05, "loss": 0.0801, "step": 14656 }, { "epoch": 2.054239663629993, "grad_norm": 0.24045182764530182, "learning_rate": 9.692418081798612e-05, "loss": 0.0228, "step": 14657 }, { "epoch": 2.0543798177995796, "grad_norm": 0.36782848834991455, "learning_rate": 9.690983018416646e-05, "loss": 0.0577, "step": 14658 }, { "epoch": 2.054519971969166, "grad_norm": 0.24183416366577148, "learning_rate": 9.689547955034679e-05, "loss": 0.081, "step": 14659 }, { "epoch": 2.0546601261387525, "grad_norm": 0.44207194447517395, "learning_rate": 9.688112891652713e-05, "loss": 0.0355, "step": 14660 }, { "epoch": 2.0548002803083394, "grad_norm": 0.3749368488788605, "learning_rate": 9.686677828270749e-05, "loss": 0.0294, "step": 14661 }, { "epoch": 2.054940434477926, "grad_norm": 0.4326549172401428, "learning_rate": 
9.685242764888782e-05, "loss": 0.0348, "step": 14662 }, { "epoch": 2.0550805886475123, "grad_norm": 0.30109161138534546, "learning_rate": 9.683807701506816e-05, "loss": 0.0492, "step": 14663 }, { "epoch": 2.0552207428170988, "grad_norm": 0.01653483137488365, "learning_rate": 9.682372638124849e-05, "loss": 0.0018, "step": 14664 }, { "epoch": 2.0553608969866852, "grad_norm": 0.21044203639030457, "learning_rate": 9.680937574742883e-05, "loss": 0.0061, "step": 14665 }, { "epoch": 2.0555010511562717, "grad_norm": 0.8893194198608398, "learning_rate": 9.679502511360919e-05, "loss": 0.0775, "step": 14666 }, { "epoch": 2.0556412053258586, "grad_norm": 0.6652569770812988, "learning_rate": 9.678067447978952e-05, "loss": 0.0732, "step": 14667 }, { "epoch": 2.055781359495445, "grad_norm": 3.4368643760681152, "learning_rate": 9.676632384596986e-05, "loss": 0.1104, "step": 14668 }, { "epoch": 2.0559215136650315, "grad_norm": 0.638452410697937, "learning_rate": 9.67519732121502e-05, "loss": 0.0368, "step": 14669 }, { "epoch": 2.056061667834618, "grad_norm": 0.5533886551856995, "learning_rate": 9.673762257833053e-05, "loss": 0.0661, "step": 14670 }, { "epoch": 2.0562018220042044, "grad_norm": 0.12036539614200592, "learning_rate": 9.672327194451087e-05, "loss": 0.0248, "step": 14671 }, { "epoch": 2.0563419761737913, "grad_norm": 0.12919162213802338, "learning_rate": 9.67089213106912e-05, "loss": 0.0324, "step": 14672 }, { "epoch": 2.056482130343378, "grad_norm": 0.2486315816640854, "learning_rate": 9.669457067687156e-05, "loss": 0.0056, "step": 14673 }, { "epoch": 2.0566222845129642, "grad_norm": 0.4113796055316925, "learning_rate": 9.66802200430519e-05, "loss": 0.0394, "step": 14674 }, { "epoch": 2.0567624386825507, "grad_norm": 0.1997787356376648, "learning_rate": 9.666586940923223e-05, "loss": 0.0341, "step": 14675 }, { "epoch": 2.056902592852137, "grad_norm": 0.32701364159584045, "learning_rate": 9.665151877541257e-05, "loss": 0.032, "step": 14676 }, { "epoch": 
2.057042747021724, "grad_norm": 0.09779198467731476, "learning_rate": 9.663716814159292e-05, "loss": 0.0114, "step": 14677 }, { "epoch": 2.0571829011913105, "grad_norm": 0.3206586241722107, "learning_rate": 9.662281750777325e-05, "loss": 0.0131, "step": 14678 }, { "epoch": 2.057323055360897, "grad_norm": 0.30883026123046875, "learning_rate": 9.66084668739536e-05, "loss": 0.0332, "step": 14679 }, { "epoch": 2.0574632095304835, "grad_norm": 0.3375200629234314, "learning_rate": 9.659411624013392e-05, "loss": 0.0615, "step": 14680 }, { "epoch": 2.05760336370007, "grad_norm": 0.16260966658592224, "learning_rate": 9.657976560631427e-05, "loss": 0.0162, "step": 14681 }, { "epoch": 2.057743517869657, "grad_norm": 0.171087846159935, "learning_rate": 9.656541497249462e-05, "loss": 0.0243, "step": 14682 }, { "epoch": 2.0578836720392433, "grad_norm": 0.24988307058811188, "learning_rate": 9.655106433867495e-05, "loss": 0.0658, "step": 14683 }, { "epoch": 2.0580238262088297, "grad_norm": 0.1413131207227707, "learning_rate": 9.653671370485529e-05, "loss": 0.038, "step": 14684 }, { "epoch": 2.058163980378416, "grad_norm": 0.4566271901130676, "learning_rate": 9.652236307103563e-05, "loss": 0.0287, "step": 14685 }, { "epoch": 2.0583041345480027, "grad_norm": 0.24705936014652252, "learning_rate": 9.650801243721596e-05, "loss": 0.0739, "step": 14686 }, { "epoch": 2.0584442887175896, "grad_norm": 0.20026229321956635, "learning_rate": 9.64936618033963e-05, "loss": 0.0092, "step": 14687 }, { "epoch": 2.058584442887176, "grad_norm": 0.07073967903852463, "learning_rate": 9.647931116957665e-05, "loss": 0.0094, "step": 14688 }, { "epoch": 2.0587245970567625, "grad_norm": 0.4652431905269623, "learning_rate": 9.646496053575699e-05, "loss": 0.0341, "step": 14689 }, { "epoch": 2.058864751226349, "grad_norm": 0.47174856066703796, "learning_rate": 9.645060990193733e-05, "loss": 0.0589, "step": 14690 }, { "epoch": 2.0590049053959354, "grad_norm": 0.12462644279003143, "learning_rate": 
9.643625926811766e-05, "loss": 0.0205, "step": 14691 }, { "epoch": 2.0591450595655223, "grad_norm": 0.2781469225883484, "learning_rate": 9.6421908634298e-05, "loss": 0.0503, "step": 14692 }, { "epoch": 2.0592852137351088, "grad_norm": 0.46155428886413574, "learning_rate": 9.640755800047836e-05, "loss": 0.0246, "step": 14693 }, { "epoch": 2.059425367904695, "grad_norm": 0.4891853928565979, "learning_rate": 9.639320736665869e-05, "loss": 0.0386, "step": 14694 }, { "epoch": 2.0595655220742817, "grad_norm": 0.12988385558128357, "learning_rate": 9.637885673283903e-05, "loss": 0.0149, "step": 14695 }, { "epoch": 2.059705676243868, "grad_norm": 0.5600318908691406, "learning_rate": 9.636450609901937e-05, "loss": 0.0392, "step": 14696 }, { "epoch": 2.0598458304134546, "grad_norm": 0.4391777217388153, "learning_rate": 9.63501554651997e-05, "loss": 0.0482, "step": 14697 }, { "epoch": 2.0599859845830415, "grad_norm": 0.14108222723007202, "learning_rate": 9.633580483138005e-05, "loss": 0.0238, "step": 14698 }, { "epoch": 2.060126138752628, "grad_norm": 0.12062130123376846, "learning_rate": 9.632145419756038e-05, "loss": 0.0289, "step": 14699 }, { "epoch": 2.0602662929222144, "grad_norm": 0.07438750565052032, "learning_rate": 9.630710356374072e-05, "loss": 0.0092, "step": 14700 }, { "epoch": 2.060406447091801, "grad_norm": 0.1751435101032257, "learning_rate": 9.629275292992107e-05, "loss": 0.0184, "step": 14701 }, { "epoch": 2.0605466012613873, "grad_norm": 0.19144080579280853, "learning_rate": 9.62784022961014e-05, "loss": 0.0127, "step": 14702 }, { "epoch": 2.0606867554309742, "grad_norm": 0.24372050166130066, "learning_rate": 9.626405166228174e-05, "loss": 0.0428, "step": 14703 }, { "epoch": 2.0608269096005607, "grad_norm": 0.16367536783218384, "learning_rate": 9.624970102846209e-05, "loss": 0.0592, "step": 14704 }, { "epoch": 2.060967063770147, "grad_norm": 0.2811152935028076, "learning_rate": 9.623535039464242e-05, "loss": 0.0204, "step": 14705 }, { "epoch": 
2.0611072179397336, "grad_norm": 0.21813584864139557, "learning_rate": 9.622099976082276e-05, "loss": 0.0209, "step": 14706 }, { "epoch": 2.06124737210932, "grad_norm": 0.09859484434127808, "learning_rate": 9.620664912700309e-05, "loss": 0.0113, "step": 14707 }, { "epoch": 2.061387526278907, "grad_norm": 1.1235848665237427, "learning_rate": 9.619229849318343e-05, "loss": 0.0476, "step": 14708 }, { "epoch": 2.0615276804484934, "grad_norm": 0.2925051748752594, "learning_rate": 9.617794785936379e-05, "loss": 0.0202, "step": 14709 }, { "epoch": 2.06166783461808, "grad_norm": 0.6838991641998291, "learning_rate": 9.616359722554412e-05, "loss": 0.0469, "step": 14710 }, { "epoch": 2.0618079887876664, "grad_norm": 0.32836249470710754, "learning_rate": 9.614924659172446e-05, "loss": 0.0305, "step": 14711 }, { "epoch": 2.061948142957253, "grad_norm": 0.059826552867889404, "learning_rate": 9.61348959579048e-05, "loss": 0.0037, "step": 14712 }, { "epoch": 2.0620882971268397, "grad_norm": 0.7304503917694092, "learning_rate": 9.612054532408513e-05, "loss": 0.0692, "step": 14713 }, { "epoch": 2.062228451296426, "grad_norm": 0.21271809935569763, "learning_rate": 9.610619469026549e-05, "loss": 0.0427, "step": 14714 }, { "epoch": 2.0623686054660126, "grad_norm": 0.15730740129947662, "learning_rate": 9.609184405644582e-05, "loss": 0.0081, "step": 14715 }, { "epoch": 2.062508759635599, "grad_norm": 0.3494764268398285, "learning_rate": 9.607749342262616e-05, "loss": 0.0434, "step": 14716 }, { "epoch": 2.0626489138051856, "grad_norm": 0.6698757410049438, "learning_rate": 9.60631427888065e-05, "loss": 0.0376, "step": 14717 }, { "epoch": 2.0627890679747725, "grad_norm": 0.9941021203994751, "learning_rate": 9.604879215498683e-05, "loss": 0.0556, "step": 14718 }, { "epoch": 2.062929222144359, "grad_norm": 0.3633747696876526, "learning_rate": 9.603444152116717e-05, "loss": 0.0298, "step": 14719 }, { "epoch": 2.0630693763139454, "grad_norm": 0.8286240100860596, "learning_rate": 
9.602009088734753e-05, "loss": 0.0695, "step": 14720 }, { "epoch": 2.063209530483532, "grad_norm": 0.37841561436653137, "learning_rate": 9.600574025352785e-05, "loss": 0.0645, "step": 14721 }, { "epoch": 2.0633496846531183, "grad_norm": 0.35046958923339844, "learning_rate": 9.59913896197082e-05, "loss": 0.0805, "step": 14722 }, { "epoch": 2.063489838822705, "grad_norm": 0.16203956305980682, "learning_rate": 9.597703898588853e-05, "loss": 0.0206, "step": 14723 }, { "epoch": 2.0636299929922917, "grad_norm": 0.09663776308298111, "learning_rate": 9.596268835206887e-05, "loss": 0.0134, "step": 14724 }, { "epoch": 2.063770147161878, "grad_norm": 0.2519320249557495, "learning_rate": 9.594833771824922e-05, "loss": 0.0378, "step": 14725 }, { "epoch": 2.0639103013314646, "grad_norm": 0.20961010456085205, "learning_rate": 9.593398708442955e-05, "loss": 0.0233, "step": 14726 }, { "epoch": 2.064050455501051, "grad_norm": 0.18142196536064148, "learning_rate": 9.59196364506099e-05, "loss": 0.0468, "step": 14727 }, { "epoch": 2.0641906096706375, "grad_norm": 0.197112038731575, "learning_rate": 9.590528581679024e-05, "loss": 0.0152, "step": 14728 }, { "epoch": 2.0643307638402244, "grad_norm": 0.3717029094696045, "learning_rate": 9.589093518297056e-05, "loss": 0.0231, "step": 14729 }, { "epoch": 2.064470918009811, "grad_norm": 0.22763249278068542, "learning_rate": 9.587658454915092e-05, "loss": 0.032, "step": 14730 }, { "epoch": 2.0646110721793973, "grad_norm": 0.13348527252674103, "learning_rate": 9.586223391533126e-05, "loss": 0.0242, "step": 14731 }, { "epoch": 2.064751226348984, "grad_norm": 0.34499162435531616, "learning_rate": 9.584788328151159e-05, "loss": 0.0215, "step": 14732 }, { "epoch": 2.0648913805185702, "grad_norm": 0.23842300474643707, "learning_rate": 9.583353264769193e-05, "loss": 0.0812, "step": 14733 }, { "epoch": 2.065031534688157, "grad_norm": 0.07128602266311646, "learning_rate": 9.581918201387226e-05, "loss": 0.0062, "step": 14734 }, { "epoch": 
2.0651716888577436, "grad_norm": 0.2558954954147339, "learning_rate": 9.58048313800526e-05, "loss": 0.0076, "step": 14735 }, { "epoch": 2.06531184302733, "grad_norm": 0.10760050266981125, "learning_rate": 9.579048074623296e-05, "loss": 0.0058, "step": 14736 }, { "epoch": 2.0654519971969165, "grad_norm": 0.15660718083381653, "learning_rate": 9.577613011241329e-05, "loss": 0.0352, "step": 14737 }, { "epoch": 2.065592151366503, "grad_norm": 0.1692039519548416, "learning_rate": 9.576177947859363e-05, "loss": 0.0176, "step": 14738 }, { "epoch": 2.06573230553609, "grad_norm": 0.11902142316102982, "learning_rate": 9.574742884477397e-05, "loss": 0.0123, "step": 14739 }, { "epoch": 2.0658724597056763, "grad_norm": 0.10024394094944, "learning_rate": 9.57330782109543e-05, "loss": 0.0124, "step": 14740 }, { "epoch": 2.066012613875263, "grad_norm": 0.08610425889492035, "learning_rate": 9.571872757713466e-05, "loss": 0.0156, "step": 14741 }, { "epoch": 2.0661527680448493, "grad_norm": 0.1398169994354248, "learning_rate": 9.570437694331498e-05, "loss": 0.0291, "step": 14742 }, { "epoch": 2.0662929222144357, "grad_norm": 0.1331656128168106, "learning_rate": 9.569002630949533e-05, "loss": 0.0313, "step": 14743 }, { "epoch": 2.0664330763840226, "grad_norm": 0.25851210951805115, "learning_rate": 9.567567567567567e-05, "loss": 0.0219, "step": 14744 }, { "epoch": 2.066573230553609, "grad_norm": 0.2615129351615906, "learning_rate": 9.5661325041856e-05, "loss": 0.0345, "step": 14745 }, { "epoch": 2.0667133847231955, "grad_norm": 0.14532554149627686, "learning_rate": 9.564697440803635e-05, "loss": 0.0141, "step": 14746 }, { "epoch": 2.066853538892782, "grad_norm": 0.0849614068865776, "learning_rate": 9.56326237742167e-05, "loss": 0.0051, "step": 14747 }, { "epoch": 2.0669936930623685, "grad_norm": 0.4805326759815216, "learning_rate": 9.561827314039702e-05, "loss": 0.0438, "step": 14748 }, { "epoch": 2.067133847231955, "grad_norm": 0.26844966411590576, "learning_rate": 
9.560392250657737e-05, "loss": 0.0099, "step": 14749 }, { "epoch": 2.067274001401542, "grad_norm": 0.3314116299152374, "learning_rate": 9.55895718727577e-05, "loss": 0.0184, "step": 14750 }, { "epoch": 2.0674141555711283, "grad_norm": 0.44598299264907837, "learning_rate": 9.557522123893804e-05, "loss": 0.0258, "step": 14751 }, { "epoch": 2.0675543097407147, "grad_norm": 0.15765361487865448, "learning_rate": 9.556087060511839e-05, "loss": 0.0129, "step": 14752 }, { "epoch": 2.067694463910301, "grad_norm": 0.20319585502147675, "learning_rate": 9.554651997129872e-05, "loss": 0.0566, "step": 14753 }, { "epoch": 2.0678346180798877, "grad_norm": 0.1543562114238739, "learning_rate": 9.553216933747906e-05, "loss": 0.02, "step": 14754 }, { "epoch": 2.0679747722494746, "grad_norm": 0.495862752199173, "learning_rate": 9.55178187036594e-05, "loss": 0.0193, "step": 14755 }, { "epoch": 2.068114926419061, "grad_norm": 0.14246585965156555, "learning_rate": 9.550346806983973e-05, "loss": 0.0339, "step": 14756 }, { "epoch": 2.0682550805886475, "grad_norm": 0.23880068957805634, "learning_rate": 9.548911743602009e-05, "loss": 0.0331, "step": 14757 }, { "epoch": 2.068395234758234, "grad_norm": 0.1509748101234436, "learning_rate": 9.547476680220042e-05, "loss": 0.0137, "step": 14758 }, { "epoch": 2.0685353889278204, "grad_norm": 0.01984417624771595, "learning_rate": 9.546041616838076e-05, "loss": 0.002, "step": 14759 }, { "epoch": 2.0686755430974073, "grad_norm": 0.8755910396575928, "learning_rate": 9.54460655345611e-05, "loss": 0.1317, "step": 14760 }, { "epoch": 2.0688156972669938, "grad_norm": 0.5292233824729919, "learning_rate": 9.543171490074143e-05, "loss": 0.1019, "step": 14761 }, { "epoch": 2.0689558514365802, "grad_norm": 0.060510825365781784, "learning_rate": 9.541736426692179e-05, "loss": 0.0026, "step": 14762 }, { "epoch": 2.0690960056061667, "grad_norm": 0.18027576804161072, "learning_rate": 9.540301363310213e-05, "loss": 0.0334, "step": 14763 }, { "epoch": 
2.069236159775753, "grad_norm": 0.30132341384887695, "learning_rate": 9.538866299928246e-05, "loss": 0.0309, "step": 14764 }, { "epoch": 2.06937631394534, "grad_norm": 0.5033591985702515, "learning_rate": 9.53743123654628e-05, "loss": 0.1201, "step": 14765 }, { "epoch": 2.0695164681149265, "grad_norm": 0.47116026282310486, "learning_rate": 9.535996173164314e-05, "loss": 0.0153, "step": 14766 }, { "epoch": 2.069656622284513, "grad_norm": 0.03919925540685654, "learning_rate": 9.534561109782347e-05, "loss": 0.0029, "step": 14767 }, { "epoch": 2.0697967764540994, "grad_norm": 0.2626688778400421, "learning_rate": 9.533126046400383e-05, "loss": 0.0083, "step": 14768 }, { "epoch": 2.069936930623686, "grad_norm": 0.8016400933265686, "learning_rate": 9.531690983018415e-05, "loss": 0.0521, "step": 14769 }, { "epoch": 2.070077084793273, "grad_norm": 0.025379212573170662, "learning_rate": 9.53025591963645e-05, "loss": 0.002, "step": 14770 }, { "epoch": 2.0702172389628593, "grad_norm": 0.20608901977539062, "learning_rate": 9.528820856254484e-05, "loss": 0.0235, "step": 14771 }, { "epoch": 2.0703573931324457, "grad_norm": 0.19838140904903412, "learning_rate": 9.527385792872517e-05, "loss": 0.0165, "step": 14772 }, { "epoch": 2.070497547302032, "grad_norm": 0.14500342309474945, "learning_rate": 9.525950729490552e-05, "loss": 0.0181, "step": 14773 }, { "epoch": 2.0706377014716186, "grad_norm": 0.2321852147579193, "learning_rate": 9.524515666108586e-05, "loss": 0.0242, "step": 14774 }, { "epoch": 2.0707778556412055, "grad_norm": 0.17350716888904572, "learning_rate": 9.52308060272662e-05, "loss": 0.0407, "step": 14775 }, { "epoch": 2.070918009810792, "grad_norm": 0.3438485860824585, "learning_rate": 9.521645539344654e-05, "loss": 0.0234, "step": 14776 }, { "epoch": 2.0710581639803785, "grad_norm": 0.18411746621131897, "learning_rate": 9.520210475962686e-05, "loss": 0.0454, "step": 14777 }, { "epoch": 2.071198318149965, "grad_norm": 0.2771705985069275, "learning_rate": 
9.518775412580722e-05, "loss": 0.0169, "step": 14778 }, { "epoch": 2.0713384723195514, "grad_norm": 0.061035752296447754, "learning_rate": 9.517340349198756e-05, "loss": 0.0042, "step": 14779 }, { "epoch": 2.071478626489138, "grad_norm": 0.26582252979278564, "learning_rate": 9.515905285816789e-05, "loss": 0.034, "step": 14780 }, { "epoch": 2.0716187806587247, "grad_norm": 0.37238767743110657, "learning_rate": 9.514470222434823e-05, "loss": 0.0659, "step": 14781 }, { "epoch": 2.071758934828311, "grad_norm": 0.07462254911661148, "learning_rate": 9.513035159052857e-05, "loss": 0.0042, "step": 14782 }, { "epoch": 2.0718990889978977, "grad_norm": 0.2020999938249588, "learning_rate": 9.51160009567089e-05, "loss": 0.0062, "step": 14783 }, { "epoch": 2.072039243167484, "grad_norm": 0.20204313099384308, "learning_rate": 9.510165032288926e-05, "loss": 0.0144, "step": 14784 }, { "epoch": 2.0721793973370706, "grad_norm": 0.171986386179924, "learning_rate": 9.508729968906959e-05, "loss": 0.0307, "step": 14785 }, { "epoch": 2.0723195515066575, "grad_norm": 0.32277095317840576, "learning_rate": 9.507294905524993e-05, "loss": 0.0572, "step": 14786 }, { "epoch": 2.072459705676244, "grad_norm": 0.22186830639839172, "learning_rate": 9.505859842143027e-05, "loss": 0.0178, "step": 14787 }, { "epoch": 2.0725998598458304, "grad_norm": 0.22441157698631287, "learning_rate": 9.50442477876106e-05, "loss": 0.0086, "step": 14788 }, { "epoch": 2.072740014015417, "grad_norm": 0.17712929844856262, "learning_rate": 9.502989715379096e-05, "loss": 0.0327, "step": 14789 }, { "epoch": 2.0728801681850033, "grad_norm": 0.22668881714344025, "learning_rate": 9.50155465199713e-05, "loss": 0.0116, "step": 14790 }, { "epoch": 2.07302032235459, "grad_norm": 0.26879608631134033, "learning_rate": 9.500119588615163e-05, "loss": 0.0419, "step": 14791 }, { "epoch": 2.0731604765241767, "grad_norm": 0.1716337651014328, "learning_rate": 9.498684525233197e-05, "loss": 0.0225, "step": 14792 }, { "epoch": 
2.073300630693763, "grad_norm": 0.14381234347820282, "learning_rate": 9.49724946185123e-05, "loss": 0.0182, "step": 14793 }, { "epoch": 2.0734407848633496, "grad_norm": 0.11770530790090561, "learning_rate": 9.495814398469265e-05, "loss": 0.0244, "step": 14794 }, { "epoch": 2.073580939032936, "grad_norm": 0.2196437567472458, "learning_rate": 9.4943793350873e-05, "loss": 0.0492, "step": 14795 }, { "epoch": 2.073721093202523, "grad_norm": 0.31392401456832886, "learning_rate": 9.492944271705332e-05, "loss": 0.044, "step": 14796 }, { "epoch": 2.0738612473721094, "grad_norm": 0.4719558656215668, "learning_rate": 9.491509208323367e-05, "loss": 0.0526, "step": 14797 }, { "epoch": 2.074001401541696, "grad_norm": 0.14595955610275269, "learning_rate": 9.490074144941401e-05, "loss": 0.0135, "step": 14798 }, { "epoch": 2.0741415557112823, "grad_norm": 0.09964407235383987, "learning_rate": 9.488639081559435e-05, "loss": 0.0044, "step": 14799 }, { "epoch": 2.074281709880869, "grad_norm": 0.3317849934101105, "learning_rate": 9.487204018177469e-05, "loss": 0.0454, "step": 14800 }, { "epoch": 2.0744218640504557, "grad_norm": 0.3520927131175995, "learning_rate": 9.485768954795502e-05, "loss": 0.02, "step": 14801 }, { "epoch": 2.074562018220042, "grad_norm": 0.18402360379695892, "learning_rate": 9.484333891413536e-05, "loss": 0.0139, "step": 14802 }, { "epoch": 2.0747021723896286, "grad_norm": 0.25146934390068054, "learning_rate": 9.48289882803157e-05, "loss": 0.0447, "step": 14803 }, { "epoch": 2.074842326559215, "grad_norm": 0.7736601233482361, "learning_rate": 9.481463764649603e-05, "loss": 0.0533, "step": 14804 }, { "epoch": 2.0749824807288015, "grad_norm": 0.13212797045707703, "learning_rate": 9.480028701267639e-05, "loss": 0.0405, "step": 14805 }, { "epoch": 2.0751226348983884, "grad_norm": 0.24164605140686035, "learning_rate": 9.478593637885673e-05, "loss": 0.0506, "step": 14806 }, { "epoch": 2.075262789067975, "grad_norm": 0.17525669932365417, "learning_rate": 
9.477158574503706e-05, "loss": 0.0293, "step": 14807 }, { "epoch": 2.0754029432375614, "grad_norm": 0.2722138464450836, "learning_rate": 9.47572351112174e-05, "loss": 0.036, "step": 14808 }, { "epoch": 2.075543097407148, "grad_norm": 0.27481138706207275, "learning_rate": 9.474288447739776e-05, "loss": 0.0679, "step": 14809 }, { "epoch": 2.0756832515767343, "grad_norm": 0.16709543764591217, "learning_rate": 9.472853384357809e-05, "loss": 0.0326, "step": 14810 }, { "epoch": 2.0758234057463207, "grad_norm": 0.4637592136859894, "learning_rate": 9.471418320975843e-05, "loss": 0.0382, "step": 14811 }, { "epoch": 2.0759635599159076, "grad_norm": 0.3533687889575958, "learning_rate": 9.469983257593876e-05, "loss": 0.0362, "step": 14812 }, { "epoch": 2.076103714085494, "grad_norm": 0.15624204277992249, "learning_rate": 9.46854819421191e-05, "loss": 0.0531, "step": 14813 }, { "epoch": 2.0762438682550806, "grad_norm": 0.6235769987106323, "learning_rate": 9.467113130829944e-05, "loss": 0.016, "step": 14814 }, { "epoch": 2.076384022424667, "grad_norm": 0.5708448886871338, "learning_rate": 9.465678067447978e-05, "loss": 0.0708, "step": 14815 }, { "epoch": 2.0765241765942535, "grad_norm": 0.8833497762680054, "learning_rate": 9.464243004066013e-05, "loss": 0.1026, "step": 14816 }, { "epoch": 2.0766643307638404, "grad_norm": 0.5276862382888794, "learning_rate": 9.462807940684047e-05, "loss": 0.0206, "step": 14817 }, { "epoch": 2.076804484933427, "grad_norm": 0.05500069260597229, "learning_rate": 9.46137287730208e-05, "loss": 0.0028, "step": 14818 }, { "epoch": 2.0769446391030133, "grad_norm": 1.0682421922683716, "learning_rate": 9.459937813920114e-05, "loss": 0.037, "step": 14819 }, { "epoch": 2.0770847932725998, "grad_norm": 3.1182734966278076, "learning_rate": 9.458502750538147e-05, "loss": 0.0421, "step": 14820 }, { "epoch": 2.077224947442186, "grad_norm": 0.07194377481937408, "learning_rate": 9.457067687156182e-05, "loss": 0.0065, "step": 14821 }, { "epoch": 2.077365101611773, 
"grad_norm": 0.3269622027873993, "learning_rate": 9.455632623774216e-05, "loss": 0.0164, "step": 14822 }, { "epoch": 2.0775052557813596, "grad_norm": 0.5817004442214966, "learning_rate": 9.454197560392249e-05, "loss": 0.0766, "step": 14823 }, { "epoch": 2.077645409950946, "grad_norm": 0.09402894228696823, "learning_rate": 9.452762497010283e-05, "loss": 0.0181, "step": 14824 }, { "epoch": 2.0777855641205325, "grad_norm": 0.28523150086402893, "learning_rate": 9.451327433628319e-05, "loss": 0.0746, "step": 14825 }, { "epoch": 2.077925718290119, "grad_norm": 0.1748807430267334, "learning_rate": 9.449892370246352e-05, "loss": 0.0121, "step": 14826 }, { "epoch": 2.078065872459706, "grad_norm": 0.0679233968257904, "learning_rate": 9.448457306864386e-05, "loss": 0.0047, "step": 14827 }, { "epoch": 2.0782060266292923, "grad_norm": 0.13750289380550385, "learning_rate": 9.447022243482419e-05, "loss": 0.0448, "step": 14828 }, { "epoch": 2.078346180798879, "grad_norm": 0.4468323290348053, "learning_rate": 9.445587180100453e-05, "loss": 0.0615, "step": 14829 }, { "epoch": 2.0784863349684652, "grad_norm": 0.13197042047977448, "learning_rate": 9.444152116718487e-05, "loss": 0.0156, "step": 14830 }, { "epoch": 2.0786264891380517, "grad_norm": 0.3296675384044647, "learning_rate": 9.442717053336522e-05, "loss": 0.0473, "step": 14831 }, { "epoch": 2.0787666433076386, "grad_norm": 0.24824611842632294, "learning_rate": 9.441281989954556e-05, "loss": 0.0215, "step": 14832 }, { "epoch": 2.078906797477225, "grad_norm": 0.11044174432754517, "learning_rate": 9.43984692657259e-05, "loss": 0.0115, "step": 14833 }, { "epoch": 2.0790469516468115, "grad_norm": 0.34932875633239746, "learning_rate": 9.438411863190623e-05, "loss": 0.0189, "step": 14834 }, { "epoch": 2.079187105816398, "grad_norm": 0.1812528520822525, "learning_rate": 9.436976799808657e-05, "loss": 0.0153, "step": 14835 }, { "epoch": 2.0793272599859844, "grad_norm": 0.35823875665664673, "learning_rate": 9.43554173642669e-05, "loss": 
0.0553, "step": 14836 }, { "epoch": 2.0794674141555713, "grad_norm": 0.3172191381454468, "learning_rate": 9.434106673044726e-05, "loss": 0.0371, "step": 14837 }, { "epoch": 2.079607568325158, "grad_norm": 0.46404993534088135, "learning_rate": 9.43267160966276e-05, "loss": 0.0778, "step": 14838 }, { "epoch": 2.0797477224947443, "grad_norm": 0.10121031105518341, "learning_rate": 9.431236546280793e-05, "loss": 0.0078, "step": 14839 }, { "epoch": 2.0798878766643307, "grad_norm": 0.1828089952468872, "learning_rate": 9.429801482898827e-05, "loss": 0.0269, "step": 14840 }, { "epoch": 2.080028030833917, "grad_norm": 0.2937658131122589, "learning_rate": 9.428366419516862e-05, "loss": 0.0156, "step": 14841 }, { "epoch": 2.0801681850035036, "grad_norm": 0.15324236452579498, "learning_rate": 9.426931356134895e-05, "loss": 0.0307, "step": 14842 }, { "epoch": 2.0803083391730905, "grad_norm": 0.06872966140508652, "learning_rate": 9.42549629275293e-05, "loss": 0.0044, "step": 14843 }, { "epoch": 2.080448493342677, "grad_norm": 0.3504643440246582, "learning_rate": 9.424061229370964e-05, "loss": 0.0516, "step": 14844 }, { "epoch": 2.0805886475122635, "grad_norm": 0.370417982339859, "learning_rate": 9.422626165988997e-05, "loss": 0.0282, "step": 14845 }, { "epoch": 2.08072880168185, "grad_norm": 0.444980263710022, "learning_rate": 9.421191102607031e-05, "loss": 0.0259, "step": 14846 }, { "epoch": 2.0808689558514364, "grad_norm": 0.029027914628386497, "learning_rate": 9.419756039225065e-05, "loss": 0.0026, "step": 14847 }, { "epoch": 2.0810091100210233, "grad_norm": 0.16119834780693054, "learning_rate": 9.418320975843099e-05, "loss": 0.0161, "step": 14848 }, { "epoch": 2.0811492641906097, "grad_norm": 0.022371480241417885, "learning_rate": 9.416885912461133e-05, "loss": 0.0026, "step": 14849 }, { "epoch": 2.081289418360196, "grad_norm": 0.6151967644691467, "learning_rate": 9.415450849079166e-05, "loss": 0.0567, "step": 14850 }, { "epoch": 2.0814295725297827, "grad_norm": 
0.2203221172094345, "learning_rate": 9.4140157856972e-05, "loss": 0.0163, "step": 14851 }, { "epoch": 2.081569726699369, "grad_norm": 0.16146695613861084, "learning_rate": 9.412580722315236e-05, "loss": 0.0175, "step": 14852 }, { "epoch": 2.081709880868956, "grad_norm": 0.34382033348083496, "learning_rate": 9.411145658933269e-05, "loss": 0.0363, "step": 14853 }, { "epoch": 2.0818500350385425, "grad_norm": 0.14570355415344238, "learning_rate": 9.409710595551303e-05, "loss": 0.0126, "step": 14854 }, { "epoch": 2.081990189208129, "grad_norm": 0.1399497538805008, "learning_rate": 9.408275532169336e-05, "loss": 0.0029, "step": 14855 }, { "epoch": 2.0821303433777154, "grad_norm": 0.7136857509613037, "learning_rate": 9.40684046878737e-05, "loss": 0.1233, "step": 14856 }, { "epoch": 2.082270497547302, "grad_norm": 0.08985535055398941, "learning_rate": 9.405405405405406e-05, "loss": 0.0068, "step": 14857 }, { "epoch": 2.0824106517168888, "grad_norm": 0.7420704364776611, "learning_rate": 9.403970342023439e-05, "loss": 0.0596, "step": 14858 }, { "epoch": 2.0825508058864752, "grad_norm": 0.27444449067115784, "learning_rate": 9.402535278641473e-05, "loss": 0.0115, "step": 14859 }, { "epoch": 2.0826909600560617, "grad_norm": 0.02939487434923649, "learning_rate": 9.401100215259507e-05, "loss": 0.0025, "step": 14860 }, { "epoch": 2.082831114225648, "grad_norm": 0.3931616246700287, "learning_rate": 9.39966515187754e-05, "loss": 0.0309, "step": 14861 }, { "epoch": 2.0829712683952346, "grad_norm": 0.38206520676612854, "learning_rate": 9.398230088495574e-05, "loss": 0.057, "step": 14862 }, { "epoch": 2.0831114225648215, "grad_norm": 0.02849535085260868, "learning_rate": 9.396795025113608e-05, "loss": 0.0031, "step": 14863 }, { "epoch": 2.083251576734408, "grad_norm": 0.1993153840303421, "learning_rate": 9.395359961731642e-05, "loss": 0.0223, "step": 14864 }, { "epoch": 2.0833917309039944, "grad_norm": 0.5830625891685486, "learning_rate": 9.393924898349677e-05, "loss": 0.0331, "step": 
14865 }, { "epoch": 2.083531885073581, "grad_norm": 0.39083006978034973, "learning_rate": 9.39248983496771e-05, "loss": 0.0347, "step": 14866 }, { "epoch": 2.0836720392431674, "grad_norm": 0.27150633931159973, "learning_rate": 9.391054771585744e-05, "loss": 0.0439, "step": 14867 }, { "epoch": 2.0838121934127543, "grad_norm": 0.4067094326019287, "learning_rate": 9.389619708203779e-05, "loss": 0.0573, "step": 14868 }, { "epoch": 2.0839523475823407, "grad_norm": 0.35810044407844543, "learning_rate": 9.388184644821812e-05, "loss": 0.0382, "step": 14869 }, { "epoch": 2.084092501751927, "grad_norm": 0.8136069178581238, "learning_rate": 9.386749581439846e-05, "loss": 0.0432, "step": 14870 }, { "epoch": 2.0842326559215136, "grad_norm": 0.06815282255411148, "learning_rate": 9.385314518057879e-05, "loss": 0.0075, "step": 14871 }, { "epoch": 2.0843728100911, "grad_norm": 0.1116667091846466, "learning_rate": 9.383879454675913e-05, "loss": 0.0085, "step": 14872 }, { "epoch": 2.0845129642606866, "grad_norm": 0.24363498389720917, "learning_rate": 9.382444391293949e-05, "loss": 0.0626, "step": 14873 }, { "epoch": 2.0846531184302735, "grad_norm": 0.17404122650623322, "learning_rate": 9.381009327911982e-05, "loss": 0.0119, "step": 14874 }, { "epoch": 2.08479327259986, "grad_norm": 0.25800928473472595, "learning_rate": 9.379574264530016e-05, "loss": 0.0028, "step": 14875 }, { "epoch": 2.0849334267694464, "grad_norm": 0.09687094390392303, "learning_rate": 9.37813920114805e-05, "loss": 0.0252, "step": 14876 }, { "epoch": 2.085073580939033, "grad_norm": 0.1450301706790924, "learning_rate": 9.376704137766083e-05, "loss": 0.0343, "step": 14877 }, { "epoch": 2.0852137351086193, "grad_norm": 0.14033378660678864, "learning_rate": 9.375269074384117e-05, "loss": 0.0139, "step": 14878 }, { "epoch": 2.085353889278206, "grad_norm": 0.19887270033359528, "learning_rate": 9.373834011002153e-05, "loss": 0.0286, "step": 14879 }, { "epoch": 2.0854940434477927, "grad_norm": 0.38860470056533813, 
"learning_rate": 9.372398947620186e-05, "loss": 0.0248, "step": 14880 }, { "epoch": 2.085634197617379, "grad_norm": 0.20430448651313782, "learning_rate": 9.37096388423822e-05, "loss": 0.0423, "step": 14881 }, { "epoch": 2.0857743517869656, "grad_norm": 0.11464152485132217, "learning_rate": 9.369528820856253e-05, "loss": 0.0208, "step": 14882 }, { "epoch": 2.085914505956552, "grad_norm": 0.06624356657266617, "learning_rate": 9.368093757474287e-05, "loss": 0.0032, "step": 14883 }, { "epoch": 2.086054660126139, "grad_norm": 0.24441885948181152, "learning_rate": 9.366658694092323e-05, "loss": 0.0275, "step": 14884 }, { "epoch": 2.0861948142957254, "grad_norm": 0.4828717112541199, "learning_rate": 9.365223630710355e-05, "loss": 0.1089, "step": 14885 }, { "epoch": 2.086334968465312, "grad_norm": 0.3373447060585022, "learning_rate": 9.36378856732839e-05, "loss": 0.0398, "step": 14886 }, { "epoch": 2.0864751226348983, "grad_norm": 0.23026028275489807, "learning_rate": 9.362353503946424e-05, "loss": 0.0349, "step": 14887 }, { "epoch": 2.0866152768044848, "grad_norm": 0.40679338574409485, "learning_rate": 9.360918440564457e-05, "loss": 0.0214, "step": 14888 }, { "epoch": 2.0867554309740717, "grad_norm": 0.09359004348516464, "learning_rate": 9.359483377182492e-05, "loss": 0.054, "step": 14889 }, { "epoch": 2.086895585143658, "grad_norm": 0.4207221567630768, "learning_rate": 9.358048313800525e-05, "loss": 0.0301, "step": 14890 }, { "epoch": 2.0870357393132446, "grad_norm": 0.056044742465019226, "learning_rate": 9.35661325041856e-05, "loss": 0.0055, "step": 14891 }, { "epoch": 2.087175893482831, "grad_norm": 0.14045846462249756, "learning_rate": 9.355178187036594e-05, "loss": 0.0221, "step": 14892 }, { "epoch": 2.0873160476524175, "grad_norm": 1.7049165964126587, "learning_rate": 9.353743123654626e-05, "loss": 0.0691, "step": 14893 }, { "epoch": 2.087456201822004, "grad_norm": 0.08491361886262894, "learning_rate": 9.35230806027266e-05, "loss": 0.0094, "step": 14894 }, { 
"epoch": 2.087596355991591, "grad_norm": 0.18862558901309967, "learning_rate": 9.350872996890696e-05, "loss": 0.0523, "step": 14895 }, { "epoch": 2.0877365101611773, "grad_norm": 0.4013764560222626, "learning_rate": 9.349437933508729e-05, "loss": 0.073, "step": 14896 }, { "epoch": 2.087876664330764, "grad_norm": 0.8161226511001587, "learning_rate": 9.348002870126763e-05, "loss": 0.0526, "step": 14897 }, { "epoch": 2.0880168185003503, "grad_norm": 0.24728426337242126, "learning_rate": 9.346567806744796e-05, "loss": 0.0491, "step": 14898 }, { "epoch": 2.0881569726699367, "grad_norm": 0.15324163436889648, "learning_rate": 9.34513274336283e-05, "loss": 0.0472, "step": 14899 }, { "epoch": 2.0882971268395236, "grad_norm": 0.048960451036691666, "learning_rate": 9.343697679980866e-05, "loss": 0.005, "step": 14900 }, { "epoch": 2.08843728100911, "grad_norm": 0.08626089245080948, "learning_rate": 9.342262616598899e-05, "loss": 0.0035, "step": 14901 }, { "epoch": 2.0885774351786965, "grad_norm": 0.31123363971710205, "learning_rate": 9.340827553216933e-05, "loss": 0.0281, "step": 14902 }, { "epoch": 2.088717589348283, "grad_norm": 0.19551149010658264, "learning_rate": 9.339392489834967e-05, "loss": 0.0169, "step": 14903 }, { "epoch": 2.0888577435178695, "grad_norm": 0.19941270351409912, "learning_rate": 9.337957426453e-05, "loss": 0.0093, "step": 14904 }, { "epoch": 2.0889978976874564, "grad_norm": 0.09357432276010513, "learning_rate": 9.336522363071036e-05, "loss": 0.0048, "step": 14905 }, { "epoch": 2.089138051857043, "grad_norm": 0.3881312608718872, "learning_rate": 9.335087299689068e-05, "loss": 0.0264, "step": 14906 }, { "epoch": 2.0892782060266293, "grad_norm": 0.46990731358528137, "learning_rate": 9.333652236307103e-05, "loss": 0.0462, "step": 14907 }, { "epoch": 2.0894183601962157, "grad_norm": 0.09395871311426163, "learning_rate": 9.332217172925137e-05, "loss": 0.0039, "step": 14908 }, { "epoch": 2.089558514365802, "grad_norm": 0.4471248984336853, "learning_rate": 
9.33078210954317e-05, "loss": 0.0274, "step": 14909 }, { "epoch": 2.089698668535389, "grad_norm": 0.28431451320648193, "learning_rate": 9.329347046161205e-05, "loss": 0.0214, "step": 14910 }, { "epoch": 2.0898388227049756, "grad_norm": 0.769780695438385, "learning_rate": 9.32791198277924e-05, "loss": 0.0725, "step": 14911 }, { "epoch": 2.089978976874562, "grad_norm": 0.18135784566402435, "learning_rate": 9.326476919397272e-05, "loss": 0.0185, "step": 14912 }, { "epoch": 2.0901191310441485, "grad_norm": 0.20114558935165405, "learning_rate": 9.325041856015307e-05, "loss": 0.008, "step": 14913 }, { "epoch": 2.090259285213735, "grad_norm": 0.5058400630950928, "learning_rate": 9.323606792633341e-05, "loss": 0.0284, "step": 14914 }, { "epoch": 2.090399439383322, "grad_norm": 0.11433473974466324, "learning_rate": 9.322171729251374e-05, "loss": 0.0197, "step": 14915 }, { "epoch": 2.0905395935529083, "grad_norm": 0.22332864999771118, "learning_rate": 9.320736665869409e-05, "loss": 0.0287, "step": 14916 }, { "epoch": 2.0906797477224948, "grad_norm": 0.05541842058300972, "learning_rate": 9.319301602487442e-05, "loss": 0.0062, "step": 14917 }, { "epoch": 2.0908199018920812, "grad_norm": 0.20706160366535187, "learning_rate": 9.317866539105476e-05, "loss": 0.0071, "step": 14918 }, { "epoch": 2.0909600560616677, "grad_norm": 0.7946418523788452, "learning_rate": 9.31643147572351e-05, "loss": 0.2562, "step": 14919 }, { "epoch": 2.0911002102312546, "grad_norm": 0.3528929650783539, "learning_rate": 9.314996412341543e-05, "loss": 0.0479, "step": 14920 }, { "epoch": 2.091240364400841, "grad_norm": 0.5547553896903992, "learning_rate": 9.313561348959579e-05, "loss": 0.0528, "step": 14921 }, { "epoch": 2.0913805185704275, "grad_norm": 0.2563416063785553, "learning_rate": 9.312126285577613e-05, "loss": 0.0476, "step": 14922 }, { "epoch": 2.091520672740014, "grad_norm": 0.12070932239294052, "learning_rate": 9.310691222195646e-05, "loss": 0.0184, "step": 14923 }, { "epoch": 
2.0916608269096004, "grad_norm": 0.22918175160884857, "learning_rate": 9.30925615881368e-05, "loss": 0.0347, "step": 14924 }, { "epoch": 2.091800981079187, "grad_norm": 0.4921470880508423, "learning_rate": 9.307821095431713e-05, "loss": 0.0564, "step": 14925 }, { "epoch": 2.091941135248774, "grad_norm": 0.3449842929840088, "learning_rate": 9.306386032049749e-05, "loss": 0.0686, "step": 14926 }, { "epoch": 2.0920812894183602, "grad_norm": 0.4262050688266754, "learning_rate": 9.304950968667783e-05, "loss": 0.0416, "step": 14927 }, { "epoch": 2.0922214435879467, "grad_norm": 0.3482522666454315, "learning_rate": 9.303515905285816e-05, "loss": 0.0443, "step": 14928 }, { "epoch": 2.092361597757533, "grad_norm": 0.35002216696739197, "learning_rate": 9.30208084190385e-05, "loss": 0.0447, "step": 14929 }, { "epoch": 2.0925017519271196, "grad_norm": 0.12243515253067017, "learning_rate": 9.300645778521884e-05, "loss": 0.0067, "step": 14930 }, { "epoch": 2.0926419060967065, "grad_norm": 0.23505719006061554, "learning_rate": 9.299210715139917e-05, "loss": 0.0247, "step": 14931 }, { "epoch": 2.092782060266293, "grad_norm": 0.7883654832839966, "learning_rate": 9.297775651757953e-05, "loss": 0.0863, "step": 14932 }, { "epoch": 2.0929222144358794, "grad_norm": 0.26290085911750793, "learning_rate": 9.296340588375985e-05, "loss": 0.0134, "step": 14933 }, { "epoch": 2.093062368605466, "grad_norm": 0.2892436981201172, "learning_rate": 9.29490552499402e-05, "loss": 0.022, "step": 14934 }, { "epoch": 2.0932025227750524, "grad_norm": 0.25163471698760986, "learning_rate": 9.293470461612054e-05, "loss": 0.0328, "step": 14935 }, { "epoch": 2.0933426769446393, "grad_norm": 0.047421135008335114, "learning_rate": 9.292035398230087e-05, "loss": 0.0041, "step": 14936 }, { "epoch": 2.0934828311142257, "grad_norm": 0.1715579628944397, "learning_rate": 9.290600334848122e-05, "loss": 0.0104, "step": 14937 }, { "epoch": 2.093622985283812, "grad_norm": 0.2589839994907379, "learning_rate": 
9.289165271466156e-05, "loss": 0.0371, "step": 14938 }, { "epoch": 2.0937631394533986, "grad_norm": 0.18597784638404846, "learning_rate": 9.287730208084189e-05, "loss": 0.0228, "step": 14939 }, { "epoch": 2.093903293622985, "grad_norm": 0.2406667172908783, "learning_rate": 9.286295144702224e-05, "loss": 0.0158, "step": 14940 }, { "epoch": 2.094043447792572, "grad_norm": 0.05905997008085251, "learning_rate": 9.284860081320256e-05, "loss": 0.0051, "step": 14941 }, { "epoch": 2.0941836019621585, "grad_norm": 0.09497687965631485, "learning_rate": 9.283425017938292e-05, "loss": 0.0112, "step": 14942 }, { "epoch": 2.094323756131745, "grad_norm": 0.11059311777353287, "learning_rate": 9.281989954556326e-05, "loss": 0.0303, "step": 14943 }, { "epoch": 2.0944639103013314, "grad_norm": 0.25810706615448, "learning_rate": 9.280554891174359e-05, "loss": 0.0248, "step": 14944 }, { "epoch": 2.094604064470918, "grad_norm": 0.11542551964521408, "learning_rate": 9.279119827792393e-05, "loss": 0.0104, "step": 14945 }, { "epoch": 2.0947442186405048, "grad_norm": 0.3972245156764984, "learning_rate": 9.277684764410427e-05, "loss": 0.0486, "step": 14946 }, { "epoch": 2.094884372810091, "grad_norm": 0.4474865198135376, "learning_rate": 9.27624970102846e-05, "loss": 0.0626, "step": 14947 }, { "epoch": 2.0950245269796777, "grad_norm": 0.15198349952697754, "learning_rate": 9.274814637646496e-05, "loss": 0.0087, "step": 14948 }, { "epoch": 2.095164681149264, "grad_norm": 0.21712373197078705, "learning_rate": 9.273379574264529e-05, "loss": 0.0449, "step": 14949 }, { "epoch": 2.0953048353188506, "grad_norm": 0.3505094647407532, "learning_rate": 9.271944510882563e-05, "loss": 0.0591, "step": 14950 }, { "epoch": 2.0954449894884375, "grad_norm": 0.21627293527126312, "learning_rate": 9.270509447500597e-05, "loss": 0.0131, "step": 14951 }, { "epoch": 2.095585143658024, "grad_norm": 0.2086072862148285, "learning_rate": 9.26907438411863e-05, "loss": 0.0297, "step": 14952 }, { "epoch": 
2.0957252978276104, "grad_norm": 0.04036533087491989, "learning_rate": 9.267639320736666e-05, "loss": 0.0042, "step": 14953 }, { "epoch": 2.095865451997197, "grad_norm": 0.4073203206062317, "learning_rate": 9.2662042573547e-05, "loss": 0.049, "step": 14954 }, { "epoch": 2.0960056061667833, "grad_norm": 0.32072335481643677, "learning_rate": 9.264769193972733e-05, "loss": 0.0179, "step": 14955 }, { "epoch": 2.09614576033637, "grad_norm": 0.21063660085201263, "learning_rate": 9.263334130590767e-05, "loss": 0.0112, "step": 14956 }, { "epoch": 2.0962859145059567, "grad_norm": 0.22807221114635468, "learning_rate": 9.261899067208801e-05, "loss": 0.0194, "step": 14957 }, { "epoch": 2.096426068675543, "grad_norm": 0.20837998390197754, "learning_rate": 9.260464003826835e-05, "loss": 0.0346, "step": 14958 }, { "epoch": 2.0965662228451296, "grad_norm": 0.20721688866615295, "learning_rate": 9.25902894044487e-05, "loss": 0.0247, "step": 14959 }, { "epoch": 2.096706377014716, "grad_norm": 0.18544557690620422, "learning_rate": 9.257593877062902e-05, "loss": 0.015, "step": 14960 }, { "epoch": 2.0968465311843025, "grad_norm": 0.15007969737052917, "learning_rate": 9.256158813680937e-05, "loss": 0.0247, "step": 14961 }, { "epoch": 2.0969866853538894, "grad_norm": 0.5063300728797913, "learning_rate": 9.254723750298971e-05, "loss": 0.03, "step": 14962 }, { "epoch": 2.097126839523476, "grad_norm": 0.12026919424533844, "learning_rate": 9.253288686917004e-05, "loss": 0.0187, "step": 14963 }, { "epoch": 2.0972669936930624, "grad_norm": 0.5496593713760376, "learning_rate": 9.251853623535039e-05, "loss": 0.0387, "step": 14964 }, { "epoch": 2.097407147862649, "grad_norm": 2.127610206604004, "learning_rate": 9.250418560153073e-05, "loss": 0.1238, "step": 14965 }, { "epoch": 2.0975473020322353, "grad_norm": 0.35284027457237244, "learning_rate": 9.248983496771106e-05, "loss": 0.0308, "step": 14966 }, { "epoch": 2.097687456201822, "grad_norm": 0.6448554396629333, "learning_rate": 
9.24754843338914e-05, "loss": 0.0573, "step": 14967 }, { "epoch": 2.0978276103714086, "grad_norm": 0.1658954620361328, "learning_rate": 9.246113370007173e-05, "loss": 0.0177, "step": 14968 }, { "epoch": 2.097967764540995, "grad_norm": 0.1602509766817093, "learning_rate": 9.244678306625209e-05, "loss": 0.0134, "step": 14969 }, { "epoch": 2.0981079187105816, "grad_norm": 0.11166302114725113, "learning_rate": 9.243243243243243e-05, "loss": 0.0104, "step": 14970 }, { "epoch": 2.098248072880168, "grad_norm": 0.2130666822195053, "learning_rate": 9.241808179861276e-05, "loss": 0.0302, "step": 14971 }, { "epoch": 2.098388227049755, "grad_norm": 0.22285212576389313, "learning_rate": 9.24037311647931e-05, "loss": 0.0069, "step": 14972 }, { "epoch": 2.0985283812193414, "grad_norm": 0.19388644397258759, "learning_rate": 9.238938053097344e-05, "loss": 0.0305, "step": 14973 }, { "epoch": 2.098668535388928, "grad_norm": 0.471657931804657, "learning_rate": 9.237502989715379e-05, "loss": 0.0308, "step": 14974 }, { "epoch": 2.0988086895585143, "grad_norm": 0.2812153398990631, "learning_rate": 9.236067926333413e-05, "loss": 0.0582, "step": 14975 }, { "epoch": 2.0989488437281008, "grad_norm": 0.37519508600234985, "learning_rate": 9.234632862951446e-05, "loss": 0.0257, "step": 14976 }, { "epoch": 2.0990889978976877, "grad_norm": 0.26630720496177673, "learning_rate": 9.23319779956948e-05, "loss": 0.1327, "step": 14977 }, { "epoch": 2.099229152067274, "grad_norm": 0.17649459838867188, "learning_rate": 9.231762736187514e-05, "loss": 0.0081, "step": 14978 }, { "epoch": 2.0993693062368606, "grad_norm": 0.1197456419467926, "learning_rate": 9.230327672805547e-05, "loss": 0.0103, "step": 14979 }, { "epoch": 2.099509460406447, "grad_norm": 0.3109102249145508, "learning_rate": 9.228892609423582e-05, "loss": 0.0443, "step": 14980 }, { "epoch": 2.0996496145760335, "grad_norm": 0.37403836846351624, "learning_rate": 9.227457546041617e-05, "loss": 0.037, "step": 14981 }, { "epoch": 
2.0997897687456204, "grad_norm": 0.15053027868270874, "learning_rate": 9.22602248265965e-05, "loss": 0.0114, "step": 14982 }, { "epoch": 2.099929922915207, "grad_norm": 0.22350580990314484, "learning_rate": 9.224587419277684e-05, "loss": 0.0824, "step": 14983 }, { "epoch": 2.1000700770847933, "grad_norm": 0.23979859054088593, "learning_rate": 9.223152355895717e-05, "loss": 0.0443, "step": 14984 }, { "epoch": 2.10021023125438, "grad_norm": 0.06921961903572083, "learning_rate": 9.221717292513752e-05, "loss": 0.0047, "step": 14985 }, { "epoch": 2.1003503854239662, "grad_norm": 0.5286343693733215, "learning_rate": 9.220282229131786e-05, "loss": 0.0571, "step": 14986 }, { "epoch": 2.1004905395935527, "grad_norm": 0.43653401732444763, "learning_rate": 9.218847165749819e-05, "loss": 0.0439, "step": 14987 }, { "epoch": 2.1006306937631396, "grad_norm": 0.26151660084724426, "learning_rate": 9.217412102367853e-05, "loss": 0.0322, "step": 14988 }, { "epoch": 2.100770847932726, "grad_norm": 0.2586711049079895, "learning_rate": 9.215977038985888e-05, "loss": 0.0626, "step": 14989 }, { "epoch": 2.1009110021023125, "grad_norm": 0.18462680280208588, "learning_rate": 9.214541975603922e-05, "loss": 0.0431, "step": 14990 }, { "epoch": 2.101051156271899, "grad_norm": 0.19644387066364288, "learning_rate": 9.213106912221956e-05, "loss": 0.015, "step": 14991 }, { "epoch": 2.1011913104414854, "grad_norm": 0.16139784455299377, "learning_rate": 9.21167184883999e-05, "loss": 0.0326, "step": 14992 }, { "epoch": 2.1013314646110723, "grad_norm": 0.3132253885269165, "learning_rate": 9.210236785458023e-05, "loss": 0.0495, "step": 14993 }, { "epoch": 2.101471618780659, "grad_norm": 0.16853821277618408, "learning_rate": 9.208801722076057e-05, "loss": 0.0165, "step": 14994 }, { "epoch": 2.1016117729502453, "grad_norm": 0.1404504030942917, "learning_rate": 9.20736665869409e-05, "loss": 0.0313, "step": 14995 }, { "epoch": 2.1017519271198317, "grad_norm": 0.49739542603492737, "learning_rate": 
9.205931595312126e-05, "loss": 0.0471, "step": 14996 }, { "epoch": 2.101892081289418, "grad_norm": 1.9007058143615723, "learning_rate": 9.20449653193016e-05, "loss": 0.0628, "step": 14997 }, { "epoch": 2.102032235459005, "grad_norm": 0.13976016640663147, "learning_rate": 9.203061468548193e-05, "loss": 0.0125, "step": 14998 }, { "epoch": 2.1021723896285915, "grad_norm": 0.1881859302520752, "learning_rate": 9.201626405166227e-05, "loss": 0.0152, "step": 14999 }, { "epoch": 2.102312543798178, "grad_norm": 0.3632969260215759, "learning_rate": 9.200191341784263e-05, "loss": 0.0191, "step": 15000 }, { "epoch": 2.1024526979677645, "grad_norm": 0.05354613810777664, "learning_rate": 9.198756278402296e-05, "loss": 0.0047, "step": 15001 }, { "epoch": 2.102592852137351, "grad_norm": 0.47835275530815125, "learning_rate": 9.19732121502033e-05, "loss": 0.0455, "step": 15002 }, { "epoch": 2.102733006306938, "grad_norm": 0.0926915854215622, "learning_rate": 9.195886151638363e-05, "loss": 0.0127, "step": 15003 }, { "epoch": 2.1028731604765243, "grad_norm": 0.3747750222682953, "learning_rate": 9.194451088256397e-05, "loss": 0.0142, "step": 15004 }, { "epoch": 2.1030133146461107, "grad_norm": 0.2441720962524414, "learning_rate": 9.193016024874431e-05, "loss": 0.0454, "step": 15005 }, { "epoch": 2.103153468815697, "grad_norm": 0.22082845866680145, "learning_rate": 9.191580961492465e-05, "loss": 0.0097, "step": 15006 }, { "epoch": 2.1032936229852837, "grad_norm": 0.23242922127246857, "learning_rate": 9.1901458981105e-05, "loss": 0.0357, "step": 15007 }, { "epoch": 2.1034337771548706, "grad_norm": 0.04633573070168495, "learning_rate": 9.188710834728534e-05, "loss": 0.0047, "step": 15008 }, { "epoch": 2.103573931324457, "grad_norm": 0.1291520744562149, "learning_rate": 9.187275771346566e-05, "loss": 0.0128, "step": 15009 }, { "epoch": 2.1037140854940435, "grad_norm": 0.34169691801071167, "learning_rate": 9.185840707964601e-05, "loss": 0.0442, "step": 15010 }, { "epoch": 2.10385423966363, 
"grad_norm": 0.1102093979716301, "learning_rate": 9.184405644582634e-05, "loss": 0.0195, "step": 15011 }, { "epoch": 2.1039943938332164, "grad_norm": 0.07528748363256454, "learning_rate": 9.182970581200669e-05, "loss": 0.0054, "step": 15012 }, { "epoch": 2.1041345480028033, "grad_norm": 0.28213754296302795, "learning_rate": 9.181535517818703e-05, "loss": 0.0441, "step": 15013 }, { "epoch": 2.1042747021723898, "grad_norm": 0.5728062987327576, "learning_rate": 9.180100454436736e-05, "loss": 0.0743, "step": 15014 }, { "epoch": 2.1044148563419762, "grad_norm": 0.12452620267868042, "learning_rate": 9.17866539105477e-05, "loss": 0.011, "step": 15015 }, { "epoch": 2.1045550105115627, "grad_norm": 0.1997743397951126, "learning_rate": 9.177230327672806e-05, "loss": 0.0293, "step": 15016 }, { "epoch": 2.104695164681149, "grad_norm": 0.06323877722024918, "learning_rate": 9.175795264290839e-05, "loss": 0.0062, "step": 15017 }, { "epoch": 2.1048353188507356, "grad_norm": 0.9490754008293152, "learning_rate": 9.174360200908873e-05, "loss": 0.1493, "step": 15018 }, { "epoch": 2.1049754730203225, "grad_norm": 0.09940283745527267, "learning_rate": 9.172925137526906e-05, "loss": 0.0032, "step": 15019 }, { "epoch": 2.105115627189909, "grad_norm": 0.7202023267745972, "learning_rate": 9.17149007414494e-05, "loss": 0.0641, "step": 15020 }, { "epoch": 2.1052557813594954, "grad_norm": 0.4888520836830139, "learning_rate": 9.170055010762976e-05, "loss": 0.0873, "step": 15021 }, { "epoch": 2.105395935529082, "grad_norm": 0.076435387134552, "learning_rate": 9.168619947381009e-05, "loss": 0.0105, "step": 15022 }, { "epoch": 2.1055360896986683, "grad_norm": 0.1431753784418106, "learning_rate": 9.167184883999043e-05, "loss": 0.0173, "step": 15023 }, { "epoch": 2.1056762438682552, "grad_norm": 0.3891960680484772, "learning_rate": 9.165749820617077e-05, "loss": 0.0149, "step": 15024 }, { "epoch": 2.1058163980378417, "grad_norm": 0.13248807191848755, "learning_rate": 9.16431475723511e-05, "loss": 
0.0327, "step": 15025 }, { "epoch": 2.105956552207428, "grad_norm": 0.20844857394695282, "learning_rate": 9.162879693853144e-05, "loss": 0.0522, "step": 15026 }, { "epoch": 2.1060967063770146, "grad_norm": 0.14371460676193237, "learning_rate": 9.16144463047118e-05, "loss": 0.0128, "step": 15027 }, { "epoch": 2.106236860546601, "grad_norm": 0.5110690593719482, "learning_rate": 9.160009567089212e-05, "loss": 0.0422, "step": 15028 }, { "epoch": 2.106377014716188, "grad_norm": 0.5287502408027649, "learning_rate": 9.158574503707247e-05, "loss": 0.0326, "step": 15029 }, { "epoch": 2.1065171688857744, "grad_norm": 0.2070641815662384, "learning_rate": 9.15713944032528e-05, "loss": 0.0427, "step": 15030 }, { "epoch": 2.106657323055361, "grad_norm": 0.2262621521949768, "learning_rate": 9.155704376943314e-05, "loss": 0.0228, "step": 15031 }, { "epoch": 2.1067974772249474, "grad_norm": 0.3216087222099304, "learning_rate": 9.154269313561349e-05, "loss": 0.0424, "step": 15032 }, { "epoch": 2.106937631394534, "grad_norm": 0.33272409439086914, "learning_rate": 9.152834250179382e-05, "loss": 0.0527, "step": 15033 }, { "epoch": 2.1070777855641207, "grad_norm": 0.2307039499282837, "learning_rate": 9.151399186797416e-05, "loss": 0.035, "step": 15034 }, { "epoch": 2.107217939733707, "grad_norm": 0.23774205148220062, "learning_rate": 9.14996412341545e-05, "loss": 0.0088, "step": 15035 }, { "epoch": 2.1073580939032936, "grad_norm": 0.21409811079502106, "learning_rate": 9.148529060033483e-05, "loss": 0.0376, "step": 15036 }, { "epoch": 2.10749824807288, "grad_norm": 0.12241832911968231, "learning_rate": 9.147093996651519e-05, "loss": 0.0375, "step": 15037 }, { "epoch": 2.1076384022424666, "grad_norm": 0.1866355538368225, "learning_rate": 9.145658933269552e-05, "loss": 0.0221, "step": 15038 }, { "epoch": 2.1077785564120535, "grad_norm": 0.30886805057525635, "learning_rate": 9.144223869887586e-05, "loss": 0.0213, "step": 15039 }, { "epoch": 2.10791871058164, "grad_norm": 
0.22603009641170502, "learning_rate": 9.14278880650562e-05, "loss": 0.0253, "step": 15040 }, { "epoch": 2.1080588647512264, "grad_norm": 0.23343555629253387, "learning_rate": 9.141353743123653e-05, "loss": 0.0256, "step": 15041 }, { "epoch": 2.108199018920813, "grad_norm": 0.2893286943435669, "learning_rate": 9.139918679741687e-05, "loss": 0.0338, "step": 15042 }, { "epoch": 2.1083391730903993, "grad_norm": 0.21442104876041412, "learning_rate": 9.138483616359723e-05, "loss": 0.0343, "step": 15043 }, { "epoch": 2.108479327259986, "grad_norm": 0.43006017804145813, "learning_rate": 9.137048552977756e-05, "loss": 0.0487, "step": 15044 }, { "epoch": 2.1086194814295727, "grad_norm": 0.18558984994888306, "learning_rate": 9.13561348959579e-05, "loss": 0.0116, "step": 15045 }, { "epoch": 2.108759635599159, "grad_norm": 0.3185671269893646, "learning_rate": 9.134178426213823e-05, "loss": 0.0109, "step": 15046 }, { "epoch": 2.1088997897687456, "grad_norm": 0.13591139018535614, "learning_rate": 9.132743362831857e-05, "loss": 0.0282, "step": 15047 }, { "epoch": 2.109039943938332, "grad_norm": 0.48032113909721375, "learning_rate": 9.131308299449893e-05, "loss": 0.062, "step": 15048 }, { "epoch": 2.1091800981079185, "grad_norm": 0.44369369745254517, "learning_rate": 9.129873236067925e-05, "loss": 0.0596, "step": 15049 }, { "epoch": 2.1093202522775054, "grad_norm": 0.20268294215202332, "learning_rate": 9.12843817268596e-05, "loss": 0.066, "step": 15050 }, { "epoch": 2.109460406447092, "grad_norm": 0.33319535851478577, "learning_rate": 9.127003109303994e-05, "loss": 0.0703, "step": 15051 }, { "epoch": 2.1096005606166783, "grad_norm": 0.0852564200758934, "learning_rate": 9.125568045922027e-05, "loss": 0.0221, "step": 15052 }, { "epoch": 2.109740714786265, "grad_norm": 0.7165330052375793, "learning_rate": 9.124132982540062e-05, "loss": 0.0214, "step": 15053 }, { "epoch": 2.1098808689558513, "grad_norm": 0.3211999833583832, "learning_rate": 9.122697919158095e-05, "loss": 0.0176, 
"step": 15054 }, { "epoch": 2.110021023125438, "grad_norm": 0.2102336287498474, "learning_rate": 9.12126285577613e-05, "loss": 0.0153, "step": 15055 }, { "epoch": 2.1101611772950246, "grad_norm": 0.28420835733413696, "learning_rate": 9.119827792394164e-05, "loss": 0.0416, "step": 15056 }, { "epoch": 2.110301331464611, "grad_norm": 0.2846992313861847, "learning_rate": 9.118392729012196e-05, "loss": 0.0673, "step": 15057 }, { "epoch": 2.1104414856341975, "grad_norm": 0.12391500174999237, "learning_rate": 9.11695766563023e-05, "loss": 0.009, "step": 15058 }, { "epoch": 2.110581639803784, "grad_norm": 0.16295009851455688, "learning_rate": 9.115522602248266e-05, "loss": 0.0362, "step": 15059 }, { "epoch": 2.110721793973371, "grad_norm": 0.32742732763290405, "learning_rate": 9.114087538866299e-05, "loss": 0.0157, "step": 15060 }, { "epoch": 2.1108619481429574, "grad_norm": 0.24940289556980133, "learning_rate": 9.112652475484333e-05, "loss": 0.0164, "step": 15061 }, { "epoch": 2.111002102312544, "grad_norm": 0.47123095393180847, "learning_rate": 9.111217412102367e-05, "loss": 0.0217, "step": 15062 }, { "epoch": 2.1111422564821303, "grad_norm": 0.0580204539000988, "learning_rate": 9.1097823487204e-05, "loss": 0.0057, "step": 15063 }, { "epoch": 2.1112824106517167, "grad_norm": 0.2690834403038025, "learning_rate": 9.108347285338436e-05, "loss": 0.0394, "step": 15064 }, { "epoch": 2.1114225648213036, "grad_norm": 0.43335792422294617, "learning_rate": 9.106912221956469e-05, "loss": 0.0839, "step": 15065 }, { "epoch": 2.11156271899089, "grad_norm": 0.4414302408695221, "learning_rate": 9.105477158574503e-05, "loss": 0.0187, "step": 15066 }, { "epoch": 2.1117028731604766, "grad_norm": 0.7357553839683533, "learning_rate": 9.104042095192537e-05, "loss": 0.0398, "step": 15067 }, { "epoch": 2.111843027330063, "grad_norm": 0.436136394739151, "learning_rate": 9.10260703181057e-05, "loss": 0.0594, "step": 15068 }, { "epoch": 2.1119831814996495, "grad_norm": 1.2286555767059326, 
"learning_rate": 9.101171968428606e-05, "loss": 0.0562, "step": 15069 }, { "epoch": 2.112123335669236, "grad_norm": 2.225698232650757, "learning_rate": 9.09973690504664e-05, "loss": 0.1456, "step": 15070 }, { "epoch": 2.112263489838823, "grad_norm": 0.4084653854370117, "learning_rate": 9.098301841664673e-05, "loss": 0.0312, "step": 15071 }, { "epoch": 2.1124036440084093, "grad_norm": 0.09670858085155487, "learning_rate": 9.096866778282707e-05, "loss": 0.0166, "step": 15072 }, { "epoch": 2.1125437981779958, "grad_norm": 0.30542707443237305, "learning_rate": 9.09543171490074e-05, "loss": 0.0362, "step": 15073 }, { "epoch": 2.112683952347582, "grad_norm": 0.1805487424135208, "learning_rate": 9.093996651518774e-05, "loss": 0.0277, "step": 15074 }, { "epoch": 2.1128241065171687, "grad_norm": 0.1850481629371643, "learning_rate": 9.09256158813681e-05, "loss": 0.0099, "step": 15075 }, { "epoch": 2.1129642606867556, "grad_norm": 0.1688794046640396, "learning_rate": 9.091126524754842e-05, "loss": 0.0567, "step": 15076 }, { "epoch": 2.113104414856342, "grad_norm": 0.09091239422559738, "learning_rate": 9.089691461372877e-05, "loss": 0.0226, "step": 15077 }, { "epoch": 2.1132445690259285, "grad_norm": 0.22825609147548676, "learning_rate": 9.088256397990911e-05, "loss": 0.0372, "step": 15078 }, { "epoch": 2.113384723195515, "grad_norm": 0.6003784537315369, "learning_rate": 9.086821334608944e-05, "loss": 0.0778, "step": 15079 }, { "epoch": 2.1135248773651014, "grad_norm": 0.18894918262958527, "learning_rate": 9.085386271226979e-05, "loss": 0.0235, "step": 15080 }, { "epoch": 2.1136650315346883, "grad_norm": 0.5138563513755798, "learning_rate": 9.083951207845012e-05, "loss": 0.0473, "step": 15081 }, { "epoch": 2.113805185704275, "grad_norm": 0.21134959161281586, "learning_rate": 9.082516144463046e-05, "loss": 0.041, "step": 15082 }, { "epoch": 2.1139453398738612, "grad_norm": 0.20066098868846893, "learning_rate": 9.08108108108108e-05, "loss": 0.0197, "step": 15083 }, { "epoch": 
2.1140854940434477, "grad_norm": 0.5675198435783386, "learning_rate": 9.079646017699113e-05, "loss": 0.0218, "step": 15084 }, { "epoch": 2.114225648213034, "grad_norm": 0.20292043685913086, "learning_rate": 9.078210954317149e-05, "loss": 0.0226, "step": 15085 }, { "epoch": 2.114365802382621, "grad_norm": 0.14065830409526825, "learning_rate": 9.076775890935183e-05, "loss": 0.0261, "step": 15086 }, { "epoch": 2.1145059565522075, "grad_norm": 0.21567001938819885, "learning_rate": 9.075340827553216e-05, "loss": 0.0085, "step": 15087 }, { "epoch": 2.114646110721794, "grad_norm": 0.28315526247024536, "learning_rate": 9.07390576417125e-05, "loss": 0.071, "step": 15088 }, { "epoch": 2.1147862648913804, "grad_norm": 0.3099398612976074, "learning_rate": 9.072470700789283e-05, "loss": 0.0318, "step": 15089 }, { "epoch": 2.114926419060967, "grad_norm": 0.036153245717287064, "learning_rate": 9.071035637407317e-05, "loss": 0.0038, "step": 15090 }, { "epoch": 2.115066573230554, "grad_norm": 0.23041552305221558, "learning_rate": 9.069600574025353e-05, "loss": 0.0487, "step": 15091 }, { "epoch": 2.1152067274001403, "grad_norm": 0.5008238554000854, "learning_rate": 9.068165510643386e-05, "loss": 0.0296, "step": 15092 }, { "epoch": 2.1153468815697267, "grad_norm": 0.04281735047698021, "learning_rate": 9.06673044726142e-05, "loss": 0.0046, "step": 15093 }, { "epoch": 2.115487035739313, "grad_norm": 0.4874298572540283, "learning_rate": 9.065295383879454e-05, "loss": 0.0169, "step": 15094 }, { "epoch": 2.1156271899088996, "grad_norm": 0.1225438192486763, "learning_rate": 9.063860320497487e-05, "loss": 0.0048, "step": 15095 }, { "epoch": 2.1157673440784865, "grad_norm": 0.16113333404064178, "learning_rate": 9.062425257115523e-05, "loss": 0.0195, "step": 15096 }, { "epoch": 2.115907498248073, "grad_norm": 0.5520235896110535, "learning_rate": 9.060990193733555e-05, "loss": 0.0225, "step": 15097 }, { "epoch": 2.1160476524176595, "grad_norm": 0.9119699001312256, "learning_rate": 
9.05955513035159e-05, "loss": 0.0232, "step": 15098 }, { "epoch": 2.116187806587246, "grad_norm": 0.2690444588661194, "learning_rate": 9.058120066969624e-05, "loss": 0.065, "step": 15099 }, { "epoch": 2.1163279607568324, "grad_norm": 0.5533698201179504, "learning_rate": 9.056685003587657e-05, "loss": 0.0323, "step": 15100 }, { "epoch": 2.116468114926419, "grad_norm": 0.49332866072654724, "learning_rate": 9.055249940205692e-05, "loss": 0.039, "step": 15101 }, { "epoch": 2.1166082690960057, "grad_norm": 0.0938776284456253, "learning_rate": 9.053814876823726e-05, "loss": 0.0111, "step": 15102 }, { "epoch": 2.116748423265592, "grad_norm": 0.4235430359840393, "learning_rate": 9.052379813441759e-05, "loss": 0.0745, "step": 15103 }, { "epoch": 2.1168885774351787, "grad_norm": 0.09138573706150055, "learning_rate": 9.050944750059794e-05, "loss": 0.0091, "step": 15104 }, { "epoch": 2.117028731604765, "grad_norm": 0.4912731945514679, "learning_rate": 9.049509686677828e-05, "loss": 0.0201, "step": 15105 }, { "epoch": 2.1171688857743516, "grad_norm": 0.3563465476036072, "learning_rate": 9.04807462329586e-05, "loss": 0.0196, "step": 15106 }, { "epoch": 2.1173090399439385, "grad_norm": 0.053236689418554306, "learning_rate": 9.046639559913896e-05, "loss": 0.0028, "step": 15107 }, { "epoch": 2.117449194113525, "grad_norm": 0.37504300475120544, "learning_rate": 9.045204496531929e-05, "loss": 0.0361, "step": 15108 }, { "epoch": 2.1175893482831114, "grad_norm": 0.14413096010684967, "learning_rate": 9.043769433149963e-05, "loss": 0.0082, "step": 15109 }, { "epoch": 2.117729502452698, "grad_norm": 0.2738886773586273, "learning_rate": 9.042334369767997e-05, "loss": 0.035, "step": 15110 }, { "epoch": 2.1178696566222843, "grad_norm": 0.2167007476091385, "learning_rate": 9.04089930638603e-05, "loss": 0.0231, "step": 15111 }, { "epoch": 2.1180098107918712, "grad_norm": 0.117019884288311, "learning_rate": 9.039464243004066e-05, "loss": 0.0117, "step": 15112 }, { "epoch": 2.1181499649614577, 
"grad_norm": 0.1578192412853241, "learning_rate": 9.0380291796221e-05, "loss": 0.009, "step": 15113 }, { "epoch": 2.118290119131044, "grad_norm": 0.25520068407058716, "learning_rate": 9.036594116240133e-05, "loss": 0.0287, "step": 15114 }, { "epoch": 2.1184302733006306, "grad_norm": 0.8332095146179199, "learning_rate": 9.035159052858167e-05, "loss": 0.0544, "step": 15115 }, { "epoch": 2.118570427470217, "grad_norm": 0.9073244333267212, "learning_rate": 9.0337239894762e-05, "loss": 0.0597, "step": 15116 }, { "epoch": 2.118710581639804, "grad_norm": 1.3589686155319214, "learning_rate": 9.032288926094236e-05, "loss": 0.0506, "step": 15117 }, { "epoch": 2.1188507358093904, "grad_norm": 0.18314681947231293, "learning_rate": 9.03085386271227e-05, "loss": 0.0026, "step": 15118 }, { "epoch": 2.118990889978977, "grad_norm": 1.2299740314483643, "learning_rate": 9.029418799330303e-05, "loss": 0.0446, "step": 15119 }, { "epoch": 2.1191310441485633, "grad_norm": 3.954184055328369, "learning_rate": 9.027983735948337e-05, "loss": 0.15, "step": 15120 }, { "epoch": 2.11927119831815, "grad_norm": 0.33408287167549133, "learning_rate": 9.026548672566371e-05, "loss": 0.0796, "step": 15121 }, { "epoch": 2.1194113524877367, "grad_norm": 0.23975008726119995, "learning_rate": 9.025113609184404e-05, "loss": 0.034, "step": 15122 }, { "epoch": 2.119551506657323, "grad_norm": 0.24978220462799072, "learning_rate": 9.02367854580244e-05, "loss": 0.0057, "step": 15123 }, { "epoch": 2.1196916608269096, "grad_norm": 0.6574159264564514, "learning_rate": 9.022243482420472e-05, "loss": 0.0353, "step": 15124 }, { "epoch": 2.119831814996496, "grad_norm": 0.1471230834722519, "learning_rate": 9.020808419038507e-05, "loss": 0.0215, "step": 15125 }, { "epoch": 2.1199719691660825, "grad_norm": 0.1987726241350174, "learning_rate": 9.019373355656541e-05, "loss": 0.0405, "step": 15126 }, { "epoch": 2.1201121233356695, "grad_norm": 0.3172414004802704, "learning_rate": 9.017938292274574e-05, "loss": 0.0706, 
"step": 15127 }, { "epoch": 2.120252277505256, "grad_norm": 0.6078309416770935, "learning_rate": 9.016503228892609e-05, "loss": 0.0444, "step": 15128 }, { "epoch": 2.1203924316748424, "grad_norm": 0.11829888820648193, "learning_rate": 9.015068165510643e-05, "loss": 0.0146, "step": 15129 }, { "epoch": 2.120532585844429, "grad_norm": 0.11326898634433746, "learning_rate": 9.013633102128676e-05, "loss": 0.0081, "step": 15130 }, { "epoch": 2.1206727400140153, "grad_norm": 0.18263469636440277, "learning_rate": 9.01219803874671e-05, "loss": 0.0124, "step": 15131 }, { "epoch": 2.1208128941836017, "grad_norm": 0.21754388511180878, "learning_rate": 9.010762975364743e-05, "loss": 0.0731, "step": 15132 }, { "epoch": 2.1209530483531887, "grad_norm": 0.21932834386825562, "learning_rate": 9.009327911982779e-05, "loss": 0.0151, "step": 15133 }, { "epoch": 2.121093202522775, "grad_norm": 0.5655601620674133, "learning_rate": 9.007892848600813e-05, "loss": 0.0856, "step": 15134 }, { "epoch": 2.1212333566923616, "grad_norm": 0.1506335437297821, "learning_rate": 9.006457785218846e-05, "loss": 0.0145, "step": 15135 }, { "epoch": 2.121373510861948, "grad_norm": 0.38737690448760986, "learning_rate": 9.00502272183688e-05, "loss": 0.0291, "step": 15136 }, { "epoch": 2.1215136650315345, "grad_norm": 0.05948127061128616, "learning_rate": 9.003587658454914e-05, "loss": 0.0034, "step": 15137 }, { "epoch": 2.1216538192011214, "grad_norm": 0.4119667112827301, "learning_rate": 9.002152595072947e-05, "loss": 0.0484, "step": 15138 }, { "epoch": 2.121793973370708, "grad_norm": 0.08403030782938004, "learning_rate": 9.000717531690983e-05, "loss": 0.0144, "step": 15139 }, { "epoch": 2.1219341275402943, "grad_norm": 0.29257088899612427, "learning_rate": 8.999282468309017e-05, "loss": 0.0643, "step": 15140 }, { "epoch": 2.1220742817098808, "grad_norm": 0.40619707107543945, "learning_rate": 8.99784740492705e-05, "loss": 0.0295, "step": 15141 }, { "epoch": 2.1222144358794672, "grad_norm": 
0.1887200027704239, "learning_rate": 8.996412341545084e-05, "loss": 0.0379, "step": 15142 }, { "epoch": 2.122354590049054, "grad_norm": 0.7990462779998779, "learning_rate": 8.994977278163117e-05, "loss": 0.0617, "step": 15143 }, { "epoch": 2.1224947442186406, "grad_norm": 0.15633563697338104, "learning_rate": 8.993542214781152e-05, "loss": 0.0071, "step": 15144 }, { "epoch": 2.122634898388227, "grad_norm": 0.34932953119277954, "learning_rate": 8.992107151399187e-05, "loss": 0.0391, "step": 15145 }, { "epoch": 2.1227750525578135, "grad_norm": 0.3217383027076721, "learning_rate": 8.99067208801722e-05, "loss": 0.0109, "step": 15146 }, { "epoch": 2.1229152067274, "grad_norm": 0.07806293666362762, "learning_rate": 8.989237024635254e-05, "loss": 0.045, "step": 15147 }, { "epoch": 2.123055360896987, "grad_norm": 0.5690058469772339, "learning_rate": 8.98780196125329e-05, "loss": 0.0592, "step": 15148 }, { "epoch": 2.1231955150665733, "grad_norm": 0.2966855466365814, "learning_rate": 8.986366897871322e-05, "loss": 0.0454, "step": 15149 }, { "epoch": 2.12333566923616, "grad_norm": 0.11907083541154861, "learning_rate": 8.984931834489356e-05, "loss": 0.0096, "step": 15150 }, { "epoch": 2.1234758234057463, "grad_norm": 0.2653689682483673, "learning_rate": 8.983496771107389e-05, "loss": 0.0471, "step": 15151 }, { "epoch": 2.1236159775753327, "grad_norm": 0.10507310926914215, "learning_rate": 8.982061707725423e-05, "loss": 0.0079, "step": 15152 }, { "epoch": 2.1237561317449196, "grad_norm": 0.27919918298721313, "learning_rate": 8.980626644343458e-05, "loss": 0.0534, "step": 15153 }, { "epoch": 2.123896285914506, "grad_norm": 0.8085997700691223, "learning_rate": 8.97919158096149e-05, "loss": 0.0247, "step": 15154 }, { "epoch": 2.1240364400840925, "grad_norm": 0.15061944723129272, "learning_rate": 8.977756517579526e-05, "loss": 0.0149, "step": 15155 }, { "epoch": 2.124176594253679, "grad_norm": 0.16149917244911194, "learning_rate": 8.97632145419756e-05, "loss": 0.0103, "step": 
15156 }, { "epoch": 2.1243167484232655, "grad_norm": 0.5081571340560913, "learning_rate": 8.974886390815593e-05, "loss": 0.0163, "step": 15157 }, { "epoch": 2.1244569025928524, "grad_norm": 0.11678512394428253, "learning_rate": 8.973451327433627e-05, "loss": 0.0106, "step": 15158 }, { "epoch": 2.124597056762439, "grad_norm": 1.1179828643798828, "learning_rate": 8.97201626405166e-05, "loss": 0.0461, "step": 15159 }, { "epoch": 2.1247372109320253, "grad_norm": 0.20063628256320953, "learning_rate": 8.970581200669696e-05, "loss": 0.0157, "step": 15160 }, { "epoch": 2.1248773651016117, "grad_norm": 0.16870984435081482, "learning_rate": 8.96914613728773e-05, "loss": 0.0277, "step": 15161 }, { "epoch": 2.125017519271198, "grad_norm": 0.3032510280609131, "learning_rate": 8.967711073905763e-05, "loss": 0.0578, "step": 15162 }, { "epoch": 2.1251576734407847, "grad_norm": 0.10628536343574524, "learning_rate": 8.966276010523797e-05, "loss": 0.0089, "step": 15163 }, { "epoch": 2.1252978276103716, "grad_norm": 0.08259095251560211, "learning_rate": 8.964840947141833e-05, "loss": 0.0089, "step": 15164 }, { "epoch": 2.125437981779958, "grad_norm": 1.2813059091567993, "learning_rate": 8.963405883759866e-05, "loss": 0.0433, "step": 15165 }, { "epoch": 2.1255781359495445, "grad_norm": 0.190083846449852, "learning_rate": 8.9619708203779e-05, "loss": 0.029, "step": 15166 }, { "epoch": 2.125718290119131, "grad_norm": 1.1281046867370605, "learning_rate": 8.960535756995933e-05, "loss": 0.1216, "step": 15167 }, { "epoch": 2.1258584442887174, "grad_norm": 0.8072153925895691, "learning_rate": 8.959100693613967e-05, "loss": 0.112, "step": 15168 }, { "epoch": 2.1259985984583043, "grad_norm": 0.4067913591861725, "learning_rate": 8.957665630232001e-05, "loss": 0.1216, "step": 15169 }, { "epoch": 2.1261387526278908, "grad_norm": 0.6728463768959045, "learning_rate": 8.956230566850034e-05, "loss": 0.0463, "step": 15170 }, { "epoch": 2.126278906797477, "grad_norm": 0.2107124924659729, 
"learning_rate": 8.95479550346807e-05, "loss": 0.0496, "step": 15171 }, { "epoch": 2.1264190609670637, "grad_norm": 0.22387650609016418, "learning_rate": 8.953360440086104e-05, "loss": 0.0189, "step": 15172 }, { "epoch": 2.12655921513665, "grad_norm": 0.3885575830936432, "learning_rate": 8.951925376704136e-05, "loss": 0.0483, "step": 15173 }, { "epoch": 2.126699369306237, "grad_norm": 0.3481922447681427, "learning_rate": 8.950490313322171e-05, "loss": 0.0536, "step": 15174 }, { "epoch": 2.1268395234758235, "grad_norm": 0.347377210855484, "learning_rate": 8.949055249940206e-05, "loss": 0.0228, "step": 15175 }, { "epoch": 2.12697967764541, "grad_norm": 0.15950915217399597, "learning_rate": 8.947620186558239e-05, "loss": 0.0302, "step": 15176 }, { "epoch": 2.1271198318149964, "grad_norm": 0.19280372560024261, "learning_rate": 8.946185123176273e-05, "loss": 0.0274, "step": 15177 }, { "epoch": 2.127259985984583, "grad_norm": 0.19810117781162262, "learning_rate": 8.944750059794306e-05, "loss": 0.0305, "step": 15178 }, { "epoch": 2.12740014015417, "grad_norm": 0.24144794046878815, "learning_rate": 8.94331499641234e-05, "loss": 0.0564, "step": 15179 }, { "epoch": 2.1275402943237562, "grad_norm": 0.48928698897361755, "learning_rate": 8.941879933030376e-05, "loss": 0.0456, "step": 15180 }, { "epoch": 2.1276804484933427, "grad_norm": 0.19243639707565308, "learning_rate": 8.940444869648409e-05, "loss": 0.0239, "step": 15181 }, { "epoch": 2.127820602662929, "grad_norm": 0.2557934820652008, "learning_rate": 8.939009806266443e-05, "loss": 0.0271, "step": 15182 }, { "epoch": 2.1279607568325156, "grad_norm": 0.2183423936367035, "learning_rate": 8.937574742884477e-05, "loss": 0.0374, "step": 15183 }, { "epoch": 2.128100911002102, "grad_norm": 0.20925457775592804, "learning_rate": 8.93613967950251e-05, "loss": 0.0196, "step": 15184 }, { "epoch": 2.128241065171689, "grad_norm": 0.25236693024635315, "learning_rate": 8.934704616120544e-05, "loss": 0.0328, "step": 15185 }, { "epoch": 
2.1283812193412754, "grad_norm": 0.18731079995632172, "learning_rate": 8.933269552738577e-05, "loss": 0.0138, "step": 15186 }, { "epoch": 2.128521373510862, "grad_norm": 0.16091123223304749, "learning_rate": 8.931834489356613e-05, "loss": 0.0096, "step": 15187 }, { "epoch": 2.1286615276804484, "grad_norm": 0.26221275329589844, "learning_rate": 8.930399425974647e-05, "loss": 0.0563, "step": 15188 }, { "epoch": 2.1288016818500353, "grad_norm": 0.09023147076368332, "learning_rate": 8.92896436259268e-05, "loss": 0.016, "step": 15189 }, { "epoch": 2.1289418360196217, "grad_norm": 0.38218751549720764, "learning_rate": 8.927529299210714e-05, "loss": 0.0449, "step": 15190 }, { "epoch": 2.129081990189208, "grad_norm": 0.34352391958236694, "learning_rate": 8.92609423582875e-05, "loss": 0.0313, "step": 15191 }, { "epoch": 2.1292221443587946, "grad_norm": 0.21111008524894714, "learning_rate": 8.924659172446782e-05, "loss": 0.0317, "step": 15192 }, { "epoch": 2.129362298528381, "grad_norm": 0.2605690360069275, "learning_rate": 8.923224109064817e-05, "loss": 0.0435, "step": 15193 }, { "epoch": 2.1295024526979676, "grad_norm": 0.5971900224685669, "learning_rate": 8.92178904568285e-05, "loss": 0.0203, "step": 15194 }, { "epoch": 2.1296426068675545, "grad_norm": 0.2440394163131714, "learning_rate": 8.920353982300884e-05, "loss": 0.0185, "step": 15195 }, { "epoch": 2.129782761037141, "grad_norm": 0.24238969385623932, "learning_rate": 8.918918918918919e-05, "loss": 0.0547, "step": 15196 }, { "epoch": 2.1299229152067274, "grad_norm": 0.1616920679807663, "learning_rate": 8.917483855536952e-05, "loss": 0.0152, "step": 15197 }, { "epoch": 2.130063069376314, "grad_norm": 0.3849319517612457, "learning_rate": 8.916048792154986e-05, "loss": 0.0604, "step": 15198 }, { "epoch": 2.1302032235459003, "grad_norm": 0.29612505435943604, "learning_rate": 8.91461372877302e-05, "loss": 0.0125, "step": 15199 }, { "epoch": 2.130343377715487, "grad_norm": 0.16791734099388123, "learning_rate": 
8.913178665391053e-05, "loss": 0.0335, "step": 15200 }, { "epoch": 2.1304835318850737, "grad_norm": 0.06513180583715439, "learning_rate": 8.911743602009088e-05, "loss": 0.0042, "step": 15201 }, { "epoch": 2.13062368605466, "grad_norm": 0.19055478274822235, "learning_rate": 8.91030853862712e-05, "loss": 0.0128, "step": 15202 }, { "epoch": 2.1307638402242466, "grad_norm": 0.23441746830940247, "learning_rate": 8.908873475245156e-05, "loss": 0.0171, "step": 15203 }, { "epoch": 2.130903994393833, "grad_norm": 0.1360238939523697, "learning_rate": 8.90743841186319e-05, "loss": 0.0111, "step": 15204 }, { "epoch": 2.13104414856342, "grad_norm": 0.43195340037345886, "learning_rate": 8.906003348481223e-05, "loss": 0.0571, "step": 15205 }, { "epoch": 2.1311843027330064, "grad_norm": 0.05040917918086052, "learning_rate": 8.904568285099257e-05, "loss": 0.0034, "step": 15206 }, { "epoch": 2.131324456902593, "grad_norm": 0.18566788733005524, "learning_rate": 8.903133221717293e-05, "loss": 0.0085, "step": 15207 }, { "epoch": 2.1314646110721793, "grad_norm": 0.4247599244117737, "learning_rate": 8.901698158335326e-05, "loss": 0.0326, "step": 15208 }, { "epoch": 2.131604765241766, "grad_norm": 0.44142937660217285, "learning_rate": 8.90026309495336e-05, "loss": 0.0197, "step": 15209 }, { "epoch": 2.1317449194113527, "grad_norm": 0.27340027689933777, "learning_rate": 8.898828031571393e-05, "loss": 0.0215, "step": 15210 }, { "epoch": 2.131885073580939, "grad_norm": 0.13507801294326782, "learning_rate": 8.897392968189427e-05, "loss": 0.0084, "step": 15211 }, { "epoch": 2.1320252277505256, "grad_norm": 0.4074319303035736, "learning_rate": 8.895957904807463e-05, "loss": 0.0418, "step": 15212 }, { "epoch": 2.132165381920112, "grad_norm": 0.3392559885978699, "learning_rate": 8.894522841425495e-05, "loss": 0.0343, "step": 15213 }, { "epoch": 2.1323055360896985, "grad_norm": 0.19033478200435638, "learning_rate": 8.89308777804353e-05, "loss": 0.0117, "step": 15214 }, { "epoch": 
2.132445690259285, "grad_norm": 0.4595630466938019, "learning_rate": 8.891652714661564e-05, "loss": 0.0301, "step": 15215 }, { "epoch": 2.132585844428872, "grad_norm": 0.04427970573306084, "learning_rate": 8.890217651279597e-05, "loss": 0.0017, "step": 15216 }, { "epoch": 2.1327259985984584, "grad_norm": 0.5045440196990967, "learning_rate": 8.888782587897631e-05, "loss": 0.0104, "step": 15217 }, { "epoch": 2.132866152768045, "grad_norm": 0.4157669246196747, "learning_rate": 8.887347524515666e-05, "loss": 0.0553, "step": 15218 }, { "epoch": 2.1330063069376313, "grad_norm": 2.893181800842285, "learning_rate": 8.8859124611337e-05, "loss": 0.1899, "step": 15219 }, { "epoch": 2.133146461107218, "grad_norm": 1.4312331676483154, "learning_rate": 8.884477397751734e-05, "loss": 0.0328, "step": 15220 }, { "epoch": 2.1332866152768046, "grad_norm": 0.40497875213623047, "learning_rate": 8.883042334369766e-05, "loss": 0.0109, "step": 15221 }, { "epoch": 2.133426769446391, "grad_norm": 0.3215682804584503, "learning_rate": 8.8816072709878e-05, "loss": 0.0271, "step": 15222 }, { "epoch": 2.1335669236159776, "grad_norm": 0.2847515046596527, "learning_rate": 8.880172207605836e-05, "loss": 0.0351, "step": 15223 }, { "epoch": 2.133707077785564, "grad_norm": 0.11130450665950775, "learning_rate": 8.878737144223869e-05, "loss": 0.0094, "step": 15224 }, { "epoch": 2.1338472319551505, "grad_norm": 0.3143520653247833, "learning_rate": 8.877302080841903e-05, "loss": 0.0302, "step": 15225 }, { "epoch": 2.1339873861247374, "grad_norm": 0.9587177038192749, "learning_rate": 8.875867017459937e-05, "loss": 0.1074, "step": 15226 }, { "epoch": 2.134127540294324, "grad_norm": 0.3057488799095154, "learning_rate": 8.87443195407797e-05, "loss": 0.0134, "step": 15227 }, { "epoch": 2.1342676944639103, "grad_norm": 0.11165688931941986, "learning_rate": 8.872996890696006e-05, "loss": 0.0053, "step": 15228 }, { "epoch": 2.1344078486334968, "grad_norm": 0.29063186049461365, "learning_rate": 
8.871561827314039e-05, "loss": 0.0579, "step": 15229 }, { "epoch": 2.134548002803083, "grad_norm": 0.21145744621753693, "learning_rate": 8.870126763932073e-05, "loss": 0.0217, "step": 15230 }, { "epoch": 2.13468815697267, "grad_norm": 0.1780807226896286, "learning_rate": 8.868691700550107e-05, "loss": 0.0136, "step": 15231 }, { "epoch": 2.1348283111422566, "grad_norm": 0.22877776622772217, "learning_rate": 8.86725663716814e-05, "loss": 0.0293, "step": 15232 }, { "epoch": 2.134968465311843, "grad_norm": 0.2548004686832428, "learning_rate": 8.865821573786174e-05, "loss": 0.0552, "step": 15233 }, { "epoch": 2.1351086194814295, "grad_norm": 0.10363692790269852, "learning_rate": 8.86438651040421e-05, "loss": 0.0107, "step": 15234 }, { "epoch": 2.135248773651016, "grad_norm": 0.1902381181716919, "learning_rate": 8.862951447022243e-05, "loss": 0.0241, "step": 15235 }, { "epoch": 2.135388927820603, "grad_norm": 0.1303856521844864, "learning_rate": 8.861516383640277e-05, "loss": 0.0335, "step": 15236 }, { "epoch": 2.1355290819901893, "grad_norm": 0.157906174659729, "learning_rate": 8.86008132025831e-05, "loss": 0.0157, "step": 15237 }, { "epoch": 2.1356692361597758, "grad_norm": 0.03608357161283493, "learning_rate": 8.858646256876344e-05, "loss": 0.0047, "step": 15238 }, { "epoch": 2.1358093903293622, "grad_norm": 0.16708041727542877, "learning_rate": 8.85721119349438e-05, "loss": 0.0197, "step": 15239 }, { "epoch": 2.1359495444989487, "grad_norm": 0.47102996706962585, "learning_rate": 8.855776130112412e-05, "loss": 0.04, "step": 15240 }, { "epoch": 2.1360896986685356, "grad_norm": 0.2384759485721588, "learning_rate": 8.854341066730447e-05, "loss": 0.0538, "step": 15241 }, { "epoch": 2.136229852838122, "grad_norm": 0.06157780811190605, "learning_rate": 8.852906003348481e-05, "loss": 0.0058, "step": 15242 }, { "epoch": 2.1363700070077085, "grad_norm": 0.48096176981925964, "learning_rate": 8.851470939966514e-05, "loss": 0.0614, "step": 15243 }, { "epoch": 2.136510161177295, 
"grad_norm": 0.26099517941474915, "learning_rate": 8.850035876584549e-05, "loss": 0.0267, "step": 15244 }, { "epoch": 2.1366503153468814, "grad_norm": 0.21684157848358154, "learning_rate": 8.848600813202582e-05, "loss": 0.0143, "step": 15245 }, { "epoch": 2.136790469516468, "grad_norm": 0.3699253499507904, "learning_rate": 8.847165749820616e-05, "loss": 0.0482, "step": 15246 }, { "epoch": 2.136930623686055, "grad_norm": 0.04588789492845535, "learning_rate": 8.84573068643865e-05, "loss": 0.0036, "step": 15247 }, { "epoch": 2.1370707778556413, "grad_norm": 0.2779466509819031, "learning_rate": 8.844295623056683e-05, "loss": 0.0731, "step": 15248 }, { "epoch": 2.1372109320252277, "grad_norm": 0.3055035471916199, "learning_rate": 8.842860559674718e-05, "loss": 0.0313, "step": 15249 }, { "epoch": 2.137351086194814, "grad_norm": 0.22759056091308594, "learning_rate": 8.841425496292753e-05, "loss": 0.0273, "step": 15250 }, { "epoch": 2.1374912403644006, "grad_norm": 0.15930531919002533, "learning_rate": 8.839990432910786e-05, "loss": 0.0248, "step": 15251 }, { "epoch": 2.1376313945339875, "grad_norm": 0.1530465930700302, "learning_rate": 8.83855536952882e-05, "loss": 0.0271, "step": 15252 }, { "epoch": 2.137771548703574, "grad_norm": 0.20927803218364716, "learning_rate": 8.837120306146854e-05, "loss": 0.0224, "step": 15253 }, { "epoch": 2.1379117028731605, "grad_norm": 0.7085014581680298, "learning_rate": 8.835685242764887e-05, "loss": 0.106, "step": 15254 }, { "epoch": 2.138051857042747, "grad_norm": 0.2739074230194092, "learning_rate": 8.834250179382923e-05, "loss": 0.0553, "step": 15255 }, { "epoch": 2.1381920112123334, "grad_norm": 0.632444441318512, "learning_rate": 8.832815116000956e-05, "loss": 0.1238, "step": 15256 }, { "epoch": 2.1383321653819203, "grad_norm": 0.13395020365715027, "learning_rate": 8.83138005261899e-05, "loss": 0.0218, "step": 15257 }, { "epoch": 2.1384723195515067, "grad_norm": 0.14127908647060394, "learning_rate": 8.829944989237024e-05, "loss": 
0.0059, "step": 15258 }, { "epoch": 2.138612473721093, "grad_norm": 0.12099134176969528, "learning_rate": 8.828509925855057e-05, "loss": 0.0065, "step": 15259 }, { "epoch": 2.1387526278906797, "grad_norm": 0.3147258162498474, "learning_rate": 8.827074862473093e-05, "loss": 0.0198, "step": 15260 }, { "epoch": 2.138892782060266, "grad_norm": 0.1141904890537262, "learning_rate": 8.825639799091127e-05, "loss": 0.0366, "step": 15261 }, { "epoch": 2.139032936229853, "grad_norm": 0.5908268094062805, "learning_rate": 8.82420473570916e-05, "loss": 0.0356, "step": 15262 }, { "epoch": 2.1391730903994395, "grad_norm": 0.5669394135475159, "learning_rate": 8.822769672327194e-05, "loss": 0.1031, "step": 15263 }, { "epoch": 2.139313244569026, "grad_norm": 0.3832843005657196, "learning_rate": 8.821334608945227e-05, "loss": 0.0256, "step": 15264 }, { "epoch": 2.1394533987386124, "grad_norm": 0.20154747366905212, "learning_rate": 8.819899545563261e-05, "loss": 0.0185, "step": 15265 }, { "epoch": 2.139593552908199, "grad_norm": 0.3565092086791992, "learning_rate": 8.818464482181296e-05, "loss": 0.0158, "step": 15266 }, { "epoch": 2.1397337070777858, "grad_norm": 0.3719462752342224, "learning_rate": 8.817029418799329e-05, "loss": 0.0125, "step": 15267 }, { "epoch": 2.139873861247372, "grad_norm": 0.8984256386756897, "learning_rate": 8.815594355417364e-05, "loss": 0.058, "step": 15268 }, { "epoch": 2.1400140154169587, "grad_norm": 0.5621334314346313, "learning_rate": 8.814159292035398e-05, "loss": 0.0299, "step": 15269 }, { "epoch": 2.140154169586545, "grad_norm": 0.08213845640420914, "learning_rate": 8.81272422865343e-05, "loss": 0.0057, "step": 15270 }, { "epoch": 2.1402943237561316, "grad_norm": 0.32144880294799805, "learning_rate": 8.811289165271466e-05, "loss": 0.055, "step": 15271 }, { "epoch": 2.1404344779257185, "grad_norm": 0.28303712606430054, "learning_rate": 8.809854101889499e-05, "loss": 0.035, "step": 15272 }, { "epoch": 2.140574632095305, "grad_norm": 0.22838906943798065, 
"learning_rate": 8.808419038507533e-05, "loss": 0.0313, "step": 15273 }, { "epoch": 2.1407147862648914, "grad_norm": 0.18559999763965607, "learning_rate": 8.806983975125567e-05, "loss": 0.0391, "step": 15274 }, { "epoch": 2.140854940434478, "grad_norm": 0.41581666469573975, "learning_rate": 8.8055489117436e-05, "loss": 0.0709, "step": 15275 }, { "epoch": 2.1409950946040643, "grad_norm": 0.42859184741973877, "learning_rate": 8.804113848361636e-05, "loss": 0.0133, "step": 15276 }, { "epoch": 2.141135248773651, "grad_norm": 0.6941805481910706, "learning_rate": 8.80267878497967e-05, "loss": 0.0725, "step": 15277 }, { "epoch": 2.1412754029432377, "grad_norm": 0.35639214515686035, "learning_rate": 8.801243721597703e-05, "loss": 0.0428, "step": 15278 }, { "epoch": 2.141415557112824, "grad_norm": 0.141020268201828, "learning_rate": 8.799808658215737e-05, "loss": 0.0185, "step": 15279 }, { "epoch": 2.1415557112824106, "grad_norm": 0.29653400182724, "learning_rate": 8.79837359483377e-05, "loss": 0.034, "step": 15280 }, { "epoch": 2.141695865451997, "grad_norm": 0.1827317178249359, "learning_rate": 8.796938531451804e-05, "loss": 0.0265, "step": 15281 }, { "epoch": 2.1418360196215835, "grad_norm": 0.4067576229572296, "learning_rate": 8.79550346806984e-05, "loss": 0.0295, "step": 15282 }, { "epoch": 2.1419761737911704, "grad_norm": 0.2345951795578003, "learning_rate": 8.794068404687873e-05, "loss": 0.0674, "step": 15283 }, { "epoch": 2.142116327960757, "grad_norm": 0.11867791414260864, "learning_rate": 8.792633341305907e-05, "loss": 0.0168, "step": 15284 }, { "epoch": 2.1422564821303434, "grad_norm": 0.24366700649261475, "learning_rate": 8.791198277923941e-05, "loss": 0.0286, "step": 15285 }, { "epoch": 2.14239663629993, "grad_norm": 0.10818212479352951, "learning_rate": 8.789763214541974e-05, "loss": 0.0088, "step": 15286 }, { "epoch": 2.1425367904695163, "grad_norm": 0.339449018239975, "learning_rate": 8.78832815116001e-05, "loss": 0.0168, "step": 15287 }, { "epoch": 
2.142676944639103, "grad_norm": 0.23022769391536713, "learning_rate": 8.786893087778044e-05, "loss": 0.0418, "step": 15288 }, { "epoch": 2.1428170988086896, "grad_norm": 0.3154343068599701, "learning_rate": 8.785458024396077e-05, "loss": 0.0152, "step": 15289 }, { "epoch": 2.142957252978276, "grad_norm": 0.08846689760684967, "learning_rate": 8.784022961014111e-05, "loss": 0.0072, "step": 15290 }, { "epoch": 2.1430974071478626, "grad_norm": 0.3567538559436798, "learning_rate": 8.782587897632144e-05, "loss": 0.0251, "step": 15291 }, { "epoch": 2.143237561317449, "grad_norm": 0.09792426228523254, "learning_rate": 8.781152834250179e-05, "loss": 0.0089, "step": 15292 }, { "epoch": 2.143377715487036, "grad_norm": 0.5313330292701721, "learning_rate": 8.779717770868213e-05, "loss": 0.0109, "step": 15293 }, { "epoch": 2.1435178696566224, "grad_norm": 0.506056547164917, "learning_rate": 8.778282707486246e-05, "loss": 0.0285, "step": 15294 }, { "epoch": 2.143658023826209, "grad_norm": 0.2221793234348297, "learning_rate": 8.77684764410428e-05, "loss": 0.0301, "step": 15295 }, { "epoch": 2.1437981779957953, "grad_norm": 0.2336333990097046, "learning_rate": 8.775412580722315e-05, "loss": 0.0284, "step": 15296 }, { "epoch": 2.1439383321653818, "grad_norm": 0.4865369498729706, "learning_rate": 8.773977517340348e-05, "loss": 0.0342, "step": 15297 }, { "epoch": 2.1440784863349687, "grad_norm": 0.13347235321998596, "learning_rate": 8.772542453958383e-05, "loss": 0.0124, "step": 15298 }, { "epoch": 2.144218640504555, "grad_norm": 0.12443598359823227, "learning_rate": 8.771107390576416e-05, "loss": 0.0133, "step": 15299 }, { "epoch": 2.1443587946741416, "grad_norm": 0.1774871051311493, "learning_rate": 8.76967232719445e-05, "loss": 0.0181, "step": 15300 }, { "epoch": 2.144498948843728, "grad_norm": 0.17956632375717163, "learning_rate": 8.768237263812484e-05, "loss": 0.02, "step": 15301 }, { "epoch": 2.1446391030133145, "grad_norm": 0.35058730840682983, "learning_rate": 
8.766802200430517e-05, "loss": 0.0416, "step": 15302 }, { "epoch": 2.1447792571829014, "grad_norm": 0.07622545212507248, "learning_rate": 8.765367137048553e-05, "loss": 0.0069, "step": 15303 }, { "epoch": 2.144919411352488, "grad_norm": 0.5079339146614075, "learning_rate": 8.763932073666587e-05, "loss": 0.0464, "step": 15304 }, { "epoch": 2.1450595655220743, "grad_norm": 0.3639276623725891, "learning_rate": 8.76249701028462e-05, "loss": 0.0487, "step": 15305 }, { "epoch": 2.145199719691661, "grad_norm": 0.07134179025888443, "learning_rate": 8.761061946902654e-05, "loss": 0.0073, "step": 15306 }, { "epoch": 2.1453398738612472, "grad_norm": 0.15557405352592468, "learning_rate": 8.759626883520687e-05, "loss": 0.0283, "step": 15307 }, { "epoch": 2.1454800280308337, "grad_norm": 0.26771560311317444, "learning_rate": 8.758191820138722e-05, "loss": 0.0107, "step": 15308 }, { "epoch": 2.1456201822004206, "grad_norm": 0.3347119092941284, "learning_rate": 8.756756756756757e-05, "loss": 0.0366, "step": 15309 }, { "epoch": 2.145760336370007, "grad_norm": 0.7270070314407349, "learning_rate": 8.75532169337479e-05, "loss": 0.0109, "step": 15310 }, { "epoch": 2.1459004905395935, "grad_norm": 0.2638106644153595, "learning_rate": 8.753886629992824e-05, "loss": 0.0114, "step": 15311 }, { "epoch": 2.14604064470918, "grad_norm": 0.19569070637226105, "learning_rate": 8.752451566610858e-05, "loss": 0.0203, "step": 15312 }, { "epoch": 2.1461807988787664, "grad_norm": 0.3968261480331421, "learning_rate": 8.751016503228891e-05, "loss": 0.0219, "step": 15313 }, { "epoch": 2.1463209530483534, "grad_norm": 0.08780604600906372, "learning_rate": 8.749581439846926e-05, "loss": 0.0289, "step": 15314 }, { "epoch": 2.14646110721794, "grad_norm": 0.24469269812107086, "learning_rate": 8.748146376464959e-05, "loss": 0.0381, "step": 15315 }, { "epoch": 2.1466012613875263, "grad_norm": 0.8661059737205505, "learning_rate": 8.746711313082993e-05, "loss": 0.0577, "step": 15316 }, { "epoch": 
2.1467414155571127, "grad_norm": 0.166100412607193, "learning_rate": 8.745276249701028e-05, "loss": 0.0089, "step": 15317 }, { "epoch": 2.146881569726699, "grad_norm": 0.39557480812072754, "learning_rate": 8.74384118631906e-05, "loss": 0.0062, "step": 15318 }, { "epoch": 2.147021723896286, "grad_norm": 1.7046048641204834, "learning_rate": 8.742406122937096e-05, "loss": 0.0849, "step": 15319 }, { "epoch": 2.1471618780658726, "grad_norm": 1.7429933547973633, "learning_rate": 8.74097105955513e-05, "loss": 0.2424, "step": 15320 }, { "epoch": 2.147302032235459, "grad_norm": 0.08225884288549423, "learning_rate": 8.739535996173163e-05, "loss": 0.0109, "step": 15321 }, { "epoch": 2.1474421864050455, "grad_norm": 0.29446908831596375, "learning_rate": 8.738100932791197e-05, "loss": 0.0648, "step": 15322 }, { "epoch": 2.147582340574632, "grad_norm": 0.13351160287857056, "learning_rate": 8.736665869409233e-05, "loss": 0.0096, "step": 15323 }, { "epoch": 2.147722494744219, "grad_norm": 0.14831215143203735, "learning_rate": 8.735230806027266e-05, "loss": 0.0148, "step": 15324 }, { "epoch": 2.1478626489138053, "grad_norm": 0.45169252157211304, "learning_rate": 8.7337957426453e-05, "loss": 0.1339, "step": 15325 }, { "epoch": 2.1480028030833918, "grad_norm": 0.11727157235145569, "learning_rate": 8.732360679263333e-05, "loss": 0.0115, "step": 15326 }, { "epoch": 2.148142957252978, "grad_norm": 0.09567470848560333, "learning_rate": 8.730925615881367e-05, "loss": 0.0072, "step": 15327 }, { "epoch": 2.1482831114225647, "grad_norm": 0.3257153034210205, "learning_rate": 8.729490552499401e-05, "loss": 0.0163, "step": 15328 }, { "epoch": 2.148423265592151, "grad_norm": 0.12457440793514252, "learning_rate": 8.728055489117434e-05, "loss": 0.0082, "step": 15329 }, { "epoch": 2.148563419761738, "grad_norm": 0.34792935848236084, "learning_rate": 8.72662042573547e-05, "loss": 0.0743, "step": 15330 }, { "epoch": 2.1487035739313245, "grad_norm": 0.16119292378425598, "learning_rate": 
8.725185362353504e-05, "loss": 0.0182, "step": 15331 }, { "epoch": 2.148843728100911, "grad_norm": 0.24097059667110443, "learning_rate": 8.723750298971537e-05, "loss": 0.0262, "step": 15332 }, { "epoch": 2.1489838822704974, "grad_norm": 0.4506450295448303, "learning_rate": 8.722315235589571e-05, "loss": 0.042, "step": 15333 }, { "epoch": 2.1491240364400843, "grad_norm": 0.03821880370378494, "learning_rate": 8.720880172207604e-05, "loss": 0.0029, "step": 15334 }, { "epoch": 2.1492641906096708, "grad_norm": 0.26305267214775085, "learning_rate": 8.71944510882564e-05, "loss": 0.0341, "step": 15335 }, { "epoch": 2.1494043447792572, "grad_norm": 0.21345479786396027, "learning_rate": 8.718010045443674e-05, "loss": 0.014, "step": 15336 }, { "epoch": 2.1495444989488437, "grad_norm": 0.26840320229530334, "learning_rate": 8.716574982061706e-05, "loss": 0.0426, "step": 15337 }, { "epoch": 2.14968465311843, "grad_norm": 1.1672320365905762, "learning_rate": 8.715139918679741e-05, "loss": 0.0295, "step": 15338 }, { "epoch": 2.1498248072880166, "grad_norm": 0.2941757142543793, "learning_rate": 8.713704855297776e-05, "loss": 0.0188, "step": 15339 }, { "epoch": 2.1499649614576035, "grad_norm": 0.19164547324180603, "learning_rate": 8.712269791915809e-05, "loss": 0.0158, "step": 15340 }, { "epoch": 2.15010511562719, "grad_norm": 0.314678817987442, "learning_rate": 8.710834728533843e-05, "loss": 0.0448, "step": 15341 }, { "epoch": 2.1502452697967764, "grad_norm": 0.13238593935966492, "learning_rate": 8.709399665151876e-05, "loss": 0.0263, "step": 15342 }, { "epoch": 2.150385423966363, "grad_norm": 0.0877959132194519, "learning_rate": 8.70796460176991e-05, "loss": 0.0051, "step": 15343 }, { "epoch": 2.1505255781359494, "grad_norm": 0.18056292831897736, "learning_rate": 8.706529538387945e-05, "loss": 0.0102, "step": 15344 }, { "epoch": 2.1506657323055363, "grad_norm": 0.14252761006355286, "learning_rate": 8.705094475005977e-05, "loss": 0.0338, "step": 15345 }, { "epoch": 
2.1508058864751227, "grad_norm": 0.43279486894607544, "learning_rate": 8.703659411624013e-05, "loss": 0.0356, "step": 15346 }, { "epoch": 2.150946040644709, "grad_norm": 0.1459585577249527, "learning_rate": 8.702224348242047e-05, "loss": 0.0161, "step": 15347 }, { "epoch": 2.1510861948142956, "grad_norm": 0.31226831674575806, "learning_rate": 8.70078928486008e-05, "loss": 0.0317, "step": 15348 }, { "epoch": 2.151226348983882, "grad_norm": 0.2940607964992523, "learning_rate": 8.699354221478114e-05, "loss": 0.0328, "step": 15349 }, { "epoch": 2.151366503153469, "grad_norm": 0.07008252292871475, "learning_rate": 8.697919158096147e-05, "loss": 0.0049, "step": 15350 }, { "epoch": 2.1515066573230555, "grad_norm": 0.19552765786647797, "learning_rate": 8.696484094714183e-05, "loss": 0.0345, "step": 15351 }, { "epoch": 2.151646811492642, "grad_norm": 0.08969911187887192, "learning_rate": 8.695049031332217e-05, "loss": 0.0077, "step": 15352 }, { "epoch": 2.1517869656622284, "grad_norm": 0.10951448231935501, "learning_rate": 8.69361396795025e-05, "loss": 0.0096, "step": 15353 }, { "epoch": 2.151927119831815, "grad_norm": 0.14570145308971405, "learning_rate": 8.692178904568284e-05, "loss": 0.0361, "step": 15354 }, { "epoch": 2.1520672740014017, "grad_norm": 0.253081351518631, "learning_rate": 8.69074384118632e-05, "loss": 0.0231, "step": 15355 }, { "epoch": 2.152207428170988, "grad_norm": 0.16390496492385864, "learning_rate": 8.689308777804352e-05, "loss": 0.0126, "step": 15356 }, { "epoch": 2.1523475823405747, "grad_norm": 0.07381082326173782, "learning_rate": 8.687873714422387e-05, "loss": 0.0136, "step": 15357 }, { "epoch": 2.152487736510161, "grad_norm": 0.22054754197597504, "learning_rate": 8.68643865104042e-05, "loss": 0.0166, "step": 15358 }, { "epoch": 2.1526278906797476, "grad_norm": 0.8149431347846985, "learning_rate": 8.685003587658454e-05, "loss": 0.0539, "step": 15359 }, { "epoch": 2.152768044849334, "grad_norm": 0.356865257024765, "learning_rate": 
8.683568524276488e-05, "loss": 0.0333, "step": 15360 }, { "epoch": 2.152908199018921, "grad_norm": 0.1602364480495453, "learning_rate": 8.682133460894522e-05, "loss": 0.0114, "step": 15361 }, { "epoch": 2.1530483531885074, "grad_norm": 0.17978474497795105, "learning_rate": 8.680698397512556e-05, "loss": 0.0082, "step": 15362 }, { "epoch": 2.153188507358094, "grad_norm": 0.22223439812660217, "learning_rate": 8.67926333413059e-05, "loss": 0.055, "step": 15363 }, { "epoch": 2.1533286615276803, "grad_norm": 0.3857043385505676, "learning_rate": 8.677828270748623e-05, "loss": 0.0196, "step": 15364 }, { "epoch": 2.1534688156972672, "grad_norm": 0.2708699703216553, "learning_rate": 8.676393207366658e-05, "loss": 0.0055, "step": 15365 }, { "epoch": 2.1536089698668537, "grad_norm": 0.9954331517219543, "learning_rate": 8.674958143984693e-05, "loss": 0.1388, "step": 15366 }, { "epoch": 2.15374912403644, "grad_norm": 0.49957525730133057, "learning_rate": 8.673523080602726e-05, "loss": 0.0211, "step": 15367 }, { "epoch": 2.1538892782060266, "grad_norm": 0.19946284592151642, "learning_rate": 8.67208801722076e-05, "loss": 0.0122, "step": 15368 }, { "epoch": 2.154029432375613, "grad_norm": 0.13076874613761902, "learning_rate": 8.670652953838793e-05, "loss": 0.0041, "step": 15369 }, { "epoch": 2.1541695865451995, "grad_norm": 0.3796508312225342, "learning_rate": 8.669217890456827e-05, "loss": 0.0102, "step": 15370 }, { "epoch": 2.1543097407147864, "grad_norm": 0.13304169476032257, "learning_rate": 8.667782827074863e-05, "loss": 0.0183, "step": 15371 }, { "epoch": 2.154449894884373, "grad_norm": 0.21116164326667786, "learning_rate": 8.666347763692896e-05, "loss": 0.0166, "step": 15372 }, { "epoch": 2.1545900490539593, "grad_norm": 0.12565453350543976, "learning_rate": 8.66491270031093e-05, "loss": 0.0197, "step": 15373 }, { "epoch": 2.154730203223546, "grad_norm": 0.23317596316337585, "learning_rate": 8.663477636928964e-05, "loss": 0.0178, "step": 15374 }, { "epoch": 
2.1548703573931323, "grad_norm": 0.16248784959316254, "learning_rate": 8.662042573546997e-05, "loss": 0.0247, "step": 15375 }, { "epoch": 2.155010511562719, "grad_norm": 0.1887175291776657, "learning_rate": 8.660607510165031e-05, "loss": 0.0347, "step": 15376 }, { "epoch": 2.1551506657323056, "grad_norm": 0.2740532457828522, "learning_rate": 8.659172446783065e-05, "loss": 0.0208, "step": 15377 }, { "epoch": 2.155290819901892, "grad_norm": 0.19722363352775574, "learning_rate": 8.6577373834011e-05, "loss": 0.0154, "step": 15378 }, { "epoch": 2.1554309740714785, "grad_norm": 0.98005211353302, "learning_rate": 8.656302320019134e-05, "loss": 0.0789, "step": 15379 }, { "epoch": 2.155571128241065, "grad_norm": 0.1699998825788498, "learning_rate": 8.654867256637167e-05, "loss": 0.0195, "step": 15380 }, { "epoch": 2.155711282410652, "grad_norm": 0.11828425526618958, "learning_rate": 8.653432193255201e-05, "loss": 0.0123, "step": 15381 }, { "epoch": 2.1558514365802384, "grad_norm": 0.1831325888633728, "learning_rate": 8.651997129873236e-05, "loss": 0.0145, "step": 15382 }, { "epoch": 2.155991590749825, "grad_norm": 0.11076951771974564, "learning_rate": 8.65056206649127e-05, "loss": 0.0104, "step": 15383 }, { "epoch": 2.1561317449194113, "grad_norm": 0.07718174904584885, "learning_rate": 8.649127003109304e-05, "loss": 0.0141, "step": 15384 }, { "epoch": 2.1562718990889977, "grad_norm": 0.07783882319927216, "learning_rate": 8.647691939727336e-05, "loss": 0.0051, "step": 15385 }, { "epoch": 2.1564120532585846, "grad_norm": 0.23227113485336304, "learning_rate": 8.64625687634537e-05, "loss": 0.0461, "step": 15386 }, { "epoch": 2.156552207428171, "grad_norm": 0.808319628238678, "learning_rate": 8.644821812963406e-05, "loss": 0.0329, "step": 15387 }, { "epoch": 2.1566923615977576, "grad_norm": 0.1746961921453476, "learning_rate": 8.643386749581439e-05, "loss": 0.0596, "step": 15388 }, { "epoch": 2.156832515767344, "grad_norm": 0.17402112483978271, "learning_rate": 
8.641951686199473e-05, "loss": 0.0674, "step": 15389 }, { "epoch": 2.1569726699369305, "grad_norm": 0.28075429797172546, "learning_rate": 8.640516622817507e-05, "loss": 0.0364, "step": 15390 }, { "epoch": 2.157112824106517, "grad_norm": 0.05913713946938515, "learning_rate": 8.63908155943554e-05, "loss": 0.0186, "step": 15391 }, { "epoch": 2.157252978276104, "grad_norm": 0.3385704457759857, "learning_rate": 8.637646496053575e-05, "loss": 0.0693, "step": 15392 }, { "epoch": 2.1573931324456903, "grad_norm": 0.19813212752342224, "learning_rate": 8.636211432671609e-05, "loss": 0.03, "step": 15393 }, { "epoch": 2.1575332866152768, "grad_norm": 0.37917935848236084, "learning_rate": 8.634776369289643e-05, "loss": 0.054, "step": 15394 }, { "epoch": 2.1576734407848632, "grad_norm": 0.12380579113960266, "learning_rate": 8.633341305907677e-05, "loss": 0.0077, "step": 15395 }, { "epoch": 2.1578135949544497, "grad_norm": 0.4453830122947693, "learning_rate": 8.63190624252571e-05, "loss": 0.0278, "step": 15396 }, { "epoch": 2.1579537491240366, "grad_norm": 0.5230569243431091, "learning_rate": 8.630471179143744e-05, "loss": 0.0948, "step": 15397 }, { "epoch": 2.158093903293623, "grad_norm": 0.06244026869535446, "learning_rate": 8.62903611576178e-05, "loss": 0.0061, "step": 15398 }, { "epoch": 2.1582340574632095, "grad_norm": 0.16651076078414917, "learning_rate": 8.627601052379813e-05, "loss": 0.0118, "step": 15399 }, { "epoch": 2.158374211632796, "grad_norm": 0.1521030217409134, "learning_rate": 8.626165988997847e-05, "loss": 0.011, "step": 15400 }, { "epoch": 2.1585143658023824, "grad_norm": 0.27640479803085327, "learning_rate": 8.624730925615881e-05, "loss": 0.0716, "step": 15401 }, { "epoch": 2.1586545199719693, "grad_norm": 0.08490371704101562, "learning_rate": 8.623295862233914e-05, "loss": 0.0097, "step": 15402 }, { "epoch": 2.158794674141556, "grad_norm": 0.22089050710201263, "learning_rate": 8.62186079885195e-05, "loss": 0.0102, "step": 15403 }, { "epoch": 
2.1589348283111423, "grad_norm": 0.24452009797096252, "learning_rate": 8.620425735469982e-05, "loss": 0.0395, "step": 15404 }, { "epoch": 2.1590749824807287, "grad_norm": 0.23416975140571594, "learning_rate": 8.618990672088017e-05, "loss": 0.0421, "step": 15405 }, { "epoch": 2.159215136650315, "grad_norm": 0.4060458242893219, "learning_rate": 8.617555608706051e-05, "loss": 0.029, "step": 15406 }, { "epoch": 2.159355290819902, "grad_norm": 0.10219886898994446, "learning_rate": 8.616120545324084e-05, "loss": 0.0052, "step": 15407 }, { "epoch": 2.1594954449894885, "grad_norm": 0.2730812430381775, "learning_rate": 8.614685481942118e-05, "loss": 0.0319, "step": 15408 }, { "epoch": 2.159635599159075, "grad_norm": 0.08153347671031952, "learning_rate": 8.613250418560153e-05, "loss": 0.0088, "step": 15409 }, { "epoch": 2.1597757533286615, "grad_norm": 0.4057573974132538, "learning_rate": 8.611815355178186e-05, "loss": 0.0823, "step": 15410 }, { "epoch": 2.159915907498248, "grad_norm": 0.4863761365413666, "learning_rate": 8.61038029179622e-05, "loss": 0.0524, "step": 15411 }, { "epoch": 2.160056061667835, "grad_norm": 0.03333592787384987, "learning_rate": 8.608945228414253e-05, "loss": 0.0019, "step": 15412 }, { "epoch": 2.1601962158374213, "grad_norm": 0.48775067925453186, "learning_rate": 8.607510165032288e-05, "loss": 0.0567, "step": 15413 }, { "epoch": 2.1603363700070077, "grad_norm": 0.47304537892341614, "learning_rate": 8.606075101650323e-05, "loss": 0.039, "step": 15414 }, { "epoch": 2.160476524176594, "grad_norm": 0.22962334752082825, "learning_rate": 8.604640038268356e-05, "loss": 0.0068, "step": 15415 }, { "epoch": 2.1606166783461807, "grad_norm": 0.06080048531293869, "learning_rate": 8.60320497488639e-05, "loss": 0.0058, "step": 15416 }, { "epoch": 2.1607568325157676, "grad_norm": 0.3105883300304413, "learning_rate": 8.601769911504424e-05, "loss": 0.0632, "step": 15417 }, { "epoch": 2.160896986685354, "grad_norm": 1.8197739124298096, "learning_rate": 
8.600334848122457e-05, "loss": 0.0529, "step": 15418 }, { "epoch": 2.1610371408549405, "grad_norm": 0.23264367878437042, "learning_rate": 8.598899784740493e-05, "loss": 0.0782, "step": 15419 }, { "epoch": 2.161177295024527, "grad_norm": 0.9086876511573792, "learning_rate": 8.597464721358526e-05, "loss": 0.0526, "step": 15420 }, { "epoch": 2.1613174491941134, "grad_norm": 0.11605122685432434, "learning_rate": 8.59602965797656e-05, "loss": 0.0241, "step": 15421 }, { "epoch": 2.1614576033637, "grad_norm": 0.5580950379371643, "learning_rate": 8.594594594594594e-05, "loss": 0.0593, "step": 15422 }, { "epoch": 2.1615977575332868, "grad_norm": 0.21459150314331055, "learning_rate": 8.593159531212627e-05, "loss": 0.0211, "step": 15423 }, { "epoch": 2.161737911702873, "grad_norm": 0.15788954496383667, "learning_rate": 8.591724467830661e-05, "loss": 0.0386, "step": 15424 }, { "epoch": 2.1618780658724597, "grad_norm": 0.058036163449287415, "learning_rate": 8.590289404448697e-05, "loss": 0.0053, "step": 15425 }, { "epoch": 2.162018220042046, "grad_norm": 0.19580991566181183, "learning_rate": 8.58885434106673e-05, "loss": 0.0558, "step": 15426 }, { "epoch": 2.1621583742116326, "grad_norm": 0.28299856185913086, "learning_rate": 8.587419277684764e-05, "loss": 0.053, "step": 15427 }, { "epoch": 2.1622985283812195, "grad_norm": 0.1504969596862793, "learning_rate": 8.585984214302797e-05, "loss": 0.0192, "step": 15428 }, { "epoch": 2.162438682550806, "grad_norm": 0.1376238316297531, "learning_rate": 8.584549150920831e-05, "loss": 0.0073, "step": 15429 }, { "epoch": 2.1625788367203924, "grad_norm": 0.17269179224967957, "learning_rate": 8.583114087538866e-05, "loss": 0.0224, "step": 15430 }, { "epoch": 2.162718990889979, "grad_norm": 0.06337358802556992, "learning_rate": 8.581679024156899e-05, "loss": 0.0033, "step": 15431 }, { "epoch": 2.1628591450595653, "grad_norm": 0.3600456714630127, "learning_rate": 8.580243960774934e-05, "loss": 0.0311, "step": 15432 }, { "epoch": 
2.1629992992291522, "grad_norm": 0.09824953973293304, "learning_rate": 8.578808897392968e-05, "loss": 0.0081, "step": 15433 }, { "epoch": 2.1631394533987387, "grad_norm": 1.3486517667770386, "learning_rate": 8.577373834011e-05, "loss": 0.0131, "step": 15434 }, { "epoch": 2.163279607568325, "grad_norm": 0.2690047323703766, "learning_rate": 8.575938770629036e-05, "loss": 0.0305, "step": 15435 }, { "epoch": 2.1634197617379116, "grad_norm": 0.11086538434028625, "learning_rate": 8.57450370724707e-05, "loss": 0.0125, "step": 15436 }, { "epoch": 2.163559915907498, "grad_norm": 0.19321806728839874, "learning_rate": 8.573068643865103e-05, "loss": 0.0196, "step": 15437 }, { "epoch": 2.163700070077085, "grad_norm": 0.1602705717086792, "learning_rate": 8.571633580483137e-05, "loss": 0.0298, "step": 15438 }, { "epoch": 2.1638402242466714, "grad_norm": 0.5952815413475037, "learning_rate": 8.57019851710117e-05, "loss": 0.0649, "step": 15439 }, { "epoch": 2.163980378416258, "grad_norm": 0.22576525807380676, "learning_rate": 8.568763453719204e-05, "loss": 0.0163, "step": 15440 }, { "epoch": 2.1641205325858444, "grad_norm": 0.2509418725967407, "learning_rate": 8.56732839033724e-05, "loss": 0.0355, "step": 15441 }, { "epoch": 2.164260686755431, "grad_norm": 0.22534427046775818, "learning_rate": 8.565893326955273e-05, "loss": 0.0178, "step": 15442 }, { "epoch": 2.1644008409250177, "grad_norm": 0.11104210466146469, "learning_rate": 8.564458263573307e-05, "loss": 0.0068, "step": 15443 }, { "epoch": 2.164540995094604, "grad_norm": 0.02992108464241028, "learning_rate": 8.563023200191341e-05, "loss": 0.0021, "step": 15444 }, { "epoch": 2.1646811492641906, "grad_norm": 0.3460654020309448, "learning_rate": 8.561588136809374e-05, "loss": 0.0303, "step": 15445 }, { "epoch": 2.164821303433777, "grad_norm": 0.18774725496768951, "learning_rate": 8.56015307342741e-05, "loss": 0.0119, "step": 15446 }, { "epoch": 2.1649614576033636, "grad_norm": 0.22597059607505798, "learning_rate": 
8.558718010045443e-05, "loss": 0.0459, "step": 15447 }, { "epoch": 2.1651016117729505, "grad_norm": 0.2159491777420044, "learning_rate": 8.557282946663477e-05, "loss": 0.0349, "step": 15448 }, { "epoch": 2.165241765942537, "grad_norm": 0.2804139256477356, "learning_rate": 8.555847883281511e-05, "loss": 0.0308, "step": 15449 }, { "epoch": 2.1653819201121234, "grad_norm": 0.1893835961818695, "learning_rate": 8.554412819899544e-05, "loss": 0.0756, "step": 15450 }, { "epoch": 2.16552207428171, "grad_norm": 0.2896615266799927, "learning_rate": 8.55297775651758e-05, "loss": 0.0885, "step": 15451 }, { "epoch": 2.1656622284512963, "grad_norm": 0.12295167148113251, "learning_rate": 8.551542693135614e-05, "loss": 0.0073, "step": 15452 }, { "epoch": 2.1658023826208828, "grad_norm": 0.18107888102531433, "learning_rate": 8.550107629753647e-05, "loss": 0.0083, "step": 15453 }, { "epoch": 2.1659425367904697, "grad_norm": 0.11750922352075577, "learning_rate": 8.548672566371681e-05, "loss": 0.0128, "step": 15454 }, { "epoch": 2.166082690960056, "grad_norm": 0.2867882549762726, "learning_rate": 8.547237502989714e-05, "loss": 0.056, "step": 15455 }, { "epoch": 2.1662228451296426, "grad_norm": 0.3362749218940735, "learning_rate": 8.545802439607748e-05, "loss": 0.049, "step": 15456 }, { "epoch": 2.166362999299229, "grad_norm": 0.1076216995716095, "learning_rate": 8.544367376225783e-05, "loss": 0.034, "step": 15457 }, { "epoch": 2.1665031534688155, "grad_norm": 0.22457194328308105, "learning_rate": 8.542932312843816e-05, "loss": 0.0179, "step": 15458 }, { "epoch": 2.1666433076384024, "grad_norm": 0.19430334866046906, "learning_rate": 8.54149724946185e-05, "loss": 0.0279, "step": 15459 }, { "epoch": 2.166783461807989, "grad_norm": 0.028072109445929527, "learning_rate": 8.540062186079885e-05, "loss": 0.002, "step": 15460 }, { "epoch": 2.1669236159775753, "grad_norm": 0.9777933359146118, "learning_rate": 8.538627122697917e-05, "loss": 0.0389, "step": 15461 }, { "epoch": 2.167063770147162, 
"grad_norm": 0.3590082824230194, "learning_rate": 8.537192059315953e-05, "loss": 0.0063, "step": 15462 }, { "epoch": 2.1672039243167482, "grad_norm": 0.05881037190556526, "learning_rate": 8.535756995933986e-05, "loss": 0.0054, "step": 15463 }, { "epoch": 2.167344078486335, "grad_norm": 0.4571841061115265, "learning_rate": 8.53432193255202e-05, "loss": 0.093, "step": 15464 }, { "epoch": 2.1674842326559216, "grad_norm": 1.7408326864242554, "learning_rate": 8.532886869170054e-05, "loss": 0.1378, "step": 15465 }, { "epoch": 2.167624386825508, "grad_norm": 0.3223438858985901, "learning_rate": 8.531451805788087e-05, "loss": 0.0082, "step": 15466 }, { "epoch": 2.1677645409950945, "grad_norm": 1.9041938781738281, "learning_rate": 8.530016742406123e-05, "loss": 0.1508, "step": 15467 }, { "epoch": 2.167904695164681, "grad_norm": 0.07710761576890945, "learning_rate": 8.528581679024157e-05, "loss": 0.0057, "step": 15468 }, { "epoch": 2.168044849334268, "grad_norm": 0.3869759142398834, "learning_rate": 8.52714661564219e-05, "loss": 0.0278, "step": 15469 }, { "epoch": 2.1681850035038543, "grad_norm": 0.8428178429603577, "learning_rate": 8.525711552260224e-05, "loss": 0.1272, "step": 15470 }, { "epoch": 2.168325157673441, "grad_norm": 0.3046847879886627, "learning_rate": 8.524276488878258e-05, "loss": 0.0419, "step": 15471 }, { "epoch": 2.1684653118430273, "grad_norm": 0.1405702531337738, "learning_rate": 8.522841425496292e-05, "loss": 0.0146, "step": 15472 }, { "epoch": 2.1686054660126137, "grad_norm": 0.30379512906074524, "learning_rate": 8.521406362114327e-05, "loss": 0.052, "step": 15473 }, { "epoch": 2.1687456201822006, "grad_norm": 0.09665139019489288, "learning_rate": 8.51997129873236e-05, "loss": 0.0256, "step": 15474 }, { "epoch": 2.168885774351787, "grad_norm": 0.26153433322906494, "learning_rate": 8.518536235350394e-05, "loss": 0.0582, "step": 15475 }, { "epoch": 2.1690259285213735, "grad_norm": 0.2731797397136688, "learning_rate": 8.517101171968428e-05, "loss": 
0.0348, "step": 15476 }, { "epoch": 2.16916608269096, "grad_norm": 0.1394374966621399, "learning_rate": 8.515666108586461e-05, "loss": 0.0161, "step": 15477 }, { "epoch": 2.1693062368605465, "grad_norm": 0.16301877796649933, "learning_rate": 8.514231045204496e-05, "loss": 0.018, "step": 15478 }, { "epoch": 2.1694463910301334, "grad_norm": 0.18953461945056915, "learning_rate": 8.51279598182253e-05, "loss": 0.0321, "step": 15479 }, { "epoch": 2.16958654519972, "grad_norm": 0.25635281205177307, "learning_rate": 8.511360918440563e-05, "loss": 0.0215, "step": 15480 }, { "epoch": 2.1697266993693063, "grad_norm": 0.12357616424560547, "learning_rate": 8.509925855058598e-05, "loss": 0.0194, "step": 15481 }, { "epoch": 2.1698668535388927, "grad_norm": 0.1335647702217102, "learning_rate": 8.50849079167663e-05, "loss": 0.0151, "step": 15482 }, { "epoch": 2.170007007708479, "grad_norm": 0.5593230128288269, "learning_rate": 8.507055728294666e-05, "loss": 0.1031, "step": 15483 }, { "epoch": 2.1701471618780657, "grad_norm": 0.26036587357521057, "learning_rate": 8.5056206649127e-05, "loss": 0.0102, "step": 15484 }, { "epoch": 2.1702873160476526, "grad_norm": 0.26807889342308044, "learning_rate": 8.504185601530733e-05, "loss": 0.0237, "step": 15485 }, { "epoch": 2.170427470217239, "grad_norm": 0.1733667403459549, "learning_rate": 8.502750538148767e-05, "loss": 0.0445, "step": 15486 }, { "epoch": 2.1705676243868255, "grad_norm": 0.37354567646980286, "learning_rate": 8.501315474766802e-05, "loss": 0.0953, "step": 15487 }, { "epoch": 2.170707778556412, "grad_norm": 0.7670668363571167, "learning_rate": 8.499880411384836e-05, "loss": 0.0467, "step": 15488 }, { "epoch": 2.1708479327259984, "grad_norm": 0.25219079852104187, "learning_rate": 8.49844534800287e-05, "loss": 0.0088, "step": 15489 }, { "epoch": 2.1709880868955853, "grad_norm": 0.48107919096946716, "learning_rate": 8.497010284620903e-05, "loss": 0.0167, "step": 15490 }, { "epoch": 2.1711282410651718, "grad_norm": 
0.06198776140809059, "learning_rate": 8.495575221238937e-05, "loss": 0.0114, "step": 15491 }, { "epoch": 2.1712683952347582, "grad_norm": 0.3005569875240326, "learning_rate": 8.494140157856971e-05, "loss": 0.0915, "step": 15492 }, { "epoch": 2.1714085494043447, "grad_norm": 0.25535961985588074, "learning_rate": 8.492705094475004e-05, "loss": 0.1006, "step": 15493 }, { "epoch": 2.171548703573931, "grad_norm": 0.2022262066602707, "learning_rate": 8.49127003109304e-05, "loss": 0.0369, "step": 15494 }, { "epoch": 2.171688857743518, "grad_norm": 0.11930090188980103, "learning_rate": 8.489834967711074e-05, "loss": 0.0061, "step": 15495 }, { "epoch": 2.1718290119131045, "grad_norm": 0.8684285879135132, "learning_rate": 8.488399904329107e-05, "loss": 0.059, "step": 15496 }, { "epoch": 2.171969166082691, "grad_norm": 0.3775840699672699, "learning_rate": 8.486964840947141e-05, "loss": 0.0123, "step": 15497 }, { "epoch": 2.1721093202522774, "grad_norm": 0.22777603566646576, "learning_rate": 8.485529777565174e-05, "loss": 0.026, "step": 15498 }, { "epoch": 2.172249474421864, "grad_norm": 1.125455379486084, "learning_rate": 8.48409471418321e-05, "loss": 0.0287, "step": 15499 }, { "epoch": 2.172389628591451, "grad_norm": 0.14573828876018524, "learning_rate": 8.482659650801244e-05, "loss": 0.0257, "step": 15500 }, { "epoch": 2.1725297827610373, "grad_norm": 0.2938830554485321, "learning_rate": 8.481224587419276e-05, "loss": 0.0168, "step": 15501 }, { "epoch": 2.1726699369306237, "grad_norm": 0.4528128504753113, "learning_rate": 8.47978952403731e-05, "loss": 0.0535, "step": 15502 }, { "epoch": 2.17281009110021, "grad_norm": 0.17684079706668854, "learning_rate": 8.478354460655345e-05, "loss": 0.0129, "step": 15503 }, { "epoch": 2.1729502452697966, "grad_norm": 0.1797565370798111, "learning_rate": 8.476919397273379e-05, "loss": 0.0173, "step": 15504 }, { "epoch": 2.173090399439383, "grad_norm": 0.02430335059762001, "learning_rate": 8.475484333891413e-05, "loss": 0.0023, "step": 
15505 }, { "epoch": 2.17323055360897, "grad_norm": 1.0244117975234985, "learning_rate": 8.474049270509446e-05, "loss": 0.0593, "step": 15506 }, { "epoch": 2.1733707077785565, "grad_norm": 0.4036937654018402, "learning_rate": 8.47261420712748e-05, "loss": 0.0653, "step": 15507 }, { "epoch": 2.173510861948143, "grad_norm": 0.6878527998924255, "learning_rate": 8.471179143745515e-05, "loss": 0.0787, "step": 15508 }, { "epoch": 2.1736510161177294, "grad_norm": 0.10806319117546082, "learning_rate": 8.469744080363547e-05, "loss": 0.0055, "step": 15509 }, { "epoch": 2.1737911702873163, "grad_norm": 0.26832789182662964, "learning_rate": 8.468309016981583e-05, "loss": 0.052, "step": 15510 }, { "epoch": 2.1739313244569027, "grad_norm": 0.13309180736541748, "learning_rate": 8.466873953599617e-05, "loss": 0.0656, "step": 15511 }, { "epoch": 2.174071478626489, "grad_norm": 0.013673707842826843, "learning_rate": 8.46543889021765e-05, "loss": 0.0012, "step": 15512 }, { "epoch": 2.1742116327960757, "grad_norm": 0.1314159482717514, "learning_rate": 8.464003826835684e-05, "loss": 0.0125, "step": 15513 }, { "epoch": 2.174351786965662, "grad_norm": 0.3085721433162689, "learning_rate": 8.46256876345372e-05, "loss": 0.104, "step": 15514 }, { "epoch": 2.1744919411352486, "grad_norm": 0.16569912433624268, "learning_rate": 8.461133700071753e-05, "loss": 0.0211, "step": 15515 }, { "epoch": 2.1746320953048355, "grad_norm": 0.22588637471199036, "learning_rate": 8.459698636689787e-05, "loss": 0.0415, "step": 15516 }, { "epoch": 2.174772249474422, "grad_norm": 0.29579490423202515, "learning_rate": 8.45826357330782e-05, "loss": 0.0557, "step": 15517 }, { "epoch": 2.1749124036440084, "grad_norm": 0.30834218859672546, "learning_rate": 8.456828509925854e-05, "loss": 0.0173, "step": 15518 }, { "epoch": 2.175052557813595, "grad_norm": 0.730586588382721, "learning_rate": 8.455393446543888e-05, "loss": 0.0328, "step": 15519 }, { "epoch": 2.1751927119831813, "grad_norm": 0.08423329889774323, 
"learning_rate": 8.453958383161922e-05, "loss": 0.0047, "step": 15520 }, { "epoch": 2.175332866152768, "grad_norm": 0.3593636751174927, "learning_rate": 8.452523319779957e-05, "loss": 0.0284, "step": 15521 }, { "epoch": 2.1754730203223547, "grad_norm": 0.2500229775905609, "learning_rate": 8.451088256397991e-05, "loss": 0.0389, "step": 15522 }, { "epoch": 2.175613174491941, "grad_norm": 0.15188990533351898, "learning_rate": 8.449653193016024e-05, "loss": 0.0214, "step": 15523 }, { "epoch": 2.1757533286615276, "grad_norm": 0.09120609611272812, "learning_rate": 8.448218129634058e-05, "loss": 0.0139, "step": 15524 }, { "epoch": 2.175893482831114, "grad_norm": 0.22486819326877594, "learning_rate": 8.446783066252091e-05, "loss": 0.0278, "step": 15525 }, { "epoch": 2.176033637000701, "grad_norm": 0.0872679054737091, "learning_rate": 8.445348002870126e-05, "loss": 0.01, "step": 15526 }, { "epoch": 2.1761737911702874, "grad_norm": 0.09633751958608627, "learning_rate": 8.44391293948816e-05, "loss": 0.0246, "step": 15527 }, { "epoch": 2.176313945339874, "grad_norm": 0.11577687412500381, "learning_rate": 8.442477876106193e-05, "loss": 0.0062, "step": 15528 }, { "epoch": 2.1764540995094603, "grad_norm": 0.10755651444196701, "learning_rate": 8.441042812724228e-05, "loss": 0.0137, "step": 15529 }, { "epoch": 2.176594253679047, "grad_norm": 0.38401877880096436, "learning_rate": 8.439607749342263e-05, "loss": 0.0142, "step": 15530 }, { "epoch": 2.1767344078486337, "grad_norm": 0.07044266909360886, "learning_rate": 8.438172685960296e-05, "loss": 0.0125, "step": 15531 }, { "epoch": 2.17687456201822, "grad_norm": 0.40811052918434143, "learning_rate": 8.43673762257833e-05, "loss": 0.0388, "step": 15532 }, { "epoch": 2.1770147161878066, "grad_norm": 0.21889354288578033, "learning_rate": 8.435302559196363e-05, "loss": 0.0526, "step": 15533 }, { "epoch": 2.177154870357393, "grad_norm": 0.21658968925476074, "learning_rate": 8.433867495814397e-05, "loss": 0.0376, "step": 15534 }, { "epoch": 
2.1772950245269795, "grad_norm": 0.06853201240301132, "learning_rate": 8.432432432432432e-05, "loss": 0.0059, "step": 15535 }, { "epoch": 2.177435178696566, "grad_norm": 0.28299546241760254, "learning_rate": 8.430997369050466e-05, "loss": 0.0249, "step": 15536 }, { "epoch": 2.177575332866153, "grad_norm": 0.1991221308708191, "learning_rate": 8.4295623056685e-05, "loss": 0.0798, "step": 15537 }, { "epoch": 2.1777154870357394, "grad_norm": 0.22990986704826355, "learning_rate": 8.428127242286534e-05, "loss": 0.0081, "step": 15538 }, { "epoch": 2.177855641205326, "grad_norm": 0.11797018349170685, "learning_rate": 8.426692178904567e-05, "loss": 0.0296, "step": 15539 }, { "epoch": 2.1779957953749123, "grad_norm": 0.11003970354795456, "learning_rate": 8.425257115522601e-05, "loss": 0.0145, "step": 15540 }, { "epoch": 2.178135949544499, "grad_norm": 0.05072799697518349, "learning_rate": 8.423822052140634e-05, "loss": 0.0028, "step": 15541 }, { "epoch": 2.1782761037140856, "grad_norm": 0.3402263820171356, "learning_rate": 8.42238698875867e-05, "loss": 0.0314, "step": 15542 }, { "epoch": 2.178416257883672, "grad_norm": 0.22324246168136597, "learning_rate": 8.420951925376704e-05, "loss": 0.0286, "step": 15543 }, { "epoch": 2.1785564120532586, "grad_norm": 0.27035585045814514, "learning_rate": 8.419516861994737e-05, "loss": 0.0195, "step": 15544 }, { "epoch": 2.178696566222845, "grad_norm": 0.2956833839416504, "learning_rate": 8.418081798612771e-05, "loss": 0.034, "step": 15545 }, { "epoch": 2.1788367203924315, "grad_norm": 0.23110932111740112, "learning_rate": 8.416646735230806e-05, "loss": 0.026, "step": 15546 }, { "epoch": 2.1789768745620184, "grad_norm": 0.15697066485881805, "learning_rate": 8.41521167184884e-05, "loss": 0.0132, "step": 15547 }, { "epoch": 2.179117028731605, "grad_norm": 0.25031179189682007, "learning_rate": 8.413776608466874e-05, "loss": 0.0132, "step": 15548 }, { "epoch": 2.1792571829011913, "grad_norm": 0.6249184012413025, "learning_rate": 
8.412341545084908e-05, "loss": 0.0316, "step": 15549 }, { "epoch": 2.1793973370707778, "grad_norm": 0.1307736337184906, "learning_rate": 8.41090648170294e-05, "loss": 0.0211, "step": 15550 }, { "epoch": 2.179537491240364, "grad_norm": 0.3390008509159088, "learning_rate": 8.409471418320975e-05, "loss": 0.0416, "step": 15551 }, { "epoch": 2.179677645409951, "grad_norm": 0.6477555632591248, "learning_rate": 8.408036354939009e-05, "loss": 0.034, "step": 15552 }, { "epoch": 2.1798177995795376, "grad_norm": 0.6356512308120728, "learning_rate": 8.406601291557043e-05, "loss": 0.0368, "step": 15553 }, { "epoch": 2.179957953749124, "grad_norm": 0.22738708555698395, "learning_rate": 8.405166228175077e-05, "loss": 0.0224, "step": 15554 }, { "epoch": 2.1800981079187105, "grad_norm": 0.05572545900940895, "learning_rate": 8.40373116479311e-05, "loss": 0.0026, "step": 15555 }, { "epoch": 2.180238262088297, "grad_norm": 0.11022742092609406, "learning_rate": 8.402296101411145e-05, "loss": 0.0113, "step": 15556 }, { "epoch": 2.180378416257884, "grad_norm": 0.32729431986808777, "learning_rate": 8.40086103802918e-05, "loss": 0.0207, "step": 15557 }, { "epoch": 2.1805185704274703, "grad_norm": 0.16148781776428223, "learning_rate": 8.399425974647213e-05, "loss": 0.0285, "step": 15558 }, { "epoch": 2.180658724597057, "grad_norm": 0.14963307976722717, "learning_rate": 8.397990911265247e-05, "loss": 0.0314, "step": 15559 }, { "epoch": 2.1807988787666432, "grad_norm": 0.23668617010116577, "learning_rate": 8.39655584788328e-05, "loss": 0.05, "step": 15560 }, { "epoch": 2.1809390329362297, "grad_norm": 1.4304167032241821, "learning_rate": 8.395120784501314e-05, "loss": 0.1111, "step": 15561 }, { "epoch": 2.1810791871058166, "grad_norm": 0.5803403854370117, "learning_rate": 8.39368572111935e-05, "loss": 0.0838, "step": 15562 }, { "epoch": 2.181219341275403, "grad_norm": 0.42928269505500793, "learning_rate": 8.392250657737383e-05, "loss": 0.0095, "step": 15563 }, { "epoch": 2.1813594954449895, 
"grad_norm": 0.08319581300020218, "learning_rate": 8.390815594355417e-05, "loss": 0.0033, "step": 15564 }, { "epoch": 2.181499649614576, "grad_norm": 0.23862707614898682, "learning_rate": 8.389380530973451e-05, "loss": 0.0365, "step": 15565 }, { "epoch": 2.1816398037841624, "grad_norm": 2.4348604679107666, "learning_rate": 8.387945467591484e-05, "loss": 0.0827, "step": 15566 }, { "epoch": 2.181779957953749, "grad_norm": 0.10237549245357513, "learning_rate": 8.386510404209518e-05, "loss": 0.0042, "step": 15567 }, { "epoch": 2.181920112123336, "grad_norm": 0.39557740092277527, "learning_rate": 8.385075340827552e-05, "loss": 0.0294, "step": 15568 }, { "epoch": 2.1820602662929223, "grad_norm": 1.4514493942260742, "learning_rate": 8.383640277445587e-05, "loss": 0.2599, "step": 15569 }, { "epoch": 2.1822004204625087, "grad_norm": 0.22535893321037292, "learning_rate": 8.382205214063621e-05, "loss": 0.0943, "step": 15570 }, { "epoch": 2.182340574632095, "grad_norm": 0.20261456072330475, "learning_rate": 8.380770150681654e-05, "loss": 0.0234, "step": 15571 }, { "epoch": 2.1824807288016816, "grad_norm": 0.1964482069015503, "learning_rate": 8.379335087299688e-05, "loss": 0.0082, "step": 15572 }, { "epoch": 2.1826208829712685, "grad_norm": 0.18574251234531403, "learning_rate": 8.377900023917723e-05, "loss": 0.0169, "step": 15573 }, { "epoch": 2.182761037140855, "grad_norm": 0.1273166686296463, "learning_rate": 8.376464960535756e-05, "loss": 0.0182, "step": 15574 }, { "epoch": 2.1829011913104415, "grad_norm": 0.2799573540687561, "learning_rate": 8.37502989715379e-05, "loss": 0.0314, "step": 15575 }, { "epoch": 2.183041345480028, "grad_norm": 0.29510653018951416, "learning_rate": 8.373594833771823e-05, "loss": 0.0646, "step": 15576 }, { "epoch": 2.1831814996496144, "grad_norm": 0.21985645592212677, "learning_rate": 8.372159770389858e-05, "loss": 0.0562, "step": 15577 }, { "epoch": 2.1833216538192013, "grad_norm": 0.1172732561826706, "learning_rate": 8.370724707007893e-05, 
"loss": 0.011, "step": 15578 }, { "epoch": 2.1834618079887878, "grad_norm": 0.23802527785301208, "learning_rate": 8.369289643625926e-05, "loss": 0.0518, "step": 15579 }, { "epoch": 2.183601962158374, "grad_norm": 0.12701694667339325, "learning_rate": 8.36785458024396e-05, "loss": 0.023, "step": 15580 }, { "epoch": 2.1837421163279607, "grad_norm": 0.3812354803085327, "learning_rate": 8.366419516861994e-05, "loss": 0.0321, "step": 15581 }, { "epoch": 2.183882270497547, "grad_norm": 0.3682289719581604, "learning_rate": 8.364984453480027e-05, "loss": 0.0544, "step": 15582 }, { "epoch": 2.184022424667134, "grad_norm": 0.3134566843509674, "learning_rate": 8.363549390098061e-05, "loss": 0.0543, "step": 15583 }, { "epoch": 2.1841625788367205, "grad_norm": 0.14092233777046204, "learning_rate": 8.362114326716097e-05, "loss": 0.0174, "step": 15584 }, { "epoch": 2.184302733006307, "grad_norm": 0.11824993044137955, "learning_rate": 8.36067926333413e-05, "loss": 0.0128, "step": 15585 }, { "epoch": 2.1844428871758934, "grad_norm": 0.190822571516037, "learning_rate": 8.359244199952164e-05, "loss": 0.0436, "step": 15586 }, { "epoch": 2.18458304134548, "grad_norm": 0.05926097184419632, "learning_rate": 8.357809136570197e-05, "loss": 0.0093, "step": 15587 }, { "epoch": 2.1847231955150668, "grad_norm": 0.16819730401039124, "learning_rate": 8.356374073188231e-05, "loss": 0.0459, "step": 15588 }, { "epoch": 2.1848633496846532, "grad_norm": 1.3574039936065674, "learning_rate": 8.354939009806267e-05, "loss": 0.0848, "step": 15589 }, { "epoch": 2.1850035038542397, "grad_norm": 0.20953278243541718, "learning_rate": 8.3535039464243e-05, "loss": 0.0247, "step": 15590 }, { "epoch": 2.185143658023826, "grad_norm": 0.1937340348958969, "learning_rate": 8.352068883042334e-05, "loss": 0.0201, "step": 15591 }, { "epoch": 2.1852838121934126, "grad_norm": 0.31552377343177795, "learning_rate": 8.350633819660368e-05, "loss": 0.0179, "step": 15592 }, { "epoch": 2.1854239663629995, "grad_norm": 
0.1680138260126114, "learning_rate": 8.349198756278401e-05, "loss": 0.0064, "step": 15593 }, { "epoch": 2.185564120532586, "grad_norm": 0.30566340684890747, "learning_rate": 8.347763692896436e-05, "loss": 0.0245, "step": 15594 }, { "epoch": 2.1857042747021724, "grad_norm": 0.18651804327964783, "learning_rate": 8.346328629514469e-05, "loss": 0.0393, "step": 15595 }, { "epoch": 2.185844428871759, "grad_norm": 0.24722754955291748, "learning_rate": 8.344893566132503e-05, "loss": 0.0273, "step": 15596 }, { "epoch": 2.1859845830413454, "grad_norm": 0.048305630683898926, "learning_rate": 8.343458502750538e-05, "loss": 0.0057, "step": 15597 }, { "epoch": 2.186124737210932, "grad_norm": 0.17607168853282928, "learning_rate": 8.34202343936857e-05, "loss": 0.032, "step": 15598 }, { "epoch": 2.1862648913805187, "grad_norm": 0.2770044505596161, "learning_rate": 8.340588375986606e-05, "loss": 0.0181, "step": 15599 }, { "epoch": 2.186405045550105, "grad_norm": 0.13178180158138275, "learning_rate": 8.33915331260464e-05, "loss": 0.008, "step": 15600 }, { "epoch": 2.1865451997196916, "grad_norm": 0.20559416711330414, "learning_rate": 8.337718249222673e-05, "loss": 0.0358, "step": 15601 }, { "epoch": 2.186685353889278, "grad_norm": 0.08401753008365631, "learning_rate": 8.336283185840707e-05, "loss": 0.0064, "step": 15602 }, { "epoch": 2.1868255080588646, "grad_norm": 0.39797934889793396, "learning_rate": 8.33484812245874e-05, "loss": 0.0704, "step": 15603 }, { "epoch": 2.1869656622284515, "grad_norm": 0.3423841595649719, "learning_rate": 8.333413059076774e-05, "loss": 0.0291, "step": 15604 }, { "epoch": 2.187105816398038, "grad_norm": 0.4315163195133209, "learning_rate": 8.33197799569481e-05, "loss": 0.0302, "step": 15605 }, { "epoch": 2.1872459705676244, "grad_norm": 0.12608914077281952, "learning_rate": 8.330542932312843e-05, "loss": 0.0153, "step": 15606 }, { "epoch": 2.187386124737211, "grad_norm": 0.1963818520307541, "learning_rate": 8.329107868930877e-05, "loss": 0.0285, "step": 
15607 }, { "epoch": 2.1875262789067973, "grad_norm": 0.3715837001800537, "learning_rate": 8.327672805548911e-05, "loss": 0.0143, "step": 15608 }, { "epoch": 2.187666433076384, "grad_norm": 0.1704009473323822, "learning_rate": 8.326237742166944e-05, "loss": 0.0239, "step": 15609 }, { "epoch": 2.1878065872459707, "grad_norm": 0.1322927176952362, "learning_rate": 8.32480267878498e-05, "loss": 0.014, "step": 15610 }, { "epoch": 2.187946741415557, "grad_norm": 0.041757725179195404, "learning_rate": 8.323367615403013e-05, "loss": 0.0049, "step": 15611 }, { "epoch": 2.1880868955851436, "grad_norm": 0.1920960247516632, "learning_rate": 8.321932552021047e-05, "loss": 0.0141, "step": 15612 }, { "epoch": 2.18822704975473, "grad_norm": 0.39365094900131226, "learning_rate": 8.320497488639081e-05, "loss": 0.0131, "step": 15613 }, { "epoch": 2.188367203924317, "grad_norm": 0.610433042049408, "learning_rate": 8.319062425257114e-05, "loss": 0.0352, "step": 15614 }, { "epoch": 2.1885073580939034, "grad_norm": 0.1152632012963295, "learning_rate": 8.31762736187515e-05, "loss": 0.0087, "step": 15615 }, { "epoch": 2.18864751226349, "grad_norm": 0.40525224804878235, "learning_rate": 8.316192298493184e-05, "loss": 0.0312, "step": 15616 }, { "epoch": 2.1887876664330763, "grad_norm": 0.3027915060520172, "learning_rate": 8.314757235111217e-05, "loss": 0.0117, "step": 15617 }, { "epoch": 2.1889278206026628, "grad_norm": 0.38697054982185364, "learning_rate": 8.313322171729251e-05, "loss": 0.1625, "step": 15618 }, { "epoch": 2.1890679747722497, "grad_norm": 0.5098830461502075, "learning_rate": 8.311887108347285e-05, "loss": 0.0333, "step": 15619 }, { "epoch": 2.189208128941836, "grad_norm": 0.20754776895046234, "learning_rate": 8.310452044965318e-05, "loss": 0.0043, "step": 15620 }, { "epoch": 2.1893482831114226, "grad_norm": 0.4233125150203705, "learning_rate": 8.309016981583353e-05, "loss": 0.0401, "step": 15621 }, { "epoch": 2.189488437281009, "grad_norm": 0.5059199333190918, 
"learning_rate": 8.307581918201386e-05, "loss": 0.0302, "step": 15622 }, { "epoch": 2.1896285914505955, "grad_norm": 0.22175323963165283, "learning_rate": 8.30614685481942e-05, "loss": 0.0175, "step": 15623 }, { "epoch": 2.1897687456201824, "grad_norm": 0.2992059290409088, "learning_rate": 8.304711791437455e-05, "loss": 0.0599, "step": 15624 }, { "epoch": 2.189908899789769, "grad_norm": 0.08164261281490326, "learning_rate": 8.303276728055487e-05, "loss": 0.0108, "step": 15625 }, { "epoch": 2.1900490539593553, "grad_norm": 0.14064110815525055, "learning_rate": 8.301841664673523e-05, "loss": 0.0245, "step": 15626 }, { "epoch": 2.190189208128942, "grad_norm": 0.04325925558805466, "learning_rate": 8.300406601291557e-05, "loss": 0.0034, "step": 15627 }, { "epoch": 2.1903293622985283, "grad_norm": 0.3913405239582062, "learning_rate": 8.29897153790959e-05, "loss": 0.0661, "step": 15628 }, { "epoch": 2.1904695164681147, "grad_norm": 0.172748863697052, "learning_rate": 8.297536474527624e-05, "loss": 0.0116, "step": 15629 }, { "epoch": 2.1906096706377016, "grad_norm": 0.30456429719924927, "learning_rate": 8.296101411145657e-05, "loss": 0.0753, "step": 15630 }, { "epoch": 2.190749824807288, "grad_norm": 0.09865356236696243, "learning_rate": 8.294666347763693e-05, "loss": 0.0086, "step": 15631 }, { "epoch": 2.1908899789768745, "grad_norm": 0.13714629411697388, "learning_rate": 8.293231284381727e-05, "loss": 0.0131, "step": 15632 }, { "epoch": 2.191030133146461, "grad_norm": 0.3552764356136322, "learning_rate": 8.29179622099976e-05, "loss": 0.0402, "step": 15633 }, { "epoch": 2.1911702873160475, "grad_norm": 0.32466045022010803, "learning_rate": 8.290361157617794e-05, "loss": 0.0261, "step": 15634 }, { "epoch": 2.1913104414856344, "grad_norm": 0.2198207676410675, "learning_rate": 8.288926094235828e-05, "loss": 0.0158, "step": 15635 }, { "epoch": 2.191450595655221, "grad_norm": 0.22316282987594604, "learning_rate": 8.287491030853861e-05, "loss": 0.0305, "step": 15636 }, { 
"epoch": 2.1915907498248073, "grad_norm": 0.21982179582118988, "learning_rate": 8.286055967471897e-05, "loss": 0.0502, "step": 15637 }, { "epoch": 2.1917309039943937, "grad_norm": 0.4248678386211395, "learning_rate": 8.28462090408993e-05, "loss": 0.0421, "step": 15638 }, { "epoch": 2.19187105816398, "grad_norm": 0.10520629584789276, "learning_rate": 8.283185840707964e-05, "loss": 0.0035, "step": 15639 }, { "epoch": 2.192011212333567, "grad_norm": 0.08738252520561218, "learning_rate": 8.281750777325998e-05, "loss": 0.0077, "step": 15640 }, { "epoch": 2.1921513665031536, "grad_norm": 0.2180115133523941, "learning_rate": 8.280315713944031e-05, "loss": 0.0336, "step": 15641 }, { "epoch": 2.19229152067274, "grad_norm": 0.1201363056898117, "learning_rate": 8.278880650562066e-05, "loss": 0.0144, "step": 15642 }, { "epoch": 2.1924316748423265, "grad_norm": 0.1581735908985138, "learning_rate": 8.2774455871801e-05, "loss": 0.0251, "step": 15643 }, { "epoch": 2.192571829011913, "grad_norm": 0.24820972979068756, "learning_rate": 8.276010523798133e-05, "loss": 0.0229, "step": 15644 }, { "epoch": 2.1927119831815, "grad_norm": 0.5092044472694397, "learning_rate": 8.274575460416168e-05, "loss": 0.1268, "step": 15645 }, { "epoch": 2.1928521373510863, "grad_norm": 0.39710351824760437, "learning_rate": 8.2731403970342e-05, "loss": 0.0382, "step": 15646 }, { "epoch": 2.1929922915206728, "grad_norm": 0.08613835275173187, "learning_rate": 8.271705333652236e-05, "loss": 0.0091, "step": 15647 }, { "epoch": 2.1931324456902592, "grad_norm": 0.08021494746208191, "learning_rate": 8.27027027027027e-05, "loss": 0.0249, "step": 15648 }, { "epoch": 2.1932725998598457, "grad_norm": 0.08379749208688736, "learning_rate": 8.268835206888303e-05, "loss": 0.0048, "step": 15649 }, { "epoch": 2.193412754029432, "grad_norm": 0.35445496439933777, "learning_rate": 8.267400143506337e-05, "loss": 0.0933, "step": 15650 }, { "epoch": 2.193552908199019, "grad_norm": 0.1973295956850052, "learning_rate": 
8.265965080124372e-05, "loss": 0.0758, "step": 15651 }, { "epoch": 2.1936930623686055, "grad_norm": 0.3184235095977783, "learning_rate": 8.264530016742404e-05, "loss": 0.0551, "step": 15652 }, { "epoch": 2.193833216538192, "grad_norm": 0.6656478643417358, "learning_rate": 8.26309495336044e-05, "loss": 0.0574, "step": 15653 }, { "epoch": 2.1939733707077784, "grad_norm": 0.2157331109046936, "learning_rate": 8.261659889978473e-05, "loss": 0.0266, "step": 15654 }, { "epoch": 2.1941135248773653, "grad_norm": 0.18208567798137665, "learning_rate": 8.260224826596507e-05, "loss": 0.0152, "step": 15655 }, { "epoch": 2.194253679046952, "grad_norm": 0.3098605275154114, "learning_rate": 8.258789763214541e-05, "loss": 0.0664, "step": 15656 }, { "epoch": 2.1943938332165382, "grad_norm": 0.14590348303318024, "learning_rate": 8.257354699832574e-05, "loss": 0.0181, "step": 15657 }, { "epoch": 2.1945339873861247, "grad_norm": 0.16141259670257568, "learning_rate": 8.25591963645061e-05, "loss": 0.036, "step": 15658 }, { "epoch": 2.194674141555711, "grad_norm": 0.8787152171134949, "learning_rate": 8.254484573068644e-05, "loss": 0.0519, "step": 15659 }, { "epoch": 2.1948142957252976, "grad_norm": 0.10097132623195648, "learning_rate": 8.253049509686677e-05, "loss": 0.0114, "step": 15660 }, { "epoch": 2.1949544498948845, "grad_norm": 1.190528392791748, "learning_rate": 8.251614446304711e-05, "loss": 0.0448, "step": 15661 }, { "epoch": 2.195094604064471, "grad_norm": 0.12969990074634552, "learning_rate": 8.250179382922745e-05, "loss": 0.0046, "step": 15662 }, { "epoch": 2.1952347582340574, "grad_norm": 0.4736093580722809, "learning_rate": 8.24874431954078e-05, "loss": 0.0601, "step": 15663 }, { "epoch": 2.195374912403644, "grad_norm": 0.2785087525844574, "learning_rate": 8.247309256158814e-05, "loss": 0.0082, "step": 15664 }, { "epoch": 2.1955150665732304, "grad_norm": 0.2481694370508194, "learning_rate": 8.245874192776846e-05, "loss": 0.0149, "step": 15665 }, { "epoch": 2.1956552207428173, 
"grad_norm": 1.168166995048523, "learning_rate": 8.24443912939488e-05, "loss": 0.0847, "step": 15666 }, { "epoch": 2.1957953749124037, "grad_norm": 0.646602213382721, "learning_rate": 8.243004066012915e-05, "loss": 0.0665, "step": 15667 }, { "epoch": 2.19593552908199, "grad_norm": 0.4012528657913208, "learning_rate": 8.241569002630948e-05, "loss": 0.0777, "step": 15668 }, { "epoch": 2.1960756832515766, "grad_norm": 0.6125938892364502, "learning_rate": 8.240133939248983e-05, "loss": 0.1572, "step": 15669 }, { "epoch": 2.196215837421163, "grad_norm": 4.760125637054443, "learning_rate": 8.238698875867018e-05, "loss": 0.1638, "step": 15670 }, { "epoch": 2.19635599159075, "grad_norm": 0.1384824812412262, "learning_rate": 8.23726381248505e-05, "loss": 0.0214, "step": 15671 }, { "epoch": 2.1964961457603365, "grad_norm": 0.276643842458725, "learning_rate": 8.235828749103085e-05, "loss": 0.0246, "step": 15672 }, { "epoch": 2.196636299929923, "grad_norm": 0.3067857325077057, "learning_rate": 8.234393685721117e-05, "loss": 0.0462, "step": 15673 }, { "epoch": 2.1967764540995094, "grad_norm": 0.2647033929824829, "learning_rate": 8.232958622339153e-05, "loss": 0.02, "step": 15674 }, { "epoch": 2.196916608269096, "grad_norm": 0.08739818632602692, "learning_rate": 8.231523558957187e-05, "loss": 0.0128, "step": 15675 }, { "epoch": 2.1970567624386828, "grad_norm": 0.40206077694892883, "learning_rate": 8.23008849557522e-05, "loss": 0.0448, "step": 15676 }, { "epoch": 2.197196916608269, "grad_norm": 0.1920454055070877, "learning_rate": 8.228653432193254e-05, "loss": 0.0423, "step": 15677 }, { "epoch": 2.1973370707778557, "grad_norm": 0.3084172308444977, "learning_rate": 8.227218368811288e-05, "loss": 0.036, "step": 15678 }, { "epoch": 2.197477224947442, "grad_norm": 0.27525025606155396, "learning_rate": 8.225783305429323e-05, "loss": 0.0446, "step": 15679 }, { "epoch": 2.1976173791170286, "grad_norm": 0.12222567945718765, "learning_rate": 8.224348242047357e-05, "loss": 0.0378, "step": 
15680 }, { "epoch": 2.197757533286615, "grad_norm": 0.46290692687034607, "learning_rate": 8.22291317866539e-05, "loss": 0.0686, "step": 15681 }, { "epoch": 2.197897687456202, "grad_norm": 0.34350353479385376, "learning_rate": 8.221478115283424e-05, "loss": 0.0634, "step": 15682 }, { "epoch": 2.1980378416257884, "grad_norm": 0.07873933762311935, "learning_rate": 8.220043051901458e-05, "loss": 0.0123, "step": 15683 }, { "epoch": 2.198177995795375, "grad_norm": 0.6873977184295654, "learning_rate": 8.218607988519491e-05, "loss": 0.0334, "step": 15684 }, { "epoch": 2.1983181499649613, "grad_norm": 0.25200486183166504, "learning_rate": 8.217172925137527e-05, "loss": 0.0489, "step": 15685 }, { "epoch": 2.1984583041345482, "grad_norm": 0.10532114654779434, "learning_rate": 8.215737861755561e-05, "loss": 0.0134, "step": 15686 }, { "epoch": 2.1985984583041347, "grad_norm": 0.2552756369113922, "learning_rate": 8.214302798373594e-05, "loss": 0.0315, "step": 15687 }, { "epoch": 2.198738612473721, "grad_norm": 0.1742653101682663, "learning_rate": 8.212867734991628e-05, "loss": 0.0186, "step": 15688 }, { "epoch": 2.1988787666433076, "grad_norm": 0.3285054564476013, "learning_rate": 8.211432671609661e-05, "loss": 0.0207, "step": 15689 }, { "epoch": 2.199018920812894, "grad_norm": 0.2602458894252777, "learning_rate": 8.209997608227696e-05, "loss": 0.0257, "step": 15690 }, { "epoch": 2.1991590749824805, "grad_norm": 0.06654641032218933, "learning_rate": 8.20856254484573e-05, "loss": 0.008, "step": 15691 }, { "epoch": 2.1992992291520674, "grad_norm": 0.1489337831735611, "learning_rate": 8.207127481463763e-05, "loss": 0.0259, "step": 15692 }, { "epoch": 2.199439383321654, "grad_norm": 0.19741222262382507, "learning_rate": 8.205692418081798e-05, "loss": 0.0253, "step": 15693 }, { "epoch": 2.1995795374912404, "grad_norm": 0.28719857335090637, "learning_rate": 8.204257354699832e-05, "loss": 0.0116, "step": 15694 }, { "epoch": 2.199719691660827, "grad_norm": 0.548724353313446, 
"learning_rate": 8.202822291317866e-05, "loss": 0.0288, "step": 15695 }, { "epoch": 2.1998598458304133, "grad_norm": 0.17500430345535278, "learning_rate": 8.2013872279359e-05, "loss": 0.0143, "step": 15696 }, { "epoch": 2.2, "grad_norm": 0.21776443719863892, "learning_rate": 8.199952164553934e-05, "loss": 0.1035, "step": 15697 }, { "epoch": 2.2001401541695866, "grad_norm": 0.3577345609664917, "learning_rate": 8.198517101171967e-05, "loss": 0.0669, "step": 15698 }, { "epoch": 2.200280308339173, "grad_norm": 0.2582548260688782, "learning_rate": 8.197082037790002e-05, "loss": 0.0967, "step": 15699 }, { "epoch": 2.2004204625087596, "grad_norm": 0.2810274064540863, "learning_rate": 8.195646974408034e-05, "loss": 0.033, "step": 15700 }, { "epoch": 2.200560616678346, "grad_norm": 0.09076671302318573, "learning_rate": 8.19421191102607e-05, "loss": 0.0093, "step": 15701 }, { "epoch": 2.200700770847933, "grad_norm": 0.1197870597243309, "learning_rate": 8.192776847644104e-05, "loss": 0.0294, "step": 15702 }, { "epoch": 2.2008409250175194, "grad_norm": 0.5010340809822083, "learning_rate": 8.191341784262137e-05, "loss": 0.039, "step": 15703 }, { "epoch": 2.200981079187106, "grad_norm": 0.1756926327943802, "learning_rate": 8.189906720880171e-05, "loss": 0.0605, "step": 15704 }, { "epoch": 2.2011212333566923, "grad_norm": 0.20833875238895416, "learning_rate": 8.188471657498207e-05, "loss": 0.0113, "step": 15705 }, { "epoch": 2.2012613875262788, "grad_norm": 0.4840003550052643, "learning_rate": 8.18703659411624e-05, "loss": 0.0371, "step": 15706 }, { "epoch": 2.2014015416958657, "grad_norm": 0.3240581154823303, "learning_rate": 8.185601530734274e-05, "loss": 0.0395, "step": 15707 }, { "epoch": 2.201541695865452, "grad_norm": 0.12794287502765656, "learning_rate": 8.184166467352307e-05, "loss": 0.017, "step": 15708 }, { "epoch": 2.2016818500350386, "grad_norm": 0.7090755105018616, "learning_rate": 8.182731403970341e-05, "loss": 0.0775, "step": 15709 }, { "epoch": 2.201822004204625, 
"grad_norm": 0.13308900594711304, "learning_rate": 8.181296340588376e-05, "loss": 0.0218, "step": 15710 }, { "epoch": 2.2019621583742115, "grad_norm": 0.15461736917495728, "learning_rate": 8.17986127720641e-05, "loss": 0.0443, "step": 15711 }, { "epoch": 2.202102312543798, "grad_norm": 0.13474911451339722, "learning_rate": 8.178426213824444e-05, "loss": 0.0191, "step": 15712 }, { "epoch": 2.202242466713385, "grad_norm": 0.293152391910553, "learning_rate": 8.176991150442478e-05, "loss": 0.0422, "step": 15713 }, { "epoch": 2.2023826208829713, "grad_norm": 1.1336601972579956, "learning_rate": 8.17555608706051e-05, "loss": 0.0359, "step": 15714 }, { "epoch": 2.202522775052558, "grad_norm": 0.024906359612941742, "learning_rate": 8.174121023678545e-05, "loss": 0.0022, "step": 15715 }, { "epoch": 2.2026629292221442, "grad_norm": 0.12857244908809662, "learning_rate": 8.172685960296578e-05, "loss": 0.0075, "step": 15716 }, { "epoch": 2.2028030833917307, "grad_norm": 0.28347164392471313, "learning_rate": 8.171250896914613e-05, "loss": 0.0088, "step": 15717 }, { "epoch": 2.2029432375613176, "grad_norm": 0.19016562402248383, "learning_rate": 8.169815833532647e-05, "loss": 0.009, "step": 15718 }, { "epoch": 2.203083391730904, "grad_norm": 1.0748003721237183, "learning_rate": 8.16838077015068e-05, "loss": 0.113, "step": 15719 }, { "epoch": 2.2032235459004905, "grad_norm": 0.5496826171875, "learning_rate": 8.166945706768715e-05, "loss": 0.0278, "step": 15720 }, { "epoch": 2.203363700070077, "grad_norm": 0.12921671569347382, "learning_rate": 8.16551064338675e-05, "loss": 0.0136, "step": 15721 }, { "epoch": 2.2035038542396634, "grad_norm": 0.20940911769866943, "learning_rate": 8.164075580004783e-05, "loss": 0.0318, "step": 15722 }, { "epoch": 2.2036440084092503, "grad_norm": 0.21101483702659607, "learning_rate": 8.162640516622817e-05, "loss": 0.045, "step": 15723 }, { "epoch": 2.203784162578837, "grad_norm": 0.5724600553512573, "learning_rate": 8.16120545324085e-05, "loss": 0.0215, 
"step": 15724 }, { "epoch": 2.2039243167484233, "grad_norm": 0.07508917897939682, "learning_rate": 8.159770389858884e-05, "loss": 0.0055, "step": 15725 }, { "epoch": 2.2040644709180097, "grad_norm": 0.09718918055295944, "learning_rate": 8.15833532647692e-05, "loss": 0.0223, "step": 15726 }, { "epoch": 2.204204625087596, "grad_norm": 0.025699010118842125, "learning_rate": 8.156900263094953e-05, "loss": 0.0028, "step": 15727 }, { "epoch": 2.204344779257183, "grad_norm": 0.1400269865989685, "learning_rate": 8.155465199712987e-05, "loss": 0.021, "step": 15728 }, { "epoch": 2.2044849334267695, "grad_norm": 0.09716742485761642, "learning_rate": 8.154030136331021e-05, "loss": 0.0119, "step": 15729 }, { "epoch": 2.204625087596356, "grad_norm": 0.13582508265972137, "learning_rate": 8.152595072949054e-05, "loss": 0.0066, "step": 15730 }, { "epoch": 2.2047652417659425, "grad_norm": 0.34170815348625183, "learning_rate": 8.151160009567088e-05, "loss": 0.034, "step": 15731 }, { "epoch": 2.204905395935529, "grad_norm": 0.08761975914239883, "learning_rate": 8.149724946185124e-05, "loss": 0.006, "step": 15732 }, { "epoch": 2.205045550105116, "grad_norm": 0.06848367303609848, "learning_rate": 8.148289882803157e-05, "loss": 0.0084, "step": 15733 }, { "epoch": 2.2051857042747023, "grad_norm": 0.24451403319835663, "learning_rate": 8.146854819421191e-05, "loss": 0.0271, "step": 15734 }, { "epoch": 2.2053258584442887, "grad_norm": 0.1860520839691162, "learning_rate": 8.145419756039224e-05, "loss": 0.0252, "step": 15735 }, { "epoch": 2.205466012613875, "grad_norm": 0.0633758008480072, "learning_rate": 8.143984692657258e-05, "loss": 0.0064, "step": 15736 }, { "epoch": 2.2056061667834617, "grad_norm": 0.27101773023605347, "learning_rate": 8.142549629275293e-05, "loss": 0.0291, "step": 15737 }, { "epoch": 2.2057463209530486, "grad_norm": 0.1298302710056305, "learning_rate": 8.141114565893326e-05, "loss": 0.0087, "step": 15738 }, { "epoch": 2.205886475122635, "grad_norm": 0.21071410179138184, 
"learning_rate": 8.13967950251136e-05, "loss": 0.0222, "step": 15739 }, { "epoch": 2.2060266292922215, "grad_norm": 0.3418385982513428, "learning_rate": 8.138244439129395e-05, "loss": 0.0479, "step": 15740 }, { "epoch": 2.206166783461808, "grad_norm": 0.13273079693317413, "learning_rate": 8.136809375747428e-05, "loss": 0.023, "step": 15741 }, { "epoch": 2.2063069376313944, "grad_norm": 0.22827519476413727, "learning_rate": 8.135374312365463e-05, "loss": 0.0238, "step": 15742 }, { "epoch": 2.206447091800981, "grad_norm": 0.7551180720329285, "learning_rate": 8.133939248983496e-05, "loss": 0.0267, "step": 15743 }, { "epoch": 2.2065872459705678, "grad_norm": 0.14162230491638184, "learning_rate": 8.13250418560153e-05, "loss": 0.0214, "step": 15744 }, { "epoch": 2.2067274001401542, "grad_norm": 0.1592104136943817, "learning_rate": 8.131069122219564e-05, "loss": 0.006, "step": 15745 }, { "epoch": 2.2068675543097407, "grad_norm": 0.27113738656044006, "learning_rate": 8.129634058837597e-05, "loss": 0.0191, "step": 15746 }, { "epoch": 2.207007708479327, "grad_norm": 0.10886117815971375, "learning_rate": 8.128198995455631e-05, "loss": 0.0144, "step": 15747 }, { "epoch": 2.2071478626489136, "grad_norm": 0.09191807359457016, "learning_rate": 8.126763932073667e-05, "loss": 0.0119, "step": 15748 }, { "epoch": 2.2072880168185005, "grad_norm": 0.46607664227485657, "learning_rate": 8.1253288686917e-05, "loss": 0.0641, "step": 15749 }, { "epoch": 2.207428170988087, "grad_norm": 0.339548796415329, "learning_rate": 8.123893805309734e-05, "loss": 0.0773, "step": 15750 }, { "epoch": 2.2075683251576734, "grad_norm": 0.2656480073928833, "learning_rate": 8.122458741927767e-05, "loss": 0.0838, "step": 15751 }, { "epoch": 2.20770847932726, "grad_norm": 0.12845660746097565, "learning_rate": 8.121023678545801e-05, "loss": 0.0152, "step": 15752 }, { "epoch": 2.2078486334968463, "grad_norm": 0.2567709684371948, "learning_rate": 8.119588615163837e-05, "loss": 0.0149, "step": 15753 }, { "epoch": 
2.2079887876664333, "grad_norm": 0.09469053894281387, "learning_rate": 8.11815355178187e-05, "loss": 0.0092, "step": 15754 }, { "epoch": 2.2081289418360197, "grad_norm": 0.16169095039367676, "learning_rate": 8.116718488399904e-05, "loss": 0.0151, "step": 15755 }, { "epoch": 2.208269096005606, "grad_norm": 0.11135906726121902, "learning_rate": 8.115283425017938e-05, "loss": 0.0162, "step": 15756 }, { "epoch": 2.2084092501751926, "grad_norm": 0.23499958217144012, "learning_rate": 8.113848361635971e-05, "loss": 0.0463, "step": 15757 }, { "epoch": 2.208549404344779, "grad_norm": 0.3057786822319031, "learning_rate": 8.112413298254006e-05, "loss": 0.0277, "step": 15758 }, { "epoch": 2.208689558514366, "grad_norm": 0.21030275523662567, "learning_rate": 8.110978234872039e-05, "loss": 0.0326, "step": 15759 }, { "epoch": 2.2088297126839525, "grad_norm": 0.20218226313591003, "learning_rate": 8.109543171490073e-05, "loss": 0.0202, "step": 15760 }, { "epoch": 2.208969866853539, "grad_norm": 0.15559740364551544, "learning_rate": 8.108108108108108e-05, "loss": 0.024, "step": 15761 }, { "epoch": 2.2091100210231254, "grad_norm": 0.3975464105606079, "learning_rate": 8.10667304472614e-05, "loss": 0.0312, "step": 15762 }, { "epoch": 2.209250175192712, "grad_norm": 0.48109114170074463, "learning_rate": 8.105237981344175e-05, "loss": 0.0119, "step": 15763 }, { "epoch": 2.2093903293622987, "grad_norm": 0.7652236223220825, "learning_rate": 8.10380291796221e-05, "loss": 0.132, "step": 15764 }, { "epoch": 2.209530483531885, "grad_norm": 0.3879028260707855, "learning_rate": 8.102367854580243e-05, "loss": 0.0238, "step": 15765 }, { "epoch": 2.2096706377014717, "grad_norm": 0.18922802805900574, "learning_rate": 8.100932791198277e-05, "loss": 0.0454, "step": 15766 }, { "epoch": 2.209810791871058, "grad_norm": 0.3922787010669708, "learning_rate": 8.09949772781631e-05, "loss": 0.0189, "step": 15767 }, { "epoch": 2.2099509460406446, "grad_norm": 0.7028340101242065, "learning_rate": 
8.098062664434344e-05, "loss": 0.1249, "step": 15768 }, { "epoch": 2.2100911002102315, "grad_norm": 0.9105939865112305, "learning_rate": 8.09662760105238e-05, "loss": 0.096, "step": 15769 }, { "epoch": 2.210231254379818, "grad_norm": 0.07654590159654617, "learning_rate": 8.095192537670413e-05, "loss": 0.0025, "step": 15770 }, { "epoch": 2.2103714085494044, "grad_norm": 0.11136972904205322, "learning_rate": 8.093757474288447e-05, "loss": 0.018, "step": 15771 }, { "epoch": 2.210511562718991, "grad_norm": 0.16092923283576965, "learning_rate": 8.092322410906481e-05, "loss": 0.05, "step": 15772 }, { "epoch": 2.2106517168885773, "grad_norm": 0.1688794642686844, "learning_rate": 8.090887347524514e-05, "loss": 0.0455, "step": 15773 }, { "epoch": 2.2107918710581638, "grad_norm": 0.25758206844329834, "learning_rate": 8.08945228414255e-05, "loss": 0.0384, "step": 15774 }, { "epoch": 2.2109320252277507, "grad_norm": 0.16360634565353394, "learning_rate": 8.088017220760584e-05, "loss": 0.021, "step": 15775 }, { "epoch": 2.211072179397337, "grad_norm": 0.12428990006446838, "learning_rate": 8.086582157378617e-05, "loss": 0.0392, "step": 15776 }, { "epoch": 2.2112123335669236, "grad_norm": 0.1947309970855713, "learning_rate": 8.085147093996651e-05, "loss": 0.0322, "step": 15777 }, { "epoch": 2.21135248773651, "grad_norm": 0.3968620300292969, "learning_rate": 8.083712030614684e-05, "loss": 0.0617, "step": 15778 }, { "epoch": 2.2114926419060965, "grad_norm": 0.2640931308269501, "learning_rate": 8.082276967232718e-05, "loss": 0.0371, "step": 15779 }, { "epoch": 2.2116327960756834, "grad_norm": 0.2555704414844513, "learning_rate": 8.080841903850754e-05, "loss": 0.0356, "step": 15780 }, { "epoch": 2.21177295024527, "grad_norm": 0.23118658363819122, "learning_rate": 8.079406840468786e-05, "loss": 0.0389, "step": 15781 }, { "epoch": 2.2119131044148563, "grad_norm": 0.19575774669647217, "learning_rate": 8.077971777086821e-05, "loss": 0.0244, "step": 15782 }, { "epoch": 2.212053258584443, 
"grad_norm": 0.31075602769851685, "learning_rate": 8.076536713704855e-05, "loss": 0.0345, "step": 15783 }, { "epoch": 2.2121934127540293, "grad_norm": 0.1856757402420044, "learning_rate": 8.075101650322888e-05, "loss": 0.0393, "step": 15784 }, { "epoch": 2.212333566923616, "grad_norm": 0.29064038395881653, "learning_rate": 8.073666586940923e-05, "loss": 0.0327, "step": 15785 }, { "epoch": 2.2124737210932026, "grad_norm": 0.2372765988111496, "learning_rate": 8.072231523558956e-05, "loss": 0.0602, "step": 15786 }, { "epoch": 2.212613875262789, "grad_norm": 0.16154570877552032, "learning_rate": 8.07079646017699e-05, "loss": 0.0359, "step": 15787 }, { "epoch": 2.2127540294323755, "grad_norm": 0.13458600640296936, "learning_rate": 8.069361396795025e-05, "loss": 0.0081, "step": 15788 }, { "epoch": 2.212894183601962, "grad_norm": 0.16281646490097046, "learning_rate": 8.067926333413057e-05, "loss": 0.0257, "step": 15789 }, { "epoch": 2.213034337771549, "grad_norm": 0.2414088100194931, "learning_rate": 8.066491270031093e-05, "loss": 0.0286, "step": 15790 }, { "epoch": 2.2131744919411354, "grad_norm": 0.11083386093378067, "learning_rate": 8.065056206649127e-05, "loss": 0.0092, "step": 15791 }, { "epoch": 2.213314646110722, "grad_norm": 0.26579955220222473, "learning_rate": 8.06362114326716e-05, "loss": 0.0507, "step": 15792 }, { "epoch": 2.2134548002803083, "grad_norm": 0.20172646641731262, "learning_rate": 8.062186079885194e-05, "loss": 0.0143, "step": 15793 }, { "epoch": 2.2135949544498947, "grad_norm": 0.24762548506259918, "learning_rate": 8.060751016503227e-05, "loss": 0.0601, "step": 15794 }, { "epoch": 2.213735108619481, "grad_norm": 0.3366411626338959, "learning_rate": 8.059315953121261e-05, "loss": 0.0344, "step": 15795 }, { "epoch": 2.213875262789068, "grad_norm": 0.26526540517807007, "learning_rate": 8.057880889739297e-05, "loss": 0.0535, "step": 15796 }, { "epoch": 2.2140154169586546, "grad_norm": 0.22602668404579163, "learning_rate": 8.05644582635733e-05, "loss": 
0.0369, "step": 15797 }, { "epoch": 2.214155571128241, "grad_norm": 0.4422999620437622, "learning_rate": 8.055010762975364e-05, "loss": 0.0552, "step": 15798 }, { "epoch": 2.2142957252978275, "grad_norm": 0.2021181881427765, "learning_rate": 8.053575699593398e-05, "loss": 0.0341, "step": 15799 }, { "epoch": 2.2144358794674144, "grad_norm": 0.23665854334831238, "learning_rate": 8.052140636211431e-05, "loss": 0.0544, "step": 15800 }, { "epoch": 2.214576033637001, "grad_norm": 0.3430425822734833, "learning_rate": 8.050705572829467e-05, "loss": 0.0386, "step": 15801 }, { "epoch": 2.2147161878065873, "grad_norm": 0.14774034917354584, "learning_rate": 8.0492705094475e-05, "loss": 0.0317, "step": 15802 }, { "epoch": 2.2148563419761738, "grad_norm": 0.08194131404161453, "learning_rate": 8.047835446065534e-05, "loss": 0.0067, "step": 15803 }, { "epoch": 2.21499649614576, "grad_norm": 0.11670569330453873, "learning_rate": 8.046400382683568e-05, "loss": 0.0069, "step": 15804 }, { "epoch": 2.2151366503153467, "grad_norm": 0.39882898330688477, "learning_rate": 8.044965319301601e-05, "loss": 0.0218, "step": 15805 }, { "epoch": 2.2152768044849336, "grad_norm": 0.16679386794567108, "learning_rate": 8.043530255919636e-05, "loss": 0.0117, "step": 15806 }, { "epoch": 2.21541695865452, "grad_norm": 0.8383575677871704, "learning_rate": 8.04209519253767e-05, "loss": 0.0658, "step": 15807 }, { "epoch": 2.2155571128241065, "grad_norm": 0.11720184236764908, "learning_rate": 8.040660129155703e-05, "loss": 0.0085, "step": 15808 }, { "epoch": 2.215697266993693, "grad_norm": 0.17931196093559265, "learning_rate": 8.039225065773738e-05, "loss": 0.0147, "step": 15809 }, { "epoch": 2.2158374211632794, "grad_norm": 0.06549227982759476, "learning_rate": 8.037790002391772e-05, "loss": 0.0057, "step": 15810 }, { "epoch": 2.2159775753328663, "grad_norm": 0.08017376810312271, "learning_rate": 8.036354939009805e-05, "loss": 0.0174, "step": 15811 }, { "epoch": 2.216117729502453, "grad_norm": 
0.036735810339450836, "learning_rate": 8.03491987562784e-05, "loss": 0.0031, "step": 15812 }, { "epoch": 2.2162578836720392, "grad_norm": 0.6916406750679016, "learning_rate": 8.033484812245873e-05, "loss": 0.0385, "step": 15813 }, { "epoch": 2.2163980378416257, "grad_norm": 0.3367035388946533, "learning_rate": 8.032049748863907e-05, "loss": 0.0082, "step": 15814 }, { "epoch": 2.216538192011212, "grad_norm": 0.19987551867961884, "learning_rate": 8.030614685481942e-05, "loss": 0.0185, "step": 15815 }, { "epoch": 2.216678346180799, "grad_norm": 0.30841147899627686, "learning_rate": 8.029179622099974e-05, "loss": 0.024, "step": 15816 }, { "epoch": 2.2168185003503855, "grad_norm": 4.956757068634033, "learning_rate": 8.02774455871801e-05, "loss": 0.2366, "step": 15817 }, { "epoch": 2.216958654519972, "grad_norm": 0.4238620400428772, "learning_rate": 8.026309495336044e-05, "loss": 0.0473, "step": 15818 }, { "epoch": 2.2170988086895584, "grad_norm": 0.304696649312973, "learning_rate": 8.024874431954077e-05, "loss": 0.0131, "step": 15819 }, { "epoch": 2.217238962859145, "grad_norm": 1.752905011177063, "learning_rate": 8.023439368572111e-05, "loss": 0.1664, "step": 15820 }, { "epoch": 2.217379117028732, "grad_norm": 0.26391544938087463, "learning_rate": 8.022004305190144e-05, "loss": 0.0296, "step": 15821 }, { "epoch": 2.2175192711983183, "grad_norm": 0.3294784128665924, "learning_rate": 8.02056924180818e-05, "loss": 0.0588, "step": 15822 }, { "epoch": 2.2176594253679047, "grad_norm": 0.7349872589111328, "learning_rate": 8.019134178426214e-05, "loss": 0.0425, "step": 15823 }, { "epoch": 2.217799579537491, "grad_norm": 0.1430329531431198, "learning_rate": 8.017699115044247e-05, "loss": 0.0083, "step": 15824 }, { "epoch": 2.2179397337070776, "grad_norm": 0.3361467719078064, "learning_rate": 8.016264051662281e-05, "loss": 0.0264, "step": 15825 }, { "epoch": 2.218079887876664, "grad_norm": 0.21544459462165833, "learning_rate": 8.014828988280315e-05, "loss": 0.0619, "step": 15826 
}, { "epoch": 2.218220042046251, "grad_norm": 0.16525933146476746, "learning_rate": 8.013393924898348e-05, "loss": 0.0151, "step": 15827 }, { "epoch": 2.2183601962158375, "grad_norm": 0.15224115550518036, "learning_rate": 8.011958861516384e-05, "loss": 0.0171, "step": 15828 }, { "epoch": 2.218500350385424, "grad_norm": 0.39868131279945374, "learning_rate": 8.010523798134416e-05, "loss": 0.0486, "step": 15829 }, { "epoch": 2.2186405045550104, "grad_norm": 0.4792543947696686, "learning_rate": 8.00908873475245e-05, "loss": 0.0305, "step": 15830 }, { "epoch": 2.2187806587245973, "grad_norm": 0.14432522654533386, "learning_rate": 8.007653671370485e-05, "loss": 0.0398, "step": 15831 }, { "epoch": 2.2189208128941837, "grad_norm": 0.3087961971759796, "learning_rate": 8.006218607988518e-05, "loss": 0.0279, "step": 15832 }, { "epoch": 2.21906096706377, "grad_norm": 0.10566693544387817, "learning_rate": 8.004783544606553e-05, "loss": 0.0084, "step": 15833 }, { "epoch": 2.2192011212333567, "grad_norm": 0.348856121301651, "learning_rate": 8.003348481224587e-05, "loss": 0.0187, "step": 15834 }, { "epoch": 2.219341275402943, "grad_norm": 0.07867445796728134, "learning_rate": 8.00191341784262e-05, "loss": 0.0112, "step": 15835 }, { "epoch": 2.2194814295725296, "grad_norm": 0.4268884062767029, "learning_rate": 8.000478354460655e-05, "loss": 0.069, "step": 15836 }, { "epoch": 2.2196215837421165, "grad_norm": 0.1312306672334671, "learning_rate": 7.999043291078687e-05, "loss": 0.024, "step": 15837 }, { "epoch": 2.219761737911703, "grad_norm": 0.13145339488983154, "learning_rate": 7.997608227696723e-05, "loss": 0.0108, "step": 15838 }, { "epoch": 2.2199018920812894, "grad_norm": 0.38580700755119324, "learning_rate": 7.996173164314757e-05, "loss": 0.0387, "step": 15839 }, { "epoch": 2.220042046250876, "grad_norm": 0.2187608778476715, "learning_rate": 7.99473810093279e-05, "loss": 0.0289, "step": 15840 }, { "epoch": 2.2201822004204623, "grad_norm": 0.4201809763908386, "learning_rate": 
7.993303037550824e-05, "loss": 0.0349, "step": 15841 }, { "epoch": 2.2203223545900492, "grad_norm": 0.13052545487880707, "learning_rate": 7.991867974168858e-05, "loss": 0.0141, "step": 15842 }, { "epoch": 2.2204625087596357, "grad_norm": 0.25117334723472595, "learning_rate": 7.990432910786891e-05, "loss": 0.0269, "step": 15843 }, { "epoch": 2.220602662929222, "grad_norm": 0.17811857163906097, "learning_rate": 7.988997847404927e-05, "loss": 0.0431, "step": 15844 }, { "epoch": 2.2207428170988086, "grad_norm": 0.11061608046293259, "learning_rate": 7.987562784022961e-05, "loss": 0.006, "step": 15845 }, { "epoch": 2.220882971268395, "grad_norm": 0.13096849620342255, "learning_rate": 7.986127720640994e-05, "loss": 0.0135, "step": 15846 }, { "epoch": 2.221023125437982, "grad_norm": 0.1101568415760994, "learning_rate": 7.984692657259028e-05, "loss": 0.0121, "step": 15847 }, { "epoch": 2.2211632796075684, "grad_norm": 0.21730497479438782, "learning_rate": 7.983257593877061e-05, "loss": 0.0318, "step": 15848 }, { "epoch": 2.221303433777155, "grad_norm": 0.23728030920028687, "learning_rate": 7.981822530495097e-05, "loss": 0.0437, "step": 15849 }, { "epoch": 2.2214435879467413, "grad_norm": 0.30696719884872437, "learning_rate": 7.980387467113131e-05, "loss": 0.0508, "step": 15850 }, { "epoch": 2.221583742116328, "grad_norm": 0.06760215014219284, "learning_rate": 7.978952403731164e-05, "loss": 0.0059, "step": 15851 }, { "epoch": 2.2217238962859147, "grad_norm": 0.1362176388502121, "learning_rate": 7.977517340349198e-05, "loss": 0.0134, "step": 15852 }, { "epoch": 2.221864050455501, "grad_norm": 0.18390010297298431, "learning_rate": 7.976082276967233e-05, "loss": 0.0084, "step": 15853 }, { "epoch": 2.2220042046250876, "grad_norm": 0.17166583240032196, "learning_rate": 7.974647213585266e-05, "loss": 0.0114, "step": 15854 }, { "epoch": 2.222144358794674, "grad_norm": 0.0382232666015625, "learning_rate": 7.9732121502033e-05, "loss": 0.0043, "step": 15855 }, { "epoch": 
2.2222845129642605, "grad_norm": 0.2232348918914795, "learning_rate": 7.971777086821333e-05, "loss": 0.0189, "step": 15856 }, { "epoch": 2.222424667133847, "grad_norm": 0.5808573365211487, "learning_rate": 7.970342023439368e-05, "loss": 0.0944, "step": 15857 }, { "epoch": 2.222564821303434, "grad_norm": 0.1688917577266693, "learning_rate": 7.968906960057402e-05, "loss": 0.0159, "step": 15858 }, { "epoch": 2.2227049754730204, "grad_norm": 0.22208845615386963, "learning_rate": 7.967471896675435e-05, "loss": 0.0186, "step": 15859 }, { "epoch": 2.222845129642607, "grad_norm": 0.30864429473876953, "learning_rate": 7.96603683329347e-05, "loss": 0.0315, "step": 15860 }, { "epoch": 2.2229852838121933, "grad_norm": 0.19438521564006805, "learning_rate": 7.964601769911504e-05, "loss": 0.0119, "step": 15861 }, { "epoch": 2.2231254379817798, "grad_norm": 0.396528422832489, "learning_rate": 7.963166706529537e-05, "loss": 0.02, "step": 15862 }, { "epoch": 2.2232655921513667, "grad_norm": 0.14854362607002258, "learning_rate": 7.961731643147571e-05, "loss": 0.0281, "step": 15863 }, { "epoch": 2.223405746320953, "grad_norm": 0.12330492585897446, "learning_rate": 7.960296579765604e-05, "loss": 0.0104, "step": 15864 }, { "epoch": 2.2235459004905396, "grad_norm": 0.8328088521957397, "learning_rate": 7.95886151638364e-05, "loss": 0.0498, "step": 15865 }, { "epoch": 2.223686054660126, "grad_norm": 0.2999665439128876, "learning_rate": 7.957426453001674e-05, "loss": 0.0348, "step": 15866 }, { "epoch": 2.2238262088297125, "grad_norm": 0.47874000668525696, "learning_rate": 7.955991389619707e-05, "loss": 0.074, "step": 15867 }, { "epoch": 2.2239663629992994, "grad_norm": 0.06939361244440079, "learning_rate": 7.954556326237741e-05, "loss": 0.0056, "step": 15868 }, { "epoch": 2.224106517168886, "grad_norm": 0.11657527089118958, "learning_rate": 7.953121262855777e-05, "loss": 0.008, "step": 15869 }, { "epoch": 2.2242466713384723, "grad_norm": 0.1638582944869995, "learning_rate": 
7.95168619947381e-05, "loss": 0.0046, "step": 15870 }, { "epoch": 2.2243868255080588, "grad_norm": 0.20034876465797424, "learning_rate": 7.950251136091844e-05, "loss": 0.0532, "step": 15871 }, { "epoch": 2.2245269796776452, "grad_norm": 0.2525147497653961, "learning_rate": 7.948816072709877e-05, "loss": 0.0242, "step": 15872 }, { "epoch": 2.224667133847232, "grad_norm": 0.29534441232681274, "learning_rate": 7.947381009327911e-05, "loss": 0.0933, "step": 15873 }, { "epoch": 2.2248072880168186, "grad_norm": 0.4660206139087677, "learning_rate": 7.945945945945945e-05, "loss": 0.0504, "step": 15874 }, { "epoch": 2.224947442186405, "grad_norm": 0.15314942598342896, "learning_rate": 7.944510882563978e-05, "loss": 0.0432, "step": 15875 }, { "epoch": 2.2250875963559915, "grad_norm": 0.1253141462802887, "learning_rate": 7.943075819182014e-05, "loss": 0.0149, "step": 15876 }, { "epoch": 2.225227750525578, "grad_norm": 0.3300476372241974, "learning_rate": 7.941640755800048e-05, "loss": 0.0387, "step": 15877 }, { "epoch": 2.225367904695165, "grad_norm": 0.19265851378440857, "learning_rate": 7.94020569241808e-05, "loss": 0.0321, "step": 15878 }, { "epoch": 2.2255080588647513, "grad_norm": 0.1389550119638443, "learning_rate": 7.938770629036115e-05, "loss": 0.0119, "step": 15879 }, { "epoch": 2.225648213034338, "grad_norm": 0.06762897223234177, "learning_rate": 7.93733556565415e-05, "loss": 0.0057, "step": 15880 }, { "epoch": 2.2257883672039243, "grad_norm": 0.367824912071228, "learning_rate": 7.935900502272183e-05, "loss": 0.0511, "step": 15881 }, { "epoch": 2.2259285213735107, "grad_norm": 0.1511528044939041, "learning_rate": 7.934465438890217e-05, "loss": 0.0191, "step": 15882 }, { "epoch": 2.2260686755430976, "grad_norm": 0.2962946593761444, "learning_rate": 7.93303037550825e-05, "loss": 0.0525, "step": 15883 }, { "epoch": 2.226208829712684, "grad_norm": 0.15286998450756073, "learning_rate": 7.931595312126285e-05, "loss": 0.0134, "step": 15884 }, { "epoch": 2.2263489838822705, 
"grad_norm": 0.2257305085659027, "learning_rate": 7.93016024874432e-05, "loss": 0.0209, "step": 15885 }, { "epoch": 2.226489138051857, "grad_norm": 0.498569130897522, "learning_rate": 7.928725185362353e-05, "loss": 0.0445, "step": 15886 }, { "epoch": 2.2266292922214435, "grad_norm": 0.2954264283180237, "learning_rate": 7.927290121980387e-05, "loss": 0.0221, "step": 15887 }, { "epoch": 2.22676944639103, "grad_norm": 0.06533745676279068, "learning_rate": 7.925855058598421e-05, "loss": 0.0088, "step": 15888 }, { "epoch": 2.226909600560617, "grad_norm": 0.11143027245998383, "learning_rate": 7.924419995216454e-05, "loss": 0.0295, "step": 15889 }, { "epoch": 2.2270497547302033, "grad_norm": 0.3494460880756378, "learning_rate": 7.922984931834488e-05, "loss": 0.0806, "step": 15890 }, { "epoch": 2.2271899088997897, "grad_norm": 0.19088922441005707, "learning_rate": 7.921549868452521e-05, "loss": 0.0514, "step": 15891 }, { "epoch": 2.227330063069376, "grad_norm": 0.6574686765670776, "learning_rate": 7.920114805070557e-05, "loss": 0.081, "step": 15892 }, { "epoch": 2.2274702172389627, "grad_norm": 0.16093890368938446, "learning_rate": 7.918679741688591e-05, "loss": 0.0206, "step": 15893 }, { "epoch": 2.2276103714085496, "grad_norm": 0.07079556584358215, "learning_rate": 7.917244678306624e-05, "loss": 0.005, "step": 15894 }, { "epoch": 2.227750525578136, "grad_norm": 0.24389594793319702, "learning_rate": 7.915809614924658e-05, "loss": 0.0214, "step": 15895 }, { "epoch": 2.2278906797477225, "grad_norm": 0.230647012591362, "learning_rate": 7.914374551542694e-05, "loss": 0.0759, "step": 15896 }, { "epoch": 2.228030833917309, "grad_norm": 0.22221429646015167, "learning_rate": 7.912939488160727e-05, "loss": 0.0466, "step": 15897 }, { "epoch": 2.2281709880868954, "grad_norm": 0.2491951584815979, "learning_rate": 7.911504424778761e-05, "loss": 0.0463, "step": 15898 }, { "epoch": 2.2283111422564823, "grad_norm": 0.14288195967674255, "learning_rate": 7.910069361396794e-05, "loss": 
0.0075, "step": 15899 }, { "epoch": 2.2284512964260688, "grad_norm": 0.1017732098698616, "learning_rate": 7.908634298014828e-05, "loss": 0.0084, "step": 15900 }, { "epoch": 2.228591450595655, "grad_norm": 0.368023544549942, "learning_rate": 7.907199234632863e-05, "loss": 0.0691, "step": 15901 }, { "epoch": 2.2287316047652417, "grad_norm": 0.06449741870164871, "learning_rate": 7.905764171250896e-05, "loss": 0.0132, "step": 15902 }, { "epoch": 2.228871758934828, "grad_norm": 0.22720414400100708, "learning_rate": 7.90432910786893e-05, "loss": 0.05, "step": 15903 }, { "epoch": 2.229011913104415, "grad_norm": 0.49815794825553894, "learning_rate": 7.902894044486965e-05, "loss": 0.0426, "step": 15904 }, { "epoch": 2.2291520672740015, "grad_norm": 0.21913887560367584, "learning_rate": 7.901458981104998e-05, "loss": 0.0913, "step": 15905 }, { "epoch": 2.229292221443588, "grad_norm": 0.21141712367534637, "learning_rate": 7.900023917723032e-05, "loss": 0.0543, "step": 15906 }, { "epoch": 2.2294323756131744, "grad_norm": 0.6616555452346802, "learning_rate": 7.898588854341065e-05, "loss": 0.0465, "step": 15907 }, { "epoch": 2.229572529782761, "grad_norm": 0.10346011072397232, "learning_rate": 7.8971537909591e-05, "loss": 0.0096, "step": 15908 }, { "epoch": 2.229712683952348, "grad_norm": 0.22836457192897797, "learning_rate": 7.895718727577134e-05, "loss": 0.0279, "step": 15909 }, { "epoch": 2.2298528381219342, "grad_norm": 0.2049017697572708, "learning_rate": 7.894283664195167e-05, "loss": 0.0188, "step": 15910 }, { "epoch": 2.2299929922915207, "grad_norm": 0.273671954870224, "learning_rate": 7.892848600813201e-05, "loss": 0.0312, "step": 15911 }, { "epoch": 2.230133146461107, "grad_norm": 0.32447123527526855, "learning_rate": 7.891413537431237e-05, "loss": 0.0443, "step": 15912 }, { "epoch": 2.2302733006306936, "grad_norm": 0.21466729044914246, "learning_rate": 7.88997847404927e-05, "loss": 0.0458, "step": 15913 }, { "epoch": 2.2304134548002805, "grad_norm": 
0.18691864609718323, "learning_rate": 7.888543410667304e-05, "loss": 0.0153, "step": 15914 }, { "epoch": 2.230553608969867, "grad_norm": 0.32756713032722473, "learning_rate": 7.887108347285337e-05, "loss": 0.0262, "step": 15915 }, { "epoch": 2.2306937631394534, "grad_norm": 0.9787201285362244, "learning_rate": 7.885673283903371e-05, "loss": 0.0381, "step": 15916 }, { "epoch": 2.23083391730904, "grad_norm": 0.7889458537101746, "learning_rate": 7.884238220521407e-05, "loss": 0.0416, "step": 15917 }, { "epoch": 2.2309740714786264, "grad_norm": 0.6555795073509216, "learning_rate": 7.88280315713944e-05, "loss": 0.0272, "step": 15918 }, { "epoch": 2.231114225648213, "grad_norm": 0.9492841958999634, "learning_rate": 7.881368093757474e-05, "loss": 0.0525, "step": 15919 }, { "epoch": 2.2312543798177997, "grad_norm": 1.0203248262405396, "learning_rate": 7.879933030375508e-05, "loss": 0.0271, "step": 15920 }, { "epoch": 2.231394533987386, "grad_norm": 0.10743717849254608, "learning_rate": 7.878497966993541e-05, "loss": 0.0213, "step": 15921 }, { "epoch": 2.2315346881569726, "grad_norm": 0.13191774487495422, "learning_rate": 7.877062903611575e-05, "loss": 0.0105, "step": 15922 }, { "epoch": 2.231674842326559, "grad_norm": 0.1725078821182251, "learning_rate": 7.87562784022961e-05, "loss": 0.0367, "step": 15923 }, { "epoch": 2.2318149964961456, "grad_norm": 0.1473742574453354, "learning_rate": 7.874192776847643e-05, "loss": 0.0163, "step": 15924 }, { "epoch": 2.2319551506657325, "grad_norm": 0.20539367198944092, "learning_rate": 7.872757713465678e-05, "loss": 0.053, "step": 15925 }, { "epoch": 2.232095304835319, "grad_norm": 0.21103744208812714, "learning_rate": 7.87132265008371e-05, "loss": 0.0108, "step": 15926 }, { "epoch": 2.2322354590049054, "grad_norm": 0.14602236449718475, "learning_rate": 7.869887586701745e-05, "loss": 0.0106, "step": 15927 }, { "epoch": 2.232375613174492, "grad_norm": 0.1512695848941803, "learning_rate": 7.86845252331978e-05, "loss": 0.0145, "step": 
15928 }, { "epoch": 2.2325157673440783, "grad_norm": 0.22045272588729858, "learning_rate": 7.867017459937813e-05, "loss": 0.0213, "step": 15929 }, { "epoch": 2.232655921513665, "grad_norm": 0.2753868103027344, "learning_rate": 7.865582396555847e-05, "loss": 0.0419, "step": 15930 }, { "epoch": 2.2327960756832517, "grad_norm": 0.19521088898181915, "learning_rate": 7.864147333173882e-05, "loss": 0.0236, "step": 15931 }, { "epoch": 2.232936229852838, "grad_norm": 0.3316512405872345, "learning_rate": 7.862712269791914e-05, "loss": 0.052, "step": 15932 }, { "epoch": 2.2330763840224246, "grad_norm": 0.20682649314403534, "learning_rate": 7.86127720640995e-05, "loss": 0.0182, "step": 15933 }, { "epoch": 2.233216538192011, "grad_norm": 0.08356979489326477, "learning_rate": 7.859842143027983e-05, "loss": 0.0101, "step": 15934 }, { "epoch": 2.233356692361598, "grad_norm": 0.07834531366825104, "learning_rate": 7.858407079646017e-05, "loss": 0.0082, "step": 15935 }, { "epoch": 2.2334968465311844, "grad_norm": 0.17835354804992676, "learning_rate": 7.856972016264051e-05, "loss": 0.0302, "step": 15936 }, { "epoch": 2.233637000700771, "grad_norm": 0.3725316822528839, "learning_rate": 7.855536952882084e-05, "loss": 0.0324, "step": 15937 }, { "epoch": 2.2337771548703573, "grad_norm": 0.09457622468471527, "learning_rate": 7.854101889500118e-05, "loss": 0.0156, "step": 15938 }, { "epoch": 2.233917309039944, "grad_norm": 0.217263862490654, "learning_rate": 7.852666826118154e-05, "loss": 0.0171, "step": 15939 }, { "epoch": 2.2340574632095302, "grad_norm": 0.2612225115299225, "learning_rate": 7.851231762736187e-05, "loss": 0.0466, "step": 15940 }, { "epoch": 2.234197617379117, "grad_norm": 0.34249433875083923, "learning_rate": 7.849796699354221e-05, "loss": 0.0455, "step": 15941 }, { "epoch": 2.2343377715487036, "grad_norm": 0.08639832586050034, "learning_rate": 7.848361635972254e-05, "loss": 0.0148, "step": 15942 }, { "epoch": 2.23447792571829, "grad_norm": 0.23127956688404083, 
"learning_rate": 7.846926572590288e-05, "loss": 0.0462, "step": 15943 }, { "epoch": 2.2346180798878765, "grad_norm": 0.48839306831359863, "learning_rate": 7.845491509208324e-05, "loss": 0.0429, "step": 15944 }, { "epoch": 2.2347582340574634, "grad_norm": 0.35168054699897766, "learning_rate": 7.844056445826356e-05, "loss": 0.0487, "step": 15945 }, { "epoch": 2.23489838822705, "grad_norm": 0.2229284942150116, "learning_rate": 7.842621382444391e-05, "loss": 0.034, "step": 15946 }, { "epoch": 2.2350385423966364, "grad_norm": 0.07867902517318726, "learning_rate": 7.841186319062425e-05, "loss": 0.0072, "step": 15947 }, { "epoch": 2.235178696566223, "grad_norm": 0.1597619652748108, "learning_rate": 7.839751255680458e-05, "loss": 0.0238, "step": 15948 }, { "epoch": 2.2353188507358093, "grad_norm": 0.1586986631155014, "learning_rate": 7.838316192298493e-05, "loss": 0.0116, "step": 15949 }, { "epoch": 2.2354590049053957, "grad_norm": 0.4441637694835663, "learning_rate": 7.836881128916526e-05, "loss": 0.0452, "step": 15950 }, { "epoch": 2.2355991590749826, "grad_norm": 0.21711398661136627, "learning_rate": 7.83544606553456e-05, "loss": 0.0362, "step": 15951 }, { "epoch": 2.235739313244569, "grad_norm": 0.09045593440532684, "learning_rate": 7.834011002152595e-05, "loss": 0.0063, "step": 15952 }, { "epoch": 2.2358794674141556, "grad_norm": 0.08355707675218582, "learning_rate": 7.832575938770627e-05, "loss": 0.0126, "step": 15953 }, { "epoch": 2.236019621583742, "grad_norm": 0.08078756183385849, "learning_rate": 7.831140875388662e-05, "loss": 0.0074, "step": 15954 }, { "epoch": 2.2361597757533285, "grad_norm": 0.12156230956315994, "learning_rate": 7.829705812006697e-05, "loss": 0.0197, "step": 15955 }, { "epoch": 2.2362999299229154, "grad_norm": 0.2588076591491699, "learning_rate": 7.82827074862473e-05, "loss": 0.0686, "step": 15956 }, { "epoch": 2.236440084092502, "grad_norm": 0.2731691002845764, "learning_rate": 7.826835685242764e-05, "loss": 0.0842, "step": 15957 }, { 
"epoch": 2.2365802382620883, "grad_norm": 0.2036910355091095, "learning_rate": 7.825400621860799e-05, "loss": 0.0341, "step": 15958 }, { "epoch": 2.2367203924316748, "grad_norm": 0.16906343400478363, "learning_rate": 7.823965558478831e-05, "loss": 0.0249, "step": 15959 }, { "epoch": 2.236860546601261, "grad_norm": 0.8929670453071594, "learning_rate": 7.822530495096867e-05, "loss": 0.0885, "step": 15960 }, { "epoch": 2.237000700770848, "grad_norm": 0.32786768674850464, "learning_rate": 7.8210954317149e-05, "loss": 0.0584, "step": 15961 }, { "epoch": 2.2371408549404346, "grad_norm": 0.9510080814361572, "learning_rate": 7.819660368332934e-05, "loss": 0.0295, "step": 15962 }, { "epoch": 2.237281009110021, "grad_norm": 0.543478786945343, "learning_rate": 7.818225304950968e-05, "loss": 0.0879, "step": 15963 }, { "epoch": 2.2374211632796075, "grad_norm": 0.34720319509506226, "learning_rate": 7.816790241569001e-05, "loss": 0.1185, "step": 15964 }, { "epoch": 2.237561317449194, "grad_norm": 0.5739341974258423, "learning_rate": 7.815355178187037e-05, "loss": 0.0742, "step": 15965 }, { "epoch": 2.237701471618781, "grad_norm": 0.8970751762390137, "learning_rate": 7.813920114805071e-05, "loss": 0.2244, "step": 15966 }, { "epoch": 2.2378416257883673, "grad_norm": 0.23464936017990112, "learning_rate": 7.812485051423104e-05, "loss": 0.0156, "step": 15967 }, { "epoch": 2.2379817799579538, "grad_norm": 0.13832317292690277, "learning_rate": 7.811049988041138e-05, "loss": 0.0189, "step": 15968 }, { "epoch": 2.2381219341275402, "grad_norm": 0.24305181205272675, "learning_rate": 7.809614924659171e-05, "loss": 0.0141, "step": 15969 }, { "epoch": 2.2382620882971267, "grad_norm": 0.18162932991981506, "learning_rate": 7.808179861277205e-05, "loss": 0.013, "step": 15970 }, { "epoch": 2.238402242466713, "grad_norm": 0.11373086273670197, "learning_rate": 7.80674479789524e-05, "loss": 0.0146, "step": 15971 }, { "epoch": 2.2385423966363, "grad_norm": 0.17261861264705658, "learning_rate": 
7.805309734513273e-05, "loss": 0.0086, "step": 15972 }, { "epoch": 2.2386825508058865, "grad_norm": 0.07054518163204193, "learning_rate": 7.803874671131308e-05, "loss": 0.0047, "step": 15973 }, { "epoch": 2.238822704975473, "grad_norm": 0.16624481976032257, "learning_rate": 7.802439607749342e-05, "loss": 0.0372, "step": 15974 }, { "epoch": 2.2389628591450594, "grad_norm": 0.18570061028003693, "learning_rate": 7.801004544367375e-05, "loss": 0.0422, "step": 15975 }, { "epoch": 2.2391030133146463, "grad_norm": 0.1790638267993927, "learning_rate": 7.79956948098541e-05, "loss": 0.0153, "step": 15976 }, { "epoch": 2.239243167484233, "grad_norm": 0.7532731294631958, "learning_rate": 7.798134417603443e-05, "loss": 0.0849, "step": 15977 }, { "epoch": 2.2393833216538193, "grad_norm": 0.10992532223463058, "learning_rate": 7.796699354221477e-05, "loss": 0.0308, "step": 15978 }, { "epoch": 2.2395234758234057, "grad_norm": 0.3545118570327759, "learning_rate": 7.795264290839512e-05, "loss": 0.0908, "step": 15979 }, { "epoch": 2.239663629992992, "grad_norm": 0.14106042683124542, "learning_rate": 7.793829227457544e-05, "loss": 0.0236, "step": 15980 }, { "epoch": 2.2398037841625786, "grad_norm": 0.243631973862648, "learning_rate": 7.79239416407558e-05, "loss": 0.0381, "step": 15981 }, { "epoch": 2.2399439383321655, "grad_norm": 0.12529917061328888, "learning_rate": 7.790959100693614e-05, "loss": 0.0425, "step": 15982 }, { "epoch": 2.240084092501752, "grad_norm": 0.07214927673339844, "learning_rate": 7.789524037311647e-05, "loss": 0.0078, "step": 15983 }, { "epoch": 2.2402242466713385, "grad_norm": 0.08505829423666, "learning_rate": 7.788088973929681e-05, "loss": 0.0098, "step": 15984 }, { "epoch": 2.240364400840925, "grad_norm": 0.3662014603614807, "learning_rate": 7.786653910547714e-05, "loss": 0.0205, "step": 15985 }, { "epoch": 2.2405045550105114, "grad_norm": 0.24449719488620758, "learning_rate": 7.785218847165748e-05, "loss": 0.0334, "step": 15986 }, { "epoch": 
2.2406447091800983, "grad_norm": 0.2470383644104004, "learning_rate": 7.783783783783784e-05, "loss": 0.0375, "step": 15987 }, { "epoch": 2.2407848633496847, "grad_norm": 0.13602149486541748, "learning_rate": 7.782348720401817e-05, "loss": 0.0132, "step": 15988 }, { "epoch": 2.240925017519271, "grad_norm": 0.27616575360298157, "learning_rate": 7.780913657019851e-05, "loss": 0.0423, "step": 15989 }, { "epoch": 2.2410651716888577, "grad_norm": 0.5360847115516663, "learning_rate": 7.779478593637885e-05, "loss": 0.0418, "step": 15990 }, { "epoch": 2.241205325858444, "grad_norm": 0.07039465010166168, "learning_rate": 7.778043530255918e-05, "loss": 0.0066, "step": 15991 }, { "epoch": 2.241345480028031, "grad_norm": 0.16954585909843445, "learning_rate": 7.776608466873954e-05, "loss": 0.0206, "step": 15992 }, { "epoch": 2.2414856341976175, "grad_norm": 0.491550087928772, "learning_rate": 7.775173403491988e-05, "loss": 0.0898, "step": 15993 }, { "epoch": 2.241625788367204, "grad_norm": 0.23907309770584106, "learning_rate": 7.77373834011002e-05, "loss": 0.065, "step": 15994 }, { "epoch": 2.2417659425367904, "grad_norm": 0.20439140498638153, "learning_rate": 7.772303276728055e-05, "loss": 0.0257, "step": 15995 }, { "epoch": 2.241906096706377, "grad_norm": 0.17895087599754333, "learning_rate": 7.770868213346088e-05, "loss": 0.0209, "step": 15996 }, { "epoch": 2.2420462508759638, "grad_norm": 0.2104882299900055, "learning_rate": 7.769433149964123e-05, "loss": 0.0167, "step": 15997 }, { "epoch": 2.2421864050455502, "grad_norm": 0.02608693577349186, "learning_rate": 7.767998086582157e-05, "loss": 0.0032, "step": 15998 }, { "epoch": 2.2423265592151367, "grad_norm": 0.3028233051300049, "learning_rate": 7.76656302320019e-05, "loss": 0.0414, "step": 15999 }, { "epoch": 2.242466713384723, "grad_norm": 0.13197112083435059, "learning_rate": 7.765127959818225e-05, "loss": 0.0323, "step": 16000 }, { "epoch": 2.2426068675543096, "grad_norm": 0.3226237893104553, "learning_rate": 
7.763692896436259e-05, "loss": 0.0402, "step": 16001 }, { "epoch": 2.242747021723896, "grad_norm": 0.1994910091161728, "learning_rate": 7.762257833054292e-05, "loss": 0.0211, "step": 16002 }, { "epoch": 2.242887175893483, "grad_norm": 0.16609889268875122, "learning_rate": 7.760822769672327e-05, "loss": 0.0531, "step": 16003 }, { "epoch": 2.2430273300630694, "grad_norm": 0.14180219173431396, "learning_rate": 7.75938770629036e-05, "loss": 0.0312, "step": 16004 }, { "epoch": 2.243167484232656, "grad_norm": 0.055453915148973465, "learning_rate": 7.757952642908394e-05, "loss": 0.0045, "step": 16005 }, { "epoch": 2.2433076384022423, "grad_norm": 0.24018704891204834, "learning_rate": 7.756517579526428e-05, "loss": 0.0192, "step": 16006 }, { "epoch": 2.2434477925718292, "grad_norm": 0.16401568055152893, "learning_rate": 7.755082516144461e-05, "loss": 0.0334, "step": 16007 }, { "epoch": 2.2435879467414157, "grad_norm": 0.05596894398331642, "learning_rate": 7.753647452762497e-05, "loss": 0.0065, "step": 16008 }, { "epoch": 2.243728100911002, "grad_norm": 0.2165966033935547, "learning_rate": 7.752212389380531e-05, "loss": 0.0503, "step": 16009 }, { "epoch": 2.2438682550805886, "grad_norm": 0.2890777885913849, "learning_rate": 7.750777325998564e-05, "loss": 0.0209, "step": 16010 }, { "epoch": 2.244008409250175, "grad_norm": 0.5852856636047363, "learning_rate": 7.749342262616598e-05, "loss": 0.0822, "step": 16011 }, { "epoch": 2.2441485634197615, "grad_norm": 0.2690792381763458, "learning_rate": 7.747907199234631e-05, "loss": 0.0509, "step": 16012 }, { "epoch": 2.2442887175893484, "grad_norm": 0.43139123916625977, "learning_rate": 7.746472135852667e-05, "loss": 0.0388, "step": 16013 }, { "epoch": 2.244428871758935, "grad_norm": 0.309966504573822, "learning_rate": 7.745037072470701e-05, "loss": 0.0397, "step": 16014 }, { "epoch": 2.2445690259285214, "grad_norm": 0.4398465156555176, "learning_rate": 7.743602009088734e-05, "loss": 0.0234, "step": 16015 }, { "epoch": 
2.244709180098108, "grad_norm": 0.10380081832408905, "learning_rate": 7.742166945706768e-05, "loss": 0.0176, "step": 16016 }, { "epoch": 2.2448493342676943, "grad_norm": 0.16911178827285767, "learning_rate": 7.740731882324802e-05, "loss": 0.0143, "step": 16017 }, { "epoch": 2.244989488437281, "grad_norm": 0.23687949776649475, "learning_rate": 7.739296818942835e-05, "loss": 0.0065, "step": 16018 }, { "epoch": 2.2451296426068676, "grad_norm": 0.4006432890892029, "learning_rate": 7.73786175556087e-05, "loss": 0.0238, "step": 16019 }, { "epoch": 2.245269796776454, "grad_norm": 3.2815606594085693, "learning_rate": 7.736426692178903e-05, "loss": 0.1635, "step": 16020 }, { "epoch": 2.2454099509460406, "grad_norm": 0.18410198390483856, "learning_rate": 7.734991628796938e-05, "loss": 0.0163, "step": 16021 }, { "epoch": 2.245550105115627, "grad_norm": 0.27747076749801636, "learning_rate": 7.733556565414972e-05, "loss": 0.0347, "step": 16022 }, { "epoch": 2.245690259285214, "grad_norm": 0.15299734473228455, "learning_rate": 7.732121502033005e-05, "loss": 0.0177, "step": 16023 }, { "epoch": 2.2458304134548004, "grad_norm": 0.21046869456768036, "learning_rate": 7.73068643865104e-05, "loss": 0.016, "step": 16024 }, { "epoch": 2.245970567624387, "grad_norm": 0.23049715161323547, "learning_rate": 7.729251375269074e-05, "loss": 0.0358, "step": 16025 }, { "epoch": 2.2461107217939733, "grad_norm": 0.06531044840812683, "learning_rate": 7.727816311887107e-05, "loss": 0.0071, "step": 16026 }, { "epoch": 2.2462508759635598, "grad_norm": 0.1953677088022232, "learning_rate": 7.726381248505141e-05, "loss": 0.0215, "step": 16027 }, { "epoch": 2.2463910301331467, "grad_norm": 0.09398777037858963, "learning_rate": 7.724946185123177e-05, "loss": 0.0127, "step": 16028 }, { "epoch": 2.246531184302733, "grad_norm": 0.09099038690328598, "learning_rate": 7.72351112174121e-05, "loss": 0.0115, "step": 16029 }, { "epoch": 2.2466713384723196, "grad_norm": 0.05430116131901741, "learning_rate": 
7.722076058359244e-05, "loss": 0.0043, "step": 16030 }, { "epoch": 2.246811492641906, "grad_norm": 0.2957955002784729, "learning_rate": 7.720640994977277e-05, "loss": 0.0087, "step": 16031 }, { "epoch": 2.2469516468114925, "grad_norm": 0.2726699411869049, "learning_rate": 7.719205931595311e-05, "loss": 0.0725, "step": 16032 }, { "epoch": 2.247091800981079, "grad_norm": 0.09464208781719208, "learning_rate": 7.717770868213345e-05, "loss": 0.0151, "step": 16033 }, { "epoch": 2.247231955150666, "grad_norm": 0.23228485882282257, "learning_rate": 7.716335804831378e-05, "loss": 0.0347, "step": 16034 }, { "epoch": 2.2473721093202523, "grad_norm": 0.3284781575202942, "learning_rate": 7.714900741449414e-05, "loss": 0.0217, "step": 16035 }, { "epoch": 2.247512263489839, "grad_norm": 0.2675741910934448, "learning_rate": 7.713465678067448e-05, "loss": 0.068, "step": 16036 }, { "epoch": 2.2476524176594253, "grad_norm": 0.18760760128498077, "learning_rate": 7.712030614685481e-05, "loss": 0.0291, "step": 16037 }, { "epoch": 2.2477925718290117, "grad_norm": 0.09009896963834763, "learning_rate": 7.710595551303515e-05, "loss": 0.0098, "step": 16038 }, { "epoch": 2.2479327259985986, "grad_norm": 0.2564639449119568, "learning_rate": 7.709160487921548e-05, "loss": 0.0481, "step": 16039 }, { "epoch": 2.248072880168185, "grad_norm": 0.10603039711713791, "learning_rate": 7.707725424539584e-05, "loss": 0.0073, "step": 16040 }, { "epoch": 2.2482130343377715, "grad_norm": 0.2566912770271301, "learning_rate": 7.706290361157618e-05, "loss": 0.0396, "step": 16041 }, { "epoch": 2.248353188507358, "grad_norm": 0.17767763137817383, "learning_rate": 7.70485529777565e-05, "loss": 0.0147, "step": 16042 }, { "epoch": 2.2484933426769445, "grad_norm": 0.20038369297981262, "learning_rate": 7.703420234393685e-05, "loss": 0.0529, "step": 16043 }, { "epoch": 2.2486334968465314, "grad_norm": 0.7695234417915344, "learning_rate": 7.70198517101172e-05, "loss": 0.0411, "step": 16044 }, { "epoch": 
2.248773651016118, "grad_norm": 0.2092326432466507, "learning_rate": 7.700550107629753e-05, "loss": 0.0088, "step": 16045 }, { "epoch": 2.2489138051857043, "grad_norm": 0.14405342936515808, "learning_rate": 7.699115044247787e-05, "loss": 0.0194, "step": 16046 }, { "epoch": 2.2490539593552907, "grad_norm": 0.22511768341064453, "learning_rate": 7.69767998086582e-05, "loss": 0.0166, "step": 16047 }, { "epoch": 2.249194113524877, "grad_norm": 0.10174588114023209, "learning_rate": 7.696244917483854e-05, "loss": 0.0174, "step": 16048 }, { "epoch": 2.249334267694464, "grad_norm": 0.2544974088668823, "learning_rate": 7.694809854101889e-05, "loss": 0.0274, "step": 16049 }, { "epoch": 2.2494744218640506, "grad_norm": 0.47817814350128174, "learning_rate": 7.693374790719923e-05, "loss": 0.0906, "step": 16050 }, { "epoch": 2.249614576033637, "grad_norm": 0.08470392972230911, "learning_rate": 7.691939727337957e-05, "loss": 0.0106, "step": 16051 }, { "epoch": 2.2497547302032235, "grad_norm": 0.16570419073104858, "learning_rate": 7.690504663955991e-05, "loss": 0.0057, "step": 16052 }, { "epoch": 2.24989488437281, "grad_norm": 0.3965643346309662, "learning_rate": 7.689069600574024e-05, "loss": 0.0748, "step": 16053 }, { "epoch": 2.2500350385423964, "grad_norm": 0.1252543330192566, "learning_rate": 7.687634537192058e-05, "loss": 0.0134, "step": 16054 }, { "epoch": 2.2501751927119833, "grad_norm": 0.22512929141521454, "learning_rate": 7.686199473810091e-05, "loss": 0.0176, "step": 16055 }, { "epoch": 2.2503153468815698, "grad_norm": 0.36529284715652466, "learning_rate": 7.684764410428127e-05, "loss": 0.0248, "step": 16056 }, { "epoch": 2.250455501051156, "grad_norm": 0.2669682800769806, "learning_rate": 7.683329347046161e-05, "loss": 0.0144, "step": 16057 }, { "epoch": 2.2505956552207427, "grad_norm": 0.2966409921646118, "learning_rate": 7.681894283664194e-05, "loss": 0.0178, "step": 16058 }, { "epoch": 2.2507358093903296, "grad_norm": 0.34525495767593384, "learning_rate": 
7.680459220282228e-05, "loss": 0.0209, "step": 16059 }, { "epoch": 2.250875963559916, "grad_norm": 0.4665195345878601, "learning_rate": 7.679024156900264e-05, "loss": 0.0245, "step": 16060 }, { "epoch": 2.2510161177295025, "grad_norm": 0.10393421351909637, "learning_rate": 7.677589093518297e-05, "loss": 0.0099, "step": 16061 }, { "epoch": 2.251156271899089, "grad_norm": 0.6053932905197144, "learning_rate": 7.676154030136331e-05, "loss": 0.0339, "step": 16062 }, { "epoch": 2.2512964260686754, "grad_norm": 0.25282928347587585, "learning_rate": 7.674718966754364e-05, "loss": 0.0192, "step": 16063 }, { "epoch": 2.251436580238262, "grad_norm": 0.09686914831399918, "learning_rate": 7.673283903372398e-05, "loss": 0.0046, "step": 16064 }, { "epoch": 2.251576734407849, "grad_norm": 0.2713959217071533, "learning_rate": 7.671848839990432e-05, "loss": 0.0351, "step": 16065 }, { "epoch": 2.2517168885774352, "grad_norm": 0.0872144103050232, "learning_rate": 7.670413776608466e-05, "loss": 0.0072, "step": 16066 }, { "epoch": 2.2518570427470217, "grad_norm": 2.0572173595428467, "learning_rate": 7.6689787132265e-05, "loss": 0.1727, "step": 16067 }, { "epoch": 2.251997196916608, "grad_norm": 0.7735179662704468, "learning_rate": 7.667543649844535e-05, "loss": 0.1503, "step": 16068 }, { "epoch": 2.252137351086195, "grad_norm": 0.5997719764709473, "learning_rate": 7.666108586462568e-05, "loss": 0.0158, "step": 16069 }, { "epoch": 2.2522775052557815, "grad_norm": 2.608621597290039, "learning_rate": 7.664673523080602e-05, "loss": 0.2723, "step": 16070 }, { "epoch": 2.252417659425368, "grad_norm": 0.2662675380706787, "learning_rate": 7.663238459698637e-05, "loss": 0.046, "step": 16071 }, { "epoch": 2.2525578135949544, "grad_norm": 0.307176411151886, "learning_rate": 7.66180339631667e-05, "loss": 0.0306, "step": 16072 }, { "epoch": 2.252697967764541, "grad_norm": 0.08651146292686462, "learning_rate": 7.660368332934704e-05, "loss": 0.0125, "step": 16073 }, { "epoch": 2.2528381219341274, 
"grad_norm": 0.22952255606651306, "learning_rate": 7.658933269552737e-05, "loss": 0.0322, "step": 16074 }, { "epoch": 2.2529782761037143, "grad_norm": 0.21150943636894226, "learning_rate": 7.657498206170771e-05, "loss": 0.0235, "step": 16075 }, { "epoch": 2.2531184302733007, "grad_norm": 0.1866176277399063, "learning_rate": 7.656063142788807e-05, "loss": 0.0192, "step": 16076 }, { "epoch": 2.253258584442887, "grad_norm": 0.2756612300872803, "learning_rate": 7.65462807940684e-05, "loss": 0.0309, "step": 16077 }, { "epoch": 2.2533987386124736, "grad_norm": 0.1412435621023178, "learning_rate": 7.653193016024874e-05, "loss": 0.026, "step": 16078 }, { "epoch": 2.25353889278206, "grad_norm": 0.25307074189186096, "learning_rate": 7.651757952642908e-05, "loss": 0.0254, "step": 16079 }, { "epoch": 2.253679046951647, "grad_norm": 0.1415809541940689, "learning_rate": 7.650322889260941e-05, "loss": 0.0106, "step": 16080 }, { "epoch": 2.2538192011212335, "grad_norm": 0.10863583534955978, "learning_rate": 7.648887825878975e-05, "loss": 0.0085, "step": 16081 }, { "epoch": 2.25395935529082, "grad_norm": 0.4418480396270752, "learning_rate": 7.64745276249701e-05, "loss": 0.0246, "step": 16082 }, { "epoch": 2.2540995094604064, "grad_norm": 0.42418670654296875, "learning_rate": 7.646017699115044e-05, "loss": 0.0483, "step": 16083 }, { "epoch": 2.254239663629993, "grad_norm": 0.14411413669586182, "learning_rate": 7.644582635733078e-05, "loss": 0.0125, "step": 16084 }, { "epoch": 2.2543798177995793, "grad_norm": 0.2587570250034332, "learning_rate": 7.643147572351111e-05, "loss": 0.0237, "step": 16085 }, { "epoch": 2.254519971969166, "grad_norm": 0.2747219204902649, "learning_rate": 7.641712508969145e-05, "loss": 0.0212, "step": 16086 }, { "epoch": 2.2546601261387527, "grad_norm": 0.20177651941776276, "learning_rate": 7.64027744558718e-05, "loss": 0.0405, "step": 16087 }, { "epoch": 2.254800280308339, "grad_norm": 0.29879772663116455, "learning_rate": 7.638842382205213e-05, "loss": 
0.0432, "step": 16088 }, { "epoch": 2.2549404344779256, "grad_norm": 0.22972798347473145, "learning_rate": 7.637407318823248e-05, "loss": 0.0171, "step": 16089 }, { "epoch": 2.2550805886475125, "grad_norm": 0.2076868861913681, "learning_rate": 7.63597225544128e-05, "loss": 0.0285, "step": 16090 }, { "epoch": 2.255220742817099, "grad_norm": 0.1993919312953949, "learning_rate": 7.634537192059315e-05, "loss": 0.0241, "step": 16091 }, { "epoch": 2.2553608969866854, "grad_norm": 0.03470282629132271, "learning_rate": 7.63310212867735e-05, "loss": 0.0015, "step": 16092 }, { "epoch": 2.255501051156272, "grad_norm": 0.2611018419265747, "learning_rate": 7.631667065295383e-05, "loss": 0.0267, "step": 16093 }, { "epoch": 2.2556412053258583, "grad_norm": 0.17524974048137665, "learning_rate": 7.630232001913417e-05, "loss": 0.0086, "step": 16094 }, { "epoch": 2.255781359495445, "grad_norm": 0.26347264647483826, "learning_rate": 7.628796938531452e-05, "loss": 0.0216, "step": 16095 }, { "epoch": 2.2559215136650317, "grad_norm": 0.07756421715021133, "learning_rate": 7.627361875149484e-05, "loss": 0.0106, "step": 16096 }, { "epoch": 2.256061667834618, "grad_norm": 0.3247242569923401, "learning_rate": 7.625926811767519e-05, "loss": 0.0172, "step": 16097 }, { "epoch": 2.2562018220042046, "grad_norm": 0.2472013235092163, "learning_rate": 7.624491748385553e-05, "loss": 0.0317, "step": 16098 }, { "epoch": 2.256341976173791, "grad_norm": 0.14462490379810333, "learning_rate": 7.623056685003587e-05, "loss": 0.0346, "step": 16099 }, { "epoch": 2.2564821303433775, "grad_norm": 0.26738011837005615, "learning_rate": 7.621621621621621e-05, "loss": 0.0248, "step": 16100 }, { "epoch": 2.2566222845129644, "grad_norm": 0.5486509203910828, "learning_rate": 7.620186558239654e-05, "loss": 0.1267, "step": 16101 }, { "epoch": 2.256762438682551, "grad_norm": 0.3519798219203949, "learning_rate": 7.618751494857688e-05, "loss": 0.0141, "step": 16102 }, { "epoch": 2.2569025928521373, "grad_norm": 
0.5758402943611145, "learning_rate": 7.617316431475724e-05, "loss": 0.0553, "step": 16103 }, { "epoch": 2.257042747021724, "grad_norm": 0.13290636241436005, "learning_rate": 7.615881368093757e-05, "loss": 0.0077, "step": 16104 }, { "epoch": 2.2571829011913103, "grad_norm": 0.16322965919971466, "learning_rate": 7.614446304711791e-05, "loss": 0.0212, "step": 16105 }, { "epoch": 2.257323055360897, "grad_norm": 0.46359366178512573, "learning_rate": 7.613011241329825e-05, "loss": 0.051, "step": 16106 }, { "epoch": 2.2574632095304836, "grad_norm": 0.43224605917930603, "learning_rate": 7.611576177947858e-05, "loss": 0.0202, "step": 16107 }, { "epoch": 2.25760336370007, "grad_norm": 0.04966854304075241, "learning_rate": 7.610141114565894e-05, "loss": 0.0023, "step": 16108 }, { "epoch": 2.2577435178696565, "grad_norm": 0.06300931423902512, "learning_rate": 7.608706051183926e-05, "loss": 0.0038, "step": 16109 }, { "epoch": 2.257883672039243, "grad_norm": 0.18797099590301514, "learning_rate": 7.607270987801961e-05, "loss": 0.0144, "step": 16110 }, { "epoch": 2.25802382620883, "grad_norm": 0.34303364157676697, "learning_rate": 7.605835924419995e-05, "loss": 0.0534, "step": 16111 }, { "epoch": 2.2581639803784164, "grad_norm": 0.24372737109661102, "learning_rate": 7.604400861038028e-05, "loss": 0.0282, "step": 16112 }, { "epoch": 2.258304134548003, "grad_norm": 0.20383049547672272, "learning_rate": 7.602965797656062e-05, "loss": 0.012, "step": 16113 }, { "epoch": 2.2584442887175893, "grad_norm": 0.311422199010849, "learning_rate": 7.601530734274098e-05, "loss": 0.0506, "step": 16114 }, { "epoch": 2.2585844428871757, "grad_norm": 0.15760605037212372, "learning_rate": 7.60009567089213e-05, "loss": 0.0128, "step": 16115 }, { "epoch": 2.258724597056762, "grad_norm": 0.49481332302093506, "learning_rate": 7.598660607510165e-05, "loss": 0.0366, "step": 16116 }, { "epoch": 2.258864751226349, "grad_norm": 0.14541682600975037, "learning_rate": 7.597225544128197e-05, "loss": 0.008, "step": 
16117 }, { "epoch": 2.2590049053959356, "grad_norm": 0.47736671566963196, "learning_rate": 7.595790480746232e-05, "loss": 0.0372, "step": 16118 }, { "epoch": 2.259145059565522, "grad_norm": 0.4966481924057007, "learning_rate": 7.594355417364267e-05, "loss": 0.0194, "step": 16119 }, { "epoch": 2.2592852137351085, "grad_norm": 3.5510356426239014, "learning_rate": 7.5929203539823e-05, "loss": 0.1012, "step": 16120 }, { "epoch": 2.2594253679046954, "grad_norm": 0.2877337336540222, "learning_rate": 7.591485290600334e-05, "loss": 0.0432, "step": 16121 }, { "epoch": 2.259565522074282, "grad_norm": 0.27166762948036194, "learning_rate": 7.590050227218369e-05, "loss": 0.0482, "step": 16122 }, { "epoch": 2.2597056762438683, "grad_norm": 0.3315783441066742, "learning_rate": 7.588615163836401e-05, "loss": 0.0573, "step": 16123 }, { "epoch": 2.2598458304134548, "grad_norm": 0.396234929561615, "learning_rate": 7.587180100454437e-05, "loss": 0.0317, "step": 16124 }, { "epoch": 2.2599859845830412, "grad_norm": 0.3639105260372162, "learning_rate": 7.58574503707247e-05, "loss": 0.0306, "step": 16125 }, { "epoch": 2.2601261387526277, "grad_norm": 0.1023806780576706, "learning_rate": 7.584309973690504e-05, "loss": 0.0105, "step": 16126 }, { "epoch": 2.2602662929222146, "grad_norm": 0.25488799810409546, "learning_rate": 7.582874910308538e-05, "loss": 0.0243, "step": 16127 }, { "epoch": 2.260406447091801, "grad_norm": 0.15481606125831604, "learning_rate": 7.581439846926571e-05, "loss": 0.0096, "step": 16128 }, { "epoch": 2.2605466012613875, "grad_norm": 0.18601356446743011, "learning_rate": 7.580004783544605e-05, "loss": 0.0327, "step": 16129 }, { "epoch": 2.260686755430974, "grad_norm": 0.47852209210395813, "learning_rate": 7.578569720162641e-05, "loss": 0.0293, "step": 16130 }, { "epoch": 2.2608269096005604, "grad_norm": 0.19744735956192017, "learning_rate": 7.577134656780674e-05, "loss": 0.0232, "step": 16131 }, { "epoch": 2.2609670637701473, "grad_norm": 0.2642534077167511, 
"learning_rate": 7.575699593398708e-05, "loss": 0.0123, "step": 16132 }, { "epoch": 2.261107217939734, "grad_norm": 0.3753826320171356, "learning_rate": 7.574264530016741e-05, "loss": 0.0154, "step": 16133 }, { "epoch": 2.2612473721093203, "grad_norm": 0.3004189729690552, "learning_rate": 7.572829466634775e-05, "loss": 0.0578, "step": 16134 }, { "epoch": 2.2613875262789067, "grad_norm": 0.1730934977531433, "learning_rate": 7.57139440325281e-05, "loss": 0.0105, "step": 16135 }, { "epoch": 2.261527680448493, "grad_norm": 0.21417799592018127, "learning_rate": 7.569959339870843e-05, "loss": 0.0193, "step": 16136 }, { "epoch": 2.26166783461808, "grad_norm": 0.1666230410337448, "learning_rate": 7.568524276488878e-05, "loss": 0.0135, "step": 16137 }, { "epoch": 2.2618079887876665, "grad_norm": 0.22300715744495392, "learning_rate": 7.567089213106912e-05, "loss": 0.0158, "step": 16138 }, { "epoch": 2.261948142957253, "grad_norm": 0.08547382801771164, "learning_rate": 7.565654149724945e-05, "loss": 0.0045, "step": 16139 }, { "epoch": 2.2620882971268395, "grad_norm": 0.3082025349140167, "learning_rate": 7.56421908634298e-05, "loss": 0.0749, "step": 16140 }, { "epoch": 2.262228451296426, "grad_norm": 0.2884487807750702, "learning_rate": 7.562784022961014e-05, "loss": 0.0107, "step": 16141 }, { "epoch": 2.262368605466013, "grad_norm": 0.11946040391921997, "learning_rate": 7.561348959579047e-05, "loss": 0.0065, "step": 16142 }, { "epoch": 2.2625087596355993, "grad_norm": 0.5854082703590393, "learning_rate": 7.559913896197082e-05, "loss": 0.0187, "step": 16143 }, { "epoch": 2.2626489138051857, "grad_norm": 0.3553685247898102, "learning_rate": 7.558478832815114e-05, "loss": 0.0357, "step": 16144 }, { "epoch": 2.262789067974772, "grad_norm": 0.2248820662498474, "learning_rate": 7.557043769433149e-05, "loss": 0.03, "step": 16145 }, { "epoch": 2.2629292221443587, "grad_norm": 0.07247384637594223, "learning_rate": 7.555608706051184e-05, "loss": 0.0033, "step": 16146 }, { "epoch": 
2.263069376313945, "grad_norm": 0.7017240524291992, "learning_rate": 7.554173642669217e-05, "loss": 0.0143, "step": 16147 }, { "epoch": 2.263209530483532, "grad_norm": 0.35459059476852417, "learning_rate": 7.552738579287251e-05, "loss": 0.0545, "step": 16148 }, { "epoch": 2.2633496846531185, "grad_norm": 0.9143280386924744, "learning_rate": 7.551303515905285e-05, "loss": 0.0349, "step": 16149 }, { "epoch": 2.263489838822705, "grad_norm": 0.12702693045139313, "learning_rate": 7.549868452523318e-05, "loss": 0.0186, "step": 16150 }, { "epoch": 2.2636299929922914, "grad_norm": 0.3911602199077606, "learning_rate": 7.548433389141354e-05, "loss": 0.0273, "step": 16151 }, { "epoch": 2.2637701471618783, "grad_norm": 0.17827431857585907, "learning_rate": 7.546998325759387e-05, "loss": 0.0231, "step": 16152 }, { "epoch": 2.2639103013314648, "grad_norm": 0.09816229343414307, "learning_rate": 7.545563262377421e-05, "loss": 0.0051, "step": 16153 }, { "epoch": 2.264050455501051, "grad_norm": 0.1798473596572876, "learning_rate": 7.544128198995455e-05, "loss": 0.0123, "step": 16154 }, { "epoch": 2.2641906096706377, "grad_norm": 0.215614914894104, "learning_rate": 7.542693135613488e-05, "loss": 0.0109, "step": 16155 }, { "epoch": 2.264330763840224, "grad_norm": 0.503149151802063, "learning_rate": 7.541258072231524e-05, "loss": 0.0283, "step": 16156 }, { "epoch": 2.2644709180098106, "grad_norm": 0.6698678135871887, "learning_rate": 7.539823008849558e-05, "loss": 0.0381, "step": 16157 }, { "epoch": 2.2646110721793975, "grad_norm": 0.29192933440208435, "learning_rate": 7.53838794546759e-05, "loss": 0.0331, "step": 16158 }, { "epoch": 2.264751226348984, "grad_norm": 0.1856585294008255, "learning_rate": 7.536952882085625e-05, "loss": 0.0413, "step": 16159 }, { "epoch": 2.2648913805185704, "grad_norm": 0.12938711047172546, "learning_rate": 7.535517818703658e-05, "loss": 0.0167, "step": 16160 }, { "epoch": 2.265031534688157, "grad_norm": 0.10505757480859756, "learning_rate": 
7.534082755321693e-05, "loss": 0.0324, "step": 16161 }, { "epoch": 2.2651716888577433, "grad_norm": 0.5922462344169617, "learning_rate": 7.532647691939727e-05, "loss": 0.0323, "step": 16162 }, { "epoch": 2.2653118430273302, "grad_norm": 0.18770937621593475, "learning_rate": 7.53121262855776e-05, "loss": 0.0343, "step": 16163 }, { "epoch": 2.2654519971969167, "grad_norm": 0.06127657741308212, "learning_rate": 7.529777565175795e-05, "loss": 0.0041, "step": 16164 }, { "epoch": 2.265592151366503, "grad_norm": 0.3119257688522339, "learning_rate": 7.528342501793829e-05, "loss": 0.0599, "step": 16165 }, { "epoch": 2.2657323055360896, "grad_norm": 0.9948211312294006, "learning_rate": 7.526907438411862e-05, "loss": 0.0591, "step": 16166 }, { "epoch": 2.265872459705676, "grad_norm": 0.3948303759098053, "learning_rate": 7.525472375029897e-05, "loss": 0.0331, "step": 16167 }, { "epoch": 2.266012613875263, "grad_norm": 0.7139501571655273, "learning_rate": 7.52403731164793e-05, "loss": 0.0334, "step": 16168 }, { "epoch": 2.2661527680448494, "grad_norm": 0.5135822296142578, "learning_rate": 7.522602248265964e-05, "loss": 0.0252, "step": 16169 }, { "epoch": 2.266292922214436, "grad_norm": 0.724551796913147, "learning_rate": 7.521167184883998e-05, "loss": 0.0818, "step": 16170 }, { "epoch": 2.2664330763840224, "grad_norm": 0.1179722398519516, "learning_rate": 7.519732121502031e-05, "loss": 0.0134, "step": 16171 }, { "epoch": 2.266573230553609, "grad_norm": 0.12055621296167374, "learning_rate": 7.518297058120067e-05, "loss": 0.0155, "step": 16172 }, { "epoch": 2.2667133847231957, "grad_norm": 0.22950193285942078, "learning_rate": 7.516861994738101e-05, "loss": 0.0118, "step": 16173 }, { "epoch": 2.266853538892782, "grad_norm": 0.3248205780982971, "learning_rate": 7.515426931356134e-05, "loss": 0.034, "step": 16174 }, { "epoch": 2.2669936930623686, "grad_norm": 0.13271230459213257, "learning_rate": 7.513991867974168e-05, "loss": 0.0245, "step": 16175 }, { "epoch": 2.267133847231955, 
"grad_norm": 0.1510247141122818, "learning_rate": 7.512556804592202e-05, "loss": 0.0148, "step": 16176 }, { "epoch": 2.2672740014015416, "grad_norm": 0.33316341042518616, "learning_rate": 7.511121741210237e-05, "loss": 0.027, "step": 16177 }, { "epoch": 2.267414155571128, "grad_norm": 0.3815443515777588, "learning_rate": 7.509686677828271e-05, "loss": 0.0219, "step": 16178 }, { "epoch": 2.267554309740715, "grad_norm": 0.614569365978241, "learning_rate": 7.508251614446304e-05, "loss": 0.0486, "step": 16179 }, { "epoch": 2.2676944639103014, "grad_norm": 0.08508642762899399, "learning_rate": 7.506816551064338e-05, "loss": 0.0172, "step": 16180 }, { "epoch": 2.267834618079888, "grad_norm": 0.45303839445114136, "learning_rate": 7.505381487682372e-05, "loss": 0.0399, "step": 16181 }, { "epoch": 2.2679747722494743, "grad_norm": 0.31105920672416687, "learning_rate": 7.503946424300405e-05, "loss": 0.0438, "step": 16182 }, { "epoch": 2.268114926419061, "grad_norm": 0.16513462364673615, "learning_rate": 7.50251136091844e-05, "loss": 0.012, "step": 16183 }, { "epoch": 2.2682550805886477, "grad_norm": 0.21608415246009827, "learning_rate": 7.501076297536475e-05, "loss": 0.0267, "step": 16184 }, { "epoch": 2.268395234758234, "grad_norm": 0.13387833535671234, "learning_rate": 7.499641234154508e-05, "loss": 0.0172, "step": 16185 }, { "epoch": 2.2685353889278206, "grad_norm": 0.10963825136423111, "learning_rate": 7.498206170772542e-05, "loss": 0.0136, "step": 16186 }, { "epoch": 2.268675543097407, "grad_norm": 0.19476504623889923, "learning_rate": 7.496771107390576e-05, "loss": 0.0236, "step": 16187 }, { "epoch": 2.2688156972669935, "grad_norm": 0.136983260512352, "learning_rate": 7.49533604400861e-05, "loss": 0.0141, "step": 16188 }, { "epoch": 2.2689558514365804, "grad_norm": 0.12143999338150024, "learning_rate": 7.493900980626643e-05, "loss": 0.0031, "step": 16189 }, { "epoch": 2.269096005606167, "grad_norm": 0.14770960807800293, "learning_rate": 7.492465917244679e-05, "loss": 
0.0112, "step": 16190 }, { "epoch": 2.2692361597757533, "grad_norm": 0.09765385836362839, "learning_rate": 7.491030853862711e-05, "loss": 0.0179, "step": 16191 }, { "epoch": 2.26937631394534, "grad_norm": 0.10464373975992203, "learning_rate": 7.489595790480746e-05, "loss": 0.0078, "step": 16192 }, { "epoch": 2.2695164681149262, "grad_norm": 0.49572744965553284, "learning_rate": 7.48816072709878e-05, "loss": 0.054, "step": 16193 }, { "epoch": 2.269656622284513, "grad_norm": 0.14305707812309265, "learning_rate": 7.486725663716814e-05, "loss": 0.0137, "step": 16194 }, { "epoch": 2.2697967764540996, "grad_norm": 0.5051373839378357, "learning_rate": 7.485290600334847e-05, "loss": 0.0297, "step": 16195 }, { "epoch": 2.269936930623686, "grad_norm": 0.31745806336402893, "learning_rate": 7.483855536952881e-05, "loss": 0.0292, "step": 16196 }, { "epoch": 2.2700770847932725, "grad_norm": 0.23002316057682037, "learning_rate": 7.482420473570915e-05, "loss": 0.0342, "step": 16197 }, { "epoch": 2.270217238962859, "grad_norm": 0.4532015919685364, "learning_rate": 7.48098541018895e-05, "loss": 0.0519, "step": 16198 }, { "epoch": 2.2703573931324454, "grad_norm": 0.18719851970672607, "learning_rate": 7.479550346806984e-05, "loss": 0.0075, "step": 16199 }, { "epoch": 2.2704975473020323, "grad_norm": 0.19947662949562073, "learning_rate": 7.478115283425017e-05, "loss": 0.0252, "step": 16200 }, { "epoch": 2.270637701471619, "grad_norm": 0.42769742012023926, "learning_rate": 7.476680220043051e-05, "loss": 0.0301, "step": 16201 }, { "epoch": 2.2707778556412053, "grad_norm": 0.5155138969421387, "learning_rate": 7.475245156661085e-05, "loss": 0.0414, "step": 16202 }, { "epoch": 2.2709180098107917, "grad_norm": 0.42418935894966125, "learning_rate": 7.473810093279119e-05, "loss": 0.0323, "step": 16203 }, { "epoch": 2.2710581639803786, "grad_norm": 0.15725037455558777, "learning_rate": 7.472375029897154e-05, "loss": 0.0108, "step": 16204 }, { "epoch": 2.271198318149965, "grad_norm": 
0.15566274523735046, "learning_rate": 7.470939966515186e-05, "loss": 0.0129, "step": 16205 }, { "epoch": 2.2713384723195515, "grad_norm": 0.17007608711719513, "learning_rate": 7.469504903133222e-05, "loss": 0.0146, "step": 16206 }, { "epoch": 2.271478626489138, "grad_norm": 0.28880584239959717, "learning_rate": 7.468069839751255e-05, "loss": 0.0584, "step": 16207 }, { "epoch": 2.2716187806587245, "grad_norm": 0.4530080258846283, "learning_rate": 7.466634776369289e-05, "loss": 0.0447, "step": 16208 }, { "epoch": 2.271758934828311, "grad_norm": 0.09833940863609314, "learning_rate": 7.465199712987323e-05, "loss": 0.0067, "step": 16209 }, { "epoch": 2.271899088997898, "grad_norm": 0.19690768420696259, "learning_rate": 7.463764649605357e-05, "loss": 0.0233, "step": 16210 }, { "epoch": 2.2720392431674843, "grad_norm": 0.04336480423808098, "learning_rate": 7.46232958622339e-05, "loss": 0.0022, "step": 16211 }, { "epoch": 2.2721793973370707, "grad_norm": 0.06939428299665451, "learning_rate": 7.460894522841424e-05, "loss": 0.0143, "step": 16212 }, { "epoch": 2.272319551506657, "grad_norm": 0.1259625256061554, "learning_rate": 7.459459459459459e-05, "loss": 0.0082, "step": 16213 }, { "epoch": 2.272459705676244, "grad_norm": 0.05249089375138283, "learning_rate": 7.458024396077493e-05, "loss": 0.0053, "step": 16214 }, { "epoch": 2.2725998598458306, "grad_norm": 0.4364691376686096, "learning_rate": 7.456589332695527e-05, "loss": 0.0227, "step": 16215 }, { "epoch": 2.272740014015417, "grad_norm": 0.3146170377731323, "learning_rate": 7.45515426931356e-05, "loss": 0.0534, "step": 16216 }, { "epoch": 2.2728801681850035, "grad_norm": 0.4993615746498108, "learning_rate": 7.453719205931596e-05, "loss": 0.1025, "step": 16217 }, { "epoch": 2.27302032235459, "grad_norm": 0.1917387843132019, "learning_rate": 7.452284142549628e-05, "loss": 0.0235, "step": 16218 }, { "epoch": 2.2731604765241764, "grad_norm": 1.1244992017745972, "learning_rate": 7.450849079167663e-05, "loss": 0.1698, "step": 
16219 }, { "epoch": 2.2733006306937633, "grad_norm": 1.2356315851211548, "learning_rate": 7.449414015785697e-05, "loss": 0.0946, "step": 16220 }, { "epoch": 2.2734407848633498, "grad_norm": 0.18650762736797333, "learning_rate": 7.447978952403731e-05, "loss": 0.0213, "step": 16221 }, { "epoch": 2.2735809390329362, "grad_norm": 0.3772331774234772, "learning_rate": 7.446543889021765e-05, "loss": 0.0249, "step": 16222 }, { "epoch": 2.2737210932025227, "grad_norm": 0.18132229149341583, "learning_rate": 7.445108825639798e-05, "loss": 0.0068, "step": 16223 }, { "epoch": 2.273861247372109, "grad_norm": 0.06925246864557266, "learning_rate": 7.443673762257832e-05, "loss": 0.0054, "step": 16224 }, { "epoch": 2.274001401541696, "grad_norm": 0.1610412895679474, "learning_rate": 7.442238698875867e-05, "loss": 0.0267, "step": 16225 }, { "epoch": 2.2741415557112825, "grad_norm": 0.23465511202812195, "learning_rate": 7.440803635493901e-05, "loss": 0.0323, "step": 16226 }, { "epoch": 2.274281709880869, "grad_norm": 0.18110215663909912, "learning_rate": 7.439368572111934e-05, "loss": 0.0254, "step": 16227 }, { "epoch": 2.2744218640504554, "grad_norm": 0.018187547102570534, "learning_rate": 7.437933508729968e-05, "loss": 0.0015, "step": 16228 }, { "epoch": 2.274562018220042, "grad_norm": 0.2320176512002945, "learning_rate": 7.436498445348002e-05, "loss": 0.0791, "step": 16229 }, { "epoch": 2.2747021723896284, "grad_norm": 0.3378334641456604, "learning_rate": 7.435063381966036e-05, "loss": 0.008, "step": 16230 }, { "epoch": 2.2748423265592153, "grad_norm": 0.18843097984790802, "learning_rate": 7.43362831858407e-05, "loss": 0.0235, "step": 16231 }, { "epoch": 2.2749824807288017, "grad_norm": 0.1092878207564354, "learning_rate": 7.432193255202103e-05, "loss": 0.0112, "step": 16232 }, { "epoch": 2.275122634898388, "grad_norm": 0.4976484179496765, "learning_rate": 7.430758191820139e-05, "loss": 0.048, "step": 16233 }, { "epoch": 2.2752627890679746, "grad_norm": 0.2779671549797058, 
"learning_rate": 7.429323128438172e-05, "loss": 0.0705, "step": 16234 }, { "epoch": 2.2754029432375615, "grad_norm": 0.3394249975681305, "learning_rate": 7.427888065056206e-05, "loss": 0.0597, "step": 16235 }, { "epoch": 2.275543097407148, "grad_norm": 0.044666409492492676, "learning_rate": 7.42645300167424e-05, "loss": 0.0041, "step": 16236 }, { "epoch": 2.2756832515767345, "grad_norm": 0.15766453742980957, "learning_rate": 7.425017938292274e-05, "loss": 0.0391, "step": 16237 }, { "epoch": 2.275823405746321, "grad_norm": 0.5730196833610535, "learning_rate": 7.423582874910309e-05, "loss": 0.0676, "step": 16238 }, { "epoch": 2.2759635599159074, "grad_norm": 0.42103469371795654, "learning_rate": 7.422147811528341e-05, "loss": 0.0387, "step": 16239 }, { "epoch": 2.276103714085494, "grad_norm": 0.7110700607299805, "learning_rate": 7.420712748146376e-05, "loss": 0.0169, "step": 16240 }, { "epoch": 2.2762438682550807, "grad_norm": 0.22068460285663605, "learning_rate": 7.41927768476441e-05, "loss": 0.0225, "step": 16241 }, { "epoch": 2.276384022424667, "grad_norm": 0.1457379162311554, "learning_rate": 7.417842621382444e-05, "loss": 0.0213, "step": 16242 }, { "epoch": 2.2765241765942537, "grad_norm": 0.21809007227420807, "learning_rate": 7.416407558000477e-05, "loss": 0.0644, "step": 16243 }, { "epoch": 2.27666433076384, "grad_norm": 0.5167080163955688, "learning_rate": 7.414972494618511e-05, "loss": 0.0506, "step": 16244 }, { "epoch": 2.2768044849334266, "grad_norm": 0.3382079303264618, "learning_rate": 7.413537431236545e-05, "loss": 0.0261, "step": 16245 }, { "epoch": 2.2769446391030135, "grad_norm": 0.6171927452087402, "learning_rate": 7.41210236785458e-05, "loss": 0.0565, "step": 16246 }, { "epoch": 2.2770847932726, "grad_norm": 0.26119115948677063, "learning_rate": 7.410667304472614e-05, "loss": 0.0197, "step": 16247 }, { "epoch": 2.2772249474421864, "grad_norm": 0.4102140963077545, "learning_rate": 7.409232241090647e-05, "loss": 0.0512, "step": 16248 }, { "epoch": 
2.277365101611773, "grad_norm": 0.05564238131046295, "learning_rate": 7.407797177708682e-05, "loss": 0.0061, "step": 16249 }, { "epoch": 2.2775052557813593, "grad_norm": 0.2707335948944092, "learning_rate": 7.406362114326715e-05, "loss": 0.0798, "step": 16250 }, { "epoch": 2.277645409950946, "grad_norm": 0.2930961549282074, "learning_rate": 7.404927050944749e-05, "loss": 0.0259, "step": 16251 }, { "epoch": 2.2777855641205327, "grad_norm": 0.1461946666240692, "learning_rate": 7.403491987562783e-05, "loss": 0.0183, "step": 16252 }, { "epoch": 2.277925718290119, "grad_norm": 0.08901000022888184, "learning_rate": 7.402056924180818e-05, "loss": 0.0108, "step": 16253 }, { "epoch": 2.2780658724597056, "grad_norm": 0.8977463841438293, "learning_rate": 7.400621860798852e-05, "loss": 0.0281, "step": 16254 }, { "epoch": 2.278206026629292, "grad_norm": 0.36621615290641785, "learning_rate": 7.399186797416885e-05, "loss": 0.0397, "step": 16255 }, { "epoch": 2.278346180798879, "grad_norm": 0.185032457113266, "learning_rate": 7.397751734034919e-05, "loss": 0.0104, "step": 16256 }, { "epoch": 2.2784863349684654, "grad_norm": 0.391226202249527, "learning_rate": 7.396316670652953e-05, "loss": 0.0147, "step": 16257 }, { "epoch": 2.278626489138052, "grad_norm": 0.1199955940246582, "learning_rate": 7.394881607270987e-05, "loss": 0.0192, "step": 16258 }, { "epoch": 2.2787666433076383, "grad_norm": 0.12605716288089752, "learning_rate": 7.39344654388902e-05, "loss": 0.0052, "step": 16259 }, { "epoch": 2.278906797477225, "grad_norm": 0.1453598141670227, "learning_rate": 7.392011480507056e-05, "loss": 0.0147, "step": 16260 }, { "epoch": 2.2790469516468113, "grad_norm": 0.20824269950389862, "learning_rate": 7.390576417125089e-05, "loss": 0.0097, "step": 16261 }, { "epoch": 2.279187105816398, "grad_norm": 0.29285532236099243, "learning_rate": 7.389141353743123e-05, "loss": 0.0183, "step": 16262 }, { "epoch": 2.2793272599859846, "grad_norm": 0.2977740466594696, "learning_rate": 
7.387706290361157e-05, "loss": 0.0475, "step": 16263 }, { "epoch": 2.279467414155571, "grad_norm": 0.32792624831199646, "learning_rate": 7.386271226979191e-05, "loss": 0.0275, "step": 16264 }, { "epoch": 2.2796075683251575, "grad_norm": 1.1770472526550293, "learning_rate": 7.384836163597225e-05, "loss": 0.1272, "step": 16265 }, { "epoch": 2.2797477224947444, "grad_norm": 0.22878652811050415, "learning_rate": 7.383401100215258e-05, "loss": 0.0127, "step": 16266 }, { "epoch": 2.279887876664331, "grad_norm": 0.6441991329193115, "learning_rate": 7.381966036833293e-05, "loss": 0.0255, "step": 16267 }, { "epoch": 2.2800280308339174, "grad_norm": 0.4638057053089142, "learning_rate": 7.380530973451327e-05, "loss": 0.0321, "step": 16268 }, { "epoch": 2.280168185003504, "grad_norm": 1.3254001140594482, "learning_rate": 7.379095910069361e-05, "loss": 0.1844, "step": 16269 }, { "epoch": 2.2803083391730903, "grad_norm": 1.0455617904663086, "learning_rate": 7.377660846687395e-05, "loss": 0.0733, "step": 16270 }, { "epoch": 2.2804484933426767, "grad_norm": 0.3305984437465668, "learning_rate": 7.376225783305428e-05, "loss": 0.0525, "step": 16271 }, { "epoch": 2.2805886475122636, "grad_norm": 0.12932036817073822, "learning_rate": 7.374790719923464e-05, "loss": 0.0126, "step": 16272 }, { "epoch": 2.28072880168185, "grad_norm": 0.1050378680229187, "learning_rate": 7.373355656541496e-05, "loss": 0.0111, "step": 16273 }, { "epoch": 2.2808689558514366, "grad_norm": 0.1309729963541031, "learning_rate": 7.37192059315953e-05, "loss": 0.0075, "step": 16274 }, { "epoch": 2.281009110021023, "grad_norm": 0.3628782331943512, "learning_rate": 7.370485529777564e-05, "loss": 0.0314, "step": 16275 }, { "epoch": 2.2811492641906095, "grad_norm": 0.10706055164337158, "learning_rate": 7.369050466395599e-05, "loss": 0.0133, "step": 16276 }, { "epoch": 2.2812894183601964, "grad_norm": 0.08899001032114029, "learning_rate": 7.367615403013632e-05, "loss": 0.0122, "step": 16277 }, { "epoch": 
2.281429572529783, "grad_norm": 0.28892695903778076, "learning_rate": 7.366180339631666e-05, "loss": 0.0502, "step": 16278 }, { "epoch": 2.2815697266993693, "grad_norm": 0.2373223453760147, "learning_rate": 7.3647452762497e-05, "loss": 0.0252, "step": 16279 }, { "epoch": 2.2817098808689558, "grad_norm": 0.08226356655359268, "learning_rate": 7.363310212867735e-05, "loss": 0.0115, "step": 16280 }, { "epoch": 2.2818500350385422, "grad_norm": 0.1792638748884201, "learning_rate": 7.361875149485769e-05, "loss": 0.0246, "step": 16281 }, { "epoch": 2.281990189208129, "grad_norm": 0.08150475472211838, "learning_rate": 7.360440086103802e-05, "loss": 0.0062, "step": 16282 }, { "epoch": 2.2821303433777156, "grad_norm": 0.1887059211730957, "learning_rate": 7.359005022721836e-05, "loss": 0.009, "step": 16283 }, { "epoch": 2.282270497547302, "grad_norm": 0.05382366105914116, "learning_rate": 7.35756995933987e-05, "loss": 0.0022, "step": 16284 }, { "epoch": 2.2824106517168885, "grad_norm": 0.4411434233188629, "learning_rate": 7.356134895957904e-05, "loss": 0.024, "step": 16285 }, { "epoch": 2.282550805886475, "grad_norm": 0.13121947646141052, "learning_rate": 7.354699832575939e-05, "loss": 0.0256, "step": 16286 }, { "epoch": 2.282690960056062, "grad_norm": 0.2541201710700989, "learning_rate": 7.353264769193973e-05, "loss": 0.0196, "step": 16287 }, { "epoch": 2.2828311142256483, "grad_norm": 0.2207149714231491, "learning_rate": 7.351829705812007e-05, "loss": 0.0294, "step": 16288 }, { "epoch": 2.282971268395235, "grad_norm": 0.24132980406284332, "learning_rate": 7.35039464243004e-05, "loss": 0.0539, "step": 16289 }, { "epoch": 2.2831114225648212, "grad_norm": 0.40868696570396423, "learning_rate": 7.348959579048074e-05, "loss": 0.033, "step": 16290 }, { "epoch": 2.2832515767344077, "grad_norm": 0.31463509798049927, "learning_rate": 7.347524515666108e-05, "loss": 0.0759, "step": 16291 }, { "epoch": 2.283391730903994, "grad_norm": 0.13443753123283386, "learning_rate": 
7.346089452284142e-05, "loss": 0.0389, "step": 16292 }, { "epoch": 2.283531885073581, "grad_norm": 0.29175665974617004, "learning_rate": 7.344654388902175e-05, "loss": 0.0554, "step": 16293 }, { "epoch": 2.2836720392431675, "grad_norm": 0.25446051359176636, "learning_rate": 7.34321932552021e-05, "loss": 0.0358, "step": 16294 }, { "epoch": 2.283812193412754, "grad_norm": 0.47193390130996704, "learning_rate": 7.341784262138244e-05, "loss": 0.0573, "step": 16295 }, { "epoch": 2.2839523475823404, "grad_norm": 0.2153552621603012, "learning_rate": 7.340349198756278e-05, "loss": 0.0265, "step": 16296 }, { "epoch": 2.2840925017519274, "grad_norm": 0.1195971667766571, "learning_rate": 7.338914135374312e-05, "loss": 0.0187, "step": 16297 }, { "epoch": 2.284232655921514, "grad_norm": 0.1436283141374588, "learning_rate": 7.337479071992345e-05, "loss": 0.0197, "step": 16298 }, { "epoch": 2.2843728100911003, "grad_norm": 0.06564446538686752, "learning_rate": 7.33604400861038e-05, "loss": 0.003, "step": 16299 }, { "epoch": 2.2845129642606867, "grad_norm": 0.29224902391433716, "learning_rate": 7.334608945228413e-05, "loss": 0.0384, "step": 16300 }, { "epoch": 2.284653118430273, "grad_norm": 0.16283342242240906, "learning_rate": 7.333173881846448e-05, "loss": 0.0364, "step": 16301 }, { "epoch": 2.2847932725998596, "grad_norm": 0.2010299563407898, "learning_rate": 7.331738818464482e-05, "loss": 0.0303, "step": 16302 }, { "epoch": 2.2849334267694466, "grad_norm": 0.18469111621379852, "learning_rate": 7.330303755082516e-05, "loss": 0.0181, "step": 16303 }, { "epoch": 2.285073580939033, "grad_norm": 0.007796700578182936, "learning_rate": 7.32886869170055e-05, "loss": 0.0009, "step": 16304 }, { "epoch": 2.2852137351086195, "grad_norm": 0.08528345078229904, "learning_rate": 7.327433628318583e-05, "loss": 0.0079, "step": 16305 }, { "epoch": 2.285353889278206, "grad_norm": 0.5259838104248047, "learning_rate": 7.325998564936617e-05, "loss": 0.0759, "step": 16306 }, { "epoch": 
2.2854940434477924, "grad_norm": 0.224366694688797, "learning_rate": 7.324563501554652e-05, "loss": 0.0124, "step": 16307 }, { "epoch": 2.2856341976173793, "grad_norm": 0.21579058468341827, "learning_rate": 7.323128438172686e-05, "loss": 0.0183, "step": 16308 }, { "epoch": 2.2857743517869658, "grad_norm": 0.19897300004959106, "learning_rate": 7.321693374790719e-05, "loss": 0.0107, "step": 16309 }, { "epoch": 2.285914505956552, "grad_norm": 0.20075054466724396, "learning_rate": 7.320258311408753e-05, "loss": 0.0172, "step": 16310 }, { "epoch": 2.2860546601261387, "grad_norm": 0.17577716708183289, "learning_rate": 7.318823248026787e-05, "loss": 0.0063, "step": 16311 }, { "epoch": 2.286194814295725, "grad_norm": 0.272735595703125, "learning_rate": 7.317388184644821e-05, "loss": 0.0192, "step": 16312 }, { "epoch": 2.286334968465312, "grad_norm": 0.24795500934123993, "learning_rate": 7.315953121262855e-05, "loss": 0.0545, "step": 16313 }, { "epoch": 2.2864751226348985, "grad_norm": 0.12107976526021957, "learning_rate": 7.314518057880888e-05, "loss": 0.0099, "step": 16314 }, { "epoch": 2.286615276804485, "grad_norm": 0.8941563963890076, "learning_rate": 7.313082994498924e-05, "loss": 0.1324, "step": 16315 }, { "epoch": 2.2867554309740714, "grad_norm": 0.04670083522796631, "learning_rate": 7.311647931116957e-05, "loss": 0.0012, "step": 16316 }, { "epoch": 2.286895585143658, "grad_norm": 0.36447498202323914, "learning_rate": 7.310212867734991e-05, "loss": 0.044, "step": 16317 }, { "epoch": 2.2870357393132448, "grad_norm": 2.088002920150757, "learning_rate": 7.308777804353025e-05, "loss": 0.0318, "step": 16318 }, { "epoch": 2.2871758934828312, "grad_norm": 0.15274091064929962, "learning_rate": 7.30734274097106e-05, "loss": 0.0032, "step": 16319 }, { "epoch": 2.2873160476524177, "grad_norm": 2.037416696548462, "learning_rate": 7.305907677589094e-05, "loss": 0.1003, "step": 16320 }, { "epoch": 2.287456201822004, "grad_norm": 0.3803187906742096, "learning_rate": 
7.304472614207126e-05, "loss": 0.0916, "step": 16321 }, { "epoch": 2.2875963559915906, "grad_norm": 0.6628861427307129, "learning_rate": 7.30303755082516e-05, "loss": 0.0753, "step": 16322 }, { "epoch": 2.287736510161177, "grad_norm": 0.47854968905448914, "learning_rate": 7.301602487443195e-05, "loss": 0.0316, "step": 16323 }, { "epoch": 2.287876664330764, "grad_norm": 0.3248313367366791, "learning_rate": 7.300167424061229e-05, "loss": 0.0359, "step": 16324 }, { "epoch": 2.2880168185003504, "grad_norm": 0.2358720898628235, "learning_rate": 7.298732360679262e-05, "loss": 0.0341, "step": 16325 }, { "epoch": 2.288156972669937, "grad_norm": 0.08605606108903885, "learning_rate": 7.297297297297297e-05, "loss": 0.0053, "step": 16326 }, { "epoch": 2.2882971268395234, "grad_norm": 0.2811298370361328, "learning_rate": 7.29586223391533e-05, "loss": 0.0236, "step": 16327 }, { "epoch": 2.2884372810091103, "grad_norm": 0.33712103962898254, "learning_rate": 7.294427170533365e-05, "loss": 0.0477, "step": 16328 }, { "epoch": 2.2885774351786967, "grad_norm": 0.2777279317378998, "learning_rate": 7.292992107151399e-05, "loss": 0.0284, "step": 16329 }, { "epoch": 2.288717589348283, "grad_norm": 0.25908640027046204, "learning_rate": 7.291557043769433e-05, "loss": 0.0489, "step": 16330 }, { "epoch": 2.2888577435178696, "grad_norm": 0.1270764172077179, "learning_rate": 7.290121980387467e-05, "loss": 0.0164, "step": 16331 }, { "epoch": 2.288997897687456, "grad_norm": 0.21425771713256836, "learning_rate": 7.2886869170055e-05, "loss": 0.0117, "step": 16332 }, { "epoch": 2.2891380518570426, "grad_norm": 0.23401951789855957, "learning_rate": 7.287251853623534e-05, "loss": 0.0491, "step": 16333 }, { "epoch": 2.2892782060266295, "grad_norm": 0.3998829424381256, "learning_rate": 7.285816790241568e-05, "loss": 0.0284, "step": 16334 }, { "epoch": 2.289418360196216, "grad_norm": 0.08534751087427139, "learning_rate": 7.284381726859603e-05, "loss": 0.0142, "step": 16335 }, { "epoch": 
2.2895585143658024, "grad_norm": 0.36604636907577515, "learning_rate": 7.282946663477637e-05, "loss": 0.0983, "step": 16336 }, { "epoch": 2.289698668535389, "grad_norm": 0.08008506149053574, "learning_rate": 7.28151160009567e-05, "loss": 0.0088, "step": 16337 }, { "epoch": 2.2898388227049753, "grad_norm": 0.2987242639064789, "learning_rate": 7.280076536713704e-05, "loss": 0.0384, "step": 16338 }, { "epoch": 2.289978976874562, "grad_norm": 0.2669370770454407, "learning_rate": 7.278641473331738e-05, "loss": 0.0319, "step": 16339 }, { "epoch": 2.2901191310441487, "grad_norm": 0.08875153213739395, "learning_rate": 7.277206409949772e-05, "loss": 0.0087, "step": 16340 }, { "epoch": 2.290259285213735, "grad_norm": 0.06805703043937683, "learning_rate": 7.275771346567805e-05, "loss": 0.0164, "step": 16341 }, { "epoch": 2.2903994393833216, "grad_norm": 0.2065509557723999, "learning_rate": 7.274336283185841e-05, "loss": 0.0142, "step": 16342 }, { "epoch": 2.290539593552908, "grad_norm": 2.216456413269043, "learning_rate": 7.272901219803874e-05, "loss": 0.0278, "step": 16343 }, { "epoch": 2.2906797477224945, "grad_norm": 0.38408058881759644, "learning_rate": 7.271466156421908e-05, "loss": 0.0238, "step": 16344 }, { "epoch": 2.2908199018920814, "grad_norm": 0.1543806493282318, "learning_rate": 7.270031093039942e-05, "loss": 0.0136, "step": 16345 }, { "epoch": 2.290960056061668, "grad_norm": 0.4030522406101227, "learning_rate": 7.268596029657976e-05, "loss": 0.0333, "step": 16346 }, { "epoch": 2.2911002102312543, "grad_norm": 0.1624830812215805, "learning_rate": 7.26716096627601e-05, "loss": 0.0099, "step": 16347 }, { "epoch": 2.291240364400841, "grad_norm": 0.27439552545547485, "learning_rate": 7.265725902894043e-05, "loss": 0.0219, "step": 16348 }, { "epoch": 2.2913805185704277, "grad_norm": 0.6404179334640503, "learning_rate": 7.264290839512078e-05, "loss": 0.0535, "step": 16349 }, { "epoch": 2.291520672740014, "grad_norm": 0.11418481171131134, "learning_rate": 
7.262855776130112e-05, "loss": 0.019, "step": 16350 }, { "epoch": 2.2916608269096006, "grad_norm": 0.1833251416683197, "learning_rate": 7.261420712748146e-05, "loss": 0.0615, "step": 16351 }, { "epoch": 2.291800981079187, "grad_norm": 0.22833088040351868, "learning_rate": 7.25998564936618e-05, "loss": 0.0354, "step": 16352 }, { "epoch": 2.2919411352487735, "grad_norm": 0.33519577980041504, "learning_rate": 7.258550585984213e-05, "loss": 0.0465, "step": 16353 }, { "epoch": 2.29208128941836, "grad_norm": 0.040395405143499374, "learning_rate": 7.257115522602247e-05, "loss": 0.0025, "step": 16354 }, { "epoch": 2.292221443587947, "grad_norm": 0.13844487071037292, "learning_rate": 7.255680459220281e-05, "loss": 0.0079, "step": 16355 }, { "epoch": 2.2923615977575333, "grad_norm": 0.22500017285346985, "learning_rate": 7.254245395838316e-05, "loss": 0.0231, "step": 16356 }, { "epoch": 2.29250175192712, "grad_norm": 0.5496867895126343, "learning_rate": 7.252810332456349e-05, "loss": 0.1337, "step": 16357 }, { "epoch": 2.2926419060967063, "grad_norm": 0.4275802671909332, "learning_rate": 7.251375269074384e-05, "loss": 0.0364, "step": 16358 }, { "epoch": 2.292782060266293, "grad_norm": 0.505795419216156, "learning_rate": 7.249940205692417e-05, "loss": 0.0275, "step": 16359 }, { "epoch": 2.2929222144358796, "grad_norm": 0.12260551005601883, "learning_rate": 7.248505142310451e-05, "loss": 0.0248, "step": 16360 }, { "epoch": 2.293062368605466, "grad_norm": 0.14141011238098145, "learning_rate": 7.247070078928485e-05, "loss": 0.013, "step": 16361 }, { "epoch": 2.2932025227750525, "grad_norm": 0.7091691493988037, "learning_rate": 7.24563501554652e-05, "loss": 0.0433, "step": 16362 }, { "epoch": 2.293342676944639, "grad_norm": 0.9841392040252686, "learning_rate": 7.244199952164554e-05, "loss": 0.0728, "step": 16363 }, { "epoch": 2.2934828311142255, "grad_norm": 0.2956642508506775, "learning_rate": 7.242764888782587e-05, "loss": 0.045, "step": 16364 }, { "epoch": 2.2936229852838124, 
"grad_norm": 0.1715112179517746, "learning_rate": 7.241329825400622e-05, "loss": 0.0177, "step": 16365 }, { "epoch": 2.293763139453399, "grad_norm": 0.5652415156364441, "learning_rate": 7.239894762018655e-05, "loss": 0.0579, "step": 16366 }, { "epoch": 2.2939032936229853, "grad_norm": 0.11872896552085876, "learning_rate": 7.238459698636689e-05, "loss": 0.0048, "step": 16367 }, { "epoch": 2.2940434477925717, "grad_norm": 0.46811461448669434, "learning_rate": 7.237024635254723e-05, "loss": 0.131, "step": 16368 }, { "epoch": 2.294183601962158, "grad_norm": 1.2988077402114868, "learning_rate": 7.235589571872758e-05, "loss": 0.0951, "step": 16369 }, { "epoch": 2.294323756131745, "grad_norm": null, "learning_rate": 7.23415450849079e-05, "loss": 0.1121, "step": 16370 }, { "epoch": 2.2944639103013316, "grad_norm": 0.4405251443386078, "learning_rate": 7.23415450849079e-05, "loss": 0.0448, "step": 16371 }, { "epoch": 2.294604064470918, "grad_norm": 0.16474972665309906, "learning_rate": 7.232719445108825e-05, "loss": 0.0375, "step": 16372 }, { "epoch": 2.2947442186405045, "grad_norm": 0.173185795545578, "learning_rate": 7.231284381726859e-05, "loss": 0.0182, "step": 16373 }, { "epoch": 2.294884372810091, "grad_norm": 0.31825515627861023, "learning_rate": 7.229849318344893e-05, "loss": 0.0203, "step": 16374 }, { "epoch": 2.2950245269796774, "grad_norm": 0.2430480718612671, "learning_rate": 7.228414254962927e-05, "loss": 0.0107, "step": 16375 }, { "epoch": 2.2951646811492643, "grad_norm": 0.23854903876781464, "learning_rate": 7.22697919158096e-05, "loss": 0.0229, "step": 16376 }, { "epoch": 2.2953048353188508, "grad_norm": 0.28108254075050354, "learning_rate": 7.225544128198994e-05, "loss": 0.0238, "step": 16377 }, { "epoch": 2.2954449894884372, "grad_norm": 0.5692513585090637, "learning_rate": 7.224109064817029e-05, "loss": 0.0547, "step": 16378 }, { "epoch": 2.2955851436580237, "grad_norm": 0.27059462666511536, "learning_rate": 7.222674001435063e-05, "loss": 0.0507, "step": 
16379 }, { "epoch": 2.2957252978276106, "grad_norm": 0.2959274351596832, "learning_rate": 7.221238938053097e-05, "loss": 0.0656, "step": 16380 }, { "epoch": 2.295865451997197, "grad_norm": 0.17645077407360077, "learning_rate": 7.21980387467113e-05, "loss": 0.023, "step": 16381 }, { "epoch": 2.2960056061667835, "grad_norm": 0.11586400866508484, "learning_rate": 7.218368811289166e-05, "loss": 0.0098, "step": 16382 }, { "epoch": 2.29614576033637, "grad_norm": 0.11652031540870667, "learning_rate": 7.216933747907198e-05, "loss": 0.0116, "step": 16383 }, { "epoch": 2.2962859145059564, "grad_norm": 0.13560156524181366, "learning_rate": 7.215498684525233e-05, "loss": 0.0134, "step": 16384 }, { "epoch": 2.296426068675543, "grad_norm": 0.07631959021091461, "learning_rate": 7.214063621143267e-05, "loss": 0.0091, "step": 16385 }, { "epoch": 2.29656622284513, "grad_norm": 0.24796761572360992, "learning_rate": 7.212628557761301e-05, "loss": 0.0394, "step": 16386 }, { "epoch": 2.2967063770147162, "grad_norm": 0.46616461873054504, "learning_rate": 7.211193494379334e-05, "loss": 0.0587, "step": 16387 }, { "epoch": 2.2968465311843027, "grad_norm": 0.3734676241874695, "learning_rate": 7.209758430997368e-05, "loss": 0.0272, "step": 16388 }, { "epoch": 2.296986685353889, "grad_norm": 0.336534708738327, "learning_rate": 7.208323367615402e-05, "loss": 0.0687, "step": 16389 }, { "epoch": 2.297126839523476, "grad_norm": 0.2475401759147644, "learning_rate": 7.206888304233437e-05, "loss": 0.0117, "step": 16390 }, { "epoch": 2.2972669936930625, "grad_norm": 0.6748811602592468, "learning_rate": 7.205453240851471e-05, "loss": 0.0101, "step": 16391 }, { "epoch": 2.297407147862649, "grad_norm": 0.2517526149749756, "learning_rate": 7.204018177469504e-05, "loss": 0.0379, "step": 16392 }, { "epoch": 2.2975473020322355, "grad_norm": 0.1673772782087326, "learning_rate": 7.202583114087538e-05, "loss": 0.0148, "step": 16393 }, { "epoch": 2.297687456201822, "grad_norm": 0.7198790311813354, 
"learning_rate": 7.201148050705572e-05, "loss": 0.03, "step": 16394 }, { "epoch": 2.2978276103714084, "grad_norm": 0.09669344872236252, "learning_rate": 7.199712987323606e-05, "loss": 0.0135, "step": 16395 }, { "epoch": 2.2979677645409953, "grad_norm": 0.15343192219734192, "learning_rate": 7.19827792394164e-05, "loss": 0.0114, "step": 16396 }, { "epoch": 2.2981079187105817, "grad_norm": 0.22420866787433624, "learning_rate": 7.196842860559673e-05, "loss": 0.0189, "step": 16397 }, { "epoch": 2.298248072880168, "grad_norm": 0.3045988380908966, "learning_rate": 7.195407797177709e-05, "loss": 0.069, "step": 16398 }, { "epoch": 2.2983882270497547, "grad_norm": 0.2505404055118561, "learning_rate": 7.193972733795742e-05, "loss": 0.0485, "step": 16399 }, { "epoch": 2.298528381219341, "grad_norm": 0.3031961917877197, "learning_rate": 7.192537670413776e-05, "loss": 0.0406, "step": 16400 }, { "epoch": 2.298668535388928, "grad_norm": 0.17877697944641113, "learning_rate": 7.19110260703181e-05, "loss": 0.0236, "step": 16401 }, { "epoch": 2.2988086895585145, "grad_norm": 0.10980509221553802, "learning_rate": 7.189667543649844e-05, "loss": 0.0262, "step": 16402 }, { "epoch": 2.298948843728101, "grad_norm": 0.047711316496133804, "learning_rate": 7.188232480267877e-05, "loss": 0.0041, "step": 16403 }, { "epoch": 2.2990889978976874, "grad_norm": 0.2273794263601303, "learning_rate": 7.186797416885911e-05, "loss": 0.0515, "step": 16404 }, { "epoch": 2.299229152067274, "grad_norm": 0.40179771184921265, "learning_rate": 7.185362353503946e-05, "loss": 0.0349, "step": 16405 }, { "epoch": 2.2993693062368603, "grad_norm": 0.45591670274734497, "learning_rate": 7.18392729012198e-05, "loss": 0.0544, "step": 16406 }, { "epoch": 2.299509460406447, "grad_norm": 0.04963712394237518, "learning_rate": 7.182492226740014e-05, "loss": 0.0045, "step": 16407 }, { "epoch": 2.2996496145760337, "grad_norm": 0.1084568053483963, "learning_rate": 7.181057163358047e-05, "loss": 0.008, "step": 16408 }, { "epoch": 
2.29978976874562, "grad_norm": 0.43498218059539795, "learning_rate": 7.179622099976082e-05, "loss": 0.0438, "step": 16409 }, { "epoch": 2.2999299229152066, "grad_norm": 0.3103965222835541, "learning_rate": 7.178187036594115e-05, "loss": 0.0363, "step": 16410 }, { "epoch": 2.3000700770847935, "grad_norm": 0.18497788906097412, "learning_rate": 7.17675197321215e-05, "loss": 0.0108, "step": 16411 }, { "epoch": 2.30021023125438, "grad_norm": 0.17044216394424438, "learning_rate": 7.175316909830184e-05, "loss": 0.0301, "step": 16412 }, { "epoch": 2.3003503854239664, "grad_norm": 0.08862544596195221, "learning_rate": 7.173881846448218e-05, "loss": 0.0048, "step": 16413 }, { "epoch": 2.300490539593553, "grad_norm": 0.09470432996749878, "learning_rate": 7.172446783066252e-05, "loss": 0.0047, "step": 16414 }, { "epoch": 2.3006306937631393, "grad_norm": 0.9463232159614563, "learning_rate": 7.171011719684285e-05, "loss": 0.1228, "step": 16415 }, { "epoch": 2.300770847932726, "grad_norm": 0.13871662318706512, "learning_rate": 7.169576656302319e-05, "loss": 0.046, "step": 16416 }, { "epoch": 2.3009110021023127, "grad_norm": 0.7979040145874023, "learning_rate": 7.168141592920353e-05, "loss": 0.2621, "step": 16417 }, { "epoch": 2.301051156271899, "grad_norm": 1.1652113199234009, "learning_rate": 7.166706529538388e-05, "loss": 0.1517, "step": 16418 }, { "epoch": 2.3011913104414856, "grad_norm": 0.9334061741828918, "learning_rate": 7.16527146615642e-05, "loss": 0.1352, "step": 16419 }, { "epoch": 2.301331464611072, "grad_norm": 0.4707517623901367, "learning_rate": 7.163836402774455e-05, "loss": 0.0162, "step": 16420 }, { "epoch": 2.3014716187806585, "grad_norm": 0.13064664602279663, "learning_rate": 7.162401339392489e-05, "loss": 0.012, "step": 16421 }, { "epoch": 2.3016117729502454, "grad_norm": 0.22265811264514923, "learning_rate": 7.160966276010523e-05, "loss": 0.0158, "step": 16422 }, { "epoch": 2.301751927119832, "grad_norm": 0.3326801359653473, "learning_rate": 
7.159531212628557e-05, "loss": 0.0821, "step": 16423 }, { "epoch": 2.3018920812894184, "grad_norm": 0.2284831553697586, "learning_rate": 7.15809614924659e-05, "loss": 0.011, "step": 16424 }, { "epoch": 2.302032235459005, "grad_norm": 0.26042962074279785, "learning_rate": 7.156661085864626e-05, "loss": 0.0196, "step": 16425 }, { "epoch": 2.3021723896285913, "grad_norm": 0.26026061177253723, "learning_rate": 7.155226022482659e-05, "loss": 0.0512, "step": 16426 }, { "epoch": 2.302312543798178, "grad_norm": 0.09287726134061813, "learning_rate": 7.153790959100693e-05, "loss": 0.0058, "step": 16427 }, { "epoch": 2.3024526979677646, "grad_norm": 0.13203004002571106, "learning_rate": 7.152355895718727e-05, "loss": 0.0183, "step": 16428 }, { "epoch": 2.302592852137351, "grad_norm": 0.09747445583343506, "learning_rate": 7.150920832336761e-05, "loss": 0.013, "step": 16429 }, { "epoch": 2.3027330063069376, "grad_norm": 0.21391847729682922, "learning_rate": 7.149485768954795e-05, "loss": 0.0099, "step": 16430 }, { "epoch": 2.302873160476524, "grad_norm": 0.1325349509716034, "learning_rate": 7.148050705572828e-05, "loss": 0.0115, "step": 16431 }, { "epoch": 2.303013314646111, "grad_norm": 0.28195393085479736, "learning_rate": 7.146615642190863e-05, "loss": 0.0423, "step": 16432 }, { "epoch": 2.3031534688156974, "grad_norm": 0.4755287766456604, "learning_rate": 7.145180578808897e-05, "loss": 0.0469, "step": 16433 }, { "epoch": 2.303293622985284, "grad_norm": 0.4658677875995636, "learning_rate": 7.143745515426931e-05, "loss": 0.0675, "step": 16434 }, { "epoch": 2.3034337771548703, "grad_norm": 0.09921735525131226, "learning_rate": 7.142310452044964e-05, "loss": 0.008, "step": 16435 }, { "epoch": 2.3035739313244568, "grad_norm": 0.17093031108379364, "learning_rate": 7.140875388662998e-05, "loss": 0.0396, "step": 16436 }, { "epoch": 2.303714085494043, "grad_norm": 0.15276773273944855, "learning_rate": 7.139440325281032e-05, "loss": 0.0099, "step": 16437 }, { "epoch": 
2.30385423966363, "grad_norm": 0.16145962476730347, "learning_rate": 7.138005261899066e-05, "loss": 0.0207, "step": 16438 }, { "epoch": 2.3039943938332166, "grad_norm": 0.545348048210144, "learning_rate": 7.1365701985171e-05, "loss": 0.0933, "step": 16439 }, { "epoch": 2.304134548002803, "grad_norm": 0.23149898648262024, "learning_rate": 7.135135135135135e-05, "loss": 0.0337, "step": 16440 }, { "epoch": 2.3042747021723895, "grad_norm": 0.13768118619918823, "learning_rate": 7.133700071753169e-05, "loss": 0.0206, "step": 16441 }, { "epoch": 2.3044148563419764, "grad_norm": 0.3924516439437866, "learning_rate": 7.132265008371202e-05, "loss": 0.0431, "step": 16442 }, { "epoch": 2.304555010511563, "grad_norm": 0.274938702583313, "learning_rate": 7.130829944989236e-05, "loss": 0.0437, "step": 16443 }, { "epoch": 2.3046951646811493, "grad_norm": 0.17318908870220184, "learning_rate": 7.12939488160727e-05, "loss": 0.014, "step": 16444 }, { "epoch": 2.304835318850736, "grad_norm": 0.1927107274532318, "learning_rate": 7.127959818225305e-05, "loss": 0.0638, "step": 16445 }, { "epoch": 2.3049754730203222, "grad_norm": 0.32449328899383545, "learning_rate": 7.126524754843339e-05, "loss": 0.0459, "step": 16446 }, { "epoch": 2.3051156271899087, "grad_norm": 0.20378804206848145, "learning_rate": 7.125089691461372e-05, "loss": 0.0174, "step": 16447 }, { "epoch": 2.3052557813594956, "grad_norm": 0.08240165561437607, "learning_rate": 7.123654628079407e-05, "loss": 0.025, "step": 16448 }, { "epoch": 2.305395935529082, "grad_norm": 0.07225339114665985, "learning_rate": 7.12221956469744e-05, "loss": 0.004, "step": 16449 }, { "epoch": 2.3055360896986685, "grad_norm": 0.2991752624511719, "learning_rate": 7.120784501315474e-05, "loss": 0.0625, "step": 16450 }, { "epoch": 2.305676243868255, "grad_norm": 0.2670639157295227, "learning_rate": 7.119349437933508e-05, "loss": 0.0133, "step": 16451 }, { "epoch": 2.3058163980378414, "grad_norm": 0.24177685379981995, "learning_rate": 
7.117914374551543e-05, "loss": 0.0228, "step": 16452 }, { "epoch": 2.3059565522074283, "grad_norm": 0.20552009344100952, "learning_rate": 7.116479311169576e-05, "loss": 0.0173, "step": 16453 }, { "epoch": 2.306096706377015, "grad_norm": 0.09374982118606567, "learning_rate": 7.11504424778761e-05, "loss": 0.0101, "step": 16454 }, { "epoch": 2.3062368605466013, "grad_norm": 0.21385590732097626, "learning_rate": 7.113609184405644e-05, "loss": 0.0132, "step": 16455 }, { "epoch": 2.3063770147161877, "grad_norm": 0.26455244421958923, "learning_rate": 7.112174121023678e-05, "loss": 0.0389, "step": 16456 }, { "epoch": 2.306517168885774, "grad_norm": 0.07907243818044662, "learning_rate": 7.110739057641712e-05, "loss": 0.0077, "step": 16457 }, { "epoch": 2.306657323055361, "grad_norm": 0.5661972165107727, "learning_rate": 7.109303994259745e-05, "loss": 0.0192, "step": 16458 }, { "epoch": 2.3067974772249475, "grad_norm": 0.2675645053386688, "learning_rate": 7.10786893087778e-05, "loss": 0.0492, "step": 16459 }, { "epoch": 2.306937631394534, "grad_norm": 0.1989121437072754, "learning_rate": 7.106433867495814e-05, "loss": 0.0319, "step": 16460 }, { "epoch": 2.3070777855641205, "grad_norm": 0.20769351720809937, "learning_rate": 7.104998804113848e-05, "loss": 0.016, "step": 16461 }, { "epoch": 2.307217939733707, "grad_norm": 0.15482890605926514, "learning_rate": 7.103563740731882e-05, "loss": 0.0151, "step": 16462 }, { "epoch": 2.307358093903294, "grad_norm": 0.05521903559565544, "learning_rate": 7.102128677349915e-05, "loss": 0.0034, "step": 16463 }, { "epoch": 2.3074982480728803, "grad_norm": 0.11155806481838226, "learning_rate": 7.10069361396795e-05, "loss": 0.008, "step": 16464 }, { "epoch": 2.3076384022424667, "grad_norm": 0.18680016696453094, "learning_rate": 7.099258550585983e-05, "loss": 0.0313, "step": 16465 }, { "epoch": 2.307778556412053, "grad_norm": 0.42223021388053894, "learning_rate": 7.097823487204018e-05, "loss": 0.0118, "step": 16466 }, { "epoch": 
2.3079187105816397, "grad_norm": 1.411315679550171, "learning_rate": 7.096388423822052e-05, "loss": 0.1292, "step": 16467 }, { "epoch": 2.308058864751226, "grad_norm": 0.14049489796161652, "learning_rate": 7.094953360440086e-05, "loss": 0.0076, "step": 16468 }, { "epoch": 2.308199018920813, "grad_norm": 0.29055240750312805, "learning_rate": 7.093518297058119e-05, "loss": 0.0534, "step": 16469 }, { "epoch": 2.3083391730903995, "grad_norm": 2.0605289936065674, "learning_rate": 7.092083233676153e-05, "loss": 0.1397, "step": 16470 }, { "epoch": 2.308479327259986, "grad_norm": 0.11836738884449005, "learning_rate": 7.090648170294187e-05, "loss": 0.0119, "step": 16471 }, { "epoch": 2.3086194814295724, "grad_norm": 0.33166682720184326, "learning_rate": 7.089213106912222e-05, "loss": 0.0435, "step": 16472 }, { "epoch": 2.3087596355991593, "grad_norm": 0.16832590103149414, "learning_rate": 7.087778043530256e-05, "loss": 0.0142, "step": 16473 }, { "epoch": 2.3088997897687458, "grad_norm": 0.24544009566307068, "learning_rate": 7.086342980148289e-05, "loss": 0.0314, "step": 16474 }, { "epoch": 2.3090399439383322, "grad_norm": 0.13889656960964203, "learning_rate": 7.084907916766324e-05, "loss": 0.0216, "step": 16475 }, { "epoch": 2.3091800981079187, "grad_norm": 0.09510023891925812, "learning_rate": 7.083472853384357e-05, "loss": 0.0045, "step": 16476 }, { "epoch": 2.309320252277505, "grad_norm": 0.2245909720659256, "learning_rate": 7.082037790002391e-05, "loss": 0.0493, "step": 16477 }, { "epoch": 2.3094604064470916, "grad_norm": 2.259777784347534, "learning_rate": 7.080602726620425e-05, "loss": 0.0666, "step": 16478 }, { "epoch": 2.3096005606166785, "grad_norm": 0.4320547580718994, "learning_rate": 7.07916766323846e-05, "loss": 0.0143, "step": 16479 }, { "epoch": 2.309740714786265, "grad_norm": 0.45345693826675415, "learning_rate": 7.077732599856494e-05, "loss": 0.0528, "step": 16480 }, { "epoch": 2.3098808689558514, "grad_norm": 1.4806920289993286, "learning_rate": 
7.076297536474527e-05, "loss": 0.0282, "step": 16481 }, { "epoch": 2.310021023125438, "grad_norm": 0.08577558398246765, "learning_rate": 7.074862473092561e-05, "loss": 0.0083, "step": 16482 }, { "epoch": 2.3101611772950243, "grad_norm": 0.10238148272037506, "learning_rate": 7.073427409710595e-05, "loss": 0.0057, "step": 16483 }, { "epoch": 2.3103013314646113, "grad_norm": 0.34073325991630554, "learning_rate": 7.07199234632863e-05, "loss": 0.0291, "step": 16484 }, { "epoch": 2.3104414856341977, "grad_norm": 0.16331274807453156, "learning_rate": 7.070557282946662e-05, "loss": 0.0141, "step": 16485 }, { "epoch": 2.310581639803784, "grad_norm": 0.19547033309936523, "learning_rate": 7.069122219564696e-05, "loss": 0.0253, "step": 16486 }, { "epoch": 2.3107217939733706, "grad_norm": 0.6395230889320374, "learning_rate": 7.06768715618273e-05, "loss": 0.0128, "step": 16487 }, { "epoch": 2.310861948142957, "grad_norm": 0.197127565741539, "learning_rate": 7.066252092800765e-05, "loss": 0.0275, "step": 16488 }, { "epoch": 2.3110021023125435, "grad_norm": 0.33264726400375366, "learning_rate": 7.064817029418799e-05, "loss": 0.0436, "step": 16489 }, { "epoch": 2.3111422564821305, "grad_norm": 0.2518979012966156, "learning_rate": 7.063381966036832e-05, "loss": 0.0292, "step": 16490 }, { "epoch": 2.311282410651717, "grad_norm": 0.41997990012168884, "learning_rate": 7.061946902654867e-05, "loss": 0.0196, "step": 16491 }, { "epoch": 2.3114225648213034, "grad_norm": 0.4283941984176636, "learning_rate": 7.0605118392729e-05, "loss": 0.0316, "step": 16492 }, { "epoch": 2.31156271899089, "grad_norm": 0.19397379457950592, "learning_rate": 7.059076775890935e-05, "loss": 0.0207, "step": 16493 }, { "epoch": 2.3117028731604767, "grad_norm": 0.05297606810927391, "learning_rate": 7.057641712508969e-05, "loss": 0.0036, "step": 16494 }, { "epoch": 2.311843027330063, "grad_norm": 0.45471590757369995, "learning_rate": 7.056206649127003e-05, "loss": 0.0363, "step": 16495 }, { "epoch": 
2.3119831814996497, "grad_norm": 0.48851683735847473, "learning_rate": 7.054771585745037e-05, "loss": 0.0268, "step": 16496 }, { "epoch": 2.312123335669236, "grad_norm": 0.15683528780937195, "learning_rate": 7.05333652236307e-05, "loss": 0.0294, "step": 16497 }, { "epoch": 2.3122634898388226, "grad_norm": 0.48048844933509827, "learning_rate": 7.051901458981104e-05, "loss": 0.0434, "step": 16498 }, { "epoch": 2.312403644008409, "grad_norm": 0.44901102781295776, "learning_rate": 7.050466395599138e-05, "loss": 0.0883, "step": 16499 }, { "epoch": 2.312543798177996, "grad_norm": 0.22067351639270782, "learning_rate": 7.049031332217173e-05, "loss": 0.0188, "step": 16500 }, { "epoch": 2.3126839523475824, "grad_norm": 0.2735159695148468, "learning_rate": 7.047596268835206e-05, "loss": 0.0562, "step": 16501 }, { "epoch": 2.312824106517169, "grad_norm": 0.5904778242111206, "learning_rate": 7.04616120545324e-05, "loss": 0.0232, "step": 16502 }, { "epoch": 2.3129642606867553, "grad_norm": 0.3933092951774597, "learning_rate": 7.044726142071274e-05, "loss": 0.043, "step": 16503 }, { "epoch": 2.313104414856342, "grad_norm": 0.12955988943576813, "learning_rate": 7.043291078689308e-05, "loss": 0.0085, "step": 16504 }, { "epoch": 2.3132445690259287, "grad_norm": 0.10079353302717209, "learning_rate": 7.041856015307342e-05, "loss": 0.0137, "step": 16505 }, { "epoch": 2.313384723195515, "grad_norm": 0.20393581688404083, "learning_rate": 7.040420951925375e-05, "loss": 0.0246, "step": 16506 }, { "epoch": 2.3135248773651016, "grad_norm": 0.1674930900335312, "learning_rate": 7.038985888543411e-05, "loss": 0.0541, "step": 16507 }, { "epoch": 2.313665031534688, "grad_norm": 0.34796109795570374, "learning_rate": 7.037550825161444e-05, "loss": 0.0149, "step": 16508 }, { "epoch": 2.3138051857042745, "grad_norm": 0.27727648615837097, "learning_rate": 7.036115761779478e-05, "loss": 0.0274, "step": 16509 }, { "epoch": 2.3139453398738614, "grad_norm": 0.8513749837875366, "learning_rate": 
7.034680698397512e-05, "loss": 0.0098, "step": 16510 }, { "epoch": 2.314085494043448, "grad_norm": 0.42540132999420166, "learning_rate": 7.033245635015546e-05, "loss": 0.0627, "step": 16511 }, { "epoch": 2.3142256482130343, "grad_norm": 0.18111921846866608, "learning_rate": 7.03181057163358e-05, "loss": 0.0758, "step": 16512 }, { "epoch": 2.314365802382621, "grad_norm": 0.2865699827671051, "learning_rate": 7.030375508251613e-05, "loss": 0.0328, "step": 16513 }, { "epoch": 2.3145059565522073, "grad_norm": 0.21320120990276337, "learning_rate": 7.028940444869648e-05, "loss": 0.0504, "step": 16514 }, { "epoch": 2.314646110721794, "grad_norm": 1.5606576204299927, "learning_rate": 7.027505381487682e-05, "loss": 0.1099, "step": 16515 }, { "epoch": 2.3147862648913806, "grad_norm": 0.4962499737739563, "learning_rate": 7.026070318105716e-05, "loss": 0.0322, "step": 16516 }, { "epoch": 2.314926419060967, "grad_norm": 0.5625011920928955, "learning_rate": 7.024635254723749e-05, "loss": 0.0445, "step": 16517 }, { "epoch": 2.3150665732305535, "grad_norm": 1.182071566581726, "learning_rate": 7.023200191341784e-05, "loss": 0.1061, "step": 16518 }, { "epoch": 2.31520672740014, "grad_norm": 1.0522364377975464, "learning_rate": 7.021765127959817e-05, "loss": 0.1033, "step": 16519 }, { "epoch": 2.3153468815697265, "grad_norm": 2.5701210498809814, "learning_rate": 7.020330064577851e-05, "loss": 0.2004, "step": 16520 }, { "epoch": 2.3154870357393134, "grad_norm": 0.0727614015340805, "learning_rate": 7.018895001195886e-05, "loss": 0.0098, "step": 16521 }, { "epoch": 2.3156271899089, "grad_norm": 0.03042016364634037, "learning_rate": 7.01745993781392e-05, "loss": 0.0025, "step": 16522 }, { "epoch": 2.3157673440784863, "grad_norm": 0.16228154301643372, "learning_rate": 7.016024874431954e-05, "loss": 0.01, "step": 16523 }, { "epoch": 2.3159074982480727, "grad_norm": 0.16910363733768463, "learning_rate": 7.014589811049987e-05, "loss": 0.0338, "step": 16524 }, { "epoch": 2.3160476524176596, 
"grad_norm": 0.3106030821800232, "learning_rate": 7.013154747668021e-05, "loss": 0.0345, "step": 16525 }, { "epoch": 2.316187806587246, "grad_norm": 0.3267758786678314, "learning_rate": 7.011719684286055e-05, "loss": 0.0551, "step": 16526 }, { "epoch": 2.3163279607568326, "grad_norm": 0.26353809237480164, "learning_rate": 7.01028462090409e-05, "loss": 0.0176, "step": 16527 }, { "epoch": 2.316468114926419, "grad_norm": 0.26869645714759827, "learning_rate": 7.008849557522124e-05, "loss": 0.0139, "step": 16528 }, { "epoch": 2.3166082690960055, "grad_norm": 0.18929380178451538, "learning_rate": 7.007414494140157e-05, "loss": 0.0237, "step": 16529 }, { "epoch": 2.316748423265592, "grad_norm": 0.19186298549175262, "learning_rate": 7.005979430758191e-05, "loss": 0.0155, "step": 16530 }, { "epoch": 2.316888577435179, "grad_norm": 0.3253980278968811, "learning_rate": 7.004544367376225e-05, "loss": 0.0572, "step": 16531 }, { "epoch": 2.3170287316047653, "grad_norm": 0.05737369880080223, "learning_rate": 7.003109303994259e-05, "loss": 0.0045, "step": 16532 }, { "epoch": 2.3171688857743518, "grad_norm": 0.27716708183288574, "learning_rate": 7.001674240612292e-05, "loss": 0.0634, "step": 16533 }, { "epoch": 2.317309039943938, "grad_norm": 0.07707148045301437, "learning_rate": 7.000239177230328e-05, "loss": 0.0096, "step": 16534 }, { "epoch": 2.317449194113525, "grad_norm": 0.4619060158729553, "learning_rate": 6.99880411384836e-05, "loss": 0.0681, "step": 16535 }, { "epoch": 2.3175893482831116, "grad_norm": 0.12493345886468887, "learning_rate": 6.997369050466395e-05, "loss": 0.0299, "step": 16536 }, { "epoch": 2.317729502452698, "grad_norm": 0.1722351759672165, "learning_rate": 6.995933987084429e-05, "loss": 0.0179, "step": 16537 }, { "epoch": 2.3178696566222845, "grad_norm": 0.061598047614097595, "learning_rate": 6.994498923702463e-05, "loss": 0.0168, "step": 16538 }, { "epoch": 2.318009810791871, "grad_norm": 0.05584261566400528, "learning_rate": 6.993063860320497e-05, "loss": 
0.0095, "step": 16539 }, { "epoch": 2.3181499649614574, "grad_norm": 0.2388603240251541, "learning_rate": 6.99162879693853e-05, "loss": 0.0504, "step": 16540 }, { "epoch": 2.3182901191310443, "grad_norm": 0.19127263128757477, "learning_rate": 6.990193733556564e-05, "loss": 0.027, "step": 16541 }, { "epoch": 2.318430273300631, "grad_norm": 0.16364222764968872, "learning_rate": 6.988758670174599e-05, "loss": 0.0404, "step": 16542 }, { "epoch": 2.3185704274702172, "grad_norm": 0.24606789648532867, "learning_rate": 6.987323606792633e-05, "loss": 0.0081, "step": 16543 }, { "epoch": 2.3187105816398037, "grad_norm": 0.1390107274055481, "learning_rate": 6.985888543410667e-05, "loss": 0.0408, "step": 16544 }, { "epoch": 2.31885073580939, "grad_norm": 0.08847220987081528, "learning_rate": 6.9844534800287e-05, "loss": 0.009, "step": 16545 }, { "epoch": 2.318990889978977, "grad_norm": 0.1411711573600769, "learning_rate": 6.983018416646734e-05, "loss": 0.014, "step": 16546 }, { "epoch": 2.3191310441485635, "grad_norm": 0.2588651180267334, "learning_rate": 6.981583353264768e-05, "loss": 0.0444, "step": 16547 }, { "epoch": 2.31927119831815, "grad_norm": 0.25281545519828796, "learning_rate": 6.980148289882803e-05, "loss": 0.0132, "step": 16548 }, { "epoch": 2.3194113524877364, "grad_norm": 0.2586451768875122, "learning_rate": 6.978713226500837e-05, "loss": 0.0109, "step": 16549 }, { "epoch": 2.319551506657323, "grad_norm": 0.25907331705093384, "learning_rate": 6.977278163118871e-05, "loss": 0.047, "step": 16550 }, { "epoch": 2.3196916608269094, "grad_norm": 0.2584594488143921, "learning_rate": 6.975843099736904e-05, "loss": 0.0527, "step": 16551 }, { "epoch": 2.3198318149964963, "grad_norm": 0.24839650094509125, "learning_rate": 6.974408036354938e-05, "loss": 0.0759, "step": 16552 }, { "epoch": 2.3199719691660827, "grad_norm": 0.10946990549564362, "learning_rate": 6.972972972972972e-05, "loss": 0.012, "step": 16553 }, { "epoch": 2.320112123335669, "grad_norm": 0.18611249327659607, 
"learning_rate": 6.971537909591007e-05, "loss": 0.0539, "step": 16554 }, { "epoch": 2.3202522775052556, "grad_norm": 0.11179360002279282, "learning_rate": 6.970102846209041e-05, "loss": 0.0106, "step": 16555 }, { "epoch": 2.3203924316748425, "grad_norm": 0.1899394989013672, "learning_rate": 6.968667782827074e-05, "loss": 0.0348, "step": 16556 }, { "epoch": 2.320532585844429, "grad_norm": 0.11880426853895187, "learning_rate": 6.967232719445109e-05, "loss": 0.0075, "step": 16557 }, { "epoch": 2.3206727400140155, "grad_norm": 0.2622591555118561, "learning_rate": 6.965797656063142e-05, "loss": 0.0092, "step": 16558 }, { "epoch": 2.320812894183602, "grad_norm": 0.2903350591659546, "learning_rate": 6.964362592681176e-05, "loss": 0.0953, "step": 16559 }, { "epoch": 2.3209530483531884, "grad_norm": 0.3928678631782532, "learning_rate": 6.96292752929921e-05, "loss": 0.036, "step": 16560 }, { "epoch": 2.321093202522775, "grad_norm": 0.08735954016447067, "learning_rate": 6.961492465917245e-05, "loss": 0.009, "step": 16561 }, { "epoch": 2.3212333566923617, "grad_norm": 0.163041889667511, "learning_rate": 6.960057402535279e-05, "loss": 0.0136, "step": 16562 }, { "epoch": 2.321373510861948, "grad_norm": 0.37632039189338684, "learning_rate": 6.958622339153312e-05, "loss": 0.0776, "step": 16563 }, { "epoch": 2.3215136650315347, "grad_norm": 0.13588887453079224, "learning_rate": 6.957187275771346e-05, "loss": 0.0127, "step": 16564 }, { "epoch": 2.321653819201121, "grad_norm": 0.24518993496894836, "learning_rate": 6.95575221238938e-05, "loss": 0.0178, "step": 16565 }, { "epoch": 2.3217939733707076, "grad_norm": 0.15324679017066956, "learning_rate": 6.954317149007414e-05, "loss": 0.0157, "step": 16566 }, { "epoch": 2.3219341275402945, "grad_norm": 1.1067732572555542, "learning_rate": 6.952882085625447e-05, "loss": 0.0873, "step": 16567 }, { "epoch": 2.322074281709881, "grad_norm": 1.524893045425415, "learning_rate": 6.951447022243481e-05, "loss": 0.2479, "step": 16568 }, { "epoch": 
2.3222144358794674, "grad_norm": 0.33871299028396606, "learning_rate": 6.950011958861516e-05, "loss": 0.0112, "step": 16569 }, { "epoch": 2.322354590049054, "grad_norm": 0.41682717204093933, "learning_rate": 6.94857689547955e-05, "loss": 0.011, "step": 16570 }, { "epoch": 2.3224947442186403, "grad_norm": 0.1875259429216385, "learning_rate": 6.947141832097584e-05, "loss": 0.023, "step": 16571 }, { "epoch": 2.3226348983882272, "grad_norm": 0.153966024518013, "learning_rate": 6.945706768715617e-05, "loss": 0.0126, "step": 16572 }, { "epoch": 2.3227750525578137, "grad_norm": 0.37229520082473755, "learning_rate": 6.944271705333652e-05, "loss": 0.0285, "step": 16573 }, { "epoch": 2.3229152067274, "grad_norm": 0.264862984418869, "learning_rate": 6.942836641951685e-05, "loss": 0.0537, "step": 16574 }, { "epoch": 2.3230553608969866, "grad_norm": 0.10851848870515823, "learning_rate": 6.94140157856972e-05, "loss": 0.0107, "step": 16575 }, { "epoch": 2.323195515066573, "grad_norm": 0.44682520627975464, "learning_rate": 6.939966515187754e-05, "loss": 0.052, "step": 16576 }, { "epoch": 2.32333566923616, "grad_norm": 0.08117534965276718, "learning_rate": 6.938531451805788e-05, "loss": 0.0085, "step": 16577 }, { "epoch": 2.3234758234057464, "grad_norm": 0.5897807478904724, "learning_rate": 6.937096388423822e-05, "loss": 0.0725, "step": 16578 }, { "epoch": 2.323615977575333, "grad_norm": 0.36941468715667725, "learning_rate": 6.935661325041855e-05, "loss": 0.0315, "step": 16579 }, { "epoch": 2.3237561317449194, "grad_norm": 0.22469641268253326, "learning_rate": 6.934226261659889e-05, "loss": 0.0203, "step": 16580 }, { "epoch": 2.323896285914506, "grad_norm": 0.16602832078933716, "learning_rate": 6.932791198277923e-05, "loss": 0.0251, "step": 16581 }, { "epoch": 2.3240364400840923, "grad_norm": 0.17652295529842377, "learning_rate": 6.931356134895958e-05, "loss": 0.015, "step": 16582 }, { "epoch": 2.324176594253679, "grad_norm": 0.08235567063093185, "learning_rate": 
6.92992107151399e-05, "loss": 0.0184, "step": 16583 }, { "epoch": 2.3243167484232656, "grad_norm": 0.13511085510253906, "learning_rate": 6.928486008132025e-05, "loss": 0.0075, "step": 16584 }, { "epoch": 2.324456902592852, "grad_norm": 0.3036658465862274, "learning_rate": 6.927050944750059e-05, "loss": 0.0202, "step": 16585 }, { "epoch": 2.3245970567624386, "grad_norm": 0.25037023425102234, "learning_rate": 6.925615881368093e-05, "loss": 0.0203, "step": 16586 }, { "epoch": 2.3247372109320255, "grad_norm": 0.24066416919231415, "learning_rate": 6.924180817986127e-05, "loss": 0.0532, "step": 16587 }, { "epoch": 2.324877365101612, "grad_norm": 0.22306126356124878, "learning_rate": 6.922745754604162e-05, "loss": 0.0322, "step": 16588 }, { "epoch": 2.3250175192711984, "grad_norm": 0.10941849648952484, "learning_rate": 6.921310691222196e-05, "loss": 0.0044, "step": 16589 }, { "epoch": 2.325157673440785, "grad_norm": 0.04568088799715042, "learning_rate": 6.919875627840229e-05, "loss": 0.0037, "step": 16590 }, { "epoch": 2.3252978276103713, "grad_norm": 0.08783835917711258, "learning_rate": 6.918440564458263e-05, "loss": 0.0028, "step": 16591 }, { "epoch": 2.3254379817799578, "grad_norm": 0.16091756522655487, "learning_rate": 6.917005501076297e-05, "loss": 0.0065, "step": 16592 }, { "epoch": 2.3255781359495447, "grad_norm": 0.07261763513088226, "learning_rate": 6.915570437694331e-05, "loss": 0.0055, "step": 16593 }, { "epoch": 2.325718290119131, "grad_norm": 0.2247370183467865, "learning_rate": 6.914135374312365e-05, "loss": 0.0258, "step": 16594 }, { "epoch": 2.3258584442887176, "grad_norm": 0.28299105167388916, "learning_rate": 6.912700310930398e-05, "loss": 0.0244, "step": 16595 }, { "epoch": 2.325998598458304, "grad_norm": 0.21893605589866638, "learning_rate": 6.911265247548433e-05, "loss": 0.0374, "step": 16596 }, { "epoch": 2.3261387526278905, "grad_norm": 0.22427575290203094, "learning_rate": 6.909830184166467e-05, "loss": 0.0141, "step": 16597 }, { "epoch": 
2.3262789067974774, "grad_norm": 0.2727227210998535, "learning_rate": 6.908395120784501e-05, "loss": 0.0345, "step": 16598 }, { "epoch": 2.326419060967064, "grad_norm": 0.2515237331390381, "learning_rate": 6.906960057402534e-05, "loss": 0.0283, "step": 16599 }, { "epoch": 2.3265592151366503, "grad_norm": 0.25063416361808777, "learning_rate": 6.90552499402057e-05, "loss": 0.0216, "step": 16600 }, { "epoch": 2.3266993693062368, "grad_norm": 0.5554426312446594, "learning_rate": 6.904089930638602e-05, "loss": 0.0457, "step": 16601 }, { "epoch": 2.3268395234758232, "grad_norm": 0.1692066490650177, "learning_rate": 6.902654867256636e-05, "loss": 0.0195, "step": 16602 }, { "epoch": 2.32697967764541, "grad_norm": 0.251560240983963, "learning_rate": 6.90121980387467e-05, "loss": 0.0163, "step": 16603 }, { "epoch": 2.3271198318149966, "grad_norm": 0.14772725105285645, "learning_rate": 6.899784740492705e-05, "loss": 0.0194, "step": 16604 }, { "epoch": 2.327259985984583, "grad_norm": 0.22039751708507538, "learning_rate": 6.898349677110739e-05, "loss": 0.0158, "step": 16605 }, { "epoch": 2.3274001401541695, "grad_norm": 0.2137940227985382, "learning_rate": 6.896914613728772e-05, "loss": 0.0368, "step": 16606 }, { "epoch": 2.327540294323756, "grad_norm": 0.11543194204568863, "learning_rate": 6.895479550346806e-05, "loss": 0.0105, "step": 16607 }, { "epoch": 2.327680448493343, "grad_norm": 0.48380768299102783, "learning_rate": 6.89404448696484e-05, "loss": 0.042, "step": 16608 }, { "epoch": 2.3278206026629293, "grad_norm": 0.3692674934864044, "learning_rate": 6.892609423582875e-05, "loss": 0.0426, "step": 16609 }, { "epoch": 2.327960756832516, "grad_norm": 0.16432121396064758, "learning_rate": 6.891174360200909e-05, "loss": 0.0759, "step": 16610 }, { "epoch": 2.3281009110021023, "grad_norm": 0.05551853030920029, "learning_rate": 6.889739296818942e-05, "loss": 0.007, "step": 16611 }, { "epoch": 2.3282410651716887, "grad_norm": 0.15788644552230835, "learning_rate": 
6.888304233436976e-05, "loss": 0.0082, "step": 16612 }, { "epoch": 2.328381219341275, "grad_norm": 0.351199209690094, "learning_rate": 6.88686917005501e-05, "loss": 0.0183, "step": 16613 }, { "epoch": 2.328521373510862, "grad_norm": 0.5869712829589844, "learning_rate": 6.885434106673044e-05, "loss": 0.0302, "step": 16614 }, { "epoch": 2.3286615276804485, "grad_norm": 1.0614866018295288, "learning_rate": 6.883999043291077e-05, "loss": 0.0993, "step": 16615 }, { "epoch": 2.328801681850035, "grad_norm": 0.5097002387046814, "learning_rate": 6.882563979909113e-05, "loss": 0.0321, "step": 16616 }, { "epoch": 2.3289418360196215, "grad_norm": 1.2023595571517944, "learning_rate": 6.881128916527146e-05, "loss": 0.0556, "step": 16617 }, { "epoch": 2.3290819901892084, "grad_norm": 1.4112474918365479, "learning_rate": 6.87969385314518e-05, "loss": 0.1111, "step": 16618 }, { "epoch": 2.329222144358795, "grad_norm": 0.38839244842529297, "learning_rate": 6.878258789763214e-05, "loss": 0.0271, "step": 16619 }, { "epoch": 2.3293622985283813, "grad_norm": 2.6200151443481445, "learning_rate": 6.876823726381248e-05, "loss": 0.0648, "step": 16620 }, { "epoch": 2.3295024526979677, "grad_norm": 0.07727036625146866, "learning_rate": 6.875388662999282e-05, "loss": 0.0226, "step": 16621 }, { "epoch": 2.329642606867554, "grad_norm": 0.5386391878128052, "learning_rate": 6.873953599617315e-05, "loss": 0.0478, "step": 16622 }, { "epoch": 2.3297827610371407, "grad_norm": 0.23182600736618042, "learning_rate": 6.872518536235351e-05, "loss": 0.0566, "step": 16623 }, { "epoch": 2.3299229152067276, "grad_norm": 0.31552937626838684, "learning_rate": 6.871083472853384e-05, "loss": 0.0298, "step": 16624 }, { "epoch": 2.330063069376314, "grad_norm": 0.32090187072753906, "learning_rate": 6.869648409471418e-05, "loss": 0.0298, "step": 16625 }, { "epoch": 2.3302032235459005, "grad_norm": 0.07858520746231079, "learning_rate": 6.868213346089452e-05, "loss": 0.0051, "step": 16626 }, { "epoch": 
2.330343377715487, "grad_norm": 0.20682689547538757, "learning_rate": 6.866778282707486e-05, "loss": 0.0102, "step": 16627 }, { "epoch": 2.3304835318850734, "grad_norm": 0.23187360167503357, "learning_rate": 6.865343219325519e-05, "loss": 0.0336, "step": 16628 }, { "epoch": 2.3306236860546603, "grad_norm": 0.10584904253482819, "learning_rate": 6.863908155943553e-05, "loss": 0.0204, "step": 16629 }, { "epoch": 2.3307638402242468, "grad_norm": 0.225706547498703, "learning_rate": 6.862473092561588e-05, "loss": 0.0156, "step": 16630 }, { "epoch": 2.3309039943938332, "grad_norm": 0.4158373475074768, "learning_rate": 6.861038029179622e-05, "loss": 0.0235, "step": 16631 }, { "epoch": 2.3310441485634197, "grad_norm": 0.1999945193529129, "learning_rate": 6.859602965797656e-05, "loss": 0.0293, "step": 16632 }, { "epoch": 2.331184302733006, "grad_norm": 0.2068648487329483, "learning_rate": 6.858167902415689e-05, "loss": 0.0451, "step": 16633 }, { "epoch": 2.331324456902593, "grad_norm": 0.5172854661941528, "learning_rate": 6.856732839033723e-05, "loss": 0.0407, "step": 16634 }, { "epoch": 2.3314646110721795, "grad_norm": 0.08726966381072998, "learning_rate": 6.855297775651757e-05, "loss": 0.0148, "step": 16635 }, { "epoch": 2.331604765241766, "grad_norm": 0.06812809407711029, "learning_rate": 6.853862712269791e-05, "loss": 0.0053, "step": 16636 }, { "epoch": 2.3317449194113524, "grad_norm": 0.0888265073299408, "learning_rate": 6.852427648887826e-05, "loss": 0.0069, "step": 16637 }, { "epoch": 2.331885073580939, "grad_norm": 0.27313244342803955, "learning_rate": 6.850992585505859e-05, "loss": 0.016, "step": 16638 }, { "epoch": 2.332025227750526, "grad_norm": 0.28939148783683777, "learning_rate": 6.849557522123894e-05, "loss": 0.0227, "step": 16639 }, { "epoch": 2.3321653819201122, "grad_norm": 0.12822197377681732, "learning_rate": 6.848122458741927e-05, "loss": 0.013, "step": 16640 }, { "epoch": 2.3323055360896987, "grad_norm": 0.08915102481842041, "learning_rate": 
6.846687395359961e-05, "loss": 0.0059, "step": 16641 }, { "epoch": 2.332445690259285, "grad_norm": 0.1277698278427124, "learning_rate": 6.845252331977995e-05, "loss": 0.0072, "step": 16642 }, { "epoch": 2.3325858444288716, "grad_norm": 0.4313455820083618, "learning_rate": 6.84381726859603e-05, "loss": 0.0311, "step": 16643 }, { "epoch": 2.332725998598458, "grad_norm": 0.332746297121048, "learning_rate": 6.842382205214062e-05, "loss": 0.0338, "step": 16644 }, { "epoch": 2.332866152768045, "grad_norm": 0.29175901412963867, "learning_rate": 6.840947141832097e-05, "loss": 0.0207, "step": 16645 }, { "epoch": 2.3330063069376314, "grad_norm": 0.17617519199848175, "learning_rate": 6.839512078450131e-05, "loss": 0.0188, "step": 16646 }, { "epoch": 2.333146461107218, "grad_norm": 0.321696400642395, "learning_rate": 6.838077015068165e-05, "loss": 0.0365, "step": 16647 }, { "epoch": 2.3332866152768044, "grad_norm": 0.1628393679857254, "learning_rate": 6.836641951686199e-05, "loss": 0.0272, "step": 16648 }, { "epoch": 2.3334267694463913, "grad_norm": 0.10594715923070908, "learning_rate": 6.835206888304232e-05, "loss": 0.0066, "step": 16649 }, { "epoch": 2.3335669236159777, "grad_norm": 0.7272409796714783, "learning_rate": 6.833771824922266e-05, "loss": 0.0346, "step": 16650 }, { "epoch": 2.333707077785564, "grad_norm": 0.6533247232437134, "learning_rate": 6.8323367615403e-05, "loss": 0.042, "step": 16651 }, { "epoch": 2.3338472319551506, "grad_norm": 0.10759581625461578, "learning_rate": 6.830901698158335e-05, "loss": 0.0171, "step": 16652 }, { "epoch": 2.333987386124737, "grad_norm": 0.5260326862335205, "learning_rate": 6.829466634776369e-05, "loss": 0.0673, "step": 16653 }, { "epoch": 2.3341275402943236, "grad_norm": 0.2641003429889679, "learning_rate": 6.828031571394402e-05, "loss": 0.0217, "step": 16654 }, { "epoch": 2.3342676944639105, "grad_norm": 0.18134334683418274, "learning_rate": 6.826596508012437e-05, "loss": 0.0119, "step": 16655 }, { "epoch": 2.334407848633497, 
"grad_norm": 0.748595654964447, "learning_rate": 6.82516144463047e-05, "loss": 0.0551, "step": 16656 }, { "epoch": 2.3345480028030834, "grad_norm": 0.5826172828674316, "learning_rate": 6.823726381248505e-05, "loss": 0.0509, "step": 16657 }, { "epoch": 2.33468815697267, "grad_norm": 0.22067295014858246, "learning_rate": 6.822291317866539e-05, "loss": 0.028, "step": 16658 }, { "epoch": 2.3348283111422563, "grad_norm": 0.44468802213668823, "learning_rate": 6.820856254484573e-05, "loss": 0.0376, "step": 16659 }, { "epoch": 2.334968465311843, "grad_norm": 0.5872768759727478, "learning_rate": 6.819421191102606e-05, "loss": 0.0672, "step": 16660 }, { "epoch": 2.3351086194814297, "grad_norm": 0.05444268882274628, "learning_rate": 6.81798612772064e-05, "loss": 0.0051, "step": 16661 }, { "epoch": 2.335248773651016, "grad_norm": 0.23349133133888245, "learning_rate": 6.816551064338674e-05, "loss": 0.0094, "step": 16662 }, { "epoch": 2.3353889278206026, "grad_norm": 0.21105697751045227, "learning_rate": 6.815116000956708e-05, "loss": 0.003, "step": 16663 }, { "epoch": 2.335529081990189, "grad_norm": 0.48818713426589966, "learning_rate": 6.813680937574743e-05, "loss": 0.0213, "step": 16664 }, { "epoch": 2.3356692361597755, "grad_norm": 0.726647138595581, "learning_rate": 6.812245874192775e-05, "loss": 0.0158, "step": 16665 }, { "epoch": 2.3358093903293624, "grad_norm": 0.3100503981113434, "learning_rate": 6.810810810810811e-05, "loss": 0.0227, "step": 16666 }, { "epoch": 2.335949544498949, "grad_norm": 1.2361353635787964, "learning_rate": 6.809375747428844e-05, "loss": 0.0699, "step": 16667 }, { "epoch": 2.3360896986685353, "grad_norm": 2.3035330772399902, "learning_rate": 6.807940684046878e-05, "loss": 0.17, "step": 16668 }, { "epoch": 2.336229852838122, "grad_norm": 1.7631738185882568, "learning_rate": 6.806505620664912e-05, "loss": 0.0744, "step": 16669 }, { "epoch": 2.3363700070077087, "grad_norm": 2.252312421798706, "learning_rate": 6.805070557282947e-05, "loss": 0.4268, 
"step": 16670 }, { "epoch": 2.336510161177295, "grad_norm": 0.48670411109924316, "learning_rate": 6.803635493900981e-05, "loss": 0.0744, "step": 16671 }, { "epoch": 2.3366503153468816, "grad_norm": 0.2907535135746002, "learning_rate": 6.802200430519014e-05, "loss": 0.0518, "step": 16672 }, { "epoch": 2.336790469516468, "grad_norm": 0.04928178712725639, "learning_rate": 6.800765367137048e-05, "loss": 0.0074, "step": 16673 }, { "epoch": 2.3369306236860545, "grad_norm": 0.09691784530878067, "learning_rate": 6.799330303755082e-05, "loss": 0.0054, "step": 16674 }, { "epoch": 2.337070777855641, "grad_norm": 0.08753497153520584, "learning_rate": 6.797895240373116e-05, "loss": 0.0209, "step": 16675 }, { "epoch": 2.337210932025228, "grad_norm": 0.23312413692474365, "learning_rate": 6.796460176991149e-05, "loss": 0.0241, "step": 16676 }, { "epoch": 2.3373510861948144, "grad_norm": 0.7392073273658752, "learning_rate": 6.795025113609183e-05, "loss": 0.0297, "step": 16677 }, { "epoch": 2.337491240364401, "grad_norm": 0.46310803294181824, "learning_rate": 6.793590050227218e-05, "loss": 0.0458, "step": 16678 }, { "epoch": 2.3376313945339873, "grad_norm": 0.08045361191034317, "learning_rate": 6.792154986845252e-05, "loss": 0.011, "step": 16679 }, { "epoch": 2.337771548703574, "grad_norm": 0.34028133749961853, "learning_rate": 6.790719923463286e-05, "loss": 0.0525, "step": 16680 }, { "epoch": 2.3379117028731606, "grad_norm": 0.08520636707544327, "learning_rate": 6.789284860081319e-05, "loss": 0.0301, "step": 16681 }, { "epoch": 2.338051857042747, "grad_norm": 0.811071515083313, "learning_rate": 6.787849796699354e-05, "loss": 0.0564, "step": 16682 }, { "epoch": 2.3381920112123336, "grad_norm": 0.18612436950206757, "learning_rate": 6.786414733317387e-05, "loss": 0.016, "step": 16683 }, { "epoch": 2.33833216538192, "grad_norm": 0.12141390889883041, "learning_rate": 6.784979669935421e-05, "loss": 0.0138, "step": 16684 }, { "epoch": 2.3384723195515065, "grad_norm": 0.5466933250427246, 
"learning_rate": 6.783544606553456e-05, "loss": 0.05, "step": 16685 }, { "epoch": 2.3386124737210934, "grad_norm": 0.4337654709815979, "learning_rate": 6.78210954317149e-05, "loss": 0.0381, "step": 16686 }, { "epoch": 2.33875262789068, "grad_norm": 0.5993040204048157, "learning_rate": 6.780674479789524e-05, "loss": 0.105, "step": 16687 }, { "epoch": 2.3388927820602663, "grad_norm": 0.10743822902441025, "learning_rate": 6.779239416407557e-05, "loss": 0.0171, "step": 16688 }, { "epoch": 2.3390329362298528, "grad_norm": 0.3625826835632324, "learning_rate": 6.777804353025591e-05, "loss": 0.075, "step": 16689 }, { "epoch": 2.339173090399439, "grad_norm": 0.14484982192516327, "learning_rate": 6.776369289643625e-05, "loss": 0.013, "step": 16690 }, { "epoch": 2.339313244569026, "grad_norm": 0.14316824078559875, "learning_rate": 6.77493422626166e-05, "loss": 0.011, "step": 16691 }, { "epoch": 2.3394533987386126, "grad_norm": 0.30650871992111206, "learning_rate": 6.773499162879692e-05, "loss": 0.0766, "step": 16692 }, { "epoch": 2.339593552908199, "grad_norm": 0.4431057870388031, "learning_rate": 6.772064099497727e-05, "loss": 0.0579, "step": 16693 }, { "epoch": 2.3397337070777855, "grad_norm": 0.1763039082288742, "learning_rate": 6.770629036115761e-05, "loss": 0.0178, "step": 16694 }, { "epoch": 2.339873861247372, "grad_norm": 0.08771751075983047, "learning_rate": 6.769193972733795e-05, "loss": 0.0097, "step": 16695 }, { "epoch": 2.3400140154169584, "grad_norm": 0.2820626497268677, "learning_rate": 6.767758909351829e-05, "loss": 0.029, "step": 16696 }, { "epoch": 2.3401541695865453, "grad_norm": 0.1556430459022522, "learning_rate": 6.766323845969863e-05, "loss": 0.0152, "step": 16697 }, { "epoch": 2.340294323756132, "grad_norm": 0.20324398577213287, "learning_rate": 6.764888782587898e-05, "loss": 0.0319, "step": 16698 }, { "epoch": 2.3404344779257182, "grad_norm": 0.1523885428905487, "learning_rate": 6.76345371920593e-05, "loss": 0.0238, "step": 16699 }, { "epoch": 
2.3405746320953047, "grad_norm": 0.1927792876958847, "learning_rate": 6.762018655823965e-05, "loss": 0.0238, "step": 16700 }, { "epoch": 2.3407147862648916, "grad_norm": 1.7138257026672363, "learning_rate": 6.760583592441999e-05, "loss": 0.0583, "step": 16701 }, { "epoch": 2.340854940434478, "grad_norm": 0.130364328622818, "learning_rate": 6.759148529060033e-05, "loss": 0.0125, "step": 16702 }, { "epoch": 2.3409950946040645, "grad_norm": 0.31931135058403015, "learning_rate": 6.757713465678067e-05, "loss": 0.0141, "step": 16703 }, { "epoch": 2.341135248773651, "grad_norm": 0.1332167088985443, "learning_rate": 6.7562784022961e-05, "loss": 0.0141, "step": 16704 }, { "epoch": 2.3412754029432374, "grad_norm": 0.16314949095249176, "learning_rate": 6.754843338914136e-05, "loss": 0.0557, "step": 16705 }, { "epoch": 2.341415557112824, "grad_norm": 0.2002786546945572, "learning_rate": 6.753408275532169e-05, "loss": 0.0344, "step": 16706 }, { "epoch": 2.341555711282411, "grad_norm": 0.42262527346611023, "learning_rate": 6.751973212150203e-05, "loss": 0.009, "step": 16707 }, { "epoch": 2.3416958654519973, "grad_norm": 0.6499032378196716, "learning_rate": 6.750538148768236e-05, "loss": 0.0362, "step": 16708 }, { "epoch": 2.3418360196215837, "grad_norm": 0.23660887777805328, "learning_rate": 6.749103085386271e-05, "loss": 0.0119, "step": 16709 }, { "epoch": 2.34197617379117, "grad_norm": 0.5324763059616089, "learning_rate": 6.747668022004304e-05, "loss": 0.0345, "step": 16710 }, { "epoch": 2.3421163279607566, "grad_norm": 1.1433472633361816, "learning_rate": 6.746232958622338e-05, "loss": 0.1076, "step": 16711 }, { "epoch": 2.3422564821303435, "grad_norm": 0.4222467541694641, "learning_rate": 6.744797895240373e-05, "loss": 0.0739, "step": 16712 }, { "epoch": 2.34239663629993, "grad_norm": 0.2540854811668396, "learning_rate": 6.743362831858407e-05, "loss": 0.0084, "step": 16713 }, { "epoch": 2.3425367904695165, "grad_norm": 0.015154390595853329, "learning_rate": 
6.741927768476441e-05, "loss": 0.0014, "step": 16714 }, { "epoch": 2.342676944639103, "grad_norm": 0.059180036187171936, "learning_rate": 6.740492705094474e-05, "loss": 0.0081, "step": 16715 }, { "epoch": 2.3428170988086894, "grad_norm": 0.4236818253993988, "learning_rate": 6.739057641712508e-05, "loss": 0.0134, "step": 16716 }, { "epoch": 2.3429572529782763, "grad_norm": 0.03651474788784981, "learning_rate": 6.737622578330542e-05, "loss": 0.0029, "step": 16717 }, { "epoch": 2.3430974071478627, "grad_norm": 0.2646081745624542, "learning_rate": 6.736187514948576e-05, "loss": 0.0084, "step": 16718 }, { "epoch": 2.343237561317449, "grad_norm": 0.1457076519727707, "learning_rate": 6.734752451566611e-05, "loss": 0.0208, "step": 16719 }, { "epoch": 2.3433777154870357, "grad_norm": 3.895206928253174, "learning_rate": 6.733317388184644e-05, "loss": 0.3217, "step": 16720 }, { "epoch": 2.343517869656622, "grad_norm": 0.08148682862520218, "learning_rate": 6.731882324802679e-05, "loss": 0.009, "step": 16721 }, { "epoch": 2.343658023826209, "grad_norm": 0.38824546337127686, "learning_rate": 6.730447261420712e-05, "loss": 0.0375, "step": 16722 }, { "epoch": 2.3437981779957955, "grad_norm": 0.48277169466018677, "learning_rate": 6.729012198038746e-05, "loss": 0.0223, "step": 16723 }, { "epoch": 2.343938332165382, "grad_norm": 0.07657844573259354, "learning_rate": 6.727577134656779e-05, "loss": 0.0026, "step": 16724 }, { "epoch": 2.3440784863349684, "grad_norm": 0.08366616815328598, "learning_rate": 6.726142071274815e-05, "loss": 0.0049, "step": 16725 }, { "epoch": 2.344218640504555, "grad_norm": 0.1319780796766281, "learning_rate": 6.724707007892847e-05, "loss": 0.0258, "step": 16726 }, { "epoch": 2.3443587946741413, "grad_norm": 0.03102542832493782, "learning_rate": 6.723271944510882e-05, "loss": 0.0076, "step": 16727 }, { "epoch": 2.3444989488437282, "grad_norm": 0.32032671570777893, "learning_rate": 6.721836881128916e-05, "loss": 0.0235, "step": 16728 }, { "epoch": 
2.3446391030133147, "grad_norm": 0.23940148949623108, "learning_rate": 6.72040181774695e-05, "loss": 0.0126, "step": 16729 }, { "epoch": 2.344779257182901, "grad_norm": 0.26261255145072937, "learning_rate": 6.718966754364984e-05, "loss": 0.034, "step": 16730 }, { "epoch": 2.3449194113524876, "grad_norm": 0.20490874350070953, "learning_rate": 6.717531690983017e-05, "loss": 0.0265, "step": 16731 }, { "epoch": 2.3450595655220745, "grad_norm": 0.20463722944259644, "learning_rate": 6.716096627601051e-05, "loss": 0.0186, "step": 16732 }, { "epoch": 2.345199719691661, "grad_norm": 0.15001113712787628, "learning_rate": 6.714661564219086e-05, "loss": 0.0223, "step": 16733 }, { "epoch": 2.3453398738612474, "grad_norm": 0.056073326617479324, "learning_rate": 6.71322650083712e-05, "loss": 0.0043, "step": 16734 }, { "epoch": 2.345480028030834, "grad_norm": 0.18617713451385498, "learning_rate": 6.711791437455154e-05, "loss": 0.026, "step": 16735 }, { "epoch": 2.3456201822004203, "grad_norm": 0.18807999789714813, "learning_rate": 6.710356374073188e-05, "loss": 0.0369, "step": 16736 }, { "epoch": 2.345760336370007, "grad_norm": 0.16750648617744446, "learning_rate": 6.708921310691222e-05, "loss": 0.0091, "step": 16737 }, { "epoch": 2.3459004905395937, "grad_norm": 0.3186100423336029, "learning_rate": 6.707486247309255e-05, "loss": 0.0231, "step": 16738 }, { "epoch": 2.34604064470918, "grad_norm": 0.3226334750652313, "learning_rate": 6.70605118392729e-05, "loss": 0.0141, "step": 16739 }, { "epoch": 2.3461807988787666, "grad_norm": 0.27671194076538086, "learning_rate": 6.704616120545324e-05, "loss": 0.0178, "step": 16740 }, { "epoch": 2.346320953048353, "grad_norm": 0.35524260997772217, "learning_rate": 6.703181057163358e-05, "loss": 0.0237, "step": 16741 }, { "epoch": 2.3464611072179395, "grad_norm": 0.4764234721660614, "learning_rate": 6.701745993781391e-05, "loss": 0.0558, "step": 16742 }, { "epoch": 2.3466012613875264, "grad_norm": 0.16321344673633575, "learning_rate": 
6.700310930399425e-05, "loss": 0.0106, "step": 16743 }, { "epoch": 2.346741415557113, "grad_norm": 0.4195289611816406, "learning_rate": 6.698875867017459e-05, "loss": 0.0147, "step": 16744 }, { "epoch": 2.3468815697266994, "grad_norm": 0.0997488722205162, "learning_rate": 6.697440803635493e-05, "loss": 0.011, "step": 16745 }, { "epoch": 2.347021723896286, "grad_norm": 0.530870258808136, "learning_rate": 6.696005740253528e-05, "loss": 0.0268, "step": 16746 }, { "epoch": 2.3471618780658723, "grad_norm": 0.08970702439546585, "learning_rate": 6.69457067687156e-05, "loss": 0.0058, "step": 16747 }, { "epoch": 2.347302032235459, "grad_norm": 0.16014617681503296, "learning_rate": 6.693135613489596e-05, "loss": 0.0359, "step": 16748 }, { "epoch": 2.3474421864050456, "grad_norm": 0.21170884370803833, "learning_rate": 6.691700550107629e-05, "loss": 0.0098, "step": 16749 }, { "epoch": 2.347582340574632, "grad_norm": 0.20161627233028412, "learning_rate": 6.690265486725663e-05, "loss": 0.0165, "step": 16750 }, { "epoch": 2.3477224947442186, "grad_norm": 0.08059614896774292, "learning_rate": 6.688830423343697e-05, "loss": 0.0395, "step": 16751 }, { "epoch": 2.347862648913805, "grad_norm": 0.3088594377040863, "learning_rate": 6.687395359961732e-05, "loss": 0.0165, "step": 16752 }, { "epoch": 2.348002803083392, "grad_norm": 0.1841048002243042, "learning_rate": 6.685960296579766e-05, "loss": 0.0107, "step": 16753 }, { "epoch": 2.3481429572529784, "grad_norm": 0.7148263454437256, "learning_rate": 6.684525233197799e-05, "loss": 0.0472, "step": 16754 }, { "epoch": 2.348283111422565, "grad_norm": 0.0530632808804512, "learning_rate": 6.683090169815833e-05, "loss": 0.0036, "step": 16755 }, { "epoch": 2.3484232655921513, "grad_norm": 0.16300493478775024, "learning_rate": 6.681655106433867e-05, "loss": 0.0114, "step": 16756 }, { "epoch": 2.3485634197617378, "grad_norm": 0.0721244141459465, "learning_rate": 6.680220043051901e-05, "loss": 0.0035, "step": 16757 }, { "epoch": 
2.3487035739313242, "grad_norm": 0.13145971298217773, "learning_rate": 6.678784979669934e-05, "loss": 0.0508, "step": 16758 }, { "epoch": 2.348843728100911, "grad_norm": 0.07286734879016876, "learning_rate": 6.677349916287968e-05, "loss": 0.0031, "step": 16759 }, { "epoch": 2.3489838822704976, "grad_norm": 0.17474566400051117, "learning_rate": 6.675914852906003e-05, "loss": 0.0191, "step": 16760 }, { "epoch": 2.349124036440084, "grad_norm": 0.2069219946861267, "learning_rate": 6.674479789524037e-05, "loss": 0.0066, "step": 16761 }, { "epoch": 2.3492641906096705, "grad_norm": 0.22322790324687958, "learning_rate": 6.673044726142071e-05, "loss": 0.0192, "step": 16762 }, { "epoch": 2.3494043447792574, "grad_norm": 0.15062567591667175, "learning_rate": 6.671609662760104e-05, "loss": 0.0241, "step": 16763 }, { "epoch": 2.349544498948844, "grad_norm": 0.467182993888855, "learning_rate": 6.67017459937814e-05, "loss": 0.0133, "step": 16764 }, { "epoch": 2.3496846531184303, "grad_norm": 0.08561839163303375, "learning_rate": 6.668739535996172e-05, "loss": 0.0056, "step": 16765 }, { "epoch": 2.349824807288017, "grad_norm": 0.26732197403907776, "learning_rate": 6.667304472614206e-05, "loss": 0.0224, "step": 16766 }, { "epoch": 2.3499649614576033, "grad_norm": 0.8566556572914124, "learning_rate": 6.66586940923224e-05, "loss": 0.1034, "step": 16767 }, { "epoch": 2.3501051156271897, "grad_norm": 1.312525987625122, "learning_rate": 6.664434345850275e-05, "loss": 0.0357, "step": 16768 }, { "epoch": 2.3502452697967766, "grad_norm": 0.05718226358294487, "learning_rate": 6.662999282468309e-05, "loss": 0.0034, "step": 16769 }, { "epoch": 2.350385423966363, "grad_norm": 1.637704610824585, "learning_rate": 6.661564219086342e-05, "loss": 0.0364, "step": 16770 }, { "epoch": 2.3505255781359495, "grad_norm": 0.2637943625450134, "learning_rate": 6.660129155704376e-05, "loss": 0.046, "step": 16771 }, { "epoch": 2.350665732305536, "grad_norm": 0.11931415647268295, "learning_rate": 
6.65869409232241e-05, "loss": 0.0058, "step": 16772 }, { "epoch": 2.3508058864751225, "grad_norm": 0.2528848946094513, "learning_rate": 6.657259028940445e-05, "loss": 0.0317, "step": 16773 }, { "epoch": 2.3509460406447094, "grad_norm": 0.5890038013458252, "learning_rate": 6.655823965558477e-05, "loss": 0.0386, "step": 16774 }, { "epoch": 2.351086194814296, "grad_norm": 0.020809343084692955, "learning_rate": 6.654388902176513e-05, "loss": 0.0012, "step": 16775 }, { "epoch": 2.3512263489838823, "grad_norm": 0.3048439919948578, "learning_rate": 6.652953838794546e-05, "loss": 0.0096, "step": 16776 }, { "epoch": 2.3513665031534687, "grad_norm": 0.29299676418304443, "learning_rate": 6.65151877541258e-05, "loss": 0.042, "step": 16777 }, { "epoch": 2.351506657323055, "grad_norm": 0.18414941430091858, "learning_rate": 6.650083712030614e-05, "loss": 0.0148, "step": 16778 }, { "epoch": 2.351646811492642, "grad_norm": 0.07064701616764069, "learning_rate": 6.648648648648648e-05, "loss": 0.0077, "step": 16779 }, { "epoch": 2.3517869656622286, "grad_norm": 0.2584095299243927, "learning_rate": 6.647213585266683e-05, "loss": 0.0431, "step": 16780 }, { "epoch": 2.351927119831815, "grad_norm": 0.36756014823913574, "learning_rate": 6.645778521884716e-05, "loss": 0.0328, "step": 16781 }, { "epoch": 2.3520672740014015, "grad_norm": 0.2210976928472519, "learning_rate": 6.64434345850275e-05, "loss": 0.0256, "step": 16782 }, { "epoch": 2.352207428170988, "grad_norm": 0.2337346076965332, "learning_rate": 6.642908395120784e-05, "loss": 0.009, "step": 16783 }, { "epoch": 2.352347582340575, "grad_norm": 0.2046017199754715, "learning_rate": 6.641473331738818e-05, "loss": 0.0282, "step": 16784 }, { "epoch": 2.3524877365101613, "grad_norm": 0.3241329491138458, "learning_rate": 6.640038268356852e-05, "loss": 0.0384, "step": 16785 }, { "epoch": 2.3526278906797478, "grad_norm": 0.11610342562198639, "learning_rate": 6.638603204974885e-05, "loss": 0.0065, "step": 16786 }, { "epoch": 2.352768044849334, 
"grad_norm": 0.15356920659542084, "learning_rate": 6.63716814159292e-05, "loss": 0.0214, "step": 16787 }, { "epoch": 2.3529081990189207, "grad_norm": 0.391592800617218, "learning_rate": 6.635733078210954e-05, "loss": 0.0359, "step": 16788 }, { "epoch": 2.353048353188507, "grad_norm": 0.25107723474502563, "learning_rate": 6.634298014828988e-05, "loss": 0.034, "step": 16789 }, { "epoch": 2.353188507358094, "grad_norm": 0.09637567400932312, "learning_rate": 6.632862951447021e-05, "loss": 0.0138, "step": 16790 }, { "epoch": 2.3533286615276805, "grad_norm": 0.8719984889030457, "learning_rate": 6.631427888065056e-05, "loss": 0.07, "step": 16791 }, { "epoch": 2.353468815697267, "grad_norm": 0.027512352913618088, "learning_rate": 6.629992824683089e-05, "loss": 0.0028, "step": 16792 }, { "epoch": 2.3536089698668534, "grad_norm": 0.12451263517141342, "learning_rate": 6.628557761301123e-05, "loss": 0.018, "step": 16793 }, { "epoch": 2.3537491240364403, "grad_norm": 0.1623818278312683, "learning_rate": 6.627122697919158e-05, "loss": 0.0174, "step": 16794 }, { "epoch": 2.353889278206027, "grad_norm": 0.326121062040329, "learning_rate": 6.625687634537192e-05, "loss": 0.0224, "step": 16795 }, { "epoch": 2.3540294323756132, "grad_norm": 0.5130059123039246, "learning_rate": 6.624252571155226e-05, "loss": 0.0731, "step": 16796 }, { "epoch": 2.3541695865451997, "grad_norm": 0.166997030377388, "learning_rate": 6.622817507773259e-05, "loss": 0.0168, "step": 16797 }, { "epoch": 2.354309740714786, "grad_norm": 0.03297094255685806, "learning_rate": 6.621382444391293e-05, "loss": 0.004, "step": 16798 }, { "epoch": 2.3544498948843726, "grad_norm": 0.11285286396741867, "learning_rate": 6.619947381009327e-05, "loss": 0.0515, "step": 16799 }, { "epoch": 2.3545900490539595, "grad_norm": 0.1945682018995285, "learning_rate": 6.618512317627361e-05, "loss": 0.0165, "step": 16800 }, { "epoch": 2.354730203223546, "grad_norm": 0.27851366996765137, "learning_rate": 6.617077254245396e-05, "loss": 
0.0182, "step": 16801 }, { "epoch": 2.3548703573931324, "grad_norm": 0.2644082009792328, "learning_rate": 6.615642190863429e-05, "loss": 0.0518, "step": 16802 }, { "epoch": 2.355010511562719, "grad_norm": 0.4891209304332733, "learning_rate": 6.614207127481463e-05, "loss": 0.0563, "step": 16803 }, { "epoch": 2.3551506657323054, "grad_norm": 0.18028511106967926, "learning_rate": 6.612772064099497e-05, "loss": 0.0075, "step": 16804 }, { "epoch": 2.3552908199018923, "grad_norm": 0.07767307758331299, "learning_rate": 6.611337000717531e-05, "loss": 0.0074, "step": 16805 }, { "epoch": 2.3554309740714787, "grad_norm": 0.05823894217610359, "learning_rate": 6.609901937335564e-05, "loss": 0.0031, "step": 16806 }, { "epoch": 2.355571128241065, "grad_norm": 0.3098495602607727, "learning_rate": 6.6084668739536e-05, "loss": 0.0513, "step": 16807 }, { "epoch": 2.3557112824106516, "grad_norm": 0.051881611347198486, "learning_rate": 6.607031810571632e-05, "loss": 0.0044, "step": 16808 }, { "epoch": 2.355851436580238, "grad_norm": 0.2774588167667389, "learning_rate": 6.605596747189667e-05, "loss": 0.0577, "step": 16809 }, { "epoch": 2.3559915907498246, "grad_norm": 0.86620032787323, "learning_rate": 6.604161683807701e-05, "loss": 0.1558, "step": 16810 }, { "epoch": 2.3561317449194115, "grad_norm": 0.6263657212257385, "learning_rate": 6.602726620425735e-05, "loss": 0.0308, "step": 16811 }, { "epoch": 2.356271899088998, "grad_norm": 0.3518466353416443, "learning_rate": 6.601291557043769e-05, "loss": 0.0751, "step": 16812 }, { "epoch": 2.3564120532585844, "grad_norm": 0.5598931312561035, "learning_rate": 6.599856493661802e-05, "loss": 0.0562, "step": 16813 }, { "epoch": 2.356552207428171, "grad_norm": 0.41986972093582153, "learning_rate": 6.598421430279838e-05, "loss": 0.032, "step": 16814 }, { "epoch": 2.3566923615977577, "grad_norm": 0.3903028964996338, "learning_rate": 6.59698636689787e-05, "loss": 0.0715, "step": 16815 }, { "epoch": 2.356832515767344, "grad_norm": 
1.2094265222549438, "learning_rate": 6.595551303515905e-05, "loss": 0.0303, "step": 16816 }, { "epoch": 2.3569726699369307, "grad_norm": 1.2694437503814697, "learning_rate": 6.594116240133939e-05, "loss": 0.0623, "step": 16817 }, { "epoch": 2.357112824106517, "grad_norm": 1.1664642095565796, "learning_rate": 6.592681176751973e-05, "loss": 0.0516, "step": 16818 }, { "epoch": 2.3572529782761036, "grad_norm": 1.7885206937789917, "learning_rate": 6.591246113370006e-05, "loss": 0.0481, "step": 16819 }, { "epoch": 2.35739313244569, "grad_norm": 0.8192026019096375, "learning_rate": 6.58981104998804e-05, "loss": 0.0995, "step": 16820 }, { "epoch": 2.357533286615277, "grad_norm": 0.7249272465705872, "learning_rate": 6.588375986606075e-05, "loss": 0.014, "step": 16821 }, { "epoch": 2.3576734407848634, "grad_norm": 0.3018769323825836, "learning_rate": 6.586940923224109e-05, "loss": 0.0325, "step": 16822 }, { "epoch": 2.35781359495445, "grad_norm": 0.2909446656703949, "learning_rate": 6.585505859842143e-05, "loss": 0.0352, "step": 16823 }, { "epoch": 2.3579537491240363, "grad_norm": 0.2735503911972046, "learning_rate": 6.584070796460176e-05, "loss": 0.0152, "step": 16824 }, { "epoch": 2.3580939032936232, "grad_norm": 0.3283838927745819, "learning_rate": 6.58263573307821e-05, "loss": 0.0351, "step": 16825 }, { "epoch": 2.3582340574632097, "grad_norm": 0.2914266884326935, "learning_rate": 6.581200669696244e-05, "loss": 0.0706, "step": 16826 }, { "epoch": 2.358374211632796, "grad_norm": 0.2754274904727936, "learning_rate": 6.579765606314278e-05, "loss": 0.0355, "step": 16827 }, { "epoch": 2.3585143658023826, "grad_norm": 0.2437049150466919, "learning_rate": 6.578330542932313e-05, "loss": 0.0664, "step": 16828 }, { "epoch": 2.358654519971969, "grad_norm": 0.3542616665363312, "learning_rate": 6.576895479550345e-05, "loss": 0.0498, "step": 16829 }, { "epoch": 2.3587946741415555, "grad_norm": 0.3559741973876953, "learning_rate": 6.575460416168381e-05, "loss": 0.019, "step": 16830 }, 
{ "epoch": 2.3589348283111424, "grad_norm": 0.08941761404275894, "learning_rate": 6.574025352786414e-05, "loss": 0.0056, "step": 16831 }, { "epoch": 2.359074982480729, "grad_norm": 0.10273107886314392, "learning_rate": 6.572590289404448e-05, "loss": 0.0084, "step": 16832 }, { "epoch": 2.3592151366503153, "grad_norm": 0.27030569314956665, "learning_rate": 6.571155226022482e-05, "loss": 0.0252, "step": 16833 }, { "epoch": 2.359355290819902, "grad_norm": 0.17541047930717468, "learning_rate": 6.569720162640517e-05, "loss": 0.0208, "step": 16834 }, { "epoch": 2.3594954449894883, "grad_norm": 0.171622171998024, "learning_rate": 6.56828509925855e-05, "loss": 0.0261, "step": 16835 }, { "epoch": 2.359635599159075, "grad_norm": 0.1687568575143814, "learning_rate": 6.566850035876584e-05, "loss": 0.0221, "step": 16836 }, { "epoch": 2.3597757533286616, "grad_norm": 0.2137746810913086, "learning_rate": 6.565414972494618e-05, "loss": 0.0412, "step": 16837 }, { "epoch": 2.359915907498248, "grad_norm": 0.1906731128692627, "learning_rate": 6.563979909112652e-05, "loss": 0.015, "step": 16838 }, { "epoch": 2.3600560616678345, "grad_norm": 0.12018901854753494, "learning_rate": 6.562544845730686e-05, "loss": 0.0068, "step": 16839 }, { "epoch": 2.360196215837421, "grad_norm": 0.25816285610198975, "learning_rate": 6.561109782348719e-05, "loss": 0.0246, "step": 16840 }, { "epoch": 2.3603363700070075, "grad_norm": 0.22464999556541443, "learning_rate": 6.559674718966753e-05, "loss": 0.031, "step": 16841 }, { "epoch": 2.3604765241765944, "grad_norm": 0.2570783495903015, "learning_rate": 6.558239655584788e-05, "loss": 0.0389, "step": 16842 }, { "epoch": 2.360616678346181, "grad_norm": 0.08131103217601776, "learning_rate": 6.556804592202822e-05, "loss": 0.005, "step": 16843 }, { "epoch": 2.3607568325157673, "grad_norm": 0.4477812647819519, "learning_rate": 6.555369528820856e-05, "loss": 0.0459, "step": 16844 }, { "epoch": 2.3608969866853537, "grad_norm": 0.36922189593315125, "learning_rate": 
6.55393446543889e-05, "loss": 0.0109, "step": 16845 }, { "epoch": 2.3610371408549407, "grad_norm": 0.19609679281711578, "learning_rate": 6.552499402056924e-05, "loss": 0.0048, "step": 16846 }, { "epoch": 2.361177295024527, "grad_norm": 0.03386911377310753, "learning_rate": 6.551064338674957e-05, "loss": 0.0032, "step": 16847 }, { "epoch": 2.3613174491941136, "grad_norm": 0.6084646582603455, "learning_rate": 6.549629275292991e-05, "loss": 0.0682, "step": 16848 }, { "epoch": 2.3614576033637, "grad_norm": 0.17862890660762787, "learning_rate": 6.548194211911026e-05, "loss": 0.0171, "step": 16849 }, { "epoch": 2.3615977575332865, "grad_norm": 0.38720476627349854, "learning_rate": 6.54675914852906e-05, "loss": 0.0075, "step": 16850 }, { "epoch": 2.361737911702873, "grad_norm": 0.11374688148498535, "learning_rate": 6.545324085147094e-05, "loss": 0.0113, "step": 16851 }, { "epoch": 2.36187806587246, "grad_norm": 0.3034529983997345, "learning_rate": 6.543889021765127e-05, "loss": 0.0185, "step": 16852 }, { "epoch": 2.3620182200420463, "grad_norm": 0.37107303738594055, "learning_rate": 6.542453958383161e-05, "loss": 0.0446, "step": 16853 }, { "epoch": 2.3621583742116328, "grad_norm": 0.2727849781513214, "learning_rate": 6.541018895001195e-05, "loss": 0.0659, "step": 16854 }, { "epoch": 2.3622985283812192, "grad_norm": 0.15175366401672363, "learning_rate": 6.53958383161923e-05, "loss": 0.0052, "step": 16855 }, { "epoch": 2.362438682550806, "grad_norm": 0.2139585167169571, "learning_rate": 6.538148768237262e-05, "loss": 0.0286, "step": 16856 }, { "epoch": 2.3625788367203926, "grad_norm": 0.06553781032562256, "learning_rate": 6.536713704855298e-05, "loss": 0.0034, "step": 16857 }, { "epoch": 2.362718990889979, "grad_norm": 0.25779879093170166, "learning_rate": 6.535278641473331e-05, "loss": 0.0318, "step": 16858 }, { "epoch": 2.3628591450595655, "grad_norm": 0.28648388385772705, "learning_rate": 6.533843578091365e-05, "loss": 0.0087, "step": 16859 }, { "epoch": 
2.362999299229152, "grad_norm": 0.3281601667404175, "learning_rate": 6.532408514709399e-05, "loss": 0.0429, "step": 16860 }, { "epoch": 2.3631394533987384, "grad_norm": 0.2057129293680191, "learning_rate": 6.530973451327433e-05, "loss": 0.0171, "step": 16861 }, { "epoch": 2.3632796075683253, "grad_norm": 0.4731660485267639, "learning_rate": 6.529538387945468e-05, "loss": 0.0912, "step": 16862 }, { "epoch": 2.363419761737912, "grad_norm": 0.7325529456138611, "learning_rate": 6.5281033245635e-05, "loss": 0.0377, "step": 16863 }, { "epoch": 2.3635599159074983, "grad_norm": 0.16006779670715332, "learning_rate": 6.526668261181535e-05, "loss": 0.0186, "step": 16864 }, { "epoch": 2.3637000700770847, "grad_norm": 0.43588986992836, "learning_rate": 6.525233197799569e-05, "loss": 0.0601, "step": 16865 }, { "epoch": 2.363840224246671, "grad_norm": 0.025887934491038322, "learning_rate": 6.523798134417603e-05, "loss": 0.0026, "step": 16866 }, { "epoch": 2.363980378416258, "grad_norm": 0.4535498321056366, "learning_rate": 6.522363071035637e-05, "loss": 0.0832, "step": 16867 }, { "epoch": 2.3641205325858445, "grad_norm": 2.6256422996520996, "learning_rate": 6.52092800765367e-05, "loss": 0.0216, "step": 16868 }, { "epoch": 2.364260686755431, "grad_norm": 2.908099412918091, "learning_rate": 6.519492944271704e-05, "loss": 0.0863, "step": 16869 }, { "epoch": 2.3644008409250175, "grad_norm": 0.8879013061523438, "learning_rate": 6.518057880889739e-05, "loss": 0.2711, "step": 16870 }, { "epoch": 2.364540995094604, "grad_norm": 0.25158190727233887, "learning_rate": 6.516622817507773e-05, "loss": 0.009, "step": 16871 }, { "epoch": 2.3646811492641904, "grad_norm": 0.13140380382537842, "learning_rate": 6.515187754125806e-05, "loss": 0.02, "step": 16872 }, { "epoch": 2.3648213034337773, "grad_norm": 0.1571803241968155, "learning_rate": 6.513752690743841e-05, "loss": 0.0107, "step": 16873 }, { "epoch": 2.3649614576033637, "grad_norm": 0.08212386816740036, "learning_rate": 
6.512317627361874e-05, "loss": 0.0089, "step": 16874 }, { "epoch": 2.36510161177295, "grad_norm": 0.5521555542945862, "learning_rate": 6.510882563979908e-05, "loss": 0.0331, "step": 16875 }, { "epoch": 2.3652417659425367, "grad_norm": 0.0706278383731842, "learning_rate": 6.509447500597943e-05, "loss": 0.0048, "step": 16876 }, { "epoch": 2.3653819201121236, "grad_norm": 0.12548606097698212, "learning_rate": 6.508012437215977e-05, "loss": 0.0057, "step": 16877 }, { "epoch": 2.36552207428171, "grad_norm": 0.2095845490694046, "learning_rate": 6.506577373834011e-05, "loss": 0.0121, "step": 16878 }, { "epoch": 2.3656622284512965, "grad_norm": 0.23944036662578583, "learning_rate": 6.505142310452044e-05, "loss": 0.0224, "step": 16879 }, { "epoch": 2.365802382620883, "grad_norm": 0.7114379405975342, "learning_rate": 6.503707247070078e-05, "loss": 0.0444, "step": 16880 }, { "epoch": 2.3659425367904694, "grad_norm": 0.24444064497947693, "learning_rate": 6.502272183688112e-05, "loss": 0.0221, "step": 16881 }, { "epoch": 2.366082690960056, "grad_norm": 0.22728760540485382, "learning_rate": 6.500837120306146e-05, "loss": 0.0072, "step": 16882 }, { "epoch": 2.3662228451296428, "grad_norm": 0.7124162912368774, "learning_rate": 6.499402056924181e-05, "loss": 0.0407, "step": 16883 }, { "epoch": 2.366362999299229, "grad_norm": 0.05480144917964935, "learning_rate": 6.497966993542215e-05, "loss": 0.004, "step": 16884 }, { "epoch": 2.3665031534688157, "grad_norm": 0.20836380124092102, "learning_rate": 6.496531930160248e-05, "loss": 0.0376, "step": 16885 }, { "epoch": 2.366643307638402, "grad_norm": 0.25173670053482056, "learning_rate": 6.495096866778282e-05, "loss": 0.0205, "step": 16886 }, { "epoch": 2.3667834618079886, "grad_norm": 0.23888419568538666, "learning_rate": 6.493661803396316e-05, "loss": 0.0152, "step": 16887 }, { "epoch": 2.3669236159775755, "grad_norm": 0.11593123525381088, "learning_rate": 6.49222674001435e-05, "loss": 0.0155, "step": 16888 }, { "epoch": 
2.367063770147162, "grad_norm": 0.3511277437210083, "learning_rate": 6.490791676632385e-05, "loss": 0.0882, "step": 16889 }, { "epoch": 2.3672039243167484, "grad_norm": 0.3233483135700226, "learning_rate": 6.489356613250417e-05, "loss": 0.0222, "step": 16890 }, { "epoch": 2.367344078486335, "grad_norm": 0.4679713845252991, "learning_rate": 6.487921549868452e-05, "loss": 0.0248, "step": 16891 }, { "epoch": 2.3674842326559213, "grad_norm": 0.18504492938518524, "learning_rate": 6.486486486486486e-05, "loss": 0.0027, "step": 16892 }, { "epoch": 2.3676243868255082, "grad_norm": 0.7173232436180115, "learning_rate": 6.48505142310452e-05, "loss": 0.0448, "step": 16893 }, { "epoch": 2.3677645409950947, "grad_norm": 0.06801613420248032, "learning_rate": 6.483616359722554e-05, "loss": 0.0055, "step": 16894 }, { "epoch": 2.367904695164681, "grad_norm": 0.16512177884578705, "learning_rate": 6.482181296340587e-05, "loss": 0.0344, "step": 16895 }, { "epoch": 2.3680448493342676, "grad_norm": 0.10589629411697388, "learning_rate": 6.480746232958623e-05, "loss": 0.0082, "step": 16896 }, { "epoch": 2.368185003503854, "grad_norm": 0.14586491882801056, "learning_rate": 6.479311169576656e-05, "loss": 0.0237, "step": 16897 }, { "epoch": 2.368325157673441, "grad_norm": 0.32704222202301025, "learning_rate": 6.47787610619469e-05, "loss": 0.0199, "step": 16898 }, { "epoch": 2.3684653118430274, "grad_norm": 0.5472651720046997, "learning_rate": 6.476441042812724e-05, "loss": 0.0393, "step": 16899 }, { "epoch": 2.368605466012614, "grad_norm": 0.47295185923576355, "learning_rate": 6.475005979430758e-05, "loss": 0.0436, "step": 16900 }, { "epoch": 2.3687456201822004, "grad_norm": 0.12014143913984299, "learning_rate": 6.473570916048791e-05, "loss": 0.0093, "step": 16901 }, { "epoch": 2.368885774351787, "grad_norm": 0.21537567675113678, "learning_rate": 6.472135852666825e-05, "loss": 0.0399, "step": 16902 }, { "epoch": 2.3690259285213733, "grad_norm": 0.19364473223686218, "learning_rate": 
6.47070078928486e-05, "loss": 0.0223, "step": 16903 }, { "epoch": 2.36916608269096, "grad_norm": 0.11488724499940872, "learning_rate": 6.469265725902894e-05, "loss": 0.0117, "step": 16904 }, { "epoch": 2.3693062368605466, "grad_norm": 0.580854594707489, "learning_rate": 6.467830662520928e-05, "loss": 0.0442, "step": 16905 }, { "epoch": 2.369446391030133, "grad_norm": 0.08569135516881943, "learning_rate": 6.466395599138961e-05, "loss": 0.0061, "step": 16906 }, { "epoch": 2.3695865451997196, "grad_norm": 0.30035510659217834, "learning_rate": 6.464960535756995e-05, "loss": 0.0371, "step": 16907 }, { "epoch": 2.3697266993693065, "grad_norm": 0.14449165761470795, "learning_rate": 6.463525472375029e-05, "loss": 0.0671, "step": 16908 }, { "epoch": 2.369866853538893, "grad_norm": 0.2254241555929184, "learning_rate": 6.462090408993063e-05, "loss": 0.0507, "step": 16909 }, { "epoch": 2.3700070077084794, "grad_norm": 0.3636128306388855, "learning_rate": 6.460655345611098e-05, "loss": 0.0525, "step": 16910 }, { "epoch": 2.370147161878066, "grad_norm": 0.032167065888643265, "learning_rate": 6.45922028222913e-05, "loss": 0.0024, "step": 16911 }, { "epoch": 2.3702873160476523, "grad_norm": 0.24461814761161804, "learning_rate": 6.457785218847166e-05, "loss": 0.0101, "step": 16912 }, { "epoch": 2.3704274702172388, "grad_norm": 0.3655865788459778, "learning_rate": 6.456350155465199e-05, "loss": 0.0853, "step": 16913 }, { "epoch": 2.3705676243868257, "grad_norm": 0.13492873311042786, "learning_rate": 6.454915092083233e-05, "loss": 0.015, "step": 16914 }, { "epoch": 2.370707778556412, "grad_norm": 0.04546618461608887, "learning_rate": 6.453480028701267e-05, "loss": 0.0035, "step": 16915 }, { "epoch": 2.3708479327259986, "grad_norm": 1.2768014669418335, "learning_rate": 6.452044965319302e-05, "loss": 0.0374, "step": 16916 }, { "epoch": 2.370988086895585, "grad_norm": 0.06340478360652924, "learning_rate": 6.450609901937334e-05, "loss": 0.0041, "step": 16917 }, { "epoch": 
2.3711282410651715, "grad_norm": 0.0500447079539299, "learning_rate": 6.449174838555369e-05, "loss": 0.0044, "step": 16918 }, { "epoch": 2.3712683952347584, "grad_norm": 0.08931964635848999, "learning_rate": 6.447739775173403e-05, "loss": 0.0043, "step": 16919 }, { "epoch": 2.371408549404345, "grad_norm": 0.6721488833427429, "learning_rate": 6.446304711791437e-05, "loss": 0.0757, "step": 16920 }, { "epoch": 2.3715487035739313, "grad_norm": 1.0046268701553345, "learning_rate": 6.444869648409471e-05, "loss": 0.0556, "step": 16921 }, { "epoch": 2.371688857743518, "grad_norm": 0.34156569838523865, "learning_rate": 6.443434585027504e-05, "loss": 0.0433, "step": 16922 }, { "epoch": 2.3718290119131042, "grad_norm": 0.37228503823280334, "learning_rate": 6.44199952164554e-05, "loss": 0.0209, "step": 16923 }, { "epoch": 2.371969166082691, "grad_norm": 0.11845259368419647, "learning_rate": 6.440564458263573e-05, "loss": 0.0109, "step": 16924 }, { "epoch": 2.3721093202522776, "grad_norm": 0.22649669647216797, "learning_rate": 6.439129394881607e-05, "loss": 0.0462, "step": 16925 }, { "epoch": 2.372249474421864, "grad_norm": 0.13704326748847961, "learning_rate": 6.437694331499641e-05, "loss": 0.0219, "step": 16926 }, { "epoch": 2.3723896285914505, "grad_norm": 0.22121179103851318, "learning_rate": 6.436259268117675e-05, "loss": 0.0283, "step": 16927 }, { "epoch": 2.372529782761037, "grad_norm": 0.034840602427721024, "learning_rate": 6.43482420473571e-05, "loss": 0.004, "step": 16928 }, { "epoch": 2.372669936930624, "grad_norm": 0.22619760036468506, "learning_rate": 6.433389141353742e-05, "loss": 0.0215, "step": 16929 }, { "epoch": 2.3728100911002104, "grad_norm": 0.05948106572031975, "learning_rate": 6.431954077971776e-05, "loss": 0.0031, "step": 16930 }, { "epoch": 2.372950245269797, "grad_norm": 0.21795515716075897, "learning_rate": 6.43051901458981e-05, "loss": 0.0292, "step": 16931 }, { "epoch": 2.3730903994393833, "grad_norm": 0.21289026737213135, "learning_rate": 
6.429083951207845e-05, "loss": 0.0184, "step": 16932 }, { "epoch": 2.3732305536089697, "grad_norm": 0.5294535756111145, "learning_rate": 6.427648887825878e-05, "loss": 0.0706, "step": 16933 }, { "epoch": 2.373370707778556, "grad_norm": 0.1806413233280182, "learning_rate": 6.426213824443912e-05, "loss": 0.0233, "step": 16934 }, { "epoch": 2.373510861948143, "grad_norm": 0.522103488445282, "learning_rate": 6.424778761061946e-05, "loss": 0.0895, "step": 16935 }, { "epoch": 2.3736510161177296, "grad_norm": 0.21771401166915894, "learning_rate": 6.42334369767998e-05, "loss": 0.0243, "step": 16936 }, { "epoch": 2.373791170287316, "grad_norm": 0.4710409343242645, "learning_rate": 6.421908634298015e-05, "loss": 0.0294, "step": 16937 }, { "epoch": 2.3739313244569025, "grad_norm": 0.23137691617012024, "learning_rate": 6.420473570916047e-05, "loss": 0.0054, "step": 16938 }, { "epoch": 2.3740714786264894, "grad_norm": 0.07563072443008423, "learning_rate": 6.419038507534083e-05, "loss": 0.0073, "step": 16939 }, { "epoch": 2.374211632796076, "grad_norm": 0.3973282277584076, "learning_rate": 6.417603444152116e-05, "loss": 0.0855, "step": 16940 }, { "epoch": 2.3743517869656623, "grad_norm": 0.1551436185836792, "learning_rate": 6.41616838077015e-05, "loss": 0.0244, "step": 16941 }, { "epoch": 2.3744919411352488, "grad_norm": 0.05691460147500038, "learning_rate": 6.414733317388184e-05, "loss": 0.0038, "step": 16942 }, { "epoch": 2.374632095304835, "grad_norm": 0.09008825570344925, "learning_rate": 6.413298254006218e-05, "loss": 0.0043, "step": 16943 }, { "epoch": 2.3747722494744217, "grad_norm": 0.8951126337051392, "learning_rate": 6.411863190624253e-05, "loss": 0.0254, "step": 16944 }, { "epoch": 2.3749124036440086, "grad_norm": 0.2815404236316681, "learning_rate": 6.410428127242286e-05, "loss": 0.0304, "step": 16945 }, { "epoch": 2.375052557813595, "grad_norm": 0.06636140495538712, "learning_rate": 6.40899306386032e-05, "loss": 0.004, "step": 16946 }, { "epoch": 2.3751927119831815, 
"grad_norm": 0.09949080646038055, "learning_rate": 6.407558000478354e-05, "loss": 0.0138, "step": 16947 }, { "epoch": 2.375332866152768, "grad_norm": 0.12217723578214645, "learning_rate": 6.406122937096388e-05, "loss": 0.0222, "step": 16948 }, { "epoch": 2.3754730203223544, "grad_norm": 0.3615403175354004, "learning_rate": 6.404687873714421e-05, "loss": 0.0345, "step": 16949 }, { "epoch": 2.3756131744919413, "grad_norm": 0.9572797417640686, "learning_rate": 6.403252810332455e-05, "loss": 0.0984, "step": 16950 }, { "epoch": 2.3757533286615278, "grad_norm": 0.18464066088199615, "learning_rate": 6.40181774695049e-05, "loss": 0.0169, "step": 16951 }, { "epoch": 2.3758934828311142, "grad_norm": 0.45925700664520264, "learning_rate": 6.400382683568524e-05, "loss": 0.0685, "step": 16952 }, { "epoch": 2.3760336370007007, "grad_norm": 0.2794213891029358, "learning_rate": 6.398947620186558e-05, "loss": 0.0405, "step": 16953 }, { "epoch": 2.376173791170287, "grad_norm": 0.3778564929962158, "learning_rate": 6.397512556804591e-05, "loss": 0.0765, "step": 16954 }, { "epoch": 2.3763139453398736, "grad_norm": 0.20111976563930511, "learning_rate": 6.396077493422626e-05, "loss": 0.0313, "step": 16955 }, { "epoch": 2.3764540995094605, "grad_norm": 0.09110995382070541, "learning_rate": 6.394642430040659e-05, "loss": 0.0086, "step": 16956 }, { "epoch": 2.376594253679047, "grad_norm": 0.6317217350006104, "learning_rate": 6.393207366658693e-05, "loss": 0.0821, "step": 16957 }, { "epoch": 2.3767344078486334, "grad_norm": 0.19801975786685944, "learning_rate": 6.391772303276728e-05, "loss": 0.0261, "step": 16958 }, { "epoch": 2.37687456201822, "grad_norm": 0.2943643629550934, "learning_rate": 6.390337239894762e-05, "loss": 0.0488, "step": 16959 }, { "epoch": 2.377014716187807, "grad_norm": 0.7233700156211853, "learning_rate": 6.388902176512796e-05, "loss": 0.0126, "step": 16960 }, { "epoch": 2.3771548703573933, "grad_norm": 0.24063602089881897, "learning_rate": 6.387467113130829e-05, "loss": 
0.0259, "step": 16961 }, { "epoch": 2.3772950245269797, "grad_norm": 0.4068490266799927, "learning_rate": 6.386032049748864e-05, "loss": 0.0168, "step": 16962 }, { "epoch": 2.377435178696566, "grad_norm": 0.6211187839508057, "learning_rate": 6.384596986366897e-05, "loss": 0.0334, "step": 16963 }, { "epoch": 2.3775753328661526, "grad_norm": 0.5214171409606934, "learning_rate": 6.383161922984931e-05, "loss": 0.0496, "step": 16964 }, { "epoch": 2.377715487035739, "grad_norm": 0.5902215838432312, "learning_rate": 6.381726859602964e-05, "loss": 0.082, "step": 16965 }, { "epoch": 2.377855641205326, "grad_norm": 0.17820778489112854, "learning_rate": 6.380291796221e-05, "loss": 0.0077, "step": 16966 }, { "epoch": 2.3779957953749125, "grad_norm": 0.2765914499759674, "learning_rate": 6.378856732839033e-05, "loss": 0.065, "step": 16967 }, { "epoch": 2.378135949544499, "grad_norm": 0.18553400039672852, "learning_rate": 6.377421669457067e-05, "loss": 0.019, "step": 16968 }, { "epoch": 2.3782761037140854, "grad_norm": 0.09079540520906448, "learning_rate": 6.375986606075101e-05, "loss": 0.0031, "step": 16969 }, { "epoch": 2.3784162578836723, "grad_norm": 1.549363613128662, "learning_rate": 6.374551542693135e-05, "loss": 0.0565, "step": 16970 }, { "epoch": 2.3785564120532587, "grad_norm": 0.2311146855354309, "learning_rate": 6.37311647931117e-05, "loss": 0.0333, "step": 16971 }, { "epoch": 2.378696566222845, "grad_norm": 0.4168628454208374, "learning_rate": 6.371681415929202e-05, "loss": 0.0536, "step": 16972 }, { "epoch": 2.3788367203924317, "grad_norm": 0.18378330767154694, "learning_rate": 6.370246352547237e-05, "loss": 0.0139, "step": 16973 }, { "epoch": 2.378976874562018, "grad_norm": 0.2749338448047638, "learning_rate": 6.368811289165271e-05, "loss": 0.0304, "step": 16974 }, { "epoch": 2.3791170287316046, "grad_norm": 0.36670419573783875, "learning_rate": 6.367376225783305e-05, "loss": 0.025, "step": 16975 }, { "epoch": 2.3792571829011915, "grad_norm": 0.2734379172325134, 
"learning_rate": 6.365941162401339e-05, "loss": 0.0168, "step": 16976 }, { "epoch": 2.379397337070778, "grad_norm": 0.16084024310112, "learning_rate": 6.364506099019372e-05, "loss": 0.0136, "step": 16977 }, { "epoch": 2.3795374912403644, "grad_norm": 0.11295800656080246, "learning_rate": 6.363071035637408e-05, "loss": 0.0157, "step": 16978 }, { "epoch": 2.379677645409951, "grad_norm": 0.13842664659023285, "learning_rate": 6.36163597225544e-05, "loss": 0.0221, "step": 16979 }, { "epoch": 2.3798177995795373, "grad_norm": 0.22612878680229187, "learning_rate": 6.360200908873475e-05, "loss": 0.0274, "step": 16980 }, { "epoch": 2.379957953749124, "grad_norm": 0.19449256360530853, "learning_rate": 6.358765845491508e-05, "loss": 0.0131, "step": 16981 }, { "epoch": 2.3800981079187107, "grad_norm": 0.5510298609733582, "learning_rate": 6.357330782109543e-05, "loss": 0.0995, "step": 16982 }, { "epoch": 2.380238262088297, "grad_norm": 0.11415030062198639, "learning_rate": 6.355895718727576e-05, "loss": 0.0173, "step": 16983 }, { "epoch": 2.3803784162578836, "grad_norm": 0.18728147447109222, "learning_rate": 6.35446065534561e-05, "loss": 0.0205, "step": 16984 }, { "epoch": 2.38051857042747, "grad_norm": 0.26597216725349426, "learning_rate": 6.353025591963644e-05, "loss": 0.0665, "step": 16985 }, { "epoch": 2.3806587245970565, "grad_norm": 0.30623021721839905, "learning_rate": 6.351590528581679e-05, "loss": 0.0136, "step": 16986 }, { "epoch": 2.3807988787666434, "grad_norm": 0.10489777475595474, "learning_rate": 6.350155465199713e-05, "loss": 0.0269, "step": 16987 }, { "epoch": 2.38093903293623, "grad_norm": 0.08576521277427673, "learning_rate": 6.348720401817746e-05, "loss": 0.0135, "step": 16988 }, { "epoch": 2.3810791871058163, "grad_norm": 0.13533248007297516, "learning_rate": 6.34728533843578e-05, "loss": 0.0064, "step": 16989 }, { "epoch": 2.381219341275403, "grad_norm": 0.4142076075077057, "learning_rate": 6.345850275053814e-05, "loss": 0.0404, "step": 16990 }, { "epoch": 
2.3813594954449897, "grad_norm": 0.19384825229644775, "learning_rate": 6.344415211671848e-05, "loss": 0.0123, "step": 16991 }, { "epoch": 2.381499649614576, "grad_norm": 0.19663523137569427, "learning_rate": 6.342980148289883e-05, "loss": 0.0216, "step": 16992 }, { "epoch": 2.3816398037841626, "grad_norm": 0.09719817340373993, "learning_rate": 6.341545084907915e-05, "loss": 0.0086, "step": 16993 }, { "epoch": 2.381779957953749, "grad_norm": 0.12020457535982132, "learning_rate": 6.340110021525951e-05, "loss": 0.0275, "step": 16994 }, { "epoch": 2.3819201121233355, "grad_norm": 0.06197202578186989, "learning_rate": 6.338674958143984e-05, "loss": 0.0093, "step": 16995 }, { "epoch": 2.382060266292922, "grad_norm": 0.34650909900665283, "learning_rate": 6.337239894762018e-05, "loss": 0.0213, "step": 16996 }, { "epoch": 2.382200420462509, "grad_norm": 0.09400416910648346, "learning_rate": 6.335804831380052e-05, "loss": 0.0527, "step": 16997 }, { "epoch": 2.3823405746320954, "grad_norm": 0.31884637475013733, "learning_rate": 6.334369767998087e-05, "loss": 0.0204, "step": 16998 }, { "epoch": 2.382480728801682, "grad_norm": 0.6473954319953918, "learning_rate": 6.33293470461612e-05, "loss": 0.0566, "step": 16999 }, { "epoch": 2.3826208829712683, "grad_norm": 0.12085790187120438, "learning_rate": 6.331499641234154e-05, "loss": 0.0044, "step": 17000 }, { "epoch": 2.382761037140855, "grad_norm": 0.3617757260799408, "learning_rate": 6.330064577852188e-05, "loss": 0.0176, "step": 17001 }, { "epoch": 2.3829011913104416, "grad_norm": 0.2565879225730896, "learning_rate": 6.328629514470222e-05, "loss": 0.0696, "step": 17002 }, { "epoch": 2.383041345480028, "grad_norm": 0.631227970123291, "learning_rate": 6.327194451088256e-05, "loss": 0.0283, "step": 17003 }, { "epoch": 2.3831814996496146, "grad_norm": 0.08526162058115005, "learning_rate": 6.325759387706289e-05, "loss": 0.0048, "step": 17004 }, { "epoch": 2.383321653819201, "grad_norm": 0.17534476518630981, "learning_rate": 
6.324324324324325e-05, "loss": 0.0226, "step": 17005 }, { "epoch": 2.3834618079887875, "grad_norm": 0.22487549483776093, "learning_rate": 6.322889260942358e-05, "loss": 0.0076, "step": 17006 }, { "epoch": 2.3836019621583744, "grad_norm": 0.3976882994174957, "learning_rate": 6.321454197560392e-05, "loss": 0.016, "step": 17007 }, { "epoch": 2.383742116327961, "grad_norm": 0.2527986764907837, "learning_rate": 6.320019134178426e-05, "loss": 0.0214, "step": 17008 }, { "epoch": 2.3838822704975473, "grad_norm": 0.5134919285774231, "learning_rate": 6.31858407079646e-05, "loss": 0.0468, "step": 17009 }, { "epoch": 2.3840224246671338, "grad_norm": 0.12431655824184418, "learning_rate": 6.317149007414494e-05, "loss": 0.0076, "step": 17010 }, { "epoch": 2.3841625788367202, "grad_norm": 0.6028428077697754, "learning_rate": 6.315713944032527e-05, "loss": 0.0682, "step": 17011 }, { "epoch": 2.384302733006307, "grad_norm": 0.18822795152664185, "learning_rate": 6.314278880650561e-05, "loss": 0.0298, "step": 17012 }, { "epoch": 2.3844428871758936, "grad_norm": 0.029102494940161705, "learning_rate": 6.312843817268596e-05, "loss": 0.0022, "step": 17013 }, { "epoch": 2.38458304134548, "grad_norm": 0.260664165019989, "learning_rate": 6.31140875388663e-05, "loss": 0.0095, "step": 17014 }, { "epoch": 2.3847231955150665, "grad_norm": 0.4358069598674774, "learning_rate": 6.309973690504663e-05, "loss": 0.0242, "step": 17015 }, { "epoch": 2.384863349684653, "grad_norm": 0.20669299364089966, "learning_rate": 6.308538627122697e-05, "loss": 0.0241, "step": 17016 }, { "epoch": 2.3850035038542394, "grad_norm": 0.2118389755487442, "learning_rate": 6.307103563740731e-05, "loss": 0.024, "step": 17017 }, { "epoch": 2.3851436580238263, "grad_norm": 0.528901994228363, "learning_rate": 6.305668500358765e-05, "loss": 0.0783, "step": 17018 }, { "epoch": 2.385283812193413, "grad_norm": 0.2793574333190918, "learning_rate": 6.3042334369768e-05, "loss": 0.0168, "step": 17019 }, { "epoch": 2.3854239663629992, 
"grad_norm": 0.46677568554878235, "learning_rate": 6.302798373594832e-05, "loss": 0.1444, "step": 17020 }, { "epoch": 2.3855641205325857, "grad_norm": 0.04272417724132538, "learning_rate": 6.301363310212868e-05, "loss": 0.0047, "step": 17021 }, { "epoch": 2.3857042747021726, "grad_norm": 0.28152692317962646, "learning_rate": 6.299928246830901e-05, "loss": 0.0315, "step": 17022 }, { "epoch": 2.385844428871759, "grad_norm": 0.12600915133953094, "learning_rate": 6.298493183448935e-05, "loss": 0.0116, "step": 17023 }, { "epoch": 2.3859845830413455, "grad_norm": 0.2111283391714096, "learning_rate": 6.297058120066969e-05, "loss": 0.0219, "step": 17024 }, { "epoch": 2.386124737210932, "grad_norm": 0.5127525329589844, "learning_rate": 6.295623056685003e-05, "loss": 0.0508, "step": 17025 }, { "epoch": 2.3862648913805184, "grad_norm": 0.4175175428390503, "learning_rate": 6.294187993303038e-05, "loss": 0.0161, "step": 17026 }, { "epoch": 2.386405045550105, "grad_norm": 0.27285531163215637, "learning_rate": 6.29275292992107e-05, "loss": 0.0204, "step": 17027 }, { "epoch": 2.386545199719692, "grad_norm": 0.11451488733291626, "learning_rate": 6.291317866539105e-05, "loss": 0.0375, "step": 17028 }, { "epoch": 2.3866853538892783, "grad_norm": 0.044639017432928085, "learning_rate": 6.289882803157139e-05, "loss": 0.0038, "step": 17029 }, { "epoch": 2.3868255080588647, "grad_norm": 0.06948194652795792, "learning_rate": 6.288447739775173e-05, "loss": 0.0084, "step": 17030 }, { "epoch": 2.386965662228451, "grad_norm": 0.08171617984771729, "learning_rate": 6.287012676393206e-05, "loss": 0.0066, "step": 17031 }, { "epoch": 2.3871058163980376, "grad_norm": 0.37806883454322815, "learning_rate": 6.285577613011242e-05, "loss": 0.0142, "step": 17032 }, { "epoch": 2.3872459705676246, "grad_norm": 0.224162757396698, "learning_rate": 6.284142549629274e-05, "loss": 0.0672, "step": 17033 }, { "epoch": 2.387386124737211, "grad_norm": 0.05461925268173218, "learning_rate": 6.282707486247309e-05, 
"loss": 0.004, "step": 17034 }, { "epoch": 2.3875262789067975, "grad_norm": 0.07444392889738083, "learning_rate": 6.281272422865343e-05, "loss": 0.0077, "step": 17035 }, { "epoch": 2.387666433076384, "grad_norm": 0.08721990138292313, "learning_rate": 6.279837359483377e-05, "loss": 0.0104, "step": 17036 }, { "epoch": 2.3878065872459704, "grad_norm": 0.18916510045528412, "learning_rate": 6.278402296101411e-05, "loss": 0.0195, "step": 17037 }, { "epoch": 2.3879467414155573, "grad_norm": 0.2810911536216736, "learning_rate": 6.276967232719444e-05, "loss": 0.0436, "step": 17038 }, { "epoch": 2.3880868955851438, "grad_norm": 0.20790895819664001, "learning_rate": 6.275532169337478e-05, "loss": 0.0267, "step": 17039 }, { "epoch": 2.38822704975473, "grad_norm": 0.1411064714193344, "learning_rate": 6.274097105955513e-05, "loss": 0.0339, "step": 17040 }, { "epoch": 2.3883672039243167, "grad_norm": 0.36535367369651794, "learning_rate": 6.272662042573547e-05, "loss": 0.0737, "step": 17041 }, { "epoch": 2.388507358093903, "grad_norm": 0.5073526501655579, "learning_rate": 6.271226979191581e-05, "loss": 0.0403, "step": 17042 }, { "epoch": 2.38864751226349, "grad_norm": 0.14355416595935822, "learning_rate": 6.269791915809614e-05, "loss": 0.015, "step": 17043 }, { "epoch": 2.3887876664330765, "grad_norm": 0.2587110102176666, "learning_rate": 6.268356852427648e-05, "loss": 0.0043, "step": 17044 }, { "epoch": 2.388927820602663, "grad_norm": 0.1335587054491043, "learning_rate": 6.266921789045682e-05, "loss": 0.0178, "step": 17045 }, { "epoch": 2.3890679747722494, "grad_norm": 0.18810881674289703, "learning_rate": 6.265486725663716e-05, "loss": 0.0176, "step": 17046 }, { "epoch": 2.389208128941836, "grad_norm": 0.09448571503162384, "learning_rate": 6.26405166228175e-05, "loss": 0.0102, "step": 17047 }, { "epoch": 2.3893482831114223, "grad_norm": 0.2839357554912567, "learning_rate": 6.262616598899785e-05, "loss": 0.0606, "step": 17048 }, { "epoch": 2.3894884372810092, "grad_norm": 
0.2945462465286255, "learning_rate": 6.261181535517818e-05, "loss": 0.0486, "step": 17049 }, { "epoch": 2.3896285914505957, "grad_norm": 0.7806060910224915, "learning_rate": 6.259746472135852e-05, "loss": 0.0193, "step": 17050 }, { "epoch": 2.389768745620182, "grad_norm": 0.15069469809532166, "learning_rate": 6.258311408753886e-05, "loss": 0.0589, "step": 17051 }, { "epoch": 2.3899088997897686, "grad_norm": 0.37087157368659973, "learning_rate": 6.25687634537192e-05, "loss": 0.0124, "step": 17052 }, { "epoch": 2.3900490539593555, "grad_norm": 0.5365495681762695, "learning_rate": 6.255441281989955e-05, "loss": 0.0604, "step": 17053 }, { "epoch": 2.390189208128942, "grad_norm": 0.5000490546226501, "learning_rate": 6.254006218607987e-05, "loss": 0.0914, "step": 17054 }, { "epoch": 2.3903293622985284, "grad_norm": 0.43623706698417664, "learning_rate": 6.252571155226022e-05, "loss": 0.0302, "step": 17055 }, { "epoch": 2.390469516468115, "grad_norm": 0.13209204375743866, "learning_rate": 6.251136091844056e-05, "loss": 0.0212, "step": 17056 }, { "epoch": 2.3906096706377014, "grad_norm": 0.6652680039405823, "learning_rate": 6.24970102846209e-05, "loss": 0.1109, "step": 17057 }, { "epoch": 2.390749824807288, "grad_norm": 0.5584889650344849, "learning_rate": 6.248265965080124e-05, "loss": 0.0574, "step": 17058 }, { "epoch": 2.3908899789768747, "grad_norm": 0.271617591381073, "learning_rate": 6.246830901698157e-05, "loss": 0.0114, "step": 17059 }, { "epoch": 2.391030133146461, "grad_norm": 0.459026962518692, "learning_rate": 6.245395838316191e-05, "loss": 0.0261, "step": 17060 }, { "epoch": 2.3911702873160476, "grad_norm": 0.3595461845397949, "learning_rate": 6.243960774934226e-05, "loss": 0.0291, "step": 17061 }, { "epoch": 2.391310441485634, "grad_norm": 0.1368018090724945, "learning_rate": 6.24252571155226e-05, "loss": 0.0233, "step": 17062 }, { "epoch": 2.3914505956552206, "grad_norm": 0.17785823345184326, "learning_rate": 6.241090648170293e-05, "loss": 0.0122, "step": 
17063 }, { "epoch": 2.3915907498248075, "grad_norm": 0.34287145733833313, "learning_rate": 6.239655584788328e-05, "loss": 0.0073, "step": 17064 }, { "epoch": 2.391730903994394, "grad_norm": 0.23111079633235931, "learning_rate": 6.238220521406361e-05, "loss": 0.0647, "step": 17065 }, { "epoch": 2.3918710581639804, "grad_norm": 0.14815792441368103, "learning_rate": 6.236785458024395e-05, "loss": 0.0059, "step": 17066 }, { "epoch": 2.392011212333567, "grad_norm": 0.7067782282829285, "learning_rate": 6.23535039464243e-05, "loss": 0.0763, "step": 17067 }, { "epoch": 2.3921513665031533, "grad_norm": 0.8463408350944519, "learning_rate": 6.233915331260464e-05, "loss": 0.2086, "step": 17068 }, { "epoch": 2.39229152067274, "grad_norm": 0.7380649447441101, "learning_rate": 6.232480267878498e-05, "loss": 0.1277, "step": 17069 }, { "epoch": 2.3924316748423267, "grad_norm": 0.692986786365509, "learning_rate": 6.231045204496531e-05, "loss": 0.0661, "step": 17070 }, { "epoch": 2.392571829011913, "grad_norm": 0.20089690387248993, "learning_rate": 6.229610141114566e-05, "loss": 0.0286, "step": 17071 }, { "epoch": 2.3927119831814996, "grad_norm": 0.1232534870505333, "learning_rate": 6.228175077732599e-05, "loss": 0.0205, "step": 17072 }, { "epoch": 2.392852137351086, "grad_norm": 0.2921312153339386, "learning_rate": 6.226740014350633e-05, "loss": 0.0275, "step": 17073 }, { "epoch": 2.392992291520673, "grad_norm": 0.2044769823551178, "learning_rate": 6.225304950968668e-05, "loss": 0.0397, "step": 17074 }, { "epoch": 2.3931324456902594, "grad_norm": 0.10084021091461182, "learning_rate": 6.223869887586702e-05, "loss": 0.0198, "step": 17075 }, { "epoch": 2.393272599859846, "grad_norm": 0.13797718286514282, "learning_rate": 6.222434824204735e-05, "loss": 0.0097, "step": 17076 }, { "epoch": 2.3934127540294323, "grad_norm": 0.49426141381263733, "learning_rate": 6.220999760822769e-05, "loss": 0.0251, "step": 17077 }, { "epoch": 2.393552908199019, "grad_norm": 0.14347510039806366, 
"learning_rate": 6.219564697440803e-05, "loss": 0.026, "step": 17078 }, { "epoch": 2.3936930623686052, "grad_norm": 0.24426138401031494, "learning_rate": 6.218129634058837e-05, "loss": 0.0398, "step": 17079 }, { "epoch": 2.393833216538192, "grad_norm": 0.09398007392883301, "learning_rate": 6.216694570676872e-05, "loss": 0.0113, "step": 17080 }, { "epoch": 2.3939733707077786, "grad_norm": 0.15549708902835846, "learning_rate": 6.215259507294904e-05, "loss": 0.0227, "step": 17081 }, { "epoch": 2.394113524877365, "grad_norm": 0.0769689753651619, "learning_rate": 6.213824443912939e-05, "loss": 0.006, "step": 17082 }, { "epoch": 2.3942536790469515, "grad_norm": 0.1584383100271225, "learning_rate": 6.212389380530973e-05, "loss": 0.0071, "step": 17083 }, { "epoch": 2.3943938332165384, "grad_norm": 0.049454204738140106, "learning_rate": 6.210954317149007e-05, "loss": 0.0029, "step": 17084 }, { "epoch": 2.394533987386125, "grad_norm": 0.21866178512573242, "learning_rate": 6.209519253767041e-05, "loss": 0.0178, "step": 17085 }, { "epoch": 2.3946741415557113, "grad_norm": 0.3654237985610962, "learning_rate": 6.208084190385074e-05, "loss": 0.0177, "step": 17086 }, { "epoch": 2.394814295725298, "grad_norm": 0.10090696811676025, "learning_rate": 6.20664912700311e-05, "loss": 0.0057, "step": 17087 }, { "epoch": 2.3949544498948843, "grad_norm": 0.1193164810538292, "learning_rate": 6.205214063621143e-05, "loss": 0.0117, "step": 17088 }, { "epoch": 2.3950946040644707, "grad_norm": 0.17001782357692719, "learning_rate": 6.203779000239177e-05, "loss": 0.0477, "step": 17089 }, { "epoch": 2.3952347582340576, "grad_norm": 0.25766634941101074, "learning_rate": 6.202343936857211e-05, "loss": 0.036, "step": 17090 }, { "epoch": 2.395374912403644, "grad_norm": 0.3608410954475403, "learning_rate": 6.200908873475245e-05, "loss": 0.042, "step": 17091 }, { "epoch": 2.3955150665732305, "grad_norm": 0.2884196639060974, "learning_rate": 6.199473810093278e-05, "loss": 0.0171, "step": 17092 }, { 
"epoch": 2.395655220742817, "grad_norm": 0.11337452381849289, "learning_rate": 6.198038746711312e-05, "loss": 0.0069, "step": 17093 }, { "epoch": 2.3957953749124035, "grad_norm": 0.06342663615942001, "learning_rate": 6.196603683329346e-05, "loss": 0.0106, "step": 17094 }, { "epoch": 2.3959355290819904, "grad_norm": 0.5680875778198242, "learning_rate": 6.19516861994738e-05, "loss": 0.0159, "step": 17095 }, { "epoch": 2.396075683251577, "grad_norm": 0.2034035176038742, "learning_rate": 6.193733556565415e-05, "loss": 0.0563, "step": 17096 }, { "epoch": 2.3962158374211633, "grad_norm": 0.18437014520168304, "learning_rate": 6.192298493183448e-05, "loss": 0.0262, "step": 17097 }, { "epoch": 2.3963559915907497, "grad_norm": 0.22780919075012207, "learning_rate": 6.190863429801482e-05, "loss": 0.0471, "step": 17098 }, { "epoch": 2.396496145760336, "grad_norm": 0.04214702546596527, "learning_rate": 6.189428366419516e-05, "loss": 0.0049, "step": 17099 }, { "epoch": 2.396636299929923, "grad_norm": 0.21062171459197998, "learning_rate": 6.18799330303755e-05, "loss": 0.0278, "step": 17100 }, { "epoch": 2.3967764540995096, "grad_norm": 0.2688031494617462, "learning_rate": 6.186558239655585e-05, "loss": 0.0177, "step": 17101 }, { "epoch": 2.396916608269096, "grad_norm": 0.3999437093734741, "learning_rate": 6.185123176273617e-05, "loss": 0.0302, "step": 17102 }, { "epoch": 2.3970567624386825, "grad_norm": 0.4475868046283722, "learning_rate": 6.183688112891653e-05, "loss": 0.0964, "step": 17103 }, { "epoch": 2.397196916608269, "grad_norm": 0.11797259747982025, "learning_rate": 6.182253049509686e-05, "loss": 0.0162, "step": 17104 }, { "epoch": 2.397337070777856, "grad_norm": 0.3219282031059265, "learning_rate": 6.18081798612772e-05, "loss": 0.0204, "step": 17105 }, { "epoch": 2.3974772249474423, "grad_norm": 0.22857385873794556, "learning_rate": 6.179382922745754e-05, "loss": 0.0279, "step": 17106 }, { "epoch": 2.3976173791170288, "grad_norm": 0.30190396308898926, "learning_rate": 
6.177947859363788e-05, "loss": 0.0264, "step": 17107 }, { "epoch": 2.3977575332866152, "grad_norm": 0.02618985064327717, "learning_rate": 6.176512795981821e-05, "loss": 0.0026, "step": 17108 }, { "epoch": 2.3978976874562017, "grad_norm": 0.17172715067863464, "learning_rate": 6.175077732599856e-05, "loss": 0.0201, "step": 17109 }, { "epoch": 2.398037841625788, "grad_norm": 0.09613469243049622, "learning_rate": 6.17364266921789e-05, "loss": 0.0074, "step": 17110 }, { "epoch": 2.398177995795375, "grad_norm": 0.32952970266342163, "learning_rate": 6.172207605835924e-05, "loss": 0.1115, "step": 17111 }, { "epoch": 2.3983181499649615, "grad_norm": 0.013354174792766571, "learning_rate": 6.170772542453958e-05, "loss": 0.0011, "step": 17112 }, { "epoch": 2.398458304134548, "grad_norm": 0.5007480382919312, "learning_rate": 6.169337479071991e-05, "loss": 0.099, "step": 17113 }, { "epoch": 2.3985984583041344, "grad_norm": 0.1851712465286255, "learning_rate": 6.167902415690027e-05, "loss": 0.0179, "step": 17114 }, { "epoch": 2.3987386124737213, "grad_norm": 0.47097906470298767, "learning_rate": 6.16646735230806e-05, "loss": 0.0175, "step": 17115 }, { "epoch": 2.398878766643308, "grad_norm": 0.1594603806734085, "learning_rate": 6.165032288926094e-05, "loss": 0.0088, "step": 17116 }, { "epoch": 2.3990189208128943, "grad_norm": 0.8486140370368958, "learning_rate": 6.163597225544128e-05, "loss": 0.073, "step": 17117 }, { "epoch": 2.3991590749824807, "grad_norm": 0.13665708899497986, "learning_rate": 6.162162162162162e-05, "loss": 0.0079, "step": 17118 }, { "epoch": 2.399299229152067, "grad_norm": 0.17007233202457428, "learning_rate": 6.160727098780196e-05, "loss": 0.0122, "step": 17119 }, { "epoch": 2.3994393833216536, "grad_norm": 2.2276971340179443, "learning_rate": 6.159292035398229e-05, "loss": 0.1992, "step": 17120 }, { "epoch": 2.3995795374912405, "grad_norm": 0.050837937742471695, "learning_rate": 6.157856972016263e-05, "loss": 0.0053, "step": 17121 }, { "epoch": 
2.399719691660827, "grad_norm": 0.096001535654068, "learning_rate": 6.156421908634298e-05, "loss": 0.0138, "step": 17122 }, { "epoch": 2.3998598458304135, "grad_norm": 0.14186835289001465, "learning_rate": 6.154986845252332e-05, "loss": 0.0163, "step": 17123 }, { "epoch": 2.4, "grad_norm": 0.10023430734872818, "learning_rate": 6.153551781870365e-05, "loss": 0.0274, "step": 17124 }, { "epoch": 2.4001401541695864, "grad_norm": 0.10408713668584824, "learning_rate": 6.152116718488399e-05, "loss": 0.013, "step": 17125 }, { "epoch": 2.4002803083391733, "grad_norm": 0.24739912152290344, "learning_rate": 6.150681655106433e-05, "loss": 0.043, "step": 17126 }, { "epoch": 2.4004204625087597, "grad_norm": 0.07923022657632828, "learning_rate": 6.149246591724467e-05, "loss": 0.0049, "step": 17127 }, { "epoch": 2.400560616678346, "grad_norm": 0.17069591581821442, "learning_rate": 6.147811528342501e-05, "loss": 0.0119, "step": 17128 }, { "epoch": 2.4007007708479327, "grad_norm": 0.25277915596961975, "learning_rate": 6.146376464960534e-05, "loss": 0.0388, "step": 17129 }, { "epoch": 2.400840925017519, "grad_norm": 0.35724565386772156, "learning_rate": 6.14494140157857e-05, "loss": 0.0458, "step": 17130 }, { "epoch": 2.4009810791871056, "grad_norm": 0.16841921210289001, "learning_rate": 6.143506338196603e-05, "loss": 0.0178, "step": 17131 }, { "epoch": 2.4011212333566925, "grad_norm": 0.16430306434631348, "learning_rate": 6.142071274814637e-05, "loss": 0.0101, "step": 17132 }, { "epoch": 2.401261387526279, "grad_norm": 0.18938927352428436, "learning_rate": 6.140636211432671e-05, "loss": 0.0185, "step": 17133 }, { "epoch": 2.4014015416958654, "grad_norm": 0.11738921701908112, "learning_rate": 6.139201148050705e-05, "loss": 0.009, "step": 17134 }, { "epoch": 2.401541695865452, "grad_norm": 0.740653932094574, "learning_rate": 6.13776608466874e-05, "loss": 0.0246, "step": 17135 }, { "epoch": 2.4016818500350388, "grad_norm": 0.157445028424263, "learning_rate": 6.136331021286772e-05, 
"loss": 0.0305, "step": 17136 }, { "epoch": 2.401822004204625, "grad_norm": 0.12229357659816742, "learning_rate": 6.134895957904807e-05, "loss": 0.0263, "step": 17137 }, { "epoch": 2.4019621583742117, "grad_norm": 0.18288549780845642, "learning_rate": 6.133460894522841e-05, "loss": 0.0161, "step": 17138 }, { "epoch": 2.402102312543798, "grad_norm": 0.20269018411636353, "learning_rate": 6.132025831140875e-05, "loss": 0.0162, "step": 17139 }, { "epoch": 2.4022424667133846, "grad_norm": 0.4243325889110565, "learning_rate": 6.130590767758909e-05, "loss": 0.0396, "step": 17140 }, { "epoch": 2.402382620882971, "grad_norm": 0.398271381855011, "learning_rate": 6.129155704376942e-05, "loss": 0.0585, "step": 17141 }, { "epoch": 2.402522775052558, "grad_norm": 0.22428776323795319, "learning_rate": 6.127720640994976e-05, "loss": 0.0098, "step": 17142 }, { "epoch": 2.4026629292221444, "grad_norm": 0.3369782865047455, "learning_rate": 6.12628557761301e-05, "loss": 0.0252, "step": 17143 }, { "epoch": 2.402803083391731, "grad_norm": 0.1796584576368332, "learning_rate": 6.124850514231045e-05, "loss": 0.0107, "step": 17144 }, { "epoch": 2.4029432375613173, "grad_norm": 0.08359088748693466, "learning_rate": 6.123415450849079e-05, "loss": 0.007, "step": 17145 }, { "epoch": 2.4030833917309042, "grad_norm": 0.16979293525218964, "learning_rate": 6.121980387467113e-05, "loss": 0.0312, "step": 17146 }, { "epoch": 2.4032235459004907, "grad_norm": 0.3132004737854004, "learning_rate": 6.120545324085146e-05, "loss": 0.027, "step": 17147 }, { "epoch": 2.403363700070077, "grad_norm": 0.12117159366607666, "learning_rate": 6.11911026070318e-05, "loss": 0.0291, "step": 17148 }, { "epoch": 2.4035038542396636, "grad_norm": 0.24035337567329407, "learning_rate": 6.117675197321214e-05, "loss": 0.0236, "step": 17149 }, { "epoch": 2.40364400840925, "grad_norm": 0.13443993031978607, "learning_rate": 6.116240133939249e-05, "loss": 0.0125, "step": 17150 }, { "epoch": 2.4037841625788365, "grad_norm": 
0.19733117520809174, "learning_rate": 6.114805070557283e-05, "loss": 0.0283, "step": 17151 }, { "epoch": 2.4039243167484234, "grad_norm": 0.1524735391139984, "learning_rate": 6.113370007175316e-05, "loss": 0.0095, "step": 17152 }, { "epoch": 2.40406447091801, "grad_norm": 0.05169086903333664, "learning_rate": 6.111934943793351e-05, "loss": 0.0028, "step": 17153 }, { "epoch": 2.4042046250875964, "grad_norm": 0.28921663761138916, "learning_rate": 6.110499880411384e-05, "loss": 0.0354, "step": 17154 }, { "epoch": 2.404344779257183, "grad_norm": 0.08542416989803314, "learning_rate": 6.109064817029418e-05, "loss": 0.0074, "step": 17155 }, { "epoch": 2.4044849334267693, "grad_norm": 0.2869613766670227, "learning_rate": 6.107629753647453e-05, "loss": 0.0511, "step": 17156 }, { "epoch": 2.404625087596356, "grad_norm": 0.23059694468975067, "learning_rate": 6.106194690265487e-05, "loss": 0.0321, "step": 17157 }, { "epoch": 2.4047652417659426, "grad_norm": 0.15354962646961212, "learning_rate": 6.10475962688352e-05, "loss": 0.0233, "step": 17158 }, { "epoch": 2.404905395935529, "grad_norm": 0.1864965856075287, "learning_rate": 6.103324563501554e-05, "loss": 0.0346, "step": 17159 }, { "epoch": 2.4050455501051156, "grad_norm": 0.3018342852592468, "learning_rate": 6.101889500119588e-05, "loss": 0.012, "step": 17160 }, { "epoch": 2.405185704274702, "grad_norm": 0.1295814961194992, "learning_rate": 6.100454436737622e-05, "loss": 0.0081, "step": 17161 }, { "epoch": 2.4053258584442885, "grad_norm": 0.947309672832489, "learning_rate": 6.099019373355656e-05, "loss": 0.1207, "step": 17162 }, { "epoch": 2.4054660126138754, "grad_norm": 0.6175588369369507, "learning_rate": 6.09758430997369e-05, "loss": 0.0639, "step": 17163 }, { "epoch": 2.405606166783462, "grad_norm": 0.21148596704006195, "learning_rate": 6.0961492465917236e-05, "loss": 0.0142, "step": 17164 }, { "epoch": 2.4057463209530483, "grad_norm": 0.03760712593793869, "learning_rate": 6.0947141832097585e-05, "loss": 0.0019, 
"step": 17165 }, { "epoch": 2.4058864751226348, "grad_norm": 0.044599004089832306, "learning_rate": 6.093279119827792e-05, "loss": 0.0018, "step": 17166 }, { "epoch": 2.4060266292922217, "grad_norm": 0.15554621815681458, "learning_rate": 6.0918440564458255e-05, "loss": 0.0057, "step": 17167 }, { "epoch": 2.406166783461808, "grad_norm": 0.46567302942276, "learning_rate": 6.09040899306386e-05, "loss": 0.0275, "step": 17168 }, { "epoch": 2.4063069376313946, "grad_norm": 0.03271007910370827, "learning_rate": 6.088973929681894e-05, "loss": 0.0028, "step": 17169 }, { "epoch": 2.406447091800981, "grad_norm": 0.197012260556221, "learning_rate": 6.0875388662999275e-05, "loss": 0.0103, "step": 17170 }, { "epoch": 2.4065872459705675, "grad_norm": 0.25466659665107727, "learning_rate": 6.086103802917962e-05, "loss": 0.0852, "step": 17171 }, { "epoch": 2.406727400140154, "grad_norm": 0.13236021995544434, "learning_rate": 6.084668739535995e-05, "loss": 0.0108, "step": 17172 }, { "epoch": 2.406867554309741, "grad_norm": 0.19801558554172516, "learning_rate": 6.08323367615403e-05, "loss": 0.0964, "step": 17173 }, { "epoch": 2.4070077084793273, "grad_norm": 0.3901863098144531, "learning_rate": 6.081798612772064e-05, "loss": 0.0081, "step": 17174 }, { "epoch": 2.407147862648914, "grad_norm": 0.2413594275712967, "learning_rate": 6.080363549390097e-05, "loss": 0.0493, "step": 17175 }, { "epoch": 2.4072880168185002, "grad_norm": 0.1466943770647049, "learning_rate": 6.0789284860081314e-05, "loss": 0.0201, "step": 17176 }, { "epoch": 2.4074281709880867, "grad_norm": 0.16011151671409607, "learning_rate": 6.0774934226261656e-05, "loss": 0.0365, "step": 17177 }, { "epoch": 2.4075683251576736, "grad_norm": 0.18179325759410858, "learning_rate": 6.076058359244199e-05, "loss": 0.0489, "step": 17178 }, { "epoch": 2.40770847932726, "grad_norm": 0.2514638900756836, "learning_rate": 6.0746232958622334e-05, "loss": 0.0337, "step": 17179 }, { "epoch": 2.4078486334968465, "grad_norm": 
0.2746260166168213, "learning_rate": 6.0731882324802676e-05, "loss": 0.0207, "step": 17180 }, { "epoch": 2.407988787666433, "grad_norm": 0.18683600425720215, "learning_rate": 6.071753169098302e-05, "loss": 0.0135, "step": 17181 }, { "epoch": 2.4081289418360194, "grad_norm": 0.3731100857257843, "learning_rate": 6.070318105716335e-05, "loss": 0.0747, "step": 17182 }, { "epoch": 2.4082690960056063, "grad_norm": 0.07254581153392792, "learning_rate": 6.068883042334369e-05, "loss": 0.004, "step": 17183 }, { "epoch": 2.408409250175193, "grad_norm": 0.1650509238243103, "learning_rate": 6.067447978952404e-05, "loss": 0.0296, "step": 17184 }, { "epoch": 2.4085494043447793, "grad_norm": 0.20255090296268463, "learning_rate": 6.066012915570437e-05, "loss": 0.0188, "step": 17185 }, { "epoch": 2.4086895585143657, "grad_norm": 0.3189687430858612, "learning_rate": 6.064577852188471e-05, "loss": 0.0855, "step": 17186 }, { "epoch": 2.408829712683952, "grad_norm": 0.12048765271902084, "learning_rate": 6.063142788806505e-05, "loss": 0.0155, "step": 17187 }, { "epoch": 2.408969866853539, "grad_norm": 0.3445298671722412, "learning_rate": 6.061707725424539e-05, "loss": 0.0524, "step": 17188 }, { "epoch": 2.4091100210231255, "grad_norm": 0.22000162303447723, "learning_rate": 6.0602726620425734e-05, "loss": 0.0196, "step": 17189 }, { "epoch": 2.409250175192712, "grad_norm": 0.22827807068824768, "learning_rate": 6.058837598660607e-05, "loss": 0.0283, "step": 17190 }, { "epoch": 2.4093903293622985, "grad_norm": 0.277342289686203, "learning_rate": 6.0574025352786405e-05, "loss": 0.0076, "step": 17191 }, { "epoch": 2.409530483531885, "grad_norm": 0.2629663348197937, "learning_rate": 6.0559674718966754e-05, "loss": 0.0227, "step": 17192 }, { "epoch": 2.4096706377014714, "grad_norm": 0.47268515825271606, "learning_rate": 6.054532408514709e-05, "loss": 0.0381, "step": 17193 }, { "epoch": 2.4098107918710583, "grad_norm": 0.14151810109615326, "learning_rate": 6.0530973451327425e-05, "loss": 0.0242, 
"step": 17194 }, { "epoch": 2.4099509460406447, "grad_norm": 0.12931035459041595, "learning_rate": 6.051662281750777e-05, "loss": 0.017, "step": 17195 }, { "epoch": 2.410091100210231, "grad_norm": 0.18927617371082306, "learning_rate": 6.050227218368811e-05, "loss": 0.024, "step": 17196 }, { "epoch": 2.4102312543798177, "grad_norm": 0.3010213077068329, "learning_rate": 6.048792154986845e-05, "loss": 0.0333, "step": 17197 }, { "epoch": 2.4103714085494046, "grad_norm": 0.17706774175167084, "learning_rate": 6.0473570916048786e-05, "loss": 0.061, "step": 17198 }, { "epoch": 2.410511562718991, "grad_norm": 0.17203454673290253, "learning_rate": 6.045922028222912e-05, "loss": 0.0108, "step": 17199 }, { "epoch": 2.4106517168885775, "grad_norm": 0.19356462359428406, "learning_rate": 6.044486964840947e-05, "loss": 0.0446, "step": 17200 }, { "epoch": 2.410791871058164, "grad_norm": 0.8297691345214844, "learning_rate": 6.0430519014589806e-05, "loss": 0.0288, "step": 17201 }, { "epoch": 2.4109320252277504, "grad_norm": 0.3473604917526245, "learning_rate": 6.041616838077014e-05, "loss": 0.0261, "step": 17202 }, { "epoch": 2.411072179397337, "grad_norm": 0.14792002737522125, "learning_rate": 6.040181774695048e-05, "loss": 0.0149, "step": 17203 }, { "epoch": 2.4112123335669238, "grad_norm": 0.31362733244895935, "learning_rate": 6.0387467113130825e-05, "loss": 0.0364, "step": 17204 }, { "epoch": 2.4113524877365102, "grad_norm": 0.07948626577854156, "learning_rate": 6.037311647931117e-05, "loss": 0.0072, "step": 17205 }, { "epoch": 2.4114926419060967, "grad_norm": 0.21225471794605255, "learning_rate": 6.03587658454915e-05, "loss": 0.0658, "step": 17206 }, { "epoch": 2.411632796075683, "grad_norm": 0.14901341497898102, "learning_rate": 6.034441521167184e-05, "loss": 0.03, "step": 17207 }, { "epoch": 2.4117729502452696, "grad_norm": 0.20639602839946747, "learning_rate": 6.033006457785219e-05, "loss": 0.0253, "step": 17208 }, { "epoch": 2.4119131044148565, "grad_norm": 
0.17323389649391174, "learning_rate": 6.031571394403252e-05, "loss": 0.0405, "step": 17209 }, { "epoch": 2.412053258584443, "grad_norm": 0.16269272565841675, "learning_rate": 6.030136331021286e-05, "loss": 0.0109, "step": 17210 }, { "epoch": 2.4121934127540294, "grad_norm": 0.3181898295879364, "learning_rate": 6.02870126763932e-05, "loss": 0.023, "step": 17211 }, { "epoch": 2.412333566923616, "grad_norm": 0.4775742292404175, "learning_rate": 6.027266204257354e-05, "loss": 0.1531, "step": 17212 }, { "epoch": 2.4124737210932024, "grad_norm": 0.21781279146671295, "learning_rate": 6.0258311408753884e-05, "loss": 0.0239, "step": 17213 }, { "epoch": 2.4126138752627893, "grad_norm": 0.09848164767026901, "learning_rate": 6.024396077493422e-05, "loss": 0.0075, "step": 17214 }, { "epoch": 2.4127540294323757, "grad_norm": 0.6712706685066223, "learning_rate": 6.0229610141114555e-05, "loss": 0.1075, "step": 17215 }, { "epoch": 2.412894183601962, "grad_norm": 1.1856974363327026, "learning_rate": 6.0215259507294904e-05, "loss": 0.0955, "step": 17216 }, { "epoch": 2.4130343377715486, "grad_norm": 0.43746069073677063, "learning_rate": 6.020090887347524e-05, "loss": 0.0472, "step": 17217 }, { "epoch": 2.413174491941135, "grad_norm": 0.17862793803215027, "learning_rate": 6.0186558239655574e-05, "loss": 0.0096, "step": 17218 }, { "epoch": 2.413314646110722, "grad_norm": 2.1139705181121826, "learning_rate": 6.017220760583592e-05, "loss": 0.2104, "step": 17219 }, { "epoch": 2.4134548002803085, "grad_norm": 1.1476143598556519, "learning_rate": 6.015785697201626e-05, "loss": 0.1617, "step": 17220 }, { "epoch": 2.413594954449895, "grad_norm": 0.20820845663547516, "learning_rate": 6.01435063381966e-05, "loss": 0.0289, "step": 17221 }, { "epoch": 2.4137351086194814, "grad_norm": 0.27490726113319397, "learning_rate": 6.0129155704376936e-05, "loss": 0.0213, "step": 17222 }, { "epoch": 2.413875262789068, "grad_norm": 0.13856208324432373, "learning_rate": 6.011480507055728e-05, "loss": 0.0087, 
"step": 17223 }, { "epoch": 2.4140154169586543, "grad_norm": 0.24667789041996002, "learning_rate": 6.010045443673762e-05, "loss": 0.03, "step": 17224 }, { "epoch": 2.414155571128241, "grad_norm": 0.09652167558670044, "learning_rate": 6.0086103802917956e-05, "loss": 0.0119, "step": 17225 }, { "epoch": 2.4142957252978277, "grad_norm": 0.2773926556110382, "learning_rate": 6.007175316909829e-05, "loss": 0.0278, "step": 17226 }, { "epoch": 2.414435879467414, "grad_norm": 0.25053325295448303, "learning_rate": 6.005740253527864e-05, "loss": 0.0232, "step": 17227 }, { "epoch": 2.4145760336370006, "grad_norm": 0.09703636169433594, "learning_rate": 6.0043051901458975e-05, "loss": 0.0085, "step": 17228 }, { "epoch": 2.4147161878065875, "grad_norm": 0.12566767632961273, "learning_rate": 6.002870126763932e-05, "loss": 0.0367, "step": 17229 }, { "epoch": 2.414856341976174, "grad_norm": 0.35741427540779114, "learning_rate": 6.001435063381965e-05, "loss": 0.0277, "step": 17230 }, { "epoch": 2.4149964961457604, "grad_norm": 0.29923439025878906, "learning_rate": 5.9999999999999995e-05, "loss": 0.0533, "step": 17231 }, { "epoch": 2.415136650315347, "grad_norm": 0.3132534325122833, "learning_rate": 5.998564936618034e-05, "loss": 0.0444, "step": 17232 }, { "epoch": 2.4152768044849333, "grad_norm": 0.17771215736865997, "learning_rate": 5.997129873236067e-05, "loss": 0.0247, "step": 17233 }, { "epoch": 2.4154169586545198, "grad_norm": 0.2619757056236267, "learning_rate": 5.995694809854101e-05, "loss": 0.049, "step": 17234 }, { "epoch": 2.4155571128241067, "grad_norm": 0.3328060209751129, "learning_rate": 5.9942597464721356e-05, "loss": 0.0341, "step": 17235 }, { "epoch": 2.415697266993693, "grad_norm": 0.21630752086639404, "learning_rate": 5.992824683090169e-05, "loss": 0.0182, "step": 17236 }, { "epoch": 2.4158374211632796, "grad_norm": 0.219991534948349, "learning_rate": 5.9913896197082034e-05, "loss": 0.0286, "step": 17237 }, { "epoch": 2.415977575332866, "grad_norm": 
0.647718608379364, "learning_rate": 5.989954556326237e-05, "loss": 0.0338, "step": 17238 }, { "epoch": 2.4161177295024525, "grad_norm": 0.11938625574111938, "learning_rate": 5.988519492944271e-05, "loss": 0.0125, "step": 17239 }, { "epoch": 2.4162578836720394, "grad_norm": 0.048991985619068146, "learning_rate": 5.9870844295623053e-05, "loss": 0.004, "step": 17240 }, { "epoch": 2.416398037841626, "grad_norm": 0.24102351069450378, "learning_rate": 5.985649366180339e-05, "loss": 0.0184, "step": 17241 }, { "epoch": 2.4165381920112123, "grad_norm": 0.06479820609092712, "learning_rate": 5.9842143027983724e-05, "loss": 0.0092, "step": 17242 }, { "epoch": 2.416678346180799, "grad_norm": 0.18250589072704315, "learning_rate": 5.982779239416407e-05, "loss": 0.0329, "step": 17243 }, { "epoch": 2.4168185003503853, "grad_norm": 0.042573247104883194, "learning_rate": 5.981344176034441e-05, "loss": 0.0019, "step": 17244 }, { "epoch": 2.416958654519972, "grad_norm": 0.14258728921413422, "learning_rate": 5.979909112652475e-05, "loss": 0.0151, "step": 17245 }, { "epoch": 2.4170988086895586, "grad_norm": 0.08728601038455963, "learning_rate": 5.9784740492705086e-05, "loss": 0.0036, "step": 17246 }, { "epoch": 2.417238962859145, "grad_norm": 0.496239572763443, "learning_rate": 5.9770389858885435e-05, "loss": 0.0282, "step": 17247 }, { "epoch": 2.4173791170287315, "grad_norm": 0.19870057702064514, "learning_rate": 5.975603922506577e-05, "loss": 0.025, "step": 17248 }, { "epoch": 2.417519271198318, "grad_norm": 0.16261160373687744, "learning_rate": 5.9741688591246105e-05, "loss": 0.0277, "step": 17249 }, { "epoch": 2.417659425367905, "grad_norm": 0.3400171101093292, "learning_rate": 5.972733795742644e-05, "loss": 0.0126, "step": 17250 }, { "epoch": 2.4177995795374914, "grad_norm": 0.06124268099665642, "learning_rate": 5.971298732360679e-05, "loss": 0.0075, "step": 17251 }, { "epoch": 2.417939733707078, "grad_norm": 0.09326623380184174, "learning_rate": 5.9698636689787125e-05, "loss": 
0.0115, "step": 17252 }, { "epoch": 2.4180798878766643, "grad_norm": 0.1605793833732605, "learning_rate": 5.968428605596747e-05, "loss": 0.0358, "step": 17253 }, { "epoch": 2.4182200420462507, "grad_norm": 0.15432000160217285, "learning_rate": 5.966993542214781e-05, "loss": 0.007, "step": 17254 }, { "epoch": 2.418360196215837, "grad_norm": 0.1933581382036209, "learning_rate": 5.965558478832815e-05, "loss": 0.016, "step": 17255 }, { "epoch": 2.418500350385424, "grad_norm": 0.3474451005458832, "learning_rate": 5.9641234154508487e-05, "loss": 0.0534, "step": 17256 }, { "epoch": 2.4186405045550106, "grad_norm": 0.2471638321876526, "learning_rate": 5.962688352068882e-05, "loss": 0.0809, "step": 17257 }, { "epoch": 2.418780658724597, "grad_norm": 0.18646566569805145, "learning_rate": 5.961253288686917e-05, "loss": 0.0078, "step": 17258 }, { "epoch": 2.4189208128941835, "grad_norm": 0.09521641582250595, "learning_rate": 5.9598182253049506e-05, "loss": 0.0076, "step": 17259 }, { "epoch": 2.4190609670637704, "grad_norm": 0.19135162234306335, "learning_rate": 5.958383161922984e-05, "loss": 0.0565, "step": 17260 }, { "epoch": 2.419201121233357, "grad_norm": 0.15246811509132385, "learning_rate": 5.9569480985410184e-05, "loss": 0.0129, "step": 17261 }, { "epoch": 2.4193412754029433, "grad_norm": 0.038744255900382996, "learning_rate": 5.9555130351590526e-05, "loss": 0.0035, "step": 17262 }, { "epoch": 2.4194814295725298, "grad_norm": 0.09676238894462585, "learning_rate": 5.954077971777087e-05, "loss": 0.0068, "step": 17263 }, { "epoch": 2.419621583742116, "grad_norm": 0.3693442940711975, "learning_rate": 5.95264290839512e-05, "loss": 0.0159, "step": 17264 }, { "epoch": 2.4197617379117027, "grad_norm": 0.20712009072303772, "learning_rate": 5.951207845013154e-05, "loss": 0.0171, "step": 17265 }, { "epoch": 2.4199018920812896, "grad_norm": 0.1251690536737442, "learning_rate": 5.949772781631189e-05, "loss": 0.0039, "step": 17266 }, { "epoch": 2.420042046250876, "grad_norm": 
0.5674152970314026, "learning_rate": 5.948337718249222e-05, "loss": 0.0474, "step": 17267 }, { "epoch": 2.4201822004204625, "grad_norm": 0.5214142203330994, "learning_rate": 5.946902654867256e-05, "loss": 0.0746, "step": 17268 }, { "epoch": 2.420322354590049, "grad_norm": 0.6095026135444641, "learning_rate": 5.94546759148529e-05, "loss": 0.0226, "step": 17269 }, { "epoch": 2.4204625087596354, "grad_norm": 2.0127460956573486, "learning_rate": 5.944032528103324e-05, "loss": 0.1201, "step": 17270 }, { "epoch": 2.4206026629292223, "grad_norm": 0.1310524046421051, "learning_rate": 5.9425974647213584e-05, "loss": 0.0238, "step": 17271 }, { "epoch": 2.420742817098809, "grad_norm": 0.0937386304140091, "learning_rate": 5.941162401339392e-05, "loss": 0.0119, "step": 17272 }, { "epoch": 2.4208829712683952, "grad_norm": 0.21025891602039337, "learning_rate": 5.9397273379574255e-05, "loss": 0.0178, "step": 17273 }, { "epoch": 2.4210231254379817, "grad_norm": 0.5389658808708191, "learning_rate": 5.9382922745754604e-05, "loss": 0.0327, "step": 17274 }, { "epoch": 2.421163279607568, "grad_norm": 0.14517338573932648, "learning_rate": 5.936857211193494e-05, "loss": 0.0289, "step": 17275 }, { "epoch": 2.4213034337771546, "grad_norm": 0.1003274917602539, "learning_rate": 5.9354221478115275e-05, "loss": 0.0076, "step": 17276 }, { "epoch": 2.4214435879467415, "grad_norm": 0.267656147480011, "learning_rate": 5.933987084429562e-05, "loss": 0.0478, "step": 17277 }, { "epoch": 2.421583742116328, "grad_norm": 0.13005879521369934, "learning_rate": 5.932552021047596e-05, "loss": 0.0119, "step": 17278 }, { "epoch": 2.4217238962859144, "grad_norm": 0.1691494882106781, "learning_rate": 5.93111695766563e-05, "loss": 0.0215, "step": 17279 }, { "epoch": 2.421864050455501, "grad_norm": 0.2108124941587448, "learning_rate": 5.9296818942836636e-05, "loss": 0.0204, "step": 17280 }, { "epoch": 2.422004204625088, "grad_norm": 0.24599264562129974, "learning_rate": 5.928246830901697e-05, "loss": 0.0239, 
"step": 17281 }, { "epoch": 2.4221443587946743, "grad_norm": 0.1283554881811142, "learning_rate": 5.926811767519732e-05, "loss": 0.0059, "step": 17282 }, { "epoch": 2.4222845129642607, "grad_norm": 0.1940176784992218, "learning_rate": 5.9253767041377656e-05, "loss": 0.0213, "step": 17283 }, { "epoch": 2.422424667133847, "grad_norm": 0.07534559816122055, "learning_rate": 5.923941640755799e-05, "loss": 0.0093, "step": 17284 }, { "epoch": 2.4225648213034336, "grad_norm": 0.35346484184265137, "learning_rate": 5.922506577373833e-05, "loss": 0.0114, "step": 17285 }, { "epoch": 2.42270497547302, "grad_norm": 0.2003057897090912, "learning_rate": 5.9210715139918675e-05, "loss": 0.0156, "step": 17286 }, { "epoch": 2.422845129642607, "grad_norm": 0.21672599017620087, "learning_rate": 5.919636450609902e-05, "loss": 0.0144, "step": 17287 }, { "epoch": 2.4229852838121935, "grad_norm": 0.14388997852802277, "learning_rate": 5.918201387227935e-05, "loss": 0.0403, "step": 17288 }, { "epoch": 2.42312543798178, "grad_norm": 0.2987062335014343, "learning_rate": 5.916766323845969e-05, "loss": 0.0106, "step": 17289 }, { "epoch": 2.4232655921513664, "grad_norm": 0.37020429968833923, "learning_rate": 5.915331260464004e-05, "loss": 0.0394, "step": 17290 }, { "epoch": 2.4234057463209533, "grad_norm": 0.27939000725746155, "learning_rate": 5.913896197082037e-05, "loss": 0.0354, "step": 17291 }, { "epoch": 2.4235459004905398, "grad_norm": 0.3336494565010071, "learning_rate": 5.912461133700071e-05, "loss": 0.0293, "step": 17292 }, { "epoch": 2.423686054660126, "grad_norm": 0.3211798071861267, "learning_rate": 5.911026070318106e-05, "loss": 0.0222, "step": 17293 }, { "epoch": 2.4238262088297127, "grad_norm": 0.16890409588813782, "learning_rate": 5.909591006936139e-05, "loss": 0.0112, "step": 17294 }, { "epoch": 2.423966362999299, "grad_norm": 0.09941990673542023, "learning_rate": 5.9081559435541734e-05, "loss": 0.018, "step": 17295 }, { "epoch": 2.4241065171688856, "grad_norm": 
0.26585209369659424, "learning_rate": 5.906720880172207e-05, "loss": 0.01, "step": 17296 }, { "epoch": 2.4242466713384725, "grad_norm": 0.28566795587539673, "learning_rate": 5.905285816790241e-05, "loss": 0.0231, "step": 17297 }, { "epoch": 2.424386825508059, "grad_norm": 0.16020673513412476, "learning_rate": 5.9038507534082754e-05, "loss": 0.0088, "step": 17298 }, { "epoch": 2.4245269796776454, "grad_norm": 0.20671851933002472, "learning_rate": 5.902415690026309e-05, "loss": 0.0462, "step": 17299 }, { "epoch": 2.424667133847232, "grad_norm": 0.24831578135490417, "learning_rate": 5.9009806266443424e-05, "loss": 0.0132, "step": 17300 }, { "epoch": 2.4248072880168183, "grad_norm": 0.11230577528476715, "learning_rate": 5.899545563262377e-05, "loss": 0.0159, "step": 17301 }, { "epoch": 2.4249474421864052, "grad_norm": 0.40549013018608093, "learning_rate": 5.898110499880411e-05, "loss": 0.0288, "step": 17302 }, { "epoch": 2.4250875963559917, "grad_norm": 0.21482624113559723, "learning_rate": 5.896675436498445e-05, "loss": 0.0411, "step": 17303 }, { "epoch": 2.425227750525578, "grad_norm": 0.7297971248626709, "learning_rate": 5.8952403731164786e-05, "loss": 0.041, "step": 17304 }, { "epoch": 2.4253679046951646, "grad_norm": 0.20108667016029358, "learning_rate": 5.893805309734513e-05, "loss": 0.0224, "step": 17305 }, { "epoch": 2.425508058864751, "grad_norm": 0.18769244849681854, "learning_rate": 5.892370246352547e-05, "loss": 0.0152, "step": 17306 }, { "epoch": 2.4256482130343375, "grad_norm": 0.3743934631347656, "learning_rate": 5.8909351829705806e-05, "loss": 0.0271, "step": 17307 }, { "epoch": 2.4257883672039244, "grad_norm": 0.06821400672197342, "learning_rate": 5.889500119588614e-05, "loss": 0.0068, "step": 17308 }, { "epoch": 2.425928521373511, "grad_norm": 0.1807606965303421, "learning_rate": 5.888065056206649e-05, "loss": 0.0239, "step": 17309 }, { "epoch": 2.4260686755430974, "grad_norm": 0.2810732126235962, "learning_rate": 5.8866299928246825e-05, "loss": 
0.0242, "step": 17310 }, { "epoch": 2.426208829712684, "grad_norm": 0.326339989900589, "learning_rate": 5.885194929442717e-05, "loss": 0.0131, "step": 17311 }, { "epoch": 2.4263489838822707, "grad_norm": 0.3636963367462158, "learning_rate": 5.88375986606075e-05, "loss": 0.0584, "step": 17312 }, { "epoch": 2.426489138051857, "grad_norm": 0.2233114242553711, "learning_rate": 5.8823248026787845e-05, "loss": 0.0705, "step": 17313 }, { "epoch": 2.4266292922214436, "grad_norm": 0.05255798250436783, "learning_rate": 5.880889739296819e-05, "loss": 0.0047, "step": 17314 }, { "epoch": 2.42676944639103, "grad_norm": 0.307024210691452, "learning_rate": 5.879454675914852e-05, "loss": 0.0427, "step": 17315 }, { "epoch": 2.4269096005606166, "grad_norm": 0.539598286151886, "learning_rate": 5.878019612532886e-05, "loss": 0.0325, "step": 17316 }, { "epoch": 2.427049754730203, "grad_norm": 0.13242220878601074, "learning_rate": 5.8765845491509206e-05, "loss": 0.0048, "step": 17317 }, { "epoch": 2.42718990889979, "grad_norm": 0.012009680271148682, "learning_rate": 5.875149485768954e-05, "loss": 0.0013, "step": 17318 }, { "epoch": 2.4273300630693764, "grad_norm": 0.25054246187210083, "learning_rate": 5.8737144223869884e-05, "loss": 0.0126, "step": 17319 }, { "epoch": 2.427470217238963, "grad_norm": 0.5420270562171936, "learning_rate": 5.872279359005022e-05, "loss": 0.0058, "step": 17320 }, { "epoch": 2.4276103714085493, "grad_norm": 0.31170839071273804, "learning_rate": 5.870844295623056e-05, "loss": 0.0161, "step": 17321 }, { "epoch": 2.427750525578136, "grad_norm": 0.280725359916687, "learning_rate": 5.86940923224109e-05, "loss": 0.0295, "step": 17322 }, { "epoch": 2.4278906797477227, "grad_norm": 0.22767262160778046, "learning_rate": 5.867974168859124e-05, "loss": 0.0279, "step": 17323 }, { "epoch": 2.428030833917309, "grad_norm": 0.08386383205652237, "learning_rate": 5.8665391054771574e-05, "loss": 0.0053, "step": 17324 }, { "epoch": 2.4281709880868956, "grad_norm": 
0.27017468214035034, "learning_rate": 5.865104042095192e-05, "loss": 0.0277, "step": 17325 }, { "epoch": 2.428311142256482, "grad_norm": 0.16565978527069092, "learning_rate": 5.863668978713226e-05, "loss": 0.0219, "step": 17326 }, { "epoch": 2.4284512964260685, "grad_norm": 0.319987028837204, "learning_rate": 5.86223391533126e-05, "loss": 0.0342, "step": 17327 }, { "epoch": 2.4285914505956554, "grad_norm": 0.11172568798065186, "learning_rate": 5.860798851949294e-05, "loss": 0.0219, "step": 17328 }, { "epoch": 2.428731604765242, "grad_norm": 0.1598750203847885, "learning_rate": 5.859363788567328e-05, "loss": 0.0101, "step": 17329 }, { "epoch": 2.4288717589348283, "grad_norm": 0.28091269731521606, "learning_rate": 5.857928725185362e-05, "loss": 0.0572, "step": 17330 }, { "epoch": 2.4290119131044148, "grad_norm": 0.3136027753353119, "learning_rate": 5.8564936618033955e-05, "loss": 0.0133, "step": 17331 }, { "epoch": 2.4291520672740012, "grad_norm": 0.4446505308151245, "learning_rate": 5.8550585984214304e-05, "loss": 0.0511, "step": 17332 }, { "epoch": 2.429292221443588, "grad_norm": 0.09604140371084213, "learning_rate": 5.853623535039464e-05, "loss": 0.0047, "step": 17333 }, { "epoch": 2.4294323756131746, "grad_norm": 0.22032104432582855, "learning_rate": 5.8521884716574975e-05, "loss": 0.0268, "step": 17334 }, { "epoch": 2.429572529782761, "grad_norm": 0.2517751455307007, "learning_rate": 5.850753408275532e-05, "loss": 0.0415, "step": 17335 }, { "epoch": 2.4297126839523475, "grad_norm": 0.58295738697052, "learning_rate": 5.849318344893566e-05, "loss": 0.0272, "step": 17336 }, { "epoch": 2.429852838121934, "grad_norm": 0.1690925508737564, "learning_rate": 5.8478832815115994e-05, "loss": 0.0376, "step": 17337 }, { "epoch": 2.4299929922915204, "grad_norm": 0.230002298951149, "learning_rate": 5.8464482181296337e-05, "loss": 0.0109, "step": 17338 }, { "epoch": 2.4301331464611073, "grad_norm": 0.20594878494739532, "learning_rate": 5.845013154747667e-05, "loss": 0.0234, 
"step": 17339 }, { "epoch": 2.430273300630694, "grad_norm": 0.13403496146202087, "learning_rate": 5.843578091365702e-05, "loss": 0.0467, "step": 17340 }, { "epoch": 2.4304134548002803, "grad_norm": 0.27012327313423157, "learning_rate": 5.8421430279837356e-05, "loss": 0.0699, "step": 17341 }, { "epoch": 2.4305536089698667, "grad_norm": 0.32172754406929016, "learning_rate": 5.840707964601769e-05, "loss": 0.0428, "step": 17342 }, { "epoch": 2.4306937631394536, "grad_norm": 0.2663648724555969, "learning_rate": 5.8392729012198034e-05, "loss": 0.0393, "step": 17343 }, { "epoch": 2.43083391730904, "grad_norm": 0.3988592028617859, "learning_rate": 5.8378378378378376e-05, "loss": 0.0315, "step": 17344 }, { "epoch": 2.4309740714786265, "grad_norm": 0.34296631813049316, "learning_rate": 5.836402774455871e-05, "loss": 0.0445, "step": 17345 }, { "epoch": 2.431114225648213, "grad_norm": 0.21390260756015778, "learning_rate": 5.834967711073905e-05, "loss": 0.031, "step": 17346 }, { "epoch": 2.4312543798177995, "grad_norm": 0.09705667197704315, "learning_rate": 5.833532647691939e-05, "loss": 0.0035, "step": 17347 }, { "epoch": 2.431394533987386, "grad_norm": 0.2565990388393402, "learning_rate": 5.832097584309974e-05, "loss": 0.066, "step": 17348 }, { "epoch": 2.431534688156973, "grad_norm": 0.4618794918060303, "learning_rate": 5.830662520928007e-05, "loss": 0.0331, "step": 17349 }, { "epoch": 2.4316748423265593, "grad_norm": 0.4538531005382538, "learning_rate": 5.829227457546041e-05, "loss": 0.0417, "step": 17350 }, { "epoch": 2.4318149964961457, "grad_norm": 0.22675305604934692, "learning_rate": 5.827792394164075e-05, "loss": 0.0175, "step": 17351 }, { "epoch": 2.431955150665732, "grad_norm": 0.23802068829536438, "learning_rate": 5.826357330782109e-05, "loss": 0.0353, "step": 17352 }, { "epoch": 2.4320953048353187, "grad_norm": 0.2600937783718109, "learning_rate": 5.824922267400143e-05, "loss": 0.0213, "step": 17353 }, { "epoch": 2.4322354590049056, "grad_norm": 
0.11470136046409607, "learning_rate": 5.823487204018177e-05, "loss": 0.0062, "step": 17354 }, { "epoch": 2.432375613174492, "grad_norm": 0.25565436482429504, "learning_rate": 5.8220521406362105e-05, "loss": 0.0394, "step": 17355 }, { "epoch": 2.4325157673440785, "grad_norm": 0.11170201748609543, "learning_rate": 5.8206170772542454e-05, "loss": 0.0094, "step": 17356 }, { "epoch": 2.432655921513665, "grad_norm": 0.4334985315799713, "learning_rate": 5.819182013872279e-05, "loss": 0.0428, "step": 17357 }, { "epoch": 2.4327960756832514, "grad_norm": 0.13811059296131134, "learning_rate": 5.8177469504903125e-05, "loss": 0.0345, "step": 17358 }, { "epoch": 2.4329362298528383, "grad_norm": 0.4813917577266693, "learning_rate": 5.816311887108347e-05, "loss": 0.0228, "step": 17359 }, { "epoch": 2.4330763840224248, "grad_norm": 0.3816911280155182, "learning_rate": 5.814876823726381e-05, "loss": 0.0362, "step": 17360 }, { "epoch": 2.4332165381920112, "grad_norm": 0.399700403213501, "learning_rate": 5.8134417603444144e-05, "loss": 0.0483, "step": 17361 }, { "epoch": 2.4333566923615977, "grad_norm": 0.0950688049197197, "learning_rate": 5.8120066969624486e-05, "loss": 0.0156, "step": 17362 }, { "epoch": 2.433496846531184, "grad_norm": 0.7060579657554626, "learning_rate": 5.810571633580482e-05, "loss": 0.0337, "step": 17363 }, { "epoch": 2.433637000700771, "grad_norm": 0.18039365112781525, "learning_rate": 5.809136570198517e-05, "loss": 0.0162, "step": 17364 }, { "epoch": 2.4337771548703575, "grad_norm": 0.18923288583755493, "learning_rate": 5.8077015068165506e-05, "loss": 0.0043, "step": 17365 }, { "epoch": 2.433917309039944, "grad_norm": 0.23057551681995392, "learning_rate": 5.806266443434584e-05, "loss": 0.0093, "step": 17366 }, { "epoch": 2.4340574632095304, "grad_norm": 0.0744875818490982, "learning_rate": 5.804831380052619e-05, "loss": 0.0024, "step": 17367 }, { "epoch": 2.434197617379117, "grad_norm": 0.4068446159362793, "learning_rate": 5.8033963166706525e-05, "loss": 
0.0457, "step": 17368 }, { "epoch": 2.4343377715487033, "grad_norm": 2.500253915786743, "learning_rate": 5.801961253288686e-05, "loss": 0.036, "step": 17369 }, { "epoch": 2.4344779257182902, "grad_norm": 0.30666816234588623, "learning_rate": 5.80052618990672e-05, "loss": 0.0257, "step": 17370 }, { "epoch": 2.4346180798878767, "grad_norm": 0.4612908959388733, "learning_rate": 5.7990911265247545e-05, "loss": 0.0832, "step": 17371 }, { "epoch": 2.434758234057463, "grad_norm": 0.2421182096004486, "learning_rate": 5.797656063142789e-05, "loss": 0.0618, "step": 17372 }, { "epoch": 2.4348983882270496, "grad_norm": 0.2944526672363281, "learning_rate": 5.796220999760822e-05, "loss": 0.0074, "step": 17373 }, { "epoch": 2.4350385423966365, "grad_norm": 0.16928522288799286, "learning_rate": 5.794785936378856e-05, "loss": 0.0171, "step": 17374 }, { "epoch": 2.435178696566223, "grad_norm": 0.4732314944267273, "learning_rate": 5.7933508729968907e-05, "loss": 0.051, "step": 17375 }, { "epoch": 2.4353188507358094, "grad_norm": 0.07006821781396866, "learning_rate": 5.791915809614924e-05, "loss": 0.0042, "step": 17376 }, { "epoch": 2.435459004905396, "grad_norm": 0.11372796446084976, "learning_rate": 5.790480746232958e-05, "loss": 0.0113, "step": 17377 }, { "epoch": 2.4355991590749824, "grad_norm": 0.13307808339595795, "learning_rate": 5.789045682850992e-05, "loss": 0.036, "step": 17378 }, { "epoch": 2.435739313244569, "grad_norm": 0.17327964305877686, "learning_rate": 5.787610619469026e-05, "loss": 0.0437, "step": 17379 }, { "epoch": 2.4358794674141557, "grad_norm": 0.1326773762702942, "learning_rate": 5.7861755560870604e-05, "loss": 0.0149, "step": 17380 }, { "epoch": 2.436019621583742, "grad_norm": 0.1691352277994156, "learning_rate": 5.784740492705094e-05, "loss": 0.0106, "step": 17381 }, { "epoch": 2.4361597757533286, "grad_norm": 0.09650550782680511, "learning_rate": 5.7833054293231274e-05, "loss": 0.0189, "step": 17382 }, { "epoch": 2.436299929922915, "grad_norm": 
0.33244314789772034, "learning_rate": 5.781870365941162e-05, "loss": 0.0315, "step": 17383 }, { "epoch": 2.4364400840925016, "grad_norm": 0.4329344630241394, "learning_rate": 5.780435302559196e-05, "loss": 0.0661, "step": 17384 }, { "epoch": 2.4365802382620885, "grad_norm": 0.2881946861743927, "learning_rate": 5.7790002391772294e-05, "loss": 0.0722, "step": 17385 }, { "epoch": 2.436720392431675, "grad_norm": 0.14996686577796936, "learning_rate": 5.7775651757952636e-05, "loss": 0.0505, "step": 17386 }, { "epoch": 2.4368605466012614, "grad_norm": 0.24359512329101562, "learning_rate": 5.776130112413298e-05, "loss": 0.0178, "step": 17387 }, { "epoch": 2.437000700770848, "grad_norm": 0.017672166228294373, "learning_rate": 5.774695049031332e-05, "loss": 0.0014, "step": 17388 }, { "epoch": 2.4371408549404343, "grad_norm": 0.05534207820892334, "learning_rate": 5.7732599856493656e-05, "loss": 0.0035, "step": 17389 }, { "epoch": 2.437281009110021, "grad_norm": 0.18137279152870178, "learning_rate": 5.771824922267399e-05, "loss": 0.0245, "step": 17390 }, { "epoch": 2.4374211632796077, "grad_norm": 0.19302916526794434, "learning_rate": 5.770389858885434e-05, "loss": 0.0181, "step": 17391 }, { "epoch": 2.437561317449194, "grad_norm": 0.3653002977371216, "learning_rate": 5.7689547955034675e-05, "loss": 0.0407, "step": 17392 }, { "epoch": 2.4377014716187806, "grad_norm": 0.15861906111240387, "learning_rate": 5.767519732121501e-05, "loss": 0.0138, "step": 17393 }, { "epoch": 2.437841625788367, "grad_norm": 0.16883985698223114, "learning_rate": 5.766084668739535e-05, "loss": 0.0417, "step": 17394 }, { "epoch": 2.437981779957954, "grad_norm": 0.16244134306907654, "learning_rate": 5.7646496053575695e-05, "loss": 0.0162, "step": 17395 }, { "epoch": 2.4381219341275404, "grad_norm": 0.1126076802611351, "learning_rate": 5.763214541975604e-05, "loss": 0.0048, "step": 17396 }, { "epoch": 2.438262088297127, "grad_norm": 0.1689811497926712, "learning_rate": 5.761779478593637e-05, "loss": 
0.0408, "step": 17397 }, { "epoch": 2.4384022424667133, "grad_norm": 0.13655903935432434, "learning_rate": 5.760344415211671e-05, "loss": 0.0364, "step": 17398 }, { "epoch": 2.4385423966363, "grad_norm": 0.10317081958055496, "learning_rate": 5.7589093518297056e-05, "loss": 0.0214, "step": 17399 }, { "epoch": 2.4386825508058863, "grad_norm": 0.3053319752216339, "learning_rate": 5.757474288447739e-05, "loss": 0.0277, "step": 17400 }, { "epoch": 2.438822704975473, "grad_norm": 0.12531666457653046, "learning_rate": 5.756039225065773e-05, "loss": 0.0126, "step": 17401 }, { "epoch": 2.4389628591450596, "grad_norm": 0.3033042252063751, "learning_rate": 5.7546041616838076e-05, "loss": 0.0106, "step": 17402 }, { "epoch": 2.439103013314646, "grad_norm": 0.42690351605415344, "learning_rate": 5.753169098301841e-05, "loss": 0.0338, "step": 17403 }, { "epoch": 2.4392431674842325, "grad_norm": 0.2444438338279724, "learning_rate": 5.751734034919875e-05, "loss": 0.0169, "step": 17404 }, { "epoch": 2.4393833216538194, "grad_norm": 0.16978374123573303, "learning_rate": 5.750298971537909e-05, "loss": 0.0177, "step": 17405 }, { "epoch": 2.439523475823406, "grad_norm": 0.29568228125572205, "learning_rate": 5.748863908155944e-05, "loss": 0.0532, "step": 17406 }, { "epoch": 2.4396636299929924, "grad_norm": 0.24909085035324097, "learning_rate": 5.747428844773977e-05, "loss": 0.0243, "step": 17407 }, { "epoch": 2.439803784162579, "grad_norm": 0.12027157843112946, "learning_rate": 5.745993781392011e-05, "loss": 0.0176, "step": 17408 }, { "epoch": 2.4399439383321653, "grad_norm": 0.17350029945373535, "learning_rate": 5.7445587180100444e-05, "loss": 0.0298, "step": 17409 }, { "epoch": 2.4400840925017517, "grad_norm": 0.5006131529808044, "learning_rate": 5.743123654628079e-05, "loss": 0.0316, "step": 17410 }, { "epoch": 2.4402242466713386, "grad_norm": 0.3299851417541504, "learning_rate": 5.741688591246113e-05, "loss": 0.0667, "step": 17411 }, { "epoch": 2.440364400840925, "grad_norm": 
0.17065083980560303, "learning_rate": 5.740253527864147e-05, "loss": 0.0261, "step": 17412 }, { "epoch": 2.4405045550105116, "grad_norm": 0.5163525342941284, "learning_rate": 5.7388184644821805e-05, "loss": 0.0069, "step": 17413 }, { "epoch": 2.440644709180098, "grad_norm": 0.18288275599479675, "learning_rate": 5.7373834011002154e-05, "loss": 0.0126, "step": 17414 }, { "epoch": 2.4407848633496845, "grad_norm": 0.3170284032821655, "learning_rate": 5.735948337718249e-05, "loss": 0.019, "step": 17415 }, { "epoch": 2.4409250175192714, "grad_norm": 0.024866115301847458, "learning_rate": 5.7345132743362825e-05, "loss": 0.002, "step": 17416 }, { "epoch": 2.441065171688858, "grad_norm": 0.8360458612442017, "learning_rate": 5.733078210954317e-05, "loss": 0.0731, "step": 17417 }, { "epoch": 2.4412053258584443, "grad_norm": 2.647637367248535, "learning_rate": 5.731643147572351e-05, "loss": 0.1484, "step": 17418 }, { "epoch": 2.4413454800280308, "grad_norm": 0.12292732298374176, "learning_rate": 5.7302080841903844e-05, "loss": 0.0027, "step": 17419 }, { "epoch": 2.441485634197617, "grad_norm": 1.0630799531936646, "learning_rate": 5.7287730208084186e-05, "loss": 0.0933, "step": 17420 }, { "epoch": 2.4416257883672037, "grad_norm": 0.20364363491535187, "learning_rate": 5.727337957426452e-05, "loss": 0.0352, "step": 17421 }, { "epoch": 2.4417659425367906, "grad_norm": 0.1532624065876007, "learning_rate": 5.725902894044487e-05, "loss": 0.0471, "step": 17422 }, { "epoch": 2.441906096706377, "grad_norm": 0.17431098222732544, "learning_rate": 5.7244678306625206e-05, "loss": 0.026, "step": 17423 }, { "epoch": 2.4420462508759635, "grad_norm": 0.20087915658950806, "learning_rate": 5.723032767280554e-05, "loss": 0.0163, "step": 17424 }, { "epoch": 2.44218640504555, "grad_norm": 0.3538360893726349, "learning_rate": 5.7215977038985883e-05, "loss": 0.0563, "step": 17425 }, { "epoch": 2.442326559215137, "grad_norm": 0.11413371562957764, "learning_rate": 5.7201626405166226e-05, "loss": 0.0148, 
"step": 17426 }, { "epoch": 2.4424667133847233, "grad_norm": 0.06987227499485016, "learning_rate": 5.718727577134656e-05, "loss": 0.007, "step": 17427 }, { "epoch": 2.44260686755431, "grad_norm": 0.07358165085315704, "learning_rate": 5.71729251375269e-05, "loss": 0.0067, "step": 17428 }, { "epoch": 2.4427470217238962, "grad_norm": 0.15052703022956848, "learning_rate": 5.715857450370724e-05, "loss": 0.0143, "step": 17429 }, { "epoch": 2.4428871758934827, "grad_norm": 0.33657753467559814, "learning_rate": 5.714422386988759e-05, "loss": 0.0784, "step": 17430 }, { "epoch": 2.443027330063069, "grad_norm": 0.2944040894508362, "learning_rate": 5.712987323606792e-05, "loss": 0.0328, "step": 17431 }, { "epoch": 2.443167484232656, "grad_norm": 0.07326116412878036, "learning_rate": 5.711552260224826e-05, "loss": 0.0082, "step": 17432 }, { "epoch": 2.4433076384022425, "grad_norm": 0.20281057059764862, "learning_rate": 5.71011719684286e-05, "loss": 0.0101, "step": 17433 }, { "epoch": 2.443447792571829, "grad_norm": 0.8643790483474731, "learning_rate": 5.708682133460894e-05, "loss": 0.0396, "step": 17434 }, { "epoch": 2.4435879467414154, "grad_norm": 1.076904296875, "learning_rate": 5.707247070078928e-05, "loss": 0.0353, "step": 17435 }, { "epoch": 2.4437281009110023, "grad_norm": 0.2580803632736206, "learning_rate": 5.705812006696962e-05, "loss": 0.0192, "step": 17436 }, { "epoch": 2.443868255080589, "grad_norm": 0.2819705307483673, "learning_rate": 5.7043769433149955e-05, "loss": 0.0255, "step": 17437 }, { "epoch": 2.4440084092501753, "grad_norm": 0.20648692548274994, "learning_rate": 5.7029418799330304e-05, "loss": 0.0304, "step": 17438 }, { "epoch": 2.4441485634197617, "grad_norm": 0.06767088174819946, "learning_rate": 5.701506816551064e-05, "loss": 0.0038, "step": 17439 }, { "epoch": 2.444288717589348, "grad_norm": 0.6174846291542053, "learning_rate": 5.7000717531690975e-05, "loss": 0.065, "step": 17440 }, { "epoch": 2.4444288717589346, "grad_norm": 0.2815219461917877, 
"learning_rate": 5.6986366897871323e-05, "loss": 0.0319, "step": 17441 }, { "epoch": 2.4445690259285215, "grad_norm": 0.1457332819700241, "learning_rate": 5.697201626405166e-05, "loss": 0.0252, "step": 17442 }, { "epoch": 2.444709180098108, "grad_norm": 0.15533718466758728, "learning_rate": 5.6957665630231994e-05, "loss": 0.0166, "step": 17443 }, { "epoch": 2.4448493342676945, "grad_norm": 0.12442232668399811, "learning_rate": 5.6943314996412336e-05, "loss": 0.0225, "step": 17444 }, { "epoch": 2.444989488437281, "grad_norm": 0.30915507674217224, "learning_rate": 5.692896436259268e-05, "loss": 0.0482, "step": 17445 }, { "epoch": 2.4451296426068674, "grad_norm": 0.30458346009254456, "learning_rate": 5.691461372877302e-05, "loss": 0.0841, "step": 17446 }, { "epoch": 2.4452697967764543, "grad_norm": 0.2273937165737152, "learning_rate": 5.6900263094953356e-05, "loss": 0.054, "step": 17447 }, { "epoch": 2.4454099509460407, "grad_norm": 0.4468878209590912, "learning_rate": 5.688591246113369e-05, "loss": 0.0224, "step": 17448 }, { "epoch": 2.445550105115627, "grad_norm": 0.16137811541557312, "learning_rate": 5.687156182731404e-05, "loss": 0.004, "step": 17449 }, { "epoch": 2.4456902592852137, "grad_norm": 0.13575440645217896, "learning_rate": 5.6857211193494375e-05, "loss": 0.006, "step": 17450 }, { "epoch": 2.4458304134548, "grad_norm": 0.18281446397304535, "learning_rate": 5.684286055967471e-05, "loss": 0.0161, "step": 17451 }, { "epoch": 2.4459705676243866, "grad_norm": 0.5374318957328796, "learning_rate": 5.682850992585505e-05, "loss": 0.0254, "step": 17452 }, { "epoch": 2.4461107217939735, "grad_norm": 0.19403211772441864, "learning_rate": 5.6814159292035395e-05, "loss": 0.0133, "step": 17453 }, { "epoch": 2.44625087596356, "grad_norm": 0.4154665768146515, "learning_rate": 5.679980865821574e-05, "loss": 0.0546, "step": 17454 }, { "epoch": 2.4463910301331464, "grad_norm": 0.028070323169231415, "learning_rate": 5.678545802439607e-05, "loss": 0.0032, "step": 17455 }, { 
"epoch": 2.446531184302733, "grad_norm": 0.08635620772838593, "learning_rate": 5.677110739057641e-05, "loss": 0.0131, "step": 17456 }, { "epoch": 2.4466713384723198, "grad_norm": 0.25716128945350647, "learning_rate": 5.6756756756756757e-05, "loss": 0.056, "step": 17457 }, { "epoch": 2.4468114926419062, "grad_norm": 0.40723538398742676, "learning_rate": 5.674240612293709e-05, "loss": 0.0142, "step": 17458 }, { "epoch": 2.4469516468114927, "grad_norm": 0.4177103042602539, "learning_rate": 5.672805548911743e-05, "loss": 0.0807, "step": 17459 }, { "epoch": 2.447091800981079, "grad_norm": 0.30821430683135986, "learning_rate": 5.671370485529777e-05, "loss": 0.0191, "step": 17460 }, { "epoch": 2.4472319551506656, "grad_norm": 1.9172149896621704, "learning_rate": 5.669935422147811e-05, "loss": 0.0251, "step": 17461 }, { "epoch": 2.447372109320252, "grad_norm": 0.11550029367208481, "learning_rate": 5.6685003587658454e-05, "loss": 0.0294, "step": 17462 }, { "epoch": 2.447512263489839, "grad_norm": 0.34596455097198486, "learning_rate": 5.667065295383879e-05, "loss": 0.0537, "step": 17463 }, { "epoch": 2.4476524176594254, "grad_norm": 0.04013461992144585, "learning_rate": 5.6656302320019124e-05, "loss": 0.0016, "step": 17464 }, { "epoch": 2.447792571829012, "grad_norm": 0.657112181186676, "learning_rate": 5.664195168619947e-05, "loss": 0.0693, "step": 17465 }, { "epoch": 2.4479327259985983, "grad_norm": 0.0632171779870987, "learning_rate": 5.662760105237981e-05, "loss": 0.0034, "step": 17466 }, { "epoch": 2.4480728801681853, "grad_norm": 0.027072517201304436, "learning_rate": 5.6613250418560144e-05, "loss": 0.0023, "step": 17467 }, { "epoch": 2.4482130343377717, "grad_norm": 0.5208009481430054, "learning_rate": 5.6598899784740486e-05, "loss": 0.0369, "step": 17468 }, { "epoch": 2.448353188507358, "grad_norm": 0.1418853998184204, "learning_rate": 5.658454915092083e-05, "loss": 0.0089, "step": 17469 }, { "epoch": 2.4484933426769446, "grad_norm": 2.169551134109497, 
"learning_rate": 5.657019851710117e-05, "loss": 0.0949, "step": 17470 }, { "epoch": 2.448633496846531, "grad_norm": 0.23169922828674316, "learning_rate": 5.6555847883281505e-05, "loss": 0.0216, "step": 17471 }, { "epoch": 2.4487736510161175, "grad_norm": 0.2709794044494629, "learning_rate": 5.654149724946184e-05, "loss": 0.0089, "step": 17472 }, { "epoch": 2.4489138051857045, "grad_norm": 0.1287136971950531, "learning_rate": 5.652714661564219e-05, "loss": 0.0119, "step": 17473 }, { "epoch": 2.449053959355291, "grad_norm": 0.1448056697845459, "learning_rate": 5.6512795981822525e-05, "loss": 0.0095, "step": 17474 }, { "epoch": 2.4491941135248774, "grad_norm": 0.4334147572517395, "learning_rate": 5.649844534800286e-05, "loss": 0.0405, "step": 17475 }, { "epoch": 2.449334267694464, "grad_norm": 0.12910759449005127, "learning_rate": 5.64840947141832e-05, "loss": 0.0163, "step": 17476 }, { "epoch": 2.4494744218640503, "grad_norm": 0.2877131998538971, "learning_rate": 5.6469744080363545e-05, "loss": 0.0403, "step": 17477 }, { "epoch": 2.449614576033637, "grad_norm": 0.12174680829048157, "learning_rate": 5.645539344654389e-05, "loss": 0.0103, "step": 17478 }, { "epoch": 2.4497547302032237, "grad_norm": 0.2639316916465759, "learning_rate": 5.644104281272422e-05, "loss": 0.021, "step": 17479 }, { "epoch": 2.44989488437281, "grad_norm": 0.12304729968309402, "learning_rate": 5.6426692178904564e-05, "loss": 0.0077, "step": 17480 }, { "epoch": 2.4500350385423966, "grad_norm": 0.39205509424209595, "learning_rate": 5.6412341545084906e-05, "loss": 0.0241, "step": 17481 }, { "epoch": 2.450175192711983, "grad_norm": 0.2815830707550049, "learning_rate": 5.639799091126524e-05, "loss": 0.0266, "step": 17482 }, { "epoch": 2.4503153468815695, "grad_norm": 0.5846973061561584, "learning_rate": 5.638364027744558e-05, "loss": 0.0653, "step": 17483 }, { "epoch": 2.4504555010511564, "grad_norm": 0.3415982723236084, "learning_rate": 5.6369289643625926e-05, "loss": 0.0608, "step": 17484 }, { 
"epoch": 2.450595655220743, "grad_norm": 0.2679758667945862, "learning_rate": 5.635493900980626e-05, "loss": 0.0476, "step": 17485 }, { "epoch": 2.4507358093903293, "grad_norm": 0.3460039496421814, "learning_rate": 5.63405883759866e-05, "loss": 0.0226, "step": 17486 }, { "epoch": 2.4508759635599158, "grad_norm": 0.1832917332649231, "learning_rate": 5.632623774216694e-05, "loss": 0.0155, "step": 17487 }, { "epoch": 2.4510161177295027, "grad_norm": 0.25096654891967773, "learning_rate": 5.631188710834728e-05, "loss": 0.0315, "step": 17488 }, { "epoch": 2.451156271899089, "grad_norm": 0.05631822720170021, "learning_rate": 5.629753647452762e-05, "loss": 0.0062, "step": 17489 }, { "epoch": 2.4512964260686756, "grad_norm": 0.1662900298833847, "learning_rate": 5.628318584070796e-05, "loss": 0.0425, "step": 17490 }, { "epoch": 2.451436580238262, "grad_norm": 0.2209007441997528, "learning_rate": 5.6268835206888294e-05, "loss": 0.0341, "step": 17491 }, { "epoch": 2.4515767344078485, "grad_norm": 0.0636403039097786, "learning_rate": 5.625448457306864e-05, "loss": 0.0069, "step": 17492 }, { "epoch": 2.451716888577435, "grad_norm": 0.33765727281570435, "learning_rate": 5.624013393924898e-05, "loss": 0.0168, "step": 17493 }, { "epoch": 2.451857042747022, "grad_norm": 0.23613616824150085, "learning_rate": 5.622578330542932e-05, "loss": 0.0193, "step": 17494 }, { "epoch": 2.4519971969166083, "grad_norm": 0.1330777257680893, "learning_rate": 5.6211432671609655e-05, "loss": 0.007, "step": 17495 }, { "epoch": 2.452137351086195, "grad_norm": 0.13238130509853363, "learning_rate": 5.619708203779e-05, "loss": 0.0388, "step": 17496 }, { "epoch": 2.4522775052557813, "grad_norm": 0.31312137842178345, "learning_rate": 5.618273140397034e-05, "loss": 0.013, "step": 17497 }, { "epoch": 2.4524176594253677, "grad_norm": 0.1287730187177658, "learning_rate": 5.6168380770150675e-05, "loss": 0.0074, "step": 17498 }, { "epoch": 2.4525578135949546, "grad_norm": 0.31204330921173096, "learning_rate": 
5.615403013633101e-05, "loss": 0.0354, "step": 17499 }, { "epoch": 2.452697967764541, "grad_norm": 0.17967405915260315, "learning_rate": 5.613967950251136e-05, "loss": 0.0257, "step": 17500 }, { "epoch": 2.4528381219341275, "grad_norm": 0.07374734431505203, "learning_rate": 5.6125328868691694e-05, "loss": 0.0067, "step": 17501 }, { "epoch": 2.452978276103714, "grad_norm": 0.5038384199142456, "learning_rate": 5.6110978234872036e-05, "loss": 0.0577, "step": 17502 }, { "epoch": 2.4531184302733005, "grad_norm": 0.05403308570384979, "learning_rate": 5.609662760105237e-05, "loss": 0.0034, "step": 17503 }, { "epoch": 2.4532585844428874, "grad_norm": 0.1436837762594223, "learning_rate": 5.6082276967232714e-05, "loss": 0.0133, "step": 17504 }, { "epoch": 2.453398738612474, "grad_norm": 0.16623951494693756, "learning_rate": 5.6067926333413056e-05, "loss": 0.0316, "step": 17505 }, { "epoch": 2.4535388927820603, "grad_norm": 0.14871586859226227, "learning_rate": 5.605357569959339e-05, "loss": 0.0439, "step": 17506 }, { "epoch": 2.4536790469516467, "grad_norm": 0.09176642447710037, "learning_rate": 5.603922506577373e-05, "loss": 0.0106, "step": 17507 }, { "epoch": 2.453819201121233, "grad_norm": 0.13082154095172882, "learning_rate": 5.6024874431954076e-05, "loss": 0.0126, "step": 17508 }, { "epoch": 2.45395935529082, "grad_norm": 0.09946896135807037, "learning_rate": 5.601052379813441e-05, "loss": 0.0034, "step": 17509 }, { "epoch": 2.4540995094604066, "grad_norm": 0.396707683801651, "learning_rate": 5.599617316431475e-05, "loss": 0.068, "step": 17510 }, { "epoch": 2.454239663629993, "grad_norm": 0.3288958966732025, "learning_rate": 5.598182253049509e-05, "loss": 0.0312, "step": 17511 }, { "epoch": 2.4543798177995795, "grad_norm": 0.20087780058383942, "learning_rate": 5.596747189667543e-05, "loss": 0.0107, "step": 17512 }, { "epoch": 2.454519971969166, "grad_norm": 0.5589919090270996, "learning_rate": 5.595312126285577e-05, "loss": 0.0252, "step": 17513 }, { "epoch": 
2.4546601261387524, "grad_norm": 0.13789360225200653, "learning_rate": 5.593877062903611e-05, "loss": 0.0206, "step": 17514 }, { "epoch": 2.4548002803083393, "grad_norm": 0.6786129474639893, "learning_rate": 5.592441999521646e-05, "loss": 0.13, "step": 17515 }, { "epoch": 2.4549404344779258, "grad_norm": 0.46774470806121826, "learning_rate": 5.591006936139679e-05, "loss": 0.0776, "step": 17516 }, { "epoch": 2.455080588647512, "grad_norm": 0.47916993498802185, "learning_rate": 5.589571872757713e-05, "loss": 0.0791, "step": 17517 }, { "epoch": 2.4552207428170987, "grad_norm": 0.1951342672109604, "learning_rate": 5.588136809375747e-05, "loss": 0.0523, "step": 17518 }, { "epoch": 2.4553608969866856, "grad_norm": 2.039923667907715, "learning_rate": 5.586701745993781e-05, "loss": 0.0448, "step": 17519 }, { "epoch": 2.455501051156272, "grad_norm": 0.44729429483413696, "learning_rate": 5.585266682611815e-05, "loss": 0.0136, "step": 17520 }, { "epoch": 2.4556412053258585, "grad_norm": 0.17004776000976562, "learning_rate": 5.583831619229849e-05, "loss": 0.0418, "step": 17521 }, { "epoch": 2.455781359495445, "grad_norm": 0.45309966802597046, "learning_rate": 5.5823965558478824e-05, "loss": 0.0583, "step": 17522 }, { "epoch": 2.4559215136650314, "grad_norm": 0.3523557186126709, "learning_rate": 5.580961492465917e-05, "loss": 0.0547, "step": 17523 }, { "epoch": 2.456061667834618, "grad_norm": 0.1562371402978897, "learning_rate": 5.579526429083951e-05, "loss": 0.0291, "step": 17524 }, { "epoch": 2.456201822004205, "grad_norm": 0.46670010685920715, "learning_rate": 5.5780913657019844e-05, "loss": 0.0736, "step": 17525 }, { "epoch": 2.4563419761737912, "grad_norm": 0.29604804515838623, "learning_rate": 5.5766563023200186e-05, "loss": 0.0314, "step": 17526 }, { "epoch": 2.4564821303433777, "grad_norm": 0.048980217427015305, "learning_rate": 5.575221238938053e-05, "loss": 0.0048, "step": 17527 }, { "epoch": 2.456622284512964, "grad_norm": 0.3164384961128235, "learning_rate": 
5.573786175556087e-05, "loss": 0.0215, "step": 17528 }, { "epoch": 2.4567624386825506, "grad_norm": 0.14032945036888123, "learning_rate": 5.5723511121741206e-05, "loss": 0.0135, "step": 17529 }, { "epoch": 2.4569025928521375, "grad_norm": 0.16116999089717865, "learning_rate": 5.570916048792154e-05, "loss": 0.0191, "step": 17530 }, { "epoch": 2.457042747021724, "grad_norm": 0.5274431109428406, "learning_rate": 5.569480985410189e-05, "loss": 0.0459, "step": 17531 }, { "epoch": 2.4571829011913104, "grad_norm": 0.07985707372426987, "learning_rate": 5.5680459220282225e-05, "loss": 0.0149, "step": 17532 }, { "epoch": 2.457323055360897, "grad_norm": 0.3965173661708832, "learning_rate": 5.566610858646256e-05, "loss": 0.0689, "step": 17533 }, { "epoch": 2.4574632095304834, "grad_norm": 0.10106916725635529, "learning_rate": 5.56517579526429e-05, "loss": 0.007, "step": 17534 }, { "epoch": 2.4576033637000703, "grad_norm": 0.07537499815225601, "learning_rate": 5.5637407318823245e-05, "loss": 0.0121, "step": 17535 }, { "epoch": 2.4577435178696567, "grad_norm": 0.08306626230478287, "learning_rate": 5.562305668500359e-05, "loss": 0.0073, "step": 17536 }, { "epoch": 2.457883672039243, "grad_norm": 0.32017800211906433, "learning_rate": 5.560870605118392e-05, "loss": 0.0142, "step": 17537 }, { "epoch": 2.4580238262088296, "grad_norm": 0.06587784737348557, "learning_rate": 5.559435541736426e-05, "loss": 0.0091, "step": 17538 }, { "epoch": 2.458163980378416, "grad_norm": 0.1399422138929367, "learning_rate": 5.5580004783544606e-05, "loss": 0.0247, "step": 17539 }, { "epoch": 2.458304134548003, "grad_norm": 0.08159590512514114, "learning_rate": 5.556565414972494e-05, "loss": 0.0087, "step": 17540 }, { "epoch": 2.4584442887175895, "grad_norm": 0.5555731058120728, "learning_rate": 5.555130351590528e-05, "loss": 0.0305, "step": 17541 }, { "epoch": 2.458584442887176, "grad_norm": 0.1639791578054428, "learning_rate": 5.553695288208562e-05, "loss": 0.0296, "step": 17542 }, { "epoch": 
2.4587245970567624, "grad_norm": 0.13340994715690613, "learning_rate": 5.552260224826596e-05, "loss": 0.0116, "step": 17543 }, { "epoch": 2.458864751226349, "grad_norm": 0.5196988582611084, "learning_rate": 5.5508251614446304e-05, "loss": 0.0336, "step": 17544 }, { "epoch": 2.4590049053959353, "grad_norm": 0.3339941203594208, "learning_rate": 5.549390098062664e-05, "loss": 0.042, "step": 17545 }, { "epoch": 2.459145059565522, "grad_norm": 0.15877309441566467, "learning_rate": 5.5479550346806974e-05, "loss": 0.0296, "step": 17546 }, { "epoch": 2.4592852137351087, "grad_norm": 0.10402316600084305, "learning_rate": 5.546519971298732e-05, "loss": 0.0129, "step": 17547 }, { "epoch": 2.459425367904695, "grad_norm": 0.21463969349861145, "learning_rate": 5.545084907916766e-05, "loss": 0.0391, "step": 17548 }, { "epoch": 2.4595655220742816, "grad_norm": 0.2541576623916626, "learning_rate": 5.5436498445347994e-05, "loss": 0.0364, "step": 17549 }, { "epoch": 2.4597056762438685, "grad_norm": 0.19378821551799774, "learning_rate": 5.5422147811528336e-05, "loss": 0.0175, "step": 17550 }, { "epoch": 2.459845830413455, "grad_norm": 0.27218207716941833, "learning_rate": 5.540779717770868e-05, "loss": 0.0074, "step": 17551 }, { "epoch": 2.4599859845830414, "grad_norm": 0.25482678413391113, "learning_rate": 5.539344654388902e-05, "loss": 0.0747, "step": 17552 }, { "epoch": 2.460126138752628, "grad_norm": 0.2299598753452301, "learning_rate": 5.5379095910069355e-05, "loss": 0.0277, "step": 17553 }, { "epoch": 2.4602662929222143, "grad_norm": 0.4402224123477936, "learning_rate": 5.53647452762497e-05, "loss": 0.0313, "step": 17554 }, { "epoch": 2.460406447091801, "grad_norm": 0.05747057870030403, "learning_rate": 5.535039464243004e-05, "loss": 0.005, "step": 17555 }, { "epoch": 2.4605466012613877, "grad_norm": 0.39516496658325195, "learning_rate": 5.5336044008610375e-05, "loss": 0.0731, "step": 17556 }, { "epoch": 2.460686755430974, "grad_norm": 0.16670043766498566, "learning_rate": 
5.532169337479071e-05, "loss": 0.0072, "step": 17557 }, { "epoch": 2.4608269096005606, "grad_norm": 0.23560599982738495, "learning_rate": 5.530734274097106e-05, "loss": 0.0062, "step": 17558 }, { "epoch": 2.460967063770147, "grad_norm": 0.23532281816005707, "learning_rate": 5.5292992107151395e-05, "loss": 0.0309, "step": 17559 }, { "epoch": 2.4611072179397335, "grad_norm": 0.13903413712978363, "learning_rate": 5.527864147333174e-05, "loss": 0.0121, "step": 17560 }, { "epoch": 2.4612473721093204, "grad_norm": 0.14731024205684662, "learning_rate": 5.526429083951207e-05, "loss": 0.0058, "step": 17561 }, { "epoch": 2.461387526278907, "grad_norm": 0.22105124592781067, "learning_rate": 5.5249940205692414e-05, "loss": 0.0334, "step": 17562 }, { "epoch": 2.4615276804484933, "grad_norm": 0.18250513076782227, "learning_rate": 5.5235589571872756e-05, "loss": 0.028, "step": 17563 }, { "epoch": 2.46166783461808, "grad_norm": 0.2927420437335968, "learning_rate": 5.522123893805309e-05, "loss": 0.0202, "step": 17564 }, { "epoch": 2.4618079887876663, "grad_norm": 0.17645567655563354, "learning_rate": 5.520688830423343e-05, "loss": 0.0055, "step": 17565 }, { "epoch": 2.461948142957253, "grad_norm": 0.1739467978477478, "learning_rate": 5.5192537670413776e-05, "loss": 0.0071, "step": 17566 }, { "epoch": 2.4620882971268396, "grad_norm": 0.12015873938798904, "learning_rate": 5.517818703659411e-05, "loss": 0.0292, "step": 17567 }, { "epoch": 2.462228451296426, "grad_norm": 0.15247364342212677, "learning_rate": 5.516383640277445e-05, "loss": 0.0073, "step": 17568 }, { "epoch": 2.4623686054660125, "grad_norm": 1.001036524772644, "learning_rate": 5.514948576895479e-05, "loss": 0.0779, "step": 17569 }, { "epoch": 2.462508759635599, "grad_norm": 1.0480105876922607, "learning_rate": 5.513513513513513e-05, "loss": 0.0321, "step": 17570 }, { "epoch": 2.462648913805186, "grad_norm": 0.24105487763881683, "learning_rate": 5.512078450131547e-05, "loss": 0.0369, "step": 17571 }, { "epoch": 
2.4627890679747724, "grad_norm": 0.09204325824975967, "learning_rate": 5.510643386749581e-05, "loss": 0.0125, "step": 17572 }, { "epoch": 2.462929222144359, "grad_norm": 0.32665038108825684, "learning_rate": 5.5092083233676143e-05, "loss": 0.0316, "step": 17573 }, { "epoch": 2.4630693763139453, "grad_norm": 1.4087618589401245, "learning_rate": 5.507773259985649e-05, "loss": 0.0309, "step": 17574 }, { "epoch": 2.4632095304835318, "grad_norm": 0.37037238478660583, "learning_rate": 5.506338196603683e-05, "loss": 0.0332, "step": 17575 }, { "epoch": 2.463349684653118, "grad_norm": 0.26116031408309937, "learning_rate": 5.504903133221717e-05, "loss": 0.0419, "step": 17576 }, { "epoch": 2.463489838822705, "grad_norm": 0.46755537390708923, "learning_rate": 5.5034680698397505e-05, "loss": 0.0267, "step": 17577 }, { "epoch": 2.4636299929922916, "grad_norm": 0.26234257221221924, "learning_rate": 5.502033006457785e-05, "loss": 0.0459, "step": 17578 }, { "epoch": 2.463770147161878, "grad_norm": 0.5037165880203247, "learning_rate": 5.500597943075819e-05, "loss": 0.0349, "step": 17579 }, { "epoch": 2.4639103013314645, "grad_norm": 0.15681907534599304, "learning_rate": 5.4991628796938525e-05, "loss": 0.0134, "step": 17580 }, { "epoch": 2.4640504555010514, "grad_norm": 0.6365145444869995, "learning_rate": 5.497727816311886e-05, "loss": 0.071, "step": 17581 }, { "epoch": 2.464190609670638, "grad_norm": 0.26379770040512085, "learning_rate": 5.496292752929921e-05, "loss": 0.027, "step": 17582 }, { "epoch": 2.4643307638402243, "grad_norm": 0.3846650719642639, "learning_rate": 5.4948576895479544e-05, "loss": 0.0255, "step": 17583 }, { "epoch": 2.4644709180098108, "grad_norm": 0.5092746615409851, "learning_rate": 5.4934226261659886e-05, "loss": 0.0303, "step": 17584 }, { "epoch": 2.4646110721793972, "grad_norm": 0.4900977909564972, "learning_rate": 5.491987562784022e-05, "loss": 0.0111, "step": 17585 }, { "epoch": 2.4647512263489837, "grad_norm": 0.0670226439833641, "learning_rate": 
5.4905524994020564e-05, "loss": 0.0043, "step": 17586 }, { "epoch": 2.4648913805185706, "grad_norm": 0.06610870361328125, "learning_rate": 5.4891174360200906e-05, "loss": 0.0079, "step": 17587 }, { "epoch": 2.465031534688157, "grad_norm": 0.180598184466362, "learning_rate": 5.487682372638124e-05, "loss": 0.0086, "step": 17588 }, { "epoch": 2.4651716888577435, "grad_norm": 0.18171022832393646, "learning_rate": 5.486247309256159e-05, "loss": 0.0269, "step": 17589 }, { "epoch": 2.46531184302733, "grad_norm": 0.2641808092594147, "learning_rate": 5.4848122458741925e-05, "loss": 0.0335, "step": 17590 }, { "epoch": 2.4654519971969164, "grad_norm": 0.29543328285217285, "learning_rate": 5.483377182492226e-05, "loss": 0.0268, "step": 17591 }, { "epoch": 2.4655921513665033, "grad_norm": 0.20150195062160492, "learning_rate": 5.48194211911026e-05, "loss": 0.0955, "step": 17592 }, { "epoch": 2.46573230553609, "grad_norm": 0.3302060067653656, "learning_rate": 5.4805070557282945e-05, "loss": 0.0383, "step": 17593 }, { "epoch": 2.4658724597056763, "grad_norm": 0.22340722382068634, "learning_rate": 5.479071992346328e-05, "loss": 0.0537, "step": 17594 }, { "epoch": 2.4660126138752627, "grad_norm": 0.20863080024719238, "learning_rate": 5.477636928964362e-05, "loss": 0.0174, "step": 17595 }, { "epoch": 2.466152768044849, "grad_norm": 0.280705988407135, "learning_rate": 5.476201865582396e-05, "loss": 0.0259, "step": 17596 }, { "epoch": 2.4662929222144356, "grad_norm": 0.23006755113601685, "learning_rate": 5.474766802200431e-05, "loss": 0.0542, "step": 17597 }, { "epoch": 2.4664330763840225, "grad_norm": 0.44066715240478516, "learning_rate": 5.473331738818464e-05, "loss": 0.0239, "step": 17598 }, { "epoch": 2.466573230553609, "grad_norm": 0.1422925889492035, "learning_rate": 5.471896675436498e-05, "loss": 0.0156, "step": 17599 }, { "epoch": 2.4667133847231955, "grad_norm": 0.5817549824714661, "learning_rate": 5.470461612054532e-05, "loss": 0.073, "step": 17600 }, { "epoch": 
2.466853538892782, "grad_norm": 0.14191415905952454, "learning_rate": 5.469026548672566e-05, "loss": 0.0057, "step": 17601 }, { "epoch": 2.466993693062369, "grad_norm": 0.1116810217499733, "learning_rate": 5.4675914852906e-05, "loss": 0.0091, "step": 17602 }, { "epoch": 2.4671338472319553, "grad_norm": 0.2270602583885193, "learning_rate": 5.466156421908634e-05, "loss": 0.0264, "step": 17603 }, { "epoch": 2.4672740014015417, "grad_norm": 0.38715335726737976, "learning_rate": 5.4647213585266674e-05, "loss": 0.0669, "step": 17604 }, { "epoch": 2.467414155571128, "grad_norm": 0.12069467455148697, "learning_rate": 5.463286295144702e-05, "loss": 0.0071, "step": 17605 }, { "epoch": 2.4675543097407147, "grad_norm": 0.2475578635931015, "learning_rate": 5.461851231762736e-05, "loss": 0.0549, "step": 17606 }, { "epoch": 2.467694463910301, "grad_norm": 0.4554625451564789, "learning_rate": 5.4604161683807694e-05, "loss": 0.0532, "step": 17607 }, { "epoch": 2.467834618079888, "grad_norm": 0.09981324523687363, "learning_rate": 5.4589811049988036e-05, "loss": 0.0165, "step": 17608 }, { "epoch": 2.4679747722494745, "grad_norm": 0.5379135012626648, "learning_rate": 5.457546041616838e-05, "loss": 0.0228, "step": 17609 }, { "epoch": 2.468114926419061, "grad_norm": 0.11988509446382523, "learning_rate": 5.4561109782348714e-05, "loss": 0.0386, "step": 17610 }, { "epoch": 2.4682550805886474, "grad_norm": 0.15549993515014648, "learning_rate": 5.4546759148529056e-05, "loss": 0.0082, "step": 17611 }, { "epoch": 2.4683952347582343, "grad_norm": 0.26520514488220215, "learning_rate": 5.453240851470939e-05, "loss": 0.0377, "step": 17612 }, { "epoch": 2.4685353889278208, "grad_norm": 0.12857557833194733, "learning_rate": 5.451805788088974e-05, "loss": 0.0082, "step": 17613 }, { "epoch": 2.468675543097407, "grad_norm": 0.21515578031539917, "learning_rate": 5.4503707247070075e-05, "loss": 0.0166, "step": 17614 }, { "epoch": 2.4688156972669937, "grad_norm": 0.1563521772623062, "learning_rate": 
5.448935661325041e-05, "loss": 0.0186, "step": 17615 }, { "epoch": 2.46895585143658, "grad_norm": 0.4665060341358185, "learning_rate": 5.447500597943075e-05, "loss": 0.0302, "step": 17616 }, { "epoch": 2.4690960056061666, "grad_norm": 0.22005833685398102, "learning_rate": 5.4460655345611095e-05, "loss": 0.018, "step": 17617 }, { "epoch": 2.4692361597757535, "grad_norm": 0.328733891248703, "learning_rate": 5.444630471179143e-05, "loss": 0.0121, "step": 17618 }, { "epoch": 2.46937631394534, "grad_norm": 0.10675746947526932, "learning_rate": 5.443195407797177e-05, "loss": 0.0039, "step": 17619 }, { "epoch": 2.4695164681149264, "grad_norm": 0.25655025243759155, "learning_rate": 5.441760344415211e-05, "loss": 0.0289, "step": 17620 }, { "epoch": 2.469656622284513, "grad_norm": 0.28004828095436096, "learning_rate": 5.4403252810332456e-05, "loss": 0.0334, "step": 17621 }, { "epoch": 2.4697967764540993, "grad_norm": 0.6077659726142883, "learning_rate": 5.438890217651279e-05, "loss": 0.0644, "step": 17622 }, { "epoch": 2.4699369306236862, "grad_norm": 0.19305074214935303, "learning_rate": 5.437455154269313e-05, "loss": 0.0216, "step": 17623 }, { "epoch": 2.4700770847932727, "grad_norm": 0.4131547808647156, "learning_rate": 5.436020090887347e-05, "loss": 0.0347, "step": 17624 }, { "epoch": 2.470217238962859, "grad_norm": 0.14775022864341736, "learning_rate": 5.434585027505381e-05, "loss": 0.0172, "step": 17625 }, { "epoch": 2.4703573931324456, "grad_norm": 0.17525510489940643, "learning_rate": 5.433149964123415e-05, "loss": 0.0164, "step": 17626 }, { "epoch": 2.470497547302032, "grad_norm": 0.15028899908065796, "learning_rate": 5.431714900741449e-05, "loss": 0.0291, "step": 17627 }, { "epoch": 2.4706377014716185, "grad_norm": 0.42396828532218933, "learning_rate": 5.430279837359483e-05, "loss": 0.0373, "step": 17628 }, { "epoch": 2.4707778556412054, "grad_norm": 0.26741477847099304, "learning_rate": 5.428844773977517e-05, "loss": 0.0815, "step": 17629 }, { "epoch": 
2.470918009810792, "grad_norm": 0.23611553013324738, "learning_rate": 5.427409710595551e-05, "loss": 0.0191, "step": 17630 }, { "epoch": 2.4710581639803784, "grad_norm": 0.18713533878326416, "learning_rate": 5.4259746472135844e-05, "loss": 0.0185, "step": 17631 }, { "epoch": 2.471198318149965, "grad_norm": 0.11704926937818527, "learning_rate": 5.424539583831619e-05, "loss": 0.0178, "step": 17632 }, { "epoch": 2.4713384723195517, "grad_norm": 0.16930925846099854, "learning_rate": 5.423104520449653e-05, "loss": 0.0136, "step": 17633 }, { "epoch": 2.471478626489138, "grad_norm": 0.339443564414978, "learning_rate": 5.421669457067686e-05, "loss": 0.008, "step": 17634 }, { "epoch": 2.4716187806587246, "grad_norm": 0.1321626454591751, "learning_rate": 5.4202343936857205e-05, "loss": 0.017, "step": 17635 }, { "epoch": 2.471758934828311, "grad_norm": 0.36218515038490295, "learning_rate": 5.418799330303755e-05, "loss": 0.0414, "step": 17636 }, { "epoch": 2.4718990889978976, "grad_norm": 0.28529658913612366, "learning_rate": 5.417364266921789e-05, "loss": 0.0315, "step": 17637 }, { "epoch": 2.472039243167484, "grad_norm": 0.1235649436712265, "learning_rate": 5.4159292035398225e-05, "loss": 0.0083, "step": 17638 }, { "epoch": 2.472179397337071, "grad_norm": 0.03561408445239067, "learning_rate": 5.414494140157856e-05, "loss": 0.002, "step": 17639 }, { "epoch": 2.4723195515066574, "grad_norm": 0.22890983521938324, "learning_rate": 5.413059076775891e-05, "loss": 0.0323, "step": 17640 }, { "epoch": 2.472459705676244, "grad_norm": 0.1588759422302246, "learning_rate": 5.4116240133939244e-05, "loss": 0.0307, "step": 17641 }, { "epoch": 2.4725998598458303, "grad_norm": 0.1341257095336914, "learning_rate": 5.410188950011958e-05, "loss": 0.0085, "step": 17642 }, { "epoch": 2.4727400140154168, "grad_norm": 0.7717081904411316, "learning_rate": 5.408753886629992e-05, "loss": 0.0716, "step": 17643 }, { "epoch": 2.4728801681850037, "grad_norm": 0.20445075631141663, "learning_rate": 
5.4073188232480264e-05, "loss": 0.0345, "step": 17644 }, { "epoch": 2.47302032235459, "grad_norm": 0.12482832372188568, "learning_rate": 5.4058837598660606e-05, "loss": 0.0418, "step": 17645 }, { "epoch": 2.4731604765241766, "grad_norm": 0.09583435952663422, "learning_rate": 5.404448696484094e-05, "loss": 0.0172, "step": 17646 }, { "epoch": 2.473300630693763, "grad_norm": 0.8656417727470398, "learning_rate": 5.403013633102128e-05, "loss": 0.1083, "step": 17647 }, { "epoch": 2.4734407848633495, "grad_norm": 0.4089510142803192, "learning_rate": 5.4015785697201626e-05, "loss": 0.0417, "step": 17648 }, { "epoch": 2.4735809390329364, "grad_norm": 0.29118308424949646, "learning_rate": 5.400143506338196e-05, "loss": 0.065, "step": 17649 }, { "epoch": 2.473721093202523, "grad_norm": 0.3107341527938843, "learning_rate": 5.3987084429562296e-05, "loss": 0.0104, "step": 17650 }, { "epoch": 2.4738612473721093, "grad_norm": 0.5279232263565063, "learning_rate": 5.397273379574264e-05, "loss": 0.0311, "step": 17651 }, { "epoch": 2.474001401541696, "grad_norm": 0.13390517234802246, "learning_rate": 5.395838316192298e-05, "loss": 0.0137, "step": 17652 }, { "epoch": 2.4741415557112822, "grad_norm": 0.5480524301528931, "learning_rate": 5.394403252810332e-05, "loss": 0.1103, "step": 17653 }, { "epoch": 2.474281709880869, "grad_norm": 0.47438445687294006, "learning_rate": 5.392968189428366e-05, "loss": 0.0625, "step": 17654 }, { "epoch": 2.4744218640504556, "grad_norm": 0.2500709593296051, "learning_rate": 5.3915331260463993e-05, "loss": 0.0353, "step": 17655 }, { "epoch": 2.474562018220042, "grad_norm": 0.1397605985403061, "learning_rate": 5.390098062664434e-05, "loss": 0.0055, "step": 17656 }, { "epoch": 2.4747021723896285, "grad_norm": 0.27476930618286133, "learning_rate": 5.388662999282468e-05, "loss": 0.0647, "step": 17657 }, { "epoch": 2.474842326559215, "grad_norm": 0.3947570025920868, "learning_rate": 5.387227935900501e-05, "loss": 0.0598, "step": 17658 }, { "epoch": 
2.4749824807288014, "grad_norm": 0.08438500761985779, "learning_rate": 5.3857928725185355e-05, "loss": 0.0097, "step": 17659 }, { "epoch": 2.4751226348983884, "grad_norm": 0.05265098065137863, "learning_rate": 5.38435780913657e-05, "loss": 0.0062, "step": 17660 }, { "epoch": 2.475262789067975, "grad_norm": 0.5026751756668091, "learning_rate": 5.382922745754604e-05, "loss": 0.0414, "step": 17661 }, { "epoch": 2.4754029432375613, "grad_norm": 0.4059378206729889, "learning_rate": 5.3814876823726375e-05, "loss": 0.0441, "step": 17662 }, { "epoch": 2.4755430974071477, "grad_norm": 0.09328906983137131, "learning_rate": 5.3800526189906724e-05, "loss": 0.004, "step": 17663 }, { "epoch": 2.4756832515767346, "grad_norm": 0.09670199453830719, "learning_rate": 5.378617555608706e-05, "loss": 0.0052, "step": 17664 }, { "epoch": 2.475823405746321, "grad_norm": 0.5110570192337036, "learning_rate": 5.3771824922267394e-05, "loss": 0.0458, "step": 17665 }, { "epoch": 2.4759635599159076, "grad_norm": 0.22674758732318878, "learning_rate": 5.375747428844773e-05, "loss": 0.0129, "step": 17666 }, { "epoch": 2.476103714085494, "grad_norm": 0.4692128896713257, "learning_rate": 5.374312365462808e-05, "loss": 0.0542, "step": 17667 }, { "epoch": 2.4762438682550805, "grad_norm": 0.6973477602005005, "learning_rate": 5.3728773020808414e-05, "loss": 0.0369, "step": 17668 }, { "epoch": 2.476384022424667, "grad_norm": 0.5574290752410889, "learning_rate": 5.3714422386988756e-05, "loss": 0.0109, "step": 17669 }, { "epoch": 2.476524176594254, "grad_norm": 1.2437050342559814, "learning_rate": 5.370007175316909e-05, "loss": 0.0368, "step": 17670 }, { "epoch": 2.4766643307638403, "grad_norm": 0.08141283690929413, "learning_rate": 5.368572111934944e-05, "loss": 0.0088, "step": 17671 }, { "epoch": 2.4768044849334268, "grad_norm": 0.17542196810245514, "learning_rate": 5.3671370485529775e-05, "loss": 0.0457, "step": 17672 }, { "epoch": 2.476944639103013, "grad_norm": 0.1895805150270462, "learning_rate": 
5.365701985171011e-05, "loss": 0.0133, "step": 17673 }, { "epoch": 2.4770847932725997, "grad_norm": 0.13209833204746246, "learning_rate": 5.3642669217890446e-05, "loss": 0.0093, "step": 17674 }, { "epoch": 2.4772249474421866, "grad_norm": 0.3393106758594513, "learning_rate": 5.3628318584070795e-05, "loss": 0.0854, "step": 17675 }, { "epoch": 2.477365101611773, "grad_norm": 0.27093663811683655, "learning_rate": 5.361396795025113e-05, "loss": 0.0307, "step": 17676 }, { "epoch": 2.4775052557813595, "grad_norm": 0.15531690418720245, "learning_rate": 5.359961731643147e-05, "loss": 0.0171, "step": 17677 }, { "epoch": 2.477645409950946, "grad_norm": 0.531705379486084, "learning_rate": 5.358526668261181e-05, "loss": 0.0695, "step": 17678 }, { "epoch": 2.4777855641205324, "grad_norm": 0.34920862317085266, "learning_rate": 5.357091604879216e-05, "loss": 0.0275, "step": 17679 }, { "epoch": 2.4779257182901193, "grad_norm": 0.2613199055194855, "learning_rate": 5.355656541497249e-05, "loss": 0.0248, "step": 17680 }, { "epoch": 2.4780658724597058, "grad_norm": 0.15108931064605713, "learning_rate": 5.354221478115283e-05, "loss": 0.0274, "step": 17681 }, { "epoch": 2.4782060266292922, "grad_norm": 0.1552342027425766, "learning_rate": 5.352786414733316e-05, "loss": 0.0259, "step": 17682 }, { "epoch": 2.4783461807988787, "grad_norm": 0.320279985666275, "learning_rate": 5.351351351351351e-05, "loss": 0.0606, "step": 17683 }, { "epoch": 2.478486334968465, "grad_norm": 0.5443435311317444, "learning_rate": 5.349916287969385e-05, "loss": 0.0536, "step": 17684 }, { "epoch": 2.478626489138052, "grad_norm": 0.31841275095939636, "learning_rate": 5.348481224587419e-05, "loss": 0.049, "step": 17685 }, { "epoch": 2.4787666433076385, "grad_norm": 0.12419672310352325, "learning_rate": 5.3470461612054524e-05, "loss": 0.0108, "step": 17686 }, { "epoch": 2.478906797477225, "grad_norm": 0.3704558312892914, "learning_rate": 5.345611097823487e-05, "loss": 0.0406, "step": 17687 }, { "epoch": 
2.4790469516468114, "grad_norm": 0.18540392816066742, "learning_rate": 5.344176034441521e-05, "loss": 0.0224, "step": 17688 }, { "epoch": 2.479187105816398, "grad_norm": 0.49803242087364197, "learning_rate": 5.3427409710595544e-05, "loss": 0.0808, "step": 17689 }, { "epoch": 2.4793272599859844, "grad_norm": 0.23828963935375214, "learning_rate": 5.341305907677588e-05, "loss": 0.0576, "step": 17690 }, { "epoch": 2.4794674141555713, "grad_norm": 0.24287208914756775, "learning_rate": 5.339870844295623e-05, "loss": 0.0731, "step": 17691 }, { "epoch": 2.4796075683251577, "grad_norm": 0.43838050961494446, "learning_rate": 5.3384357809136563e-05, "loss": 0.0264, "step": 17692 }, { "epoch": 2.479747722494744, "grad_norm": 0.13406634330749512, "learning_rate": 5.3370007175316906e-05, "loss": 0.01, "step": 17693 }, { "epoch": 2.4798878766643306, "grad_norm": 0.28545108437538147, "learning_rate": 5.335565654149724e-05, "loss": 0.0296, "step": 17694 }, { "epoch": 2.4800280308339175, "grad_norm": 0.38217201828956604, "learning_rate": 5.334130590767759e-05, "loss": 0.0609, "step": 17695 }, { "epoch": 2.480168185003504, "grad_norm": 0.2534046471118927, "learning_rate": 5.3326955273857925e-05, "loss": 0.0286, "step": 17696 }, { "epoch": 2.4803083391730905, "grad_norm": 0.2855016589164734, "learning_rate": 5.331260464003826e-05, "loss": 0.0329, "step": 17697 }, { "epoch": 2.480448493342677, "grad_norm": 0.42521926760673523, "learning_rate": 5.3298254006218596e-05, "loss": 0.0241, "step": 17698 }, { "epoch": 2.4805886475122634, "grad_norm": 0.10707966983318329, "learning_rate": 5.3283903372398945e-05, "loss": 0.0061, "step": 17699 }, { "epoch": 2.48072880168185, "grad_norm": 0.18590058386325836, "learning_rate": 5.326955273857928e-05, "loss": 0.0431, "step": 17700 }, { "epoch": 2.4808689558514367, "grad_norm": 0.3210992217063904, "learning_rate": 5.325520210475962e-05, "loss": 0.0851, "step": 17701 }, { "epoch": 2.481009110021023, "grad_norm": 0.20693227648735046, "learning_rate": 
5.3240851470939964e-05, "loss": 0.0533, "step": 17702 }, { "epoch": 2.4811492641906097, "grad_norm": 0.2266017347574234, "learning_rate": 5.3226500837120306e-05, "loss": 0.0502, "step": 17703 }, { "epoch": 2.481289418360196, "grad_norm": 0.15091189742088318, "learning_rate": 5.321215020330064e-05, "loss": 0.0174, "step": 17704 }, { "epoch": 2.4814295725297826, "grad_norm": 0.3703337609767914, "learning_rate": 5.319779956948098e-05, "loss": 0.0424, "step": 17705 }, { "epoch": 2.4815697266993695, "grad_norm": 0.11492913216352463, "learning_rate": 5.3183448935661326e-05, "loss": 0.0119, "step": 17706 }, { "epoch": 2.481709880868956, "grad_norm": 0.14657877385616302, "learning_rate": 5.316909830184166e-05, "loss": 0.0093, "step": 17707 }, { "epoch": 2.4818500350385424, "grad_norm": 0.2525840699672699, "learning_rate": 5.3154747668022e-05, "loss": 0.035, "step": 17708 }, { "epoch": 2.481990189208129, "grad_norm": 0.18262861669063568, "learning_rate": 5.314039703420234e-05, "loss": 0.0216, "step": 17709 }, { "epoch": 2.4821303433777153, "grad_norm": 0.1263977289199829, "learning_rate": 5.312604640038268e-05, "loss": 0.0093, "step": 17710 }, { "epoch": 2.4822704975473022, "grad_norm": 0.3245948553085327, "learning_rate": 5.311169576656302e-05, "loss": 0.1534, "step": 17711 }, { "epoch": 2.4824106517168887, "grad_norm": 0.1624259203672409, "learning_rate": 5.309734513274336e-05, "loss": 0.0159, "step": 17712 }, { "epoch": 2.482550805886475, "grad_norm": 0.8008989095687866, "learning_rate": 5.3082994498923694e-05, "loss": 0.0603, "step": 17713 }, { "epoch": 2.4826909600560616, "grad_norm": 0.06288202852010727, "learning_rate": 5.306864386510404e-05, "loss": 0.003, "step": 17714 }, { "epoch": 2.482831114225648, "grad_norm": 1.5275949239730835, "learning_rate": 5.305429323128438e-05, "loss": 0.0289, "step": 17715 }, { "epoch": 2.482971268395235, "grad_norm": 0.06765114516019821, "learning_rate": 5.303994259746471e-05, "loss": 0.0037, "step": 17716 }, { "epoch": 
2.4831114225648214, "grad_norm": 1.492302417755127, "learning_rate": 5.3025591963645055e-05, "loss": 0.0832, "step": 17717 }, { "epoch": 2.483251576734408, "grad_norm": 0.49175533652305603, "learning_rate": 5.30112413298254e-05, "loss": 0.0294, "step": 17718 }, { "epoch": 2.4833917309039943, "grad_norm": 0.13394728302955627, "learning_rate": 5.299689069600574e-05, "loss": 0.0057, "step": 17719 }, { "epoch": 2.483531885073581, "grad_norm": 0.11188614368438721, "learning_rate": 5.2982540062186075e-05, "loss": 0.0076, "step": 17720 }, { "epoch": 2.4836720392431673, "grad_norm": 0.12141694128513336, "learning_rate": 5.296818942836641e-05, "loss": 0.0126, "step": 17721 }, { "epoch": 2.483812193412754, "grad_norm": 0.2904088795185089, "learning_rate": 5.295383879454676e-05, "loss": 0.0125, "step": 17722 }, { "epoch": 2.4839523475823406, "grad_norm": 0.16423878073692322, "learning_rate": 5.2939488160727094e-05, "loss": 0.0288, "step": 17723 }, { "epoch": 2.484092501751927, "grad_norm": 0.20900902152061462, "learning_rate": 5.292513752690743e-05, "loss": 0.0248, "step": 17724 }, { "epoch": 2.4842326559215135, "grad_norm": 0.08876301348209381, "learning_rate": 5.291078689308777e-05, "loss": 0.0119, "step": 17725 }, { "epoch": 2.4843728100911004, "grad_norm": 0.2671598196029663, "learning_rate": 5.2896436259268114e-05, "loss": 0.0315, "step": 17726 }, { "epoch": 2.484512964260687, "grad_norm": 0.18206124007701874, "learning_rate": 5.2882085625448456e-05, "loss": 0.0104, "step": 17727 }, { "epoch": 2.4846531184302734, "grad_norm": 0.06854202598333359, "learning_rate": 5.286773499162879e-05, "loss": 0.0057, "step": 17728 }, { "epoch": 2.48479327259986, "grad_norm": 0.4192564785480499, "learning_rate": 5.285338435780913e-05, "loss": 0.0278, "step": 17729 }, { "epoch": 2.4849334267694463, "grad_norm": 0.18883728981018066, "learning_rate": 5.2839033723989476e-05, "loss": 0.0073, "step": 17730 }, { "epoch": 2.4850735809390327, "grad_norm": 0.2082323282957077, "learning_rate": 
5.282468309016981e-05, "loss": 0.0676, "step": 17731 }, { "epoch": 2.4852137351086196, "grad_norm": 0.48897141218185425, "learning_rate": 5.2810332456350146e-05, "loss": 0.0222, "step": 17732 }, { "epoch": 2.485353889278206, "grad_norm": 0.2984616458415985, "learning_rate": 5.279598182253049e-05, "loss": 0.0338, "step": 17733 }, { "epoch": 2.4854940434477926, "grad_norm": 0.1615888625383377, "learning_rate": 5.278163118871083e-05, "loss": 0.0184, "step": 17734 }, { "epoch": 2.485634197617379, "grad_norm": 0.5336517095565796, "learning_rate": 5.276728055489117e-05, "loss": 0.0573, "step": 17735 }, { "epoch": 2.4857743517869655, "grad_norm": 0.13963228464126587, "learning_rate": 5.275292992107151e-05, "loss": 0.0261, "step": 17736 }, { "epoch": 2.4859145059565524, "grad_norm": 0.3494892120361328, "learning_rate": 5.273857928725185e-05, "loss": 0.0749, "step": 17737 }, { "epoch": 2.486054660126139, "grad_norm": 0.13461467623710632, "learning_rate": 5.272422865343219e-05, "loss": 0.0203, "step": 17738 }, { "epoch": 2.4861948142957253, "grad_norm": 0.47826001048088074, "learning_rate": 5.270987801961253e-05, "loss": 0.0671, "step": 17739 }, { "epoch": 2.4863349684653118, "grad_norm": 0.34306755661964417, "learning_rate": 5.269552738579286e-05, "loss": 0.0623, "step": 17740 }, { "epoch": 2.4864751226348982, "grad_norm": 0.5603805184364319, "learning_rate": 5.268117675197321e-05, "loss": 0.0388, "step": 17741 }, { "epoch": 2.4866152768044847, "grad_norm": 0.4433344900608063, "learning_rate": 5.266682611815355e-05, "loss": 0.0183, "step": 17742 }, { "epoch": 2.4867554309740716, "grad_norm": 0.486017644405365, "learning_rate": 5.265247548433389e-05, "loss": 0.0395, "step": 17743 }, { "epoch": 2.486895585143658, "grad_norm": 0.16959120333194733, "learning_rate": 5.2638124850514225e-05, "loss": 0.0187, "step": 17744 }, { "epoch": 2.4870357393132445, "grad_norm": 0.2675526738166809, "learning_rate": 5.262377421669457e-05, "loss": 0.0483, "step": 17745 }, { "epoch": 
2.487175893482831, "grad_norm": 0.2669757306575775, "learning_rate": 5.260942358287491e-05, "loss": 0.0316, "step": 17746 }, { "epoch": 2.487316047652418, "grad_norm": 0.12552404403686523, "learning_rate": 5.2595072949055244e-05, "loss": 0.0118, "step": 17747 }, { "epoch": 2.4874562018220043, "grad_norm": 0.2678147256374359, "learning_rate": 5.258072231523558e-05, "loss": 0.0158, "step": 17748 }, { "epoch": 2.487596355991591, "grad_norm": 0.11293377727270126, "learning_rate": 5.256637168141593e-05, "loss": 0.0195, "step": 17749 }, { "epoch": 2.4877365101611773, "grad_norm": 0.2692854106426239, "learning_rate": 5.2552021047596264e-05, "loss": 0.0182, "step": 17750 }, { "epoch": 2.4878766643307637, "grad_norm": 0.23372231423854828, "learning_rate": 5.2537670413776606e-05, "loss": 0.0176, "step": 17751 }, { "epoch": 2.48801681850035, "grad_norm": 0.8065145611763, "learning_rate": 5.252331977995694e-05, "loss": 0.0388, "step": 17752 }, { "epoch": 2.488156972669937, "grad_norm": 0.4757090210914612, "learning_rate": 5.250896914613728e-05, "loss": 0.022, "step": 17753 }, { "epoch": 2.4882971268395235, "grad_norm": 0.09404882788658142, "learning_rate": 5.2494618512317625e-05, "loss": 0.0174, "step": 17754 }, { "epoch": 2.48843728100911, "grad_norm": 0.21451681852340698, "learning_rate": 5.248026787849796e-05, "loss": 0.0332, "step": 17755 }, { "epoch": 2.4885774351786965, "grad_norm": 0.23228569328784943, "learning_rate": 5.2465917244678296e-05, "loss": 0.0358, "step": 17756 }, { "epoch": 2.4887175893482834, "grad_norm": 0.11712727695703506, "learning_rate": 5.2451566610858645e-05, "loss": 0.0068, "step": 17757 }, { "epoch": 2.48885774351787, "grad_norm": 0.360446572303772, "learning_rate": 5.243721597703898e-05, "loss": 0.0313, "step": 17758 }, { "epoch": 2.4889978976874563, "grad_norm": 0.20387250185012817, "learning_rate": 5.242286534321932e-05, "loss": 0.0262, "step": 17759 }, { "epoch": 2.4891380518570427, "grad_norm": 0.3580976724624634, "learning_rate": 
5.240851470939966e-05, "loss": 0.0592, "step": 17760 }, { "epoch": 2.489278206026629, "grad_norm": 0.13282306492328644, "learning_rate": 5.239416407558e-05, "loss": 0.0361, "step": 17761 }, { "epoch": 2.4894183601962157, "grad_norm": 0.6758518815040588, "learning_rate": 5.237981344176034e-05, "loss": 0.1175, "step": 17762 }, { "epoch": 2.4895585143658026, "grad_norm": 0.40552985668182373, "learning_rate": 5.236546280794068e-05, "loss": 0.0431, "step": 17763 }, { "epoch": 2.489698668535389, "grad_norm": 0.19911421835422516, "learning_rate": 5.235111217412101e-05, "loss": 0.0135, "step": 17764 }, { "epoch": 2.4898388227049755, "grad_norm": 0.6081478595733643, "learning_rate": 5.233676154030136e-05, "loss": 0.0935, "step": 17765 }, { "epoch": 2.489978976874562, "grad_norm": 1.7193678617477417, "learning_rate": 5.23224109064817e-05, "loss": 0.0933, "step": 17766 }, { "epoch": 2.4901191310441484, "grad_norm": 0.49033698439598083, "learning_rate": 5.230806027266204e-05, "loss": 0.103, "step": 17767 }, { "epoch": 2.4902592852137353, "grad_norm": 0.5711290836334229, "learning_rate": 5.2293709638842374e-05, "loss": 0.0755, "step": 17768 }, { "epoch": 2.4903994393833218, "grad_norm": 1.0722718238830566, "learning_rate": 5.2279359005022716e-05, "loss": 0.037, "step": 17769 }, { "epoch": 2.490539593552908, "grad_norm": 0.3700416088104248, "learning_rate": 5.226500837120306e-05, "loss": 0.0461, "step": 17770 }, { "epoch": 2.4906797477224947, "grad_norm": 0.15610124170780182, "learning_rate": 5.2250657737383394e-05, "loss": 0.0167, "step": 17771 }, { "epoch": 2.490819901892081, "grad_norm": 0.229808047413826, "learning_rate": 5.223630710356373e-05, "loss": 0.0161, "step": 17772 }, { "epoch": 2.4909600560616676, "grad_norm": 0.12942081689834595, "learning_rate": 5.222195646974408e-05, "loss": 0.0204, "step": 17773 }, { "epoch": 2.4911002102312545, "grad_norm": 0.43906357884407043, "learning_rate": 5.2207605835924413e-05, "loss": 0.0271, "step": 17774 }, { "epoch": 
2.491240364400841, "grad_norm": 0.4648025631904602, "learning_rate": 5.2193255202104756e-05, "loss": 0.0575, "step": 17775 }, { "epoch": 2.4913805185704274, "grad_norm": 0.1541581153869629, "learning_rate": 5.21789045682851e-05, "loss": 0.0276, "step": 17776 }, { "epoch": 2.491520672740014, "grad_norm": 0.5011047720909119, "learning_rate": 5.216455393446543e-05, "loss": 0.0301, "step": 17777 }, { "epoch": 2.491660826909601, "grad_norm": 0.18152040243148804, "learning_rate": 5.2150203300645775e-05, "loss": 0.0147, "step": 17778 }, { "epoch": 2.4918009810791872, "grad_norm": 0.07357105612754822, "learning_rate": 5.213585266682611e-05, "loss": 0.0076, "step": 17779 }, { "epoch": 2.4919411352487737, "grad_norm": 0.33727243542671204, "learning_rate": 5.212150203300646e-05, "loss": 0.0526, "step": 17780 }, { "epoch": 2.49208128941836, "grad_norm": 0.35229700803756714, "learning_rate": 5.2107151399186795e-05, "loss": 0.0462, "step": 17781 }, { "epoch": 2.4922214435879466, "grad_norm": 0.18100976943969727, "learning_rate": 5.209280076536713e-05, "loss": 0.0182, "step": 17782 }, { "epoch": 2.492361597757533, "grad_norm": 0.03994106501340866, "learning_rate": 5.207845013154747e-05, "loss": 0.0124, "step": 17783 }, { "epoch": 2.49250175192712, "grad_norm": 0.05880053713917732, "learning_rate": 5.2064099497727814e-05, "loss": 0.0049, "step": 17784 }, { "epoch": 2.4926419060967064, "grad_norm": 0.28882768750190735, "learning_rate": 5.204974886390815e-05, "loss": 0.0232, "step": 17785 }, { "epoch": 2.492782060266293, "grad_norm": 0.705376148223877, "learning_rate": 5.203539823008849e-05, "loss": 0.0876, "step": 17786 }, { "epoch": 2.4929222144358794, "grad_norm": 0.21390913426876068, "learning_rate": 5.202104759626883e-05, "loss": 0.0288, "step": 17787 }, { "epoch": 2.4930623686054663, "grad_norm": 0.29915696382522583, "learning_rate": 5.2006696962449176e-05, "loss": 0.0275, "step": 17788 }, { "epoch": 2.4932025227750527, "grad_norm": 0.30241236090660095, "learning_rate": 
5.199234632862951e-05, "loss": 0.0409, "step": 17789 }, { "epoch": 2.493342676944639, "grad_norm": 0.09223033487796783, "learning_rate": 5.1977995694809847e-05, "loss": 0.0081, "step": 17790 }, { "epoch": 2.4934828311142256, "grad_norm": 0.24814589321613312, "learning_rate": 5.196364506099019e-05, "loss": 0.0375, "step": 17791 }, { "epoch": 2.493622985283812, "grad_norm": 0.16973090171813965, "learning_rate": 5.194929442717053e-05, "loss": 0.0295, "step": 17792 }, { "epoch": 2.4937631394533986, "grad_norm": 0.5886319875717163, "learning_rate": 5.1934943793350866e-05, "loss": 0.0561, "step": 17793 }, { "epoch": 2.4939032936229855, "grad_norm": 0.08989480882883072, "learning_rate": 5.192059315953121e-05, "loss": 0.0144, "step": 17794 }, { "epoch": 2.494043447792572, "grad_norm": 0.345582515001297, "learning_rate": 5.1906242525711544e-05, "loss": 0.0267, "step": 17795 }, { "epoch": 2.4941836019621584, "grad_norm": 0.1257106363773346, "learning_rate": 5.189189189189189e-05, "loss": 0.0133, "step": 17796 }, { "epoch": 2.494323756131745, "grad_norm": 0.583721935749054, "learning_rate": 5.187754125807223e-05, "loss": 0.0905, "step": 17797 }, { "epoch": 2.4944639103013313, "grad_norm": 0.09211540967226028, "learning_rate": 5.186319062425256e-05, "loss": 0.0064, "step": 17798 }, { "epoch": 2.494604064470918, "grad_norm": 0.1583651602268219, "learning_rate": 5.1848839990432905e-05, "loss": 0.0267, "step": 17799 }, { "epoch": 2.4947442186405047, "grad_norm": 0.1800818145275116, "learning_rate": 5.183448935661325e-05, "loss": 0.0237, "step": 17800 }, { "epoch": 2.494884372810091, "grad_norm": 0.19386602938175201, "learning_rate": 5.182013872279358e-05, "loss": 0.0147, "step": 17801 }, { "epoch": 2.4950245269796776, "grad_norm": 0.5707101821899414, "learning_rate": 5.1805788088973925e-05, "loss": 0.0198, "step": 17802 }, { "epoch": 2.495164681149264, "grad_norm": 0.2839837670326233, "learning_rate": 5.179143745515426e-05, "loss": 0.0081, "step": 17803 }, { "epoch": 
2.4953048353188505, "grad_norm": 0.27072033286094666, "learning_rate": 5.177708682133461e-05, "loss": 0.0387, "step": 17804 }, { "epoch": 2.4954449894884374, "grad_norm": 0.1146271824836731, "learning_rate": 5.1762736187514944e-05, "loss": 0.0185, "step": 17805 }, { "epoch": 2.495585143658024, "grad_norm": 0.09938567131757736, "learning_rate": 5.174838555369528e-05, "loss": 0.0191, "step": 17806 }, { "epoch": 2.4957252978276103, "grad_norm": 0.302967369556427, "learning_rate": 5.173403491987562e-05, "loss": 0.0154, "step": 17807 }, { "epoch": 2.495865451997197, "grad_norm": 0.38114574551582336, "learning_rate": 5.1719684286055964e-05, "loss": 0.0298, "step": 17808 }, { "epoch": 2.4960056061667837, "grad_norm": 0.2538285553455353, "learning_rate": 5.17053336522363e-05, "loss": 0.0375, "step": 17809 }, { "epoch": 2.49614576033637, "grad_norm": 0.48061642050743103, "learning_rate": 5.169098301841664e-05, "loss": 0.0762, "step": 17810 }, { "epoch": 2.4962859145059566, "grad_norm": 0.08205193281173706, "learning_rate": 5.1676632384596984e-05, "loss": 0.0053, "step": 17811 }, { "epoch": 2.496426068675543, "grad_norm": 0.23486006259918213, "learning_rate": 5.1662281750777326e-05, "loss": 0.0045, "step": 17812 }, { "epoch": 2.4965662228451295, "grad_norm": 0.20154549181461334, "learning_rate": 5.164793111695766e-05, "loss": 0.0252, "step": 17813 }, { "epoch": 2.496706377014716, "grad_norm": 0.5714675188064575, "learning_rate": 5.1633580483137996e-05, "loss": 0.0272, "step": 17814 }, { "epoch": 2.496846531184303, "grad_norm": 0.399156779050827, "learning_rate": 5.1619229849318345e-05, "loss": 0.0265, "step": 17815 }, { "epoch": 2.4969866853538893, "grad_norm": 0.4697190821170807, "learning_rate": 5.160487921549868e-05, "loss": 0.0449, "step": 17816 }, { "epoch": 2.497126839523476, "grad_norm": 0.6566028594970703, "learning_rate": 5.159052858167902e-05, "loss": 0.053, "step": 17817 }, { "epoch": 2.4972669936930623, "grad_norm": 3.4883246421813965, "learning_rate": 
5.157617794785936e-05, "loss": 0.1284, "step": 17818 }, { "epoch": 2.4974071478626487, "grad_norm": 0.5223955512046814, "learning_rate": 5.15618273140397e-05, "loss": 0.0453, "step": 17819 }, { "epoch": 2.4975473020322356, "grad_norm": 0.3521726727485657, "learning_rate": 5.154747668022004e-05, "loss": 0.0173, "step": 17820 }, { "epoch": 2.497687456201822, "grad_norm": 0.1914532631635666, "learning_rate": 5.153312604640038e-05, "loss": 0.0119, "step": 17821 }, { "epoch": 2.4978276103714085, "grad_norm": 0.17097818851470947, "learning_rate": 5.151877541258071e-05, "loss": 0.0392, "step": 17822 }, { "epoch": 2.497967764540995, "grad_norm": 0.0675303190946579, "learning_rate": 5.150442477876106e-05, "loss": 0.0053, "step": 17823 }, { "epoch": 2.4981079187105815, "grad_norm": 0.231094628572464, "learning_rate": 5.14900741449414e-05, "loss": 0.0227, "step": 17824 }, { "epoch": 2.4982480728801684, "grad_norm": 0.07156160473823547, "learning_rate": 5.147572351112174e-05, "loss": 0.0112, "step": 17825 }, { "epoch": 2.498388227049755, "grad_norm": 0.13417445123195648, "learning_rate": 5.1461372877302075e-05, "loss": 0.013, "step": 17826 }, { "epoch": 2.4985283812193413, "grad_norm": 0.22243309020996094, "learning_rate": 5.144702224348242e-05, "loss": 0.0225, "step": 17827 }, { "epoch": 2.4986685353889277, "grad_norm": 0.10465124249458313, "learning_rate": 5.143267160966276e-05, "loss": 0.0153, "step": 17828 }, { "epoch": 2.498808689558514, "grad_norm": 0.32272014021873474, "learning_rate": 5.1418320975843094e-05, "loss": 0.0404, "step": 17829 }, { "epoch": 2.498948843728101, "grad_norm": 0.08015695959329605, "learning_rate": 5.140397034202343e-05, "loss": 0.0088, "step": 17830 }, { "epoch": 2.4990889978976876, "grad_norm": 0.2525484561920166, "learning_rate": 5.138961970820378e-05, "loss": 0.0129, "step": 17831 }, { "epoch": 2.499229152067274, "grad_norm": 0.10982628166675568, "learning_rate": 5.1375269074384114e-05, "loss": 0.0237, "step": 17832 }, { "epoch": 
2.4993693062368605, "grad_norm": 0.10212475061416626, "learning_rate": 5.1360918440564456e-05, "loss": 0.0216, "step": 17833 }, { "epoch": 2.499509460406447, "grad_norm": 0.04232310131192207, "learning_rate": 5.134656780674479e-05, "loss": 0.0035, "step": 17834 }, { "epoch": 2.4996496145760334, "grad_norm": 0.15495185554027557, "learning_rate": 5.133221717292513e-05, "loss": 0.0132, "step": 17835 }, { "epoch": 2.4997897687456203, "grad_norm": 0.09340164065361023, "learning_rate": 5.1317866539105475e-05, "loss": 0.0087, "step": 17836 }, { "epoch": 2.4999299229152068, "grad_norm": 0.23512424528598785, "learning_rate": 5.130351590528581e-05, "loss": 0.0423, "step": 17837 }, { "epoch": 2.5000700770847932, "grad_norm": 0.06448642164468765, "learning_rate": 5.1289165271466146e-05, "loss": 0.0094, "step": 17838 }, { "epoch": 2.5002102312543797, "grad_norm": 0.5939103960990906, "learning_rate": 5.1274814637646495e-05, "loss": 0.0179, "step": 17839 }, { "epoch": 2.5003503854239666, "grad_norm": 0.358553409576416, "learning_rate": 5.126046400382683e-05, "loss": 0.0102, "step": 17840 }, { "epoch": 2.500490539593553, "grad_norm": 0.13601501286029816, "learning_rate": 5.124611337000717e-05, "loss": 0.0067, "step": 17841 }, { "epoch": 2.5006306937631395, "grad_norm": 0.2634809613227844, "learning_rate": 5.123176273618751e-05, "loss": 0.0724, "step": 17842 }, { "epoch": 2.500770847932726, "grad_norm": 0.1829523891210556, "learning_rate": 5.121741210236785e-05, "loss": 0.0123, "step": 17843 }, { "epoch": 2.5009110021023124, "grad_norm": 0.3545670509338379, "learning_rate": 5.120306146854819e-05, "loss": 0.0115, "step": 17844 }, { "epoch": 2.501051156271899, "grad_norm": 0.0709671676158905, "learning_rate": 5.118871083472853e-05, "loss": 0.0068, "step": 17845 }, { "epoch": 2.501191310441486, "grad_norm": 0.4570164084434509, "learning_rate": 5.117436020090886e-05, "loss": 0.0406, "step": 17846 }, { "epoch": 2.5013314646110723, "grad_norm": 0.08937258273363113, "learning_rate": 
5.116000956708921e-05, "loss": 0.0067, "step": 17847 }, { "epoch": 2.5014716187806587, "grad_norm": 0.0228976309299469, "learning_rate": 5.114565893326955e-05, "loss": 0.0024, "step": 17848 }, { "epoch": 2.501611772950245, "grad_norm": 0.4653420150279999, "learning_rate": 5.113130829944989e-05, "loss": 0.0248, "step": 17849 }, { "epoch": 2.501751927119832, "grad_norm": 0.36147162318229675, "learning_rate": 5.111695766563023e-05, "loss": 0.0326, "step": 17850 }, { "epoch": 2.5018920812894185, "grad_norm": 0.17428427934646606, "learning_rate": 5.1102607031810566e-05, "loss": 0.0192, "step": 17851 }, { "epoch": 2.502032235459005, "grad_norm": 0.3164307177066803, "learning_rate": 5.108825639799091e-05, "loss": 0.0234, "step": 17852 }, { "epoch": 2.5021723896285915, "grad_norm": 0.094609834253788, "learning_rate": 5.1073905764171244e-05, "loss": 0.0161, "step": 17853 }, { "epoch": 2.502312543798178, "grad_norm": 0.13780805468559265, "learning_rate": 5.105955513035159e-05, "loss": 0.0204, "step": 17854 }, { "epoch": 2.5024526979677644, "grad_norm": 0.14991626143455505, "learning_rate": 5.104520449653193e-05, "loss": 0.0168, "step": 17855 }, { "epoch": 2.502592852137351, "grad_norm": 0.33201947808265686, "learning_rate": 5.103085386271226e-05, "loss": 0.0066, "step": 17856 }, { "epoch": 2.5027330063069377, "grad_norm": 0.22748306393623352, "learning_rate": 5.1016503228892605e-05, "loss": 0.0656, "step": 17857 }, { "epoch": 2.502873160476524, "grad_norm": 0.15124912559986115, "learning_rate": 5.100215259507295e-05, "loss": 0.0152, "step": 17858 }, { "epoch": 2.5030133146461107, "grad_norm": 0.10268676280975342, "learning_rate": 5.098780196125328e-05, "loss": 0.0114, "step": 17859 }, { "epoch": 2.503153468815697, "grad_norm": 0.1801644265651703, "learning_rate": 5.0973451327433625e-05, "loss": 0.0282, "step": 17860 }, { "epoch": 2.503293622985284, "grad_norm": 0.17832498252391815, "learning_rate": 5.095910069361396e-05, "loss": 0.0061, "step": 17861 }, { "epoch": 
2.5034337771548705, "grad_norm": 0.1257418543100357, "learning_rate": 5.094475005979431e-05, "loss": 0.0054, "step": 17862 }, { "epoch": 2.503573931324457, "grad_norm": 0.10041609406471252, "learning_rate": 5.0930399425974645e-05, "loss": 0.0028, "step": 17863 }, { "epoch": 2.5037140854940434, "grad_norm": 0.13054272532463074, "learning_rate": 5.091604879215498e-05, "loss": 0.0083, "step": 17864 }, { "epoch": 2.50385423966363, "grad_norm": 0.4925798177719116, "learning_rate": 5.090169815833532e-05, "loss": 0.1121, "step": 17865 }, { "epoch": 2.5039943938332163, "grad_norm": 0.5421183109283447, "learning_rate": 5.0887347524515664e-05, "loss": 0.0267, "step": 17866 }, { "epoch": 2.504134548002803, "grad_norm": 0.1838768720626831, "learning_rate": 5.0872996890696e-05, "loss": 0.0243, "step": 17867 }, { "epoch": 2.5042747021723897, "grad_norm": 0.565723180770874, "learning_rate": 5.085864625687634e-05, "loss": 0.1334, "step": 17868 }, { "epoch": 2.504414856341976, "grad_norm": 0.41988706588745117, "learning_rate": 5.084429562305668e-05, "loss": 0.018, "step": 17869 }, { "epoch": 2.5045550105115626, "grad_norm": 0.4841556251049042, "learning_rate": 5.0829944989237026e-05, "loss": 0.0718, "step": 17870 }, { "epoch": 2.5046951646811495, "grad_norm": 0.2065269649028778, "learning_rate": 5.081559435541736e-05, "loss": 0.0505, "step": 17871 }, { "epoch": 2.504835318850736, "grad_norm": 0.2573491632938385, "learning_rate": 5.0801243721597697e-05, "loss": 0.0093, "step": 17872 }, { "epoch": 2.5049754730203224, "grad_norm": 0.5838304162025452, "learning_rate": 5.078689308777804e-05, "loss": 0.033, "step": 17873 }, { "epoch": 2.505115627189909, "grad_norm": 0.19565308094024658, "learning_rate": 5.077254245395838e-05, "loss": 0.0289, "step": 17874 }, { "epoch": 2.5052557813594953, "grad_norm": 0.18960800766944885, "learning_rate": 5.0758191820138716e-05, "loss": 0.0407, "step": 17875 }, { "epoch": 2.505395935529082, "grad_norm": 0.43577390909194946, "learning_rate": 
5.074384118631906e-05, "loss": 0.0325, "step": 17876 }, { "epoch": 2.5055360896986687, "grad_norm": 0.2558959424495697, "learning_rate": 5.0729490552499394e-05, "loss": 0.0124, "step": 17877 }, { "epoch": 2.505676243868255, "grad_norm": 0.2798151969909668, "learning_rate": 5.071513991867974e-05, "loss": 0.0419, "step": 17878 }, { "epoch": 2.5058163980378416, "grad_norm": 0.17561772465705872, "learning_rate": 5.070078928486008e-05, "loss": 0.0362, "step": 17879 }, { "epoch": 2.505956552207428, "grad_norm": 0.27153992652893066, "learning_rate": 5.068643865104041e-05, "loss": 0.0448, "step": 17880 }, { "epoch": 2.506096706377015, "grad_norm": 0.5290229320526123, "learning_rate": 5.0672088017220755e-05, "loss": 0.0669, "step": 17881 }, { "epoch": 2.5062368605466014, "grad_norm": 0.13243520259857178, "learning_rate": 5.06577373834011e-05, "loss": 0.0109, "step": 17882 }, { "epoch": 2.506377014716188, "grad_norm": 0.15251211822032928, "learning_rate": 5.064338674958143e-05, "loss": 0.0376, "step": 17883 }, { "epoch": 2.5065171688857744, "grad_norm": 0.11372412741184235, "learning_rate": 5.0629036115761775e-05, "loss": 0.0141, "step": 17884 }, { "epoch": 2.506657323055361, "grad_norm": 0.12586988508701324, "learning_rate": 5.061468548194212e-05, "loss": 0.034, "step": 17885 }, { "epoch": 2.5067974772249473, "grad_norm": 0.1825370043516159, "learning_rate": 5.060033484812246e-05, "loss": 0.0083, "step": 17886 }, { "epoch": 2.5069376313945337, "grad_norm": 0.25620436668395996, "learning_rate": 5.0585984214302794e-05, "loss": 0.0312, "step": 17887 }, { "epoch": 2.5070777855641206, "grad_norm": 0.1578015834093094, "learning_rate": 5.057163358048313e-05, "loss": 0.0129, "step": 17888 }, { "epoch": 2.507217939733707, "grad_norm": 0.12077666074037552, "learning_rate": 5.055728294666348e-05, "loss": 0.0148, "step": 17889 }, { "epoch": 2.5073580939032936, "grad_norm": 0.22039912641048431, "learning_rate": 5.0542932312843814e-05, "loss": 0.0269, "step": 17890 }, { "epoch": 
2.50749824807288, "grad_norm": 0.05985488370060921, "learning_rate": 5.052858167902415e-05, "loss": 0.0039, "step": 17891 }, { "epoch": 2.507638402242467, "grad_norm": 0.26816192269325256, "learning_rate": 5.051423104520449e-05, "loss": 0.0115, "step": 17892 }, { "epoch": 2.5077785564120534, "grad_norm": 0.0780048593878746, "learning_rate": 5.0499880411384833e-05, "loss": 0.0064, "step": 17893 }, { "epoch": 2.50791871058164, "grad_norm": 0.14516665041446686, "learning_rate": 5.0485529777565176e-05, "loss": 0.0298, "step": 17894 }, { "epoch": 2.5080588647512263, "grad_norm": 0.05282115563750267, "learning_rate": 5.047117914374551e-05, "loss": 0.0042, "step": 17895 }, { "epoch": 2.5081990189208128, "grad_norm": 0.2411554604768753, "learning_rate": 5.0456828509925846e-05, "loss": 0.0554, "step": 17896 }, { "epoch": 2.508339173090399, "grad_norm": 0.05972570553421974, "learning_rate": 5.0442477876106195e-05, "loss": 0.0068, "step": 17897 }, { "epoch": 2.508479327259986, "grad_norm": 0.10814305394887924, "learning_rate": 5.042812724228653e-05, "loss": 0.0086, "step": 17898 }, { "epoch": 2.5086194814295726, "grad_norm": 0.38663139939308167, "learning_rate": 5.0413776608466866e-05, "loss": 0.0209, "step": 17899 }, { "epoch": 2.508759635599159, "grad_norm": 0.05171884223818779, "learning_rate": 5.039942597464721e-05, "loss": 0.0022, "step": 17900 }, { "epoch": 2.5088997897687455, "grad_norm": 0.2223225086927414, "learning_rate": 5.038507534082755e-05, "loss": 0.0345, "step": 17901 }, { "epoch": 2.5090399439383324, "grad_norm": 0.12274236232042313, "learning_rate": 5.037072470700789e-05, "loss": 0.0055, "step": 17902 }, { "epoch": 2.509180098107919, "grad_norm": 0.466958224773407, "learning_rate": 5.035637407318823e-05, "loss": 0.0068, "step": 17903 }, { "epoch": 2.5093202522775053, "grad_norm": 0.8914198279380798, "learning_rate": 5.034202343936856e-05, "loss": 0.0396, "step": 17904 }, { "epoch": 2.509460406447092, "grad_norm": 0.19931572675704956, "learning_rate": 
5.032767280554891e-05, "loss": 0.0163, "step": 17905 }, { "epoch": 2.5096005606166782, "grad_norm": 0.18438127636909485, "learning_rate": 5.031332217172925e-05, "loss": 0.0248, "step": 17906 }, { "epoch": 2.5097407147862647, "grad_norm": 0.2518075406551361, "learning_rate": 5.029897153790958e-05, "loss": 0.0196, "step": 17907 }, { "epoch": 2.509880868955851, "grad_norm": 0.13407835364341736, "learning_rate": 5.0284620904089924e-05, "loss": 0.0188, "step": 17908 }, { "epoch": 2.510021023125438, "grad_norm": 0.33575475215911865, "learning_rate": 5.0270270270270267e-05, "loss": 0.0311, "step": 17909 }, { "epoch": 2.5101611772950245, "grad_norm": 0.16032351553440094, "learning_rate": 5.025591963645061e-05, "loss": 0.0097, "step": 17910 }, { "epoch": 2.510301331464611, "grad_norm": 0.022804399952292442, "learning_rate": 5.0241569002630944e-05, "loss": 0.0014, "step": 17911 }, { "epoch": 2.510441485634198, "grad_norm": 0.0834144577383995, "learning_rate": 5.022721836881128e-05, "loss": 0.0039, "step": 17912 }, { "epoch": 2.5105816398037843, "grad_norm": 0.2841784656047821, "learning_rate": 5.021286773499163e-05, "loss": 0.0255, "step": 17913 }, { "epoch": 2.510721793973371, "grad_norm": 0.21736538410186768, "learning_rate": 5.0198517101171964e-05, "loss": 0.0333, "step": 17914 }, { "epoch": 2.5108619481429573, "grad_norm": 0.07583963871002197, "learning_rate": 5.01841664673523e-05, "loss": 0.0437, "step": 17915 }, { "epoch": 2.5110021023125437, "grad_norm": 0.19257482886314392, "learning_rate": 5.016981583353264e-05, "loss": 0.0218, "step": 17916 }, { "epoch": 2.51114225648213, "grad_norm": 0.210791677236557, "learning_rate": 5.015546519971298e-05, "loss": 0.0242, "step": 17917 }, { "epoch": 2.5112824106517166, "grad_norm": 0.07649431377649307, "learning_rate": 5.0141114565893325e-05, "loss": 0.0045, "step": 17918 }, { "epoch": 2.5114225648213035, "grad_norm": 1.675681471824646, "learning_rate": 5.012676393207366e-05, "loss": 0.1565, "step": 17919 }, { "epoch": 
2.51156271899089, "grad_norm": 0.8766339421272278, "learning_rate": 5.0112413298253996e-05, "loss": 0.2809, "step": 17920 }, { "epoch": 2.5117028731604765, "grad_norm": 0.15034359693527222, "learning_rate": 5.0098062664434345e-05, "loss": 0.0117, "step": 17921 }, { "epoch": 2.511843027330063, "grad_norm": 0.2890153229236603, "learning_rate": 5.008371203061468e-05, "loss": 0.0356, "step": 17922 }, { "epoch": 2.51198318149965, "grad_norm": 0.18044079840183258, "learning_rate": 5.0069361396795016e-05, "loss": 0.0427, "step": 17923 }, { "epoch": 2.5121233356692363, "grad_norm": 0.21608898043632507, "learning_rate": 5.0055010762975364e-05, "loss": 0.0431, "step": 17924 }, { "epoch": 2.5122634898388227, "grad_norm": 0.12421417981386185, "learning_rate": 5.00406601291557e-05, "loss": 0.0429, "step": 17925 }, { "epoch": 2.512403644008409, "grad_norm": 0.19879715144634247, "learning_rate": 5.002630949533604e-05, "loss": 0.0072, "step": 17926 }, { "epoch": 2.5125437981779957, "grad_norm": 0.24948570132255554, "learning_rate": 5.001195886151638e-05, "loss": 0.0392, "step": 17927 }, { "epoch": 2.512683952347582, "grad_norm": 0.09584183990955353, "learning_rate": 4.9997608227696726e-05, "loss": 0.0053, "step": 17928 }, { "epoch": 2.512824106517169, "grad_norm": 0.13370759785175323, "learning_rate": 4.998325759387706e-05, "loss": 0.0098, "step": 17929 }, { "epoch": 2.5129642606867555, "grad_norm": 0.12032349407672882, "learning_rate": 4.99689069600574e-05, "loss": 0.0065, "step": 17930 }, { "epoch": 2.513104414856342, "grad_norm": 0.09858205169439316, "learning_rate": 4.995455632623773e-05, "loss": 0.008, "step": 17931 }, { "epoch": 2.5132445690259284, "grad_norm": 0.18433667719364166, "learning_rate": 4.994020569241808e-05, "loss": 0.0193, "step": 17932 }, { "epoch": 2.5133847231955153, "grad_norm": 0.48852431774139404, "learning_rate": 4.9925855058598416e-05, "loss": 0.0302, "step": 17933 }, { "epoch": 2.5135248773651018, "grad_norm": 0.062215156853199005, "learning_rate": 
4.991150442477876e-05, "loss": 0.0134, "step": 17934 }, { "epoch": 2.5136650315346882, "grad_norm": 0.131892591714859, "learning_rate": 4.9897153790959094e-05, "loss": 0.0384, "step": 17935 }, { "epoch": 2.5138051857042747, "grad_norm": 0.6051559448242188, "learning_rate": 4.988280315713944e-05, "loss": 0.049, "step": 17936 }, { "epoch": 2.513945339873861, "grad_norm": 0.29307442903518677, "learning_rate": 4.986845252331978e-05, "loss": 0.027, "step": 17937 }, { "epoch": 2.5140854940434476, "grad_norm": 0.25392740964889526, "learning_rate": 4.985410188950011e-05, "loss": 0.0241, "step": 17938 }, { "epoch": 2.514225648213034, "grad_norm": 0.1617138385772705, "learning_rate": 4.983975125568045e-05, "loss": 0.0208, "step": 17939 }, { "epoch": 2.514365802382621, "grad_norm": 0.42534786462783813, "learning_rate": 4.98254006218608e-05, "loss": 0.117, "step": 17940 }, { "epoch": 2.5145059565522074, "grad_norm": 0.0377785824239254, "learning_rate": 4.981104998804113e-05, "loss": 0.0031, "step": 17941 }, { "epoch": 2.514646110721794, "grad_norm": 0.2530292272567749, "learning_rate": 4.9796699354221475e-05, "loss": 0.0441, "step": 17942 }, { "epoch": 2.514786264891381, "grad_norm": 0.44305163621902466, "learning_rate": 4.978234872040181e-05, "loss": 0.0792, "step": 17943 }, { "epoch": 2.5149264190609673, "grad_norm": 0.2983666658401489, "learning_rate": 4.976799808658216e-05, "loss": 0.0439, "step": 17944 }, { "epoch": 2.5150665732305537, "grad_norm": 0.18985411524772644, "learning_rate": 4.9753647452762495e-05, "loss": 0.015, "step": 17945 }, { "epoch": 2.51520672740014, "grad_norm": 0.6229122281074524, "learning_rate": 4.973929681894283e-05, "loss": 0.0306, "step": 17946 }, { "epoch": 2.5153468815697266, "grad_norm": 0.11859645694494247, "learning_rate": 4.9724946185123165e-05, "loss": 0.0147, "step": 17947 }, { "epoch": 2.515487035739313, "grad_norm": 0.2446531057357788, "learning_rate": 4.9710595551303514e-05, "loss": 0.0491, "step": 17948 }, { "epoch": 
2.5156271899088996, "grad_norm": 0.16358372569084167, "learning_rate": 4.969624491748385e-05, "loss": 0.006, "step": 17949 }, { "epoch": 2.5157673440784865, "grad_norm": 0.18392562866210938, "learning_rate": 4.968189428366419e-05, "loss": 0.0088, "step": 17950 }, { "epoch": 2.515907498248073, "grad_norm": 0.04929543659090996, "learning_rate": 4.966754364984453e-05, "loss": 0.005, "step": 17951 }, { "epoch": 2.5160476524176594, "grad_norm": 0.9538974761962891, "learning_rate": 4.9653193016024876e-05, "loss": 0.0479, "step": 17952 }, { "epoch": 2.516187806587246, "grad_norm": 0.12925632297992706, "learning_rate": 4.963884238220521e-05, "loss": 0.0051, "step": 17953 }, { "epoch": 2.5163279607568327, "grad_norm": 0.13811703026294708, "learning_rate": 4.9624491748385546e-05, "loss": 0.0122, "step": 17954 }, { "epoch": 2.516468114926419, "grad_norm": 0.37643539905548096, "learning_rate": 4.961014111456588e-05, "loss": 0.0191, "step": 17955 }, { "epoch": 2.5166082690960057, "grad_norm": 0.19138064980506897, "learning_rate": 4.959579048074623e-05, "loss": 0.0254, "step": 17956 }, { "epoch": 2.516748423265592, "grad_norm": 0.09747377783060074, "learning_rate": 4.9581439846926566e-05, "loss": 0.0075, "step": 17957 }, { "epoch": 2.5168885774351786, "grad_norm": 1.5282933712005615, "learning_rate": 4.956708921310691e-05, "loss": 0.0129, "step": 17958 }, { "epoch": 2.517028731604765, "grad_norm": 0.10173609107732773, "learning_rate": 4.955273857928725e-05, "loss": 0.0327, "step": 17959 }, { "epoch": 2.517168885774352, "grad_norm": 0.2733389139175415, "learning_rate": 4.953838794546759e-05, "loss": 0.0299, "step": 17960 }, { "epoch": 2.5173090399439384, "grad_norm": 0.1406165063381195, "learning_rate": 4.952403731164793e-05, "loss": 0.0147, "step": 17961 }, { "epoch": 2.517449194113525, "grad_norm": 0.24290359020233154, "learning_rate": 4.950968667782826e-05, "loss": 0.0946, "step": 17962 }, { "epoch": 2.5175893482831113, "grad_norm": 0.031261276453733444, "learning_rate": 
4.949533604400861e-05, "loss": 0.0024, "step": 17963 }, { "epoch": 2.517729502452698, "grad_norm": 0.6725165247917175, "learning_rate": 4.948098541018895e-05, "loss": 0.0741, "step": 17964 }, { "epoch": 2.5178696566222847, "grad_norm": 0.22323952615261078, "learning_rate": 4.946663477636928e-05, "loss": 0.0956, "step": 17965 }, { "epoch": 2.518009810791871, "grad_norm": 0.058933075517416, "learning_rate": 4.9452284142549625e-05, "loss": 0.0045, "step": 17966 }, { "epoch": 2.5181499649614576, "grad_norm": 0.21088677644729614, "learning_rate": 4.943793350872997e-05, "loss": 0.0145, "step": 17967 }, { "epoch": 2.518290119131044, "grad_norm": 0.9093017578125, "learning_rate": 4.942358287491031e-05, "loss": 0.1194, "step": 17968 }, { "epoch": 2.5184302733006305, "grad_norm": 0.46114274859428406, "learning_rate": 4.9409232241090644e-05, "loss": 0.029, "step": 17969 }, { "epoch": 2.518570427470217, "grad_norm": 2.3653149604797363, "learning_rate": 4.939488160727098e-05, "loss": 0.1459, "step": 17970 }, { "epoch": 2.518710581639804, "grad_norm": 0.2956080138683319, "learning_rate": 4.938053097345133e-05, "loss": 0.0288, "step": 17971 }, { "epoch": 2.5188507358093903, "grad_norm": 0.23610498011112213, "learning_rate": 4.9366180339631664e-05, "loss": 0.0205, "step": 17972 }, { "epoch": 2.518990889978977, "grad_norm": 0.08853091299533844, "learning_rate": 4.9351829705812e-05, "loss": 0.0101, "step": 17973 }, { "epoch": 2.5191310441485633, "grad_norm": 0.11466262489557266, "learning_rate": 4.933747907199234e-05, "loss": 0.0077, "step": 17974 }, { "epoch": 2.51927119831815, "grad_norm": 0.03590298816561699, "learning_rate": 4.9323128438172683e-05, "loss": 0.0034, "step": 17975 }, { "epoch": 2.5194113524877366, "grad_norm": 0.3006124794483185, "learning_rate": 4.9308777804353026e-05, "loss": 0.0106, "step": 17976 }, { "epoch": 2.519551506657323, "grad_norm": 0.2731272578239441, "learning_rate": 4.929442717053336e-05, "loss": 0.0313, "step": 17977 }, { "epoch": 
2.5196916608269095, "grad_norm": 0.16604268550872803, "learning_rate": 4.9280076536713696e-05, "loss": 0.0187, "step": 17978 }, { "epoch": 2.519831814996496, "grad_norm": 0.3185887932777405, "learning_rate": 4.9265725902894045e-05, "loss": 0.0409, "step": 17979 }, { "epoch": 2.5199719691660825, "grad_norm": 0.19657133519649506, "learning_rate": 4.925137526907438e-05, "loss": 0.0224, "step": 17980 }, { "epoch": 2.5201121233356694, "grad_norm": 0.2634916603565216, "learning_rate": 4.9237024635254716e-05, "loss": 0.0172, "step": 17981 }, { "epoch": 2.520252277505256, "grad_norm": 0.04718831926584244, "learning_rate": 4.922267400143506e-05, "loss": 0.0039, "step": 17982 }, { "epoch": 2.5203924316748423, "grad_norm": 0.15247592329978943, "learning_rate": 4.92083233676154e-05, "loss": 0.0193, "step": 17983 }, { "epoch": 2.5205325858444287, "grad_norm": 0.12196923792362213, "learning_rate": 4.919397273379574e-05, "loss": 0.0166, "step": 17984 }, { "epoch": 2.5206727400140156, "grad_norm": 0.16519244015216827, "learning_rate": 4.917962209997608e-05, "loss": 0.0276, "step": 17985 }, { "epoch": 2.520812894183602, "grad_norm": 0.2617371678352356, "learning_rate": 4.916527146615641e-05, "loss": 0.0593, "step": 17986 }, { "epoch": 2.5209530483531886, "grad_norm": 0.20458994805812836, "learning_rate": 4.915092083233676e-05, "loss": 0.032, "step": 17987 }, { "epoch": 2.521093202522775, "grad_norm": 0.3306627869606018, "learning_rate": 4.91365701985171e-05, "loss": 0.0532, "step": 17988 }, { "epoch": 2.5212333566923615, "grad_norm": 0.12184417992830276, "learning_rate": 4.912221956469743e-05, "loss": 0.0205, "step": 17989 }, { "epoch": 2.521373510861948, "grad_norm": 0.12734109163284302, "learning_rate": 4.9107868930877774e-05, "loss": 0.024, "step": 17990 }, { "epoch": 2.521513665031535, "grad_norm": 0.3743153214454651, "learning_rate": 4.9093518297058117e-05, "loss": 0.0393, "step": 17991 }, { "epoch": 2.5216538192011213, "grad_norm": 0.2873273491859436, "learning_rate": 
4.907916766323846e-05, "loss": 0.0186, "step": 17992 }, { "epoch": 2.5217939733707078, "grad_norm": 0.1817651391029358, "learning_rate": 4.9064817029418794e-05, "loss": 0.0224, "step": 17993 }, { "epoch": 2.5219341275402942, "grad_norm": 0.2557279169559479, "learning_rate": 4.905046639559913e-05, "loss": 0.0426, "step": 17994 }, { "epoch": 2.522074281709881, "grad_norm": 0.4094601571559906, "learning_rate": 4.903611576177948e-05, "loss": 0.0023, "step": 17995 }, { "epoch": 2.5222144358794676, "grad_norm": 0.23652827739715576, "learning_rate": 4.9021765127959814e-05, "loss": 0.0182, "step": 17996 }, { "epoch": 2.522354590049054, "grad_norm": 0.18629562854766846, "learning_rate": 4.900741449414015e-05, "loss": 0.0234, "step": 17997 }, { "epoch": 2.5224947442186405, "grad_norm": 0.3949895203113556, "learning_rate": 4.89930638603205e-05, "loss": 0.0143, "step": 17998 }, { "epoch": 2.522634898388227, "grad_norm": 0.08356636017560959, "learning_rate": 4.897871322650083e-05, "loss": 0.0052, "step": 17999 }, { "epoch": 2.5227750525578134, "grad_norm": 0.49311429262161255, "learning_rate": 4.8964362592681175e-05, "loss": 0.0553, "step": 18000 }, { "epoch": 2.5229152067274, "grad_norm": 0.36115556955337524, "learning_rate": 4.895001195886151e-05, "loss": 0.0128, "step": 18001 }, { "epoch": 2.523055360896987, "grad_norm": 0.31049636006355286, "learning_rate": 4.893566132504185e-05, "loss": 0.0249, "step": 18002 }, { "epoch": 2.5231955150665732, "grad_norm": 0.04329279437661171, "learning_rate": 4.8921310691222195e-05, "loss": 0.0032, "step": 18003 }, { "epoch": 2.5233356692361597, "grad_norm": 0.15391989052295685, "learning_rate": 4.890696005740253e-05, "loss": 0.0198, "step": 18004 }, { "epoch": 2.523475823405746, "grad_norm": 1.2122329473495483, "learning_rate": 4.8892609423582865e-05, "loss": 0.0888, "step": 18005 }, { "epoch": 2.523615977575333, "grad_norm": 0.31095290184020996, "learning_rate": 4.8878258789763214e-05, "loss": 0.0145, "step": 18006 }, { "epoch": 
2.5237561317449195, "grad_norm": 0.31254544854164124, "learning_rate": 4.886390815594355e-05, "loss": 0.0189, "step": 18007 }, { "epoch": 2.523896285914506, "grad_norm": 0.038663607090711594, "learning_rate": 4.884955752212389e-05, "loss": 0.0062, "step": 18008 }, { "epoch": 2.5240364400840924, "grad_norm": 0.0795617625117302, "learning_rate": 4.883520688830423e-05, "loss": 0.0058, "step": 18009 }, { "epoch": 2.524176594253679, "grad_norm": 0.17984271049499512, "learning_rate": 4.882085625448457e-05, "loss": 0.0046, "step": 18010 }, { "epoch": 2.5243167484232654, "grad_norm": 0.07299651950597763, "learning_rate": 4.880650562066491e-05, "loss": 0.0034, "step": 18011 }, { "epoch": 2.5244569025928523, "grad_norm": 0.17152342200279236, "learning_rate": 4.879215498684525e-05, "loss": 0.0039, "step": 18012 }, { "epoch": 2.5245970567624387, "grad_norm": 0.26020678877830505, "learning_rate": 4.877780435302558e-05, "loss": 0.0336, "step": 18013 }, { "epoch": 2.524737210932025, "grad_norm": 0.3551461696624756, "learning_rate": 4.876345371920593e-05, "loss": 0.0351, "step": 18014 }, { "epoch": 2.5248773651016116, "grad_norm": 0.11331973969936371, "learning_rate": 4.8749103085386266e-05, "loss": 0.006, "step": 18015 }, { "epoch": 2.5250175192711986, "grad_norm": 0.04525251314043999, "learning_rate": 4.873475245156661e-05, "loss": 0.0024, "step": 18016 }, { "epoch": 2.525157673440785, "grad_norm": 0.25383517146110535, "learning_rate": 4.8720401817746944e-05, "loss": 0.027, "step": 18017 }, { "epoch": 2.5252978276103715, "grad_norm": 0.03788610175251961, "learning_rate": 4.8706051183927286e-05, "loss": 0.0028, "step": 18018 }, { "epoch": 2.525437981779958, "grad_norm": 0.1253206431865692, "learning_rate": 4.869170055010763e-05, "loss": 0.0087, "step": 18019 }, { "epoch": 2.5255781359495444, "grad_norm": 2.796492338180542, "learning_rate": 4.867734991628796e-05, "loss": 0.121, "step": 18020 }, { "epoch": 2.525718290119131, "grad_norm": 0.3664858937263489, "learning_rate": 
4.86629992824683e-05, "loss": 0.0085, "step": 18021 }, { "epoch": 2.5258584442887178, "grad_norm": 0.554985761642456, "learning_rate": 4.864864864864865e-05, "loss": 0.0246, "step": 18022 }, { "epoch": 2.525998598458304, "grad_norm": 0.20167531073093414, "learning_rate": 4.863429801482898e-05, "loss": 0.0337, "step": 18023 }, { "epoch": 2.5261387526278907, "grad_norm": 0.19294866919517517, "learning_rate": 4.8619947381009325e-05, "loss": 0.0194, "step": 18024 }, { "epoch": 2.526278906797477, "grad_norm": 0.3500731289386749, "learning_rate": 4.860559674718966e-05, "loss": 0.063, "step": 18025 }, { "epoch": 2.526419060967064, "grad_norm": 0.15809476375579834, "learning_rate": 4.859124611337e-05, "loss": 0.0398, "step": 18026 }, { "epoch": 2.5265592151366505, "grad_norm": 0.19352607429027557, "learning_rate": 4.8576895479550345e-05, "loss": 0.0374, "step": 18027 }, { "epoch": 2.526699369306237, "grad_norm": 0.16967914998531342, "learning_rate": 4.856254484573068e-05, "loss": 0.0256, "step": 18028 }, { "epoch": 2.5268395234758234, "grad_norm": 0.06481167674064636, "learning_rate": 4.8548194211911015e-05, "loss": 0.0089, "step": 18029 }, { "epoch": 2.52697967764541, "grad_norm": 0.18969322741031647, "learning_rate": 4.8533843578091364e-05, "loss": 0.0376, "step": 18030 }, { "epoch": 2.5271198318149963, "grad_norm": 0.2537195682525635, "learning_rate": 4.85194929442717e-05, "loss": 0.0085, "step": 18031 }, { "epoch": 2.527259985984583, "grad_norm": 0.36229178309440613, "learning_rate": 4.850514231045204e-05, "loss": 0.0561, "step": 18032 }, { "epoch": 2.5274001401541697, "grad_norm": 0.6040002703666687, "learning_rate": 4.849079167663238e-05, "loss": 0.0343, "step": 18033 }, { "epoch": 2.527540294323756, "grad_norm": 0.3086155652999878, "learning_rate": 4.847644104281272e-05, "loss": 0.0355, "step": 18034 }, { "epoch": 2.5276804484933426, "grad_norm": 0.12460111826658249, "learning_rate": 4.846209040899306e-05, "loss": 0.0133, "step": 18035 }, { "epoch": 
2.527820602662929, "grad_norm": 0.15721264481544495, "learning_rate": 4.8447739775173396e-05, "loss": 0.0102, "step": 18036 }, { "epoch": 2.527960756832516, "grad_norm": 0.10478731244802475, "learning_rate": 4.8433389141353745e-05, "loss": 0.0083, "step": 18037 }, { "epoch": 2.5281009110021024, "grad_norm": 0.1951928734779358, "learning_rate": 4.841903850753408e-05, "loss": 0.0715, "step": 18038 }, { "epoch": 2.528241065171689, "grad_norm": 0.1319209337234497, "learning_rate": 4.8404687873714416e-05, "loss": 0.0159, "step": 18039 }, { "epoch": 2.5283812193412754, "grad_norm": 0.2013491839170456, "learning_rate": 4.839033723989476e-05, "loss": 0.018, "step": 18040 }, { "epoch": 2.528521373510862, "grad_norm": 0.506523072719574, "learning_rate": 4.83759866060751e-05, "loss": 0.0434, "step": 18041 }, { "epoch": 2.5286615276804483, "grad_norm": 0.1992463916540146, "learning_rate": 4.8361635972255436e-05, "loss": 0.0145, "step": 18042 }, { "epoch": 2.528801681850035, "grad_norm": 0.4441595673561096, "learning_rate": 4.834728533843578e-05, "loss": 0.0288, "step": 18043 }, { "epoch": 2.5289418360196216, "grad_norm": 0.61379075050354, "learning_rate": 4.833293470461611e-05, "loss": 0.0827, "step": 18044 }, { "epoch": 2.529081990189208, "grad_norm": 0.5615122318267822, "learning_rate": 4.831858407079646e-05, "loss": 0.0441, "step": 18045 }, { "epoch": 2.5292221443587946, "grad_norm": 0.0574890561401844, "learning_rate": 4.83042334369768e-05, "loss": 0.0045, "step": 18046 }, { "epoch": 2.5293622985283815, "grad_norm": 0.28991183638572693, "learning_rate": 4.828988280315713e-05, "loss": 0.0287, "step": 18047 }, { "epoch": 2.529502452697968, "grad_norm": 0.08361067622900009, "learning_rate": 4.8275532169337475e-05, "loss": 0.0134, "step": 18048 }, { "epoch": 2.5296426068675544, "grad_norm": 0.17233183979988098, "learning_rate": 4.826118153551782e-05, "loss": 0.0333, "step": 18049 }, { "epoch": 2.529782761037141, "grad_norm": 0.6347658634185791, "learning_rate": 
4.824683090169815e-05, "loss": 0.0911, "step": 18050 }, { "epoch": 2.5299229152067273, "grad_norm": 0.3135128915309906, "learning_rate": 4.8232480267878494e-05, "loss": 0.0467, "step": 18051 }, { "epoch": 2.5300630693763138, "grad_norm": 0.0376332625746727, "learning_rate": 4.821812963405883e-05, "loss": 0.0174, "step": 18052 }, { "epoch": 2.5302032235459, "grad_norm": 0.5588517785072327, "learning_rate": 4.820377900023918e-05, "loss": 0.0273, "step": 18053 }, { "epoch": 2.530343377715487, "grad_norm": 0.12721534073352814, "learning_rate": 4.8189428366419514e-05, "loss": 0.0115, "step": 18054 }, { "epoch": 2.5304835318850736, "grad_norm": 0.2577660083770752, "learning_rate": 4.817507773259985e-05, "loss": 0.0115, "step": 18055 }, { "epoch": 2.53062368605466, "grad_norm": 0.15856869518756866, "learning_rate": 4.816072709878019e-05, "loss": 0.0131, "step": 18056 }, { "epoch": 2.530763840224247, "grad_norm": 0.08920245617628098, "learning_rate": 4.814637646496053e-05, "loss": 0.0038, "step": 18057 }, { "epoch": 2.5309039943938334, "grad_norm": 0.07169445604085922, "learning_rate": 4.813202583114087e-05, "loss": 0.0047, "step": 18058 }, { "epoch": 2.53104414856342, "grad_norm": 0.18557284772396088, "learning_rate": 4.811767519732121e-05, "loss": 0.0205, "step": 18059 }, { "epoch": 2.5311843027330063, "grad_norm": 0.09519461542367935, "learning_rate": 4.8103324563501546e-05, "loss": 0.005, "step": 18060 }, { "epoch": 2.531324456902593, "grad_norm": 0.39410871267318726, "learning_rate": 4.8088973929681895e-05, "loss": 0.0614, "step": 18061 }, { "epoch": 2.5314646110721792, "grad_norm": 0.06212591007351875, "learning_rate": 4.807462329586223e-05, "loss": 0.005, "step": 18062 }, { "epoch": 2.5316047652417657, "grad_norm": 0.25844892859458923, "learning_rate": 4.8060272662042566e-05, "loss": 0.0204, "step": 18063 }, { "epoch": 2.5317449194113526, "grad_norm": 0.09644581377506256, "learning_rate": 4.804592202822291e-05, "loss": 0.0053, "step": 18064 }, { "epoch": 
2.531885073580939, "grad_norm": 0.01994510553777218, "learning_rate": 4.803157139440325e-05, "loss": 0.0012, "step": 18065 }, { "epoch": 2.5320252277505255, "grad_norm": 0.5027076601982117, "learning_rate": 4.8017220760583585e-05, "loss": 0.0173, "step": 18066 }, { "epoch": 2.532165381920112, "grad_norm": 0.016899367794394493, "learning_rate": 4.800287012676393e-05, "loss": 0.0013, "step": 18067 }, { "epoch": 2.532305536089699, "grad_norm": 0.6354995965957642, "learning_rate": 4.798851949294426e-05, "loss": 0.0346, "step": 18068 }, { "epoch": 2.5324456902592853, "grad_norm": 3.736849784851074, "learning_rate": 4.797416885912461e-05, "loss": 0.1438, "step": 18069 }, { "epoch": 2.532585844428872, "grad_norm": 1.1676243543624878, "learning_rate": 4.795981822530495e-05, "loss": 0.0203, "step": 18070 }, { "epoch": 2.5327259985984583, "grad_norm": 0.30121898651123047, "learning_rate": 4.794546759148528e-05, "loss": 0.0432, "step": 18071 }, { "epoch": 2.5328661527680447, "grad_norm": 0.19894465804100037, "learning_rate": 4.793111695766563e-05, "loss": 0.0234, "step": 18072 }, { "epoch": 2.533006306937631, "grad_norm": 0.12876781821250916, "learning_rate": 4.7916766323845966e-05, "loss": 0.0061, "step": 18073 }, { "epoch": 2.533146461107218, "grad_norm": 0.2891557812690735, "learning_rate": 4.79024156900263e-05, "loss": 0.04, "step": 18074 }, { "epoch": 2.5332866152768045, "grad_norm": 0.08656831085681915, "learning_rate": 4.7888065056206644e-05, "loss": 0.0345, "step": 18075 }, { "epoch": 2.533426769446391, "grad_norm": 0.28342166543006897, "learning_rate": 4.7873714422386986e-05, "loss": 0.0186, "step": 18076 }, { "epoch": 2.5335669236159775, "grad_norm": 0.17604508996009827, "learning_rate": 4.785936378856733e-05, "loss": 0.0559, "step": 18077 }, { "epoch": 2.5337070777855644, "grad_norm": 0.11375840753316879, "learning_rate": 4.7845013154747664e-05, "loss": 0.0132, "step": 18078 }, { "epoch": 2.533847231955151, "grad_norm": 0.07525541633367538, "learning_rate": 
4.7830662520928e-05, "loss": 0.0057, "step": 18079 }, { "epoch": 2.5339873861247373, "grad_norm": 0.32763245701789856, "learning_rate": 4.781631188710835e-05, "loss": 0.0583, "step": 18080 }, { "epoch": 2.5341275402943237, "grad_norm": 0.19011011719703674, "learning_rate": 4.780196125328868e-05, "loss": 0.0181, "step": 18081 }, { "epoch": 2.53426769446391, "grad_norm": 0.31514057517051697, "learning_rate": 4.778761061946902e-05, "loss": 0.0513, "step": 18082 }, { "epoch": 2.5344078486334967, "grad_norm": 0.25016865134239197, "learning_rate": 4.777325998564936e-05, "loss": 0.0311, "step": 18083 }, { "epoch": 2.534548002803083, "grad_norm": 0.22858941555023193, "learning_rate": 4.77589093518297e-05, "loss": 0.0211, "step": 18084 }, { "epoch": 2.53468815697267, "grad_norm": 0.3425392210483551, "learning_rate": 4.7744558718010045e-05, "loss": 0.0392, "step": 18085 }, { "epoch": 2.5348283111422565, "grad_norm": 0.3620901107788086, "learning_rate": 4.773020808419038e-05, "loss": 0.0572, "step": 18086 }, { "epoch": 2.534968465311843, "grad_norm": 0.13005124032497406, "learning_rate": 4.7715857450370715e-05, "loss": 0.0198, "step": 18087 }, { "epoch": 2.53510861948143, "grad_norm": 0.06559021025896072, "learning_rate": 4.7701506816551064e-05, "loss": 0.0063, "step": 18088 }, { "epoch": 2.5352487736510163, "grad_norm": 0.42133620381355286, "learning_rate": 4.76871561827314e-05, "loss": 0.0396, "step": 18089 }, { "epoch": 2.5353889278206028, "grad_norm": 0.3147045969963074, "learning_rate": 4.7672805548911735e-05, "loss": 0.0328, "step": 18090 }, { "epoch": 2.5355290819901892, "grad_norm": 0.052514929324388504, "learning_rate": 4.765845491509208e-05, "loss": 0.0038, "step": 18091 }, { "epoch": 2.5356692361597757, "grad_norm": 0.389994740486145, "learning_rate": 4.764410428127242e-05, "loss": 0.0534, "step": 18092 }, { "epoch": 2.535809390329362, "grad_norm": 0.3380163311958313, "learning_rate": 4.762975364745276e-05, "loss": 0.0246, "step": 18093 }, { "epoch": 
2.5359495444989486, "grad_norm": 0.19539874792099, "learning_rate": 4.76154030136331e-05, "loss": 0.0367, "step": 18094 }, { "epoch": 2.5360896986685355, "grad_norm": 0.2270340621471405, "learning_rate": 4.760105237981343e-05, "loss": 0.0361, "step": 18095 }, { "epoch": 2.536229852838122, "grad_norm": 0.11640356481075287, "learning_rate": 4.758670174599378e-05, "loss": 0.0119, "step": 18096 }, { "epoch": 2.5363700070077084, "grad_norm": 0.3057485520839691, "learning_rate": 4.7572351112174116e-05, "loss": 0.04, "step": 18097 }, { "epoch": 2.536510161177295, "grad_norm": 0.09914348274469376, "learning_rate": 4.755800047835445e-05, "loss": 0.0052, "step": 18098 }, { "epoch": 2.536650315346882, "grad_norm": 0.06515375524759293, "learning_rate": 4.7543649844534794e-05, "loss": 0.0027, "step": 18099 }, { "epoch": 2.5367904695164682, "grad_norm": 0.39961355924606323, "learning_rate": 4.7529299210715136e-05, "loss": 0.0522, "step": 18100 }, { "epoch": 2.5369306236860547, "grad_norm": 0.15189528465270996, "learning_rate": 4.751494857689548e-05, "loss": 0.0137, "step": 18101 }, { "epoch": 2.537070777855641, "grad_norm": 0.7765271663665771, "learning_rate": 4.750059794307581e-05, "loss": 0.0204, "step": 18102 }, { "epoch": 2.5372109320252276, "grad_norm": 0.34573015570640564, "learning_rate": 4.748624730925615e-05, "loss": 0.0395, "step": 18103 }, { "epoch": 2.537351086194814, "grad_norm": 0.07802074402570724, "learning_rate": 4.74718966754365e-05, "loss": 0.0077, "step": 18104 }, { "epoch": 2.537491240364401, "grad_norm": 0.23393578827381134, "learning_rate": 4.745754604161683e-05, "loss": 0.0405, "step": 18105 }, { "epoch": 2.5376313945339875, "grad_norm": 0.11811116337776184, "learning_rate": 4.7443195407797175e-05, "loss": 0.0095, "step": 18106 }, { "epoch": 2.537771548703574, "grad_norm": 0.09666913002729416, "learning_rate": 4.742884477397751e-05, "loss": 0.0024, "step": 18107 }, { "epoch": 2.5379117028731604, "grad_norm": 0.7012326717376709, "learning_rate": 
4.741449414015785e-05, "loss": 0.0493, "step": 18108 }, { "epoch": 2.5380518570427473, "grad_norm": 0.03890467435121536, "learning_rate": 4.7400143506338194e-05, "loss": 0.002, "step": 18109 }, { "epoch": 2.5381920112123337, "grad_norm": 0.16483518481254578, "learning_rate": 4.738579287251853e-05, "loss": 0.013, "step": 18110 }, { "epoch": 2.53833216538192, "grad_norm": 0.5212472677230835, "learning_rate": 4.737144223869888e-05, "loss": 0.0165, "step": 18111 }, { "epoch": 2.5384723195515067, "grad_norm": 0.23199720680713654, "learning_rate": 4.7357091604879214e-05, "loss": 0.0593, "step": 18112 }, { "epoch": 2.538612473721093, "grad_norm": 0.5993015766143799, "learning_rate": 4.734274097105955e-05, "loss": 0.033, "step": 18113 }, { "epoch": 2.5387526278906796, "grad_norm": 0.8392219543457031, "learning_rate": 4.732839033723989e-05, "loss": 0.0327, "step": 18114 }, { "epoch": 2.538892782060266, "grad_norm": 0.24607329070568085, "learning_rate": 4.7314039703420234e-05, "loss": 0.0201, "step": 18115 }, { "epoch": 2.539032936229853, "grad_norm": 0.5014885067939758, "learning_rate": 4.729968906960057e-05, "loss": 0.0703, "step": 18116 }, { "epoch": 2.5391730903994394, "grad_norm": 0.5055301189422607, "learning_rate": 4.728533843578091e-05, "loss": 0.0266, "step": 18117 }, { "epoch": 2.539313244569026, "grad_norm": 0.6922755837440491, "learning_rate": 4.7270987801961246e-05, "loss": 0.0414, "step": 18118 }, { "epoch": 2.5394533987386123, "grad_norm": 0.5933761596679688, "learning_rate": 4.7256637168141595e-05, "loss": 0.0616, "step": 18119 }, { "epoch": 2.539593552908199, "grad_norm": 0.17315752804279327, "learning_rate": 4.724228653432193e-05, "loss": 0.0211, "step": 18120 }, { "epoch": 2.5397337070777857, "grad_norm": 0.32750681042671204, "learning_rate": 4.7227935900502266e-05, "loss": 0.042, "step": 18121 }, { "epoch": 2.539873861247372, "grad_norm": 0.1828962117433548, "learning_rate": 4.721358526668261e-05, "loss": 0.0345, "step": 18122 }, { "epoch": 
2.5400140154169586, "grad_norm": 0.5407280325889587, "learning_rate": 4.719923463286295e-05, "loss": 0.0367, "step": 18123 }, { "epoch": 2.540154169586545, "grad_norm": 0.1347690224647522, "learning_rate": 4.7184883999043285e-05, "loss": 0.0329, "step": 18124 }, { "epoch": 2.5402943237561315, "grad_norm": 0.11175432801246643, "learning_rate": 4.717053336522363e-05, "loss": 0.0205, "step": 18125 }, { "epoch": 2.5404344779257184, "grad_norm": 0.1709836721420288, "learning_rate": 4.715618273140396e-05, "loss": 0.0295, "step": 18126 }, { "epoch": 2.540574632095305, "grad_norm": 0.24169591069221497, "learning_rate": 4.714183209758431e-05, "loss": 0.0325, "step": 18127 }, { "epoch": 2.5407147862648913, "grad_norm": 0.3713139593601227, "learning_rate": 4.712748146376465e-05, "loss": 0.0754, "step": 18128 }, { "epoch": 2.540854940434478, "grad_norm": 0.3695809841156006, "learning_rate": 4.711313082994498e-05, "loss": 0.0195, "step": 18129 }, { "epoch": 2.5409950946040647, "grad_norm": 0.3140918016433716, "learning_rate": 4.7098780196125325e-05, "loss": 0.0432, "step": 18130 }, { "epoch": 2.541135248773651, "grad_norm": 0.17927393317222595, "learning_rate": 4.708442956230567e-05, "loss": 0.0152, "step": 18131 }, { "epoch": 2.5412754029432376, "grad_norm": 0.19948315620422363, "learning_rate": 4.7070078928486e-05, "loss": 0.0067, "step": 18132 }, { "epoch": 2.541415557112824, "grad_norm": 0.5426936149597168, "learning_rate": 4.7055728294666344e-05, "loss": 0.0621, "step": 18133 }, { "epoch": 2.5415557112824105, "grad_norm": 0.1400577425956726, "learning_rate": 4.704137766084668e-05, "loss": 0.0249, "step": 18134 }, { "epoch": 2.541695865451997, "grad_norm": 0.1902443766593933, "learning_rate": 4.702702702702703e-05, "loss": 0.0123, "step": 18135 }, { "epoch": 2.541836019621584, "grad_norm": 1.3975757360458374, "learning_rate": 4.7012676393207364e-05, "loss": 0.0284, "step": 18136 }, { "epoch": 2.5419761737911704, "grad_norm": 0.17924724519252777, "learning_rate": 
4.69983257593877e-05, "loss": 0.0285, "step": 18137 }, { "epoch": 2.542116327960757, "grad_norm": 0.21573258936405182, "learning_rate": 4.698397512556804e-05, "loss": 0.0047, "step": 18138 }, { "epoch": 2.5422564821303433, "grad_norm": 0.27791786193847656, "learning_rate": 4.696962449174838e-05, "loss": 0.0264, "step": 18139 }, { "epoch": 2.54239663629993, "grad_norm": 0.35525017976760864, "learning_rate": 4.695527385792872e-05, "loss": 0.0248, "step": 18140 }, { "epoch": 2.5425367904695166, "grad_norm": 0.17878513038158417, "learning_rate": 4.694092322410906e-05, "loss": 0.0207, "step": 18141 }, { "epoch": 2.542676944639103, "grad_norm": 0.527005672454834, "learning_rate": 4.6926572590289396e-05, "loss": 0.0286, "step": 18142 }, { "epoch": 2.5428170988086896, "grad_norm": 0.22158260643482208, "learning_rate": 4.6912221956469745e-05, "loss": 0.0187, "step": 18143 }, { "epoch": 2.542957252978276, "grad_norm": 0.1652626395225525, "learning_rate": 4.689787132265008e-05, "loss": 0.0115, "step": 18144 }, { "epoch": 2.5430974071478625, "grad_norm": 0.1802297681570053, "learning_rate": 4.6883520688830416e-05, "loss": 0.0165, "step": 18145 }, { "epoch": 2.543237561317449, "grad_norm": 0.09339119493961334, "learning_rate": 4.6869170055010765e-05, "loss": 0.015, "step": 18146 }, { "epoch": 2.543377715487036, "grad_norm": 0.12759867310523987, "learning_rate": 4.68548194211911e-05, "loss": 0.0146, "step": 18147 }, { "epoch": 2.5435178696566223, "grad_norm": 0.1906467080116272, "learning_rate": 4.6840468787371435e-05, "loss": 0.0101, "step": 18148 }, { "epoch": 2.5436580238262088, "grad_norm": 0.5868571400642395, "learning_rate": 4.682611815355178e-05, "loss": 0.034, "step": 18149 }, { "epoch": 2.543798177995795, "grad_norm": 0.0958685353398323, "learning_rate": 4.681176751973212e-05, "loss": 0.0129, "step": 18150 }, { "epoch": 2.543938332165382, "grad_norm": 0.31481021642684937, "learning_rate": 4.679741688591246e-05, "loss": 0.0549, "step": 18151 }, { "epoch": 
2.5440784863349686, "grad_norm": 0.1680697351694107, "learning_rate": 4.67830662520928e-05, "loss": 0.0232, "step": 18152 }, { "epoch": 2.544218640504555, "grad_norm": 0.07928688079118729, "learning_rate": 4.676871561827313e-05, "loss": 0.0053, "step": 18153 }, { "epoch": 2.5443587946741415, "grad_norm": 0.11807805299758911, "learning_rate": 4.675436498445348e-05, "loss": 0.0226, "step": 18154 }, { "epoch": 2.544498948843728, "grad_norm": 0.31371504068374634, "learning_rate": 4.6740014350633816e-05, "loss": 0.0523, "step": 18155 }, { "epoch": 2.5446391030133144, "grad_norm": 0.1173236072063446, "learning_rate": 4.672566371681415e-05, "loss": 0.0055, "step": 18156 }, { "epoch": 2.5447792571829013, "grad_norm": 0.2525007724761963, "learning_rate": 4.6711313082994494e-05, "loss": 0.0292, "step": 18157 }, { "epoch": 2.544919411352488, "grad_norm": 0.31894198060035706, "learning_rate": 4.6696962449174836e-05, "loss": 0.0201, "step": 18158 }, { "epoch": 2.5450595655220742, "grad_norm": 0.30312296748161316, "learning_rate": 4.668261181535518e-05, "loss": 0.0072, "step": 18159 }, { "epoch": 2.5451997196916607, "grad_norm": 0.30419832468032837, "learning_rate": 4.6668261181535513e-05, "loss": 0.0206, "step": 18160 }, { "epoch": 2.5453398738612476, "grad_norm": 0.17003212869167328, "learning_rate": 4.665391054771585e-05, "loss": 0.0296, "step": 18161 }, { "epoch": 2.545480028030834, "grad_norm": 0.24882318079471588, "learning_rate": 4.66395599138962e-05, "loss": 0.0361, "step": 18162 }, { "epoch": 2.5456201822004205, "grad_norm": 0.03577204421162605, "learning_rate": 4.662520928007653e-05, "loss": 0.0022, "step": 18163 }, { "epoch": 2.545760336370007, "grad_norm": 0.17998141050338745, "learning_rate": 4.661085864625687e-05, "loss": 0.0872, "step": 18164 }, { "epoch": 2.5459004905395934, "grad_norm": 0.028033820912241936, "learning_rate": 4.659650801243721e-05, "loss": 0.0013, "step": 18165 }, { "epoch": 2.54604064470918, "grad_norm": 0.30691298842430115, "learning_rate": 
4.658215737861755e-05, "loss": 0.0341, "step": 18166 }, { "epoch": 2.546180798878767, "grad_norm": 0.056820787489414215, "learning_rate": 4.6567806744797895e-05, "loss": 0.0032, "step": 18167 }, { "epoch": 2.5463209530483533, "grad_norm": 0.22577285766601562, "learning_rate": 4.655345611097823e-05, "loss": 0.0046, "step": 18168 }, { "epoch": 2.5464611072179397, "grad_norm": 0.5582699179649353, "learning_rate": 4.6539105477158565e-05, "loss": 0.0699, "step": 18169 }, { "epoch": 2.546601261387526, "grad_norm": 1.1404780149459839, "learning_rate": 4.6524754843338914e-05, "loss": 0.0952, "step": 18170 }, { "epoch": 2.546741415557113, "grad_norm": 0.13056795299053192, "learning_rate": 4.651040420951925e-05, "loss": 0.0105, "step": 18171 }, { "epoch": 2.5468815697266995, "grad_norm": 0.7744981646537781, "learning_rate": 4.6496053575699585e-05, "loss": 0.0711, "step": 18172 }, { "epoch": 2.547021723896286, "grad_norm": 0.19840537011623383, "learning_rate": 4.648170294187993e-05, "loss": 0.0429, "step": 18173 }, { "epoch": 2.5471618780658725, "grad_norm": 0.4157489836215973, "learning_rate": 4.646735230806027e-05, "loss": 0.0625, "step": 18174 }, { "epoch": 2.547302032235459, "grad_norm": 0.27275922894477844, "learning_rate": 4.645300167424061e-05, "loss": 0.024, "step": 18175 }, { "epoch": 2.5474421864050454, "grad_norm": 0.21879683434963226, "learning_rate": 4.6438651040420947e-05, "loss": 0.0624, "step": 18176 }, { "epoch": 2.547582340574632, "grad_norm": 0.1999858021736145, "learning_rate": 4.642430040660128e-05, "loss": 0.0219, "step": 18177 }, { "epoch": 2.5477224947442187, "grad_norm": 0.12946660816669464, "learning_rate": 4.640994977278163e-05, "loss": 0.0161, "step": 18178 }, { "epoch": 2.547862648913805, "grad_norm": 0.1165948137640953, "learning_rate": 4.6395599138961966e-05, "loss": 0.0158, "step": 18179 }, { "epoch": 2.5480028030833917, "grad_norm": 0.20548461377620697, "learning_rate": 4.63812485051423e-05, "loss": 0.0516, "step": 18180 }, { "epoch": 
2.548142957252978, "grad_norm": 0.2472551017999649, "learning_rate": 4.6366897871322644e-05, "loss": 0.043, "step": 18181 }, { "epoch": 2.548283111422565, "grad_norm": 0.11097093671560287, "learning_rate": 4.6352547237502986e-05, "loss": 0.006, "step": 18182 }, { "epoch": 2.5484232655921515, "grad_norm": 0.133823961019516, "learning_rate": 4.633819660368333e-05, "loss": 0.0147, "step": 18183 }, { "epoch": 2.548563419761738, "grad_norm": 0.3769824504852295, "learning_rate": 4.632384596986366e-05, "loss": 0.0262, "step": 18184 }, { "epoch": 2.5487035739313244, "grad_norm": 0.43822044134140015, "learning_rate": 4.6309495336044005e-05, "loss": 0.0207, "step": 18185 }, { "epoch": 2.548843728100911, "grad_norm": 0.48032504320144653, "learning_rate": 4.629514470222435e-05, "loss": 0.0294, "step": 18186 }, { "epoch": 2.5489838822704973, "grad_norm": 0.18290066719055176, "learning_rate": 4.628079406840468e-05, "loss": 0.0134, "step": 18187 }, { "epoch": 2.5491240364400842, "grad_norm": 0.22822889685630798, "learning_rate": 4.626644343458502e-05, "loss": 0.0146, "step": 18188 }, { "epoch": 2.5492641906096707, "grad_norm": 0.49276286363601685, "learning_rate": 4.625209280076537e-05, "loss": 0.0585, "step": 18189 }, { "epoch": 2.549404344779257, "grad_norm": 0.29231491684913635, "learning_rate": 4.62377421669457e-05, "loss": 0.0173, "step": 18190 }, { "epoch": 2.5495444989488436, "grad_norm": 0.08550060540437698, "learning_rate": 4.6223391533126044e-05, "loss": 0.011, "step": 18191 }, { "epoch": 2.5496846531184305, "grad_norm": 0.10867733508348465, "learning_rate": 4.620904089930638e-05, "loss": 0.0049, "step": 18192 }, { "epoch": 2.549824807288017, "grad_norm": 0.17415931820869446, "learning_rate": 4.619469026548672e-05, "loss": 0.0437, "step": 18193 }, { "epoch": 2.5499649614576034, "grad_norm": 0.3211270868778229, "learning_rate": 4.6180339631667064e-05, "loss": 0.0439, "step": 18194 }, { "epoch": 2.55010511562719, "grad_norm": 0.09146318584680557, "learning_rate": 
4.61659889978474e-05, "loss": 0.0078, "step": 18195 }, { "epoch": 2.5502452697967763, "grad_norm": 0.0607328936457634, "learning_rate": 4.6151638364027735e-05, "loss": 0.0043, "step": 18196 }, { "epoch": 2.550385423966363, "grad_norm": 0.5442698001861572, "learning_rate": 4.6137287730208084e-05, "loss": 0.0186, "step": 18197 }, { "epoch": 2.5505255781359493, "grad_norm": 0.25543537735939026, "learning_rate": 4.612293709638842e-05, "loss": 0.0295, "step": 18198 }, { "epoch": 2.550665732305536, "grad_norm": 0.4715573787689209, "learning_rate": 4.610858646256876e-05, "loss": 0.038, "step": 18199 }, { "epoch": 2.5508058864751226, "grad_norm": 0.23139479756355286, "learning_rate": 4.6094235828749096e-05, "loss": 0.0307, "step": 18200 }, { "epoch": 2.550946040644709, "grad_norm": 0.2581072747707367, "learning_rate": 4.607988519492944e-05, "loss": 0.0473, "step": 18201 }, { "epoch": 2.551086194814296, "grad_norm": 0.12375976890325546, "learning_rate": 4.606553456110978e-05, "loss": 0.0139, "step": 18202 }, { "epoch": 2.5512263489838825, "grad_norm": 0.4782208800315857, "learning_rate": 4.6051183927290116e-05, "loss": 0.038, "step": 18203 }, { "epoch": 2.551366503153469, "grad_norm": 0.24449339509010315, "learning_rate": 4.603683329347045e-05, "loss": 0.0269, "step": 18204 }, { "epoch": 2.5515066573230554, "grad_norm": 0.10471366345882416, "learning_rate": 4.60224826596508e-05, "loss": 0.0058, "step": 18205 }, { "epoch": 2.551646811492642, "grad_norm": 0.15542851388454437, "learning_rate": 4.6008132025831135e-05, "loss": 0.0176, "step": 18206 }, { "epoch": 2.5517869656622283, "grad_norm": 0.34025418758392334, "learning_rate": 4.599378139201148e-05, "loss": 0.024, "step": 18207 }, { "epoch": 2.5519271198318147, "grad_norm": 0.3624800145626068, "learning_rate": 4.597943075819181e-05, "loss": 0.0263, "step": 18208 }, { "epoch": 2.5520672740014017, "grad_norm": 0.49432995915412903, "learning_rate": 4.5965080124372155e-05, "loss": 0.0384, "step": 18209 }, { "epoch": 
2.552207428170988, "grad_norm": 0.45202943682670593, "learning_rate": 4.59507294905525e-05, "loss": 0.092, "step": 18210 }, { "epoch": 2.5523475823405746, "grad_norm": 0.7757467031478882, "learning_rate": 4.593637885673283e-05, "loss": 0.0767, "step": 18211 }, { "epoch": 2.552487736510161, "grad_norm": 0.32253241539001465, "learning_rate": 4.592202822291317e-05, "loss": 0.0206, "step": 18212 }, { "epoch": 2.552627890679748, "grad_norm": 0.09224551916122437, "learning_rate": 4.590767758909352e-05, "loss": 0.0052, "step": 18213 }, { "epoch": 2.5527680448493344, "grad_norm": 0.5775570869445801, "learning_rate": 4.589332695527385e-05, "loss": 0.0883, "step": 18214 }, { "epoch": 2.552908199018921, "grad_norm": 0.15705999732017517, "learning_rate": 4.5878976321454194e-05, "loss": 0.0071, "step": 18215 }, { "epoch": 2.5530483531885073, "grad_norm": 0.5348058342933655, "learning_rate": 4.586462568763453e-05, "loss": 0.0362, "step": 18216 }, { "epoch": 2.5531885073580938, "grad_norm": 0.14788256585597992, "learning_rate": 4.585027505381488e-05, "loss": 0.0092, "step": 18217 }, { "epoch": 2.5533286615276802, "grad_norm": 0.2483576536178589, "learning_rate": 4.5835924419995214e-05, "loss": 0.0437, "step": 18218 }, { "epoch": 2.553468815697267, "grad_norm": 0.49410519003868103, "learning_rate": 4.582157378617555e-05, "loss": 0.0121, "step": 18219 }, { "epoch": 2.5536089698668536, "grad_norm": 2.821918249130249, "learning_rate": 4.58072231523559e-05, "loss": 0.0732, "step": 18220 }, { "epoch": 2.55374912403644, "grad_norm": 0.22442418336868286, "learning_rate": 4.579287251853623e-05, "loss": 0.0508, "step": 18221 }, { "epoch": 2.5538892782060265, "grad_norm": 0.09452279657125473, "learning_rate": 4.577852188471657e-05, "loss": 0.0037, "step": 18222 }, { "epoch": 2.5540294323756134, "grad_norm": 0.0503377839922905, "learning_rate": 4.576417125089691e-05, "loss": 0.0053, "step": 18223 }, { "epoch": 2.5541695865452, "grad_norm": 0.32724228501319885, "learning_rate": 
4.574982061707725e-05, "loss": 0.0261, "step": 18224 }, { "epoch": 2.5543097407147863, "grad_norm": 0.1376713514328003, "learning_rate": 4.5735469983257595e-05, "loss": 0.0257, "step": 18225 }, { "epoch": 2.554449894884373, "grad_norm": 0.235674187541008, "learning_rate": 4.572111934943793e-05, "loss": 0.017, "step": 18226 }, { "epoch": 2.5545900490539593, "grad_norm": 0.12057920545339584, "learning_rate": 4.5706768715618266e-05, "loss": 0.0232, "step": 18227 }, { "epoch": 2.5547302032235457, "grad_norm": 1.7068744897842407, "learning_rate": 4.5692418081798614e-05, "loss": 0.0718, "step": 18228 }, { "epoch": 2.554870357393132, "grad_norm": 0.3147523105144501, "learning_rate": 4.567806744797895e-05, "loss": 0.0235, "step": 18229 }, { "epoch": 2.555010511562719, "grad_norm": 0.21371452510356903, "learning_rate": 4.5663716814159285e-05, "loss": 0.0316, "step": 18230 }, { "epoch": 2.5551506657323055, "grad_norm": 0.39048099517822266, "learning_rate": 4.564936618033963e-05, "loss": 0.0276, "step": 18231 }, { "epoch": 2.555290819901892, "grad_norm": 0.18305683135986328, "learning_rate": 4.563501554651997e-05, "loss": 0.0135, "step": 18232 }, { "epoch": 2.555430974071479, "grad_norm": 0.20129340887069702, "learning_rate": 4.562066491270031e-05, "loss": 0.0495, "step": 18233 }, { "epoch": 2.5555711282410654, "grad_norm": 0.21871869266033173, "learning_rate": 4.560631427888065e-05, "loss": 0.0162, "step": 18234 }, { "epoch": 2.555711282410652, "grad_norm": 0.23431149125099182, "learning_rate": 4.559196364506098e-05, "loss": 0.0483, "step": 18235 }, { "epoch": 2.5558514365802383, "grad_norm": 0.14710527658462524, "learning_rate": 4.557761301124133e-05, "loss": 0.0079, "step": 18236 }, { "epoch": 2.5559915907498247, "grad_norm": 0.19244778156280518, "learning_rate": 4.5563262377421666e-05, "loss": 0.0283, "step": 18237 }, { "epoch": 2.556131744919411, "grad_norm": 0.16671106219291687, "learning_rate": 4.5548911743602e-05, "loss": 0.0169, "step": 18238 }, { "epoch": 
2.5562718990889977, "grad_norm": 0.09498811513185501, "learning_rate": 4.5534561109782344e-05, "loss": 0.0095, "step": 18239 }, { "epoch": 2.5564120532585846, "grad_norm": 0.37462374567985535, "learning_rate": 4.5520210475962686e-05, "loss": 0.0089, "step": 18240 }, { "epoch": 2.556552207428171, "grad_norm": 0.19506587088108063, "learning_rate": 4.550585984214303e-05, "loss": 0.0177, "step": 18241 }, { "epoch": 2.5566923615977575, "grad_norm": 0.2198793590068817, "learning_rate": 4.5491509208323363e-05, "loss": 0.0116, "step": 18242 }, { "epoch": 2.556832515767344, "grad_norm": 0.02570890448987484, "learning_rate": 4.54771585745037e-05, "loss": 0.0023, "step": 18243 }, { "epoch": 2.556972669936931, "grad_norm": 0.24092863500118256, "learning_rate": 4.546280794068405e-05, "loss": 0.0553, "step": 18244 }, { "epoch": 2.5571128241065173, "grad_norm": 0.1576327234506607, "learning_rate": 4.544845730686438e-05, "loss": 0.0168, "step": 18245 }, { "epoch": 2.5572529782761038, "grad_norm": 0.24204587936401367, "learning_rate": 4.543410667304472e-05, "loss": 0.0232, "step": 18246 }, { "epoch": 2.55739313244569, "grad_norm": 0.10460522025823593, "learning_rate": 4.541975603922506e-05, "loss": 0.0073, "step": 18247 }, { "epoch": 2.5575332866152767, "grad_norm": 0.15091226994991302, "learning_rate": 4.54054054054054e-05, "loss": 0.0056, "step": 18248 }, { "epoch": 2.557673440784863, "grad_norm": 0.13097040355205536, "learning_rate": 4.5391054771585745e-05, "loss": 0.0123, "step": 18249 }, { "epoch": 2.55781359495445, "grad_norm": 0.09134726971387863, "learning_rate": 4.537670413776608e-05, "loss": 0.0071, "step": 18250 }, { "epoch": 2.5579537491240365, "grad_norm": 0.05973028019070625, "learning_rate": 4.5362353503946415e-05, "loss": 0.0047, "step": 18251 }, { "epoch": 2.558093903293623, "grad_norm": 0.4121887683868408, "learning_rate": 4.5348002870126764e-05, "loss": 0.0248, "step": 18252 }, { "epoch": 2.5582340574632094, "grad_norm": 0.3629038631916046, "learning_rate": 
4.53336522363071e-05, "loss": 0.0472, "step": 18253 }, { "epoch": 2.5583742116327963, "grad_norm": 0.8563442826271057, "learning_rate": 4.5319301602487435e-05, "loss": 0.0299, "step": 18254 }, { "epoch": 2.558514365802383, "grad_norm": 0.1299944818019867, "learning_rate": 4.530495096866778e-05, "loss": 0.0081, "step": 18255 }, { "epoch": 2.5586545199719692, "grad_norm": 0.3666844069957733, "learning_rate": 4.529060033484812e-05, "loss": 0.064, "step": 18256 }, { "epoch": 2.5587946741415557, "grad_norm": 0.18424613773822784, "learning_rate": 4.527624970102846e-05, "loss": 0.0087, "step": 18257 }, { "epoch": 2.558934828311142, "grad_norm": 0.06487855315208435, "learning_rate": 4.5261899067208797e-05, "loss": 0.0214, "step": 18258 }, { "epoch": 2.5590749824807286, "grad_norm": 0.193250373005867, "learning_rate": 4.524754843338914e-05, "loss": 0.0212, "step": 18259 }, { "epoch": 2.559215136650315, "grad_norm": 0.05787370726466179, "learning_rate": 4.523319779956948e-05, "loss": 0.0087, "step": 18260 }, { "epoch": 2.559355290819902, "grad_norm": 0.0944092720746994, "learning_rate": 4.5218847165749816e-05, "loss": 0.0035, "step": 18261 }, { "epoch": 2.5594954449894884, "grad_norm": 0.300245463848114, "learning_rate": 4.520449653193015e-05, "loss": 0.014, "step": 18262 }, { "epoch": 2.559635599159075, "grad_norm": 0.19826042652130127, "learning_rate": 4.51901458981105e-05, "loss": 0.008, "step": 18263 }, { "epoch": 2.5597757533286614, "grad_norm": 0.8386831879615784, "learning_rate": 4.5175795264290836e-05, "loss": 0.0447, "step": 18264 }, { "epoch": 2.5599159074982483, "grad_norm": 0.5162139534950256, "learning_rate": 4.516144463047118e-05, "loss": 0.0505, "step": 18265 }, { "epoch": 2.5600560616678347, "grad_norm": 0.18964841961860657, "learning_rate": 4.514709399665151e-05, "loss": 0.0378, "step": 18266 }, { "epoch": 2.560196215837421, "grad_norm": 0.5080700516700745, "learning_rate": 4.5132743362831855e-05, "loss": 0.0626, "step": 18267 }, { "epoch": 
2.5603363700070076, "grad_norm": 0.2166011482477188, "learning_rate": 4.51183927290122e-05, "loss": 0.0186, "step": 18268 }, { "epoch": 2.560476524176594, "grad_norm": 0.7766144275665283, "learning_rate": 4.510404209519253e-05, "loss": 0.0774, "step": 18269 }, { "epoch": 2.5606166783461806, "grad_norm": 0.3797752857208252, "learning_rate": 4.508969146137287e-05, "loss": 0.0069, "step": 18270 }, { "epoch": 2.5607568325157675, "grad_norm": 0.7576006650924683, "learning_rate": 4.507534082755322e-05, "loss": 0.0343, "step": 18271 }, { "epoch": 2.560896986685354, "grad_norm": 0.544466495513916, "learning_rate": 4.506099019373355e-05, "loss": 0.0312, "step": 18272 }, { "epoch": 2.5610371408549404, "grad_norm": 0.38504084944725037, "learning_rate": 4.5046639559913894e-05, "loss": 0.043, "step": 18273 }, { "epoch": 2.561177295024527, "grad_norm": 0.19185112416744232, "learning_rate": 4.503228892609423e-05, "loss": 0.0166, "step": 18274 }, { "epoch": 2.5613174491941137, "grad_norm": 0.24532164633274078, "learning_rate": 4.501793829227457e-05, "loss": 0.0378, "step": 18275 }, { "epoch": 2.5614576033637, "grad_norm": 0.8023670315742493, "learning_rate": 4.5003587658454914e-05, "loss": 0.0269, "step": 18276 }, { "epoch": 2.5615977575332867, "grad_norm": 0.2732026278972626, "learning_rate": 4.498923702463525e-05, "loss": 0.0174, "step": 18277 }, { "epoch": 2.561737911702873, "grad_norm": 0.12387651950120926, "learning_rate": 4.4974886390815585e-05, "loss": 0.0114, "step": 18278 }, { "epoch": 2.5618780658724596, "grad_norm": 0.13737429678440094, "learning_rate": 4.4960535756995933e-05, "loss": 0.0188, "step": 18279 }, { "epoch": 2.562018220042046, "grad_norm": 0.31917428970336914, "learning_rate": 4.494618512317627e-05, "loss": 0.0626, "step": 18280 }, { "epoch": 2.562158374211633, "grad_norm": 0.08627309650182724, "learning_rate": 4.493183448935661e-05, "loss": 0.0042, "step": 18281 }, { "epoch": 2.5622985283812194, "grad_norm": 0.15085409581661224, "learning_rate": 
4.4917483855536946e-05, "loss": 0.0275, "step": 18282 }, { "epoch": 2.562438682550806, "grad_norm": 0.15053768455982208, "learning_rate": 4.490313322171729e-05, "loss": 0.0086, "step": 18283 }, { "epoch": 2.5625788367203923, "grad_norm": 0.23506274819374084, "learning_rate": 4.488878258789763e-05, "loss": 0.0386, "step": 18284 }, { "epoch": 2.5627189908899792, "grad_norm": 0.23363982141017914, "learning_rate": 4.4874431954077966e-05, "loss": 0.0331, "step": 18285 }, { "epoch": 2.5628591450595657, "grad_norm": 0.2985607385635376, "learning_rate": 4.48600813202583e-05, "loss": 0.0441, "step": 18286 }, { "epoch": 2.562999299229152, "grad_norm": 0.15091802179813385, "learning_rate": 4.484573068643865e-05, "loss": 0.0104, "step": 18287 }, { "epoch": 2.5631394533987386, "grad_norm": 0.026273801922798157, "learning_rate": 4.4831380052618985e-05, "loss": 0.001, "step": 18288 }, { "epoch": 2.563279607568325, "grad_norm": 0.10379917919635773, "learning_rate": 4.481702941879933e-05, "loss": 0.0189, "step": 18289 }, { "epoch": 2.5634197617379115, "grad_norm": 0.33128872513771057, "learning_rate": 4.480267878497966e-05, "loss": 0.0465, "step": 18290 }, { "epoch": 2.563559915907498, "grad_norm": 0.3149373531341553, "learning_rate": 4.4788328151160005e-05, "loss": 0.0224, "step": 18291 }, { "epoch": 2.563700070077085, "grad_norm": 0.1765746921300888, "learning_rate": 4.477397751734035e-05, "loss": 0.0147, "step": 18292 }, { "epoch": 2.5638402242466714, "grad_norm": 0.06919504702091217, "learning_rate": 4.475962688352068e-05, "loss": 0.0072, "step": 18293 }, { "epoch": 2.563980378416258, "grad_norm": 0.51337069272995, "learning_rate": 4.474527624970103e-05, "loss": 0.0545, "step": 18294 }, { "epoch": 2.5641205325858443, "grad_norm": 0.16400989890098572, "learning_rate": 4.473092561588137e-05, "loss": 0.0521, "step": 18295 }, { "epoch": 2.564260686755431, "grad_norm": 0.19401909410953522, "learning_rate": 4.47165749820617e-05, "loss": 0.0347, "step": 18296 }, { "epoch": 
2.5644008409250176, "grad_norm": 0.16718193888664246, "learning_rate": 4.4702224348242044e-05, "loss": 0.0113, "step": 18297 }, { "epoch": 2.564540995094604, "grad_norm": 0.3253571689128876, "learning_rate": 4.4687873714422386e-05, "loss": 0.0464, "step": 18298 }, { "epoch": 2.5646811492641906, "grad_norm": 0.6502189040184021, "learning_rate": 4.467352308060272e-05, "loss": 0.0357, "step": 18299 }, { "epoch": 2.564821303433777, "grad_norm": 0.3103705644607544, "learning_rate": 4.4659172446783064e-05, "loss": 0.0171, "step": 18300 }, { "epoch": 2.5649614576033635, "grad_norm": 0.9361165761947632, "learning_rate": 4.46448218129634e-05, "loss": 0.0355, "step": 18301 }, { "epoch": 2.5651016117729504, "grad_norm": 0.13929978013038635, "learning_rate": 4.463047117914375e-05, "loss": 0.0454, "step": 18302 }, { "epoch": 2.565241765942537, "grad_norm": 0.2941024601459503, "learning_rate": 4.461612054532408e-05, "loss": 0.0681, "step": 18303 }, { "epoch": 2.5653819201121233, "grad_norm": 0.3907836973667145, "learning_rate": 4.460176991150442e-05, "loss": 0.1028, "step": 18304 }, { "epoch": 2.5655220742817098, "grad_norm": 0.7705672979354858, "learning_rate": 4.458741927768476e-05, "loss": 0.0259, "step": 18305 }, { "epoch": 2.5656622284512967, "grad_norm": 0.164019376039505, "learning_rate": 4.45730686438651e-05, "loss": 0.0041, "step": 18306 }, { "epoch": 2.565802382620883, "grad_norm": 0.20495638251304626, "learning_rate": 4.455871801004544e-05, "loss": 0.0061, "step": 18307 }, { "epoch": 2.5659425367904696, "grad_norm": 0.24308723211288452, "learning_rate": 4.454436737622578e-05, "loss": 0.0391, "step": 18308 }, { "epoch": 2.566082690960056, "grad_norm": 0.1835632175207138, "learning_rate": 4.4530016742406116e-05, "loss": 0.0288, "step": 18309 }, { "epoch": 2.5662228451296425, "grad_norm": 0.233128622174263, "learning_rate": 4.4515666108586464e-05, "loss": 0.0534, "step": 18310 }, { "epoch": 2.566362999299229, "grad_norm": 0.2201002985239029, "learning_rate": 
4.45013154747668e-05, "loss": 0.0131, "step": 18311 }, { "epoch": 2.566503153468816, "grad_norm": 0.6070578098297119, "learning_rate": 4.4486964840947135e-05, "loss": 0.0171, "step": 18312 }, { "epoch": 2.5666433076384023, "grad_norm": 0.3898150622844696, "learning_rate": 4.447261420712748e-05, "loss": 0.0749, "step": 18313 }, { "epoch": 2.5667834618079888, "grad_norm": 0.22572098672389984, "learning_rate": 4.445826357330782e-05, "loss": 0.0325, "step": 18314 }, { "epoch": 2.5669236159775752, "grad_norm": 0.2406264841556549, "learning_rate": 4.4443912939488155e-05, "loss": 0.0568, "step": 18315 }, { "epoch": 2.567063770147162, "grad_norm": 0.9114459753036499, "learning_rate": 4.44295623056685e-05, "loss": 0.0264, "step": 18316 }, { "epoch": 2.5672039243167486, "grad_norm": 0.1865604668855667, "learning_rate": 4.441521167184883e-05, "loss": 0.0163, "step": 18317 }, { "epoch": 2.567344078486335, "grad_norm": 0.24366647005081177, "learning_rate": 4.440086103802918e-05, "loss": 0.0222, "step": 18318 }, { "epoch": 2.5674842326559215, "grad_norm": 0.48044905066490173, "learning_rate": 4.4386510404209516e-05, "loss": 0.1255, "step": 18319 }, { "epoch": 2.567624386825508, "grad_norm": 0.4263973832130432, "learning_rate": 4.437215977038985e-05, "loss": 0.0369, "step": 18320 }, { "epoch": 2.5677645409950944, "grad_norm": 0.06407378613948822, "learning_rate": 4.4357809136570194e-05, "loss": 0.0057, "step": 18321 }, { "epoch": 2.567904695164681, "grad_norm": 0.12242628633975983, "learning_rate": 4.4343458502750536e-05, "loss": 0.0348, "step": 18322 }, { "epoch": 2.568044849334268, "grad_norm": 0.34747666120529175, "learning_rate": 4.432910786893087e-05, "loss": 0.0128, "step": 18323 }, { "epoch": 2.5681850035038543, "grad_norm": 0.04966302961111069, "learning_rate": 4.431475723511121e-05, "loss": 0.0039, "step": 18324 }, { "epoch": 2.5683251576734407, "grad_norm": 0.27620163559913635, "learning_rate": 4.430040660129155e-05, "loss": 0.0219, "step": 18325 }, { "epoch": 
2.568465311843027, "grad_norm": 0.19051134586334229, "learning_rate": 4.42860559674719e-05, "loss": 0.0227, "step": 18326 }, { "epoch": 2.568605466012614, "grad_norm": 0.25438886880874634, "learning_rate": 4.427170533365223e-05, "loss": 0.0516, "step": 18327 }, { "epoch": 2.5687456201822005, "grad_norm": 0.2635815739631653, "learning_rate": 4.425735469983257e-05, "loss": 0.0095, "step": 18328 }, { "epoch": 2.568885774351787, "grad_norm": 0.3351733684539795, "learning_rate": 4.424300406601291e-05, "loss": 0.042, "step": 18329 }, { "epoch": 2.5690259285213735, "grad_norm": 0.2577472925186157, "learning_rate": 4.422865343219325e-05, "loss": 0.0472, "step": 18330 }, { "epoch": 2.56916608269096, "grad_norm": 0.10261719673871994, "learning_rate": 4.421430279837359e-05, "loss": 0.0105, "step": 18331 }, { "epoch": 2.5693062368605464, "grad_norm": 0.4056299328804016, "learning_rate": 4.419995216455393e-05, "loss": 0.0171, "step": 18332 }, { "epoch": 2.5694463910301333, "grad_norm": 0.14270593225955963, "learning_rate": 4.418560153073427e-05, "loss": 0.0279, "step": 18333 }, { "epoch": 2.5695865451997197, "grad_norm": 0.12293795496225357, "learning_rate": 4.4171250896914614e-05, "loss": 0.0206, "step": 18334 }, { "epoch": 2.569726699369306, "grad_norm": 0.24613048136234283, "learning_rate": 4.415690026309495e-05, "loss": 0.0178, "step": 18335 }, { "epoch": 2.5698668535388927, "grad_norm": 0.5383864045143127, "learning_rate": 4.4142549629275285e-05, "loss": 0.0654, "step": 18336 }, { "epoch": 2.5700070077084796, "grad_norm": 0.22100849449634552, "learning_rate": 4.4128198995455634e-05, "loss": 0.067, "step": 18337 }, { "epoch": 2.570147161878066, "grad_norm": 0.35696256160736084, "learning_rate": 4.411384836163597e-05, "loss": 0.0164, "step": 18338 }, { "epoch": 2.5702873160476525, "grad_norm": 0.18946433067321777, "learning_rate": 4.4099497727816304e-05, "loss": 0.0214, "step": 18339 }, { "epoch": 2.570427470217239, "grad_norm": 0.20125623047351837, "learning_rate": 
4.4085147093996646e-05, "loss": 0.0298, "step": 18340 }, { "epoch": 2.5705676243868254, "grad_norm": 0.2019723355770111, "learning_rate": 4.407079646017699e-05, "loss": 0.0384, "step": 18341 }, { "epoch": 2.570707778556412, "grad_norm": 0.09664402902126312, "learning_rate": 4.405644582635733e-05, "loss": 0.0237, "step": 18342 }, { "epoch": 2.5708479327259988, "grad_norm": 0.10501237958669662, "learning_rate": 4.4042095192537666e-05, "loss": 0.006, "step": 18343 }, { "epoch": 2.5709880868955852, "grad_norm": 0.1476975679397583, "learning_rate": 4.4027744558718e-05, "loss": 0.0114, "step": 18344 }, { "epoch": 2.5711282410651717, "grad_norm": 0.7946990728378296, "learning_rate": 4.401339392489835e-05, "loss": 0.0607, "step": 18345 }, { "epoch": 2.571268395234758, "grad_norm": 0.08263041824102402, "learning_rate": 4.3999043291078686e-05, "loss": 0.0061, "step": 18346 }, { "epoch": 2.571408549404345, "grad_norm": 0.3564976453781128, "learning_rate": 4.398469265725902e-05, "loss": 0.0498, "step": 18347 }, { "epoch": 2.5715487035739315, "grad_norm": 0.18800511956214905, "learning_rate": 4.397034202343936e-05, "loss": 0.0357, "step": 18348 }, { "epoch": 2.571688857743518, "grad_norm": 0.05083693563938141, "learning_rate": 4.3955991389619705e-05, "loss": 0.0018, "step": 18349 }, { "epoch": 2.5718290119131044, "grad_norm": 0.10608505457639694, "learning_rate": 4.394164075580005e-05, "loss": 0.0098, "step": 18350 }, { "epoch": 2.571969166082691, "grad_norm": 0.5920299291610718, "learning_rate": 4.392729012198038e-05, "loss": 0.0245, "step": 18351 }, { "epoch": 2.5721093202522773, "grad_norm": 0.14916224777698517, "learning_rate": 4.391293948816072e-05, "loss": 0.0189, "step": 18352 }, { "epoch": 2.572249474421864, "grad_norm": 0.2072741836309433, "learning_rate": 4.389858885434107e-05, "loss": 0.0269, "step": 18353 }, { "epoch": 2.5723896285914507, "grad_norm": 0.13273829221725464, "learning_rate": 4.38842382205214e-05, "loss": 0.026, "step": 18354 }, { "epoch": 
2.572529782761037, "grad_norm": 0.034441083669662476, "learning_rate": 4.386988758670174e-05, "loss": 0.0024, "step": 18355 }, { "epoch": 2.5726699369306236, "grad_norm": 0.27286863327026367, "learning_rate": 4.385553695288208e-05, "loss": 0.0217, "step": 18356 }, { "epoch": 2.57281009110021, "grad_norm": 0.16159188747406006, "learning_rate": 4.384118631906242e-05, "loss": 0.0298, "step": 18357 }, { "epoch": 2.572950245269797, "grad_norm": 0.19649852812290192, "learning_rate": 4.3826835685242764e-05, "loss": 0.0216, "step": 18358 }, { "epoch": 2.5730903994393834, "grad_norm": 0.340781033039093, "learning_rate": 4.38124850514231e-05, "loss": 0.03, "step": 18359 }, { "epoch": 2.57323055360897, "grad_norm": 0.21577833592891693, "learning_rate": 4.3798134417603435e-05, "loss": 0.0254, "step": 18360 }, { "epoch": 2.5733707077785564, "grad_norm": 0.19541950523853302, "learning_rate": 4.3783783783783783e-05, "loss": 0.0093, "step": 18361 }, { "epoch": 2.573510861948143, "grad_norm": 0.3279116451740265, "learning_rate": 4.376943314996412e-05, "loss": 0.0658, "step": 18362 }, { "epoch": 2.5736510161177293, "grad_norm": 0.18248826265335083, "learning_rate": 4.3755082516144454e-05, "loss": 0.0179, "step": 18363 }, { "epoch": 2.573791170287316, "grad_norm": 0.4545961022377014, "learning_rate": 4.3740731882324796e-05, "loss": 0.0204, "step": 18364 }, { "epoch": 2.5739313244569026, "grad_norm": 0.0790606439113617, "learning_rate": 4.372638124850514e-05, "loss": 0.0071, "step": 18365 }, { "epoch": 2.574071478626489, "grad_norm": 0.7308099269866943, "learning_rate": 4.371203061468548e-05, "loss": 0.0288, "step": 18366 }, { "epoch": 2.5742116327960756, "grad_norm": 0.3434159457683563, "learning_rate": 4.3697679980865816e-05, "loss": 0.0411, "step": 18367 }, { "epoch": 2.5743517869656625, "grad_norm": 1.4460663795471191, "learning_rate": 4.3683329347046165e-05, "loss": 0.0399, "step": 18368 }, { "epoch": 2.574491941135249, "grad_norm": 0.15944431722164154, "learning_rate": 
4.36689787132265e-05, "loss": 0.0039, "step": 18369 }, { "epoch": 2.5746320953048354, "grad_norm": 0.6013672947883606, "learning_rate": 4.3654628079406835e-05, "loss": 0.0455, "step": 18370 }, { "epoch": 2.574772249474422, "grad_norm": 0.484423965215683, "learning_rate": 4.364027744558717e-05, "loss": 0.0522, "step": 18371 }, { "epoch": 2.5749124036440083, "grad_norm": 0.19116707146167755, "learning_rate": 4.362592681176752e-05, "loss": 0.0106, "step": 18372 }, { "epoch": 2.5750525578135948, "grad_norm": 0.2497573047876358, "learning_rate": 4.3611576177947855e-05, "loss": 0.0077, "step": 18373 }, { "epoch": 2.5751927119831812, "grad_norm": 0.5705140829086304, "learning_rate": 4.35972255441282e-05, "loss": 0.0719, "step": 18374 }, { "epoch": 2.575332866152768, "grad_norm": 0.19943994283676147, "learning_rate": 4.358287491030853e-05, "loss": 0.0142, "step": 18375 }, { "epoch": 2.5754730203223546, "grad_norm": 0.2182893007993698, "learning_rate": 4.356852427648888e-05, "loss": 0.0492, "step": 18376 }, { "epoch": 2.575613174491941, "grad_norm": 0.1173836961388588, "learning_rate": 4.3554173642669217e-05, "loss": 0.0075, "step": 18377 }, { "epoch": 2.575753328661528, "grad_norm": 0.10085103660821915, "learning_rate": 4.353982300884955e-05, "loss": 0.0092, "step": 18378 }, { "epoch": 2.5758934828311144, "grad_norm": 0.43157729506492615, "learning_rate": 4.352547237502989e-05, "loss": 0.0612, "step": 18379 }, { "epoch": 2.576033637000701, "grad_norm": 1.0196260213851929, "learning_rate": 4.3511121741210236e-05, "loss": 0.028, "step": 18380 }, { "epoch": 2.5761737911702873, "grad_norm": 0.16957086324691772, "learning_rate": 4.349677110739057e-05, "loss": 0.0153, "step": 18381 }, { "epoch": 2.576313945339874, "grad_norm": 0.3136398196220398, "learning_rate": 4.3482420473570914e-05, "loss": 0.0298, "step": 18382 }, { "epoch": 2.5764540995094602, "grad_norm": 0.21988998353481293, "learning_rate": 4.346806983975125e-05, "loss": 0.0917, "step": 18383 }, { "epoch": 
2.5765942536790467, "grad_norm": 0.30825674533843994, "learning_rate": 4.34537192059316e-05, "loss": 0.0205, "step": 18384 }, { "epoch": 2.5767344078486336, "grad_norm": 0.24619002640247345, "learning_rate": 4.343936857211193e-05, "loss": 0.0204, "step": 18385 }, { "epoch": 2.57687456201822, "grad_norm": 0.11678573489189148, "learning_rate": 4.342501793829227e-05, "loss": 0.0043, "step": 18386 }, { "epoch": 2.5770147161878065, "grad_norm": 0.12242564558982849, "learning_rate": 4.341066730447261e-05, "loss": 0.0136, "step": 18387 }, { "epoch": 2.577154870357393, "grad_norm": 0.12053769826889038, "learning_rate": 4.339631667065295e-05, "loss": 0.0151, "step": 18388 }, { "epoch": 2.57729502452698, "grad_norm": 0.18951837718486786, "learning_rate": 4.338196603683329e-05, "loss": 0.0258, "step": 18389 }, { "epoch": 2.5774351786965664, "grad_norm": 0.2236301451921463, "learning_rate": 4.336761540301363e-05, "loss": 0.0367, "step": 18390 }, { "epoch": 2.577575332866153, "grad_norm": 0.09455155581235886, "learning_rate": 4.3353264769193965e-05, "loss": 0.006, "step": 18391 }, { "epoch": 2.5777154870357393, "grad_norm": 0.07828865945339203, "learning_rate": 4.3338914135374314e-05, "loss": 0.006, "step": 18392 }, { "epoch": 2.5778556412053257, "grad_norm": 0.04116327315568924, "learning_rate": 4.332456350155465e-05, "loss": 0.003, "step": 18393 }, { "epoch": 2.577995795374912, "grad_norm": 0.1679389625787735, "learning_rate": 4.3310212867734985e-05, "loss": 0.0265, "step": 18394 }, { "epoch": 2.578135949544499, "grad_norm": 0.13193699717521667, "learning_rate": 4.329586223391533e-05, "loss": 0.0159, "step": 18395 }, { "epoch": 2.5782761037140856, "grad_norm": 0.18403862416744232, "learning_rate": 4.328151160009567e-05, "loss": 0.0446, "step": 18396 }, { "epoch": 2.578416257883672, "grad_norm": 0.6315674781799316, "learning_rate": 4.3267160966276005e-05, "loss": 0.0248, "step": 18397 }, { "epoch": 2.5785564120532585, "grad_norm": 0.2854273319244385, "learning_rate": 
4.325281033245635e-05, "loss": 0.0204, "step": 18398 }, { "epoch": 2.5786965662228454, "grad_norm": 0.4008709788322449, "learning_rate": 4.323845969863668e-05, "loss": 0.0351, "step": 18399 }, { "epoch": 2.578836720392432, "grad_norm": 0.10933005064725876, "learning_rate": 4.322410906481703e-05, "loss": 0.0105, "step": 18400 }, { "epoch": 2.5789768745620183, "grad_norm": 0.16192081570625305, "learning_rate": 4.3209758430997366e-05, "loss": 0.0438, "step": 18401 }, { "epoch": 2.5791170287316048, "grad_norm": 0.8346202969551086, "learning_rate": 4.31954077971777e-05, "loss": 0.0342, "step": 18402 }, { "epoch": 2.579257182901191, "grad_norm": 0.2391958087682724, "learning_rate": 4.3181057163358044e-05, "loss": 0.0236, "step": 18403 }, { "epoch": 2.5793973370707777, "grad_norm": 0.14135269820690155, "learning_rate": 4.3166706529538386e-05, "loss": 0.0381, "step": 18404 }, { "epoch": 2.579537491240364, "grad_norm": 0.17120225727558136, "learning_rate": 4.315235589571872e-05, "loss": 0.0109, "step": 18405 }, { "epoch": 2.579677645409951, "grad_norm": 0.04020866006612778, "learning_rate": 4.313800526189906e-05, "loss": 0.0034, "step": 18406 }, { "epoch": 2.5798177995795375, "grad_norm": 0.28193074464797974, "learning_rate": 4.3123654628079405e-05, "loss": 0.043, "step": 18407 }, { "epoch": 2.579957953749124, "grad_norm": 0.3373149037361145, "learning_rate": 4.310930399425975e-05, "loss": 0.0104, "step": 18408 }, { "epoch": 2.580098107918711, "grad_norm": 0.19258467853069305, "learning_rate": 4.309495336044008e-05, "loss": 0.0521, "step": 18409 }, { "epoch": 2.5802382620882973, "grad_norm": 0.5233100056648254, "learning_rate": 4.308060272662042e-05, "loss": 0.0181, "step": 18410 }, { "epoch": 2.580378416257884, "grad_norm": 0.2077948898077011, "learning_rate": 4.306625209280077e-05, "loss": 0.0166, "step": 18411 }, { "epoch": 2.5805185704274702, "grad_norm": 0.33574870228767395, "learning_rate": 4.30519014589811e-05, "loss": 0.0301, "step": 18412 }, { "epoch": 
2.5806587245970567, "grad_norm": 0.38488245010375977, "learning_rate": 4.303755082516144e-05, "loss": 0.0343, "step": 18413 }, { "epoch": 2.580798878766643, "grad_norm": 0.021686121821403503, "learning_rate": 4.302320019134178e-05, "loss": 0.0018, "step": 18414 }, { "epoch": 2.5809390329362296, "grad_norm": 0.132170170545578, "learning_rate": 4.300884955752212e-05, "loss": 0.0143, "step": 18415 }, { "epoch": 2.5810791871058165, "grad_norm": 0.14774839580059052, "learning_rate": 4.2994498923702464e-05, "loss": 0.0422, "step": 18416 }, { "epoch": 2.581219341275403, "grad_norm": 0.1877659559249878, "learning_rate": 4.29801482898828e-05, "loss": 0.0149, "step": 18417 }, { "epoch": 2.5813594954449894, "grad_norm": 0.34519264101982117, "learning_rate": 4.2965797656063135e-05, "loss": 0.0131, "step": 18418 }, { "epoch": 2.581499649614576, "grad_norm": 0.8436042666435242, "learning_rate": 4.2951447022243484e-05, "loss": 0.0815, "step": 18419 }, { "epoch": 2.581639803784163, "grad_norm": 0.6240946650505066, "learning_rate": 4.293709638842382e-05, "loss": 0.045, "step": 18420 }, { "epoch": 2.5817799579537493, "grad_norm": 0.46625909209251404, "learning_rate": 4.2922745754604154e-05, "loss": 0.0383, "step": 18421 }, { "epoch": 2.5819201121233357, "grad_norm": 0.0793287381529808, "learning_rate": 4.2908395120784496e-05, "loss": 0.0046, "step": 18422 }, { "epoch": 2.582060266292922, "grad_norm": 0.3346347510814667, "learning_rate": 4.289404448696484e-05, "loss": 0.0344, "step": 18423 }, { "epoch": 2.5822004204625086, "grad_norm": 0.09946427494287491, "learning_rate": 4.287969385314518e-05, "loss": 0.0087, "step": 18424 }, { "epoch": 2.582340574632095, "grad_norm": 0.5931811332702637, "learning_rate": 4.2865343219325516e-05, "loss": 0.0555, "step": 18425 }, { "epoch": 2.582480728801682, "grad_norm": 0.32411035895347595, "learning_rate": 4.285099258550585e-05, "loss": 0.0374, "step": 18426 }, { "epoch": 2.5826208829712685, "grad_norm": 0.3806039094924927, "learning_rate": 
4.28366419516862e-05, "loss": 0.0117, "step": 18427 }, { "epoch": 2.582761037140855, "grad_norm": 0.07168141007423401, "learning_rate": 4.2822291317866536e-05, "loss": 0.0059, "step": 18428 }, { "epoch": 2.5829011913104414, "grad_norm": 0.5293334722518921, "learning_rate": 4.280794068404687e-05, "loss": 0.0221, "step": 18429 }, { "epoch": 2.5830413454800283, "grad_norm": 0.3320784270763397, "learning_rate": 4.279359005022721e-05, "loss": 0.0319, "step": 18430 }, { "epoch": 2.5831814996496147, "grad_norm": 0.18998445570468903, "learning_rate": 4.2779239416407555e-05, "loss": 0.0256, "step": 18431 }, { "epoch": 2.583321653819201, "grad_norm": 0.2475634664297104, "learning_rate": 4.27648887825879e-05, "loss": 0.0488, "step": 18432 }, { "epoch": 2.5834618079887877, "grad_norm": 0.28841716051101685, "learning_rate": 4.275053814876823e-05, "loss": 0.0728, "step": 18433 }, { "epoch": 2.583601962158374, "grad_norm": 0.28521859645843506, "learning_rate": 4.273618751494857e-05, "loss": 0.0325, "step": 18434 }, { "epoch": 2.5837421163279606, "grad_norm": 0.16159546375274658, "learning_rate": 4.272183688112892e-05, "loss": 0.0066, "step": 18435 }, { "epoch": 2.583882270497547, "grad_norm": 0.22974339127540588, "learning_rate": 4.270748624730925e-05, "loss": 0.0209, "step": 18436 }, { "epoch": 2.584022424667134, "grad_norm": 0.5020043253898621, "learning_rate": 4.269313561348959e-05, "loss": 0.0565, "step": 18437 }, { "epoch": 2.5841625788367204, "grad_norm": 0.2730749547481537, "learning_rate": 4.267878497966993e-05, "loss": 0.0931, "step": 18438 }, { "epoch": 2.584302733006307, "grad_norm": 0.5322865843772888, "learning_rate": 4.266443434585027e-05, "loss": 0.1059, "step": 18439 }, { "epoch": 2.5844428871758933, "grad_norm": 0.07507546991109848, "learning_rate": 4.2650083712030614e-05, "loss": 0.0042, "step": 18440 }, { "epoch": 2.5845830413454802, "grad_norm": 0.2223496437072754, "learning_rate": 4.263573307821095e-05, "loss": 0.0175, "step": 18441 }, { "epoch": 
2.5847231955150667, "grad_norm": 0.31748926639556885, "learning_rate": 4.262138244439129e-05, "loss": 0.0273, "step": 18442 }, { "epoch": 2.584863349684653, "grad_norm": 0.06157343089580536, "learning_rate": 4.260703181057163e-05, "loss": 0.0031, "step": 18443 }, { "epoch": 2.5850035038542396, "grad_norm": 0.08624120056629181, "learning_rate": 4.259268117675197e-05, "loss": 0.015, "step": 18444 }, { "epoch": 2.585143658023826, "grad_norm": 0.20628924667835236, "learning_rate": 4.2578330542932304e-05, "loss": 0.017, "step": 18445 }, { "epoch": 2.5852838121934125, "grad_norm": 0.21479536592960358, "learning_rate": 4.256397990911265e-05, "loss": 0.0175, "step": 18446 }, { "epoch": 2.5854239663629994, "grad_norm": 0.036246560513973236, "learning_rate": 4.254962927529299e-05, "loss": 0.0043, "step": 18447 }, { "epoch": 2.585564120532586, "grad_norm": 0.11516187340021133, "learning_rate": 4.253527864147333e-05, "loss": 0.0039, "step": 18448 }, { "epoch": 2.5857042747021723, "grad_norm": 0.12631452083587646, "learning_rate": 4.2520928007653666e-05, "loss": 0.0099, "step": 18449 }, { "epoch": 2.585844428871759, "grad_norm": 0.13929536938667297, "learning_rate": 4.250657737383401e-05, "loss": 0.0082, "step": 18450 }, { "epoch": 2.5859845830413457, "grad_norm": 0.20480823516845703, "learning_rate": 4.249222674001435e-05, "loss": 0.0349, "step": 18451 }, { "epoch": 2.586124737210932, "grad_norm": 0.1254032552242279, "learning_rate": 4.2477876106194685e-05, "loss": 0.0087, "step": 18452 }, { "epoch": 2.5862648913805186, "grad_norm": 1.004272699356079, "learning_rate": 4.246352547237502e-05, "loss": 0.0597, "step": 18453 }, { "epoch": 2.586405045550105, "grad_norm": 0.09287992864847183, "learning_rate": 4.244917483855537e-05, "loss": 0.0143, "step": 18454 }, { "epoch": 2.5865451997196915, "grad_norm": 0.27284035086631775, "learning_rate": 4.2434824204735705e-05, "loss": 0.0183, "step": 18455 }, { "epoch": 2.586685353889278, "grad_norm": 0.1848074048757553, "learning_rate": 
4.242047357091605e-05, "loss": 0.0081, "step": 18456 }, { "epoch": 2.586825508058865, "grad_norm": 0.21825416386127472, "learning_rate": 4.240612293709638e-05, "loss": 0.0104, "step": 18457 }, { "epoch": 2.5869656622284514, "grad_norm": 0.22444355487823486, "learning_rate": 4.2391772303276724e-05, "loss": 0.0372, "step": 18458 }, { "epoch": 2.587105816398038, "grad_norm": 0.20439444482326508, "learning_rate": 4.2377421669457067e-05, "loss": 0.0215, "step": 18459 }, { "epoch": 2.5872459705676243, "grad_norm": 0.0767916887998581, "learning_rate": 4.23630710356374e-05, "loss": 0.0048, "step": 18460 }, { "epoch": 2.587386124737211, "grad_norm": 0.22043246030807495, "learning_rate": 4.234872040181774e-05, "loss": 0.0322, "step": 18461 }, { "epoch": 2.5875262789067976, "grad_norm": 0.1928377002477646, "learning_rate": 4.2334369767998086e-05, "loss": 0.0097, "step": 18462 }, { "epoch": 2.587666433076384, "grad_norm": 0.07479529082775116, "learning_rate": 4.232001913417842e-05, "loss": 0.0066, "step": 18463 }, { "epoch": 2.5878065872459706, "grad_norm": 0.0872916430234909, "learning_rate": 4.2305668500358764e-05, "loss": 0.0054, "step": 18464 }, { "epoch": 2.587946741415557, "grad_norm": 0.146233469247818, "learning_rate": 4.22913178665391e-05, "loss": 0.0138, "step": 18465 }, { "epoch": 2.5880868955851435, "grad_norm": 0.5427401661872864, "learning_rate": 4.227696723271944e-05, "loss": 0.0261, "step": 18466 }, { "epoch": 2.58822704975473, "grad_norm": 0.1921662539243698, "learning_rate": 4.226261659889978e-05, "loss": 0.0162, "step": 18467 }, { "epoch": 2.588367203924317, "grad_norm": 0.8048128485679626, "learning_rate": 4.224826596508012e-05, "loss": 0.1125, "step": 18468 }, { "epoch": 2.5885073580939033, "grad_norm": 0.24106605350971222, "learning_rate": 4.2233915331260454e-05, "loss": 0.0116, "step": 18469 }, { "epoch": 2.5886475122634898, "grad_norm": 1.096875786781311, "learning_rate": 4.22195646974408e-05, "loss": 0.058, "step": 18470 }, { "epoch": 
2.5887876664330762, "grad_norm": 0.248779296875, "learning_rate": 4.220521406362114e-05, "loss": 0.0459, "step": 18471 }, { "epoch": 2.588927820602663, "grad_norm": 0.21327553689479828, "learning_rate": 4.219086342980148e-05, "loss": 0.0239, "step": 18472 }, { "epoch": 2.5890679747722496, "grad_norm": 0.21323801577091217, "learning_rate": 4.2176512795981815e-05, "loss": 0.0072, "step": 18473 }, { "epoch": 2.589208128941836, "grad_norm": 0.33114296197891235, "learning_rate": 4.216216216216216e-05, "loss": 0.0484, "step": 18474 }, { "epoch": 2.5893482831114225, "grad_norm": 0.09613249450922012, "learning_rate": 4.21478115283425e-05, "loss": 0.0065, "step": 18475 }, { "epoch": 2.589488437281009, "grad_norm": 0.5842782258987427, "learning_rate": 4.2133460894522835e-05, "loss": 0.0366, "step": 18476 }, { "epoch": 2.5896285914505954, "grad_norm": 0.5013179183006287, "learning_rate": 4.211911026070317e-05, "loss": 0.0446, "step": 18477 }, { "epoch": 2.5897687456201823, "grad_norm": 0.45787426829338074, "learning_rate": 4.210475962688352e-05, "loss": 0.0111, "step": 18478 }, { "epoch": 2.589908899789769, "grad_norm": 0.338286429643631, "learning_rate": 4.2090408993063855e-05, "loss": 0.0191, "step": 18479 }, { "epoch": 2.5900490539593553, "grad_norm": 0.3275730609893799, "learning_rate": 4.20760583592442e-05, "loss": 0.0562, "step": 18480 }, { "epoch": 2.5901892081289417, "grad_norm": 0.17356082797050476, "learning_rate": 4.206170772542454e-05, "loss": 0.0055, "step": 18481 }, { "epoch": 2.5903293622985286, "grad_norm": 0.29584771394729614, "learning_rate": 4.2047357091604874e-05, "loss": 0.0208, "step": 18482 }, { "epoch": 2.590469516468115, "grad_norm": 0.18936409056186676, "learning_rate": 4.2033006457785216e-05, "loss": 0.0288, "step": 18483 }, { "epoch": 2.5906096706377015, "grad_norm": 0.08536079525947571, "learning_rate": 4.201865582396555e-05, "loss": 0.0209, "step": 18484 }, { "epoch": 2.590749824807288, "grad_norm": 0.4714975655078888, "learning_rate": 
4.20043051901459e-05, "loss": 0.0528, "step": 18485 }, { "epoch": 2.5908899789768745, "grad_norm": 0.2756427824497223, "learning_rate": 4.1989954556326236e-05, "loss": 0.0846, "step": 18486 }, { "epoch": 2.591030133146461, "grad_norm": 0.16883422434329987, "learning_rate": 4.197560392250657e-05, "loss": 0.0515, "step": 18487 }, { "epoch": 2.591170287316048, "grad_norm": 0.1769227385520935, "learning_rate": 4.196125328868691e-05, "loss": 0.0061, "step": 18488 }, { "epoch": 2.5913104414856343, "grad_norm": 0.05441201478242874, "learning_rate": 4.1946902654867255e-05, "loss": 0.0049, "step": 18489 }, { "epoch": 2.5914505956552207, "grad_norm": 0.13326485455036163, "learning_rate": 4.193255202104759e-05, "loss": 0.0157, "step": 18490 }, { "epoch": 2.591590749824807, "grad_norm": 0.5163449645042419, "learning_rate": 4.191820138722793e-05, "loss": 0.0679, "step": 18491 }, { "epoch": 2.591730903994394, "grad_norm": 0.09258285164833069, "learning_rate": 4.190385075340827e-05, "loss": 0.0045, "step": 18492 }, { "epoch": 2.5918710581639806, "grad_norm": 0.1363135576248169, "learning_rate": 4.188950011958862e-05, "loss": 0.0141, "step": 18493 }, { "epoch": 2.592011212333567, "grad_norm": 0.5469874143600464, "learning_rate": 4.187514948576895e-05, "loss": 0.0535, "step": 18494 }, { "epoch": 2.5921513665031535, "grad_norm": 0.08730471879243851, "learning_rate": 4.186079885194929e-05, "loss": 0.0067, "step": 18495 }, { "epoch": 2.59229152067274, "grad_norm": 0.1166330873966217, "learning_rate": 4.184644821812963e-05, "loss": 0.0095, "step": 18496 }, { "epoch": 2.5924316748423264, "grad_norm": 0.25809523463249207, "learning_rate": 4.183209758430997e-05, "loss": 0.0187, "step": 18497 }, { "epoch": 2.592571829011913, "grad_norm": 0.2569851577281952, "learning_rate": 4.181774695049031e-05, "loss": 0.0165, "step": 18498 }, { "epoch": 2.5927119831814998, "grad_norm": 0.37615805864334106, "learning_rate": 4.180339631667065e-05, "loss": 0.0385, "step": 18499 }, { "epoch": 
2.592852137351086, "grad_norm": 0.3347506523132324, "learning_rate": 4.1789045682850985e-05, "loss": 0.0273, "step": 18500 }, { "epoch": 2.5929922915206727, "grad_norm": 0.23138432204723358, "learning_rate": 4.1774695049031334e-05, "loss": 0.0544, "step": 18501 }, { "epoch": 2.593132445690259, "grad_norm": 0.5268917679786682, "learning_rate": 4.176034441521167e-05, "loss": 0.0283, "step": 18502 }, { "epoch": 2.593272599859846, "grad_norm": 0.3320567309856415, "learning_rate": 4.1745993781392004e-05, "loss": 0.0662, "step": 18503 }, { "epoch": 2.5934127540294325, "grad_norm": 0.09726834297180176, "learning_rate": 4.1731643147572346e-05, "loss": 0.0319, "step": 18504 }, { "epoch": 2.593552908199019, "grad_norm": 0.30209630727767944, "learning_rate": 4.171729251375269e-05, "loss": 0.0306, "step": 18505 }, { "epoch": 2.5936930623686054, "grad_norm": 0.14331725239753723, "learning_rate": 4.170294187993303e-05, "loss": 0.0169, "step": 18506 }, { "epoch": 2.593833216538192, "grad_norm": 0.10237036645412445, "learning_rate": 4.1688591246113366e-05, "loss": 0.0041, "step": 18507 }, { "epoch": 2.5939733707077783, "grad_norm": 0.042850177735090256, "learning_rate": 4.16742406122937e-05, "loss": 0.0115, "step": 18508 }, { "epoch": 2.5941135248773652, "grad_norm": 0.8127962946891785, "learning_rate": 4.165988997847405e-05, "loss": 0.0365, "step": 18509 }, { "epoch": 2.5942536790469517, "grad_norm": 0.2180366963148117, "learning_rate": 4.1645539344654386e-05, "loss": 0.0496, "step": 18510 }, { "epoch": 2.594393833216538, "grad_norm": 0.16415521502494812, "learning_rate": 4.163118871083472e-05, "loss": 0.0174, "step": 18511 }, { "epoch": 2.5945339873861246, "grad_norm": 0.09151072055101395, "learning_rate": 4.161683807701506e-05, "loss": 0.0084, "step": 18512 }, { "epoch": 2.5946741415557115, "grad_norm": 0.0869913399219513, "learning_rate": 4.1602487443195405e-05, "loss": 0.0215, "step": 18513 }, { "epoch": 2.594814295725298, "grad_norm": 0.15585070848464966, "learning_rate": 
4.158813680937575e-05, "loss": 0.0211, "step": 18514 }, { "epoch": 2.5949544498948844, "grad_norm": 0.9072529673576355, "learning_rate": 4.157378617555608e-05, "loss": 0.0596, "step": 18515 }, { "epoch": 2.595094604064471, "grad_norm": 0.07980072498321533, "learning_rate": 4.1559435541736425e-05, "loss": 0.0095, "step": 18516 }, { "epoch": 2.5952347582340574, "grad_norm": 0.326193630695343, "learning_rate": 4.154508490791677e-05, "loss": 0.0382, "step": 18517 }, { "epoch": 2.595374912403644, "grad_norm": 0.08973927050828934, "learning_rate": 4.15307342740971e-05, "loss": 0.008, "step": 18518 }, { "epoch": 2.5955150665732303, "grad_norm": 0.12881870567798615, "learning_rate": 4.151638364027744e-05, "loss": 0.0047, "step": 18519 }, { "epoch": 2.595655220742817, "grad_norm": 0.2405724823474884, "learning_rate": 4.1502033006457786e-05, "loss": 0.0127, "step": 18520 }, { "epoch": 2.5957953749124036, "grad_norm": 0.6358450055122375, "learning_rate": 4.148768237263812e-05, "loss": 0.0218, "step": 18521 }, { "epoch": 2.59593552908199, "grad_norm": 0.21287740767002106, "learning_rate": 4.1473331738818464e-05, "loss": 0.0379, "step": 18522 }, { "epoch": 2.596075683251577, "grad_norm": 0.17231370508670807, "learning_rate": 4.14589811049988e-05, "loss": 0.0427, "step": 18523 }, { "epoch": 2.5962158374211635, "grad_norm": 0.22845718264579773, "learning_rate": 4.144463047117914e-05, "loss": 0.0356, "step": 18524 }, { "epoch": 2.59635599159075, "grad_norm": 0.17676453292369843, "learning_rate": 4.143027983735948e-05, "loss": 0.0143, "step": 18525 }, { "epoch": 2.5964961457603364, "grad_norm": 0.13186609745025635, "learning_rate": 4.141592920353982e-05, "loss": 0.0188, "step": 18526 }, { "epoch": 2.596636299929923, "grad_norm": 0.08770882338285446, "learning_rate": 4.1401578569720154e-05, "loss": 0.0121, "step": 18527 }, { "epoch": 2.5967764540995093, "grad_norm": 0.3491726517677307, "learning_rate": 4.13872279359005e-05, "loss": 0.0033, "step": 18528 }, { "epoch": 
2.5969166082690958, "grad_norm": 0.16179046034812927, "learning_rate": 4.137287730208084e-05, "loss": 0.0381, "step": 18529 }, { "epoch": 2.5970567624386827, "grad_norm": 0.20397137105464935, "learning_rate": 4.135852666826118e-05, "loss": 0.0407, "step": 18530 }, { "epoch": 2.597196916608269, "grad_norm": 0.20023071765899658, "learning_rate": 4.1344176034441516e-05, "loss": 0.0257, "step": 18531 }, { "epoch": 2.5973370707778556, "grad_norm": 1.0694224834442139, "learning_rate": 4.132982540062186e-05, "loss": 0.031, "step": 18532 }, { "epoch": 2.597477224947442, "grad_norm": 0.06407024711370468, "learning_rate": 4.13154747668022e-05, "loss": 0.0044, "step": 18533 }, { "epoch": 2.597617379117029, "grad_norm": 0.15482138097286224, "learning_rate": 4.1301124132982535e-05, "loss": 0.0386, "step": 18534 }, { "epoch": 2.5977575332866154, "grad_norm": 0.23148861527442932, "learning_rate": 4.128677349916287e-05, "loss": 0.0328, "step": 18535 }, { "epoch": 2.597897687456202, "grad_norm": 0.3446612060070038, "learning_rate": 4.127242286534322e-05, "loss": 0.0612, "step": 18536 }, { "epoch": 2.5980378416257883, "grad_norm": 0.08672413975000381, "learning_rate": 4.1258072231523555e-05, "loss": 0.02, "step": 18537 }, { "epoch": 2.598177995795375, "grad_norm": 0.16013412177562714, "learning_rate": 4.12437215977039e-05, "loss": 0.0175, "step": 18538 }, { "epoch": 2.5983181499649612, "grad_norm": 0.1819830983877182, "learning_rate": 4.122937096388423e-05, "loss": 0.0077, "step": 18539 }, { "epoch": 2.598458304134548, "grad_norm": 0.47628769278526306, "learning_rate": 4.1215020330064574e-05, "loss": 0.0579, "step": 18540 }, { "epoch": 2.5985984583041346, "grad_norm": 0.19871847331523895, "learning_rate": 4.1200669696244916e-05, "loss": 0.0325, "step": 18541 }, { "epoch": 2.598738612473721, "grad_norm": 0.07882734388113022, "learning_rate": 4.118631906242525e-05, "loss": 0.0166, "step": 18542 }, { "epoch": 2.5988787666433075, "grad_norm": 0.2993088662624359, "learning_rate": 
4.117196842860559e-05, "loss": 0.0462, "step": 18543 }, { "epoch": 2.5990189208128944, "grad_norm": 0.17911703884601593, "learning_rate": 4.1157617794785936e-05, "loss": 0.0238, "step": 18544 }, { "epoch": 2.599159074982481, "grad_norm": 0.44249698519706726, "learning_rate": 4.114326716096627e-05, "loss": 0.0751, "step": 18545 }, { "epoch": 2.5992992291520673, "grad_norm": 0.20505116879940033, "learning_rate": 4.1128916527146613e-05, "loss": 0.0458, "step": 18546 }, { "epoch": 2.599439383321654, "grad_norm": 0.08975695818662643, "learning_rate": 4.111456589332695e-05, "loss": 0.0156, "step": 18547 }, { "epoch": 2.5995795374912403, "grad_norm": 0.20133747160434723, "learning_rate": 4.110021525950729e-05, "loss": 0.0305, "step": 18548 }, { "epoch": 2.5997196916608267, "grad_norm": 1.546108603477478, "learning_rate": 4.108586462568763e-05, "loss": 0.0526, "step": 18549 }, { "epoch": 2.599859845830413, "grad_norm": 0.43777474761009216, "learning_rate": 4.107151399186797e-05, "loss": 0.0155, "step": 18550 }, { "epoch": 2.6, "grad_norm": 0.40382590889930725, "learning_rate": 4.1057163358048304e-05, "loss": 0.0321, "step": 18551 }, { "epoch": 2.6001401541695865, "grad_norm": 0.10036035627126694, "learning_rate": 4.104281272422865e-05, "loss": 0.0088, "step": 18552 }, { "epoch": 2.600280308339173, "grad_norm": 0.22109805047512054, "learning_rate": 4.102846209040899e-05, "loss": 0.0095, "step": 18553 }, { "epoch": 2.60042046250876, "grad_norm": 0.18864211440086365, "learning_rate": 4.101411145658933e-05, "loss": 0.0372, "step": 18554 }, { "epoch": 2.6005606166783464, "grad_norm": 0.07249768078327179, "learning_rate": 4.099976082276967e-05, "loss": 0.0086, "step": 18555 }, { "epoch": 2.600700770847933, "grad_norm": 0.22700278460979462, "learning_rate": 4.098541018895001e-05, "loss": 0.0253, "step": 18556 }, { "epoch": 2.6008409250175193, "grad_norm": 0.320683091878891, "learning_rate": 4.097105955513035e-05, "loss": 0.0244, "step": 18557 }, { "epoch": 2.6009810791871057, 
"grad_norm": 0.2824430465698242, "learning_rate": 4.0956708921310685e-05, "loss": 0.0182, "step": 18558 }, { "epoch": 2.601121233356692, "grad_norm": 0.30952298641204834, "learning_rate": 4.0942358287491034e-05, "loss": 0.0147, "step": 18559 }, { "epoch": 2.6012613875262787, "grad_norm": 0.3556981384754181, "learning_rate": 4.092800765367137e-05, "loss": 0.016, "step": 18560 }, { "epoch": 2.6014015416958656, "grad_norm": 0.11541136354207993, "learning_rate": 4.0913657019851705e-05, "loss": 0.0079, "step": 18561 }, { "epoch": 2.601541695865452, "grad_norm": 0.22843077778816223, "learning_rate": 4.089930638603205e-05, "loss": 0.0136, "step": 18562 }, { "epoch": 2.6016818500350385, "grad_norm": 0.2954002916812897, "learning_rate": 4.088495575221239e-05, "loss": 0.0216, "step": 18563 }, { "epoch": 2.601822004204625, "grad_norm": 0.10958950221538544, "learning_rate": 4.0870605118392724e-05, "loss": 0.0087, "step": 18564 }, { "epoch": 2.601962158374212, "grad_norm": 0.32158029079437256, "learning_rate": 4.0856254484573066e-05, "loss": 0.0713, "step": 18565 }, { "epoch": 2.6021023125437983, "grad_norm": 0.2884770333766937, "learning_rate": 4.08419038507534e-05, "loss": 0.0246, "step": 18566 }, { "epoch": 2.6022424667133848, "grad_norm": 1.6252360343933105, "learning_rate": 4.082755321693375e-05, "loss": 0.0548, "step": 18567 }, { "epoch": 2.6023826208829712, "grad_norm": 0.5166999101638794, "learning_rate": 4.0813202583114086e-05, "loss": 0.0323, "step": 18568 }, { "epoch": 2.6025227750525577, "grad_norm": 0.11942706257104874, "learning_rate": 4.079885194929442e-05, "loss": 0.0047, "step": 18569 }, { "epoch": 2.602662929222144, "grad_norm": 0.48290205001831055, "learning_rate": 4.078450131547476e-05, "loss": 0.0317, "step": 18570 }, { "epoch": 2.602803083391731, "grad_norm": 0.09516703337430954, "learning_rate": 4.0770150681655105e-05, "loss": 0.0188, "step": 18571 }, { "epoch": 2.6029432375613175, "grad_norm": 0.2316620796918869, "learning_rate": 4.075580004783544e-05, 
"loss": 0.0252, "step": 18572 }, { "epoch": 2.603083391730904, "grad_norm": 0.19330072402954102, "learning_rate": 4.074144941401578e-05, "loss": 0.0305, "step": 18573 }, { "epoch": 2.6032235459004904, "grad_norm": 0.11013898253440857, "learning_rate": 4.072709878019612e-05, "loss": 0.0053, "step": 18574 }, { "epoch": 2.6033637000700773, "grad_norm": 0.8182455897331238, "learning_rate": 4.071274814637647e-05, "loss": 0.0403, "step": 18575 }, { "epoch": 2.603503854239664, "grad_norm": 0.16656769812107086, "learning_rate": 4.06983975125568e-05, "loss": 0.0151, "step": 18576 }, { "epoch": 2.6036440084092503, "grad_norm": 0.18454699218273163, "learning_rate": 4.068404687873714e-05, "loss": 0.0169, "step": 18577 }, { "epoch": 2.6037841625788367, "grad_norm": 0.08318546414375305, "learning_rate": 4.066969624491748e-05, "loss": 0.0194, "step": 18578 }, { "epoch": 2.603924316748423, "grad_norm": 0.4396114945411682, "learning_rate": 4.065534561109782e-05, "loss": 0.0598, "step": 18579 }, { "epoch": 2.6040644709180096, "grad_norm": 0.42915040254592896, "learning_rate": 4.064099497727816e-05, "loss": 0.0548, "step": 18580 }, { "epoch": 2.604204625087596, "grad_norm": 0.24726633727550507, "learning_rate": 4.06266443434585e-05, "loss": 0.0242, "step": 18581 }, { "epoch": 2.604344779257183, "grad_norm": 0.8310969471931458, "learning_rate": 4.0612293709638835e-05, "loss": 0.048, "step": 18582 }, { "epoch": 2.6044849334267695, "grad_norm": 0.07173267006874084, "learning_rate": 4.0597943075819184e-05, "loss": 0.0038, "step": 18583 }, { "epoch": 2.604625087596356, "grad_norm": 0.16220510005950928, "learning_rate": 4.058359244199952e-05, "loss": 0.0093, "step": 18584 }, { "epoch": 2.6047652417659424, "grad_norm": 0.07501126825809479, "learning_rate": 4.0569241808179854e-05, "loss": 0.0063, "step": 18585 }, { "epoch": 2.6049053959355293, "grad_norm": 0.24349640309810638, "learning_rate": 4.0554891174360196e-05, "loss": 0.0324, "step": 18586 }, { "epoch": 2.6050455501051157, 
"grad_norm": 0.2243170142173767, "learning_rate": 4.054054054054054e-05, "loss": 0.0111, "step": 18587 }, { "epoch": 2.605185704274702, "grad_norm": 0.5562153458595276, "learning_rate": 4.0526189906720874e-05, "loss": 0.0147, "step": 18588 }, { "epoch": 2.6053258584442887, "grad_norm": 0.1636039763689041, "learning_rate": 4.0511839272901216e-05, "loss": 0.0143, "step": 18589 }, { "epoch": 2.605466012613875, "grad_norm": 0.11264914274215698, "learning_rate": 4.049748863908155e-05, "loss": 0.0128, "step": 18590 }, { "epoch": 2.6056061667834616, "grad_norm": 0.11894587427377701, "learning_rate": 4.04831380052619e-05, "loss": 0.0047, "step": 18591 }, { "epoch": 2.6057463209530485, "grad_norm": 0.02416457235813141, "learning_rate": 4.0468787371442235e-05, "loss": 0.002, "step": 18592 }, { "epoch": 2.605886475122635, "grad_norm": 0.7101230621337891, "learning_rate": 4.045443673762257e-05, "loss": 0.0363, "step": 18593 }, { "epoch": 2.6060266292922214, "grad_norm": 0.17755533754825592, "learning_rate": 4.044008610380292e-05, "loss": 0.0487, "step": 18594 }, { "epoch": 2.606166783461808, "grad_norm": 0.23735153675079346, "learning_rate": 4.0425735469983255e-05, "loss": 0.0578, "step": 18595 }, { "epoch": 2.6063069376313948, "grad_norm": 0.291685551404953, "learning_rate": 4.041138483616359e-05, "loss": 0.0483, "step": 18596 }, { "epoch": 2.606447091800981, "grad_norm": 0.24020987749099731, "learning_rate": 4.039703420234393e-05, "loss": 0.0485, "step": 18597 }, { "epoch": 2.6065872459705677, "grad_norm": 0.08242785185575485, "learning_rate": 4.0382683568524275e-05, "loss": 0.0116, "step": 18598 }, { "epoch": 2.606727400140154, "grad_norm": 0.38203585147857666, "learning_rate": 4.036833293470462e-05, "loss": 0.1042, "step": 18599 }, { "epoch": 2.6068675543097406, "grad_norm": 0.3935910761356354, "learning_rate": 4.035398230088495e-05, "loss": 0.0196, "step": 18600 }, { "epoch": 2.607007708479327, "grad_norm": 0.2833174765110016, "learning_rate": 4.033963166706529e-05, 
"loss": 0.0122, "step": 18601 }, { "epoch": 2.607147862648914, "grad_norm": 0.24325448274612427, "learning_rate": 4.0325281033245636e-05, "loss": 0.0135, "step": 18602 }, { "epoch": 2.6072880168185004, "grad_norm": 0.11215915530920029, "learning_rate": 4.031093039942597e-05, "loss": 0.0371, "step": 18603 }, { "epoch": 2.607428170988087, "grad_norm": 0.1178319975733757, "learning_rate": 4.029657976560631e-05, "loss": 0.0117, "step": 18604 }, { "epoch": 2.6075683251576733, "grad_norm": 0.2862848937511444, "learning_rate": 4.028222913178665e-05, "loss": 0.0282, "step": 18605 }, { "epoch": 2.6077084793272602, "grad_norm": 0.3154328167438507, "learning_rate": 4.026787849796699e-05, "loss": 0.0112, "step": 18606 }, { "epoch": 2.6078486334968467, "grad_norm": 0.6282109618186951, "learning_rate": 4.025352786414733e-05, "loss": 0.0476, "step": 18607 }, { "epoch": 2.607988787666433, "grad_norm": 0.10051321983337402, "learning_rate": 4.023917723032767e-05, "loss": 0.0043, "step": 18608 }, { "epoch": 2.6081289418360196, "grad_norm": 0.38662955164909363, "learning_rate": 4.0224826596508004e-05, "loss": 0.0236, "step": 18609 }, { "epoch": 2.608269096005606, "grad_norm": 0.2803041934967041, "learning_rate": 4.021047596268835e-05, "loss": 0.0352, "step": 18610 }, { "epoch": 2.6084092501751925, "grad_norm": 0.6029271483421326, "learning_rate": 4.019612532886869e-05, "loss": 0.0226, "step": 18611 }, { "epoch": 2.608549404344779, "grad_norm": 0.13210083544254303, "learning_rate": 4.0181774695049024e-05, "loss": 0.0351, "step": 18612 }, { "epoch": 2.608689558514366, "grad_norm": 0.7515329122543335, "learning_rate": 4.0167424061229366e-05, "loss": 0.1048, "step": 18613 }, { "epoch": 2.6088297126839524, "grad_norm": 0.5831995010375977, "learning_rate": 4.015307342740971e-05, "loss": 0.0375, "step": 18614 }, { "epoch": 2.608969866853539, "grad_norm": 0.47365301847457886, "learning_rate": 4.013872279359005e-05, "loss": 0.0609, "step": 18615 }, { "epoch": 2.6091100210231253, "grad_norm": 
0.11963936686515808, "learning_rate": 4.0124372159770385e-05, "loss": 0.0053, "step": 18616 }, { "epoch": 2.609250175192712, "grad_norm": 0.013404007069766521, "learning_rate": 4.011002152595072e-05, "loss": 0.0013, "step": 18617 }, { "epoch": 2.6093903293622986, "grad_norm": 0.08783210068941116, "learning_rate": 4.009567089213107e-05, "loss": 0.0036, "step": 18618 }, { "epoch": 2.609530483531885, "grad_norm": 0.8851631879806519, "learning_rate": 4.0081320258311405e-05, "loss": 0.0453, "step": 18619 }, { "epoch": 2.6096706377014716, "grad_norm": 0.1214151605963707, "learning_rate": 4.006696962449174e-05, "loss": 0.0102, "step": 18620 }, { "epoch": 2.609810791871058, "grad_norm": 0.22432951629161835, "learning_rate": 4.005261899067208e-05, "loss": 0.022, "step": 18621 }, { "epoch": 2.6099509460406445, "grad_norm": 0.20865321159362793, "learning_rate": 4.0038268356852424e-05, "loss": 0.0103, "step": 18622 }, { "epoch": 2.6100911002102314, "grad_norm": 0.14837253093719482, "learning_rate": 4.0023917723032766e-05, "loss": 0.0069, "step": 18623 }, { "epoch": 2.610231254379818, "grad_norm": 0.2372204214334488, "learning_rate": 4.00095670892131e-05, "loss": 0.0409, "step": 18624 }, { "epoch": 2.6103714085494043, "grad_norm": 0.1609046459197998, "learning_rate": 3.999521645539344e-05, "loss": 0.013, "step": 18625 }, { "epoch": 2.6105115627189908, "grad_norm": 0.16017426550388336, "learning_rate": 3.9980865821573786e-05, "loss": 0.0169, "step": 18626 }, { "epoch": 2.6106517168885777, "grad_norm": 0.238394096493721, "learning_rate": 3.996651518775412e-05, "loss": 0.051, "step": 18627 }, { "epoch": 2.610791871058164, "grad_norm": 0.43685412406921387, "learning_rate": 3.995216455393446e-05, "loss": 0.0388, "step": 18628 }, { "epoch": 2.6109320252277506, "grad_norm": 0.16761700809001923, "learning_rate": 3.9937813920114806e-05, "loss": 0.036, "step": 18629 }, { "epoch": 2.611072179397337, "grad_norm": 0.09796120971441269, "learning_rate": 3.992346328629514e-05, "loss": 0.006, 
"step": 18630 }, { "epoch": 2.6112123335669235, "grad_norm": 0.22367095947265625, "learning_rate": 3.990911265247548e-05, "loss": 0.028, "step": 18631 }, { "epoch": 2.61135248773651, "grad_norm": 1.1693377494812012, "learning_rate": 3.989476201865582e-05, "loss": 0.0534, "step": 18632 }, { "epoch": 2.611492641906097, "grad_norm": 0.39454886317253113, "learning_rate": 3.988041138483617e-05, "loss": 0.0451, "step": 18633 }, { "epoch": 2.6116327960756833, "grad_norm": 0.21093155443668365, "learning_rate": 3.98660607510165e-05, "loss": 0.0146, "step": 18634 }, { "epoch": 2.61177295024527, "grad_norm": 0.12873238325119019, "learning_rate": 3.985171011719684e-05, "loss": 0.025, "step": 18635 }, { "epoch": 2.6119131044148562, "grad_norm": 0.29310110211372375, "learning_rate": 3.983735948337717e-05, "loss": 0.028, "step": 18636 }, { "epoch": 2.612053258584443, "grad_norm": 0.5587449073791504, "learning_rate": 3.982300884955752e-05, "loss": 0.0232, "step": 18637 }, { "epoch": 2.6121934127540296, "grad_norm": 0.09433801472187042, "learning_rate": 3.980865821573786e-05, "loss": 0.0081, "step": 18638 }, { "epoch": 2.612333566923616, "grad_norm": 0.5015447735786438, "learning_rate": 3.97943075819182e-05, "loss": 0.0544, "step": 18639 }, { "epoch": 2.6124737210932025, "grad_norm": 0.10308688133955002, "learning_rate": 3.9779956948098535e-05, "loss": 0.0051, "step": 18640 }, { "epoch": 2.612613875262789, "grad_norm": 0.2075146734714508, "learning_rate": 3.9765606314278884e-05, "loss": 0.0152, "step": 18641 }, { "epoch": 2.6127540294323754, "grad_norm": 0.1745568811893463, "learning_rate": 3.975125568045922e-05, "loss": 0.0278, "step": 18642 }, { "epoch": 2.612894183601962, "grad_norm": 0.21518918871879578, "learning_rate": 3.9736905046639554e-05, "loss": 0.0353, "step": 18643 }, { "epoch": 2.613034337771549, "grad_norm": 0.056774020195007324, "learning_rate": 3.972255441281989e-05, "loss": 0.01, "step": 18644 }, { "epoch": 2.6131744919411353, "grad_norm": 0.12873075902462006, 
"learning_rate": 3.970820377900024e-05, "loss": 0.0155, "step": 18645 }, { "epoch": 2.6133146461107217, "grad_norm": 0.17562609910964966, "learning_rate": 3.9693853145180574e-05, "loss": 0.0199, "step": 18646 }, { "epoch": 2.613454800280308, "grad_norm": 0.17403732240200043, "learning_rate": 3.9679502511360916e-05, "loss": 0.0287, "step": 18647 }, { "epoch": 2.613594954449895, "grad_norm": 0.12795758247375488, "learning_rate": 3.966515187754125e-05, "loss": 0.0201, "step": 18648 }, { "epoch": 2.6137351086194816, "grad_norm": 0.13197122514247894, "learning_rate": 3.96508012437216e-05, "loss": 0.0143, "step": 18649 }, { "epoch": 2.613875262789068, "grad_norm": 0.21328748762607574, "learning_rate": 3.9636450609901936e-05, "loss": 0.0169, "step": 18650 }, { "epoch": 2.6140154169586545, "grad_norm": 0.40437453985214233, "learning_rate": 3.962209997608227e-05, "loss": 0.0665, "step": 18651 }, { "epoch": 2.614155571128241, "grad_norm": 0.1974000632762909, "learning_rate": 3.9607749342262606e-05, "loss": 0.0148, "step": 18652 }, { "epoch": 2.6142957252978274, "grad_norm": 0.14026904106140137, "learning_rate": 3.9593398708442955e-05, "loss": 0.0178, "step": 18653 }, { "epoch": 2.6144358794674143, "grad_norm": 0.5113092660903931, "learning_rate": 3.957904807462329e-05, "loss": 0.0232, "step": 18654 }, { "epoch": 2.6145760336370008, "grad_norm": 0.02748430334031582, "learning_rate": 3.956469744080363e-05, "loss": 0.0013, "step": 18655 }, { "epoch": 2.614716187806587, "grad_norm": 0.20271700620651245, "learning_rate": 3.955034680698397e-05, "loss": 0.0124, "step": 18656 }, { "epoch": 2.6148563419761737, "grad_norm": 0.06992552429437637, "learning_rate": 3.953599617316432e-05, "loss": 0.0056, "step": 18657 }, { "epoch": 2.6149964961457606, "grad_norm": 0.31607091426849365, "learning_rate": 3.952164553934465e-05, "loss": 0.0243, "step": 18658 }, { "epoch": 2.615136650315347, "grad_norm": 0.25952139496803284, "learning_rate": 3.950729490552499e-05, "loss": 0.0455, "step": 18659 
}, { "epoch": 2.6152768044849335, "grad_norm": 0.12047473341226578, "learning_rate": 3.949294427170532e-05, "loss": 0.021, "step": 18660 }, { "epoch": 2.61541695865452, "grad_norm": 0.15446336567401886, "learning_rate": 3.947859363788567e-05, "loss": 0.0146, "step": 18661 }, { "epoch": 2.6155571128241064, "grad_norm": 0.14092274010181427, "learning_rate": 3.946424300406601e-05, "loss": 0.0048, "step": 18662 }, { "epoch": 2.615697266993693, "grad_norm": 0.5055900812149048, "learning_rate": 3.944989237024635e-05, "loss": 0.0464, "step": 18663 }, { "epoch": 2.6158374211632793, "grad_norm": 0.012625631876289845, "learning_rate": 3.9435541736426685e-05, "loss": 0.0009, "step": 18664 }, { "epoch": 2.6159775753328662, "grad_norm": 0.09986929595470428, "learning_rate": 3.9421191102607034e-05, "loss": 0.0028, "step": 18665 }, { "epoch": 2.6161177295024527, "grad_norm": 0.16719025373458862, "learning_rate": 3.940684046878737e-05, "loss": 0.0228, "step": 18666 }, { "epoch": 2.616257883672039, "grad_norm": 1.1687332391738892, "learning_rate": 3.9392489834967704e-05, "loss": 0.1238, "step": 18667 }, { "epoch": 2.616398037841626, "grad_norm": 0.24654161930084229, "learning_rate": 3.937813920114805e-05, "loss": 0.0354, "step": 18668 }, { "epoch": 2.6165381920112125, "grad_norm": 2.5124268531799316, "learning_rate": 3.936378856732839e-05, "loss": 0.1255, "step": 18669 }, { "epoch": 2.616678346180799, "grad_norm": 0.21716395020484924, "learning_rate": 3.9349437933508724e-05, "loss": 0.0075, "step": 18670 }, { "epoch": 2.6168185003503854, "grad_norm": 0.2228481024503708, "learning_rate": 3.9335087299689066e-05, "loss": 0.0212, "step": 18671 }, { "epoch": 2.616958654519972, "grad_norm": 0.1531447321176529, "learning_rate": 3.932073666586941e-05, "loss": 0.0167, "step": 18672 }, { "epoch": 2.6170988086895584, "grad_norm": 0.16984587907791138, "learning_rate": 3.930638603204975e-05, "loss": 0.0126, "step": 18673 }, { "epoch": 2.617238962859145, "grad_norm": 0.18887673318386078, 
"learning_rate": 3.9292035398230085e-05, "loss": 0.0371, "step": 18674 }, { "epoch": 2.6173791170287317, "grad_norm": 0.4719323217868805, "learning_rate": 3.927768476441042e-05, "loss": 0.0087, "step": 18675 }, { "epoch": 2.617519271198318, "grad_norm": 0.27098533511161804, "learning_rate": 3.926333413059077e-05, "loss": 0.0621, "step": 18676 }, { "epoch": 2.6176594253679046, "grad_norm": 0.2018071413040161, "learning_rate": 3.9248983496771105e-05, "loss": 0.0248, "step": 18677 }, { "epoch": 2.617799579537491, "grad_norm": 0.15955694019794464, "learning_rate": 3.923463286295144e-05, "loss": 0.0343, "step": 18678 }, { "epoch": 2.617939733707078, "grad_norm": 0.24343745410442352, "learning_rate": 3.922028222913178e-05, "loss": 0.0619, "step": 18679 }, { "epoch": 2.6180798878766645, "grad_norm": 0.3357413411140442, "learning_rate": 3.9205931595312125e-05, "loss": 0.0876, "step": 18680 }, { "epoch": 2.618220042046251, "grad_norm": 0.18207237124443054, "learning_rate": 3.919158096149247e-05, "loss": 0.0172, "step": 18681 }, { "epoch": 2.6183601962158374, "grad_norm": 0.4779229164123535, "learning_rate": 3.91772303276728e-05, "loss": 0.0238, "step": 18682 }, { "epoch": 2.618500350385424, "grad_norm": 0.45011502504348755, "learning_rate": 3.916287969385314e-05, "loss": 0.0597, "step": 18683 }, { "epoch": 2.6186405045550103, "grad_norm": 0.2848474681377411, "learning_rate": 3.9148529060033486e-05, "loss": 0.0169, "step": 18684 }, { "epoch": 2.618780658724597, "grad_norm": 0.12286489456892014, "learning_rate": 3.913417842621382e-05, "loss": 0.0121, "step": 18685 }, { "epoch": 2.6189208128941837, "grad_norm": 0.24249719083309174, "learning_rate": 3.911982779239416e-05, "loss": 0.0357, "step": 18686 }, { "epoch": 2.61906096706377, "grad_norm": 0.12054488062858582, "learning_rate": 3.91054771585745e-05, "loss": 0.0209, "step": 18687 }, { "epoch": 2.6192011212333566, "grad_norm": 0.08669152855873108, "learning_rate": 3.909112652475484e-05, "loss": 0.0076, "step": 18688 }, { 
"epoch": 2.6193412754029435, "grad_norm": 0.10875049978494644, "learning_rate": 3.907677589093518e-05, "loss": 0.0105, "step": 18689 }, { "epoch": 2.61948142957253, "grad_norm": 0.11482339352369308, "learning_rate": 3.906242525711552e-05, "loss": 0.0117, "step": 18690 }, { "epoch": 2.6196215837421164, "grad_norm": 0.21303151547908783, "learning_rate": 3.9048074623295854e-05, "loss": 0.0158, "step": 18691 }, { "epoch": 2.619761737911703, "grad_norm": 0.28482940793037415, "learning_rate": 3.90337239894762e-05, "loss": 0.01, "step": 18692 }, { "epoch": 2.6199018920812893, "grad_norm": 0.07731757313013077, "learning_rate": 3.901937335565654e-05, "loss": 0.0072, "step": 18693 }, { "epoch": 2.620042046250876, "grad_norm": 0.2117357701063156, "learning_rate": 3.9005022721836873e-05, "loss": 0.0472, "step": 18694 }, { "epoch": 2.6201822004204622, "grad_norm": 0.3398694097995758, "learning_rate": 3.8990672088017216e-05, "loss": 0.0201, "step": 18695 }, { "epoch": 2.620322354590049, "grad_norm": 0.08713182061910629, "learning_rate": 3.897632145419756e-05, "loss": 0.0152, "step": 18696 }, { "epoch": 2.6204625087596356, "grad_norm": 0.18486535549163818, "learning_rate": 3.89619708203779e-05, "loss": 0.0146, "step": 18697 }, { "epoch": 2.620602662929222, "grad_norm": 0.04910740628838539, "learning_rate": 3.8947620186558235e-05, "loss": 0.0058, "step": 18698 }, { "epoch": 2.620742817098809, "grad_norm": 0.29267776012420654, "learning_rate": 3.893326955273857e-05, "loss": 0.019, "step": 18699 }, { "epoch": 2.6208829712683954, "grad_norm": 0.06982609629631042, "learning_rate": 3.891891891891892e-05, "loss": 0.0037, "step": 18700 }, { "epoch": 2.621023125437982, "grad_norm": 0.10264930874109268, "learning_rate": 3.8904568285099255e-05, "loss": 0.0144, "step": 18701 }, { "epoch": 2.6211632796075683, "grad_norm": 0.11372557282447815, "learning_rate": 3.889021765127959e-05, "loss": 0.0167, "step": 18702 }, { "epoch": 2.621303433777155, "grad_norm": 0.02014012448489666, 
"learning_rate": 3.887586701745994e-05, "loss": 0.0017, "step": 18703 }, { "epoch": 2.6214435879467413, "grad_norm": 0.13954277336597443, "learning_rate": 3.8861516383640274e-05, "loss": 0.0057, "step": 18704 }, { "epoch": 2.6215837421163277, "grad_norm": 0.16644546389579773, "learning_rate": 3.8847165749820616e-05, "loss": 0.0066, "step": 18705 }, { "epoch": 2.6217238962859146, "grad_norm": 0.09715601801872253, "learning_rate": 3.883281511600095e-05, "loss": 0.0036, "step": 18706 }, { "epoch": 2.621864050455501, "grad_norm": 0.1313941925764084, "learning_rate": 3.8818464482181294e-05, "loss": 0.0086, "step": 18707 }, { "epoch": 2.6220042046250875, "grad_norm": 0.09903451800346375, "learning_rate": 3.8804113848361636e-05, "loss": 0.0118, "step": 18708 }, { "epoch": 2.622144358794674, "grad_norm": 0.35303157567977905, "learning_rate": 3.878976321454197e-05, "loss": 0.0416, "step": 18709 }, { "epoch": 2.622284512964261, "grad_norm": 0.1517418920993805, "learning_rate": 3.8775412580722307e-05, "loss": 0.0069, "step": 18710 }, { "epoch": 2.6224246671338474, "grad_norm": 0.1451348066329956, "learning_rate": 3.8761061946902655e-05, "loss": 0.0192, "step": 18711 }, { "epoch": 2.622564821303434, "grad_norm": 0.517396092414856, "learning_rate": 3.874671131308299e-05, "loss": 0.0586, "step": 18712 }, { "epoch": 2.6227049754730203, "grad_norm": 0.5663255453109741, "learning_rate": 3.873236067926333e-05, "loss": 0.0961, "step": 18713 }, { "epoch": 2.6228451296426067, "grad_norm": 0.330269455909729, "learning_rate": 3.871801004544367e-05, "loss": 0.0318, "step": 18714 }, { "epoch": 2.622985283812193, "grad_norm": 1.0959969758987427, "learning_rate": 3.870365941162401e-05, "loss": 0.0655, "step": 18715 }, { "epoch": 2.62312543798178, "grad_norm": 0.17817942798137665, "learning_rate": 3.868930877780435e-05, "loss": 0.0281, "step": 18716 }, { "epoch": 2.6232655921513666, "grad_norm": 1.015141487121582, "learning_rate": 3.867495814398469e-05, "loss": 0.1967, "step": 18717 }, { 
"epoch": 2.623405746320953, "grad_norm": 1.9474964141845703, "learning_rate": 3.866060751016502e-05, "loss": 0.2112, "step": 18718 }, { "epoch": 2.6235459004905395, "grad_norm": 0.7807974219322205, "learning_rate": 3.864625687634537e-05, "loss": 0.0294, "step": 18719 }, { "epoch": 2.6236860546601264, "grad_norm": 2.3920202255249023, "learning_rate": 3.863190624252571e-05, "loss": 0.0499, "step": 18720 }, { "epoch": 2.623826208829713, "grad_norm": 0.2157333642244339, "learning_rate": 3.861755560870605e-05, "loss": 0.0317, "step": 18721 }, { "epoch": 2.6239663629992993, "grad_norm": 0.19947752356529236, "learning_rate": 3.8603204974886385e-05, "loss": 0.0358, "step": 18722 }, { "epoch": 2.6241065171688858, "grad_norm": 0.1280020773410797, "learning_rate": 3.858885434106673e-05, "loss": 0.0184, "step": 18723 }, { "epoch": 2.6242466713384722, "grad_norm": 0.7770083546638489, "learning_rate": 3.857450370724707e-05, "loss": 0.0459, "step": 18724 }, { "epoch": 2.6243868255080587, "grad_norm": 0.23050451278686523, "learning_rate": 3.8560153073427404e-05, "loss": 0.016, "step": 18725 }, { "epoch": 2.624526979677645, "grad_norm": 0.28075748682022095, "learning_rate": 3.854580243960774e-05, "loss": 0.0149, "step": 18726 }, { "epoch": 2.624667133847232, "grad_norm": 0.1706288903951645, "learning_rate": 3.853145180578809e-05, "loss": 0.0152, "step": 18727 }, { "epoch": 2.6248072880168185, "grad_norm": 0.2784833312034607, "learning_rate": 3.8517101171968424e-05, "loss": 0.038, "step": 18728 }, { "epoch": 2.624947442186405, "grad_norm": 0.06164579465985298, "learning_rate": 3.8502750538148766e-05, "loss": 0.006, "step": 18729 }, { "epoch": 2.6250875963559914, "grad_norm": 0.195857971906662, "learning_rate": 3.84883999043291e-05, "loss": 0.0145, "step": 18730 }, { "epoch": 2.6252277505255783, "grad_norm": 0.11202902346849442, "learning_rate": 3.8474049270509444e-05, "loss": 0.0127, "step": 18731 }, { "epoch": 2.625367904695165, "grad_norm": 0.1263100653886795, "learning_rate": 
3.8459698636689786e-05, "loss": 0.0194, "step": 18732 }, { "epoch": 2.6255080588647512, "grad_norm": 0.2511688768863678, "learning_rate": 3.844534800287012e-05, "loss": 0.0348, "step": 18733 }, { "epoch": 2.6256482130343377, "grad_norm": 0.40503019094467163, "learning_rate": 3.8430997369050456e-05, "loss": 0.0438, "step": 18734 }, { "epoch": 2.625788367203924, "grad_norm": 0.3158361315727234, "learning_rate": 3.8416646735230805e-05, "loss": 0.0304, "step": 18735 }, { "epoch": 2.6259285213735106, "grad_norm": 0.06601721048355103, "learning_rate": 3.840229610141114e-05, "loss": 0.0044, "step": 18736 }, { "epoch": 2.6260686755430975, "grad_norm": 0.41442951560020447, "learning_rate": 3.838794546759148e-05, "loss": 0.0316, "step": 18737 }, { "epoch": 2.626208829712684, "grad_norm": 0.4343923032283783, "learning_rate": 3.837359483377182e-05, "loss": 0.05, "step": 18738 }, { "epoch": 2.6263489838822704, "grad_norm": 0.442106157541275, "learning_rate": 3.835924419995216e-05, "loss": 0.0406, "step": 18739 }, { "epoch": 2.626489138051857, "grad_norm": 0.192957803606987, "learning_rate": 3.83448935661325e-05, "loss": 0.0375, "step": 18740 }, { "epoch": 2.626629292221444, "grad_norm": 0.08421743661165237, "learning_rate": 3.833054293231284e-05, "loss": 0.0216, "step": 18741 }, { "epoch": 2.6267694463910303, "grad_norm": 0.4264732599258423, "learning_rate": 3.8316192298493186e-05, "loss": 0.0542, "step": 18742 }, { "epoch": 2.6269096005606167, "grad_norm": 0.25863268971443176, "learning_rate": 3.830184166467352e-05, "loss": 0.0968, "step": 18743 }, { "epoch": 2.627049754730203, "grad_norm": 0.3869832158088684, "learning_rate": 3.828749103085386e-05, "loss": 0.0299, "step": 18744 }, { "epoch": 2.6271899088997896, "grad_norm": 0.3391312062740326, "learning_rate": 3.82731403970342e-05, "loss": 0.0109, "step": 18745 }, { "epoch": 2.627330063069376, "grad_norm": 0.06701905280351639, "learning_rate": 3.825878976321454e-05, "loss": 0.0074, "step": 18746 }, { "epoch": 
2.627470217238963, "grad_norm": 0.22616274654865265, "learning_rate": 3.824443912939488e-05, "loss": 0.0399, "step": 18747 }, { "epoch": 2.6276103714085495, "grad_norm": 0.06368976086378098, "learning_rate": 3.823008849557522e-05, "loss": 0.006, "step": 18748 }, { "epoch": 2.627750525578136, "grad_norm": 0.6950241327285767, "learning_rate": 3.8215737861755554e-05, "loss": 0.0348, "step": 18749 }, { "epoch": 2.6278906797477224, "grad_norm": 0.11261796206235886, "learning_rate": 3.82013872279359e-05, "loss": 0.009, "step": 18750 }, { "epoch": 2.6280308339173093, "grad_norm": 0.4362614154815674, "learning_rate": 3.818703659411624e-05, "loss": 0.0283, "step": 18751 }, { "epoch": 2.6281709880868958, "grad_norm": 0.0697779431939125, "learning_rate": 3.8172685960296574e-05, "loss": 0.0057, "step": 18752 }, { "epoch": 2.628311142256482, "grad_norm": 0.1416267305612564, "learning_rate": 3.8158335326476916e-05, "loss": 0.0106, "step": 18753 }, { "epoch": 2.6284512964260687, "grad_norm": 0.20650041103363037, "learning_rate": 3.814398469265726e-05, "loss": 0.0452, "step": 18754 }, { "epoch": 2.628591450595655, "grad_norm": 0.15064755082130432, "learning_rate": 3.812963405883759e-05, "loss": 0.0106, "step": 18755 }, { "epoch": 2.6287316047652416, "grad_norm": 0.11406257748603821, "learning_rate": 3.8115283425017935e-05, "loss": 0.0039, "step": 18756 }, { "epoch": 2.628871758934828, "grad_norm": 0.396472692489624, "learning_rate": 3.810093279119827e-05, "loss": 0.0222, "step": 18757 }, { "epoch": 2.629011913104415, "grad_norm": 1.1222608089447021, "learning_rate": 3.808658215737862e-05, "loss": 0.0595, "step": 18758 }, { "epoch": 2.6291520672740014, "grad_norm": 0.6270762085914612, "learning_rate": 3.8072231523558955e-05, "loss": 0.0762, "step": 18759 }, { "epoch": 2.629292221443588, "grad_norm": 0.13180424273014069, "learning_rate": 3.805788088973929e-05, "loss": 0.0071, "step": 18760 }, { "epoch": 2.6294323756131743, "grad_norm": 0.11908093839883804, "learning_rate": 
3.804353025591963e-05, "loss": 0.0165, "step": 18761 }, { "epoch": 2.6295725297827612, "grad_norm": 0.09246785193681717, "learning_rate": 3.8029179622099974e-05, "loss": 0.0083, "step": 18762 }, { "epoch": 2.6297126839523477, "grad_norm": 0.16399389505386353, "learning_rate": 3.801482898828031e-05, "loss": 0.0075, "step": 18763 }, { "epoch": 2.629852838121934, "grad_norm": 0.1467788815498352, "learning_rate": 3.800047835446065e-05, "loss": 0.0048, "step": 18764 }, { "epoch": 2.6299929922915206, "grad_norm": 0.09992936253547668, "learning_rate": 3.798612772064099e-05, "loss": 0.0111, "step": 18765 }, { "epoch": 2.630133146461107, "grad_norm": 0.6389560103416443, "learning_rate": 3.7971777086821336e-05, "loss": 0.0302, "step": 18766 }, { "epoch": 2.6302733006306935, "grad_norm": 1.1857097148895264, "learning_rate": 3.795742645300167e-05, "loss": 0.0768, "step": 18767 }, { "epoch": 2.6304134548002804, "grad_norm": 0.26087599992752075, "learning_rate": 3.794307581918201e-05, "loss": 0.0396, "step": 18768 }, { "epoch": 2.630553608969867, "grad_norm": 1.3678200244903564, "learning_rate": 3.792872518536235e-05, "loss": 0.2118, "step": 18769 }, { "epoch": 2.6306937631394534, "grad_norm": 0.25882554054260254, "learning_rate": 3.791437455154269e-05, "loss": 0.0101, "step": 18770 }, { "epoch": 2.63083391730904, "grad_norm": 0.1423613727092743, "learning_rate": 3.7900023917723026e-05, "loss": 0.0283, "step": 18771 }, { "epoch": 2.6309740714786267, "grad_norm": 0.44040006399154663, "learning_rate": 3.788567328390337e-05, "loss": 0.0138, "step": 18772 }, { "epoch": 2.631114225648213, "grad_norm": 0.15163441002368927, "learning_rate": 3.7871322650083704e-05, "loss": 0.0107, "step": 18773 }, { "epoch": 2.6312543798177996, "grad_norm": 0.10793764889240265, "learning_rate": 3.785697201626405e-05, "loss": 0.0205, "step": 18774 }, { "epoch": 2.631394533987386, "grad_norm": 0.2825261354446411, "learning_rate": 3.784262138244439e-05, "loss": 0.0296, "step": 18775 }, { "epoch": 
2.6315346881569726, "grad_norm": 0.09201250970363617, "learning_rate": 3.7828270748624723e-05, "loss": 0.0113, "step": 18776 }, { "epoch": 2.631674842326559, "grad_norm": 0.14555513858795166, "learning_rate": 3.781392011480507e-05, "loss": 0.0194, "step": 18777 }, { "epoch": 2.631814996496146, "grad_norm": 0.08499991148710251, "learning_rate": 3.779956948098541e-05, "loss": 0.0087, "step": 18778 }, { "epoch": 2.6319551506657324, "grad_norm": 0.13297484815120697, "learning_rate": 3.778521884716574e-05, "loss": 0.0136, "step": 18779 }, { "epoch": 2.632095304835319, "grad_norm": 0.1272265464067459, "learning_rate": 3.7770868213346085e-05, "loss": 0.0214, "step": 18780 }, { "epoch": 2.6322354590049053, "grad_norm": 0.16664016246795654, "learning_rate": 3.775651757952643e-05, "loss": 0.0353, "step": 18781 }, { "epoch": 2.632375613174492, "grad_norm": 0.29483941197395325, "learning_rate": 3.774216694570677e-05, "loss": 0.04, "step": 18782 }, { "epoch": 2.6325157673440787, "grad_norm": 0.25430095195770264, "learning_rate": 3.7727816311887105e-05, "loss": 0.0246, "step": 18783 }, { "epoch": 2.632655921513665, "grad_norm": 0.25163504481315613, "learning_rate": 3.771346567806744e-05, "loss": 0.0327, "step": 18784 }, { "epoch": 2.6327960756832516, "grad_norm": 0.18371450901031494, "learning_rate": 3.769911504424779e-05, "loss": 0.0095, "step": 18785 }, { "epoch": 2.632936229852838, "grad_norm": 0.16136924922466278, "learning_rate": 3.7684764410428124e-05, "loss": 0.0102, "step": 18786 }, { "epoch": 2.6330763840224245, "grad_norm": 0.06175116449594498, "learning_rate": 3.7670413776608466e-05, "loss": 0.0142, "step": 18787 }, { "epoch": 2.633216538192011, "grad_norm": 0.24549566209316254, "learning_rate": 3.76560631427888e-05, "loss": 0.0119, "step": 18788 }, { "epoch": 2.633356692361598, "grad_norm": 1.073024868965149, "learning_rate": 3.7641712508969144e-05, "loss": 0.0601, "step": 18789 }, { "epoch": 2.6334968465311843, "grad_norm": 0.22789622843265533, "learning_rate": 
3.7627361875149486e-05, "loss": 0.0248, "step": 18790 }, { "epoch": 2.633637000700771, "grad_norm": 0.1780734807252884, "learning_rate": 3.761301124132982e-05, "loss": 0.0276, "step": 18791 }, { "epoch": 2.6337771548703572, "grad_norm": 0.3170514702796936, "learning_rate": 3.7598660607510157e-05, "loss": 0.0279, "step": 18792 }, { "epoch": 2.633917309039944, "grad_norm": 0.16251447796821594, "learning_rate": 3.7584309973690505e-05, "loss": 0.0117, "step": 18793 }, { "epoch": 2.6340574632095306, "grad_norm": 0.12397683411836624, "learning_rate": 3.756995933987084e-05, "loss": 0.0201, "step": 18794 }, { "epoch": 2.634197617379117, "grad_norm": 0.1930229663848877, "learning_rate": 3.755560870605118e-05, "loss": 0.0352, "step": 18795 }, { "epoch": 2.6343377715487035, "grad_norm": 0.12706762552261353, "learning_rate": 3.754125807223152e-05, "loss": 0.0079, "step": 18796 }, { "epoch": 2.63447792571829, "grad_norm": 0.22937016189098358, "learning_rate": 3.752690743841186e-05, "loss": 0.0267, "step": 18797 }, { "epoch": 2.6346180798878764, "grad_norm": 0.2612133324146271, "learning_rate": 3.75125568045922e-05, "loss": 0.0341, "step": 18798 }, { "epoch": 2.6347582340574633, "grad_norm": 0.49783846735954285, "learning_rate": 3.749820617077254e-05, "loss": 0.0503, "step": 18799 }, { "epoch": 2.63489838822705, "grad_norm": 0.11377884447574615, "learning_rate": 3.748385553695288e-05, "loss": 0.0123, "step": 18800 }, { "epoch": 2.6350385423966363, "grad_norm": 0.2304898053407669, "learning_rate": 3.7469504903133215e-05, "loss": 0.0338, "step": 18801 }, { "epoch": 2.6351786965662227, "grad_norm": 0.1135595515370369, "learning_rate": 3.745515426931356e-05, "loss": 0.0177, "step": 18802 }, { "epoch": 2.6353188507358096, "grad_norm": 0.28614211082458496, "learning_rate": 3.74408036354939e-05, "loss": 0.0416, "step": 18803 }, { "epoch": 2.635459004905396, "grad_norm": 0.13080421090126038, "learning_rate": 3.7426453001674235e-05, "loss": 0.0049, "step": 18804 }, { "epoch": 
2.6355991590749825, "grad_norm": 0.2835003733634949, "learning_rate": 3.741210236785458e-05, "loss": 0.0254, "step": 18805 }, { "epoch": 2.635739313244569, "grad_norm": 0.2274160236120224, "learning_rate": 3.739775173403492e-05, "loss": 0.0224, "step": 18806 }, { "epoch": 2.6358794674141555, "grad_norm": 0.2237512171268463, "learning_rate": 3.7383401100215254e-05, "loss": 0.0221, "step": 18807 }, { "epoch": 2.636019621583742, "grad_norm": 0.07804173231124878, "learning_rate": 3.7369050466395596e-05, "loss": 0.0044, "step": 18808 }, { "epoch": 2.636159775753329, "grad_norm": 0.7695476412773132, "learning_rate": 3.735469983257593e-05, "loss": 0.045, "step": 18809 }, { "epoch": 2.6362999299229153, "grad_norm": 0.23624801635742188, "learning_rate": 3.7340349198756274e-05, "loss": 0.0135, "step": 18810 }, { "epoch": 2.6364400840925017, "grad_norm": 0.5543002486228943, "learning_rate": 3.7325998564936616e-05, "loss": 0.0667, "step": 18811 }, { "epoch": 2.636580238262088, "grad_norm": 0.05600513517856598, "learning_rate": 3.731164793111695e-05, "loss": 0.0031, "step": 18812 }, { "epoch": 2.636720392431675, "grad_norm": 0.15577149391174316, "learning_rate": 3.7297297297297293e-05, "loss": 0.0285, "step": 18813 }, { "epoch": 2.6368605466012616, "grad_norm": 0.23244145512580872, "learning_rate": 3.7282946663477636e-05, "loss": 0.0474, "step": 18814 }, { "epoch": 2.637000700770848, "grad_norm": 0.6145548224449158, "learning_rate": 3.726859602965798e-05, "loss": 0.1077, "step": 18815 }, { "epoch": 2.6371408549404345, "grad_norm": 0.5801859498023987, "learning_rate": 3.725424539583831e-05, "loss": 0.0227, "step": 18816 }, { "epoch": 2.637281009110021, "grad_norm": 0.6869896650314331, "learning_rate": 3.7239894762018655e-05, "loss": 0.055, "step": 18817 }, { "epoch": 2.6374211632796074, "grad_norm": 1.5788154602050781, "learning_rate": 3.722554412819899e-05, "loss": 0.076, "step": 18818 }, { "epoch": 2.637561317449194, "grad_norm": 1.0737547874450684, "learning_rate": 
3.721119349437933e-05, "loss": 0.1191, "step": 18819 }, { "epoch": 2.6377014716187808, "grad_norm": 1.3908734321594238, "learning_rate": 3.719684286055967e-05, "loss": 0.5036, "step": 18820 }, { "epoch": 2.6378416257883672, "grad_norm": 0.34114110469818115, "learning_rate": 3.718249222674001e-05, "loss": 0.0173, "step": 18821 }, { "epoch": 2.6379817799579537, "grad_norm": 0.15989378094673157, "learning_rate": 3.716814159292035e-05, "loss": 0.0214, "step": 18822 }, { "epoch": 2.63812193412754, "grad_norm": 0.2138173133134842, "learning_rate": 3.7153790959100694e-05, "loss": 0.0193, "step": 18823 }, { "epoch": 2.638262088297127, "grad_norm": 0.10950767248868942, "learning_rate": 3.713944032528103e-05, "loss": 0.0073, "step": 18824 }, { "epoch": 2.6384022424667135, "grad_norm": 0.09944452345371246, "learning_rate": 3.712508969146137e-05, "loss": 0.0087, "step": 18825 }, { "epoch": 2.6385423966363, "grad_norm": 0.09011691808700562, "learning_rate": 3.711073905764171e-05, "loss": 0.0089, "step": 18826 }, { "epoch": 2.6386825508058864, "grad_norm": 0.19053417444229126, "learning_rate": 3.709638842382205e-05, "loss": 0.0316, "step": 18827 }, { "epoch": 2.638822704975473, "grad_norm": 0.27169933915138245, "learning_rate": 3.7082037790002385e-05, "loss": 0.0382, "step": 18828 }, { "epoch": 2.6389628591450593, "grad_norm": 0.09622090309858322, "learning_rate": 3.706768715618273e-05, "loss": 0.0122, "step": 18829 }, { "epoch": 2.6391030133146463, "grad_norm": 0.12218036502599716, "learning_rate": 3.705333652236307e-05, "loss": 0.0116, "step": 18830 }, { "epoch": 2.6392431674842327, "grad_norm": 0.2866056561470032, "learning_rate": 3.703898588854341e-05, "loss": 0.0121, "step": 18831 }, { "epoch": 2.639383321653819, "grad_norm": 0.22964531183242798, "learning_rate": 3.7024635254723746e-05, "loss": 0.0377, "step": 18832 }, { "epoch": 2.6395234758234056, "grad_norm": 0.17243830859661102, "learning_rate": 3.701028462090409e-05, "loss": 0.0237, "step": 18833 }, { "epoch": 
2.6396636299929925, "grad_norm": 0.37671300768852234, "learning_rate": 3.6995933987084424e-05, "loss": 0.0224, "step": 18834 }, { "epoch": 2.639803784162579, "grad_norm": 0.09349383413791656, "learning_rate": 3.6981583353264766e-05, "loss": 0.0097, "step": 18835 }, { "epoch": 2.6399439383321655, "grad_norm": 0.10590095818042755, "learning_rate": 3.69672327194451e-05, "loss": 0.0064, "step": 18836 }, { "epoch": 2.640084092501752, "grad_norm": 0.10722266882658005, "learning_rate": 3.695288208562544e-05, "loss": 0.0092, "step": 18837 }, { "epoch": 2.6402242466713384, "grad_norm": 0.14299264550209045, "learning_rate": 3.6938531451805785e-05, "loss": 0.0223, "step": 18838 }, { "epoch": 2.640364400840925, "grad_norm": 0.12215137481689453, "learning_rate": 3.692418081798613e-05, "loss": 0.0136, "step": 18839 }, { "epoch": 2.6405045550105113, "grad_norm": 0.06626320630311966, "learning_rate": 3.690983018416646e-05, "loss": 0.0076, "step": 18840 }, { "epoch": 2.640644709180098, "grad_norm": 0.6944338083267212, "learning_rate": 3.6895479550346805e-05, "loss": 0.0332, "step": 18841 }, { "epoch": 2.6407848633496847, "grad_norm": 0.15840215981006622, "learning_rate": 3.688112891652714e-05, "loss": 0.018, "step": 18842 }, { "epoch": 2.640925017519271, "grad_norm": 0.2623918056488037, "learning_rate": 3.686677828270748e-05, "loss": 0.0415, "step": 18843 }, { "epoch": 2.641065171688858, "grad_norm": 0.0757504254579544, "learning_rate": 3.685242764888782e-05, "loss": 0.0146, "step": 18844 }, { "epoch": 2.6412053258584445, "grad_norm": 0.31935518980026245, "learning_rate": 3.683807701506816e-05, "loss": 0.0379, "step": 18845 }, { "epoch": 2.641345480028031, "grad_norm": 0.25937655568122864, "learning_rate": 3.68237263812485e-05, "loss": 0.0172, "step": 18846 }, { "epoch": 2.6414856341976174, "grad_norm": 0.28963372111320496, "learning_rate": 3.6809375747428844e-05, "loss": 0.0318, "step": 18847 }, { "epoch": 2.641625788367204, "grad_norm": 0.07449734210968018, "learning_rate": 
3.679502511360918e-05, "loss": 0.0048, "step": 18848 }, { "epoch": 2.6417659425367903, "grad_norm": 0.08503377437591553, "learning_rate": 3.678067447978952e-05, "loss": 0.0249, "step": 18849 }, { "epoch": 2.6419060967063768, "grad_norm": 0.08904992789030075, "learning_rate": 3.6766323845969864e-05, "loss": 0.0075, "step": 18850 }, { "epoch": 2.6420462508759637, "grad_norm": 0.24954663217067719, "learning_rate": 3.67519732121502e-05, "loss": 0.0104, "step": 18851 }, { "epoch": 2.64218640504555, "grad_norm": 0.44495853781700134, "learning_rate": 3.673762257833054e-05, "loss": 0.0364, "step": 18852 }, { "epoch": 2.6423265592151366, "grad_norm": 0.17904818058013916, "learning_rate": 3.6723271944510876e-05, "loss": 0.0063, "step": 18853 }, { "epoch": 2.642466713384723, "grad_norm": 0.10657403618097305, "learning_rate": 3.670892131069122e-05, "loss": 0.0073, "step": 18854 }, { "epoch": 2.64260686755431, "grad_norm": 0.21306875348091125, "learning_rate": 3.669457067687156e-05, "loss": 0.0294, "step": 18855 }, { "epoch": 2.6427470217238964, "grad_norm": 0.19244778156280518, "learning_rate": 3.66802200430519e-05, "loss": 0.0608, "step": 18856 }, { "epoch": 2.642887175893483, "grad_norm": 0.26421770453453064, "learning_rate": 3.666586940923224e-05, "loss": 0.0353, "step": 18857 }, { "epoch": 2.6430273300630693, "grad_norm": 0.4158807396888733, "learning_rate": 3.665151877541258e-05, "loss": 0.0814, "step": 18858 }, { "epoch": 2.643167484232656, "grad_norm": 0.15251097083091736, "learning_rate": 3.6637168141592915e-05, "loss": 0.0068, "step": 18859 }, { "epoch": 2.6433076384022423, "grad_norm": 0.2577818036079407, "learning_rate": 3.662281750777326e-05, "loss": 0.033, "step": 18860 }, { "epoch": 2.643447792571829, "grad_norm": 0.10909423232078552, "learning_rate": 3.660846687395359e-05, "loss": 0.0065, "step": 18861 }, { "epoch": 2.6435879467414156, "grad_norm": 0.05739521607756615, "learning_rate": 3.6594116240133935e-05, "loss": 0.0049, "step": 18862 }, { "epoch": 
2.643728100911002, "grad_norm": 0.41375911235809326, "learning_rate": 3.657976560631428e-05, "loss": 0.0592, "step": 18863 }, { "epoch": 2.6438682550805885, "grad_norm": 0.49567365646362305, "learning_rate": 3.656541497249462e-05, "loss": 0.0492, "step": 18864 }, { "epoch": 2.6440084092501754, "grad_norm": 0.03446665406227112, "learning_rate": 3.6551064338674955e-05, "loss": 0.0037, "step": 18865 }, { "epoch": 2.644148563419762, "grad_norm": 0.16851094365119934, "learning_rate": 3.65367137048553e-05, "loss": 0.0263, "step": 18866 }, { "epoch": 2.6442887175893484, "grad_norm": 0.11972197145223618, "learning_rate": 3.652236307103563e-05, "loss": 0.0022, "step": 18867 }, { "epoch": 2.644428871758935, "grad_norm": 0.4065820276737213, "learning_rate": 3.6508012437215974e-05, "loss": 0.0624, "step": 18868 }, { "epoch": 2.6445690259285213, "grad_norm": 0.9345421195030212, "learning_rate": 3.649366180339631e-05, "loss": 0.1679, "step": 18869 }, { "epoch": 2.6447091800981077, "grad_norm": 0.2692727744579315, "learning_rate": 3.647931116957665e-05, "loss": 0.0184, "step": 18870 }, { "epoch": 2.644849334267694, "grad_norm": 0.1302371621131897, "learning_rate": 3.6464960535756994e-05, "loss": 0.0375, "step": 18871 }, { "epoch": 2.644989488437281, "grad_norm": 0.12588495016098022, "learning_rate": 3.6450609901937336e-05, "loss": 0.0141, "step": 18872 }, { "epoch": 2.6451296426068676, "grad_norm": 0.11748453229665756, "learning_rate": 3.643625926811767e-05, "loss": 0.0147, "step": 18873 }, { "epoch": 2.645269796776454, "grad_norm": 0.3150537610054016, "learning_rate": 3.642190863429801e-05, "loss": 0.0471, "step": 18874 }, { "epoch": 2.6454099509460405, "grad_norm": 0.18696613609790802, "learning_rate": 3.640755800047835e-05, "loss": 0.0531, "step": 18875 }, { "epoch": 2.6455501051156274, "grad_norm": 0.21966686844825745, "learning_rate": 3.639320736665869e-05, "loss": 0.0565, "step": 18876 }, { "epoch": 2.645690259285214, "grad_norm": 0.13831181824207306, "learning_rate": 
3.6378856732839026e-05, "loss": 0.0246, "step": 18877 }, { "epoch": 2.6458304134548003, "grad_norm": 0.19923293590545654, "learning_rate": 3.636450609901937e-05, "loss": 0.0288, "step": 18878 }, { "epoch": 2.6459705676243868, "grad_norm": 0.2263757288455963, "learning_rate": 3.635015546519971e-05, "loss": 0.016, "step": 18879 }, { "epoch": 2.646110721793973, "grad_norm": 0.21622996032238007, "learning_rate": 3.633580483138005e-05, "loss": 0.0273, "step": 18880 }, { "epoch": 2.6462508759635597, "grad_norm": 0.12332558631896973, "learning_rate": 3.632145419756039e-05, "loss": 0.0155, "step": 18881 }, { "epoch": 2.6463910301331466, "grad_norm": 0.32262420654296875, "learning_rate": 3.630710356374073e-05, "loss": 0.0402, "step": 18882 }, { "epoch": 2.646531184302733, "grad_norm": 0.24925532937049866, "learning_rate": 3.6292752929921065e-05, "loss": 0.0213, "step": 18883 }, { "epoch": 2.6466713384723195, "grad_norm": 0.09134061634540558, "learning_rate": 3.627840229610141e-05, "loss": 0.0158, "step": 18884 }, { "epoch": 2.646811492641906, "grad_norm": 0.3863559663295746, "learning_rate": 3.626405166228174e-05, "loss": 0.0115, "step": 18885 }, { "epoch": 2.646951646811493, "grad_norm": 0.3700583577156067, "learning_rate": 3.6249701028462085e-05, "loss": 0.0163, "step": 18886 }, { "epoch": 2.6470918009810793, "grad_norm": 0.13881954550743103, "learning_rate": 3.623535039464243e-05, "loss": 0.0134, "step": 18887 }, { "epoch": 2.647231955150666, "grad_norm": 0.26932060718536377, "learning_rate": 3.622099976082277e-05, "loss": 0.0228, "step": 18888 }, { "epoch": 2.6473721093202522, "grad_norm": 0.4892718493938446, "learning_rate": 3.620664912700311e-05, "loss": 0.0533, "step": 18889 }, { "epoch": 2.6475122634898387, "grad_norm": 0.09212004393339157, "learning_rate": 3.6192298493183446e-05, "loss": 0.0074, "step": 18890 }, { "epoch": 2.647652417659425, "grad_norm": 0.24756762385368347, "learning_rate": 3.617794785936379e-05, "loss": 0.0409, "step": 18891 }, { "epoch": 
2.647792571829012, "grad_norm": 0.3749455511569977, "learning_rate": 3.6163597225544124e-05, "loss": 0.0596, "step": 18892 }, { "epoch": 2.6479327259985985, "grad_norm": 0.08964864909648895, "learning_rate": 3.6149246591724466e-05, "loss": 0.0133, "step": 18893 }, { "epoch": 2.648072880168185, "grad_norm": 0.09138436615467072, "learning_rate": 3.61348959579048e-05, "loss": 0.0145, "step": 18894 }, { "epoch": 2.6482130343377714, "grad_norm": 0.18257910013198853, "learning_rate": 3.6120545324085143e-05, "loss": 0.0803, "step": 18895 }, { "epoch": 2.6483531885073583, "grad_norm": 0.12781164050102234, "learning_rate": 3.6106194690265486e-05, "loss": 0.0103, "step": 18896 }, { "epoch": 2.648493342676945, "grad_norm": 0.19195881485939026, "learning_rate": 3.609184405644583e-05, "loss": 0.0345, "step": 18897 }, { "epoch": 2.6486334968465313, "grad_norm": 0.14529934525489807, "learning_rate": 3.607749342262616e-05, "loss": 0.0095, "step": 18898 }, { "epoch": 2.6487736510161177, "grad_norm": 0.2866417467594147, "learning_rate": 3.6063142788806505e-05, "loss": 0.0187, "step": 18899 }, { "epoch": 2.648913805185704, "grad_norm": 0.23872049152851105, "learning_rate": 3.604879215498684e-05, "loss": 0.036, "step": 18900 }, { "epoch": 2.6490539593552906, "grad_norm": 0.057637810707092285, "learning_rate": 3.603444152116718e-05, "loss": 0.0016, "step": 18901 }, { "epoch": 2.649194113524877, "grad_norm": 0.2614864110946655, "learning_rate": 3.602009088734752e-05, "loss": 0.0321, "step": 18902 }, { "epoch": 2.649334267694464, "grad_norm": 1.463587999343872, "learning_rate": 3.600574025352786e-05, "loss": 0.0429, "step": 18903 }, { "epoch": 2.6494744218640505, "grad_norm": 0.12550930678844452, "learning_rate": 3.59913896197082e-05, "loss": 0.0084, "step": 18904 }, { "epoch": 2.649614576033637, "grad_norm": 0.18181069195270538, "learning_rate": 3.5977038985888544e-05, "loss": 0.0258, "step": 18905 }, { "epoch": 2.6497547302032234, "grad_norm": 0.28609520196914673, "learning_rate": 
3.596268835206888e-05, "loss": 0.0352, "step": 18906 }, { "epoch": 2.6498948843728103, "grad_norm": 0.1043582409620285, "learning_rate": 3.594833771824922e-05, "loss": 0.0095, "step": 18907 }, { "epoch": 2.6500350385423967, "grad_norm": 0.727829098701477, "learning_rate": 3.593398708442956e-05, "loss": 0.0274, "step": 18908 }, { "epoch": 2.650175192711983, "grad_norm": 0.038930390030145645, "learning_rate": 3.59196364506099e-05, "loss": 0.0022, "step": 18909 }, { "epoch": 2.6503153468815697, "grad_norm": 1.0560684204101562, "learning_rate": 3.5905285816790234e-05, "loss": 0.0451, "step": 18910 }, { "epoch": 2.650455501051156, "grad_norm": 0.2329055815935135, "learning_rate": 3.5890935182970577e-05, "loss": 0.0215, "step": 18911 }, { "epoch": 2.6505956552207426, "grad_norm": 0.2704979479312897, "learning_rate": 3.587658454915092e-05, "loss": 0.0506, "step": 18912 }, { "epoch": 2.6507358093903295, "grad_norm": 0.016580017283558846, "learning_rate": 3.586223391533126e-05, "loss": 0.0013, "step": 18913 }, { "epoch": 2.650875963559916, "grad_norm": 0.2812025249004364, "learning_rate": 3.5847883281511596e-05, "loss": 0.031, "step": 18914 }, { "epoch": 2.6510161177295024, "grad_norm": 0.1254044771194458, "learning_rate": 3.583353264769194e-05, "loss": 0.0068, "step": 18915 }, { "epoch": 2.651156271899089, "grad_norm": 0.8288163542747498, "learning_rate": 3.5819182013872274e-05, "loss": 0.0716, "step": 18916 }, { "epoch": 2.6512964260686758, "grad_norm": 0.1282484382390976, "learning_rate": 3.5804831380052616e-05, "loss": 0.0208, "step": 18917 }, { "epoch": 2.6514365802382622, "grad_norm": 0.585984468460083, "learning_rate": 3.579048074623295e-05, "loss": 0.0235, "step": 18918 }, { "epoch": 2.6515767344078487, "grad_norm": 1.3142791986465454, "learning_rate": 3.577613011241329e-05, "loss": 0.0439, "step": 18919 }, { "epoch": 2.651716888577435, "grad_norm": 1.7962381839752197, "learning_rate": 3.5761779478593635e-05, "loss": 0.1308, "step": 18920 }, { "epoch": 
2.6518570427470216, "grad_norm": 0.0776958018541336, "learning_rate": 3.574742884477398e-05, "loss": 0.0034, "step": 18921 }, { "epoch": 2.651997196916608, "grad_norm": 0.1606481671333313, "learning_rate": 3.573307821095431e-05, "loss": 0.015, "step": 18922 }, { "epoch": 2.652137351086195, "grad_norm": 0.34216776490211487, "learning_rate": 3.5718727577134655e-05, "loss": 0.0096, "step": 18923 }, { "epoch": 2.6522775052557814, "grad_norm": 0.14637905359268188, "learning_rate": 3.570437694331499e-05, "loss": 0.0392, "step": 18924 }, { "epoch": 2.652417659425368, "grad_norm": 0.1564028412103653, "learning_rate": 3.569002630949533e-05, "loss": 0.0086, "step": 18925 }, { "epoch": 2.6525578135949544, "grad_norm": 0.07537053525447845, "learning_rate": 3.5675675675675674e-05, "loss": 0.0125, "step": 18926 }, { "epoch": 2.6526979677645413, "grad_norm": 0.16018956899642944, "learning_rate": 3.566132504185601e-05, "loss": 0.0168, "step": 18927 }, { "epoch": 2.6528381219341277, "grad_norm": 0.09596879035234451, "learning_rate": 3.564697440803635e-05, "loss": 0.0085, "step": 18928 }, { "epoch": 2.652978276103714, "grad_norm": 0.049494411796331406, "learning_rate": 3.5632623774216694e-05, "loss": 0.007, "step": 18929 }, { "epoch": 2.6531184302733006, "grad_norm": 0.592934787273407, "learning_rate": 3.5618273140397036e-05, "loss": 0.0602, "step": 18930 }, { "epoch": 2.653258584442887, "grad_norm": 0.10772743076086044, "learning_rate": 3.560392250657737e-05, "loss": 0.0049, "step": 18931 }, { "epoch": 2.6533987386124736, "grad_norm": 0.2497989684343338, "learning_rate": 3.5589571872757714e-05, "loss": 0.0705, "step": 18932 }, { "epoch": 2.65353889278206, "grad_norm": 0.15350478887557983, "learning_rate": 3.557522123893805e-05, "loss": 0.013, "step": 18933 }, { "epoch": 2.653679046951647, "grad_norm": 0.1272919774055481, "learning_rate": 3.556087060511839e-05, "loss": 0.0194, "step": 18934 }, { "epoch": 2.6538192011212334, "grad_norm": 0.2402794361114502, "learning_rate": 
3.5546519971298726e-05, "loss": 0.0426, "step": 18935 }, { "epoch": 2.65395935529082, "grad_norm": 0.24647396802902222, "learning_rate": 3.553216933747907e-05, "loss": 0.0515, "step": 18936 }, { "epoch": 2.6540995094604063, "grad_norm": 0.06253121793270111, "learning_rate": 3.551781870365941e-05, "loss": 0.0162, "step": 18937 }, { "epoch": 2.654239663629993, "grad_norm": 0.20771507918834686, "learning_rate": 3.550346806983975e-05, "loss": 0.0326, "step": 18938 }, { "epoch": 2.6543798177995797, "grad_norm": 0.16264566779136658, "learning_rate": 3.548911743602009e-05, "loss": 0.029, "step": 18939 }, { "epoch": 2.654519971969166, "grad_norm": 0.19247594475746155, "learning_rate": 3.547476680220043e-05, "loss": 0.0081, "step": 18940 }, { "epoch": 2.6546601261387526, "grad_norm": 0.17784127593040466, "learning_rate": 3.5460416168380765e-05, "loss": 0.0198, "step": 18941 }, { "epoch": 2.654800280308339, "grad_norm": 0.17636115849018097, "learning_rate": 3.544606553456111e-05, "loss": 0.0379, "step": 18942 }, { "epoch": 2.6549404344779255, "grad_norm": 0.09767672419548035, "learning_rate": 3.543171490074144e-05, "loss": 0.0221, "step": 18943 }, { "epoch": 2.6550805886475124, "grad_norm": 0.25687310099601746, "learning_rate": 3.5417364266921785e-05, "loss": 0.0299, "step": 18944 }, { "epoch": 2.655220742817099, "grad_norm": 0.13880766928195953, "learning_rate": 3.540301363310213e-05, "loss": 0.0227, "step": 18945 }, { "epoch": 2.6553608969866853, "grad_norm": 0.12838076055049896, "learning_rate": 3.538866299928247e-05, "loss": 0.0248, "step": 18946 }, { "epoch": 2.6555010511562718, "grad_norm": 0.542231023311615, "learning_rate": 3.5374312365462805e-05, "loss": 0.0443, "step": 18947 }, { "epoch": 2.6556412053258587, "grad_norm": 0.3959196209907532, "learning_rate": 3.535996173164315e-05, "loss": 0.0409, "step": 18948 }, { "epoch": 2.655781359495445, "grad_norm": 0.1578100323677063, "learning_rate": 3.534561109782348e-05, "loss": 0.0273, "step": 18949 }, { "epoch": 
2.6559215136650316, "grad_norm": 0.20516900718212128, "learning_rate": 3.5331260464003824e-05, "loss": 0.0151, "step": 18950 }, { "epoch": 2.656061667834618, "grad_norm": 0.3041323125362396, "learning_rate": 3.531690983018416e-05, "loss": 0.046, "step": 18951 }, { "epoch": 2.6562018220042045, "grad_norm": 0.04740258306264877, "learning_rate": 3.53025591963645e-05, "loss": 0.0037, "step": 18952 }, { "epoch": 2.656341976173791, "grad_norm": 0.15366479754447937, "learning_rate": 3.5288208562544844e-05, "loss": 0.0269, "step": 18953 }, { "epoch": 2.656482130343378, "grad_norm": 0.07111267745494843, "learning_rate": 3.5273857928725186e-05, "loss": 0.0037, "step": 18954 }, { "epoch": 2.6566222845129643, "grad_norm": 0.3880583941936493, "learning_rate": 3.525950729490552e-05, "loss": 0.026, "step": 18955 }, { "epoch": 2.656762438682551, "grad_norm": 0.3831045925617218, "learning_rate": 3.524515666108586e-05, "loss": 0.068, "step": 18956 }, { "epoch": 2.6569025928521373, "grad_norm": 0.14486099779605865, "learning_rate": 3.52308060272662e-05, "loss": 0.008, "step": 18957 }, { "epoch": 2.657042747021724, "grad_norm": 0.14572961628437042, "learning_rate": 3.521645539344654e-05, "loss": 0.0173, "step": 18958 }, { "epoch": 2.6571829011913106, "grad_norm": 0.19364328682422638, "learning_rate": 3.5202104759626876e-05, "loss": 0.0154, "step": 18959 }, { "epoch": 2.657323055360897, "grad_norm": 0.5254031419754028, "learning_rate": 3.518775412580722e-05, "loss": 0.0923, "step": 18960 }, { "epoch": 2.6574632095304835, "grad_norm": 0.39478737115859985, "learning_rate": 3.517340349198756e-05, "loss": 0.016, "step": 18961 }, { "epoch": 2.65760336370007, "grad_norm": 0.3078412413597107, "learning_rate": 3.51590528581679e-05, "loss": 0.0094, "step": 18962 }, { "epoch": 2.6577435178696565, "grad_norm": 0.14455798268318176, "learning_rate": 3.514470222434824e-05, "loss": 0.0263, "step": 18963 }, { "epoch": 2.657883672039243, "grad_norm": 0.12683892250061035, "learning_rate": 
3.513035159052858e-05, "loss": 0.0291, "step": 18964 }, { "epoch": 2.65802382620883, "grad_norm": 0.14042271673679352, "learning_rate": 3.511600095670892e-05, "loss": 0.007, "step": 18965 }, { "epoch": 2.6581639803784163, "grad_norm": 0.3700561821460724, "learning_rate": 3.510165032288926e-05, "loss": 0.0184, "step": 18966 }, { "epoch": 2.6583041345480027, "grad_norm": 1.007859706878662, "learning_rate": 3.50872996890696e-05, "loss": 0.0415, "step": 18967 }, { "epoch": 2.658444288717589, "grad_norm": 1.218210220336914, "learning_rate": 3.5072949055249935e-05, "loss": 0.0565, "step": 18968 }, { "epoch": 2.658584442887176, "grad_norm": 0.162925586104393, "learning_rate": 3.505859842143028e-05, "loss": 0.0196, "step": 18969 }, { "epoch": 2.6587245970567626, "grad_norm": 1.554841160774231, "learning_rate": 3.504424778761062e-05, "loss": 0.0312, "step": 18970 }, { "epoch": 2.658864751226349, "grad_norm": 0.19761858880519867, "learning_rate": 3.5029897153790954e-05, "loss": 0.0311, "step": 18971 }, { "epoch": 2.6590049053959355, "grad_norm": 0.14827671647071838, "learning_rate": 3.5015546519971296e-05, "loss": 0.0048, "step": 18972 }, { "epoch": 2.659145059565522, "grad_norm": 0.49627920985221863, "learning_rate": 3.500119588615164e-05, "loss": 0.0123, "step": 18973 }, { "epoch": 2.6592852137351084, "grad_norm": 0.10764946043491364, "learning_rate": 3.4986845252331974e-05, "loss": 0.0227, "step": 18974 }, { "epoch": 2.6594253679046953, "grad_norm": 0.28822797536849976, "learning_rate": 3.4972494618512316e-05, "loss": 0.0395, "step": 18975 }, { "epoch": 2.6595655220742818, "grad_norm": 0.9102768898010254, "learning_rate": 3.495814398469265e-05, "loss": 0.0145, "step": 18976 }, { "epoch": 2.659705676243868, "grad_norm": 0.42475634813308716, "learning_rate": 3.494379335087299e-05, "loss": 0.0128, "step": 18977 }, { "epoch": 2.6598458304134547, "grad_norm": 0.16807851195335388, "learning_rate": 3.4929442717053335e-05, "loss": 0.0225, "step": 18978 }, { "epoch": 
2.6599859845830416, "grad_norm": 0.1106436625123024, "learning_rate": 3.491509208323367e-05, "loss": 0.0176, "step": 18979 }, { "epoch": 2.660126138752628, "grad_norm": 0.3467520475387573, "learning_rate": 3.490074144941401e-05, "loss": 0.0243, "step": 18980 }, { "epoch": 2.6602662929222145, "grad_norm": 0.24657103419303894, "learning_rate": 3.4886390815594355e-05, "loss": 0.0474, "step": 18981 }, { "epoch": 2.660406447091801, "grad_norm": 0.9045045971870422, "learning_rate": 3.487204018177469e-05, "loss": 0.0224, "step": 18982 }, { "epoch": 2.6605466012613874, "grad_norm": 0.3485991656780243, "learning_rate": 3.485768954795503e-05, "loss": 0.0605, "step": 18983 }, { "epoch": 2.660686755430974, "grad_norm": 0.1156155914068222, "learning_rate": 3.484333891413537e-05, "loss": 0.0116, "step": 18984 }, { "epoch": 2.6608269096005603, "grad_norm": 0.06602780520915985, "learning_rate": 3.482898828031571e-05, "loss": 0.006, "step": 18985 }, { "epoch": 2.6609670637701472, "grad_norm": 0.18632598221302032, "learning_rate": 3.481463764649605e-05, "loss": 0.0201, "step": 18986 }, { "epoch": 2.6611072179397337, "grad_norm": 0.779121994972229, "learning_rate": 3.4800287012676394e-05, "loss": 0.0585, "step": 18987 }, { "epoch": 2.66124737210932, "grad_norm": 0.39528995752334595, "learning_rate": 3.478593637885673e-05, "loss": 0.0339, "step": 18988 }, { "epoch": 2.661387526278907, "grad_norm": 0.23643599450588226, "learning_rate": 3.477158574503707e-05, "loss": 0.0309, "step": 18989 }, { "epoch": 2.6615276804484935, "grad_norm": 0.31903865933418274, "learning_rate": 3.475723511121741e-05, "loss": 0.0072, "step": 18990 }, { "epoch": 2.66166783461808, "grad_norm": 0.21672050654888153, "learning_rate": 3.474288447739775e-05, "loss": 0.0276, "step": 18991 }, { "epoch": 2.6618079887876664, "grad_norm": 0.1100042313337326, "learning_rate": 3.4728533843578084e-05, "loss": 0.0167, "step": 18992 }, { "epoch": 2.661948142957253, "grad_norm": 0.39871272444725037, "learning_rate": 
3.4714183209758427e-05, "loss": 0.1247, "step": 18993 }, { "epoch": 2.6620882971268394, "grad_norm": 0.17371678352355957, "learning_rate": 3.469983257593877e-05, "loss": 0.0036, "step": 18994 }, { "epoch": 2.662228451296426, "grad_norm": 0.15228840708732605, "learning_rate": 3.468548194211911e-05, "loss": 0.0129, "step": 18995 }, { "epoch": 2.6623686054660127, "grad_norm": 0.04049418866634369, "learning_rate": 3.4671131308299446e-05, "loss": 0.0048, "step": 18996 }, { "epoch": 2.662508759635599, "grad_norm": 0.14255087077617645, "learning_rate": 3.465678067447979e-05, "loss": 0.0247, "step": 18997 }, { "epoch": 2.6626489138051856, "grad_norm": 0.41856247186660767, "learning_rate": 3.4642430040660124e-05, "loss": 0.0308, "step": 18998 }, { "epoch": 2.662789067974772, "grad_norm": 0.04924283176660538, "learning_rate": 3.4628079406840466e-05, "loss": 0.004, "step": 18999 }, { "epoch": 2.662929222144359, "grad_norm": 0.1426495611667633, "learning_rate": 3.461372877302081e-05, "loss": 0.0105, "step": 19000 }, { "epoch": 2.6630693763139455, "grad_norm": 0.2284073829650879, "learning_rate": 3.459937813920114e-05, "loss": 0.0187, "step": 19001 }, { "epoch": 2.663209530483532, "grad_norm": 0.24322715401649475, "learning_rate": 3.4585027505381485e-05, "loss": 0.0265, "step": 19002 }, { "epoch": 2.6633496846531184, "grad_norm": 0.2997440695762634, "learning_rate": 3.457067687156183e-05, "loss": 0.036, "step": 19003 }, { "epoch": 2.663489838822705, "grad_norm": 0.26688557863235474, "learning_rate": 3.455632623774216e-05, "loss": 0.0265, "step": 19004 }, { "epoch": 2.6636299929922913, "grad_norm": 0.30681562423706055, "learning_rate": 3.4541975603922505e-05, "loss": 0.0183, "step": 19005 }, { "epoch": 2.663770147161878, "grad_norm": 0.29359689354896545, "learning_rate": 3.452762497010285e-05, "loss": 0.011, "step": 19006 }, { "epoch": 2.6639103013314647, "grad_norm": 0.13415510952472687, "learning_rate": 3.451327433628318e-05, "loss": 0.0162, "step": 19007 }, { "epoch": 
2.664050455501051, "grad_norm": 0.20264385640621185, "learning_rate": 3.4498923702463524e-05, "loss": 0.0212, "step": 19008 }, { "epoch": 2.6641906096706376, "grad_norm": 0.23709861934185028, "learning_rate": 3.448457306864386e-05, "loss": 0.0202, "step": 19009 }, { "epoch": 2.6643307638402245, "grad_norm": 0.18399900197982788, "learning_rate": 3.44702224348242e-05, "loss": 0.0311, "step": 19010 }, { "epoch": 2.664470918009811, "grad_norm": 1.5087790489196777, "learning_rate": 3.4455871801004544e-05, "loss": 0.0328, "step": 19011 }, { "epoch": 2.6646110721793974, "grad_norm": 0.1305650919675827, "learning_rate": 3.444152116718488e-05, "loss": 0.0064, "step": 19012 }, { "epoch": 2.664751226348984, "grad_norm": 1.2339916229248047, "learning_rate": 3.442717053336522e-05, "loss": 0.0511, "step": 19013 }, { "epoch": 2.6648913805185703, "grad_norm": 0.10763300955295563, "learning_rate": 3.4412819899545563e-05, "loss": 0.0063, "step": 19014 }, { "epoch": 2.665031534688157, "grad_norm": 0.4558926522731781, "learning_rate": 3.43984692657259e-05, "loss": 0.0089, "step": 19015 }, { "epoch": 2.6651716888577432, "grad_norm": 0.847318172454834, "learning_rate": 3.438411863190624e-05, "loss": 0.0591, "step": 19016 }, { "epoch": 2.66531184302733, "grad_norm": 0.03884310647845268, "learning_rate": 3.4369767998086576e-05, "loss": 0.0021, "step": 19017 }, { "epoch": 2.6654519971969166, "grad_norm": 0.06958263367414474, "learning_rate": 3.435541736426692e-05, "loss": 0.002, "step": 19018 }, { "epoch": 2.665592151366503, "grad_norm": 0.1667638123035431, "learning_rate": 3.434106673044726e-05, "loss": 0.0426, "step": 19019 }, { "epoch": 2.66573230553609, "grad_norm": 0.27648887038230896, "learning_rate": 3.4326716096627596e-05, "loss": 0.0108, "step": 19020 }, { "epoch": 2.6658724597056764, "grad_norm": 0.09245189279317856, "learning_rate": 3.431236546280794e-05, "loss": 0.0136, "step": 19021 }, { "epoch": 2.666012613875263, "grad_norm": 0.5118001699447632, "learning_rate": 
3.429801482898828e-05, "loss": 0.0276, "step": 19022 }, { "epoch": 2.6661527680448494, "grad_norm": 0.26874810457229614, "learning_rate": 3.4283664195168615e-05, "loss": 0.0943, "step": 19023 }, { "epoch": 2.666292922214436, "grad_norm": 0.23096001148223877, "learning_rate": 3.426931356134896e-05, "loss": 0.0228, "step": 19024 }, { "epoch": 2.6664330763840223, "grad_norm": 0.06933944672346115, "learning_rate": 3.425496292752929e-05, "loss": 0.0034, "step": 19025 }, { "epoch": 2.6665732305536087, "grad_norm": 0.1690792739391327, "learning_rate": 3.4240612293709635e-05, "loss": 0.0155, "step": 19026 }, { "epoch": 2.6667133847231956, "grad_norm": 0.33580997586250305, "learning_rate": 3.422626165988998e-05, "loss": 0.0108, "step": 19027 }, { "epoch": 2.666853538892782, "grad_norm": 0.5261475443840027, "learning_rate": 3.421191102607031e-05, "loss": 0.0769, "step": 19028 }, { "epoch": 2.6669936930623686, "grad_norm": 0.21120865643024445, "learning_rate": 3.4197560392250654e-05, "loss": 0.0145, "step": 19029 }, { "epoch": 2.667133847231955, "grad_norm": 0.18853791058063507, "learning_rate": 3.4183209758430997e-05, "loss": 0.0171, "step": 19030 }, { "epoch": 2.667274001401542, "grad_norm": 0.12237367779016495, "learning_rate": 3.416885912461133e-05, "loss": 0.017, "step": 19031 }, { "epoch": 2.6674141555711284, "grad_norm": 0.12582413852214813, "learning_rate": 3.4154508490791674e-05, "loss": 0.0478, "step": 19032 }, { "epoch": 2.667554309740715, "grad_norm": 0.18743738532066345, "learning_rate": 3.414015785697201e-05, "loss": 0.0122, "step": 19033 }, { "epoch": 2.6676944639103013, "grad_norm": 0.07217022776603699, "learning_rate": 3.412580722315235e-05, "loss": 0.007, "step": 19034 }, { "epoch": 2.6678346180798878, "grad_norm": 0.29194602370262146, "learning_rate": 3.4111456589332694e-05, "loss": 0.0165, "step": 19035 }, { "epoch": 2.667974772249474, "grad_norm": 0.11871635168790817, "learning_rate": 3.409710595551303e-05, "loss": 0.0184, "step": 19036 }, { "epoch": 
2.668114926419061, "grad_norm": 0.3665285110473633, "learning_rate": 3.408275532169337e-05, "loss": 0.0171, "step": 19037 }, { "epoch": 2.6682550805886476, "grad_norm": 0.27697038650512695, "learning_rate": 3.406840468787371e-05, "loss": 0.0245, "step": 19038 }, { "epoch": 2.668395234758234, "grad_norm": 0.1697680801153183, "learning_rate": 3.4054054054054055e-05, "loss": 0.0144, "step": 19039 }, { "epoch": 2.6685353889278205, "grad_norm": 0.21182020008563995, "learning_rate": 3.403970342023439e-05, "loss": 0.0184, "step": 19040 }, { "epoch": 2.6686755430974074, "grad_norm": 0.026870721951127052, "learning_rate": 3.402535278641473e-05, "loss": 0.0018, "step": 19041 }, { "epoch": 2.668815697266994, "grad_norm": 0.3477993309497833, "learning_rate": 3.401100215259507e-05, "loss": 0.051, "step": 19042 }, { "epoch": 2.6689558514365803, "grad_norm": 0.23562243580818176, "learning_rate": 3.399665151877541e-05, "loss": 0.044, "step": 19043 }, { "epoch": 2.6690960056061668, "grad_norm": 0.16737982630729675, "learning_rate": 3.3982300884955746e-05, "loss": 0.0108, "step": 19044 }, { "epoch": 2.6692361597757532, "grad_norm": 0.10520761460065842, "learning_rate": 3.396795025113609e-05, "loss": 0.0085, "step": 19045 }, { "epoch": 2.6693763139453397, "grad_norm": 0.23197759687900543, "learning_rate": 3.395359961731643e-05, "loss": 0.037, "step": 19046 }, { "epoch": 2.669516468114926, "grad_norm": 0.46111539006233215, "learning_rate": 3.393924898349677e-05, "loss": 0.0416, "step": 19047 }, { "epoch": 2.669656622284513, "grad_norm": 0.19760540127754211, "learning_rate": 3.392489834967711e-05, "loss": 0.0232, "step": 19048 }, { "epoch": 2.6697967764540995, "grad_norm": 0.07133524864912033, "learning_rate": 3.391054771585745e-05, "loss": 0.0304, "step": 19049 }, { "epoch": 2.669936930623686, "grad_norm": 0.3377271890640259, "learning_rate": 3.3896197082037785e-05, "loss": 0.0378, "step": 19050 }, { "epoch": 2.6700770847932724, "grad_norm": 0.3027612864971161, "learning_rate": 
3.388184644821813e-05, "loss": 0.072, "step": 19051 }, { "epoch": 2.6702172389628593, "grad_norm": 0.30920156836509705, "learning_rate": 3.386749581439846e-05, "loss": 0.012, "step": 19052 }, { "epoch": 2.670357393132446, "grad_norm": 0.2439681589603424, "learning_rate": 3.3853145180578804e-05, "loss": 0.0158, "step": 19053 }, { "epoch": 2.6704975473020323, "grad_norm": 0.04542482644319534, "learning_rate": 3.3838794546759146e-05, "loss": 0.0021, "step": 19054 }, { "epoch": 2.6706377014716187, "grad_norm": 0.2715564966201782, "learning_rate": 3.382444391293949e-05, "loss": 0.0152, "step": 19055 }, { "epoch": 2.670777855641205, "grad_norm": 0.08921615779399872, "learning_rate": 3.3810093279119824e-05, "loss": 0.0061, "step": 19056 }, { "epoch": 2.6709180098107916, "grad_norm": 0.18041425943374634, "learning_rate": 3.3795742645300166e-05, "loss": 0.0159, "step": 19057 }, { "epoch": 2.6710581639803785, "grad_norm": 0.44078949093818665, "learning_rate": 3.37813920114805e-05, "loss": 0.0239, "step": 19058 }, { "epoch": 2.671198318149965, "grad_norm": 0.2560601532459259, "learning_rate": 3.376704137766084e-05, "loss": 0.0183, "step": 19059 }, { "epoch": 2.6713384723195515, "grad_norm": 0.7539780735969543, "learning_rate": 3.375269074384118e-05, "loss": 0.0201, "step": 19060 }, { "epoch": 2.671478626489138, "grad_norm": 0.13882705569267273, "learning_rate": 3.373834011002152e-05, "loss": 0.0134, "step": 19061 }, { "epoch": 2.671618780658725, "grad_norm": 0.917358934879303, "learning_rate": 3.372398947620186e-05, "loss": 0.0388, "step": 19062 }, { "epoch": 2.6717589348283113, "grad_norm": 0.5067930221557617, "learning_rate": 3.3709638842382205e-05, "loss": 0.0262, "step": 19063 }, { "epoch": 2.6718990889978977, "grad_norm": 0.3193817138671875, "learning_rate": 3.369528820856254e-05, "loss": 0.0144, "step": 19064 }, { "epoch": 2.672039243167484, "grad_norm": 0.1851431280374527, "learning_rate": 3.368093757474288e-05, "loss": 0.0057, "step": 19065 }, { "epoch": 
2.6721793973370707, "grad_norm": 0.06776344776153564, "learning_rate": 3.366658694092322e-05, "loss": 0.0072, "step": 19066 }, { "epoch": 2.672319551506657, "grad_norm": 0.4747142195701599, "learning_rate": 3.365223630710356e-05, "loss": 0.0135, "step": 19067 }, { "epoch": 2.672459705676244, "grad_norm": 0.839833676815033, "learning_rate": 3.3637885673283895e-05, "loss": 0.0418, "step": 19068 }, { "epoch": 2.6725998598458305, "grad_norm": 2.1092984676361084, "learning_rate": 3.362353503946424e-05, "loss": 0.1103, "step": 19069 }, { "epoch": 2.672740014015417, "grad_norm": 0.06647612154483795, "learning_rate": 3.360918440564458e-05, "loss": 0.0029, "step": 19070 }, { "epoch": 2.6728801681850034, "grad_norm": 0.4686107039451599, "learning_rate": 3.359483377182492e-05, "loss": 0.032, "step": 19071 }, { "epoch": 2.6730203223545903, "grad_norm": 0.0698034018278122, "learning_rate": 3.358048313800526e-05, "loss": 0.0037, "step": 19072 }, { "epoch": 2.6731604765241768, "grad_norm": 0.15699800848960876, "learning_rate": 3.35661325041856e-05, "loss": 0.0282, "step": 19073 }, { "epoch": 2.6733006306937632, "grad_norm": 0.37768080830574036, "learning_rate": 3.355178187036594e-05, "loss": 0.0261, "step": 19074 }, { "epoch": 2.6734407848633497, "grad_norm": 0.581476628780365, "learning_rate": 3.3537431236546276e-05, "loss": 0.0865, "step": 19075 }, { "epoch": 2.673580939032936, "grad_norm": 0.13296441733837128, "learning_rate": 3.352308060272662e-05, "loss": 0.0227, "step": 19076 }, { "epoch": 2.6737210932025226, "grad_norm": 0.2900911569595337, "learning_rate": 3.3508729968906954e-05, "loss": 0.0482, "step": 19077 }, { "epoch": 2.673861247372109, "grad_norm": 0.478647381067276, "learning_rate": 3.3494379335087296e-05, "loss": 0.0749, "step": 19078 }, { "epoch": 2.674001401541696, "grad_norm": 0.34488147497177124, "learning_rate": 3.348002870126764e-05, "loss": 0.0203, "step": 19079 }, { "epoch": 2.6741415557112824, "grad_norm": 0.07160986959934235, "learning_rate": 
3.346567806744798e-05, "loss": 0.0163, "step": 19080 }, { "epoch": 2.674281709880869, "grad_norm": 0.15958480536937714, "learning_rate": 3.3451327433628316e-05, "loss": 0.0323, "step": 19081 }, { "epoch": 2.6744218640504553, "grad_norm": 0.06173427402973175, "learning_rate": 3.343697679980866e-05, "loss": 0.0047, "step": 19082 }, { "epoch": 2.6745620182200422, "grad_norm": 0.1466919630765915, "learning_rate": 3.342262616598899e-05, "loss": 0.0191, "step": 19083 }, { "epoch": 2.6747021723896287, "grad_norm": 0.3475992977619171, "learning_rate": 3.3408275532169335e-05, "loss": 0.0249, "step": 19084 }, { "epoch": 2.674842326559215, "grad_norm": 0.24090787768363953, "learning_rate": 3.339392489834967e-05, "loss": 0.0382, "step": 19085 }, { "epoch": 2.6749824807288016, "grad_norm": 0.11143588274717331, "learning_rate": 3.337957426453001e-05, "loss": 0.029, "step": 19086 }, { "epoch": 2.675122634898388, "grad_norm": 0.18509972095489502, "learning_rate": 3.3365223630710355e-05, "loss": 0.0354, "step": 19087 }, { "epoch": 2.6752627890679745, "grad_norm": 0.27674421668052673, "learning_rate": 3.33508729968907e-05, "loss": 0.0399, "step": 19088 }, { "epoch": 2.6754029432375614, "grad_norm": 0.09937164187431335, "learning_rate": 3.333652236307103e-05, "loss": 0.008, "step": 19089 }, { "epoch": 2.675543097407148, "grad_norm": 0.0679984912276268, "learning_rate": 3.3322171729251374e-05, "loss": 0.0084, "step": 19090 }, { "epoch": 2.6756832515767344, "grad_norm": 0.5039976239204407, "learning_rate": 3.330782109543171e-05, "loss": 0.0264, "step": 19091 }, { "epoch": 2.675823405746321, "grad_norm": 0.25375527143478394, "learning_rate": 3.329347046161205e-05, "loss": 0.0044, "step": 19092 }, { "epoch": 2.6759635599159077, "grad_norm": 0.6165251135826111, "learning_rate": 3.327911982779239e-05, "loss": 0.0322, "step": 19093 }, { "epoch": 2.676103714085494, "grad_norm": 0.2337779402732849, "learning_rate": 3.326476919397273e-05, "loss": 0.0282, "step": 19094 }, { "epoch": 
2.6762438682550806, "grad_norm": 0.2859530448913574, "learning_rate": 3.325041856015307e-05, "loss": 0.0093, "step": 19095 }, { "epoch": 2.676384022424667, "grad_norm": 0.027607286348938942, "learning_rate": 3.3236067926333413e-05, "loss": 0.0022, "step": 19096 }, { "epoch": 2.6765241765942536, "grad_norm": 0.18494337797164917, "learning_rate": 3.322171729251375e-05, "loss": 0.0131, "step": 19097 }, { "epoch": 2.67666433076384, "grad_norm": 0.24858082830905914, "learning_rate": 3.320736665869409e-05, "loss": 0.0138, "step": 19098 }, { "epoch": 2.676804484933427, "grad_norm": 0.18532218039035797, "learning_rate": 3.3193016024874426e-05, "loss": 0.0355, "step": 19099 }, { "epoch": 2.6769446391030134, "grad_norm": 0.16465339064598083, "learning_rate": 3.317866539105477e-05, "loss": 0.0205, "step": 19100 }, { "epoch": 2.6770847932726, "grad_norm": 0.24401308596134186, "learning_rate": 3.3164314757235104e-05, "loss": 0.0489, "step": 19101 }, { "epoch": 2.6772249474421863, "grad_norm": 0.1126483604311943, "learning_rate": 3.3149964123415446e-05, "loss": 0.0137, "step": 19102 }, { "epoch": 2.677365101611773, "grad_norm": 0.3556588292121887, "learning_rate": 3.313561348959579e-05, "loss": 0.0661, "step": 19103 }, { "epoch": 2.6775052557813597, "grad_norm": 0.09374068677425385, "learning_rate": 3.312126285577613e-05, "loss": 0.0091, "step": 19104 }, { "epoch": 2.677645409950946, "grad_norm": 0.6063767671585083, "learning_rate": 3.3106912221956465e-05, "loss": 0.0505, "step": 19105 }, { "epoch": 2.6777855641205326, "grad_norm": 0.3379063606262207, "learning_rate": 3.309256158813681e-05, "loss": 0.0353, "step": 19106 }, { "epoch": 2.677925718290119, "grad_norm": 0.4798426628112793, "learning_rate": 3.307821095431714e-05, "loss": 0.0106, "step": 19107 }, { "epoch": 2.6780658724597055, "grad_norm": 0.41608569025993347, "learning_rate": 3.3063860320497485e-05, "loss": 0.1626, "step": 19108 }, { "epoch": 2.678206026629292, "grad_norm": 0.8105961680412292, "learning_rate": 
3.304950968667782e-05, "loss": 0.0207, "step": 19109 }, { "epoch": 2.678346180798879, "grad_norm": 0.08732123672962189, "learning_rate": 3.303515905285816e-05, "loss": 0.0046, "step": 19110 }, { "epoch": 2.6784863349684653, "grad_norm": 0.3657243251800537, "learning_rate": 3.3020808419038504e-05, "loss": 0.0884, "step": 19111 }, { "epoch": 2.678626489138052, "grad_norm": 0.003782531712204218, "learning_rate": 3.3006457785218847e-05, "loss": 0.0004, "step": 19112 }, { "epoch": 2.6787666433076383, "grad_norm": 0.04383417218923569, "learning_rate": 3.299210715139919e-05, "loss": 0.0044, "step": 19113 }, { "epoch": 2.678906797477225, "grad_norm": 0.14292454719543457, "learning_rate": 3.2977756517579524e-05, "loss": 0.007, "step": 19114 }, { "epoch": 2.6790469516468116, "grad_norm": 0.27917948365211487, "learning_rate": 3.2963405883759866e-05, "loss": 0.0135, "step": 19115 }, { "epoch": 2.679187105816398, "grad_norm": 0.16126903891563416, "learning_rate": 3.29490552499402e-05, "loss": 0.0092, "step": 19116 }, { "epoch": 2.6793272599859845, "grad_norm": 0.16355693340301514, "learning_rate": 3.2934704616120544e-05, "loss": 0.0198, "step": 19117 }, { "epoch": 2.679467414155571, "grad_norm": 0.01838826760649681, "learning_rate": 3.292035398230088e-05, "loss": 0.0009, "step": 19118 }, { "epoch": 2.6796075683251575, "grad_norm": 0.05736461654305458, "learning_rate": 3.290600334848122e-05, "loss": 0.0038, "step": 19119 }, { "epoch": 2.6797477224947444, "grad_norm": 0.8242635130882263, "learning_rate": 3.289165271466156e-05, "loss": 0.0687, "step": 19120 }, { "epoch": 2.679887876664331, "grad_norm": 0.3225947618484497, "learning_rate": 3.2877302080841905e-05, "loss": 0.0911, "step": 19121 }, { "epoch": 2.6800280308339173, "grad_norm": 0.10817845165729523, "learning_rate": 3.286295144702224e-05, "loss": 0.0085, "step": 19122 }, { "epoch": 2.6801681850035037, "grad_norm": 0.11754167079925537, "learning_rate": 3.284860081320258e-05, "loss": 0.0123, "step": 19123 }, { "epoch": 
2.6803083391730906, "grad_norm": 0.09311207383871078, "learning_rate": 3.283425017938292e-05, "loss": 0.0107, "step": 19124 }, { "epoch": 2.680448493342677, "grad_norm": 0.25137677788734436, "learning_rate": 3.281989954556326e-05, "loss": 0.0247, "step": 19125 }, { "epoch": 2.6805886475122636, "grad_norm": 0.27546167373657227, "learning_rate": 3.2805548911743595e-05, "loss": 0.0708, "step": 19126 }, { "epoch": 2.68072880168185, "grad_norm": 0.17241717875003815, "learning_rate": 3.279119827792394e-05, "loss": 0.0371, "step": 19127 }, { "epoch": 2.6808689558514365, "grad_norm": 0.3461940586566925, "learning_rate": 3.277684764410428e-05, "loss": 0.0465, "step": 19128 }, { "epoch": 2.681009110021023, "grad_norm": 0.23759038746356964, "learning_rate": 3.276249701028462e-05, "loss": 0.0414, "step": 19129 }, { "epoch": 2.6811492641906094, "grad_norm": 0.3206912875175476, "learning_rate": 3.274814637646496e-05, "loss": 0.0292, "step": 19130 }, { "epoch": 2.6812894183601963, "grad_norm": 0.636391282081604, "learning_rate": 3.27337957426453e-05, "loss": 0.0376, "step": 19131 }, { "epoch": 2.6814295725297828, "grad_norm": 0.29441094398498535, "learning_rate": 3.2719445108825635e-05, "loss": 0.0202, "step": 19132 }, { "epoch": 2.681569726699369, "grad_norm": 0.20151387155056, "learning_rate": 3.270509447500598e-05, "loss": 0.025, "step": 19133 }, { "epoch": 2.681709880868956, "grad_norm": 0.1554955244064331, "learning_rate": 3.269074384118631e-05, "loss": 0.0208, "step": 19134 }, { "epoch": 2.6818500350385426, "grad_norm": 0.04337377846240997, "learning_rate": 3.2676393207366654e-05, "loss": 0.0072, "step": 19135 }, { "epoch": 2.681990189208129, "grad_norm": 0.33683398365974426, "learning_rate": 3.2662042573546996e-05, "loss": 0.0131, "step": 19136 }, { "epoch": 2.6821303433777155, "grad_norm": 0.3223258852958679, "learning_rate": 3.264769193972734e-05, "loss": 0.0299, "step": 19137 }, { "epoch": 2.682270497547302, "grad_norm": 0.36297956109046936, "learning_rate": 
3.2633341305907674e-05, "loss": 0.033, "step": 19138 }, { "epoch": 2.6824106517168884, "grad_norm": 0.10239852964878082, "learning_rate": 3.2618990672088016e-05, "loss": 0.0022, "step": 19139 }, { "epoch": 2.682550805886475, "grad_norm": 0.6311405897140503, "learning_rate": 3.260464003826835e-05, "loss": 0.0903, "step": 19140 }, { "epoch": 2.682690960056062, "grad_norm": 0.017206447198987007, "learning_rate": 3.259028940444869e-05, "loss": 0.0012, "step": 19141 }, { "epoch": 2.6828311142256482, "grad_norm": 0.43335095047950745, "learning_rate": 3.257593877062903e-05, "loss": 0.0715, "step": 19142 }, { "epoch": 2.6829712683952347, "grad_norm": 0.21185848116874695, "learning_rate": 3.256158813680937e-05, "loss": 0.04, "step": 19143 }, { "epoch": 2.683111422564821, "grad_norm": 0.19532856345176697, "learning_rate": 3.254723750298971e-05, "loss": 0.0214, "step": 19144 }, { "epoch": 2.683251576734408, "grad_norm": 0.22247779369354248, "learning_rate": 3.2532886869170055e-05, "loss": 0.0195, "step": 19145 }, { "epoch": 2.6833917309039945, "grad_norm": 0.061474330723285675, "learning_rate": 3.251853623535039e-05, "loss": 0.0085, "step": 19146 }, { "epoch": 2.683531885073581, "grad_norm": 0.0926731750369072, "learning_rate": 3.250418560153073e-05, "loss": 0.0067, "step": 19147 }, { "epoch": 2.6836720392431674, "grad_norm": 0.3298453986644745, "learning_rate": 3.2489834967711075e-05, "loss": 0.0444, "step": 19148 }, { "epoch": 2.683812193412754, "grad_norm": 0.12941385805606842, "learning_rate": 3.247548433389141e-05, "loss": 0.0104, "step": 19149 }, { "epoch": 2.6839523475823404, "grad_norm": 0.2862327992916107, "learning_rate": 3.246113370007175e-05, "loss": 0.0326, "step": 19150 }, { "epoch": 2.6840925017519273, "grad_norm": 0.29845452308654785, "learning_rate": 3.244678306625209e-05, "loss": 0.0243, "step": 19151 }, { "epoch": 2.6842326559215137, "grad_norm": 0.09152431786060333, "learning_rate": 3.243243243243243e-05, "loss": 0.0112, "step": 19152 }, { "epoch": 
2.6843728100911, "grad_norm": 0.25802135467529297, "learning_rate": 3.241808179861277e-05, "loss": 0.0094, "step": 19153 }, { "epoch": 2.6845129642606866, "grad_norm": 0.2895890772342682, "learning_rate": 3.2403731164793114e-05, "loss": 0.035, "step": 19154 }, { "epoch": 2.6846531184302735, "grad_norm": 0.2526528537273407, "learning_rate": 3.238938053097345e-05, "loss": 0.0169, "step": 19155 }, { "epoch": 2.68479327259986, "grad_norm": 0.4358956515789032, "learning_rate": 3.237502989715379e-05, "loss": 0.0283, "step": 19156 }, { "epoch": 2.6849334267694465, "grad_norm": 0.4859786927700043, "learning_rate": 3.2360679263334126e-05, "loss": 0.0231, "step": 19157 }, { "epoch": 2.685073580939033, "grad_norm": 0.19008463621139526, "learning_rate": 3.234632862951447e-05, "loss": 0.0139, "step": 19158 }, { "epoch": 2.6852137351086194, "grad_norm": 0.11303292214870453, "learning_rate": 3.2331977995694804e-05, "loss": 0.021, "step": 19159 }, { "epoch": 2.685353889278206, "grad_norm": 0.09639807790517807, "learning_rate": 3.2317627361875146e-05, "loss": 0.0063, "step": 19160 }, { "epoch": 2.6854940434477923, "grad_norm": 0.04655229672789574, "learning_rate": 3.230327672805549e-05, "loss": 0.0024, "step": 19161 }, { "epoch": 2.685634197617379, "grad_norm": 0.1499675065279007, "learning_rate": 3.228892609423583e-05, "loss": 0.0236, "step": 19162 }, { "epoch": 2.6857743517869657, "grad_norm": 0.9135861396789551, "learning_rate": 3.2274575460416166e-05, "loss": 0.0334, "step": 19163 }, { "epoch": 2.685914505956552, "grad_norm": 0.119294673204422, "learning_rate": 3.226022482659651e-05, "loss": 0.0022, "step": 19164 }, { "epoch": 2.686054660126139, "grad_norm": 0.17394724488258362, "learning_rate": 3.224587419277684e-05, "loss": 0.0071, "step": 19165 }, { "epoch": 2.6861948142957255, "grad_norm": 0.19721810519695282, "learning_rate": 3.2231523558957185e-05, "loss": 0.0104, "step": 19166 }, { "epoch": 2.686334968465312, "grad_norm": 0.35553625226020813, "learning_rate": 
3.221717292513752e-05, "loss": 0.0156, "step": 19167 }, { "epoch": 2.6864751226348984, "grad_norm": 0.5617117881774902, "learning_rate": 3.220282229131786e-05, "loss": 0.0291, "step": 19168 }, { "epoch": 2.686615276804485, "grad_norm": 0.2425331026315689, "learning_rate": 3.2188471657498205e-05, "loss": 0.0059, "step": 19169 }, { "epoch": 2.6867554309740713, "grad_norm": 1.1779361963272095, "learning_rate": 3.217412102367855e-05, "loss": 0.0563, "step": 19170 }, { "epoch": 2.686895585143658, "grad_norm": 0.23621971905231476, "learning_rate": 3.215977038985888e-05, "loss": 0.028, "step": 19171 }, { "epoch": 2.6870357393132447, "grad_norm": 0.22600258886814117, "learning_rate": 3.2145419756039224e-05, "loss": 0.0118, "step": 19172 }, { "epoch": 2.687175893482831, "grad_norm": 0.047861602157354355, "learning_rate": 3.213106912221956e-05, "loss": 0.0043, "step": 19173 }, { "epoch": 2.6873160476524176, "grad_norm": 0.6564404368400574, "learning_rate": 3.21167184883999e-05, "loss": 0.0305, "step": 19174 }, { "epoch": 2.687456201822004, "grad_norm": 1.4879180192947388, "learning_rate": 3.210236785458024e-05, "loss": 0.0705, "step": 19175 }, { "epoch": 2.687596355991591, "grad_norm": 0.47716811299324036, "learning_rate": 3.208801722076058e-05, "loss": 0.0518, "step": 19176 }, { "epoch": 2.6877365101611774, "grad_norm": 0.03959793597459793, "learning_rate": 3.207366658694092e-05, "loss": 0.0028, "step": 19177 }, { "epoch": 2.687876664330764, "grad_norm": 0.15340334177017212, "learning_rate": 3.205931595312126e-05, "loss": 0.0222, "step": 19178 }, { "epoch": 2.6880168185003503, "grad_norm": 0.3232771158218384, "learning_rate": 3.20449653193016e-05, "loss": 0.0252, "step": 19179 }, { "epoch": 2.688156972669937, "grad_norm": 0.2110404521226883, "learning_rate": 3.203061468548194e-05, "loss": 0.041, "step": 19180 }, { "epoch": 2.6882971268395233, "grad_norm": 0.12288319319486618, "learning_rate": 3.2016264051662276e-05, "loss": 0.0067, "step": 19181 }, { "epoch": 
2.68843728100911, "grad_norm": 0.5726479291915894, "learning_rate": 3.200191341784262e-05, "loss": 0.037, "step": 19182 }, { "epoch": 2.6885774351786966, "grad_norm": 0.07861670851707458, "learning_rate": 3.1987562784022954e-05, "loss": 0.0073, "step": 19183 }, { "epoch": 2.688717589348283, "grad_norm": 0.15803228318691254, "learning_rate": 3.1973212150203296e-05, "loss": 0.0109, "step": 19184 }, { "epoch": 2.6888577435178695, "grad_norm": 0.1511002779006958, "learning_rate": 3.195886151638364e-05, "loss": 0.014, "step": 19185 }, { "epoch": 2.6889978976874565, "grad_norm": 0.17733974754810333, "learning_rate": 3.194451088256398e-05, "loss": 0.0551, "step": 19186 }, { "epoch": 2.689138051857043, "grad_norm": 0.19843867421150208, "learning_rate": 3.193016024874432e-05, "loss": 0.0173, "step": 19187 }, { "epoch": 2.6892782060266294, "grad_norm": 0.09071072936058044, "learning_rate": 3.191580961492466e-05, "loss": 0.0026, "step": 19188 }, { "epoch": 2.689418360196216, "grad_norm": 0.3565714955329895, "learning_rate": 3.1901458981105e-05, "loss": 0.0221, "step": 19189 }, { "epoch": 2.6895585143658023, "grad_norm": 0.12883344292640686, "learning_rate": 3.1887108347285335e-05, "loss": 0.0174, "step": 19190 }, { "epoch": 2.6896986685353887, "grad_norm": 1.3392831087112427, "learning_rate": 3.187275771346568e-05, "loss": 0.0258, "step": 19191 }, { "epoch": 2.689838822704975, "grad_norm": 0.223307803273201, "learning_rate": 3.185840707964601e-05, "loss": 0.0091, "step": 19192 }, { "epoch": 2.689978976874562, "grad_norm": 0.20991235971450806, "learning_rate": 3.1844056445826354e-05, "loss": 0.0199, "step": 19193 }, { "epoch": 2.6901191310441486, "grad_norm": 0.876562774181366, "learning_rate": 3.1829705812006696e-05, "loss": 0.0532, "step": 19194 }, { "epoch": 2.690259285213735, "grad_norm": 0.1591513454914093, "learning_rate": 3.181535517818704e-05, "loss": 0.0146, "step": 19195 }, { "epoch": 2.6903994393833215, "grad_norm": 0.17315897345542908, "learning_rate": 
3.1801004544367374e-05, "loss": 0.0407, "step": 19196 }, { "epoch": 2.6905395935529084, "grad_norm": 0.06539208441972733, "learning_rate": 3.1786653910547716e-05, "loss": 0.0057, "step": 19197 }, { "epoch": 2.690679747722495, "grad_norm": 0.10878187417984009, "learning_rate": 3.177230327672805e-05, "loss": 0.0052, "step": 19198 }, { "epoch": 2.6908199018920813, "grad_norm": 0.06214870885014534, "learning_rate": 3.1757952642908394e-05, "loss": 0.0044, "step": 19199 }, { "epoch": 2.6909600560616678, "grad_norm": 0.04756125062704086, "learning_rate": 3.174360200908873e-05, "loss": 0.0043, "step": 19200 }, { "epoch": 2.6911002102312542, "grad_norm": 0.05723460763692856, "learning_rate": 3.172925137526907e-05, "loss": 0.0052, "step": 19201 }, { "epoch": 2.6912403644008407, "grad_norm": 0.11539015173912048, "learning_rate": 3.171490074144941e-05, "loss": 0.0157, "step": 19202 }, { "epoch": 2.6913805185704276, "grad_norm": 0.20039841532707214, "learning_rate": 3.1700550107629755e-05, "loss": 0.0251, "step": 19203 }, { "epoch": 2.691520672740014, "grad_norm": 0.11434441804885864, "learning_rate": 3.168619947381009e-05, "loss": 0.0183, "step": 19204 }, { "epoch": 2.6916608269096005, "grad_norm": 0.058986179530620575, "learning_rate": 3.167184883999043e-05, "loss": 0.0035, "step": 19205 }, { "epoch": 2.691800981079187, "grad_norm": 0.23571625351905823, "learning_rate": 3.165749820617077e-05, "loss": 0.0214, "step": 19206 }, { "epoch": 2.691941135248774, "grad_norm": 0.7661653161048889, "learning_rate": 3.164314757235111e-05, "loss": 0.0616, "step": 19207 }, { "epoch": 2.6920812894183603, "grad_norm": 0.22897180914878845, "learning_rate": 3.1628796938531445e-05, "loss": 0.0092, "step": 19208 }, { "epoch": 2.692221443587947, "grad_norm": 0.03336496278643608, "learning_rate": 3.161444630471179e-05, "loss": 0.0013, "step": 19209 }, { "epoch": 2.6923615977575333, "grad_norm": 0.4114571213722229, "learning_rate": 3.160009567089213e-05, "loss": 0.062, "step": 19210 }, { "epoch": 
2.6925017519271197, "grad_norm": 0.0611628033220768, "learning_rate": 3.158574503707247e-05, "loss": 0.003, "step": 19211 }, { "epoch": 2.692641906096706, "grad_norm": 0.39568981528282166, "learning_rate": 3.157139440325281e-05, "loss": 0.0392, "step": 19212 }, { "epoch": 2.692782060266293, "grad_norm": 0.3336886167526245, "learning_rate": 3.155704376943315e-05, "loss": 0.0385, "step": 19213 }, { "epoch": 2.6929222144358795, "grad_norm": 0.10875049978494644, "learning_rate": 3.1542693135613485e-05, "loss": 0.0033, "step": 19214 }, { "epoch": 2.693062368605466, "grad_norm": 0.7708019614219666, "learning_rate": 3.152834250179383e-05, "loss": 0.0257, "step": 19215 }, { "epoch": 2.6932025227750525, "grad_norm": 0.5931612849235535, "learning_rate": 3.151399186797416e-05, "loss": 0.0474, "step": 19216 }, { "epoch": 2.6933426769446394, "grad_norm": 0.2632920742034912, "learning_rate": 3.1499641234154504e-05, "loss": 0.0043, "step": 19217 }, { "epoch": 2.693482831114226, "grad_norm": 0.8514915108680725, "learning_rate": 3.1485290600334846e-05, "loss": 0.1476, "step": 19218 }, { "epoch": 2.6936229852838123, "grad_norm": 1.1172393560409546, "learning_rate": 3.147093996651519e-05, "loss": 0.0936, "step": 19219 }, { "epoch": 2.6937631394533987, "grad_norm": 1.2590055465698242, "learning_rate": 3.1456589332695524e-05, "loss": 0.039, "step": 19220 }, { "epoch": 2.693903293622985, "grad_norm": 0.3902694582939148, "learning_rate": 3.1442238698875866e-05, "loss": 0.0467, "step": 19221 }, { "epoch": 2.6940434477925717, "grad_norm": 0.2569301724433899, "learning_rate": 3.142788806505621e-05, "loss": 0.0304, "step": 19222 }, { "epoch": 2.694183601962158, "grad_norm": 0.18170863389968872, "learning_rate": 3.141353743123654e-05, "loss": 0.0422, "step": 19223 }, { "epoch": 2.694323756131745, "grad_norm": 0.2346627414226532, "learning_rate": 3.1399186797416885e-05, "loss": 0.0117, "step": 19224 }, { "epoch": 2.6944639103013315, "grad_norm": 0.13657331466674805, "learning_rate": 
3.138483616359722e-05, "loss": 0.0203, "step": 19225 }, { "epoch": 2.694604064470918, "grad_norm": 0.2002572864294052, "learning_rate": 3.137048552977756e-05, "loss": 0.0259, "step": 19226 }, { "epoch": 2.6947442186405044, "grad_norm": 0.2764512598514557, "learning_rate": 3.1356134895957905e-05, "loss": 0.0433, "step": 19227 }, { "epoch": 2.6948843728100913, "grad_norm": 0.15916398167610168, "learning_rate": 3.134178426213824e-05, "loss": 0.0288, "step": 19228 }, { "epoch": 2.6950245269796778, "grad_norm": 0.2500961720943451, "learning_rate": 3.132743362831858e-05, "loss": 0.016, "step": 19229 }, { "epoch": 2.695164681149264, "grad_norm": 0.16817136108875275, "learning_rate": 3.1313082994498924e-05, "loss": 0.0101, "step": 19230 }, { "epoch": 2.6953048353188507, "grad_norm": 0.062395401298999786, "learning_rate": 3.129873236067926e-05, "loss": 0.0063, "step": 19231 }, { "epoch": 2.695444989488437, "grad_norm": 0.08706668764352798, "learning_rate": 3.12843817268596e-05, "loss": 0.0137, "step": 19232 }, { "epoch": 2.6955851436580236, "grad_norm": 0.35383933782577515, "learning_rate": 3.127003109303994e-05, "loss": 0.01, "step": 19233 }, { "epoch": 2.6957252978276105, "grad_norm": 0.27063995599746704, "learning_rate": 3.125568045922028e-05, "loss": 0.0689, "step": 19234 }, { "epoch": 2.695865451997197, "grad_norm": 0.09047505259513855, "learning_rate": 3.124132982540062e-05, "loss": 0.0137, "step": 19235 }, { "epoch": 2.6960056061667834, "grad_norm": 0.15810434520244598, "learning_rate": 3.122697919158096e-05, "loss": 0.0112, "step": 19236 }, { "epoch": 2.69614576033637, "grad_norm": 0.4797719120979309, "learning_rate": 3.12126285577613e-05, "loss": 0.0589, "step": 19237 }, { "epoch": 2.696285914505957, "grad_norm": 0.07274297624826431, "learning_rate": 3.119827792394164e-05, "loss": 0.005, "step": 19238 }, { "epoch": 2.6964260686755432, "grad_norm": 0.23896664381027222, "learning_rate": 3.1183927290121976e-05, "loss": 0.0228, "step": 19239 }, { "epoch": 
2.6965662228451297, "grad_norm": 0.7867727875709534, "learning_rate": 3.116957665630232e-05, "loss": 0.0166, "step": 19240 }, { "epoch": 2.696706377014716, "grad_norm": 0.5129566192626953, "learning_rate": 3.1155226022482654e-05, "loss": 0.0249, "step": 19241 }, { "epoch": 2.6968465311843026, "grad_norm": 0.17058336734771729, "learning_rate": 3.1140875388662996e-05, "loss": 0.0267, "step": 19242 }, { "epoch": 2.696986685353889, "grad_norm": 0.5474103093147278, "learning_rate": 3.112652475484334e-05, "loss": 0.0419, "step": 19243 }, { "epoch": 2.697126839523476, "grad_norm": 0.38460883498191833, "learning_rate": 3.111217412102367e-05, "loss": 0.0222, "step": 19244 }, { "epoch": 2.6972669936930624, "grad_norm": 0.08524461835622787, "learning_rate": 3.1097823487204015e-05, "loss": 0.0271, "step": 19245 }, { "epoch": 2.697407147862649, "grad_norm": 0.40569591522216797, "learning_rate": 3.108347285338436e-05, "loss": 0.058, "step": 19246 }, { "epoch": 2.6975473020322354, "grad_norm": 0.13875296711921692, "learning_rate": 3.106912221956469e-05, "loss": 0.0246, "step": 19247 }, { "epoch": 2.6976874562018223, "grad_norm": 0.23318496346473694, "learning_rate": 3.1054771585745035e-05, "loss": 0.0077, "step": 19248 }, { "epoch": 2.6978276103714087, "grad_norm": 1.1501868963241577, "learning_rate": 3.104042095192537e-05, "loss": 0.1671, "step": 19249 }, { "epoch": 2.697967764540995, "grad_norm": 0.014810135588049889, "learning_rate": 3.102607031810571e-05, "loss": 0.001, "step": 19250 }, { "epoch": 2.6981079187105816, "grad_norm": 0.0525469109416008, "learning_rate": 3.1011719684286055e-05, "loss": 0.0033, "step": 19251 }, { "epoch": 2.698248072880168, "grad_norm": 0.23817744851112366, "learning_rate": 3.099736905046639e-05, "loss": 0.0592, "step": 19252 }, { "epoch": 2.6983882270497546, "grad_norm": 0.07781829684972763, "learning_rate": 3.098301841664673e-05, "loss": 0.0032, "step": 19253 }, { "epoch": 2.698528381219341, "grad_norm": 0.3349219262599945, "learning_rate": 
3.0968667782827074e-05, "loss": 0.0109, "step": 19254 }, { "epoch": 2.698668535388928, "grad_norm": 0.22210752964019775, "learning_rate": 3.095431714900741e-05, "loss": 0.0499, "step": 19255 }, { "epoch": 2.6988086895585144, "grad_norm": 0.3338225185871124, "learning_rate": 3.093996651518775e-05, "loss": 0.0446, "step": 19256 }, { "epoch": 2.698948843728101, "grad_norm": 0.21556487679481506, "learning_rate": 3.092561588136809e-05, "loss": 0.0247, "step": 19257 }, { "epoch": 2.6990889978976873, "grad_norm": 0.5140829682350159, "learning_rate": 3.091126524754843e-05, "loss": 0.0318, "step": 19258 }, { "epoch": 2.699229152067274, "grad_norm": 0.3025587797164917, "learning_rate": 3.089691461372877e-05, "loss": 0.0488, "step": 19259 }, { "epoch": 2.6993693062368607, "grad_norm": 0.12738296389579773, "learning_rate": 3.0882563979909107e-05, "loss": 0.0294, "step": 19260 }, { "epoch": 2.699509460406447, "grad_norm": 0.213106170296669, "learning_rate": 3.086821334608945e-05, "loss": 0.0328, "step": 19261 }, { "epoch": 2.6996496145760336, "grad_norm": 0.21545083820819855, "learning_rate": 3.085386271226979e-05, "loss": 0.0413, "step": 19262 }, { "epoch": 2.69978976874562, "grad_norm": 0.1640138030052185, "learning_rate": 3.083951207845013e-05, "loss": 0.0152, "step": 19263 }, { "epoch": 2.6999299229152065, "grad_norm": 0.16837556660175323, "learning_rate": 3.082516144463047e-05, "loss": 0.0168, "step": 19264 }, { "epoch": 2.7000700770847934, "grad_norm": 0.46164238452911377, "learning_rate": 3.081081081081081e-05, "loss": 0.0303, "step": 19265 }, { "epoch": 2.70021023125438, "grad_norm": 0.8670220375061035, "learning_rate": 3.0796460176991146e-05, "loss": 0.043, "step": 19266 }, { "epoch": 2.7003503854239663, "grad_norm": 0.09610386192798615, "learning_rate": 3.078210954317149e-05, "loss": 0.0036, "step": 19267 }, { "epoch": 2.700490539593553, "grad_norm": 0.4493335485458374, "learning_rate": 3.076775890935182e-05, "loss": 0.0501, "step": 19268 }, { "epoch": 
2.7006306937631397, "grad_norm": 3.75961971282959, "learning_rate": 3.0753408275532165e-05, "loss": 0.1498, "step": 19269 }, { "epoch": 2.700770847932726, "grad_norm": 0.05502821132540703, "learning_rate": 3.073905764171251e-05, "loss": 0.0024, "step": 19270 }, { "epoch": 2.7009110021023126, "grad_norm": 0.09672386199235916, "learning_rate": 3.072470700789285e-05, "loss": 0.0138, "step": 19271 }, { "epoch": 2.701051156271899, "grad_norm": 0.2971091866493225, "learning_rate": 3.0710356374073185e-05, "loss": 0.0293, "step": 19272 }, { "epoch": 2.7011913104414855, "grad_norm": 0.11204875260591507, "learning_rate": 3.069600574025353e-05, "loss": 0.0107, "step": 19273 }, { "epoch": 2.701331464611072, "grad_norm": 0.41986027359962463, "learning_rate": 3.068165510643386e-05, "loss": 0.0729, "step": 19274 }, { "epoch": 2.701471618780659, "grad_norm": 0.12055579572916031, "learning_rate": 3.0667304472614204e-05, "loss": 0.0126, "step": 19275 }, { "epoch": 2.7016117729502453, "grad_norm": 0.05585943162441254, "learning_rate": 3.0652953838794546e-05, "loss": 0.0029, "step": 19276 }, { "epoch": 2.701751927119832, "grad_norm": 0.33906131982803345, "learning_rate": 3.063860320497488e-05, "loss": 0.0778, "step": 19277 }, { "epoch": 2.7018920812894183, "grad_norm": 0.10691893845796585, "learning_rate": 3.0624252571155224e-05, "loss": 0.0082, "step": 19278 }, { "epoch": 2.702032235459005, "grad_norm": 0.33289530873298645, "learning_rate": 3.0609901937335566e-05, "loss": 0.022, "step": 19279 }, { "epoch": 2.7021723896285916, "grad_norm": 0.39636731147766113, "learning_rate": 3.05955513035159e-05, "loss": 0.0236, "step": 19280 }, { "epoch": 2.702312543798178, "grad_norm": 0.20659658312797546, "learning_rate": 3.0581200669696243e-05, "loss": 0.0144, "step": 19281 }, { "epoch": 2.7024526979677645, "grad_norm": 0.0468139685690403, "learning_rate": 3.056685003587658e-05, "loss": 0.0038, "step": 19282 }, { "epoch": 2.702592852137351, "grad_norm": 0.2533717751502991, "learning_rate": 
3.055249940205692e-05, "loss": 0.0265, "step": 19283 }, { "epoch": 2.7027330063069375, "grad_norm": 0.2451319545507431, "learning_rate": 3.053814876823726e-05, "loss": 0.0331, "step": 19284 }, { "epoch": 2.702873160476524, "grad_norm": 0.2609651982784271, "learning_rate": 3.05237981344176e-05, "loss": 0.0315, "step": 19285 }, { "epoch": 2.703013314646111, "grad_norm": 0.18486066162586212, "learning_rate": 3.050944750059794e-05, "loss": 0.0124, "step": 19286 }, { "epoch": 2.7031534688156973, "grad_norm": 0.3572184145450592, "learning_rate": 3.049509686677828e-05, "loss": 0.0164, "step": 19287 }, { "epoch": 2.7032936229852838, "grad_norm": 0.1174364909529686, "learning_rate": 3.0480746232958618e-05, "loss": 0.011, "step": 19288 }, { "epoch": 2.70343377715487, "grad_norm": 0.04312127083539963, "learning_rate": 3.046639559913896e-05, "loss": 0.0041, "step": 19289 }, { "epoch": 2.703573931324457, "grad_norm": 0.062024567276239395, "learning_rate": 3.04520449653193e-05, "loss": 0.0072, "step": 19290 }, { "epoch": 2.7037140854940436, "grad_norm": 0.18893611431121826, "learning_rate": 3.0437694331499637e-05, "loss": 0.0124, "step": 19291 }, { "epoch": 2.70385423966363, "grad_norm": 0.030310414731502533, "learning_rate": 3.0423343697679976e-05, "loss": 0.0022, "step": 19292 }, { "epoch": 2.7039943938332165, "grad_norm": 0.06617215275764465, "learning_rate": 3.040899306386032e-05, "loss": 0.0066, "step": 19293 }, { "epoch": 2.704134548002803, "grad_norm": 0.17179124057292938, "learning_rate": 3.0394642430040657e-05, "loss": 0.0059, "step": 19294 }, { "epoch": 2.7042747021723894, "grad_norm": 0.16624535620212555, "learning_rate": 3.0380291796220996e-05, "loss": 0.0197, "step": 19295 }, { "epoch": 2.7044148563419763, "grad_norm": 0.05562801659107208, "learning_rate": 3.0365941162401338e-05, "loss": 0.0058, "step": 19296 }, { "epoch": 2.7045550105115628, "grad_norm": 0.30680719017982483, "learning_rate": 3.0351590528581677e-05, "loss": 0.0167, "step": 19297 }, { "epoch": 
2.7046951646811492, "grad_norm": 0.14756862819194794, "learning_rate": 3.033723989476202e-05, "loss": 0.0266, "step": 19298 }, { "epoch": 2.7048353188507357, "grad_norm": 0.055031850934028625, "learning_rate": 3.0322889260942354e-05, "loss": 0.0214, "step": 19299 }, { "epoch": 2.7049754730203226, "grad_norm": 0.30849936604499817, "learning_rate": 3.0308538627122696e-05, "loss": 0.0775, "step": 19300 }, { "epoch": 2.705115627189909, "grad_norm": 0.16849109530448914, "learning_rate": 3.0294187993303035e-05, "loss": 0.0196, "step": 19301 }, { "epoch": 2.7052557813594955, "grad_norm": 0.09167186170816422, "learning_rate": 3.0279837359483377e-05, "loss": 0.0031, "step": 19302 }, { "epoch": 2.705395935529082, "grad_norm": 0.2093467265367508, "learning_rate": 3.0265486725663712e-05, "loss": 0.0201, "step": 19303 }, { "epoch": 2.7055360896986684, "grad_norm": 0.24855487048625946, "learning_rate": 3.0251136091844054e-05, "loss": 0.0116, "step": 19304 }, { "epoch": 2.705676243868255, "grad_norm": 0.32233527302742004, "learning_rate": 3.0236785458024393e-05, "loss": 0.0423, "step": 19305 }, { "epoch": 2.7058163980378414, "grad_norm": 0.09075789898633957, "learning_rate": 3.0222434824204735e-05, "loss": 0.0092, "step": 19306 }, { "epoch": 2.7059565522074283, "grad_norm": 0.22996492683887482, "learning_rate": 3.020808419038507e-05, "loss": 0.0194, "step": 19307 }, { "epoch": 2.7060967063770147, "grad_norm": 0.20749510824680328, "learning_rate": 3.0193733556565413e-05, "loss": 0.0167, "step": 19308 }, { "epoch": 2.706236860546601, "grad_norm": 0.36879536509513855, "learning_rate": 3.017938292274575e-05, "loss": 0.0506, "step": 19309 }, { "epoch": 2.706377014716188, "grad_norm": 0.2779279947280884, "learning_rate": 3.0165032288926094e-05, "loss": 0.0493, "step": 19310 }, { "epoch": 2.7065171688857745, "grad_norm": 0.3724881112575531, "learning_rate": 3.015068165510643e-05, "loss": 0.067, "step": 19311 }, { "epoch": 2.706657323055361, "grad_norm": 0.21186470985412598, 
"learning_rate": 3.013633102128677e-05, "loss": 0.0411, "step": 19312 }, { "epoch": 2.7067974772249475, "grad_norm": 0.10183316469192505, "learning_rate": 3.012198038746711e-05, "loss": 0.0087, "step": 19313 }, { "epoch": 2.706937631394534, "grad_norm": 0.48984479904174805, "learning_rate": 3.0107629753647452e-05, "loss": 0.1369, "step": 19314 }, { "epoch": 2.7070777855641204, "grad_norm": 0.016151679679751396, "learning_rate": 3.0093279119827787e-05, "loss": 0.0014, "step": 19315 }, { "epoch": 2.707217939733707, "grad_norm": 0.34023815393447876, "learning_rate": 3.007892848600813e-05, "loss": 0.037, "step": 19316 }, { "epoch": 2.7073580939032937, "grad_norm": 0.661260724067688, "learning_rate": 3.0064577852188468e-05, "loss": 0.1406, "step": 19317 }, { "epoch": 2.70749824807288, "grad_norm": 0.2402743399143219, "learning_rate": 3.005022721836881e-05, "loss": 0.0063, "step": 19318 }, { "epoch": 2.7076384022424667, "grad_norm": 2.0923216342926025, "learning_rate": 3.0035876584549145e-05, "loss": 0.1607, "step": 19319 }, { "epoch": 2.707778556412053, "grad_norm": NaN, "learning_rate": 3.0021525950729488e-05, "loss": 0.4982, "step": 19320 }, { "epoch": 2.70791871058164, "grad_norm": 0.2181277573108673, "learning_rate": 3.0021525950729488e-05, "loss": 0.022, "step": 19321 }, { "epoch": 2.7080588647512265, "grad_norm": 0.2151375114917755, "learning_rate": 3.0007175316909826e-05, "loss": 0.0246, "step": 19322 }, { "epoch": 2.708199018920813, "grad_norm": 0.8480563759803772, "learning_rate": 2.999282468309017e-05, "loss": 0.0288, "step": 19323 }, { "epoch": 2.7083391730903994, "grad_norm": 0.35201698541641235, "learning_rate": 2.9978474049270504e-05, "loss": 0.057, "step": 19324 }, { "epoch": 2.708479327259986, "grad_norm": 0.204260915517807, "learning_rate": 2.9964123415450846e-05, "loss": 0.0136, "step": 19325 }, { "epoch": 2.7086194814295723, "grad_norm": 0.0723210871219635, "learning_rate": 2.9949772781631185e-05, "loss": 0.0117, "step": 19326 }, { "epoch": 
2.708759635599159, "grad_norm": 0.1000300869345665, "learning_rate": 2.9935422147811527e-05, "loss": 0.006, "step": 19327 }, { "epoch": 2.7088997897687457, "grad_norm": 0.15267686545848846, "learning_rate": 2.9921071513991862e-05, "loss": 0.0169, "step": 19328 }, { "epoch": 2.709039943938332, "grad_norm": 0.14752452075481415, "learning_rate": 2.9906720880172204e-05, "loss": 0.017, "step": 19329 }, { "epoch": 2.7091800981079186, "grad_norm": 0.4122331738471985, "learning_rate": 2.9892370246352543e-05, "loss": 0.0351, "step": 19330 }, { "epoch": 2.7093202522775055, "grad_norm": 0.2442162185907364, "learning_rate": 2.9878019612532885e-05, "loss": 0.0484, "step": 19331 }, { "epoch": 2.709460406447092, "grad_norm": 0.20503857731819153, "learning_rate": 2.986366897871322e-05, "loss": 0.0168, "step": 19332 }, { "epoch": 2.7096005606166784, "grad_norm": 0.1015247330069542, "learning_rate": 2.9849318344893562e-05, "loss": 0.0238, "step": 19333 }, { "epoch": 2.709740714786265, "grad_norm": 0.062133099883794785, "learning_rate": 2.9834967711073905e-05, "loss": 0.0032, "step": 19334 }, { "epoch": 2.7098808689558513, "grad_norm": 0.39330267906188965, "learning_rate": 2.9820617077254243e-05, "loss": 0.0164, "step": 19335 }, { "epoch": 2.710021023125438, "grad_norm": 0.0634106993675232, "learning_rate": 2.9806266443434585e-05, "loss": 0.0027, "step": 19336 }, { "epoch": 2.7101611772950243, "grad_norm": 0.3063826262950897, "learning_rate": 2.979191580961492e-05, "loss": 0.1063, "step": 19337 }, { "epoch": 2.710301331464611, "grad_norm": 0.18283236026763916, "learning_rate": 2.9777565175795263e-05, "loss": 0.0339, "step": 19338 }, { "epoch": 2.7104414856341976, "grad_norm": 0.1700955033302307, "learning_rate": 2.97632145419756e-05, "loss": 0.0223, "step": 19339 }, { "epoch": 2.710581639803784, "grad_norm": 0.5060061812400818, "learning_rate": 2.9748863908155944e-05, "loss": 0.018, "step": 19340 }, { "epoch": 2.7107217939733705, "grad_norm": 0.17611917853355408, "learning_rate": 
2.973451327433628e-05, "loss": 0.02, "step": 19341 }, { "epoch": 2.7108619481429574, "grad_norm": 0.2941989004611969, "learning_rate": 2.972016264051662e-05, "loss": 0.0182, "step": 19342 }, { "epoch": 2.711002102312544, "grad_norm": 0.26543599367141724, "learning_rate": 2.970581200669696e-05, "loss": 0.0341, "step": 19343 }, { "epoch": 2.7111422564821304, "grad_norm": 0.39066413044929504, "learning_rate": 2.9691461372877302e-05, "loss": 0.0108, "step": 19344 }, { "epoch": 2.711282410651717, "grad_norm": 0.14799782633781433, "learning_rate": 2.9677110739057637e-05, "loss": 0.0062, "step": 19345 }, { "epoch": 2.7114225648213033, "grad_norm": 0.38191646337509155, "learning_rate": 2.966276010523798e-05, "loss": 0.0157, "step": 19346 }, { "epoch": 2.7115627189908897, "grad_norm": 0.02129509672522545, "learning_rate": 2.9648409471418318e-05, "loss": 0.0011, "step": 19347 }, { "epoch": 2.7117028731604766, "grad_norm": 0.04881303384900093, "learning_rate": 2.963405883759866e-05, "loss": 0.0207, "step": 19348 }, { "epoch": 2.711843027330063, "grad_norm": 0.10351043939590454, "learning_rate": 2.9619708203778996e-05, "loss": 0.009, "step": 19349 }, { "epoch": 2.7119831814996496, "grad_norm": 0.34045377373695374, "learning_rate": 2.9605357569959338e-05, "loss": 0.0208, "step": 19350 }, { "epoch": 2.712123335669236, "grad_norm": 0.11457183957099915, "learning_rate": 2.9591006936139676e-05, "loss": 0.0192, "step": 19351 }, { "epoch": 2.712263489838823, "grad_norm": 0.33656391501426697, "learning_rate": 2.957665630232002e-05, "loss": 0.0303, "step": 19352 }, { "epoch": 2.7124036440084094, "grad_norm": 1.1371277570724487, "learning_rate": 2.9562305668500354e-05, "loss": 0.0078, "step": 19353 }, { "epoch": 2.712543798177996, "grad_norm": 0.6659761071205139, "learning_rate": 2.9547955034680696e-05, "loss": 0.1001, "step": 19354 }, { "epoch": 2.7126839523475823, "grad_norm": 0.1783275604248047, "learning_rate": 2.9533604400861035e-05, "loss": 0.0468, "step": 19355 }, { "epoch": 
2.7128241065171688, "grad_norm": 0.4399384558200836, "learning_rate": 2.9519253767041377e-05, "loss": 0.0187, "step": 19356 }, { "epoch": 2.7129642606867552, "grad_norm": 0.525000274181366, "learning_rate": 2.9504903133221712e-05, "loss": 0.0304, "step": 19357 }, { "epoch": 2.713104414856342, "grad_norm": 1.0218911170959473, "learning_rate": 2.9490552499402054e-05, "loss": 0.0432, "step": 19358 }, { "epoch": 2.7132445690259286, "grad_norm": 0.08774565905332565, "learning_rate": 2.9476201865582393e-05, "loss": 0.0138, "step": 19359 }, { "epoch": 2.713384723195515, "grad_norm": 0.10079289972782135, "learning_rate": 2.9461851231762735e-05, "loss": 0.0064, "step": 19360 }, { "epoch": 2.7135248773651015, "grad_norm": 0.036775000393390656, "learning_rate": 2.944750059794307e-05, "loss": 0.0026, "step": 19361 }, { "epoch": 2.7136650315346884, "grad_norm": 0.3501421809196472, "learning_rate": 2.9433149964123413e-05, "loss": 0.0465, "step": 19362 }, { "epoch": 2.713805185704275, "grad_norm": 0.43776899576187134, "learning_rate": 2.941879933030375e-05, "loss": 0.0389, "step": 19363 }, { "epoch": 2.7139453398738613, "grad_norm": 0.3469582796096802, "learning_rate": 2.9404448696484093e-05, "loss": 0.0692, "step": 19364 }, { "epoch": 2.714085494043448, "grad_norm": 0.0735086128115654, "learning_rate": 2.939009806266443e-05, "loss": 0.0021, "step": 19365 }, { "epoch": 2.7142256482130342, "grad_norm": 0.9639247059822083, "learning_rate": 2.937574742884477e-05, "loss": 0.0616, "step": 19366 }, { "epoch": 2.7143658023826207, "grad_norm": 0.43536242842674255, "learning_rate": 2.936139679502511e-05, "loss": 0.0122, "step": 19367 }, { "epoch": 2.714505956552207, "grad_norm": 1.8804329633712769, "learning_rate": 2.934704616120545e-05, "loss": 0.0964, "step": 19368 }, { "epoch": 2.714646110721794, "grad_norm": 1.5284156799316406, "learning_rate": 2.9332695527385787e-05, "loss": 0.1079, "step": 19369 }, { "epoch": 2.7147862648913805, "grad_norm": 0.1590013951063156, "learning_rate": 
2.931834489356613e-05, "loss": 0.0052, "step": 19370 }, { "epoch": 2.714926419060967, "grad_norm": 0.06793346256017685, "learning_rate": 2.930399425974647e-05, "loss": 0.0054, "step": 19371 }, { "epoch": 2.7150665732305534, "grad_norm": 0.3141545355319977, "learning_rate": 2.928964362592681e-05, "loss": 0.0726, "step": 19372 }, { "epoch": 2.7152067274001404, "grad_norm": 0.17232657968997955, "learning_rate": 2.9275292992107152e-05, "loss": 0.0149, "step": 19373 }, { "epoch": 2.715346881569727, "grad_norm": 0.2903800904750824, "learning_rate": 2.9260942358287487e-05, "loss": 0.0317, "step": 19374 }, { "epoch": 2.7154870357393133, "grad_norm": 0.16670174896717072, "learning_rate": 2.924659172446783e-05, "loss": 0.0294, "step": 19375 }, { "epoch": 2.7156271899088997, "grad_norm": 0.10735027492046356, "learning_rate": 2.9232241090648168e-05, "loss": 0.0103, "step": 19376 }, { "epoch": 2.715767344078486, "grad_norm": 0.1174519807100296, "learning_rate": 2.921789045682851e-05, "loss": 0.0133, "step": 19377 }, { "epoch": 2.7159074982480726, "grad_norm": 0.13293814659118652, "learning_rate": 2.9203539823008846e-05, "loss": 0.0106, "step": 19378 }, { "epoch": 2.7160476524176596, "grad_norm": 0.3962964415550232, "learning_rate": 2.9189189189189188e-05, "loss": 0.0211, "step": 19379 }, { "epoch": 2.716187806587246, "grad_norm": 0.3265494108200073, "learning_rate": 2.9174838555369527e-05, "loss": 0.0294, "step": 19380 }, { "epoch": 2.7163279607568325, "grad_norm": 0.15156802535057068, "learning_rate": 2.916048792154987e-05, "loss": 0.0295, "step": 19381 }, { "epoch": 2.716468114926419, "grad_norm": 0.1435842663049698, "learning_rate": 2.9146137287730204e-05, "loss": 0.015, "step": 19382 }, { "epoch": 2.716608269096006, "grad_norm": 0.15818379819393158, "learning_rate": 2.9131786653910546e-05, "loss": 0.012, "step": 19383 }, { "epoch": 2.7167484232655923, "grad_norm": 0.16083475947380066, "learning_rate": 2.9117436020090885e-05, "loss": 0.0409, "step": 19384 }, { "epoch": 
2.7168885774351788, "grad_norm": 0.309542715549469, "learning_rate": 2.9103085386271227e-05, "loss": 0.0185, "step": 19385 }, { "epoch": 2.717028731604765, "grad_norm": 0.07071244716644287, "learning_rate": 2.9088734752451562e-05, "loss": 0.0142, "step": 19386 }, { "epoch": 2.7171688857743517, "grad_norm": 0.23412373661994934, "learning_rate": 2.9074384118631904e-05, "loss": 0.0458, "step": 19387 }, { "epoch": 2.717309039943938, "grad_norm": 0.38587841391563416, "learning_rate": 2.9060033484812243e-05, "loss": 0.0154, "step": 19388 }, { "epoch": 2.717449194113525, "grad_norm": 0.6501622796058655, "learning_rate": 2.9045682850992585e-05, "loss": 0.0994, "step": 19389 }, { "epoch": 2.7175893482831115, "grad_norm": 0.9390354752540588, "learning_rate": 2.903133221717292e-05, "loss": 0.0542, "step": 19390 }, { "epoch": 2.717729502452698, "grad_norm": 0.14710909128189087, "learning_rate": 2.9016981583353263e-05, "loss": 0.0295, "step": 19391 }, { "epoch": 2.7178696566222844, "grad_norm": 0.25121939182281494, "learning_rate": 2.90026309495336e-05, "loss": 0.0239, "step": 19392 }, { "epoch": 2.7180098107918713, "grad_norm": 0.20317435264587402, "learning_rate": 2.8988280315713944e-05, "loss": 0.0301, "step": 19393 }, { "epoch": 2.7181499649614578, "grad_norm": 0.4621746242046356, "learning_rate": 2.897392968189428e-05, "loss": 0.1095, "step": 19394 }, { "epoch": 2.7182901191310442, "grad_norm": 0.18017859756946564, "learning_rate": 2.895957904807462e-05, "loss": 0.0306, "step": 19395 }, { "epoch": 2.7184302733006307, "grad_norm": 0.11911261081695557, "learning_rate": 2.894522841425496e-05, "loss": 0.0195, "step": 19396 }, { "epoch": 2.718570427470217, "grad_norm": 0.14756834506988525, "learning_rate": 2.8930877780435302e-05, "loss": 0.0186, "step": 19397 }, { "epoch": 2.7187105816398036, "grad_norm": 0.13270531594753265, "learning_rate": 2.8916527146615637e-05, "loss": 0.0104, "step": 19398 }, { "epoch": 2.71885073580939, "grad_norm": 0.1459481418132782, "learning_rate": 
2.890217651279598e-05, "loss": 0.0469, "step": 19399 }, { "epoch": 2.718990889978977, "grad_norm": 0.12475838512182236, "learning_rate": 2.8887825878976318e-05, "loss": 0.0101, "step": 19400 }, { "epoch": 2.7191310441485634, "grad_norm": 0.3245266377925873, "learning_rate": 2.887347524515666e-05, "loss": 0.0066, "step": 19401 }, { "epoch": 2.71927119831815, "grad_norm": 0.20143072307109833, "learning_rate": 2.8859124611336995e-05, "loss": 0.0232, "step": 19402 }, { "epoch": 2.7194113524877364, "grad_norm": 0.36088934540748596, "learning_rate": 2.8844773977517338e-05, "loss": 0.039, "step": 19403 }, { "epoch": 2.7195515066573233, "grad_norm": 0.29269424080848694, "learning_rate": 2.8830423343697676e-05, "loss": 0.0201, "step": 19404 }, { "epoch": 2.7196916608269097, "grad_norm": 0.12134575843811035, "learning_rate": 2.881607270987802e-05, "loss": 0.0258, "step": 19405 }, { "epoch": 2.719831814996496, "grad_norm": 0.039690855890512466, "learning_rate": 2.8801722076058354e-05, "loss": 0.0021, "step": 19406 }, { "epoch": 2.7199719691660826, "grad_norm": 0.4401889443397522, "learning_rate": 2.8787371442238696e-05, "loss": 0.0412, "step": 19407 }, { "epoch": 2.720112123335669, "grad_norm": 0.2721652686595917, "learning_rate": 2.8773020808419038e-05, "loss": 0.0287, "step": 19408 }, { "epoch": 2.7202522775052556, "grad_norm": 0.06766214966773987, "learning_rate": 2.8758670174599377e-05, "loss": 0.0057, "step": 19409 }, { "epoch": 2.7203924316748425, "grad_norm": 0.09282040596008301, "learning_rate": 2.874431954077972e-05, "loss": 0.0104, "step": 19410 }, { "epoch": 2.720532585844429, "grad_norm": 0.138297900557518, "learning_rate": 2.8729968906960054e-05, "loss": 0.0057, "step": 19411 }, { "epoch": 2.7206727400140154, "grad_norm": 0.6147881746292114, "learning_rate": 2.8715618273140396e-05, "loss": 0.0248, "step": 19412 }, { "epoch": 2.720812894183602, "grad_norm": 0.5212564468383789, "learning_rate": 2.8701267639320735e-05, "loss": 0.0346, "step": 19413 }, { "epoch": 
2.7209530483531887, "grad_norm": 0.2303069680929184, "learning_rate": 2.8686917005501077e-05, "loss": 0.0304, "step": 19414 }, { "epoch": 2.721093202522775, "grad_norm": 0.17975178360939026, "learning_rate": 2.8672566371681412e-05, "loss": 0.0126, "step": 19415 }, { "epoch": 2.7212333566923617, "grad_norm": 0.08367469906806946, "learning_rate": 2.8658215737861755e-05, "loss": 0.0195, "step": 19416 }, { "epoch": 2.721373510861948, "grad_norm": 0.14639873802661896, "learning_rate": 2.8643865104042093e-05, "loss": 0.0135, "step": 19417 }, { "epoch": 2.7215136650315346, "grad_norm": 0.4908272325992584, "learning_rate": 2.8629514470222435e-05, "loss": 0.1071, "step": 19418 }, { "epoch": 2.721653819201121, "grad_norm": 1.3779081106185913, "learning_rate": 2.861516383640277e-05, "loss": 0.147, "step": 19419 }, { "epoch": 2.721793973370708, "grad_norm": 0.8610438704490662, "learning_rate": 2.8600813202583113e-05, "loss": 0.0349, "step": 19420 }, { "epoch": 2.7219341275402944, "grad_norm": 0.03710014745593071, "learning_rate": 2.858646256876345e-05, "loss": 0.0034, "step": 19421 }, { "epoch": 2.722074281709881, "grad_norm": 0.20834094285964966, "learning_rate": 2.8572111934943794e-05, "loss": 0.0125, "step": 19422 }, { "epoch": 2.7222144358794673, "grad_norm": 0.23193572461605072, "learning_rate": 2.855776130112413e-05, "loss": 0.0425, "step": 19423 }, { "epoch": 2.7223545900490542, "grad_norm": 0.11113123595714569, "learning_rate": 2.854341066730447e-05, "loss": 0.0143, "step": 19424 }, { "epoch": 2.7224947442186407, "grad_norm": 0.17334316670894623, "learning_rate": 2.852906003348481e-05, "loss": 0.0234, "step": 19425 }, { "epoch": 2.722634898388227, "grad_norm": 0.19656060636043549, "learning_rate": 2.8514709399665152e-05, "loss": 0.0195, "step": 19426 }, { "epoch": 2.7227750525578136, "grad_norm": 0.06525545567274094, "learning_rate": 2.8500358765845487e-05, "loss": 0.0035, "step": 19427 }, { "epoch": 2.7229152067274, "grad_norm": 0.38396257162094116, "learning_rate": 
2.848600813202583e-05, "loss": 0.0428, "step": 19428 }, { "epoch": 2.7230553608969865, "grad_norm": 0.05099687725305557, "learning_rate": 2.8471657498206168e-05, "loss": 0.006, "step": 19429 }, { "epoch": 2.723195515066573, "grad_norm": 0.42549929022789, "learning_rate": 2.845730686438651e-05, "loss": 0.044, "step": 19430 }, { "epoch": 2.72333566923616, "grad_norm": 0.1808493584394455, "learning_rate": 2.8442956230566846e-05, "loss": 0.0393, "step": 19431 }, { "epoch": 2.7234758234057463, "grad_norm": 0.4539145231246948, "learning_rate": 2.8428605596747188e-05, "loss": 0.0637, "step": 19432 }, { "epoch": 2.723615977575333, "grad_norm": 0.8421884775161743, "learning_rate": 2.8414254962927526e-05, "loss": 0.0475, "step": 19433 }, { "epoch": 2.7237561317449193, "grad_norm": 0.09060514718294144, "learning_rate": 2.839990432910787e-05, "loss": 0.0052, "step": 19434 }, { "epoch": 2.723896285914506, "grad_norm": 0.07637181133031845, "learning_rate": 2.8385553695288204e-05, "loss": 0.0041, "step": 19435 }, { "epoch": 2.7240364400840926, "grad_norm": 0.3477778434753418, "learning_rate": 2.8371203061468546e-05, "loss": 0.0473, "step": 19436 }, { "epoch": 2.724176594253679, "grad_norm": 0.20549605786800385, "learning_rate": 2.8356852427648885e-05, "loss": 0.0164, "step": 19437 }, { "epoch": 2.7243167484232655, "grad_norm": 0.6651625633239746, "learning_rate": 2.8342501793829227e-05, "loss": 0.0511, "step": 19438 }, { "epoch": 2.724456902592852, "grad_norm": 0.09272904694080353, "learning_rate": 2.8328151160009562e-05, "loss": 0.0039, "step": 19439 }, { "epoch": 2.7245970567624385, "grad_norm": 0.6257189512252808, "learning_rate": 2.8313800526189904e-05, "loss": 0.0117, "step": 19440 }, { "epoch": 2.7247372109320254, "grad_norm": 0.11424887180328369, "learning_rate": 2.8299449892370243e-05, "loss": 0.0105, "step": 19441 }, { "epoch": 2.724877365101612, "grad_norm": 0.38900700211524963, "learning_rate": 2.8285099258550585e-05, "loss": 0.0264, "step": 19442 }, { "epoch": 
2.7250175192711983, "grad_norm": 0.21545597910881042, "learning_rate": 2.827074862473092e-05, "loss": 0.0491, "step": 19443 }, { "epoch": 2.7251576734407847, "grad_norm": 0.45535576343536377, "learning_rate": 2.8256397990911263e-05, "loss": 0.0425, "step": 19444 }, { "epoch": 2.7252978276103716, "grad_norm": 0.21709613502025604, "learning_rate": 2.82420473570916e-05, "loss": 0.0213, "step": 19445 }, { "epoch": 2.725437981779958, "grad_norm": 0.14636696875095367, "learning_rate": 2.8227696723271943e-05, "loss": 0.0122, "step": 19446 }, { "epoch": 2.7255781359495446, "grad_norm": 0.42629554867744446, "learning_rate": 2.8213346089452282e-05, "loss": 0.0853, "step": 19447 }, { "epoch": 2.725718290119131, "grad_norm": 0.12266921252012253, "learning_rate": 2.819899545563262e-05, "loss": 0.006, "step": 19448 }, { "epoch": 2.7258584442887175, "grad_norm": 0.42401838302612305, "learning_rate": 2.8184644821812963e-05, "loss": 0.0406, "step": 19449 }, { "epoch": 2.725998598458304, "grad_norm": 0.23997937142848969, "learning_rate": 2.81702941879933e-05, "loss": 0.062, "step": 19450 }, { "epoch": 2.7261387526278904, "grad_norm": 0.10570783168077469, "learning_rate": 2.815594355417364e-05, "loss": 0.0276, "step": 19451 }, { "epoch": 2.7262789067974773, "grad_norm": 0.11220171302556992, "learning_rate": 2.814159292035398e-05, "loss": 0.0216, "step": 19452 }, { "epoch": 2.7264190609670638, "grad_norm": 0.31235256791114807, "learning_rate": 2.812724228653432e-05, "loss": 0.0213, "step": 19453 }, { "epoch": 2.7265592151366502, "grad_norm": 0.24962976574897766, "learning_rate": 2.811289165271466e-05, "loss": 0.0076, "step": 19454 }, { "epoch": 2.726699369306237, "grad_norm": 0.18489417433738708, "learning_rate": 2.8098541018895e-05, "loss": 0.0116, "step": 19455 }, { "epoch": 2.7268395234758236, "grad_norm": 0.4950798451900482, "learning_rate": 2.8084190385075337e-05, "loss": 0.0215, "step": 19456 }, { "epoch": 2.72697967764541, "grad_norm": 0.21324247121810913, "learning_rate": 
2.806983975125568e-05, "loss": 0.0071, "step": 19457 }, { "epoch": 2.7271198318149965, "grad_norm": 0.18768100440502167, "learning_rate": 2.8055489117436018e-05, "loss": 0.018, "step": 19458 }, { "epoch": 2.727259985984583, "grad_norm": 0.25995275378227234, "learning_rate": 2.8041138483616357e-05, "loss": 0.0169, "step": 19459 }, { "epoch": 2.7274001401541694, "grad_norm": 0.13752292096614838, "learning_rate": 2.8026787849796696e-05, "loss": 0.0114, "step": 19460 }, { "epoch": 2.727540294323756, "grad_norm": 0.11174220591783524, "learning_rate": 2.8012437215977038e-05, "loss": 0.0064, "step": 19461 }, { "epoch": 2.727680448493343, "grad_norm": 0.13980048894882202, "learning_rate": 2.7998086582157376e-05, "loss": 0.0047, "step": 19462 }, { "epoch": 2.7278206026629293, "grad_norm": 0.7803621888160706, "learning_rate": 2.7983735948337715e-05, "loss": 0.053, "step": 19463 }, { "epoch": 2.7279607568325157, "grad_norm": 0.12612923979759216, "learning_rate": 2.7969385314518054e-05, "loss": 0.0048, "step": 19464 }, { "epoch": 2.728100911002102, "grad_norm": 0.08793405443429947, "learning_rate": 2.7955034680698396e-05, "loss": 0.0038, "step": 19465 }, { "epoch": 2.728241065171689, "grad_norm": 0.37049227952957153, "learning_rate": 2.7940684046878735e-05, "loss": 0.047, "step": 19466 }, { "epoch": 2.7283812193412755, "grad_norm": 0.162235289812088, "learning_rate": 2.7926333413059074e-05, "loss": 0.006, "step": 19467 }, { "epoch": 2.728521373510862, "grad_norm": 0.33643218874931335, "learning_rate": 2.7911982779239412e-05, "loss": 0.0157, "step": 19468 }, { "epoch": 2.7286615276804485, "grad_norm": 0.6997774839401245, "learning_rate": 2.7897632145419754e-05, "loss": 0.0382, "step": 19469 }, { "epoch": 2.728801681850035, "grad_norm": 0.3014971613883972, "learning_rate": 2.7883281511600093e-05, "loss": 0.0365, "step": 19470 }, { "epoch": 2.7289418360196214, "grad_norm": 0.1825840175151825, "learning_rate": 2.7868930877780435e-05, "loss": 0.0711, "step": 19471 }, { "epoch": 
2.7290819901892083, "grad_norm": 0.21778908371925354, "learning_rate": 2.785458024396077e-05, "loss": 0.0427, "step": 19472 }, { "epoch": 2.7292221443587947, "grad_norm": 0.03799497336149216, "learning_rate": 2.7840229610141113e-05, "loss": 0.002, "step": 19473 }, { "epoch": 2.729362298528381, "grad_norm": 0.4807928502559662, "learning_rate": 2.782587897632145e-05, "loss": 0.0521, "step": 19474 }, { "epoch": 2.7295024526979677, "grad_norm": 0.12992073595523834, "learning_rate": 2.7811528342501793e-05, "loss": 0.009, "step": 19475 }, { "epoch": 2.7296426068675546, "grad_norm": 0.06080075725913048, "learning_rate": 2.779717770868213e-05, "loss": 0.0053, "step": 19476 }, { "epoch": 2.729782761037141, "grad_norm": 0.28789106011390686, "learning_rate": 2.778282707486247e-05, "loss": 0.0379, "step": 19477 }, { "epoch": 2.7299229152067275, "grad_norm": 0.15006516873836517, "learning_rate": 2.776847644104281e-05, "loss": 0.022, "step": 19478 }, { "epoch": 2.730063069376314, "grad_norm": 0.30781152844429016, "learning_rate": 2.7754125807223152e-05, "loss": 0.0378, "step": 19479 }, { "epoch": 2.7302032235459004, "grad_norm": 0.14483267068862915, "learning_rate": 2.7739775173403487e-05, "loss": 0.0376, "step": 19480 }, { "epoch": 2.730343377715487, "grad_norm": 0.19391140341758728, "learning_rate": 2.772542453958383e-05, "loss": 0.0471, "step": 19481 }, { "epoch": 2.7304835318850733, "grad_norm": 0.25918322801589966, "learning_rate": 2.7711073905764168e-05, "loss": 0.0124, "step": 19482 }, { "epoch": 2.73062368605466, "grad_norm": 0.1158665269613266, "learning_rate": 2.769672327194451e-05, "loss": 0.0037, "step": 19483 }, { "epoch": 2.7307638402242467, "grad_norm": 0.22808733582496643, "learning_rate": 2.768237263812485e-05, "loss": 0.0263, "step": 19484 }, { "epoch": 2.730903994393833, "grad_norm": 0.11032707989215851, "learning_rate": 2.7668022004305187e-05, "loss": 0.005, "step": 19485 }, { "epoch": 2.73104414856342, "grad_norm": 0.12492655962705612, "learning_rate": 
2.765367137048553e-05, "loss": 0.0191, "step": 19486 }, { "epoch": 2.7311843027330065, "grad_norm": 0.31816771626472473, "learning_rate": 2.763932073666587e-05, "loss": 0.036, "step": 19487 }, { "epoch": 2.731324456902593, "grad_norm": 0.12014422565698624, "learning_rate": 2.7624970102846207e-05, "loss": 0.0103, "step": 19488 }, { "epoch": 2.7314646110721794, "grad_norm": 0.45320242643356323, "learning_rate": 2.7610619469026546e-05, "loss": 0.0326, "step": 19489 }, { "epoch": 2.731604765241766, "grad_norm": 0.6622133851051331, "learning_rate": 2.7596268835206888e-05, "loss": 0.0164, "step": 19490 }, { "epoch": 2.7317449194113523, "grad_norm": 0.08448745310306549, "learning_rate": 2.7581918201387227e-05, "loss": 0.0118, "step": 19491 }, { "epoch": 2.731885073580939, "grad_norm": 0.310040146112442, "learning_rate": 2.7567567567567565e-05, "loss": 0.0263, "step": 19492 }, { "epoch": 2.7320252277505257, "grad_norm": 0.4742746651172638, "learning_rate": 2.7553216933747904e-05, "loss": 0.053, "step": 19493 }, { "epoch": 2.732165381920112, "grad_norm": 0.21682065725326538, "learning_rate": 2.7538866299928246e-05, "loss": 0.0373, "step": 19494 }, { "epoch": 2.7323055360896986, "grad_norm": 0.09455765783786774, "learning_rate": 2.7524515666108585e-05, "loss": 0.0059, "step": 19495 }, { "epoch": 2.732445690259285, "grad_norm": 0.3978339731693268, "learning_rate": 2.7510165032288924e-05, "loss": 0.0318, "step": 19496 }, { "epoch": 2.732585844428872, "grad_norm": 0.10412567853927612, "learning_rate": 2.7495814398469262e-05, "loss": 0.0036, "step": 19497 }, { "epoch": 2.7327259985984584, "grad_norm": 0.8110059499740601, "learning_rate": 2.7481463764649604e-05, "loss": 0.0743, "step": 19498 }, { "epoch": 2.732866152768045, "grad_norm": 0.16204354166984558, "learning_rate": 2.7467113130829943e-05, "loss": 0.0188, "step": 19499 }, { "epoch": 2.7330063069376314, "grad_norm": 0.2942333519458771, "learning_rate": 2.7452762497010282e-05, "loss": 0.045, "step": 19500 }, { "epoch": 
2.733146461107218, "grad_norm": 0.18157266080379486, "learning_rate": 2.743841186319062e-05, "loss": 0.0182, "step": 19501 }, { "epoch": 2.7332866152768043, "grad_norm": 0.3237326145172119, "learning_rate": 2.7424061229370963e-05, "loss": 0.0108, "step": 19502 }, { "epoch": 2.733426769446391, "grad_norm": 0.44361165165901184, "learning_rate": 2.74097105955513e-05, "loss": 0.027, "step": 19503 }, { "epoch": 2.7335669236159776, "grad_norm": 0.10324309766292572, "learning_rate": 2.739535996173164e-05, "loss": 0.0255, "step": 19504 }, { "epoch": 2.733707077785564, "grad_norm": 0.11034010350704193, "learning_rate": 2.738100932791198e-05, "loss": 0.0128, "step": 19505 }, { "epoch": 2.7338472319551506, "grad_norm": 0.20787382125854492, "learning_rate": 2.736665869409232e-05, "loss": 0.0386, "step": 19506 }, { "epoch": 2.7339873861247375, "grad_norm": 0.3326590955257416, "learning_rate": 2.735230806027266e-05, "loss": 0.0183, "step": 19507 }, { "epoch": 2.734127540294324, "grad_norm": 0.10108204931020737, "learning_rate": 2.7337957426453e-05, "loss": 0.0105, "step": 19508 }, { "epoch": 2.7342676944639104, "grad_norm": 0.5957496762275696, "learning_rate": 2.7323606792633337e-05, "loss": 0.0349, "step": 19509 }, { "epoch": 2.734407848633497, "grad_norm": 0.5165349245071411, "learning_rate": 2.730925615881368e-05, "loss": 0.0333, "step": 19510 }, { "epoch": 2.7345480028030833, "grad_norm": 0.13919594883918762, "learning_rate": 2.7294905524994018e-05, "loss": 0.0181, "step": 19511 }, { "epoch": 2.7346881569726698, "grad_norm": 0.1279531866312027, "learning_rate": 2.7280554891174357e-05, "loss": 0.0053, "step": 19512 }, { "epoch": 2.734828311142256, "grad_norm": 0.14738792181015015, "learning_rate": 2.7266204257354695e-05, "loss": 0.0057, "step": 19513 }, { "epoch": 2.734968465311843, "grad_norm": 0.25338512659072876, "learning_rate": 2.7251853623535038e-05, "loss": 0.0196, "step": 19514 }, { "epoch": 2.7351086194814296, "grad_norm": 0.8143048286437988, "learning_rate": 
2.7237502989715376e-05, "loss": 0.0313, "step": 19515 }, { "epoch": 2.735248773651016, "grad_norm": 0.21566462516784668, "learning_rate": 2.7223152355895715e-05, "loss": 0.0226, "step": 19516 }, { "epoch": 2.7353889278206025, "grad_norm": 0.2890811562538147, "learning_rate": 2.7208801722076054e-05, "loss": 0.0164, "step": 19517 }, { "epoch": 2.7355290819901894, "grad_norm": 0.29207444190979004, "learning_rate": 2.7194451088256396e-05, "loss": 0.0065, "step": 19518 }, { "epoch": 2.735669236159776, "grad_norm": 0.029932493343949318, "learning_rate": 2.7180100454436735e-05, "loss": 0.002, "step": 19519 }, { "epoch": 2.7358093903293623, "grad_norm": 0.5298171639442444, "learning_rate": 2.7165749820617073e-05, "loss": 0.0271, "step": 19520 }, { "epoch": 2.735949544498949, "grad_norm": 0.3158225417137146, "learning_rate": 2.7151399186797415e-05, "loss": 0.0733, "step": 19521 }, { "epoch": 2.7360896986685352, "grad_norm": 0.10514859855175018, "learning_rate": 2.7137048552977754e-05, "loss": 0.033, "step": 19522 }, { "epoch": 2.7362298528381217, "grad_norm": 0.048539139330387115, "learning_rate": 2.7122697919158096e-05, "loss": 0.0043, "step": 19523 }, { "epoch": 2.7363700070077086, "grad_norm": 0.07124984264373779, "learning_rate": 2.710834728533843e-05, "loss": 0.003, "step": 19524 }, { "epoch": 2.736510161177295, "grad_norm": 0.25148114562034607, "learning_rate": 2.7093996651518774e-05, "loss": 0.0235, "step": 19525 }, { "epoch": 2.7366503153468815, "grad_norm": 0.1240663155913353, "learning_rate": 2.7079646017699112e-05, "loss": 0.013, "step": 19526 }, { "epoch": 2.736790469516468, "grad_norm": 0.2573774755001068, "learning_rate": 2.7065295383879455e-05, "loss": 0.0109, "step": 19527 }, { "epoch": 2.736930623686055, "grad_norm": 0.20868085324764252, "learning_rate": 2.705094475005979e-05, "loss": 0.0175, "step": 19528 }, { "epoch": 2.7370707778556413, "grad_norm": 0.21531789004802704, "learning_rate": 2.7036594116240132e-05, "loss": 0.0249, "step": 19529 }, { "epoch": 
2.737210932025228, "grad_norm": 0.13642530143260956, "learning_rate": 2.702224348242047e-05, "loss": 0.0083, "step": 19530 }, { "epoch": 2.7373510861948143, "grad_norm": 0.41284242272377014, "learning_rate": 2.7007892848600813e-05, "loss": 0.0351, "step": 19531 }, { "epoch": 2.7374912403644007, "grad_norm": 0.22113879024982452, "learning_rate": 2.6993542214781148e-05, "loss": 0.0303, "step": 19532 }, { "epoch": 2.737631394533987, "grad_norm": 0.11116727441549301, "learning_rate": 2.697919158096149e-05, "loss": 0.0159, "step": 19533 }, { "epoch": 2.737771548703574, "grad_norm": 0.5387252569198608, "learning_rate": 2.696484094714183e-05, "loss": 0.0781, "step": 19534 }, { "epoch": 2.7379117028731605, "grad_norm": 0.371222585439682, "learning_rate": 2.695049031332217e-05, "loss": 0.0074, "step": 19535 }, { "epoch": 2.738051857042747, "grad_norm": 0.07178059965372086, "learning_rate": 2.6936139679502506e-05, "loss": 0.0063, "step": 19536 }, { "epoch": 2.7381920112123335, "grad_norm": 0.17377156019210815, "learning_rate": 2.692178904568285e-05, "loss": 0.0134, "step": 19537 }, { "epoch": 2.7383321653819204, "grad_norm": 0.38265857100486755, "learning_rate": 2.6907438411863187e-05, "loss": 0.0277, "step": 19538 }, { "epoch": 2.738472319551507, "grad_norm": 0.3630073070526123, "learning_rate": 2.689308777804353e-05, "loss": 0.0433, "step": 19539 }, { "epoch": 2.7386124737210933, "grad_norm": 0.21383531391620636, "learning_rate": 2.6878737144223865e-05, "loss": 0.0236, "step": 19540 }, { "epoch": 2.7387526278906797, "grad_norm": 0.07050150632858276, "learning_rate": 2.6864386510404207e-05, "loss": 0.0222, "step": 19541 }, { "epoch": 2.738892782060266, "grad_norm": 0.09447526186704636, "learning_rate": 2.6850035876584546e-05, "loss": 0.0091, "step": 19542 }, { "epoch": 2.7390329362298527, "grad_norm": 0.2507941424846649, "learning_rate": 2.6835685242764888e-05, "loss": 0.0291, "step": 19543 }, { "epoch": 2.739173090399439, "grad_norm": 0.2393520623445511, "learning_rate": 
2.6821334608945223e-05, "loss": 0.0177, "step": 19544 }, { "epoch": 2.739313244569026, "grad_norm": 0.013296181336045265, "learning_rate": 2.6806983975125565e-05, "loss": 0.001, "step": 19545 }, { "epoch": 2.7394533987386125, "grad_norm": 0.7778105139732361, "learning_rate": 2.6792633341305904e-05, "loss": 0.0713, "step": 19546 }, { "epoch": 2.739593552908199, "grad_norm": 0.28636693954467773, "learning_rate": 2.6778282707486246e-05, "loss": 0.0175, "step": 19547 }, { "epoch": 2.7397337070777854, "grad_norm": 0.06413032859563828, "learning_rate": 2.676393207366658e-05, "loss": 0.0094, "step": 19548 }, { "epoch": 2.7398738612473723, "grad_norm": 0.17904096841812134, "learning_rate": 2.6749581439846923e-05, "loss": 0.0315, "step": 19549 }, { "epoch": 2.7400140154169588, "grad_norm": 0.19152651727199554, "learning_rate": 2.6735230806027262e-05, "loss": 0.016, "step": 19550 }, { "epoch": 2.7401541695865452, "grad_norm": 0.2007782757282257, "learning_rate": 2.6720880172207604e-05, "loss": 0.0209, "step": 19551 }, { "epoch": 2.7402943237561317, "grad_norm": 0.1621219962835312, "learning_rate": 2.670652953838794e-05, "loss": 0.0282, "step": 19552 }, { "epoch": 2.740434477925718, "grad_norm": 0.2676963210105896, "learning_rate": 2.6692178904568282e-05, "loss": 0.0234, "step": 19553 }, { "epoch": 2.7405746320953046, "grad_norm": 0.16784681379795074, "learning_rate": 2.667782827074862e-05, "loss": 0.0221, "step": 19554 }, { "epoch": 2.7407147862648915, "grad_norm": 0.12461447715759277, "learning_rate": 2.6663477636928963e-05, "loss": 0.0222, "step": 19555 }, { "epoch": 2.740854940434478, "grad_norm": 0.09514413774013519, "learning_rate": 2.6649127003109298e-05, "loss": 0.0061, "step": 19556 }, { "epoch": 2.7409950946040644, "grad_norm": 0.18024393916130066, "learning_rate": 2.663477636928964e-05, "loss": 0.0421, "step": 19557 }, { "epoch": 2.741135248773651, "grad_norm": 0.1847964972257614, "learning_rate": 2.6620425735469982e-05, "loss": 0.0467, "step": 19558 }, { "epoch": 
2.741275402943238, "grad_norm": 0.12194634974002838, "learning_rate": 2.660607510165032e-05, "loss": 0.0069, "step": 19559 }, { "epoch": 2.7414155571128243, "grad_norm": 0.07754049450159073, "learning_rate": 2.6591724467830663e-05, "loss": 0.0153, "step": 19560 }, { "epoch": 2.7415557112824107, "grad_norm": 0.14624209702014923, "learning_rate": 2.6577373834011e-05, "loss": 0.0159, "step": 19561 }, { "epoch": 2.741695865451997, "grad_norm": 0.18295606970787048, "learning_rate": 2.656302320019134e-05, "loss": 0.0112, "step": 19562 }, { "epoch": 2.7418360196215836, "grad_norm": 0.15413790941238403, "learning_rate": 2.654867256637168e-05, "loss": 0.0424, "step": 19563 }, { "epoch": 2.74197617379117, "grad_norm": 0.1537507176399231, "learning_rate": 2.653432193255202e-05, "loss": 0.0227, "step": 19564 }, { "epoch": 2.742116327960757, "grad_norm": 0.1616927981376648, "learning_rate": 2.6519971298732357e-05, "loss": 0.0075, "step": 19565 }, { "epoch": 2.7422564821303435, "grad_norm": 0.3945559859275818, "learning_rate": 2.65056206649127e-05, "loss": 0.0398, "step": 19566 }, { "epoch": 2.74239663629993, "grad_norm": 0.45897382497787476, "learning_rate": 2.6491270031093037e-05, "loss": 0.0563, "step": 19567 }, { "epoch": 2.7425367904695164, "grad_norm": 0.3488963842391968, "learning_rate": 2.647691939727338e-05, "loss": 0.068, "step": 19568 }, { "epoch": 2.7426769446391033, "grad_norm": 0.6251013875007629, "learning_rate": 2.6462568763453715e-05, "loss": 0.0507, "step": 19569 }, { "epoch": 2.7428170988086897, "grad_norm": 4.094577789306641, "learning_rate": 2.6448218129634057e-05, "loss": 0.5689, "step": 19570 }, { "epoch": 2.742957252978276, "grad_norm": 0.027309728786349297, "learning_rate": 2.6433867495814396e-05, "loss": 0.0026, "step": 19571 }, { "epoch": 2.7430974071478627, "grad_norm": 0.1594122350215912, "learning_rate": 2.6419516861994738e-05, "loss": 0.0135, "step": 19572 }, { "epoch": 2.743237561317449, "grad_norm": 0.5485442876815796, "learning_rate": 
2.6405166228175073e-05, "loss": 0.0243, "step": 19573 }, { "epoch": 2.7433777154870356, "grad_norm": 0.1151149645447731, "learning_rate": 2.6390815594355415e-05, "loss": 0.0158, "step": 19574 }, { "epoch": 2.743517869656622, "grad_norm": 0.2660016119480133, "learning_rate": 2.6376464960535754e-05, "loss": 0.0292, "step": 19575 }, { "epoch": 2.743658023826209, "grad_norm": 0.08330038189888, "learning_rate": 2.6362114326716096e-05, "loss": 0.0115, "step": 19576 }, { "epoch": 2.7437981779957954, "grad_norm": 0.13475893437862396, "learning_rate": 2.634776369289643e-05, "loss": 0.0287, "step": 19577 }, { "epoch": 2.743938332165382, "grad_norm": 0.3418802320957184, "learning_rate": 2.6333413059076774e-05, "loss": 0.0768, "step": 19578 }, { "epoch": 2.7440784863349683, "grad_norm": 0.30021992325782776, "learning_rate": 2.6319062425257112e-05, "loss": 0.054, "step": 19579 }, { "epoch": 2.744218640504555, "grad_norm": 0.5679717659950256, "learning_rate": 2.6304711791437454e-05, "loss": 0.1236, "step": 19580 }, { "epoch": 2.7443587946741417, "grad_norm": 0.23690758645534515, "learning_rate": 2.629036115761779e-05, "loss": 0.0233, "step": 19581 }, { "epoch": 2.744498948843728, "grad_norm": 0.24494487047195435, "learning_rate": 2.6276010523798132e-05, "loss": 0.0371, "step": 19582 }, { "epoch": 2.7446391030133146, "grad_norm": 0.11789540201425552, "learning_rate": 2.626165988997847e-05, "loss": 0.0199, "step": 19583 }, { "epoch": 2.744779257182901, "grad_norm": 0.06834021955728531, "learning_rate": 2.6247309256158813e-05, "loss": 0.0032, "step": 19584 }, { "epoch": 2.7449194113524875, "grad_norm": 0.26725703477859497, "learning_rate": 2.6232958622339148e-05, "loss": 0.0179, "step": 19585 }, { "epoch": 2.7450595655220744, "grad_norm": 0.01768028549849987, "learning_rate": 2.621860798851949e-05, "loss": 0.0016, "step": 19586 }, { "epoch": 2.745199719691661, "grad_norm": 0.049925513565540314, "learning_rate": 2.620425735469983e-05, "loss": 0.0025, "step": 19587 }, { "epoch": 
2.7453398738612473, "grad_norm": 0.2147846668958664, "learning_rate": 2.618990672088017e-05, "loss": 0.0337, "step": 19588 }, { "epoch": 2.745480028030834, "grad_norm": 0.37037205696105957, "learning_rate": 2.6175556087060506e-05, "loss": 0.0367, "step": 19589 }, { "epoch": 2.7456201822004207, "grad_norm": 0.08151174336671829, "learning_rate": 2.616120545324085e-05, "loss": 0.0095, "step": 19590 }, { "epoch": 2.745760336370007, "grad_norm": 0.5276138186454773, "learning_rate": 2.6146854819421187e-05, "loss": 0.0396, "step": 19591 }, { "epoch": 2.7459004905395936, "grad_norm": 0.08152516931295395, "learning_rate": 2.613250418560153e-05, "loss": 0.0076, "step": 19592 }, { "epoch": 2.74604064470918, "grad_norm": 0.052304986864328384, "learning_rate": 2.6118153551781865e-05, "loss": 0.0182, "step": 19593 }, { "epoch": 2.7461807988787665, "grad_norm": 0.18098177015781403, "learning_rate": 2.6103802917962207e-05, "loss": 0.0083, "step": 19594 }, { "epoch": 2.746320953048353, "grad_norm": 0.4572729170322418, "learning_rate": 2.608945228414255e-05, "loss": 0.0622, "step": 19595 }, { "epoch": 2.7464611072179395, "grad_norm": 0.153750479221344, "learning_rate": 2.6075101650322888e-05, "loss": 0.0077, "step": 19596 }, { "epoch": 2.7466012613875264, "grad_norm": 0.19995661079883575, "learning_rate": 2.606075101650323e-05, "loss": 0.0219, "step": 19597 }, { "epoch": 2.746741415557113, "grad_norm": 0.19544510543346405, "learning_rate": 2.6046400382683565e-05, "loss": 0.0139, "step": 19598 }, { "epoch": 2.7468815697266993, "grad_norm": 0.14776603877544403, "learning_rate": 2.6032049748863907e-05, "loss": 0.014, "step": 19599 }, { "epoch": 2.747021723896286, "grad_norm": 0.21596087515354156, "learning_rate": 2.6017699115044246e-05, "loss": 0.0251, "step": 19600 }, { "epoch": 2.7471618780658726, "grad_norm": 0.25250551104545593, "learning_rate": 2.6003348481224588e-05, "loss": 0.0269, "step": 19601 }, { "epoch": 2.747302032235459, "grad_norm": 0.11572522670030594, "learning_rate": 
2.5988997847404923e-05, "loss": 0.0182, "step": 19602 }, { "epoch": 2.7474421864050456, "grad_norm": 0.22012758255004883, "learning_rate": 2.5974647213585265e-05, "loss": 0.0333, "step": 19603 }, { "epoch": 2.747582340574632, "grad_norm": 0.23778599500656128, "learning_rate": 2.5960296579765604e-05, "loss": 0.0196, "step": 19604 }, { "epoch": 2.7477224947442185, "grad_norm": 0.11386556923389435, "learning_rate": 2.5945945945945946e-05, "loss": 0.0332, "step": 19605 }, { "epoch": 2.747862648913805, "grad_norm": 0.42700856924057007, "learning_rate": 2.593159531212628e-05, "loss": 0.0526, "step": 19606 }, { "epoch": 2.748002803083392, "grad_norm": 0.18638870120048523, "learning_rate": 2.5917244678306624e-05, "loss": 0.0254, "step": 19607 }, { "epoch": 2.7481429572529783, "grad_norm": 0.1494918018579483, "learning_rate": 2.5902894044486962e-05, "loss": 0.0355, "step": 19608 }, { "epoch": 2.7482831114225648, "grad_norm": 0.20779912173748016, "learning_rate": 2.5888543410667305e-05, "loss": 0.0084, "step": 19609 }, { "epoch": 2.748423265592151, "grad_norm": 0.20620432496070862, "learning_rate": 2.587419277684764e-05, "loss": 0.0187, "step": 19610 }, { "epoch": 2.748563419761738, "grad_norm": 0.43751242756843567, "learning_rate": 2.5859842143027982e-05, "loss": 0.043, "step": 19611 }, { "epoch": 2.7487035739313246, "grad_norm": 0.22197231650352478, "learning_rate": 2.584549150920832e-05, "loss": 0.0135, "step": 19612 }, { "epoch": 2.748843728100911, "grad_norm": 0.26453399658203125, "learning_rate": 2.5831140875388663e-05, "loss": 0.0148, "step": 19613 }, { "epoch": 2.7489838822704975, "grad_norm": 0.2887434661388397, "learning_rate": 2.5816790241568998e-05, "loss": 0.0331, "step": 19614 }, { "epoch": 2.749124036440084, "grad_norm": 0.03805162012577057, "learning_rate": 2.580243960774934e-05, "loss": 0.002, "step": 19615 }, { "epoch": 2.7492641906096704, "grad_norm": 0.010262798517942429, "learning_rate": 2.578808897392968e-05, "loss": 0.001, "step": 19616 }, { "epoch": 
2.7494043447792573, "grad_norm": 0.2911091148853302, "learning_rate": 2.577373834011002e-05, "loss": 0.0155, "step": 19617 }, { "epoch": 2.749544498948844, "grad_norm": 0.1620534211397171, "learning_rate": 2.5759387706290356e-05, "loss": 0.0367, "step": 19618 }, { "epoch": 2.7496846531184302, "grad_norm": 1.4123246669769287, "learning_rate": 2.57450370724707e-05, "loss": 0.0814, "step": 19619 }, { "epoch": 2.7498248072880167, "grad_norm": 0.48660263419151306, "learning_rate": 2.5730686438651037e-05, "loss": 0.0499, "step": 19620 }, { "epoch": 2.7499649614576036, "grad_norm": 0.3663142919540405, "learning_rate": 2.571633580483138e-05, "loss": 0.0244, "step": 19621 }, { "epoch": 2.75010511562719, "grad_norm": 0.05115147680044174, "learning_rate": 2.5701985171011715e-05, "loss": 0.0036, "step": 19622 }, { "epoch": 2.7502452697967765, "grad_norm": 0.042254868894815445, "learning_rate": 2.5687634537192057e-05, "loss": 0.0036, "step": 19623 }, { "epoch": 2.750385423966363, "grad_norm": 0.203120157122612, "learning_rate": 2.5673283903372396e-05, "loss": 0.0615, "step": 19624 }, { "epoch": 2.7505255781359494, "grad_norm": 0.37443286180496216, "learning_rate": 2.5658933269552738e-05, "loss": 0.0494, "step": 19625 }, { "epoch": 2.750665732305536, "grad_norm": 0.09348617494106293, "learning_rate": 2.5644582635733073e-05, "loss": 0.0104, "step": 19626 }, { "epoch": 2.7508058864751224, "grad_norm": 0.14447706937789917, "learning_rate": 2.5630232001913415e-05, "loss": 0.0166, "step": 19627 }, { "epoch": 2.7509460406447093, "grad_norm": 0.5320804119110107, "learning_rate": 2.5615881368093754e-05, "loss": 0.0558, "step": 19628 }, { "epoch": 2.7510861948142957, "grad_norm": 0.16138537228107452, "learning_rate": 2.5601530734274096e-05, "loss": 0.0102, "step": 19629 }, { "epoch": 2.751226348983882, "grad_norm": 0.11705881357192993, "learning_rate": 2.558718010045443e-05, "loss": 0.0167, "step": 19630 }, { "epoch": 2.751366503153469, "grad_norm": 0.3916154205799103, "learning_rate": 
2.5572829466634773e-05, "loss": 0.0172, "step": 19631 }, { "epoch": 2.7515066573230555, "grad_norm": 0.09149889647960663, "learning_rate": 2.5558478832815116e-05, "loss": 0.0075, "step": 19632 }, { "epoch": 2.751646811492642, "grad_norm": 0.09864673763513565, "learning_rate": 2.5544128198995454e-05, "loss": 0.0088, "step": 19633 }, { "epoch": 2.7517869656622285, "grad_norm": 0.19561338424682617, "learning_rate": 2.5529777565175796e-05, "loss": 0.013, "step": 19634 }, { "epoch": 2.751927119831815, "grad_norm": 0.1205207109451294, "learning_rate": 2.551542693135613e-05, "loss": 0.0184, "step": 19635 }, { "epoch": 2.7520672740014014, "grad_norm": 0.08013292402029037, "learning_rate": 2.5501076297536474e-05, "loss": 0.0057, "step": 19636 }, { "epoch": 2.752207428170988, "grad_norm": 0.06723947077989578, "learning_rate": 2.5486725663716813e-05, "loss": 0.0068, "step": 19637 }, { "epoch": 2.7523475823405747, "grad_norm": 0.13616110384464264, "learning_rate": 2.5472375029897155e-05, "loss": 0.0211, "step": 19638 }, { "epoch": 2.752487736510161, "grad_norm": 0.10861470550298691, "learning_rate": 2.545802439607749e-05, "loss": 0.0091, "step": 19639 }, { "epoch": 2.7526278906797477, "grad_norm": 0.7247406244277954, "learning_rate": 2.5443673762257832e-05, "loss": 0.0639, "step": 19640 }, { "epoch": 2.752768044849334, "grad_norm": 0.12205429375171661, "learning_rate": 2.542932312843817e-05, "loss": 0.0163, "step": 19641 }, { "epoch": 2.752908199018921, "grad_norm": 0.2109237015247345, "learning_rate": 2.5414972494618513e-05, "loss": 0.0303, "step": 19642 }, { "epoch": 2.7530483531885075, "grad_norm": 0.23513998091220856, "learning_rate": 2.5400621860798848e-05, "loss": 0.0118, "step": 19643 }, { "epoch": 2.753188507358094, "grad_norm": 0.07528147101402283, "learning_rate": 2.538627122697919e-05, "loss": 0.0023, "step": 19644 }, { "epoch": 2.7533286615276804, "grad_norm": 0.14794763922691345, "learning_rate": 2.537192059315953e-05, "loss": 0.0041, "step": 19645 }, { "epoch": 
2.753468815697267, "grad_norm": 0.2268216907978058, "learning_rate": 2.535756995933987e-05, "loss": 0.0253, "step": 19646 }, { "epoch": 2.7536089698668533, "grad_norm": 0.3463696539402008, "learning_rate": 2.5343219325520207e-05, "loss": 0.0326, "step": 19647 }, { "epoch": 2.7537491240364402, "grad_norm": 0.09093203395605087, "learning_rate": 2.532886869170055e-05, "loss": 0.0234, "step": 19648 }, { "epoch": 2.7538892782060267, "grad_norm": 0.311085045337677, "learning_rate": 2.5314518057880887e-05, "loss": 0.0422, "step": 19649 }, { "epoch": 2.754029432375613, "grad_norm": 0.05444694310426712, "learning_rate": 2.530016742406123e-05, "loss": 0.0051, "step": 19650 }, { "epoch": 2.7541695865451996, "grad_norm": 0.20184659957885742, "learning_rate": 2.5285816790241565e-05, "loss": 0.0106, "step": 19651 }, { "epoch": 2.7543097407147865, "grad_norm": 0.1727100908756256, "learning_rate": 2.5271466156421907e-05, "loss": 0.0161, "step": 19652 }, { "epoch": 2.754449894884373, "grad_norm": 0.22122488915920258, "learning_rate": 2.5257115522602246e-05, "loss": 0.0396, "step": 19653 }, { "epoch": 2.7545900490539594, "grad_norm": 0.22743038833141327, "learning_rate": 2.5242764888782588e-05, "loss": 0.0196, "step": 19654 }, { "epoch": 2.754730203223546, "grad_norm": 0.14631937444210052, "learning_rate": 2.5228414254962923e-05, "loss": 0.0208, "step": 19655 }, { "epoch": 2.7548703573931324, "grad_norm": 0.12212645262479782, "learning_rate": 2.5214063621143265e-05, "loss": 0.0037, "step": 19656 }, { "epoch": 2.755010511562719, "grad_norm": 0.6375158429145813, "learning_rate": 2.5199712987323604e-05, "loss": 0.0292, "step": 19657 }, { "epoch": 2.7551506657323053, "grad_norm": 0.2148681879043579, "learning_rate": 2.5185362353503946e-05, "loss": 0.0342, "step": 19658 }, { "epoch": 2.755290819901892, "grad_norm": 0.6318073868751526, "learning_rate": 2.517101171968428e-05, "loss": 0.0207, "step": 19659 }, { "epoch": 2.7554309740714786, "grad_norm": 0.26880159974098206, "learning_rate": 
2.5156661085864624e-05, "loss": 0.0309, "step": 19660 }, { "epoch": 2.755571128241065, "grad_norm": 0.11923572421073914, "learning_rate": 2.5142310452044962e-05, "loss": 0.0184, "step": 19661 }, { "epoch": 2.7557112824106516, "grad_norm": 0.8590844869613647, "learning_rate": 2.5127959818225304e-05, "loss": 0.0572, "step": 19662 }, { "epoch": 2.7558514365802385, "grad_norm": 0.1506556272506714, "learning_rate": 2.511360918440564e-05, "loss": 0.0048, "step": 19663 }, { "epoch": 2.755991590749825, "grad_norm": 0.12805749475955963, "learning_rate": 2.5099258550585982e-05, "loss": 0.0102, "step": 19664 }, { "epoch": 2.7561317449194114, "grad_norm": 0.43791666626930237, "learning_rate": 2.508490791676632e-05, "loss": 0.034, "step": 19665 }, { "epoch": 2.756271899088998, "grad_norm": 1.111950397491455, "learning_rate": 2.5070557282946663e-05, "loss": 0.0515, "step": 19666 }, { "epoch": 2.7564120532585843, "grad_norm": 0.5381291508674622, "learning_rate": 2.5056206649126998e-05, "loss": 0.076, "step": 19667 }, { "epoch": 2.7565522074281708, "grad_norm": 0.3691125810146332, "learning_rate": 2.504185601530734e-05, "loss": 0.0138, "step": 19668 }, { "epoch": 2.7566923615977577, "grad_norm": 0.31792792677879333, "learning_rate": 2.5027505381487682e-05, "loss": 0.022, "step": 19669 }, { "epoch": 2.756832515767344, "grad_norm": 0.9472073912620544, "learning_rate": 2.501315474766802e-05, "loss": 0.0327, "step": 19670 }, { "epoch": 2.7569726699369306, "grad_norm": 0.02514302171766758, "learning_rate": 2.4998804113848363e-05, "loss": 0.0025, "step": 19671 }, { "epoch": 2.757112824106517, "grad_norm": 0.14029261469841003, "learning_rate": 2.49844534800287e-05, "loss": 0.0085, "step": 19672 }, { "epoch": 2.757252978276104, "grad_norm": 0.2055148184299469, "learning_rate": 2.497010284620904e-05, "loss": 0.011, "step": 19673 }, { "epoch": 2.7573931324456904, "grad_norm": 0.40623387694358826, "learning_rate": 2.495575221238938e-05, "loss": 0.0211, "step": 19674 }, { "epoch": 
2.757533286615277, "grad_norm": 0.1334078311920166, "learning_rate": 2.494140157856972e-05, "loss": 0.0162, "step": 19675 }, { "epoch": 2.7576734407848633, "grad_norm": 0.21310746669769287, "learning_rate": 2.4927050944750057e-05, "loss": 0.0188, "step": 19676 }, { "epoch": 2.7578135949544498, "grad_norm": 0.21902373433113098, "learning_rate": 2.49127003109304e-05, "loss": 0.0439, "step": 19677 }, { "epoch": 2.7579537491240362, "grad_norm": 0.1510096937417984, "learning_rate": 2.4898349677110738e-05, "loss": 0.0392, "step": 19678 }, { "epoch": 2.758093903293623, "grad_norm": 0.13296210765838623, "learning_rate": 2.488399904329108e-05, "loss": 0.0029, "step": 19679 }, { "epoch": 2.7582340574632096, "grad_norm": 0.08638874441385269, "learning_rate": 2.4869648409471415e-05, "loss": 0.0075, "step": 19680 }, { "epoch": 2.758374211632796, "grad_norm": 0.543943464756012, "learning_rate": 2.4855297775651757e-05, "loss": 0.0472, "step": 19681 }, { "epoch": 2.7585143658023825, "grad_norm": 0.18065424263477325, "learning_rate": 2.4840947141832096e-05, "loss": 0.0141, "step": 19682 }, { "epoch": 2.7586545199719694, "grad_norm": 0.203375443816185, "learning_rate": 2.4826596508012438e-05, "loss": 0.005, "step": 19683 }, { "epoch": 2.758794674141556, "grad_norm": 0.10661984980106354, "learning_rate": 2.4812245874192773e-05, "loss": 0.0232, "step": 19684 }, { "epoch": 2.7589348283111423, "grad_norm": 0.15223032236099243, "learning_rate": 2.4797895240373115e-05, "loss": 0.0329, "step": 19685 }, { "epoch": 2.759074982480729, "grad_norm": 0.3048984408378601, "learning_rate": 2.4783544606553454e-05, "loss": 0.0201, "step": 19686 }, { "epoch": 2.7592151366503153, "grad_norm": 0.15029452741146088, "learning_rate": 2.4769193972733796e-05, "loss": 0.0088, "step": 19687 }, { "epoch": 2.7593552908199017, "grad_norm": 0.1745430827140808, "learning_rate": 2.475484333891413e-05, "loss": 0.0177, "step": 19688 }, { "epoch": 2.759495444989488, "grad_norm": 0.06062167137861252, "learning_rate": 
2.4740492705094474e-05, "loss": 0.0073, "step": 19689 }, { "epoch": 2.759635599159075, "grad_norm": 0.28899627923965454, "learning_rate": 2.4726142071274812e-05, "loss": 0.0242, "step": 19690 }, { "epoch": 2.7597757533286615, "grad_norm": 0.1996884047985077, "learning_rate": 2.4711791437455154e-05, "loss": 0.0511, "step": 19691 }, { "epoch": 2.759915907498248, "grad_norm": 0.13133123517036438, "learning_rate": 2.469744080363549e-05, "loss": 0.031, "step": 19692 }, { "epoch": 2.7600560616678345, "grad_norm": 0.274278849363327, "learning_rate": 2.4683090169815832e-05, "loss": 0.0329, "step": 19693 }, { "epoch": 2.7601962158374214, "grad_norm": 0.6064431071281433, "learning_rate": 2.466873953599617e-05, "loss": 0.0255, "step": 19694 }, { "epoch": 2.760336370007008, "grad_norm": 0.9422585368156433, "learning_rate": 2.4654388902176513e-05, "loss": 0.0993, "step": 19695 }, { "epoch": 2.7604765241765943, "grad_norm": 0.3376786708831787, "learning_rate": 2.4640038268356848e-05, "loss": 0.0288, "step": 19696 }, { "epoch": 2.7606166783461807, "grad_norm": 0.1295783519744873, "learning_rate": 2.462568763453719e-05, "loss": 0.0093, "step": 19697 }, { "epoch": 2.760756832515767, "grad_norm": 0.18326438963413239, "learning_rate": 2.461133700071753e-05, "loss": 0.0136, "step": 19698 }, { "epoch": 2.7608969866853537, "grad_norm": 0.05002005025744438, "learning_rate": 2.459698636689787e-05, "loss": 0.0052, "step": 19699 }, { "epoch": 2.7610371408549406, "grad_norm": 0.29818522930145264, "learning_rate": 2.4582635733078206e-05, "loss": 0.0064, "step": 19700 }, { "epoch": 2.761177295024527, "grad_norm": 0.2192409634590149, "learning_rate": 2.456828509925855e-05, "loss": 0.0128, "step": 19701 }, { "epoch": 2.7613174491941135, "grad_norm": 0.10857821255922318, "learning_rate": 2.4553934465438887e-05, "loss": 0.0139, "step": 19702 }, { "epoch": 2.7614576033637, "grad_norm": 0.047642312943935394, "learning_rate": 2.453958383161923e-05, "loss": 0.0023, "step": 19703 }, { "epoch": 
2.761597757533287, "grad_norm": 0.11918731033802032, "learning_rate": 2.4525233197799565e-05, "loss": 0.021, "step": 19704 }, { "epoch": 2.7617379117028733, "grad_norm": 0.14333125948905945, "learning_rate": 2.4510882563979907e-05, "loss": 0.0241, "step": 19705 }, { "epoch": 2.7618780658724598, "grad_norm": 0.03244173899292946, "learning_rate": 2.449653193016025e-05, "loss": 0.0016, "step": 19706 }, { "epoch": 2.7620182200420462, "grad_norm": 0.4298053979873657, "learning_rate": 2.4482181296340588e-05, "loss": 0.0354, "step": 19707 }, { "epoch": 2.7621583742116327, "grad_norm": 0.3704368472099304, "learning_rate": 2.4467830662520926e-05, "loss": 0.039, "step": 19708 }, { "epoch": 2.762298528381219, "grad_norm": 0.12969982624053955, "learning_rate": 2.4453480028701265e-05, "loss": 0.0095, "step": 19709 }, { "epoch": 2.762438682550806, "grad_norm": 0.1285260021686554, "learning_rate": 2.4439129394881607e-05, "loss": 0.0077, "step": 19710 }, { "epoch": 2.7625788367203925, "grad_norm": 0.07477578520774841, "learning_rate": 2.4424778761061946e-05, "loss": 0.0063, "step": 19711 }, { "epoch": 2.762718990889979, "grad_norm": 0.9807720184326172, "learning_rate": 2.4410428127242285e-05, "loss": 0.0771, "step": 19712 }, { "epoch": 2.7628591450595654, "grad_norm": 0.09606026113033295, "learning_rate": 2.4396077493422623e-05, "loss": 0.0191, "step": 19713 }, { "epoch": 2.7629992992291523, "grad_norm": 0.027572304010391235, "learning_rate": 2.4381726859602965e-05, "loss": 0.0023, "step": 19714 }, { "epoch": 2.763139453398739, "grad_norm": 0.10843207687139511, "learning_rate": 2.4367376225783304e-05, "loss": 0.0062, "step": 19715 }, { "epoch": 2.7632796075683252, "grad_norm": 0.5103393197059631, "learning_rate": 2.4353025591963643e-05, "loss": 0.0377, "step": 19716 }, { "epoch": 2.7634197617379117, "grad_norm": 0.15288607776165009, "learning_rate": 2.433867495814398e-05, "loss": 0.0122, "step": 19717 }, { "epoch": 2.763559915907498, "grad_norm": 0.3308166265487671, 
"learning_rate": 2.4324324324324324e-05, "loss": 0.0082, "step": 19718 }, { "epoch": 2.7637000700770846, "grad_norm": 0.6372894644737244, "learning_rate": 2.4309973690504662e-05, "loss": 0.0976, "step": 19719 }, { "epoch": 2.763840224246671, "grad_norm": 0.4351816475391388, "learning_rate": 2.4295623056685e-05, "loss": 0.0325, "step": 19720 }, { "epoch": 2.763980378416258, "grad_norm": 0.7362777590751648, "learning_rate": 2.428127242286534e-05, "loss": 0.1098, "step": 19721 }, { "epoch": 2.7641205325858444, "grad_norm": 0.24201729893684387, "learning_rate": 2.4266921789045682e-05, "loss": 0.025, "step": 19722 }, { "epoch": 2.764260686755431, "grad_norm": 0.171593576669693, "learning_rate": 2.425257115522602e-05, "loss": 0.0139, "step": 19723 }, { "epoch": 2.7644008409250174, "grad_norm": 0.25515106320381165, "learning_rate": 2.423822052140636e-05, "loss": 0.0599, "step": 19724 }, { "epoch": 2.7645409950946043, "grad_norm": 0.19472874701023102, "learning_rate": 2.4223869887586698e-05, "loss": 0.0332, "step": 19725 }, { "epoch": 2.7646811492641907, "grad_norm": 0.10033785551786423, "learning_rate": 2.420951925376704e-05, "loss": 0.0111, "step": 19726 }, { "epoch": 2.764821303433777, "grad_norm": 0.36555203795433044, "learning_rate": 2.419516861994738e-05, "loss": 0.0106, "step": 19727 }, { "epoch": 2.7649614576033636, "grad_norm": 0.26554426550865173, "learning_rate": 2.4180817986127718e-05, "loss": 0.0148, "step": 19728 }, { "epoch": 2.76510161177295, "grad_norm": 0.23013059794902802, "learning_rate": 2.4166467352308056e-05, "loss": 0.0356, "step": 19729 }, { "epoch": 2.7652417659425366, "grad_norm": 0.16837471723556519, "learning_rate": 2.41521167184884e-05, "loss": 0.018, "step": 19730 }, { "epoch": 2.7653819201121235, "grad_norm": 0.44855231046676636, "learning_rate": 2.4137766084668737e-05, "loss": 0.0143, "step": 19731 }, { "epoch": 2.76552207428171, "grad_norm": 0.13251405954360962, "learning_rate": 2.4123415450849076e-05, "loss": 0.0071, "step": 19732 }, { 
"epoch": 2.7656622284512964, "grad_norm": 0.1331344097852707, "learning_rate": 2.4109064817029415e-05, "loss": 0.0239, "step": 19733 }, { "epoch": 2.765802382620883, "grad_norm": 0.08691603690385818, "learning_rate": 2.4094714183209757e-05, "loss": 0.0061, "step": 19734 }, { "epoch": 2.7659425367904698, "grad_norm": 0.2996131479740143, "learning_rate": 2.4080363549390096e-05, "loss": 0.0088, "step": 19735 }, { "epoch": 2.766082690960056, "grad_norm": 0.11712979525327682, "learning_rate": 2.4066012915570434e-05, "loss": 0.0281, "step": 19736 }, { "epoch": 2.7662228451296427, "grad_norm": 0.15780794620513916, "learning_rate": 2.4051662281750773e-05, "loss": 0.0216, "step": 19737 }, { "epoch": 2.766362999299229, "grad_norm": 0.28268617391586304, "learning_rate": 2.4037311647931115e-05, "loss": 0.0268, "step": 19738 }, { "epoch": 2.7665031534688156, "grad_norm": 0.2745468020439148, "learning_rate": 2.4022961014111454e-05, "loss": 0.0127, "step": 19739 }, { "epoch": 2.766643307638402, "grad_norm": 0.12582933902740479, "learning_rate": 2.4008610380291793e-05, "loss": 0.0155, "step": 19740 }, { "epoch": 2.766783461807989, "grad_norm": 0.07017385214567184, "learning_rate": 2.399425974647213e-05, "loss": 0.0069, "step": 19741 }, { "epoch": 2.7669236159775754, "grad_norm": 0.22924700379371643, "learning_rate": 2.3979909112652473e-05, "loss": 0.0289, "step": 19742 }, { "epoch": 2.767063770147162, "grad_norm": 0.26382994651794434, "learning_rate": 2.3965558478832816e-05, "loss": 0.012, "step": 19743 }, { "epoch": 2.7672039243167483, "grad_norm": 0.1538844257593155, "learning_rate": 2.395120784501315e-05, "loss": 0.0222, "step": 19744 }, { "epoch": 2.7673440784863352, "grad_norm": 0.12456761300563812, "learning_rate": 2.3936857211193493e-05, "loss": 0.0084, "step": 19745 }, { "epoch": 2.7674842326559217, "grad_norm": 0.11433926969766617, "learning_rate": 2.3922506577373832e-05, "loss": 0.0156, "step": 19746 }, { "epoch": 2.767624386825508, "grad_norm": 0.037178389728069305, 
"learning_rate": 2.3908155943554174e-05, "loss": 0.0023, "step": 19747 }, { "epoch": 2.7677645409950946, "grad_norm": 0.12188324332237244, "learning_rate": 2.389380530973451e-05, "loss": 0.0287, "step": 19748 }, { "epoch": 2.767904695164681, "grad_norm": 0.46723467111587524, "learning_rate": 2.387945467591485e-05, "loss": 0.0385, "step": 19749 }, { "epoch": 2.7680448493342675, "grad_norm": 0.0709342509508133, "learning_rate": 2.386510404209519e-05, "loss": 0.0055, "step": 19750 }, { "epoch": 2.768185003503854, "grad_norm": 0.17604003846645355, "learning_rate": 2.3850753408275532e-05, "loss": 0.0237, "step": 19751 }, { "epoch": 2.768325157673441, "grad_norm": 0.21704339981079102, "learning_rate": 2.3836402774455867e-05, "loss": 0.0267, "step": 19752 }, { "epoch": 2.7684653118430274, "grad_norm": 0.09651315957307816, "learning_rate": 2.382205214063621e-05, "loss": 0.007, "step": 19753 }, { "epoch": 2.768605466012614, "grad_norm": 0.14743298292160034, "learning_rate": 2.380770150681655e-05, "loss": 0.0199, "step": 19754 }, { "epoch": 2.7687456201822003, "grad_norm": 0.20465560257434845, "learning_rate": 2.379335087299689e-05, "loss": 0.0086, "step": 19755 }, { "epoch": 2.768885774351787, "grad_norm": 0.1472039520740509, "learning_rate": 2.3779000239177226e-05, "loss": 0.0346, "step": 19756 }, { "epoch": 2.7690259285213736, "grad_norm": 0.02799607440829277, "learning_rate": 2.3764649605357568e-05, "loss": 0.0014, "step": 19757 }, { "epoch": 2.76916608269096, "grad_norm": 0.3255635201931, "learning_rate": 2.3750298971537907e-05, "loss": 0.0657, "step": 19758 }, { "epoch": 2.7693062368605466, "grad_norm": 0.3716498911380768, "learning_rate": 2.373594833771825e-05, "loss": 0.0741, "step": 19759 }, { "epoch": 2.769446391030133, "grad_norm": 0.10132478922605515, "learning_rate": 2.3721597703898587e-05, "loss": 0.0133, "step": 19760 }, { "epoch": 2.7695865451997195, "grad_norm": 0.09269966185092926, "learning_rate": 2.3707247070078926e-05, "loss": 0.007, "step": 19761 }, { 
"epoch": 2.7697266993693064, "grad_norm": 0.26648542284965515, "learning_rate": 2.3692896436259265e-05, "loss": 0.0169, "step": 19762 }, { "epoch": 2.769866853538893, "grad_norm": 0.14521369338035583, "learning_rate": 2.3678545802439607e-05, "loss": 0.0173, "step": 19763 }, { "epoch": 2.7700070077084793, "grad_norm": 0.09749699383974075, "learning_rate": 2.3664195168619946e-05, "loss": 0.0077, "step": 19764 }, { "epoch": 2.7701471618780658, "grad_norm": 0.32288554310798645, "learning_rate": 2.3649844534800284e-05, "loss": 0.0511, "step": 19765 }, { "epoch": 2.7702873160476527, "grad_norm": 0.03599060699343681, "learning_rate": 2.3635493900980623e-05, "loss": 0.003, "step": 19766 }, { "epoch": 2.770427470217239, "grad_norm": 1.1522167921066284, "learning_rate": 2.3621143267160965e-05, "loss": 0.0552, "step": 19767 }, { "epoch": 2.7705676243868256, "grad_norm": 0.09315192699432373, "learning_rate": 2.3606792633341304e-05, "loss": 0.0053, "step": 19768 }, { "epoch": 2.770707778556412, "grad_norm": 0.019055582582950592, "learning_rate": 2.3592441999521643e-05, "loss": 0.001, "step": 19769 }, { "epoch": 2.7708479327259985, "grad_norm": 0.11070076376199722, "learning_rate": 2.357809136570198e-05, "loss": 0.0045, "step": 19770 }, { "epoch": 2.770988086895585, "grad_norm": 0.06582643836736679, "learning_rate": 2.3563740731882324e-05, "loss": 0.0023, "step": 19771 }, { "epoch": 2.7711282410651714, "grad_norm": 0.3027840852737427, "learning_rate": 2.3549390098062662e-05, "loss": 0.0275, "step": 19772 }, { "epoch": 2.7712683952347583, "grad_norm": 0.09555289149284363, "learning_rate": 2.3535039464243e-05, "loss": 0.0163, "step": 19773 }, { "epoch": 2.771408549404345, "grad_norm": 0.21223174035549164, "learning_rate": 2.352068883042334e-05, "loss": 0.034, "step": 19774 }, { "epoch": 2.7715487035739312, "grad_norm": 0.11695139855146408, "learning_rate": 2.3506338196603682e-05, "loss": 0.0114, "step": 19775 }, { "epoch": 2.771688857743518, "grad_norm": 0.15395647287368774, 
"learning_rate": 2.349198756278402e-05, "loss": 0.0164, "step": 19776 }, { "epoch": 2.7718290119131046, "grad_norm": 0.2660770118236542, "learning_rate": 2.347763692896436e-05, "loss": 0.0808, "step": 19777 }, { "epoch": 2.771969166082691, "grad_norm": 0.21667182445526123, "learning_rate": 2.3463286295144698e-05, "loss": 0.0144, "step": 19778 }, { "epoch": 2.7721093202522775, "grad_norm": 0.44012385606765747, "learning_rate": 2.344893566132504e-05, "loss": 0.0255, "step": 19779 }, { "epoch": 2.772249474421864, "grad_norm": 0.18145763874053955, "learning_rate": 2.3434585027505382e-05, "loss": 0.0166, "step": 19780 }, { "epoch": 2.7723896285914504, "grad_norm": 0.4543394446372986, "learning_rate": 2.3420234393685718e-05, "loss": 0.0246, "step": 19781 }, { "epoch": 2.772529782761037, "grad_norm": 0.08987830579280853, "learning_rate": 2.340588375986606e-05, "loss": 0.0074, "step": 19782 }, { "epoch": 2.772669936930624, "grad_norm": 0.6360204219818115, "learning_rate": 2.33915331260464e-05, "loss": 0.0788, "step": 19783 }, { "epoch": 2.7728100911002103, "grad_norm": 0.3392009735107422, "learning_rate": 2.337718249222674e-05, "loss": 0.0352, "step": 19784 }, { "epoch": 2.7729502452697967, "grad_norm": 0.17291054129600525, "learning_rate": 2.3362831858407076e-05, "loss": 0.0164, "step": 19785 }, { "epoch": 2.773090399439383, "grad_norm": 0.2199353277683258, "learning_rate": 2.3348481224587418e-05, "loss": 0.0226, "step": 19786 }, { "epoch": 2.77323055360897, "grad_norm": 0.5193815231323242, "learning_rate": 2.3334130590767757e-05, "loss": 0.0349, "step": 19787 }, { "epoch": 2.7733707077785565, "grad_norm": 0.006943926215171814, "learning_rate": 2.33197799569481e-05, "loss": 0.0007, "step": 19788 }, { "epoch": 2.773510861948143, "grad_norm": 0.4154582917690277, "learning_rate": 2.3305429323128434e-05, "loss": 0.0661, "step": 19789 }, { "epoch": 2.7736510161177295, "grad_norm": 0.3527981638908386, "learning_rate": 2.3291078689308776e-05, "loss": 0.0773, "step": 19790 }, { 
"epoch": 2.773791170287316, "grad_norm": 0.0982864573597908, "learning_rate": 2.3276728055489115e-05, "loss": 0.0107, "step": 19791 }, { "epoch": 2.7739313244569024, "grad_norm": 0.27146583795547485, "learning_rate": 2.3262377421669457e-05, "loss": 0.0248, "step": 19792 }, { "epoch": 2.7740714786264893, "grad_norm": 0.13046716153621674, "learning_rate": 2.3248026787849792e-05, "loss": 0.01, "step": 19793 }, { "epoch": 2.7742116327960757, "grad_norm": 0.14189113676548004, "learning_rate": 2.3233676154030135e-05, "loss": 0.0084, "step": 19794 }, { "epoch": 2.774351786965662, "grad_norm": 0.5580766201019287, "learning_rate": 2.3219325520210473e-05, "loss": 0.0285, "step": 19795 }, { "epoch": 2.7744919411352487, "grad_norm": 0.18864187598228455, "learning_rate": 2.3204974886390815e-05, "loss": 0.0188, "step": 19796 }, { "epoch": 2.7746320953048356, "grad_norm": 0.15269257128238678, "learning_rate": 2.319062425257115e-05, "loss": 0.019, "step": 19797 }, { "epoch": 2.774772249474422, "grad_norm": 0.12125962972640991, "learning_rate": 2.3176273618751493e-05, "loss": 0.007, "step": 19798 }, { "epoch": 2.7749124036440085, "grad_norm": 0.34460729360580444, "learning_rate": 2.316192298493183e-05, "loss": 0.0524, "step": 19799 }, { "epoch": 2.775052557813595, "grad_norm": 0.20926158130168915, "learning_rate": 2.3147572351112174e-05, "loss": 0.0363, "step": 19800 }, { "epoch": 2.7751927119831814, "grad_norm": 0.20499832928180695, "learning_rate": 2.313322171729251e-05, "loss": 0.0291, "step": 19801 }, { "epoch": 2.775332866152768, "grad_norm": 0.0962163656949997, "learning_rate": 2.311887108347285e-05, "loss": 0.0066, "step": 19802 }, { "epoch": 2.7754730203223543, "grad_norm": 0.20544691383838654, "learning_rate": 2.310452044965319e-05, "loss": 0.014, "step": 19803 }, { "epoch": 2.7756131744919412, "grad_norm": 0.12060816586017609, "learning_rate": 2.3090169815833532e-05, "loss": 0.0097, "step": 19804 }, { "epoch": 2.7757533286615277, "grad_norm": 0.20883211493492126, 
"learning_rate": 2.3075819182013867e-05, "loss": 0.0266, "step": 19805 }, { "epoch": 2.775893482831114, "grad_norm": 0.25741854310035706, "learning_rate": 2.306146854819421e-05, "loss": 0.0689, "step": 19806 }, { "epoch": 2.7760336370007006, "grad_norm": 0.18505950272083282, "learning_rate": 2.3047117914374548e-05, "loss": 0.0306, "step": 19807 }, { "epoch": 2.7761737911702875, "grad_norm": 0.184142604470253, "learning_rate": 2.303276728055489e-05, "loss": 0.0088, "step": 19808 }, { "epoch": 2.776313945339874, "grad_norm": 0.06502658873796463, "learning_rate": 2.3018416646735226e-05, "loss": 0.0069, "step": 19809 }, { "epoch": 2.7764540995094604, "grad_norm": 0.2792748510837555, "learning_rate": 2.3004066012915568e-05, "loss": 0.0188, "step": 19810 }, { "epoch": 2.776594253679047, "grad_norm": 3.139632225036621, "learning_rate": 2.2989715379095906e-05, "loss": 0.0537, "step": 19811 }, { "epoch": 2.7767344078486333, "grad_norm": 0.012282947078347206, "learning_rate": 2.297536474527625e-05, "loss": 0.0008, "step": 19812 }, { "epoch": 2.77687456201822, "grad_norm": 0.30013370513916016, "learning_rate": 2.2961014111456584e-05, "loss": 0.067, "step": 19813 }, { "epoch": 2.7770147161878067, "grad_norm": 0.2789977192878723, "learning_rate": 2.2946663477636926e-05, "loss": 0.0562, "step": 19814 }, { "epoch": 2.777154870357393, "grad_norm": 0.5803288221359253, "learning_rate": 2.2932312843817265e-05, "loss": 0.0197, "step": 19815 }, { "epoch": 2.7772950245269796, "grad_norm": 0.019407566636800766, "learning_rate": 2.2917962209997607e-05, "loss": 0.0012, "step": 19816 }, { "epoch": 2.777435178696566, "grad_norm": 0.2308964878320694, "learning_rate": 2.290361157617795e-05, "loss": 0.0546, "step": 19817 }, { "epoch": 2.777575332866153, "grad_norm": 0.13067297637462616, "learning_rate": 2.2889260942358284e-05, "loss": 0.0044, "step": 19818 }, { "epoch": 2.7777154870357395, "grad_norm": 0.026862701401114464, "learning_rate": 2.2874910308538626e-05, "loss": 0.0017, "step": 19819 
}, { "epoch": 2.777855641205326, "grad_norm": 2.3220887184143066, "learning_rate": 2.2860559674718965e-05, "loss": 0.134, "step": 19820 }, { "epoch": 2.7779957953749124, "grad_norm": 0.06867286562919617, "learning_rate": 2.2846209040899307e-05, "loss": 0.003, "step": 19821 }, { "epoch": 2.778135949544499, "grad_norm": 0.07390697300434113, "learning_rate": 2.2831858407079643e-05, "loss": 0.0054, "step": 19822 }, { "epoch": 2.7782761037140853, "grad_norm": 0.16000312566757202, "learning_rate": 2.2817507773259985e-05, "loss": 0.0297, "step": 19823 }, { "epoch": 2.778416257883672, "grad_norm": 0.17961719632148743, "learning_rate": 2.2803157139440323e-05, "loss": 0.0147, "step": 19824 }, { "epoch": 2.7785564120532587, "grad_norm": 0.2783864736557007, "learning_rate": 2.2788806505620666e-05, "loss": 0.0337, "step": 19825 }, { "epoch": 2.778696566222845, "grad_norm": 0.08844500035047531, "learning_rate": 2.2774455871801e-05, "loss": 0.0186, "step": 19826 }, { "epoch": 2.7788367203924316, "grad_norm": 0.3968968689441681, "learning_rate": 2.2760105237981343e-05, "loss": 0.0693, "step": 19827 }, { "epoch": 2.7789768745620185, "grad_norm": 0.1678866446018219, "learning_rate": 2.2745754604161682e-05, "loss": 0.0092, "step": 19828 }, { "epoch": 2.779117028731605, "grad_norm": 0.18766486644744873, "learning_rate": 2.2731403970342024e-05, "loss": 0.0192, "step": 19829 }, { "epoch": 2.7792571829011914, "grad_norm": 0.4989485442638397, "learning_rate": 2.271705333652236e-05, "loss": 0.0288, "step": 19830 }, { "epoch": 2.779397337070778, "grad_norm": 0.10022205114364624, "learning_rate": 2.27027027027027e-05, "loss": 0.0318, "step": 19831 }, { "epoch": 2.7795374912403643, "grad_norm": 0.10035252571105957, "learning_rate": 2.268835206888304e-05, "loss": 0.0299, "step": 19832 }, { "epoch": 2.7796776454099508, "grad_norm": 0.04994402825832367, "learning_rate": 2.2674001435063382e-05, "loss": 0.0039, "step": 19833 }, { "epoch": 2.7798177995795372, "grad_norm": 0.3298553228378296, 
"learning_rate": 2.2659650801243717e-05, "loss": 0.0234, "step": 19834 }, { "epoch": 2.779957953749124, "grad_norm": 0.0856083482503891, "learning_rate": 2.264530016742406e-05, "loss": 0.0078, "step": 19835 }, { "epoch": 2.7800981079187106, "grad_norm": 0.18359985947608948, "learning_rate": 2.2630949533604398e-05, "loss": 0.0206, "step": 19836 }, { "epoch": 2.780238262088297, "grad_norm": 0.5506598353385925, "learning_rate": 2.261659889978474e-05, "loss": 0.049, "step": 19837 }, { "epoch": 2.7803784162578835, "grad_norm": 0.11980695277452469, "learning_rate": 2.2602248265965076e-05, "loss": 0.0084, "step": 19838 }, { "epoch": 2.7805185704274704, "grad_norm": 0.22937439382076263, "learning_rate": 2.2587897632145418e-05, "loss": 0.0733, "step": 19839 }, { "epoch": 2.780658724597057, "grad_norm": 0.3908344805240631, "learning_rate": 2.2573546998325757e-05, "loss": 0.0376, "step": 19840 }, { "epoch": 2.7807988787666433, "grad_norm": 0.4814217984676361, "learning_rate": 2.25591963645061e-05, "loss": 0.0353, "step": 19841 }, { "epoch": 2.78093903293623, "grad_norm": 0.2078816443681717, "learning_rate": 2.2544845730686434e-05, "loss": 0.031, "step": 19842 }, { "epoch": 2.7810791871058163, "grad_norm": 0.21305575966835022, "learning_rate": 2.2530495096866776e-05, "loss": 0.052, "step": 19843 }, { "epoch": 2.7812193412754027, "grad_norm": 0.10749264806509018, "learning_rate": 2.2516144463047115e-05, "loss": 0.0193, "step": 19844 }, { "epoch": 2.7813594954449896, "grad_norm": 0.2677127718925476, "learning_rate": 2.2501793829227457e-05, "loss": 0.0435, "step": 19845 }, { "epoch": 2.781499649614576, "grad_norm": 0.24073554575443268, "learning_rate": 2.2487443195407792e-05, "loss": 0.0491, "step": 19846 }, { "epoch": 2.7816398037841625, "grad_norm": 0.3661080300807953, "learning_rate": 2.2473092561588134e-05, "loss": 0.0211, "step": 19847 }, { "epoch": 2.781779957953749, "grad_norm": 0.19566532969474792, "learning_rate": 2.2458741927768473e-05, "loss": 0.0149, "step": 19848 }, 
{ "epoch": 2.781920112123336, "grad_norm": 0.09284968674182892, "learning_rate": 2.2444391293948815e-05, "loss": 0.0064, "step": 19849 }, { "epoch": 2.7820602662929224, "grad_norm": 0.2263614982366562, "learning_rate": 2.243004066012915e-05, "loss": 0.0399, "step": 19850 }, { "epoch": 2.782200420462509, "grad_norm": 0.040337175130844116, "learning_rate": 2.2415690026309493e-05, "loss": 0.0025, "step": 19851 }, { "epoch": 2.7823405746320953, "grad_norm": 0.2804590165615082, "learning_rate": 2.240133939248983e-05, "loss": 0.0445, "step": 19852 }, { "epoch": 2.7824807288016817, "grad_norm": 0.10034050792455673, "learning_rate": 2.2386988758670174e-05, "loss": 0.0072, "step": 19853 }, { "epoch": 2.782620882971268, "grad_norm": 0.2392459362745285, "learning_rate": 2.2372638124850516e-05, "loss": 0.0269, "step": 19854 }, { "epoch": 2.782761037140855, "grad_norm": 0.13013502955436707, "learning_rate": 2.235828749103085e-05, "loss": 0.0148, "step": 19855 }, { "epoch": 2.7829011913104416, "grad_norm": 0.14276379346847534, "learning_rate": 2.2343936857211193e-05, "loss": 0.024, "step": 19856 }, { "epoch": 2.783041345480028, "grad_norm": 0.29417479038238525, "learning_rate": 2.2329586223391532e-05, "loss": 0.0428, "step": 19857 }, { "epoch": 2.7831814996496145, "grad_norm": 0.13541923463344574, "learning_rate": 2.2315235589571874e-05, "loss": 0.0074, "step": 19858 }, { "epoch": 2.7833216538192014, "grad_norm": 0.21642860770225525, "learning_rate": 2.230088495575221e-05, "loss": 0.0329, "step": 19859 }, { "epoch": 2.783461807988788, "grad_norm": 0.10510509461164474, "learning_rate": 2.228653432193255e-05, "loss": 0.0308, "step": 19860 }, { "epoch": 2.7836019621583743, "grad_norm": 0.19402119517326355, "learning_rate": 2.227218368811289e-05, "loss": 0.008, "step": 19861 }, { "epoch": 2.7837421163279608, "grad_norm": 0.05505824834108353, "learning_rate": 2.2257833054293232e-05, "loss": 0.0022, "step": 19862 }, { "epoch": 2.783882270497547, "grad_norm": 0.1609456092119217, 
"learning_rate": 2.2243482420473568e-05, "loss": 0.0047, "step": 19863 }, { "epoch": 2.7840224246671337, "grad_norm": 0.37182798981666565, "learning_rate": 2.222913178665391e-05, "loss": 0.0284, "step": 19864 }, { "epoch": 2.78416257883672, "grad_norm": 0.7403817772865295, "learning_rate": 2.221478115283425e-05, "loss": 0.0107, "step": 19865 }, { "epoch": 2.784302733006307, "grad_norm": 0.015163655392825603, "learning_rate": 2.220043051901459e-05, "loss": 0.001, "step": 19866 }, { "epoch": 2.7844428871758935, "grad_norm": 0.2052098661661148, "learning_rate": 2.2186079885194926e-05, "loss": 0.0141, "step": 19867 }, { "epoch": 2.78458304134548, "grad_norm": 0.6579716205596924, "learning_rate": 2.2171729251375268e-05, "loss": 0.1095, "step": 19868 }, { "epoch": 2.7847231955150664, "grad_norm": 0.05567855387926102, "learning_rate": 2.2157378617555607e-05, "loss": 0.001, "step": 19869 }, { "epoch": 2.7848633496846533, "grad_norm": 0.7235983610153198, "learning_rate": 2.214302798373595e-05, "loss": 0.1546, "step": 19870 }, { "epoch": 2.78500350385424, "grad_norm": 0.2096051573753357, "learning_rate": 2.2128677349916284e-05, "loss": 0.0351, "step": 19871 }, { "epoch": 2.7851436580238262, "grad_norm": 0.08475883305072784, "learning_rate": 2.2114326716096626e-05, "loss": 0.0018, "step": 19872 }, { "epoch": 2.7852838121934127, "grad_norm": 0.1402767300605774, "learning_rate": 2.2099976082276965e-05, "loss": 0.0484, "step": 19873 }, { "epoch": 2.785423966362999, "grad_norm": 0.10882092267274857, "learning_rate": 2.2085625448457307e-05, "loss": 0.0062, "step": 19874 }, { "epoch": 2.7855641205325856, "grad_norm": 0.6786602735519409, "learning_rate": 2.2071274814637642e-05, "loss": 0.0318, "step": 19875 }, { "epoch": 2.7857042747021725, "grad_norm": 0.21453382074832916, "learning_rate": 2.2056924180817985e-05, "loss": 0.0408, "step": 19876 }, { "epoch": 2.785844428871759, "grad_norm": 0.1139344722032547, "learning_rate": 2.2042573546998323e-05, "loss": 0.0224, "step": 19877 }, { 
"epoch": 2.7859845830413454, "grad_norm": 0.12129947543144226, "learning_rate": 2.2028222913178665e-05, "loss": 0.0058, "step": 19878 }, { "epoch": 2.786124737210932, "grad_norm": 0.3911696672439575, "learning_rate": 2.2013872279359e-05, "loss": 0.0435, "step": 19879 }, { "epoch": 2.786264891380519, "grad_norm": 0.02805904485285282, "learning_rate": 2.1999521645539343e-05, "loss": 0.0018, "step": 19880 }, { "epoch": 2.7864050455501053, "grad_norm": 0.05103493481874466, "learning_rate": 2.198517101171968e-05, "loss": 0.0043, "step": 19881 }, { "epoch": 2.7865451997196917, "grad_norm": 0.3952654302120209, "learning_rate": 2.1970820377900024e-05, "loss": 0.0661, "step": 19882 }, { "epoch": 2.786685353889278, "grad_norm": 0.1119556725025177, "learning_rate": 2.195646974408036e-05, "loss": 0.0302, "step": 19883 }, { "epoch": 2.7868255080588646, "grad_norm": 0.10786882787942886, "learning_rate": 2.19421191102607e-05, "loss": 0.0136, "step": 19884 }, { "epoch": 2.786965662228451, "grad_norm": 0.3601684868335724, "learning_rate": 2.192776847644104e-05, "loss": 0.03, "step": 19885 }, { "epoch": 2.787105816398038, "grad_norm": 0.17197385430335999, "learning_rate": 2.1913417842621382e-05, "loss": 0.014, "step": 19886 }, { "epoch": 2.7872459705676245, "grad_norm": 0.10475435107946396, "learning_rate": 2.1899067208801717e-05, "loss": 0.0134, "step": 19887 }, { "epoch": 2.787386124737211, "grad_norm": 0.19809970259666443, "learning_rate": 2.188471657498206e-05, "loss": 0.0062, "step": 19888 }, { "epoch": 2.7875262789067974, "grad_norm": 0.10463893413543701, "learning_rate": 2.1870365941162398e-05, "loss": 0.0099, "step": 19889 }, { "epoch": 2.7876664330763843, "grad_norm": 0.19980274140834808, "learning_rate": 2.185601530734274e-05, "loss": 0.0326, "step": 19890 }, { "epoch": 2.7878065872459707, "grad_norm": 0.05061763897538185, "learning_rate": 2.1841664673523082e-05, "loss": 0.0028, "step": 19891 }, { "epoch": 2.787946741415557, "grad_norm": 0.17905987799167633, 
"learning_rate": 2.1827314039703418e-05, "loss": 0.0112, "step": 19892 }, { "epoch": 2.7880868955851437, "grad_norm": 0.24636733531951904, "learning_rate": 2.181296340588376e-05, "loss": 0.0286, "step": 19893 }, { "epoch": 2.78822704975473, "grad_norm": 0.12782379984855652, "learning_rate": 2.17986127720641e-05, "loss": 0.0089, "step": 19894 }, { "epoch": 2.7883672039243166, "grad_norm": 0.18282784521579742, "learning_rate": 2.178426213824444e-05, "loss": 0.0342, "step": 19895 }, { "epoch": 2.788507358093903, "grad_norm": 0.1590076982975006, "learning_rate": 2.1769911504424776e-05, "loss": 0.0144, "step": 19896 }, { "epoch": 2.78864751226349, "grad_norm": 0.38412681221961975, "learning_rate": 2.1755560870605118e-05, "loss": 0.0646, "step": 19897 }, { "epoch": 2.7887876664330764, "grad_norm": 0.07290639728307724, "learning_rate": 2.1741210236785457e-05, "loss": 0.0085, "step": 19898 }, { "epoch": 2.788927820602663, "grad_norm": 0.16273124516010284, "learning_rate": 2.17268596029658e-05, "loss": 0.0082, "step": 19899 }, { "epoch": 2.7890679747722493, "grad_norm": 0.36756864190101624, "learning_rate": 2.1712508969146134e-05, "loss": 0.0563, "step": 19900 }, { "epoch": 2.7892081289418362, "grad_norm": 0.047376178205013275, "learning_rate": 2.1698158335326476e-05, "loss": 0.0027, "step": 19901 }, { "epoch": 2.7893482831114227, "grad_norm": 0.07464243471622467, "learning_rate": 2.1683807701506815e-05, "loss": 0.0066, "step": 19902 }, { "epoch": 2.789488437281009, "grad_norm": 0.07505783438682556, "learning_rate": 2.1669457067687157e-05, "loss": 0.0064, "step": 19903 }, { "epoch": 2.7896285914505956, "grad_norm": 0.21771767735481262, "learning_rate": 2.1655106433867493e-05, "loss": 0.0277, "step": 19904 }, { "epoch": 2.789768745620182, "grad_norm": 0.132535919547081, "learning_rate": 2.1640755800047835e-05, "loss": 0.0235, "step": 19905 }, { "epoch": 2.7899088997897685, "grad_norm": 0.5503745675086975, "learning_rate": 2.1626405166228173e-05, "loss": 0.0523, "step": 19906 
}, { "epoch": 2.7900490539593554, "grad_norm": 0.215169757604599, "learning_rate": 2.1612054532408515e-05, "loss": 0.0047, "step": 19907 }, { "epoch": 2.790189208128942, "grad_norm": 0.2809276282787323, "learning_rate": 2.159770389858885e-05, "loss": 0.0408, "step": 19908 }, { "epoch": 2.7903293622985283, "grad_norm": 0.11196155846118927, "learning_rate": 2.1583353264769193e-05, "loss": 0.0115, "step": 19909 }, { "epoch": 2.790469516468115, "grad_norm": 0.09218896925449371, "learning_rate": 2.156900263094953e-05, "loss": 0.011, "step": 19910 }, { "epoch": 2.7906096706377017, "grad_norm": 0.16444481909275055, "learning_rate": 2.1554651997129874e-05, "loss": 0.0185, "step": 19911 }, { "epoch": 2.790749824807288, "grad_norm": 0.08260773867368698, "learning_rate": 2.154030136331021e-05, "loss": 0.0046, "step": 19912 }, { "epoch": 2.7908899789768746, "grad_norm": 0.20342746376991272, "learning_rate": 2.152595072949055e-05, "loss": 0.0088, "step": 19913 }, { "epoch": 2.791030133146461, "grad_norm": 0.04194631054997444, "learning_rate": 2.151160009567089e-05, "loss": 0.0046, "step": 19914 }, { "epoch": 2.7911702873160475, "grad_norm": 0.19466149806976318, "learning_rate": 2.1497249461851232e-05, "loss": 0.0452, "step": 19915 }, { "epoch": 2.791310441485634, "grad_norm": 0.154509037733078, "learning_rate": 2.1482898828031567e-05, "loss": 0.0082, "step": 19916 }, { "epoch": 2.7914505956552205, "grad_norm": 0.7207839488983154, "learning_rate": 2.146854819421191e-05, "loss": 0.0326, "step": 19917 }, { "epoch": 2.7915907498248074, "grad_norm": 0.36176037788391113, "learning_rate": 2.1454197560392248e-05, "loss": 0.0122, "step": 19918 }, { "epoch": 2.791730903994394, "grad_norm": 0.3289072811603546, "learning_rate": 2.143984692657259e-05, "loss": 0.0084, "step": 19919 }, { "epoch": 2.7918710581639803, "grad_norm": 0.6676080822944641, "learning_rate": 2.1425496292752926e-05, "loss": 0.0375, "step": 19920 }, { "epoch": 2.792011212333567, "grad_norm": 0.14369215071201324, 
"learning_rate": 2.1411145658933268e-05, "loss": 0.026, "step": 19921 }, { "epoch": 2.7921513665031537, "grad_norm": 0.23929215967655182, "learning_rate": 2.1396795025113607e-05, "loss": 0.0243, "step": 19922 }, { "epoch": 2.79229152067274, "grad_norm": 0.15751765668392181, "learning_rate": 2.138244439129395e-05, "loss": 0.0197, "step": 19923 }, { "epoch": 2.7924316748423266, "grad_norm": 0.12370754033327103, "learning_rate": 2.1368093757474284e-05, "loss": 0.0347, "step": 19924 }, { "epoch": 2.792571829011913, "grad_norm": 0.2292085886001587, "learning_rate": 2.1353743123654626e-05, "loss": 0.0328, "step": 19925 }, { "epoch": 2.7927119831814995, "grad_norm": 0.2243906706571579, "learning_rate": 2.1339392489834965e-05, "loss": 0.0151, "step": 19926 }, { "epoch": 2.792852137351086, "grad_norm": 0.1460040658712387, "learning_rate": 2.1325041856015307e-05, "loss": 0.0133, "step": 19927 }, { "epoch": 2.792992291520673, "grad_norm": 0.2599943280220032, "learning_rate": 2.1310691222195646e-05, "loss": 0.0228, "step": 19928 }, { "epoch": 2.7931324456902593, "grad_norm": 0.1314019113779068, "learning_rate": 2.1296340588375984e-05, "loss": 0.0111, "step": 19929 }, { "epoch": 2.7932725998598458, "grad_norm": 0.23316337168216705, "learning_rate": 2.1281989954556326e-05, "loss": 0.0157, "step": 19930 }, { "epoch": 2.7934127540294322, "grad_norm": 0.0410420224070549, "learning_rate": 2.1267639320736665e-05, "loss": 0.003, "step": 19931 }, { "epoch": 2.793552908199019, "grad_norm": 0.020699461922049522, "learning_rate": 2.1253288686917004e-05, "loss": 0.0025, "step": 19932 }, { "epoch": 2.7936930623686056, "grad_norm": 0.2327887862920761, "learning_rate": 2.1238938053097343e-05, "loss": 0.0556, "step": 19933 }, { "epoch": 2.793833216538192, "grad_norm": 0.31376755237579346, "learning_rate": 2.1224587419277685e-05, "loss": 0.0785, "step": 19934 }, { "epoch": 2.7939733707077785, "grad_norm": 0.2757945656776428, "learning_rate": 2.1210236785458023e-05, "loss": 0.0647, "step": 19935 
}, { "epoch": 2.794113524877365, "grad_norm": 0.36520570516586304, "learning_rate": 2.1195886151638362e-05, "loss": 0.0683, "step": 19936 }, { "epoch": 2.7942536790469514, "grad_norm": 0.2816203832626343, "learning_rate": 2.11815355178187e-05, "loss": 0.0387, "step": 19937 }, { "epoch": 2.7943938332165383, "grad_norm": 0.21043512225151062, "learning_rate": 2.1167184883999043e-05, "loss": 0.0586, "step": 19938 }, { "epoch": 2.794533987386125, "grad_norm": 0.05700146406888962, "learning_rate": 2.1152834250179382e-05, "loss": 0.0051, "step": 19939 }, { "epoch": 2.7946741415557113, "grad_norm": 0.051535919308662415, "learning_rate": 2.113848361635972e-05, "loss": 0.0022, "step": 19940 }, { "epoch": 2.7948142957252977, "grad_norm": 0.12905417382717133, "learning_rate": 2.112413298254006e-05, "loss": 0.0237, "step": 19941 }, { "epoch": 2.7949544498948846, "grad_norm": 0.18502721190452576, "learning_rate": 2.11097823487204e-05, "loss": 0.0272, "step": 19942 }, { "epoch": 2.795094604064471, "grad_norm": 0.35748687386512756, "learning_rate": 2.109543171490074e-05, "loss": 0.0287, "step": 19943 }, { "epoch": 2.7952347582340575, "grad_norm": 0.086206816136837, "learning_rate": 2.108108108108108e-05, "loss": 0.0086, "step": 19944 }, { "epoch": 2.795374912403644, "grad_norm": 0.12639585137367249, "learning_rate": 2.1066730447261418e-05, "loss": 0.02, "step": 19945 }, { "epoch": 2.7955150665732305, "grad_norm": 0.3880903124809265, "learning_rate": 2.105237981344176e-05, "loss": 0.0322, "step": 19946 }, { "epoch": 2.795655220742817, "grad_norm": 0.2510320544242859, "learning_rate": 2.10380291796221e-05, "loss": 0.0813, "step": 19947 }, { "epoch": 2.7957953749124034, "grad_norm": 0.03349941223859787, "learning_rate": 2.1023678545802437e-05, "loss": 0.0036, "step": 19948 }, { "epoch": 2.7959355290819903, "grad_norm": 0.2164732664823532, "learning_rate": 2.1009327911982776e-05, "loss": 0.0295, "step": 19949 }, { "epoch": 2.7960756832515767, "grad_norm": 0.19333980977535248, 
"learning_rate": 2.0994977278163118e-05, "loss": 0.0376, "step": 19950 }, { "epoch": 2.796215837421163, "grad_norm": 0.30675041675567627, "learning_rate": 2.0980626644343457e-05, "loss": 0.0365, "step": 19951 }, { "epoch": 2.79635599159075, "grad_norm": 0.6219843626022339, "learning_rate": 2.0966276010523795e-05, "loss": 0.0262, "step": 19952 }, { "epoch": 2.7964961457603366, "grad_norm": 0.12456759065389633, "learning_rate": 2.0951925376704134e-05, "loss": 0.007, "step": 19953 }, { "epoch": 2.796636299929923, "grad_norm": 0.5238804221153259, "learning_rate": 2.0937574742884476e-05, "loss": 0.0308, "step": 19954 }, { "epoch": 2.7967764540995095, "grad_norm": 0.29134076833724976, "learning_rate": 2.0923224109064815e-05, "loss": 0.0189, "step": 19955 }, { "epoch": 2.796916608269096, "grad_norm": 0.24165566265583038, "learning_rate": 2.0908873475245154e-05, "loss": 0.0388, "step": 19956 }, { "epoch": 2.7970567624386824, "grad_norm": 0.5876925587654114, "learning_rate": 2.0894522841425492e-05, "loss": 0.0165, "step": 19957 }, { "epoch": 2.797196916608269, "grad_norm": 0.2325659543275833, "learning_rate": 2.0880172207605834e-05, "loss": 0.0407, "step": 19958 }, { "epoch": 2.7973370707778558, "grad_norm": 0.1631787270307541, "learning_rate": 2.0865821573786173e-05, "loss": 0.0331, "step": 19959 }, { "epoch": 2.797477224947442, "grad_norm": 0.20489393174648285, "learning_rate": 2.0851470939966515e-05, "loss": 0.0177, "step": 19960 }, { "epoch": 2.7976173791170287, "grad_norm": 0.14901484549045563, "learning_rate": 2.083712030614685e-05, "loss": 0.0038, "step": 19961 }, { "epoch": 2.797757533286615, "grad_norm": 0.24769359827041626, "learning_rate": 2.0822769672327193e-05, "loss": 0.0419, "step": 19962 }, { "epoch": 2.797897687456202, "grad_norm": 0.04648546874523163, "learning_rate": 2.080841903850753e-05, "loss": 0.0039, "step": 19963 }, { "epoch": 2.7980378416257885, "grad_norm": 0.33478906750679016, "learning_rate": 2.0794068404687874e-05, "loss": 0.0472, "step": 19964 
}, { "epoch": 2.798177995795375, "grad_norm": 0.24060703814029694, "learning_rate": 2.0779717770868212e-05, "loss": 0.0338, "step": 19965 }, { "epoch": 2.7983181499649614, "grad_norm": 0.8144930601119995, "learning_rate": 2.076536713704855e-05, "loss": 0.1267, "step": 19966 }, { "epoch": 2.798458304134548, "grad_norm": 0.26101940870285034, "learning_rate": 2.0751016503228893e-05, "loss": 0.0466, "step": 19967 }, { "epoch": 2.7985984583041343, "grad_norm": 1.7428154945373535, "learning_rate": 2.0736665869409232e-05, "loss": 0.0447, "step": 19968 }, { "epoch": 2.7987386124737212, "grad_norm": 1.5862023830413818, "learning_rate": 2.072231523558957e-05, "loss": 0.1341, "step": 19969 }, { "epoch": 2.7988787666433077, "grad_norm": 0.7910722494125366, "learning_rate": 2.070796460176991e-05, "loss": 0.092, "step": 19970 }, { "epoch": 2.799018920812894, "grad_norm": 0.12827037274837494, "learning_rate": 2.069361396795025e-05, "loss": 0.0378, "step": 19971 }, { "epoch": 2.7991590749824806, "grad_norm": 0.21304842829704285, "learning_rate": 2.067926333413059e-05, "loss": 0.0431, "step": 19972 }, { "epoch": 2.7992992291520675, "grad_norm": 0.16096867620944977, "learning_rate": 2.066491270031093e-05, "loss": 0.0351, "step": 19973 }, { "epoch": 2.799439383321654, "grad_norm": 0.080171138048172, "learning_rate": 2.0650562066491268e-05, "loss": 0.0115, "step": 19974 }, { "epoch": 2.7995795374912404, "grad_norm": 0.21438491344451904, "learning_rate": 2.063621143267161e-05, "loss": 0.0356, "step": 19975 }, { "epoch": 2.799719691660827, "grad_norm": 0.23420527577400208, "learning_rate": 2.062186079885195e-05, "loss": 0.0165, "step": 19976 }, { "epoch": 2.7998598458304134, "grad_norm": 0.14552585780620575, "learning_rate": 2.0607510165032287e-05, "loss": 0.0134, "step": 19977 }, { "epoch": 2.8, "grad_norm": 0.2259116917848587, "learning_rate": 2.0593159531212626e-05, "loss": 0.0156, "step": 19978 }, { "epoch": 2.8001401541695863, "grad_norm": 0.16932596266269684, "learning_rate": 
2.0578808897392968e-05, "loss": 0.0184, "step": 19979 }, { "epoch": 2.800280308339173, "grad_norm": 0.019965756684541702, "learning_rate": 2.0564458263573307e-05, "loss": 0.0021, "step": 19980 }, { "epoch": 2.8004204625087596, "grad_norm": 0.07200606912374496, "learning_rate": 2.0550107629753645e-05, "loss": 0.012, "step": 19981 }, { "epoch": 2.800560616678346, "grad_norm": 0.20571266114711761, "learning_rate": 2.0535756995933984e-05, "loss": 0.0113, "step": 19982 }, { "epoch": 2.8007007708479326, "grad_norm": 0.2929847836494446, "learning_rate": 2.0521406362114326e-05, "loss": 0.025, "step": 19983 }, { "epoch": 2.8008409250175195, "grad_norm": 0.5060651302337646, "learning_rate": 2.0507055728294665e-05, "loss": 0.0457, "step": 19984 }, { "epoch": 2.800981079187106, "grad_norm": 0.33901354670524597, "learning_rate": 2.0492705094475004e-05, "loss": 0.0414, "step": 19985 }, { "epoch": 2.8011212333566924, "grad_norm": 0.07111307978630066, "learning_rate": 2.0478354460655342e-05, "loss": 0.0069, "step": 19986 }, { "epoch": 2.801261387526279, "grad_norm": 0.24877874553203583, "learning_rate": 2.0464003826835685e-05, "loss": 0.0222, "step": 19987 }, { "epoch": 2.8014015416958653, "grad_norm": 0.224320188164711, "learning_rate": 2.0449653193016023e-05, "loss": 0.0085, "step": 19988 }, { "epoch": 2.8015416958654518, "grad_norm": 0.07319874316453934, "learning_rate": 2.0435302559196362e-05, "loss": 0.0185, "step": 19989 }, { "epoch": 2.8016818500350387, "grad_norm": 0.19133345782756805, "learning_rate": 2.04209519253767e-05, "loss": 0.041, "step": 19990 }, { "epoch": 2.801822004204625, "grad_norm": 0.042898040264844894, "learning_rate": 2.0406601291557043e-05, "loss": 0.0026, "step": 19991 }, { "epoch": 2.8019621583742116, "grad_norm": 0.2284245640039444, "learning_rate": 2.039225065773738e-05, "loss": 0.0199, "step": 19992 }, { "epoch": 2.802102312543798, "grad_norm": 0.08462200313806534, "learning_rate": 2.037790002391772e-05, "loss": 0.0136, "step": 19993 }, { "epoch": 
2.802242466713385, "grad_norm": 0.6263629198074341, "learning_rate": 2.036354939009806e-05, "loss": 0.0679, "step": 19994 }, { "epoch": 2.8023826208829714, "grad_norm": 0.14108793437480927, "learning_rate": 2.03491987562784e-05, "loss": 0.0088, "step": 19995 }, { "epoch": 2.802522775052558, "grad_norm": 0.3206084072589874, "learning_rate": 2.033484812245874e-05, "loss": 0.0567, "step": 19996 }, { "epoch": 2.8026629292221443, "grad_norm": 0.2152736932039261, "learning_rate": 2.032049748863908e-05, "loss": 0.0133, "step": 19997 }, { "epoch": 2.802803083391731, "grad_norm": 0.12140417844057083, "learning_rate": 2.0306146854819417e-05, "loss": 0.0194, "step": 19998 }, { "epoch": 2.8029432375613172, "grad_norm": 0.3021085262298584, "learning_rate": 2.029179622099976e-05, "loss": 0.0216, "step": 19999 }, { "epoch": 2.803083391730904, "grad_norm": 0.22126875817775726, "learning_rate": 2.0277445587180098e-05, "loss": 0.0284, "step": 20000 }, { "epoch": 2.8032235459004906, "grad_norm": 0.5474613904953003, "learning_rate": 2.0263094953360437e-05, "loss": 0.0322, "step": 20001 }, { "epoch": 2.803363700070077, "grad_norm": 0.14236284792423248, "learning_rate": 2.0248744319540776e-05, "loss": 0.006, "step": 20002 }, { "epoch": 2.8035038542396635, "grad_norm": 0.19918866455554962, "learning_rate": 2.0234393685721118e-05, "loss": 0.032, "step": 20003 }, { "epoch": 2.8036440084092504, "grad_norm": 1.1128315925598145, "learning_rate": 2.022004305190146e-05, "loss": 0.0656, "step": 20004 }, { "epoch": 2.803784162578837, "grad_norm": 0.2393268495798111, "learning_rate": 2.0205692418081795e-05, "loss": 0.0074, "step": 20005 }, { "epoch": 2.8039243167484234, "grad_norm": 0.05781044065952301, "learning_rate": 2.0191341784262137e-05, "loss": 0.0037, "step": 20006 }, { "epoch": 2.80406447091801, "grad_norm": 0.09157800674438477, "learning_rate": 2.0176991150442476e-05, "loss": 0.0149, "step": 20007 }, { "epoch": 2.8042046250875963, "grad_norm": 0.2359168380498886, "learning_rate": 
2.0162640516622818e-05, "loss": 0.0171, "step": 20008 }, { "epoch": 2.8043447792571827, "grad_norm": 0.11985179036855698, "learning_rate": 2.0148289882803153e-05, "loss": 0.0087, "step": 20009 }, { "epoch": 2.804484933426769, "grad_norm": 0.02725437842309475, "learning_rate": 2.0133939248983496e-05, "loss": 0.002, "step": 20010 }, { "epoch": 2.804625087596356, "grad_norm": 0.052521172910928726, "learning_rate": 2.0119588615163834e-05, "loss": 0.0028, "step": 20011 }, { "epoch": 2.8047652417659426, "grad_norm": 0.04475374519824982, "learning_rate": 2.0105237981344176e-05, "loss": 0.0041, "step": 20012 }, { "epoch": 2.804905395935529, "grad_norm": 0.43351632356643677, "learning_rate": 2.0090887347524512e-05, "loss": 0.0854, "step": 20013 }, { "epoch": 2.8050455501051155, "grad_norm": 0.1831410676240921, "learning_rate": 2.0076536713704854e-05, "loss": 0.0191, "step": 20014 }, { "epoch": 2.8051857042747024, "grad_norm": 1.5342525243759155, "learning_rate": 2.0062186079885193e-05, "loss": 0.0371, "step": 20015 }, { "epoch": 2.805325858444289, "grad_norm": 0.011657540686428547, "learning_rate": 2.0047835446065535e-05, "loss": 0.001, "step": 20016 }, { "epoch": 2.8054660126138753, "grad_norm": 0.856203556060791, "learning_rate": 2.003348481224587e-05, "loss": 0.138, "step": 20017 }, { "epoch": 2.8056061667834618, "grad_norm": 0.23107963800430298, "learning_rate": 2.0019134178426212e-05, "loss": 0.0179, "step": 20018 }, { "epoch": 2.805746320953048, "grad_norm": 0.4495178163051605, "learning_rate": 2.000478354460655e-05, "loss": 0.0298, "step": 20019 }, { "epoch": 2.8058864751226347, "grad_norm": 0.6873546242713928, "learning_rate": 1.9990432910786893e-05, "loss": 0.0842, "step": 20020 }, { "epoch": 2.8060266292922216, "grad_norm": 0.21186579763889313, "learning_rate": 1.997608227696723e-05, "loss": 0.0243, "step": 20021 }, { "epoch": 2.806166783461808, "grad_norm": 0.2650187611579895, "learning_rate": 1.996173164314757e-05, "loss": 0.0291, "step": 20022 }, { "epoch": 
2.8063069376313945, "grad_norm": 0.13266363739967346, "learning_rate": 1.994738100932791e-05, "loss": 0.0332, "step": 20023 }, { "epoch": 2.806447091800981, "grad_norm": 0.06920032948255539, "learning_rate": 1.993303037550825e-05, "loss": 0.0098, "step": 20024 }, { "epoch": 2.806587245970568, "grad_norm": 0.052378904074430466, "learning_rate": 1.9918679741688587e-05, "loss": 0.0051, "step": 20025 }, { "epoch": 2.8067274001401543, "grad_norm": 0.3211519718170166, "learning_rate": 1.990432910786893e-05, "loss": 0.0143, "step": 20026 }, { "epoch": 2.8068675543097408, "grad_norm": 0.014825846068561077, "learning_rate": 1.9889978474049267e-05, "loss": 0.0016, "step": 20027 }, { "epoch": 2.8070077084793272, "grad_norm": 0.084259532392025, "learning_rate": 1.987562784022961e-05, "loss": 0.0053, "step": 20028 }, { "epoch": 2.8071478626489137, "grad_norm": 0.02732730656862259, "learning_rate": 1.9861277206409945e-05, "loss": 0.0023, "step": 20029 }, { "epoch": 2.8072880168185, "grad_norm": 0.11814083158969879, "learning_rate": 1.9846926572590287e-05, "loss": 0.0255, "step": 20030 }, { "epoch": 2.807428170988087, "grad_norm": 0.48678386211395264, "learning_rate": 1.9832575938770626e-05, "loss": 0.0484, "step": 20031 }, { "epoch": 2.8075683251576735, "grad_norm": 0.12674832344055176, "learning_rate": 1.9818225304950968e-05, "loss": 0.0122, "step": 20032 }, { "epoch": 2.80770847932726, "grad_norm": 0.3977404534816742, "learning_rate": 1.9803874671131303e-05, "loss": 0.0182, "step": 20033 }, { "epoch": 2.8078486334968464, "grad_norm": 0.09031179547309875, "learning_rate": 1.9789524037311645e-05, "loss": 0.0213, "step": 20034 }, { "epoch": 2.8079887876664333, "grad_norm": 0.17990154027938843, "learning_rate": 1.9775173403491984e-05, "loss": 0.0285, "step": 20035 }, { "epoch": 2.80812894183602, "grad_norm": 0.10841778665781021, "learning_rate": 1.9760822769672326e-05, "loss": 0.0204, "step": 20036 }, { "epoch": 2.8082690960056063, "grad_norm": 0.0737527385354042, "learning_rate": 
1.974647213585266e-05, "loss": 0.0069, "step": 20037 }, { "epoch": 2.8084092501751927, "grad_norm": 0.06787803769111633, "learning_rate": 1.9732121502033004e-05, "loss": 0.0066, "step": 20038 }, { "epoch": 2.808549404344779, "grad_norm": 0.16270709037780762, "learning_rate": 1.9717770868213342e-05, "loss": 0.015, "step": 20039 }, { "epoch": 2.8086895585143656, "grad_norm": 0.11124812811613083, "learning_rate": 1.9703420234393684e-05, "loss": 0.0145, "step": 20040 }, { "epoch": 2.808829712683952, "grad_norm": 0.23035526275634766, "learning_rate": 1.9689069600574027e-05, "loss": 0.035, "step": 20041 }, { "epoch": 2.808969866853539, "grad_norm": 0.01653190888464451, "learning_rate": 1.9674718966754362e-05, "loss": 0.0018, "step": 20042 }, { "epoch": 2.8091100210231255, "grad_norm": 0.14227880537509918, "learning_rate": 1.9660368332934704e-05, "loss": 0.0096, "step": 20043 }, { "epoch": 2.809250175192712, "grad_norm": 0.1516581028699875, "learning_rate": 1.9646017699115043e-05, "loss": 0.0062, "step": 20044 }, { "epoch": 2.8093903293622984, "grad_norm": 0.4181561768054962, "learning_rate": 1.9631667065295385e-05, "loss": 0.032, "step": 20045 }, { "epoch": 2.8095304835318853, "grad_norm": 0.17425920069217682, "learning_rate": 1.961731643147572e-05, "loss": 0.0296, "step": 20046 }, { "epoch": 2.8096706377014717, "grad_norm": 0.23781277239322662, "learning_rate": 1.9602965797656062e-05, "loss": 0.0107, "step": 20047 }, { "epoch": 2.809810791871058, "grad_norm": 0.3627985417842865, "learning_rate": 1.95886151638364e-05, "loss": 0.0181, "step": 20048 }, { "epoch": 2.8099509460406447, "grad_norm": 0.18137101829051971, "learning_rate": 1.9574264530016743e-05, "loss": 0.0219, "step": 20049 }, { "epoch": 2.810091100210231, "grad_norm": 0.260376513004303, "learning_rate": 1.955991389619708e-05, "loss": 0.048, "step": 20050 }, { "epoch": 2.8102312543798176, "grad_norm": 0.07062747329473495, "learning_rate": 1.954556326237742e-05, "loss": 0.0118, "step": 20051 }, { "epoch": 
2.8103714085494045, "grad_norm": 0.360655814409256, "learning_rate": 1.953121262855776e-05, "loss": 0.0428, "step": 20052 }, { "epoch": 2.810511562718991, "grad_norm": 0.2344047725200653, "learning_rate": 1.95168619947381e-05, "loss": 0.0195, "step": 20053 }, { "epoch": 2.8106517168885774, "grad_norm": 0.06112000346183777, "learning_rate": 1.9502511360918437e-05, "loss": 0.0044, "step": 20054 }, { "epoch": 2.810791871058164, "grad_norm": 0.09430229663848877, "learning_rate": 1.948816072709878e-05, "loss": 0.0096, "step": 20055 }, { "epoch": 2.8109320252277508, "grad_norm": 0.49903953075408936, "learning_rate": 1.9473810093279118e-05, "loss": 0.0249, "step": 20056 }, { "epoch": 2.8110721793973372, "grad_norm": 0.20669111609458923, "learning_rate": 1.945945945945946e-05, "loss": 0.0104, "step": 20057 }, { "epoch": 2.8112123335669237, "grad_norm": 0.06677598506212234, "learning_rate": 1.9445108825639795e-05, "loss": 0.0034, "step": 20058 }, { "epoch": 2.81135248773651, "grad_norm": 0.20507769286632538, "learning_rate": 1.9430758191820137e-05, "loss": 0.0406, "step": 20059 }, { "epoch": 2.8114926419060966, "grad_norm": 0.20784273743629456, "learning_rate": 1.9416407558000476e-05, "loss": 0.0269, "step": 20060 }, { "epoch": 2.811632796075683, "grad_norm": 0.1675213873386383, "learning_rate": 1.9402056924180818e-05, "loss": 0.0206, "step": 20061 }, { "epoch": 2.8117729502452695, "grad_norm": 0.5399994850158691, "learning_rate": 1.9387706290361153e-05, "loss": 0.0202, "step": 20062 }, { "epoch": 2.8119131044148564, "grad_norm": 0.02934509515762329, "learning_rate": 1.9373355656541495e-05, "loss": 0.0029, "step": 20063 }, { "epoch": 2.812053258584443, "grad_norm": 0.02207348309457302, "learning_rate": 1.9359005022721834e-05, "loss": 0.0013, "step": 20064 }, { "epoch": 2.8121934127540293, "grad_norm": 0.9961301684379578, "learning_rate": 1.9344654388902176e-05, "loss": 0.0278, "step": 20065 }, { "epoch": 2.8123335669236162, "grad_norm": 0.2945929169654846, "learning_rate": 
1.933030375508251e-05, "loss": 0.0422, "step": 20066 }, { "epoch": 2.8124737210932027, "grad_norm": 0.5682245492935181, "learning_rate": 1.9315953121262854e-05, "loss": 0.0893, "step": 20067 }, { "epoch": 2.812613875262789, "grad_norm": 0.3704012930393219, "learning_rate": 1.9301602487443192e-05, "loss": 0.032, "step": 20068 }, { "epoch": 2.8127540294323756, "grad_norm": 1.348705530166626, "learning_rate": 1.9287251853623535e-05, "loss": 0.0358, "step": 20069 }, { "epoch": 2.812894183601962, "grad_norm": 0.21642029285430908, "learning_rate": 1.927290121980387e-05, "loss": 0.0048, "step": 20070 }, { "epoch": 2.8130343377715485, "grad_norm": 0.5931062698364258, "learning_rate": 1.9258550585984212e-05, "loss": 0.0443, "step": 20071 }, { "epoch": 2.813174491941135, "grad_norm": 0.1410433053970337, "learning_rate": 1.924419995216455e-05, "loss": 0.0163, "step": 20072 }, { "epoch": 2.813314646110722, "grad_norm": 0.2444588541984558, "learning_rate": 1.9229849318344893e-05, "loss": 0.0616, "step": 20073 }, { "epoch": 2.8134548002803084, "grad_norm": 0.11151965707540512, "learning_rate": 1.9215498684525228e-05, "loss": 0.0221, "step": 20074 }, { "epoch": 2.813594954449895, "grad_norm": 0.2686412036418915, "learning_rate": 1.920114805070557e-05, "loss": 0.019, "step": 20075 }, { "epoch": 2.8137351086194813, "grad_norm": 0.3521868586540222, "learning_rate": 1.918679741688591e-05, "loss": 0.0322, "step": 20076 }, { "epoch": 2.813875262789068, "grad_norm": 0.010168555192649364, "learning_rate": 1.917244678306625e-05, "loss": 0.0011, "step": 20077 }, { "epoch": 2.8140154169586546, "grad_norm": 0.3403485119342804, "learning_rate": 1.9158096149246593e-05, "loss": 0.023, "step": 20078 }, { "epoch": 2.814155571128241, "grad_norm": 0.13261543214321136, "learning_rate": 1.914374551542693e-05, "loss": 0.0055, "step": 20079 }, { "epoch": 2.8142957252978276, "grad_norm": 0.23161859810352325, "learning_rate": 1.912939488160727e-05, "loss": 0.0143, "step": 20080 }, { "epoch": 
2.814435879467414, "grad_norm": 0.4936937391757965, "learning_rate": 1.911504424778761e-05, "loss": 0.0411, "step": 20081 }, { "epoch": 2.8145760336370005, "grad_norm": 0.3196813464164734, "learning_rate": 1.910069361396795e-05, "loss": 0.0417, "step": 20082 }, { "epoch": 2.8147161878065874, "grad_norm": 0.29124414920806885, "learning_rate": 1.9086342980148287e-05, "loss": 0.0221, "step": 20083 }, { "epoch": 2.814856341976174, "grad_norm": 0.12162228673696518, "learning_rate": 1.907199234632863e-05, "loss": 0.013, "step": 20084 }, { "epoch": 2.8149964961457603, "grad_norm": 0.060638755559921265, "learning_rate": 1.9057641712508968e-05, "loss": 0.0076, "step": 20085 }, { "epoch": 2.8151366503153468, "grad_norm": 0.31550222635269165, "learning_rate": 1.904329107868931e-05, "loss": 0.0527, "step": 20086 }, { "epoch": 2.8152768044849337, "grad_norm": 0.08666808158159256, "learning_rate": 1.9028940444869645e-05, "loss": 0.0029, "step": 20087 }, { "epoch": 2.81541695865452, "grad_norm": 0.48483553528785706, "learning_rate": 1.9014589811049987e-05, "loss": 0.0477, "step": 20088 }, { "epoch": 2.8155571128241066, "grad_norm": 0.13772916793823242, "learning_rate": 1.9000239177230326e-05, "loss": 0.0133, "step": 20089 }, { "epoch": 2.815697266993693, "grad_norm": 0.05882766470313072, "learning_rate": 1.8985888543410668e-05, "loss": 0.0068, "step": 20090 }, { "epoch": 2.8158374211632795, "grad_norm": 0.1865001618862152, "learning_rate": 1.8971537909591003e-05, "loss": 0.0291, "step": 20091 }, { "epoch": 2.815977575332866, "grad_norm": 0.1174512431025505, "learning_rate": 1.8957187275771346e-05, "loss": 0.0425, "step": 20092 }, { "epoch": 2.8161177295024524, "grad_norm": 0.024458911269903183, "learning_rate": 1.8942836641951684e-05, "loss": 0.0012, "step": 20093 }, { "epoch": 2.8162578836720393, "grad_norm": 0.06882781535387039, "learning_rate": 1.8928486008132026e-05, "loss": 0.0081, "step": 20094 }, { "epoch": 2.816398037841626, "grad_norm": 0.10022664815187454, 
"learning_rate": 1.8914135374312362e-05, "loss": 0.016, "step": 20095 }, { "epoch": 2.8165381920112122, "grad_norm": 0.4673845171928406, "learning_rate": 1.8899784740492704e-05, "loss": 0.0601, "step": 20096 }, { "epoch": 2.816678346180799, "grad_norm": 0.4063095152378082, "learning_rate": 1.8885434106673043e-05, "loss": 0.045, "step": 20097 }, { "epoch": 2.8168185003503856, "grad_norm": 0.21431665122509003, "learning_rate": 1.8871083472853385e-05, "loss": 0.043, "step": 20098 }, { "epoch": 2.816958654519972, "grad_norm": 0.028018292039632797, "learning_rate": 1.885673283903372e-05, "loss": 0.0029, "step": 20099 }, { "epoch": 2.8170988086895585, "grad_norm": 0.2614838480949402, "learning_rate": 1.8842382205214062e-05, "loss": 0.0325, "step": 20100 }, { "epoch": 2.817238962859145, "grad_norm": 0.21096856892108917, "learning_rate": 1.88280315713944e-05, "loss": 0.0667, "step": 20101 }, { "epoch": 2.8173791170287315, "grad_norm": 0.04970110207796097, "learning_rate": 1.8813680937574743e-05, "loss": 0.0063, "step": 20102 }, { "epoch": 2.817519271198318, "grad_norm": 0.1813165843486786, "learning_rate": 1.8799330303755078e-05, "loss": 0.0095, "step": 20103 }, { "epoch": 2.817659425367905, "grad_norm": 0.08293382823467255, "learning_rate": 1.878497966993542e-05, "loss": 0.0068, "step": 20104 }, { "epoch": 2.8177995795374913, "grad_norm": 0.22900699079036713, "learning_rate": 1.877062903611576e-05, "loss": 0.057, "step": 20105 }, { "epoch": 2.8179397337070777, "grad_norm": 0.11686235666275024, "learning_rate": 1.87562784022961e-05, "loss": 0.0378, "step": 20106 }, { "epoch": 2.818079887876664, "grad_norm": 0.15948370099067688, "learning_rate": 1.874192776847644e-05, "loss": 0.0097, "step": 20107 }, { "epoch": 2.818220042046251, "grad_norm": 0.3438152074813843, "learning_rate": 1.872757713465678e-05, "loss": 0.0179, "step": 20108 }, { "epoch": 2.8183601962158376, "grad_norm": 0.17170532047748566, "learning_rate": 1.8713226500837117e-05, "loss": 0.005, "step": 20109 }, { 
"epoch": 2.818500350385424, "grad_norm": 0.16956299543380737, "learning_rate": 1.869887586701746e-05, "loss": 0.0407, "step": 20110 }, { "epoch": 2.8186405045550105, "grad_norm": 0.22396832704544067, "learning_rate": 1.8684525233197798e-05, "loss": 0.0183, "step": 20111 }, { "epoch": 2.818780658724597, "grad_norm": 0.4402769207954407, "learning_rate": 1.8670174599378137e-05, "loss": 0.0637, "step": 20112 }, { "epoch": 2.8189208128941834, "grad_norm": 0.19635671377182007, "learning_rate": 1.8655823965558476e-05, "loss": 0.0198, "step": 20113 }, { "epoch": 2.8190609670637703, "grad_norm": 0.22729657590389252, "learning_rate": 1.8641473331738818e-05, "loss": 0.0069, "step": 20114 }, { "epoch": 2.8192011212333568, "grad_norm": 0.21595925092697144, "learning_rate": 1.8627122697919157e-05, "loss": 0.0341, "step": 20115 }, { "epoch": 2.819341275402943, "grad_norm": 0.36268818378448486, "learning_rate": 1.8612772064099495e-05, "loss": 0.0249, "step": 20116 }, { "epoch": 2.8194814295725297, "grad_norm": 0.06712613254785538, "learning_rate": 1.8598421430279834e-05, "loss": 0.0021, "step": 20117 }, { "epoch": 2.8196215837421166, "grad_norm": 0.06468553096055984, "learning_rate": 1.8584070796460176e-05, "loss": 0.0018, "step": 20118 }, { "epoch": 2.819761737911703, "grad_norm": 0.5762037634849548, "learning_rate": 1.8569720162640515e-05, "loss": 0.0601, "step": 20119 }, { "epoch": 2.8199018920812895, "grad_norm": 0.1784004271030426, "learning_rate": 1.8555369528820854e-05, "loss": 0.0053, "step": 20120 }, { "epoch": 2.820042046250876, "grad_norm": 0.07527387142181396, "learning_rate": 1.8541018895001192e-05, "loss": 0.007, "step": 20121 }, { "epoch": 2.8201822004204624, "grad_norm": 0.08619034290313721, "learning_rate": 1.8526668261181534e-05, "loss": 0.0069, "step": 20122 }, { "epoch": 2.820322354590049, "grad_norm": 0.14755816757678986, "learning_rate": 1.8512317627361873e-05, "loss": 0.0356, "step": 20123 }, { "epoch": 2.8204625087596353, "grad_norm": 0.13590243458747864, 
"learning_rate": 1.8497966993542212e-05, "loss": 0.006, "step": 20124 }, { "epoch": 2.8206026629292222, "grad_norm": 0.17464981973171234, "learning_rate": 1.848361635972255e-05, "loss": 0.0267, "step": 20125 }, { "epoch": 2.8207428170988087, "grad_norm": 0.1709974855184555, "learning_rate": 1.8469265725902893e-05, "loss": 0.0197, "step": 20126 }, { "epoch": 2.820882971268395, "grad_norm": 0.14928732812404633, "learning_rate": 1.845491509208323e-05, "loss": 0.0173, "step": 20127 }, { "epoch": 2.8210231254379816, "grad_norm": 0.5162956118583679, "learning_rate": 1.844056445826357e-05, "loss": 0.0189, "step": 20128 }, { "epoch": 2.8211632796075685, "grad_norm": 0.3384089469909668, "learning_rate": 1.842621382444391e-05, "loss": 0.0618, "step": 20129 }, { "epoch": 2.821303433777155, "grad_norm": 0.08030930161476135, "learning_rate": 1.841186319062425e-05, "loss": 0.007, "step": 20130 }, { "epoch": 2.8214435879467414, "grad_norm": 0.2609022557735443, "learning_rate": 1.839751255680459e-05, "loss": 0.0179, "step": 20131 }, { "epoch": 2.821583742116328, "grad_norm": 0.6638238430023193, "learning_rate": 1.8383161922984932e-05, "loss": 0.0501, "step": 20132 }, { "epoch": 2.8217238962859144, "grad_norm": 0.07239685207605362, "learning_rate": 1.836881128916527e-05, "loss": 0.0203, "step": 20133 }, { "epoch": 2.821864050455501, "grad_norm": 0.3404446840286255, "learning_rate": 1.835446065534561e-05, "loss": 0.0576, "step": 20134 }, { "epoch": 2.8220042046250877, "grad_norm": 0.331024169921875, "learning_rate": 1.834011002152595e-05, "loss": 0.0086, "step": 20135 }, { "epoch": 2.822144358794674, "grad_norm": 0.3042827248573303, "learning_rate": 1.832575938770629e-05, "loss": 0.012, "step": 20136 }, { "epoch": 2.8222845129642606, "grad_norm": 0.04164764657616615, "learning_rate": 1.831140875388663e-05, "loss": 0.0034, "step": 20137 }, { "epoch": 2.822424667133847, "grad_norm": 0.0468953438103199, "learning_rate": 1.8297058120066968e-05, "loss": 0.0022, "step": 20138 }, { 
"epoch": 2.822564821303434, "grad_norm": 0.501755952835083, "learning_rate": 1.828270748624731e-05, "loss": 0.0602, "step": 20139 }, { "epoch": 2.8227049754730205, "grad_norm": 0.1853044480085373, "learning_rate": 1.826835685242765e-05, "loss": 0.0081, "step": 20140 }, { "epoch": 2.822845129642607, "grad_norm": 0.00757548026740551, "learning_rate": 1.8254006218607987e-05, "loss": 0.0009, "step": 20141 }, { "epoch": 2.8229852838121934, "grad_norm": 0.17804817855358124, "learning_rate": 1.8239655584788326e-05, "loss": 0.0149, "step": 20142 }, { "epoch": 2.82312543798178, "grad_norm": 0.33779144287109375, "learning_rate": 1.8225304950968668e-05, "loss": 0.0253, "step": 20143 }, { "epoch": 2.8232655921513663, "grad_norm": 0.1400834619998932, "learning_rate": 1.8210954317149007e-05, "loss": 0.029, "step": 20144 }, { "epoch": 2.823405746320953, "grad_norm": 0.16597984731197357, "learning_rate": 1.8196603683329345e-05, "loss": 0.0249, "step": 20145 }, { "epoch": 2.8235459004905397, "grad_norm": 0.08694522082805634, "learning_rate": 1.8182253049509684e-05, "loss": 0.0038, "step": 20146 }, { "epoch": 2.823686054660126, "grad_norm": 0.5231955647468567, "learning_rate": 1.8167902415690026e-05, "loss": 0.0289, "step": 20147 }, { "epoch": 2.8238262088297126, "grad_norm": 0.06470644474029541, "learning_rate": 1.8153551781870365e-05, "loss": 0.011, "step": 20148 }, { "epoch": 2.8239663629992995, "grad_norm": 0.5129496455192566, "learning_rate": 1.8139201148050704e-05, "loss": 0.0843, "step": 20149 }, { "epoch": 2.824106517168886, "grad_norm": 0.011135991662740707, "learning_rate": 1.8124850514231042e-05, "loss": 0.0011, "step": 20150 }, { "epoch": 2.8242466713384724, "grad_norm": 0.18264475464820862, "learning_rate": 1.8110499880411384e-05, "loss": 0.0461, "step": 20151 }, { "epoch": 2.824386825508059, "grad_norm": 0.08170921355485916, "learning_rate": 1.8096149246591723e-05, "loss": 0.0055, "step": 20152 }, { "epoch": 2.8245269796776453, "grad_norm": 0.2484898865222931, 
"learning_rate": 1.8081798612772062e-05, "loss": 0.0149, "step": 20153 }, { "epoch": 2.824667133847232, "grad_norm": 0.25872939825057983, "learning_rate": 1.80674479789524e-05, "loss": 0.0401, "step": 20154 }, { "epoch": 2.8248072880168182, "grad_norm": 0.0702553391456604, "learning_rate": 1.8053097345132743e-05, "loss": 0.0034, "step": 20155 }, { "epoch": 2.824947442186405, "grad_norm": 0.13753172755241394, "learning_rate": 1.803874671131308e-05, "loss": 0.0259, "step": 20156 }, { "epoch": 2.8250875963559916, "grad_norm": 0.31921085715293884, "learning_rate": 1.802439607749342e-05, "loss": 0.0301, "step": 20157 }, { "epoch": 2.825227750525578, "grad_norm": 0.1717853844165802, "learning_rate": 1.801004544367376e-05, "loss": 0.0084, "step": 20158 }, { "epoch": 2.8253679046951645, "grad_norm": 0.0830700471997261, "learning_rate": 1.79956948098541e-05, "loss": 0.0187, "step": 20159 }, { "epoch": 2.8255080588647514, "grad_norm": 0.17259301245212555, "learning_rate": 1.798134417603444e-05, "loss": 0.0119, "step": 20160 }, { "epoch": 2.825648213034338, "grad_norm": 0.37203341722488403, "learning_rate": 1.796699354221478e-05, "loss": 0.0252, "step": 20161 }, { "epoch": 2.8257883672039243, "grad_norm": 0.6927008628845215, "learning_rate": 1.7952642908395117e-05, "loss": 0.0461, "step": 20162 }, { "epoch": 2.825928521373511, "grad_norm": 0.1689605414867401, "learning_rate": 1.793829227457546e-05, "loss": 0.0209, "step": 20163 }, { "epoch": 2.8260686755430973, "grad_norm": 0.14327998459339142, "learning_rate": 1.7923941640755798e-05, "loss": 0.0028, "step": 20164 }, { "epoch": 2.8262088297126837, "grad_norm": 0.3046146035194397, "learning_rate": 1.7909591006936137e-05, "loss": 0.0274, "step": 20165 }, { "epoch": 2.8263489838822706, "grad_norm": 0.2714897096157074, "learning_rate": 1.7895240373116476e-05, "loss": 0.0018, "step": 20166 }, { "epoch": 2.826489138051857, "grad_norm": 0.1348365992307663, "learning_rate": 1.7880889739296818e-05, "loss": 0.0153, "step": 20167 }, { 
"epoch": 2.8266292922214435, "grad_norm": 0.5544025897979736, "learning_rate": 1.7866539105477156e-05, "loss": 0.0614, "step": 20168 }, { "epoch": 2.82676944639103, "grad_norm": 0.10236028581857681, "learning_rate": 1.7852188471657495e-05, "loss": 0.0066, "step": 20169 }, { "epoch": 2.826909600560617, "grad_norm": 1.294047474861145, "learning_rate": 1.7837837837837837e-05, "loss": 0.1696, "step": 20170 }, { "epoch": 2.8270497547302034, "grad_norm": 0.6213603615760803, "learning_rate": 1.7823487204018176e-05, "loss": 0.093, "step": 20171 }, { "epoch": 2.82718990889979, "grad_norm": 0.050013910979032516, "learning_rate": 1.7809136570198518e-05, "loss": 0.0049, "step": 20172 }, { "epoch": 2.8273300630693763, "grad_norm": 0.051053084433078766, "learning_rate": 1.7794785936378857e-05, "loss": 0.0059, "step": 20173 }, { "epoch": 2.8274702172389627, "grad_norm": 0.1913643628358841, "learning_rate": 1.7780435302559195e-05, "loss": 0.0266, "step": 20174 }, { "epoch": 2.827610371408549, "grad_norm": 0.15279848873615265, "learning_rate": 1.7766084668739534e-05, "loss": 0.0068, "step": 20175 }, { "epoch": 2.827750525578136, "grad_norm": 0.07097769528627396, "learning_rate": 1.7751734034919876e-05, "loss": 0.0076, "step": 20176 }, { "epoch": 2.8278906797477226, "grad_norm": 0.13913406431674957, "learning_rate": 1.7737383401100215e-05, "loss": 0.0144, "step": 20177 }, { "epoch": 2.828030833917309, "grad_norm": 0.15220694243907928, "learning_rate": 1.7723032767280554e-05, "loss": 0.0342, "step": 20178 }, { "epoch": 2.8281709880868955, "grad_norm": 0.23292815685272217, "learning_rate": 1.7708682133460892e-05, "loss": 0.029, "step": 20179 }, { "epoch": 2.8283111422564824, "grad_norm": 0.4258395731449127, "learning_rate": 1.7694331499641235e-05, "loss": 0.0283, "step": 20180 }, { "epoch": 2.828451296426069, "grad_norm": 0.8651798367500305, "learning_rate": 1.7679980865821573e-05, "loss": 0.0343, "step": 20181 }, { "epoch": 2.8285914505956553, "grad_norm": 0.09384683519601822, 
"learning_rate": 1.7665630232001912e-05, "loss": 0.0242, "step": 20182 }, { "epoch": 2.8287316047652418, "grad_norm": 0.22945542633533478, "learning_rate": 1.765127959818225e-05, "loss": 0.0369, "step": 20183 }, { "epoch": 2.8288717589348282, "grad_norm": 0.04409319907426834, "learning_rate": 1.7636928964362593e-05, "loss": 0.0049, "step": 20184 }, { "epoch": 2.8290119131044147, "grad_norm": 0.2397792786359787, "learning_rate": 1.762257833054293e-05, "loss": 0.0374, "step": 20185 }, { "epoch": 2.829152067274001, "grad_norm": 0.20370949804782867, "learning_rate": 1.760822769672327e-05, "loss": 0.0116, "step": 20186 }, { "epoch": 2.829292221443588, "grad_norm": 0.1491415798664093, "learning_rate": 1.759387706290361e-05, "loss": 0.0152, "step": 20187 }, { "epoch": 2.8294323756131745, "grad_norm": 0.22468191385269165, "learning_rate": 1.757952642908395e-05, "loss": 0.015, "step": 20188 }, { "epoch": 2.829572529782761, "grad_norm": 0.19595931470394135, "learning_rate": 1.756517579526429e-05, "loss": 0.0694, "step": 20189 }, { "epoch": 2.8297126839523474, "grad_norm": 0.21717269718647003, "learning_rate": 1.755082516144463e-05, "loss": 0.0135, "step": 20190 }, { "epoch": 2.8298528381219343, "grad_norm": 0.3220983147621155, "learning_rate": 1.7536474527624967e-05, "loss": 0.0242, "step": 20191 }, { "epoch": 2.829992992291521, "grad_norm": 0.08474330604076385, "learning_rate": 1.752212389380531e-05, "loss": 0.0175, "step": 20192 }, { "epoch": 2.8301331464611073, "grad_norm": 0.07432655245065689, "learning_rate": 1.7507773259985648e-05, "loss": 0.0095, "step": 20193 }, { "epoch": 2.8302733006306937, "grad_norm": 0.49580323696136475, "learning_rate": 1.7493422626165987e-05, "loss": 0.0462, "step": 20194 }, { "epoch": 2.83041345480028, "grad_norm": 0.05561661347746849, "learning_rate": 1.7479071992346326e-05, "loss": 0.0049, "step": 20195 }, { "epoch": 2.8305536089698666, "grad_norm": 0.2758733332157135, "learning_rate": 1.7464721358526668e-05, "loss": 0.0574, "step": 20196 
}, { "epoch": 2.8306937631394535, "grad_norm": 0.566324770450592, "learning_rate": 1.7450370724707006e-05, "loss": 0.0299, "step": 20197 }, { "epoch": 2.83083391730904, "grad_norm": 0.24207179248332977, "learning_rate": 1.7436020090887345e-05, "loss": 0.0642, "step": 20198 }, { "epoch": 2.8309740714786265, "grad_norm": 0.3416862487792969, "learning_rate": 1.7421669457067684e-05, "loss": 0.0298, "step": 20199 }, { "epoch": 2.831114225648213, "grad_norm": 0.17715443670749664, "learning_rate": 1.7407318823248026e-05, "loss": 0.0105, "step": 20200 }, { "epoch": 2.8312543798178, "grad_norm": 0.18120242655277252, "learning_rate": 1.7392968189428365e-05, "loss": 0.011, "step": 20201 }, { "epoch": 2.8313945339873863, "grad_norm": 0.11265964061021805, "learning_rate": 1.7378617555608703e-05, "loss": 0.0447, "step": 20202 }, { "epoch": 2.8315346881569727, "grad_norm": 0.3904508650302887, "learning_rate": 1.7364266921789042e-05, "loss": 0.0408, "step": 20203 }, { "epoch": 2.831674842326559, "grad_norm": 0.025759408250451088, "learning_rate": 1.7349916287969384e-05, "loss": 0.0025, "step": 20204 }, { "epoch": 2.8318149964961457, "grad_norm": 0.05911707133054733, "learning_rate": 1.7335565654149723e-05, "loss": 0.0061, "step": 20205 }, { "epoch": 2.831955150665732, "grad_norm": 0.1870964765548706, "learning_rate": 1.7321215020330062e-05, "loss": 0.0215, "step": 20206 }, { "epoch": 2.832095304835319, "grad_norm": 0.30868837237358093, "learning_rate": 1.7306864386510404e-05, "loss": 0.0247, "step": 20207 }, { "epoch": 2.8322354590049055, "grad_norm": 0.17618998885154724, "learning_rate": 1.7292513752690743e-05, "loss": 0.007, "step": 20208 }, { "epoch": 2.832375613174492, "grad_norm": 0.3716455399990082, "learning_rate": 1.727816311887108e-05, "loss": 0.0337, "step": 20209 }, { "epoch": 2.8325157673440784, "grad_norm": 0.1306070238351822, "learning_rate": 1.7263812485051423e-05, "loss": 0.0137, "step": 20210 }, { "epoch": 2.8326559215136653, "grad_norm": 0.3776424527168274, 
"learning_rate": 1.7249461851231762e-05, "loss": 0.0076, "step": 20211 }, { "epoch": 2.8327960756832518, "grad_norm": 0.09000629931688309, "learning_rate": 1.72351112174121e-05, "loss": 0.0169, "step": 20212 }, { "epoch": 2.832936229852838, "grad_norm": 0.1441204845905304, "learning_rate": 1.722076058359244e-05, "loss": 0.0145, "step": 20213 }, { "epoch": 2.8330763840224247, "grad_norm": 0.08253537118434906, "learning_rate": 1.7206409949772782e-05, "loss": 0.0036, "step": 20214 }, { "epoch": 2.833216538192011, "grad_norm": 0.014261264353990555, "learning_rate": 1.719205931595312e-05, "loss": 0.0015, "step": 20215 }, { "epoch": 2.8333566923615976, "grad_norm": 0.5262055993080139, "learning_rate": 1.717770868213346e-05, "loss": 0.0391, "step": 20216 }, { "epoch": 2.833496846531184, "grad_norm": 0.025808749720454216, "learning_rate": 1.7163358048313798e-05, "loss": 0.0019, "step": 20217 }, { "epoch": 2.833637000700771, "grad_norm": 0.6952418684959412, "learning_rate": 1.714900741449414e-05, "loss": 0.072, "step": 20218 }, { "epoch": 2.8337771548703574, "grad_norm": 0.6326785683631897, "learning_rate": 1.713465678067448e-05, "loss": 0.0347, "step": 20219 }, { "epoch": 2.833917309039944, "grad_norm": 0.443508118391037, "learning_rate": 1.7120306146854817e-05, "loss": 0.0247, "step": 20220 }, { "epoch": 2.8340574632095303, "grad_norm": 0.26656726002693176, "learning_rate": 1.7105955513035156e-05, "loss": 0.0348, "step": 20221 }, { "epoch": 2.8341976173791172, "grad_norm": 0.28205418586730957, "learning_rate": 1.7091604879215498e-05, "loss": 0.019, "step": 20222 }, { "epoch": 2.8343377715487037, "grad_norm": 0.041545867919921875, "learning_rate": 1.7077254245395837e-05, "loss": 0.0043, "step": 20223 }, { "epoch": 2.83447792571829, "grad_norm": 0.3739026188850403, "learning_rate": 1.7062903611576176e-05, "loss": 0.0489, "step": 20224 }, { "epoch": 2.8346180798878766, "grad_norm": 0.09915999323129654, "learning_rate": 1.7048552977756514e-05, "loss": 0.0105, "step": 20225 }, 
{ "epoch": 2.834758234057463, "grad_norm": 0.5638039708137512, "learning_rate": 1.7034202343936857e-05, "loss": 0.0191, "step": 20226 }, { "epoch": 2.8348983882270495, "grad_norm": 0.15827758610248566, "learning_rate": 1.7019851710117195e-05, "loss": 0.014, "step": 20227 }, { "epoch": 2.8350385423966364, "grad_norm": 0.2947181463241577, "learning_rate": 1.7005501076297534e-05, "loss": 0.0675, "step": 20228 }, { "epoch": 2.835178696566223, "grad_norm": 0.1616363823413849, "learning_rate": 1.6991150442477873e-05, "loss": 0.0326, "step": 20229 }, { "epoch": 2.8353188507358094, "grad_norm": 0.17346526682376862, "learning_rate": 1.6976799808658215e-05, "loss": 0.0333, "step": 20230 }, { "epoch": 2.835459004905396, "grad_norm": 0.25854647159576416, "learning_rate": 1.6962449174838554e-05, "loss": 0.0241, "step": 20231 }, { "epoch": 2.8355991590749827, "grad_norm": 0.23596741259098053, "learning_rate": 1.6948098541018892e-05, "loss": 0.0119, "step": 20232 }, { "epoch": 2.835739313244569, "grad_norm": 0.19777964055538177, "learning_rate": 1.693374790719923e-05, "loss": 0.0366, "step": 20233 }, { "epoch": 2.8358794674141556, "grad_norm": 0.31582239270210266, "learning_rate": 1.6919397273379573e-05, "loss": 0.0292, "step": 20234 }, { "epoch": 2.836019621583742, "grad_norm": 0.10154934227466583, "learning_rate": 1.6905046639559912e-05, "loss": 0.0106, "step": 20235 }, { "epoch": 2.8361597757533286, "grad_norm": 0.17357492446899414, "learning_rate": 1.689069600574025e-05, "loss": 0.0214, "step": 20236 }, { "epoch": 2.836299929922915, "grad_norm": 0.33176639676094055, "learning_rate": 1.687634537192059e-05, "loss": 0.0428, "step": 20237 }, { "epoch": 2.8364400840925015, "grad_norm": 0.28931349515914917, "learning_rate": 1.686199473810093e-05, "loss": 0.0191, "step": 20238 }, { "epoch": 2.8365802382620884, "grad_norm": 0.33798739314079285, "learning_rate": 1.684764410428127e-05, "loss": 0.0554, "step": 20239 }, { "epoch": 2.836720392431675, "grad_norm": 0.13870789110660553, 
"learning_rate": 1.683329347046161e-05, "loss": 0.0266, "step": 20240 }, { "epoch": 2.8368605466012613, "grad_norm": 0.2243368774652481, "learning_rate": 1.6818942836641948e-05, "loss": 0.032, "step": 20241 }, { "epoch": 2.837000700770848, "grad_norm": 0.0687587708234787, "learning_rate": 1.680459220282229e-05, "loss": 0.0048, "step": 20242 }, { "epoch": 2.8371408549404347, "grad_norm": 0.3018030524253845, "learning_rate": 1.679024156900263e-05, "loss": 0.0346, "step": 20243 }, { "epoch": 2.837281009110021, "grad_norm": 0.26874881982803345, "learning_rate": 1.677589093518297e-05, "loss": 0.0111, "step": 20244 }, { "epoch": 2.8374211632796076, "grad_norm": 0.14872173964977264, "learning_rate": 1.676154030136331e-05, "loss": 0.0126, "step": 20245 }, { "epoch": 2.837561317449194, "grad_norm": 0.40505680441856384, "learning_rate": 1.6747189667543648e-05, "loss": 0.0472, "step": 20246 }, { "epoch": 2.8377014716187805, "grad_norm": 0.28024744987487793, "learning_rate": 1.673283903372399e-05, "loss": 0.0357, "step": 20247 }, { "epoch": 2.837841625788367, "grad_norm": 0.14496368169784546, "learning_rate": 1.671848839990433e-05, "loss": 0.0811, "step": 20248 }, { "epoch": 2.837981779957954, "grad_norm": 0.19921255111694336, "learning_rate": 1.6704137766084668e-05, "loss": 0.027, "step": 20249 }, { "epoch": 2.8381219341275403, "grad_norm": 0.11754713952541351, "learning_rate": 1.6689787132265006e-05, "loss": 0.0085, "step": 20250 }, { "epoch": 2.838262088297127, "grad_norm": 0.44583505392074585, "learning_rate": 1.667543649844535e-05, "loss": 0.0734, "step": 20251 }, { "epoch": 2.8384022424667132, "grad_norm": 1.23448646068573, "learning_rate": 1.6661085864625687e-05, "loss": 0.0252, "step": 20252 }, { "epoch": 2.8385423966363, "grad_norm": 0.05516646429896355, "learning_rate": 1.6646735230806026e-05, "loss": 0.0061, "step": 20253 }, { "epoch": 2.8386825508058866, "grad_norm": 0.06524958461523056, "learning_rate": 1.6632384596986365e-05, "loss": 0.0208, "step": 20254 }, { 
"epoch": 2.838822704975473, "grad_norm": 0.25072863698005676, "learning_rate": 1.6618033963166707e-05, "loss": 0.0258, "step": 20255 }, { "epoch": 2.8389628591450595, "grad_norm": 0.08586220443248749, "learning_rate": 1.6603683329347045e-05, "loss": 0.0049, "step": 20256 }, { "epoch": 2.839103013314646, "grad_norm": 0.11943991482257843, "learning_rate": 1.6589332695527384e-05, "loss": 0.0074, "step": 20257 }, { "epoch": 2.8392431674842324, "grad_norm": 0.0650561973452568, "learning_rate": 1.6574982061707723e-05, "loss": 0.0035, "step": 20258 }, { "epoch": 2.8393833216538193, "grad_norm": 0.05371643230319023, "learning_rate": 1.6560631427888065e-05, "loss": 0.0027, "step": 20259 }, { "epoch": 2.839523475823406, "grad_norm": 0.18674317002296448, "learning_rate": 1.6546280794068404e-05, "loss": 0.0132, "step": 20260 }, { "epoch": 2.8396636299929923, "grad_norm": 0.03560641035437584, "learning_rate": 1.6531930160248742e-05, "loss": 0.0024, "step": 20261 }, { "epoch": 2.8398037841625787, "grad_norm": 0.41691285371780396, "learning_rate": 1.651757952642908e-05, "loss": 0.0165, "step": 20262 }, { "epoch": 2.8399439383321656, "grad_norm": 0.09331688284873962, "learning_rate": 1.6503228892609423e-05, "loss": 0.0099, "step": 20263 }, { "epoch": 2.840084092501752, "grad_norm": 1.6376011371612549, "learning_rate": 1.6488878258789762e-05, "loss": 0.1782, "step": 20264 }, { "epoch": 2.8402242466713385, "grad_norm": 1.7144731283187866, "learning_rate": 1.64745276249701e-05, "loss": 0.0366, "step": 20265 }, { "epoch": 2.840364400840925, "grad_norm": 0.5791053175926208, "learning_rate": 1.646017699115044e-05, "loss": 0.0161, "step": 20266 }, { "epoch": 2.8405045550105115, "grad_norm": 0.29969364404678345, "learning_rate": 1.644582635733078e-05, "loss": 0.0071, "step": 20267 }, { "epoch": 2.840644709180098, "grad_norm": 0.00609160028398037, "learning_rate": 1.643147572351112e-05, "loss": 0.0005, "step": 20268 }, { "epoch": 2.8407848633496844, "grad_norm": 0.48932984471321106, 
"learning_rate": 1.641712508969146e-05, "loss": 0.011, "step": 20269 }, { "epoch": 2.8409250175192713, "grad_norm": null, "learning_rate": 1.6402774455871798e-05, "loss": 0.1917, "step": 20270 }, { "epoch": 2.8410651716888577, "grad_norm": 0.5322683453559875, "learning_rate": 1.6402774455871798e-05, "loss": 0.0257, "step": 20271 }, { "epoch": 2.841205325858444, "grad_norm": 0.5537340641021729, "learning_rate": 1.638842382205214e-05, "loss": 0.0302, "step": 20272 }, { "epoch": 2.8413454800280307, "grad_norm": 0.12467426806688309, "learning_rate": 1.637407318823248e-05, "loss": 0.0127, "step": 20273 }, { "epoch": 2.8414856341976176, "grad_norm": 0.28427594900131226, "learning_rate": 1.6359722554412817e-05, "loss": 0.0412, "step": 20274 }, { "epoch": 2.841625788367204, "grad_norm": 0.13745933771133423, "learning_rate": 1.6345371920593156e-05, "loss": 0.013, "step": 20275 }, { "epoch": 2.8417659425367905, "grad_norm": 0.21139267086982727, "learning_rate": 1.6331021286773498e-05, "loss": 0.0323, "step": 20276 }, { "epoch": 2.841906096706377, "grad_norm": 0.15647540986537933, "learning_rate": 1.6316670652953837e-05, "loss": 0.0082, "step": 20277 }, { "epoch": 2.8420462508759634, "grad_norm": 0.07853582501411438, "learning_rate": 1.6302320019134176e-05, "loss": 0.0031, "step": 20278 }, { "epoch": 2.84218640504555, "grad_norm": 0.16068831086158752, "learning_rate": 1.6287969385314514e-05, "loss": 0.0523, "step": 20279 }, { "epoch": 2.8423265592151368, "grad_norm": 0.3388252556324005, "learning_rate": 1.6273618751494856e-05, "loss": 0.0298, "step": 20280 }, { "epoch": 2.8424667133847232, "grad_norm": 0.14630305767059326, "learning_rate": 1.6259268117675195e-05, "loss": 0.0064, "step": 20281 }, { "epoch": 2.8426068675543097, "grad_norm": 0.2632675766944885, "learning_rate": 1.6244917483855537e-05, "loss": 0.0111, "step": 20282 }, { "epoch": 2.842747021723896, "grad_norm": 0.016888601705431938, "learning_rate": 1.6230566850035876e-05, "loss": 0.0015, "step": 20283 }, { 
"epoch": 2.842887175893483, "grad_norm": 0.1076136976480484, "learning_rate": 1.6216216216216215e-05, "loss": 0.0117, "step": 20284 }, { "epoch": 2.8430273300630695, "grad_norm": 0.22523699700832367, "learning_rate": 1.6201865582396557e-05, "loss": 0.0301, "step": 20285 }, { "epoch": 2.843167484232656, "grad_norm": 0.09478264302015305, "learning_rate": 1.6187514948576896e-05, "loss": 0.0056, "step": 20286 }, { "epoch": 2.8433076384022424, "grad_norm": 0.1267780065536499, "learning_rate": 1.6173164314757234e-05, "loss": 0.0149, "step": 20287 }, { "epoch": 2.843447792571829, "grad_norm": 0.2338511198759079, "learning_rate": 1.6158813680937573e-05, "loss": 0.0378, "step": 20288 }, { "epoch": 2.8435879467414154, "grad_norm": 0.015144739300012589, "learning_rate": 1.6144463047117915e-05, "loss": 0.0013, "step": 20289 }, { "epoch": 2.8437281009110023, "grad_norm": 0.09922248870134354, "learning_rate": 1.6130112413298254e-05, "loss": 0.0049, "step": 20290 }, { "epoch": 2.8438682550805887, "grad_norm": 0.03603662922978401, "learning_rate": 1.6115761779478593e-05, "loss": 0.001, "step": 20291 }, { "epoch": 2.844008409250175, "grad_norm": 0.1759449988603592, "learning_rate": 1.610141114565893e-05, "loss": 0.0152, "step": 20292 }, { "epoch": 2.8441485634197616, "grad_norm": 0.08061642199754715, "learning_rate": 1.6087060511839273e-05, "loss": 0.0076, "step": 20293 }, { "epoch": 2.8442887175893485, "grad_norm": 0.07315284013748169, "learning_rate": 1.6072709878019612e-05, "loss": 0.0083, "step": 20294 }, { "epoch": 2.844428871758935, "grad_norm": 0.11606989055871964, "learning_rate": 1.605835924419995e-05, "loss": 0.0357, "step": 20295 }, { "epoch": 2.8445690259285215, "grad_norm": 0.0716494619846344, "learning_rate": 1.604400861038029e-05, "loss": 0.0024, "step": 20296 }, { "epoch": 2.844709180098108, "grad_norm": 0.24385316669940948, "learning_rate": 1.602965797656063e-05, "loss": 0.0193, "step": 20297 }, { "epoch": 2.8448493342676944, "grad_norm": 0.18004268407821655, 
"learning_rate": 1.601530734274097e-05, "loss": 0.0068, "step": 20298 }, { "epoch": 2.844989488437281, "grad_norm": 0.0315437950193882, "learning_rate": 1.600095670892131e-05, "loss": 0.0018, "step": 20299 }, { "epoch": 2.8451296426068673, "grad_norm": 0.21020697057247162, "learning_rate": 1.5986606075101648e-05, "loss": 0.0978, "step": 20300 }, { "epoch": 2.845269796776454, "grad_norm": 0.16509751975536346, "learning_rate": 1.597225544128199e-05, "loss": 0.009, "step": 20301 }, { "epoch": 2.8454099509460407, "grad_norm": 0.08987118303775787, "learning_rate": 1.595790480746233e-05, "loss": 0.0256, "step": 20302 }, { "epoch": 2.845550105115627, "grad_norm": 0.14443255960941315, "learning_rate": 1.5943554173642667e-05, "loss": 0.0104, "step": 20303 }, { "epoch": 2.8456902592852136, "grad_norm": 0.022094091400504112, "learning_rate": 1.5929203539823006e-05, "loss": 0.0012, "step": 20304 }, { "epoch": 2.8458304134548005, "grad_norm": 0.285263329744339, "learning_rate": 1.5914852906003348e-05, "loss": 0.0095, "step": 20305 }, { "epoch": 2.845970567624387, "grad_norm": 0.5317838788032532, "learning_rate": 1.5900502272183687e-05, "loss": 0.0181, "step": 20306 }, { "epoch": 2.8461107217939734, "grad_norm": 0.1129615530371666, "learning_rate": 1.5886151638364026e-05, "loss": 0.0082, "step": 20307 }, { "epoch": 2.84625087596356, "grad_norm": 0.27298229932785034, "learning_rate": 1.5871801004544364e-05, "loss": 0.0118, "step": 20308 }, { "epoch": 2.8463910301331463, "grad_norm": 0.045300569385290146, "learning_rate": 1.5857450370724707e-05, "loss": 0.0028, "step": 20309 }, { "epoch": 2.8465311843027328, "grad_norm": 0.5991926193237305, "learning_rate": 1.5843099736905045e-05, "loss": 0.0801, "step": 20310 }, { "epoch": 2.8466713384723197, "grad_norm": 0.05029904097318649, "learning_rate": 1.5828749103085384e-05, "loss": 0.0014, "step": 20311 }, { "epoch": 2.846811492641906, "grad_norm": 0.053701359778642654, "learning_rate": 1.5814398469265723e-05, "loss": 0.002, "step": 
20312 }, { "epoch": 2.8469516468114926, "grad_norm": 0.0034739954862743616, "learning_rate": 1.5800047835446065e-05, "loss": 0.0004, "step": 20313 }, { "epoch": 2.847091800981079, "grad_norm": 0.017947666347026825, "learning_rate": 1.5785697201626404e-05, "loss": 0.0013, "step": 20314 }, { "epoch": 2.847231955150666, "grad_norm": 0.40516209602355957, "learning_rate": 1.5771346567806742e-05, "loss": 0.0321, "step": 20315 }, { "epoch": 2.8473721093202524, "grad_norm": 1.0125679969787598, "learning_rate": 1.575699593398708e-05, "loss": 0.0973, "step": 20316 }, { "epoch": 2.847512263489839, "grad_norm": 0.167917862534523, "learning_rate": 1.5742645300167423e-05, "loss": 0.0076, "step": 20317 }, { "epoch": 2.8476524176594253, "grad_norm": 0.44034916162490845, "learning_rate": 1.5728294666347762e-05, "loss": 0.0095, "step": 20318 }, { "epoch": 2.847792571829012, "grad_norm": 0.3668735921382904, "learning_rate": 1.5713944032528104e-05, "loss": 0.0706, "step": 20319 }, { "epoch": 2.8479327259985983, "grad_norm": 1.4763059616088867, "learning_rate": 1.5699593398708443e-05, "loss": 0.0599, "step": 20320 }, { "epoch": 2.848072880168185, "grad_norm": 0.3076702356338501, "learning_rate": 1.568524276488878e-05, "loss": 0.0803, "step": 20321 }, { "epoch": 2.8482130343377716, "grad_norm": 0.1260942816734314, "learning_rate": 1.567089213106912e-05, "loss": 0.0116, "step": 20322 }, { "epoch": 2.848353188507358, "grad_norm": 0.3856593072414398, "learning_rate": 1.5656541497249462e-05, "loss": 0.0464, "step": 20323 }, { "epoch": 2.8484933426769445, "grad_norm": 0.16212178766727448, "learning_rate": 1.56421908634298e-05, "loss": 0.04, "step": 20324 }, { "epoch": 2.8486334968465314, "grad_norm": 0.2588249146938324, "learning_rate": 1.562784022961014e-05, "loss": 0.0264, "step": 20325 }, { "epoch": 2.848773651016118, "grad_norm": 0.15476007759571075, "learning_rate": 1.561348959579048e-05, "loss": 0.0128, "step": 20326 }, { "epoch": 2.8489138051857044, "grad_norm": 0.07228241860866547, 
"learning_rate": 1.559913896197082e-05, "loss": 0.0098, "step": 20327 }, { "epoch": 2.849053959355291, "grad_norm": 0.06651583313941956, "learning_rate": 1.558478832815116e-05, "loss": 0.0059, "step": 20328 }, { "epoch": 2.8491941135248773, "grad_norm": 0.39124396443367004, "learning_rate": 1.5570437694331498e-05, "loss": 0.0606, "step": 20329 }, { "epoch": 2.8493342676944637, "grad_norm": 0.2665528357028961, "learning_rate": 1.5556087060511837e-05, "loss": 0.0193, "step": 20330 }, { "epoch": 2.84947442186405, "grad_norm": 0.1665567010641098, "learning_rate": 1.554173642669218e-05, "loss": 0.0234, "step": 20331 }, { "epoch": 2.849614576033637, "grad_norm": 0.04394285008311272, "learning_rate": 1.5527385792872518e-05, "loss": 0.0025, "step": 20332 }, { "epoch": 2.8497547302032236, "grad_norm": 0.24094484746456146, "learning_rate": 1.5513035159052856e-05, "loss": 0.0378, "step": 20333 }, { "epoch": 2.84989488437281, "grad_norm": 0.19465135037899017, "learning_rate": 1.5498684525233195e-05, "loss": 0.0337, "step": 20334 }, { "epoch": 2.8500350385423965, "grad_norm": 0.1165773943066597, "learning_rate": 1.5484333891413537e-05, "loss": 0.0075, "step": 20335 }, { "epoch": 2.8501751927119834, "grad_norm": 0.6786255836486816, "learning_rate": 1.5469983257593876e-05, "loss": 0.0334, "step": 20336 }, { "epoch": 2.85031534688157, "grad_norm": 0.011521991342306137, "learning_rate": 1.5455632623774215e-05, "loss": 0.0011, "step": 20337 }, { "epoch": 2.8504555010511563, "grad_norm": 0.0730847418308258, "learning_rate": 1.5441281989954553e-05, "loss": 0.0048, "step": 20338 }, { "epoch": 2.8505956552207428, "grad_norm": 0.22655510902404785, "learning_rate": 1.5426931356134895e-05, "loss": 0.0145, "step": 20339 }, { "epoch": 2.8507358093903292, "grad_norm": 0.15519924461841583, "learning_rate": 1.5412580722315234e-05, "loss": 0.0098, "step": 20340 }, { "epoch": 2.8508759635599157, "grad_norm": 0.05947353318333626, "learning_rate": 1.5398230088495573e-05, "loss": 0.0053, "step": 
20341 }, { "epoch": 2.8510161177295026, "grad_norm": 0.16406801342964172, "learning_rate": 1.538387945467591e-05, "loss": 0.0172, "step": 20342 }, { "epoch": 2.851156271899089, "grad_norm": 0.2745133936405182, "learning_rate": 1.5369528820856254e-05, "loss": 0.0343, "step": 20343 }, { "epoch": 2.8512964260686755, "grad_norm": 0.2881617248058319, "learning_rate": 1.5355178187036592e-05, "loss": 0.0249, "step": 20344 }, { "epoch": 2.851436580238262, "grad_norm": 0.17715638875961304, "learning_rate": 1.534082755321693e-05, "loss": 0.0212, "step": 20345 }, { "epoch": 2.851576734407849, "grad_norm": 0.4022597074508667, "learning_rate": 1.5326476919397273e-05, "loss": 0.0793, "step": 20346 }, { "epoch": 2.8517168885774353, "grad_norm": 0.022772137075662613, "learning_rate": 1.5312126285577612e-05, "loss": 0.0016, "step": 20347 }, { "epoch": 2.851857042747022, "grad_norm": 0.2803971767425537, "learning_rate": 1.529777565175795e-05, "loss": 0.0491, "step": 20348 }, { "epoch": 2.8519971969166082, "grad_norm": 0.059749312698841095, "learning_rate": 1.528342501793829e-05, "loss": 0.0023, "step": 20349 }, { "epoch": 2.8521373510861947, "grad_norm": 0.010602869093418121, "learning_rate": 1.526907438411863e-05, "loss": 0.0009, "step": 20350 }, { "epoch": 2.852277505255781, "grad_norm": 0.2774984836578369, "learning_rate": 1.525472375029897e-05, "loss": 0.0139, "step": 20351 }, { "epoch": 2.852417659425368, "grad_norm": 0.01156681776046753, "learning_rate": 1.5240373116479309e-05, "loss": 0.0008, "step": 20352 }, { "epoch": 2.8525578135949545, "grad_norm": 0.2745216190814972, "learning_rate": 1.522602248265965e-05, "loss": 0.0318, "step": 20353 }, { "epoch": 2.852697967764541, "grad_norm": 0.025583239272236824, "learning_rate": 1.5211671848839988e-05, "loss": 0.0023, "step": 20354 }, { "epoch": 2.8528381219341274, "grad_norm": 0.1819443553686142, "learning_rate": 1.5197321215020329e-05, "loss": 0.0298, "step": 20355 }, { "epoch": 2.8529782761037144, "grad_norm": 
0.08847843110561371, "learning_rate": 1.5182970581200669e-05, "loss": 0.0041, "step": 20356 }, { "epoch": 2.853118430273301, "grad_norm": 0.05471263825893402, "learning_rate": 1.516861994738101e-05, "loss": 0.0043, "step": 20357 }, { "epoch": 2.8532585844428873, "grad_norm": 0.2533074617385864, "learning_rate": 1.5154269313561348e-05, "loss": 0.0161, "step": 20358 }, { "epoch": 2.8533987386124737, "grad_norm": 0.22861048579216003, "learning_rate": 1.5139918679741688e-05, "loss": 0.0423, "step": 20359 }, { "epoch": 2.85353889278206, "grad_norm": 0.4860513508319855, "learning_rate": 1.5125568045922027e-05, "loss": 0.0386, "step": 20360 }, { "epoch": 2.8536790469516466, "grad_norm": 0.027445362880825996, "learning_rate": 1.5111217412102368e-05, "loss": 0.0015, "step": 20361 }, { "epoch": 2.853819201121233, "grad_norm": 0.19729383289813995, "learning_rate": 1.5096866778282706e-05, "loss": 0.01, "step": 20362 }, { "epoch": 2.85395935529082, "grad_norm": 0.45119708776474, "learning_rate": 1.5082516144463047e-05, "loss": 0.03, "step": 20363 }, { "epoch": 2.8540995094604065, "grad_norm": 0.2652711272239685, "learning_rate": 1.5068165510643386e-05, "loss": 0.0159, "step": 20364 }, { "epoch": 2.854239663629993, "grad_norm": 0.03018065355718136, "learning_rate": 1.5053814876823726e-05, "loss": 0.0011, "step": 20365 }, { "epoch": 2.8543798177995794, "grad_norm": 0.07536077499389648, "learning_rate": 1.5039464243004065e-05, "loss": 0.0049, "step": 20366 }, { "epoch": 2.8545199719691663, "grad_norm": 0.35838422179222107, "learning_rate": 1.5025113609184405e-05, "loss": 0.0267, "step": 20367 }, { "epoch": 2.8546601261387528, "grad_norm": 0.5044819116592407, "learning_rate": 1.5010762975364744e-05, "loss": 0.0675, "step": 20368 }, { "epoch": 2.854800280308339, "grad_norm": 2.760685920715332, "learning_rate": 1.4996412341545084e-05, "loss": 0.4009, "step": 20369 }, { "epoch": 2.8549404344779257, "grad_norm": 3.385244369506836, "learning_rate": 1.4982061707725423e-05, "loss": 
0.1376, "step": 20370 }, { "epoch": 2.855080588647512, "grad_norm": 0.3678162097930908, "learning_rate": 1.4967711073905763e-05, "loss": 0.0242, "step": 20371 }, { "epoch": 2.8552207428170986, "grad_norm": 0.21036860346794128, "learning_rate": 1.4953360440086102e-05, "loss": 0.0291, "step": 20372 }, { "epoch": 2.8553608969866855, "grad_norm": 0.18883508443832397, "learning_rate": 1.4939009806266443e-05, "loss": 0.0221, "step": 20373 }, { "epoch": 2.855501051156272, "grad_norm": 0.2510300874710083, "learning_rate": 1.4924659172446781e-05, "loss": 0.0211, "step": 20374 }, { "epoch": 2.8556412053258584, "grad_norm": 0.12770569324493408, "learning_rate": 1.4910308538627122e-05, "loss": 0.0205, "step": 20375 }, { "epoch": 2.855781359495445, "grad_norm": 0.200319305062294, "learning_rate": 1.489595790480746e-05, "loss": 0.0134, "step": 20376 }, { "epoch": 2.8559215136650318, "grad_norm": 0.16275058686733246, "learning_rate": 1.48816072709878e-05, "loss": 0.0288, "step": 20377 }, { "epoch": 2.8560616678346182, "grad_norm": 0.19373728334903717, "learning_rate": 1.486725663716814e-05, "loss": 0.016, "step": 20378 }, { "epoch": 2.8562018220042047, "grad_norm": 0.08098205178976059, "learning_rate": 1.485290600334848e-05, "loss": 0.0058, "step": 20379 }, { "epoch": 2.856341976173791, "grad_norm": 0.20354300737380981, "learning_rate": 1.4838555369528819e-05, "loss": 0.0099, "step": 20380 }, { "epoch": 2.8564821303433776, "grad_norm": 0.4066515266895294, "learning_rate": 1.4824204735709159e-05, "loss": 0.0558, "step": 20381 }, { "epoch": 2.856622284512964, "grad_norm": 0.1347045600414276, "learning_rate": 1.4809854101889498e-05, "loss": 0.0307, "step": 20382 }, { "epoch": 2.8567624386825505, "grad_norm": 0.16052640974521637, "learning_rate": 1.4795503468069838e-05, "loss": 0.0146, "step": 20383 }, { "epoch": 2.8569025928521374, "grad_norm": 0.2812305688858032, "learning_rate": 1.4781152834250177e-05, "loss": 0.0327, "step": 20384 }, { "epoch": 2.857042747021724, "grad_norm": 
0.039231885224580765, "learning_rate": 1.4766802200430517e-05, "loss": 0.004, "step": 20385 }, { "epoch": 2.8571829011913104, "grad_norm": 0.13137809932231903, "learning_rate": 1.4752451566610856e-05, "loss": 0.016, "step": 20386 }, { "epoch": 2.8573230553608973, "grad_norm": 0.2043808400630951, "learning_rate": 1.4738100932791197e-05, "loss": 0.0236, "step": 20387 }, { "epoch": 2.8574632095304837, "grad_norm": 0.34610849618911743, "learning_rate": 1.4723750298971535e-05, "loss": 0.0514, "step": 20388 }, { "epoch": 2.85760336370007, "grad_norm": 0.10060770064592361, "learning_rate": 1.4709399665151876e-05, "loss": 0.0059, "step": 20389 }, { "epoch": 2.8577435178696566, "grad_norm": 0.3698410093784332, "learning_rate": 1.4695049031332214e-05, "loss": 0.0361, "step": 20390 }, { "epoch": 2.857883672039243, "grad_norm": 0.438769668340683, "learning_rate": 1.4680698397512555e-05, "loss": 0.044, "step": 20391 }, { "epoch": 2.8580238262088296, "grad_norm": 0.37049970030784607, "learning_rate": 1.4666347763692894e-05, "loss": 0.0457, "step": 20392 }, { "epoch": 2.858163980378416, "grad_norm": 0.10531621426343918, "learning_rate": 1.4651997129873236e-05, "loss": 0.0042, "step": 20393 }, { "epoch": 2.858304134548003, "grad_norm": 0.16144564747810364, "learning_rate": 1.4637646496053576e-05, "loss": 0.0237, "step": 20394 }, { "epoch": 2.8584442887175894, "grad_norm": 0.09245503693819046, "learning_rate": 1.4623295862233915e-05, "loss": 0.0099, "step": 20395 }, { "epoch": 2.858584442887176, "grad_norm": 0.31200745701789856, "learning_rate": 1.4608945228414255e-05, "loss": 0.0167, "step": 20396 }, { "epoch": 2.8587245970567623, "grad_norm": 0.2925493121147156, "learning_rate": 1.4594594594594594e-05, "loss": 0.0769, "step": 20397 }, { "epoch": 2.858864751226349, "grad_norm": 0.09629321098327637, "learning_rate": 1.4580243960774934e-05, "loss": 0.0236, "step": 20398 }, { "epoch": 2.8590049053959357, "grad_norm": 0.09990980476140976, "learning_rate": 1.4565893326955273e-05, 
"loss": 0.0259, "step": 20399 }, { "epoch": 2.859145059565522, "grad_norm": 0.1156524121761322, "learning_rate": 1.4551542693135613e-05, "loss": 0.0166, "step": 20400 }, { "epoch": 2.8592852137351086, "grad_norm": 0.16261838376522064, "learning_rate": 1.4537192059315952e-05, "loss": 0.0233, "step": 20401 }, { "epoch": 2.859425367904695, "grad_norm": 0.15608060359954834, "learning_rate": 1.4522841425496293e-05, "loss": 0.0112, "step": 20402 }, { "epoch": 2.8595655220742815, "grad_norm": 0.037038516253232956, "learning_rate": 1.4508490791676631e-05, "loss": 0.0039, "step": 20403 }, { "epoch": 2.8597056762438684, "grad_norm": 0.22943833470344543, "learning_rate": 1.4494140157856972e-05, "loss": 0.0373, "step": 20404 }, { "epoch": 2.859845830413455, "grad_norm": 0.7149508595466614, "learning_rate": 1.447978952403731e-05, "loss": 0.0393, "step": 20405 }, { "epoch": 2.8599859845830413, "grad_norm": 0.3068964183330536, "learning_rate": 1.4465438890217651e-05, "loss": 0.0236, "step": 20406 }, { "epoch": 2.860126138752628, "grad_norm": 0.18925514817237854, "learning_rate": 1.445108825639799e-05, "loss": 0.0565, "step": 20407 }, { "epoch": 2.8602662929222147, "grad_norm": 0.4989233911037445, "learning_rate": 1.443673762257833e-05, "loss": 0.0271, "step": 20408 }, { "epoch": 2.860406447091801, "grad_norm": 1.1134618520736694, "learning_rate": 1.4422386988758669e-05, "loss": 0.0509, "step": 20409 }, { "epoch": 2.8605466012613876, "grad_norm": 0.3696964681148529, "learning_rate": 1.440803635493901e-05, "loss": 0.0664, "step": 20410 }, { "epoch": 2.860686755430974, "grad_norm": 0.3626372218132019, "learning_rate": 1.4393685721119348e-05, "loss": 0.0509, "step": 20411 }, { "epoch": 2.8608269096005605, "grad_norm": 0.11314673721790314, "learning_rate": 1.4379335087299688e-05, "loss": 0.0094, "step": 20412 }, { "epoch": 2.860967063770147, "grad_norm": 0.05812682956457138, "learning_rate": 1.4364984453480027e-05, "loss": 0.0044, "step": 20413 }, { "epoch": 2.8611072179397334, 
"grad_norm": 0.21471843123435974, "learning_rate": 1.4350633819660367e-05, "loss": 0.0163, "step": 20414 }, { "epoch": 2.8612473721093203, "grad_norm": 0.04024404287338257, "learning_rate": 1.4336283185840706e-05, "loss": 0.0027, "step": 20415 }, { "epoch": 2.861387526278907, "grad_norm": 0.3599867522716522, "learning_rate": 1.4321932552021047e-05, "loss": 0.0603, "step": 20416 }, { "epoch": 2.8615276804484933, "grad_norm": 0.6264867782592773, "learning_rate": 1.4307581918201385e-05, "loss": 0.0389, "step": 20417 }, { "epoch": 2.86166783461808, "grad_norm": 0.12961158156394958, "learning_rate": 1.4293231284381726e-05, "loss": 0.0766, "step": 20418 }, { "epoch": 2.8618079887876666, "grad_norm": 1.5094178915023804, "learning_rate": 1.4278880650562064e-05, "loss": 0.1293, "step": 20419 }, { "epoch": 2.861948142957253, "grad_norm": 1.2366629838943481, "learning_rate": 1.4264530016742405e-05, "loss": 0.1142, "step": 20420 }, { "epoch": 2.8620882971268395, "grad_norm": 0.30086979269981384, "learning_rate": 1.4250179382922744e-05, "loss": 0.0595, "step": 20421 }, { "epoch": 2.862228451296426, "grad_norm": 0.14553207159042358, "learning_rate": 1.4235828749103084e-05, "loss": 0.0174, "step": 20422 }, { "epoch": 2.8623686054660125, "grad_norm": 0.30825215578079224, "learning_rate": 1.4221478115283423e-05, "loss": 0.025, "step": 20423 }, { "epoch": 2.862508759635599, "grad_norm": 0.11741652339696884, "learning_rate": 1.4207127481463763e-05, "loss": 0.009, "step": 20424 }, { "epoch": 2.862648913805186, "grad_norm": 0.07556717097759247, "learning_rate": 1.4192776847644102e-05, "loss": 0.0168, "step": 20425 }, { "epoch": 2.8627890679747723, "grad_norm": 0.13203367590904236, "learning_rate": 1.4178426213824442e-05, "loss": 0.013, "step": 20426 }, { "epoch": 2.8629292221443587, "grad_norm": 0.35279136896133423, "learning_rate": 1.4164075580004781e-05, "loss": 0.0819, "step": 20427 }, { "epoch": 2.863069376313945, "grad_norm": 0.2570411264896393, "learning_rate": 
1.4149724946185121e-05, "loss": 0.0086, "step": 20428 }, { "epoch": 2.863209530483532, "grad_norm": 0.17944258451461792, "learning_rate": 1.413537431236546e-05, "loss": 0.0151, "step": 20429 }, { "epoch": 2.8633496846531186, "grad_norm": 0.07360837608575821, "learning_rate": 1.41210236785458e-05, "loss": 0.0061, "step": 20430 }, { "epoch": 2.863489838822705, "grad_norm": 0.21010488271713257, "learning_rate": 1.4106673044726141e-05, "loss": 0.0408, "step": 20431 }, { "epoch": 2.8636299929922915, "grad_norm": 0.3781132102012634, "learning_rate": 1.4092322410906481e-05, "loss": 0.0402, "step": 20432 }, { "epoch": 2.863770147161878, "grad_norm": 0.20468628406524658, "learning_rate": 1.407797177708682e-05, "loss": 0.0262, "step": 20433 }, { "epoch": 2.8639103013314644, "grad_norm": 0.08186621963977814, "learning_rate": 1.406362114326716e-05, "loss": 0.0125, "step": 20434 }, { "epoch": 2.8640504555010513, "grad_norm": 0.41598352789878845, "learning_rate": 1.40492705094475e-05, "loss": 0.0287, "step": 20435 }, { "epoch": 2.8641906096706378, "grad_norm": 0.1666543185710907, "learning_rate": 1.403491987562784e-05, "loss": 0.0142, "step": 20436 }, { "epoch": 2.8643307638402242, "grad_norm": 0.2777056396007538, "learning_rate": 1.4020569241808178e-05, "loss": 0.0334, "step": 20437 }, { "epoch": 2.8644709180098107, "grad_norm": 0.10430144518613815, "learning_rate": 1.4006218607988519e-05, "loss": 0.0084, "step": 20438 }, { "epoch": 2.8646110721793976, "grad_norm": 0.21764230728149414, "learning_rate": 1.3991867974168858e-05, "loss": 0.0382, "step": 20439 }, { "epoch": 2.864751226348984, "grad_norm": 0.22724409401416779, "learning_rate": 1.3977517340349198e-05, "loss": 0.0174, "step": 20440 }, { "epoch": 2.8648913805185705, "grad_norm": 0.3295377492904663, "learning_rate": 1.3963166706529537e-05, "loss": 0.0164, "step": 20441 }, { "epoch": 2.865031534688157, "grad_norm": 0.3335498869419098, "learning_rate": 1.3948816072709877e-05, "loss": 0.0081, "step": 20442 }, { "epoch": 
2.8651716888577434, "grad_norm": 0.10871914029121399, "learning_rate": 1.3934465438890218e-05, "loss": 0.0103, "step": 20443 }, { "epoch": 2.86531184302733, "grad_norm": 0.1450573205947876, "learning_rate": 1.3920114805070556e-05, "loss": 0.0242, "step": 20444 }, { "epoch": 2.8654519971969163, "grad_norm": 0.09006363153457642, "learning_rate": 1.3905764171250897e-05, "loss": 0.0166, "step": 20445 }, { "epoch": 2.8655921513665032, "grad_norm": 1.0678198337554932, "learning_rate": 1.3891413537431235e-05, "loss": 0.1008, "step": 20446 }, { "epoch": 2.8657323055360897, "grad_norm": 0.10796839743852615, "learning_rate": 1.3877062903611576e-05, "loss": 0.0063, "step": 20447 }, { "epoch": 2.865872459705676, "grad_norm": 0.13320298492908478, "learning_rate": 1.3862712269791915e-05, "loss": 0.0098, "step": 20448 }, { "epoch": 2.8660126138752626, "grad_norm": 0.4603837728500366, "learning_rate": 1.3848361635972255e-05, "loss": 0.0149, "step": 20449 }, { "epoch": 2.8661527680448495, "grad_norm": 0.4901426136493683, "learning_rate": 1.3834011002152594e-05, "loss": 0.0212, "step": 20450 }, { "epoch": 2.866292922214436, "grad_norm": 0.456548810005188, "learning_rate": 1.3819660368332934e-05, "loss": 0.0301, "step": 20451 }, { "epoch": 2.8664330763840224, "grad_norm": 0.09810171276330948, "learning_rate": 1.3805309734513273e-05, "loss": 0.0215, "step": 20452 }, { "epoch": 2.866573230553609, "grad_norm": 0.13737161457538605, "learning_rate": 1.3790959100693613e-05, "loss": 0.0189, "step": 20453 }, { "epoch": 2.8667133847231954, "grad_norm": 0.15599215030670166, "learning_rate": 1.3776608466873952e-05, "loss": 0.0142, "step": 20454 }, { "epoch": 2.866853538892782, "grad_norm": 0.21872660517692566, "learning_rate": 1.3762257833054292e-05, "loss": 0.0232, "step": 20455 }, { "epoch": 2.8669936930623687, "grad_norm": 0.19498136639595032, "learning_rate": 1.3747907199234631e-05, "loss": 0.0579, "step": 20456 }, { "epoch": 2.867133847231955, "grad_norm": 0.02320132963359356, 
"learning_rate": 1.3733556565414972e-05, "loss": 0.0026, "step": 20457 }, { "epoch": 2.8672740014015416, "grad_norm": 0.3561120331287384, "learning_rate": 1.371920593159531e-05, "loss": 0.0954, "step": 20458 }, { "epoch": 2.867414155571128, "grad_norm": 0.07305943220853806, "learning_rate": 1.370485529777565e-05, "loss": 0.0087, "step": 20459 }, { "epoch": 2.867554309740715, "grad_norm": 0.06453564018011093, "learning_rate": 1.369050466395599e-05, "loss": 0.0025, "step": 20460 }, { "epoch": 2.8676944639103015, "grad_norm": 0.2136397510766983, "learning_rate": 1.367615403013633e-05, "loss": 0.0201, "step": 20461 }, { "epoch": 2.867834618079888, "grad_norm": 0.2717055380344391, "learning_rate": 1.3661803396316669e-05, "loss": 0.02, "step": 20462 }, { "epoch": 2.8679747722494744, "grad_norm": 0.09508466720581055, "learning_rate": 1.3647452762497009e-05, "loss": 0.0115, "step": 20463 }, { "epoch": 2.868114926419061, "grad_norm": 0.09293036162853241, "learning_rate": 1.3633102128677348e-05, "loss": 0.0102, "step": 20464 }, { "epoch": 2.8682550805886473, "grad_norm": 0.12921813130378723, "learning_rate": 1.3618751494857688e-05, "loss": 0.0067, "step": 20465 }, { "epoch": 2.868395234758234, "grad_norm": 0.15859870612621307, "learning_rate": 1.3604400861038027e-05, "loss": 0.0265, "step": 20466 }, { "epoch": 2.8685353889278207, "grad_norm": 0.32528579235076904, "learning_rate": 1.3590050227218367e-05, "loss": 0.0339, "step": 20467 }, { "epoch": 2.868675543097407, "grad_norm": 0.5237740278244019, "learning_rate": 1.3575699593398708e-05, "loss": 0.0271, "step": 20468 }, { "epoch": 2.8688156972669936, "grad_norm": 1.419663906097412, "learning_rate": 1.3561348959579048e-05, "loss": 0.0669, "step": 20469 }, { "epoch": 2.8689558514365805, "grad_norm": 0.9900166392326355, "learning_rate": 1.3546998325759387e-05, "loss": 0.0935, "step": 20470 }, { "epoch": 2.869096005606167, "grad_norm": 0.05839133635163307, "learning_rate": 1.3532647691939727e-05, "loss": 0.0179, "step": 20471 }, 
{ "epoch": 2.8692361597757534, "grad_norm": 0.09993952512741089, "learning_rate": 1.3518297058120066e-05, "loss": 0.0203, "step": 20472 }, { "epoch": 2.86937631394534, "grad_norm": 0.11818927526473999, "learning_rate": 1.3503946424300406e-05, "loss": 0.0133, "step": 20473 }, { "epoch": 2.8695164681149263, "grad_norm": 0.26086297631263733, "learning_rate": 1.3489595790480745e-05, "loss": 0.0225, "step": 20474 }, { "epoch": 2.869656622284513, "grad_norm": 0.34967029094696045, "learning_rate": 1.3475245156661086e-05, "loss": 0.0182, "step": 20475 }, { "epoch": 2.8697967764540993, "grad_norm": 0.15338881313800812, "learning_rate": 1.3460894522841424e-05, "loss": 0.0207, "step": 20476 }, { "epoch": 2.869936930623686, "grad_norm": 0.15352988243103027, "learning_rate": 1.3446543889021765e-05, "loss": 0.023, "step": 20477 }, { "epoch": 2.8700770847932726, "grad_norm": 0.15371306240558624, "learning_rate": 1.3432193255202103e-05, "loss": 0.0362, "step": 20478 }, { "epoch": 2.870217238962859, "grad_norm": 0.2695786654949188, "learning_rate": 1.3417842621382444e-05, "loss": 0.0223, "step": 20479 }, { "epoch": 2.8703573931324455, "grad_norm": 0.11328046023845673, "learning_rate": 1.3403491987562783e-05, "loss": 0.0207, "step": 20480 }, { "epoch": 2.8704975473020324, "grad_norm": 0.20594169199466705, "learning_rate": 1.3389141353743123e-05, "loss": 0.0173, "step": 20481 }, { "epoch": 2.870637701471619, "grad_norm": 0.216615691781044, "learning_rate": 1.3374790719923462e-05, "loss": 0.0193, "step": 20482 }, { "epoch": 2.8707778556412054, "grad_norm": 0.3203184902667999, "learning_rate": 1.3360440086103802e-05, "loss": 0.037, "step": 20483 }, { "epoch": 2.870918009810792, "grad_norm": 0.2253418117761612, "learning_rate": 1.3346089452284141e-05, "loss": 0.0185, "step": 20484 }, { "epoch": 2.8710581639803783, "grad_norm": 0.13091346621513367, "learning_rate": 1.3331738818464481e-05, "loss": 0.0163, "step": 20485 }, { "epoch": 2.8711983181499647, "grad_norm": 0.14826782047748566, 
"learning_rate": 1.331738818464482e-05, "loss": 0.0263, "step": 20486 }, { "epoch": 2.8713384723195516, "grad_norm": 0.779538094997406, "learning_rate": 1.330303755082516e-05, "loss": 0.0342, "step": 20487 }, { "epoch": 2.871478626489138, "grad_norm": 0.08050556480884552, "learning_rate": 1.32886869170055e-05, "loss": 0.0045, "step": 20488 }, { "epoch": 2.8716187806587246, "grad_norm": 0.1804610639810562, "learning_rate": 1.327433628318584e-05, "loss": 0.0264, "step": 20489 }, { "epoch": 2.871758934828311, "grad_norm": 0.6832132935523987, "learning_rate": 1.3259985649366178e-05, "loss": 0.0845, "step": 20490 }, { "epoch": 2.871899088997898, "grad_norm": 0.1840461641550064, "learning_rate": 1.3245635015546519e-05, "loss": 0.027, "step": 20491 }, { "epoch": 2.8720392431674844, "grad_norm": 0.215186208486557, "learning_rate": 1.3231284381726857e-05, "loss": 0.0587, "step": 20492 }, { "epoch": 2.872179397337071, "grad_norm": 0.10376659780740738, "learning_rate": 1.3216933747907198e-05, "loss": 0.0317, "step": 20493 }, { "epoch": 2.8723195515066573, "grad_norm": 0.19432315230369568, "learning_rate": 1.3202583114087537e-05, "loss": 0.0394, "step": 20494 }, { "epoch": 2.8724597056762438, "grad_norm": 0.07225720584392548, "learning_rate": 1.3188232480267877e-05, "loss": 0.0039, "step": 20495 }, { "epoch": 2.87259985984583, "grad_norm": 0.07259931415319443, "learning_rate": 1.3173881846448216e-05, "loss": 0.0059, "step": 20496 }, { "epoch": 2.872740014015417, "grad_norm": 0.13902798295021057, "learning_rate": 1.3159531212628556e-05, "loss": 0.0246, "step": 20497 }, { "epoch": 2.8728801681850036, "grad_norm": 0.1350734382867813, "learning_rate": 1.3145180578808895e-05, "loss": 0.0086, "step": 20498 }, { "epoch": 2.87302032235459, "grad_norm": 0.043200213462114334, "learning_rate": 1.3130829944989235e-05, "loss": 0.0025, "step": 20499 }, { "epoch": 2.8731604765241765, "grad_norm": 0.25786125659942627, "learning_rate": 1.3116479311169574e-05, "loss": 0.0165, "step": 20500 }, { 
"epoch": 2.8733006306937634, "grad_norm": 0.29306137561798096, "learning_rate": 1.3102128677349914e-05, "loss": 0.0224, "step": 20501 }, { "epoch": 2.87344078486335, "grad_norm": 0.30698108673095703, "learning_rate": 1.3087778043530253e-05, "loss": 0.0384, "step": 20502 }, { "epoch": 2.8735809390329363, "grad_norm": 0.2830221951007843, "learning_rate": 1.3073427409710594e-05, "loss": 0.0384, "step": 20503 }, { "epoch": 2.873721093202523, "grad_norm": 0.3073148727416992, "learning_rate": 1.3059076775890932e-05, "loss": 0.0965, "step": 20504 }, { "epoch": 2.8738612473721092, "grad_norm": 0.16444863379001617, "learning_rate": 1.3044726142071274e-05, "loss": 0.0152, "step": 20505 }, { "epoch": 2.8740014015416957, "grad_norm": 0.26114946603775024, "learning_rate": 1.3030375508251615e-05, "loss": 0.0121, "step": 20506 }, { "epoch": 2.874141555711282, "grad_norm": 1.1925033330917358, "learning_rate": 1.3016024874431954e-05, "loss": 0.106, "step": 20507 }, { "epoch": 2.874281709880869, "grad_norm": 0.09891834110021591, "learning_rate": 1.3001674240612294e-05, "loss": 0.0404, "step": 20508 }, { "epoch": 2.8744218640504555, "grad_norm": 0.3454154133796692, "learning_rate": 1.2987323606792633e-05, "loss": 0.0143, "step": 20509 }, { "epoch": 2.874562018220042, "grad_norm": 0.08711627125740051, "learning_rate": 1.2972972972972973e-05, "loss": 0.0055, "step": 20510 }, { "epoch": 2.8747021723896284, "grad_norm": 0.17824983596801758, "learning_rate": 1.2958622339153312e-05, "loss": 0.015, "step": 20511 }, { "epoch": 2.8748423265592153, "grad_norm": 0.35351672768592834, "learning_rate": 1.2944271705333652e-05, "loss": 0.0086, "step": 20512 }, { "epoch": 2.874982480728802, "grad_norm": 0.33355438709259033, "learning_rate": 1.2929921071513991e-05, "loss": 0.0071, "step": 20513 }, { "epoch": 2.8751226348983883, "grad_norm": 1.1041265726089478, "learning_rate": 1.2915570437694331e-05, "loss": 0.1779, "step": 20514 }, { "epoch": 2.8752627890679747, "grad_norm": 0.12072717398405075, 
"learning_rate": 1.290121980387467e-05, "loss": 0.0195, "step": 20515 }, { "epoch": 2.875402943237561, "grad_norm": 0.6618925929069519, "learning_rate": 1.288686917005501e-05, "loss": 0.0807, "step": 20516 }, { "epoch": 2.8755430974071476, "grad_norm": 0.6493064761161804, "learning_rate": 1.287251853623535e-05, "loss": 0.1032, "step": 20517 }, { "epoch": 2.8756832515767345, "grad_norm": 1.1774274110794067, "learning_rate": 1.285816790241569e-05, "loss": 0.0545, "step": 20518 }, { "epoch": 2.875823405746321, "grad_norm": 0.07859918475151062, "learning_rate": 1.2843817268596028e-05, "loss": 0.0032, "step": 20519 }, { "epoch": 2.8759635599159075, "grad_norm": 0.9119218587875366, "learning_rate": 1.2829466634776369e-05, "loss": 0.0469, "step": 20520 }, { "epoch": 2.876103714085494, "grad_norm": 0.4710049331188202, "learning_rate": 1.2815116000956708e-05, "loss": 0.0253, "step": 20521 }, { "epoch": 2.876243868255081, "grad_norm": 0.1618664711713791, "learning_rate": 1.2800765367137048e-05, "loss": 0.0172, "step": 20522 }, { "epoch": 2.8763840224246673, "grad_norm": 0.41987690329551697, "learning_rate": 1.2786414733317387e-05, "loss": 0.0327, "step": 20523 }, { "epoch": 2.8765241765942537, "grad_norm": 0.09576867520809174, "learning_rate": 1.2772064099497727e-05, "loss": 0.009, "step": 20524 }, { "epoch": 2.87666433076384, "grad_norm": 0.13913939893245697, "learning_rate": 1.2757713465678066e-05, "loss": 0.0286, "step": 20525 }, { "epoch": 2.8768044849334267, "grad_norm": 0.2558138966560364, "learning_rate": 1.2743362831858406e-05, "loss": 0.0092, "step": 20526 }, { "epoch": 2.876944639103013, "grad_norm": 0.1790776252746582, "learning_rate": 1.2729012198038745e-05, "loss": 0.0501, "step": 20527 }, { "epoch": 2.8770847932725996, "grad_norm": 0.17836473882198334, "learning_rate": 1.2714661564219085e-05, "loss": 0.0325, "step": 20528 }, { "epoch": 2.8772249474421865, "grad_norm": 0.17657721042633057, "learning_rate": 1.2700310930399424e-05, "loss": 0.051, "step": 20529 }, 
{ "epoch": 2.877365101611773, "grad_norm": 0.01016154419630766, "learning_rate": 1.2685960296579765e-05, "loss": 0.0012, "step": 20530 }, { "epoch": 2.8775052557813594, "grad_norm": 0.2563575208187103, "learning_rate": 1.2671609662760103e-05, "loss": 0.0188, "step": 20531 }, { "epoch": 2.8776454099509463, "grad_norm": 0.13290025293827057, "learning_rate": 1.2657259028940444e-05, "loss": 0.0125, "step": 20532 }, { "epoch": 2.8777855641205328, "grad_norm": 0.17262418568134308, "learning_rate": 1.2642908395120782e-05, "loss": 0.027, "step": 20533 }, { "epoch": 2.8779257182901192, "grad_norm": 0.08365587890148163, "learning_rate": 1.2628557761301123e-05, "loss": 0.0023, "step": 20534 }, { "epoch": 2.8780658724597057, "grad_norm": 0.2181965708732605, "learning_rate": 1.2614207127481462e-05, "loss": 0.0297, "step": 20535 }, { "epoch": 2.878206026629292, "grad_norm": 0.09229299426078796, "learning_rate": 1.2599856493661802e-05, "loss": 0.0123, "step": 20536 }, { "epoch": 2.8783461807988786, "grad_norm": 0.24683672189712524, "learning_rate": 1.258550585984214e-05, "loss": 0.0305, "step": 20537 }, { "epoch": 2.878486334968465, "grad_norm": 0.24597430229187012, "learning_rate": 1.2571155226022481e-05, "loss": 0.0497, "step": 20538 }, { "epoch": 2.878626489138052, "grad_norm": 0.5246784687042236, "learning_rate": 1.255680459220282e-05, "loss": 0.034, "step": 20539 }, { "epoch": 2.8787666433076384, "grad_norm": 0.06071069836616516, "learning_rate": 1.254245395838316e-05, "loss": 0.0076, "step": 20540 }, { "epoch": 2.878906797477225, "grad_norm": 0.14612825214862823, "learning_rate": 1.2528103324563499e-05, "loss": 0.0382, "step": 20541 }, { "epoch": 2.8790469516468113, "grad_norm": 0.21174681186676025, "learning_rate": 1.2513752690743841e-05, "loss": 0.0192, "step": 20542 }, { "epoch": 2.8791871058163983, "grad_norm": 0.07328194379806519, "learning_rate": 1.2499402056924182e-05, "loss": 0.0076, "step": 20543 }, { "epoch": 2.8793272599859847, "grad_norm": 0.30224767327308655, 
"learning_rate": 1.248505142310452e-05, "loss": 0.0087, "step": 20544 }, { "epoch": 2.879467414155571, "grad_norm": 1.3327641487121582, "learning_rate": 1.247070078928486e-05, "loss": 0.0326, "step": 20545 }, { "epoch": 2.8796075683251576, "grad_norm": 0.07316180318593979, "learning_rate": 1.24563501554652e-05, "loss": 0.0143, "step": 20546 }, { "epoch": 2.879747722494744, "grad_norm": 0.20163120329380035, "learning_rate": 1.244199952164554e-05, "loss": 0.0321, "step": 20547 }, { "epoch": 2.8798878766643305, "grad_norm": 0.09757734835147858, "learning_rate": 1.2427648887825879e-05, "loss": 0.0064, "step": 20548 }, { "epoch": 2.8800280308339175, "grad_norm": 0.09465175122022629, "learning_rate": 1.2413298254006219e-05, "loss": 0.0041, "step": 20549 }, { "epoch": 2.880168185003504, "grad_norm": 0.22634288668632507, "learning_rate": 1.2398947620186558e-05, "loss": 0.0106, "step": 20550 }, { "epoch": 2.8803083391730904, "grad_norm": 0.1089482307434082, "learning_rate": 1.2384596986366898e-05, "loss": 0.0077, "step": 20551 }, { "epoch": 2.880448493342677, "grad_norm": 0.26292601227760315, "learning_rate": 1.2370246352547237e-05, "loss": 0.0174, "step": 20552 }, { "epoch": 2.8805886475122637, "grad_norm": 0.2562173306941986, "learning_rate": 1.2355895718727577e-05, "loss": 0.0378, "step": 20553 }, { "epoch": 2.88072880168185, "grad_norm": 0.35360920429229736, "learning_rate": 1.2341545084907916e-05, "loss": 0.0578, "step": 20554 }, { "epoch": 2.8808689558514367, "grad_norm": 0.20812025666236877, "learning_rate": 1.2327194451088256e-05, "loss": 0.0234, "step": 20555 }, { "epoch": 2.881009110021023, "grad_norm": 0.24358178675174713, "learning_rate": 1.2312843817268595e-05, "loss": 0.0328, "step": 20556 }, { "epoch": 2.8811492641906096, "grad_norm": 0.054564934223890305, "learning_rate": 1.2298493183448936e-05, "loss": 0.0016, "step": 20557 }, { "epoch": 2.881289418360196, "grad_norm": 0.6981430649757385, "learning_rate": 1.2284142549629274e-05, "loss": 0.0695, "step": 
20558 }, { "epoch": 2.8814295725297825, "grad_norm": 0.04398428276181221, "learning_rate": 1.2269791915809615e-05, "loss": 0.0016, "step": 20559 }, { "epoch": 2.8815697266993694, "grad_norm": 0.2964060604572296, "learning_rate": 1.2255441281989953e-05, "loss": 0.0358, "step": 20560 }, { "epoch": 2.881709880868956, "grad_norm": 0.10761590301990509, "learning_rate": 1.2241090648170294e-05, "loss": 0.0168, "step": 20561 }, { "epoch": 2.8818500350385423, "grad_norm": 0.4069973826408386, "learning_rate": 1.2226740014350633e-05, "loss": 0.0703, "step": 20562 }, { "epoch": 2.881990189208129, "grad_norm": 0.2163611799478531, "learning_rate": 1.2212389380530973e-05, "loss": 0.0311, "step": 20563 }, { "epoch": 2.8821303433777157, "grad_norm": 0.4572719931602478, "learning_rate": 1.2198038746711312e-05, "loss": 0.0737, "step": 20564 }, { "epoch": 2.882270497547302, "grad_norm": 0.10436063259840012, "learning_rate": 1.2183688112891652e-05, "loss": 0.0085, "step": 20565 }, { "epoch": 2.8824106517168886, "grad_norm": 0.2913877069950104, "learning_rate": 1.216933747907199e-05, "loss": 0.0079, "step": 20566 }, { "epoch": 2.882550805886475, "grad_norm": 0.5824584364891052, "learning_rate": 1.2154986845252331e-05, "loss": 0.0813, "step": 20567 }, { "epoch": 2.8826909600560615, "grad_norm": 0.1155749037861824, "learning_rate": 1.214063621143267e-05, "loss": 0.0061, "step": 20568 }, { "epoch": 2.882831114225648, "grad_norm": 0.5120499134063721, "learning_rate": 1.212628557761301e-05, "loss": 0.09, "step": 20569 }, { "epoch": 2.882971268395235, "grad_norm": 0.08372246474027634, "learning_rate": 1.2111934943793349e-05, "loss": 0.007, "step": 20570 }, { "epoch": 2.8831114225648213, "grad_norm": 0.7174161672592163, "learning_rate": 1.209758430997369e-05, "loss": 0.0355, "step": 20571 }, { "epoch": 2.883251576734408, "grad_norm": 0.13389822840690613, "learning_rate": 1.2083233676154028e-05, "loss": 0.0267, "step": 20572 }, { "epoch": 2.8833917309039943, "grad_norm": 0.1598690152168274, 
"learning_rate": 1.2068883042334369e-05, "loss": 0.0277, "step": 20573 }, { "epoch": 2.883531885073581, "grad_norm": 0.14782583713531494, "learning_rate": 1.2054532408514707e-05, "loss": 0.0145, "step": 20574 }, { "epoch": 2.8836720392431676, "grad_norm": 0.25962793827056885, "learning_rate": 1.2040181774695048e-05, "loss": 0.0302, "step": 20575 }, { "epoch": 2.883812193412754, "grad_norm": 0.10121694207191467, "learning_rate": 1.2025831140875387e-05, "loss": 0.0047, "step": 20576 }, { "epoch": 2.8839523475823405, "grad_norm": 0.1421107053756714, "learning_rate": 1.2011480507055727e-05, "loss": 0.0064, "step": 20577 }, { "epoch": 2.884092501751927, "grad_norm": 0.1648636907339096, "learning_rate": 1.1997129873236066e-05, "loss": 0.0395, "step": 20578 }, { "epoch": 2.8842326559215135, "grad_norm": 0.06656556576490402, "learning_rate": 1.1982779239416408e-05, "loss": 0.0125, "step": 20579 }, { "epoch": 2.8843728100911004, "grad_norm": 0.13815541565418243, "learning_rate": 1.1968428605596747e-05, "loss": 0.008, "step": 20580 }, { "epoch": 2.884512964260687, "grad_norm": 0.27275902032852173, "learning_rate": 1.1954077971777087e-05, "loss": 0.0095, "step": 20581 }, { "epoch": 2.8846531184302733, "grad_norm": 0.18239572644233704, "learning_rate": 1.1939727337957426e-05, "loss": 0.0221, "step": 20582 }, { "epoch": 2.8847932725998597, "grad_norm": 0.17327091097831726, "learning_rate": 1.1925376704137766e-05, "loss": 0.0295, "step": 20583 }, { "epoch": 2.8849334267694466, "grad_norm": 0.10624104738235474, "learning_rate": 1.1911026070318105e-05, "loss": 0.0226, "step": 20584 }, { "epoch": 2.885073580939033, "grad_norm": 0.16192522644996643, "learning_rate": 1.1896675436498445e-05, "loss": 0.0089, "step": 20585 }, { "epoch": 2.8852137351086196, "grad_norm": 0.22291190922260284, "learning_rate": 1.1882324802678784e-05, "loss": 0.0274, "step": 20586 }, { "epoch": 2.885353889278206, "grad_norm": 0.24589356780052185, "learning_rate": 1.1867974168859124e-05, "loss": 0.0228, 
"step": 20587 }, { "epoch": 2.8854940434477925, "grad_norm": 0.5466956496238708, "learning_rate": 1.1853623535039463e-05, "loss": 0.0315, "step": 20588 }, { "epoch": 2.885634197617379, "grad_norm": 0.28918564319610596, "learning_rate": 1.1839272901219804e-05, "loss": 0.032, "step": 20589 }, { "epoch": 2.8857743517869654, "grad_norm": 0.11162767559289932, "learning_rate": 1.1824922267400142e-05, "loss": 0.0144, "step": 20590 }, { "epoch": 2.8859145059565523, "grad_norm": 0.11436593532562256, "learning_rate": 1.1810571633580483e-05, "loss": 0.0317, "step": 20591 }, { "epoch": 2.8860546601261388, "grad_norm": 0.12318064272403717, "learning_rate": 1.1796220999760821e-05, "loss": 0.0209, "step": 20592 }, { "epoch": 2.886194814295725, "grad_norm": 0.11822043359279633, "learning_rate": 1.1781870365941162e-05, "loss": 0.0262, "step": 20593 }, { "epoch": 2.8863349684653117, "grad_norm": 0.41611841320991516, "learning_rate": 1.17675197321215e-05, "loss": 0.0171, "step": 20594 }, { "epoch": 2.8864751226348986, "grad_norm": 0.269073486328125, "learning_rate": 1.1753169098301841e-05, "loss": 0.0164, "step": 20595 }, { "epoch": 2.886615276804485, "grad_norm": 0.21539461612701416, "learning_rate": 1.173881846448218e-05, "loss": 0.0295, "step": 20596 }, { "epoch": 2.8867554309740715, "grad_norm": 0.11899608373641968, "learning_rate": 1.172446783066252e-05, "loss": 0.0127, "step": 20597 }, { "epoch": 2.886895585143658, "grad_norm": 0.22435010969638824, "learning_rate": 1.1710117196842859e-05, "loss": 0.0125, "step": 20598 }, { "epoch": 2.8870357393132444, "grad_norm": 0.2176763415336609, "learning_rate": 1.16957665630232e-05, "loss": 0.054, "step": 20599 }, { "epoch": 2.887175893482831, "grad_norm": 0.0818527415394783, "learning_rate": 1.1681415929203538e-05, "loss": 0.0049, "step": 20600 }, { "epoch": 2.887316047652418, "grad_norm": 0.5863497257232666, "learning_rate": 1.1667065295383878e-05, "loss": 0.0575, "step": 20601 }, { "epoch": 2.8874562018220042, "grad_norm": 
0.15718479454517365, "learning_rate": 1.1652714661564217e-05, "loss": 0.03, "step": 20602 }, { "epoch": 2.8875963559915907, "grad_norm": 0.19008733332157135, "learning_rate": 1.1638364027744558e-05, "loss": 0.0415, "step": 20603 }, { "epoch": 2.887736510161177, "grad_norm": 0.02169755846261978, "learning_rate": 1.1624013393924896e-05, "loss": 0.0016, "step": 20604 }, { "epoch": 2.887876664330764, "grad_norm": 0.044860586524009705, "learning_rate": 1.1609662760105237e-05, "loss": 0.0017, "step": 20605 }, { "epoch": 2.8880168185003505, "grad_norm": 0.37165677547454834, "learning_rate": 1.1595312126285575e-05, "loss": 0.0248, "step": 20606 }, { "epoch": 2.888156972669937, "grad_norm": 0.0824437215924263, "learning_rate": 1.1580961492465916e-05, "loss": 0.0067, "step": 20607 }, { "epoch": 2.8882971268395234, "grad_norm": 1.2491965293884277, "learning_rate": 1.1566610858646255e-05, "loss": 0.034, "step": 20608 }, { "epoch": 2.88843728100911, "grad_norm": 0.4895854890346527, "learning_rate": 1.1552260224826595e-05, "loss": 0.0993, "step": 20609 }, { "epoch": 2.8885774351786964, "grad_norm": 0.763146162033081, "learning_rate": 1.1537909591006934e-05, "loss": 0.0314, "step": 20610 }, { "epoch": 2.8887175893482833, "grad_norm": 0.1062176451086998, "learning_rate": 1.1523558957187274e-05, "loss": 0.011, "step": 20611 }, { "epoch": 2.8888577435178697, "grad_norm": 0.08441688120365143, "learning_rate": 1.1509208323367613e-05, "loss": 0.0093, "step": 20612 }, { "epoch": 2.888997897687456, "grad_norm": 0.17439578473567963, "learning_rate": 1.1494857689547953e-05, "loss": 0.0343, "step": 20613 }, { "epoch": 2.8891380518570426, "grad_norm": 0.28553247451782227, "learning_rate": 1.1480507055728292e-05, "loss": 0.0368, "step": 20614 }, { "epoch": 2.8892782060266295, "grad_norm": 0.22482764720916748, "learning_rate": 1.1466156421908632e-05, "loss": 0.012, "step": 20615 }, { "epoch": 2.889418360196216, "grad_norm": 1.044072151184082, "learning_rate": 1.1451805788088974e-05, "loss": 
0.0209, "step": 20616 }, { "epoch": 2.8895585143658025, "grad_norm": 1.2441165447235107, "learning_rate": 1.1437455154269313e-05, "loss": 0.0408, "step": 20617 }, { "epoch": 2.889698668535389, "grad_norm": 0.0749635100364685, "learning_rate": 1.1423104520449654e-05, "loss": 0.0059, "step": 20618 }, { "epoch": 2.8898388227049754, "grad_norm": 0.26553890109062195, "learning_rate": 1.1408753886629992e-05, "loss": 0.1125, "step": 20619 }, { "epoch": 2.889978976874562, "grad_norm": 0.021862143650650978, "learning_rate": 1.1394403252810333e-05, "loss": 0.0014, "step": 20620 }, { "epoch": 2.8901191310441483, "grad_norm": 0.14215447008609772, "learning_rate": 1.1380052618990671e-05, "loss": 0.0048, "step": 20621 }, { "epoch": 2.890259285213735, "grad_norm": 0.4674132168292999, "learning_rate": 1.1365701985171012e-05, "loss": 0.0171, "step": 20622 }, { "epoch": 2.8903994393833217, "grad_norm": 0.1958397924900055, "learning_rate": 1.135135135135135e-05, "loss": 0.0103, "step": 20623 }, { "epoch": 2.890539593552908, "grad_norm": 0.32101282477378845, "learning_rate": 1.1337000717531691e-05, "loss": 0.071, "step": 20624 }, { "epoch": 2.8906797477224946, "grad_norm": 0.2379656285047531, "learning_rate": 1.132265008371203e-05, "loss": 0.0222, "step": 20625 }, { "epoch": 2.8908199018920815, "grad_norm": 0.1310795545578003, "learning_rate": 1.130829944989237e-05, "loss": 0.0076, "step": 20626 }, { "epoch": 2.890960056061668, "grad_norm": 0.22999387979507446, "learning_rate": 1.1293948816072709e-05, "loss": 0.0246, "step": 20627 }, { "epoch": 2.8911002102312544, "grad_norm": 0.1883697211742401, "learning_rate": 1.127959818225305e-05, "loss": 0.0147, "step": 20628 }, { "epoch": 2.891240364400841, "grad_norm": 0.0720987319946289, "learning_rate": 1.1265247548433388e-05, "loss": 0.0094, "step": 20629 }, { "epoch": 2.8913805185704273, "grad_norm": 0.3023030757904053, "learning_rate": 1.1250896914613728e-05, "loss": 0.047, "step": 20630 }, { "epoch": 2.891520672740014, "grad_norm": 
0.1286933571100235, "learning_rate": 1.1236546280794067e-05, "loss": 0.0205, "step": 20631 }, { "epoch": 2.8916608269096007, "grad_norm": 0.3219941258430481, "learning_rate": 1.1222195646974408e-05, "loss": 0.0243, "step": 20632 }, { "epoch": 2.891800981079187, "grad_norm": 0.09781860560178757, "learning_rate": 1.1207845013154746e-05, "loss": 0.0053, "step": 20633 }, { "epoch": 2.8919411352487736, "grad_norm": 0.059332434087991714, "learning_rate": 1.1193494379335087e-05, "loss": 0.0053, "step": 20634 }, { "epoch": 2.89208128941836, "grad_norm": 0.1558128148317337, "learning_rate": 1.1179143745515425e-05, "loss": 0.0207, "step": 20635 }, { "epoch": 2.892221443587947, "grad_norm": 0.43020737171173096, "learning_rate": 1.1164793111695766e-05, "loss": 0.0261, "step": 20636 }, { "epoch": 2.8923615977575334, "grad_norm": 0.5143353939056396, "learning_rate": 1.1150442477876105e-05, "loss": 0.0409, "step": 20637 }, { "epoch": 2.89250175192712, "grad_norm": 0.13568823039531708, "learning_rate": 1.1136091844056445e-05, "loss": 0.0218, "step": 20638 }, { "epoch": 2.8926419060967064, "grad_norm": 0.11360125988721848, "learning_rate": 1.1121741210236784e-05, "loss": 0.012, "step": 20639 }, { "epoch": 2.892782060266293, "grad_norm": 1.168239951133728, "learning_rate": 1.1107390576417124e-05, "loss": 0.0701, "step": 20640 }, { "epoch": 2.8929222144358793, "grad_norm": 0.5349233746528625, "learning_rate": 1.1093039942597463e-05, "loss": 0.0866, "step": 20641 }, { "epoch": 2.893062368605466, "grad_norm": 0.16325749456882477, "learning_rate": 1.1078689308777803e-05, "loss": 0.0139, "step": 20642 }, { "epoch": 2.8932025227750526, "grad_norm": 0.08652399480342865, "learning_rate": 1.1064338674958142e-05, "loss": 0.0098, "step": 20643 }, { "epoch": 2.893342676944639, "grad_norm": 0.1399209052324295, "learning_rate": 1.1049988041138482e-05, "loss": 0.0135, "step": 20644 }, { "epoch": 2.8934828311142256, "grad_norm": 0.4124363958835602, "learning_rate": 1.1035637407318821e-05, "loss": 
0.0576, "step": 20645 }, { "epoch": 2.8936229852838125, "grad_norm": 0.23660466074943542, "learning_rate": 1.1021286773499162e-05, "loss": 0.0382, "step": 20646 }, { "epoch": 2.893763139453399, "grad_norm": 0.40035301446914673, "learning_rate": 1.10069361396795e-05, "loss": 0.0463, "step": 20647 }, { "epoch": 2.8939032936229854, "grad_norm": 0.269803524017334, "learning_rate": 1.099258550585984e-05, "loss": 0.0317, "step": 20648 }, { "epoch": 2.894043447792572, "grad_norm": 0.1005750447511673, "learning_rate": 1.097823487204018e-05, "loss": 0.0046, "step": 20649 }, { "epoch": 2.8941836019621583, "grad_norm": 0.3053402900695801, "learning_rate": 1.096388423822052e-05, "loss": 0.0212, "step": 20650 }, { "epoch": 2.8943237561317448, "grad_norm": 0.25356629490852356, "learning_rate": 1.0949533604400859e-05, "loss": 0.0311, "step": 20651 }, { "epoch": 2.894463910301331, "grad_norm": 0.08650394529104233, "learning_rate": 1.0935182970581199e-05, "loss": 0.0043, "step": 20652 }, { "epoch": 2.894604064470918, "grad_norm": 0.25055503845214844, "learning_rate": 1.0920832336761541e-05, "loss": 0.036, "step": 20653 }, { "epoch": 2.8947442186405046, "grad_norm": 0.06276492029428482, "learning_rate": 1.090648170294188e-05, "loss": 0.0037, "step": 20654 }, { "epoch": 2.894884372810091, "grad_norm": 0.3738802373409271, "learning_rate": 1.089213106912222e-05, "loss": 0.0367, "step": 20655 }, { "epoch": 2.8950245269796775, "grad_norm": 0.14314015209674835, "learning_rate": 1.0877780435302559e-05, "loss": 0.0222, "step": 20656 }, { "epoch": 2.8951646811492644, "grad_norm": 0.06546798348426819, "learning_rate": 1.08634298014829e-05, "loss": 0.0023, "step": 20657 }, { "epoch": 2.895304835318851, "grad_norm": 0.2814999222755432, "learning_rate": 1.0849079167663238e-05, "loss": 0.0249, "step": 20658 }, { "epoch": 2.8954449894884373, "grad_norm": 0.16743066906929016, "learning_rate": 1.0834728533843579e-05, "loss": 0.0309, "step": 20659 }, { "epoch": 2.8955851436580238, "grad_norm": 
0.04918849840760231, "learning_rate": 1.0820377900023917e-05, "loss": 0.0077, "step": 20660 }, { "epoch": 2.8957252978276102, "grad_norm": 0.40250658988952637, "learning_rate": 1.0806027266204258e-05, "loss": 0.0538, "step": 20661 }, { "epoch": 2.8958654519971967, "grad_norm": 0.38218533992767334, "learning_rate": 1.0791676632384596e-05, "loss": 0.0413, "step": 20662 }, { "epoch": 2.8960056061667836, "grad_norm": 0.04741606116294861, "learning_rate": 1.0777325998564937e-05, "loss": 0.0028, "step": 20663 }, { "epoch": 2.89614576033637, "grad_norm": 0.1715167760848999, "learning_rate": 1.0762975364745276e-05, "loss": 0.0258, "step": 20664 }, { "epoch": 2.8962859145059565, "grad_norm": 0.15992993116378784, "learning_rate": 1.0748624730925616e-05, "loss": 0.015, "step": 20665 }, { "epoch": 2.896426068675543, "grad_norm": 0.2722056210041046, "learning_rate": 1.0734274097105955e-05, "loss": 0.0365, "step": 20666 }, { "epoch": 2.89656622284513, "grad_norm": 0.8713580965995789, "learning_rate": 1.0719923463286295e-05, "loss": 0.1105, "step": 20667 }, { "epoch": 2.8967063770147163, "grad_norm": 0.5795403122901917, "learning_rate": 1.0705572829466634e-05, "loss": 0.0662, "step": 20668 }, { "epoch": 2.896846531184303, "grad_norm": 0.3681755065917969, "learning_rate": 1.0691222195646974e-05, "loss": 0.0075, "step": 20669 }, { "epoch": 2.8969866853538893, "grad_norm": 0.4776165783405304, "learning_rate": 1.0676871561827313e-05, "loss": 0.0787, "step": 20670 }, { "epoch": 2.8971268395234757, "grad_norm": 0.17915162444114685, "learning_rate": 1.0662520928007653e-05, "loss": 0.0254, "step": 20671 }, { "epoch": 2.897266993693062, "grad_norm": 0.7088584303855896, "learning_rate": 1.0648170294187992e-05, "loss": 0.0454, "step": 20672 }, { "epoch": 2.897407147862649, "grad_norm": 0.1979878842830658, "learning_rate": 1.0633819660368333e-05, "loss": 0.0339, "step": 20673 }, { "epoch": 2.8975473020322355, "grad_norm": 0.07501644641160965, "learning_rate": 1.0619469026548671e-05, "loss": 
0.0078, "step": 20674 }, { "epoch": 2.897687456201822, "grad_norm": 0.10961319506168365, "learning_rate": 1.0605118392729012e-05, "loss": 0.0069, "step": 20675 }, { "epoch": 2.8978276103714085, "grad_norm": 0.15714536607265472, "learning_rate": 1.059076775890935e-05, "loss": 0.0084, "step": 20676 }, { "epoch": 2.8979677645409954, "grad_norm": 0.2076135277748108, "learning_rate": 1.0576417125089691e-05, "loss": 0.0081, "step": 20677 }, { "epoch": 2.898107918710582, "grad_norm": 0.098606638610363, "learning_rate": 1.056206649127003e-05, "loss": 0.0132, "step": 20678 }, { "epoch": 2.8982480728801683, "grad_norm": 0.11080455780029297, "learning_rate": 1.054771585745037e-05, "loss": 0.0218, "step": 20679 }, { "epoch": 2.8983882270497547, "grad_norm": 0.811276912689209, "learning_rate": 1.0533365223630709e-05, "loss": 0.0626, "step": 20680 }, { "epoch": 2.898528381219341, "grad_norm": 0.31210771203041077, "learning_rate": 1.051901458981105e-05, "loss": 0.0471, "step": 20681 }, { "epoch": 2.8986685353889277, "grad_norm": 0.0819486752152443, "learning_rate": 1.0504663955991388e-05, "loss": 0.0062, "step": 20682 }, { "epoch": 2.898808689558514, "grad_norm": 0.4523376524448395, "learning_rate": 1.0490313322171728e-05, "loss": 0.0461, "step": 20683 }, { "epoch": 2.898948843728101, "grad_norm": 0.15217606723308563, "learning_rate": 1.0475962688352067e-05, "loss": 0.0564, "step": 20684 }, { "epoch": 2.8990889978976875, "grad_norm": 0.0509762205183506, "learning_rate": 1.0461612054532407e-05, "loss": 0.0032, "step": 20685 }, { "epoch": 2.899229152067274, "grad_norm": 0.14570952951908112, "learning_rate": 1.0447261420712746e-05, "loss": 0.0201, "step": 20686 }, { "epoch": 2.8993693062368604, "grad_norm": 0.11255156993865967, "learning_rate": 1.0432910786893087e-05, "loss": 0.0036, "step": 20687 }, { "epoch": 2.8995094604064473, "grad_norm": 0.3249497711658478, "learning_rate": 1.0418560153073425e-05, "loss": 0.1012, "step": 20688 }, { "epoch": 2.8996496145760338, "grad_norm": 
0.2085169106721878, "learning_rate": 1.0404209519253766e-05, "loss": 0.0213, "step": 20689 }, { "epoch": 2.89978976874562, "grad_norm": 0.1037660464644432, "learning_rate": 1.0389858885434106e-05, "loss": 0.0112, "step": 20690 }, { "epoch": 2.8999299229152067, "grad_norm": 0.5664847493171692, "learning_rate": 1.0375508251614447e-05, "loss": 0.0078, "step": 20691 }, { "epoch": 2.900070077084793, "grad_norm": 0.2693379819393158, "learning_rate": 1.0361157617794785e-05, "loss": 0.0182, "step": 20692 }, { "epoch": 2.9002102312543796, "grad_norm": 0.10767032951116562, "learning_rate": 1.0346806983975126e-05, "loss": 0.0043, "step": 20693 }, { "epoch": 2.9003503854239665, "grad_norm": 0.17976680397987366, "learning_rate": 1.0332456350155464e-05, "loss": 0.0079, "step": 20694 }, { "epoch": 2.900490539593553, "grad_norm": 0.06164063513278961, "learning_rate": 1.0318105716335805e-05, "loss": 0.0113, "step": 20695 }, { "epoch": 2.9006306937631394, "grad_norm": 0.18699444830417633, "learning_rate": 1.0303755082516144e-05, "loss": 0.0255, "step": 20696 }, { "epoch": 2.900770847932726, "grad_norm": 0.04256226867437363, "learning_rate": 1.0289404448696484e-05, "loss": 0.0051, "step": 20697 }, { "epoch": 2.900911002102313, "grad_norm": 0.033912040293216705, "learning_rate": 1.0275053814876823e-05, "loss": 0.0019, "step": 20698 }, { "epoch": 2.9010511562718992, "grad_norm": 0.2507621943950653, "learning_rate": 1.0260703181057163e-05, "loss": 0.0283, "step": 20699 }, { "epoch": 2.9011913104414857, "grad_norm": 0.2935486137866974, "learning_rate": 1.0246352547237502e-05, "loss": 0.0304, "step": 20700 }, { "epoch": 2.901331464611072, "grad_norm": 0.22213530540466309, "learning_rate": 1.0232001913417842e-05, "loss": 0.0118, "step": 20701 }, { "epoch": 2.9014716187806586, "grad_norm": 0.17837722599506378, "learning_rate": 1.0217651279598181e-05, "loss": 0.009, "step": 20702 }, { "epoch": 2.901611772950245, "grad_norm": 0.3788335919380188, "learning_rate": 1.0203300645778521e-05, 
"loss": 0.0278, "step": 20703 }, { "epoch": 2.9017519271198315, "grad_norm": 0.05208876356482506, "learning_rate": 1.018895001195886e-05, "loss": 0.0039, "step": 20704 }, { "epoch": 2.9018920812894184, "grad_norm": 0.24590559303760529, "learning_rate": 1.01745993781392e-05, "loss": 0.0241, "step": 20705 }, { "epoch": 2.902032235459005, "grad_norm": 0.22803764045238495, "learning_rate": 1.016024874431954e-05, "loss": 0.0243, "step": 20706 }, { "epoch": 2.9021723896285914, "grad_norm": 0.1533641219139099, "learning_rate": 1.014589811049988e-05, "loss": 0.0262, "step": 20707 }, { "epoch": 2.9023125437981783, "grad_norm": 0.5914942622184753, "learning_rate": 1.0131547476680218e-05, "loss": 0.0179, "step": 20708 }, { "epoch": 2.9024526979677647, "grad_norm": 0.05916742607951164, "learning_rate": 1.0117196842860559e-05, "loss": 0.0047, "step": 20709 }, { "epoch": 2.902592852137351, "grad_norm": 0.11456607282161713, "learning_rate": 1.0102846209040898e-05, "loss": 0.006, "step": 20710 }, { "epoch": 2.9027330063069376, "grad_norm": 0.2938591241836548, "learning_rate": 1.0088495575221238e-05, "loss": 0.0213, "step": 20711 }, { "epoch": 2.902873160476524, "grad_norm": 0.14763087034225464, "learning_rate": 1.0074144941401577e-05, "loss": 0.0081, "step": 20712 }, { "epoch": 2.9030133146461106, "grad_norm": 0.5123448967933655, "learning_rate": 1.0059794307581917e-05, "loss": 0.0476, "step": 20713 }, { "epoch": 2.903153468815697, "grad_norm": 0.3131152391433716, "learning_rate": 1.0045443673762256e-05, "loss": 0.0095, "step": 20714 }, { "epoch": 2.903293622985284, "grad_norm": 0.07629865407943726, "learning_rate": 1.0031093039942596e-05, "loss": 0.0311, "step": 20715 }, { "epoch": 2.9034337771548704, "grad_norm": 0.2567148506641388, "learning_rate": 1.0016742406122935e-05, "loss": 0.0085, "step": 20716 }, { "epoch": 2.903573931324457, "grad_norm": 0.04181884974241257, "learning_rate": 1.0002391772303275e-05, "loss": 0.002, "step": 20717 }, { "epoch": 2.9037140854940433, 
"grad_norm": 0.054413337260484695, "learning_rate": 9.988041138483614e-06, "loss": 0.0037, "step": 20718 }, { "epoch": 2.90385423966363, "grad_norm": 1.6050736904144287, "learning_rate": 9.973690504663955e-06, "loss": 0.1004, "step": 20719 }, { "epoch": 2.9039943938332167, "grad_norm": 0.1725742667913437, "learning_rate": 9.959339870844293e-06, "loss": 0.0044, "step": 20720 }, { "epoch": 2.904134548002803, "grad_norm": 0.12213186919689178, "learning_rate": 9.944989237024634e-06, "loss": 0.0208, "step": 20721 }, { "epoch": 2.9042747021723896, "grad_norm": 0.14038968086242676, "learning_rate": 9.930638603204972e-06, "loss": 0.0231, "step": 20722 }, { "epoch": 2.904414856341976, "grad_norm": 0.4169723093509674, "learning_rate": 9.916287969385313e-06, "loss": 0.037, "step": 20723 }, { "epoch": 2.9045550105115625, "grad_norm": 0.15851376950740814, "learning_rate": 9.901937335565652e-06, "loss": 0.0213, "step": 20724 }, { "epoch": 2.9046951646811494, "grad_norm": 0.7114185690879822, "learning_rate": 9.887586701745992e-06, "loss": 0.0153, "step": 20725 }, { "epoch": 2.904835318850736, "grad_norm": 0.4193439185619354, "learning_rate": 9.87323606792633e-06, "loss": 0.0261, "step": 20726 }, { "epoch": 2.9049754730203223, "grad_norm": 0.12254543602466583, "learning_rate": 9.858885434106671e-06, "loss": 0.017, "step": 20727 }, { "epoch": 2.905115627189909, "grad_norm": 0.2768273651599884, "learning_rate": 9.844534800287013e-06, "loss": 0.028, "step": 20728 }, { "epoch": 2.9052557813594957, "grad_norm": 0.29675331711769104, "learning_rate": 9.830184166467352e-06, "loss": 0.02, "step": 20729 }, { "epoch": 2.905395935529082, "grad_norm": 0.1500357985496521, "learning_rate": 9.815833532647692e-06, "loss": 0.0126, "step": 20730 }, { "epoch": 2.9055360896986686, "grad_norm": 0.3799542188644409, "learning_rate": 9.801482898828031e-06, "loss": 0.036, "step": 20731 }, { "epoch": 2.905676243868255, "grad_norm": 0.15092352032661438, "learning_rate": 9.787132265008372e-06, "loss": 0.0098, 
"step": 20732 }, { "epoch": 2.9058163980378415, "grad_norm": 0.20201392471790314, "learning_rate": 9.77278163118871e-06, "loss": 0.0154, "step": 20733 }, { "epoch": 2.905956552207428, "grad_norm": 0.2058466672897339, "learning_rate": 9.75843099736905e-06, "loss": 0.0416, "step": 20734 }, { "epoch": 2.9060967063770144, "grad_norm": 0.2651395797729492, "learning_rate": 9.74408036354939e-06, "loss": 0.0364, "step": 20735 }, { "epoch": 2.9062368605466014, "grad_norm": 0.25694939494132996, "learning_rate": 9.72972972972973e-06, "loss": 0.0341, "step": 20736 }, { "epoch": 2.906377014716188, "grad_norm": 0.30681174993515015, "learning_rate": 9.715379095910069e-06, "loss": 0.0414, "step": 20737 }, { "epoch": 2.9065171688857743, "grad_norm": 0.14340701699256897, "learning_rate": 9.701028462090409e-06, "loss": 0.0237, "step": 20738 }, { "epoch": 2.9066573230553607, "grad_norm": 0.40006113052368164, "learning_rate": 9.686677828270748e-06, "loss": 0.012, "step": 20739 }, { "epoch": 2.9067974772249476, "grad_norm": 0.16262710094451904, "learning_rate": 9.672327194451088e-06, "loss": 0.0178, "step": 20740 }, { "epoch": 2.906937631394534, "grad_norm": 0.15633246302604675, "learning_rate": 9.657976560631427e-06, "loss": 0.0232, "step": 20741 }, { "epoch": 2.9070777855641206, "grad_norm": 0.16302275657653809, "learning_rate": 9.643625926811767e-06, "loss": 0.0133, "step": 20742 }, { "epoch": 2.907217939733707, "grad_norm": 0.2189866006374359, "learning_rate": 9.629275292992106e-06, "loss": 0.0526, "step": 20743 }, { "epoch": 2.9073580939032935, "grad_norm": 0.32441413402557373, "learning_rate": 9.614924659172446e-06, "loss": 0.0227, "step": 20744 }, { "epoch": 2.90749824807288, "grad_norm": 0.19663912057876587, "learning_rate": 9.600574025352785e-06, "loss": 0.0194, "step": 20745 }, { "epoch": 2.907638402242467, "grad_norm": 0.1286417692899704, "learning_rate": 9.586223391533126e-06, "loss": 0.0137, "step": 20746 }, { "epoch": 2.9077785564120533, "grad_norm": 0.38042134046554565, 
"learning_rate": 9.571872757713464e-06, "loss": 0.0213, "step": 20747 }, { "epoch": 2.9079187105816398, "grad_norm": 0.3720579445362091, "learning_rate": 9.557522123893805e-06, "loss": 0.0215, "step": 20748 }, { "epoch": 2.908058864751226, "grad_norm": 2.1758949756622314, "learning_rate": 9.543171490074143e-06, "loss": 0.0153, "step": 20749 }, { "epoch": 2.908199018920813, "grad_norm": 0.26301172375679016, "learning_rate": 9.528820856254484e-06, "loss": 0.0649, "step": 20750 }, { "epoch": 2.9083391730903996, "grad_norm": 0.12222176045179367, "learning_rate": 9.514470222434823e-06, "loss": 0.0325, "step": 20751 }, { "epoch": 2.908479327259986, "grad_norm": 0.10018991678953171, "learning_rate": 9.500119588615163e-06, "loss": 0.0105, "step": 20752 }, { "epoch": 2.9086194814295725, "grad_norm": 0.27538055181503296, "learning_rate": 9.485768954795502e-06, "loss": 0.0629, "step": 20753 }, { "epoch": 2.908759635599159, "grad_norm": 0.09087176620960236, "learning_rate": 9.471418320975842e-06, "loss": 0.0058, "step": 20754 }, { "epoch": 2.9088997897687454, "grad_norm": 0.35563525557518005, "learning_rate": 9.457067687156181e-06, "loss": 0.0486, "step": 20755 }, { "epoch": 2.9090399439383323, "grad_norm": 0.6977300643920898, "learning_rate": 9.442717053336521e-06, "loss": 0.0529, "step": 20756 }, { "epoch": 2.9091800981079188, "grad_norm": 0.03876902535557747, "learning_rate": 9.42836641951686e-06, "loss": 0.0036, "step": 20757 }, { "epoch": 2.9093202522775052, "grad_norm": 0.2578149437904358, "learning_rate": 9.4140157856972e-06, "loss": 0.01, "step": 20758 }, { "epoch": 2.9094604064470917, "grad_norm": 0.34983158111572266, "learning_rate": 9.399665151877539e-06, "loss": 0.0328, "step": 20759 }, { "epoch": 2.9096005606166786, "grad_norm": 0.11553102731704712, "learning_rate": 9.38531451805788e-06, "loss": 0.0096, "step": 20760 }, { "epoch": 2.909740714786265, "grad_norm": 0.11713887006044388, "learning_rate": 9.37096388423822e-06, "loss": 0.011, "step": 20761 }, { "epoch": 
2.9098808689558515, "grad_norm": 0.03882862627506256, "learning_rate": 9.356613250418559e-06, "loss": 0.0037, "step": 20762 }, { "epoch": 2.910021023125438, "grad_norm": 0.1979157030582428, "learning_rate": 9.342262616598899e-06, "loss": 0.017, "step": 20763 }, { "epoch": 2.9101611772950244, "grad_norm": 0.1617109775543213, "learning_rate": 9.327911982779238e-06, "loss": 0.0091, "step": 20764 }, { "epoch": 2.910301331464611, "grad_norm": 0.11878762394189835, "learning_rate": 9.313561348959578e-06, "loss": 0.007, "step": 20765 }, { "epoch": 2.9104414856341974, "grad_norm": 0.4726565480232239, "learning_rate": 9.299210715139917e-06, "loss": 0.0657, "step": 20766 }, { "epoch": 2.9105816398037843, "grad_norm": 0.35598182678222656, "learning_rate": 9.284860081320257e-06, "loss": 0.026, "step": 20767 }, { "epoch": 2.9107217939733707, "grad_norm": 0.8986560106277466, "learning_rate": 9.270509447500596e-06, "loss": 0.176, "step": 20768 }, { "epoch": 2.910861948142957, "grad_norm": 0.4295255243778229, "learning_rate": 9.256158813680937e-06, "loss": 0.0282, "step": 20769 }, { "epoch": 2.9110021023125436, "grad_norm": 2.182746410369873, "learning_rate": 9.241808179861275e-06, "loss": 0.1048, "step": 20770 }, { "epoch": 2.9111422564821305, "grad_norm": 0.13311715424060822, "learning_rate": 9.227457546041616e-06, "loss": 0.0087, "step": 20771 }, { "epoch": 2.911282410651717, "grad_norm": 0.17250411212444305, "learning_rate": 9.213106912221954e-06, "loss": 0.0265, "step": 20772 }, { "epoch": 2.9114225648213035, "grad_norm": 0.2883392870426178, "learning_rate": 9.198756278402295e-06, "loss": 0.0361, "step": 20773 }, { "epoch": 2.91156271899089, "grad_norm": 0.1712668240070343, "learning_rate": 9.184405644582635e-06, "loss": 0.0166, "step": 20774 }, { "epoch": 2.9117028731604764, "grad_norm": 0.1162927895784378, "learning_rate": 9.170055010762976e-06, "loss": 0.0445, "step": 20775 }, { "epoch": 2.911843027330063, "grad_norm": 0.2495385706424713, "learning_rate": 
9.155704376943314e-06, "loss": 0.025, "step": 20776 }, { "epoch": 2.9119831814996497, "grad_norm": 0.15533806383609772, "learning_rate": 9.141353743123655e-06, "loss": 0.0122, "step": 20777 }, { "epoch": 2.912123335669236, "grad_norm": 0.1594160497188568, "learning_rate": 9.127003109303994e-06, "loss": 0.0545, "step": 20778 }, { "epoch": 2.9122634898388227, "grad_norm": 0.23043681681156158, "learning_rate": 9.112652475484334e-06, "loss": 0.0167, "step": 20779 }, { "epoch": 2.912403644008409, "grad_norm": 0.08006884902715683, "learning_rate": 9.098301841664673e-06, "loss": 0.0043, "step": 20780 }, { "epoch": 2.912543798177996, "grad_norm": 1.5369068384170532, "learning_rate": 9.083951207845013e-06, "loss": 0.0375, "step": 20781 }, { "epoch": 2.9126839523475825, "grad_norm": 0.21777760982513428, "learning_rate": 9.069600574025352e-06, "loss": 0.0207, "step": 20782 }, { "epoch": 2.912824106517169, "grad_norm": 0.3543589115142822, "learning_rate": 9.055249940205692e-06, "loss": 0.0362, "step": 20783 }, { "epoch": 2.9129642606867554, "grad_norm": 0.21634580194950104, "learning_rate": 9.040899306386031e-06, "loss": 0.0152, "step": 20784 }, { "epoch": 2.913104414856342, "grad_norm": 0.11264508962631226, "learning_rate": 9.026548672566371e-06, "loss": 0.0177, "step": 20785 }, { "epoch": 2.9132445690259283, "grad_norm": 0.4805937111377716, "learning_rate": 9.01219803874671e-06, "loss": 0.0404, "step": 20786 }, { "epoch": 2.9133847231955152, "grad_norm": 0.04068087786436081, "learning_rate": 8.99784740492705e-06, "loss": 0.0019, "step": 20787 }, { "epoch": 2.9135248773651017, "grad_norm": 0.3326500356197357, "learning_rate": 8.98349677110739e-06, "loss": 0.0094, "step": 20788 }, { "epoch": 2.913665031534688, "grad_norm": 0.4000054597854614, "learning_rate": 8.96914613728773e-06, "loss": 0.0356, "step": 20789 }, { "epoch": 2.9138051857042746, "grad_norm": 0.20616628229618073, "learning_rate": 8.954795503468068e-06, "loss": 0.0224, "step": 20790 }, { "epoch": 
2.9139453398738615, "grad_norm": 0.127966970205307, "learning_rate": 8.940444869648409e-06, "loss": 0.0112, "step": 20791 }, { "epoch": 2.914085494043448, "grad_norm": 0.6540483236312866, "learning_rate": 8.926094235828748e-06, "loss": 0.0584, "step": 20792 }, { "epoch": 2.9142256482130344, "grad_norm": 0.7131612300872803, "learning_rate": 8.911743602009088e-06, "loss": 0.0341, "step": 20793 }, { "epoch": 2.914365802382621, "grad_norm": 0.03764643520116806, "learning_rate": 8.897392968189428e-06, "loss": 0.0029, "step": 20794 }, { "epoch": 2.9145059565522073, "grad_norm": 0.0637982189655304, "learning_rate": 8.883042334369767e-06, "loss": 0.0061, "step": 20795 }, { "epoch": 2.914646110721794, "grad_norm": 0.22289808094501495, "learning_rate": 8.868691700550108e-06, "loss": 0.0315, "step": 20796 }, { "epoch": 2.9147862648913803, "grad_norm": 0.2513267695903778, "learning_rate": 8.854341066730446e-06, "loss": 0.0389, "step": 20797 }, { "epoch": 2.914926419060967, "grad_norm": 0.18218311667442322, "learning_rate": 8.839990432910787e-06, "loss": 0.0108, "step": 20798 }, { "epoch": 2.9150665732305536, "grad_norm": 0.11901721358299255, "learning_rate": 8.825639799091125e-06, "loss": 0.01, "step": 20799 }, { "epoch": 2.91520672740014, "grad_norm": 0.43263545632362366, "learning_rate": 8.811289165271466e-06, "loss": 0.0094, "step": 20800 }, { "epoch": 2.9153468815697265, "grad_norm": 0.20341414213180542, "learning_rate": 8.796938531451805e-06, "loss": 0.0226, "step": 20801 }, { "epoch": 2.9154870357393134, "grad_norm": 0.34665700793266296, "learning_rate": 8.782587897632145e-06, "loss": 0.0462, "step": 20802 }, { "epoch": 2.9156271899089, "grad_norm": 0.36099669337272644, "learning_rate": 8.768237263812484e-06, "loss": 0.0528, "step": 20803 }, { "epoch": 2.9157673440784864, "grad_norm": 0.033484939485788345, "learning_rate": 8.753886629992824e-06, "loss": 0.0014, "step": 20804 }, { "epoch": 2.915907498248073, "grad_norm": 0.06174330785870552, "learning_rate": 
8.739535996173163e-06, "loss": 0.0045, "step": 20805 }, { "epoch": 2.9160476524176593, "grad_norm": 0.44212549924850464, "learning_rate": 8.725185362353503e-06, "loss": 0.0132, "step": 20806 }, { "epoch": 2.9161878065872457, "grad_norm": 0.1343972235918045, "learning_rate": 8.710834728533842e-06, "loss": 0.0257, "step": 20807 }, { "epoch": 2.9163279607568326, "grad_norm": 0.19658316671848297, "learning_rate": 8.696484094714182e-06, "loss": 0.0124, "step": 20808 }, { "epoch": 2.916468114926419, "grad_norm": 0.43633759021759033, "learning_rate": 8.682133460894521e-06, "loss": 0.0335, "step": 20809 }, { "epoch": 2.9166082690960056, "grad_norm": 0.13331104815006256, "learning_rate": 8.667782827074862e-06, "loss": 0.0165, "step": 20810 }, { "epoch": 2.916748423265592, "grad_norm": 0.2501240074634552, "learning_rate": 8.653432193255202e-06, "loss": 0.0246, "step": 20811 }, { "epoch": 2.916888577435179, "grad_norm": 0.08909827470779419, "learning_rate": 8.63908155943554e-06, "loss": 0.0112, "step": 20812 }, { "epoch": 2.9170287316047654, "grad_norm": 0.18700209259986877, "learning_rate": 8.624730925615881e-06, "loss": 0.0457, "step": 20813 }, { "epoch": 2.917168885774352, "grad_norm": 0.3018716275691986, "learning_rate": 8.61038029179622e-06, "loss": 0.0132, "step": 20814 }, { "epoch": 2.9173090399439383, "grad_norm": 0.5658467411994934, "learning_rate": 8.59602965797656e-06, "loss": 0.0736, "step": 20815 }, { "epoch": 2.9174491941135248, "grad_norm": 0.17314061522483826, "learning_rate": 8.581679024156899e-06, "loss": 0.0037, "step": 20816 }, { "epoch": 2.9175893482831112, "grad_norm": 1.6219761371612549, "learning_rate": 8.56732839033724e-06, "loss": 0.1375, "step": 20817 }, { "epoch": 2.917729502452698, "grad_norm": 1.7230589389801025, "learning_rate": 8.552977756517578e-06, "loss": 0.2051, "step": 20818 }, { "epoch": 2.9178696566222846, "grad_norm": 4.344144344329834, "learning_rate": 8.538627122697919e-06, "loss": 0.1788, "step": 20819 }, { "epoch": 
2.918009810791871, "grad_norm": 0.03655634820461273, "learning_rate": 8.524276488878257e-06, "loss": 0.0027, "step": 20820 }, { "epoch": 2.9181499649614575, "grad_norm": 0.25102561712265015, "learning_rate": 8.509925855058598e-06, "loss": 0.0456, "step": 20821 }, { "epoch": 2.9182901191310444, "grad_norm": 0.07444274425506592, "learning_rate": 8.495575221238936e-06, "loss": 0.0038, "step": 20822 }, { "epoch": 2.918430273300631, "grad_norm": 0.11049601435661316, "learning_rate": 8.481224587419277e-06, "loss": 0.062, "step": 20823 }, { "epoch": 2.9185704274702173, "grad_norm": 0.060240164399147034, "learning_rate": 8.466873953599616e-06, "loss": 0.0063, "step": 20824 }, { "epoch": 2.918710581639804, "grad_norm": 0.4039372503757477, "learning_rate": 8.452523319779956e-06, "loss": 0.0247, "step": 20825 }, { "epoch": 2.9188507358093903, "grad_norm": 0.2745022177696228, "learning_rate": 8.438172685960295e-06, "loss": 0.0472, "step": 20826 }, { "epoch": 2.9189908899789767, "grad_norm": 0.2167574018239975, "learning_rate": 8.423822052140635e-06, "loss": 0.0187, "step": 20827 }, { "epoch": 2.919131044148563, "grad_norm": 0.4564080238342285, "learning_rate": 8.409471418320974e-06, "loss": 0.035, "step": 20828 }, { "epoch": 2.91927119831815, "grad_norm": 0.04281604662537575, "learning_rate": 8.395120784501314e-06, "loss": 0.0025, "step": 20829 }, { "epoch": 2.9194113524877365, "grad_norm": 0.16506488621234894, "learning_rate": 8.380770150681655e-06, "loss": 0.0524, "step": 20830 }, { "epoch": 2.919551506657323, "grad_norm": 0.06750055402517319, "learning_rate": 8.366419516861995e-06, "loss": 0.0036, "step": 20831 }, { "epoch": 2.9196916608269095, "grad_norm": 0.16076305508613586, "learning_rate": 8.352068883042334e-06, "loss": 0.0074, "step": 20832 }, { "epoch": 2.9198318149964964, "grad_norm": 0.07818535715341568, "learning_rate": 8.337718249222674e-06, "loss": 0.0052, "step": 20833 }, { "epoch": 2.919971969166083, "grad_norm": 0.19906705617904663, "learning_rate": 
8.323367615403013e-06, "loss": 0.0392, "step": 20834 }, { "epoch": 2.9201121233356693, "grad_norm": 0.15549708902835846, "learning_rate": 8.309016981583353e-06, "loss": 0.0334, "step": 20835 }, { "epoch": 2.9202522775052557, "grad_norm": 0.08082341402769089, "learning_rate": 8.294666347763692e-06, "loss": 0.0093, "step": 20836 }, { "epoch": 2.920392431674842, "grad_norm": 0.14476197957992554, "learning_rate": 8.280315713944032e-06, "loss": 0.0607, "step": 20837 }, { "epoch": 2.9205325858444287, "grad_norm": 0.31879347562789917, "learning_rate": 8.265965080124371e-06, "loss": 0.0756, "step": 20838 }, { "epoch": 2.9206727400140156, "grad_norm": 0.460928738117218, "learning_rate": 8.251614446304712e-06, "loss": 0.032, "step": 20839 }, { "epoch": 2.920812894183602, "grad_norm": 0.24567623436450958, "learning_rate": 8.23726381248505e-06, "loss": 0.0311, "step": 20840 }, { "epoch": 2.9209530483531885, "grad_norm": 0.1459200233221054, "learning_rate": 8.22291317866539e-06, "loss": 0.0203, "step": 20841 }, { "epoch": 2.921093202522775, "grad_norm": 0.12736356258392334, "learning_rate": 8.20856254484573e-06, "loss": 0.026, "step": 20842 }, { "epoch": 2.921233356692362, "grad_norm": 0.15769840776920319, "learning_rate": 8.19421191102607e-06, "loss": 0.0296, "step": 20843 }, { "epoch": 2.9213735108619483, "grad_norm": 0.10037697106599808, "learning_rate": 8.179861277206409e-06, "loss": 0.026, "step": 20844 }, { "epoch": 2.9215136650315348, "grad_norm": 0.13630695641040802, "learning_rate": 8.165510643386749e-06, "loss": 0.0161, "step": 20845 }, { "epoch": 2.921653819201121, "grad_norm": 0.2527487277984619, "learning_rate": 8.151160009567088e-06, "loss": 0.0628, "step": 20846 }, { "epoch": 2.9217939733707077, "grad_norm": 0.17540931701660156, "learning_rate": 8.136809375747428e-06, "loss": 0.021, "step": 20847 }, { "epoch": 2.921934127540294, "grad_norm": 0.08566952496767044, "learning_rate": 8.122458741927769e-06, "loss": 0.0046, "step": 20848 }, { "epoch": 
2.9220742817098806, "grad_norm": 0.19337183237075806, "learning_rate": 8.108108108108107e-06, "loss": 0.0197, "step": 20849 }, { "epoch": 2.9222144358794675, "grad_norm": 0.562846839427948, "learning_rate": 8.093757474288448e-06, "loss": 0.0345, "step": 20850 }, { "epoch": 2.922354590049054, "grad_norm": 0.20644716918468475, "learning_rate": 8.079406840468786e-06, "loss": 0.0393, "step": 20851 }, { "epoch": 2.9224947442186404, "grad_norm": 0.03776136413216591, "learning_rate": 8.065056206649127e-06, "loss": 0.0022, "step": 20852 }, { "epoch": 2.9226348983882273, "grad_norm": 0.06299964338541031, "learning_rate": 8.050705572829466e-06, "loss": 0.0055, "step": 20853 }, { "epoch": 2.922775052557814, "grad_norm": 0.24422027170658112, "learning_rate": 8.036354939009806e-06, "loss": 0.0333, "step": 20854 }, { "epoch": 2.9229152067274002, "grad_norm": 0.1088394895195961, "learning_rate": 8.022004305190145e-06, "loss": 0.0103, "step": 20855 }, { "epoch": 2.9230553608969867, "grad_norm": 0.35398152470588684, "learning_rate": 8.007653671370485e-06, "loss": 0.0715, "step": 20856 }, { "epoch": 2.923195515066573, "grad_norm": 0.1836041361093521, "learning_rate": 7.993303037550824e-06, "loss": 0.0601, "step": 20857 }, { "epoch": 2.9233356692361596, "grad_norm": 0.18405325710773468, "learning_rate": 7.978952403731164e-06, "loss": 0.0216, "step": 20858 }, { "epoch": 2.923475823405746, "grad_norm": 0.12270653247833252, "learning_rate": 7.964601769911503e-06, "loss": 0.0066, "step": 20859 }, { "epoch": 2.923615977575333, "grad_norm": 0.12997110188007355, "learning_rate": 7.950251136091843e-06, "loss": 0.011, "step": 20860 }, { "epoch": 2.9237561317449194, "grad_norm": 0.3330586552619934, "learning_rate": 7.935900502272182e-06, "loss": 0.0414, "step": 20861 }, { "epoch": 2.923896285914506, "grad_norm": 0.10077826678752899, "learning_rate": 7.921549868452523e-06, "loss": 0.004, "step": 20862 }, { "epoch": 2.9240364400840924, "grad_norm": 0.4784476161003113, "learning_rate": 
7.907199234632861e-06, "loss": 0.0728, "step": 20863 }, { "epoch": 2.9241765942536793, "grad_norm": 0.0951777920126915, "learning_rate": 7.892848600813202e-06, "loss": 0.0057, "step": 20864 }, { "epoch": 2.9243167484232657, "grad_norm": 0.03978104516863823, "learning_rate": 7.87849796699354e-06, "loss": 0.0017, "step": 20865 }, { "epoch": 2.924456902592852, "grad_norm": 0.21975819766521454, "learning_rate": 7.864147333173881e-06, "loss": 0.0144, "step": 20866 }, { "epoch": 2.9245970567624386, "grad_norm": 0.31612688302993774, "learning_rate": 7.849796699354221e-06, "loss": 0.032, "step": 20867 }, { "epoch": 2.924737210932025, "grad_norm": 0.6473194360733032, "learning_rate": 7.83544606553456e-06, "loss": 0.0366, "step": 20868 }, { "epoch": 2.9248773651016116, "grad_norm": 0.9579076170921326, "learning_rate": 7.8210954317149e-06, "loss": 0.0867, "step": 20869 }, { "epoch": 2.9250175192711985, "grad_norm": 1.9025181531906128, "learning_rate": 7.80674479789524e-06, "loss": 0.4027, "step": 20870 }, { "epoch": 2.925157673440785, "grad_norm": 0.14700014889240265, "learning_rate": 7.79239416407558e-06, "loss": 0.0133, "step": 20871 }, { "epoch": 2.9252978276103714, "grad_norm": 0.36744391918182373, "learning_rate": 7.778043530255918e-06, "loss": 0.0489, "step": 20872 }, { "epoch": 2.925437981779958, "grad_norm": 0.11922232806682587, "learning_rate": 7.763692896436259e-06, "loss": 0.0072, "step": 20873 }, { "epoch": 2.9255781359495447, "grad_norm": 0.22037041187286377, "learning_rate": 7.749342262616597e-06, "loss": 0.0154, "step": 20874 }, { "epoch": 2.925718290119131, "grad_norm": 0.34085753560066223, "learning_rate": 7.734991628796938e-06, "loss": 0.0306, "step": 20875 }, { "epoch": 2.9258584442887177, "grad_norm": 0.19489246606826782, "learning_rate": 7.720640994977277e-06, "loss": 0.0471, "step": 20876 }, { "epoch": 2.925998598458304, "grad_norm": 0.32996195554733276, "learning_rate": 7.706290361157617e-06, "loss": 0.0216, "step": 20877 }, { "epoch": 
2.9261387526278906, "grad_norm": 0.2208753526210785, "learning_rate": 7.691939727337956e-06, "loss": 0.018, "step": 20878 }, { "epoch": 2.926278906797477, "grad_norm": 0.3136223256587982, "learning_rate": 7.677589093518296e-06, "loss": 0.0189, "step": 20879 }, { "epoch": 2.9264190609670635, "grad_norm": 0.12015900760889053, "learning_rate": 7.663238459698637e-06, "loss": 0.0114, "step": 20880 }, { "epoch": 2.9265592151366504, "grad_norm": 0.16330033540725708, "learning_rate": 7.648887825878975e-06, "loss": 0.038, "step": 20881 }, { "epoch": 2.926699369306237, "grad_norm": 0.3236497640609741, "learning_rate": 7.634537192059316e-06, "loss": 0.0714, "step": 20882 }, { "epoch": 2.9268395234758233, "grad_norm": 0.252256840467453, "learning_rate": 7.6201865582396545e-06, "loss": 0.0673, "step": 20883 }, { "epoch": 2.9269796776454102, "grad_norm": 0.28608256578445435, "learning_rate": 7.605835924419994e-06, "loss": 0.036, "step": 20884 }, { "epoch": 2.9271198318149967, "grad_norm": 0.5605271458625793, "learning_rate": 7.5914852906003345e-06, "loss": 0.0568, "step": 20885 }, { "epoch": 2.927259985984583, "grad_norm": 0.28941836953163147, "learning_rate": 7.577134656780674e-06, "loss": 0.0377, "step": 20886 }, { "epoch": 2.9274001401541696, "grad_norm": 0.09259998053312302, "learning_rate": 7.562784022961014e-06, "loss": 0.0032, "step": 20887 }, { "epoch": 2.927540294323756, "grad_norm": 0.07618865370750427, "learning_rate": 7.548433389141353e-06, "loss": 0.0194, "step": 20888 }, { "epoch": 2.9276804484933425, "grad_norm": 0.08588466048240662, "learning_rate": 7.534082755321693e-06, "loss": 0.021, "step": 20889 }, { "epoch": 2.927820602662929, "grad_norm": 0.16338613629341125, "learning_rate": 7.519732121502032e-06, "loss": 0.0282, "step": 20890 }, { "epoch": 2.927960756832516, "grad_norm": 0.111556276679039, "learning_rate": 7.505381487682372e-06, "loss": 0.0292, "step": 20891 }, { "epoch": 2.9281009110021023, "grad_norm": 0.302435040473938, "learning_rate": 
7.4910308538627115e-06, "loss": 0.073, "step": 20892 }, { "epoch": 2.928241065171689, "grad_norm": 0.13511331379413605, "learning_rate": 7.476680220043051e-06, "loss": 0.0241, "step": 20893 }, { "epoch": 2.9283812193412753, "grad_norm": 0.36202773451805115, "learning_rate": 7.462329586223391e-06, "loss": 0.0216, "step": 20894 }, { "epoch": 2.928521373510862, "grad_norm": 0.08806430548429489, "learning_rate": 7.44797895240373e-06, "loss": 0.0046, "step": 20895 }, { "epoch": 2.9286615276804486, "grad_norm": 0.08994189649820328, "learning_rate": 7.43362831858407e-06, "loss": 0.03, "step": 20896 }, { "epoch": 2.928801681850035, "grad_norm": 0.1963607668876648, "learning_rate": 7.419277684764409e-06, "loss": 0.024, "step": 20897 }, { "epoch": 2.9289418360196215, "grad_norm": 0.18027660250663757, "learning_rate": 7.404927050944749e-06, "loss": 0.0222, "step": 20898 }, { "epoch": 2.929081990189208, "grad_norm": 0.11307781934738159, "learning_rate": 7.3905764171250885e-06, "loss": 0.0209, "step": 20899 }, { "epoch": 2.9292221443587945, "grad_norm": 0.04108583927154541, "learning_rate": 7.376225783305428e-06, "loss": 0.0028, "step": 20900 }, { "epoch": 2.9293622985283814, "grad_norm": 0.11263587325811386, "learning_rate": 7.361875149485768e-06, "loss": 0.0132, "step": 20901 }, { "epoch": 2.929502452697968, "grad_norm": 0.11941960453987122, "learning_rate": 7.347524515666107e-06, "loss": 0.0087, "step": 20902 }, { "epoch": 2.9296426068675543, "grad_norm": 0.19769345223903656, "learning_rate": 7.333173881846447e-06, "loss": 0.0143, "step": 20903 }, { "epoch": 2.9297827610371407, "grad_norm": 0.6844443082809448, "learning_rate": 7.318823248026788e-06, "loss": 0.0443, "step": 20904 }, { "epoch": 2.9299229152067277, "grad_norm": 0.21895532310009003, "learning_rate": 7.304472614207128e-06, "loss": 0.0176, "step": 20905 }, { "epoch": 2.930063069376314, "grad_norm": 0.09981156140565872, "learning_rate": 7.290121980387467e-06, "loss": 0.0153, "step": 20906 }, { "epoch": 
2.9302032235459006, "grad_norm": 0.5449497699737549, "learning_rate": 7.275771346567807e-06, "loss": 0.03, "step": 20907 }, { "epoch": 2.930343377715487, "grad_norm": 0.26879242062568665, "learning_rate": 7.261420712748146e-06, "loss": 0.0128, "step": 20908 }, { "epoch": 2.9304835318850735, "grad_norm": 0.4231356680393219, "learning_rate": 7.247070078928486e-06, "loss": 0.0168, "step": 20909 }, { "epoch": 2.93062368605466, "grad_norm": 0.1815986931324005, "learning_rate": 7.2327194451088255e-06, "loss": 0.0261, "step": 20910 }, { "epoch": 2.9307638402242464, "grad_norm": 0.14714835584163666, "learning_rate": 7.218368811289165e-06, "loss": 0.0175, "step": 20911 }, { "epoch": 2.9309039943938333, "grad_norm": 0.17421674728393555, "learning_rate": 7.204018177469505e-06, "loss": 0.0279, "step": 20912 }, { "epoch": 2.9310441485634198, "grad_norm": 0.2407115399837494, "learning_rate": 7.189667543649844e-06, "loss": 0.045, "step": 20913 }, { "epoch": 2.9311843027330062, "grad_norm": 0.06666436046361923, "learning_rate": 7.175316909830184e-06, "loss": 0.0027, "step": 20914 }, { "epoch": 2.9313244569025927, "grad_norm": 0.5216501951217651, "learning_rate": 7.160966276010523e-06, "loss": 0.0203, "step": 20915 }, { "epoch": 2.9314646110721796, "grad_norm": 0.8742838501930237, "learning_rate": 7.146615642190863e-06, "loss": 0.201, "step": 20916 }, { "epoch": 2.931604765241766, "grad_norm": 0.307233065366745, "learning_rate": 7.1322650083712025e-06, "loss": 0.0371, "step": 20917 }, { "epoch": 2.9317449194113525, "grad_norm": 0.40731897950172424, "learning_rate": 7.117914374551542e-06, "loss": 0.0065, "step": 20918 }, { "epoch": 2.931885073580939, "grad_norm": 2.596923828125, "learning_rate": 7.103563740731882e-06, "loss": 0.1723, "step": 20919 }, { "epoch": 2.9320252277505254, "grad_norm": 1.0663001537322998, "learning_rate": 7.089213106912221e-06, "loss": 0.0273, "step": 20920 }, { "epoch": 2.932165381920112, "grad_norm": 0.32411348819732666, "learning_rate": 
7.074862473092561e-06, "loss": 0.0346, "step": 20921 }, { "epoch": 2.932305536089699, "grad_norm": 0.03323211148381233, "learning_rate": 7.0605118392729e-06, "loss": 0.0026, "step": 20922 }, { "epoch": 2.9324456902592853, "grad_norm": 0.43752187490463257, "learning_rate": 7.046161205453241e-06, "loss": 0.0448, "step": 20923 }, { "epoch": 2.9325858444288717, "grad_norm": 0.0739763006567955, "learning_rate": 7.03181057163358e-06, "loss": 0.0099, "step": 20924 }, { "epoch": 2.932725998598458, "grad_norm": 0.19363577663898468, "learning_rate": 7.01745993781392e-06, "loss": 0.0458, "step": 20925 }, { "epoch": 2.932866152768045, "grad_norm": 0.17047443985939026, "learning_rate": 7.0031093039942594e-06, "loss": 0.0215, "step": 20926 }, { "epoch": 2.9330063069376315, "grad_norm": 0.2737269401550293, "learning_rate": 6.988758670174599e-06, "loss": 0.0096, "step": 20927 }, { "epoch": 2.933146461107218, "grad_norm": 0.16186510026454926, "learning_rate": 6.974408036354939e-06, "loss": 0.0608, "step": 20928 }, { "epoch": 2.9332866152768045, "grad_norm": 0.34598278999328613, "learning_rate": 6.960057402535278e-06, "loss": 0.0354, "step": 20929 }, { "epoch": 2.933426769446391, "grad_norm": 0.2241280972957611, "learning_rate": 6.945706768715618e-06, "loss": 0.0195, "step": 20930 }, { "epoch": 2.9335669236159774, "grad_norm": 0.06546413153409958, "learning_rate": 6.931356134895957e-06, "loss": 0.0055, "step": 20931 }, { "epoch": 2.9337070777855643, "grad_norm": 0.12368803471326828, "learning_rate": 6.917005501076297e-06, "loss": 0.0106, "step": 20932 }, { "epoch": 2.9338472319551507, "grad_norm": 0.2745272219181061, "learning_rate": 6.9026548672566364e-06, "loss": 0.018, "step": 20933 }, { "epoch": 2.933987386124737, "grad_norm": 0.16309945285320282, "learning_rate": 6.888304233436976e-06, "loss": 0.0103, "step": 20934 }, { "epoch": 2.9341275402943237, "grad_norm": 0.355155348777771, "learning_rate": 6.873953599617316e-06, "loss": 0.0625, "step": 20935 }, { "epoch": 
2.9342676944639106, "grad_norm": 0.144060418009758, "learning_rate": 6.859602965797655e-06, "loss": 0.0081, "step": 20936 }, { "epoch": 2.934407848633497, "grad_norm": 0.10298395156860352, "learning_rate": 6.845252331977995e-06, "loss": 0.0266, "step": 20937 }, { "epoch": 2.9345480028030835, "grad_norm": 0.13036443293094635, "learning_rate": 6.830901698158334e-06, "loss": 0.0335, "step": 20938 }, { "epoch": 2.93468815697267, "grad_norm": 0.21388401091098785, "learning_rate": 6.816551064338674e-06, "loss": 0.0282, "step": 20939 }, { "epoch": 2.9348283111422564, "grad_norm": 0.12957242131233215, "learning_rate": 6.8022004305190134e-06, "loss": 0.0272, "step": 20940 }, { "epoch": 2.934968465311843, "grad_norm": 0.03360304608941078, "learning_rate": 6.787849796699354e-06, "loss": 0.0102, "step": 20941 }, { "epoch": 2.9351086194814293, "grad_norm": 0.0971352681517601, "learning_rate": 6.7734991628796934e-06, "loss": 0.0137, "step": 20942 }, { "epoch": 2.935248773651016, "grad_norm": 0.04756896197795868, "learning_rate": 6.759148529060033e-06, "loss": 0.0038, "step": 20943 }, { "epoch": 2.9353889278206027, "grad_norm": 0.4916985332965851, "learning_rate": 6.744797895240373e-06, "loss": 0.0213, "step": 20944 }, { "epoch": 2.935529081990189, "grad_norm": 0.0612313486635685, "learning_rate": 6.730447261420712e-06, "loss": 0.0055, "step": 20945 }, { "epoch": 2.9356692361597756, "grad_norm": 0.1277768462896347, "learning_rate": 6.716096627601052e-06, "loss": 0.0269, "step": 20946 }, { "epoch": 2.9358093903293625, "grad_norm": 0.313719242811203, "learning_rate": 6.701745993781391e-06, "loss": 0.0131, "step": 20947 }, { "epoch": 2.935949544498949, "grad_norm": 0.12861113250255585, "learning_rate": 6.687395359961731e-06, "loss": 0.0144, "step": 20948 }, { "epoch": 2.9360896986685354, "grad_norm": 0.14126648008823395, "learning_rate": 6.6730447261420704e-06, "loss": 0.0075, "step": 20949 }, { "epoch": 2.936229852838122, "grad_norm": 0.2856072187423706, "learning_rate": 
6.65869409232241e-06, "loss": 0.0198, "step": 20950 }, { "epoch": 2.9363700070077083, "grad_norm": 0.4148334860801697, "learning_rate": 6.64434345850275e-06, "loss": 0.0272, "step": 20951 }, { "epoch": 2.936510161177295, "grad_norm": 0.20596837997436523, "learning_rate": 6.629992824683089e-06, "loss": 0.0086, "step": 20952 }, { "epoch": 2.9366503153468817, "grad_norm": 0.1942443996667862, "learning_rate": 6.615642190863429e-06, "loss": 0.0118, "step": 20953 }, { "epoch": 2.936790469516468, "grad_norm": 0.11931493878364563, "learning_rate": 6.601291557043768e-06, "loss": 0.0175, "step": 20954 }, { "epoch": 2.9369306236860546, "grad_norm": 0.1311112493276596, "learning_rate": 6.586940923224108e-06, "loss": 0.0084, "step": 20955 }, { "epoch": 2.937070777855641, "grad_norm": 0.3379358947277069, "learning_rate": 6.5725902894044474e-06, "loss": 0.0734, "step": 20956 }, { "epoch": 2.937210932025228, "grad_norm": 0.6628038287162781, "learning_rate": 6.558239655584787e-06, "loss": 0.0354, "step": 20957 }, { "epoch": 2.9373510861948144, "grad_norm": 0.18850652873516083, "learning_rate": 6.543889021765127e-06, "loss": 0.0132, "step": 20958 }, { "epoch": 2.937491240364401, "grad_norm": 0.2557535171508789, "learning_rate": 6.529538387945466e-06, "loss": 0.0361, "step": 20959 }, { "epoch": 2.9376313945339874, "grad_norm": 0.12206394970417023, "learning_rate": 6.515187754125807e-06, "loss": 0.0113, "step": 20960 }, { "epoch": 2.937771548703574, "grad_norm": 0.13461731374263763, "learning_rate": 6.500837120306147e-06, "loss": 0.0254, "step": 20961 }, { "epoch": 2.9379117028731603, "grad_norm": 0.0747813731431961, "learning_rate": 6.4864864864864866e-06, "loss": 0.004, "step": 20962 }, { "epoch": 2.938051857042747, "grad_norm": 0.2458028346300125, "learning_rate": 6.472135852666826e-06, "loss": 0.0224, "step": 20963 }, { "epoch": 2.9381920112123336, "grad_norm": 0.2038590908050537, "learning_rate": 6.457785218847166e-06, "loss": 0.0077, "step": 20964 }, { "epoch": 2.93833216538192, 
"grad_norm": 0.20349450409412384, "learning_rate": 6.443434585027505e-06, "loss": 0.0297, "step": 20965 }, { "epoch": 2.9384723195515066, "grad_norm": 0.4349125325679779, "learning_rate": 6.429083951207845e-06, "loss": 0.0349, "step": 20966 }, { "epoch": 2.9386124737210935, "grad_norm": 0.07646788656711578, "learning_rate": 6.414733317388184e-06, "loss": 0.0032, "step": 20967 }, { "epoch": 2.93875262789068, "grad_norm": 0.05840045213699341, "learning_rate": 6.400382683568524e-06, "loss": 0.0022, "step": 20968 }, { "epoch": 2.9388927820602664, "grad_norm": 1.8922349214553833, "learning_rate": 6.3860320497488636e-06, "loss": 0.1689, "step": 20969 }, { "epoch": 2.939032936229853, "grad_norm": 0.00875465851277113, "learning_rate": 6.371681415929203e-06, "loss": 0.0007, "step": 20970 }, { "epoch": 2.9391730903994393, "grad_norm": 0.19148333370685577, "learning_rate": 6.357330782109543e-06, "loss": 0.0096, "step": 20971 }, { "epoch": 2.9393132445690258, "grad_norm": 0.23200567066669464, "learning_rate": 6.342980148289882e-06, "loss": 0.049, "step": 20972 }, { "epoch": 2.939453398738612, "grad_norm": 0.19728942215442657, "learning_rate": 6.328629514470222e-06, "loss": 0.0168, "step": 20973 }, { "epoch": 2.939593552908199, "grad_norm": 0.21060748398303986, "learning_rate": 6.314278880650561e-06, "loss": 0.0074, "step": 20974 }, { "epoch": 2.9397337070777856, "grad_norm": 0.20466677844524384, "learning_rate": 6.299928246830901e-06, "loss": 0.0289, "step": 20975 }, { "epoch": 2.939873861247372, "grad_norm": 0.30256277322769165, "learning_rate": 6.2855776130112406e-06, "loss": 0.0273, "step": 20976 }, { "epoch": 2.9400140154169585, "grad_norm": 0.9201028943061829, "learning_rate": 6.27122697919158e-06, "loss": 0.0444, "step": 20977 }, { "epoch": 2.9401541695865454, "grad_norm": 0.2992725372314453, "learning_rate": 6.2568763453719206e-06, "loss": 0.0507, "step": 20978 }, { "epoch": 2.940294323756132, "grad_norm": 0.18159158527851105, "learning_rate": 6.24252571155226e-06, 
"loss": 0.0093, "step": 20979 }, { "epoch": 2.9404344779257183, "grad_norm": 0.23910143971443176, "learning_rate": 6.2281750777326e-06, "loss": 0.0218, "step": 20980 }, { "epoch": 2.940574632095305, "grad_norm": 0.36086729168891907, "learning_rate": 6.213824443912939e-06, "loss": 0.0342, "step": 20981 }, { "epoch": 2.9407147862648912, "grad_norm": 0.1406344622373581, "learning_rate": 6.199473810093279e-06, "loss": 0.0211, "step": 20982 }, { "epoch": 2.9408549404344777, "grad_norm": 0.2438364028930664, "learning_rate": 6.185123176273618e-06, "loss": 0.0077, "step": 20983 }, { "epoch": 2.9409950946040646, "grad_norm": 0.23444446921348572, "learning_rate": 6.170772542453958e-06, "loss": 0.0204, "step": 20984 }, { "epoch": 2.941135248773651, "grad_norm": 0.160509392619133, "learning_rate": 6.1564219086342976e-06, "loss": 0.018, "step": 20985 }, { "epoch": 2.9412754029432375, "grad_norm": 0.25123390555381775, "learning_rate": 6.142071274814637e-06, "loss": 0.0131, "step": 20986 }, { "epoch": 2.941415557112824, "grad_norm": 0.16712206602096558, "learning_rate": 6.127720640994977e-06, "loss": 0.0285, "step": 20987 }, { "epoch": 2.941555711282411, "grad_norm": 0.14667803049087524, "learning_rate": 6.113370007175316e-06, "loss": 0.0247, "step": 20988 }, { "epoch": 2.9416958654519973, "grad_norm": 0.12914063036441803, "learning_rate": 6.099019373355656e-06, "loss": 0.0035, "step": 20989 }, { "epoch": 2.941836019621584, "grad_norm": 0.010141808539628983, "learning_rate": 6.084668739535995e-06, "loss": 0.0004, "step": 20990 }, { "epoch": 2.9419761737911703, "grad_norm": 0.2180391550064087, "learning_rate": 6.070318105716335e-06, "loss": 0.0074, "step": 20991 }, { "epoch": 2.9421163279607567, "grad_norm": 0.38126474618911743, "learning_rate": 6.0559674718966746e-06, "loss": 0.0357, "step": 20992 }, { "epoch": 2.942256482130343, "grad_norm": 0.17474308609962463, "learning_rate": 6.041616838077014e-06, "loss": 0.035, "step": 20993 }, { "epoch": 2.9423966362999296, "grad_norm": 
0.13245925307273865, "learning_rate": 6.027266204257354e-06, "loss": 0.0416, "step": 20994 }, { "epoch": 2.9425367904695165, "grad_norm": 0.6099827289581299, "learning_rate": 6.012915570437693e-06, "loss": 0.0387, "step": 20995 }, { "epoch": 2.942676944639103, "grad_norm": 0.04228270426392555, "learning_rate": 5.998564936618033e-06, "loss": 0.0031, "step": 20996 }, { "epoch": 2.9428170988086895, "grad_norm": 0.12102396786212921, "learning_rate": 5.984214302798373e-06, "loss": 0.0143, "step": 20997 }, { "epoch": 2.9429572529782764, "grad_norm": 0.33325010538101196, "learning_rate": 5.969863668978713e-06, "loss": 0.015, "step": 20998 }, { "epoch": 2.943097407147863, "grad_norm": 0.2509569823741913, "learning_rate": 5.955513035159052e-06, "loss": 0.0295, "step": 20999 }, { "epoch": 2.9432375613174493, "grad_norm": 0.2118096649646759, "learning_rate": 5.941162401339392e-06, "loss": 0.0169, "step": 21000 }, { "epoch": 2.9433777154870358, "grad_norm": 0.14205168187618256, "learning_rate": 5.9268117675197315e-06, "loss": 0.0124, "step": 21001 }, { "epoch": 2.943517869656622, "grad_norm": 0.15084637701511383, "learning_rate": 5.912461133700071e-06, "loss": 0.0179, "step": 21002 }, { "epoch": 2.9436580238262087, "grad_norm": 0.18869581818580627, "learning_rate": 5.898110499880411e-06, "loss": 0.0116, "step": 21003 }, { "epoch": 2.943798177995795, "grad_norm": 0.3209415078163147, "learning_rate": 5.88375986606075e-06, "loss": 0.0609, "step": 21004 }, { "epoch": 2.943938332165382, "grad_norm": 0.3442994952201843, "learning_rate": 5.86940923224109e-06, "loss": 0.0214, "step": 21005 }, { "epoch": 2.9440784863349685, "grad_norm": 0.12832537293434143, "learning_rate": 5.855058598421429e-06, "loss": 0.0085, "step": 21006 }, { "epoch": 2.944218640504555, "grad_norm": 0.6553977131843567, "learning_rate": 5.840707964601769e-06, "loss": 0.0234, "step": 21007 }, { "epoch": 2.9443587946741414, "grad_norm": 0.01101483590900898, "learning_rate": 5.8263573307821085e-06, "loss": 0.0007, 
"step": 21008 }, { "epoch": 2.9444989488437283, "grad_norm": 0.11822237819433212, "learning_rate": 5.812006696962448e-06, "loss": 0.0114, "step": 21009 }, { "epoch": 2.9446391030133148, "grad_norm": 0.10611727088689804, "learning_rate": 5.797656063142788e-06, "loss": 0.0063, "step": 21010 }, { "epoch": 2.9447792571829012, "grad_norm": 0.6857806444168091, "learning_rate": 5.783305429323127e-06, "loss": 0.0143, "step": 21011 }, { "epoch": 2.9449194113524877, "grad_norm": 0.3767915368080139, "learning_rate": 5.768954795503467e-06, "loss": 0.0351, "step": 21012 }, { "epoch": 2.945059565522074, "grad_norm": 0.21097201108932495, "learning_rate": 5.754604161683806e-06, "loss": 0.0335, "step": 21013 }, { "epoch": 2.9451997196916606, "grad_norm": 0.12750670313835144, "learning_rate": 5.740253527864146e-06, "loss": 0.0163, "step": 21014 }, { "epoch": 2.9453398738612475, "grad_norm": 0.21914798021316528, "learning_rate": 5.725902894044487e-06, "loss": 0.0111, "step": 21015 }, { "epoch": 2.945480028030834, "grad_norm": 0.19769099354743958, "learning_rate": 5.711552260224827e-06, "loss": 0.0098, "step": 21016 }, { "epoch": 2.9456201822004204, "grad_norm": 0.3958126902580261, "learning_rate": 5.697201626405166e-06, "loss": 0.0127, "step": 21017 }, { "epoch": 2.945760336370007, "grad_norm": 0.5439279675483704, "learning_rate": 5.682850992585506e-06, "loss": 0.0253, "step": 21018 }, { "epoch": 2.945900490539594, "grad_norm": 0.5001281499862671, "learning_rate": 5.6685003587658455e-06, "loss": 0.035, "step": 21019 }, { "epoch": 2.9460406447091803, "grad_norm": 0.44221031665802, "learning_rate": 5.654149724946185e-06, "loss": 0.0478, "step": 21020 }, { "epoch": 2.9461807988787667, "grad_norm": 0.08216775953769684, "learning_rate": 5.639799091126525e-06, "loss": 0.0056, "step": 21021 }, { "epoch": 2.946320953048353, "grad_norm": 0.07792092114686966, "learning_rate": 5.625448457306864e-06, "loss": 0.0026, "step": 21022 }, { "epoch": 2.9464611072179396, "grad_norm": 
0.38634616136550903, "learning_rate": 5.611097823487204e-06, "loss": 0.0954, "step": 21023 }, { "epoch": 2.946601261387526, "grad_norm": 0.19332477450370789, "learning_rate": 5.596747189667543e-06, "loss": 0.0284, "step": 21024 }, { "epoch": 2.9467414155571126, "grad_norm": 0.19380851089954376, "learning_rate": 5.582396555847883e-06, "loss": 0.015, "step": 21025 }, { "epoch": 2.9468815697266995, "grad_norm": 0.20197142660617828, "learning_rate": 5.5680459220282225e-06, "loss": 0.0073, "step": 21026 }, { "epoch": 2.947021723896286, "grad_norm": 1.0442070960998535, "learning_rate": 5.553695288208562e-06, "loss": 0.1178, "step": 21027 }, { "epoch": 2.9471618780658724, "grad_norm": 0.08172272890806198, "learning_rate": 5.539344654388902e-06, "loss": 0.0118, "step": 21028 }, { "epoch": 2.9473020322354593, "grad_norm": 0.1193513423204422, "learning_rate": 5.524994020569241e-06, "loss": 0.0063, "step": 21029 }, { "epoch": 2.9474421864050457, "grad_norm": 0.12481007725000381, "learning_rate": 5.510643386749581e-06, "loss": 0.0062, "step": 21030 }, { "epoch": 2.947582340574632, "grad_norm": 0.24042418599128723, "learning_rate": 5.49629275292992e-06, "loss": 0.0182, "step": 21031 }, { "epoch": 2.9477224947442187, "grad_norm": 0.14098285138607025, "learning_rate": 5.48194211911026e-06, "loss": 0.0231, "step": 21032 }, { "epoch": 2.947862648913805, "grad_norm": 0.18717357516288757, "learning_rate": 5.4675914852905995e-06, "loss": 0.0125, "step": 21033 }, { "epoch": 2.9480028030833916, "grad_norm": 0.16105882823467255, "learning_rate": 5.45324085147094e-06, "loss": 0.0056, "step": 21034 }, { "epoch": 2.948142957252978, "grad_norm": 0.10270168632268906, "learning_rate": 5.4388902176512795e-06, "loss": 0.0148, "step": 21035 }, { "epoch": 2.948283111422565, "grad_norm": 0.08530911803245544, "learning_rate": 5.424539583831619e-06, "loss": 0.0086, "step": 21036 }, { "epoch": 2.9484232655921514, "grad_norm": 0.21199356019496918, "learning_rate": 5.410188950011959e-06, "loss": 0.0268, 
"step": 21037 }, { "epoch": 2.948563419761738, "grad_norm": 0.20915906131267548, "learning_rate": 5.395838316192298e-06, "loss": 0.0375, "step": 21038 }, { "epoch": 2.9487035739313243, "grad_norm": 0.167807936668396, "learning_rate": 5.381487682372638e-06, "loss": 0.0631, "step": 21039 }, { "epoch": 2.948843728100911, "grad_norm": 0.1882878690958023, "learning_rate": 5.367137048552977e-06, "loss": 0.0257, "step": 21040 }, { "epoch": 2.9489838822704977, "grad_norm": 0.262844055891037, "learning_rate": 5.352786414733317e-06, "loss": 0.0155, "step": 21041 }, { "epoch": 2.949124036440084, "grad_norm": 0.04019749164581299, "learning_rate": 5.3384357809136565e-06, "loss": 0.0028, "step": 21042 }, { "epoch": 2.9492641906096706, "grad_norm": 0.3905564844608307, "learning_rate": 5.324085147093996e-06, "loss": 0.0444, "step": 21043 }, { "epoch": 2.949404344779257, "grad_norm": 0.04536590352654457, "learning_rate": 5.309734513274336e-06, "loss": 0.0096, "step": 21044 }, { "epoch": 2.9495444989488435, "grad_norm": 0.20009024441242218, "learning_rate": 5.295383879454675e-06, "loss": 0.0506, "step": 21045 }, { "epoch": 2.9496846531184304, "grad_norm": 0.3012358844280243, "learning_rate": 5.281033245635015e-06, "loss": 0.0456, "step": 21046 }, { "epoch": 2.949824807288017, "grad_norm": 0.2205495983362198, "learning_rate": 5.266682611815354e-06, "loss": 0.0215, "step": 21047 }, { "epoch": 2.9499649614576033, "grad_norm": 0.21878769993782043, "learning_rate": 5.252331977995694e-06, "loss": 0.0315, "step": 21048 }, { "epoch": 2.95010511562719, "grad_norm": 0.1739622801542282, "learning_rate": 5.2379813441760335e-06, "loss": 0.0106, "step": 21049 }, { "epoch": 2.9502452697967767, "grad_norm": 0.08818540722131729, "learning_rate": 5.223630710356373e-06, "loss": 0.0109, "step": 21050 }, { "epoch": 2.950385423966363, "grad_norm": 0.08541856706142426, "learning_rate": 5.209280076536713e-06, "loss": 0.014, "step": 21051 }, { "epoch": 2.9505255781359496, "grad_norm": 0.20288494229316711, 
"learning_rate": 5.194929442717053e-06, "loss": 0.0134, "step": 21052 }, { "epoch": 2.950665732305536, "grad_norm": 0.3345883786678314, "learning_rate": 5.180578808897393e-06, "loss": 0.0544, "step": 21053 }, { "epoch": 2.9508058864751225, "grad_norm": 0.18844570219516754, "learning_rate": 5.166228175077732e-06, "loss": 0.0161, "step": 21054 }, { "epoch": 2.950946040644709, "grad_norm": 0.06456686556339264, "learning_rate": 5.151877541258072e-06, "loss": 0.0041, "step": 21055 }, { "epoch": 2.9510861948142955, "grad_norm": 0.14287172257900238, "learning_rate": 5.137526907438411e-06, "loss": 0.0277, "step": 21056 }, { "epoch": 2.9512263489838824, "grad_norm": 0.29339340329170227, "learning_rate": 5.123176273618751e-06, "loss": 0.009, "step": 21057 }, { "epoch": 2.951366503153469, "grad_norm": 0.036791812628507614, "learning_rate": 5.1088256397990905e-06, "loss": 0.0022, "step": 21058 }, { "epoch": 2.9515066573230553, "grad_norm": 0.09941355139017105, "learning_rate": 5.09447500597943e-06, "loss": 0.0189, "step": 21059 }, { "epoch": 2.9516468114926417, "grad_norm": 0.193936288356781, "learning_rate": 5.08012437215977e-06, "loss": 0.0222, "step": 21060 }, { "epoch": 2.9517869656622286, "grad_norm": 0.24540333449840546, "learning_rate": 5.065773738340109e-06, "loss": 0.0199, "step": 21061 }, { "epoch": 2.951927119831815, "grad_norm": 0.18111878633499146, "learning_rate": 5.051423104520449e-06, "loss": 0.021, "step": 21062 }, { "epoch": 2.9520672740014016, "grad_norm": 0.20715396106243134, "learning_rate": 5.037072470700788e-06, "loss": 0.0663, "step": 21063 }, { "epoch": 2.952207428170988, "grad_norm": 0.1730603128671646, "learning_rate": 5.022721836881128e-06, "loss": 0.0097, "step": 21064 }, { "epoch": 2.9523475823405745, "grad_norm": 0.5647817254066467, "learning_rate": 5.0083712030614675e-06, "loss": 0.0167, "step": 21065 }, { "epoch": 2.952487736510161, "grad_norm": 0.45230671763420105, "learning_rate": 4.994020569241807e-06, "loss": 0.0523, "step": 21066 }, { 
"epoch": 2.952627890679748, "grad_norm": 1.4154618978500366, "learning_rate": 4.979669935422147e-06, "loss": 0.049, "step": 21067 }, { "epoch": 2.9527680448493343, "grad_norm": 0.04920990765094757, "learning_rate": 4.965319301602486e-06, "loss": 0.0029, "step": 21068 }, { "epoch": 2.9529081990189208, "grad_norm": 0.29503774642944336, "learning_rate": 4.950968667782826e-06, "loss": 0.0183, "step": 21069 }, { "epoch": 2.9530483531885072, "grad_norm": 0.24438603222370148, "learning_rate": 4.936618033963165e-06, "loss": 0.0072, "step": 21070 }, { "epoch": 2.953188507358094, "grad_norm": 0.3728465735912323, "learning_rate": 4.922267400143507e-06, "loss": 0.037, "step": 21071 }, { "epoch": 2.9533286615276806, "grad_norm": 0.057553790509700775, "learning_rate": 4.907916766323846e-06, "loss": 0.0034, "step": 21072 }, { "epoch": 2.953468815697267, "grad_norm": 0.11790420114994049, "learning_rate": 4.893566132504186e-06, "loss": 0.0171, "step": 21073 }, { "epoch": 2.9536089698668535, "grad_norm": 0.09027604758739471, "learning_rate": 4.879215498684525e-06, "loss": 0.0051, "step": 21074 }, { "epoch": 2.95374912403644, "grad_norm": 0.19138166308403015, "learning_rate": 4.864864864864865e-06, "loss": 0.0184, "step": 21075 }, { "epoch": 2.9538892782060264, "grad_norm": 0.2814255654811859, "learning_rate": 4.8505142310452045e-06, "loss": 0.0476, "step": 21076 }, { "epoch": 2.9540294323756133, "grad_norm": 0.08900079876184464, "learning_rate": 4.836163597225544e-06, "loss": 0.0165, "step": 21077 }, { "epoch": 2.9541695865452, "grad_norm": 0.12783344089984894, "learning_rate": 4.821812963405884e-06, "loss": 0.011, "step": 21078 }, { "epoch": 2.9543097407147862, "grad_norm": 0.07159767299890518, "learning_rate": 4.807462329586223e-06, "loss": 0.0079, "step": 21079 }, { "epoch": 2.9544498948843727, "grad_norm": 0.09055927395820618, "learning_rate": 4.793111695766563e-06, "loss": 0.0042, "step": 21080 }, { "epoch": 2.9545900490539596, "grad_norm": 0.21537242829799652, "learning_rate": 
4.778761061946902e-06, "loss": 0.0115, "step": 21081 }, { "epoch": 2.954730203223546, "grad_norm": 0.10648280382156372, "learning_rate": 4.764410428127242e-06, "loss": 0.0293, "step": 21082 }, { "epoch": 2.9548703573931325, "grad_norm": 0.149746835231781, "learning_rate": 4.7500597943075815e-06, "loss": 0.0133, "step": 21083 }, { "epoch": 2.955010511562719, "grad_norm": 0.1368332803249359, "learning_rate": 4.735709160487921e-06, "loss": 0.011, "step": 21084 }, { "epoch": 2.9551506657323054, "grad_norm": 1.235548973083496, "learning_rate": 4.721358526668261e-06, "loss": 0.101, "step": 21085 }, { "epoch": 2.955290819901892, "grad_norm": 0.23924636840820312, "learning_rate": 4.7070078928486e-06, "loss": 0.0611, "step": 21086 }, { "epoch": 2.9554309740714784, "grad_norm": 0.07855143398046494, "learning_rate": 4.69265725902894e-06, "loss": 0.0086, "step": 21087 }, { "epoch": 2.9555711282410653, "grad_norm": 0.23724548518657684, "learning_rate": 4.678306625209279e-06, "loss": 0.0327, "step": 21088 }, { "epoch": 2.9557112824106517, "grad_norm": 0.16771820187568665, "learning_rate": 4.663955991389619e-06, "loss": 0.0384, "step": 21089 }, { "epoch": 2.955851436580238, "grad_norm": 0.2794395685195923, "learning_rate": 4.6496053575699585e-06, "loss": 0.0205, "step": 21090 }, { "epoch": 2.9559915907498246, "grad_norm": 0.09262455254793167, "learning_rate": 4.635254723750298e-06, "loss": 0.0026, "step": 21091 }, { "epoch": 2.9561317449194116, "grad_norm": 0.2059822827577591, "learning_rate": 4.620904089930638e-06, "loss": 0.0166, "step": 21092 }, { "epoch": 2.956271899088998, "grad_norm": 0.19926834106445312, "learning_rate": 4.606553456110977e-06, "loss": 0.0222, "step": 21093 }, { "epoch": 2.9564120532585845, "grad_norm": 0.4881141483783722, "learning_rate": 4.592202822291318e-06, "loss": 0.0277, "step": 21094 }, { "epoch": 2.956552207428171, "grad_norm": 0.03848414495587349, "learning_rate": 4.577852188471657e-06, "loss": 0.003, "step": 21095 }, { "epoch": 
2.9566923615977574, "grad_norm": 0.21560364961624146, "learning_rate": 4.563501554651997e-06, "loss": 0.0371, "step": 21096 }, { "epoch": 2.956832515767344, "grad_norm": 0.4213297963142395, "learning_rate": 4.549150920832336e-06, "loss": 0.0545, "step": 21097 }, { "epoch": 2.9569726699369308, "grad_norm": 0.06178172677755356, "learning_rate": 4.534800287012676e-06, "loss": 0.0107, "step": 21098 }, { "epoch": 2.957112824106517, "grad_norm": 0.09656988084316254, "learning_rate": 4.5204496531930155e-06, "loss": 0.0055, "step": 21099 }, { "epoch": 2.9572529782761037, "grad_norm": 0.23314829170703888, "learning_rate": 4.506099019373355e-06, "loss": 0.0149, "step": 21100 }, { "epoch": 2.95739313244569, "grad_norm": 0.16913571953773499, "learning_rate": 4.491748385553695e-06, "loss": 0.0411, "step": 21101 }, { "epoch": 2.957533286615277, "grad_norm": 0.08227922022342682, "learning_rate": 4.477397751734034e-06, "loss": 0.0119, "step": 21102 }, { "epoch": 2.9576734407848635, "grad_norm": 0.14493155479431152, "learning_rate": 4.463047117914374e-06, "loss": 0.0038, "step": 21103 }, { "epoch": 2.95781359495445, "grad_norm": 0.18913684785366058, "learning_rate": 4.448696484094714e-06, "loss": 0.0423, "step": 21104 }, { "epoch": 2.9579537491240364, "grad_norm": 0.2183585911989212, "learning_rate": 4.434345850275054e-06, "loss": 0.0207, "step": 21105 }, { "epoch": 2.958093903293623, "grad_norm": 0.24108831584453583, "learning_rate": 4.419995216455393e-06, "loss": 0.0143, "step": 21106 }, { "epoch": 2.9582340574632093, "grad_norm": 0.5341862440109253, "learning_rate": 4.405644582635733e-06, "loss": 0.0259, "step": 21107 }, { "epoch": 2.9583742116327962, "grad_norm": 0.10886256396770477, "learning_rate": 4.3912939488160725e-06, "loss": 0.0101, "step": 21108 }, { "epoch": 2.9585143658023827, "grad_norm": 0.09370721131563187, "learning_rate": 4.376943314996412e-06, "loss": 0.0157, "step": 21109 }, { "epoch": 2.958654519971969, "grad_norm": 0.5097808241844177, "learning_rate": 
4.362592681176752e-06, "loss": 0.0176, "step": 21110 }, { "epoch": 2.9587946741415556, "grad_norm": 0.18530426919460297, "learning_rate": 4.348242047357091e-06, "loss": 0.0092, "step": 21111 }, { "epoch": 2.9589348283111425, "grad_norm": 0.23256361484527588, "learning_rate": 4.333891413537431e-06, "loss": 0.022, "step": 21112 }, { "epoch": 2.959074982480729, "grad_norm": 0.223111093044281, "learning_rate": 4.31954077971777e-06, "loss": 0.0198, "step": 21113 }, { "epoch": 2.9592151366503154, "grad_norm": 0.11564124375581741, "learning_rate": 4.30519014589811e-06, "loss": 0.0093, "step": 21114 }, { "epoch": 2.959355290819902, "grad_norm": 0.34722933173179626, "learning_rate": 4.2908395120784495e-06, "loss": 0.0111, "step": 21115 }, { "epoch": 2.9594954449894884, "grad_norm": 0.17747914791107178, "learning_rate": 4.276488878258789e-06, "loss": 0.0322, "step": 21116 }, { "epoch": 2.959635599159075, "grad_norm": 0.0960494801402092, "learning_rate": 4.262138244439129e-06, "loss": 0.0105, "step": 21117 }, { "epoch": 2.9597757533286613, "grad_norm": 0.870903730392456, "learning_rate": 4.247787610619468e-06, "loss": 0.1474, "step": 21118 }, { "epoch": 2.959915907498248, "grad_norm": 2.2370543479919434, "learning_rate": 4.233436976799808e-06, "loss": 0.4678, "step": 21119 }, { "epoch": 2.9600560616678346, "grad_norm": 0.023159483447670937, "learning_rate": 4.219086342980147e-06, "loss": 0.0016, "step": 21120 }, { "epoch": 2.960196215837421, "grad_norm": 0.12340908497571945, "learning_rate": 4.204735709160487e-06, "loss": 0.0101, "step": 21121 }, { "epoch": 2.9603363700070076, "grad_norm": 0.463870644569397, "learning_rate": 4.190385075340827e-06, "loss": 0.0266, "step": 21122 }, { "epoch": 2.9604765241765945, "grad_norm": 0.34553825855255127, "learning_rate": 4.176034441521167e-06, "loss": 0.0283, "step": 21123 }, { "epoch": 2.960616678346181, "grad_norm": 0.10228119790554047, "learning_rate": 4.1616838077015065e-06, "loss": 0.0027, "step": 21124 }, { "epoch": 
2.9607568325157674, "grad_norm": 0.07837118953466415, "learning_rate": 4.147333173881846e-06, "loss": 0.0062, "step": 21125 }, { "epoch": 2.960896986685354, "grad_norm": 0.3279400169849396, "learning_rate": 4.132982540062186e-06, "loss": 0.0451, "step": 21126 }, { "epoch": 2.9610371408549403, "grad_norm": 0.25230130553245544, "learning_rate": 4.118631906242525e-06, "loss": 0.0199, "step": 21127 }, { "epoch": 2.9611772950245268, "grad_norm": 0.10527370870113373, "learning_rate": 4.104281272422865e-06, "loss": 0.0244, "step": 21128 }, { "epoch": 2.9613174491941137, "grad_norm": 0.2723374664783478, "learning_rate": 4.089930638603204e-06, "loss": 0.0619, "step": 21129 }, { "epoch": 2.9614576033637, "grad_norm": 0.15058882534503937, "learning_rate": 4.075580004783544e-06, "loss": 0.0183, "step": 21130 }, { "epoch": 2.9615977575332866, "grad_norm": 0.15751156210899353, "learning_rate": 4.061229370963884e-06, "loss": 0.0055, "step": 21131 }, { "epoch": 2.961737911702873, "grad_norm": 0.12466356158256531, "learning_rate": 4.046878737144224e-06, "loss": 0.0231, "step": 21132 }, { "epoch": 2.96187806587246, "grad_norm": 0.15521079301834106, "learning_rate": 4.0325281033245635e-06, "loss": 0.0354, "step": 21133 }, { "epoch": 2.9620182200420464, "grad_norm": 0.05635342374444008, "learning_rate": 4.018177469504903e-06, "loss": 0.0033, "step": 21134 }, { "epoch": 2.962158374211633, "grad_norm": 0.2846575677394867, "learning_rate": 4.003826835685243e-06, "loss": 0.0201, "step": 21135 }, { "epoch": 2.9622985283812193, "grad_norm": 0.14820873737335205, "learning_rate": 3.989476201865582e-06, "loss": 0.0329, "step": 21136 }, { "epoch": 2.962438682550806, "grad_norm": 0.01651100255548954, "learning_rate": 3.975125568045922e-06, "loss": 0.0014, "step": 21137 }, { "epoch": 2.9625788367203922, "grad_norm": 0.08997229486703873, "learning_rate": 3.960774934226261e-06, "loss": 0.0097, "step": 21138 }, { "epoch": 2.962718990889979, "grad_norm": 0.0956442579627037, "learning_rate": 
3.946424300406601e-06, "loss": 0.012, "step": 21139 }, { "epoch": 2.9628591450595656, "grad_norm": 0.3473750650882721, "learning_rate": 3.9320736665869405e-06, "loss": 0.0194, "step": 21140 }, { "epoch": 2.962999299229152, "grad_norm": 0.347017765045166, "learning_rate": 3.91772303276728e-06, "loss": 0.0183, "step": 21141 }, { "epoch": 2.9631394533987385, "grad_norm": 0.21526440978050232, "learning_rate": 3.90337239894762e-06, "loss": 0.019, "step": 21142 }, { "epoch": 2.9632796075683254, "grad_norm": 0.08299297094345093, "learning_rate": 3.889021765127959e-06, "loss": 0.0106, "step": 21143 }, { "epoch": 2.963419761737912, "grad_norm": 0.35823145508766174, "learning_rate": 3.874671131308299e-06, "loss": 0.09, "step": 21144 }, { "epoch": 2.9635599159074983, "grad_norm": 0.3212105333805084, "learning_rate": 3.860320497488638e-06, "loss": 0.0308, "step": 21145 }, { "epoch": 2.963700070077085, "grad_norm": 0.13644059002399445, "learning_rate": 3.845969863668978e-06, "loss": 0.0044, "step": 21146 }, { "epoch": 2.9638402242466713, "grad_norm": 0.09371822327375412, "learning_rate": 3.831619229849318e-06, "loss": 0.0174, "step": 21147 }, { "epoch": 2.9639803784162577, "grad_norm": 0.1725771129131317, "learning_rate": 3.817268596029658e-06, "loss": 0.0202, "step": 21148 }, { "epoch": 2.964120532585844, "grad_norm": 0.5339385867118835, "learning_rate": 3.802917962209997e-06, "loss": 0.0257, "step": 21149 }, { "epoch": 2.964260686755431, "grad_norm": 0.10800144821405411, "learning_rate": 3.788567328390337e-06, "loss": 0.0304, "step": 21150 }, { "epoch": 2.9644008409250175, "grad_norm": 0.054744333028793335, "learning_rate": 3.7742166945706766e-06, "loss": 0.0031, "step": 21151 }, { "epoch": 2.964540995094604, "grad_norm": 0.0457860492169857, "learning_rate": 3.759866060751016e-06, "loss": 0.0048, "step": 21152 }, { "epoch": 2.9646811492641905, "grad_norm": 0.0574570968747139, "learning_rate": 3.7455154269313557e-06, "loss": 0.0041, "step": 21153 }, { "epoch": 
2.9648213034337774, "grad_norm": 0.14818532764911652, "learning_rate": 3.7311647931116953e-06, "loss": 0.0069, "step": 21154 }, { "epoch": 2.964961457603364, "grad_norm": 0.08829306066036224, "learning_rate": 3.716814159292035e-06, "loss": 0.0142, "step": 21155 }, { "epoch": 2.9651016117729503, "grad_norm": 0.07578791677951813, "learning_rate": 3.7024635254723744e-06, "loss": 0.006, "step": 21156 }, { "epoch": 2.9652417659425367, "grad_norm": 0.04134010896086693, "learning_rate": 3.688112891652714e-06, "loss": 0.0022, "step": 21157 }, { "epoch": 2.965381920112123, "grad_norm": 0.08086366951465607, "learning_rate": 3.6737622578330536e-06, "loss": 0.0142, "step": 21158 }, { "epoch": 2.9655220742817097, "grad_norm": 0.05936136841773987, "learning_rate": 3.659411624013394e-06, "loss": 0.007, "step": 21159 }, { "epoch": 2.9656622284512966, "grad_norm": 0.2730533182621002, "learning_rate": 3.6450609901937336e-06, "loss": 0.0184, "step": 21160 }, { "epoch": 2.965802382620883, "grad_norm": 0.2608994245529175, "learning_rate": 3.630710356374073e-06, "loss": 0.0182, "step": 21161 }, { "epoch": 2.9659425367904695, "grad_norm": 0.16505511105060577, "learning_rate": 3.6163597225544127e-06, "loss": 0.005, "step": 21162 }, { "epoch": 2.966082690960056, "grad_norm": 0.00855859275907278, "learning_rate": 3.6020090887347523e-06, "loss": 0.0007, "step": 21163 }, { "epoch": 2.966222845129643, "grad_norm": 0.5090882778167725, "learning_rate": 3.587658454915092e-06, "loss": 0.0544, "step": 21164 }, { "epoch": 2.9663629992992293, "grad_norm": 0.7754547595977783, "learning_rate": 3.5733078210954314e-06, "loss": 0.0114, "step": 21165 }, { "epoch": 2.9665031534688158, "grad_norm": 0.2170303910970688, "learning_rate": 3.558957187275771e-06, "loss": 0.0614, "step": 21166 }, { "epoch": 2.9666433076384022, "grad_norm": 0.669796347618103, "learning_rate": 3.5446065534561106e-06, "loss": 0.0094, "step": 21167 }, { "epoch": 2.9667834618079887, "grad_norm": 0.6994901299476624, "learning_rate": 
3.53025591963645e-06, "loss": 0.0144, "step": 21168 }, { "epoch": 2.966923615977575, "grad_norm": 0.37669000029563904, "learning_rate": 3.51590528581679e-06, "loss": 0.0921, "step": 21169 }, { "epoch": 2.9670637701471616, "grad_norm": 0.6713768243789673, "learning_rate": 3.5015546519971297e-06, "loss": 0.1055, "step": 21170 }, { "epoch": 2.9672039243167485, "grad_norm": 0.20481841266155243, "learning_rate": 3.4872040181774693e-06, "loss": 0.0346, "step": 21171 }, { "epoch": 2.967344078486335, "grad_norm": 0.20191913843154907, "learning_rate": 3.472853384357809e-06, "loss": 0.0474, "step": 21172 }, { "epoch": 2.9674842326559214, "grad_norm": 0.12157867103815079, "learning_rate": 3.4585027505381484e-06, "loss": 0.0121, "step": 21173 }, { "epoch": 2.9676243868255083, "grad_norm": 0.01615927927196026, "learning_rate": 3.444152116718488e-06, "loss": 0.0011, "step": 21174 }, { "epoch": 2.967764540995095, "grad_norm": 0.12683695554733276, "learning_rate": 3.4298014828988276e-06, "loss": 0.0139, "step": 21175 }, { "epoch": 2.9679046951646813, "grad_norm": 0.2118932157754898, "learning_rate": 3.415450849079167e-06, "loss": 0.0132, "step": 21176 }, { "epoch": 2.9680448493342677, "grad_norm": 0.17958365380764008, "learning_rate": 3.4011002152595067e-06, "loss": 0.0442, "step": 21177 }, { "epoch": 2.968185003503854, "grad_norm": 0.20443286001682281, "learning_rate": 3.3867495814398467e-06, "loss": 0.0278, "step": 21178 }, { "epoch": 2.9683251576734406, "grad_norm": 0.1908264011144638, "learning_rate": 3.3723989476201863e-06, "loss": 0.0257, "step": 21179 }, { "epoch": 2.968465311843027, "grad_norm": 0.09006085246801376, "learning_rate": 3.358048313800526e-06, "loss": 0.0269, "step": 21180 }, { "epoch": 2.968605466012614, "grad_norm": 0.3181600570678711, "learning_rate": 3.3436976799808654e-06, "loss": 0.0253, "step": 21181 }, { "epoch": 2.9687456201822005, "grad_norm": 0.16416595876216888, "learning_rate": 3.329347046161205e-06, "loss": 0.0102, "step": 21182 }, { "epoch": 
2.968885774351787, "grad_norm": 0.10876847058534622, "learning_rate": 3.3149964123415446e-06, "loss": 0.0197, "step": 21183 }, { "epoch": 2.9690259285213734, "grad_norm": 0.17660491168498993, "learning_rate": 3.300645778521884e-06, "loss": 0.0163, "step": 21184 }, { "epoch": 2.9691660826909603, "grad_norm": 0.08246561139822006, "learning_rate": 3.2862951447022237e-06, "loss": 0.0086, "step": 21185 }, { "epoch": 2.9693062368605467, "grad_norm": 0.18474839627742767, "learning_rate": 3.2719445108825633e-06, "loss": 0.0479, "step": 21186 }, { "epoch": 2.969446391030133, "grad_norm": 0.31320396065711975, "learning_rate": 3.2575938770629037e-06, "loss": 0.0675, "step": 21187 }, { "epoch": 2.9695865451997197, "grad_norm": 0.11197633296251297, "learning_rate": 3.2432432432432433e-06, "loss": 0.0124, "step": 21188 }, { "epoch": 2.969726699369306, "grad_norm": 0.05305375158786774, "learning_rate": 3.228892609423583e-06, "loss": 0.0065, "step": 21189 }, { "epoch": 2.9698668535388926, "grad_norm": 0.31757596135139465, "learning_rate": 3.2145419756039224e-06, "loss": 0.0538, "step": 21190 }, { "epoch": 2.9700070077084795, "grad_norm": 0.1560099720954895, "learning_rate": 3.200191341784262e-06, "loss": 0.0303, "step": 21191 }, { "epoch": 2.970147161878066, "grad_norm": 0.2689273953437805, "learning_rate": 3.1858407079646016e-06, "loss": 0.0148, "step": 21192 }, { "epoch": 2.9702873160476524, "grad_norm": 0.04956325888633728, "learning_rate": 3.171490074144941e-06, "loss": 0.0029, "step": 21193 }, { "epoch": 2.970427470217239, "grad_norm": 0.1273495852947235, "learning_rate": 3.1571394403252807e-06, "loss": 0.0151, "step": 21194 }, { "epoch": 2.9705676243868258, "grad_norm": 0.2906442880630493, "learning_rate": 3.1427888065056203e-06, "loss": 0.0348, "step": 21195 }, { "epoch": 2.970707778556412, "grad_norm": 0.11216489970684052, "learning_rate": 3.1284381726859603e-06, "loss": 0.0067, "step": 21196 }, { "epoch": 2.9708479327259987, "grad_norm": 0.05651843920350075, 
"learning_rate": 3.1140875388663e-06, "loss": 0.0057, "step": 21197 }, { "epoch": 2.970988086895585, "grad_norm": 0.26865845918655396, "learning_rate": 3.0997369050466394e-06, "loss": 0.0145, "step": 21198 }, { "epoch": 2.9711282410651716, "grad_norm": 0.10654281079769135, "learning_rate": 3.085386271226979e-06, "loss": 0.0064, "step": 21199 }, { "epoch": 2.971268395234758, "grad_norm": 0.18553903698921204, "learning_rate": 3.0710356374073186e-06, "loss": 0.0255, "step": 21200 }, { "epoch": 2.9714085494043445, "grad_norm": 0.2749457061290741, "learning_rate": 3.056685003587658e-06, "loss": 0.0313, "step": 21201 }, { "epoch": 2.9715487035739314, "grad_norm": 0.2068312168121338, "learning_rate": 3.0423343697679977e-06, "loss": 0.0119, "step": 21202 }, { "epoch": 2.971688857743518, "grad_norm": 0.42408275604248047, "learning_rate": 3.0279837359483373e-06, "loss": 0.0464, "step": 21203 }, { "epoch": 2.9718290119131043, "grad_norm": 0.2625313997268677, "learning_rate": 3.013633102128677e-06, "loss": 0.049, "step": 21204 }, { "epoch": 2.971969166082691, "grad_norm": 0.15315940976142883, "learning_rate": 2.9992824683090164e-06, "loss": 0.0158, "step": 21205 }, { "epoch": 2.9721093202522777, "grad_norm": 0.23790588974952698, "learning_rate": 2.9849318344893564e-06, "loss": 0.0157, "step": 21206 }, { "epoch": 2.972249474421864, "grad_norm": 0.18927103281021118, "learning_rate": 2.970581200669696e-06, "loss": 0.0529, "step": 21207 }, { "epoch": 2.9723896285914506, "grad_norm": 0.16035114228725433, "learning_rate": 2.9562305668500356e-06, "loss": 0.0169, "step": 21208 }, { "epoch": 2.972529782761037, "grad_norm": 0.11114153265953064, "learning_rate": 2.941879933030375e-06, "loss": 0.0174, "step": 21209 }, { "epoch": 2.9726699369306235, "grad_norm": 0.3049420118331909, "learning_rate": 2.9275292992107147e-06, "loss": 0.0421, "step": 21210 }, { "epoch": 2.97281009110021, "grad_norm": 0.006898100487887859, "learning_rate": 2.9131786653910543e-06, "loss": 0.0007, "step": 21211 }, 
{ "epoch": 2.972950245269797, "grad_norm": 0.4988664984703064, "learning_rate": 2.898828031571394e-06, "loss": 0.079, "step": 21212 }, { "epoch": 2.9730903994393834, "grad_norm": 0.5038439035415649, "learning_rate": 2.8844773977517334e-06, "loss": 0.0535, "step": 21213 }, { "epoch": 2.97323055360897, "grad_norm": 0.31087058782577515, "learning_rate": 2.870126763932073e-06, "loss": 0.0182, "step": 21214 }, { "epoch": 2.9733707077785563, "grad_norm": 0.1631285548210144, "learning_rate": 2.8557761301124134e-06, "loss": 0.0085, "step": 21215 }, { "epoch": 2.973510861948143, "grad_norm": 0.05021699145436287, "learning_rate": 2.841425496292753e-06, "loss": 0.0045, "step": 21216 }, { "epoch": 2.9736510161177296, "grad_norm": 0.9280708432197571, "learning_rate": 2.8270748624730925e-06, "loss": 0.0167, "step": 21217 }, { "epoch": 2.973791170287316, "grad_norm": 0.6436378955841064, "learning_rate": 2.812724228653432e-06, "loss": 0.1539, "step": 21218 }, { "epoch": 2.9739313244569026, "grad_norm": 0.02286190167069435, "learning_rate": 2.7983735948337717e-06, "loss": 0.0015, "step": 21219 }, { "epoch": 2.974071478626489, "grad_norm": 0.34060630202293396, "learning_rate": 2.7840229610141113e-06, "loss": 0.0462, "step": 21220 }, { "epoch": 2.9742116327960755, "grad_norm": 0.15698304772377014, "learning_rate": 2.769672327194451e-06, "loss": 0.0312, "step": 21221 }, { "epoch": 2.9743517869656624, "grad_norm": 0.18912239372730255, "learning_rate": 2.7553216933747904e-06, "loss": 0.0247, "step": 21222 }, { "epoch": 2.974491941135249, "grad_norm": 0.18822844326496124, "learning_rate": 2.74097105955513e-06, "loss": 0.0383, "step": 21223 }, { "epoch": 2.9746320953048353, "grad_norm": 0.12350863218307495, "learning_rate": 2.72662042573547e-06, "loss": 0.0126, "step": 21224 }, { "epoch": 2.9747722494744218, "grad_norm": 0.1239120289683342, "learning_rate": 2.7122697919158095e-06, "loss": 0.0069, "step": 21225 }, { "epoch": 2.9749124036440087, "grad_norm": 0.03043368086218834, 
"learning_rate": 2.697919158096149e-06, "loss": 0.0055, "step": 21226 }, { "epoch": 2.975052557813595, "grad_norm": 0.14320045709609985, "learning_rate": 2.6835685242764887e-06, "loss": 0.0126, "step": 21227 }, { "epoch": 2.9751927119831816, "grad_norm": 0.1972735971212387, "learning_rate": 2.6692178904568283e-06, "loss": 0.0165, "step": 21228 }, { "epoch": 2.975332866152768, "grad_norm": 0.4378927946090698, "learning_rate": 2.654867256637168e-06, "loss": 0.0173, "step": 21229 }, { "epoch": 2.9754730203223545, "grad_norm": 0.17313575744628906, "learning_rate": 2.6405166228175074e-06, "loss": 0.0119, "step": 21230 }, { "epoch": 2.975613174491941, "grad_norm": 0.3695804178714752, "learning_rate": 2.626165988997847e-06, "loss": 0.0159, "step": 21231 }, { "epoch": 2.9757533286615274, "grad_norm": 0.10442319512367249, "learning_rate": 2.6118153551781865e-06, "loss": 0.0276, "step": 21232 }, { "epoch": 2.9758934828311143, "grad_norm": 0.08686045557260513, "learning_rate": 2.5974647213585265e-06, "loss": 0.0076, "step": 21233 }, { "epoch": 2.976033637000701, "grad_norm": 0.15557065606117249, "learning_rate": 2.583114087538866e-06, "loss": 0.0437, "step": 21234 }, { "epoch": 2.9761737911702872, "grad_norm": 0.09110182523727417, "learning_rate": 2.5687634537192057e-06, "loss": 0.0123, "step": 21235 }, { "epoch": 2.9763139453398737, "grad_norm": 0.13279235363006592, "learning_rate": 2.5544128198995453e-06, "loss": 0.009, "step": 21236 }, { "epoch": 2.9764540995094606, "grad_norm": 0.1045272946357727, "learning_rate": 2.540062186079885e-06, "loss": 0.0132, "step": 21237 }, { "epoch": 2.976594253679047, "grad_norm": 0.4879589378833771, "learning_rate": 2.5257115522602244e-06, "loss": 0.0385, "step": 21238 }, { "epoch": 2.9767344078486335, "grad_norm": 0.4535438120365143, "learning_rate": 2.511360918440564e-06, "loss": 0.0265, "step": 21239 }, { "epoch": 2.97687456201822, "grad_norm": 0.1670013666152954, "learning_rate": 2.4970102846209035e-06, "loss": 0.0199, "step": 21240 }, 
{ "epoch": 2.9770147161878064, "grad_norm": 0.08886120468378067, "learning_rate": 2.482659650801243e-06, "loss": 0.0071, "step": 21241 }, { "epoch": 2.977154870357393, "grad_norm": 0.12215469032526016, "learning_rate": 2.4683090169815827e-06, "loss": 0.0065, "step": 21242 }, { "epoch": 2.97729502452698, "grad_norm": 0.0543903149664402, "learning_rate": 2.453958383161923e-06, "loss": 0.0078, "step": 21243 }, { "epoch": 2.9774351786965663, "grad_norm": 0.1725052148103714, "learning_rate": 2.4396077493422627e-06, "loss": 0.0447, "step": 21244 }, { "epoch": 2.9775753328661527, "grad_norm": 0.1441146433353424, "learning_rate": 2.4252571155226022e-06, "loss": 0.0101, "step": 21245 }, { "epoch": 2.977715487035739, "grad_norm": 0.3440361022949219, "learning_rate": 2.410906481702942e-06, "loss": 0.0211, "step": 21246 }, { "epoch": 2.977855641205326, "grad_norm": 0.3911580741405487, "learning_rate": 2.3965558478832814e-06, "loss": 0.0714, "step": 21247 }, { "epoch": 2.9779957953749125, "grad_norm": 0.2645511031150818, "learning_rate": 2.382205214063621e-06, "loss": 0.0086, "step": 21248 }, { "epoch": 2.978135949544499, "grad_norm": 0.04280121996998787, "learning_rate": 2.3678545802439605e-06, "loss": 0.0034, "step": 21249 }, { "epoch": 2.9782761037140855, "grad_norm": 0.15920425951480865, "learning_rate": 2.3535039464243e-06, "loss": 0.0224, "step": 21250 }, { "epoch": 2.978416257883672, "grad_norm": 0.4336516261100769, "learning_rate": 2.3391533126046397e-06, "loss": 0.0482, "step": 21251 }, { "epoch": 2.9785564120532584, "grad_norm": 0.039143770933151245, "learning_rate": 2.3248026787849792e-06, "loss": 0.0046, "step": 21252 }, { "epoch": 2.9786965662228453, "grad_norm": 0.6950653195381165, "learning_rate": 2.310452044965319e-06, "loss": 0.0808, "step": 21253 }, { "epoch": 2.9788367203924317, "grad_norm": 0.03702020272612572, "learning_rate": 2.296101411145659e-06, "loss": 0.0016, "step": 21254 }, { "epoch": 2.978976874562018, "grad_norm": 0.11000578850507736, 
"learning_rate": 2.2817507773259984e-06, "loss": 0.0097, "step": 21255 }, { "epoch": 2.9791170287316047, "grad_norm": 0.08814690262079239, "learning_rate": 2.267400143506338e-06, "loss": 0.0066, "step": 21256 }, { "epoch": 2.9792571829011916, "grad_norm": 0.1446048766374588, "learning_rate": 2.2530495096866775e-06, "loss": 0.0113, "step": 21257 }, { "epoch": 2.979397337070778, "grad_norm": 0.028340548276901245, "learning_rate": 2.238698875867017e-06, "loss": 0.001, "step": 21258 }, { "epoch": 2.9795374912403645, "grad_norm": 0.07785959541797638, "learning_rate": 2.224348242047357e-06, "loss": 0.0071, "step": 21259 }, { "epoch": 2.979677645409951, "grad_norm": 0.24910004436969757, "learning_rate": 2.2099976082276967e-06, "loss": 0.0154, "step": 21260 }, { "epoch": 2.9798177995795374, "grad_norm": 1.5766299962997437, "learning_rate": 2.1956469744080362e-06, "loss": 0.1005, "step": 21261 }, { "epoch": 2.979957953749124, "grad_norm": 0.3562525510787964, "learning_rate": 2.181296340588376e-06, "loss": 0.0675, "step": 21262 }, { "epoch": 2.9800981079187103, "grad_norm": 0.22078558802604675, "learning_rate": 2.1669457067687154e-06, "loss": 0.0323, "step": 21263 }, { "epoch": 2.9802382620882972, "grad_norm": 0.26065829396247864, "learning_rate": 2.152595072949055e-06, "loss": 0.0148, "step": 21264 }, { "epoch": 2.9803784162578837, "grad_norm": 0.18954956531524658, "learning_rate": 2.1382444391293945e-06, "loss": 0.0076, "step": 21265 }, { "epoch": 2.98051857042747, "grad_norm": 0.704615592956543, "learning_rate": 2.123893805309734e-06, "loss": 0.0202, "step": 21266 }, { "epoch": 2.9806587245970566, "grad_norm": 0.4070110321044922, "learning_rate": 2.1095431714900737e-06, "loss": 0.0789, "step": 21267 }, { "epoch": 2.9807988787666435, "grad_norm": 1.0493751764297485, "learning_rate": 2.0951925376704137e-06, "loss": 0.0793, "step": 21268 }, { "epoch": 2.98093903293623, "grad_norm": 0.14510242640972137, "learning_rate": 2.0808419038507532e-06, "loss": 0.0108, "step": 21269 }, 
{ "epoch": 2.9810791871058164, "grad_norm": 1.8625648021697998, "learning_rate": 2.066491270031093e-06, "loss": 0.0589, "step": 21270 }, { "epoch": 2.981219341275403, "grad_norm": 0.09420888870954514, "learning_rate": 2.0521406362114324e-06, "loss": 0.0089, "step": 21271 }, { "epoch": 2.9813594954449893, "grad_norm": 0.15646785497665405, "learning_rate": 2.037790002391772e-06, "loss": 0.0227, "step": 21272 }, { "epoch": 2.981499649614576, "grad_norm": 0.2971125841140747, "learning_rate": 2.023439368572112e-06, "loss": 0.0716, "step": 21273 }, { "epoch": 2.9816398037841627, "grad_norm": 0.8802768588066101, "learning_rate": 2.0090887347524515e-06, "loss": 0.0774, "step": 21274 }, { "epoch": 2.981779957953749, "grad_norm": 0.120146244764328, "learning_rate": 1.994738100932791e-06, "loss": 0.0082, "step": 21275 }, { "epoch": 2.9819201121233356, "grad_norm": 0.2393527626991272, "learning_rate": 1.9803874671131307e-06, "loss": 0.0176, "step": 21276 }, { "epoch": 2.982060266292922, "grad_norm": 0.18858370184898376, "learning_rate": 1.9660368332934702e-06, "loss": 0.0153, "step": 21277 }, { "epoch": 2.982200420462509, "grad_norm": 0.2839561402797699, "learning_rate": 1.95168619947381e-06, "loss": 0.0292, "step": 21278 }, { "epoch": 2.9823405746320955, "grad_norm": 0.2131810337305069, "learning_rate": 1.9373355656541494e-06, "loss": 0.0108, "step": 21279 }, { "epoch": 2.982480728801682, "grad_norm": 0.04391753301024437, "learning_rate": 1.922984931834489e-06, "loss": 0.0029, "step": 21280 }, { "epoch": 2.9826208829712684, "grad_norm": 0.33264344930648804, "learning_rate": 1.908634298014829e-06, "loss": 0.0608, "step": 21281 }, { "epoch": 2.982761037140855, "grad_norm": 0.13141390681266785, "learning_rate": 1.8942836641951685e-06, "loss": 0.0228, "step": 21282 }, { "epoch": 2.9829011913104413, "grad_norm": 0.1918085515499115, "learning_rate": 1.879933030375508e-06, "loss": 0.0096, "step": 21283 }, { "epoch": 2.983041345480028, "grad_norm": 0.152059406042099, "learning_rate": 
1.8655823965558477e-06, "loss": 0.0314, "step": 21284 }, { "epoch": 2.9831814996496147, "grad_norm": 0.17291635274887085, "learning_rate": 1.8512317627361872e-06, "loss": 0.0386, "step": 21285 }, { "epoch": 2.983321653819201, "grad_norm": 0.05877182260155678, "learning_rate": 1.8368811289165268e-06, "loss": 0.0035, "step": 21286 }, { "epoch": 2.9834618079887876, "grad_norm": 0.13336123526096344, "learning_rate": 1.8225304950968668e-06, "loss": 0.0142, "step": 21287 }, { "epoch": 2.9836019621583745, "grad_norm": 0.2553093731403351, "learning_rate": 1.8081798612772064e-06, "loss": 0.0566, "step": 21288 }, { "epoch": 2.983742116327961, "grad_norm": 0.15615487098693848, "learning_rate": 1.793829227457546e-06, "loss": 0.0256, "step": 21289 }, { "epoch": 2.9838822704975474, "grad_norm": 0.19886676967144012, "learning_rate": 1.7794785936378855e-06, "loss": 0.0665, "step": 21290 }, { "epoch": 2.984022424667134, "grad_norm": 0.03692477568984032, "learning_rate": 1.765127959818225e-06, "loss": 0.0052, "step": 21291 }, { "epoch": 2.9841625788367203, "grad_norm": 0.2963419556617737, "learning_rate": 1.7507773259985649e-06, "loss": 0.0457, "step": 21292 }, { "epoch": 2.9843027330063068, "grad_norm": 0.2805601954460144, "learning_rate": 1.7364266921789044e-06, "loss": 0.0425, "step": 21293 }, { "epoch": 2.9844428871758932, "grad_norm": 0.6528148651123047, "learning_rate": 1.722076058359244e-06, "loss": 0.0406, "step": 21294 }, { "epoch": 2.98458304134548, "grad_norm": 0.2736854553222656, "learning_rate": 1.7077254245395836e-06, "loss": 0.0068, "step": 21295 }, { "epoch": 2.9847231955150666, "grad_norm": 0.06643366068601608, "learning_rate": 1.6933747907199234e-06, "loss": 0.0037, "step": 21296 }, { "epoch": 2.984863349684653, "grad_norm": 0.710904061794281, "learning_rate": 1.679024156900263e-06, "loss": 0.0413, "step": 21297 }, { "epoch": 2.9850035038542395, "grad_norm": 0.231459379196167, "learning_rate": 1.6646735230806025e-06, "loss": 0.0232, "step": 21298 }, { "epoch": 
2.9851436580238264, "grad_norm": 0.051056526601314545, "learning_rate": 1.650322889260942e-06, "loss": 0.0029, "step": 21299 }, { "epoch": 2.985283812193413, "grad_norm": 0.03607397899031639, "learning_rate": 1.6359722554412816e-06, "loss": 0.002, "step": 21300 }, { "epoch": 2.9854239663629993, "grad_norm": 0.14366395771503448, "learning_rate": 1.6216216216216216e-06, "loss": 0.0094, "step": 21301 }, { "epoch": 2.985564120532586, "grad_norm": 0.22030656039714813, "learning_rate": 1.6072709878019612e-06, "loss": 0.0211, "step": 21302 }, { "epoch": 2.9857042747021723, "grad_norm": 0.5090020895004272, "learning_rate": 1.5929203539823008e-06, "loss": 0.024, "step": 21303 }, { "epoch": 2.9858444288717587, "grad_norm": 0.1953844577074051, "learning_rate": 1.5785697201626404e-06, "loss": 0.0234, "step": 21304 }, { "epoch": 2.9859845830413456, "grad_norm": 0.12596209347248077, "learning_rate": 1.5642190863429801e-06, "loss": 0.0037, "step": 21305 }, { "epoch": 2.986124737210932, "grad_norm": 0.22437943518161774, "learning_rate": 1.5498684525233197e-06, "loss": 0.0233, "step": 21306 }, { "epoch": 2.9862648913805185, "grad_norm": 0.09167762845754623, "learning_rate": 1.5355178187036593e-06, "loss": 0.0065, "step": 21307 }, { "epoch": 2.986405045550105, "grad_norm": 0.05860983580350876, "learning_rate": 1.5211671848839989e-06, "loss": 0.0028, "step": 21308 }, { "epoch": 2.986545199719692, "grad_norm": 0.11762921512126923, "learning_rate": 1.5068165510643384e-06, "loss": 0.0271, "step": 21309 }, { "epoch": 2.9866853538892784, "grad_norm": 0.022226884961128235, "learning_rate": 1.4924659172446782e-06, "loss": 0.0022, "step": 21310 }, { "epoch": 2.986825508058865, "grad_norm": 0.03915843367576599, "learning_rate": 1.4781152834250178e-06, "loss": 0.0021, "step": 21311 }, { "epoch": 2.9869656622284513, "grad_norm": 0.09635857492685318, "learning_rate": 1.4637646496053574e-06, "loss": 0.0047, "step": 21312 }, { "epoch": 2.9871058163980377, "grad_norm": 0.051178429275751114, 
"learning_rate": 1.449414015785697e-06, "loss": 0.0037, "step": 21313 }, { "epoch": 2.987245970567624, "grad_norm": 0.19735737144947052, "learning_rate": 1.4350633819660365e-06, "loss": 0.0386, "step": 21314 }, { "epoch": 2.9873861247372107, "grad_norm": 0.14851321280002594, "learning_rate": 1.4207127481463765e-06, "loss": 0.0079, "step": 21315 }, { "epoch": 2.9875262789067976, "grad_norm": 0.516215443611145, "learning_rate": 1.406362114326716e-06, "loss": 0.0962, "step": 21316 }, { "epoch": 2.987666433076384, "grad_norm": 0.7529941201210022, "learning_rate": 1.3920114805070556e-06, "loss": 0.0861, "step": 21317 }, { "epoch": 2.9878065872459705, "grad_norm": 0.7254121899604797, "learning_rate": 1.3776608466873952e-06, "loss": 0.1756, "step": 21318 }, { "epoch": 2.9879467414155574, "grad_norm": 2.3045730590820312, "learning_rate": 1.363310212867735e-06, "loss": 0.122, "step": 21319 }, { "epoch": 2.988086895585144, "grad_norm": 4.316004753112793, "learning_rate": 1.3489595790480746e-06, "loss": 0.14, "step": 21320 }, { "epoch": 2.9882270497547303, "grad_norm": 0.09581112861633301, "learning_rate": 1.3346089452284141e-06, "loss": 0.0219, "step": 21321 }, { "epoch": 2.9883672039243168, "grad_norm": 0.2235007882118225, "learning_rate": 1.3202583114087537e-06, "loss": 0.0186, "step": 21322 }, { "epoch": 2.988507358093903, "grad_norm": 0.010522022843360901, "learning_rate": 1.3059076775890933e-06, "loss": 0.0012, "step": 21323 }, { "epoch": 2.9886475122634897, "grad_norm": 0.2181089222431183, "learning_rate": 1.291557043769433e-06, "loss": 0.0139, "step": 21324 }, { "epoch": 2.988787666433076, "grad_norm": 0.16764657199382782, "learning_rate": 1.2772064099497726e-06, "loss": 0.014, "step": 21325 }, { "epoch": 2.988927820602663, "grad_norm": 1.1151686906814575, "learning_rate": 1.2628557761301122e-06, "loss": 0.0912, "step": 21326 }, { "epoch": 2.9890679747722495, "grad_norm": 0.20999130606651306, "learning_rate": 1.2485051423104518e-06, "loss": 0.0318, "step": 21327 }, { 
"epoch": 2.989208128941836, "grad_norm": 0.04904772341251373, "learning_rate": 1.2341545084907913e-06, "loss": 0.0052, "step": 21328 }, { "epoch": 2.9893482831114224, "grad_norm": 0.2710166573524475, "learning_rate": 1.2198038746711313e-06, "loss": 0.0395, "step": 21329 }, { "epoch": 2.9894884372810093, "grad_norm": 0.1824885606765747, "learning_rate": 1.205453240851471e-06, "loss": 0.0268, "step": 21330 }, { "epoch": 2.989628591450596, "grad_norm": 0.1584746390581131, "learning_rate": 1.1911026070318105e-06, "loss": 0.0279, "step": 21331 }, { "epoch": 2.9897687456201822, "grad_norm": 0.1529160737991333, "learning_rate": 1.17675197321215e-06, "loss": 0.0258, "step": 21332 }, { "epoch": 2.9899088997897687, "grad_norm": 0.08004710078239441, "learning_rate": 1.1624013393924896e-06, "loss": 0.0055, "step": 21333 }, { "epoch": 2.990049053959355, "grad_norm": 0.10253626108169556, "learning_rate": 1.1480507055728294e-06, "loss": 0.0083, "step": 21334 }, { "epoch": 2.9901892081289416, "grad_norm": 0.2089439183473587, "learning_rate": 1.133700071753169e-06, "loss": 0.0068, "step": 21335 }, { "epoch": 2.9903293622985285, "grad_norm": 1.4994295835494995, "learning_rate": 1.1193494379335085e-06, "loss": 0.0809, "step": 21336 }, { "epoch": 2.990469516468115, "grad_norm": 0.08568329364061356, "learning_rate": 1.1049988041138483e-06, "loss": 0.0151, "step": 21337 }, { "epoch": 2.9906096706377014, "grad_norm": 0.052259791642427444, "learning_rate": 1.090648170294188e-06, "loss": 0.0049, "step": 21338 }, { "epoch": 2.990749824807288, "grad_norm": 0.2366602122783661, "learning_rate": 1.0762975364745275e-06, "loss": 0.0199, "step": 21339 }, { "epoch": 2.990889978976875, "grad_norm": 0.20673762261867523, "learning_rate": 1.061946902654867e-06, "loss": 0.0403, "step": 21340 }, { "epoch": 2.9910301331464613, "grad_norm": 0.05970551073551178, "learning_rate": 1.0475962688352068e-06, "loss": 0.0031, "step": 21341 }, { "epoch": 2.9911702873160477, "grad_norm": 0.09055374562740326, 
"learning_rate": 1.0332456350155464e-06, "loss": 0.012, "step": 21342 }, { "epoch": 2.991310441485634, "grad_norm": 0.05353236943483353, "learning_rate": 1.018895001195886e-06, "loss": 0.0009, "step": 21343 }, { "epoch": 2.9914505956552206, "grad_norm": 0.20226335525512695, "learning_rate": 1.0045443673762258e-06, "loss": 0.0185, "step": 21344 }, { "epoch": 2.991590749824807, "grad_norm": 0.14523659646511078, "learning_rate": 9.901937335565653e-07, "loss": 0.0277, "step": 21345 }, { "epoch": 2.9917309039943936, "grad_norm": 0.08523275703191757, "learning_rate": 9.75843099736905e-07, "loss": 0.0064, "step": 21346 }, { "epoch": 2.9918710581639805, "grad_norm": 0.16134516894817352, "learning_rate": 9.614924659172445e-07, "loss": 0.0076, "step": 21347 }, { "epoch": 2.992011212333567, "grad_norm": 0.081428661942482, "learning_rate": 9.471418320975843e-07, "loss": 0.007, "step": 21348 }, { "epoch": 2.9921513665031534, "grad_norm": 0.1241818368434906, "learning_rate": 9.327911982779238e-07, "loss": 0.025, "step": 21349 }, { "epoch": 2.9922915206727403, "grad_norm": 0.14690424501895905, "learning_rate": 9.184405644582634e-07, "loss": 0.0093, "step": 21350 }, { "epoch": 2.9924316748423267, "grad_norm": 1.0814913511276245, "learning_rate": 9.040899306386032e-07, "loss": 0.0431, "step": 21351 }, { "epoch": 2.992571829011913, "grad_norm": 0.10704734176397324, "learning_rate": 8.897392968189428e-07, "loss": 0.0089, "step": 21352 }, { "epoch": 2.9927119831814997, "grad_norm": 0.35222530364990234, "learning_rate": 8.753886629992824e-07, "loss": 0.0514, "step": 21353 }, { "epoch": 2.992852137351086, "grad_norm": 0.1267102211713791, "learning_rate": 8.61038029179622e-07, "loss": 0.0112, "step": 21354 }, { "epoch": 2.9929922915206726, "grad_norm": 0.10336574167013168, "learning_rate": 8.466873953599617e-07, "loss": 0.0115, "step": 21355 }, { "epoch": 2.993132445690259, "grad_norm": 0.16795602440834045, "learning_rate": 8.323367615403013e-07, "loss": 0.018, "step": 21356 }, { 
"epoch": 2.993272599859846, "grad_norm": 0.10170432180166245, "learning_rate": 8.179861277206408e-07, "loss": 0.005, "step": 21357 }, { "epoch": 2.9934127540294324, "grad_norm": 0.1272394359111786, "learning_rate": 8.036354939009806e-07, "loss": 0.0203, "step": 21358 }, { "epoch": 2.993552908199019, "grad_norm": 0.5077134966850281, "learning_rate": 7.892848600813202e-07, "loss": 0.0343, "step": 21359 }, { "epoch": 2.9936930623686053, "grad_norm": 0.2041243463754654, "learning_rate": 7.749342262616599e-07, "loss": 0.0199, "step": 21360 }, { "epoch": 2.9938332165381922, "grad_norm": 0.2168726772069931, "learning_rate": 7.605835924419994e-07, "loss": 0.0078, "step": 21361 }, { "epoch": 2.9939733707077787, "grad_norm": 0.33751949667930603, "learning_rate": 7.462329586223391e-07, "loss": 0.022, "step": 21362 }, { "epoch": 2.994113524877365, "grad_norm": 0.3545292913913727, "learning_rate": 7.318823248026787e-07, "loss": 0.1091, "step": 21363 }, { "epoch": 2.9942536790469516, "grad_norm": 0.3890790343284607, "learning_rate": 7.175316909830182e-07, "loss": 0.0303, "step": 21364 }, { "epoch": 2.994393833216538, "grad_norm": 0.26840487122535706, "learning_rate": 7.03181057163358e-07, "loss": 0.0202, "step": 21365 }, { "epoch": 2.9945339873861245, "grad_norm": 0.39443060755729675, "learning_rate": 6.888304233436976e-07, "loss": 0.0757, "step": 21366 }, { "epoch": 2.9946741415557114, "grad_norm": 0.2796522080898285, "learning_rate": 6.744797895240373e-07, "loss": 0.0254, "step": 21367 }, { "epoch": 2.994814295725298, "grad_norm": 0.12309366464614868, "learning_rate": 6.601291557043769e-07, "loss": 0.009, "step": 21368 }, { "epoch": 2.9949544498948844, "grad_norm": 0.027093442156910896, "learning_rate": 6.457785218847165e-07, "loss": 0.0015, "step": 21369 }, { "epoch": 2.995094604064471, "grad_norm": 1.459133267402649, "learning_rate": 6.314278880650561e-07, "loss": 0.0662, "step": 21370 }, { "epoch": 2.9952347582340577, "grad_norm": 0.19384761154651642, "learning_rate": 
6.170772542453957e-07, "loss": 0.0319, "step": 21371 }, { "epoch": 2.995374912403644, "grad_norm": 0.3839840888977051, "learning_rate": 6.027266204257355e-07, "loss": 0.0403, "step": 21372 }, { "epoch": 2.9955150665732306, "grad_norm": 0.19356566667556763, "learning_rate": 5.88375986606075e-07, "loss": 0.0227, "step": 21373 }, { "epoch": 2.995655220742817, "grad_norm": 0.07258197665214539, "learning_rate": 5.740253527864147e-07, "loss": 0.0063, "step": 21374 }, { "epoch": 2.9957953749124036, "grad_norm": 0.13831639289855957, "learning_rate": 5.596747189667543e-07, "loss": 0.0475, "step": 21375 }, { "epoch": 2.99593552908199, "grad_norm": 0.2333473265171051, "learning_rate": 5.45324085147094e-07, "loss": 0.0209, "step": 21376 }, { "epoch": 2.9960756832515765, "grad_norm": 0.2990017831325531, "learning_rate": 5.309734513274335e-07, "loss": 0.0344, "step": 21377 }, { "epoch": 2.9962158374211634, "grad_norm": 0.6658338308334351, "learning_rate": 5.166228175077732e-07, "loss": 0.0465, "step": 21378 }, { "epoch": 2.99635599159075, "grad_norm": 0.355704128742218, "learning_rate": 5.022721836881129e-07, "loss": 0.0393, "step": 21379 }, { "epoch": 2.9964961457603363, "grad_norm": 0.07343513518571854, "learning_rate": 4.879215498684525e-07, "loss": 0.0151, "step": 21380 }, { "epoch": 2.9966362999299228, "grad_norm": 0.3340516686439514, "learning_rate": 4.7357091604879213e-07, "loss": 0.024, "step": 21381 }, { "epoch": 2.9967764540995097, "grad_norm": 0.4459291696548462, "learning_rate": 4.592202822291317e-07, "loss": 0.0484, "step": 21382 }, { "epoch": 2.996916608269096, "grad_norm": 0.4163734018802643, "learning_rate": 4.448696484094714e-07, "loss": 0.0599, "step": 21383 }, { "epoch": 2.9970567624386826, "grad_norm": 0.06866970658302307, "learning_rate": 4.30519014589811e-07, "loss": 0.0068, "step": 21384 }, { "epoch": 2.997196916608269, "grad_norm": 0.5593795776367188, "learning_rate": 4.161683807701506e-07, "loss": 0.0562, "step": 21385 }, { "epoch": 2.9973370707778555, 
"grad_norm": 0.2872020900249481, "learning_rate": 4.018177469504903e-07, "loss": 0.0248, "step": 21386 }, { "epoch": 2.997477224947442, "grad_norm": 0.14189307391643524, "learning_rate": 3.8746711313082993e-07, "loss": 0.0235, "step": 21387 }, { "epoch": 2.997617379117029, "grad_norm": 0.21212796866893768, "learning_rate": 3.7311647931116955e-07, "loss": 0.0597, "step": 21388 }, { "epoch": 2.9977575332866153, "grad_norm": 0.09847323596477509, "learning_rate": 3.587658454915091e-07, "loss": 0.0061, "step": 21389 }, { "epoch": 2.9978976874562018, "grad_norm": 0.6569809913635254, "learning_rate": 3.444152116718488e-07, "loss": 0.098, "step": 21390 }, { "epoch": 2.9980378416257882, "grad_norm": 0.15362878143787384, "learning_rate": 3.300645778521884e-07, "loss": 0.0213, "step": 21391 }, { "epoch": 2.998177995795375, "grad_norm": 0.557316243648529, "learning_rate": 3.1571394403252805e-07, "loss": 0.029, "step": 21392 }, { "epoch": 2.9983181499649616, "grad_norm": 0.17047187685966492, "learning_rate": 3.0136331021286773e-07, "loss": 0.0096, "step": 21393 }, { "epoch": 2.998458304134548, "grad_norm": 0.09355489164590836, "learning_rate": 2.8701267639320735e-07, "loss": 0.0142, "step": 21394 }, { "epoch": 2.9985984583041345, "grad_norm": 0.3209400475025177, "learning_rate": 2.72662042573547e-07, "loss": 0.0285, "step": 21395 }, { "epoch": 2.998738612473721, "grad_norm": 0.024161234498023987, "learning_rate": 2.583114087538866e-07, "loss": 0.0014, "step": 21396 }, { "epoch": 2.9988787666433074, "grad_norm": 0.1335512101650238, "learning_rate": 2.439607749342262e-07, "loss": 0.0057, "step": 21397 }, { "epoch": 2.9990189208128943, "grad_norm": 0.16399157047271729, "learning_rate": 2.2961014111456585e-07, "loss": 0.0073, "step": 21398 }, { "epoch": 2.999159074982481, "grad_norm": 0.2587020993232727, "learning_rate": 2.152595072949055e-07, "loss": 0.0263, "step": 21399 }, { "epoch": 2.9992992291520673, "grad_norm": 0.20869041979312897, "learning_rate": 2.0090887347524515e-07, 
"loss": 0.0302, "step": 21400 }, { "epoch": 2.9994393833216537, "grad_norm": 0.11802941560745239, "learning_rate": 1.8655823965558478e-07, "loss": 0.0109, "step": 21401 }, { "epoch": 2.9995795374912406, "grad_norm": 0.1735084056854248, "learning_rate": 1.722076058359244e-07, "loss": 0.005, "step": 21402 }, { "epoch": 2.999719691660827, "grad_norm": 0.20583558082580566, "learning_rate": 1.5785697201626402e-07, "loss": 0.007, "step": 21403 }, { "epoch": 2.9998598458304135, "grad_norm": 0.97768235206604, "learning_rate": 1.4350633819660368e-07, "loss": 0.0669, "step": 21404 }, { "epoch": 3.0, "grad_norm": 3.6988003253936768, "learning_rate": 1.291557043769433e-07, "loss": 0.2459, "step": 21405 }, { "epoch": 3.0, "step": 21405, "total_flos": 3.3190826326923264e+19, "train_loss": 0.19606071691714266, "train_runtime": 14592.9757, "train_samples_per_second": 5.867, "train_steps_per_second": 1.467 } ], "logging_steps": 1.0, "max_steps": 21405, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.3190826326923264e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }