{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1000, "global_step": 12776, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015654351909830932, "grad_norm": null, "learning_rate": 0.0, "loss": 10.2049, "step": 1 }, { "epoch": 0.00031308703819661864, "grad_norm": null, "learning_rate": 0.0, "loss": 10.1719, "step": 2 }, { "epoch": 0.000469630557294928, "grad_norm": 10.289196014404297, "learning_rate": 6e-07, "loss": 11.8275, "step": 3 }, { "epoch": 0.0006261740763932373, "grad_norm": 8.432656288146973, "learning_rate": 1.2e-06, "loss": 9.669, "step": 4 }, { "epoch": 0.0007827175954915466, "grad_norm": 10.236042022705078, "learning_rate": 1.8e-06, "loss": 11.8878, "step": 5 }, { "epoch": 0.000939261114589856, "grad_norm": 9.023536682128906, "learning_rate": 2.4e-06, "loss": 11.0993, "step": 6 }, { "epoch": 0.0010958046336881652, "grad_norm": 9.157316207885742, "learning_rate": 2.9999999999999997e-06, "loss": 11.0304, "step": 7 }, { "epoch": 0.0012523481527864746, "grad_norm": 15.224575996398926, "learning_rate": 3.6e-06, "loss": 18.5492, "step": 8 }, { "epoch": 0.001408891671884784, "grad_norm": 8.25251579284668, "learning_rate": 4.2e-06, "loss": 9.8113, "step": 9 }, { "epoch": 0.0015654351909830933, "grad_norm": 8.493621826171875, "learning_rate": 4.8e-06, "loss": 10.5777, "step": 10 }, { "epoch": 0.0017219787100814026, "grad_norm": 8.078389167785645, "learning_rate": 5.399999999999999e-06, "loss": 9.6918, "step": 11 }, { "epoch": 0.001878522229179712, "grad_norm": 12.197300910949707, "learning_rate": 5.999999999999999e-06, "loss": 15.5513, "step": 12 }, { "epoch": 0.002035065748278021, "grad_norm": 14.715069770812988, "learning_rate": 6.599999999999999e-06, "loss": 21.1878, "step": 13 }, { "epoch": 0.0021916092673763305, "grad_norm": 14.680959701538086, "learning_rate": 7.2e-06, "loss": 20.1223, "step": 14 }, { "epoch": 0.00234815278647464, 
"grad_norm": null, "learning_rate": 7.2e-06, "loss": 25.9204, "step": 15 }, { "epoch": 0.002504696305572949, "grad_norm": 7.316311359405518, "learning_rate": 7.799999999999998e-06, "loss": 9.0609, "step": 16 }, { "epoch": 0.0026612398246712585, "grad_norm": 14.48494815826416, "learning_rate": 8.4e-06, "loss": 19.6195, "step": 17 }, { "epoch": 0.002817783343769568, "grad_norm": 24.607234954833984, "learning_rate": 8.999999999999999e-06, "loss": 24.8781, "step": 18 }, { "epoch": 0.002974326862867877, "grad_norm": 10.66619873046875, "learning_rate": 9.6e-06, "loss": 14.3883, "step": 19 }, { "epoch": 0.0031308703819661866, "grad_norm": 9.02160930633545, "learning_rate": 1.02e-05, "loss": 11.2116, "step": 20 }, { "epoch": 0.003287413901064496, "grad_norm": 8.273414611816406, "learning_rate": 1.0799999999999998e-05, "loss": 10.0936, "step": 21 }, { "epoch": 0.0034439574201628053, "grad_norm": 9.83197021484375, "learning_rate": 1.14e-05, "loss": 12.5294, "step": 22 }, { "epoch": 0.0036005009392611146, "grad_norm": 11.494529724121094, "learning_rate": 1.1999999999999999e-05, "loss": 15.1262, "step": 23 }, { "epoch": 0.003757044458359424, "grad_norm": 8.389466285705566, "learning_rate": 1.26e-05, "loss": 11.8818, "step": 24 }, { "epoch": 0.003913587977457733, "grad_norm": 11.484485626220703, "learning_rate": 1.3199999999999997e-05, "loss": 16.7747, "step": 25 }, { "epoch": 0.004070131496556042, "grad_norm": 11.260869026184082, "learning_rate": 1.3799999999999998e-05, "loss": 15.3859, "step": 26 }, { "epoch": 0.004226675015654352, "grad_norm": 17.069162368774414, "learning_rate": 1.44e-05, "loss": 19.4706, "step": 27 }, { "epoch": 0.004383218534752661, "grad_norm": 8.778854370117188, "learning_rate": 1.4999999999999999e-05, "loss": 13.0765, "step": 28 }, { "epoch": 0.004539762053850971, "grad_norm": 10.239326477050781, "learning_rate": 1.5599999999999996e-05, "loss": 14.7478, "step": 29 }, { "epoch": 0.00469630557294928, "grad_norm": 9.703652381896973, "learning_rate": 
1.6199999999999997e-05, "loss": 13.5911, "step": 30 }, { "epoch": 0.004852849092047589, "grad_norm": 10.830334663391113, "learning_rate": 1.68e-05, "loss": 16.7388, "step": 31 }, { "epoch": 0.005009392611145898, "grad_norm": 14.102766990661621, "learning_rate": 1.74e-05, "loss": 19.3178, "step": 32 }, { "epoch": 0.005165936130244208, "grad_norm": 12.113805770874023, "learning_rate": 1.7999999999999997e-05, "loss": 17.4729, "step": 33 }, { "epoch": 0.005322479649342517, "grad_norm": 9.666101455688477, "learning_rate": 1.8599999999999998e-05, "loss": 14.8432, "step": 34 }, { "epoch": 0.005479023168440827, "grad_norm": 10.271428108215332, "learning_rate": 1.92e-05, "loss": 15.5869, "step": 35 }, { "epoch": 0.005635566687539136, "grad_norm": 27.508230209350586, "learning_rate": 1.98e-05, "loss": 13.2456, "step": 36 }, { "epoch": 0.0057921102066374455, "grad_norm": 10.40356159210205, "learning_rate": 2.04e-05, "loss": 15.4974, "step": 37 }, { "epoch": 0.005948653725735754, "grad_norm": 10.521185874938965, "learning_rate": 2.1e-05, "loss": 16.509, "step": 38 }, { "epoch": 0.006105197244834064, "grad_norm": 9.262508392333984, "learning_rate": 2.1599999999999996e-05, "loss": 12.5824, "step": 39 }, { "epoch": 0.006261740763932373, "grad_norm": 9.204741477966309, "learning_rate": 2.2199999999999998e-05, "loss": 13.576, "step": 40 }, { "epoch": 0.006418284283030683, "grad_norm": 6.91443395614624, "learning_rate": 2.28e-05, "loss": 11.8722, "step": 41 }, { "epoch": 0.006574827802128992, "grad_norm": 7.96762752532959, "learning_rate": 2.34e-05, "loss": 13.263, "step": 42 }, { "epoch": 0.006731371321227302, "grad_norm": 9.377372741699219, "learning_rate": 2.3999999999999997e-05, "loss": 12.1481, "step": 43 }, { "epoch": 0.0068879148403256105, "grad_norm": 6.440145015716553, "learning_rate": 2.4599999999999998e-05, "loss": 10.7579, "step": 44 }, { "epoch": 0.007044458359423919, "grad_norm": 5.9850077629089355, "learning_rate": 2.52e-05, "loss": 9.8462, "step": 45 }, { "epoch": 
0.007201001878522229, "grad_norm": 10.260824203491211, "learning_rate": 2.5799999999999997e-05, "loss": 9.8883, "step": 46 }, { "epoch": 0.007357545397620538, "grad_norm": 10.321931838989258, "learning_rate": 2.6399999999999995e-05, "loss": 8.8922, "step": 47 }, { "epoch": 0.007514088916718848, "grad_norm": 5.632627964019775, "learning_rate": 2.6999999999999996e-05, "loss": 8.3128, "step": 48 }, { "epoch": 0.007670632435817157, "grad_norm": 4.798144817352295, "learning_rate": 2.7599999999999997e-05, "loss": 7.1669, "step": 49 }, { "epoch": 0.007827175954915467, "grad_norm": 4.642441272735596, "learning_rate": 2.8199999999999998e-05, "loss": 6.2127, "step": 50 }, { "epoch": 0.007983719474013776, "grad_norm": 12.44339370727539, "learning_rate": 2.88e-05, "loss": 11.9784, "step": 51 }, { "epoch": 0.008140262993112084, "grad_norm": 11.49549674987793, "learning_rate": 2.94e-05, "loss": 10.976, "step": 52 }, { "epoch": 0.008296806512210394, "grad_norm": 16.98020362854004, "learning_rate": 2.9999999999999997e-05, "loss": 15.17, "step": 53 }, { "epoch": 0.008453350031308704, "grad_norm": 8.904106140136719, "learning_rate": 3.06e-05, "loss": 8.8607, "step": 54 }, { "epoch": 0.008609893550407014, "grad_norm": 9.727025985717773, "learning_rate": 3.119999999999999e-05, "loss": 9.6081, "step": 55 }, { "epoch": 0.008766437069505322, "grad_norm": 9.460471153259277, "learning_rate": 3.1799999999999994e-05, "loss": 9.9255, "step": 56 }, { "epoch": 0.008922980588603632, "grad_norm": 9.601983070373535, "learning_rate": 3.2399999999999995e-05, "loss": 9.9013, "step": 57 }, { "epoch": 0.009079524107701941, "grad_norm": 10.275938034057617, "learning_rate": 3.2999999999999996e-05, "loss": 11.9712, "step": 58 }, { "epoch": 0.009236067626800251, "grad_norm": 9.154601097106934, "learning_rate": 3.36e-05, "loss": 10.6476, "step": 59 }, { "epoch": 0.00939261114589856, "grad_norm": 8.198284149169922, "learning_rate": 3.42e-05, "loss": 9.7442, "step": 60 }, { "epoch": 0.009549154664996869, 
"grad_norm": 10.912142753601074, "learning_rate": 3.48e-05, "loss": 11.3455, "step": 61 }, { "epoch": 0.009705698184095179, "grad_norm": 6.028509616851807, "learning_rate": 3.539999999999999e-05, "loss": 7.7892, "step": 62 }, { "epoch": 0.009862241703193489, "grad_norm": 10.75712776184082, "learning_rate": 3.5999999999999994e-05, "loss": 10.4904, "step": 63 }, { "epoch": 0.010018785222291797, "grad_norm": 11.000633239746094, "learning_rate": 3.6599999999999995e-05, "loss": 9.4008, "step": 64 }, { "epoch": 0.010175328741390106, "grad_norm": 7.76064395904541, "learning_rate": 3.7199999999999996e-05, "loss": 7.2246, "step": 65 }, { "epoch": 0.010331872260488416, "grad_norm": 23.41922950744629, "learning_rate": 3.78e-05, "loss": 13.3124, "step": 66 }, { "epoch": 0.010488415779586726, "grad_norm": 11.076509475708008, "learning_rate": 3.84e-05, "loss": 7.637, "step": 67 }, { "epoch": 0.010644959298685034, "grad_norm": 29.545185089111328, "learning_rate": 3.9e-05, "loss": 14.3584, "step": 68 }, { "epoch": 0.010801502817783344, "grad_norm": 17.31962776184082, "learning_rate": 3.96e-05, "loss": 8.6004, "step": 69 }, { "epoch": 0.010958046336881654, "grad_norm": 11.55864143371582, "learning_rate": 4.02e-05, "loss": 6.3439, "step": 70 }, { "epoch": 0.011114589855979962, "grad_norm": 25.8043212890625, "learning_rate": 4.08e-05, "loss": 10.298, "step": 71 }, { "epoch": 0.011271133375078271, "grad_norm": 33.71727752685547, "learning_rate": 4.14e-05, "loss": 12.2877, "step": 72 }, { "epoch": 0.011427676894176581, "grad_norm": 20.209596633911133, "learning_rate": 4.2e-05, "loss": 7.7747, "step": 73 }, { "epoch": 0.011584220413274891, "grad_norm": 20.540761947631836, "learning_rate": 4.259999999999999e-05, "loss": 7.6235, "step": 74 }, { "epoch": 0.011740763932373199, "grad_norm": null, "learning_rate": 4.259999999999999e-05, "loss": 12.327, "step": 75 }, { "epoch": 0.011897307451471509, "grad_norm": 19.35993766784668, "learning_rate": 4.319999999999999e-05, "loss": 7.1177, 
"step": 76 }, { "epoch": 0.012053850970569819, "grad_norm": 36.380619049072266, "learning_rate": 4.3799999999999994e-05, "loss": 10.6781, "step": 77 }, { "epoch": 0.012210394489668128, "grad_norm": 45.644840240478516, "learning_rate": 4.4399999999999995e-05, "loss": 12.4375, "step": 78 }, { "epoch": 0.012366938008766436, "grad_norm": 27.72957992553711, "learning_rate": 4.4999999999999996e-05, "loss": 8.1513, "step": 79 }, { "epoch": 0.012523481527864746, "grad_norm": 35.70543670654297, "learning_rate": 4.56e-05, "loss": 9.3938, "step": 80 }, { "epoch": 0.012680025046963056, "grad_norm": 51.08866500854492, "learning_rate": 4.62e-05, "loss": 12.0758, "step": 81 }, { "epoch": 0.012836568566061366, "grad_norm": 38.18144989013672, "learning_rate": 4.68e-05, "loss": 9.2489, "step": 82 }, { "epoch": 0.012993112085159674, "grad_norm": 25.754817962646484, "learning_rate": 4.7399999999999993e-05, "loss": 6.9829, "step": 83 }, { "epoch": 0.013149655604257984, "grad_norm": 33.58719253540039, "learning_rate": 4.7999999999999994e-05, "loss": 8.0805, "step": 84 }, { "epoch": 0.013306199123356293, "grad_norm": 19.882631301879883, "learning_rate": 4.8599999999999995e-05, "loss": 5.7876, "step": 85 }, { "epoch": 0.013462742642454603, "grad_norm": 28.052623748779297, "learning_rate": 4.9199999999999997e-05, "loss": 6.844, "step": 86 }, { "epoch": 0.013619286161552911, "grad_norm": 24.705547332763672, "learning_rate": 4.98e-05, "loss": 6.2694, "step": 87 }, { "epoch": 0.013775829680651221, "grad_norm": 27.596519470214844, "learning_rate": 5.04e-05, "loss": 6.6974, "step": 88 }, { "epoch": 0.01393237319974953, "grad_norm": 24.197368621826172, "learning_rate": 5.1e-05, "loss": 5.9251, "step": 89 }, { "epoch": 0.014088916718847839, "grad_norm": 33.817039489746094, "learning_rate": 5.1599999999999994e-05, "loss": 7.2591, "step": 90 }, { "epoch": 0.014245460237946149, "grad_norm": 28.664337158203125, "learning_rate": 5.2199999999999995e-05, "loss": 6.3747, "step": 91 }, { "epoch": 
0.014402003757044458, "grad_norm": 19.04326057434082, "learning_rate": 5.279999999999999e-05, "loss": 5.2411, "step": 92 }, { "epoch": 0.014558547276142768, "grad_norm": 19.95840835571289, "learning_rate": 5.339999999999999e-05, "loss": 5.2001, "step": 93 }, { "epoch": 0.014715090795241076, "grad_norm": 17.750680923461914, "learning_rate": 5.399999999999999e-05, "loss": 4.9905, "step": 94 }, { "epoch": 0.014871634314339386, "grad_norm": 17.195247650146484, "learning_rate": 5.459999999999999e-05, "loss": 4.8285, "step": 95 }, { "epoch": 0.015028177833437696, "grad_norm": 12.27951431274414, "learning_rate": 5.519999999999999e-05, "loss": 4.311, "step": 96 }, { "epoch": 0.015184721352536006, "grad_norm": 10.93501091003418, "learning_rate": 5.5799999999999994e-05, "loss": 4.0848, "step": 97 }, { "epoch": 0.015341264871634314, "grad_norm": 8.672501564025879, "learning_rate": 5.6399999999999995e-05, "loss": 3.961, "step": 98 }, { "epoch": 0.015497808390732623, "grad_norm": 7.686964988708496, "learning_rate": 5.6999999999999996e-05, "loss": 3.8516, "step": 99 }, { "epoch": 0.015654351909830933, "grad_norm": 5.318123817443848, "learning_rate": 5.76e-05, "loss": 3.6987, "step": 100 }, { "epoch": 0.01581089542892924, "grad_norm": 13.304719924926758, "learning_rate": 5.82e-05, "loss": 4.1017, "step": 101 }, { "epoch": 0.015967438948027553, "grad_norm": 12.244338989257812, "learning_rate": 5.88e-05, "loss": 3.9793, "step": 102 }, { "epoch": 0.01612398246712586, "grad_norm": 11.684955596923828, "learning_rate": 5.94e-05, "loss": 3.9138, "step": 103 }, { "epoch": 0.01628052598622417, "grad_norm": 36.793663024902344, "learning_rate": 5.9999999999999995e-05, "loss": 6.8625, "step": 104 }, { "epoch": 0.01643706950532248, "grad_norm": 20.531827926635742, "learning_rate": 6.0599999999999996e-05, "loss": 4.8527, "step": 105 }, { "epoch": 0.01659361302442079, "grad_norm": 12.65087604522705, "learning_rate": 6.12e-05, "loss": 4.0059, "step": 106 }, { "epoch": 0.0167501565435191, 
"grad_norm": 9.573142051696777, "learning_rate": 6.18e-05, "loss": 3.7414, "step": 107 }, { "epoch": 0.016906700062617408, "grad_norm": 13.197820663452148, "learning_rate": 6.239999999999999e-05, "loss": 4.1523, "step": 108 }, { "epoch": 0.017063243581715716, "grad_norm": 10.532169342041016, "learning_rate": 6.299999999999999e-05, "loss": 3.8368, "step": 109 }, { "epoch": 0.017219787100814028, "grad_norm": 12.9858980178833, "learning_rate": 6.359999999999999e-05, "loss": 4.1369, "step": 110 }, { "epoch": 0.017376330619912336, "grad_norm": 29.88274574279785, "learning_rate": 6.419999999999999e-05, "loss": 5.8008, "step": 111 }, { "epoch": 0.017532874139010644, "grad_norm": 17.681074142456055, "learning_rate": 6.479999999999999e-05, "loss": 4.4349, "step": 112 }, { "epoch": 0.017689417658108955, "grad_norm": 14.819682121276855, "learning_rate": 6.539999999999999e-05, "loss": 4.3329, "step": 113 }, { "epoch": 0.017845961177207263, "grad_norm": 12.398885726928711, "learning_rate": 6.599999999999999e-05, "loss": 4.0054, "step": 114 }, { "epoch": 0.01800250469630557, "grad_norm": 11.930230140686035, "learning_rate": 6.659999999999999e-05, "loss": 3.9655, "step": 115 }, { "epoch": 0.018159048215403883, "grad_norm": 8.862527847290039, "learning_rate": 6.72e-05, "loss": 3.6875, "step": 116 }, { "epoch": 0.01831559173450219, "grad_norm": 21.191814422607422, "learning_rate": 6.78e-05, "loss": 4.9808, "step": 117 }, { "epoch": 0.018472135253600502, "grad_norm": 5.6609392166137695, "learning_rate": 6.84e-05, "loss": 3.446, "step": 118 }, { "epoch": 0.01862867877269881, "grad_norm": 15.799832344055176, "learning_rate": 6.9e-05, "loss": 4.3345, "step": 119 }, { "epoch": 0.01878522229179712, "grad_norm": 12.569608688354492, "learning_rate": 6.96e-05, "loss": 4.0145, "step": 120 }, { "epoch": 0.01894176581089543, "grad_norm": 6.250307559967041, "learning_rate": 7.02e-05, "loss": 3.531, "step": 121 }, { "epoch": 0.019098309329993738, "grad_norm": 9.823454856872559, "learning_rate": 
7.079999999999999e-05, "loss": 3.8133, "step": 122 }, { "epoch": 0.019254852849092046, "grad_norm": 14.932891845703125, "learning_rate": 7.139999999999999e-05, "loss": 4.0484, "step": 123 }, { "epoch": 0.019411396368190358, "grad_norm": 8.835564613342285, "learning_rate": 7.199999999999999e-05, "loss": 3.775, "step": 124 }, { "epoch": 0.019567939887288666, "grad_norm": 2.77044939994812, "learning_rate": 7.259999999999999e-05, "loss": 3.385, "step": 125 }, { "epoch": 0.019724483406386977, "grad_norm": 19.207578659057617, "learning_rate": 7.319999999999999e-05, "loss": 4.4818, "step": 126 }, { "epoch": 0.019881026925485285, "grad_norm": 20.31056785583496, "learning_rate": 7.379999999999999e-05, "loss": 4.5493, "step": 127 }, { "epoch": 0.020037570444583593, "grad_norm": 9.711579322814941, "learning_rate": 7.439999999999999e-05, "loss": 3.7879, "step": 128 }, { "epoch": 0.020194113963681905, "grad_norm": 19.968996047973633, "learning_rate": 7.5e-05, "loss": 4.6096, "step": 129 }, { "epoch": 0.020350657482780213, "grad_norm": 13.640237808227539, "learning_rate": 7.56e-05, "loss": 4.1364, "step": 130 }, { "epoch": 0.02050720100187852, "grad_norm": 18.92345428466797, "learning_rate": 7.62e-05, "loss": 4.4219, "step": 131 }, { "epoch": 0.020663744520976832, "grad_norm": 5.510997295379639, "learning_rate": 7.68e-05, "loss": 3.615, "step": 132 }, { "epoch": 0.02082028804007514, "grad_norm": 11.286620140075684, "learning_rate": 7.74e-05, "loss": 4.0149, "step": 133 }, { "epoch": 0.020976831559173452, "grad_norm": 8.938575744628906, "learning_rate": 7.8e-05, "loss": 3.8435, "step": 134 }, { "epoch": 0.02113337507827176, "grad_norm": 8.202083587646484, "learning_rate": 7.86e-05, "loss": 3.8606, "step": 135 }, { "epoch": 0.021289918597370068, "grad_norm": 7.211292266845703, "learning_rate": 7.92e-05, "loss": 3.6637, "step": 136 }, { "epoch": 0.02144646211646838, "grad_norm": 6.565310001373291, "learning_rate": 7.98e-05, "loss": 3.602, "step": 137 }, { "epoch": 
0.021603005635566688, "grad_norm": 5.222198009490967, "learning_rate": 8.04e-05, "loss": 3.6108, "step": 138 }, { "epoch": 0.021759549154664996, "grad_norm": 9.35428237915039, "learning_rate": 8.1e-05, "loss": 3.831, "step": 139 }, { "epoch": 0.021916092673763307, "grad_norm": 9.887560844421387, "learning_rate": 8.16e-05, "loss": 3.8316, "step": 140 }, { "epoch": 0.022072636192861615, "grad_norm": 8.195279121398926, "learning_rate": 8.22e-05, "loss": 3.642, "step": 141 }, { "epoch": 0.022229179711959923, "grad_norm": 3.218400478363037, "learning_rate": 8.28e-05, "loss": 3.44, "step": 142 }, { "epoch": 0.022385723231058235, "grad_norm": 2.472031354904175, "learning_rate": 8.34e-05, "loss": 3.4347, "step": 143 }, { "epoch": 0.022542266750156543, "grad_norm": 2.141615390777588, "learning_rate": 8.4e-05, "loss": 3.3126, "step": 144 }, { "epoch": 0.022698810269254854, "grad_norm": 3.026726007461548, "learning_rate": 8.459999999999998e-05, "loss": 3.3058, "step": 145 }, { "epoch": 0.022855353788353162, "grad_norm": 3.966043472290039, "learning_rate": 8.519999999999998e-05, "loss": 3.3199, "step": 146 }, { "epoch": 0.02301189730745147, "grad_norm": 3.906219720840454, "learning_rate": 8.579999999999998e-05, "loss": 3.2163, "step": 147 }, { "epoch": 0.023168440826549782, "grad_norm": 2.3450818061828613, "learning_rate": 8.639999999999999e-05, "loss": 3.1645, "step": 148 }, { "epoch": 0.02332498434564809, "grad_norm": 3.949995756149292, "learning_rate": 8.699999999999999e-05, "loss": 3.1981, "step": 149 }, { "epoch": 0.023481527864746398, "grad_norm": 2.897047996520996, "learning_rate": 8.759999999999999e-05, "loss": 3.0328, "step": 150 }, { "epoch": 0.02363807138384471, "grad_norm": 9.815628051757812, "learning_rate": 8.819999999999999e-05, "loss": 3.4079, "step": 151 }, { "epoch": 0.023794614902943018, "grad_norm": 8.956120491027832, "learning_rate": 8.879999999999999e-05, "loss": 3.4144, "step": 152 }, { "epoch": 0.02395115842204133, "grad_norm": 5.622773170471191, 
"learning_rate": 8.939999999999999e-05, "loss": 3.2383, "step": 153 }, { "epoch": 0.024107701941139637, "grad_norm": 10.2916259765625, "learning_rate": 8.999999999999999e-05, "loss": 3.6527, "step": 154 }, { "epoch": 0.024264245460237945, "grad_norm": 3.466508388519287, "learning_rate": 9.059999999999999e-05, "loss": 3.1562, "step": 155 }, { "epoch": 0.024420788979336257, "grad_norm": 3.6558587551116943, "learning_rate": 9.12e-05, "loss": 3.1937, "step": 156 }, { "epoch": 0.024577332498434565, "grad_norm": 1.539302945137024, "learning_rate": 9.18e-05, "loss": 3.0899, "step": 157 }, { "epoch": 0.024733876017532873, "grad_norm": 4.4443230628967285, "learning_rate": 9.24e-05, "loss": 3.3159, "step": 158 }, { "epoch": 0.024890419536631184, "grad_norm": 3.2545831203460693, "learning_rate": 9.3e-05, "loss": 3.1318, "step": 159 }, { "epoch": 0.025046963055729492, "grad_norm": 3.192456007003784, "learning_rate": 9.36e-05, "loss": 3.2075, "step": 160 }, { "epoch": 0.0252035065748278, "grad_norm": 1.9067258834838867, "learning_rate": 9.419999999999999e-05, "loss": 3.1137, "step": 161 }, { "epoch": 0.025360050093926112, "grad_norm": 1.6764792203903198, "learning_rate": 9.479999999999999e-05, "loss": 3.109, "step": 162 }, { "epoch": 0.02551659361302442, "grad_norm": 25.346240997314453, "learning_rate": 9.539999999999999e-05, "loss": 4.8797, "step": 163 }, { "epoch": 0.02567313713212273, "grad_norm": 1.1866326332092285, "learning_rate": 9.599999999999999e-05, "loss": 3.0522, "step": 164 }, { "epoch": 0.02582968065122104, "grad_norm": 1.4010124206542969, "learning_rate": 9.659999999999999e-05, "loss": 3.0789, "step": 165 }, { "epoch": 0.025986224170319348, "grad_norm": 8.419206619262695, "learning_rate": 9.719999999999999e-05, "loss": 3.3802, "step": 166 }, { "epoch": 0.02614276768941766, "grad_norm": 8.86681842803955, "learning_rate": 9.779999999999999e-05, "loss": 3.5289, "step": 167 }, { "epoch": 0.026299311208515967, "grad_norm": 23.243457794189453, "learning_rate": 
9.839999999999999e-05, "loss": 4.5939, "step": 168 }, { "epoch": 0.026455854727614275, "grad_norm": 2.583829164505005, "learning_rate": 9.9e-05, "loss": 3.0461, "step": 169 }, { "epoch": 0.026612398246712587, "grad_norm": 3.8143973350524902, "learning_rate": 9.96e-05, "loss": 3.1916, "step": 170 }, { "epoch": 0.026768941765810895, "grad_norm": 3.9216201305389404, "learning_rate": 0.0001002, "loss": 3.2371, "step": 171 }, { "epoch": 0.026925485284909206, "grad_norm": 13.120063781738281, "learning_rate": 0.0001008, "loss": 3.7303, "step": 172 }, { "epoch": 0.027082028804007514, "grad_norm": 1.0374871492385864, "learning_rate": 0.0001014, "loss": 3.1096, "step": 173 }, { "epoch": 0.027238572323105822, "grad_norm": 2.4160315990448, "learning_rate": 0.000102, "loss": 3.2397, "step": 174 }, { "epoch": 0.027395115842204134, "grad_norm": 10.37614631652832, "learning_rate": 0.0001026, "loss": 3.6207, "step": 175 }, { "epoch": 0.027551659361302442, "grad_norm": 3.1003804206848145, "learning_rate": 0.00010319999999999999, "loss": 3.3122, "step": 176 }, { "epoch": 0.02770820288040075, "grad_norm": 1.6606565713882446, "learning_rate": 0.00010379999999999999, "loss": 3.0935, "step": 177 }, { "epoch": 0.02786474639949906, "grad_norm": 1.8800925016403198, "learning_rate": 0.00010439999999999999, "loss": 3.2649, "step": 178 }, { "epoch": 0.02802128991859737, "grad_norm": 6.265193462371826, "learning_rate": 0.00010499999999999999, "loss": 3.4573, "step": 179 }, { "epoch": 0.028177833437695678, "grad_norm": 8.842411994934082, "learning_rate": 0.00010559999999999998, "loss": 3.6856, "step": 180 }, { "epoch": 0.02833437695679399, "grad_norm": 1.783496379852295, "learning_rate": 0.00010619999999999998, "loss": 3.2177, "step": 181 }, { "epoch": 0.028490920475892297, "grad_norm": 3.2805280685424805, "learning_rate": 0.00010679999999999998, "loss": 3.2863, "step": 182 }, { "epoch": 0.02864746399499061, "grad_norm": 3.0294930934906006, "learning_rate": 0.00010739999999999998, "loss": 3.428, 
"step": 183 }, { "epoch": 0.028804007514088917, "grad_norm": 1.3118689060211182, "learning_rate": 0.00010799999999999998, "loss": 3.3734, "step": 184 }, { "epoch": 0.028960551033187225, "grad_norm": 4.640561103820801, "learning_rate": 0.00010859999999999998, "loss": 3.4229, "step": 185 }, { "epoch": 0.029117094552285536, "grad_norm": 2.2550694942474365, "learning_rate": 0.00010919999999999998, "loss": 3.1565, "step": 186 }, { "epoch": 0.029273638071383844, "grad_norm": 2.734482765197754, "learning_rate": 0.00010979999999999999, "loss": 3.1083, "step": 187 }, { "epoch": 0.029430181590482152, "grad_norm": 2.0161116123199463, "learning_rate": 0.00011039999999999999, "loss": 3.1897, "step": 188 }, { "epoch": 0.029586725109580464, "grad_norm": 1.5846065282821655, "learning_rate": 0.00011099999999999999, "loss": 3.1898, "step": 189 }, { "epoch": 0.029743268628678772, "grad_norm": 7.154541015625, "learning_rate": 0.00011159999999999999, "loss": 3.3738, "step": 190 }, { "epoch": 0.029899812147777084, "grad_norm": 2.3260960578918457, "learning_rate": 0.00011219999999999999, "loss": 3.2111, "step": 191 }, { "epoch": 0.03005635566687539, "grad_norm": 2.29046630859375, "learning_rate": 0.00011279999999999999, "loss": 3.2184, "step": 192 }, { "epoch": 0.0302128991859737, "grad_norm": 3.9989373683929443, "learning_rate": 0.00011339999999999999, "loss": 3.1476, "step": 193 }, { "epoch": 0.03036944270507201, "grad_norm": 1.7007783651351929, "learning_rate": 0.00011399999999999999, "loss": 3.098, "step": 194 }, { "epoch": 0.03052598622417032, "grad_norm": 1.5231850147247314, "learning_rate": 0.0001146, "loss": 3.1793, "step": 195 }, { "epoch": 0.030682529743268627, "grad_norm": 3.336979627609253, "learning_rate": 0.0001152, "loss": 3.0494, "step": 196 }, { "epoch": 0.03083907326236694, "grad_norm": 2.053584098815918, "learning_rate": 0.0001158, "loss": 2.9805, "step": 197 }, { "epoch": 0.030995616781465247, "grad_norm": 1.7036573886871338, "learning_rate": 0.0001164, "loss": 
2.9734, "step": 198 }, { "epoch": 0.03115216030056356, "grad_norm": 3.5702524185180664, "learning_rate": 0.000117, "loss": 2.9142, "step": 199 }, { "epoch": 0.031308703819661866, "grad_norm": 3.3421378135681152, "learning_rate": 0.0001176, "loss": 2.954, "step": 200 }, { "epoch": 0.031465247338760174, "grad_norm": 59.050907135009766, "learning_rate": 0.0001182, "loss": 8.3104, "step": 201 }, { "epoch": 0.03162179085785848, "grad_norm": 17.812931060791016, "learning_rate": 0.0001188, "loss": 4.2312, "step": 202 }, { "epoch": 0.03177833437695679, "grad_norm": 3.1730802059173584, "learning_rate": 0.0001194, "loss": 3.0879, "step": 203 }, { "epoch": 0.031934877896055106, "grad_norm": 2.468261241912842, "learning_rate": 0.00011999999999999999, "loss": 3.0617, "step": 204 }, { "epoch": 0.032091421415153414, "grad_norm": 6.175726890563965, "learning_rate": 0.00012059999999999999, "loss": 3.1855, "step": 205 }, { "epoch": 0.03224796493425172, "grad_norm": 3.9613351821899414, "learning_rate": 0.00012119999999999999, "loss": 3.237, "step": 206 }, { "epoch": 0.03240450845335003, "grad_norm": 3.2016634941101074, "learning_rate": 0.00012179999999999999, "loss": 3.21, "step": 207 }, { "epoch": 0.03256105197244834, "grad_norm": 2.0282530784606934, "learning_rate": 0.0001224, "loss": 3.0034, "step": 208 }, { "epoch": 0.03271759549154665, "grad_norm": 1.9264556169509888, "learning_rate": 0.00012299999999999998, "loss": 3.0214, "step": 209 }, { "epoch": 0.03287413901064496, "grad_norm": 1.9033787250518799, "learning_rate": 0.0001236, "loss": 3.0177, "step": 210 }, { "epoch": 0.03303068252974327, "grad_norm": 1.997543215751648, "learning_rate": 0.00012419999999999998, "loss": 3.0139, "step": 211 }, { "epoch": 0.03318722604884158, "grad_norm": 3.2476229667663574, "learning_rate": 0.00012479999999999997, "loss": 3.069, "step": 212 }, { "epoch": 0.033343769567939885, "grad_norm": 2.2744576930999756, "learning_rate": 0.00012539999999999999, "loss": 2.9909, "step": 213 }, { "epoch": 
0.0335003130870382, "grad_norm": 1.2331576347351074, "learning_rate": 0.00012599999999999997, "loss": 2.9783, "step": 214 }, { "epoch": 0.03365685660613651, "grad_norm": 2.7535603046417236, "learning_rate": 0.0001266, "loss": 2.9448, "step": 215 }, { "epoch": 0.033813400125234816, "grad_norm": 0.8149605989456177, "learning_rate": 0.00012719999999999997, "loss": 2.9163, "step": 216 }, { "epoch": 0.033969943644333124, "grad_norm": 12.775609970092773, "learning_rate": 0.0001278, "loss": 3.6752, "step": 217 }, { "epoch": 0.03412648716343143, "grad_norm": 3.7931580543518066, "learning_rate": 0.00012839999999999998, "loss": 3.0796, "step": 218 }, { "epoch": 0.03428303068252974, "grad_norm": 4.115076065063477, "learning_rate": 0.000129, "loss": 3.0605, "step": 219 }, { "epoch": 0.034439574201628055, "grad_norm": 0.8560850024223328, "learning_rate": 0.00012959999999999998, "loss": 2.994, "step": 220 }, { "epoch": 0.03459611772072636, "grad_norm": 4.085236072540283, "learning_rate": 0.0001302, "loss": 3.2076, "step": 221 }, { "epoch": 0.03475266123982467, "grad_norm": 1.9408749341964722, "learning_rate": 0.00013079999999999998, "loss": 3.0968, "step": 222 }, { "epoch": 0.03490920475892298, "grad_norm": 2.0618252754211426, "learning_rate": 0.0001314, "loss": 3.0112, "step": 223 }, { "epoch": 0.03506574827802129, "grad_norm": 2.527026414871216, "learning_rate": 0.00013199999999999998, "loss": 2.9357, "step": 224 }, { "epoch": 0.0352222917971196, "grad_norm": 8.99402904510498, "learning_rate": 0.0001326, "loss": 3.2706, "step": 225 }, { "epoch": 0.03537883531621791, "grad_norm": 7.271577835083008, "learning_rate": 0.00013319999999999999, "loss": 3.282, "step": 226 }, { "epoch": 0.03553537883531622, "grad_norm": 0.7550258040428162, "learning_rate": 0.0001338, "loss": 2.9374, "step": 227 }, { "epoch": 0.035691922354414526, "grad_norm": 2.4851882457733154, "learning_rate": 0.0001344, "loss": 3.0337, "step": 228 }, { "epoch": 0.035848465873512834, "grad_norm": 2.233167886734009, 
"learning_rate": 0.000135, "loss": 2.9723, "step": 229 }, { "epoch": 0.03600500939261114, "grad_norm": 1.3427929878234863, "learning_rate": 0.0001356, "loss": 3.0346, "step": 230 }, { "epoch": 0.03616155291170946, "grad_norm": 1.4141734838485718, "learning_rate": 0.0001362, "loss": 2.9946, "step": 231 }, { "epoch": 0.036318096430807766, "grad_norm": 0.9676278233528137, "learning_rate": 0.0001368, "loss": 3.0399, "step": 232 }, { "epoch": 0.036474639949906074, "grad_norm": 0.9752649664878845, "learning_rate": 0.0001374, "loss": 2.9977, "step": 233 }, { "epoch": 0.03663118346900438, "grad_norm": 1.0055956840515137, "learning_rate": 0.000138, "loss": 3.006, "step": 234 }, { "epoch": 0.03678772698810269, "grad_norm": 2.232508659362793, "learning_rate": 0.0001386, "loss": 2.9964, "step": 235 }, { "epoch": 0.036944270507201005, "grad_norm": 1.2427127361297607, "learning_rate": 0.0001392, "loss": 2.958, "step": 236 }, { "epoch": 0.03710081402629931, "grad_norm": 1.305584192276001, "learning_rate": 0.00013979999999999998, "loss": 3.0754, "step": 237 }, { "epoch": 0.03725735754539762, "grad_norm": 2.785289764404297, "learning_rate": 0.0001404, "loss": 3.0478, "step": 238 }, { "epoch": 0.03741390106449593, "grad_norm": 4.857529640197754, "learning_rate": 0.00014099999999999998, "loss": 3.2, "step": 239 }, { "epoch": 0.03757044458359424, "grad_norm": 2.906057119369507, "learning_rate": 0.00014159999999999997, "loss": 3.0873, "step": 240 }, { "epoch": 0.03772698810269255, "grad_norm": 1.3470922708511353, "learning_rate": 0.0001422, "loss": 3.0688, "step": 241 }, { "epoch": 0.03788353162179086, "grad_norm": 3.2114920616149902, "learning_rate": 0.00014279999999999997, "loss": 3.1128, "step": 242 }, { "epoch": 0.03804007514088917, "grad_norm": 2.603756904602051, "learning_rate": 0.0001434, "loss": 3.0977, "step": 243 }, { "epoch": 0.038196618659987476, "grad_norm": 4.524052143096924, "learning_rate": 0.00014399999999999998, "loss": 3.1806, "step": 244 }, { "epoch": 
0.038353162179085784, "grad_norm": 2.9187328815460205, "learning_rate": 0.0001446, "loss": 3.0789, "step": 245 }, { "epoch": 0.03850970569818409, "grad_norm": 3.1184439659118652, "learning_rate": 0.00014519999999999998, "loss": 2.9883, "step": 246 }, { "epoch": 0.03866624921728241, "grad_norm": 3.0836615562438965, "learning_rate": 0.0001458, "loss": 2.9124, "step": 247 }, { "epoch": 0.038822792736380715, "grad_norm": 2.7621877193450928, "learning_rate": 0.00014639999999999998, "loss": 2.9268, "step": 248 }, { "epoch": 0.03897933625547902, "grad_norm": 1.8909045457839966, "learning_rate": 0.000147, "loss": 2.8235, "step": 249 }, { "epoch": 0.03913587977457733, "grad_norm": 3.872973680496216, "learning_rate": 0.00014759999999999998, "loss": 2.7703, "step": 250 }, { "epoch": 0.03929242329367564, "grad_norm": 4.677459716796875, "learning_rate": 0.0001482, "loss": 3.046, "step": 251 }, { "epoch": 0.039448966812773954, "grad_norm": 3.410970449447632, "learning_rate": 0.00014879999999999998, "loss": 3.038, "step": 252 }, { "epoch": 0.03960551033187226, "grad_norm": 6.155794143676758, "learning_rate": 0.0001494, "loss": 3.2031, "step": 253 }, { "epoch": 0.03976205385097057, "grad_norm": 2.345693588256836, "learning_rate": 0.00015, "loss": 2.9807, "step": 254 }, { "epoch": 0.03991859737006888, "grad_norm": 1.6279584169387817, "learning_rate": 0.00015059999999999997, "loss": 2.9993, "step": 255 }, { "epoch": 0.040075140889167186, "grad_norm": 1.0136637687683105, "learning_rate": 0.0001512, "loss": 2.9005, "step": 256 }, { "epoch": 0.040231684408265495, "grad_norm": 1.4269553422927856, "learning_rate": 0.00015179999999999998, "loss": 2.9279, "step": 257 }, { "epoch": 0.04038822792736381, "grad_norm": 1.676174283027649, "learning_rate": 0.0001524, "loss": 2.8742, "step": 258 }, { "epoch": 0.04054477144646212, "grad_norm": 9.41197395324707, "learning_rate": 0.00015299999999999998, "loss": 3.4308, "step": 259 }, { "epoch": 0.040701314965560426, "grad_norm": 3.7625269889831543, 
"learning_rate": 0.0001536, "loss": 2.9597, "step": 260 }, { "epoch": 0.040857858484658734, "grad_norm": 1.9438997507095337, "learning_rate": 0.00015419999999999998, "loss": 2.9289, "step": 261 }, { "epoch": 0.04101440200375704, "grad_norm": 1.4095935821533203, "learning_rate": 0.0001548, "loss": 2.8869, "step": 262 }, { "epoch": 0.04117094552285536, "grad_norm": 1.280892252922058, "learning_rate": 0.00015539999999999998, "loss": 2.9053, "step": 263 }, { "epoch": 0.041327489041953665, "grad_norm": 0.9870156049728394, "learning_rate": 0.000156, "loss": 2.8905, "step": 264 }, { "epoch": 0.04148403256105197, "grad_norm": 2.140601634979248, "learning_rate": 0.00015659999999999998, "loss": 2.9066, "step": 265 }, { "epoch": 0.04164057608015028, "grad_norm": 3.046438217163086, "learning_rate": 0.0001572, "loss": 2.9958, "step": 266 }, { "epoch": 0.04179711959924859, "grad_norm": 6.271746635437012, "learning_rate": 0.0001578, "loss": 3.2312, "step": 267 }, { "epoch": 0.041953663118346904, "grad_norm": 0.49755653738975525, "learning_rate": 0.0001584, "loss": 2.928, "step": 268 }, { "epoch": 0.04211020663744521, "grad_norm": 1.545334815979004, "learning_rate": 0.000159, "loss": 2.9341, "step": 269 }, { "epoch": 0.04226675015654352, "grad_norm": 0.7185292840003967, "learning_rate": 0.0001596, "loss": 2.9309, "step": 270 }, { "epoch": 0.04242329367564183, "grad_norm": 5.970613479614258, "learning_rate": 0.0001602, "loss": 3.1542, "step": 271 }, { "epoch": 0.042579837194740136, "grad_norm": 3.6000194549560547, "learning_rate": 0.0001608, "loss": 3.0439, "step": 272 }, { "epoch": 0.042736380713838444, "grad_norm": 3.2971136569976807, "learning_rate": 0.0001614, "loss": 2.982, "step": 273 }, { "epoch": 0.04289292423293676, "grad_norm": 1.305440068244934, "learning_rate": 0.000162, "loss": 3.0709, "step": 274 }, { "epoch": 0.04304946775203507, "grad_norm": 3.8561184406280518, "learning_rate": 0.0001626, "loss": 3.0722, "step": 275 }, { "epoch": 0.043206011271133375, "grad_norm": 
4.944895267486572, "learning_rate": 0.0001632, "loss": 2.9751, "step": 276 }, { "epoch": 0.04336255479023168, "grad_norm": 5.420694351196289, "learning_rate": 0.0001638, "loss": 3.1554, "step": 277 }, { "epoch": 0.04351909830932999, "grad_norm": 1.4312067031860352, "learning_rate": 0.0001644, "loss": 3.1093, "step": 278 }, { "epoch": 0.043675641828428306, "grad_norm": 1.852827548980713, "learning_rate": 0.000165, "loss": 3.016, "step": 279 }, { "epoch": 0.043832185347526614, "grad_norm": 1.318656325340271, "learning_rate": 0.0001656, "loss": 2.9848, "step": 280 }, { "epoch": 0.04398872886662492, "grad_norm": 2.9079530239105225, "learning_rate": 0.0001662, "loss": 2.9815, "step": 281 }, { "epoch": 0.04414527238572323, "grad_norm": 2.3865318298339844, "learning_rate": 0.0001668, "loss": 2.9969, "step": 282 }, { "epoch": 0.04430181590482154, "grad_norm": 2.2731151580810547, "learning_rate": 0.0001674, "loss": 3.042, "step": 283 }, { "epoch": 0.044458359423919847, "grad_norm": 2.308046340942383, "learning_rate": 0.000168, "loss": 2.9983, "step": 284 }, { "epoch": 0.04461490294301816, "grad_norm": 2.4324429035186768, "learning_rate": 0.0001686, "loss": 3.0874, "step": 285 }, { "epoch": 0.04477144646211647, "grad_norm": 5.1173505783081055, "learning_rate": 0.00016919999999999997, "loss": 3.002, "step": 286 }, { "epoch": 0.04492798998121478, "grad_norm": 3.3832643032073975, "learning_rate": 0.00016979999999999998, "loss": 3.1237, "step": 287 }, { "epoch": 0.045084533500313086, "grad_norm": 1.6564899682998657, "learning_rate": 0.00017039999999999997, "loss": 3.1004, "step": 288 }, { "epoch": 0.045241077019411394, "grad_norm": 1.8179957866668701, "learning_rate": 0.00017099999999999998, "loss": 3.0704, "step": 289 }, { "epoch": 0.04539762053850971, "grad_norm": 1.3986287117004395, "learning_rate": 0.00017159999999999997, "loss": 3.088, "step": 290 }, { "epoch": 0.04555416405760802, "grad_norm": 1.6225603818893433, "learning_rate": 0.00017219999999999998, "loss": 2.9509, 
"step": 291 }, { "epoch": 0.045710707576706325, "grad_norm": 1.9594841003417969, "learning_rate": 0.00017279999999999997, "loss": 2.8125, "step": 292 }, { "epoch": 0.04586725109580463, "grad_norm": 2.098877191543579, "learning_rate": 0.00017339999999999996, "loss": 2.982, "step": 293 }, { "epoch": 0.04602379461490294, "grad_norm": 2.2931160926818848, "learning_rate": 0.00017399999999999997, "loss": 2.8949, "step": 294 }, { "epoch": 0.04618033813400125, "grad_norm": 1.8731071949005127, "learning_rate": 0.00017459999999999996, "loss": 3.0056, "step": 295 }, { "epoch": 0.046336881653099564, "grad_norm": 1.8290430307388306, "learning_rate": 0.00017519999999999998, "loss": 2.8271, "step": 296 }, { "epoch": 0.04649342517219787, "grad_norm": 2.112307548522949, "learning_rate": 0.00017579999999999996, "loss": 2.5751, "step": 297 }, { "epoch": 0.04664996869129618, "grad_norm": 1.703934907913208, "learning_rate": 0.00017639999999999998, "loss": 2.5481, "step": 298 }, { "epoch": 0.04680651221039449, "grad_norm": 2.3497684001922607, "learning_rate": 0.00017699999999999997, "loss": 2.3719, "step": 299 }, { "epoch": 0.046963055729492796, "grad_norm": 2.5405707359313965, "learning_rate": 0.00017759999999999998, "loss": 2.6147, "step": 300 }, { "epoch": 0.04711959924859111, "grad_norm": 41.07537841796875, "learning_rate": 0.00017819999999999997, "loss": 5.9212, "step": 301 }, { "epoch": 0.04727614276768942, "grad_norm": 3.55108904838562, "learning_rate": 0.00017879999999999998, "loss": 2.9271, "step": 302 }, { "epoch": 0.04743268628678773, "grad_norm": 16.451351165771484, "learning_rate": 0.00017939999999999997, "loss": 3.9858, "step": 303 }, { "epoch": 0.047589229805886035, "grad_norm": 1.0126781463623047, "learning_rate": 0.00017999999999999998, "loss": 2.9336, "step": 304 }, { "epoch": 0.04774577332498434, "grad_norm": 1.0213969945907593, "learning_rate": 0.00018059999999999997, "loss": 2.8967, "step": 305 }, { "epoch": 0.04790231684408266, "grad_norm": 2.1471173763275146, 
"learning_rate": 0.00018119999999999999, "loss": 2.8706, "step": 306 }, { "epoch": 0.048058860363180966, "grad_norm": 1.895684003829956, "learning_rate": 0.00018179999999999997, "loss": 2.9031, "step": 307 }, { "epoch": 0.048215403882279274, "grad_norm": 2.477410316467285, "learning_rate": 0.0001824, "loss": 2.9365, "step": 308 }, { "epoch": 0.04837194740137758, "grad_norm": 1.3221207857131958, "learning_rate": 0.00018299999999999998, "loss": 2.8607, "step": 309 }, { "epoch": 0.04852849092047589, "grad_norm": 1.6282726526260376, "learning_rate": 0.0001836, "loss": 2.9193, "step": 310 }, { "epoch": 0.0486850344395742, "grad_norm": 0.8280702829360962, "learning_rate": 0.00018419999999999998, "loss": 2.8905, "step": 311 }, { "epoch": 0.048841577958672514, "grad_norm": 1.868796706199646, "learning_rate": 0.0001848, "loss": 2.8843, "step": 312 }, { "epoch": 0.04899812147777082, "grad_norm": 2.179255723953247, "learning_rate": 0.00018539999999999998, "loss": 2.9584, "step": 313 }, { "epoch": 0.04915466499686913, "grad_norm": 1.0927103757858276, "learning_rate": 0.000186, "loss": 2.8541, "step": 314 }, { "epoch": 0.04931120851596744, "grad_norm": 1.2639646530151367, "learning_rate": 0.00018659999999999998, "loss": 2.8738, "step": 315 }, { "epoch": 0.049467752035065746, "grad_norm": 0.666633665561676, "learning_rate": 0.0001872, "loss": 2.9175, "step": 316 }, { "epoch": 0.04962429555416406, "grad_norm": 3.4493837356567383, "learning_rate": 0.00018779999999999998, "loss": 2.9048, "step": 317 }, { "epoch": 0.04978083907326237, "grad_norm": 0.9418443441390991, "learning_rate": 0.00018839999999999997, "loss": 2.8725, "step": 318 }, { "epoch": 0.04993738259236068, "grad_norm": 1.8465272188186646, "learning_rate": 0.00018899999999999999, "loss": 2.8598, "step": 319 }, { "epoch": 0.050093926111458985, "grad_norm": 1.2343814373016357, "learning_rate": 0.00018959999999999997, "loss": 2.9232, "step": 320 }, { "epoch": 0.05025046963055729, "grad_norm": 1.1628434658050537, 
"learning_rate": 0.0001902, "loss": 2.9062, "step": 321 }, { "epoch": 0.0504070131496556, "grad_norm": 3.26182222366333, "learning_rate": 0.00019079999999999998, "loss": 2.993, "step": 322 }, { "epoch": 0.050563556668753916, "grad_norm": 2.6465306282043457, "learning_rate": 0.0001914, "loss": 2.9229, "step": 323 }, { "epoch": 0.050720100187852224, "grad_norm": 1.6786047220230103, "learning_rate": 0.00019199999999999998, "loss": 2.9405, "step": 324 }, { "epoch": 0.05087664370695053, "grad_norm": 2.313570022583008, "learning_rate": 0.0001926, "loss": 2.8359, "step": 325 }, { "epoch": 0.05103318722604884, "grad_norm": 1.3497850894927979, "learning_rate": 0.00019319999999999998, "loss": 2.9089, "step": 326 }, { "epoch": 0.05118973074514715, "grad_norm": 0.9331809282302856, "learning_rate": 0.0001938, "loss": 2.8491, "step": 327 }, { "epoch": 0.05134627426424546, "grad_norm": 1.8141287565231323, "learning_rate": 0.00019439999999999998, "loss": 2.897, "step": 328 }, { "epoch": 0.05150281778334377, "grad_norm": 1.0781328678131104, "learning_rate": 0.000195, "loss": 2.8376, "step": 329 }, { "epoch": 0.05165936130244208, "grad_norm": 0.9681340456008911, "learning_rate": 0.00019559999999999998, "loss": 2.8983, "step": 330 }, { "epoch": 0.05181590482154039, "grad_norm": 4.107593059539795, "learning_rate": 0.0001962, "loss": 2.8991, "step": 331 }, { "epoch": 0.051972448340638695, "grad_norm": 2.0109505653381348, "learning_rate": 0.00019679999999999999, "loss": 2.8443, "step": 332 }, { "epoch": 0.05212899185973701, "grad_norm": 3.9468677043914795, "learning_rate": 0.0001974, "loss": 2.8434, "step": 333 }, { "epoch": 0.05228553537883532, "grad_norm": 1.277677059173584, "learning_rate": 0.000198, "loss": 2.7809, "step": 334 }, { "epoch": 0.052442078897933626, "grad_norm": 1.9392768144607544, "learning_rate": 0.0001986, "loss": 2.9161, "step": 335 }, { "epoch": 0.052598622417031934, "grad_norm": 3.3220739364624023, "learning_rate": 0.0001992, "loss": 2.7724, "step": 336 }, { 
"epoch": 0.05275516593613024, "grad_norm": 2.631103754043579, "learning_rate": 0.0001998, "loss": 2.8016, "step": 337 }, { "epoch": 0.05291170945522855, "grad_norm": 2.869077444076538, "learning_rate": 0.0002004, "loss": 2.8939, "step": 338 }, { "epoch": 0.053068252974326866, "grad_norm": 1.6763360500335693, "learning_rate": 0.000201, "loss": 2.7951, "step": 339 }, { "epoch": 0.053224796493425174, "grad_norm": 1.35333251953125, "learning_rate": 0.0002016, "loss": 2.6876, "step": 340 }, { "epoch": 0.05338134001252348, "grad_norm": 1.591059684753418, "learning_rate": 0.0002022, "loss": 2.7753, "step": 341 }, { "epoch": 0.05353788353162179, "grad_norm": 2.1885745525360107, "learning_rate": 0.0002028, "loss": 2.9564, "step": 342 }, { "epoch": 0.0536944270507201, "grad_norm": 6.893604755401611, "learning_rate": 0.00020339999999999998, "loss": 3.0515, "step": 343 }, { "epoch": 0.05385097056981841, "grad_norm": 4.528074264526367, "learning_rate": 0.000204, "loss": 2.6812, "step": 344 }, { "epoch": 0.05400751408891672, "grad_norm": 1.5735338926315308, "learning_rate": 0.00020459999999999999, "loss": 2.6635, "step": 345 }, { "epoch": 0.05416405760801503, "grad_norm": 2.046394109725952, "learning_rate": 0.0002052, "loss": 2.5343, "step": 346 }, { "epoch": 0.05432060112711334, "grad_norm": 1.8369107246398926, "learning_rate": 0.0002058, "loss": 2.4338, "step": 347 }, { "epoch": 0.054477144646211645, "grad_norm": 1.8167654275894165, "learning_rate": 0.00020639999999999998, "loss": 2.2824, "step": 348 }, { "epoch": 0.05463368816530995, "grad_norm": 3.679330825805664, "learning_rate": 0.00020699999999999996, "loss": 2.6324, "step": 349 }, { "epoch": 0.05479023168440827, "grad_norm": 3.2513327598571777, "learning_rate": 0.00020759999999999998, "loss": 2.2228, "step": 350 }, { "epoch": 0.054946775203506576, "grad_norm": 3.872000217437744, "learning_rate": 0.00020819999999999996, "loss": 2.9826, "step": 351 }, { "epoch": 0.055103318722604884, "grad_norm": 4.465301036834717, 
"learning_rate": 0.00020879999999999998, "loss": 3.3657, "step": 352 }, { "epoch": 0.05525986224170319, "grad_norm": 7.596004486083984, "learning_rate": 0.00020939999999999997, "loss": 3.4431, "step": 353 }, { "epoch": 0.0554164057608015, "grad_norm": 0.8127701282501221, "learning_rate": 0.00020999999999999998, "loss": 2.8607, "step": 354 }, { "epoch": 0.055572949279899815, "grad_norm": 3.6727442741394043, "learning_rate": 0.00021059999999999997, "loss": 2.9144, "step": 355 }, { "epoch": 0.05572949279899812, "grad_norm": 3.5418031215667725, "learning_rate": 0.00021119999999999996, "loss": 2.9496, "step": 356 }, { "epoch": 0.05588603631809643, "grad_norm": 0.981769323348999, "learning_rate": 0.00021179999999999997, "loss": 2.8433, "step": 357 }, { "epoch": 0.05604257983719474, "grad_norm": 0.742219090461731, "learning_rate": 0.00021239999999999996, "loss": 2.8561, "step": 358 }, { "epoch": 0.05619912335629305, "grad_norm": 14.127908706665039, "learning_rate": 0.00021299999999999997, "loss": 3.9582, "step": 359 }, { "epoch": 0.056355666875391355, "grad_norm": 1.440157175064087, "learning_rate": 0.00021359999999999996, "loss": 2.9154, "step": 360 }, { "epoch": 0.05651221039448967, "grad_norm": 12.539861679077148, "learning_rate": 0.00021419999999999998, "loss": 3.58, "step": 361 }, { "epoch": 0.05666875391358798, "grad_norm": 0.9830080270767212, "learning_rate": 0.00021479999999999996, "loss": 3.0251, "step": 362 }, { "epoch": 0.056825297432686286, "grad_norm": 0.5790128707885742, "learning_rate": 0.00021539999999999998, "loss": 2.8486, "step": 363 }, { "epoch": 0.056981840951784594, "grad_norm": 4.547713279724121, "learning_rate": 0.00021599999999999996, "loss": 3.0627, "step": 364 }, { "epoch": 0.0571383844708829, "grad_norm": 1.8868635892868042, "learning_rate": 0.00021659999999999998, "loss": 2.8365, "step": 365 }, { "epoch": 0.05729492798998122, "grad_norm": 1.4860725402832031, "learning_rate": 0.00021719999999999997, "loss": 2.8774, "step": 366 }, { "epoch": 
0.057451471509079526, "grad_norm": 1.064705729484558, "learning_rate": 0.00021779999999999998, "loss": 2.9113, "step": 367 }, { "epoch": 0.057608015028177834, "grad_norm": 1.752041220664978, "learning_rate": 0.00021839999999999997, "loss": 2.9108, "step": 368 }, { "epoch": 0.05776455854727614, "grad_norm": 0.8162034153938293, "learning_rate": 0.00021899999999999998, "loss": 2.8598, "step": 369 }, { "epoch": 0.05792110206637445, "grad_norm": 0.7272463440895081, "learning_rate": 0.00021959999999999997, "loss": 2.856, "step": 370 }, { "epoch": 0.058077645585472765, "grad_norm": 0.9358817338943481, "learning_rate": 0.00022019999999999999, "loss": 2.8943, "step": 371 }, { "epoch": 0.05823418910457107, "grad_norm": 0.799842119216919, "learning_rate": 0.00022079999999999997, "loss": 2.822, "step": 372 }, { "epoch": 0.05839073262366938, "grad_norm": 2.1993589401245117, "learning_rate": 0.0002214, "loss": 2.9312, "step": 373 }, { "epoch": 0.05854727614276769, "grad_norm": 7.407040596008301, "learning_rate": 0.00022199999999999998, "loss": 3.2528, "step": 374 }, { "epoch": 0.058703819661866, "grad_norm": 0.9808345437049866, "learning_rate": 0.0002226, "loss": 2.8877, "step": 375 }, { "epoch": 0.058860363180964305, "grad_norm": 1.277418613433838, "learning_rate": 0.00022319999999999998, "loss": 2.779, "step": 376 }, { "epoch": 0.05901690670006262, "grad_norm": 1.093755841255188, "learning_rate": 0.0002238, "loss": 2.8769, "step": 377 }, { "epoch": 0.05917345021916093, "grad_norm": 2.1104671955108643, "learning_rate": 0.00022439999999999998, "loss": 2.8662, "step": 378 }, { "epoch": 0.059329993738259236, "grad_norm": 1.0569154024124146, "learning_rate": 0.000225, "loss": 2.8349, "step": 379 }, { "epoch": 0.059486537257357544, "grad_norm": 1.0086591243743896, "learning_rate": 0.00022559999999999998, "loss": 2.8008, "step": 380 }, { "epoch": 0.05964308077645585, "grad_norm": 2.0983245372772217, "learning_rate": 0.00022619999999999997, "loss": 2.7307, "step": 381 }, { "epoch": 
0.05979962429555417, "grad_norm": 1.8184118270874023, "learning_rate": 0.00022679999999999998, "loss": 2.8152, "step": 382 }, { "epoch": 0.059956167814652475, "grad_norm": 1.5371543169021606, "learning_rate": 0.00022739999999999997, "loss": 2.6611, "step": 383 }, { "epoch": 0.06011271133375078, "grad_norm": 2.030109405517578, "learning_rate": 0.00022799999999999999, "loss": 2.9117, "step": 384 }, { "epoch": 0.06026925485284909, "grad_norm": 1.1472651958465576, "learning_rate": 0.00022859999999999997, "loss": 2.7593, "step": 385 }, { "epoch": 0.0604257983719474, "grad_norm": 2.547860622406006, "learning_rate": 0.0002292, "loss": 2.9494, "step": 386 }, { "epoch": 0.06058234189104571, "grad_norm": 4.287590980529785, "learning_rate": 0.00022979999999999997, "loss": 2.7367, "step": 387 }, { "epoch": 0.06073888541014402, "grad_norm": 2.5711324214935303, "learning_rate": 0.0002304, "loss": 2.8195, "step": 388 }, { "epoch": 0.06089542892924233, "grad_norm": 1.1869808435440063, "learning_rate": 0.00023099999999999998, "loss": 2.6613, "step": 389 }, { "epoch": 0.06105197244834064, "grad_norm": 2.388160467147827, "learning_rate": 0.0002316, "loss": 2.7794, "step": 390 }, { "epoch": 0.061208515967438946, "grad_norm": 2.665323257446289, "learning_rate": 0.00023219999999999998, "loss": 2.9327, "step": 391 }, { "epoch": 0.061365059486537255, "grad_norm": 1.2686117887496948, "learning_rate": 0.0002328, "loss": 2.7387, "step": 392 }, { "epoch": 0.06152160300563557, "grad_norm": 2.919185161590576, "learning_rate": 0.00023339999999999998, "loss": 2.6176, "step": 393 }, { "epoch": 0.06167814652473388, "grad_norm": 1.6040527820587158, "learning_rate": 0.000234, "loss": 2.4814, "step": 394 }, { "epoch": 0.061834690043832186, "grad_norm": 1.9547075033187866, "learning_rate": 0.00023459999999999998, "loss": 2.3291, "step": 395 }, { "epoch": 0.061991233562930494, "grad_norm": 2.361668109893799, "learning_rate": 0.0002352, "loss": 2.3533, "step": 396 }, { "epoch": 0.0621477770820288, 
"grad_norm": 3.003882646560669, "learning_rate": 0.00023579999999999999, "loss": 2.1218, "step": 397 }, { "epoch": 0.06230432060112712, "grad_norm": 2.0260565280914307, "learning_rate": 0.0002364, "loss": 2.3472, "step": 398 }, { "epoch": 0.062460864120225425, "grad_norm": 1.2082924842834473, "learning_rate": 0.000237, "loss": 2.2098, "step": 399 }, { "epoch": 0.06261740763932373, "grad_norm": 2.548179864883423, "learning_rate": 0.0002376, "loss": 2.006, "step": 400 }, { "epoch": 0.06277395115842205, "grad_norm": 25.11155128479004, "learning_rate": 0.0002382, "loss": 5.0331, "step": 401 }, { "epoch": 0.06293049467752035, "grad_norm": 7.020974159240723, "learning_rate": 0.0002388, "loss": 3.1208, "step": 402 }, { "epoch": 0.06308703819661866, "grad_norm": 5.667887210845947, "learning_rate": 0.0002394, "loss": 3.0382, "step": 403 }, { "epoch": 0.06324358171571696, "grad_norm": 1.9799600839614868, "learning_rate": 0.00023999999999999998, "loss": 2.8571, "step": 404 }, { "epoch": 0.06340012523481528, "grad_norm": 2.0452182292938232, "learning_rate": 0.0002406, "loss": 2.8546, "step": 405 }, { "epoch": 0.06355666875391358, "grad_norm": 3.450450897216797, "learning_rate": 0.00024119999999999998, "loss": 2.9338, "step": 406 }, { "epoch": 0.0637132122730119, "grad_norm": 3.7206132411956787, "learning_rate": 0.0002418, "loss": 2.8975, "step": 407 }, { "epoch": 0.06386975579211021, "grad_norm": 2.4928946495056152, "learning_rate": 0.00024239999999999998, "loss": 2.8855, "step": 408 }, { "epoch": 0.06402629931120851, "grad_norm": 1.6292774677276611, "learning_rate": 0.000243, "loss": 2.8278, "step": 409 }, { "epoch": 0.06418284283030683, "grad_norm": 1.6044974327087402, "learning_rate": 0.00024359999999999999, "loss": 2.8089, "step": 410 }, { "epoch": 0.06433938634940513, "grad_norm": 3.7000677585601807, "learning_rate": 0.00024419999999999997, "loss": 2.97, "step": 411 }, { "epoch": 0.06449592986850344, "grad_norm": 12.418329238891602, "learning_rate": 0.0002448, "loss": 
3.5765, "step": 412 }, { "epoch": 0.06465247338760176, "grad_norm": 3.708932638168335, "learning_rate": 0.00024539999999999995, "loss": 2.889, "step": 413 }, { "epoch": 0.06480901690670006, "grad_norm": 2.5494682788848877, "learning_rate": 0.00024599999999999996, "loss": 2.8552, "step": 414 }, { "epoch": 0.06496556042579837, "grad_norm": 0.5013105273246765, "learning_rate": 0.0002466, "loss": 2.8204, "step": 415 }, { "epoch": 0.06512210394489668, "grad_norm": 0.6971216201782227, "learning_rate": 0.0002472, "loss": 2.7921, "step": 416 }, { "epoch": 0.06527864746399499, "grad_norm": 1.244098424911499, "learning_rate": 0.00024779999999999995, "loss": 2.8253, "step": 417 }, { "epoch": 0.0654351909830933, "grad_norm": 1.136309027671814, "learning_rate": 0.00024839999999999997, "loss": 2.7789, "step": 418 }, { "epoch": 0.0655917345021916, "grad_norm": 1.103389859199524, "learning_rate": 0.000249, "loss": 2.7981, "step": 419 }, { "epoch": 0.06574827802128992, "grad_norm": 0.6065533757209778, "learning_rate": 0.00024959999999999994, "loss": 2.777, "step": 420 }, { "epoch": 0.06590482154038822, "grad_norm": 1.2943936586380005, "learning_rate": 0.00025019999999999996, "loss": 2.779, "step": 421 }, { "epoch": 0.06606136505948654, "grad_norm": 1.5812445878982544, "learning_rate": 0.00025079999999999997, "loss": 2.7543, "step": 422 }, { "epoch": 0.06621790857858485, "grad_norm": 0.6058809757232666, "learning_rate": 0.0002514, "loss": 2.7617, "step": 423 }, { "epoch": 0.06637445209768315, "grad_norm": 0.6701329946517944, "learning_rate": 0.00025199999999999995, "loss": 2.776, "step": 424 }, { "epoch": 0.06653099561678147, "grad_norm": 1.4818625450134277, "learning_rate": 0.00025259999999999996, "loss": 2.8081, "step": 425 }, { "epoch": 0.06668753913587977, "grad_norm": 0.9696059226989746, "learning_rate": 0.0002532, "loss": 2.7473, "step": 426 }, { "epoch": 0.06684408265497808, "grad_norm": 0.6371885538101196, "learning_rate": 0.0002538, "loss": 2.7107, "step": 427 }, { "epoch": 
0.0670006261740764, "grad_norm": 2.670564889907837, "learning_rate": 0.00025439999999999995, "loss": 2.7509, "step": 428 }, { "epoch": 0.0671571696931747, "grad_norm": 1.7934705018997192, "learning_rate": 0.00025499999999999996, "loss": 2.5508, "step": 429 }, { "epoch": 0.06731371321227302, "grad_norm": 1.4174633026123047, "learning_rate": 0.0002556, "loss": 2.7541, "step": 430 }, { "epoch": 0.06747025673137132, "grad_norm": 1.3677154779434204, "learning_rate": 0.0002562, "loss": 2.6337, "step": 431 }, { "epoch": 0.06762680025046963, "grad_norm": 3.6003217697143555, "learning_rate": 0.00025679999999999995, "loss": 2.6692, "step": 432 }, { "epoch": 0.06778334376956793, "grad_norm": 1.2599115371704102, "learning_rate": 0.00025739999999999997, "loss": 2.5543, "step": 433 }, { "epoch": 0.06793988728866625, "grad_norm": 0.8455353379249573, "learning_rate": 0.000258, "loss": 2.6359, "step": 434 }, { "epoch": 0.06809643080776456, "grad_norm": 0.9858604073524475, "learning_rate": 0.0002586, "loss": 2.7305, "step": 435 }, { "epoch": 0.06825297432686286, "grad_norm": 1.948320984840393, "learning_rate": 0.00025919999999999996, "loss": 2.7262, "step": 436 }, { "epoch": 0.06840951784596118, "grad_norm": 1.0108580589294434, "learning_rate": 0.00025979999999999997, "loss": 2.7447, "step": 437 }, { "epoch": 0.06856606136505948, "grad_norm": 1.686102032661438, "learning_rate": 0.0002604, "loss": 2.6928, "step": 438 }, { "epoch": 0.0687226048841578, "grad_norm": 1.4326525926589966, "learning_rate": 0.000261, "loss": 2.4659, "step": 439 }, { "epoch": 0.06887914840325611, "grad_norm": NaN, "learning_rate": 0.000261, "loss": 0.0, "step": 440 }, { "epoch": 0.06903569192235441, "grad_norm": 3.392507791519165, "learning_rate": 0.00026159999999999996, "loss": 2.502, "step": 441 }, { "epoch": 0.06919223544145273, "grad_norm": 1.325119137763977, "learning_rate": 0.0002622, "loss": 2.3608, "step": 442 }, { "epoch": 0.06934877896055103, "grad_norm": 2.0018181800842285, "learning_rate": 
0.0002628, "loss": 2.7061, "step": 443 }, { "epoch": 0.06950532247964934, "grad_norm": 2.351816415786743, "learning_rate": 0.00026339999999999995, "loss": 2.0728, "step": 444 }, { "epoch": 0.06966186599874766, "grad_norm": 2.392810583114624, "learning_rate": 0.00026399999999999997, "loss": 2.4649, "step": 445 }, { "epoch": 0.06981840951784596, "grad_norm": 2.2967441082000732, "learning_rate": 0.0002646, "loss": 1.9439, "step": 446 }, { "epoch": 0.06997495303694427, "grad_norm": 1.640772819519043, "learning_rate": 0.0002652, "loss": 2.0041, "step": 447 }, { "epoch": 0.07013149655604257, "grad_norm": 2.1847622394561768, "learning_rate": 0.00026579999999999996, "loss": 1.9843, "step": 448 }, { "epoch": 0.07028804007514089, "grad_norm": 1.9607410430908203, "learning_rate": 0.00026639999999999997, "loss": 2.0595, "step": 449 }, { "epoch": 0.0704445835942392, "grad_norm": 1.476098656654358, "learning_rate": 0.000267, "loss": 1.9382, "step": 450 }, { "epoch": 0.0706011271133375, "grad_norm": 5.004290580749512, "learning_rate": 0.0002676, "loss": 3.0205, "step": 451 }, { "epoch": 0.07075767063243582, "grad_norm": 11.529528617858887, "learning_rate": 0.00026819999999999996, "loss": 3.5319, "step": 452 }, { "epoch": 0.07091421415153412, "grad_norm": 1.561353325843811, "learning_rate": 0.0002688, "loss": 2.7755, "step": 453 }, { "epoch": 0.07107075767063244, "grad_norm": 1.1581677198410034, "learning_rate": 0.0002694, "loss": 2.7657, "step": 454 }, { "epoch": 0.07122730118973075, "grad_norm": 0.837934136390686, "learning_rate": 0.00027, "loss": 2.6956, "step": 455 }, { "epoch": 0.07138384470882905, "grad_norm": 1.2540568113327026, "learning_rate": 0.00027059999999999996, "loss": 2.5976, "step": 456 }, { "epoch": 0.07154038822792737, "grad_norm": 1.265945315361023, "learning_rate": 0.0002712, "loss": 2.5529, "step": 457 }, { "epoch": 0.07169693174702567, "grad_norm": 0.9604879021644592, "learning_rate": 0.0002718, "loss": 2.5779, "step": 458 }, { "epoch": 0.07185347526612398, 
"grad_norm": 0.6527476906776428, "learning_rate": 0.0002724, "loss": 2.6235, "step": 459 }, { "epoch": 0.07201001878522229, "grad_norm": 0.6187277436256409, "learning_rate": 0.00027299999999999997, "loss": 2.4643, "step": 460 }, { "epoch": 0.0721665623043206, "grad_norm": 0.8495888710021973, "learning_rate": 0.0002736, "loss": 2.4603, "step": 461 }, { "epoch": 0.07232310582341892, "grad_norm": 0.812981128692627, "learning_rate": 0.0002742, "loss": 2.4654, "step": 462 }, { "epoch": 0.07247964934251722, "grad_norm": 3.680403709411621, "learning_rate": 0.0002748, "loss": 2.7148, "step": 463 }, { "epoch": 0.07263619286161553, "grad_norm": 0.837788462638855, "learning_rate": 0.00027539999999999997, "loss": 2.4459, "step": 464 }, { "epoch": 0.07279273638071383, "grad_norm": 0.9086034893989563, "learning_rate": 0.000276, "loss": 2.3291, "step": 465 }, { "epoch": 0.07294927989981215, "grad_norm": 1.12948739528656, "learning_rate": 0.0002766, "loss": 2.4085, "step": 466 }, { "epoch": 0.07310582341891046, "grad_norm": 0.9835497140884399, "learning_rate": 0.0002772, "loss": 2.2966, "step": 467 }, { "epoch": 0.07326236693800876, "grad_norm": 2.1482276916503906, "learning_rate": 0.0002778, "loss": 2.3619, "step": 468 }, { "epoch": 0.07341891045710708, "grad_norm": 3.3188507556915283, "learning_rate": 0.0002784, "loss": 2.3791, "step": 469 }, { "epoch": 0.07357545397620538, "grad_norm": 1.2607046365737915, "learning_rate": 0.000279, "loss": 2.3225, "step": 470 }, { "epoch": 0.0737319974953037, "grad_norm": 3.159202814102173, "learning_rate": 0.00027959999999999997, "loss": 2.4383, "step": 471 }, { "epoch": 0.07388854101440201, "grad_norm": 1.8510031700134277, "learning_rate": 0.0002802, "loss": 2.3881, "step": 472 }, { "epoch": 0.07404508453350031, "grad_norm": 1.1104274988174438, "learning_rate": 0.0002808, "loss": 2.2954, "step": 473 }, { "epoch": 0.07420162805259863, "grad_norm": 1.1513651609420776, "learning_rate": 0.00028139999999999996, "loss": 2.2092, "step": 474 }, { 
"epoch": 0.07435817157169693, "grad_norm": 1.2425847053527832, "learning_rate": 0.00028199999999999997, "loss": 2.2104, "step": 475 }, { "epoch": 0.07451471509079524, "grad_norm": 1.6782479286193848, "learning_rate": 0.0002826, "loss": 2.4311, "step": 476 }, { "epoch": 0.07467125860989356, "grad_norm": 3.411275625228882, "learning_rate": 0.00028319999999999994, "loss": 2.3067, "step": 477 }, { "epoch": 0.07482780212899186, "grad_norm": 1.4562594890594482, "learning_rate": 0.00028379999999999996, "loss": 2.2745, "step": 478 }, { "epoch": 0.07498434564809017, "grad_norm": 1.7229201793670654, "learning_rate": 0.0002844, "loss": 2.2636, "step": 479 }, { "epoch": 0.07514088916718847, "grad_norm": 1.804953932762146, "learning_rate": 0.000285, "loss": 2.2304, "step": 480 }, { "epoch": 0.07529743268628679, "grad_norm": 6.132996559143066, "learning_rate": 0.00028559999999999995, "loss": 2.9409, "step": 481 }, { "epoch": 0.0754539762053851, "grad_norm": 1.6565462350845337, "learning_rate": 0.00028619999999999996, "loss": 2.2299, "step": 482 }, { "epoch": 0.0756105197244834, "grad_norm": 1.50054132938385, "learning_rate": 0.0002868, "loss": 2.3357, "step": 483 }, { "epoch": 0.07576706324358172, "grad_norm": 2.0970003604888916, "learning_rate": 0.00028739999999999994, "loss": 2.4912, "step": 484 }, { "epoch": 0.07592360676268002, "grad_norm": 1.7060633897781372, "learning_rate": 0.00028799999999999995, "loss": 2.2346, "step": 485 }, { "epoch": 0.07608015028177834, "grad_norm": 3.9570698738098145, "learning_rate": 0.00028859999999999997, "loss": 2.2125, "step": 486 }, { "epoch": 0.07623669380087664, "grad_norm": 3.4521429538726807, "learning_rate": 0.0002892, "loss": 2.4241, "step": 487 }, { "epoch": 0.07639323731997495, "grad_norm": 2.9334945678710938, "learning_rate": 0.00028979999999999994, "loss": 2.4198, "step": 488 }, { "epoch": 0.07654978083907327, "grad_norm": 2.5098342895507812, "learning_rate": 0.00029039999999999996, "loss": 2.3674, "step": 489 }, { "epoch": 
0.07670632435817157, "grad_norm": 2.8507261276245117, "learning_rate": 0.00029099999999999997, "loss": 2.1336, "step": 490 }, { "epoch": 0.07686286787726988, "grad_norm": 2.2666025161743164, "learning_rate": 0.0002916, "loss": 2.3893, "step": 491 }, { "epoch": 0.07701941139636818, "grad_norm": 2.3513152599334717, "learning_rate": 0.00029219999999999995, "loss": 1.7857, "step": 492 }, { "epoch": 0.0771759549154665, "grad_norm": 2.0074493885040283, "learning_rate": 0.00029279999999999996, "loss": 2.3626, "step": 493 }, { "epoch": 0.07733249843456481, "grad_norm": 1.9822458028793335, "learning_rate": 0.0002934, "loss": 2.0882, "step": 494 }, { "epoch": 0.07748904195366312, "grad_norm": 3.779900074005127, "learning_rate": 0.000294, "loss": 2.1448, "step": 495 }, { "epoch": 0.07764558547276143, "grad_norm": 2.198042869567871, "learning_rate": 0.00029459999999999995, "loss": 1.6344, "step": 496 }, { "epoch": 0.07780212899185973, "grad_norm": 1.7770249843597412, "learning_rate": 0.00029519999999999997, "loss": 1.8025, "step": 497 }, { "epoch": 0.07795867251095805, "grad_norm": 3.967764139175415, "learning_rate": 0.0002958, "loss": 1.6183, "step": 498 }, { "epoch": 0.07811521603005636, "grad_norm": 3.88845157623291, "learning_rate": 0.0002964, "loss": 1.6993, "step": 499 }, { "epoch": 0.07827175954915466, "grad_norm": 4.8480963706970215, "learning_rate": 0.00029699999999999996, "loss": 1.5443, "step": 500 }, { "epoch": 0.07842830306825298, "grad_norm": 1.4311107397079468, "learning_rate": 0.00029759999999999997, "loss": 2.4391, "step": 501 }, { "epoch": 0.07858484658735128, "grad_norm": 1.0577871799468994, "learning_rate": 0.0002982, "loss": 2.1257, "step": 502 }, { "epoch": 0.0787413901064496, "grad_norm": 0.7990356087684631, "learning_rate": 0.0002988, "loss": 2.0302, "step": 503 }, { "epoch": 0.07889793362554791, "grad_norm": 0.9843720197677612, "learning_rate": 0.00029939999999999996, "loss": 2.0466, "step": 504 }, { "epoch": 0.07905447714464621, "grad_norm": 
0.8615551590919495, "learning_rate": 0.0003, "loss": 1.7693, "step": 505 }, { "epoch": 0.07921102066374452, "grad_norm": 1.1965469121932983, "learning_rate": 0.0002999755620723362, "loss": 1.7733, "step": 506 }, { "epoch": 0.07936756418284283, "grad_norm": 1.6512717008590698, "learning_rate": 0.0002999511241446725, "loss": 1.8785, "step": 507 }, { "epoch": 0.07952410770194114, "grad_norm": 1.4600224494934082, "learning_rate": 0.0002999266862170088, "loss": 1.807, "step": 508 }, { "epoch": 0.07968065122103946, "grad_norm": 0.969097375869751, "learning_rate": 0.00029990224828934503, "loss": 1.6916, "step": 509 }, { "epoch": 0.07983719474013776, "grad_norm": 10.695720672607422, "learning_rate": 0.0002998778103616813, "loss": 2.2747, "step": 510 }, { "epoch": 0.07999373825923607, "grad_norm": 1.4272046089172363, "learning_rate": 0.0002998533724340176, "loss": 1.7943, "step": 511 }, { "epoch": 0.08015028177833437, "grad_norm": 1.0465346574783325, "learning_rate": 0.00029982893450635384, "loss": 1.6995, "step": 512 }, { "epoch": 0.08030682529743269, "grad_norm": 1.2503383159637451, "learning_rate": 0.0002998044965786901, "loss": 1.6331, "step": 513 }, { "epoch": 0.08046336881653099, "grad_norm": 1.0055828094482422, "learning_rate": 0.00029978005865102634, "loss": 1.593, "step": 514 }, { "epoch": 0.0806199123356293, "grad_norm": 0.7897710800170898, "learning_rate": 0.00029975562072336265, "loss": 1.6474, "step": 515 }, { "epoch": 0.08077645585472762, "grad_norm": 2.2324774265289307, "learning_rate": 0.0002997311827956989, "loss": 1.8418, "step": 516 }, { "epoch": 0.08093299937382592, "grad_norm": 1.3233245611190796, "learning_rate": 0.00029970674486803515, "loss": 1.5869, "step": 517 }, { "epoch": 0.08108954289292424, "grad_norm": 1.2391108274459839, "learning_rate": 0.00029968230694037146, "loss": 1.604, "step": 518 }, { "epoch": 0.08124608641202254, "grad_norm": 1.746536135673523, "learning_rate": 0.0002996578690127077, "loss": 1.7185, "step": 519 }, { "epoch": 
0.08140262993112085, "grad_norm": 2.1696672439575195, "learning_rate": 0.00029963343108504396, "loss": 1.7549, "step": 520 }, { "epoch": 0.08155917345021917, "grad_norm": 2.327106237411499, "learning_rate": 0.00029960899315738026, "loss": 1.7399, "step": 521 }, { "epoch": 0.08171571696931747, "grad_norm": 2.274477481842041, "learning_rate": 0.00029958455522971646, "loss": 1.6924, "step": 522 }, { "epoch": 0.08187226048841578, "grad_norm": 2.649080991744995, "learning_rate": 0.00029956011730205277, "loss": 1.7547, "step": 523 }, { "epoch": 0.08202880400751408, "grad_norm": 2.5556795597076416, "learning_rate": 0.000299535679374389, "loss": 1.7752, "step": 524 }, { "epoch": 0.0821853475266124, "grad_norm": 3.2737300395965576, "learning_rate": 0.00029951124144672527, "loss": 2.1346, "step": 525 }, { "epoch": 0.08234189104571071, "grad_norm": 1.695064663887024, "learning_rate": 0.0002994868035190616, "loss": 1.6453, "step": 526 }, { "epoch": 0.08249843456480901, "grad_norm": 9.204750061035156, "learning_rate": 0.0002994623655913978, "loss": 2.3186, "step": 527 }, { "epoch": 0.08265497808390733, "grad_norm": 3.981736660003662, "learning_rate": 0.0002994379276637341, "loss": 2.1661, "step": 528 }, { "epoch": 0.08281152160300563, "grad_norm": 4.276581764221191, "learning_rate": 0.00029941348973607033, "loss": 1.9903, "step": 529 }, { "epoch": 0.08296806512210395, "grad_norm": 3.1463425159454346, "learning_rate": 0.00029938905180840663, "loss": 1.9789, "step": 530 }, { "epoch": 0.08312460864120226, "grad_norm": 2.7212626934051514, "learning_rate": 0.0002993646138807429, "loss": 1.916, "step": 531 }, { "epoch": 0.08328115216030056, "grad_norm": 3.936119794845581, "learning_rate": 0.00029934017595307914, "loss": 2.077, "step": 532 }, { "epoch": 0.08343769567939888, "grad_norm": 3.289414882659912, "learning_rate": 0.00029931573802541544, "loss": 1.8905, "step": 533 }, { "epoch": 0.08359423919849718, "grad_norm": 4.605769157409668, "learning_rate": 0.0002992913000977517, 
"loss": 1.9716, "step": 534 }, { "epoch": 0.08375078271759549, "grad_norm": 1.6472567319869995, "learning_rate": 0.00029926686217008794, "loss": 1.9615, "step": 535 }, { "epoch": 0.08390732623669381, "grad_norm": 3.2355268001556396, "learning_rate": 0.00029924242424242425, "loss": 1.9301, "step": 536 }, { "epoch": 0.08406386975579211, "grad_norm": 1.935306429862976, "learning_rate": 0.00029921798631476045, "loss": 1.8207, "step": 537 }, { "epoch": 0.08422041327489042, "grad_norm": 2.387748956680298, "learning_rate": 0.00029919354838709675, "loss": 1.9752, "step": 538 }, { "epoch": 0.08437695679398872, "grad_norm": 3.236212730407715, "learning_rate": 0.000299169110459433, "loss": 2.2058, "step": 539 }, { "epoch": 0.08453350031308704, "grad_norm": 3.333184003829956, "learning_rate": 0.00029914467253176926, "loss": 2.0538, "step": 540 }, { "epoch": 0.08469004383218534, "grad_norm": 2.5119690895080566, "learning_rate": 0.00029912023460410556, "loss": 1.9744, "step": 541 }, { "epoch": 0.08484658735128366, "grad_norm": 3.832310438156128, "learning_rate": 0.0002990957966764418, "loss": 1.9949, "step": 542 }, { "epoch": 0.08500313087038197, "grad_norm": 3.179901599884033, "learning_rate": 0.00029907135874877806, "loss": 1.8598, "step": 543 }, { "epoch": 0.08515967438948027, "grad_norm": 4.294993877410889, "learning_rate": 0.00029904692082111437, "loss": 2.1715, "step": 544 }, { "epoch": 0.08531621790857859, "grad_norm": 5.152624607086182, "learning_rate": 0.0002990224828934506, "loss": 2.3794, "step": 545 }, { "epoch": 0.08547276142767689, "grad_norm": 3.927077054977417, "learning_rate": 0.00029899804496578687, "loss": 1.3454, "step": 546 }, { "epoch": 0.0856293049467752, "grad_norm": 2.089223623275757, "learning_rate": 0.0002989736070381231, "loss": 1.8446, "step": 547 }, { "epoch": 0.08578584846587352, "grad_norm": 3.640803813934326, "learning_rate": 0.00029894916911045943, "loss": 1.7771, "step": 548 }, { "epoch": 0.08594239198497182, "grad_norm": 2.4625141620635986, 
"learning_rate": 0.0002989247311827957, "loss": 1.6782, "step": 549 }, { "epoch": 0.08609893550407013, "grad_norm": 4.571941375732422, "learning_rate": 0.00029890029325513193, "loss": 2.3454, "step": 550 }, { "epoch": 0.08625547902316844, "grad_norm": 1.4779797792434692, "learning_rate": 0.00029887585532746824, "loss": 1.7439, "step": 551 }, { "epoch": 0.08641202254226675, "grad_norm": 1.2959303855895996, "learning_rate": 0.0002988514173998045, "loss": 1.6772, "step": 552 }, { "epoch": 0.08656856606136507, "grad_norm": 1.5447274446487427, "learning_rate": 0.00029882697947214074, "loss": 1.6696, "step": 553 }, { "epoch": 0.08672510958046337, "grad_norm": 1.5669810771942139, "learning_rate": 0.000298802541544477, "loss": 1.4124, "step": 554 }, { "epoch": 0.08688165309956168, "grad_norm": 1.109596848487854, "learning_rate": 0.00029877810361681324, "loss": 1.2168, "step": 555 }, { "epoch": 0.08703819661865998, "grad_norm": 1.1062586307525635, "learning_rate": 0.00029875366568914955, "loss": 1.35, "step": 556 }, { "epoch": 0.0871947401377583, "grad_norm": 1.2158498764038086, "learning_rate": 0.0002987292277614858, "loss": 1.2897, "step": 557 }, { "epoch": 0.08735128365685661, "grad_norm": 1.4337570667266846, "learning_rate": 0.00029870478983382205, "loss": 1.3757, "step": 558 }, { "epoch": 0.08750782717595491, "grad_norm": 1.0046571493148804, "learning_rate": 0.00029868035190615835, "loss": 1.5378, "step": 559 }, { "epoch": 0.08766437069505323, "grad_norm": 1.03334641456604, "learning_rate": 0.0002986559139784946, "loss": 1.2713, "step": 560 }, { "epoch": 0.08782091421415153, "grad_norm": 1.219976782798767, "learning_rate": 0.00029863147605083086, "loss": 1.3718, "step": 561 }, { "epoch": 0.08797745773324984, "grad_norm": 1.4024769067764282, "learning_rate": 0.0002986070381231671, "loss": 1.3139, "step": 562 }, { "epoch": 0.08813400125234815, "grad_norm": 1.8966326713562012, "learning_rate": 0.0002985826001955034, "loss": 1.4525, "step": 563 }, { "epoch": 
0.08829054477144646, "grad_norm": 1.385165810585022, "learning_rate": 0.00029855816226783966, "loss": 1.2881, "step": 564 }, { "epoch": 0.08844708829054478, "grad_norm": 1.713485598564148, "learning_rate": 0.0002985337243401759, "loss": 1.1926, "step": 565 }, { "epoch": 0.08860363180964308, "grad_norm": 1.2496473789215088, "learning_rate": 0.0002985092864125122, "loss": 1.3264, "step": 566 }, { "epoch": 0.08876017532874139, "grad_norm": 2.36457896232605, "learning_rate": 0.00029848484848484847, "loss": 1.5377, "step": 567 }, { "epoch": 0.08891671884783969, "grad_norm": 1.7575597763061523, "learning_rate": 0.0002984604105571847, "loss": 1.562, "step": 568 }, { "epoch": 0.08907326236693801, "grad_norm": 1.9847209453582764, "learning_rate": 0.00029843597262952103, "loss": 1.2985, "step": 569 }, { "epoch": 0.08922980588603632, "grad_norm": 1.8547005653381348, "learning_rate": 0.0002984115347018572, "loss": 1.4982, "step": 570 }, { "epoch": 0.08938634940513462, "grad_norm": 1.179679036140442, "learning_rate": 0.00029838709677419353, "loss": 1.3506, "step": 571 }, { "epoch": 0.08954289292423294, "grad_norm": 2.9201154708862305, "learning_rate": 0.0002983626588465298, "loss": 1.6808, "step": 572 }, { "epoch": 0.08969943644333124, "grad_norm": 2.8319106101989746, "learning_rate": 0.00029833822091886603, "loss": 1.6371, "step": 573 }, { "epoch": 0.08985597996242956, "grad_norm": 3.5341687202453613, "learning_rate": 0.00029831378299120234, "loss": 2.353, "step": 574 }, { "epoch": 0.09001252348152787, "grad_norm": 1.8579005002975464, "learning_rate": 0.0002982893450635386, "loss": 1.3278, "step": 575 }, { "epoch": 0.09016906700062617, "grad_norm": 3.4177498817443848, "learning_rate": 0.00029826490713587484, "loss": 1.9979, "step": 576 }, { "epoch": 0.09032561051972449, "grad_norm": 3.2669591903686523, "learning_rate": 0.0002982404692082111, "loss": 1.9626, "step": 577 }, { "epoch": 0.09048215403882279, "grad_norm": 1.9549978971481323, "learning_rate": 0.0002982160312805474, 
"loss": 1.724, "step": 578 }, { "epoch": 0.0906386975579211, "grad_norm": 1.7460333108901978, "learning_rate": 0.00029819159335288365, "loss": 1.3989, "step": 579 }, { "epoch": 0.09079524107701942, "grad_norm": 3.614212989807129, "learning_rate": 0.0002981671554252199, "loss": 1.8983, "step": 580 }, { "epoch": 0.09095178459611772, "grad_norm": 3.4498374462127686, "learning_rate": 0.0002981427174975562, "loss": 1.606, "step": 581 }, { "epoch": 0.09110832811521603, "grad_norm": 4.136172771453857, "learning_rate": 0.00029811827956989246, "loss": 1.8912, "step": 582 }, { "epoch": 0.09126487163431433, "grad_norm": 3.7499725818634033, "learning_rate": 0.0002980938416422287, "loss": 1.941, "step": 583 }, { "epoch": 0.09142141515341265, "grad_norm": 4.676743507385254, "learning_rate": 0.000298069403714565, "loss": 1.6427, "step": 584 }, { "epoch": 0.09157795867251096, "grad_norm": 2.1480143070220947, "learning_rate": 0.0002980449657869012, "loss": 1.8666, "step": 585 }, { "epoch": 0.09173450219160927, "grad_norm": 2.891366481781006, "learning_rate": 0.0002980205278592375, "loss": 2.0098, "step": 586 }, { "epoch": 0.09189104571070758, "grad_norm": 2.557764768600464, "learning_rate": 0.00029799608993157377, "loss": 1.7633, "step": 587 }, { "epoch": 0.09204758922980588, "grad_norm": 2.8896427154541016, "learning_rate": 0.00029797165200391, "loss": 1.7614, "step": 588 }, { "epoch": 0.0922041327489042, "grad_norm": 2.5814003944396973, "learning_rate": 0.0002979472140762463, "loss": 1.5253, "step": 589 }, { "epoch": 0.0923606762680025, "grad_norm": 8.051748275756836, "learning_rate": 0.0002979227761485826, "loss": 2.0882, "step": 590 }, { "epoch": 0.09251721978710081, "grad_norm": 2.6856775283813477, "learning_rate": 0.00029789833822091883, "loss": 1.7916, "step": 591 }, { "epoch": 0.09267376330619913, "grad_norm": 2.2569031715393066, "learning_rate": 0.00029787390029325513, "loss": 1.8805, "step": 592 }, { "epoch": 0.09283030682529743, "grad_norm": 11.610511779785156, 
"learning_rate": 0.0002978494623655914, "loss": 1.635, "step": 593 }, { "epoch": 0.09298685034439574, "grad_norm": 2.5398268699645996, "learning_rate": 0.00029782502443792764, "loss": 1.8779, "step": 594 }, { "epoch": 0.09314339386349405, "grad_norm": 2.323976516723633, "learning_rate": 0.0002978005865102639, "loss": 1.9912, "step": 595 }, { "epoch": 0.09329993738259236, "grad_norm": 2.0534749031066895, "learning_rate": 0.0002977761485826002, "loss": 1.0067, "step": 596 }, { "epoch": 0.09345648090169068, "grad_norm": 3.8560521602630615, "learning_rate": 0.00029775171065493644, "loss": 1.5773, "step": 597 }, { "epoch": 0.09361302442078898, "grad_norm": 3.9110920429229736, "learning_rate": 0.0002977272727272727, "loss": 1.7498, "step": 598 }, { "epoch": 0.09376956793988729, "grad_norm": 3.5379350185394287, "learning_rate": 0.000297702834799609, "loss": 1.7439, "step": 599 }, { "epoch": 0.09392611145898559, "grad_norm": 3.4338223934173584, "learning_rate": 0.0002976783968719452, "loss": 1.3979, "step": 600 }, { "epoch": 0.09408265497808391, "grad_norm": 1.8097113370895386, "learning_rate": 0.0002976539589442815, "loss": 1.4324, "step": 601 }, { "epoch": 0.09423919849718222, "grad_norm": 1.1481614112854004, "learning_rate": 0.00029762952101661775, "loss": 1.3141, "step": 602 }, { "epoch": 0.09439574201628052, "grad_norm": 0.7774419784545898, "learning_rate": 0.000297605083088954, "loss": 1.1048, "step": 603 }, { "epoch": 0.09455228553537884, "grad_norm": 1.0336264371871948, "learning_rate": 0.0002975806451612903, "loss": 1.2183, "step": 604 }, { "epoch": 0.09470882905447714, "grad_norm": 0.7348364591598511, "learning_rate": 0.00029755620723362656, "loss": 1.0482, "step": 605 }, { "epoch": 0.09486537257357545, "grad_norm": 0.8298443555831909, "learning_rate": 0.0002975317693059628, "loss": 1.0892, "step": 606 }, { "epoch": 0.09502191609267377, "grad_norm": 0.7758945226669312, "learning_rate": 0.0002975073313782991, "loss": 0.9726, "step": 607 }, { "epoch": 
0.09517845961177207, "grad_norm": 1.721008062362671, "learning_rate": 0.00029748289345063537, "loss": 1.0492, "step": 608 }, { "epoch": 0.09533500313087039, "grad_norm": 1.345603585243225, "learning_rate": 0.0002974584555229716, "loss": 1.0549, "step": 609 }, { "epoch": 0.09549154664996869, "grad_norm": 1.4059998989105225, "learning_rate": 0.00029743401759530787, "loss": 1.0506, "step": 610 }, { "epoch": 0.095648090169067, "grad_norm": 1.0252139568328857, "learning_rate": 0.0002974095796676442, "loss": 1.0309, "step": 611 }, { "epoch": 0.09580463368816532, "grad_norm": 1.4364956617355347, "learning_rate": 0.00029738514173998043, "loss": 1.0449, "step": 612 }, { "epoch": 0.09596117720726362, "grad_norm": 1.9643033742904663, "learning_rate": 0.0002973607038123167, "loss": 1.2821, "step": 613 }, { "epoch": 0.09611772072636193, "grad_norm": 1.3634884357452393, "learning_rate": 0.000297336265884653, "loss": 1.2619, "step": 614 }, { "epoch": 0.09627426424546023, "grad_norm": 2.063399314880371, "learning_rate": 0.00029731182795698924, "loss": 1.4142, "step": 615 }, { "epoch": 0.09643080776455855, "grad_norm": 1.287653923034668, "learning_rate": 0.0002972873900293255, "loss": 1.0253, "step": 616 }, { "epoch": 0.09658735128365685, "grad_norm": 1.6113243103027344, "learning_rate": 0.00029726295210166174, "loss": 1.2315, "step": 617 }, { "epoch": 0.09674389480275516, "grad_norm": 1.7345733642578125, "learning_rate": 0.000297238514173998, "loss": 1.1453, "step": 618 }, { "epoch": 0.09690043832185348, "grad_norm": 1.8146443367004395, "learning_rate": 0.0002972140762463343, "loss": 1.0814, "step": 619 }, { "epoch": 0.09705698184095178, "grad_norm": 1.6718642711639404, "learning_rate": 0.00029718963831867055, "loss": 1.4222, "step": 620 }, { "epoch": 0.0972135253600501, "grad_norm": 1.9962431192398071, "learning_rate": 0.0002971652003910068, "loss": 1.5162, "step": 621 }, { "epoch": 0.0973700688791484, "grad_norm": 1.7549268007278442, "learning_rate": 0.0002971407624633431, 
"loss": 1.5312, "step": 622 }, { "epoch": 0.09752661239824671, "grad_norm": 2.5582468509674072, "learning_rate": 0.00029711632453567936, "loss": 1.5691, "step": 623 }, { "epoch": 0.09768315591734503, "grad_norm": 2.2081313133239746, "learning_rate": 0.0002970918866080156, "loss": 1.1799, "step": 624 }, { "epoch": 0.09783969943644333, "grad_norm": 1.4021954536437988, "learning_rate": 0.00029706744868035186, "loss": 1.1547, "step": 625 }, { "epoch": 0.09799624295554164, "grad_norm": 1.895840048789978, "learning_rate": 0.00029704301075268816, "loss": 1.5232, "step": 626 }, { "epoch": 0.09815278647463994, "grad_norm": 2.2723934650421143, "learning_rate": 0.0002970185728250244, "loss": 1.6906, "step": 627 }, { "epoch": 0.09830932999373826, "grad_norm": 2.5103864669799805, "learning_rate": 0.00029699413489736067, "loss": 1.755, "step": 628 }, { "epoch": 0.09846587351283657, "grad_norm": 1.7477445602416992, "learning_rate": 0.00029696969696969697, "loss": 1.3139, "step": 629 }, { "epoch": 0.09862241703193488, "grad_norm": 2.9243004322052, "learning_rate": 0.0002969452590420332, "loss": 1.5794, "step": 630 }, { "epoch": 0.09877896055103319, "grad_norm": 3.4790265560150146, "learning_rate": 0.0002969208211143695, "loss": 1.5047, "step": 631 }, { "epoch": 0.09893550407013149, "grad_norm": 2.3917460441589355, "learning_rate": 0.0002968963831867058, "loss": 1.9341, "step": 632 }, { "epoch": 0.0990920475892298, "grad_norm": 3.003119707107544, "learning_rate": 0.000296871945259042, "loss": 1.6668, "step": 633 }, { "epoch": 0.09924859110832812, "grad_norm": 1.7967157363891602, "learning_rate": 0.0002968475073313783, "loss": 1.3301, "step": 634 }, { "epoch": 0.09940513462742642, "grad_norm": 9.354352951049805, "learning_rate": 0.00029682306940371453, "loss": 2.0308, "step": 635 }, { "epoch": 0.09956167814652474, "grad_norm": 2.55900502204895, "learning_rate": 0.0002967986314760508, "loss": 2.2328, "step": 636 }, { "epoch": 0.09971822166562304, "grad_norm": 3.0861406326293945, 
"learning_rate": 0.0002967741935483871, "loss": 1.5009, "step": 637 }, { "epoch": 0.09987476518472135, "grad_norm": 2.7047877311706543, "learning_rate": 0.00029674975562072334, "loss": 1.8303, "step": 638 }, { "epoch": 0.10003130870381967, "grad_norm": 1.7027578353881836, "learning_rate": 0.0002967253176930596, "loss": 1.4659, "step": 639 }, { "epoch": 0.10018785222291797, "grad_norm": 3.0926079750061035, "learning_rate": 0.0002967008797653959, "loss": 1.7596, "step": 640 }, { "epoch": 0.10034439574201628, "grad_norm": 3.1565775871276855, "learning_rate": 0.00029667644183773215, "loss": 2.2201, "step": 641 }, { "epoch": 0.10050093926111459, "grad_norm": 2.1882317066192627, "learning_rate": 0.0002966520039100684, "loss": 1.4988, "step": 642 }, { "epoch": 0.1006574827802129, "grad_norm": 3.5301172733306885, "learning_rate": 0.00029662756598240465, "loss": 1.6186, "step": 643 }, { "epoch": 0.1008140262993112, "grad_norm": 3.971935272216797, "learning_rate": 0.00029660312805474096, "loss": 2.141, "step": 644 }, { "epoch": 0.10097056981840952, "grad_norm": 4.350378513336182, "learning_rate": 0.0002965786901270772, "loss": 1.8005, "step": 645 }, { "epoch": 0.10112711333750783, "grad_norm": 4.860312461853027, "learning_rate": 0.00029655425219941346, "loss": 1.7147, "step": 646 }, { "epoch": 0.10128365685660613, "grad_norm": 4.879358768463135, "learning_rate": 0.00029652981427174977, "loss": 1.8563, "step": 647 }, { "epoch": 0.10144020037570445, "grad_norm": 2.9095821380615234, "learning_rate": 0.00029650537634408596, "loss": 1.569, "step": 648 }, { "epoch": 0.10159674389480275, "grad_norm": 2.8750619888305664, "learning_rate": 0.00029648093841642227, "loss": 1.724, "step": 649 }, { "epoch": 0.10175328741390106, "grad_norm": 4.468817234039307, "learning_rate": 0.0002964565004887585, "loss": 1.792, "step": 650 }, { "epoch": 0.10190983093299938, "grad_norm": 2.051830768585205, "learning_rate": 0.00029643206256109477, "loss": 1.322, "step": 651 }, { "epoch": 
0.10206637445209768, "grad_norm": 1.3749841451644897, "learning_rate": 0.0002964076246334311, "loss": 1.0688, "step": 652 }, { "epoch": 0.102222917971196, "grad_norm": 1.0187915563583374, "learning_rate": 0.0002963831867057673, "loss": 1.0538, "step": 653 }, { "epoch": 0.1023794614902943, "grad_norm": 0.9093784689903259, "learning_rate": 0.0002963587487781036, "loss": 0.894, "step": 654 }, { "epoch": 0.10253600500939261, "grad_norm": 0.7980338931083679, "learning_rate": 0.0002963343108504399, "loss": 1.0852, "step": 655 }, { "epoch": 0.10269254852849093, "grad_norm": 1.0717920064926147, "learning_rate": 0.00029630987292277613, "loss": 0.9796, "step": 656 }, { "epoch": 0.10284909204758923, "grad_norm": 1.0452539920806885, "learning_rate": 0.0002962854349951124, "loss": 0.9234, "step": 657 }, { "epoch": 0.10300563556668754, "grad_norm": 1.031282901763916, "learning_rate": 0.00029626099706744864, "loss": 1.0287, "step": 658 }, { "epoch": 0.10316217908578584, "grad_norm": 1.1783666610717773, "learning_rate": 0.00029623655913978494, "loss": 0.8954, "step": 659 }, { "epoch": 0.10331872260488416, "grad_norm": 0.8439245223999023, "learning_rate": 0.0002962121212121212, "loss": 0.7738, "step": 660 }, { "epoch": 0.10347526612398247, "grad_norm": 0.914787769317627, "learning_rate": 0.00029618768328445745, "loss": 0.8843, "step": 661 }, { "epoch": 0.10363180964308077, "grad_norm": 1.0592864751815796, "learning_rate": 0.00029616324535679375, "loss": 1.0231, "step": 662 }, { "epoch": 0.10378835316217909, "grad_norm": 1.1694962978363037, "learning_rate": 0.00029613880742913, "loss": 1.096, "step": 663 }, { "epoch": 0.10394489668127739, "grad_norm": 2.1501312255859375, "learning_rate": 0.00029611436950146625, "loss": 0.9233, "step": 664 }, { "epoch": 0.1041014402003757, "grad_norm": 0.7547603249549866, "learning_rate": 0.0002960899315738025, "loss": 0.9509, "step": 665 }, { "epoch": 0.10425798371947402, "grad_norm": 1.070226788520813, "learning_rate": 0.00029606549364613876, 
"loss": 0.865, "step": 666 }, { "epoch": 0.10441452723857232, "grad_norm": 3.9022319316864014, "learning_rate": 0.00029604105571847506, "loss": 1.893, "step": 667 }, { "epoch": 0.10457107075767064, "grad_norm": 1.9020326137542725, "learning_rate": 0.0002960166177908113, "loss": 1.1844, "step": 668 }, { "epoch": 0.10472761427676894, "grad_norm": 1.318651795387268, "learning_rate": 0.00029599217986314756, "loss": 1.0922, "step": 669 }, { "epoch": 0.10488415779586725, "grad_norm": 1.523789405822754, "learning_rate": 0.00029596774193548387, "loss": 1.0611, "step": 670 }, { "epoch": 0.10504070131496555, "grad_norm": 1.3359650373458862, "learning_rate": 0.0002959433040078201, "loss": 1.0302, "step": 671 }, { "epoch": 0.10519724483406387, "grad_norm": 1.3721380233764648, "learning_rate": 0.00029591886608015637, "loss": 1.0434, "step": 672 }, { "epoch": 0.10535378835316218, "grad_norm": 1.5583863258361816, "learning_rate": 0.0002958944281524926, "loss": 1.4901, "step": 673 }, { "epoch": 0.10551033187226048, "grad_norm": 3.990788221359253, "learning_rate": 0.00029586999022482893, "loss": 1.4394, "step": 674 }, { "epoch": 0.1056668753913588, "grad_norm": 2.45947003364563, "learning_rate": 0.0002958455522971652, "loss": 1.4246, "step": 675 }, { "epoch": 0.1058234189104571, "grad_norm": 2.0882105827331543, "learning_rate": 0.00029582111436950143, "loss": 1.3557, "step": 676 }, { "epoch": 0.10597996242955542, "grad_norm": 1.8049339056015015, "learning_rate": 0.00029579667644183774, "loss": 0.9934, "step": 677 }, { "epoch": 0.10613650594865373, "grad_norm": 2.7573509216308594, "learning_rate": 0.000295772238514174, "loss": 1.2822, "step": 678 }, { "epoch": 0.10629304946775203, "grad_norm": 1.7646522521972656, "learning_rate": 0.00029574780058651024, "loss": 1.2641, "step": 679 }, { "epoch": 0.10644959298685035, "grad_norm": 3.6935081481933594, "learning_rate": 0.00029572336265884654, "loss": 1.6484, "step": 680 }, { "epoch": 0.10660613650594865, "grad_norm": 3.90690541267395, 
"learning_rate": 0.00029569892473118274, "loss": 1.738, "step": 681 }, { "epoch": 0.10676268002504696, "grad_norm": 3.629829168319702, "learning_rate": 0.00029567448680351905, "loss": 1.2565, "step": 682 }, { "epoch": 0.10691922354414528, "grad_norm": 2.447295904159546, "learning_rate": 0.0002956500488758553, "loss": 1.3839, "step": 683 }, { "epoch": 0.10707576706324358, "grad_norm": 3.1187667846679688, "learning_rate": 0.00029562561094819155, "loss": 1.2876, "step": 684 }, { "epoch": 0.1072323105823419, "grad_norm": 3.4593164920806885, "learning_rate": 0.00029560117302052785, "loss": 1.8006, "step": 685 }, { "epoch": 0.1073888541014402, "grad_norm": 2.576993465423584, "learning_rate": 0.0002955767350928641, "loss": 1.7238, "step": 686 }, { "epoch": 0.10754539762053851, "grad_norm": 4.6910319328308105, "learning_rate": 0.00029555229716520036, "loss": 1.8431, "step": 687 }, { "epoch": 0.10770194113963683, "grad_norm": 2.633889675140381, "learning_rate": 0.0002955278592375366, "loss": 1.9383, "step": 688 }, { "epoch": 0.10785848465873513, "grad_norm": 6.47677755355835, "learning_rate": 0.0002955034213098729, "loss": 2.1373, "step": 689 }, { "epoch": 0.10801502817783344, "grad_norm": 1.8168855905532837, "learning_rate": 0.00029547898338220917, "loss": 1.8526, "step": 690 }, { "epoch": 0.10817157169693174, "grad_norm": 2.5182387828826904, "learning_rate": 0.0002954545454545454, "loss": 1.632, "step": 691 }, { "epoch": 0.10832811521603006, "grad_norm": 2.240593671798706, "learning_rate": 0.0002954301075268817, "loss": 2.1292, "step": 692 }, { "epoch": 0.10848465873512837, "grad_norm": 2.15621018409729, "learning_rate": 0.000295405669599218, "loss": 1.9179, "step": 693 }, { "epoch": 0.10864120225422667, "grad_norm": 1.9122724533081055, "learning_rate": 0.0002953812316715542, "loss": 1.8236, "step": 694 }, { "epoch": 0.10879774577332499, "grad_norm": 2.539159059524536, "learning_rate": 0.00029535679374389053, "loss": 2.0443, "step": 695 }, { "epoch": 0.10895428929242329, 
"grad_norm": 1.3785215616226196, "learning_rate": 0.0002953323558162267, "loss": 1.0128, "step": 696 }, { "epoch": 0.1091108328115216, "grad_norm": 4.100296974182129, "learning_rate": 0.00029530791788856303, "loss": 1.38, "step": 697 }, { "epoch": 0.1092673763306199, "grad_norm": 3.4671149253845215, "learning_rate": 0.0002952834799608993, "loss": 1.4838, "step": 698 }, { "epoch": 0.10942391984971822, "grad_norm": 2.7299959659576416, "learning_rate": 0.00029525904203323553, "loss": 1.5185, "step": 699 }, { "epoch": 0.10958046336881654, "grad_norm": 3.1657235622406006, "learning_rate": 0.00029523460410557184, "loss": 1.625, "step": 700 }, { "epoch": 0.10973700688791484, "grad_norm": 1.4762587547302246, "learning_rate": 0.0002952101661779081, "loss": 1.2705, "step": 701 }, { "epoch": 0.10989355040701315, "grad_norm": 1.150429368019104, "learning_rate": 0.00029518572825024434, "loss": 1.068, "step": 702 }, { "epoch": 0.11005009392611145, "grad_norm": 1.3497153520584106, "learning_rate": 0.00029516129032258065, "loss": 1.1242, "step": 703 }, { "epoch": 0.11020663744520977, "grad_norm": 0.7703993320465088, "learning_rate": 0.0002951368523949169, "loss": 0.9952, "step": 704 }, { "epoch": 0.11036318096430808, "grad_norm": 1.7000598907470703, "learning_rate": 0.00029511241446725315, "loss": 0.9605, "step": 705 }, { "epoch": 0.11051972448340638, "grad_norm": 0.7951465845108032, "learning_rate": 0.0002950879765395894, "loss": 1.1336, "step": 706 }, { "epoch": 0.1106762680025047, "grad_norm": 0.787284791469574, "learning_rate": 0.0002950635386119257, "loss": 0.7303, "step": 707 }, { "epoch": 0.110832811521603, "grad_norm": 1.1658583879470825, "learning_rate": 0.00029503910068426196, "loss": 0.8081, "step": 708 }, { "epoch": 0.11098935504070132, "grad_norm": 1.4762721061706543, "learning_rate": 0.0002950146627565982, "loss": 1.0007, "step": 709 }, { "epoch": 0.11114589855979963, "grad_norm": 0.9633269309997559, "learning_rate": 0.0002949902248289345, "loss": 0.799, "step": 710 
}, { "epoch": 0.11130244207889793, "grad_norm": 1.539566159248352, "learning_rate": 0.00029496578690127077, "loss": 1.012, "step": 711 }, { "epoch": 0.11145898559799625, "grad_norm": 1.1010069847106934, "learning_rate": 0.000294941348973607, "loss": 0.8838, "step": 712 }, { "epoch": 0.11161552911709455, "grad_norm": 1.7431788444519043, "learning_rate": 0.00029491691104594327, "loss": 1.1716, "step": 713 }, { "epoch": 0.11177207263619286, "grad_norm": 1.6751221418380737, "learning_rate": 0.0002948924731182795, "loss": 1.2777, "step": 714 }, { "epoch": 0.11192861615529118, "grad_norm": 1.2621047496795654, "learning_rate": 0.0002948680351906158, "loss": 0.887, "step": 715 }, { "epoch": 0.11208515967438948, "grad_norm": 2.204878091812134, "learning_rate": 0.0002948435972629521, "loss": 1.3467, "step": 716 }, { "epoch": 0.1122417031934878, "grad_norm": 1.4875999689102173, "learning_rate": 0.00029481915933528833, "loss": 1.2575, "step": 717 }, { "epoch": 0.1123982467125861, "grad_norm": 3.0924177169799805, "learning_rate": 0.00029479472140762463, "loss": 1.4872, "step": 718 }, { "epoch": 0.11255479023168441, "grad_norm": 2.1464388370513916, "learning_rate": 0.0002947702834799609, "loss": 1.063, "step": 719 }, { "epoch": 0.11271133375078271, "grad_norm": 2.1253466606140137, "learning_rate": 0.00029474584555229714, "loss": 1.0376, "step": 720 }, { "epoch": 0.11286787726988103, "grad_norm": 1.9266527891159058, "learning_rate": 0.0002947214076246334, "loss": 1.3224, "step": 721 }, { "epoch": 0.11302442078897934, "grad_norm": 1.8208385705947876, "learning_rate": 0.0002946969696969697, "loss": 1.186, "step": 722 }, { "epoch": 0.11318096430807764, "grad_norm": 1.7924067974090576, "learning_rate": 0.00029467253176930594, "loss": 1.2369, "step": 723 }, { "epoch": 0.11333750782717596, "grad_norm": 1.3854628801345825, "learning_rate": 0.0002946480938416422, "loss": 1.1493, "step": 724 }, { "epoch": 0.11349405134627426, "grad_norm": 2.6812336444854736, "learning_rate": 
0.0002946236559139785, "loss": 1.3814, "step": 725 }, { "epoch": 0.11365059486537257, "grad_norm": 1.894299864768982, "learning_rate": 0.00029459921798631475, "loss": 1.117, "step": 726 }, { "epoch": 0.11380713838447089, "grad_norm": 4.2252726554870605, "learning_rate": 0.000294574780058651, "loss": 1.6541, "step": 727 }, { "epoch": 0.11396368190356919, "grad_norm": 1.660989761352539, "learning_rate": 0.0002945503421309873, "loss": 1.3064, "step": 728 }, { "epoch": 0.1141202254226675, "grad_norm": 1.3125436305999756, "learning_rate": 0.0002945259042033235, "loss": 1.2461, "step": 729 }, { "epoch": 0.1142767689417658, "grad_norm": 1.556378722190857, "learning_rate": 0.0002945014662756598, "loss": 1.4757, "step": 730 }, { "epoch": 0.11443331246086412, "grad_norm": 1.8206431865692139, "learning_rate": 0.00029447702834799606, "loss": 1.472, "step": 731 }, { "epoch": 0.11458985597996243, "grad_norm": 2.837712287902832, "learning_rate": 0.0002944525904203323, "loss": 1.6409, "step": 732 }, { "epoch": 0.11474639949906074, "grad_norm": 2.3481979370117188, "learning_rate": 0.0002944281524926686, "loss": 1.618, "step": 733 }, { "epoch": 0.11490294301815905, "grad_norm": 2.627960443496704, "learning_rate": 0.00029440371456500487, "loss": 1.1409, "step": 734 }, { "epoch": 0.11505948653725735, "grad_norm": 1.7479578256607056, "learning_rate": 0.0002943792766373411, "loss": 1.4743, "step": 735 }, { "epoch": 0.11521603005635567, "grad_norm": 2.662626028060913, "learning_rate": 0.0002943548387096774, "loss": 1.8053, "step": 736 }, { "epoch": 0.11537257357545398, "grad_norm": 3.780766487121582, "learning_rate": 0.0002943304007820137, "loss": 1.8668, "step": 737 }, { "epoch": 0.11552911709455228, "grad_norm": 3.494009017944336, "learning_rate": 0.00029430596285434993, "loss": 1.9487, "step": 738 }, { "epoch": 0.1156856606136506, "grad_norm": 2.3752057552337646, "learning_rate": 0.0002942815249266862, "loss": 1.8814, "step": 739 }, { "epoch": 0.1158422041327489, "grad_norm": 
3.852254629135132, "learning_rate": 0.0002942570869990225, "loss": 1.9308, "step": 740 }, { "epoch": 0.11599874765184721, "grad_norm": 1.938531756401062, "learning_rate": 0.00029423264907135874, "loss": 1.4905, "step": 741 }, { "epoch": 0.11615529117094553, "grad_norm": 3.246478319168091, "learning_rate": 0.000294208211143695, "loss": 1.3597, "step": 742 }, { "epoch": 0.11631183469004383, "grad_norm": 2.4071877002716064, "learning_rate": 0.0002941837732160313, "loss": 1.1944, "step": 743 }, { "epoch": 0.11646837820914215, "grad_norm": 1.2850818634033203, "learning_rate": 0.0002941593352883675, "loss": 1.3349, "step": 744 }, { "epoch": 0.11662492172824045, "grad_norm": 2.346933126449585, "learning_rate": 0.0002941348973607038, "loss": 0.904, "step": 745 }, { "epoch": 0.11678146524733876, "grad_norm": 3.941329002380371, "learning_rate": 0.00029411045943304005, "loss": 2.2503, "step": 746 }, { "epoch": 0.11693800876643706, "grad_norm": 2.6362099647521973, "learning_rate": 0.0002940860215053763, "loss": 1.5694, "step": 747 }, { "epoch": 0.11709455228553538, "grad_norm": 4.092523097991943, "learning_rate": 0.0002940615835777126, "loss": 1.9898, "step": 748 }, { "epoch": 0.11725109580463369, "grad_norm": 3.4004411697387695, "learning_rate": 0.00029403714565004886, "loss": 1.589, "step": 749 }, { "epoch": 0.117407639323732, "grad_norm": 3.5993781089782715, "learning_rate": 0.0002940127077223851, "loss": 1.6423, "step": 750 }, { "epoch": 0.11756418284283031, "grad_norm": 0.811040461063385, "learning_rate": 0.0002939882697947214, "loss": 1.0251, "step": 751 }, { "epoch": 0.11772072636192861, "grad_norm": 0.8086991310119629, "learning_rate": 0.00029396383186705766, "loss": 0.9569, "step": 752 }, { "epoch": 0.11787726988102692, "grad_norm": 0.6266233325004578, "learning_rate": 0.0002939393939393939, "loss": 0.8385, "step": 753 }, { "epoch": 0.11803381340012524, "grad_norm": 2.2221486568450928, "learning_rate": 0.00029391495601173017, "loss": 1.0404, "step": 754 }, { "epoch": 
0.11819035691922354, "grad_norm": 0.7100171446800232, "learning_rate": 0.00029389051808406647, "loss": 0.8026, "step": 755 }, { "epoch": 0.11834690043832186, "grad_norm": 1.3321337699890137, "learning_rate": 0.0002938660801564027, "loss": 0.8826, "step": 756 }, { "epoch": 0.11850344395742016, "grad_norm": 0.7835633754730225, "learning_rate": 0.000293841642228739, "loss": 0.7613, "step": 757 }, { "epoch": 0.11865998747651847, "grad_norm": 0.6961237788200378, "learning_rate": 0.0002938172043010753, "loss": 0.6241, "step": 758 }, { "epoch": 0.11881653099561679, "grad_norm": 1.128588080406189, "learning_rate": 0.0002937927663734115, "loss": 0.8578, "step": 759 }, { "epoch": 0.11897307451471509, "grad_norm": 0.9707612991333008, "learning_rate": 0.0002937683284457478, "loss": 0.9736, "step": 760 }, { "epoch": 0.1191296180338134, "grad_norm": 0.9797849655151367, "learning_rate": 0.00029374389051808403, "loss": 0.8492, "step": 761 }, { "epoch": 0.1192861615529117, "grad_norm": 1.5107133388519287, "learning_rate": 0.0002937194525904203, "loss": 0.7293, "step": 762 }, { "epoch": 0.11944270507201002, "grad_norm": 1.1908730268478394, "learning_rate": 0.0002936950146627566, "loss": 0.7209, "step": 763 }, { "epoch": 0.11959924859110833, "grad_norm": 1.0134694576263428, "learning_rate": 0.00029367057673509284, "loss": 0.8672, "step": 764 }, { "epoch": 0.11975579211020664, "grad_norm": 1.0121101140975952, "learning_rate": 0.0002936461388074291, "loss": 0.8697, "step": 765 }, { "epoch": 0.11991233562930495, "grad_norm": 0.948444128036499, "learning_rate": 0.0002936217008797654, "loss": 1.0135, "step": 766 }, { "epoch": 0.12006887914840325, "grad_norm": 1.0948662757873535, "learning_rate": 0.0002935972629521016, "loss": 0.7778, "step": 767 }, { "epoch": 0.12022542266750157, "grad_norm": 2.0492300987243652, "learning_rate": 0.0002935728250244379, "loss": 1.1036, "step": 768 }, { "epoch": 0.12038196618659988, "grad_norm": 1.7745238542556763, "learning_rate": 0.00029354838709677415, 
"loss": 0.9884, "step": 769 }, { "epoch": 0.12053850970569818, "grad_norm": 1.6761794090270996, "learning_rate": 0.0002935239491691104, "loss": 1.0637, "step": 770 }, { "epoch": 0.1206950532247965, "grad_norm": 1.1787947416305542, "learning_rate": 0.0002934995112414467, "loss": 0.8567, "step": 771 }, { "epoch": 0.1208515967438948, "grad_norm": 2.7158119678497314, "learning_rate": 0.00029347507331378296, "loss": 1.1482, "step": 772 }, { "epoch": 0.12100814026299311, "grad_norm": 1.5714401006698608, "learning_rate": 0.0002934506353861192, "loss": 1.0865, "step": 773 }, { "epoch": 0.12116468378209141, "grad_norm": 1.6347661018371582, "learning_rate": 0.0002934261974584555, "loss": 1.1607, "step": 774 }, { "epoch": 0.12132122730118973, "grad_norm": 2.3367502689361572, "learning_rate": 0.00029340175953079177, "loss": 1.1489, "step": 775 }, { "epoch": 0.12147777082028804, "grad_norm": 2.6965532302856445, "learning_rate": 0.000293377321603128, "loss": 1.0683, "step": 776 }, { "epoch": 0.12163431433938635, "grad_norm": 1.9645942449569702, "learning_rate": 0.00029335288367546427, "loss": 1.4028, "step": 777 }, { "epoch": 0.12179085785848466, "grad_norm": 2.6310694217681885, "learning_rate": 0.0002933284457478006, "loss": 1.2004, "step": 778 }, { "epoch": 0.12194740137758296, "grad_norm": 1.9831539392471313, "learning_rate": 0.00029330400782013683, "loss": 1.4612, "step": 779 }, { "epoch": 0.12210394489668128, "grad_norm": 2.4186670780181885, "learning_rate": 0.0002932795698924731, "loss": 1.4779, "step": 780 }, { "epoch": 0.12226048841577959, "grad_norm": 4.081188201904297, "learning_rate": 0.0002932551319648094, "loss": 1.713, "step": 781 }, { "epoch": 0.12241703193487789, "grad_norm": 2.4842689037323, "learning_rate": 0.00029323069403714564, "loss": 2.0171, "step": 782 }, { "epoch": 0.12257357545397621, "grad_norm": 2.6195974349975586, "learning_rate": 0.0002932062561094819, "loss": 1.212, "step": 783 }, { "epoch": 0.12273011897307451, "grad_norm": 2.697697162628174, 
"learning_rate": 0.00029318181818181814, "loss": 1.7433, "step": 784 }, { "epoch": 0.12288666249217282, "grad_norm": 2.484013319015503, "learning_rate": 0.0002931573802541544, "loss": 1.8923, "step": 785 }, { "epoch": 0.12304320601127114, "grad_norm": 4.01732873916626, "learning_rate": 0.0002931329423264907, "loss": 2.1281, "step": 786 }, { "epoch": 0.12319974953036944, "grad_norm": 2.3246097564697266, "learning_rate": 0.00029310850439882695, "loss": 2.1824, "step": 787 }, { "epoch": 0.12335629304946776, "grad_norm": 3.7635695934295654, "learning_rate": 0.0002930840664711632, "loss": 1.5211, "step": 788 }, { "epoch": 0.12351283656856606, "grad_norm": 2.600083827972412, "learning_rate": 0.0002930596285434995, "loss": 1.4717, "step": 789 }, { "epoch": 0.12366938008766437, "grad_norm": 4.064560890197754, "learning_rate": 0.00029303519061583575, "loss": 1.7544, "step": 790 }, { "epoch": 0.12382592360676269, "grad_norm": 4.859133720397949, "learning_rate": 0.000293010752688172, "loss": 1.9283, "step": 791 }, { "epoch": 0.12398246712586099, "grad_norm": 2.1018145084381104, "learning_rate": 0.00029298631476050826, "loss": 1.0601, "step": 792 }, { "epoch": 0.1241390106449593, "grad_norm": 2.4449431896209717, "learning_rate": 0.00029296187683284456, "loss": 1.8295, "step": 793 }, { "epoch": 0.1242955541640576, "grad_norm": 2.4376046657562256, "learning_rate": 0.0002929374389051808, "loss": 1.2816, "step": 794 }, { "epoch": 0.12445209768315592, "grad_norm": 3.415478229522705, "learning_rate": 0.00029291300097751706, "loss": 1.9171, "step": 795 }, { "epoch": 0.12460864120225423, "grad_norm": 3.3648712635040283, "learning_rate": 0.00029288856304985337, "loss": 1.2723, "step": 796 }, { "epoch": 0.12476518472135253, "grad_norm": 3.2523117065429688, "learning_rate": 0.0002928641251221896, "loss": 1.2449, "step": 797 }, { "epoch": 0.12492172824045085, "grad_norm": 2.2647125720977783, "learning_rate": 0.00029283968719452587, "loss": 1.2996, "step": 798 }, { "epoch": 
0.12507827175954916, "grad_norm": 2.5591723918914795, "learning_rate": 0.0002928152492668622, "loss": 1.4211, "step": 799 }, { "epoch": 0.12523481527864747, "grad_norm": 1.7322767972946167, "learning_rate": 0.0002927908113391984, "loss": 1.3324, "step": 800 }, { "epoch": 0.12539135879774577, "grad_norm": 1.5625592470169067, "learning_rate": 0.0002927663734115347, "loss": 1.0354, "step": 801 }, { "epoch": 0.1255479023168441, "grad_norm": 1.2151803970336914, "learning_rate": 0.00029274193548387093, "loss": 0.7138, "step": 802 }, { "epoch": 0.1257044458359424, "grad_norm": 0.8814300894737244, "learning_rate": 0.0002927174975562072, "loss": 0.6718, "step": 803 }, { "epoch": 0.1258609893550407, "grad_norm": 0.789391279220581, "learning_rate": 0.0002926930596285435, "loss": 0.645, "step": 804 }, { "epoch": 0.126017532874139, "grad_norm": 0.6633252501487732, "learning_rate": 0.00029266862170087974, "loss": 0.6388, "step": 805 }, { "epoch": 0.12617407639323733, "grad_norm": 0.9132680892944336, "learning_rate": 0.000292644183773216, "loss": 0.8446, "step": 806 }, { "epoch": 0.12633061991233563, "grad_norm": 0.9453927278518677, "learning_rate": 0.00029261974584555224, "loss": 0.6193, "step": 807 }, { "epoch": 0.12648716343143393, "grad_norm": 1.8710074424743652, "learning_rate": 0.00029259530791788855, "loss": 0.8318, "step": 808 }, { "epoch": 0.12664370695053226, "grad_norm": 1.0109128952026367, "learning_rate": 0.0002925708699902248, "loss": 0.6593, "step": 809 }, { "epoch": 0.12680025046963056, "grad_norm": 1.1044448614120483, "learning_rate": 0.00029254643206256105, "loss": 0.7262, "step": 810 }, { "epoch": 0.12695679398872886, "grad_norm": 1.3992559909820557, "learning_rate": 0.00029252199413489736, "loss": 0.9756, "step": 811 }, { "epoch": 0.12711333750782716, "grad_norm": 1.5335423946380615, "learning_rate": 0.0002924975562072336, "loss": 0.7762, "step": 812 }, { "epoch": 0.1272698810269255, "grad_norm": 1.3663071393966675, "learning_rate": 0.00029247311827956986, 
"loss": 0.8961, "step": 813 }, { "epoch": 0.1274264245460238, "grad_norm": 1.9370208978652954, "learning_rate": 0.00029244868035190616, "loss": 0.8597, "step": 814 }, { "epoch": 0.1275829680651221, "grad_norm": 1.6557672023773193, "learning_rate": 0.00029242424242424236, "loss": 1.0157, "step": 815 }, { "epoch": 0.12773951158422042, "grad_norm": 1.4072177410125732, "learning_rate": 0.00029239980449657867, "loss": 0.9113, "step": 816 }, { "epoch": 0.12789605510331872, "grad_norm": 1.654747724533081, "learning_rate": 0.0002923753665689149, "loss": 0.8022, "step": 817 }, { "epoch": 0.12805259862241702, "grad_norm": 1.677295207977295, "learning_rate": 0.00029235092864125117, "loss": 0.7312, "step": 818 }, { "epoch": 0.12820914214151535, "grad_norm": 1.2632160186767578, "learning_rate": 0.0002923264907135875, "loss": 0.9197, "step": 819 }, { "epoch": 0.12836568566061365, "grad_norm": 1.6950252056121826, "learning_rate": 0.0002923020527859237, "loss": 1.268, "step": 820 }, { "epoch": 0.12852222917971196, "grad_norm": 1.9522795677185059, "learning_rate": 0.00029227761485826, "loss": 0.9591, "step": 821 }, { "epoch": 0.12867877269881026, "grad_norm": 2.316823959350586, "learning_rate": 0.0002922531769305963, "loss": 1.0732, "step": 822 }, { "epoch": 0.12883531621790859, "grad_norm": 2.60040283203125, "learning_rate": 0.00029222873900293253, "loss": 1.3269, "step": 823 }, { "epoch": 0.1289918597370069, "grad_norm": 2.1569325923919678, "learning_rate": 0.0002922043010752688, "loss": 1.3893, "step": 824 }, { "epoch": 0.1291484032561052, "grad_norm": 3.817329168319702, "learning_rate": 0.00029217986314760504, "loss": 1.3369, "step": 825 }, { "epoch": 0.12930494677520352, "grad_norm": 1.4851608276367188, "learning_rate": 0.00029215542521994134, "loss": 0.9969, "step": 826 }, { "epoch": 0.12946149029430182, "grad_norm": 2.9426980018615723, "learning_rate": 0.0002921309872922776, "loss": 1.6128, "step": 827 }, { "epoch": 0.12961803381340012, "grad_norm": 1.6474707126617432, 
"learning_rate": 0.00029210654936461384, "loss": 0.8915, "step": 828 }, { "epoch": 0.12977457733249845, "grad_norm": 2.1298272609710693, "learning_rate": 0.00029208211143695015, "loss": 1.0587, "step": 829 }, { "epoch": 0.12993112085159675, "grad_norm": 2.768442153930664, "learning_rate": 0.00029205767350928635, "loss": 1.544, "step": 830 }, { "epoch": 0.13008766437069505, "grad_norm": 2.516650676727295, "learning_rate": 0.00029203323558162265, "loss": 1.0509, "step": 831 }, { "epoch": 0.13024420788979335, "grad_norm": 2.4558215141296387, "learning_rate": 0.0002920087976539589, "loss": 1.4115, "step": 832 }, { "epoch": 0.13040075140889168, "grad_norm": 2.536071300506592, "learning_rate": 0.00029198435972629515, "loss": 1.4077, "step": 833 }, { "epoch": 0.13055729492798998, "grad_norm": 3.4631125926971436, "learning_rate": 0.00029195992179863146, "loss": 1.7084, "step": 834 }, { "epoch": 0.13071383844708828, "grad_norm": 2.3752031326293945, "learning_rate": 0.0002919354838709677, "loss": 1.1893, "step": 835 }, { "epoch": 0.1308703819661866, "grad_norm": 1.6237187385559082, "learning_rate": 0.00029191104594330396, "loss": 1.0723, "step": 836 }, { "epoch": 0.1310269254852849, "grad_norm": 2.3249778747558594, "learning_rate": 0.00029188660801564027, "loss": 1.6326, "step": 837 }, { "epoch": 0.1311834690043832, "grad_norm": 2.8457751274108887, "learning_rate": 0.0002918621700879765, "loss": 1.4018, "step": 838 }, { "epoch": 0.13134001252348151, "grad_norm": 3.0899088382720947, "learning_rate": 0.00029183773216031277, "loss": 2.1144, "step": 839 }, { "epoch": 0.13149655604257984, "grad_norm": 4.969150543212891, "learning_rate": 0.000291813294232649, "loss": 1.9739, "step": 840 }, { "epoch": 0.13165309956167814, "grad_norm": 2.7233245372772217, "learning_rate": 0.0002917888563049853, "loss": 1.7328, "step": 841 }, { "epoch": 0.13180964308077645, "grad_norm": 2.1063811779022217, "learning_rate": 0.0002917644183773216, "loss": 1.5687, "step": 842 }, { "epoch": 
0.13196618659987477, "grad_norm": 1.7449232339859009, "learning_rate": 0.00029173998044965783, "loss": 1.8963, "step": 843 }, { "epoch": 0.13212273011897308, "grad_norm": 2.4782533645629883, "learning_rate": 0.00029171554252199413, "loss": 1.5947, "step": 844 }, { "epoch": 0.13227927363807138, "grad_norm": 2.6746630668640137, "learning_rate": 0.0002916911045943304, "loss": 2.6498, "step": 845 }, { "epoch": 0.1324358171571697, "grad_norm": 2.479112148284912, "learning_rate": 0.00029166666666666664, "loss": 1.5459, "step": 846 }, { "epoch": 0.132592360676268, "grad_norm": 1.7579686641693115, "learning_rate": 0.0002916422287390029, "loss": 0.6981, "step": 847 }, { "epoch": 0.1327489041953663, "grad_norm": 1.6483287811279297, "learning_rate": 0.00029161779081133914, "loss": 1.307, "step": 848 }, { "epoch": 0.1329054477144646, "grad_norm": 2.1981475353240967, "learning_rate": 0.00029159335288367544, "loss": 1.0251, "step": 849 }, { "epoch": 0.13306199123356294, "grad_norm": 2.7301342487335205, "learning_rate": 0.0002915689149560117, "loss": 1.0604, "step": 850 }, { "epoch": 0.13321853475266124, "grad_norm": 1.2275153398513794, "learning_rate": 0.00029154447702834795, "loss": 1.1503, "step": 851 }, { "epoch": 0.13337507827175954, "grad_norm": 0.9788094758987427, "learning_rate": 0.00029152003910068425, "loss": 0.8434, "step": 852 }, { "epoch": 0.13353162179085787, "grad_norm": 0.7427651286125183, "learning_rate": 0.0002914956011730205, "loss": 0.6953, "step": 853 }, { "epoch": 0.13368816530995617, "grad_norm": 0.8753380179405212, "learning_rate": 0.00029147116324535676, "loss": 0.6854, "step": 854 }, { "epoch": 0.13384470882905447, "grad_norm": 1.1522316932678223, "learning_rate": 0.000291446725317693, "loss": 0.5981, "step": 855 }, { "epoch": 0.1340012523481528, "grad_norm": 0.9101697206497192, "learning_rate": 0.0002914222873900293, "loss": 0.534, "step": 856 }, { "epoch": 0.1341577958672511, "grad_norm": 1.2821955680847168, "learning_rate": 0.00029139784946236556, 
"loss": 0.6715, "step": 857 }, { "epoch": 0.1343143393863494, "grad_norm": 1.4305757284164429, "learning_rate": 0.0002913734115347018, "loss": 0.8797, "step": 858 }, { "epoch": 0.1344708829054477, "grad_norm": 1.9941664934158325, "learning_rate": 0.0002913489736070381, "loss": 0.8174, "step": 859 }, { "epoch": 0.13462742642454603, "grad_norm": 1.482197880744934, "learning_rate": 0.00029132453567937437, "loss": 0.6398, "step": 860 }, { "epoch": 0.13478396994364433, "grad_norm": 1.1646844148635864, "learning_rate": 0.0002913000977517106, "loss": 0.6547, "step": 861 }, { "epoch": 0.13494051346274263, "grad_norm": 1.6788074970245361, "learning_rate": 0.00029127565982404693, "loss": 0.7803, "step": 862 }, { "epoch": 0.13509705698184096, "grad_norm": 2.2960433959960938, "learning_rate": 0.0002912512218963831, "loss": 1.1557, "step": 863 }, { "epoch": 0.13525360050093926, "grad_norm": 2.2098381519317627, "learning_rate": 0.00029122678396871943, "loss": 0.9451, "step": 864 }, { "epoch": 0.13541014402003757, "grad_norm": 1.51131272315979, "learning_rate": 0.0002912023460410557, "loss": 0.9505, "step": 865 }, { "epoch": 0.13556668753913587, "grad_norm": 0.9991070032119751, "learning_rate": 0.00029117790811339193, "loss": 0.7682, "step": 866 }, { "epoch": 0.1357232310582342, "grad_norm": 2.670018196105957, "learning_rate": 0.00029115347018572824, "loss": 1.3066, "step": 867 }, { "epoch": 0.1358797745773325, "grad_norm": 1.4845349788665771, "learning_rate": 0.0002911290322580645, "loss": 0.9315, "step": 868 }, { "epoch": 0.1360363180964308, "grad_norm": 1.2157962322235107, "learning_rate": 0.00029110459433040074, "loss": 0.6735, "step": 869 }, { "epoch": 0.13619286161552913, "grad_norm": 1.6585222482681274, "learning_rate": 0.00029108015640273705, "loss": 1.0187, "step": 870 }, { "epoch": 0.13634940513462743, "grad_norm": 1.76893949508667, "learning_rate": 0.0002910557184750733, "loss": 1.0663, "step": 871 }, { "epoch": 0.13650594865372573, "grad_norm": 0.9581948518753052, 
"learning_rate": 0.00029103128054740955, "loss": 0.7532, "step": 872 }, { "epoch": 0.13666249217282406, "grad_norm": 2.227740526199341, "learning_rate": 0.0002910068426197458, "loss": 0.8671, "step": 873 }, { "epoch": 0.13681903569192236, "grad_norm": 3.511131525039673, "learning_rate": 0.0002909824046920821, "loss": 1.1649, "step": 874 }, { "epoch": 0.13697557921102066, "grad_norm": 2.581437826156616, "learning_rate": 0.00029095796676441836, "loss": 1.1585, "step": 875 }, { "epoch": 0.13713212273011896, "grad_norm": 1.5962125062942505, "learning_rate": 0.0002909335288367546, "loss": 1.295, "step": 876 }, { "epoch": 0.1372886662492173, "grad_norm": 2.135364055633545, "learning_rate": 0.0002909090909090909, "loss": 1.5455, "step": 877 }, { "epoch": 0.1374452097683156, "grad_norm": 2.586113452911377, "learning_rate": 0.0002908846529814271, "loss": 1.6572, "step": 878 }, { "epoch": 0.1376017532874139, "grad_norm": 3.2886767387390137, "learning_rate": 0.0002908602150537634, "loss": 1.721, "step": 879 }, { "epoch": 0.13775829680651222, "grad_norm": 2.5917325019836426, "learning_rate": 0.00029083577712609967, "loss": 0.9838, "step": 880 }, { "epoch": 0.13791484032561052, "grad_norm": 2.5062735080718994, "learning_rate": 0.0002908113391984359, "loss": 1.3576, "step": 881 }, { "epoch": 0.13807138384470882, "grad_norm": 3.9728665351867676, "learning_rate": 0.0002907869012707722, "loss": 1.2969, "step": 882 }, { "epoch": 0.13822792736380715, "grad_norm": 2.223118305206299, "learning_rate": 0.0002907624633431085, "loss": 1.8066, "step": 883 }, { "epoch": 0.13838447088290545, "grad_norm": 3.254514694213867, "learning_rate": 0.0002907380254154447, "loss": 1.1451, "step": 884 }, { "epoch": 0.13854101440200375, "grad_norm": 7.31671667098999, "learning_rate": 0.00029071358748778103, "loss": 1.7801, "step": 885 }, { "epoch": 0.13869755792110205, "grad_norm": 3.2049777507781982, "learning_rate": 0.0002906891495601173, "loss": 2.0279, "step": 886 }, { "epoch": 0.13885410144020038, 
"grad_norm": 3.6071975231170654, "learning_rate": 0.00029066471163245353, "loss": 1.5702, "step": 887 }, { "epoch": 0.13901064495929868, "grad_norm": 2.798166275024414, "learning_rate": 0.0002906402737047898, "loss": 1.576, "step": 888 }, { "epoch": 0.13916718847839699, "grad_norm": 6.099799633026123, "learning_rate": 0.0002906158357771261, "loss": 1.8335, "step": 889 }, { "epoch": 0.13932373199749531, "grad_norm": 2.965550184249878, "learning_rate": 0.00029059139784946234, "loss": 1.548, "step": 890 }, { "epoch": 0.13948027551659362, "grad_norm": 2.5119330883026123, "learning_rate": 0.0002905669599217986, "loss": 1.9851, "step": 891 }, { "epoch": 0.13963681903569192, "grad_norm": 5.480480194091797, "learning_rate": 0.0002905425219941349, "loss": 1.4258, "step": 892 }, { "epoch": 0.13979336255479022, "grad_norm": 3.603321075439453, "learning_rate": 0.00029051808406647115, "loss": 1.541, "step": 893 }, { "epoch": 0.13994990607388855, "grad_norm": 4.580733776092529, "learning_rate": 0.0002904936461388074, "loss": 2.0883, "step": 894 }, { "epoch": 0.14010644959298685, "grad_norm": 2.0983352661132812, "learning_rate": 0.00029046920821114365, "loss": 1.5889, "step": 895 }, { "epoch": 0.14026299311208515, "grad_norm": 2.2917943000793457, "learning_rate": 0.0002904447702834799, "loss": 1.6588, "step": 896 }, { "epoch": 0.14041953663118348, "grad_norm": 3.498607873916626, "learning_rate": 0.0002904203323558162, "loss": 1.3828, "step": 897 }, { "epoch": 0.14057608015028178, "grad_norm": 2.6161184310913086, "learning_rate": 0.00029039589442815246, "loss": 1.1676, "step": 898 }, { "epoch": 0.14073262366938008, "grad_norm": 3.8199074268341064, "learning_rate": 0.0002903714565004887, "loss": 1.2942, "step": 899 }, { "epoch": 0.1408891671884784, "grad_norm": 1.9904534816741943, "learning_rate": 0.000290347018572825, "loss": 1.5531, "step": 900 }, { "epoch": 0.1410457107075767, "grad_norm": 0.8975843191146851, "learning_rate": 0.00029032258064516127, "loss": 0.8606, "step": 901 
}, { "epoch": 0.141202254226675, "grad_norm": 0.7283688187599182, "learning_rate": 0.0002902981427174975, "loss": 0.7228, "step": 902 }, { "epoch": 0.1413587977457733, "grad_norm": 0.6623715162277222, "learning_rate": 0.00029027370478983377, "loss": 0.7472, "step": 903 }, { "epoch": 0.14151534126487164, "grad_norm": 0.7772536873817444, "learning_rate": 0.0002902492668621701, "loss": 0.5521, "step": 904 }, { "epoch": 0.14167188478396994, "grad_norm": 0.9219233989715576, "learning_rate": 0.00029022482893450633, "loss": 0.7454, "step": 905 }, { "epoch": 0.14182842830306824, "grad_norm": 0.8416048884391785, "learning_rate": 0.0002902003910068426, "loss": 0.7557, "step": 906 }, { "epoch": 0.14198497182216657, "grad_norm": 0.8040184378623962, "learning_rate": 0.0002901759530791789, "loss": 0.4868, "step": 907 }, { "epoch": 0.14214151534126487, "grad_norm": 0.9409207105636597, "learning_rate": 0.00029015151515151514, "loss": 0.6294, "step": 908 }, { "epoch": 0.14229805886036317, "grad_norm": 1.9797828197479248, "learning_rate": 0.0002901270772238514, "loss": 0.8302, "step": 909 }, { "epoch": 0.1424546023794615, "grad_norm": 1.6101166009902954, "learning_rate": 0.0002901026392961877, "loss": 1.0439, "step": 910 }, { "epoch": 0.1426111458985598, "grad_norm": 1.9140416383743286, "learning_rate": 0.0002900782013685239, "loss": 0.6344, "step": 911 }, { "epoch": 0.1427676894176581, "grad_norm": 0.9907447695732117, "learning_rate": 0.0002900537634408602, "loss": 0.6403, "step": 912 }, { "epoch": 0.1429242329367564, "grad_norm": 2.3162434101104736, "learning_rate": 0.00029002932551319645, "loss": 0.7338, "step": 913 }, { "epoch": 0.14308077645585474, "grad_norm": 1.7492724657058716, "learning_rate": 0.0002900048875855327, "loss": 0.6497, "step": 914 }, { "epoch": 0.14323731997495304, "grad_norm": 1.4608025550842285, "learning_rate": 0.000289980449657869, "loss": 0.7873, "step": 915 }, { "epoch": 0.14339386349405134, "grad_norm": 2.1269257068634033, "learning_rate": 
0.00028995601173020525, "loss": 0.836, "step": 916 }, { "epoch": 0.14355040701314967, "grad_norm": 3.4742913246154785, "learning_rate": 0.0002899315738025415, "loss": 1.3075, "step": 917 }, { "epoch": 0.14370695053224797, "grad_norm": 2.038989305496216, "learning_rate": 0.00028990713587487776, "loss": 0.5859, "step": 918 }, { "epoch": 0.14386349405134627, "grad_norm": 2.4943230152130127, "learning_rate": 0.00028988269794721406, "loss": 0.925, "step": 919 }, { "epoch": 0.14402003757044457, "grad_norm": 1.818659782409668, "learning_rate": 0.0002898582600195503, "loss": 1.0819, "step": 920 }, { "epoch": 0.1441765810895429, "grad_norm": 2.3081328868865967, "learning_rate": 0.00028983382209188656, "loss": 0.7397, "step": 921 }, { "epoch": 0.1443331246086412, "grad_norm": 2.66398024559021, "learning_rate": 0.00028980938416422287, "loss": 0.7526, "step": 922 }, { "epoch": 0.1444896681277395, "grad_norm": 1.6944574117660522, "learning_rate": 0.0002897849462365591, "loss": 1.3631, "step": 923 }, { "epoch": 0.14464621164683783, "grad_norm": 3.4251976013183594, "learning_rate": 0.00028976050830889537, "loss": 1.302, "step": 924 }, { "epoch": 0.14480275516593613, "grad_norm": 6.1071062088012695, "learning_rate": 0.0002897360703812317, "loss": 0.9843, "step": 925 }, { "epoch": 0.14495929868503443, "grad_norm": 4.849330902099609, "learning_rate": 0.0002897116324535679, "loss": 1.802, "step": 926 }, { "epoch": 0.14511584220413276, "grad_norm": 3.4698123931884766, "learning_rate": 0.0002896871945259042, "loss": 1.2331, "step": 927 }, { "epoch": 0.14527238572323106, "grad_norm": 2.4112536907196045, "learning_rate": 0.00028966275659824043, "loss": 1.5486, "step": 928 }, { "epoch": 0.14542892924232936, "grad_norm": 3.808506965637207, "learning_rate": 0.0002896383186705767, "loss": 1.6323, "step": 929 }, { "epoch": 0.14558547276142766, "grad_norm": 2.8173084259033203, "learning_rate": 0.000289613880742913, "loss": 1.4004, "step": 930 }, { "epoch": 0.145742016280526, "grad_norm": 
2.575488805770874, "learning_rate": 0.00028958944281524924, "loss": 1.2148, "step": 931 }, { "epoch": 0.1458985597996243, "grad_norm": 2.422727108001709, "learning_rate": 0.0002895650048875855, "loss": 1.6757, "step": 932 }, { "epoch": 0.1460551033187226, "grad_norm": 1.202463984489441, "learning_rate": 0.0002895405669599218, "loss": 0.7642, "step": 933 }, { "epoch": 0.14621164683782092, "grad_norm": 2.1168575286865234, "learning_rate": 0.00028951612903225805, "loss": 1.628, "step": 934 }, { "epoch": 0.14636819035691923, "grad_norm": 5.3358917236328125, "learning_rate": 0.0002894916911045943, "loss": 1.873, "step": 935 }, { "epoch": 0.14652473387601753, "grad_norm": 4.962363243103027, "learning_rate": 0.00028946725317693055, "loss": 1.5203, "step": 936 }, { "epoch": 0.14668127739511586, "grad_norm": 2.7353806495666504, "learning_rate": 0.00028944281524926686, "loss": 1.3402, "step": 937 }, { "epoch": 0.14683782091421416, "grad_norm": 2.9888954162597656, "learning_rate": 0.0002894183773216031, "loss": 1.7889, "step": 938 }, { "epoch": 0.14699436443331246, "grad_norm": 2.7625157833099365, "learning_rate": 0.00028939393939393936, "loss": 1.7595, "step": 939 }, { "epoch": 0.14715090795241076, "grad_norm": 1.8934247493743896, "learning_rate": 0.00028936950146627566, "loss": 1.1726, "step": 940 }, { "epoch": 0.1473074514715091, "grad_norm": 2.089092969894409, "learning_rate": 0.0002893450635386119, "loss": 1.0545, "step": 941 }, { "epoch": 0.1474639949906074, "grad_norm": 2.7847442626953125, "learning_rate": 0.00028932062561094817, "loss": 1.4845, "step": 942 }, { "epoch": 0.1476205385097057, "grad_norm": 5.1051740646362305, "learning_rate": 0.0002892961876832844, "loss": 1.5035, "step": 943 }, { "epoch": 0.14777708202880402, "grad_norm": 4.7735161781311035, "learning_rate": 0.00028927174975562067, "loss": 1.6538, "step": 944 }, { "epoch": 0.14793362554790232, "grad_norm": 2.4726431369781494, "learning_rate": 0.000289247311827957, "loss": 1.2094, "step": 945 }, { 
"epoch": 0.14809016906700062, "grad_norm": 3.6386454105377197, "learning_rate": 0.0002892228739002932, "loss": 1.1057, "step": 946 }, { "epoch": 0.14824671258609892, "grad_norm": 2.5107581615448, "learning_rate": 0.0002891984359726295, "loss": 0.9869, "step": 947 }, { "epoch": 0.14840325610519725, "grad_norm": 2.7597451210021973, "learning_rate": 0.0002891739980449658, "loss": 1.413, "step": 948 }, { "epoch": 0.14855979962429555, "grad_norm": 5.333527565002441, "learning_rate": 0.00028914956011730203, "loss": 1.6703, "step": 949 }, { "epoch": 0.14871634314339385, "grad_norm": 2.9326138496398926, "learning_rate": 0.0002891251221896383, "loss": 1.8701, "step": 950 }, { "epoch": 0.14887288666249218, "grad_norm": 1.1659024953842163, "learning_rate": 0.00028910068426197454, "loss": 0.9318, "step": 951 }, { "epoch": 0.14902943018159048, "grad_norm": 0.989399790763855, "learning_rate": 0.00028907624633431084, "loss": 0.824, "step": 952 }, { "epoch": 0.14918597370068878, "grad_norm": 0.8183377385139465, "learning_rate": 0.0002890518084066471, "loss": 0.9731, "step": 953 }, { "epoch": 0.1493425172197871, "grad_norm": 0.9016216993331909, "learning_rate": 0.00028902737047898334, "loss": 0.6404, "step": 954 }, { "epoch": 0.14949906073888541, "grad_norm": 0.7788622975349426, "learning_rate": 0.00028900293255131965, "loss": 0.7309, "step": 955 }, { "epoch": 0.14965560425798372, "grad_norm": 0.9964921474456787, "learning_rate": 0.0002889784946236559, "loss": 0.8296, "step": 956 }, { "epoch": 0.14981214777708202, "grad_norm": 1.0400006771087646, "learning_rate": 0.00028895405669599215, "loss": 0.7825, "step": 957 }, { "epoch": 0.14996869129618035, "grad_norm": 1.093723177909851, "learning_rate": 0.00028892961876832846, "loss": 0.6284, "step": 958 }, { "epoch": 0.15012523481527865, "grad_norm": 0.8602253198623657, "learning_rate": 0.00028890518084066465, "loss": 0.606, "step": 959 }, { "epoch": 0.15028177833437695, "grad_norm": 1.0207772254943848, "learning_rate": 
0.00028888074291300096, "loss": 0.6316, "step": 960 }, { "epoch": 0.15043832185347528, "grad_norm": 1.8470277786254883, "learning_rate": 0.0002888563049853372, "loss": 0.9523, "step": 961 }, { "epoch": 0.15059486537257358, "grad_norm": 1.5962973833084106, "learning_rate": 0.00028883186705767346, "loss": 0.7024, "step": 962 }, { "epoch": 0.15075140889167188, "grad_norm": 1.2844789028167725, "learning_rate": 0.00028880742913000977, "loss": 1.0965, "step": 963 }, { "epoch": 0.1509079524107702, "grad_norm": 1.1468544006347656, "learning_rate": 0.000288782991202346, "loss": 0.8194, "step": 964 }, { "epoch": 0.1510644959298685, "grad_norm": 1.3954484462738037, "learning_rate": 0.00028875855327468227, "loss": 0.5367, "step": 965 }, { "epoch": 0.1512210394489668, "grad_norm": 1.3916863203048706, "learning_rate": 0.0002887341153470185, "loss": 0.7351, "step": 966 }, { "epoch": 0.1513775829680651, "grad_norm": 1.7426979541778564, "learning_rate": 0.0002887096774193548, "loss": 1.0045, "step": 967 }, { "epoch": 0.15153412648716344, "grad_norm": 1.407542109489441, "learning_rate": 0.0002886852394916911, "loss": 0.889, "step": 968 }, { "epoch": 0.15169067000626174, "grad_norm": 1.1983486413955688, "learning_rate": 0.00028866080156402733, "loss": 0.9749, "step": 969 }, { "epoch": 0.15184721352536004, "grad_norm": 2.5818674564361572, "learning_rate": 0.00028863636363636363, "loss": 0.8094, "step": 970 }, { "epoch": 0.15200375704445837, "grad_norm": 1.2199925184249878, "learning_rate": 0.0002886119257086999, "loss": 1.3122, "step": 971 }, { "epoch": 0.15216030056355667, "grad_norm": 1.924811840057373, "learning_rate": 0.00028858748778103614, "loss": 0.9118, "step": 972 }, { "epoch": 0.15231684408265497, "grad_norm": 1.6180088520050049, "learning_rate": 0.00028856304985337244, "loss": 0.9576, "step": 973 }, { "epoch": 0.15247338760175327, "grad_norm": 2.2538881301879883, "learning_rate": 0.00028853861192570864, "loss": 1.1137, "step": 974 }, { "epoch": 0.1526299311208516, 
"grad_norm": 3.2477221488952637, "learning_rate": 0.00028851417399804494, "loss": 1.6183, "step": 975 }, { "epoch": 0.1527864746399499, "grad_norm": 2.37235426902771, "learning_rate": 0.0002884897360703812, "loss": 0.978, "step": 976 }, { "epoch": 0.1529430181590482, "grad_norm": 2.178849697113037, "learning_rate": 0.00028846529814271745, "loss": 1.0761, "step": 977 }, { "epoch": 0.15309956167814653, "grad_norm": 2.8794000148773193, "learning_rate": 0.00028844086021505375, "loss": 1.2112, "step": 978 }, { "epoch": 0.15325610519724484, "grad_norm": 2.8864026069641113, "learning_rate": 0.00028841642228739, "loss": 1.4875, "step": 979 }, { "epoch": 0.15341264871634314, "grad_norm": 2.0675783157348633, "learning_rate": 0.00028839198435972626, "loss": 1.2043, "step": 980 }, { "epoch": 0.15356919223544147, "grad_norm": 1.339497685432434, "learning_rate": 0.00028836754643206256, "loss": 1.2546, "step": 981 }, { "epoch": 0.15372573575453977, "grad_norm": 2.3659565448760986, "learning_rate": 0.0002883431085043988, "loss": 1.3646, "step": 982 }, { "epoch": 0.15388227927363807, "grad_norm": 2.711576461791992, "learning_rate": 0.00028831867057673506, "loss": 1.3132, "step": 983 }, { "epoch": 0.15403882279273637, "grad_norm": 2.318260431289673, "learning_rate": 0.0002882942326490713, "loss": 1.5859, "step": 984 }, { "epoch": 0.1541953663118347, "grad_norm": 1.689608097076416, "learning_rate": 0.0002882697947214076, "loss": 1.1586, "step": 985 }, { "epoch": 0.154351909830933, "grad_norm": 2.5589444637298584, "learning_rate": 0.00028824535679374387, "loss": 1.1056, "step": 986 }, { "epoch": 0.1545084533500313, "grad_norm": 2.314589262008667, "learning_rate": 0.0002882209188660801, "loss": 1.7521, "step": 987 }, { "epoch": 0.15466499686912963, "grad_norm": 3.8083949089050293, "learning_rate": 0.00028819648093841643, "loss": 1.7522, "step": 988 }, { "epoch": 0.15482154038822793, "grad_norm": 2.452573537826538, "learning_rate": 0.0002881720430107526, "loss": 1.1393, "step": 989 }, { 
"epoch": 0.15497808390732623, "grad_norm": 2.592909336090088, "learning_rate": 0.00028814760508308893, "loss": 2.0492, "step": 990 }, { "epoch": 0.15513462742642456, "grad_norm": 2.1113009452819824, "learning_rate": 0.0002881231671554252, "loss": 1.3064, "step": 991 }, { "epoch": 0.15529117094552286, "grad_norm": 2.0039310455322266, "learning_rate": 0.00028809872922776143, "loss": 1.1432, "step": 992 }, { "epoch": 0.15544771446462116, "grad_norm": 3.2623660564422607, "learning_rate": 0.00028807429130009774, "loss": 1.5518, "step": 993 }, { "epoch": 0.15560425798371946, "grad_norm": 1.7344200611114502, "learning_rate": 0.000288049853372434, "loss": 1.2908, "step": 994 }, { "epoch": 0.1557608015028178, "grad_norm": 3.0457141399383545, "learning_rate": 0.00028802541544477024, "loss": 1.8197, "step": 995 }, { "epoch": 0.1559173450219161, "grad_norm": 3.5465786457061768, "learning_rate": 0.00028800097751710655, "loss": 1.2153, "step": 996 }, { "epoch": 0.1560738885410144, "grad_norm": 2.4094431400299072, "learning_rate": 0.0002879765395894428, "loss": 1.3038, "step": 997 }, { "epoch": 0.15623043206011272, "grad_norm": 4.36583948135376, "learning_rate": 0.00028795210166177905, "loss": 1.4192, "step": 998 }, { "epoch": 0.15638697557921102, "grad_norm": 2.3406410217285156, "learning_rate": 0.0002879276637341153, "loss": 1.0493, "step": 999 }, { "epoch": 0.15654351909830932, "grad_norm": 1.9417458772659302, "learning_rate": 0.0002879032258064516, "loss": 1.0919, "step": 1000 }, { "epoch": 0.15654351909830932, "eval_loss": 1.01686429977417, "eval_runtime": 205.1557, "eval_samples_per_second": 60.359, "eval_steps_per_second": 3.773, "eval_wer": 0.7063561212997004, "step": 1000 }, { "epoch": 0.15670006261740763, "grad_norm": 1.0563576221466064, "learning_rate": 0.00028787878787878786, "loss": 0.8153, "step": 1001 }, { "epoch": 0.15685660613650595, "grad_norm": 0.817237913608551, "learning_rate": 0.0002878543499511241, "loss": 0.6999, "step": 1002 }, { "epoch": 
0.15701314965560426, "grad_norm": 0.6916754841804504, "learning_rate": 0.0002878299120234604, "loss": 0.5458, "step": 1003 }, { "epoch": 0.15716969317470256, "grad_norm": 0.7108484506607056, "learning_rate": 0.00028780547409579666, "loss": 0.677, "step": 1004 }, { "epoch": 0.15732623669380089, "grad_norm": 1.1377077102661133, "learning_rate": 0.0002877810361681329, "loss": 0.7012, "step": 1005 }, { "epoch": 0.1574827802128992, "grad_norm": 1.456892967224121, "learning_rate": 0.00028775659824046917, "loss": 0.6656, "step": 1006 }, { "epoch": 0.1576393237319975, "grad_norm": 1.1236454248428345, "learning_rate": 0.0002877321603128054, "loss": 0.6829, "step": 1007 }, { "epoch": 0.15779586725109582, "grad_norm": 2.5703699588775635, "learning_rate": 0.0002877077223851417, "loss": 1.0528, "step": 1008 }, { "epoch": 0.15795241077019412, "grad_norm": 1.1044304370880127, "learning_rate": 0.000287683284457478, "loss": 0.6504, "step": 1009 }, { "epoch": 0.15810895428929242, "grad_norm": 1.558677315711975, "learning_rate": 0.0002876588465298142, "loss": 0.654, "step": 1010 }, { "epoch": 0.15826549780839072, "grad_norm": 1.6468641757965088, "learning_rate": 0.00028763440860215053, "loss": 0.7489, "step": 1011 }, { "epoch": 0.15842204132748905, "grad_norm": 1.1569092273712158, "learning_rate": 0.0002876099706744868, "loss": 0.8098, "step": 1012 }, { "epoch": 0.15857858484658735, "grad_norm": 1.2596278190612793, "learning_rate": 0.00028758553274682303, "loss": 0.8457, "step": 1013 }, { "epoch": 0.15873512836568565, "grad_norm": 0.9945486187934875, "learning_rate": 0.0002875610948191593, "loss": 0.6101, "step": 1014 }, { "epoch": 0.15889167188478398, "grad_norm": 1.4740206003189087, "learning_rate": 0.0002875366568914956, "loss": 0.7989, "step": 1015 }, { "epoch": 0.15904821540388228, "grad_norm": 4.350058555603027, "learning_rate": 0.00028751221896383184, "loss": 0.9334, "step": 1016 }, { "epoch": 0.15920475892298058, "grad_norm": 1.1375359296798706, "learning_rate": 
0.0002874877810361681, "loss": 0.6877, "step": 1017 }, { "epoch": 0.1593613024420789, "grad_norm": 2.1333682537078857, "learning_rate": 0.0002874633431085044, "loss": 0.8379, "step": 1018 }, { "epoch": 0.1595178459611772, "grad_norm": 1.3724780082702637, "learning_rate": 0.00028743890518084065, "loss": 1.0991, "step": 1019 }, { "epoch": 0.1596743894802755, "grad_norm": 4.463011741638184, "learning_rate": 0.0002874144672531769, "loss": 1.4866, "step": 1020 }, { "epoch": 0.15983093299937381, "grad_norm": 2.178107738494873, "learning_rate": 0.0002873900293255132, "loss": 0.7986, "step": 1021 }, { "epoch": 0.15998747651847214, "grad_norm": 1.3757675886154175, "learning_rate": 0.0002873655913978494, "loss": 0.8131, "step": 1022 }, { "epoch": 0.16014402003757044, "grad_norm": 1.6082704067230225, "learning_rate": 0.0002873411534701857, "loss": 1.0131, "step": 1023 }, { "epoch": 0.16030056355666875, "grad_norm": 1.9035452604293823, "learning_rate": 0.00028731671554252196, "loss": 0.9901, "step": 1024 }, { "epoch": 0.16045710707576707, "grad_norm": 2.227452039718628, "learning_rate": 0.0002872922776148582, "loss": 1.0808, "step": 1025 }, { "epoch": 0.16061365059486538, "grad_norm": 2.5133438110351562, "learning_rate": 0.0002872678396871945, "loss": 1.4922, "step": 1026 }, { "epoch": 0.16077019411396368, "grad_norm": 3.3356120586395264, "learning_rate": 0.00028724340175953077, "loss": 1.2555, "step": 1027 }, { "epoch": 0.16092673763306198, "grad_norm": 2.868877649307251, "learning_rate": 0.000287218963831867, "loss": 1.3604, "step": 1028 }, { "epoch": 0.1610832811521603, "grad_norm": 2.3197638988494873, "learning_rate": 0.0002871945259042033, "loss": 1.4672, "step": 1029 }, { "epoch": 0.1612398246712586, "grad_norm": 1.5867116451263428, "learning_rate": 0.0002871700879765396, "loss": 0.7615, "step": 1030 }, { "epoch": 0.1613963681903569, "grad_norm": 1.823372721672058, "learning_rate": 0.00028714565004887583, "loss": 1.4227, "step": 1031 }, { "epoch": 0.16155291170945524, 
"grad_norm": 3.756685972213745, "learning_rate": 0.0002871212121212121, "loss": 1.9678, "step": 1032 }, { "epoch": 0.16170945522855354, "grad_norm": 2.313270330429077, "learning_rate": 0.0002870967741935484, "loss": 0.9197, "step": 1033 }, { "epoch": 0.16186599874765184, "grad_norm": 1.9607908725738525, "learning_rate": 0.00028707233626588464, "loss": 1.4155, "step": 1034 }, { "epoch": 0.16202254226675017, "grad_norm": 2.0244407653808594, "learning_rate": 0.0002870478983382209, "loss": 1.2159, "step": 1035 }, { "epoch": 0.16217908578584847, "grad_norm": 2.4551143646240234, "learning_rate": 0.0002870234604105572, "loss": 1.8048, "step": 1036 }, { "epoch": 0.16233562930494677, "grad_norm": 2.5346062183380127, "learning_rate": 0.0002869990224828934, "loss": 1.2969, "step": 1037 }, { "epoch": 0.16249217282404507, "grad_norm": 3.634019374847412, "learning_rate": 0.0002869745845552297, "loss": 2.0716, "step": 1038 }, { "epoch": 0.1626487163431434, "grad_norm": 2.7607200145721436, "learning_rate": 0.00028695014662756595, "loss": 0.8884, "step": 1039 }, { "epoch": 0.1628052598622417, "grad_norm": 2.8792600631713867, "learning_rate": 0.0002869257086999022, "loss": 1.5306, "step": 1040 }, { "epoch": 0.16296180338134, "grad_norm": 5.755788803100586, "learning_rate": 0.0002869012707722385, "loss": 1.4916, "step": 1041 }, { "epoch": 0.16311834690043833, "grad_norm": 2.4009017944335938, "learning_rate": 0.00028687683284457475, "loss": 1.3055, "step": 1042 }, { "epoch": 0.16327489041953663, "grad_norm": 2.3000261783599854, "learning_rate": 0.000286852394916911, "loss": 1.3189, "step": 1043 }, { "epoch": 0.16343143393863493, "grad_norm": 2.4022369384765625, "learning_rate": 0.0002868279569892473, "loss": 1.5442, "step": 1044 }, { "epoch": 0.16358797745773326, "grad_norm": 2.216566801071167, "learning_rate": 0.00028680351906158356, "loss": 1.7154, "step": 1045 }, { "epoch": 0.16374452097683156, "grad_norm": 2.9098117351531982, "learning_rate": 0.0002867790811339198, "loss": 1.1951, 
"step": 1046 }, { "epoch": 0.16390106449592987, "grad_norm": 4.8952178955078125, "learning_rate": 0.00028675464320625606, "loss": 1.6244, "step": 1047 }, { "epoch": 0.16405760801502817, "grad_norm": 4.096086502075195, "learning_rate": 0.00028673020527859237, "loss": 1.3874, "step": 1048 }, { "epoch": 0.1642141515341265, "grad_norm": 3.0238194465637207, "learning_rate": 0.0002867057673509286, "loss": 1.345, "step": 1049 }, { "epoch": 0.1643706950532248, "grad_norm": 2.8986921310424805, "learning_rate": 0.00028668132942326487, "loss": 1.2903, "step": 1050 }, { "epoch": 0.1645272385723231, "grad_norm": 0.9072627425193787, "learning_rate": 0.0002866568914956012, "loss": 0.6808, "step": 1051 }, { "epoch": 0.16468378209142143, "grad_norm": 0.8426551222801208, "learning_rate": 0.00028663245356793743, "loss": 0.6256, "step": 1052 }, { "epoch": 0.16484032561051973, "grad_norm": 0.7460830807685852, "learning_rate": 0.0002866080156402737, "loss": 0.5074, "step": 1053 }, { "epoch": 0.16499686912961803, "grad_norm": 0.606028139591217, "learning_rate": 0.00028658357771260993, "loss": 0.6061, "step": 1054 }, { "epoch": 0.16515341264871633, "grad_norm": 0.833991527557373, "learning_rate": 0.0002865591397849462, "loss": 0.5883, "step": 1055 }, { "epoch": 0.16530995616781466, "grad_norm": 1.9847928285598755, "learning_rate": 0.0002865347018572825, "loss": 0.7446, "step": 1056 }, { "epoch": 0.16546649968691296, "grad_norm": 0.9032483100891113, "learning_rate": 0.00028651026392961874, "loss": 0.5827, "step": 1057 }, { "epoch": 0.16562304320601126, "grad_norm": 0.7685295343399048, "learning_rate": 0.000286485826001955, "loss": 0.6579, "step": 1058 }, { "epoch": 0.1657795867251096, "grad_norm": 0.6518795490264893, "learning_rate": 0.0002864613880742913, "loss": 0.5019, "step": 1059 }, { "epoch": 0.1659361302442079, "grad_norm": 1.02455735206604, "learning_rate": 0.00028643695014662755, "loss": 0.4796, "step": 1060 }, { "epoch": 0.1660926737633062, "grad_norm": 1.0418224334716797, 
"learning_rate": 0.0002864125122189638, "loss": 0.5102, "step": 1061 }, { "epoch": 0.16624921728240452, "grad_norm": 2.0449554920196533, "learning_rate": 0.00028638807429130005, "loss": 0.7181, "step": 1062 }, { "epoch": 0.16640576080150282, "grad_norm": 1.0994571447372437, "learning_rate": 0.00028636363636363636, "loss": 0.7923, "step": 1063 }, { "epoch": 0.16656230432060112, "grad_norm": 0.9208387136459351, "learning_rate": 0.0002863391984359726, "loss": 0.5434, "step": 1064 }, { "epoch": 0.16671884783969942, "grad_norm": 1.2651475667953491, "learning_rate": 0.00028631476050830886, "loss": 1.0223, "step": 1065 }, { "epoch": 0.16687539135879775, "grad_norm": 1.9309346675872803, "learning_rate": 0.00028629032258064516, "loss": 0.8762, "step": 1066 }, { "epoch": 0.16703193487789605, "grad_norm": 2.4084434509277344, "learning_rate": 0.0002862658846529814, "loss": 0.9494, "step": 1067 }, { "epoch": 0.16718847839699436, "grad_norm": 1.4075591564178467, "learning_rate": 0.00028624144672531767, "loss": 0.7541, "step": 1068 }, { "epoch": 0.16734502191609268, "grad_norm": 2.102005958557129, "learning_rate": 0.00028621700879765397, "loss": 0.8695, "step": 1069 }, { "epoch": 0.16750156543519099, "grad_norm": 2.49064040184021, "learning_rate": 0.00028619257086999017, "loss": 0.7304, "step": 1070 }, { "epoch": 0.1676581089542893, "grad_norm": 1.221191167831421, "learning_rate": 0.0002861681329423265, "loss": 0.6588, "step": 1071 }, { "epoch": 0.16781465247338762, "grad_norm": 1.5001965761184692, "learning_rate": 0.0002861436950146627, "loss": 1.1985, "step": 1072 }, { "epoch": 0.16797119599248592, "grad_norm": 1.4838846921920776, "learning_rate": 0.000286119257086999, "loss": 0.9565, "step": 1073 }, { "epoch": 0.16812773951158422, "grad_norm": 1.5427438020706177, "learning_rate": 0.0002860948191593353, "loss": 0.8253, "step": 1074 }, { "epoch": 0.16828428303068252, "grad_norm": 1.4111217260360718, "learning_rate": 0.00028607038123167153, "loss": 0.6618, "step": 1075 }, { 
"epoch": 0.16844082654978085, "grad_norm": 1.6827608346939087, "learning_rate": 0.0002860459433040078, "loss": 0.9508, "step": 1076 }, { "epoch": 0.16859737006887915, "grad_norm": 1.9233096837997437, "learning_rate": 0.00028602150537634404, "loss": 1.5639, "step": 1077 }, { "epoch": 0.16875391358797745, "grad_norm": 1.4510252475738525, "learning_rate": 0.00028599706744868034, "loss": 0.9704, "step": 1078 }, { "epoch": 0.16891045710707578, "grad_norm": 3.087716817855835, "learning_rate": 0.0002859726295210166, "loss": 0.979, "step": 1079 }, { "epoch": 0.16906700062617408, "grad_norm": 3.0797057151794434, "learning_rate": 0.00028594819159335284, "loss": 0.6662, "step": 1080 }, { "epoch": 0.16922354414527238, "grad_norm": 2.5428311824798584, "learning_rate": 0.00028592375366568915, "loss": 1.4854, "step": 1081 }, { "epoch": 0.16938008766437068, "grad_norm": 1.8000260591506958, "learning_rate": 0.0002858993157380254, "loss": 1.3866, "step": 1082 }, { "epoch": 0.169536631183469, "grad_norm": 3.2034716606140137, "learning_rate": 0.00028587487781036165, "loss": 1.1556, "step": 1083 }, { "epoch": 0.1696931747025673, "grad_norm": 1.544206142425537, "learning_rate": 0.00028585043988269796, "loss": 1.0316, "step": 1084 }, { "epoch": 0.1698497182216656, "grad_norm": 2.489361047744751, "learning_rate": 0.00028582600195503415, "loss": 1.1934, "step": 1085 }, { "epoch": 0.17000626174076394, "grad_norm": 2.156745672225952, "learning_rate": 0.00028580156402737046, "loss": 1.4824, "step": 1086 }, { "epoch": 0.17016280525986224, "grad_norm": 2.317676305770874, "learning_rate": 0.0002857771260997067, "loss": 1.266, "step": 1087 }, { "epoch": 0.17031934877896054, "grad_norm": 1.6004157066345215, "learning_rate": 0.00028575268817204296, "loss": 1.0838, "step": 1088 }, { "epoch": 0.17047589229805887, "grad_norm": 1.9848181009292603, "learning_rate": 0.00028572825024437927, "loss": 1.274, "step": 1089 }, { "epoch": 0.17063243581715717, "grad_norm": 2.925771474838257, "learning_rate": 
0.0002857038123167155, "loss": 1.2818, "step": 1090 }, { "epoch": 0.17078897933625548, "grad_norm": 1.8362958431243896, "learning_rate": 0.00028567937438905177, "loss": 1.0246, "step": 1091 }, { "epoch": 0.17094552285535378, "grad_norm": 4.179997444152832, "learning_rate": 0.0002856549364613881, "loss": 2.1207, "step": 1092 }, { "epoch": 0.1711020663744521, "grad_norm": 4.3508429527282715, "learning_rate": 0.0002856304985337243, "loss": 2.0854, "step": 1093 }, { "epoch": 0.1712586098935504, "grad_norm": 6.442362308502197, "learning_rate": 0.0002856060606060606, "loss": 1.654, "step": 1094 }, { "epoch": 0.1714151534126487, "grad_norm": 1.9675512313842773, "learning_rate": 0.00028558162267839683, "loss": 1.2959, "step": 1095 }, { "epoch": 0.17157169693174704, "grad_norm": 2.1139070987701416, "learning_rate": 0.00028555718475073313, "loss": 1.4101, "step": 1096 }, { "epoch": 0.17172824045084534, "grad_norm": 1.998644232749939, "learning_rate": 0.0002855327468230694, "loss": 0.6606, "step": 1097 }, { "epoch": 0.17188478396994364, "grad_norm": 3.3093795776367188, "learning_rate": 0.00028550830889540564, "loss": 0.8759, "step": 1098 }, { "epoch": 0.17204132748904197, "grad_norm": 3.0865354537963867, "learning_rate": 0.00028548387096774194, "loss": 0.5971, "step": 1099 }, { "epoch": 0.17219787100814027, "grad_norm": 1.6972962617874146, "learning_rate": 0.0002854594330400782, "loss": 0.8417, "step": 1100 }, { "epoch": 0.17235441452723857, "grad_norm": 0.6339183449745178, "learning_rate": 0.00028543499511241445, "loss": 0.6272, "step": 1101 }, { "epoch": 0.17251095804633687, "grad_norm": 0.7550747990608215, "learning_rate": 0.0002854105571847507, "loss": 0.6742, "step": 1102 }, { "epoch": 0.1726675015654352, "grad_norm": 0.7940789461135864, "learning_rate": 0.00028538611925708695, "loss": 0.4962, "step": 1103 }, { "epoch": 0.1728240450845335, "grad_norm": 0.7967172265052795, "learning_rate": 0.00028536168132942325, "loss": 0.591, "step": 1104 }, { "epoch": 
0.1729805886036318, "grad_norm": 0.9077056050300598, "learning_rate": 0.0002853372434017595, "loss": 0.5865, "step": 1105 }, { "epoch": 0.17313713212273013, "grad_norm": 0.7266988158226013, "learning_rate": 0.00028531280547409576, "loss": 0.5431, "step": 1106 }, { "epoch": 0.17329367564182843, "grad_norm": 1.1024484634399414, "learning_rate": 0.00028528836754643206, "loss": 0.68, "step": 1107 }, { "epoch": 0.17345021916092673, "grad_norm": 1.058496117591858, "learning_rate": 0.0002852639296187683, "loss": 0.6728, "step": 1108 }, { "epoch": 0.17360676268002503, "grad_norm": 1.3563542366027832, "learning_rate": 0.00028523949169110456, "loss": 0.6104, "step": 1109 }, { "epoch": 0.17376330619912336, "grad_norm": 0.9580490589141846, "learning_rate": 0.0002852150537634408, "loss": 0.4989, "step": 1110 }, { "epoch": 0.17391984971822166, "grad_norm": 1.4669703245162964, "learning_rate": 0.0002851906158357771, "loss": 0.7394, "step": 1111 }, { "epoch": 0.17407639323731997, "grad_norm": 1.063644289970398, "learning_rate": 0.00028516617790811337, "loss": 0.5999, "step": 1112 }, { "epoch": 0.1742329367564183, "grad_norm": 1.5729597806930542, "learning_rate": 0.0002851417399804496, "loss": 0.8043, "step": 1113 }, { "epoch": 0.1743894802755166, "grad_norm": 0.8515031933784485, "learning_rate": 0.00028511730205278593, "loss": 0.5842, "step": 1114 }, { "epoch": 0.1745460237946149, "grad_norm": 2.940446138381958, "learning_rate": 0.0002850928641251222, "loss": 1.1475, "step": 1115 }, { "epoch": 0.17470256731371323, "grad_norm": 1.5992013216018677, "learning_rate": 0.00028506842619745843, "loss": 1.125, "step": 1116 }, { "epoch": 0.17485911083281153, "grad_norm": 1.8026546239852905, "learning_rate": 0.00028504398826979474, "loss": 0.8755, "step": 1117 }, { "epoch": 0.17501565435190983, "grad_norm": 0.9795113801956177, "learning_rate": 0.00028501955034213093, "loss": 0.5423, "step": 1118 }, { "epoch": 0.17517219787100813, "grad_norm": 2.1793277263641357, "learning_rate": 
0.00028499511241446724, "loss": 0.907, "step": 1119 }, { "epoch": 0.17532874139010646, "grad_norm": 6.160722255706787, "learning_rate": 0.0002849706744868035, "loss": 1.1782, "step": 1120 }, { "epoch": 0.17548528490920476, "grad_norm": 3.347403049468994, "learning_rate": 0.00028494623655913974, "loss": 1.0413, "step": 1121 }, { "epoch": 0.17564182842830306, "grad_norm": 2.783139705657959, "learning_rate": 0.00028492179863147605, "loss": 0.8199, "step": 1122 }, { "epoch": 0.1757983719474014, "grad_norm": 1.6603683233261108, "learning_rate": 0.0002848973607038123, "loss": 0.7946, "step": 1123 }, { "epoch": 0.1759549154664997, "grad_norm": 2.4885950088500977, "learning_rate": 0.00028487292277614855, "loss": 1.1803, "step": 1124 }, { "epoch": 0.176111458985598, "grad_norm": 2.89567494392395, "learning_rate": 0.0002848484848484848, "loss": 1.2259, "step": 1125 }, { "epoch": 0.1762680025046963, "grad_norm": 1.2964775562286377, "learning_rate": 0.0002848240469208211, "loss": 1.0174, "step": 1126 }, { "epoch": 0.17642454602379462, "grad_norm": 1.6365723609924316, "learning_rate": 0.00028479960899315736, "loss": 0.9492, "step": 1127 }, { "epoch": 0.17658108954289292, "grad_norm": 1.2834248542785645, "learning_rate": 0.0002847751710654936, "loss": 0.7886, "step": 1128 }, { "epoch": 0.17673763306199122, "grad_norm": 1.8275222778320312, "learning_rate": 0.0002847507331378299, "loss": 1.1145, "step": 1129 }, { "epoch": 0.17689417658108955, "grad_norm": 2.4772050380706787, "learning_rate": 0.00028472629521016617, "loss": 0.9815, "step": 1130 }, { "epoch": 0.17705072010018785, "grad_norm": 5.258149147033691, "learning_rate": 0.0002847018572825024, "loss": 1.1874, "step": 1131 }, { "epoch": 0.17720726361928615, "grad_norm": 2.3318662643432617, "learning_rate": 0.0002846774193548387, "loss": 1.308, "step": 1132 }, { "epoch": 0.17736380713838448, "grad_norm": 3.756080389022827, "learning_rate": 0.0002846529814271749, "loss": 1.0855, "step": 1133 }, { "epoch": 0.17752035065748278, 
"grad_norm": 2.8273825645446777, "learning_rate": 0.0002846285434995112, "loss": 1.395, "step": 1134 }, { "epoch": 0.17767689417658108, "grad_norm": 1.8789290189743042, "learning_rate": 0.0002846041055718475, "loss": 1.2854, "step": 1135 }, { "epoch": 0.17783343769567939, "grad_norm": 5.230047225952148, "learning_rate": 0.0002845796676441837, "loss": 1.3411, "step": 1136 }, { "epoch": 0.17798998121477771, "grad_norm": 3.2810747623443604, "learning_rate": 0.00028455522971652003, "loss": 1.972, "step": 1137 }, { "epoch": 0.17814652473387602, "grad_norm": 3.59177565574646, "learning_rate": 0.0002845307917888563, "loss": 1.4195, "step": 1138 }, { "epoch": 0.17830306825297432, "grad_norm": 3.564577341079712, "learning_rate": 0.00028450635386119253, "loss": 1.0904, "step": 1139 }, { "epoch": 0.17845961177207265, "grad_norm": 2.349719762802124, "learning_rate": 0.00028448191593352884, "loss": 1.4299, "step": 1140 }, { "epoch": 0.17861615529117095, "grad_norm": 2.8419268131256104, "learning_rate": 0.0002844574780058651, "loss": 1.9473, "step": 1141 }, { "epoch": 0.17877269881026925, "grad_norm": 5.5093159675598145, "learning_rate": 0.00028443304007820134, "loss": 1.7942, "step": 1142 }, { "epoch": 0.17892924232936758, "grad_norm": 2.4165680408477783, "learning_rate": 0.0002844086021505376, "loss": 1.3642, "step": 1143 }, { "epoch": 0.17908578584846588, "grad_norm": 3.020798444747925, "learning_rate": 0.0002843841642228739, "loss": 1.649, "step": 1144 }, { "epoch": 0.17924232936756418, "grad_norm": 2.0584144592285156, "learning_rate": 0.00028435972629521015, "loss": 1.4339, "step": 1145 }, { "epoch": 0.17939887288666248, "grad_norm": 3.8811330795288086, "learning_rate": 0.0002843352883675464, "loss": 1.143, "step": 1146 }, { "epoch": 0.1795554164057608, "grad_norm": 3.469479560852051, "learning_rate": 0.0002843108504398827, "loss": 1.3439, "step": 1147 }, { "epoch": 0.1797119599248591, "grad_norm": 2.772313117980957, "learning_rate": 0.0002842864125122189, "loss": 1.253, 
"step": 1148 }, { "epoch": 0.1798685034439574, "grad_norm": 2.618361473083496, "learning_rate": 0.0002842619745845552, "loss": 1.6485, "step": 1149 }, { "epoch": 0.18002504696305574, "grad_norm": 1.9248026609420776, "learning_rate": 0.00028423753665689146, "loss": 1.4382, "step": 1150 }, { "epoch": 0.18018159048215404, "grad_norm": 1.16201913356781, "learning_rate": 0.0002842130987292277, "loss": 0.6461, "step": 1151 }, { "epoch": 0.18033813400125234, "grad_norm": 0.9263600707054138, "learning_rate": 0.000284188660801564, "loss": 0.6146, "step": 1152 }, { "epoch": 0.18049467752035064, "grad_norm": 0.6873730421066284, "learning_rate": 0.00028416422287390027, "loss": 0.5212, "step": 1153 }, { "epoch": 0.18065122103944897, "grad_norm": 0.7068181037902832, "learning_rate": 0.0002841397849462365, "loss": 0.478, "step": 1154 }, { "epoch": 0.18080776455854727, "grad_norm": 0.824680745601654, "learning_rate": 0.0002841153470185728, "loss": 0.7398, "step": 1155 }, { "epoch": 0.18096430807764557, "grad_norm": 0.7223101854324341, "learning_rate": 0.0002840909090909091, "loss": 0.4756, "step": 1156 }, { "epoch": 0.1811208515967439, "grad_norm": 1.0076377391815186, "learning_rate": 0.00028406647116324533, "loss": 0.518, "step": 1157 }, { "epoch": 0.1812773951158422, "grad_norm": 1.0383641719818115, "learning_rate": 0.0002840420332355816, "loss": 0.604, "step": 1158 }, { "epoch": 0.1814339386349405, "grad_norm": 1.6516761779785156, "learning_rate": 0.0002840175953079179, "loss": 0.6473, "step": 1159 }, { "epoch": 0.18159048215403883, "grad_norm": 1.3219435214996338, "learning_rate": 0.00028399315738025414, "loss": 0.8136, "step": 1160 }, { "epoch": 0.18174702567313714, "grad_norm": 1.0588476657867432, "learning_rate": 0.0002839687194525904, "loss": 0.5458, "step": 1161 }, { "epoch": 0.18190356919223544, "grad_norm": 1.2911880016326904, "learning_rate": 0.0002839442815249267, "loss": 0.6775, "step": 1162 }, { "epoch": 0.18206011271133374, "grad_norm": 1.0801639556884766, 
"learning_rate": 0.00028391984359726294, "loss": 0.7164, "step": 1163 }, { "epoch": 0.18221665623043207, "grad_norm": 1.2428066730499268, "learning_rate": 0.0002838954056695992, "loss": 0.7186, "step": 1164 }, { "epoch": 0.18237319974953037, "grad_norm": 2.018033504486084, "learning_rate": 0.00028387096774193545, "loss": 1.0134, "step": 1165 }, { "epoch": 0.18252974326862867, "grad_norm": 1.2658600807189941, "learning_rate": 0.0002838465298142717, "loss": 0.6296, "step": 1166 }, { "epoch": 0.182686286787727, "grad_norm": 1.356482744216919, "learning_rate": 0.000283822091886608, "loss": 0.8566, "step": 1167 }, { "epoch": 0.1828428303068253, "grad_norm": 1.4970476627349854, "learning_rate": 0.00028379765395894425, "loss": 0.9361, "step": 1168 }, { "epoch": 0.1829993738259236, "grad_norm": 2.531439781188965, "learning_rate": 0.0002837732160312805, "loss": 1.3125, "step": 1169 }, { "epoch": 0.18315591734502193, "grad_norm": 1.9374449253082275, "learning_rate": 0.0002837487781036168, "loss": 1.1296, "step": 1170 }, { "epoch": 0.18331246086412023, "grad_norm": 1.2566038370132446, "learning_rate": 0.00028372434017595306, "loss": 0.9607, "step": 1171 }, { "epoch": 0.18346900438321853, "grad_norm": 1.5955857038497925, "learning_rate": 0.0002836999022482893, "loss": 1.2914, "step": 1172 }, { "epoch": 0.18362554790231683, "grad_norm": 1.207210898399353, "learning_rate": 0.00028367546432062557, "loss": 0.8891, "step": 1173 }, { "epoch": 0.18378209142141516, "grad_norm": 1.754010796546936, "learning_rate": 0.00028365102639296187, "loss": 0.8335, "step": 1174 }, { "epoch": 0.18393863494051346, "grad_norm": 1.5274769067764282, "learning_rate": 0.0002836265884652981, "loss": 1.0153, "step": 1175 }, { "epoch": 0.18409517845961176, "grad_norm": 1.2438735961914062, "learning_rate": 0.0002836021505376344, "loss": 0.7795, "step": 1176 }, { "epoch": 0.1842517219787101, "grad_norm": 1.854723572731018, "learning_rate": 0.0002835777126099707, "loss": 1.1812, "step": 1177 }, { "epoch": 
0.1844082654978084, "grad_norm": 1.8179014921188354, "learning_rate": 0.00028355327468230693, "loss": 1.0775, "step": 1178 }, { "epoch": 0.1845648090169067, "grad_norm": 1.427675724029541, "learning_rate": 0.0002835288367546432, "loss": 0.7761, "step": 1179 }, { "epoch": 0.184721352536005, "grad_norm": 2.1459126472473145, "learning_rate": 0.0002835043988269795, "loss": 1.3916, "step": 1180 }, { "epoch": 0.18487789605510332, "grad_norm": 1.9748454093933105, "learning_rate": 0.0002834799608993157, "loss": 1.0506, "step": 1181 }, { "epoch": 0.18503443957420163, "grad_norm": 2.326005220413208, "learning_rate": 0.000283455522971652, "loss": 1.7368, "step": 1182 }, { "epoch": 0.18519098309329993, "grad_norm": 3.0131938457489014, "learning_rate": 0.00028343108504398824, "loss": 1.5369, "step": 1183 }, { "epoch": 0.18534752661239826, "grad_norm": 2.223701000213623, "learning_rate": 0.0002834066471163245, "loss": 1.1345, "step": 1184 }, { "epoch": 0.18550407013149656, "grad_norm": 3.37308669090271, "learning_rate": 0.0002833822091886608, "loss": 1.7771, "step": 1185 }, { "epoch": 0.18566061365059486, "grad_norm": 2.0770747661590576, "learning_rate": 0.00028335777126099705, "loss": 1.2859, "step": 1186 }, { "epoch": 0.1858171571696932, "grad_norm": 2.7626192569732666, "learning_rate": 0.0002833333333333333, "loss": 1.3574, "step": 1187 }, { "epoch": 0.1859737006887915, "grad_norm": 2.9744863510131836, "learning_rate": 0.0002833088954056696, "loss": 1.9453, "step": 1188 }, { "epoch": 0.1861302442078898, "grad_norm": 2.143836736679077, "learning_rate": 0.00028328445747800586, "loss": 1.4961, "step": 1189 }, { "epoch": 0.1862867877269881, "grad_norm": 1.8865716457366943, "learning_rate": 0.0002832600195503421, "loss": 1.7137, "step": 1190 }, { "epoch": 0.18644333124608642, "grad_norm": 2.325896978378296, "learning_rate": 0.00028323558162267836, "loss": 1.4174, "step": 1191 }, { "epoch": 0.18659987476518472, "grad_norm": 2.0641846656799316, "learning_rate": 
0.00028321114369501466, "loss": 1.7222, "step": 1192 }, { "epoch": 0.18675641828428302, "grad_norm": 1.5752524137496948, "learning_rate": 0.0002831867057673509, "loss": 1.4698, "step": 1193 }, { "epoch": 0.18691296180338135, "grad_norm": 1.7678941488265991, "learning_rate": 0.00028316226783968717, "loss": 1.1898, "step": 1194 }, { "epoch": 0.18706950532247965, "grad_norm": 2.59869384765625, "learning_rate": 0.00028313782991202347, "loss": 1.2828, "step": 1195 }, { "epoch": 0.18722604884157795, "grad_norm": 1.4962000846862793, "learning_rate": 0.00028311339198435967, "loss": 0.9471, "step": 1196 }, { "epoch": 0.18738259236067628, "grad_norm": 2.0714030265808105, "learning_rate": 0.000283088954056696, "loss": 1.1181, "step": 1197 }, { "epoch": 0.18753913587977458, "grad_norm": 1.3907320499420166, "learning_rate": 0.0002830645161290322, "loss": 1.0763, "step": 1198 }, { "epoch": 0.18769567939887288, "grad_norm": 2.7507219314575195, "learning_rate": 0.0002830400782013685, "loss": 1.3243, "step": 1199 }, { "epoch": 0.18785222291797118, "grad_norm": 2.2302870750427246, "learning_rate": 0.0002830156402737048, "loss": 1.1937, "step": 1200 }, { "epoch": 0.1880087664370695, "grad_norm": 0.9416796565055847, "learning_rate": 0.00028299120234604103, "loss": 0.6451, "step": 1201 }, { "epoch": 0.18816530995616781, "grad_norm": 0.6642339825630188, "learning_rate": 0.0002829667644183773, "loss": 0.483, "step": 1202 }, { "epoch": 0.18832185347526612, "grad_norm": 0.9438838958740234, "learning_rate": 0.0002829423264907136, "loss": 0.9268, "step": 1203 }, { "epoch": 0.18847839699436444, "grad_norm": 0.6609278917312622, "learning_rate": 0.00028291788856304984, "loss": 0.6353, "step": 1204 }, { "epoch": 0.18863494051346275, "grad_norm": 0.7672062516212463, "learning_rate": 0.0002828934506353861, "loss": 0.6421, "step": 1205 }, { "epoch": 0.18879148403256105, "grad_norm": 0.7625839114189148, "learning_rate": 0.00028286901270772234, "loss": 0.579, "step": 1206 }, { "epoch": 
0.18894802755165935, "grad_norm": 1.4474471807479858, "learning_rate": 0.00028284457478005865, "loss": 0.7153, "step": 1207 }, { "epoch": 0.18910457107075768, "grad_norm": 1.2392566204071045, "learning_rate": 0.0002828201368523949, "loss": 0.5436, "step": 1208 }, { "epoch": 0.18926111458985598, "grad_norm": 1.2932655811309814, "learning_rate": 0.00028279569892473115, "loss": 0.5473, "step": 1209 }, { "epoch": 0.18941765810895428, "grad_norm": 1.3752779960632324, "learning_rate": 0.00028277126099706746, "loss": 0.5555, "step": 1210 }, { "epoch": 0.1895742016280526, "grad_norm": 0.6900332570075989, "learning_rate": 0.0002827468230694037, "loss": 0.4844, "step": 1211 }, { "epoch": 0.1897307451471509, "grad_norm": 1.8527002334594727, "learning_rate": 0.00028272238514173996, "loss": 1.2104, "step": 1212 }, { "epoch": 0.1898872886662492, "grad_norm": 1.6955235004425049, "learning_rate": 0.0002826979472140762, "loss": 0.7639, "step": 1213 }, { "epoch": 0.19004383218534754, "grad_norm": 1.2351977825164795, "learning_rate": 0.00028267350928641246, "loss": 0.6054, "step": 1214 }, { "epoch": 0.19020037570444584, "grad_norm": 1.1425282955169678, "learning_rate": 0.00028264907135874877, "loss": 0.6811, "step": 1215 }, { "epoch": 0.19035691922354414, "grad_norm": 1.043776273727417, "learning_rate": 0.000282624633431085, "loss": 0.7065, "step": 1216 }, { "epoch": 0.19051346274264244, "grad_norm": 1.4897061586380005, "learning_rate": 0.00028260019550342127, "loss": 0.8076, "step": 1217 }, { "epoch": 0.19067000626174077, "grad_norm": 1.984785556793213, "learning_rate": 0.0002825757575757576, "loss": 1.0173, "step": 1218 }, { "epoch": 0.19082654978083907, "grad_norm": 1.847044587135315, "learning_rate": 0.00028255131964809383, "loss": 1.0036, "step": 1219 }, { "epoch": 0.19098309329993737, "grad_norm": 1.3059883117675781, "learning_rate": 0.0002825268817204301, "loss": 0.7437, "step": 1220 }, { "epoch": 0.1911396368190357, "grad_norm": 1.8383262157440186, "learning_rate": 
0.00028250244379276633, "loss": 0.785, "step": 1221 }, { "epoch": 0.191296180338134, "grad_norm": 1.516766905784607, "learning_rate": 0.00028247800586510264, "loss": 1.0545, "step": 1222 }, { "epoch": 0.1914527238572323, "grad_norm": 1.1955336332321167, "learning_rate": 0.0002824535679374389, "loss": 0.7535, "step": 1223 }, { "epoch": 0.19160926737633063, "grad_norm": 1.8770792484283447, "learning_rate": 0.00028242913000977514, "loss": 0.7899, "step": 1224 }, { "epoch": 0.19176581089542893, "grad_norm": 2.0006120204925537, "learning_rate": 0.00028240469208211144, "loss": 0.903, "step": 1225 }, { "epoch": 0.19192235441452724, "grad_norm": 1.9902926683425903, "learning_rate": 0.0002823802541544477, "loss": 1.5116, "step": 1226 }, { "epoch": 0.19207889793362554, "grad_norm": 1.5198619365692139, "learning_rate": 0.00028235581622678395, "loss": 0.917, "step": 1227 }, { "epoch": 0.19223544145272387, "grad_norm": 1.8800374269485474, "learning_rate": 0.00028233137829912025, "loss": 1.1258, "step": 1228 }, { "epoch": 0.19239198497182217, "grad_norm": 1.7788026332855225, "learning_rate": 0.00028230694037145645, "loss": 1.0894, "step": 1229 }, { "epoch": 0.19254852849092047, "grad_norm": 1.4421257972717285, "learning_rate": 0.00028228250244379275, "loss": 0.812, "step": 1230 }, { "epoch": 0.1927050720100188, "grad_norm": 2.770679235458374, "learning_rate": 0.000282258064516129, "loss": 1.2619, "step": 1231 }, { "epoch": 0.1928616155291171, "grad_norm": 1.2351292371749878, "learning_rate": 0.00028223362658846526, "loss": 0.9904, "step": 1232 }, { "epoch": 0.1930181590482154, "grad_norm": 1.4025189876556396, "learning_rate": 0.00028220918866080156, "loss": 1.2112, "step": 1233 }, { "epoch": 0.1931747025673137, "grad_norm": 1.9613655805587769, "learning_rate": 0.0002821847507331378, "loss": 1.3794, "step": 1234 }, { "epoch": 0.19333124608641203, "grad_norm": 1.3618839979171753, "learning_rate": 0.00028216031280547406, "loss": 1.0788, "step": 1235 }, { "epoch": 
0.19348778960551033, "grad_norm": 2.786076068878174, "learning_rate": 0.0002821358748778103, "loss": 1.62, "step": 1236 }, { "epoch": 0.19364433312460863, "grad_norm": 2.0850629806518555, "learning_rate": 0.0002821114369501466, "loss": 1.5648, "step": 1237 }, { "epoch": 0.19380087664370696, "grad_norm": 2.4790842533111572, "learning_rate": 0.00028208699902248287, "loss": 1.2221, "step": 1238 }, { "epoch": 0.19395742016280526, "grad_norm": 2.154008388519287, "learning_rate": 0.0002820625610948191, "loss": 1.0456, "step": 1239 }, { "epoch": 0.19411396368190356, "grad_norm": 4.071086883544922, "learning_rate": 0.00028203812316715543, "loss": 1.4998, "step": 1240 }, { "epoch": 0.1942705072010019, "grad_norm": 2.761547803878784, "learning_rate": 0.0002820136852394917, "loss": 2.0477, "step": 1241 }, { "epoch": 0.1944270507201002, "grad_norm": 3.3864057064056396, "learning_rate": 0.00028198924731182793, "loss": 1.1553, "step": 1242 }, { "epoch": 0.1945835942391985, "grad_norm": 2.1232552528381348, "learning_rate": 0.00028196480938416424, "loss": 1.2943, "step": 1243 }, { "epoch": 0.1947401377582968, "grad_norm": 4.488090991973877, "learning_rate": 0.00028194037145650043, "loss": 1.0515, "step": 1244 }, { "epoch": 0.19489668127739512, "grad_norm": 2.625746726989746, "learning_rate": 0.00028191593352883674, "loss": 0.3637, "step": 1245 }, { "epoch": 0.19505322479649342, "grad_norm": 3.204906940460205, "learning_rate": 0.000281891495601173, "loss": 0.7695, "step": 1246 }, { "epoch": 0.19520976831559173, "grad_norm": 2.906982660293579, "learning_rate": 0.00028186705767350924, "loss": 0.871, "step": 1247 }, { "epoch": 0.19536631183469005, "grad_norm": 3.8746962547302246, "learning_rate": 0.00028184261974584555, "loss": 1.6993, "step": 1248 }, { "epoch": 0.19552285535378836, "grad_norm": 5.038626194000244, "learning_rate": 0.0002818181818181818, "loss": 1.3819, "step": 1249 }, { "epoch": 0.19567939887288666, "grad_norm": 2.480462074279785, "learning_rate": 
0.00028179374389051805, "loss": 0.9957, "step": 1250 }, { "epoch": 0.19583594239198499, "grad_norm": 0.802400529384613, "learning_rate": 0.00028176930596285436, "loss": 0.5353, "step": 1251 }, { "epoch": 0.1959924859110833, "grad_norm": 0.9732524752616882, "learning_rate": 0.0002817448680351906, "loss": 0.7606, "step": 1252 }, { "epoch": 0.1961490294301816, "grad_norm": 1.2732630968093872, "learning_rate": 0.00028172043010752686, "loss": 0.7711, "step": 1253 }, { "epoch": 0.1963055729492799, "grad_norm": 0.6224183440208435, "learning_rate": 0.0002816959921798631, "loss": 0.5173, "step": 1254 }, { "epoch": 0.19646211646837822, "grad_norm": 0.9690751433372498, "learning_rate": 0.0002816715542521994, "loss": 0.5248, "step": 1255 }, { "epoch": 0.19661865998747652, "grad_norm": 1.045830249786377, "learning_rate": 0.00028164711632453567, "loss": 0.4887, "step": 1256 }, { "epoch": 0.19677520350657482, "grad_norm": 0.9513697624206543, "learning_rate": 0.0002816226783968719, "loss": 0.5496, "step": 1257 }, { "epoch": 0.19693174702567315, "grad_norm": 0.6548385620117188, "learning_rate": 0.0002815982404692082, "loss": 0.4298, "step": 1258 }, { "epoch": 0.19708829054477145, "grad_norm": 0.9877871870994568, "learning_rate": 0.0002815738025415445, "loss": 0.603, "step": 1259 }, { "epoch": 0.19724483406386975, "grad_norm": 0.9719914197921753, "learning_rate": 0.0002815493646138807, "loss": 0.5035, "step": 1260 }, { "epoch": 0.19740137758296805, "grad_norm": 0.5327731966972351, "learning_rate": 0.000281524926686217, "loss": 0.3911, "step": 1261 }, { "epoch": 0.19755792110206638, "grad_norm": 0.874661386013031, "learning_rate": 0.00028150048875855323, "loss": 0.7128, "step": 1262 }, { "epoch": 0.19771446462116468, "grad_norm": 1.2676417827606201, "learning_rate": 0.00028147605083088953, "loss": 0.6342, "step": 1263 }, { "epoch": 0.19787100814026298, "grad_norm": 1.1647354364395142, "learning_rate": 0.0002814516129032258, "loss": 0.7849, "step": 1264 }, { "epoch": 
0.1980275516593613, "grad_norm": 2.0960235595703125, "learning_rate": 0.00028142717497556204, "loss": 0.4386, "step": 1265 }, { "epoch": 0.1981840951784596, "grad_norm": 0.8338558673858643, "learning_rate": 0.00028140273704789834, "loss": 0.5409, "step": 1266 }, { "epoch": 0.1983406386975579, "grad_norm": 1.7632123231887817, "learning_rate": 0.0002813782991202346, "loss": 0.7232, "step": 1267 }, { "epoch": 0.19849718221665624, "grad_norm": 2.810450792312622, "learning_rate": 0.00028135386119257084, "loss": 0.6558, "step": 1268 }, { "epoch": 0.19865372573575454, "grad_norm": 1.2503775358200073, "learning_rate": 0.0002813294232649071, "loss": 0.84, "step": 1269 }, { "epoch": 0.19881026925485284, "grad_norm": 1.8836698532104492, "learning_rate": 0.0002813049853372434, "loss": 0.8859, "step": 1270 }, { "epoch": 0.19896681277395115, "grad_norm": 1.3264356851577759, "learning_rate": 0.00028128054740957965, "loss": 0.7314, "step": 1271 }, { "epoch": 0.19912335629304947, "grad_norm": 4.775301933288574, "learning_rate": 0.0002812561094819159, "loss": 1.4003, "step": 1272 }, { "epoch": 0.19927989981214778, "grad_norm": 0.8449872136116028, "learning_rate": 0.00028123167155425215, "loss": 0.7736, "step": 1273 }, { "epoch": 0.19943644333124608, "grad_norm": 1.8365225791931152, "learning_rate": 0.00028120723362658846, "loss": 1.187, "step": 1274 }, { "epoch": 0.1995929868503444, "grad_norm": 1.0617682933807373, "learning_rate": 0.0002811827956989247, "loss": 0.9421, "step": 1275 }, { "epoch": 0.1997495303694427, "grad_norm": 1.617153525352478, "learning_rate": 0.00028115835777126096, "loss": 1.0415, "step": 1276 }, { "epoch": 0.199906073888541, "grad_norm": 1.6880416870117188, "learning_rate": 0.0002811339198435972, "loss": 0.984, "step": 1277 }, { "epoch": 0.20006261740763934, "grad_norm": 1.4345347881317139, "learning_rate": 0.0002811094819159335, "loss": 1.0823, "step": 1278 }, { "epoch": 0.20021916092673764, "grad_norm": 1.5855309963226318, "learning_rate": 
0.00028108504398826977, "loss": 1.1963, "step": 1279 }, { "epoch": 0.20037570444583594, "grad_norm": 1.754294991493225, "learning_rate": 0.000281060606060606, "loss": 1.0804, "step": 1280 }, { "epoch": 0.20053224796493424, "grad_norm": 2.036597967147827, "learning_rate": 0.0002810361681329423, "loss": 0.9677, "step": 1281 }, { "epoch": 0.20068879148403257, "grad_norm": 3.59908390045166, "learning_rate": 0.0002810117302052786, "loss": 1.2978, "step": 1282 }, { "epoch": 0.20084533500313087, "grad_norm": 3.524487257003784, "learning_rate": 0.00028098729227761483, "loss": 1.0612, "step": 1283 }, { "epoch": 0.20100187852222917, "grad_norm": 2.481109857559204, "learning_rate": 0.0002809628543499511, "loss": 1.1174, "step": 1284 }, { "epoch": 0.2011584220413275, "grad_norm": 1.8362330198287964, "learning_rate": 0.00028093841642228733, "loss": 1.2175, "step": 1285 }, { "epoch": 0.2013149655604258, "grad_norm": 2.3617823123931885, "learning_rate": 0.00028091397849462364, "loss": 1.3735, "step": 1286 }, { "epoch": 0.2014715090795241, "grad_norm": 2.753002405166626, "learning_rate": 0.0002808895405669599, "loss": 1.0858, "step": 1287 }, { "epoch": 0.2016280525986224, "grad_norm": 2.5607731342315674, "learning_rate": 0.00028086510263929614, "loss": 0.93, "step": 1288 }, { "epoch": 0.20178459611772073, "grad_norm": 2.9476540088653564, "learning_rate": 0.00028084066471163244, "loss": 1.6911, "step": 1289 }, { "epoch": 0.20194113963681903, "grad_norm": 3.2327563762664795, "learning_rate": 0.0002808162267839687, "loss": 1.402, "step": 1290 }, { "epoch": 0.20209768315591733, "grad_norm": 3.618028163909912, "learning_rate": 0.00028079178885630495, "loss": 1.945, "step": 1291 }, { "epoch": 0.20225422667501566, "grad_norm": 2.141831159591675, "learning_rate": 0.0002807673509286412, "loss": 1.8992, "step": 1292 }, { "epoch": 0.20241077019411396, "grad_norm": 2.144073963165283, "learning_rate": 0.0002807429130009775, "loss": 1.0292, "step": 1293 }, { "epoch": 0.20256731371321227, 
"grad_norm": 2.449118137359619, "learning_rate": 0.00028071847507331376, "loss": 1.18, "step": 1294 }, { "epoch": 0.2027238572323106, "grad_norm": 2.5604381561279297, "learning_rate": 0.00028069403714565, "loss": 1.7014, "step": 1295 }, { "epoch": 0.2028804007514089, "grad_norm": 1.5793492794036865, "learning_rate": 0.0002806695992179863, "loss": 0.9777, "step": 1296 }, { "epoch": 0.2030369442705072, "grad_norm": 2.9324707984924316, "learning_rate": 0.00028064516129032256, "loss": 1.0947, "step": 1297 }, { "epoch": 0.2031934877896055, "grad_norm": 1.8581868410110474, "learning_rate": 0.0002806207233626588, "loss": 0.8496, "step": 1298 }, { "epoch": 0.20335003130870383, "grad_norm": 2.117748975753784, "learning_rate": 0.0002805962854349951, "loss": 1.2021, "step": 1299 }, { "epoch": 0.20350657482780213, "grad_norm": 3.1243643760681152, "learning_rate": 0.0002805718475073313, "loss": 1.5356, "step": 1300 }, { "epoch": 0.20366311834690043, "grad_norm": 1.2260499000549316, "learning_rate": 0.0002805474095796676, "loss": 0.745, "step": 1301 }, { "epoch": 0.20381966186599876, "grad_norm": 0.8840999007225037, "learning_rate": 0.0002805229716520039, "loss": 0.5955, "step": 1302 }, { "epoch": 0.20397620538509706, "grad_norm": 0.9450700879096985, "learning_rate": 0.0002804985337243401, "loss": 0.8156, "step": 1303 }, { "epoch": 0.20413274890419536, "grad_norm": 0.9631698727607727, "learning_rate": 0.00028047409579667643, "loss": 0.6363, "step": 1304 }, { "epoch": 0.2042892924232937, "grad_norm": 0.7808576822280884, "learning_rate": 0.0002804496578690127, "loss": 0.6415, "step": 1305 }, { "epoch": 0.204445835942392, "grad_norm": 0.8394602537155151, "learning_rate": 0.00028042521994134893, "loss": 0.5683, "step": 1306 }, { "epoch": 0.2046023794614903, "grad_norm": 0.9493279457092285, "learning_rate": 0.0002804007820136852, "loss": 0.6006, "step": 1307 }, { "epoch": 0.2047589229805886, "grad_norm": 1.2991026639938354, "learning_rate": 0.0002803763440860215, "loss": 0.5773, 
"step": 1308 }, { "epoch": 0.20491546649968692, "grad_norm": 0.9779866337776184, "learning_rate": 0.00028035190615835774, "loss": 0.6043, "step": 1309 }, { "epoch": 0.20507201001878522, "grad_norm": 0.8985393047332764, "learning_rate": 0.000280327468230694, "loss": 0.5852, "step": 1310 }, { "epoch": 0.20522855353788352, "grad_norm": 6.3509039878845215, "learning_rate": 0.0002803030303030303, "loss": 1.5883, "step": 1311 }, { "epoch": 0.20538509705698185, "grad_norm": 0.9153957366943359, "learning_rate": 0.00028027859237536655, "loss": 0.4863, "step": 1312 }, { "epoch": 0.20554164057608015, "grad_norm": 1.6397264003753662, "learning_rate": 0.0002802541544477028, "loss": 0.7647, "step": 1313 }, { "epoch": 0.20569818409517845, "grad_norm": 1.2118104696273804, "learning_rate": 0.0002802297165200391, "loss": 0.5911, "step": 1314 }, { "epoch": 0.20585472761427676, "grad_norm": 1.76604425907135, "learning_rate": 0.0002802052785923753, "loss": 0.9774, "step": 1315 }, { "epoch": 0.20601127113337508, "grad_norm": 1.2922953367233276, "learning_rate": 0.0002801808406647116, "loss": 0.6841, "step": 1316 }, { "epoch": 0.20616781465247339, "grad_norm": 1.8913264274597168, "learning_rate": 0.00028015640273704786, "loss": 0.5421, "step": 1317 }, { "epoch": 0.2063243581715717, "grad_norm": 4.230543613433838, "learning_rate": 0.0002801319648093841, "loss": 0.8927, "step": 1318 }, { "epoch": 0.20648090169067002, "grad_norm": 1.4822667837142944, "learning_rate": 0.0002801075268817204, "loss": 0.8219, "step": 1319 }, { "epoch": 0.20663744520976832, "grad_norm": 1.4265705347061157, "learning_rate": 0.00028008308895405667, "loss": 0.7132, "step": 1320 }, { "epoch": 0.20679398872886662, "grad_norm": 1.4394387006759644, "learning_rate": 0.0002800586510263929, "loss": 0.6372, "step": 1321 }, { "epoch": 0.20695053224796495, "grad_norm": 1.241612434387207, "learning_rate": 0.0002800342130987292, "loss": 0.7006, "step": 1322 }, { "epoch": 0.20710707576706325, "grad_norm": 1.5274121761322021, 
"learning_rate": 0.0002800097751710655, "loss": 1.122, "step": 1323 }, { "epoch": 0.20726361928616155, "grad_norm": 1.7099493741989136, "learning_rate": 0.0002799853372434017, "loss": 0.7326, "step": 1324 }, { "epoch": 0.20742016280525985, "grad_norm": 2.0768227577209473, "learning_rate": 0.000279960899315738, "loss": 0.7534, "step": 1325 }, { "epoch": 0.20757670632435818, "grad_norm": 0.9548947811126709, "learning_rate": 0.0002799364613880743, "loss": 0.6914, "step": 1326 }, { "epoch": 0.20773324984345648, "grad_norm": 2.6987247467041016, "learning_rate": 0.00027991202346041053, "loss": 1.5067, "step": 1327 }, { "epoch": 0.20788979336255478, "grad_norm": 1.8522530794143677, "learning_rate": 0.0002798875855327468, "loss": 0.8489, "step": 1328 }, { "epoch": 0.2080463368816531, "grad_norm": 2.1194634437561035, "learning_rate": 0.0002798631476050831, "loss": 0.7161, "step": 1329 }, { "epoch": 0.2082028804007514, "grad_norm": 1.979345440864563, "learning_rate": 0.0002798387096774193, "loss": 1.1511, "step": 1330 }, { "epoch": 0.2083594239198497, "grad_norm": 2.7786378860473633, "learning_rate": 0.0002798142717497556, "loss": 1.0264, "step": 1331 }, { "epoch": 0.20851596743894804, "grad_norm": 1.8285077810287476, "learning_rate": 0.00027978983382209184, "loss": 0.9509, "step": 1332 }, { "epoch": 0.20867251095804634, "grad_norm": 1.5117707252502441, "learning_rate": 0.0002797653958944281, "loss": 1.1794, "step": 1333 }, { "epoch": 0.20882905447714464, "grad_norm": 1.830258846282959, "learning_rate": 0.0002797409579667644, "loss": 1.2072, "step": 1334 }, { "epoch": 0.20898559799624294, "grad_norm": 2.282845973968506, "learning_rate": 0.00027971652003910065, "loss": 1.6122, "step": 1335 }, { "epoch": 0.20914214151534127, "grad_norm": 2.070908308029175, "learning_rate": 0.0002796920821114369, "loss": 1.3019, "step": 1336 }, { "epoch": 0.20929868503443957, "grad_norm": 1.86088228225708, "learning_rate": 0.0002796676441837732, "loss": 1.2159, "step": 1337 }, { "epoch": 
0.20945522855353788, "grad_norm": 1.9150466918945312, "learning_rate": 0.00027964320625610946, "loss": 1.5197, "step": 1338 }, { "epoch": 0.2096117720726362, "grad_norm": 2.406750202178955, "learning_rate": 0.0002796187683284457, "loss": 1.5689, "step": 1339 }, { "epoch": 0.2097683155917345, "grad_norm": 3.363661289215088, "learning_rate": 0.00027959433040078196, "loss": 1.4533, "step": 1340 }, { "epoch": 0.2099248591108328, "grad_norm": 2.635338306427002, "learning_rate": 0.00027956989247311827, "loss": 1.4408, "step": 1341 }, { "epoch": 0.2100814026299311, "grad_norm": 2.8413336277008057, "learning_rate": 0.0002795454545454545, "loss": 1.3341, "step": 1342 }, { "epoch": 0.21023794614902944, "grad_norm": 1.84250009059906, "learning_rate": 0.00027952101661779077, "loss": 1.2058, "step": 1343 }, { "epoch": 0.21039448966812774, "grad_norm": 2.388916492462158, "learning_rate": 0.0002794965786901271, "loss": 1.3711, "step": 1344 }, { "epoch": 0.21055103318722604, "grad_norm": 2.6313962936401367, "learning_rate": 0.00027947214076246333, "loss": 1.0988, "step": 1345 }, { "epoch": 0.21070757670632437, "grad_norm": 1.7488915920257568, "learning_rate": 0.0002794477028347996, "loss": 1.0163, "step": 1346 }, { "epoch": 0.21086412022542267, "grad_norm": 2.03892183303833, "learning_rate": 0.0002794232649071359, "loss": 0.9676, "step": 1347 }, { "epoch": 0.21102066374452097, "grad_norm": 1.5612317323684692, "learning_rate": 0.0002793988269794721, "loss": 0.8602, "step": 1348 }, { "epoch": 0.2111772072636193, "grad_norm": 2.5052318572998047, "learning_rate": 0.0002793743890518084, "loss": 0.6831, "step": 1349 }, { "epoch": 0.2113337507827176, "grad_norm": 2.1519994735717773, "learning_rate": 0.00027934995112414464, "loss": 1.084, "step": 1350 }, { "epoch": 0.2114902943018159, "grad_norm": 0.670945405960083, "learning_rate": 0.0002793255131964809, "loss": 0.4423, "step": 1351 }, { "epoch": 0.2116468378209142, "grad_norm": 1.1006921529769897, "learning_rate": 0.0002793010752688172, 
"loss": 0.6916, "step": 1352 }, { "epoch": 0.21180338134001253, "grad_norm": 0.8006137609481812, "learning_rate": 0.00027927663734115345, "loss": 0.4832, "step": 1353 }, { "epoch": 0.21195992485911083, "grad_norm": 0.7051506042480469, "learning_rate": 0.0002792521994134897, "loss": 0.5289, "step": 1354 }, { "epoch": 0.21211646837820913, "grad_norm": 0.6033449769020081, "learning_rate": 0.00027922776148582595, "loss": 0.423, "step": 1355 }, { "epoch": 0.21227301189730746, "grad_norm": 1.0505086183547974, "learning_rate": 0.00027920332355816225, "loss": 0.5403, "step": 1356 }, { "epoch": 0.21242955541640576, "grad_norm": 1.063795804977417, "learning_rate": 0.0002791788856304985, "loss": 0.6797, "step": 1357 }, { "epoch": 0.21258609893550406, "grad_norm": 0.77169269323349, "learning_rate": 0.00027915444770283476, "loss": 0.7163, "step": 1358 }, { "epoch": 0.2127426424546024, "grad_norm": 1.3137171268463135, "learning_rate": 0.00027913000977517106, "loss": 0.6437, "step": 1359 }, { "epoch": 0.2128991859737007, "grad_norm": 1.344262719154358, "learning_rate": 0.0002791055718475073, "loss": 0.5225, "step": 1360 }, { "epoch": 0.213055729492799, "grad_norm": 0.8262065649032593, "learning_rate": 0.00027908113391984356, "loss": 0.6403, "step": 1361 }, { "epoch": 0.2132122730118973, "grad_norm": 1.4010947942733765, "learning_rate": 0.00027905669599217987, "loss": 0.8737, "step": 1362 }, { "epoch": 0.21336881653099563, "grad_norm": 1.177219033241272, "learning_rate": 0.00027903225806451607, "loss": 0.7244, "step": 1363 }, { "epoch": 0.21352536005009393, "grad_norm": 1.3933367729187012, "learning_rate": 0.00027900782013685237, "loss": 0.4727, "step": 1364 }, { "epoch": 0.21368190356919223, "grad_norm": 1.8369626998901367, "learning_rate": 0.0002789833822091886, "loss": 0.6863, "step": 1365 }, { "epoch": 0.21383844708829056, "grad_norm": 1.523794174194336, "learning_rate": 0.0002789589442815249, "loss": 0.667, "step": 1366 }, { "epoch": 0.21399499060738886, "grad_norm": 
1.0985807180404663, "learning_rate": 0.0002789345063538612, "loss": 1.0209, "step": 1367 }, { "epoch": 0.21415153412648716, "grad_norm": 1.0308488607406616, "learning_rate": 0.00027891006842619743, "loss": 0.6309, "step": 1368 }, { "epoch": 0.21430807764558546, "grad_norm": 2.2205519676208496, "learning_rate": 0.0002788856304985337, "loss": 1.1807, "step": 1369 }, { "epoch": 0.2144646211646838, "grad_norm": 1.8049101829528809, "learning_rate": 0.00027886119257087, "loss": 0.9443, "step": 1370 }, { "epoch": 0.2146211646837821, "grad_norm": 1.7300370931625366, "learning_rate": 0.00027883675464320624, "loss": 0.7854, "step": 1371 }, { "epoch": 0.2147777082028804, "grad_norm": 1.587482213973999, "learning_rate": 0.0002788123167155425, "loss": 0.5499, "step": 1372 }, { "epoch": 0.21493425172197872, "grad_norm": 1.539663553237915, "learning_rate": 0.00027878787878787874, "loss": 0.9599, "step": 1373 }, { "epoch": 0.21509079524107702, "grad_norm": 3.231137990951538, "learning_rate": 0.00027876344086021505, "loss": 0.9099, "step": 1374 }, { "epoch": 0.21524733876017532, "grad_norm": 1.5350894927978516, "learning_rate": 0.0002787390029325513, "loss": 1.0147, "step": 1375 }, { "epoch": 0.21540388227927365, "grad_norm": 2.6407129764556885, "learning_rate": 0.00027871456500488755, "loss": 0.7973, "step": 1376 }, { "epoch": 0.21556042579837195, "grad_norm": 2.9032108783721924, "learning_rate": 0.00027869012707722386, "loss": 1.1, "step": 1377 }, { "epoch": 0.21571696931747025, "grad_norm": 2.3868675231933594, "learning_rate": 0.00027866568914956005, "loss": 0.625, "step": 1378 }, { "epoch": 0.21587351283656855, "grad_norm": 1.203199863433838, "learning_rate": 0.00027864125122189636, "loss": 0.7701, "step": 1379 }, { "epoch": 0.21603005635566688, "grad_norm": 5.6415205001831055, "learning_rate": 0.0002786168132942326, "loss": 1.1647, "step": 1380 }, { "epoch": 0.21618659987476518, "grad_norm": 1.5552973747253418, "learning_rate": 0.00027859237536656886, "loss": 1.0549, "step": 
1381 }, { "epoch": 0.21634314339386349, "grad_norm": 4.893740653991699, "learning_rate": 0.00027856793743890517, "loss": 1.311, "step": 1382 }, { "epoch": 0.21649968691296181, "grad_norm": 5.882252216339111, "learning_rate": 0.0002785434995112414, "loss": 1.5808, "step": 1383 }, { "epoch": 0.21665623043206012, "grad_norm": 2.043102502822876, "learning_rate": 0.00027851906158357767, "loss": 1.5347, "step": 1384 }, { "epoch": 0.21681277395115842, "grad_norm": 2.2881131172180176, "learning_rate": 0.000278494623655914, "loss": 1.37, "step": 1385 }, { "epoch": 0.21696931747025675, "grad_norm": 2.5173704624176025, "learning_rate": 0.0002784701857282502, "loss": 1.5788, "step": 1386 }, { "epoch": 0.21712586098935505, "grad_norm": 2.1015539169311523, "learning_rate": 0.0002784457478005865, "loss": 1.5984, "step": 1387 }, { "epoch": 0.21728240450845335, "grad_norm": 2.4034738540649414, "learning_rate": 0.00027842130987292273, "loss": 1.49, "step": 1388 }, { "epoch": 0.21743894802755165, "grad_norm": 2.5903968811035156, "learning_rate": 0.00027839687194525903, "loss": 1.4753, "step": 1389 }, { "epoch": 0.21759549154664998, "grad_norm": 3.2042367458343506, "learning_rate": 0.0002783724340175953, "loss": 1.6426, "step": 1390 }, { "epoch": 0.21775203506574828, "grad_norm": 4.184605598449707, "learning_rate": 0.00027834799608993154, "loss": 1.9565, "step": 1391 }, { "epoch": 0.21790857858484658, "grad_norm": 2.502986431121826, "learning_rate": 0.00027832355816226784, "loss": 1.7027, "step": 1392 }, { "epoch": 0.2180651221039449, "grad_norm": 2.451610565185547, "learning_rate": 0.0002782991202346041, "loss": 1.2844, "step": 1393 }, { "epoch": 0.2182216656230432, "grad_norm": 1.821632742881775, "learning_rate": 0.00027827468230694034, "loss": 0.9655, "step": 1394 }, { "epoch": 0.2183782091421415, "grad_norm": 4.602065563201904, "learning_rate": 0.0002782502443792766, "loss": 2.3643, "step": 1395 }, { "epoch": 0.2185347526612398, "grad_norm": 3.144266366958618, "learning_rate": 
0.00027822580645161285, "loss": 1.0854, "step": 1396 }, { "epoch": 0.21869129618033814, "grad_norm": 2.5353446006774902, "learning_rate": 0.00027820136852394915, "loss": 1.2546, "step": 1397 }, { "epoch": 0.21884783969943644, "grad_norm": 2.2268052101135254, "learning_rate": 0.0002781769305962854, "loss": 1.4616, "step": 1398 }, { "epoch": 0.21900438321853474, "grad_norm": 3.656038284301758, "learning_rate": 0.00027815249266862165, "loss": 1.7655, "step": 1399 }, { "epoch": 0.21916092673763307, "grad_norm": 2.7248010635375977, "learning_rate": 0.00027812805474095796, "loss": 1.8593, "step": 1400 }, { "epoch": 0.21931747025673137, "grad_norm": 0.8559685945510864, "learning_rate": 0.0002781036168132942, "loss": 0.5366, "step": 1401 }, { "epoch": 0.21947401377582967, "grad_norm": 0.8044238090515137, "learning_rate": 0.00027807917888563046, "loss": 0.5512, "step": 1402 }, { "epoch": 0.219630557294928, "grad_norm": 0.5511070489883423, "learning_rate": 0.0002780547409579667, "loss": 0.4996, "step": 1403 }, { "epoch": 0.2197871008140263, "grad_norm": 0.9777888655662537, "learning_rate": 0.000278030303030303, "loss": 0.6064, "step": 1404 }, { "epoch": 0.2199436443331246, "grad_norm": 0.7745730876922607, "learning_rate": 0.00027800586510263927, "loss": 0.4713, "step": 1405 }, { "epoch": 0.2201001878522229, "grad_norm": 1.0580544471740723, "learning_rate": 0.0002779814271749755, "loss": 0.6403, "step": 1406 }, { "epoch": 0.22025673137132123, "grad_norm": 0.7956675291061401, "learning_rate": 0.0002779569892473118, "loss": 0.4613, "step": 1407 }, { "epoch": 0.22041327489041954, "grad_norm": 0.678325891494751, "learning_rate": 0.0002779325513196481, "loss": 0.3812, "step": 1408 }, { "epoch": 0.22056981840951784, "grad_norm": 0.7760977149009705, "learning_rate": 0.00027790811339198433, "loss": 0.3709, "step": 1409 }, { "epoch": 0.22072636192861617, "grad_norm": 1.0391250848770142, "learning_rate": 0.00027788367546432063, "loss": 0.4915, "step": 1410 }, { "epoch": 
0.22088290544771447, "grad_norm": 0.9946457743644714, "learning_rate": 0.00027785923753665683, "loss": 0.5706, "step": 1411 }, { "epoch": 0.22103944896681277, "grad_norm": 1.5022060871124268, "learning_rate": 0.00027783479960899314, "loss": 0.6768, "step": 1412 }, { "epoch": 0.2211959924859111, "grad_norm": 1.4307317733764648, "learning_rate": 0.0002778103616813294, "loss": 0.6929, "step": 1413 }, { "epoch": 0.2213525360050094, "grad_norm": 0.9843283891677856, "learning_rate": 0.00027778592375366564, "loss": 0.4171, "step": 1414 }, { "epoch": 0.2215090795241077, "grad_norm": 1.1952210664749146, "learning_rate": 0.00027776148582600195, "loss": 0.5666, "step": 1415 }, { "epoch": 0.221665623043206, "grad_norm": 1.7626519203186035, "learning_rate": 0.0002777370478983382, "loss": 0.8112, "step": 1416 }, { "epoch": 0.22182216656230433, "grad_norm": 3.326972246170044, "learning_rate": 0.00027771260997067445, "loss": 1.1599, "step": 1417 }, { "epoch": 0.22197871008140263, "grad_norm": 0.9253565669059753, "learning_rate": 0.00027768817204301075, "loss": 0.6991, "step": 1418 }, { "epoch": 0.22213525360050093, "grad_norm": 1.0200566053390503, "learning_rate": 0.000277663734115347, "loss": 0.5801, "step": 1419 }, { "epoch": 0.22229179711959926, "grad_norm": 1.5075781345367432, "learning_rate": 0.00027763929618768326, "loss": 0.4883, "step": 1420 }, { "epoch": 0.22244834063869756, "grad_norm": 1.3740901947021484, "learning_rate": 0.0002776148582600195, "loss": 0.7034, "step": 1421 }, { "epoch": 0.22260488415779586, "grad_norm": 2.125110149383545, "learning_rate": 0.0002775904203323558, "loss": 0.7774, "step": 1422 }, { "epoch": 0.22276142767689416, "grad_norm": 2.414090633392334, "learning_rate": 0.00027756598240469206, "loss": 0.7952, "step": 1423 }, { "epoch": 0.2229179711959925, "grad_norm": 4.026956081390381, "learning_rate": 0.0002775415444770283, "loss": 0.9317, "step": 1424 }, { "epoch": 0.2230745147150908, "grad_norm": 1.9889763593673706, "learning_rate": 
0.0002775171065493646, "loss": 1.0914, "step": 1425 }, { "epoch": 0.2232310582341891, "grad_norm": 1.7664798498153687, "learning_rate": 0.0002774926686217008, "loss": 0.7606, "step": 1426 }, { "epoch": 0.22338760175328742, "grad_norm": 1.880422830581665, "learning_rate": 0.0002774682306940371, "loss": 1.0761, "step": 1427 }, { "epoch": 0.22354414527238572, "grad_norm": 2.686260223388672, "learning_rate": 0.0002774437927663734, "loss": 0.9571, "step": 1428 }, { "epoch": 0.22370068879148403, "grad_norm": 1.939051628112793, "learning_rate": 0.0002774193548387096, "loss": 1.0255, "step": 1429 }, { "epoch": 0.22385723231058235, "grad_norm": 2.358743190765381, "learning_rate": 0.00027739491691104593, "loss": 1.0628, "step": 1430 }, { "epoch": 0.22401377582968066, "grad_norm": 2.4641621112823486, "learning_rate": 0.0002773704789833822, "loss": 1.2966, "step": 1431 }, { "epoch": 0.22417031934877896, "grad_norm": 1.8473012447357178, "learning_rate": 0.00027734604105571843, "loss": 1.1513, "step": 1432 }, { "epoch": 0.22432686286787726, "grad_norm": 1.4949876070022583, "learning_rate": 0.00027732160312805474, "loss": 1.306, "step": 1433 }, { "epoch": 0.2244834063869756, "grad_norm": 1.902312159538269, "learning_rate": 0.000277297165200391, "loss": 1.6001, "step": 1434 }, { "epoch": 0.2246399499060739, "grad_norm": 1.7652263641357422, "learning_rate": 0.00027727272727272724, "loss": 1.3238, "step": 1435 }, { "epoch": 0.2247964934251722, "grad_norm": 2.4492361545562744, "learning_rate": 0.0002772482893450635, "loss": 1.3513, "step": 1436 }, { "epoch": 0.22495303694427052, "grad_norm": 2.270918369293213, "learning_rate": 0.0002772238514173998, "loss": 1.626, "step": 1437 }, { "epoch": 0.22510958046336882, "grad_norm": 2.2354772090911865, "learning_rate": 0.00027719941348973605, "loss": 1.3422, "step": 1438 }, { "epoch": 0.22526612398246712, "grad_norm": 1.9211229085922241, "learning_rate": 0.0002771749755620723, "loss": 1.5828, "step": 1439 }, { "epoch": 0.22542266750156542, 
"grad_norm": 1.6848493814468384, "learning_rate": 0.0002771505376344086, "loss": 1.2256, "step": 1440 }, { "epoch": 0.22557921102066375, "grad_norm": 2.3783888816833496, "learning_rate": 0.00027712609970674486, "loss": 1.1445, "step": 1441 }, { "epoch": 0.22573575453976205, "grad_norm": 3.050572156906128, "learning_rate": 0.0002771016617790811, "loss": 1.2094, "step": 1442 }, { "epoch": 0.22589229805886035, "grad_norm": 2.4172866344451904, "learning_rate": 0.00027707722385141736, "loss": 1.415, "step": 1443 }, { "epoch": 0.22604884157795868, "grad_norm": 2.319094181060791, "learning_rate": 0.0002770527859237536, "loss": 1.7239, "step": 1444 }, { "epoch": 0.22620538509705698, "grad_norm": 2.561666965484619, "learning_rate": 0.0002770283479960899, "loss": 1.2432, "step": 1445 }, { "epoch": 0.22636192861615528, "grad_norm": 2.667259454727173, "learning_rate": 0.00027700391006842617, "loss": 0.9331, "step": 1446 }, { "epoch": 0.2265184721352536, "grad_norm": 2.7518651485443115, "learning_rate": 0.0002769794721407624, "loss": 1.0571, "step": 1447 }, { "epoch": 0.2266750156543519, "grad_norm": 2.4335968494415283, "learning_rate": 0.0002769550342130987, "loss": 1.1156, "step": 1448 }, { "epoch": 0.22683155917345021, "grad_norm": 5.546756744384766, "learning_rate": 0.000276930596285435, "loss": 0.6606, "step": 1449 }, { "epoch": 0.22698810269254852, "grad_norm": 2.5599935054779053, "learning_rate": 0.0002769061583577712, "loss": 1.3594, "step": 1450 }, { "epoch": 0.22714464621164684, "grad_norm": 0.9069414734840393, "learning_rate": 0.0002768817204301075, "loss": 0.533, "step": 1451 }, { "epoch": 0.22730118973074515, "grad_norm": 0.9586019515991211, "learning_rate": 0.0002768572825024438, "loss": 0.5733, "step": 1452 }, { "epoch": 0.22745773324984345, "grad_norm": 1.2516900300979614, "learning_rate": 0.00027683284457478003, "loss": 0.6321, "step": 1453 }, { "epoch": 0.22761427676894178, "grad_norm": 0.7019465565681458, "learning_rate": 0.0002768084066471163, "loss": 0.534, 
"step": 1454 }, { "epoch": 0.22777082028804008, "grad_norm": 0.9529553651809692, "learning_rate": 0.0002767839687194526, "loss": 0.4528, "step": 1455 }, { "epoch": 0.22792736380713838, "grad_norm": 2.888197660446167, "learning_rate": 0.00027675953079178884, "loss": 0.5788, "step": 1456 }, { "epoch": 0.2280839073262367, "grad_norm": 0.9398375153541565, "learning_rate": 0.0002767350928641251, "loss": 0.7626, "step": 1457 }, { "epoch": 0.228240450845335, "grad_norm": 1.111086368560791, "learning_rate": 0.0002767106549364614, "loss": 0.4823, "step": 1458 }, { "epoch": 0.2283969943644333, "grad_norm": 0.8638045191764832, "learning_rate": 0.0002766862170087976, "loss": 0.5664, "step": 1459 }, { "epoch": 0.2285535378835316, "grad_norm": 1.1331050395965576, "learning_rate": 0.0002766617790811339, "loss": 0.5838, "step": 1460 }, { "epoch": 0.22871008140262994, "grad_norm": 1.5367311239242554, "learning_rate": 0.00027663734115347015, "loss": 0.5199, "step": 1461 }, { "epoch": 0.22886662492172824, "grad_norm": 1.0833733081817627, "learning_rate": 0.0002766129032258064, "loss": 0.6322, "step": 1462 }, { "epoch": 0.22902316844082654, "grad_norm": 1.2636278867721558, "learning_rate": 0.0002765884652981427, "loss": 0.8835, "step": 1463 }, { "epoch": 0.22917971195992487, "grad_norm": 0.8570249080657959, "learning_rate": 0.00027656402737047896, "loss": 0.5407, "step": 1464 }, { "epoch": 0.22933625547902317, "grad_norm": 1.7874391078948975, "learning_rate": 0.0002765395894428152, "loss": 0.6971, "step": 1465 }, { "epoch": 0.22949279899812147, "grad_norm": 1.138791799545288, "learning_rate": 0.00027651515151515146, "loss": 0.6228, "step": 1466 }, { "epoch": 0.22964934251721977, "grad_norm": 1.4818170070648193, "learning_rate": 0.00027649071358748777, "loss": 0.6939, "step": 1467 }, { "epoch": 0.2298058860363181, "grad_norm": 1.402114987373352, "learning_rate": 0.000276466275659824, "loss": 0.6931, "step": 1468 }, { "epoch": 0.2299624295554164, "grad_norm": 1.5063778162002563, 
"learning_rate": 0.00027644183773216027, "loss": 0.7895, "step": 1469 }, { "epoch": 0.2301189730745147, "grad_norm": 2.0992255210876465, "learning_rate": 0.0002764173998044966, "loss": 0.6326, "step": 1470 }, { "epoch": 0.23027551659361303, "grad_norm": 1.7399237155914307, "learning_rate": 0.00027639296187683283, "loss": 0.5391, "step": 1471 }, { "epoch": 0.23043206011271133, "grad_norm": 2.003880023956299, "learning_rate": 0.0002763685239491691, "loss": 0.7821, "step": 1472 }, { "epoch": 0.23058860363180964, "grad_norm": 1.794185996055603, "learning_rate": 0.0002763440860215054, "loss": 0.7965, "step": 1473 }, { "epoch": 0.23074514715090796, "grad_norm": 1.893991470336914, "learning_rate": 0.0002763196480938416, "loss": 0.6783, "step": 1474 }, { "epoch": 0.23090169067000627, "grad_norm": 2.7751851081848145, "learning_rate": 0.0002762952101661779, "loss": 0.9839, "step": 1475 }, { "epoch": 0.23105823418910457, "grad_norm": 1.9507943391799927, "learning_rate": 0.00027627077223851414, "loss": 0.5881, "step": 1476 }, { "epoch": 0.23121477770820287, "grad_norm": 2.9333903789520264, "learning_rate": 0.0002762463343108504, "loss": 0.6818, "step": 1477 }, { "epoch": 0.2313713212273012, "grad_norm": 2.3272409439086914, "learning_rate": 0.0002762218963831867, "loss": 0.9943, "step": 1478 }, { "epoch": 0.2315278647463995, "grad_norm": 3.2229695320129395, "learning_rate": 0.00027619745845552295, "loss": 1.225, "step": 1479 }, { "epoch": 0.2316844082654978, "grad_norm": 2.4953155517578125, "learning_rate": 0.0002761730205278592, "loss": 1.034, "step": 1480 }, { "epoch": 0.23184095178459613, "grad_norm": 2.438466787338257, "learning_rate": 0.0002761485826001955, "loss": 1.2024, "step": 1481 }, { "epoch": 0.23199749530369443, "grad_norm": 4.884873390197754, "learning_rate": 0.00027612414467253175, "loss": 1.0249, "step": 1482 }, { "epoch": 0.23215403882279273, "grad_norm": 2.4104747772216797, "learning_rate": 0.000276099706744868, "loss": 1.2849, "step": 1483 }, { "epoch": 
0.23231058234189106, "grad_norm": 3.2531588077545166, "learning_rate": 0.00027607526881720426, "loss": 0.8408, "step": 1484 }, { "epoch": 0.23246712586098936, "grad_norm": 2.4069766998291016, "learning_rate": 0.00027605083088954056, "loss": 1.4731, "step": 1485 }, { "epoch": 0.23262366938008766, "grad_norm": 2.5518875122070312, "learning_rate": 0.0002760263929618768, "loss": 1.1409, "step": 1486 }, { "epoch": 0.23278021289918596, "grad_norm": 2.899477958679199, "learning_rate": 0.00027600195503421306, "loss": 1.2555, "step": 1487 }, { "epoch": 0.2329367564182843, "grad_norm": 1.8513123989105225, "learning_rate": 0.00027597751710654937, "loss": 0.8074, "step": 1488 }, { "epoch": 0.2330932999373826, "grad_norm": 2.3798718452453613, "learning_rate": 0.00027595307917888557, "loss": 1.7678, "step": 1489 }, { "epoch": 0.2332498434564809, "grad_norm": 2.9262709617614746, "learning_rate": 0.00027592864125122187, "loss": 1.5967, "step": 1490 }, { "epoch": 0.23340638697557922, "grad_norm": 2.1684608459472656, "learning_rate": 0.0002759042033235581, "loss": 1.9874, "step": 1491 }, { "epoch": 0.23356293049467752, "grad_norm": 2.6140377521514893, "learning_rate": 0.0002758797653958944, "loss": 0.7234, "step": 1492 }, { "epoch": 0.23371947401377582, "grad_norm": 2.1321074962615967, "learning_rate": 0.0002758553274682307, "loss": 1.0605, "step": 1493 }, { "epoch": 0.23387601753287413, "grad_norm": 2.2342660427093506, "learning_rate": 0.00027583088954056693, "loss": 1.8914, "step": 1494 }, { "epoch": 0.23403256105197245, "grad_norm": 3.242147445678711, "learning_rate": 0.0002758064516129032, "loss": 1.8859, "step": 1495 }, { "epoch": 0.23418910457107076, "grad_norm": 4.4763946533203125, "learning_rate": 0.0002757820136852395, "loss": 1.3996, "step": 1496 }, { "epoch": 0.23434564809016906, "grad_norm": 2.6398067474365234, "learning_rate": 0.00027575757575757574, "loss": 0.9282, "step": 1497 }, { "epoch": 0.23450219160926739, "grad_norm": 1.8949030637741089, "learning_rate": 
0.000275733137829912, "loss": 0.9894, "step": 1498 }, { "epoch": 0.2346587351283657, "grad_norm": 1.6546046733856201, "learning_rate": 0.00027570869990224824, "loss": 0.7834, "step": 1499 }, { "epoch": 0.234815278647464, "grad_norm": 1.8807889223098755, "learning_rate": 0.00027568426197458455, "loss": 1.1045, "step": 1500 }, { "epoch": 0.23497182216656232, "grad_norm": 0.7086061835289001, "learning_rate": 0.0002756598240469208, "loss": 0.5635, "step": 1501 }, { "epoch": 0.23512836568566062, "grad_norm": 1.064099669456482, "learning_rate": 0.00027563538611925705, "loss": 0.6135, "step": 1502 }, { "epoch": 0.23528490920475892, "grad_norm": 0.6339705586433411, "learning_rate": 0.00027561094819159336, "loss": 0.4156, "step": 1503 }, { "epoch": 0.23544145272385722, "grad_norm": 0.6692638397216797, "learning_rate": 0.0002755865102639296, "loss": 0.5255, "step": 1504 }, { "epoch": 0.23559799624295555, "grad_norm": 0.6374533772468567, "learning_rate": 0.00027556207233626586, "loss": 0.4684, "step": 1505 }, { "epoch": 0.23575453976205385, "grad_norm": 0.621849000453949, "learning_rate": 0.00027553763440860216, "loss": 0.4455, "step": 1506 }, { "epoch": 0.23591108328115215, "grad_norm": 1.2488616704940796, "learning_rate": 0.00027551319648093836, "loss": 0.5984, "step": 1507 }, { "epoch": 0.23606762680025048, "grad_norm": 0.7494067549705505, "learning_rate": 0.00027548875855327467, "loss": 0.5569, "step": 1508 }, { "epoch": 0.23622417031934878, "grad_norm": 5.239253520965576, "learning_rate": 0.0002754643206256109, "loss": 1.5669, "step": 1509 }, { "epoch": 0.23638071383844708, "grad_norm": 1.0418435335159302, "learning_rate": 0.00027543988269794717, "loss": 0.4972, "step": 1510 }, { "epoch": 0.2365372573575454, "grad_norm": 1.3810733556747437, "learning_rate": 0.0002754154447702835, "loss": 0.7008, "step": 1511 }, { "epoch": 0.2366938008766437, "grad_norm": 1.964684247970581, "learning_rate": 0.0002753910068426197, "loss": 0.598, "step": 1512 }, { "epoch": 
0.236850344395742, "grad_norm": 1.2020512819290161, "learning_rate": 0.000275366568914956, "loss": 0.7735, "step": 1513 }, { "epoch": 0.23700688791484031, "grad_norm": 1.2619682550430298, "learning_rate": 0.00027534213098729223, "loss": 0.6185, "step": 1514 }, { "epoch": 0.23716343143393864, "grad_norm": 1.121608018875122, "learning_rate": 0.00027531769305962853, "loss": 0.6125, "step": 1515 }, { "epoch": 0.23731997495303694, "grad_norm": 1.6488595008850098, "learning_rate": 0.0002752932551319648, "loss": 0.5002, "step": 1516 }, { "epoch": 0.23747651847213525, "grad_norm": 2.0960159301757812, "learning_rate": 0.00027526881720430104, "loss": 0.9657, "step": 1517 }, { "epoch": 0.23763306199123357, "grad_norm": 1.6600353717803955, "learning_rate": 0.00027524437927663734, "loss": 0.6478, "step": 1518 }, { "epoch": 0.23778960551033188, "grad_norm": 1.770453691482544, "learning_rate": 0.0002752199413489736, "loss": 0.8878, "step": 1519 }, { "epoch": 0.23794614902943018, "grad_norm": 1.5966682434082031, "learning_rate": 0.00027519550342130984, "loss": 1.122, "step": 1520 }, { "epoch": 0.23810269254852848, "grad_norm": 1.4263012409210205, "learning_rate": 0.00027517106549364615, "loss": 0.6308, "step": 1521 }, { "epoch": 0.2382592360676268, "grad_norm": 3.0444369316101074, "learning_rate": 0.00027514662756598235, "loss": 0.8983, "step": 1522 }, { "epoch": 0.2384157795867251, "grad_norm": 1.5377203226089478, "learning_rate": 0.00027512218963831865, "loss": 0.6754, "step": 1523 }, { "epoch": 0.2385723231058234, "grad_norm": 1.37147855758667, "learning_rate": 0.0002750977517106549, "loss": 0.8112, "step": 1524 }, { "epoch": 0.23872886662492174, "grad_norm": 1.6660315990447998, "learning_rate": 0.00027507331378299115, "loss": 1.0561, "step": 1525 }, { "epoch": 0.23888541014402004, "grad_norm": 1.588474988937378, "learning_rate": 0.00027504887585532746, "loss": 1.1573, "step": 1526 }, { "epoch": 0.23904195366311834, "grad_norm": 2.331662178039551, "learning_rate": 
0.0002750244379276637, "loss": 1.0771, "step": 1527 }, { "epoch": 0.23919849718221667, "grad_norm": 3.051088571548462, "learning_rate": 0.00027499999999999996, "loss": 0.8527, "step": 1528 }, { "epoch": 0.23935504070131497, "grad_norm": 2.258211374282837, "learning_rate": 0.00027497556207233627, "loss": 1.2319, "step": 1529 }, { "epoch": 0.23951158422041327, "grad_norm": 2.485002279281616, "learning_rate": 0.0002749511241446725, "loss": 1.1426, "step": 1530 }, { "epoch": 0.23966812773951157, "grad_norm": 2.4680216312408447, "learning_rate": 0.00027492668621700877, "loss": 1.1472, "step": 1531 }, { "epoch": 0.2398246712586099, "grad_norm": 1.421920895576477, "learning_rate": 0.000274902248289345, "loss": 1.0561, "step": 1532 }, { "epoch": 0.2399812147777082, "grad_norm": 2.4300615787506104, "learning_rate": 0.0002748778103616813, "loss": 1.5544, "step": 1533 }, { "epoch": 0.2401377582968065, "grad_norm": 3.0856986045837402, "learning_rate": 0.0002748533724340176, "loss": 0.8778, "step": 1534 }, { "epoch": 0.24029430181590483, "grad_norm": 2.003622055053711, "learning_rate": 0.00027482893450635383, "loss": 1.1826, "step": 1535 }, { "epoch": 0.24045084533500313, "grad_norm": 2.1644046306610107, "learning_rate": 0.00027480449657869013, "loss": 1.3446, "step": 1536 }, { "epoch": 0.24060738885410143, "grad_norm": 2.3185083866119385, "learning_rate": 0.00027478005865102633, "loss": 1.2308, "step": 1537 }, { "epoch": 0.24076393237319976, "grad_norm": 2.0810067653656006, "learning_rate": 0.00027475562072336264, "loss": 0.9243, "step": 1538 }, { "epoch": 0.24092047589229806, "grad_norm": 2.293769359588623, "learning_rate": 0.0002747311827956989, "loss": 1.7686, "step": 1539 }, { "epoch": 0.24107701941139636, "grad_norm": 2.4113895893096924, "learning_rate": 0.00027470674486803514, "loss": 1.3467, "step": 1540 }, { "epoch": 0.24123356293049467, "grad_norm": 2.769763708114624, "learning_rate": 0.00027468230694037145, "loss": 1.8965, "step": 1541 }, { "epoch": 
0.241390106449593, "grad_norm": 2.0843498706817627, "learning_rate": 0.0002746578690127077, "loss": 1.2388, "step": 1542 }, { "epoch": 0.2415466499686913, "grad_norm": 3.0376205444335938, "learning_rate": 0.00027463343108504395, "loss": 1.3435, "step": 1543 }, { "epoch": 0.2417031934877896, "grad_norm": 4.603314399719238, "learning_rate": 0.00027460899315738025, "loss": 1.5941, "step": 1544 }, { "epoch": 0.24185973700688793, "grad_norm": 3.8465962409973145, "learning_rate": 0.0002745845552297165, "loss": 1.5791, "step": 1545 }, { "epoch": 0.24201628052598623, "grad_norm": 1.3982967138290405, "learning_rate": 0.00027456011730205276, "loss": 1.0725, "step": 1546 }, { "epoch": 0.24217282404508453, "grad_norm": 2.9826741218566895, "learning_rate": 0.000274535679374389, "loss": 1.2196, "step": 1547 }, { "epoch": 0.24232936756418283, "grad_norm": 4.3426055908203125, "learning_rate": 0.0002745112414467253, "loss": 1.1231, "step": 1548 }, { "epoch": 0.24248591108328116, "grad_norm": 2.9004628658294678, "learning_rate": 0.00027448680351906156, "loss": 1.4186, "step": 1549 }, { "epoch": 0.24264245460237946, "grad_norm": 1.7319072484970093, "learning_rate": 0.0002744623655913978, "loss": 0.8112, "step": 1550 }, { "epoch": 0.24279899812147776, "grad_norm": 0.6421094536781311, "learning_rate": 0.0002744379276637341, "loss": 0.463, "step": 1551 }, { "epoch": 0.2429555416405761, "grad_norm": 0.7612909078598022, "learning_rate": 0.00027441348973607037, "loss": 0.4454, "step": 1552 }, { "epoch": 0.2431120851596744, "grad_norm": 0.5956478118896484, "learning_rate": 0.0002743890518084066, "loss": 0.5241, "step": 1553 }, { "epoch": 0.2432686286787727, "grad_norm": 0.6601691246032715, "learning_rate": 0.0002743646138807429, "loss": 0.481, "step": 1554 }, { "epoch": 0.24342517219787102, "grad_norm": 0.757583498954773, "learning_rate": 0.0002743401759530791, "loss": 0.565, "step": 1555 }, { "epoch": 0.24358171571696932, "grad_norm": 1.0948114395141602, "learning_rate": 
0.00027431573802541543, "loss": 0.555, "step": 1556 }, { "epoch": 0.24373825923606762, "grad_norm": 0.9005800485610962, "learning_rate": 0.0002742913000977517, "loss": 0.4812, "step": 1557 }, { "epoch": 0.24389480275516592, "grad_norm": 1.3342481851577759, "learning_rate": 0.00027426686217008793, "loss": 0.5122, "step": 1558 }, { "epoch": 0.24405134627426425, "grad_norm": 0.7070523500442505, "learning_rate": 0.00027424242424242424, "loss": 0.3781, "step": 1559 }, { "epoch": 0.24420788979336255, "grad_norm": 0.6875860095024109, "learning_rate": 0.0002742179863147605, "loss": 0.5231, "step": 1560 }, { "epoch": 0.24436443331246085, "grad_norm": 1.033418893814087, "learning_rate": 0.00027419354838709674, "loss": 0.5012, "step": 1561 }, { "epoch": 0.24452097683155918, "grad_norm": 2.32181978225708, "learning_rate": 0.000274169110459433, "loss": 0.6205, "step": 1562 }, { "epoch": 0.24467752035065748, "grad_norm": 1.1538715362548828, "learning_rate": 0.0002741446725317693, "loss": 0.5589, "step": 1563 }, { "epoch": 0.24483406386975579, "grad_norm": 1.1103618144989014, "learning_rate": 0.00027412023460410555, "loss": 0.5474, "step": 1564 }, { "epoch": 0.24499060738885411, "grad_norm": 2.351726531982422, "learning_rate": 0.0002740957966764418, "loss": 0.5257, "step": 1565 }, { "epoch": 0.24514715090795242, "grad_norm": 1.7007243633270264, "learning_rate": 0.0002740713587487781, "loss": 0.6285, "step": 1566 }, { "epoch": 0.24530369442705072, "grad_norm": 1.454067349433899, "learning_rate": 0.00027404692082111436, "loss": 0.9199, "step": 1567 }, { "epoch": 0.24546023794614902, "grad_norm": 0.8819766640663147, "learning_rate": 0.0002740224828934506, "loss": 0.7621, "step": 1568 }, { "epoch": 0.24561678146524735, "grad_norm": 1.5095868110656738, "learning_rate": 0.0002739980449657869, "loss": 1.0348, "step": 1569 }, { "epoch": 0.24577332498434565, "grad_norm": 0.8652186393737793, "learning_rate": 0.0002739736070381231, "loss": 0.8568, "step": 1570 }, { "epoch": 
0.24592986850344395, "grad_norm": 1.0354799032211304, "learning_rate": 0.0002739491691104594, "loss": 0.5486, "step": 1571 }, { "epoch": 0.24608641202254228, "grad_norm": 1.751391887664795, "learning_rate": 0.00027392473118279567, "loss": 0.9729, "step": 1572 }, { "epoch": 0.24624295554164058, "grad_norm": 1.665823221206665, "learning_rate": 0.0002739002932551319, "loss": 1.1064, "step": 1573 }, { "epoch": 0.24639949906073888, "grad_norm": 1.3109946250915527, "learning_rate": 0.0002738758553274682, "loss": 0.7137, "step": 1574 }, { "epoch": 0.24655604257983718, "grad_norm": 3.6172003746032715, "learning_rate": 0.0002738514173998045, "loss": 1.0663, "step": 1575 }, { "epoch": 0.2467125860989355, "grad_norm": 1.8106416463851929, "learning_rate": 0.0002738269794721407, "loss": 0.8352, "step": 1576 }, { "epoch": 0.2468691296180338, "grad_norm": 1.7313213348388672, "learning_rate": 0.00027380254154447703, "loss": 1.143, "step": 1577 }, { "epoch": 0.2470256731371321, "grad_norm": 2.3070340156555176, "learning_rate": 0.0002737781036168133, "loss": 0.8652, "step": 1578 }, { "epoch": 0.24718221665623044, "grad_norm": 1.9005930423736572, "learning_rate": 0.00027375366568914953, "loss": 1.108, "step": 1579 }, { "epoch": 0.24733876017532874, "grad_norm": 1.746368408203125, "learning_rate": 0.0002737292277614858, "loss": 0.9733, "step": 1580 }, { "epoch": 0.24749530369442704, "grad_norm": 2.9574499130249023, "learning_rate": 0.0002737047898338221, "loss": 0.8982, "step": 1581 }, { "epoch": 0.24765184721352537, "grad_norm": 2.634615659713745, "learning_rate": 0.00027368035190615834, "loss": 1.5592, "step": 1582 }, { "epoch": 0.24780839073262367, "grad_norm": 2.2195236682891846, "learning_rate": 0.0002736559139784946, "loss": 1.8192, "step": 1583 }, { "epoch": 0.24796493425172197, "grad_norm": 3.19441294670105, "learning_rate": 0.0002736314760508309, "loss": 1.2147, "step": 1584 }, { "epoch": 0.24812147777082028, "grad_norm": 1.6516034603118896, "learning_rate": 
0.0002736070381231671, "loss": 1.0827, "step": 1585 }, { "epoch": 0.2482780212899186, "grad_norm": 3.0163512229919434, "learning_rate": 0.0002735826001955034, "loss": 1.8071, "step": 1586 }, { "epoch": 0.2484345648090169, "grad_norm": 3.311854362487793, "learning_rate": 0.00027355816226783965, "loss": 1.1725, "step": 1587 }, { "epoch": 0.2485911083281152, "grad_norm": 2.583314895629883, "learning_rate": 0.0002735337243401759, "loss": 1.9088, "step": 1588 }, { "epoch": 0.24874765184721354, "grad_norm": 3.644285202026367, "learning_rate": 0.0002735092864125122, "loss": 2.057, "step": 1589 }, { "epoch": 0.24890419536631184, "grad_norm": 2.473665475845337, "learning_rate": 0.00027348484848484846, "loss": 1.0609, "step": 1590 }, { "epoch": 0.24906073888541014, "grad_norm": 2.75217342376709, "learning_rate": 0.0002734604105571847, "loss": 1.9401, "step": 1591 }, { "epoch": 0.24921728240450847, "grad_norm": 4.07191801071167, "learning_rate": 0.000273435972629521, "loss": 1.9969, "step": 1592 }, { "epoch": 0.24937382592360677, "grad_norm": 2.538170337677002, "learning_rate": 0.00027341153470185727, "loss": 1.451, "step": 1593 }, { "epoch": 0.24953036944270507, "grad_norm": 1.8366115093231201, "learning_rate": 0.0002733870967741935, "loss": 1.7211, "step": 1594 }, { "epoch": 0.24968691296180337, "grad_norm": 2.4829647541046143, "learning_rate": 0.00027336265884652977, "loss": 1.3786, "step": 1595 }, { "epoch": 0.2498434564809017, "grad_norm": 2.2429263591766357, "learning_rate": 0.0002733382209188661, "loss": 1.4635, "step": 1596 }, { "epoch": 0.25, "grad_norm": 2.1900274753570557, "learning_rate": 0.00027331378299120233, "loss": 0.9664, "step": 1597 }, { "epoch": 0.25015654351909833, "grad_norm": 1.736470341682434, "learning_rate": 0.0002732893450635386, "loss": 1.2012, "step": 1598 }, { "epoch": 0.2503130870381966, "grad_norm": 2.750772714614868, "learning_rate": 0.0002732649071358749, "loss": 0.5847, "step": 1599 }, { "epoch": 0.25046963055729493, "grad_norm": 
3.358440637588501, "learning_rate": 0.00027324046920821114, "loss": 1.3006, "step": 1600 }, { "epoch": 0.25062617407639326, "grad_norm": 0.7446082830429077, "learning_rate": 0.0002732160312805474, "loss": 0.474, "step": 1601 }, { "epoch": 0.25078271759549153, "grad_norm": 0.5928311347961426, "learning_rate": 0.00027319159335288364, "loss": 0.4473, "step": 1602 }, { "epoch": 0.25093926111458986, "grad_norm": 0.8611072897911072, "learning_rate": 0.0002731671554252199, "loss": 0.4695, "step": 1603 }, { "epoch": 0.2510958046336882, "grad_norm": 0.6917060613632202, "learning_rate": 0.0002731427174975562, "loss": 0.5228, "step": 1604 }, { "epoch": 0.25125234815278646, "grad_norm": 1.070335865020752, "learning_rate": 0.00027311827956989245, "loss": 1.0444, "step": 1605 }, { "epoch": 0.2514088916718848, "grad_norm": 1.0247135162353516, "learning_rate": 0.0002730938416422287, "loss": 0.7059, "step": 1606 }, { "epoch": 0.25156543519098307, "grad_norm": 1.059765100479126, "learning_rate": 0.000273069403714565, "loss": 0.5032, "step": 1607 }, { "epoch": 0.2517219787100814, "grad_norm": 0.8535358905792236, "learning_rate": 0.00027304496578690125, "loss": 0.4397, "step": 1608 }, { "epoch": 0.2518785222291797, "grad_norm": 1.4279924631118774, "learning_rate": 0.0002730205278592375, "loss": 0.6977, "step": 1609 }, { "epoch": 0.252035065748278, "grad_norm": 0.8265482187271118, "learning_rate": 0.00027299608993157376, "loss": 0.4416, "step": 1610 }, { "epoch": 0.2521916092673763, "grad_norm": 1.0426527261734009, "learning_rate": 0.00027297165200391006, "loss": 0.4451, "step": 1611 }, { "epoch": 0.25234815278647466, "grad_norm": 1.2460665702819824, "learning_rate": 0.0002729472140762463, "loss": 0.7127, "step": 1612 }, { "epoch": 0.25250469630557293, "grad_norm": 1.153203010559082, "learning_rate": 0.00027292277614858257, "loss": 0.5075, "step": 1613 }, { "epoch": 0.25266123982467126, "grad_norm": 1.497168779373169, "learning_rate": 0.00027289833822091887, "loss": 0.6462, "step": 
1614 }, { "epoch": 0.2528177833437696, "grad_norm": 0.8022387027740479, "learning_rate": 0.0002728739002932551, "loss": 0.5667, "step": 1615 }, { "epoch": 0.25297432686286786, "grad_norm": 1.270932912826538, "learning_rate": 0.0002728494623655914, "loss": 0.4302, "step": 1616 }, { "epoch": 0.2531308703819662, "grad_norm": 2.111393928527832, "learning_rate": 0.0002728250244379277, "loss": 0.7754, "step": 1617 }, { "epoch": 0.2532874139010645, "grad_norm": 0.9829224348068237, "learning_rate": 0.0002728005865102639, "loss": 0.5654, "step": 1618 }, { "epoch": 0.2534439574201628, "grad_norm": 1.0931804180145264, "learning_rate": 0.0002727761485826002, "loss": 0.4941, "step": 1619 }, { "epoch": 0.2536005009392611, "grad_norm": 1.211100459098816, "learning_rate": 0.00027275171065493643, "loss": 0.8934, "step": 1620 }, { "epoch": 0.25375704445835945, "grad_norm": 1.7143733501434326, "learning_rate": 0.0002727272727272727, "loss": 1.0193, "step": 1621 }, { "epoch": 0.2539135879774577, "grad_norm": 1.8827818632125854, "learning_rate": 0.000272702834799609, "loss": 1.0734, "step": 1622 }, { "epoch": 0.25407013149655605, "grad_norm": 1.6230251789093018, "learning_rate": 0.00027267839687194524, "loss": 1.0196, "step": 1623 }, { "epoch": 0.2542266750156543, "grad_norm": 1.9909404516220093, "learning_rate": 0.0002726539589442815, "loss": 0.8388, "step": 1624 }, { "epoch": 0.25438321853475265, "grad_norm": 1.6396865844726562, "learning_rate": 0.00027262952101661774, "loss": 0.803, "step": 1625 }, { "epoch": 0.254539762053851, "grad_norm": 1.7983092069625854, "learning_rate": 0.00027260508308895405, "loss": 0.8712, "step": 1626 }, { "epoch": 0.25469630557294926, "grad_norm": 2.2103474140167236, "learning_rate": 0.0002725806451612903, "loss": 0.9533, "step": 1627 }, { "epoch": 0.2548528490920476, "grad_norm": 1.7631006240844727, "learning_rate": 0.00027255620723362655, "loss": 0.9976, "step": 1628 }, { "epoch": 0.2550093926111459, "grad_norm": 3.220822811126709, "learning_rate": 
0.00027253176930596286, "loss": 1.206, "step": 1629 }, { "epoch": 0.2551659361302442, "grad_norm": 1.8891594409942627, "learning_rate": 0.0002725073313782991, "loss": 1.0133, "step": 1630 }, { "epoch": 0.2553224796493425, "grad_norm": 1.985506534576416, "learning_rate": 0.00027248289345063536, "loss": 0.8946, "step": 1631 }, { "epoch": 0.25547902316844084, "grad_norm": 3.2439825534820557, "learning_rate": 0.00027245845552297166, "loss": 1.3356, "step": 1632 }, { "epoch": 0.2556355666875391, "grad_norm": 1.7113157510757446, "learning_rate": 0.00027243401759530786, "loss": 0.8516, "step": 1633 }, { "epoch": 0.25579211020663745, "grad_norm": 2.8625941276550293, "learning_rate": 0.00027240957966764417, "loss": 1.5037, "step": 1634 }, { "epoch": 0.2559486537257358, "grad_norm": 1.7289516925811768, "learning_rate": 0.0002723851417399804, "loss": 1.167, "step": 1635 }, { "epoch": 0.25610519724483405, "grad_norm": 1.9103654623031616, "learning_rate": 0.00027236070381231667, "loss": 1.2579, "step": 1636 }, { "epoch": 0.2562617407639324, "grad_norm": 2.2144134044647217, "learning_rate": 0.000272336265884653, "loss": 1.6795, "step": 1637 }, { "epoch": 0.2564182842830307, "grad_norm": 2.3531837463378906, "learning_rate": 0.0002723118279569892, "loss": 1.2621, "step": 1638 }, { "epoch": 0.256574827802129, "grad_norm": 3.0877432823181152, "learning_rate": 0.0002722873900293255, "loss": 1.9715, "step": 1639 }, { "epoch": 0.2567313713212273, "grad_norm": 1.3705271482467651, "learning_rate": 0.0002722629521016618, "loss": 1.0049, "step": 1640 }, { "epoch": 0.25688791484032564, "grad_norm": 4.535170078277588, "learning_rate": 0.00027223851417399803, "loss": 1.4665, "step": 1641 }, { "epoch": 0.2570444583594239, "grad_norm": 2.5787107944488525, "learning_rate": 0.0002722140762463343, "loss": 1.2625, "step": 1642 }, { "epoch": 0.25720100187852224, "grad_norm": 2.6251347064971924, "learning_rate": 0.00027218963831867054, "loss": 1.1023, "step": 1643 }, { "epoch": 0.2573575453976205, 
"grad_norm": 1.5550967454910278, "learning_rate": 0.00027216520039100684, "loss": 0.7943, "step": 1644 }, { "epoch": 0.25751408891671884, "grad_norm": 3.5643835067749023, "learning_rate": 0.0002721407624633431, "loss": 1.731, "step": 1645 }, { "epoch": 0.25767063243581717, "grad_norm": 1.838036298751831, "learning_rate": 0.00027211632453567934, "loss": 1.2029, "step": 1646 }, { "epoch": 0.25782717595491544, "grad_norm": 1.7118128538131714, "learning_rate": 0.00027209188660801565, "loss": 0.5492, "step": 1647 }, { "epoch": 0.2579837194740138, "grad_norm": 1.6779016256332397, "learning_rate": 0.00027206744868035185, "loss": 0.6806, "step": 1648 }, { "epoch": 0.2581402629931121, "grad_norm": 1.5262585878372192, "learning_rate": 0.00027204301075268815, "loss": 0.4437, "step": 1649 }, { "epoch": 0.2582968065122104, "grad_norm": 4.473031520843506, "learning_rate": 0.0002720185728250244, "loss": 1.9419, "step": 1650 }, { "epoch": 0.2584533500313087, "grad_norm": 0.5745874643325806, "learning_rate": 0.00027199413489736065, "loss": 0.4091, "step": 1651 }, { "epoch": 0.25860989355040703, "grad_norm": 0.6224820017814636, "learning_rate": 0.00027196969696969696, "loss": 0.3966, "step": 1652 }, { "epoch": 0.2587664370695053, "grad_norm": 1.2538436651229858, "learning_rate": 0.0002719452590420332, "loss": 0.6685, "step": 1653 }, { "epoch": 0.25892298058860364, "grad_norm": 0.558447003364563, "learning_rate": 0.00027192082111436946, "loss": 0.3777, "step": 1654 }, { "epoch": 0.25907952410770196, "grad_norm": 0.6431753635406494, "learning_rate": 0.00027189638318670577, "loss": 0.3588, "step": 1655 }, { "epoch": 0.25923606762680024, "grad_norm": 0.5876404047012329, "learning_rate": 0.000271871945259042, "loss": 0.3271, "step": 1656 }, { "epoch": 0.25939261114589857, "grad_norm": 1.1488398313522339, "learning_rate": 0.00027184750733137827, "loss": 0.5317, "step": 1657 }, { "epoch": 0.2595491546649969, "grad_norm": 1.7112611532211304, "learning_rate": 0.0002718230694037145, "loss": 
0.7552, "step": 1658 }, { "epoch": 0.25970569818409517, "grad_norm": 0.7966535687446594, "learning_rate": 0.00027179863147605083, "loss": 0.5836, "step": 1659 }, { "epoch": 0.2598622417031935, "grad_norm": 0.847993016242981, "learning_rate": 0.0002717741935483871, "loss": 0.3743, "step": 1660 }, { "epoch": 0.26001878522229177, "grad_norm": 1.1151633262634277, "learning_rate": 0.00027174975562072333, "loss": 0.5694, "step": 1661 }, { "epoch": 0.2601753287413901, "grad_norm": 1.010614275932312, "learning_rate": 0.00027172531769305964, "loss": 0.5105, "step": 1662 }, { "epoch": 0.26033187226048843, "grad_norm": 1.286232590675354, "learning_rate": 0.0002717008797653959, "loss": 0.4341, "step": 1663 }, { "epoch": 0.2604884157795867, "grad_norm": 1.0436598062515259, "learning_rate": 0.00027167644183773214, "loss": 0.5435, "step": 1664 }, { "epoch": 0.26064495929868503, "grad_norm": 1.534713625907898, "learning_rate": 0.00027165200391006844, "loss": 0.703, "step": 1665 }, { "epoch": 0.26080150281778336, "grad_norm": 1.9755645990371704, "learning_rate": 0.00027162756598240464, "loss": 0.8479, "step": 1666 }, { "epoch": 0.26095804633688163, "grad_norm": 2.3660671710968018, "learning_rate": 0.00027160312805474095, "loss": 0.9621, "step": 1667 }, { "epoch": 0.26111458985597996, "grad_norm": 0.8814460039138794, "learning_rate": 0.0002715786901270772, "loss": 0.6693, "step": 1668 }, { "epoch": 0.2612711333750783, "grad_norm": 1.214966893196106, "learning_rate": 0.00027155425219941345, "loss": 0.5287, "step": 1669 }, { "epoch": 0.26142767689417656, "grad_norm": 1.7603254318237305, "learning_rate": 0.00027152981427174975, "loss": 0.9414, "step": 1670 }, { "epoch": 0.2615842204132749, "grad_norm": 1.4818147420883179, "learning_rate": 0.000271505376344086, "loss": 0.6992, "step": 1671 }, { "epoch": 0.2617407639323732, "grad_norm": 1.955994725227356, "learning_rate": 0.00027148093841642226, "loss": 0.6655, "step": 1672 }, { "epoch": 0.2618973074514715, "grad_norm": 
1.3508237600326538, "learning_rate": 0.0002714565004887585, "loss": 0.9702, "step": 1673 }, { "epoch": 0.2620538509705698, "grad_norm": 2.0264861583709717, "learning_rate": 0.0002714320625610948, "loss": 1.1312, "step": 1674 }, { "epoch": 0.26221039448966815, "grad_norm": 1.562551736831665, "learning_rate": 0.00027140762463343106, "loss": 0.8464, "step": 1675 }, { "epoch": 0.2623669380087664, "grad_norm": 1.3331489562988281, "learning_rate": 0.0002713831867057673, "loss": 0.9695, "step": 1676 }, { "epoch": 0.26252348152786475, "grad_norm": 1.8584641218185425, "learning_rate": 0.0002713587487781036, "loss": 0.8289, "step": 1677 }, { "epoch": 0.26268002504696303, "grad_norm": 2.1813502311706543, "learning_rate": 0.00027133431085043987, "loss": 0.8063, "step": 1678 }, { "epoch": 0.26283656856606136, "grad_norm": 1.9859607219696045, "learning_rate": 0.0002713098729227761, "loss": 0.9312, "step": 1679 }, { "epoch": 0.2629931120851597, "grad_norm": 4.345942974090576, "learning_rate": 0.00027128543499511243, "loss": 1.1479, "step": 1680 }, { "epoch": 0.26314965560425796, "grad_norm": 2.3821566104888916, "learning_rate": 0.0002712609970674486, "loss": 1.2104, "step": 1681 }, { "epoch": 0.2633061991233563, "grad_norm": 1.971257209777832, "learning_rate": 0.00027123655913978493, "loss": 1.3765, "step": 1682 }, { "epoch": 0.2634627426424546, "grad_norm": 2.1938669681549072, "learning_rate": 0.0002712121212121212, "loss": 1.3679, "step": 1683 }, { "epoch": 0.2636192861615529, "grad_norm": 2.430959701538086, "learning_rate": 0.00027118768328445743, "loss": 0.8468, "step": 1684 }, { "epoch": 0.2637758296806512, "grad_norm": 3.02817964553833, "learning_rate": 0.00027116324535679374, "loss": 1.5774, "step": 1685 }, { "epoch": 0.26393237319974955, "grad_norm": 2.2461435794830322, "learning_rate": 0.00027113880742913, "loss": 0.9381, "step": 1686 }, { "epoch": 0.2640889167188478, "grad_norm": 1.5934573411941528, "learning_rate": 0.00027111436950146624, "loss": 0.7952, "step": 1687 
}, { "epoch": 0.26424546023794615, "grad_norm": 2.398531436920166, "learning_rate": 0.00027108993157380255, "loss": 1.4084, "step": 1688 }, { "epoch": 0.2644020037570445, "grad_norm": 2.056870698928833, "learning_rate": 0.0002710654936461388, "loss": 1.2535, "step": 1689 }, { "epoch": 0.26455854727614275, "grad_norm": 2.2823963165283203, "learning_rate": 0.00027104105571847505, "loss": 1.2529, "step": 1690 }, { "epoch": 0.2647150907952411, "grad_norm": 1.608046293258667, "learning_rate": 0.0002710166177908113, "loss": 1.3346, "step": 1691 }, { "epoch": 0.2648716343143394, "grad_norm": 3.5349557399749756, "learning_rate": 0.0002709921798631476, "loss": 1.4861, "step": 1692 }, { "epoch": 0.2650281778334377, "grad_norm": 2.209035634994507, "learning_rate": 0.00027096774193548386, "loss": 1.4499, "step": 1693 }, { "epoch": 0.265184721352536, "grad_norm": 2.169724464416504, "learning_rate": 0.0002709433040078201, "loss": 0.9006, "step": 1694 }, { "epoch": 0.26534126487163434, "grad_norm": 2.5454139709472656, "learning_rate": 0.0002709188660801564, "loss": 1.2534, "step": 1695 }, { "epoch": 0.2654978083907326, "grad_norm": 1.16903555393219, "learning_rate": 0.0002708944281524926, "loss": 0.8634, "step": 1696 }, { "epoch": 0.26565435190983094, "grad_norm": 2.4895148277282715, "learning_rate": 0.0002708699902248289, "loss": 1.026, "step": 1697 }, { "epoch": 0.2658108954289292, "grad_norm": 1.859477162361145, "learning_rate": 0.00027084555229716517, "loss": 0.9984, "step": 1698 }, { "epoch": 0.26596743894802755, "grad_norm": 3.284623146057129, "learning_rate": 0.0002708211143695014, "loss": 1.1007, "step": 1699 }, { "epoch": 0.2661239824671259, "grad_norm": 3.7506582736968994, "learning_rate": 0.0002707966764418377, "loss": 1.955, "step": 1700 }, { "epoch": 0.26628052598622415, "grad_norm": 0.6009975671768188, "learning_rate": 0.000270772238514174, "loss": 0.4298, "step": 1701 }, { "epoch": 0.2664370695053225, "grad_norm": 1.3551870584487915, "learning_rate": 
0.00027074780058651023, "loss": 0.622, "step": 1702 }, { "epoch": 0.2665936130244208, "grad_norm": 1.262035608291626, "learning_rate": 0.00027072336265884653, "loss": 0.6164, "step": 1703 }, { "epoch": 0.2667501565435191, "grad_norm": 0.7926015257835388, "learning_rate": 0.0002706989247311828, "loss": 0.4694, "step": 1704 }, { "epoch": 0.2669067000626174, "grad_norm": 1.2867344617843628, "learning_rate": 0.00027067448680351904, "loss": 0.66, "step": 1705 }, { "epoch": 0.26706324358171574, "grad_norm": 1.134917140007019, "learning_rate": 0.0002706500488758553, "loss": 0.9503, "step": 1706 }, { "epoch": 0.267219787100814, "grad_norm": 3.0994365215301514, "learning_rate": 0.0002706256109481916, "loss": 0.9773, "step": 1707 }, { "epoch": 0.26737633061991234, "grad_norm": 0.9256912469863892, "learning_rate": 0.00027060117302052784, "loss": 0.4688, "step": 1708 }, { "epoch": 0.26753287413901067, "grad_norm": 1.101236343383789, "learning_rate": 0.0002705767350928641, "loss": 0.6223, "step": 1709 }, { "epoch": 0.26768941765810894, "grad_norm": 0.7278368473052979, "learning_rate": 0.0002705522971652004, "loss": 0.4361, "step": 1710 }, { "epoch": 0.26784596117720727, "grad_norm": 2.448662757873535, "learning_rate": 0.00027052785923753665, "loss": 0.8, "step": 1711 }, { "epoch": 0.2680025046963056, "grad_norm": 1.1800535917282104, "learning_rate": 0.0002705034213098729, "loss": 0.5331, "step": 1712 }, { "epoch": 0.2681590482154039, "grad_norm": 2.716262102127075, "learning_rate": 0.00027047898338220915, "loss": 1.0572, "step": 1713 }, { "epoch": 0.2683155917345022, "grad_norm": 0.9216450452804565, "learning_rate": 0.0002704545454545454, "loss": 0.6161, "step": 1714 }, { "epoch": 0.2684721352536005, "grad_norm": 1.1275360584259033, "learning_rate": 0.0002704301075268817, "loss": 0.6809, "step": 1715 }, { "epoch": 0.2686286787726988, "grad_norm": 1.6321802139282227, "learning_rate": 0.00027040566959921796, "loss": 0.8089, "step": 1716 }, { "epoch": 0.26878522229179713, 
"grad_norm": 1.1545872688293457, "learning_rate": 0.0002703812316715542, "loss": 0.9202, "step": 1717 }, { "epoch": 0.2689417658108954, "grad_norm": 2.506645679473877, "learning_rate": 0.0002703567937438905, "loss": 0.7897, "step": 1718 }, { "epoch": 0.26909830932999373, "grad_norm": 1.1123408079147339, "learning_rate": 0.00027033235581622677, "loss": 0.5747, "step": 1719 }, { "epoch": 0.26925485284909206, "grad_norm": 0.9035351872444153, "learning_rate": 0.000270307917888563, "loss": 0.5402, "step": 1720 }, { "epoch": 0.26941139636819034, "grad_norm": 1.5112823247909546, "learning_rate": 0.00027028347996089927, "loss": 0.5699, "step": 1721 }, { "epoch": 0.26956793988728867, "grad_norm": 1.4688304662704468, "learning_rate": 0.0002702590420332356, "loss": 0.5282, "step": 1722 }, { "epoch": 0.269724483406387, "grad_norm": 1.3641126155853271, "learning_rate": 0.00027023460410557183, "loss": 0.6053, "step": 1723 }, { "epoch": 0.26988102692548527, "grad_norm": 1.88762366771698, "learning_rate": 0.0002702101661779081, "loss": 0.8346, "step": 1724 }, { "epoch": 0.2700375704445836, "grad_norm": 1.9120090007781982, "learning_rate": 0.0002701857282502444, "loss": 0.9424, "step": 1725 }, { "epoch": 0.2701941139636819, "grad_norm": 1.8165515661239624, "learning_rate": 0.00027016129032258064, "loss": 0.9418, "step": 1726 }, { "epoch": 0.2703506574827802, "grad_norm": 2.26983904838562, "learning_rate": 0.0002701368523949169, "loss": 0.8333, "step": 1727 }, { "epoch": 0.27050720100187853, "grad_norm": 1.2658028602600098, "learning_rate": 0.0002701124144672532, "loss": 0.8788, "step": 1728 }, { "epoch": 0.27066374452097686, "grad_norm": 2.336582660675049, "learning_rate": 0.0002700879765395894, "loss": 0.8146, "step": 1729 }, { "epoch": 0.27082028804007513, "grad_norm": 1.6825093030929565, "learning_rate": 0.0002700635386119257, "loss": 0.8878, "step": 1730 }, { "epoch": 0.27097683155917346, "grad_norm": 2.3204588890075684, "learning_rate": 0.00027003910068426195, "loss": 1.2894, 
"step": 1731 }, { "epoch": 0.27113337507827173, "grad_norm": 2.438342809677124, "learning_rate": 0.0002700146627565982, "loss": 1.2633, "step": 1732 }, { "epoch": 0.27128991859737006, "grad_norm": 2.8188915252685547, "learning_rate": 0.0002699902248289345, "loss": 1.0154, "step": 1733 }, { "epoch": 0.2714464621164684, "grad_norm": 2.1101574897766113, "learning_rate": 0.00026996578690127076, "loss": 1.3788, "step": 1734 }, { "epoch": 0.27160300563556666, "grad_norm": 2.3884902000427246, "learning_rate": 0.000269941348973607, "loss": 1.0987, "step": 1735 }, { "epoch": 0.271759549154665, "grad_norm": 3.389667510986328, "learning_rate": 0.0002699169110459433, "loss": 1.1042, "step": 1736 }, { "epoch": 0.2719160926737633, "grad_norm": 2.0286519527435303, "learning_rate": 0.00026989247311827956, "loss": 1.6512, "step": 1737 }, { "epoch": 0.2720726361928616, "grad_norm": 2.0738837718963623, "learning_rate": 0.0002698680351906158, "loss": 1.1985, "step": 1738 }, { "epoch": 0.2722291797119599, "grad_norm": 2.440089464187622, "learning_rate": 0.00026984359726295207, "loss": 1.225, "step": 1739 }, { "epoch": 0.27238572323105825, "grad_norm": 2.8969979286193848, "learning_rate": 0.00026981915933528837, "loss": 2.0094, "step": 1740 }, { "epoch": 0.2725422667501565, "grad_norm": 2.208665609359741, "learning_rate": 0.0002697947214076246, "loss": 1.367, "step": 1741 }, { "epoch": 0.27269881026925485, "grad_norm": 2.2335593700408936, "learning_rate": 0.0002697702834799609, "loss": 1.2281, "step": 1742 }, { "epoch": 0.2728553537883532, "grad_norm": 1.435701608657837, "learning_rate": 0.0002697458455522972, "loss": 0.98, "step": 1743 }, { "epoch": 0.27301189730745146, "grad_norm": 2.6410810947418213, "learning_rate": 0.0002697214076246334, "loss": 1.5297, "step": 1744 }, { "epoch": 0.2731684408265498, "grad_norm": 3.8984954357147217, "learning_rate": 0.0002696969696969697, "loss": 1.528, "step": 1745 }, { "epoch": 0.2733249843456481, "grad_norm": 1.9594566822052002, "learning_rate": 
0.00026967253176930593, "loss": 1.1372, "step": 1746 }, { "epoch": 0.2734815278647464, "grad_norm": 1.6567978858947754, "learning_rate": 0.0002696480938416422, "loss": 0.8802, "step": 1747 }, { "epoch": 0.2736380713838447, "grad_norm": 1.2935997247695923, "learning_rate": 0.0002696236559139785, "loss": 0.7041, "step": 1748 }, { "epoch": 0.27379461490294305, "grad_norm": 3.2540674209594727, "learning_rate": 0.00026959921798631474, "loss": 1.4213, "step": 1749 }, { "epoch": 0.2739511584220413, "grad_norm": 2.091407060623169, "learning_rate": 0.000269574780058651, "loss": 1.246, "step": 1750 }, { "epoch": 0.27410770194113965, "grad_norm": 0.7957701683044434, "learning_rate": 0.0002695503421309873, "loss": 0.4907, "step": 1751 }, { "epoch": 0.2742642454602379, "grad_norm": 0.6549016833305359, "learning_rate": 0.00026952590420332355, "loss": 0.3429, "step": 1752 }, { "epoch": 0.27442078897933625, "grad_norm": 0.8715705275535583, "learning_rate": 0.0002695014662756598, "loss": 0.4866, "step": 1753 }, { "epoch": 0.2745773324984346, "grad_norm": 0.8090335726737976, "learning_rate": 0.00026947702834799605, "loss": 0.4019, "step": 1754 }, { "epoch": 0.27473387601753285, "grad_norm": 0.5464131236076355, "learning_rate": 0.00026945259042033236, "loss": 0.4006, "step": 1755 }, { "epoch": 0.2748904195366312, "grad_norm": 1.1286975145339966, "learning_rate": 0.0002694281524926686, "loss": 0.6231, "step": 1756 }, { "epoch": 0.2750469630557295, "grad_norm": 1.059890627861023, "learning_rate": 0.00026940371456500486, "loss": 0.4803, "step": 1757 }, { "epoch": 0.2752035065748278, "grad_norm": 1.019981861114502, "learning_rate": 0.00026937927663734116, "loss": 0.4935, "step": 1758 }, { "epoch": 0.2753600500939261, "grad_norm": 0.9815790057182312, "learning_rate": 0.0002693548387096774, "loss": 0.5164, "step": 1759 }, { "epoch": 0.27551659361302444, "grad_norm": 1.1376479864120483, "learning_rate": 0.00026933040078201367, "loss": 0.5096, "step": 1760 }, { "epoch": 0.2756731371321227, 
"grad_norm": 0.9377894997596741, "learning_rate": 0.0002693059628543499, "loss": 0.4121, "step": 1761 }, { "epoch": 0.27582968065122104, "grad_norm": 0.6440132856369019, "learning_rate": 0.00026928152492668617, "loss": 0.4009, "step": 1762 }, { "epoch": 0.27598622417031937, "grad_norm": 1.1063646078109741, "learning_rate": 0.0002692570869990225, "loss": 0.5908, "step": 1763 }, { "epoch": 0.27614276768941765, "grad_norm": 1.2366384267807007, "learning_rate": 0.0002692326490713587, "loss": 0.6754, "step": 1764 }, { "epoch": 0.276299311208516, "grad_norm": 1.511243224143982, "learning_rate": 0.000269208211143695, "loss": 0.6837, "step": 1765 }, { "epoch": 0.2764558547276143, "grad_norm": 2.002196788787842, "learning_rate": 0.0002691837732160313, "loss": 0.9414, "step": 1766 }, { "epoch": 0.2766123982467126, "grad_norm": 0.955176591873169, "learning_rate": 0.00026915933528836753, "loss": 0.6387, "step": 1767 }, { "epoch": 0.2767689417658109, "grad_norm": 1.9328628778457642, "learning_rate": 0.0002691348973607038, "loss": 0.7687, "step": 1768 }, { "epoch": 0.2769254852849092, "grad_norm": 1.1169955730438232, "learning_rate": 0.00026911045943304004, "loss": 0.5347, "step": 1769 }, { "epoch": 0.2770820288040075, "grad_norm": 1.7990258932113647, "learning_rate": 0.00026908602150537634, "loss": 0.4861, "step": 1770 }, { "epoch": 0.27723857232310584, "grad_norm": 1.4510148763656616, "learning_rate": 0.0002690615835777126, "loss": 0.7257, "step": 1771 }, { "epoch": 0.2773951158422041, "grad_norm": 2.4672670364379883, "learning_rate": 0.00026903714565004884, "loss": 0.7709, "step": 1772 }, { "epoch": 0.27755165936130244, "grad_norm": 1.4069247245788574, "learning_rate": 0.00026901270772238515, "loss": 0.7594, "step": 1773 }, { "epoch": 0.27770820288040077, "grad_norm": 1.4989938735961914, "learning_rate": 0.0002689882697947214, "loss": 0.7431, "step": 1774 }, { "epoch": 0.27786474639949904, "grad_norm": 1.9093029499053955, "learning_rate": 0.00026896383186705765, "loss": 
1.0465, "step": 1775 }, { "epoch": 0.27802128991859737, "grad_norm": 1.3027005195617676, "learning_rate": 0.00026893939393939396, "loss": 1.0814, "step": 1776 }, { "epoch": 0.2781778334376957, "grad_norm": 2.1441051959991455, "learning_rate": 0.00026891495601173016, "loss": 0.8204, "step": 1777 }, { "epoch": 0.27833437695679397, "grad_norm": 2.118739128112793, "learning_rate": 0.00026889051808406646, "loss": 1.1269, "step": 1778 }, { "epoch": 0.2784909204758923, "grad_norm": 1.8077781200408936, "learning_rate": 0.0002688660801564027, "loss": 0.8659, "step": 1779 }, { "epoch": 0.27864746399499063, "grad_norm": 2.058222770690918, "learning_rate": 0.00026884164222873896, "loss": 0.9428, "step": 1780 }, { "epoch": 0.2788040075140889, "grad_norm": 2.2564380168914795, "learning_rate": 0.00026881720430107527, "loss": 0.8841, "step": 1781 }, { "epoch": 0.27896055103318723, "grad_norm": 3.851144313812256, "learning_rate": 0.0002687927663734115, "loss": 0.7061, "step": 1782 }, { "epoch": 0.27911709455228556, "grad_norm": 1.6981579065322876, "learning_rate": 0.00026876832844574777, "loss": 1.2807, "step": 1783 }, { "epoch": 0.27927363807138383, "grad_norm": 3.6595795154571533, "learning_rate": 0.000268743890518084, "loss": 1.2373, "step": 1784 }, { "epoch": 0.27943018159048216, "grad_norm": 2.8983232975006104, "learning_rate": 0.0002687194525904203, "loss": 1.029, "step": 1785 }, { "epoch": 0.27958672510958044, "grad_norm": 1.8303897380828857, "learning_rate": 0.0002686950146627566, "loss": 1.1833, "step": 1786 }, { "epoch": 0.27974326862867877, "grad_norm": 2.681189775466919, "learning_rate": 0.00026867057673509283, "loss": 1.5727, "step": 1787 }, { "epoch": 0.2798998121477771, "grad_norm": 2.851393699645996, "learning_rate": 0.0002686461388074291, "loss": 1.4353, "step": 1788 }, { "epoch": 0.28005635566687537, "grad_norm": 2.83847713470459, "learning_rate": 0.0002686217008797654, "loss": 1.3998, "step": 1789 }, { "epoch": 0.2802128991859737, "grad_norm": 2.401233673095703, 
"learning_rate": 0.00026859726295210164, "loss": 1.2927, "step": 1790 }, { "epoch": 0.280369442705072, "grad_norm": 2.81550931930542, "learning_rate": 0.0002685728250244379, "loss": 1.3452, "step": 1791 }, { "epoch": 0.2805259862241703, "grad_norm": 2.0500288009643555, "learning_rate": 0.00026854838709677414, "loss": 1.2581, "step": 1792 }, { "epoch": 0.2806825297432686, "grad_norm": 2.6344480514526367, "learning_rate": 0.00026852394916911045, "loss": 0.9827, "step": 1793 }, { "epoch": 0.28083907326236696, "grad_norm": 1.7978938817977905, "learning_rate": 0.0002684995112414467, "loss": 0.8994, "step": 1794 }, { "epoch": 0.28099561678146523, "grad_norm": 2.9591362476348877, "learning_rate": 0.00026847507331378295, "loss": 1.9596, "step": 1795 }, { "epoch": 0.28115216030056356, "grad_norm": 2.2683701515197754, "learning_rate": 0.00026845063538611925, "loss": 1.1068, "step": 1796 }, { "epoch": 0.2813087038196619, "grad_norm": 1.6625263690948486, "learning_rate": 0.0002684261974584555, "loss": 0.515, "step": 1797 }, { "epoch": 0.28146524733876016, "grad_norm": 1.8417325019836426, "learning_rate": 0.00026840175953079176, "loss": 0.6351, "step": 1798 }, { "epoch": 0.2816217908578585, "grad_norm": 2.7256836891174316, "learning_rate": 0.00026837732160312806, "loss": 1.6654, "step": 1799 }, { "epoch": 0.2817783343769568, "grad_norm": 2.1390318870544434, "learning_rate": 0.00026835288367546426, "loss": 1.1826, "step": 1800 }, { "epoch": 0.2819348778960551, "grad_norm": 0.6134200096130371, "learning_rate": 0.00026832844574780056, "loss": 0.4343, "step": 1801 }, { "epoch": 0.2820914214151534, "grad_norm": 0.9784292578697205, "learning_rate": 0.0002683040078201368, "loss": 0.3972, "step": 1802 }, { "epoch": 0.28224796493425175, "grad_norm": 0.6980729699134827, "learning_rate": 0.00026827956989247307, "loss": 0.4933, "step": 1803 }, { "epoch": 0.28240450845335, "grad_norm": 0.6361396312713623, "learning_rate": 0.00026825513196480937, "loss": 0.3944, "step": 1804 }, { "epoch": 
0.28256105197244835, "grad_norm": 0.7284366488456726, "learning_rate": 0.0002682306940371456, "loss": 0.443, "step": 1805 }, { "epoch": 0.2827175954915466, "grad_norm": 0.8181057572364807, "learning_rate": 0.0002682062561094819, "loss": 0.5435, "step": 1806 }, { "epoch": 0.28287413901064495, "grad_norm": 0.7504498362541199, "learning_rate": 0.0002681818181818181, "loss": 0.4323, "step": 1807 }, { "epoch": 0.2830306825297433, "grad_norm": 1.1806275844573975, "learning_rate": 0.00026815738025415443, "loss": 0.5852, "step": 1808 }, { "epoch": 0.28318722604884156, "grad_norm": 1.2418806552886963, "learning_rate": 0.0002681329423264907, "loss": 0.4522, "step": 1809 }, { "epoch": 0.2833437695679399, "grad_norm": 1.2037335634231567, "learning_rate": 0.00026810850439882693, "loss": 0.6192, "step": 1810 }, { "epoch": 0.2835003130870382, "grad_norm": 1.9327640533447266, "learning_rate": 0.00026808406647116324, "loss": 0.4402, "step": 1811 }, { "epoch": 0.2836568566061365, "grad_norm": 1.134433388710022, "learning_rate": 0.0002680596285434995, "loss": 0.4974, "step": 1812 }, { "epoch": 0.2838134001252348, "grad_norm": 0.9470615386962891, "learning_rate": 0.00026803519061583574, "loss": 0.4516, "step": 1813 }, { "epoch": 0.28396994364433314, "grad_norm": 1.848379135131836, "learning_rate": 0.00026801075268817205, "loss": 0.7034, "step": 1814 }, { "epoch": 0.2841264871634314, "grad_norm": 1.2914631366729736, "learning_rate": 0.00026798631476050824, "loss": 0.6472, "step": 1815 }, { "epoch": 0.28428303068252975, "grad_norm": 1.4809837341308594, "learning_rate": 0.00026796187683284455, "loss": 0.5035, "step": 1816 }, { "epoch": 0.2844395742016281, "grad_norm": 1.478471279144287, "learning_rate": 0.0002679374389051808, "loss": 0.7554, "step": 1817 }, { "epoch": 0.28459611772072635, "grad_norm": 1.0676370859146118, "learning_rate": 0.00026791300097751705, "loss": 0.5768, "step": 1818 }, { "epoch": 0.2847526612398247, "grad_norm": 1.5760576725006104, "learning_rate": 
0.00026788856304985336, "loss": 0.6962, "step": 1819 }, { "epoch": 0.284909204758923, "grad_norm": 0.8975147604942322, "learning_rate": 0.0002678641251221896, "loss": 0.4515, "step": 1820 }, { "epoch": 0.2850657482780213, "grad_norm": 2.1367173194885254, "learning_rate": 0.00026783968719452586, "loss": 0.6175, "step": 1821 }, { "epoch": 0.2852222917971196, "grad_norm": 2.5454366207122803, "learning_rate": 0.00026781524926686217, "loss": 0.7304, "step": 1822 }, { "epoch": 0.2853788353162179, "grad_norm": 1.3795878887176514, "learning_rate": 0.0002677908113391984, "loss": 0.7314, "step": 1823 }, { "epoch": 0.2855353788353162, "grad_norm": 1.7124171257019043, "learning_rate": 0.00026776637341153467, "loss": 0.9597, "step": 1824 }, { "epoch": 0.28569192235441454, "grad_norm": 2.3599631786346436, "learning_rate": 0.0002677419354838709, "loss": 1.0985, "step": 1825 }, { "epoch": 0.2858484658735128, "grad_norm": 2.4604930877685547, "learning_rate": 0.0002677174975562072, "loss": 0.7528, "step": 1826 }, { "epoch": 0.28600500939261114, "grad_norm": 2.542212724685669, "learning_rate": 0.0002676930596285435, "loss": 1.0192, "step": 1827 }, { "epoch": 0.28616155291170947, "grad_norm": 2.8099851608276367, "learning_rate": 0.00026766862170087973, "loss": 1.0454, "step": 1828 }, { "epoch": 0.28631809643080774, "grad_norm": 1.8754301071166992, "learning_rate": 0.00026764418377321603, "loss": 1.0623, "step": 1829 }, { "epoch": 0.2864746399499061, "grad_norm": 1.9678064584732056, "learning_rate": 0.0002676197458455523, "loss": 0.9486, "step": 1830 }, { "epoch": 0.2866311834690044, "grad_norm": 1.242255687713623, "learning_rate": 0.00026759530791788854, "loss": 0.6753, "step": 1831 }, { "epoch": 0.2867877269881027, "grad_norm": 2.036165475845337, "learning_rate": 0.0002675708699902248, "loss": 1.168, "step": 1832 }, { "epoch": 0.286944270507201, "grad_norm": 2.8120241165161133, "learning_rate": 0.00026754643206256104, "loss": 0.8798, "step": 1833 }, { "epoch": 0.28710081402629933, 
"grad_norm": 1.734350562095642, "learning_rate": 0.00026752199413489734, "loss": 1.0636, "step": 1834 }, { "epoch": 0.2872573575453976, "grad_norm": 2.014326572418213, "learning_rate": 0.0002674975562072336, "loss": 1.011, "step": 1835 }, { "epoch": 0.28741390106449594, "grad_norm": 2.908611536026001, "learning_rate": 0.00026747311827956985, "loss": 1.0242, "step": 1836 }, { "epoch": 0.28757044458359426, "grad_norm": 2.8601036071777344, "learning_rate": 0.00026744868035190615, "loss": 1.1949, "step": 1837 }, { "epoch": 0.28772698810269254, "grad_norm": 2.5931968688964844, "learning_rate": 0.0002674242424242424, "loss": 0.8634, "step": 1838 }, { "epoch": 0.28788353162179087, "grad_norm": 2.2925524711608887, "learning_rate": 0.00026739980449657865, "loss": 1.0344, "step": 1839 }, { "epoch": 0.28804007514088914, "grad_norm": 1.680653691291809, "learning_rate": 0.0002673753665689149, "loss": 1.3297, "step": 1840 }, { "epoch": 0.28819661865998747, "grad_norm": 1.445891261100769, "learning_rate": 0.0002673509286412512, "loss": 1.2022, "step": 1841 }, { "epoch": 0.2883531621790858, "grad_norm": 3.1469690799713135, "learning_rate": 0.00026732649071358746, "loss": 1.3316, "step": 1842 }, { "epoch": 0.28850970569818407, "grad_norm": 1.903037190437317, "learning_rate": 0.0002673020527859237, "loss": 1.4019, "step": 1843 }, { "epoch": 0.2886662492172824, "grad_norm": 5.236410617828369, "learning_rate": 0.00026727761485826, "loss": 1.8396, "step": 1844 }, { "epoch": 0.28882279273638073, "grad_norm": 4.735574722290039, "learning_rate": 0.00026725317693059627, "loss": 1.8712, "step": 1845 }, { "epoch": 0.288979336255479, "grad_norm": 1.4214756488800049, "learning_rate": 0.0002672287390029325, "loss": 1.0403, "step": 1846 }, { "epoch": 0.28913587977457733, "grad_norm": 2.3396573066711426, "learning_rate": 0.0002672043010752688, "loss": 1.6399, "step": 1847 }, { "epoch": 0.28929242329367566, "grad_norm": 3.088986873626709, "learning_rate": 0.000267179863147605, "loss": 1.184, 
"step": 1848 }, { "epoch": 0.28944896681277393, "grad_norm": 2.6393306255340576, "learning_rate": 0.00026715542521994133, "loss": 1.3135, "step": 1849 }, { "epoch": 0.28960551033187226, "grad_norm": 1.8152228593826294, "learning_rate": 0.0002671309872922776, "loss": 1.4623, "step": 1850 }, { "epoch": 0.2897620538509706, "grad_norm": 0.7032530903816223, "learning_rate": 0.00026710654936461383, "loss": 0.5949, "step": 1851 }, { "epoch": 0.28991859737006886, "grad_norm": 0.7973015904426575, "learning_rate": 0.00026708211143695014, "loss": 0.3533, "step": 1852 }, { "epoch": 0.2900751408891672, "grad_norm": 1.0158485174179077, "learning_rate": 0.0002670576735092864, "loss": 0.5212, "step": 1853 }, { "epoch": 0.2902316844082655, "grad_norm": 0.6405438184738159, "learning_rate": 0.00026703323558162264, "loss": 0.5227, "step": 1854 }, { "epoch": 0.2903882279273638, "grad_norm": 0.8163948059082031, "learning_rate": 0.0002670087976539589, "loss": 0.5069, "step": 1855 }, { "epoch": 0.2905447714464621, "grad_norm": 2.606863021850586, "learning_rate": 0.0002669843597262952, "loss": 0.9599, "step": 1856 }, { "epoch": 0.29070131496556045, "grad_norm": 0.8287584781646729, "learning_rate": 0.00026695992179863145, "loss": 0.5773, "step": 1857 }, { "epoch": 0.2908578584846587, "grad_norm": 0.5821240544319153, "learning_rate": 0.0002669354838709677, "loss": 0.2348, "step": 1858 }, { "epoch": 0.29101440200375706, "grad_norm": 0.9026615023612976, "learning_rate": 0.000266911045943304, "loss": 0.3831, "step": 1859 }, { "epoch": 0.29117094552285533, "grad_norm": 1.199336290359497, "learning_rate": 0.00026688660801564026, "loss": 0.4268, "step": 1860 }, { "epoch": 0.29132748904195366, "grad_norm": 1.1749603748321533, "learning_rate": 0.0002668621700879765, "loss": 0.5279, "step": 1861 }, { "epoch": 0.291484032561052, "grad_norm": 0.8679521083831787, "learning_rate": 0.0002668377321603128, "loss": 0.3649, "step": 1862 }, { "epoch": 0.29164057608015026, "grad_norm": 1.6801387071609497, 
"learning_rate": 0.000266813294232649, "loss": 0.6133, "step": 1863 }, { "epoch": 0.2917971195992486, "grad_norm": 1.3388230800628662, "learning_rate": 0.0002667888563049853, "loss": 0.4792, "step": 1864 }, { "epoch": 0.2919536631183469, "grad_norm": 1.3929903507232666, "learning_rate": 0.00026676441837732157, "loss": 0.6117, "step": 1865 }, { "epoch": 0.2921102066374452, "grad_norm": 1.5032627582550049, "learning_rate": 0.0002667399804496578, "loss": 0.9218, "step": 1866 }, { "epoch": 0.2922667501565435, "grad_norm": 1.732055902481079, "learning_rate": 0.0002667155425219941, "loss": 0.7628, "step": 1867 }, { "epoch": 0.29242329367564185, "grad_norm": 1.4332983493804932, "learning_rate": 0.0002666911045943304, "loss": 0.652, "step": 1868 }, { "epoch": 0.2925798371947401, "grad_norm": 2.0694615840911865, "learning_rate": 0.0002666666666666666, "loss": 0.7529, "step": 1869 }, { "epoch": 0.29273638071383845, "grad_norm": 1.9924960136413574, "learning_rate": 0.00026664222873900293, "loss": 0.7831, "step": 1870 }, { "epoch": 0.2928929242329368, "grad_norm": 1.686233401298523, "learning_rate": 0.0002666177908113392, "loss": 1.0141, "step": 1871 }, { "epoch": 0.29304946775203505, "grad_norm": 1.5367885828018188, "learning_rate": 0.00026659335288367543, "loss": 0.6643, "step": 1872 }, { "epoch": 0.2932060112711334, "grad_norm": 1.6426396369934082, "learning_rate": 0.0002665689149560117, "loss": 0.8695, "step": 1873 }, { "epoch": 0.2933625547902317, "grad_norm": 2.6876118183135986, "learning_rate": 0.000266544477028348, "loss": 0.7657, "step": 1874 }, { "epoch": 0.29351909830933, "grad_norm": 2.0909829139709473, "learning_rate": 0.00026652003910068424, "loss": 1.0113, "step": 1875 }, { "epoch": 0.2936756418284283, "grad_norm": 1.8119468688964844, "learning_rate": 0.0002664956011730205, "loss": 1.0539, "step": 1876 }, { "epoch": 0.2938321853475266, "grad_norm": 2.4083352088928223, "learning_rate": 0.0002664711632453568, "loss": 0.8561, "step": 1877 }, { "epoch": 
0.2939887288666249, "grad_norm": 1.916272521018982, "learning_rate": 0.000266446725317693, "loss": 0.6055, "step": 1878 }, { "epoch": 0.29414527238572324, "grad_norm": 1.9073026180267334, "learning_rate": 0.0002664222873900293, "loss": 1.3, "step": 1879 }, { "epoch": 0.2943018159048215, "grad_norm": 2.932504415512085, "learning_rate": 0.00026639784946236555, "loss": 1.0639, "step": 1880 }, { "epoch": 0.29445835942391985, "grad_norm": 2.1795926094055176, "learning_rate": 0.0002663734115347018, "loss": 1.1687, "step": 1881 }, { "epoch": 0.2946149029430182, "grad_norm": 2.62552809715271, "learning_rate": 0.0002663489736070381, "loss": 1.2471, "step": 1882 }, { "epoch": 0.29477144646211645, "grad_norm": 2.430758237838745, "learning_rate": 0.00026632453567937436, "loss": 1.0549, "step": 1883 }, { "epoch": 0.2949279899812148, "grad_norm": 1.8063433170318604, "learning_rate": 0.0002663000977517106, "loss": 1.0626, "step": 1884 }, { "epoch": 0.2950845335003131, "grad_norm": 7.484827995300293, "learning_rate": 0.0002662756598240469, "loss": 1.1665, "step": 1885 }, { "epoch": 0.2952410770194114, "grad_norm": 3.3804683685302734, "learning_rate": 0.00026625122189638317, "loss": 1.9209, "step": 1886 }, { "epoch": 0.2953976205385097, "grad_norm": 1.9869343042373657, "learning_rate": 0.0002662267839687194, "loss": 1.1098, "step": 1887 }, { "epoch": 0.29555416405760804, "grad_norm": 3.7342841625213623, "learning_rate": 0.00026620234604105567, "loss": 1.3265, "step": 1888 }, { "epoch": 0.2957107075767063, "grad_norm": 1.7713956832885742, "learning_rate": 0.000266177908113392, "loss": 1.5005, "step": 1889 }, { "epoch": 0.29586725109580464, "grad_norm": 2.1043894290924072, "learning_rate": 0.0002661534701857282, "loss": 0.8232, "step": 1890 }, { "epoch": 0.29602379461490297, "grad_norm": 2.7855064868927, "learning_rate": 0.0002661290322580645, "loss": 1.1112, "step": 1891 }, { "epoch": 0.29618033813400124, "grad_norm": 2.4430272579193115, "learning_rate": 0.0002661045943304008, 
"loss": 1.4161, "step": 1892 }, { "epoch": 0.29633688165309957, "grad_norm": 2.2559332847595215, "learning_rate": 0.00026608015640273703, "loss": 1.8431, "step": 1893 }, { "epoch": 0.29649342517219784, "grad_norm": 6.513355255126953, "learning_rate": 0.0002660557184750733, "loss": 1.9822, "step": 1894 }, { "epoch": 0.2966499686912962, "grad_norm": 2.599079132080078, "learning_rate": 0.0002660312805474096, "loss": 1.2078, "step": 1895 }, { "epoch": 0.2968065122103945, "grad_norm": NaN, "learning_rate": 0.0002660312805474096, "loss": 0.0, "step": 1896 }, { "epoch": 0.2969630557294928, "grad_norm": 5.364233016967773, "learning_rate": 0.0002660068426197458, "loss": 1.1353, "step": 1897 }, { "epoch": 0.2971195992485911, "grad_norm": 3.103199005126953, "learning_rate": 0.0002659824046920821, "loss": 0.9987, "step": 1898 }, { "epoch": 0.29727614276768943, "grad_norm": 1.9960848093032837, "learning_rate": 0.00026595796676441835, "loss": 0.7689, "step": 1899 }, { "epoch": 0.2974326862867877, "grad_norm": 1.9379894733428955, "learning_rate": 0.0002659335288367546, "loss": 1.1175, "step": 1900 }, { "epoch": 0.29758922980588604, "grad_norm": 0.6607356667518616, "learning_rate": 0.0002659090909090909, "loss": 0.3971, "step": 1901 }, { "epoch": 0.29774577332498436, "grad_norm": 0.8567434549331665, "learning_rate": 0.00026588465298142715, "loss": 0.4854, "step": 1902 }, { "epoch": 0.29790231684408264, "grad_norm": 0.6764739751815796, "learning_rate": 0.0002658602150537634, "loss": 0.3991, "step": 1903 }, { "epoch": 0.29805886036318097, "grad_norm": 0.630438506603241, "learning_rate": 0.00026583577712609966, "loss": 0.3442, "step": 1904 }, { "epoch": 0.2982154038822793, "grad_norm": 0.5661547780036926, "learning_rate": 0.00026581133919843596, "loss": 0.3289, "step": 1905 }, { "epoch": 0.29837194740137757, "grad_norm": 0.7079960107803345, "learning_rate": 0.0002657869012707722, "loss": 0.4095, "step": 1906 }, { "epoch": 0.2985284909204759, "grad_norm": 0.7579357624053955, 
"learning_rate": 0.00026576246334310846, "loss": 0.4561, "step": 1907 }, { "epoch": 0.2986850344395742, "grad_norm": 0.753103494644165, "learning_rate": 0.00026573802541544477, "loss": 0.4748, "step": 1908 }, { "epoch": 0.2988415779586725, "grad_norm": 0.5085161328315735, "learning_rate": 0.000265713587487781, "loss": 0.3129, "step": 1909 }, { "epoch": 0.29899812147777083, "grad_norm": 1.400939702987671, "learning_rate": 0.00026568914956011727, "loss": 0.8525, "step": 1910 }, { "epoch": 0.29915466499686916, "grad_norm": 1.0124390125274658, "learning_rate": 0.0002656647116324536, "loss": 0.5729, "step": 1911 }, { "epoch": 0.29931120851596743, "grad_norm": 1.1342532634735107, "learning_rate": 0.0002656402737047898, "loss": 0.6094, "step": 1912 }, { "epoch": 0.29946775203506576, "grad_norm": 0.8373773694038391, "learning_rate": 0.0002656158357771261, "loss": 0.5798, "step": 1913 }, { "epoch": 0.29962429555416403, "grad_norm": 1.389040470123291, "learning_rate": 0.00026559139784946233, "loss": 0.6012, "step": 1914 }, { "epoch": 0.29978083907326236, "grad_norm": 0.9821066856384277, "learning_rate": 0.0002655669599217986, "loss": 0.7038, "step": 1915 }, { "epoch": 0.2999373825923607, "grad_norm": 0.9827203750610352, "learning_rate": 0.0002655425219941349, "loss": 0.5822, "step": 1916 }, { "epoch": 0.30009392611145896, "grad_norm": 1.2951587438583374, "learning_rate": 0.00026551808406647114, "loss": 0.4766, "step": 1917 }, { "epoch": 0.3002504696305573, "grad_norm": 1.0430768728256226, "learning_rate": 0.0002654936461388074, "loss": 0.5216, "step": 1918 }, { "epoch": 0.3004070131496556, "grad_norm": 0.9382654428482056, "learning_rate": 0.0002654692082111437, "loss": 0.4903, "step": 1919 }, { "epoch": 0.3005635566687539, "grad_norm": 4.105330944061279, "learning_rate": 0.00026544477028347995, "loss": 1.2844, "step": 1920 }, { "epoch": 0.3007201001878522, "grad_norm": 1.663966178894043, "learning_rate": 0.0002654203323558162, "loss": 0.9027, "step": 1921 }, { "epoch": 
0.30087664370695055, "grad_norm": 2.5586514472961426, "learning_rate": 0.00026539589442815245, "loss": 1.0099, "step": 1922 }, { "epoch": 0.3010331872260488, "grad_norm": 1.8075790405273438, "learning_rate": 0.00026537145650048875, "loss": 0.6638, "step": 1923 }, { "epoch": 0.30118973074514716, "grad_norm": 1.4178240299224854, "learning_rate": 0.000265347018572825, "loss": 0.5234, "step": 1924 }, { "epoch": 0.3013462742642455, "grad_norm": 8.215217590332031, "learning_rate": 0.00026532258064516126, "loss": 1.948, "step": 1925 }, { "epoch": 0.30150281778334376, "grad_norm": 2.2477364540100098, "learning_rate": 0.00026529814271749756, "loss": 0.7896, "step": 1926 }, { "epoch": 0.3016593613024421, "grad_norm": 2.111276626586914, "learning_rate": 0.00026527370478983376, "loss": 0.9689, "step": 1927 }, { "epoch": 0.3018159048215404, "grad_norm": 5.124137878417969, "learning_rate": 0.00026524926686217006, "loss": 1.6356, "step": 1928 }, { "epoch": 0.3019724483406387, "grad_norm": 1.8129172325134277, "learning_rate": 0.0002652248289345063, "loss": 0.7388, "step": 1929 }, { "epoch": 0.302128991859737, "grad_norm": 2.3607561588287354, "learning_rate": 0.00026520039100684257, "loss": 0.8196, "step": 1930 }, { "epoch": 0.3022855353788353, "grad_norm": 2.1512415409088135, "learning_rate": 0.0002651759530791789, "loss": 0.8933, "step": 1931 }, { "epoch": 0.3024420788979336, "grad_norm": 1.5390371084213257, "learning_rate": 0.0002651515151515151, "loss": 0.944, "step": 1932 }, { "epoch": 0.30259862241703195, "grad_norm": 1.9705681800842285, "learning_rate": 0.0002651270772238514, "loss": 1.0435, "step": 1933 }, { "epoch": 0.3027551659361302, "grad_norm": 1.8132861852645874, "learning_rate": 0.0002651026392961877, "loss": 0.7962, "step": 1934 }, { "epoch": 0.30291170945522855, "grad_norm": 3.0275063514709473, "learning_rate": 0.00026507820136852393, "loss": 1.2715, "step": 1935 }, { "epoch": 0.3030682529743269, "grad_norm": 1.90733003616333, "learning_rate": 
0.0002650537634408602, "loss": 1.5763, "step": 1936 }, { "epoch": 0.30322479649342515, "grad_norm": 2.937795400619507, "learning_rate": 0.00026502932551319643, "loss": 1.7163, "step": 1937 }, { "epoch": 0.3033813400125235, "grad_norm": 3.4657444953918457, "learning_rate": 0.00026500488758553274, "loss": 1.1086, "step": 1938 }, { "epoch": 0.3035378835316218, "grad_norm": 2.1386613845825195, "learning_rate": 0.000264980449657869, "loss": 1.3036, "step": 1939 }, { "epoch": 0.3036944270507201, "grad_norm": 3.0543034076690674, "learning_rate": 0.00026495601173020524, "loss": 1.1613, "step": 1940 }, { "epoch": 0.3038509705698184, "grad_norm": 4.59813117980957, "learning_rate": 0.00026493157380254155, "loss": 1.7809, "step": 1941 }, { "epoch": 0.30400751408891674, "grad_norm": 1.5989995002746582, "learning_rate": 0.0002649071358748778, "loss": 1.3399, "step": 1942 }, { "epoch": 0.304164057608015, "grad_norm": 3.1781742572784424, "learning_rate": 0.00026488269794721405, "loss": 1.2275, "step": 1943 }, { "epoch": 0.30432060112711334, "grad_norm": 1.8627430200576782, "learning_rate": 0.0002648582600195503, "loss": 1.4865, "step": 1944 }, { "epoch": 0.3044771446462117, "grad_norm": 2.872692823410034, "learning_rate": 0.00026483382209188655, "loss": 1.359, "step": 1945 }, { "epoch": 0.30463368816530995, "grad_norm": 2.231991767883301, "learning_rate": 0.00026480938416422286, "loss": 1.3721, "step": 1946 }, { "epoch": 0.3047902316844083, "grad_norm": 4.821183681488037, "learning_rate": 0.0002647849462365591, "loss": 1.0801, "step": 1947 }, { "epoch": 0.30494677520350655, "grad_norm": 3.1386799812316895, "learning_rate": 0.00026476050830889536, "loss": 0.9191, "step": 1948 }, { "epoch": 0.3051033187226049, "grad_norm": 1.6399694681167603, "learning_rate": 0.00026473607038123167, "loss": 1.0087, "step": 1949 }, { "epoch": 0.3052598622417032, "grad_norm": 1.6671782732009888, "learning_rate": 0.0002647116324535679, "loss": 1.5208, "step": 1950 }, { "epoch": 0.3054164057608015, 
"grad_norm": 0.6199254393577576, "learning_rate": 0.00026468719452590417, "loss": 0.4585, "step": 1951 }, { "epoch": 0.3055729492798998, "grad_norm": 0.783209502696991, "learning_rate": 0.0002646627565982404, "loss": 0.4495, "step": 1952 }, { "epoch": 0.30572949279899814, "grad_norm": 0.6731662154197693, "learning_rate": 0.0002646383186705767, "loss": 0.452, "step": 1953 }, { "epoch": 0.3058860363180964, "grad_norm": 0.6478911638259888, "learning_rate": 0.000264613880742913, "loss": 0.4503, "step": 1954 }, { "epoch": 0.30604257983719474, "grad_norm": 1.0483900308609009, "learning_rate": 0.00026458944281524923, "loss": 0.4677, "step": 1955 }, { "epoch": 0.30619912335629307, "grad_norm": 0.8272550106048584, "learning_rate": 0.00026456500488758553, "loss": 0.4934, "step": 1956 }, { "epoch": 0.30635566687539134, "grad_norm": 0.5366849899291992, "learning_rate": 0.0002645405669599218, "loss": 0.3189, "step": 1957 }, { "epoch": 0.30651221039448967, "grad_norm": 2.845299243927002, "learning_rate": 0.00026451612903225804, "loss": 0.4301, "step": 1958 }, { "epoch": 0.306668753913588, "grad_norm": 1.312286615371704, "learning_rate": 0.00026449169110459434, "loss": 0.5793, "step": 1959 }, { "epoch": 0.3068252974326863, "grad_norm": 1.1284027099609375, "learning_rate": 0.00026446725317693054, "loss": 0.4355, "step": 1960 }, { "epoch": 0.3069818409517846, "grad_norm": 1.250265121459961, "learning_rate": 0.00026444281524926684, "loss": 0.6103, "step": 1961 }, { "epoch": 0.30713838447088293, "grad_norm": 1.059794545173645, "learning_rate": 0.0002644183773216031, "loss": 0.4448, "step": 1962 }, { "epoch": 0.3072949279899812, "grad_norm": 1.0672193765640259, "learning_rate": 0.00026439393939393935, "loss": 0.4652, "step": 1963 }, { "epoch": 0.30745147150907953, "grad_norm": 1.6198948621749878, "learning_rate": 0.00026436950146627565, "loss": 0.855, "step": 1964 }, { "epoch": 0.3076080150281778, "grad_norm": 1.385394811630249, "learning_rate": 0.0002643450635386119, "loss": 0.6643, 
"step": 1965 }, { "epoch": 0.30776455854727613, "grad_norm": 1.2622168064117432, "learning_rate": 0.00026432062561094815, "loss": 0.7861, "step": 1966 }, { "epoch": 0.30792110206637446, "grad_norm": 1.0766664743423462, "learning_rate": 0.0002642961876832844, "loss": 0.5213, "step": 1967 }, { "epoch": 0.30807764558547274, "grad_norm": 1.1218162775039673, "learning_rate": 0.0002642717497556207, "loss": 0.6966, "step": 1968 }, { "epoch": 0.30823418910457107, "grad_norm": 1.4492287635803223, "learning_rate": 0.00026424731182795696, "loss": 0.538, "step": 1969 }, { "epoch": 0.3083907326236694, "grad_norm": 1.70439875125885, "learning_rate": 0.0002642228739002932, "loss": 0.8476, "step": 1970 }, { "epoch": 0.30854727614276767, "grad_norm": 1.5610077381134033, "learning_rate": 0.0002641984359726295, "loss": 0.641, "step": 1971 }, { "epoch": 0.308703819661866, "grad_norm": 1.6430991888046265, "learning_rate": 0.00026417399804496577, "loss": 0.9228, "step": 1972 }, { "epoch": 0.3088603631809643, "grad_norm": 3.3181350231170654, "learning_rate": 0.000264149560117302, "loss": 1.2539, "step": 1973 }, { "epoch": 0.3090169067000626, "grad_norm": 1.5349977016448975, "learning_rate": 0.00026412512218963833, "loss": 0.8461, "step": 1974 }, { "epoch": 0.30917345021916093, "grad_norm": 1.3889473676681519, "learning_rate": 0.0002641006842619745, "loss": 0.8088, "step": 1975 }, { "epoch": 0.30932999373825926, "grad_norm": 3.5759482383728027, "learning_rate": 0.00026407624633431083, "loss": 1.0715, "step": 1976 }, { "epoch": 0.30948653725735753, "grad_norm": 5.998546123504639, "learning_rate": 0.0002640518084066471, "loss": 1.1101, "step": 1977 }, { "epoch": 0.30964308077645586, "grad_norm": 1.6314876079559326, "learning_rate": 0.00026402737047898333, "loss": 0.9514, "step": 1978 }, { "epoch": 0.3097996242955542, "grad_norm": 1.3417260646820068, "learning_rate": 0.00026400293255131964, "loss": 0.9667, "step": 1979 }, { "epoch": 0.30995616781465246, "grad_norm": 1.557165265083313, 
"learning_rate": 0.0002639784946236559, "loss": 0.9157, "step": 1980 }, { "epoch": 0.3101127113337508, "grad_norm": 1.574750542640686, "learning_rate": 0.00026395405669599214, "loss": 0.7029, "step": 1981 }, { "epoch": 0.3102692548528491, "grad_norm": 2.596886157989502, "learning_rate": 0.00026392961876832845, "loss": 1.3352, "step": 1982 }, { "epoch": 0.3104257983719474, "grad_norm": 1.6578302383422852, "learning_rate": 0.0002639051808406647, "loss": 0.6754, "step": 1983 }, { "epoch": 0.3105823418910457, "grad_norm": 1.7295254468917847, "learning_rate": 0.00026388074291300095, "loss": 1.0541, "step": 1984 }, { "epoch": 0.310738885410144, "grad_norm": 1.5874801874160767, "learning_rate": 0.0002638563049853372, "loss": 1.1757, "step": 1985 }, { "epoch": 0.3108954289292423, "grad_norm": 2.339158535003662, "learning_rate": 0.0002638318670576735, "loss": 1.4245, "step": 1986 }, { "epoch": 0.31105197244834065, "grad_norm": 1.606540560722351, "learning_rate": 0.00026380742913000976, "loss": 1.1071, "step": 1987 }, { "epoch": 0.3112085159674389, "grad_norm": 3.946507215499878, "learning_rate": 0.000263782991202346, "loss": 1.4799, "step": 1988 }, { "epoch": 0.31136505948653725, "grad_norm": 3.6631851196289062, "learning_rate": 0.0002637585532746823, "loss": 1.7003, "step": 1989 }, { "epoch": 0.3115216030056356, "grad_norm": 2.883183240890503, "learning_rate": 0.00026373411534701856, "loss": 1.3078, "step": 1990 }, { "epoch": 0.31167814652473386, "grad_norm": 1.4559465646743774, "learning_rate": 0.0002637096774193548, "loss": 1.4201, "step": 1991 }, { "epoch": 0.3118346900438322, "grad_norm": 1.9634108543395996, "learning_rate": 0.00026368523949169107, "loss": 1.3366, "step": 1992 }, { "epoch": 0.3119912335629305, "grad_norm": 2.3829219341278076, "learning_rate": 0.0002636608015640273, "loss": 1.6637, "step": 1993 }, { "epoch": 0.3121477770820288, "grad_norm": 2.1768481731414795, "learning_rate": 0.0002636363636363636, "loss": 1.1262, "step": 1994 }, { "epoch": 
0.3123043206011271, "grad_norm": 2.132509231567383, "learning_rate": 0.0002636119257086999, "loss": 1.3264, "step": 1995 }, { "epoch": 0.31246086412022545, "grad_norm": 3.3182246685028076, "learning_rate": 0.0002635874877810361, "loss": 0.7347, "step": 1996 }, { "epoch": 0.3126174076393237, "grad_norm": 1.8568942546844482, "learning_rate": 0.00026356304985337243, "loss": 0.9086, "step": 1997 }, { "epoch": 0.31277395115842205, "grad_norm": 2.9322288036346436, "learning_rate": 0.0002635386119257087, "loss": 1.2884, "step": 1998 }, { "epoch": 0.3129304946775204, "grad_norm": 5.352544784545898, "learning_rate": 0.00026351417399804493, "loss": 1.1595, "step": 1999 }, { "epoch": 0.31308703819661865, "grad_norm": 2.5578665733337402, "learning_rate": 0.0002634897360703812, "loss": 1.4768, "step": 2000 }, { "epoch": 0.31308703819661865, "eval_loss": 0.7156243920326233, "eval_runtime": 203.0672, "eval_samples_per_second": 60.98, "eval_steps_per_second": 3.812, "eval_wer": 0.4355993794759547, "step": 2000 }, { "epoch": 0.313243581715717, "grad_norm": 0.7377122044563293, "learning_rate": 0.0002634652981427175, "loss": 0.5763, "step": 2001 }, { "epoch": 0.31340012523481525, "grad_norm": 0.6151639819145203, "learning_rate": 0.00026344086021505374, "loss": 0.4244, "step": 2002 }, { "epoch": 0.3135566687539136, "grad_norm": 0.7877262830734253, "learning_rate": 0.00026341642228739, "loss": 0.4682, "step": 2003 }, { "epoch": 0.3137132122730119, "grad_norm": 0.6403424143791199, "learning_rate": 0.0002633919843597263, "loss": 0.4186, "step": 2004 }, { "epoch": 0.3138697557921102, "grad_norm": 0.7786056995391846, "learning_rate": 0.00026336754643206255, "loss": 0.5447, "step": 2005 }, { "epoch": 0.3140262993112085, "grad_norm": 1.1472690105438232, "learning_rate": 0.0002633431085043988, "loss": 0.4301, "step": 2006 }, { "epoch": 0.31418284283030684, "grad_norm": 0.6084722280502319, "learning_rate": 0.0002633186705767351, "loss": 0.3747, "step": 2007 }, { "epoch": 0.3143393863494051, 
"grad_norm": 0.780753493309021, "learning_rate": 0.0002632942326490713, "loss": 0.6128, "step": 2008 }, { "epoch": 0.31449592986850344, "grad_norm": 0.6669083833694458, "learning_rate": 0.0002632697947214076, "loss": 0.5328, "step": 2009 }, { "epoch": 0.31465247338760177, "grad_norm": 1.1285808086395264, "learning_rate": 0.00026324535679374386, "loss": 0.457, "step": 2010 }, { "epoch": 0.31480901690670005, "grad_norm": 0.7840350270271301, "learning_rate": 0.0002632209188660801, "loss": 0.5403, "step": 2011 }, { "epoch": 0.3149655604257984, "grad_norm": 0.7977207899093628, "learning_rate": 0.0002631964809384164, "loss": 0.5125, "step": 2012 }, { "epoch": 0.3151221039448967, "grad_norm": 3.0473263263702393, "learning_rate": 0.00026317204301075267, "loss": 0.5996, "step": 2013 }, { "epoch": 0.315278647463995, "grad_norm": 1.2086435556411743, "learning_rate": 0.0002631476050830889, "loss": 0.5371, "step": 2014 }, { "epoch": 0.3154351909830933, "grad_norm": 1.242156744003296, "learning_rate": 0.00026312316715542517, "loss": 0.5623, "step": 2015 }, { "epoch": 0.31559173450219163, "grad_norm": 1.3142002820968628, "learning_rate": 0.0002630987292277615, "loss": 0.7507, "step": 2016 }, { "epoch": 0.3157482780212899, "grad_norm": 1.0907604694366455, "learning_rate": 0.0002630742913000977, "loss": 0.5674, "step": 2017 }, { "epoch": 0.31590482154038824, "grad_norm": 2.107612133026123, "learning_rate": 0.000263049853372434, "loss": 0.9088, "step": 2018 }, { "epoch": 0.3160613650594865, "grad_norm": 2.506220817565918, "learning_rate": 0.0002630254154447703, "loss": 0.9719, "step": 2019 }, { "epoch": 0.31621790857858484, "grad_norm": 1.3462073802947998, "learning_rate": 0.00026300097751710654, "loss": 0.7515, "step": 2020 }, { "epoch": 0.31637445209768317, "grad_norm": 2.0875937938690186, "learning_rate": 0.0002629765395894428, "loss": 0.7944, "step": 2021 }, { "epoch": 0.31653099561678144, "grad_norm": 2.3353264331817627, "learning_rate": 0.0002629521016617791, "loss": 1.2749, 
"step": 2022 }, { "epoch": 0.31668753913587977, "grad_norm": 2.3827357292175293, "learning_rate": 0.0002629276637341153, "loss": 1.3145, "step": 2023 }, { "epoch": 0.3168440826549781, "grad_norm": 1.2165812253952026, "learning_rate": 0.0002629032258064516, "loss": 0.6288, "step": 2024 }, { "epoch": 0.31700062617407637, "grad_norm": 2.692824363708496, "learning_rate": 0.00026287878787878785, "loss": 0.9532, "step": 2025 }, { "epoch": 0.3171571696931747, "grad_norm": 1.6349315643310547, "learning_rate": 0.0002628543499511241, "loss": 0.6626, "step": 2026 }, { "epoch": 0.31731371321227303, "grad_norm": 1.819525957107544, "learning_rate": 0.0002628299120234604, "loss": 0.6427, "step": 2027 }, { "epoch": 0.3174702567313713, "grad_norm": 1.595823049545288, "learning_rate": 0.00026280547409579665, "loss": 0.7039, "step": 2028 }, { "epoch": 0.31762680025046963, "grad_norm": 1.4594745635986328, "learning_rate": 0.0002627810361681329, "loss": 1.2991, "step": 2029 }, { "epoch": 0.31778334376956796, "grad_norm": 2.08130145072937, "learning_rate": 0.0002627565982404692, "loss": 0.9092, "step": 2030 }, { "epoch": 0.31793988728866623, "grad_norm": 1.4197502136230469, "learning_rate": 0.00026273216031280546, "loss": 0.8779, "step": 2031 }, { "epoch": 0.31809643080776456, "grad_norm": 1.6842551231384277, "learning_rate": 0.0002627077223851417, "loss": 0.8164, "step": 2032 }, { "epoch": 0.3182529743268629, "grad_norm": 2.0086166858673096, "learning_rate": 0.00026268328445747796, "loss": 1.2897, "step": 2033 }, { "epoch": 0.31840951784596117, "grad_norm": 1.6585288047790527, "learning_rate": 0.00026265884652981427, "loss": 0.9483, "step": 2034 }, { "epoch": 0.3185660613650595, "grad_norm": 2.2773189544677734, "learning_rate": 0.0002626344086021505, "loss": 0.9985, "step": 2035 }, { "epoch": 0.3187226048841578, "grad_norm": 1.6106942892074585, "learning_rate": 0.00026260997067448677, "loss": 0.8284, "step": 2036 }, { "epoch": 0.3188791484032561, "grad_norm": 4.228276252746582, 
"learning_rate": 0.0002625855327468231, "loss": 1.1095, "step": 2037 }, { "epoch": 0.3190356919223544, "grad_norm": 2.243229866027832, "learning_rate": 0.0002625610948191593, "loss": 1.3409, "step": 2038 }, { "epoch": 0.3191922354414527, "grad_norm": 1.684822678565979, "learning_rate": 0.0002625366568914956, "loss": 1.0103, "step": 2039 }, { "epoch": 0.319348778960551, "grad_norm": 2.6656551361083984, "learning_rate": 0.00026251221896383183, "loss": 1.5314, "step": 2040 }, { "epoch": 0.31950532247964936, "grad_norm": 2.3108103275299072, "learning_rate": 0.0002624877810361681, "loss": 1.4516, "step": 2041 }, { "epoch": 0.31966186599874763, "grad_norm": 3.1501247882843018, "learning_rate": 0.0002624633431085044, "loss": 1.5163, "step": 2042 }, { "epoch": 0.31981840951784596, "grad_norm": 3.26369571685791, "learning_rate": 0.00026243890518084064, "loss": 1.4617, "step": 2043 }, { "epoch": 0.3199749530369443, "grad_norm": 2.4706554412841797, "learning_rate": 0.0002624144672531769, "loss": 1.5977, "step": 2044 }, { "epoch": 0.32013149655604256, "grad_norm": 2.1074883937835693, "learning_rate": 0.0002623900293255132, "loss": 1.0742, "step": 2045 }, { "epoch": 0.3202880400751409, "grad_norm": 1.818274974822998, "learning_rate": 0.00026236559139784945, "loss": 0.9828, "step": 2046 }, { "epoch": 0.3204445835942392, "grad_norm": 4.391941070556641, "learning_rate": 0.0002623411534701857, "loss": 1.3465, "step": 2047 }, { "epoch": 0.3206011271133375, "grad_norm": 2.8843443393707275, "learning_rate": 0.00026231671554252195, "loss": 1.4826, "step": 2048 }, { "epoch": 0.3207576706324358, "grad_norm": 1.6411322355270386, "learning_rate": 0.00026229227761485825, "loss": 0.8778, "step": 2049 }, { "epoch": 0.32091421415153415, "grad_norm": 1.7798197269439697, "learning_rate": 0.0002622678396871945, "loss": 1.4236, "step": 2050 }, { "epoch": 0.3210707576706324, "grad_norm": 0.6216705441474915, "learning_rate": 0.00026224340175953076, "loss": 0.3424, "step": 2051 }, { "epoch": 
0.32122730118973075, "grad_norm": 0.7121636271476746, "learning_rate": 0.00026221896383186706, "loss": 0.3667, "step": 2052 }, { "epoch": 0.3213838447088291, "grad_norm": 1.5780848264694214, "learning_rate": 0.0002621945259042033, "loss": 0.5881, "step": 2053 }, { "epoch": 0.32154038822792735, "grad_norm": 1.3239952325820923, "learning_rate": 0.00026217008797653957, "loss": 0.6034, "step": 2054 }, { "epoch": 0.3216969317470257, "grad_norm": 0.8389119505882263, "learning_rate": 0.00026214565004887587, "loss": 0.4193, "step": 2055 }, { "epoch": 0.32185347526612396, "grad_norm": 1.1425360441207886, "learning_rate": 0.00026212121212121207, "loss": 0.4454, "step": 2056 }, { "epoch": 0.3220100187852223, "grad_norm": 0.9988243579864502, "learning_rate": 0.0002620967741935484, "loss": 0.4994, "step": 2057 }, { "epoch": 0.3221665623043206, "grad_norm": 0.9738520383834839, "learning_rate": 0.0002620723362658846, "loss": 0.4687, "step": 2058 }, { "epoch": 0.3223231058234189, "grad_norm": 0.7718535661697388, "learning_rate": 0.0002620478983382209, "loss": 0.5803, "step": 2059 }, { "epoch": 0.3224796493425172, "grad_norm": 1.0445855855941772, "learning_rate": 0.0002620234604105572, "loss": 0.5174, "step": 2060 }, { "epoch": 0.32263619286161555, "grad_norm": 1.0972551107406616, "learning_rate": 0.00026199902248289343, "loss": 0.5589, "step": 2061 }, { "epoch": 0.3227927363807138, "grad_norm": 0.9006467461585999, "learning_rate": 0.0002619745845552297, "loss": 0.6339, "step": 2062 }, { "epoch": 0.32294927989981215, "grad_norm": 1.221784234046936, "learning_rate": 0.00026195014662756594, "loss": 0.6673, "step": 2063 }, { "epoch": 0.3231058234189105, "grad_norm": 0.9309597611427307, "learning_rate": 0.00026192570869990224, "loss": 0.5886, "step": 2064 }, { "epoch": 0.32326236693800875, "grad_norm": 1.0560715198516846, "learning_rate": 0.0002619012707722385, "loss": 0.8406, "step": 2065 }, { "epoch": 0.3234189104571071, "grad_norm": 1.26978600025177, "learning_rate": 
0.00026187683284457474, "loss": 0.4253, "step": 2066 }, { "epoch": 0.3235754539762054, "grad_norm": 1.005332350730896, "learning_rate": 0.00026185239491691105, "loss": 0.6387, "step": 2067 }, { "epoch": 0.3237319974953037, "grad_norm": 2.363250970840454, "learning_rate": 0.0002618279569892473, "loss": 0.7234, "step": 2068 }, { "epoch": 0.323888541014402, "grad_norm": 1.3625165224075317, "learning_rate": 0.00026180351906158355, "loss": 0.6617, "step": 2069 }, { "epoch": 0.32404508453350034, "grad_norm": 1.591560959815979, "learning_rate": 0.00026177908113391986, "loss": 0.7068, "step": 2070 }, { "epoch": 0.3242016280525986, "grad_norm": 1.429697036743164, "learning_rate": 0.00026175464320625605, "loss": 0.7811, "step": 2071 }, { "epoch": 0.32435817157169694, "grad_norm": 2.3727924823760986, "learning_rate": 0.00026173020527859236, "loss": 0.885, "step": 2072 }, { "epoch": 0.3245147150907952, "grad_norm": 1.7005720138549805, "learning_rate": 0.0002617057673509286, "loss": 0.8557, "step": 2073 }, { "epoch": 0.32467125860989354, "grad_norm": 1.7432682514190674, "learning_rate": 0.00026168132942326486, "loss": 0.8502, "step": 2074 }, { "epoch": 0.32482780212899187, "grad_norm": 2.942030906677246, "learning_rate": 0.00026165689149560117, "loss": 1.4696, "step": 2075 }, { "epoch": 0.32498434564809014, "grad_norm": 1.5261107683181763, "learning_rate": 0.0002616324535679374, "loss": 0.5626, "step": 2076 }, { "epoch": 0.3251408891671885, "grad_norm": 1.8354300260543823, "learning_rate": 0.00026160801564027367, "loss": 0.7561, "step": 2077 }, { "epoch": 0.3252974326862868, "grad_norm": 1.2063688039779663, "learning_rate": 0.00026158357771261, "loss": 0.4853, "step": 2078 }, { "epoch": 0.3254539762053851, "grad_norm": 2.0545923709869385, "learning_rate": 0.0002615591397849462, "loss": 0.8312, "step": 2079 }, { "epoch": 0.3256105197244834, "grad_norm": 8.21367073059082, "learning_rate": 0.0002615347018572825, "loss": 1.2077, "step": 2080 }, { "epoch": 0.32576706324358173, 
"grad_norm": 1.7242724895477295, "learning_rate": 0.00026151026392961873, "loss": 0.5682, "step": 2081 }, { "epoch": 0.32592360676268, "grad_norm": 3.253326177597046, "learning_rate": 0.00026148582600195503, "loss": 1.3158, "step": 2082 }, { "epoch": 0.32608015028177834, "grad_norm": 2.2287137508392334, "learning_rate": 0.0002614613880742913, "loss": 1.1925, "step": 2083 }, { "epoch": 0.32623669380087666, "grad_norm": 2.752636194229126, "learning_rate": 0.00026143695014662754, "loss": 1.5877, "step": 2084 }, { "epoch": 0.32639323731997494, "grad_norm": 1.1275129318237305, "learning_rate": 0.00026141251221896384, "loss": 0.4494, "step": 2085 }, { "epoch": 0.32654978083907327, "grad_norm": 2.6209516525268555, "learning_rate": 0.00026138807429130004, "loss": 1.5737, "step": 2086 }, { "epoch": 0.3267063243581716, "grad_norm": 2.409445285797119, "learning_rate": 0.00026136363636363634, "loss": 0.9766, "step": 2087 }, { "epoch": 0.32686286787726987, "grad_norm": 3.750723123550415, "learning_rate": 0.0002613391984359726, "loss": 1.4515, "step": 2088 }, { "epoch": 0.3270194113963682, "grad_norm": 2.0334136486053467, "learning_rate": 0.00026131476050830885, "loss": 0.7738, "step": 2089 }, { "epoch": 0.3271759549154665, "grad_norm": 3.3774590492248535, "learning_rate": 0.00026129032258064515, "loss": 1.5601, "step": 2090 }, { "epoch": 0.3273324984345648, "grad_norm": 1.3902050256729126, "learning_rate": 0.0002612658846529814, "loss": 0.8408, "step": 2091 }, { "epoch": 0.32748904195366313, "grad_norm": 1.9110989570617676, "learning_rate": 0.00026124144672531765, "loss": 0.886, "step": 2092 }, { "epoch": 0.3276455854727614, "grad_norm": 1.9537615776062012, "learning_rate": 0.00026121700879765396, "loss": 1.8936, "step": 2093 }, { "epoch": 0.32780212899185973, "grad_norm": 3.947450876235962, "learning_rate": 0.0002611925708699902, "loss": 1.3683, "step": 2094 }, { "epoch": 0.32795867251095806, "grad_norm": 1.9001954793930054, "learning_rate": 0.00026116813294232646, "loss": 
1.0555, "step": 2095 }, { "epoch": 0.32811521603005633, "grad_norm": 1.3852555751800537, "learning_rate": 0.0002611436950146627, "loss": 0.8577, "step": 2096 }, { "epoch": 0.32827175954915466, "grad_norm": 1.6378505229949951, "learning_rate": 0.000261119257086999, "loss": 0.5502, "step": 2097 }, { "epoch": 0.328428303068253, "grad_norm": 1.4456698894500732, "learning_rate": 0.00026109481915933527, "loss": 1.164, "step": 2098 }, { "epoch": 0.32858484658735126, "grad_norm": 2.254760265350342, "learning_rate": 0.0002610703812316715, "loss": 1.174, "step": 2099 }, { "epoch": 0.3287413901064496, "grad_norm": 2.9925920963287354, "learning_rate": 0.00026104594330400783, "loss": 1.4058, "step": 2100 }, { "epoch": 0.3288979336255479, "grad_norm": 0.5673936605453491, "learning_rate": 0.0002610215053763441, "loss": 0.4183, "step": 2101 }, { "epoch": 0.3290544771446462, "grad_norm": 0.6673199534416199, "learning_rate": 0.00026099706744868033, "loss": 0.3369, "step": 2102 }, { "epoch": 0.3292110206637445, "grad_norm": 0.8384460806846619, "learning_rate": 0.0002609726295210166, "loss": 0.4785, "step": 2103 }, { "epoch": 0.32936756418284285, "grad_norm": 0.8750125765800476, "learning_rate": 0.00026094819159335283, "loss": 0.5129, "step": 2104 }, { "epoch": 0.3295241077019411, "grad_norm": 0.729326069355011, "learning_rate": 0.00026092375366568914, "loss": 0.5408, "step": 2105 }, { "epoch": 0.32968065122103946, "grad_norm": 0.8103402256965637, "learning_rate": 0.0002608993157380254, "loss": 0.5732, "step": 2106 }, { "epoch": 0.3298371947401378, "grad_norm": 1.0650441646575928, "learning_rate": 0.00026087487781036164, "loss": 0.5434, "step": 2107 }, { "epoch": 0.32999373825923606, "grad_norm": 0.8528415560722351, "learning_rate": 0.00026085043988269795, "loss": 0.4231, "step": 2108 }, { "epoch": 0.3301502817783344, "grad_norm": 1.0440559387207031, "learning_rate": 0.0002608260019550342, "loss": 0.533, "step": 2109 }, { "epoch": 0.33030682529743266, "grad_norm": 0.9997760653495789, 
"learning_rate": 0.00026080156402737045, "loss": 0.6891, "step": 2110 }, { "epoch": 0.330463368816531, "grad_norm": 1.1891223192214966, "learning_rate": 0.0002607771260997067, "loss": 0.4688, "step": 2111 }, { "epoch": 0.3306199123356293, "grad_norm": 1.3714631795883179, "learning_rate": 0.000260752688172043, "loss": 0.5096, "step": 2112 }, { "epoch": 0.3307764558547276, "grad_norm": 1.0900437831878662, "learning_rate": 0.00026072825024437926, "loss": 0.6755, "step": 2113 }, { "epoch": 0.3309329993738259, "grad_norm": 2.1338839530944824, "learning_rate": 0.0002607038123167155, "loss": 0.8436, "step": 2114 }, { "epoch": 0.33108954289292425, "grad_norm": 1.6171543598175049, "learning_rate": 0.0002606793743890518, "loss": 0.7621, "step": 2115 }, { "epoch": 0.3312460864120225, "grad_norm": 0.8001659512519836, "learning_rate": 0.00026065493646138806, "loss": 0.491, "step": 2116 }, { "epoch": 0.33140262993112085, "grad_norm": 1.0575730800628662, "learning_rate": 0.0002606304985337243, "loss": 0.6333, "step": 2117 }, { "epoch": 0.3315591734502192, "grad_norm": 1.6386489868164062, "learning_rate": 0.0002606060606060606, "loss": 0.73, "step": 2118 }, { "epoch": 0.33171571696931745, "grad_norm": 1.1848156452178955, "learning_rate": 0.0002605816226783968, "loss": 0.5608, "step": 2119 }, { "epoch": 0.3318722604884158, "grad_norm": 1.1664621829986572, "learning_rate": 0.0002605571847507331, "loss": 0.5895, "step": 2120 }, { "epoch": 0.3320288040075141, "grad_norm": 1.0166929960250854, "learning_rate": 0.0002605327468230694, "loss": 0.5018, "step": 2121 }, { "epoch": 0.3321853475266124, "grad_norm": 3.1172375679016113, "learning_rate": 0.0002605083088954056, "loss": 0.7414, "step": 2122 }, { "epoch": 0.3323418910457107, "grad_norm": 1.3032346963882446, "learning_rate": 0.00026048387096774193, "loss": 0.5268, "step": 2123 }, { "epoch": 0.33249843456480904, "grad_norm": 2.1473283767700195, "learning_rate": 0.0002604594330400782, "loss": 0.8175, "step": 2124 }, { "epoch": 
0.3326549780839073, "grad_norm": 2.331975221633911, "learning_rate": 0.00026043499511241443, "loss": 1.0549, "step": 2125 }, { "epoch": 0.33281152160300564, "grad_norm": 2.3260395526885986, "learning_rate": 0.0002604105571847507, "loss": 1.1403, "step": 2126 }, { "epoch": 0.3329680651221039, "grad_norm": 3.693405866622925, "learning_rate": 0.000260386119257087, "loss": 0.8268, "step": 2127 }, { "epoch": 0.33312460864120225, "grad_norm": 1.5741246938705444, "learning_rate": 0.00026036168132942324, "loss": 1.0704, "step": 2128 }, { "epoch": 0.3332811521603006, "grad_norm": 1.5389891862869263, "learning_rate": 0.0002603372434017595, "loss": 0.8378, "step": 2129 }, { "epoch": 0.33343769567939885, "grad_norm": 1.7030190229415894, "learning_rate": 0.0002603128054740958, "loss": 1.0205, "step": 2130 }, { "epoch": 0.3335942391984972, "grad_norm": 2.1614034175872803, "learning_rate": 0.00026028836754643205, "loss": 1.128, "step": 2131 }, { "epoch": 0.3337507827175955, "grad_norm": 1.5320353507995605, "learning_rate": 0.0002602639296187683, "loss": 0.7876, "step": 2132 }, { "epoch": 0.3339073262366938, "grad_norm": 1.6267305612564087, "learning_rate": 0.0002602394916911046, "loss": 0.8237, "step": 2133 }, { "epoch": 0.3340638697557921, "grad_norm": 3.2950265407562256, "learning_rate": 0.0002602150537634408, "loss": 1.1185, "step": 2134 }, { "epoch": 0.33422041327489044, "grad_norm": 2.2613959312438965, "learning_rate": 0.0002601906158357771, "loss": 0.91, "step": 2135 }, { "epoch": 0.3343769567939887, "grad_norm": 1.922223687171936, "learning_rate": 0.00026016617790811336, "loss": 1.006, "step": 2136 }, { "epoch": 0.33453350031308704, "grad_norm": 3.277189016342163, "learning_rate": 0.0002601417399804496, "loss": 1.4087, "step": 2137 }, { "epoch": 0.33469004383218537, "grad_norm": 2.7033369541168213, "learning_rate": 0.0002601173020527859, "loss": 1.3359, "step": 2138 }, { "epoch": 0.33484658735128364, "grad_norm": 1.8570423126220703, "learning_rate": 0.00026009286412512217, 
"loss": 0.9543, "step": 2139 }, { "epoch": 0.33500313087038197, "grad_norm": 4.569610595703125, "learning_rate": 0.0002600684261974584, "loss": 1.0798, "step": 2140 }, { "epoch": 0.3351596743894803, "grad_norm": 2.035679578781128, "learning_rate": 0.0002600439882697947, "loss": 2.1632, "step": 2141 }, { "epoch": 0.3353162179085786, "grad_norm": 1.7678215503692627, "learning_rate": 0.000260019550342131, "loss": 1.4521, "step": 2142 }, { "epoch": 0.3354727614276769, "grad_norm": 2.673259735107422, "learning_rate": 0.00025999511241446723, "loss": 1.5297, "step": 2143 }, { "epoch": 0.33562930494677523, "grad_norm": 2.435270309448242, "learning_rate": 0.0002599706744868035, "loss": 1.1949, "step": 2144 }, { "epoch": 0.3357858484658735, "grad_norm": 2.4045560359954834, "learning_rate": 0.0002599462365591398, "loss": 1.356, "step": 2145 }, { "epoch": 0.33594239198497183, "grad_norm": 2.4822990894317627, "learning_rate": 0.00025992179863147604, "loss": 0.9344, "step": 2146 }, { "epoch": 0.3360989355040701, "grad_norm": 1.8672354221343994, "learning_rate": 0.0002598973607038123, "loss": 0.7547, "step": 2147 }, { "epoch": 0.33625547902316844, "grad_norm": 0.980976939201355, "learning_rate": 0.0002598729227761486, "loss": 0.8016, "step": 2148 }, { "epoch": 0.33641202254226676, "grad_norm": 1.5991452932357788, "learning_rate": 0.00025984848484848484, "loss": 1.0073, "step": 2149 }, { "epoch": 0.33656856606136504, "grad_norm": 1.6782243251800537, "learning_rate": 0.0002598240469208211, "loss": 1.0327, "step": 2150 }, { "epoch": 0.33672510958046337, "grad_norm": 0.7231286764144897, "learning_rate": 0.00025979960899315735, "loss": 0.4323, "step": 2151 }, { "epoch": 0.3368816530995617, "grad_norm": 0.8727384805679321, "learning_rate": 0.0002597751710654936, "loss": 0.5182, "step": 2152 }, { "epoch": 0.33703819661865997, "grad_norm": 0.49551793932914734, "learning_rate": 0.0002597507331378299, "loss": 0.374, "step": 2153 }, { "epoch": 0.3371947401377583, "grad_norm": 
1.4629757404327393, "learning_rate": 0.00025972629521016615, "loss": 0.4585, "step": 2154 }, { "epoch": 0.3373512836568566, "grad_norm": 0.6694740653038025, "learning_rate": 0.0002597018572825024, "loss": 0.4198, "step": 2155 }, { "epoch": 0.3375078271759549, "grad_norm": 0.9819236993789673, "learning_rate": 0.0002596774193548387, "loss": 0.4105, "step": 2156 }, { "epoch": 0.33766437069505323, "grad_norm": 1.0089393854141235, "learning_rate": 0.00025965298142717496, "loss": 0.5237, "step": 2157 }, { "epoch": 0.33782091421415156, "grad_norm": 0.5175392627716064, "learning_rate": 0.0002596285434995112, "loss": 0.4374, "step": 2158 }, { "epoch": 0.33797745773324983, "grad_norm": 0.8120709657669067, "learning_rate": 0.00025960410557184746, "loss": 0.5646, "step": 2159 }, { "epoch": 0.33813400125234816, "grad_norm": 0.9096924662590027, "learning_rate": 0.00025957966764418377, "loss": 0.5908, "step": 2160 }, { "epoch": 0.3382905447714465, "grad_norm": 0.8007375001907349, "learning_rate": 0.00025955522971652, "loss": 0.4136, "step": 2161 }, { "epoch": 0.33844708829054476, "grad_norm": 1.0222002267837524, "learning_rate": 0.00025953079178885627, "loss": 0.7746, "step": 2162 }, { "epoch": 0.3386036318096431, "grad_norm": 0.897656261920929, "learning_rate": 0.0002595063538611926, "loss": 0.4193, "step": 2163 }, { "epoch": 0.33876017532874136, "grad_norm": 1.0817437171936035, "learning_rate": 0.00025948191593352883, "loss": 0.3561, "step": 2164 }, { "epoch": 0.3389167188478397, "grad_norm": 1.0257635116577148, "learning_rate": 0.0002594574780058651, "loss": 0.4638, "step": 2165 }, { "epoch": 0.339073262366938, "grad_norm": 1.651721715927124, "learning_rate": 0.0002594330400782014, "loss": 0.8187, "step": 2166 }, { "epoch": 0.3392298058860363, "grad_norm": 1.1372004747390747, "learning_rate": 0.0002594086021505376, "loss": 0.5079, "step": 2167 }, { "epoch": 0.3393863494051346, "grad_norm": 1.8931221961975098, "learning_rate": 0.0002593841642228739, "loss": 0.7663, "step": 2168 
}, { "epoch": 0.33954289292423295, "grad_norm": 1.1629186868667603, "learning_rate": 0.00025935972629521014, "loss": 0.6915, "step": 2169 }, { "epoch": 0.3396994364433312, "grad_norm": 1.5948940515518188, "learning_rate": 0.0002593352883675464, "loss": 0.9156, "step": 2170 }, { "epoch": 0.33985597996242956, "grad_norm": 1.6868611574172974, "learning_rate": 0.0002593108504398827, "loss": 0.6997, "step": 2171 }, { "epoch": 0.3400125234815279, "grad_norm": 2.6906371116638184, "learning_rate": 0.00025928641251221895, "loss": 0.744, "step": 2172 }, { "epoch": 0.34016906700062616, "grad_norm": 1.4386720657348633, "learning_rate": 0.0002592619745845552, "loss": 0.5701, "step": 2173 }, { "epoch": 0.3403256105197245, "grad_norm": 1.7945785522460938, "learning_rate": 0.00025923753665689145, "loss": 0.9001, "step": 2174 }, { "epoch": 0.3404821540388228, "grad_norm": 1.8731091022491455, "learning_rate": 0.00025921309872922776, "loss": 0.7199, "step": 2175 }, { "epoch": 0.3406386975579211, "grad_norm": 2.1651723384857178, "learning_rate": 0.000259188660801564, "loss": 1.005, "step": 2176 }, { "epoch": 0.3407952410770194, "grad_norm": 2.3972442150115967, "learning_rate": 0.00025916422287390026, "loss": 1.1601, "step": 2177 }, { "epoch": 0.34095178459611775, "grad_norm": 1.9120584726333618, "learning_rate": 0.00025913978494623656, "loss": 0.8559, "step": 2178 }, { "epoch": 0.341108328115216, "grad_norm": 2.741961717605591, "learning_rate": 0.0002591153470185728, "loss": 0.9065, "step": 2179 }, { "epoch": 0.34126487163431435, "grad_norm": 2.225860834121704, "learning_rate": 0.00025909090909090907, "loss": 1.1946, "step": 2180 }, { "epoch": 0.3414214151534126, "grad_norm": 2.113170623779297, "learning_rate": 0.00025906647116324537, "loss": 1.0144, "step": 2181 }, { "epoch": 0.34157795867251095, "grad_norm": 3.7552993297576904, "learning_rate": 0.00025904203323558157, "loss": 1.0589, "step": 2182 }, { "epoch": 0.3417345021916093, "grad_norm": 2.8972368240356445, "learning_rate": 
0.0002590175953079179, "loss": 1.034, "step": 2183 }, { "epoch": 0.34189104571070755, "grad_norm": 9.414438247680664, "learning_rate": 0.0002589931573802541, "loss": 1.3211, "step": 2184 }, { "epoch": 0.3420475892298059, "grad_norm": 2.1980931758880615, "learning_rate": 0.0002589687194525904, "loss": 1.1937, "step": 2185 }, { "epoch": 0.3422041327489042, "grad_norm": 1.7976131439208984, "learning_rate": 0.0002589442815249267, "loss": 1.2868, "step": 2186 }, { "epoch": 0.3423606762680025, "grad_norm": 3.20818829536438, "learning_rate": 0.00025891984359726293, "loss": 1.3121, "step": 2187 }, { "epoch": 0.3425172197871008, "grad_norm": 2.546349287033081, "learning_rate": 0.0002588954056695992, "loss": 1.4822, "step": 2188 }, { "epoch": 0.34267376330619914, "grad_norm": 2.756331205368042, "learning_rate": 0.0002588709677419355, "loss": 1.3909, "step": 2189 }, { "epoch": 0.3428303068252974, "grad_norm": 2.7843143939971924, "learning_rate": 0.00025884652981427174, "loss": 1.4207, "step": 2190 }, { "epoch": 0.34298685034439574, "grad_norm": 2.695613145828247, "learning_rate": 0.000258822091886608, "loss": 1.0539, "step": 2191 }, { "epoch": 0.3431433938634941, "grad_norm": 2.704921007156372, "learning_rate": 0.00025879765395894424, "loss": 1.2176, "step": 2192 }, { "epoch": 0.34329993738259235, "grad_norm": 2.946237802505493, "learning_rate": 0.00025877321603128055, "loss": 1.6507, "step": 2193 }, { "epoch": 0.3434564809016907, "grad_norm": 2.0598270893096924, "learning_rate": 0.0002587487781036168, "loss": 1.5005, "step": 2194 }, { "epoch": 0.343613024420789, "grad_norm": 2.0431604385375977, "learning_rate": 0.00025872434017595305, "loss": 0.7967, "step": 2195 }, { "epoch": 0.3437695679398873, "grad_norm": 3.8533642292022705, "learning_rate": 0.00025869990224828936, "loss": 1.6454, "step": 2196 }, { "epoch": 0.3439261114589856, "grad_norm": 1.8645732402801514, "learning_rate": 0.00025867546432062555, "loss": 1.0377, "step": 2197 }, { "epoch": 0.34408265497808393, 
"grad_norm": 2.5659990310668945, "learning_rate": 0.00025865102639296186, "loss": 0.9068, "step": 2198 }, { "epoch": 0.3442391984971822, "grad_norm": 2.7070820331573486, "learning_rate": 0.0002586265884652981, "loss": 0.9772, "step": 2199 }, { "epoch": 0.34439574201628054, "grad_norm": 1.378251075744629, "learning_rate": 0.00025860215053763436, "loss": 1.4371, "step": 2200 }, { "epoch": 0.3445522855353788, "grad_norm": 0.5317636132240295, "learning_rate": 0.00025857771260997067, "loss": 0.4837, "step": 2201 }, { "epoch": 0.34470882905447714, "grad_norm": 0.7095066905021667, "learning_rate": 0.0002585532746823069, "loss": 0.579, "step": 2202 }, { "epoch": 0.34486537257357547, "grad_norm": 0.9275551438331604, "learning_rate": 0.00025852883675464317, "loss": 0.6504, "step": 2203 }, { "epoch": 0.34502191609267374, "grad_norm": 1.716207504272461, "learning_rate": 0.0002585043988269795, "loss": 0.4966, "step": 2204 }, { "epoch": 0.34517845961177207, "grad_norm": 0.6072659492492676, "learning_rate": 0.0002584799608993157, "loss": 0.5413, "step": 2205 }, { "epoch": 0.3453350031308704, "grad_norm": 1.4741132259368896, "learning_rate": 0.000258455522971652, "loss": 0.4245, "step": 2206 }, { "epoch": 0.3454915466499687, "grad_norm": 0.9101395010948181, "learning_rate": 0.00025843108504398823, "loss": 0.5105, "step": 2207 }, { "epoch": 0.345648090169067, "grad_norm": 0.8929093480110168, "learning_rate": 0.00025840664711632453, "loss": 0.5917, "step": 2208 }, { "epoch": 0.34580463368816533, "grad_norm": 0.9677044153213501, "learning_rate": 0.0002583822091886608, "loss": 0.618, "step": 2209 }, { "epoch": 0.3459611772072636, "grad_norm": 0.9117663502693176, "learning_rate": 0.00025835777126099704, "loss": 0.5954, "step": 2210 }, { "epoch": 0.34611772072636193, "grad_norm": 0.8379285335540771, "learning_rate": 0.00025833333333333334, "loss": 0.4431, "step": 2211 }, { "epoch": 0.34627426424546026, "grad_norm": 0.8588935136795044, "learning_rate": 0.0002583088954056696, "loss": 
0.4436, "step": 2212 }, { "epoch": 0.34643080776455853, "grad_norm": 1.0925568342208862, "learning_rate": 0.00025828445747800584, "loss": 0.3352, "step": 2213 }, { "epoch": 0.34658735128365686, "grad_norm": 1.6619585752487183, "learning_rate": 0.00025826001955034215, "loss": 0.6069, "step": 2214 }, { "epoch": 0.3467438948027552, "grad_norm": 2.354327440261841, "learning_rate": 0.00025823558162267835, "loss": 0.7189, "step": 2215 }, { "epoch": 0.34690043832185347, "grad_norm": 1.1300790309906006, "learning_rate": 0.00025821114369501465, "loss": 0.4776, "step": 2216 }, { "epoch": 0.3470569818409518, "grad_norm": 1.8510233163833618, "learning_rate": 0.0002581867057673509, "loss": 0.8701, "step": 2217 }, { "epoch": 0.34721352536005007, "grad_norm": 1.0941369533538818, "learning_rate": 0.00025816226783968716, "loss": 0.621, "step": 2218 }, { "epoch": 0.3473700688791484, "grad_norm": 1.9270963668823242, "learning_rate": 0.00025813782991202346, "loss": 0.4966, "step": 2219 }, { "epoch": 0.3475266123982467, "grad_norm": 2.781996726989746, "learning_rate": 0.0002581133919843597, "loss": 0.7897, "step": 2220 }, { "epoch": 0.347683155917345, "grad_norm": 1.3982248306274414, "learning_rate": 0.00025808895405669596, "loss": 0.7361, "step": 2221 }, { "epoch": 0.34783969943644333, "grad_norm": 1.4898085594177246, "learning_rate": 0.0002580645161290322, "loss": 0.6928, "step": 2222 }, { "epoch": 0.34799624295554166, "grad_norm": 1.5134382247924805, "learning_rate": 0.0002580400782013685, "loss": 0.6501, "step": 2223 }, { "epoch": 0.34815278647463993, "grad_norm": 2.149883985519409, "learning_rate": 0.00025801564027370477, "loss": 0.8948, "step": 2224 }, { "epoch": 0.34830932999373826, "grad_norm": 2.79316782951355, "learning_rate": 0.000257991202346041, "loss": 0.9539, "step": 2225 }, { "epoch": 0.3484658735128366, "grad_norm": 1.0646382570266724, "learning_rate": 0.00025796676441837733, "loss": 0.6291, "step": 2226 }, { "epoch": 0.34862241703193486, "grad_norm": 
1.6639641523361206, "learning_rate": 0.0002579423264907136, "loss": 0.8177, "step": 2227 }, { "epoch": 0.3487789605510332, "grad_norm": 1.5588454008102417, "learning_rate": 0.00025791788856304983, "loss": 0.6531, "step": 2228 }, { "epoch": 0.3489355040701315, "grad_norm": 2.3921139240264893, "learning_rate": 0.00025789345063538614, "loss": 0.976, "step": 2229 }, { "epoch": 0.3490920475892298, "grad_norm": 3.0724098682403564, "learning_rate": 0.00025786901270772233, "loss": 1.2253, "step": 2230 }, { "epoch": 0.3492485911083281, "grad_norm": 1.900876760482788, "learning_rate": 0.00025784457478005864, "loss": 1.3439, "step": 2231 }, { "epoch": 0.34940513462742645, "grad_norm": 1.975421667098999, "learning_rate": 0.0002578201368523949, "loss": 1.0304, "step": 2232 }, { "epoch": 0.3495616781465247, "grad_norm": 1.7853161096572876, "learning_rate": 0.00025779569892473114, "loss": 0.8912, "step": 2233 }, { "epoch": 0.34971822166562305, "grad_norm": 2.0983622074127197, "learning_rate": 0.00025777126099706745, "loss": 0.7942, "step": 2234 }, { "epoch": 0.3498747651847213, "grad_norm": 2.663015127182007, "learning_rate": 0.0002577468230694037, "loss": 1.3245, "step": 2235 }, { "epoch": 0.35003130870381965, "grad_norm": 3.564725399017334, "learning_rate": 0.00025772238514173995, "loss": 1.5237, "step": 2236 }, { "epoch": 0.350187852222918, "grad_norm": 3.92618727684021, "learning_rate": 0.00025769794721407625, "loss": 0.9078, "step": 2237 }, { "epoch": 0.35034439574201626, "grad_norm": 2.12117862701416, "learning_rate": 0.0002576735092864125, "loss": 1.2595, "step": 2238 }, { "epoch": 0.3505009392611146, "grad_norm": 2.0806806087493896, "learning_rate": 0.00025764907135874876, "loss": 1.0926, "step": 2239 }, { "epoch": 0.3506574827802129, "grad_norm": 2.7820568084716797, "learning_rate": 0.000257624633431085, "loss": 1.1132, "step": 2240 }, { "epoch": 0.3508140262993112, "grad_norm": 2.921229362487793, "learning_rate": 0.0002576001955034213, "loss": 1.3909, "step": 2241 }, { 
"epoch": 0.3509705698184095, "grad_norm": 2.3714778423309326, "learning_rate": 0.00025757575757575756, "loss": 1.3431, "step": 2242 }, { "epoch": 0.35112711333750785, "grad_norm": 1.7340811491012573, "learning_rate": 0.0002575513196480938, "loss": 1.0429, "step": 2243 }, { "epoch": 0.3512836568566061, "grad_norm": 2.6049928665161133, "learning_rate": 0.0002575268817204301, "loss": 1.5378, "step": 2244 }, { "epoch": 0.35144020037570445, "grad_norm": 2.498192310333252, "learning_rate": 0.0002575024437927663, "loss": 0.9605, "step": 2245 }, { "epoch": 0.3515967438948028, "grad_norm": 2.289841890335083, "learning_rate": 0.0002574780058651026, "loss": 1.601, "step": 2246 }, { "epoch": 0.35175328741390105, "grad_norm": 3.376864194869995, "learning_rate": 0.0002574535679374389, "loss": 0.9655, "step": 2247 }, { "epoch": 0.3519098309329994, "grad_norm": 1.3867840766906738, "learning_rate": 0.0002574291300097751, "loss": 0.8568, "step": 2248 }, { "epoch": 0.3520663744520977, "grad_norm": 2.4591383934020996, "learning_rate": 0.00025740469208211143, "loss": 0.9717, "step": 2249 }, { "epoch": 0.352222917971196, "grad_norm": 2.244575023651123, "learning_rate": 0.0002573802541544477, "loss": 1.1796, "step": 2250 }, { "epoch": 0.3523794614902943, "grad_norm": 0.7717861533164978, "learning_rate": 0.00025735581622678393, "loss": 0.3882, "step": 2251 }, { "epoch": 0.3525360050093926, "grad_norm": 0.5575485825538635, "learning_rate": 0.00025733137829912024, "loss": 0.2957, "step": 2252 }, { "epoch": 0.3526925485284909, "grad_norm": 0.7117191553115845, "learning_rate": 0.0002573069403714565, "loss": 0.5051, "step": 2253 }, { "epoch": 0.35284909204758924, "grad_norm": 0.5327755808830261, "learning_rate": 0.00025728250244379274, "loss": 0.3964, "step": 2254 }, { "epoch": 0.3530056355666875, "grad_norm": 0.7493363618850708, "learning_rate": 0.000257258064516129, "loss": 0.4411, "step": 2255 }, { "epoch": 0.35316217908578584, "grad_norm": 0.5741692185401917, "learning_rate": 
0.0002572336265884653, "loss": 0.3451, "step": 2256 }, { "epoch": 0.3533187226048842, "grad_norm": 0.6874625086784363, "learning_rate": 0.00025720918866080155, "loss": 0.3136, "step": 2257 }, { "epoch": 0.35347526612398245, "grad_norm": 1.1320338249206543, "learning_rate": 0.0002571847507331378, "loss": 0.5177, "step": 2258 }, { "epoch": 0.3536318096430808, "grad_norm": 1.1008598804473877, "learning_rate": 0.0002571603128054741, "loss": 0.3906, "step": 2259 }, { "epoch": 0.3537883531621791, "grad_norm": 1.950392484664917, "learning_rate": 0.00025713587487781036, "loss": 0.4661, "step": 2260 }, { "epoch": 0.3539448966812774, "grad_norm": 0.9202426075935364, "learning_rate": 0.0002571114369501466, "loss": 0.3923, "step": 2261 }, { "epoch": 0.3541014402003757, "grad_norm": 1.0292601585388184, "learning_rate": 0.00025708699902248286, "loss": 0.5517, "step": 2262 }, { "epoch": 0.35425798371947403, "grad_norm": 0.9722676277160645, "learning_rate": 0.0002570625610948191, "loss": 0.432, "step": 2263 }, { "epoch": 0.3544145272385723, "grad_norm": 2.0393378734588623, "learning_rate": 0.0002570381231671554, "loss": 0.4121, "step": 2264 }, { "epoch": 0.35457107075767064, "grad_norm": 1.5482399463653564, "learning_rate": 0.00025701368523949167, "loss": 0.571, "step": 2265 }, { "epoch": 0.35472761427676897, "grad_norm": 1.6801763772964478, "learning_rate": 0.0002569892473118279, "loss": 0.6699, "step": 2266 }, { "epoch": 0.35488415779586724, "grad_norm": 1.4256728887557983, "learning_rate": 0.0002569648093841642, "loss": 0.9005, "step": 2267 }, { "epoch": 0.35504070131496557, "grad_norm": 1.3114697933197021, "learning_rate": 0.0002569403714565005, "loss": 0.5094, "step": 2268 }, { "epoch": 0.3551972448340639, "grad_norm": 1.3349591493606567, "learning_rate": 0.00025691593352883673, "loss": 0.7312, "step": 2269 }, { "epoch": 0.35535378835316217, "grad_norm": 1.4924392700195312, "learning_rate": 0.000256891495601173, "loss": 0.6074, "step": 2270 }, { "epoch": 0.3555103318722605, 
"grad_norm": 1.610140323638916, "learning_rate": 0.0002568670576735093, "loss": 0.9784, "step": 2271 }, { "epoch": 0.35566687539135877, "grad_norm": 3.816253662109375, "learning_rate": 0.00025684261974584554, "loss": 0.738, "step": 2272 }, { "epoch": 0.3558234189104571, "grad_norm": 2.6387505531311035, "learning_rate": 0.0002568181818181818, "loss": 0.6391, "step": 2273 }, { "epoch": 0.35597996242955543, "grad_norm": 2.228098154067993, "learning_rate": 0.0002567937438905181, "loss": 1.1959, "step": 2274 }, { "epoch": 0.3561365059486537, "grad_norm": 2.4509754180908203, "learning_rate": 0.00025676930596285434, "loss": 0.9127, "step": 2275 }, { "epoch": 0.35629304946775203, "grad_norm": 2.0629775524139404, "learning_rate": 0.0002567448680351906, "loss": 1.3263, "step": 2276 }, { "epoch": 0.35644959298685036, "grad_norm": 1.7958539724349976, "learning_rate": 0.0002567204301075269, "loss": 1.176, "step": 2277 }, { "epoch": 0.35660613650594863, "grad_norm": 1.8170077800750732, "learning_rate": 0.0002566959921798631, "loss": 0.7305, "step": 2278 }, { "epoch": 0.35676268002504696, "grad_norm": 2.358222007751465, "learning_rate": 0.0002566715542521994, "loss": 1.1738, "step": 2279 }, { "epoch": 0.3569192235441453, "grad_norm": 1.7362068891525269, "learning_rate": 0.00025664711632453565, "loss": 1.1321, "step": 2280 }, { "epoch": 0.35707576706324357, "grad_norm": 1.7529265880584717, "learning_rate": 0.0002566226783968719, "loss": 1.4302, "step": 2281 }, { "epoch": 0.3572323105823419, "grad_norm": 2.1040377616882324, "learning_rate": 0.0002565982404692082, "loss": 0.9679, "step": 2282 }, { "epoch": 0.3573888541014402, "grad_norm": 3.2066938877105713, "learning_rate": 0.00025657380254154446, "loss": 0.93, "step": 2283 }, { "epoch": 0.3575453976205385, "grad_norm": 1.855434536933899, "learning_rate": 0.0002565493646138807, "loss": 1.1485, "step": 2284 }, { "epoch": 0.3577019411396368, "grad_norm": 1.5777641534805298, "learning_rate": 0.00025652492668621696, "loss": 1.2011, 
"step": 2285 }, { "epoch": 0.35785848465873515, "grad_norm": 2.2551398277282715, "learning_rate": 0.00025650048875855327, "loss": 1.5703, "step": 2286 }, { "epoch": 0.3580150281778334, "grad_norm": 2.300269365310669, "learning_rate": 0.0002564760508308895, "loss": 1.4385, "step": 2287 }, { "epoch": 0.35817157169693176, "grad_norm": 1.81515634059906, "learning_rate": 0.00025645161290322577, "loss": 1.1124, "step": 2288 }, { "epoch": 0.35832811521603003, "grad_norm": 1.9418299198150635, "learning_rate": 0.0002564271749755621, "loss": 1.3713, "step": 2289 }, { "epoch": 0.35848465873512836, "grad_norm": 2.397949695587158, "learning_rate": 0.00025640273704789833, "loss": 1.1791, "step": 2290 }, { "epoch": 0.3586412022542267, "grad_norm": 2.4161155223846436, "learning_rate": 0.0002563782991202346, "loss": 1.7395, "step": 2291 }, { "epoch": 0.35879774577332496, "grad_norm": 2.1113173961639404, "learning_rate": 0.0002563538611925709, "loss": 0.9496, "step": 2292 }, { "epoch": 0.3589542892924233, "grad_norm": 1.8949174880981445, "learning_rate": 0.0002563294232649071, "loss": 1.4005, "step": 2293 }, { "epoch": 0.3591108328115216, "grad_norm": 3.1635854244232178, "learning_rate": 0.0002563049853372434, "loss": 1.3552, "step": 2294 }, { "epoch": 0.3592673763306199, "grad_norm": 1.8511496782302856, "learning_rate": 0.00025628054740957964, "loss": 1.6418, "step": 2295 }, { "epoch": 0.3594239198497182, "grad_norm": 2.0520694255828857, "learning_rate": 0.0002562561094819159, "loss": 0.8151, "step": 2296 }, { "epoch": 0.35958046336881655, "grad_norm": 2.5839831829071045, "learning_rate": 0.0002562316715542522, "loss": 0.7499, "step": 2297 }, { "epoch": 0.3597370068879148, "grad_norm": 1.368067979812622, "learning_rate": 0.00025620723362658845, "loss": 0.8066, "step": 2298 }, { "epoch": 0.35989355040701315, "grad_norm": 2.371605634689331, "learning_rate": 0.0002561827956989247, "loss": 1.376, "step": 2299 }, { "epoch": 0.3600500939261115, "grad_norm": 2.2831456661224365, 
"learning_rate": 0.000256158357771261, "loss": 1.2232, "step": 2300 }, { "epoch": 0.36020663744520975, "grad_norm": 0.44485318660736084, "learning_rate": 0.0002561339198435972, "loss": 0.3244, "step": 2301 }, { "epoch": 0.3603631809643081, "grad_norm": 0.7267107367515564, "learning_rate": 0.0002561094819159335, "loss": 0.5188, "step": 2302 }, { "epoch": 0.3605197244834064, "grad_norm": 0.7034205794334412, "learning_rate": 0.00025608504398826976, "loss": 0.404, "step": 2303 }, { "epoch": 0.3606762680025047, "grad_norm": 0.8780544996261597, "learning_rate": 0.000256060606060606, "loss": 0.4213, "step": 2304 }, { "epoch": 0.360832811521603, "grad_norm": 0.5896530747413635, "learning_rate": 0.0002560361681329423, "loss": 0.387, "step": 2305 }, { "epoch": 0.3609893550407013, "grad_norm": 0.7313452959060669, "learning_rate": 0.00025601173020527857, "loss": 0.3646, "step": 2306 }, { "epoch": 0.3611458985597996, "grad_norm": 0.7097526788711548, "learning_rate": 0.0002559872922776148, "loss": 0.4382, "step": 2307 }, { "epoch": 0.36130244207889795, "grad_norm": 0.685795247554779, "learning_rate": 0.0002559628543499511, "loss": 0.4068, "step": 2308 }, { "epoch": 0.3614589855979962, "grad_norm": 0.6047770380973816, "learning_rate": 0.0002559384164222874, "loss": 0.4213, "step": 2309 }, { "epoch": 0.36161552911709455, "grad_norm": 13.313465118408203, "learning_rate": 0.0002559139784946236, "loss": 3.0132, "step": 2310 }, { "epoch": 0.3617720726361929, "grad_norm": 1.098598599433899, "learning_rate": 0.0002558895405669599, "loss": 0.396, "step": 2311 }, { "epoch": 0.36192861615529115, "grad_norm": 0.8045850396156311, "learning_rate": 0.0002558651026392962, "loss": 0.4162, "step": 2312 }, { "epoch": 0.3620851596743895, "grad_norm": 1.3596242666244507, "learning_rate": 0.00025584066471163243, "loss": 0.7236, "step": 2313 }, { "epoch": 0.3622417031934878, "grad_norm": 1.0989056825637817, "learning_rate": 0.0002558162267839687, "loss": 0.4784, "step": 2314 }, { "epoch": 
0.3623982467125861, "grad_norm": 5.293839931488037, "learning_rate": 0.000255791788856305, "loss": 0.9534, "step": 2315 }, { "epoch": 0.3625547902316844, "grad_norm": 0.8783222436904907, "learning_rate": 0.0002557673509286412, "loss": 0.7846, "step": 2316 }, { "epoch": 0.36271133375078274, "grad_norm": 1.2049531936645508, "learning_rate": 0.0002557429130009775, "loss": 0.5091, "step": 2317 }, { "epoch": 0.362867877269881, "grad_norm": 1.0293569564819336, "learning_rate": 0.00025571847507331374, "loss": 0.5985, "step": 2318 }, { "epoch": 0.36302442078897934, "grad_norm": 1.8985306024551392, "learning_rate": 0.00025569403714565, "loss": 0.7279, "step": 2319 }, { "epoch": 0.36318096430807767, "grad_norm": 1.2464323043823242, "learning_rate": 0.0002556695992179863, "loss": 0.6954, "step": 2320 }, { "epoch": 0.36333750782717594, "grad_norm": 0.8314011096954346, "learning_rate": 0.00025564516129032255, "loss": 0.461, "step": 2321 }, { "epoch": 0.36349405134627427, "grad_norm": 1.2355091571807861, "learning_rate": 0.0002556207233626588, "loss": 0.719, "step": 2322 }, { "epoch": 0.3636505948653726, "grad_norm": 1.9257937669754028, "learning_rate": 0.0002555962854349951, "loss": 0.5286, "step": 2323 }, { "epoch": 0.3638071383844709, "grad_norm": 1.4777895212173462, "learning_rate": 0.00025557184750733136, "loss": 0.709, "step": 2324 }, { "epoch": 0.3639636819035692, "grad_norm": 2.033358573913574, "learning_rate": 0.0002555474095796676, "loss": 0.936, "step": 2325 }, { "epoch": 0.3641202254226675, "grad_norm": 2.625703811645508, "learning_rate": 0.00025552297165200386, "loss": 1.2012, "step": 2326 }, { "epoch": 0.3642767689417658, "grad_norm": 2.080801486968994, "learning_rate": 0.00025549853372434017, "loss": 0.7433, "step": 2327 }, { "epoch": 0.36443331246086413, "grad_norm": 1.9644887447357178, "learning_rate": 0.0002554740957966764, "loss": 0.8065, "step": 2328 }, { "epoch": 0.3645898559799624, "grad_norm": 3.320422410964966, "learning_rate": 0.00025544965786901267, 
"loss": 1.1196, "step": 2329 }, { "epoch": 0.36474639949906074, "grad_norm": 1.20960533618927, "learning_rate": 0.000255425219941349, "loss": 0.8819, "step": 2330 }, { "epoch": 0.36490294301815906, "grad_norm": 1.6307368278503418, "learning_rate": 0.0002554007820136852, "loss": 0.5572, "step": 2331 }, { "epoch": 0.36505948653725734, "grad_norm": 1.9175331592559814, "learning_rate": 0.0002553763440860215, "loss": 1.4483, "step": 2332 }, { "epoch": 0.36521603005635567, "grad_norm": 3.3411760330200195, "learning_rate": 0.00025535190615835773, "loss": 1.2187, "step": 2333 }, { "epoch": 0.365372573575454, "grad_norm": 2.1116483211517334, "learning_rate": 0.000255327468230694, "loss": 1.3473, "step": 2334 }, { "epoch": 0.36552911709455227, "grad_norm": 3.1497905254364014, "learning_rate": 0.0002553030303030303, "loss": 0.8682, "step": 2335 }, { "epoch": 0.3656856606136506, "grad_norm": 2.2446794509887695, "learning_rate": 0.00025527859237536654, "loss": 1.1501, "step": 2336 }, { "epoch": 0.3658422041327489, "grad_norm": 1.977690577507019, "learning_rate": 0.0002552541544477028, "loss": 1.2884, "step": 2337 }, { "epoch": 0.3659987476518472, "grad_norm": 2.598881721496582, "learning_rate": 0.0002552297165200391, "loss": 1.3231, "step": 2338 }, { "epoch": 0.36615529117094553, "grad_norm": 2.4923884868621826, "learning_rate": 0.00025520527859237535, "loss": 0.86, "step": 2339 }, { "epoch": 0.36631183469004386, "grad_norm": 2.453099250793457, "learning_rate": 0.0002551808406647116, "loss": 1.0986, "step": 2340 }, { "epoch": 0.36646837820914213, "grad_norm": 3.555971384048462, "learning_rate": 0.00025515640273704785, "loss": 1.8547, "step": 2341 }, { "epoch": 0.36662492172824046, "grad_norm": 1.9464694261550903, "learning_rate": 0.00025513196480938415, "loss": 1.1017, "step": 2342 }, { "epoch": 0.36678146524733873, "grad_norm": 3.1742851734161377, "learning_rate": 0.0002551075268817204, "loss": 1.7284, "step": 2343 }, { "epoch": 0.36693800876643706, "grad_norm": 
1.5996363162994385, "learning_rate": 0.00025508308895405666, "loss": 1.1764, "step": 2344 }, { "epoch": 0.3670945522855354, "grad_norm": 3.3336408138275146, "learning_rate": 0.00025505865102639296, "loss": 1.5985, "step": 2345 }, { "epoch": 0.36725109580463366, "grad_norm": 1.3342739343643188, "learning_rate": 0.0002550342130987292, "loss": 1.2361, "step": 2346 }, { "epoch": 0.367407639323732, "grad_norm": 2.0389742851257324, "learning_rate": 0.00025500977517106546, "loss": 0.6936, "step": 2347 }, { "epoch": 0.3675641828428303, "grad_norm": 1.7741936445236206, "learning_rate": 0.00025498533724340177, "loss": 0.6738, "step": 2348 }, { "epoch": 0.3677207263619286, "grad_norm": 2.689279079437256, "learning_rate": 0.00025496089931573797, "loss": 1.4255, "step": 2349 }, { "epoch": 0.3678772698810269, "grad_norm": 1.4633771181106567, "learning_rate": 0.00025493646138807427, "loss": 0.8265, "step": 2350 }, { "epoch": 0.36803381340012525, "grad_norm": 0.6005833148956299, "learning_rate": 0.0002549120234604105, "loss": 0.3965, "step": 2351 }, { "epoch": 0.3681903569192235, "grad_norm": 0.5253506898880005, "learning_rate": 0.0002548875855327468, "loss": 0.4102, "step": 2352 }, { "epoch": 0.36834690043832186, "grad_norm": 0.582460880279541, "learning_rate": 0.0002548631476050831, "loss": 0.2965, "step": 2353 }, { "epoch": 0.3685034439574202, "grad_norm": 0.8253412246704102, "learning_rate": 0.00025483870967741933, "loss": 0.3247, "step": 2354 }, { "epoch": 0.36865998747651846, "grad_norm": 0.6425923705101013, "learning_rate": 0.0002548142717497556, "loss": 0.4133, "step": 2355 }, { "epoch": 0.3688165309956168, "grad_norm": 1.085408091545105, "learning_rate": 0.00025478983382209183, "loss": 0.4947, "step": 2356 }, { "epoch": 0.3689730745147151, "grad_norm": 1.145398497581482, "learning_rate": 0.00025476539589442814, "loss": 0.6325, "step": 2357 }, { "epoch": 0.3691296180338134, "grad_norm": 0.7742732167243958, "learning_rate": 0.0002547409579667644, "loss": 0.4053, "step": 
2358 }, { "epoch": 0.3692861615529117, "grad_norm": 1.4050893783569336, "learning_rate": 0.00025471652003910064, "loss": 0.7099, "step": 2359 }, { "epoch": 0.36944270507201, "grad_norm": 0.5724520683288574, "learning_rate": 0.00025469208211143695, "loss": 0.4195, "step": 2360 }, { "epoch": 0.3695992485911083, "grad_norm": 1.2148267030715942, "learning_rate": 0.0002546676441837732, "loss": 0.6613, "step": 2361 }, { "epoch": 0.36975579211020665, "grad_norm": 1.6140097379684448, "learning_rate": 0.00025464320625610945, "loss": 0.5106, "step": 2362 }, { "epoch": 0.3699123356293049, "grad_norm": 1.0580832958221436, "learning_rate": 0.00025461876832844575, "loss": 0.6509, "step": 2363 }, { "epoch": 0.37006887914840325, "grad_norm": 1.0968374013900757, "learning_rate": 0.00025459433040078195, "loss": 0.4407, "step": 2364 }, { "epoch": 0.3702254226675016, "grad_norm": 1.8869554996490479, "learning_rate": 0.00025456989247311826, "loss": 0.8746, "step": 2365 }, { "epoch": 0.37038196618659985, "grad_norm": 1.1201070547103882, "learning_rate": 0.0002545454545454545, "loss": 0.4115, "step": 2366 }, { "epoch": 0.3705385097056982, "grad_norm": 1.391060709953308, "learning_rate": 0.00025452101661779076, "loss": 0.8105, "step": 2367 }, { "epoch": 0.3706950532247965, "grad_norm": 1.707555890083313, "learning_rate": 0.00025449657869012707, "loss": 0.5989, "step": 2368 }, { "epoch": 0.3708515967438948, "grad_norm": 1.1574084758758545, "learning_rate": 0.0002544721407624633, "loss": 0.7763, "step": 2369 }, { "epoch": 0.3710081402629931, "grad_norm": 4.23545503616333, "learning_rate": 0.00025444770283479957, "loss": 1.369, "step": 2370 }, { "epoch": 0.37116468378209144, "grad_norm": 1.9482706785202026, "learning_rate": 0.0002544232649071359, "loss": 0.5819, "step": 2371 }, { "epoch": 0.3713212273011897, "grad_norm": 1.2901068925857544, "learning_rate": 0.0002543988269794721, "loss": 1.0075, "step": 2372 }, { "epoch": 0.37147777082028804, "grad_norm": 1.395151138305664, "learning_rate": 
0.0002543743890518084, "loss": 0.5711, "step": 2373 }, { "epoch": 0.3716343143393864, "grad_norm": 1.3401544094085693, "learning_rate": 0.0002543499511241446, "loss": 0.7202, "step": 2374 }, { "epoch": 0.37179085785848465, "grad_norm": 0.9693850874900818, "learning_rate": 0.00025432551319648093, "loss": 0.7058, "step": 2375 }, { "epoch": 0.371947401377583, "grad_norm": 1.058971881866455, "learning_rate": 0.0002543010752688172, "loss": 0.6258, "step": 2376 }, { "epoch": 0.3721039448966813, "grad_norm": 2.7098138332366943, "learning_rate": 0.00025427663734115343, "loss": 0.741, "step": 2377 }, { "epoch": 0.3722604884157796, "grad_norm": 1.293344259262085, "learning_rate": 0.00025425219941348974, "loss": 0.7096, "step": 2378 }, { "epoch": 0.3724170319348779, "grad_norm": 6.409755706787109, "learning_rate": 0.000254227761485826, "loss": 0.9657, "step": 2379 }, { "epoch": 0.3725735754539762, "grad_norm": 3.3196206092834473, "learning_rate": 0.00025420332355816224, "loss": 0.8149, "step": 2380 }, { "epoch": 0.3727301189730745, "grad_norm": 1.7499516010284424, "learning_rate": 0.0002541788856304985, "loss": 1.0022, "step": 2381 }, { "epoch": 0.37288666249217284, "grad_norm": 2.6857728958129883, "learning_rate": 0.00025415444770283475, "loss": 0.8736, "step": 2382 }, { "epoch": 0.3730432060112711, "grad_norm": 2.264000415802002, "learning_rate": 0.00025413000977517105, "loss": 0.6507, "step": 2383 }, { "epoch": 0.37319974953036944, "grad_norm": 2.5130653381347656, "learning_rate": 0.0002541055718475073, "loss": 0.8562, "step": 2384 }, { "epoch": 0.37335629304946777, "grad_norm": 6.680568218231201, "learning_rate": 0.00025408113391984355, "loss": 1.6511, "step": 2385 }, { "epoch": 0.37351283656856604, "grad_norm": 3.2456836700439453, "learning_rate": 0.00025405669599217986, "loss": 1.1887, "step": 2386 }, { "epoch": 0.37366938008766437, "grad_norm": 2.358682632446289, "learning_rate": 0.0002540322580645161, "loss": 1.1949, "step": 2387 }, { "epoch": 0.3738259236067627, 
"grad_norm": 1.589746117591858, "learning_rate": 0.00025400782013685236, "loss": 1.248, "step": 2388 }, { "epoch": 0.373982467125861, "grad_norm": 3.969412326812744, "learning_rate": 0.0002539833822091886, "loss": 1.4367, "step": 2389 }, { "epoch": 0.3741390106449593, "grad_norm": 1.8342620134353638, "learning_rate": 0.0002539589442815249, "loss": 0.6383, "step": 2390 }, { "epoch": 0.37429555416405763, "grad_norm": 5.95599365234375, "learning_rate": 0.00025393450635386117, "loss": 1.3961, "step": 2391 }, { "epoch": 0.3744520976831559, "grad_norm": 2.8930656909942627, "learning_rate": 0.0002539100684261974, "loss": 1.5589, "step": 2392 }, { "epoch": 0.37460864120225423, "grad_norm": 3.1002862453460693, "learning_rate": 0.0002538856304985337, "loss": 1.4962, "step": 2393 }, { "epoch": 0.37476518472135256, "grad_norm": 4.003942489624023, "learning_rate": 0.00025386119257087, "loss": 1.2726, "step": 2394 }, { "epoch": 0.37492172824045084, "grad_norm": 3.102221965789795, "learning_rate": 0.00025383675464320623, "loss": 1.2946, "step": 2395 }, { "epoch": 0.37507827175954916, "grad_norm": 2.444598913192749, "learning_rate": 0.00025381231671554253, "loss": 1.1978, "step": 2396 }, { "epoch": 0.37523481527864744, "grad_norm": 2.2664988040924072, "learning_rate": 0.00025378787878787873, "loss": 0.7367, "step": 2397 }, { "epoch": 0.37539135879774577, "grad_norm": 1.9962775707244873, "learning_rate": 0.00025376344086021504, "loss": 0.4199, "step": 2398 }, { "epoch": 0.3755479023168441, "grad_norm": 2.8011834621429443, "learning_rate": 0.0002537390029325513, "loss": 1.2034, "step": 2399 }, { "epoch": 0.37570444583594237, "grad_norm": 1.9701213836669922, "learning_rate": 0.00025371456500488754, "loss": 1.0624, "step": 2400 }, { "epoch": 0.3758609893550407, "grad_norm": 0.7180734276771545, "learning_rate": 0.00025369012707722384, "loss": 0.3932, "step": 2401 }, { "epoch": 0.376017532874139, "grad_norm": 0.5675762295722961, "learning_rate": 0.0002536656891495601, "loss": 0.3901, 
"step": 2402 }, { "epoch": 0.3761740763932373, "grad_norm": 0.6413562893867493, "learning_rate": 0.00025364125122189635, "loss": 0.4672, "step": 2403 }, { "epoch": 0.37633061991233563, "grad_norm": 0.9517688155174255, "learning_rate": 0.0002536168132942326, "loss": 0.6919, "step": 2404 }, { "epoch": 0.37648716343143396, "grad_norm": 0.6282806396484375, "learning_rate": 0.0002535923753665689, "loss": 0.451, "step": 2405 }, { "epoch": 0.37664370695053223, "grad_norm": 0.590168833732605, "learning_rate": 0.00025356793743890515, "loss": 0.4078, "step": 2406 }, { "epoch": 0.37680025046963056, "grad_norm": 0.7684517502784729, "learning_rate": 0.0002535434995112414, "loss": 0.4757, "step": 2407 }, { "epoch": 0.3769567939887289, "grad_norm": 0.9231551289558411, "learning_rate": 0.0002535190615835777, "loss": 0.6356, "step": 2408 }, { "epoch": 0.37711333750782716, "grad_norm": 0.8362495303153992, "learning_rate": 0.00025349462365591396, "loss": 0.4089, "step": 2409 }, { "epoch": 0.3772698810269255, "grad_norm": 0.8577970862388611, "learning_rate": 0.0002534701857282502, "loss": 0.4651, "step": 2410 }, { "epoch": 0.3774264245460238, "grad_norm": 1.4198194742202759, "learning_rate": 0.0002534457478005865, "loss": 0.6993, "step": 2411 }, { "epoch": 0.3775829680651221, "grad_norm": 1.1189966201782227, "learning_rate": 0.0002534213098729227, "loss": 0.7051, "step": 2412 }, { "epoch": 0.3777395115842204, "grad_norm": 0.8378294706344604, "learning_rate": 0.000253396871945259, "loss": 0.4614, "step": 2413 }, { "epoch": 0.3778960551033187, "grad_norm": 0.8094697594642639, "learning_rate": 0.0002533724340175953, "loss": 0.4466, "step": 2414 }, { "epoch": 0.378052598622417, "grad_norm": 1.419098138809204, "learning_rate": 0.0002533479960899315, "loss": 0.7824, "step": 2415 }, { "epoch": 0.37820914214151535, "grad_norm": 2.8481099605560303, "learning_rate": 0.00025332355816226783, "loss": 0.643, "step": 2416 }, { "epoch": 0.3783656856606136, "grad_norm": 1.6152693033218384, 
"learning_rate": 0.0002532991202346041, "loss": 0.6802, "step": 2417 }, { "epoch": 0.37852222917971196, "grad_norm": 1.3891299962997437, "learning_rate": 0.00025327468230694033, "loss": 0.9134, "step": 2418 }, { "epoch": 0.3786787726988103, "grad_norm": 0.8565380573272705, "learning_rate": 0.00025325024437927664, "loss": 0.5214, "step": 2419 }, { "epoch": 0.37883531621790856, "grad_norm": 1.4971650838851929, "learning_rate": 0.0002532258064516129, "loss": 0.6881, "step": 2420 }, { "epoch": 0.3789918597370069, "grad_norm": 2.303706169128418, "learning_rate": 0.00025320136852394914, "loss": 0.9213, "step": 2421 }, { "epoch": 0.3791484032561052, "grad_norm": 1.7516024112701416, "learning_rate": 0.0002531769305962854, "loss": 0.7242, "step": 2422 }, { "epoch": 0.3793049467752035, "grad_norm": 1.990359902381897, "learning_rate": 0.0002531524926686217, "loss": 0.829, "step": 2423 }, { "epoch": 0.3794614902943018, "grad_norm": 3.0393333435058594, "learning_rate": 0.00025312805474095795, "loss": 1.1958, "step": 2424 }, { "epoch": 0.37961803381340015, "grad_norm": 1.408226728439331, "learning_rate": 0.0002531036168132942, "loss": 0.7958, "step": 2425 }, { "epoch": 0.3797745773324984, "grad_norm": 4.30161190032959, "learning_rate": 0.0002530791788856305, "loss": 1.0334, "step": 2426 }, { "epoch": 0.37993112085159675, "grad_norm": 1.420586109161377, "learning_rate": 0.0002530547409579667, "loss": 0.8091, "step": 2427 }, { "epoch": 0.3800876643706951, "grad_norm": 1.1459065675735474, "learning_rate": 0.000253030303030303, "loss": 0.5026, "step": 2428 }, { "epoch": 0.38024420788979335, "grad_norm": 1.717860460281372, "learning_rate": 0.00025300586510263926, "loss": 0.8863, "step": 2429 }, { "epoch": 0.3804007514088917, "grad_norm": 1.8267467021942139, "learning_rate": 0.0002529814271749755, "loss": 0.926, "step": 2430 }, { "epoch": 0.38055729492799, "grad_norm": 1.825463056564331, "learning_rate": 0.0002529569892473118, "loss": 1.062, "step": 2431 }, { "epoch": 
0.3807138384470883, "grad_norm": 4.6027140617370605, "learning_rate": 0.00025293255131964807, "loss": 1.0772, "step": 2432 }, { "epoch": 0.3808703819661866, "grad_norm": 1.6209831237792969, "learning_rate": 0.0002529081133919843, "loss": 1.2083, "step": 2433 }, { "epoch": 0.3810269254852849, "grad_norm": 1.9430205821990967, "learning_rate": 0.0002528836754643206, "loss": 0.8559, "step": 2434 }, { "epoch": 0.3811834690043832, "grad_norm": 5.019834041595459, "learning_rate": 0.0002528592375366569, "loss": 0.9144, "step": 2435 }, { "epoch": 0.38134001252348154, "grad_norm": 1.813771367073059, "learning_rate": 0.0002528347996089931, "loss": 0.5865, "step": 2436 }, { "epoch": 0.3814965560425798, "grad_norm": 6.178098201751709, "learning_rate": 0.0002528103616813294, "loss": 1.0331, "step": 2437 }, { "epoch": 0.38165309956167814, "grad_norm": 2.6770286560058594, "learning_rate": 0.0002527859237536657, "loss": 1.4776, "step": 2438 }, { "epoch": 0.3818096430807765, "grad_norm": 2.0814406871795654, "learning_rate": 0.00025276148582600193, "loss": 1.2574, "step": 2439 }, { "epoch": 0.38196618659987475, "grad_norm": 2.1399121284484863, "learning_rate": 0.0002527370478983382, "loss": 1.2914, "step": 2440 }, { "epoch": 0.3821227301189731, "grad_norm": 2.3518238067626953, "learning_rate": 0.0002527126099706745, "loss": 1.0346, "step": 2441 }, { "epoch": 0.3822792736380714, "grad_norm": 2.9069864749908447, "learning_rate": 0.00025268817204301074, "loss": 1.0897, "step": 2442 }, { "epoch": 0.3824358171571697, "grad_norm": 10.248844146728516, "learning_rate": 0.000252663734115347, "loss": 1.375, "step": 2443 }, { "epoch": 0.382592360676268, "grad_norm": 2.556163787841797, "learning_rate": 0.00025263929618768324, "loss": 1.5689, "step": 2444 }, { "epoch": 0.38274890419536634, "grad_norm": 6.150379657745361, "learning_rate": 0.0002526148582600195, "loss": 1.5573, "step": 2445 }, { "epoch": 0.3829054477144646, "grad_norm": 2.1215577125549316, "learning_rate": 0.0002525904203323558, 
"loss": 0.7524, "step": 2446 }, { "epoch": 0.38306199123356294, "grad_norm": 1.778437852859497, "learning_rate": 0.00025256598240469205, "loss": 1.0496, "step": 2447 }, { "epoch": 0.38321853475266127, "grad_norm": 1.631048560142517, "learning_rate": 0.0002525415444770283, "loss": 0.676, "step": 2448 }, { "epoch": 0.38337507827175954, "grad_norm": 2.2763094902038574, "learning_rate": 0.0002525171065493646, "loss": 1.1262, "step": 2449 }, { "epoch": 0.38353162179085787, "grad_norm": 1.9400389194488525, "learning_rate": 0.00025249266862170086, "loss": 1.5447, "step": 2450 }, { "epoch": 0.38368816530995614, "grad_norm": 0.6042339205741882, "learning_rate": 0.0002524682306940371, "loss": 0.4302, "step": 2451 }, { "epoch": 0.38384470882905447, "grad_norm": 0.6273007392883301, "learning_rate": 0.00025244379276637336, "loss": 0.4748, "step": 2452 }, { "epoch": 0.3840012523481528, "grad_norm": 0.5941194891929626, "learning_rate": 0.00025241935483870967, "loss": 0.3699, "step": 2453 }, { "epoch": 0.3841577958672511, "grad_norm": 0.6583676338195801, "learning_rate": 0.0002523949169110459, "loss": 0.5617, "step": 2454 }, { "epoch": 0.3843143393863494, "grad_norm": 0.9693568348884583, "learning_rate": 0.00025237047898338217, "loss": 0.4727, "step": 2455 }, { "epoch": 0.38447088290544773, "grad_norm": 0.7565197944641113, "learning_rate": 0.0002523460410557185, "loss": 0.4105, "step": 2456 }, { "epoch": 0.384627426424546, "grad_norm": 0.9457018375396729, "learning_rate": 0.00025232160312805473, "loss": 0.6106, "step": 2457 }, { "epoch": 0.38478396994364433, "grad_norm": 0.8594042062759399, "learning_rate": 0.000252297165200391, "loss": 0.4825, "step": 2458 }, { "epoch": 0.38494051346274266, "grad_norm": 0.9214059114456177, "learning_rate": 0.0002522727272727273, "loss": 0.4283, "step": 2459 }, { "epoch": 0.38509705698184094, "grad_norm": 1.0324896574020386, "learning_rate": 0.0002522482893450635, "loss": 0.4448, "step": 2460 }, { "epoch": 0.38525360050093926, "grad_norm": 
0.8373879194259644, "learning_rate": 0.0002522238514173998, "loss": 0.4815, "step": 2461 }, { "epoch": 0.3854101440200376, "grad_norm": 0.5907357931137085, "learning_rate": 0.00025219941348973604, "loss": 0.3629, "step": 2462 }, { "epoch": 0.38556668753913587, "grad_norm": 0.8201125860214233, "learning_rate": 0.0002521749755620723, "loss": 0.4437, "step": 2463 }, { "epoch": 0.3857232310582342, "grad_norm": 1.6409950256347656, "learning_rate": 0.0002521505376344086, "loss": 0.6879, "step": 2464 }, { "epoch": 0.3858797745773325, "grad_norm": 1.9288908243179321, "learning_rate": 0.00025212609970674485, "loss": 0.8157, "step": 2465 }, { "epoch": 0.3860363180964308, "grad_norm": 1.1502376794815063, "learning_rate": 0.0002521016617790811, "loss": 0.746, "step": 2466 }, { "epoch": 0.3861928616155291, "grad_norm": 1.607825756072998, "learning_rate": 0.0002520772238514174, "loss": 0.4716, "step": 2467 }, { "epoch": 0.3863494051346274, "grad_norm": 1.3579707145690918, "learning_rate": 0.00025205278592375365, "loss": 0.486, "step": 2468 }, { "epoch": 0.38650594865372573, "grad_norm": 1.369374394416809, "learning_rate": 0.0002520283479960899, "loss": 0.6688, "step": 2469 }, { "epoch": 0.38666249217282406, "grad_norm": 0.9472814798355103, "learning_rate": 0.00025200391006842616, "loss": 0.4558, "step": 2470 }, { "epoch": 0.38681903569192233, "grad_norm": 1.0959384441375732, "learning_rate": 0.00025197947214076246, "loss": 0.7678, "step": 2471 }, { "epoch": 0.38697557921102066, "grad_norm": 1.783223032951355, "learning_rate": 0.0002519550342130987, "loss": 0.8826, "step": 2472 }, { "epoch": 0.387132122730119, "grad_norm": 0.7266517281532288, "learning_rate": 0.00025193059628543496, "loss": 0.3746, "step": 2473 }, { "epoch": 0.38728866624921726, "grad_norm": 1.6912201642990112, "learning_rate": 0.00025190615835777127, "loss": 0.7786, "step": 2474 }, { "epoch": 0.3874452097683156, "grad_norm": 2.1097683906555176, "learning_rate": 0.00025188172043010747, "loss": 1.0013, "step": 
2475 }, { "epoch": 0.3876017532874139, "grad_norm": 2.697650671005249, "learning_rate": 0.00025185728250244377, "loss": 0.9238, "step": 2476 }, { "epoch": 0.3877582968065122, "grad_norm": 1.2909862995147705, "learning_rate": 0.00025183284457478, "loss": 0.613, "step": 2477 }, { "epoch": 0.3879148403256105, "grad_norm": 2.043966293334961, "learning_rate": 0.0002518084066471163, "loss": 0.698, "step": 2478 }, { "epoch": 0.38807138384470885, "grad_norm": 1.7088145017623901, "learning_rate": 0.0002517839687194526, "loss": 0.8166, "step": 2479 }, { "epoch": 0.3882279273638071, "grad_norm": 1.015830397605896, "learning_rate": 0.00025175953079178883, "loss": 0.5839, "step": 2480 }, { "epoch": 0.38838447088290545, "grad_norm": 1.558098316192627, "learning_rate": 0.0002517350928641251, "loss": 0.7847, "step": 2481 }, { "epoch": 0.3885410144020038, "grad_norm": 1.4762747287750244, "learning_rate": 0.0002517106549364614, "loss": 0.6532, "step": 2482 }, { "epoch": 0.38869755792110205, "grad_norm": 2.2666919231414795, "learning_rate": 0.00025168621700879764, "loss": 0.9531, "step": 2483 }, { "epoch": 0.3888541014402004, "grad_norm": 3.644498586654663, "learning_rate": 0.0002516617790811339, "loss": 1.3798, "step": 2484 }, { "epoch": 0.3890106449592987, "grad_norm": 1.4934632778167725, "learning_rate": 0.00025163734115347014, "loss": 0.6577, "step": 2485 }, { "epoch": 0.389167188478397, "grad_norm": 2.9513094425201416, "learning_rate": 0.00025161290322580645, "loss": 1.0388, "step": 2486 }, { "epoch": 0.3893237319974953, "grad_norm": 2.517242670059204, "learning_rate": 0.0002515884652981427, "loss": 1.1773, "step": 2487 }, { "epoch": 0.3894802755165936, "grad_norm": 2.8088479042053223, "learning_rate": 0.00025156402737047895, "loss": 1.2082, "step": 2488 }, { "epoch": 0.3896368190356919, "grad_norm": 2.6290640830993652, "learning_rate": 0.00025153958944281526, "loss": 1.6824, "step": 2489 }, { "epoch": 0.38979336255479025, "grad_norm": 1.979951024055481, "learning_rate": 
0.0002515151515151515, "loss": 1.1155, "step": 2490 }, { "epoch": 0.3899499060738885, "grad_norm": 3.4212875366210938, "learning_rate": 0.00025149071358748776, "loss": 1.1699, "step": 2491 }, { "epoch": 0.39010644959298685, "grad_norm": 2.455173969268799, "learning_rate": 0.000251466275659824, "loss": 1.516, "step": 2492 }, { "epoch": 0.3902629931120852, "grad_norm": 3.4120118618011475, "learning_rate": 0.00025144183773216026, "loss": 1.3543, "step": 2493 }, { "epoch": 0.39041953663118345, "grad_norm": 1.8049226999282837, "learning_rate": 0.00025141739980449657, "loss": 1.1935, "step": 2494 }, { "epoch": 0.3905760801502818, "grad_norm": 2.091107130050659, "learning_rate": 0.0002513929618768328, "loss": 1.9788, "step": 2495 }, { "epoch": 0.3907326236693801, "grad_norm": 1.4167218208312988, "learning_rate": 0.00025136852394916907, "loss": 0.7783, "step": 2496 }, { "epoch": 0.3908891671884784, "grad_norm": 2.366852045059204, "learning_rate": 0.0002513440860215054, "loss": 0.9097, "step": 2497 }, { "epoch": 0.3910457107075767, "grad_norm": 1.7226179838180542, "learning_rate": 0.0002513196480938416, "loss": 0.9851, "step": 2498 }, { "epoch": 0.39120225422667504, "grad_norm": 1.691483974456787, "learning_rate": 0.0002512952101661779, "loss": 1.281, "step": 2499 }, { "epoch": 0.3913587977457733, "grad_norm": 2.1025145053863525, "learning_rate": 0.00025127077223851413, "loss": 1.495, "step": 2500 }, { "epoch": 0.39151534126487164, "grad_norm": 0.49316316843032837, "learning_rate": 0.00025124633431085043, "loss": 0.4291, "step": 2501 }, { "epoch": 0.39167188478396997, "grad_norm": 0.47687578201293945, "learning_rate": 0.0002512218963831867, "loss": 0.3186, "step": 2502 }, { "epoch": 0.39182842830306824, "grad_norm": 0.6680770516395569, "learning_rate": 0.00025119745845552294, "loss": 0.4185, "step": 2503 }, { "epoch": 0.3919849718221666, "grad_norm": 0.6595609188079834, "learning_rate": 0.00025117302052785924, "loss": 0.3147, "step": 2504 }, { "epoch": 0.39214151534126485, 
"grad_norm": 0.8479176759719849, "learning_rate": 0.0002511485826001955, "loss": 0.3578, "step": 2505 }, { "epoch": 0.3922980588603632, "grad_norm": 0.6460444331169128, "learning_rate": 0.00025112414467253174, "loss": 0.3305, "step": 2506 }, { "epoch": 0.3924546023794615, "grad_norm": 0.7635279297828674, "learning_rate": 0.00025109970674486805, "loss": 0.3328, "step": 2507 }, { "epoch": 0.3926111458985598, "grad_norm": 0.6837889552116394, "learning_rate": 0.00025107526881720425, "loss": 0.3137, "step": 2508 }, { "epoch": 0.3927676894176581, "grad_norm": 3.4760055541992188, "learning_rate": 0.00025105083088954055, "loss": 1.145, "step": 2509 }, { "epoch": 0.39292423293675643, "grad_norm": 0.936640739440918, "learning_rate": 0.0002510263929618768, "loss": 0.4839, "step": 2510 }, { "epoch": 0.3930807764558547, "grad_norm": 0.6478950381278992, "learning_rate": 0.00025100195503421305, "loss": 0.239, "step": 2511 }, { "epoch": 0.39323731997495304, "grad_norm": 1.125580906867981, "learning_rate": 0.00025097751710654936, "loss": 0.5681, "step": 2512 }, { "epoch": 0.39339386349405137, "grad_norm": 1.4228874444961548, "learning_rate": 0.0002509530791788856, "loss": 0.7442, "step": 2513 }, { "epoch": 0.39355040701314964, "grad_norm": 1.260831356048584, "learning_rate": 0.00025092864125122186, "loss": 0.6783, "step": 2514 }, { "epoch": 0.39370695053224797, "grad_norm": 1.2701672315597534, "learning_rate": 0.0002509042033235581, "loss": 0.426, "step": 2515 }, { "epoch": 0.3938634940513463, "grad_norm": 0.7969043850898743, "learning_rate": 0.0002508797653958944, "loss": 0.4279, "step": 2516 }, { "epoch": 0.39402003757044457, "grad_norm": 2.375305414199829, "learning_rate": 0.00025085532746823067, "loss": 0.8547, "step": 2517 }, { "epoch": 0.3941765810895429, "grad_norm": 1.0514167547225952, "learning_rate": 0.0002508308895405669, "loss": 0.5921, "step": 2518 }, { "epoch": 0.39433312460864123, "grad_norm": 1.659162998199463, "learning_rate": 0.0002508064516129032, "loss": 0.792, 
"step": 2519 }, { "epoch": 0.3944896681277395, "grad_norm": 1.6312371492385864, "learning_rate": 0.0002507820136852395, "loss": 0.8341, "step": 2520 }, { "epoch": 0.39464621164683783, "grad_norm": 1.0578227043151855, "learning_rate": 0.00025075757575757573, "loss": 0.6725, "step": 2521 }, { "epoch": 0.3948027551659361, "grad_norm": 1.3463261127471924, "learning_rate": 0.00025073313782991203, "loss": 0.753, "step": 2522 }, { "epoch": 0.39495929868503443, "grad_norm": 1.1711524724960327, "learning_rate": 0.00025070869990224823, "loss": 0.6932, "step": 2523 }, { "epoch": 0.39511584220413276, "grad_norm": 1.7872284650802612, "learning_rate": 0.00025068426197458454, "loss": 0.8229, "step": 2524 }, { "epoch": 0.39527238572323103, "grad_norm": 1.4881881475448608, "learning_rate": 0.0002506598240469208, "loss": 1.1885, "step": 2525 }, { "epoch": 0.39542892924232936, "grad_norm": 2.591526508331299, "learning_rate": 0.00025063538611925704, "loss": 0.9122, "step": 2526 }, { "epoch": 0.3955854727614277, "grad_norm": 2.517484188079834, "learning_rate": 0.00025061094819159334, "loss": 1.1493, "step": 2527 }, { "epoch": 0.39574201628052597, "grad_norm": 1.369888424873352, "learning_rate": 0.0002505865102639296, "loss": 0.7883, "step": 2528 }, { "epoch": 0.3958985597996243, "grad_norm": 1.5327993631362915, "learning_rate": 0.00025056207233626585, "loss": 0.5566, "step": 2529 }, { "epoch": 0.3960551033187226, "grad_norm": 2.494425058364868, "learning_rate": 0.00025053763440860215, "loss": 0.8489, "step": 2530 }, { "epoch": 0.3962116468378209, "grad_norm": 1.5433497428894043, "learning_rate": 0.0002505131964809384, "loss": 0.8051, "step": 2531 }, { "epoch": 0.3963681903569192, "grad_norm": 3.3626677989959717, "learning_rate": 0.00025048875855327465, "loss": 1.0817, "step": 2532 }, { "epoch": 0.39652473387601755, "grad_norm": 2.291754722595215, "learning_rate": 0.0002504643206256109, "loss": 0.9022, "step": 2533 }, { "epoch": 0.3966812773951158, "grad_norm": 2.296135425567627, 
"learning_rate": 0.0002504398826979472, "loss": 1.4655, "step": 2534 }, { "epoch": 0.39683782091421416, "grad_norm": 3.5448482036590576, "learning_rate": 0.00025041544477028346, "loss": 0.9305, "step": 2535 }, { "epoch": 0.3969943644333125, "grad_norm": 2.3300135135650635, "learning_rate": 0.0002503910068426197, "loss": 1.3307, "step": 2536 }, { "epoch": 0.39715090795241076, "grad_norm": 1.741479516029358, "learning_rate": 0.000250366568914956, "loss": 0.7833, "step": 2537 }, { "epoch": 0.3973074514715091, "grad_norm": 4.329046249389648, "learning_rate": 0.00025034213098729227, "loss": 1.0903, "step": 2538 }, { "epoch": 0.3974639949906074, "grad_norm": 5.144536972045898, "learning_rate": 0.0002503176930596285, "loss": 1.4296, "step": 2539 }, { "epoch": 0.3976205385097057, "grad_norm": 2.033552408218384, "learning_rate": 0.0002502932551319648, "loss": 1.2994, "step": 2540 }, { "epoch": 0.397777082028804, "grad_norm": 2.1732325553894043, "learning_rate": 0.000250268817204301, "loss": 1.3316, "step": 2541 }, { "epoch": 0.3979336255479023, "grad_norm": 4.526739597320557, "learning_rate": 0.00025024437927663733, "loss": 1.187, "step": 2542 }, { "epoch": 0.3980901690670006, "grad_norm": 1.4033749103546143, "learning_rate": 0.0002502199413489736, "loss": 0.8175, "step": 2543 }, { "epoch": 0.39824671258609895, "grad_norm": 3.880048990249634, "learning_rate": 0.00025019550342130983, "loss": 1.6048, "step": 2544 }, { "epoch": 0.3984032561051972, "grad_norm": 2.1508264541625977, "learning_rate": 0.00025017106549364614, "loss": 1.3564, "step": 2545 }, { "epoch": 0.39855979962429555, "grad_norm": 1.6752632856369019, "learning_rate": 0.0002501466275659824, "loss": 0.6449, "step": 2546 }, { "epoch": 0.3987163431433939, "grad_norm": 3.97983980178833, "learning_rate": 0.00025012218963831864, "loss": 1.7772, "step": 2547 }, { "epoch": 0.39887288666249215, "grad_norm": 2.7829971313476562, "learning_rate": 0.0002500977517106549, "loss": 1.109, "step": 2548 }, { "epoch": 
0.3990294301815905, "grad_norm": 1.2870094776153564, "learning_rate": 0.0002500733137829912, "loss": 0.4101, "step": 2549 }, { "epoch": 0.3991859737006888, "grad_norm": 2.9641854763031006, "learning_rate": 0.00025004887585532745, "loss": 1.1036, "step": 2550 }, { "epoch": 0.3993425172197871, "grad_norm": 0.6355754137039185, "learning_rate": 0.0002500244379276637, "loss": 0.3884, "step": 2551 }, { "epoch": 0.3994990607388854, "grad_norm": 0.7654311656951904, "learning_rate": 0.00025, "loss": 0.4459, "step": 2552 }, { "epoch": 0.39965560425798374, "grad_norm": 0.6441348791122437, "learning_rate": 0.00024997556207233626, "loss": 0.3962, "step": 2553 }, { "epoch": 0.399812147777082, "grad_norm": 0.8454661965370178, "learning_rate": 0.0002499511241446725, "loss": 0.3226, "step": 2554 }, { "epoch": 0.39996869129618035, "grad_norm": 0.7234505414962769, "learning_rate": 0.0002499266862170088, "loss": 0.3463, "step": 2555 }, { "epoch": 0.4001252348152787, "grad_norm": 0.7027657628059387, "learning_rate": 0.000249902248289345, "loss": 0.2914, "step": 2556 }, { "epoch": 0.40028177833437695, "grad_norm": 0.9003955721855164, "learning_rate": 0.0002498778103616813, "loss": 0.3581, "step": 2557 }, { "epoch": 0.4004383218534753, "grad_norm": 0.8857301473617554, "learning_rate": 0.00024985337243401757, "loss": 0.6009, "step": 2558 }, { "epoch": 0.40059486537257355, "grad_norm": 0.8024937510490417, "learning_rate": 0.0002498289345063538, "loss": 0.407, "step": 2559 }, { "epoch": 0.4007514088916719, "grad_norm": 1.1914548873901367, "learning_rate": 0.0002498044965786901, "loss": 0.5759, "step": 2560 }, { "epoch": 0.4009079524107702, "grad_norm": 1.0876412391662598, "learning_rate": 0.0002497800586510264, "loss": 0.4644, "step": 2561 }, { "epoch": 0.4010644959298685, "grad_norm": 0.8005790710449219, "learning_rate": 0.0002497556207233626, "loss": 0.327, "step": 2562 }, { "epoch": 0.4012210394489668, "grad_norm": 0.8703690767288208, "learning_rate": 0.0002497311827956989, "loss": 
0.6214, "step": 2563 }, { "epoch": 0.40137758296806514, "grad_norm": 1.3815338611602783, "learning_rate": 0.0002497067448680352, "loss": 0.4908, "step": 2564 }, { "epoch": 0.4015341264871634, "grad_norm": 3.5727379322052, "learning_rate": 0.00024968230694037143, "loss": 0.4925, "step": 2565 }, { "epoch": 0.40169067000626174, "grad_norm": 1.0240099430084229, "learning_rate": 0.0002496578690127077, "loss": 0.661, "step": 2566 }, { "epoch": 0.40184721352536007, "grad_norm": 1.9735991954803467, "learning_rate": 0.000249633431085044, "loss": 0.5323, "step": 2567 }, { "epoch": 0.40200375704445834, "grad_norm": 1.2893562316894531, "learning_rate": 0.00024960899315738024, "loss": 0.5383, "step": 2568 }, { "epoch": 0.40216030056355667, "grad_norm": 0.8876537680625916, "learning_rate": 0.0002495845552297165, "loss": 0.6309, "step": 2569 }, { "epoch": 0.402316844082655, "grad_norm": 1.0564181804656982, "learning_rate": 0.0002495601173020528, "loss": 0.5627, "step": 2570 }, { "epoch": 0.4024733876017533, "grad_norm": 1.0052175521850586, "learning_rate": 0.000249535679374389, "loss": 0.6418, "step": 2571 }, { "epoch": 0.4026299311208516, "grad_norm": 0.7646985054016113, "learning_rate": 0.0002495112414467253, "loss": 0.4353, "step": 2572 }, { "epoch": 0.40278647463994993, "grad_norm": 1.8414807319641113, "learning_rate": 0.00024948680351906155, "loss": 0.5096, "step": 2573 }, { "epoch": 0.4029430181590482, "grad_norm": 1.9422529935836792, "learning_rate": 0.0002494623655913978, "loss": 0.6357, "step": 2574 }, { "epoch": 0.40309956167814653, "grad_norm": 2.9409773349761963, "learning_rate": 0.0002494379276637341, "loss": 0.5801, "step": 2575 }, { "epoch": 0.4032561051972448, "grad_norm": 1.64481782913208, "learning_rate": 0.00024941348973607036, "loss": 1.0987, "step": 2576 }, { "epoch": 0.40341264871634314, "grad_norm": 2.1421914100646973, "learning_rate": 0.0002493890518084066, "loss": 0.7097, "step": 2577 }, { "epoch": 0.40356919223544147, "grad_norm": 1.6423346996307373, 
"learning_rate": 0.0002493646138807429, "loss": 0.8432, "step": 2578 }, { "epoch": 0.40372573575453974, "grad_norm": 4.2599005699157715, "learning_rate": 0.00024934017595307917, "loss": 1.2652, "step": 2579 }, { "epoch": 0.40388227927363807, "grad_norm": 2.666485071182251, "learning_rate": 0.0002493157380254154, "loss": 1.0066, "step": 2580 }, { "epoch": 0.4040388227927364, "grad_norm": 3.4429917335510254, "learning_rate": 0.00024929130009775167, "loss": 1.0087, "step": 2581 }, { "epoch": 0.40419536631183467, "grad_norm": 3.632044553756714, "learning_rate": 0.000249266862170088, "loss": 1.2341, "step": 2582 }, { "epoch": 0.404351909830933, "grad_norm": 1.675054907798767, "learning_rate": 0.00024924242424242423, "loss": 0.939, "step": 2583 }, { "epoch": 0.4045084533500313, "grad_norm": 2.2389726638793945, "learning_rate": 0.0002492179863147605, "loss": 1.3942, "step": 2584 }, { "epoch": 0.4046649968691296, "grad_norm": 1.801397442817688, "learning_rate": 0.0002491935483870968, "loss": 0.9123, "step": 2585 }, { "epoch": 0.40482154038822793, "grad_norm": 3.3526382446289062, "learning_rate": 0.000249169110459433, "loss": 1.5424, "step": 2586 }, { "epoch": 0.40497808390732626, "grad_norm": 1.4892598390579224, "learning_rate": 0.0002491446725317693, "loss": 0.7216, "step": 2587 }, { "epoch": 0.40513462742642453, "grad_norm": 2.2398314476013184, "learning_rate": 0.00024912023460410554, "loss": 1.1292, "step": 2588 }, { "epoch": 0.40529117094552286, "grad_norm": 7.038582801818848, "learning_rate": 0.0002490957966764418, "loss": 1.1889, "step": 2589 }, { "epoch": 0.4054477144646212, "grad_norm": 2.2800958156585693, "learning_rate": 0.0002490713587487781, "loss": 1.0995, "step": 2590 }, { "epoch": 0.40560425798371946, "grad_norm": 2.6573739051818848, "learning_rate": 0.00024904692082111435, "loss": 1.5452, "step": 2591 }, { "epoch": 0.4057608015028178, "grad_norm": 2.7160732746124268, "learning_rate": 0.0002490224828934506, "loss": 1.134, "step": 2592 }, { "epoch": 
0.40591734502191607, "grad_norm": 3.265036106109619, "learning_rate": 0.0002489980449657869, "loss": 1.2917, "step": 2593 }, { "epoch": 0.4060738885410144, "grad_norm": 3.3533453941345215, "learning_rate": 0.00024897360703812315, "loss": 1.5418, "step": 2594 }, { "epoch": 0.4062304320601127, "grad_norm": 1.740634560585022, "learning_rate": 0.0002489491691104594, "loss": 1.3963, "step": 2595 }, { "epoch": 0.406386975579211, "grad_norm": 5.385989665985107, "learning_rate": 0.00024892473118279566, "loss": 0.9535, "step": 2596 }, { "epoch": 0.4065435190983093, "grad_norm": 2.453171730041504, "learning_rate": 0.00024890029325513196, "loss": 1.1681, "step": 2597 }, { "epoch": 0.40670006261740765, "grad_norm": 2.1263811588287354, "learning_rate": 0.0002488758553274682, "loss": 0.795, "step": 2598 }, { "epoch": 0.4068566061365059, "grad_norm": 2.388746738433838, "learning_rate": 0.00024885141739980446, "loss": 0.9264, "step": 2599 }, { "epoch": 0.40701314965560426, "grad_norm": 1.1103012561798096, "learning_rate": 0.00024882697947214077, "loss": 0.8049, "step": 2600 }, { "epoch": 0.4071696931747026, "grad_norm": 0.601675271987915, "learning_rate": 0.000248802541544477, "loss": 0.365, "step": 2601 }, { "epoch": 0.40732623669380086, "grad_norm": 0.5282478332519531, "learning_rate": 0.00024877810361681327, "loss": 0.3728, "step": 2602 }, { "epoch": 0.4074827802128992, "grad_norm": 0.5197291374206543, "learning_rate": 0.0002487536656891495, "loss": 0.3763, "step": 2603 }, { "epoch": 0.4076393237319975, "grad_norm": 1.0491033792495728, "learning_rate": 0.0002487292277614858, "loss": 0.4424, "step": 2604 }, { "epoch": 0.4077958672510958, "grad_norm": 1.314231038093567, "learning_rate": 0.0002487047898338221, "loss": 0.5056, "step": 2605 }, { "epoch": 0.4079524107701941, "grad_norm": 0.6822581887245178, "learning_rate": 0.00024868035190615833, "loss": 0.3068, "step": 2606 }, { "epoch": 0.40810895428929245, "grad_norm": 1.021838665008545, "learning_rate": 0.0002486559139784946, 
"loss": 0.4735, "step": 2607 }, { "epoch": 0.4082654978083907, "grad_norm": 0.770111083984375, "learning_rate": 0.0002486314760508309, "loss": 0.4265, "step": 2608 }, { "epoch": 0.40842204132748905, "grad_norm": 0.6351111531257629, "learning_rate": 0.00024860703812316714, "loss": 0.4441, "step": 2609 }, { "epoch": 0.4085785848465874, "grad_norm": 1.667920470237732, "learning_rate": 0.0002485826001955034, "loss": 0.8832, "step": 2610 }, { "epoch": 0.40873512836568565, "grad_norm": 0.9965152740478516, "learning_rate": 0.00024855816226783964, "loss": 0.4595, "step": 2611 }, { "epoch": 0.408891671884784, "grad_norm": 1.3201706409454346, "learning_rate": 0.00024853372434017595, "loss": 0.7611, "step": 2612 }, { "epoch": 0.40904821540388225, "grad_norm": 0.8612977266311646, "learning_rate": 0.0002485092864125122, "loss": 0.4856, "step": 2613 }, { "epoch": 0.4092047589229806, "grad_norm": 2.6356048583984375, "learning_rate": 0.00024848484848484845, "loss": 0.9506, "step": 2614 }, { "epoch": 0.4093613024420789, "grad_norm": 1.1005220413208008, "learning_rate": 0.00024846041055718476, "loss": 0.4625, "step": 2615 }, { "epoch": 0.4095178459611772, "grad_norm": 1.277478814125061, "learning_rate": 0.000248435972629521, "loss": 0.747, "step": 2616 }, { "epoch": 0.4096743894802755, "grad_norm": 1.216950535774231, "learning_rate": 0.00024841153470185726, "loss": 0.5756, "step": 2617 }, { "epoch": 0.40983093299937384, "grad_norm": 1.0903007984161377, "learning_rate": 0.00024838709677419356, "loss": 0.7352, "step": 2618 }, { "epoch": 0.4099874765184721, "grad_norm": 1.3664308786392212, "learning_rate": 0.00024836265884652976, "loss": 0.6177, "step": 2619 }, { "epoch": 0.41014402003757044, "grad_norm": 1.4325757026672363, "learning_rate": 0.00024833822091886607, "loss": 0.7601, "step": 2620 }, { "epoch": 0.4103005635566688, "grad_norm": 1.858127236366272, "learning_rate": 0.0002483137829912023, "loss": 0.7936, "step": 2621 }, { "epoch": 0.41045710707576705, "grad_norm": 
3.4538111686706543, "learning_rate": 0.00024828934506353857, "loss": 0.9523, "step": 2622 }, { "epoch": 0.4106136505948654, "grad_norm": 1.925356388092041, "learning_rate": 0.0002482649071358749, "loss": 0.8096, "step": 2623 }, { "epoch": 0.4107701941139637, "grad_norm": 1.6007717847824097, "learning_rate": 0.0002482404692082111, "loss": 0.9204, "step": 2624 }, { "epoch": 0.410926737633062, "grad_norm": 2.12320613861084, "learning_rate": 0.0002482160312805474, "loss": 0.9734, "step": 2625 }, { "epoch": 0.4110832811521603, "grad_norm": 1.3324270248413086, "learning_rate": 0.0002481915933528837, "loss": 1.0131, "step": 2626 }, { "epoch": 0.41123982467125864, "grad_norm": 1.9677183628082275, "learning_rate": 0.00024816715542521993, "loss": 0.9945, "step": 2627 }, { "epoch": 0.4113963681903569, "grad_norm": 1.487841010093689, "learning_rate": 0.0002481427174975562, "loss": 0.813, "step": 2628 }, { "epoch": 0.41155291170945524, "grad_norm": 2.215928554534912, "learning_rate": 0.00024811827956989244, "loss": 0.8284, "step": 2629 }, { "epoch": 0.4117094552285535, "grad_norm": 2.1060638427734375, "learning_rate": 0.00024809384164222874, "loss": 0.9693, "step": 2630 }, { "epoch": 0.41186599874765184, "grad_norm": 1.2802146673202515, "learning_rate": 0.000248069403714565, "loss": 0.5589, "step": 2631 }, { "epoch": 0.41202254226675017, "grad_norm": 2.4846386909484863, "learning_rate": 0.00024804496578690124, "loss": 0.9417, "step": 2632 }, { "epoch": 0.41217908578584844, "grad_norm": 1.465616226196289, "learning_rate": 0.00024802052785923755, "loss": 0.523, "step": 2633 }, { "epoch": 0.41233562930494677, "grad_norm": 1.689579963684082, "learning_rate": 0.00024799608993157375, "loss": 0.7951, "step": 2634 }, { "epoch": 0.4124921728240451, "grad_norm": 2.5636866092681885, "learning_rate": 0.00024797165200391005, "loss": 1.4816, "step": 2635 }, { "epoch": 0.4126487163431434, "grad_norm": 2.3543753623962402, "learning_rate": 0.0002479472140762463, "loss": 1.4351, "step": 2636 }, 
{ "epoch": 0.4128052598622417, "grad_norm": 2.550464630126953, "learning_rate": 0.00024792277614858255, "loss": 1.5062, "step": 2637 }, { "epoch": 0.41296180338134003, "grad_norm": 2.0879738330841064, "learning_rate": 0.00024789833822091886, "loss": 1.5573, "step": 2638 }, { "epoch": 0.4131183469004383, "grad_norm": 2.426447629928589, "learning_rate": 0.0002478739002932551, "loss": 1.5449, "step": 2639 }, { "epoch": 0.41327489041953663, "grad_norm": 2.501875877380371, "learning_rate": 0.00024784946236559136, "loss": 1.1526, "step": 2640 }, { "epoch": 0.41343143393863496, "grad_norm": 3.55033540725708, "learning_rate": 0.00024782502443792767, "loss": 1.4765, "step": 2641 }, { "epoch": 0.41358797745773324, "grad_norm": 4.644171237945557, "learning_rate": 0.0002478005865102639, "loss": 1.6887, "step": 2642 }, { "epoch": 0.41374452097683156, "grad_norm": 2.9000136852264404, "learning_rate": 0.00024777614858260017, "loss": 1.3512, "step": 2643 }, { "epoch": 0.4139010644959299, "grad_norm": 2.809741735458374, "learning_rate": 0.0002477517106549364, "loss": 1.1566, "step": 2644 }, { "epoch": 0.41405760801502817, "grad_norm": 2.349257230758667, "learning_rate": 0.0002477272727272727, "loss": 1.4035, "step": 2645 }, { "epoch": 0.4142141515341265, "grad_norm": 3.1145989894866943, "learning_rate": 0.000247702834799609, "loss": 1.3006, "step": 2646 }, { "epoch": 0.41437069505322477, "grad_norm": 1.8506964445114136, "learning_rate": 0.00024767839687194523, "loss": 1.3462, "step": 2647 }, { "epoch": 0.4145272385723231, "grad_norm": 1.8519948720932007, "learning_rate": 0.00024765395894428153, "loss": 0.7869, "step": 2648 }, { "epoch": 0.4146837820914214, "grad_norm": 1.9720544815063477, "learning_rate": 0.0002476295210166178, "loss": 1.4319, "step": 2649 }, { "epoch": 0.4148403256105197, "grad_norm": 3.180431365966797, "learning_rate": 0.00024760508308895404, "loss": 2.3146, "step": 2650 }, { "epoch": 0.41499686912961803, "grad_norm": 0.8477596640586853, "learning_rate": 
0.0002475806451612903, "loss": 0.4092, "step": 2651 }, { "epoch": 0.41515341264871636, "grad_norm": 0.5798821449279785, "learning_rate": 0.00024755620723362654, "loss": 0.3916, "step": 2652 }, { "epoch": 0.41530995616781463, "grad_norm": 0.638412356376648, "learning_rate": 0.00024753176930596284, "loss": 0.3177, "step": 2653 }, { "epoch": 0.41546649968691296, "grad_norm": 0.5856349468231201, "learning_rate": 0.0002475073313782991, "loss": 0.3968, "step": 2654 }, { "epoch": 0.4156230432060113, "grad_norm": 0.852562665939331, "learning_rate": 0.00024748289345063535, "loss": 0.3886, "step": 2655 }, { "epoch": 0.41577958672510956, "grad_norm": 0.7616521716117859, "learning_rate": 0.00024745845552297165, "loss": 0.4342, "step": 2656 }, { "epoch": 0.4159361302442079, "grad_norm": 0.7631804943084717, "learning_rate": 0.0002474340175953079, "loss": 0.4359, "step": 2657 }, { "epoch": 0.4160926737633062, "grad_norm": 1.0951449871063232, "learning_rate": 0.00024740957966764416, "loss": 0.3401, "step": 2658 }, { "epoch": 0.4162492172824045, "grad_norm": 0.5410890579223633, "learning_rate": 0.0002473851417399804, "loss": 0.3564, "step": 2659 }, { "epoch": 0.4164057608015028, "grad_norm": 0.9513525366783142, "learning_rate": 0.0002473607038123167, "loss": 0.5835, "step": 2660 }, { "epoch": 0.41656230432060115, "grad_norm": 1.1458948850631714, "learning_rate": 0.00024733626588465296, "loss": 0.5584, "step": 2661 }, { "epoch": 0.4167188478396994, "grad_norm": 1.0255465507507324, "learning_rate": 0.0002473118279569892, "loss": 0.4138, "step": 2662 }, { "epoch": 0.41687539135879775, "grad_norm": 2.739941120147705, "learning_rate": 0.0002472873900293255, "loss": 0.7455, "step": 2663 }, { "epoch": 0.4170319348778961, "grad_norm": 1.1839171648025513, "learning_rate": 0.00024726295210166177, "loss": 0.6217, "step": 2664 }, { "epoch": 0.41718847839699436, "grad_norm": 1.1585807800292969, "learning_rate": 0.000247238514173998, "loss": 0.5179, "step": 2665 }, { "epoch": 0.4173450219160927, 
"grad_norm": 1.2621674537658691, "learning_rate": 0.00024721407624633433, "loss": 0.5774, "step": 2666 }, { "epoch": 0.41750156543519096, "grad_norm": 1.1701115369796753, "learning_rate": 0.0002471896383186705, "loss": 0.6508, "step": 2667 }, { "epoch": 0.4176581089542893, "grad_norm": 1.1155378818511963, "learning_rate": 0.00024716520039100683, "loss": 0.809, "step": 2668 }, { "epoch": 0.4178146524733876, "grad_norm": 1.2293123006820679, "learning_rate": 0.0002471407624633431, "loss": 0.5816, "step": 2669 }, { "epoch": 0.4179711959924859, "grad_norm": 1.4187663793563843, "learning_rate": 0.00024711632453567933, "loss": 0.6369, "step": 2670 }, { "epoch": 0.4181277395115842, "grad_norm": 1.2454630136489868, "learning_rate": 0.00024709188660801564, "loss": 0.715, "step": 2671 }, { "epoch": 0.41828428303068255, "grad_norm": 1.0575640201568604, "learning_rate": 0.0002470674486803519, "loss": 0.5052, "step": 2672 }, { "epoch": 0.4184408265497808, "grad_norm": 1.1785027980804443, "learning_rate": 0.00024704301075268814, "loss": 0.8882, "step": 2673 }, { "epoch": 0.41859737006887915, "grad_norm": 2.9896340370178223, "learning_rate": 0.0002470185728250244, "loss": 1.3041, "step": 2674 }, { "epoch": 0.4187539135879775, "grad_norm": 2.0894863605499268, "learning_rate": 0.0002469941348973607, "loss": 0.7306, "step": 2675 }, { "epoch": 0.41891045710707575, "grad_norm": 1.6486024856567383, "learning_rate": 0.00024696969696969695, "loss": 0.846, "step": 2676 }, { "epoch": 0.4190670006261741, "grad_norm": 2.0100576877593994, "learning_rate": 0.0002469452590420332, "loss": 0.8831, "step": 2677 }, { "epoch": 0.4192235441452724, "grad_norm": 2.7158892154693604, "learning_rate": 0.0002469208211143695, "loss": 0.8862, "step": 2678 }, { "epoch": 0.4193800876643707, "grad_norm": 2.027427911758423, "learning_rate": 0.00024689638318670576, "loss": 1.0893, "step": 2679 }, { "epoch": 0.419536631183469, "grad_norm": 1.634826421737671, "learning_rate": 0.000246871945259042, "loss": 0.9038, 
"step": 2680 }, { "epoch": 0.41969317470256734, "grad_norm": 1.4327385425567627, "learning_rate": 0.0002468475073313783, "loss": 0.6251, "step": 2681 }, { "epoch": 0.4198497182216656, "grad_norm": 2.376344919204712, "learning_rate": 0.0002468230694037145, "loss": 0.9759, "step": 2682 }, { "epoch": 0.42000626174076394, "grad_norm": 2.1796486377716064, "learning_rate": 0.0002467986314760508, "loss": 1.1159, "step": 2683 }, { "epoch": 0.4201628052598622, "grad_norm": 2.264559507369995, "learning_rate": 0.00024677419354838707, "loss": 1.0303, "step": 2684 }, { "epoch": 0.42031934877896054, "grad_norm": 3.5693933963775635, "learning_rate": 0.0002467497556207233, "loss": 1.2559, "step": 2685 }, { "epoch": 0.4204758922980589, "grad_norm": 1.9500762224197388, "learning_rate": 0.0002467253176930596, "loss": 1.3884, "step": 2686 }, { "epoch": 0.42063243581715715, "grad_norm": 1.6748422384262085, "learning_rate": 0.0002467008797653959, "loss": 1.1449, "step": 2687 }, { "epoch": 0.4207889793362555, "grad_norm": 1.6067205667495728, "learning_rate": 0.0002466764418377321, "loss": 0.8418, "step": 2688 }, { "epoch": 0.4209455228553538, "grad_norm": 1.628675937652588, "learning_rate": 0.00024665200391006843, "loss": 0.8212, "step": 2689 }, { "epoch": 0.4211020663744521, "grad_norm": 2.2137458324432373, "learning_rate": 0.0002466275659824047, "loss": 1.5843, "step": 2690 }, { "epoch": 0.4212586098935504, "grad_norm": 2.58758282661438, "learning_rate": 0.00024660312805474093, "loss": 1.4545, "step": 2691 }, { "epoch": 0.42141515341264874, "grad_norm": 2.075272560119629, "learning_rate": 0.0002465786901270772, "loss": 1.8048, "step": 2692 }, { "epoch": 0.421571696931747, "grad_norm": 2.119429588317871, "learning_rate": 0.0002465542521994135, "loss": 1.3897, "step": 2693 }, { "epoch": 0.42172824045084534, "grad_norm": 1.7578437328338623, "learning_rate": 0.00024652981427174974, "loss": 1.8498, "step": 2694 }, { "epoch": 0.42188478396994367, "grad_norm": 1.8442625999450684, 
"learning_rate": 0.000246505376344086, "loss": 1.3389, "step": 2695 }, { "epoch": 0.42204132748904194, "grad_norm": 1.9508384466171265, "learning_rate": 0.0002464809384164223, "loss": 0.8474, "step": 2696 }, { "epoch": 0.42219787100814027, "grad_norm": 1.3713496923446655, "learning_rate": 0.00024645650048875855, "loss": 0.8622, "step": 2697 }, { "epoch": 0.4223544145272386, "grad_norm": 2.2516250610351562, "learning_rate": 0.0002464320625610948, "loss": 0.6771, "step": 2698 }, { "epoch": 0.42251095804633687, "grad_norm": 2.1349215507507324, "learning_rate": 0.00024640762463343105, "loss": 1.3575, "step": 2699 }, { "epoch": 0.4226675015654352, "grad_norm": 3.8896472454071045, "learning_rate": 0.0002463831867057673, "loss": 1.4243, "step": 2700 }, { "epoch": 0.4228240450845335, "grad_norm": 0.8927620053291321, "learning_rate": 0.0002463587487781036, "loss": 0.3883, "step": 2701 }, { "epoch": 0.4229805886036318, "grad_norm": 0.9689701199531555, "learning_rate": 0.00024633431085043986, "loss": 0.5873, "step": 2702 }, { "epoch": 0.42313713212273013, "grad_norm": 0.4493149220943451, "learning_rate": 0.0002463098729227761, "loss": 0.276, "step": 2703 }, { "epoch": 0.4232936756418284, "grad_norm": 0.5155251026153564, "learning_rate": 0.0002462854349951124, "loss": 0.3299, "step": 2704 }, { "epoch": 0.42345021916092673, "grad_norm": 0.7345922589302063, "learning_rate": 0.00024626099706744867, "loss": 0.3684, "step": 2705 }, { "epoch": 0.42360676268002506, "grad_norm": 0.633811354637146, "learning_rate": 0.0002462365591397849, "loss": 0.4542, "step": 2706 }, { "epoch": 0.42376330619912334, "grad_norm": 0.9088723063468933, "learning_rate": 0.00024621212121212117, "loss": 0.4833, "step": 2707 }, { "epoch": 0.42391984971822166, "grad_norm": 1.057075023651123, "learning_rate": 0.0002461876832844575, "loss": 0.461, "step": 2708 }, { "epoch": 0.42407639323732, "grad_norm": 0.996385395526886, "learning_rate": 0.00024616324535679373, "loss": 0.3916, "step": 2709 }, { "epoch": 
0.42423293675641827, "grad_norm": 1.1317174434661865, "learning_rate": 0.00024613880742913, "loss": 0.4329, "step": 2710 }, { "epoch": 0.4243894802755166, "grad_norm": 0.8854171633720398, "learning_rate": 0.0002461143695014663, "loss": 0.3573, "step": 2711 }, { "epoch": 0.4245460237946149, "grad_norm": 1.3034350872039795, "learning_rate": 0.00024608993157380254, "loss": 0.6152, "step": 2712 }, { "epoch": 0.4247025673137132, "grad_norm": 0.8485596776008606, "learning_rate": 0.0002460654936461388, "loss": 0.5314, "step": 2713 }, { "epoch": 0.4248591108328115, "grad_norm": 1.0428731441497803, "learning_rate": 0.0002460410557184751, "loss": 0.4594, "step": 2714 }, { "epoch": 0.42501565435190986, "grad_norm": 1.12111496925354, "learning_rate": 0.0002460166177908113, "loss": 0.4709, "step": 2715 }, { "epoch": 0.42517219787100813, "grad_norm": 1.1140966415405273, "learning_rate": 0.0002459921798631476, "loss": 0.5229, "step": 2716 }, { "epoch": 0.42532874139010646, "grad_norm": 0.8085500597953796, "learning_rate": 0.00024596774193548385, "loss": 0.5124, "step": 2717 }, { "epoch": 0.4254852849092048, "grad_norm": 1.5303175449371338, "learning_rate": 0.0002459433040078201, "loss": 0.6329, "step": 2718 }, { "epoch": 0.42564182842830306, "grad_norm": 1.0877023935317993, "learning_rate": 0.0002459188660801564, "loss": 0.6225, "step": 2719 }, { "epoch": 0.4257983719474014, "grad_norm": 1.3418067693710327, "learning_rate": 0.00024589442815249265, "loss": 0.5107, "step": 2720 }, { "epoch": 0.42595491546649966, "grad_norm": 1.9135111570358276, "learning_rate": 0.0002458699902248289, "loss": 0.6342, "step": 2721 }, { "epoch": 0.426111458985598, "grad_norm": 1.0036910772323608, "learning_rate": 0.00024584555229716516, "loss": 0.5549, "step": 2722 }, { "epoch": 0.4262680025046963, "grad_norm": 1.8668835163116455, "learning_rate": 0.00024582111436950146, "loss": 0.6197, "step": 2723 }, { "epoch": 0.4264245460237946, "grad_norm": 2.176187515258789, "learning_rate": 
0.0002457966764418377, "loss": 0.6798, "step": 2724 }, { "epoch": 0.4265810895428929, "grad_norm": 1.4894177913665771, "learning_rate": 0.00024577223851417396, "loss": 1.0341, "step": 2725 }, { "epoch": 0.42673763306199125, "grad_norm": 2.1725120544433594, "learning_rate": 0.00024574780058651027, "loss": 0.8838, "step": 2726 }, { "epoch": 0.4268941765810895, "grad_norm": 1.8074668645858765, "learning_rate": 0.0002457233626588465, "loss": 0.5326, "step": 2727 }, { "epoch": 0.42705072010018785, "grad_norm": 2.6459243297576904, "learning_rate": 0.00024569892473118277, "loss": 0.6351, "step": 2728 }, { "epoch": 0.4272072636192862, "grad_norm": 1.6028372049331665, "learning_rate": 0.0002456744868035191, "loss": 0.9502, "step": 2729 }, { "epoch": 0.42736380713838445, "grad_norm": 2.833115816116333, "learning_rate": 0.0002456500488758553, "loss": 1.3844, "step": 2730 }, { "epoch": 0.4275203506574828, "grad_norm": 1.2911146879196167, "learning_rate": 0.0002456256109481916, "loss": 1.0888, "step": 2731 }, { "epoch": 0.4276768941765811, "grad_norm": 1.904571771621704, "learning_rate": 0.00024560117302052783, "loss": 1.0575, "step": 2732 }, { "epoch": 0.4278334376956794, "grad_norm": 2.163661241531372, "learning_rate": 0.0002455767350928641, "loss": 1.5075, "step": 2733 }, { "epoch": 0.4279899812147777, "grad_norm": 5.065518379211426, "learning_rate": 0.0002455522971652004, "loss": 1.4266, "step": 2734 }, { "epoch": 0.42814652473387604, "grad_norm": 1.9618192911148071, "learning_rate": 0.00024552785923753664, "loss": 1.0812, "step": 2735 }, { "epoch": 0.4283030682529743, "grad_norm": 2.5747668743133545, "learning_rate": 0.0002455034213098729, "loss": 1.0698, "step": 2736 }, { "epoch": 0.42845961177207265, "grad_norm": 2.1782901287078857, "learning_rate": 0.0002454789833822092, "loss": 0.883, "step": 2737 }, { "epoch": 0.4286161552911709, "grad_norm": 5.35093879699707, "learning_rate": 0.00024545454545454545, "loss": 1.4407, "step": 2738 }, { "epoch": 0.42877269881026925, 
"grad_norm": 2.183814287185669, "learning_rate": 0.0002454301075268817, "loss": 1.0695, "step": 2739 }, { "epoch": 0.4289292423293676, "grad_norm": 2.1607635021209717, "learning_rate": 0.00024540566959921795, "loss": 1.0842, "step": 2740 }, { "epoch": 0.42908578584846585, "grad_norm": 2.969998598098755, "learning_rate": 0.00024538123167155426, "loss": 1.4593, "step": 2741 }, { "epoch": 0.4292423293675642, "grad_norm": 1.807499647140503, "learning_rate": 0.0002453567937438905, "loss": 0.8874, "step": 2742 }, { "epoch": 0.4293988728866625, "grad_norm": 1.7791450023651123, "learning_rate": 0.00024533235581622676, "loss": 1.5912, "step": 2743 }, { "epoch": 0.4295554164057608, "grad_norm": 1.3194993734359741, "learning_rate": 0.00024530791788856306, "loss": 1.0056, "step": 2744 }, { "epoch": 0.4297119599248591, "grad_norm": 2.4051010608673096, "learning_rate": 0.00024528347996089926, "loss": 1.4255, "step": 2745 }, { "epoch": 0.42986850344395744, "grad_norm": 3.256500482559204, "learning_rate": 0.00024525904203323557, "loss": 1.2331, "step": 2746 }, { "epoch": 0.4300250469630557, "grad_norm": 2.3121085166931152, "learning_rate": 0.0002452346041055718, "loss": 0.845, "step": 2747 }, { "epoch": 0.43018159048215404, "grad_norm": 2.94036865234375, "learning_rate": 0.00024521016617790807, "loss": 0.851, "step": 2748 }, { "epoch": 0.43033813400125237, "grad_norm": 2.072420597076416, "learning_rate": 0.0002451857282502444, "loss": 0.5205, "step": 2749 }, { "epoch": 0.43049467752035064, "grad_norm": 2.3945791721343994, "learning_rate": 0.0002451612903225806, "loss": 0.9723, "step": 2750 }, { "epoch": 0.430651221039449, "grad_norm": 0.6630196571350098, "learning_rate": 0.0002451368523949169, "loss": 0.3922, "step": 2751 }, { "epoch": 0.4308077645585473, "grad_norm": 0.6978053450584412, "learning_rate": 0.0002451124144672532, "loss": 0.4675, "step": 2752 }, { "epoch": 0.4309643080776456, "grad_norm": 0.570740818977356, "learning_rate": 0.00024508797653958943, "loss": 0.3968, 
"step": 2753 }, { "epoch": 0.4311208515967439, "grad_norm": 0.5810028314590454, "learning_rate": 0.0002450635386119257, "loss": 0.3249, "step": 2754 }, { "epoch": 0.4312773951158422, "grad_norm": 1.180005431175232, "learning_rate": 0.00024503910068426194, "loss": 0.4897, "step": 2755 }, { "epoch": 0.4314339386349405, "grad_norm": 0.7174784541130066, "learning_rate": 0.00024501466275659824, "loss": 0.3243, "step": 2756 }, { "epoch": 0.43159048215403883, "grad_norm": 0.7873983979225159, "learning_rate": 0.0002449902248289345, "loss": 0.5946, "step": 2757 }, { "epoch": 0.4317470256731371, "grad_norm": 0.7748199105262756, "learning_rate": 0.00024496578690127074, "loss": 0.4663, "step": 2758 }, { "epoch": 0.43190356919223544, "grad_norm": 0.571399450302124, "learning_rate": 0.00024494134897360705, "loss": 0.3024, "step": 2759 }, { "epoch": 0.43206011271133377, "grad_norm": 0.838038980960846, "learning_rate": 0.0002449169110459433, "loss": 0.5797, "step": 2760 }, { "epoch": 0.43221665623043204, "grad_norm": 0.9804670214653015, "learning_rate": 0.00024489247311827955, "loss": 0.4022, "step": 2761 }, { "epoch": 0.43237319974953037, "grad_norm": 0.7213824391365051, "learning_rate": 0.0002448680351906158, "loss": 0.3703, "step": 2762 }, { "epoch": 0.4325297432686287, "grad_norm": 1.013265609741211, "learning_rate": 0.00024484359726295205, "loss": 0.3412, "step": 2763 }, { "epoch": 0.43268628678772697, "grad_norm": 1.2366046905517578, "learning_rate": 0.00024481915933528836, "loss": 0.8917, "step": 2764 }, { "epoch": 0.4328428303068253, "grad_norm": 0.8313443660736084, "learning_rate": 0.0002447947214076246, "loss": 0.3694, "step": 2765 }, { "epoch": 0.43299937382592363, "grad_norm": 1.9965428113937378, "learning_rate": 0.00024477028347996086, "loss": 0.7908, "step": 2766 }, { "epoch": 0.4331559173450219, "grad_norm": 1.3897510766983032, "learning_rate": 0.00024474584555229717, "loss": 0.5598, "step": 2767 }, { "epoch": 0.43331246086412023, "grad_norm": 1.1880437135696411, 
"learning_rate": 0.0002447214076246334, "loss": 0.5628, "step": 2768 }, { "epoch": 0.43346900438321856, "grad_norm": 3.2662549018859863, "learning_rate": 0.00024469696969696967, "loss": 0.9813, "step": 2769 }, { "epoch": 0.43362554790231683, "grad_norm": 1.4377611875534058, "learning_rate": 0.0002446725317693059, "loss": 0.5714, "step": 2770 }, { "epoch": 0.43378209142141516, "grad_norm": 2.769331693649292, "learning_rate": 0.0002446480938416422, "loss": 1.2337, "step": 2771 }, { "epoch": 0.4339386349405135, "grad_norm": 1.2153619527816772, "learning_rate": 0.0002446236559139785, "loss": 0.5278, "step": 2772 }, { "epoch": 0.43409517845961176, "grad_norm": 2.0345957279205322, "learning_rate": 0.00024459921798631473, "loss": 0.7495, "step": 2773 }, { "epoch": 0.4342517219787101, "grad_norm": 1.450657844543457, "learning_rate": 0.00024457478005865103, "loss": 0.6209, "step": 2774 }, { "epoch": 0.43440826549780837, "grad_norm": 1.648074746131897, "learning_rate": 0.0002445503421309873, "loss": 0.6324, "step": 2775 }, { "epoch": 0.4345648090169067, "grad_norm": 1.3603814840316772, "learning_rate": 0.00024452590420332354, "loss": 0.754, "step": 2776 }, { "epoch": 0.434721352536005, "grad_norm": 3.350707769393921, "learning_rate": 0.00024450146627565984, "loss": 0.73, "step": 2777 }, { "epoch": 0.4348778960551033, "grad_norm": 2.1769869327545166, "learning_rate": 0.00024447702834799604, "loss": 0.9358, "step": 2778 }, { "epoch": 0.4350344395742016, "grad_norm": 1.703329086303711, "learning_rate": 0.00024445259042033235, "loss": 0.6444, "step": 2779 }, { "epoch": 0.43519098309329995, "grad_norm": 2.654348611831665, "learning_rate": 0.0002444281524926686, "loss": 0.7654, "step": 2780 }, { "epoch": 0.43534752661239823, "grad_norm": 1.5147393941879272, "learning_rate": 0.00024440371456500485, "loss": 0.9084, "step": 2781 }, { "epoch": 0.43550407013149656, "grad_norm": 2.269134759902954, "learning_rate": 0.00024437927663734115, "loss": 1.033, "step": 2782 }, { "epoch": 
0.4356606136505949, "grad_norm": 1.4680894613265991, "learning_rate": 0.0002443548387096774, "loss": 1.0404, "step": 2783 }, { "epoch": 0.43581715716969316, "grad_norm": 1.8765166997909546, "learning_rate": 0.00024433040078201366, "loss": 1.4057, "step": 2784 }, { "epoch": 0.4359737006887915, "grad_norm": 1.2388074398040771, "learning_rate": 0.00024430596285434996, "loss": 0.7339, "step": 2785 }, { "epoch": 0.4361302442078898, "grad_norm": 1.540697693824768, "learning_rate": 0.0002442815249266862, "loss": 0.8482, "step": 2786 }, { "epoch": 0.4362867877269881, "grad_norm": 2.0315816402435303, "learning_rate": 0.00024425708699902246, "loss": 0.9921, "step": 2787 }, { "epoch": 0.4364433312460864, "grad_norm": 2.7498373985290527, "learning_rate": 0.0002442326490713587, "loss": 1.6373, "step": 2788 }, { "epoch": 0.43659987476518475, "grad_norm": 2.1657485961914062, "learning_rate": 0.000244208211143695, "loss": 1.6076, "step": 2789 }, { "epoch": 0.436756418284283, "grad_norm": 3.260566234588623, "learning_rate": 0.00024418377321603127, "loss": 1.1333, "step": 2790 }, { "epoch": 0.43691296180338135, "grad_norm": 2.2284598350524902, "learning_rate": 0.0002441593352883675, "loss": 0.9482, "step": 2791 }, { "epoch": 0.4370695053224796, "grad_norm": 2.1188924312591553, "learning_rate": 0.0002441348973607038, "loss": 1.0199, "step": 2792 }, { "epoch": 0.43722604884157795, "grad_norm": 5.20643949508667, "learning_rate": 0.00024411045943304005, "loss": 1.8085, "step": 2793 }, { "epoch": 0.4373825923606763, "grad_norm": 4.37985372543335, "learning_rate": 0.00024408602150537633, "loss": 1.2478, "step": 2794 }, { "epoch": 0.43753913587977455, "grad_norm": 1.3027615547180176, "learning_rate": 0.0002440615835777126, "loss": 1.2622, "step": 2795 }, { "epoch": 0.4376956793988729, "grad_norm": 2.000991106033325, "learning_rate": 0.00024403714565004886, "loss": 0.8131, "step": 2796 }, { "epoch": 0.4378522229179712, "grad_norm": 2.1807901859283447, "learning_rate": 
0.00024401270772238514, "loss": 0.7618, "step": 2797 }, { "epoch": 0.4380087664370695, "grad_norm": 2.6867918968200684, "learning_rate": 0.0002439882697947214, "loss": 0.6396, "step": 2798 }, { "epoch": 0.4381653099561678, "grad_norm": 1.687910795211792, "learning_rate": 0.00024396383186705764, "loss": 1.2688, "step": 2799 }, { "epoch": 0.43832185347526614, "grad_norm": 2.035445213317871, "learning_rate": 0.00024393939393939392, "loss": 1.1152, "step": 2800 }, { "epoch": 0.4384783969943644, "grad_norm": 1.4942790269851685, "learning_rate": 0.0002439149560117302, "loss": 0.6478, "step": 2801 }, { "epoch": 0.43863494051346275, "grad_norm": 0.5058338642120361, "learning_rate": 0.00024389051808406645, "loss": 0.257, "step": 2802 }, { "epoch": 0.4387914840325611, "grad_norm": 0.5301083922386169, "learning_rate": 0.00024386608015640273, "loss": 0.3152, "step": 2803 }, { "epoch": 0.43894802755165935, "grad_norm": 3.6920485496520996, "learning_rate": 0.000243841642228739, "loss": 1.2555, "step": 2804 }, { "epoch": 0.4391045710707577, "grad_norm": 0.8687499761581421, "learning_rate": 0.00024381720430107523, "loss": 0.4101, "step": 2805 }, { "epoch": 0.439261114589856, "grad_norm": 0.5601373314857483, "learning_rate": 0.0002437927663734115, "loss": 0.4841, "step": 2806 }, { "epoch": 0.4394176581089543, "grad_norm": 0.5990231037139893, "learning_rate": 0.0002437683284457478, "loss": 0.3959, "step": 2807 }, { "epoch": 0.4395742016280526, "grad_norm": 0.8136112093925476, "learning_rate": 0.00024374389051808404, "loss": 0.5858, "step": 2808 }, { "epoch": 0.4397307451471509, "grad_norm": 0.8082829713821411, "learning_rate": 0.00024371945259042032, "loss": 0.4747, "step": 2809 }, { "epoch": 0.4398872886662492, "grad_norm": 0.751895010471344, "learning_rate": 0.00024369501466275657, "loss": 0.4398, "step": 2810 }, { "epoch": 0.44004383218534754, "grad_norm": 0.7629719972610474, "learning_rate": 0.00024367057673509285, "loss": 0.3338, "step": 2811 }, { "epoch": 0.4402003757044458, 
"grad_norm": 0.9700342416763306, "learning_rate": 0.00024364613880742912, "loss": 0.5572, "step": 2812 }, { "epoch": 0.44035691922354414, "grad_norm": 1.136712670326233, "learning_rate": 0.00024362170087976535, "loss": 0.5201, "step": 2813 }, { "epoch": 0.44051346274264247, "grad_norm": 0.9968575239181519, "learning_rate": 0.00024359726295210163, "loss": 0.5493, "step": 2814 }, { "epoch": 0.44067000626174074, "grad_norm": 1.318066120147705, "learning_rate": 0.0002435728250244379, "loss": 0.5117, "step": 2815 }, { "epoch": 0.44082654978083907, "grad_norm": 1.940822720527649, "learning_rate": 0.00024354838709677416, "loss": 0.6577, "step": 2816 }, { "epoch": 0.4409830932999374, "grad_norm": 1.8296147584915161, "learning_rate": 0.00024352394916911043, "loss": 0.7105, "step": 2817 }, { "epoch": 0.4411396368190357, "grad_norm": 2.1246185302734375, "learning_rate": 0.0002434995112414467, "loss": 0.8139, "step": 2818 }, { "epoch": 0.441296180338134, "grad_norm": 1.4536235332489014, "learning_rate": 0.00024347507331378296, "loss": 0.6666, "step": 2819 }, { "epoch": 0.44145272385723233, "grad_norm": 1.9893560409545898, "learning_rate": 0.00024345063538611924, "loss": 0.7701, "step": 2820 }, { "epoch": 0.4416092673763306, "grad_norm": 2.051508903503418, "learning_rate": 0.0002434261974584555, "loss": 0.4967, "step": 2821 }, { "epoch": 0.44176581089542893, "grad_norm": 3.648225784301758, "learning_rate": 0.00024340175953079175, "loss": 0.7787, "step": 2822 }, { "epoch": 0.44192235441452726, "grad_norm": 1.885310411453247, "learning_rate": 0.00024337732160312802, "loss": 0.5775, "step": 2823 }, { "epoch": 0.44207889793362554, "grad_norm": 2.8299691677093506, "learning_rate": 0.0002433528836754643, "loss": 0.9215, "step": 2824 }, { "epoch": 0.44223544145272387, "grad_norm": 1.727311134338379, "learning_rate": 0.00024332844574780055, "loss": 0.8729, "step": 2825 }, { "epoch": 0.4423919849718222, "grad_norm": 3.334357738494873, "learning_rate": 0.00024330400782013683, "loss": 
0.7647, "step": 2826 }, { "epoch": 0.44254852849092047, "grad_norm": 2.5358595848083496, "learning_rate": 0.0002432795698924731, "loss": 0.808, "step": 2827 }, { "epoch": 0.4427050720100188, "grad_norm": 1.919858694076538, "learning_rate": 0.00024325513196480933, "loss": 0.8237, "step": 2828 }, { "epoch": 0.44286161552911707, "grad_norm": 2.2071852684020996, "learning_rate": 0.0002432306940371456, "loss": 0.7848, "step": 2829 }, { "epoch": 0.4430181590482154, "grad_norm": 2.1200320720672607, "learning_rate": 0.0002432062561094819, "loss": 0.8957, "step": 2830 }, { "epoch": 0.4431747025673137, "grad_norm": 2.553398609161377, "learning_rate": 0.00024318181818181814, "loss": 1.4184, "step": 2831 }, { "epoch": 0.443331246086412, "grad_norm": 1.989119529724121, "learning_rate": 0.00024315738025415442, "loss": 1.1927, "step": 2832 }, { "epoch": 0.44348778960551033, "grad_norm": 1.7680295705795288, "learning_rate": 0.0002431329423264907, "loss": 0.7261, "step": 2833 }, { "epoch": 0.44364433312460866, "grad_norm": 3.1489715576171875, "learning_rate": 0.00024310850439882695, "loss": 1.2965, "step": 2834 }, { "epoch": 0.44380087664370693, "grad_norm": 2.7406511306762695, "learning_rate": 0.00024308406647116323, "loss": 0.9612, "step": 2835 }, { "epoch": 0.44395742016280526, "grad_norm": 2.736985921859741, "learning_rate": 0.0002430596285434995, "loss": 1.1864, "step": 2836 }, { "epoch": 0.4441139636819036, "grad_norm": 2.205655097961426, "learning_rate": 0.00024303519061583573, "loss": 1.6187, "step": 2837 }, { "epoch": 0.44427050720100186, "grad_norm": 1.7284249067306519, "learning_rate": 0.000243010752688172, "loss": 0.9562, "step": 2838 }, { "epoch": 0.4444270507201002, "grad_norm": 2.0134189128875732, "learning_rate": 0.0002429863147605083, "loss": 1.3006, "step": 2839 }, { "epoch": 0.4445835942391985, "grad_norm": 2.37020206451416, "learning_rate": 0.00024296187683284454, "loss": 1.5716, "step": 2840 }, { "epoch": 0.4447401377582968, "grad_norm": 2.1553592681884766, 
"learning_rate": 0.00024293743890518082, "loss": 1.1951, "step": 2841 }, { "epoch": 0.4448966812773951, "grad_norm": 2.4819650650024414, "learning_rate": 0.0002429130009775171, "loss": 0.8878, "step": 2842 }, { "epoch": 0.44505322479649345, "grad_norm": 2.196474552154541, "learning_rate": 0.00024288856304985335, "loss": 1.4834, "step": 2843 }, { "epoch": 0.4452097683155917, "grad_norm": 3.71829891204834, "learning_rate": 0.00024286412512218963, "loss": 1.1742, "step": 2844 }, { "epoch": 0.44536631183469005, "grad_norm": 2.282411575317383, "learning_rate": 0.00024283968719452588, "loss": 1.3603, "step": 2845 }, { "epoch": 0.4455228553537883, "grad_norm": 1.549609899520874, "learning_rate": 0.00024281524926686213, "loss": 0.7373, "step": 2846 }, { "epoch": 0.44567939887288666, "grad_norm": 0.8783185482025146, "learning_rate": 0.0002427908113391984, "loss": 0.435, "step": 2847 }, { "epoch": 0.445835942391985, "grad_norm": 4.473077774047852, "learning_rate": 0.00024276637341153468, "loss": 1.096, "step": 2848 }, { "epoch": 0.44599248591108326, "grad_norm": 1.7083899974822998, "learning_rate": 0.00024274193548387094, "loss": 0.9034, "step": 2849 }, { "epoch": 0.4461490294301816, "grad_norm": 1.6015340089797974, "learning_rate": 0.00024271749755620721, "loss": 0.8184, "step": 2850 }, { "epoch": 0.4463055729492799, "grad_norm": 0.5668478608131409, "learning_rate": 0.0002426930596285435, "loss": 0.3711, "step": 2851 }, { "epoch": 0.4464621164683782, "grad_norm": 0.603583812713623, "learning_rate": 0.00024266862170087972, "loss": 0.284, "step": 2852 }, { "epoch": 0.4466186599874765, "grad_norm": 0.8786858916282654, "learning_rate": 0.000242644183773216, "loss": 0.2688, "step": 2853 }, { "epoch": 0.44677520350657485, "grad_norm": 0.5734667778015137, "learning_rate": 0.00024261974584555227, "loss": 0.3223, "step": 2854 }, { "epoch": 0.4469317470256731, "grad_norm": 0.926365077495575, "learning_rate": 0.00024259530791788852, "loss": 0.44, "step": 2855 }, { "epoch": 
0.44708829054477145, "grad_norm": 0.661888062953949, "learning_rate": 0.0002425708699902248, "loss": 0.3443, "step": 2856 }, { "epoch": 0.4472448340638698, "grad_norm": 0.6842008233070374, "learning_rate": 0.00024254643206256108, "loss": 0.4118, "step": 2857 }, { "epoch": 0.44740137758296805, "grad_norm": 0.7839653491973877, "learning_rate": 0.00024252199413489733, "loss": 0.3634, "step": 2858 }, { "epoch": 0.4475579211020664, "grad_norm": 1.1856542825698853, "learning_rate": 0.0002424975562072336, "loss": 0.6146, "step": 2859 }, { "epoch": 0.4477144646211647, "grad_norm": 1.2870744466781616, "learning_rate": 0.0002424731182795699, "loss": 0.4388, "step": 2860 }, { "epoch": 0.447871008140263, "grad_norm": 0.8525822162628174, "learning_rate": 0.0002424486803519061, "loss": 0.5828, "step": 2861 }, { "epoch": 0.4480275516593613, "grad_norm": 0.8361726403236389, "learning_rate": 0.0002424242424242424, "loss": 0.4072, "step": 2862 }, { "epoch": 0.4481840951784596, "grad_norm": 3.588017225265503, "learning_rate": 0.00024239980449657867, "loss": 0.7043, "step": 2863 }, { "epoch": 0.4483406386975579, "grad_norm": 0.9456626176834106, "learning_rate": 0.00024237536656891492, "loss": 0.6288, "step": 2864 }, { "epoch": 0.44849718221665624, "grad_norm": 0.8043878078460693, "learning_rate": 0.0002423509286412512, "loss": 0.3946, "step": 2865 }, { "epoch": 0.4486537257357545, "grad_norm": 1.2611615657806396, "learning_rate": 0.00024232649071358748, "loss": 0.5556, "step": 2866 }, { "epoch": 0.44881026925485284, "grad_norm": 1.1336512565612793, "learning_rate": 0.00024230205278592373, "loss": 0.6662, "step": 2867 }, { "epoch": 0.4489668127739512, "grad_norm": 1.0719926357269287, "learning_rate": 0.00024227761485826, "loss": 0.4993, "step": 2868 }, { "epoch": 0.44912335629304945, "grad_norm": 1.3766525983810425, "learning_rate": 0.00024225317693059626, "loss": 0.5825, "step": 2869 }, { "epoch": 0.4492798998121478, "grad_norm": 1.647732138633728, "learning_rate": 
0.0002422287390029325, "loss": 0.5897, "step": 2870 }, { "epoch": 0.4494364433312461, "grad_norm": 2.6894371509552, "learning_rate": 0.0002422043010752688, "loss": 0.8135, "step": 2871 }, { "epoch": 0.4495929868503444, "grad_norm": 2.326774835586548, "learning_rate": 0.00024217986314760507, "loss": 0.6992, "step": 2872 }, { "epoch": 0.4497495303694427, "grad_norm": 0.996810257434845, "learning_rate": 0.00024215542521994132, "loss": 0.4537, "step": 2873 }, { "epoch": 0.44990607388854104, "grad_norm": 1.5761237144470215, "learning_rate": 0.0002421309872922776, "loss": 0.6694, "step": 2874 }, { "epoch": 0.4500626174076393, "grad_norm": 3.633239507675171, "learning_rate": 0.00024210654936461387, "loss": 0.9042, "step": 2875 }, { "epoch": 0.45021916092673764, "grad_norm": 2.0842907428741455, "learning_rate": 0.0002420821114369501, "loss": 1.0301, "step": 2876 }, { "epoch": 0.45037570444583597, "grad_norm": 1.3994956016540527, "learning_rate": 0.00024205767350928638, "loss": 0.6357, "step": 2877 }, { "epoch": 0.45053224796493424, "grad_norm": 1.4533970355987549, "learning_rate": 0.00024203323558162266, "loss": 0.6302, "step": 2878 }, { "epoch": 0.45068879148403257, "grad_norm": 1.189319133758545, "learning_rate": 0.0002420087976539589, "loss": 0.5899, "step": 2879 }, { "epoch": 0.45084533500313084, "grad_norm": 3.1489529609680176, "learning_rate": 0.00024198435972629519, "loss": 0.931, "step": 2880 }, { "epoch": 0.45100187852222917, "grad_norm": 1.8433654308319092, "learning_rate": 0.00024195992179863146, "loss": 0.4861, "step": 2881 }, { "epoch": 0.4511584220413275, "grad_norm": 3.465445041656494, "learning_rate": 0.00024193548387096771, "loss": 0.6557, "step": 2882 }, { "epoch": 0.4513149655604258, "grad_norm": 2.037400484085083, "learning_rate": 0.000241911045943304, "loss": 1.3114, "step": 2883 }, { "epoch": 0.4514715090795241, "grad_norm": 1.4593843221664429, "learning_rate": 0.00024188660801564027, "loss": 0.81, "step": 2884 }, { "epoch": 0.45162805259862243, 
"grad_norm": 2.352095365524292, "learning_rate": 0.0002418621700879765, "loss": 0.6537, "step": 2885 }, { "epoch": 0.4517845961177207, "grad_norm": 2.0871784687042236, "learning_rate": 0.00024183773216031277, "loss": 1.0747, "step": 2886 }, { "epoch": 0.45194113963681903, "grad_norm": 3.217430591583252, "learning_rate": 0.00024181329423264905, "loss": 0.8365, "step": 2887 }, { "epoch": 0.45209768315591736, "grad_norm": 3.391012668609619, "learning_rate": 0.0002417888563049853, "loss": 0.7699, "step": 2888 }, { "epoch": 0.45225422667501564, "grad_norm": 4.206727981567383, "learning_rate": 0.00024176441837732158, "loss": 1.3047, "step": 2889 }, { "epoch": 0.45241077019411396, "grad_norm": 2.0663089752197266, "learning_rate": 0.00024173998044965786, "loss": 0.8944, "step": 2890 }, { "epoch": 0.4525673137132123, "grad_norm": 3.9211676120758057, "learning_rate": 0.0002417155425219941, "loss": 2.0434, "step": 2891 }, { "epoch": 0.45272385723231057, "grad_norm": 1.6745312213897705, "learning_rate": 0.00024169110459433036, "loss": 1.4715, "step": 2892 }, { "epoch": 0.4528804007514089, "grad_norm": 2.4523682594299316, "learning_rate": 0.00024166666666666664, "loss": 0.9836, "step": 2893 }, { "epoch": 0.4530369442705072, "grad_norm": 1.776292085647583, "learning_rate": 0.0002416422287390029, "loss": 1.4415, "step": 2894 }, { "epoch": 0.4531934877896055, "grad_norm": 2.345961570739746, "learning_rate": 0.00024161779081133917, "loss": 1.3364, "step": 2895 }, { "epoch": 0.4533500313087038, "grad_norm": 2.3358938694000244, "learning_rate": 0.00024159335288367545, "loss": 1.2469, "step": 2896 }, { "epoch": 0.45350657482780216, "grad_norm": 2.8481311798095703, "learning_rate": 0.0002415689149560117, "loss": 0.6862, "step": 2897 }, { "epoch": 0.45366311834690043, "grad_norm": 3.2002439498901367, "learning_rate": 0.00024154447702834798, "loss": 0.8734, "step": 2898 }, { "epoch": 0.45381966186599876, "grad_norm": 3.30496883392334, "learning_rate": 0.00024152003910068426, "loss": 
1.1847, "step": 2899 }, { "epoch": 0.45397620538509703, "grad_norm": 2.5524821281433105, "learning_rate": 0.00024149560117302048, "loss": 1.1897, "step": 2900 }, { "epoch": 0.45413274890419536, "grad_norm": 0.7851200103759766, "learning_rate": 0.00024147116324535676, "loss": 0.5016, "step": 2901 }, { "epoch": 0.4542892924232937, "grad_norm": 0.8391879796981812, "learning_rate": 0.00024144672531769304, "loss": 0.4045, "step": 2902 }, { "epoch": 0.45444583594239196, "grad_norm": 1.0512319803237915, "learning_rate": 0.0002414222873900293, "loss": 0.4671, "step": 2903 }, { "epoch": 0.4546023794614903, "grad_norm": 0.7638587951660156, "learning_rate": 0.00024139784946236557, "loss": 0.4057, "step": 2904 }, { "epoch": 0.4547589229805886, "grad_norm": 0.5966224670410156, "learning_rate": 0.00024137341153470185, "loss": 0.3474, "step": 2905 }, { "epoch": 0.4549154664996869, "grad_norm": 0.6843605041503906, "learning_rate": 0.0002413489736070381, "loss": 0.259, "step": 2906 }, { "epoch": 0.4550720100187852, "grad_norm": 0.8733233213424683, "learning_rate": 0.00024132453567937438, "loss": 0.5368, "step": 2907 }, { "epoch": 0.45522855353788355, "grad_norm": 0.8052546977996826, "learning_rate": 0.00024130009775171065, "loss": 0.4997, "step": 2908 }, { "epoch": 0.4553850970569818, "grad_norm": 0.7011566162109375, "learning_rate": 0.00024127565982404688, "loss": 0.4456, "step": 2909 }, { "epoch": 0.45554164057608015, "grad_norm": 2.446593999862671, "learning_rate": 0.00024125122189638316, "loss": 0.4917, "step": 2910 }, { "epoch": 0.4556981840951785, "grad_norm": 0.5837518572807312, "learning_rate": 0.00024122678396871943, "loss": 0.4428, "step": 2911 }, { "epoch": 0.45585472761427676, "grad_norm": 0.7179192900657654, "learning_rate": 0.00024120234604105569, "loss": 0.4092, "step": 2912 }, { "epoch": 0.4560112711333751, "grad_norm": 0.9555642008781433, "learning_rate": 0.00024117790811339196, "loss": 0.4673, "step": 2913 }, { "epoch": 0.4561678146524734, "grad_norm": 
0.891258955001831, "learning_rate": 0.00024115347018572824, "loss": 0.3904, "step": 2914 }, { "epoch": 0.4563243581715717, "grad_norm": 0.9778416752815247, "learning_rate": 0.0002411290322580645, "loss": 0.4525, "step": 2915 }, { "epoch": 0.45648090169067, "grad_norm": 1.230553150177002, "learning_rate": 0.00024110459433040074, "loss": 0.3054, "step": 2916 }, { "epoch": 0.4566374452097683, "grad_norm": 1.3313859701156616, "learning_rate": 0.00024108015640273702, "loss": 0.3882, "step": 2917 }, { "epoch": 0.4567939887288666, "grad_norm": 1.3717501163482666, "learning_rate": 0.00024105571847507327, "loss": 0.6825, "step": 2918 }, { "epoch": 0.45695053224796495, "grad_norm": 2.073382616043091, "learning_rate": 0.00024103128054740955, "loss": 0.5989, "step": 2919 }, { "epoch": 0.4571070757670632, "grad_norm": 1.1343777179718018, "learning_rate": 0.00024100684261974583, "loss": 0.5252, "step": 2920 }, { "epoch": 0.45726361928616155, "grad_norm": 1.8335095643997192, "learning_rate": 0.00024098240469208208, "loss": 0.4679, "step": 2921 }, { "epoch": 0.4574201628052599, "grad_norm": 2.010408639907837, "learning_rate": 0.00024095796676441836, "loss": 0.6568, "step": 2922 }, { "epoch": 0.45757670632435815, "grad_norm": 1.3354651927947998, "learning_rate": 0.00024093352883675464, "loss": 0.545, "step": 2923 }, { "epoch": 0.4577332498434565, "grad_norm": 2.0071985721588135, "learning_rate": 0.00024090909090909086, "loss": 0.9399, "step": 2924 }, { "epoch": 0.4578897933625548, "grad_norm": 3.2481436729431152, "learning_rate": 0.00024088465298142714, "loss": 1.0217, "step": 2925 }, { "epoch": 0.4580463368816531, "grad_norm": 2.576655387878418, "learning_rate": 0.00024086021505376342, "loss": 0.5155, "step": 2926 }, { "epoch": 0.4582028804007514, "grad_norm": 1.2466546297073364, "learning_rate": 0.00024083577712609967, "loss": 0.6947, "step": 2927 }, { "epoch": 0.45835942391984974, "grad_norm": 1.3399920463562012, "learning_rate": 0.00024081133919843595, "loss": 0.6762, "step": 
2928 }, { "epoch": 0.458515967438948, "grad_norm": 1.9821996688842773, "learning_rate": 0.00024078690127077223, "loss": 0.8415, "step": 2929 }, { "epoch": 0.45867251095804634, "grad_norm": 2.0268774032592773, "learning_rate": 0.00024076246334310848, "loss": 0.7196, "step": 2930 }, { "epoch": 0.45882905447714467, "grad_norm": 1.9509254693984985, "learning_rate": 0.00024073802541544476, "loss": 0.7176, "step": 2931 }, { "epoch": 0.45898559799624294, "grad_norm": 1.5287137031555176, "learning_rate": 0.00024071358748778104, "loss": 0.9125, "step": 2932 }, { "epoch": 0.4591421415153413, "grad_norm": 2.1340198516845703, "learning_rate": 0.00024068914956011726, "loss": 0.7953, "step": 2933 }, { "epoch": 0.45929868503443955, "grad_norm": 1.94748055934906, "learning_rate": 0.00024066471163245354, "loss": 0.9377, "step": 2934 }, { "epoch": 0.4594552285535379, "grad_norm": 1.8639601469039917, "learning_rate": 0.00024064027370478982, "loss": 0.9098, "step": 2935 }, { "epoch": 0.4596117720726362, "grad_norm": 1.7481262683868408, "learning_rate": 0.00024061583577712607, "loss": 1.1671, "step": 2936 }, { "epoch": 0.4597683155917345, "grad_norm": 2.4911282062530518, "learning_rate": 0.00024059139784946235, "loss": 1.5963, "step": 2937 }, { "epoch": 0.4599248591108328, "grad_norm": 3.1671178340911865, "learning_rate": 0.00024056695992179862, "loss": 0.8903, "step": 2938 }, { "epoch": 0.46008140262993114, "grad_norm": 2.2914581298828125, "learning_rate": 0.00024054252199413488, "loss": 1.2117, "step": 2939 }, { "epoch": 0.4602379461490294, "grad_norm": 2.8251593112945557, "learning_rate": 0.00024051808406647113, "loss": 1.4033, "step": 2940 }, { "epoch": 0.46039448966812774, "grad_norm": 2.4043309688568115, "learning_rate": 0.0002404936461388074, "loss": 0.9579, "step": 2941 }, { "epoch": 0.46055103318722607, "grad_norm": 2.894033908843994, "learning_rate": 0.00024046920821114366, "loss": 0.833, "step": 2942 }, { "epoch": 0.46070757670632434, "grad_norm": 2.439354658126831, 
"learning_rate": 0.00024044477028347994, "loss": 1.7688, "step": 2943 }, { "epoch": 0.46086412022542267, "grad_norm": 3.430408000946045, "learning_rate": 0.00024042033235581621, "loss": 1.5622, "step": 2944 }, { "epoch": 0.461020663744521, "grad_norm": 1.8975231647491455, "learning_rate": 0.00024039589442815246, "loss": 1.3692, "step": 2945 }, { "epoch": 0.46117720726361927, "grad_norm": 1.8430036306381226, "learning_rate": 0.00024037145650048874, "loss": 1.1524, "step": 2946 }, { "epoch": 0.4613337507827176, "grad_norm": 3.267629861831665, "learning_rate": 0.00024034701857282502, "loss": 0.8915, "step": 2947 }, { "epoch": 0.46149029430181593, "grad_norm": 2.6302919387817383, "learning_rate": 0.00024032258064516125, "loss": 1.2047, "step": 2948 }, { "epoch": 0.4616468378209142, "grad_norm": 2.9128334522247314, "learning_rate": 0.00024029814271749752, "loss": 1.3952, "step": 2949 }, { "epoch": 0.46180338134001253, "grad_norm": 1.4925674200057983, "learning_rate": 0.0002402737047898338, "loss": 1.4002, "step": 2950 }, { "epoch": 0.46195992485911086, "grad_norm": 0.5663087964057922, "learning_rate": 0.00024024926686217005, "loss": 0.3536, "step": 2951 }, { "epoch": 0.46211646837820913, "grad_norm": 0.5711171627044678, "learning_rate": 0.00024022482893450633, "loss": 0.4303, "step": 2952 }, { "epoch": 0.46227301189730746, "grad_norm": 0.7608370184898376, "learning_rate": 0.0002402003910068426, "loss": 0.4472, "step": 2953 }, { "epoch": 0.46242955541640574, "grad_norm": 0.5618414282798767, "learning_rate": 0.00024017595307917886, "loss": 0.4728, "step": 2954 }, { "epoch": 0.46258609893550406, "grad_norm": 0.5996711254119873, "learning_rate": 0.00024015151515151514, "loss": 0.4194, "step": 2955 }, { "epoch": 0.4627426424546024, "grad_norm": 0.6966267228126526, "learning_rate": 0.00024012707722385142, "loss": 0.4361, "step": 2956 }, { "epoch": 0.46289918597370067, "grad_norm": 0.6041519045829773, "learning_rate": 0.00024010263929618764, "loss": 0.3586, "step": 2957 }, { 
"epoch": 0.463055729492799, "grad_norm": 0.6143047213554382, "learning_rate": 0.00024007820136852392, "loss": 0.3958, "step": 2958 }, { "epoch": 0.4632122730118973, "grad_norm": 0.727200448513031, "learning_rate": 0.0002400537634408602, "loss": 0.4222, "step": 2959 }, { "epoch": 0.4633688165309956, "grad_norm": 0.7016879320144653, "learning_rate": 0.00024002932551319645, "loss": 0.3269, "step": 2960 }, { "epoch": 0.4635253600500939, "grad_norm": 1.3324402570724487, "learning_rate": 0.00024000488758553273, "loss": 0.4065, "step": 2961 }, { "epoch": 0.46368190356919226, "grad_norm": 0.8815485835075378, "learning_rate": 0.000239980449657869, "loss": 0.4625, "step": 2962 }, { "epoch": 0.46383844708829053, "grad_norm": 1.0616453886032104, "learning_rate": 0.00023995601173020523, "loss": 0.5971, "step": 2963 }, { "epoch": 0.46399499060738886, "grad_norm": 0.8224049806594849, "learning_rate": 0.0002399315738025415, "loss": 0.4368, "step": 2964 }, { "epoch": 0.4641515341264872, "grad_norm": 1.420790195465088, "learning_rate": 0.0002399071358748778, "loss": 0.5855, "step": 2965 }, { "epoch": 0.46430807764558546, "grad_norm": 1.2988914251327515, "learning_rate": 0.00023988269794721404, "loss": 0.6298, "step": 2966 }, { "epoch": 0.4644646211646838, "grad_norm": 2.2684569358825684, "learning_rate": 0.00023985826001955032, "loss": 0.6326, "step": 2967 }, { "epoch": 0.4646211646837821, "grad_norm": 2.2783150672912598, "learning_rate": 0.0002398338220918866, "loss": 0.7841, "step": 2968 }, { "epoch": 0.4647777082028804, "grad_norm": 1.206459879875183, "learning_rate": 0.00023980938416422285, "loss": 0.7821, "step": 2969 }, { "epoch": 0.4649342517219787, "grad_norm": 3.7730114459991455, "learning_rate": 0.00023978494623655913, "loss": 1.0646, "step": 2970 }, { "epoch": 0.465090795241077, "grad_norm": 1.3835830688476562, "learning_rate": 0.0002397605083088954, "loss": 0.452, "step": 2971 }, { "epoch": 0.4652473387601753, "grad_norm": 2.1477081775665283, "learning_rate": 
0.00023973607038123163, "loss": 0.9363, "step": 2972 }, { "epoch": 0.46540388227927365, "grad_norm": 1.1559685468673706, "learning_rate": 0.0002397116324535679, "loss": 0.5786, "step": 2973 }, { "epoch": 0.4655604257983719, "grad_norm": 2.111488103866577, "learning_rate": 0.00023968719452590418, "loss": 0.5976, "step": 2974 }, { "epoch": 0.46571696931747025, "grad_norm": 1.2530162334442139, "learning_rate": 0.00023966275659824044, "loss": 0.5426, "step": 2975 }, { "epoch": 0.4658735128365686, "grad_norm": 4.318511962890625, "learning_rate": 0.00023963831867057671, "loss": 1.0584, "step": 2976 }, { "epoch": 0.46603005635566686, "grad_norm": 1.5593619346618652, "learning_rate": 0.000239613880742913, "loss": 0.5268, "step": 2977 }, { "epoch": 0.4661865998747652, "grad_norm": 1.6333059072494507, "learning_rate": 0.00023958944281524924, "loss": 0.9127, "step": 2978 }, { "epoch": 0.4663431433938635, "grad_norm": 1.942104697227478, "learning_rate": 0.00023956500488758552, "loss": 1.2134, "step": 2979 }, { "epoch": 0.4664996869129618, "grad_norm": 3.2156219482421875, "learning_rate": 0.00023954056695992177, "loss": 1.0458, "step": 2980 }, { "epoch": 0.4666562304320601, "grad_norm": 1.9920734167099, "learning_rate": 0.00023951612903225802, "loss": 0.7571, "step": 2981 }, { "epoch": 0.46681277395115844, "grad_norm": 4.246744632720947, "learning_rate": 0.0002394916911045943, "loss": 0.7142, "step": 2982 }, { "epoch": 0.4669693174702567, "grad_norm": 1.2510920763015747, "learning_rate": 0.00023946725317693058, "loss": 0.8802, "step": 2983 }, { "epoch": 0.46712586098935505, "grad_norm": 1.446738839149475, "learning_rate": 0.00023944281524926683, "loss": 0.9898, "step": 2984 }, { "epoch": 0.4672824045084534, "grad_norm": 2.1608774662017822, "learning_rate": 0.0002394183773216031, "loss": 1.2076, "step": 2985 }, { "epoch": 0.46743894802755165, "grad_norm": 2.4812753200531006, "learning_rate": 0.0002393939393939394, "loss": 0.8113, "step": 2986 }, { "epoch": 0.46759549154665, 
"grad_norm": 2.0435714721679688, "learning_rate": 0.0002393695014662756, "loss": 1.0752, "step": 2987 }, { "epoch": 0.46775203506574825, "grad_norm": 3.043316602706909, "learning_rate": 0.0002393450635386119, "loss": 1.6354, "step": 2988 }, { "epoch": 0.4679085785848466, "grad_norm": 3.5616087913513184, "learning_rate": 0.00023932062561094817, "loss": 1.0435, "step": 2989 }, { "epoch": 0.4680651221039449, "grad_norm": 3.942680835723877, "learning_rate": 0.00023929618768328442, "loss": 1.6489, "step": 2990 }, { "epoch": 0.4682216656230432, "grad_norm": 1.8026114702224731, "learning_rate": 0.0002392717497556207, "loss": 1.0482, "step": 2991 }, { "epoch": 0.4683782091421415, "grad_norm": 3.0630009174346924, "learning_rate": 0.00023924731182795698, "loss": 1.5673, "step": 2992 }, { "epoch": 0.46853475266123984, "grad_norm": 2.128530740737915, "learning_rate": 0.00023922287390029323, "loss": 1.3448, "step": 2993 }, { "epoch": 0.4686912961803381, "grad_norm": 2.782782554626465, "learning_rate": 0.0002391984359726295, "loss": 1.4261, "step": 2994 }, { "epoch": 0.46884783969943644, "grad_norm": 2.1580779552459717, "learning_rate": 0.00023917399804496579, "loss": 1.6046, "step": 2995 }, { "epoch": 0.46900438321853477, "grad_norm": 1.6903536319732666, "learning_rate": 0.000239149560117302, "loss": 0.9642, "step": 2996 }, { "epoch": 0.46916092673763304, "grad_norm": 3.0075113773345947, "learning_rate": 0.0002391251221896383, "loss": 0.7686, "step": 2997 }, { "epoch": 0.4693174702567314, "grad_norm": 1.6930934190750122, "learning_rate": 0.00023910068426197457, "loss": 1.2032, "step": 2998 }, { "epoch": 0.4694740137758297, "grad_norm": 2.7833595275878906, "learning_rate": 0.00023907624633431082, "loss": 1.0076, "step": 2999 }, { "epoch": 0.469630557294928, "grad_norm": 1.994335651397705, "learning_rate": 0.0002390518084066471, "loss": 0.9728, "step": 3000 }, { "epoch": 0.469630557294928, "eval_loss": 0.6461995244026184, "eval_runtime": 205.814, "eval_samples_per_second": 
60.166, "eval_steps_per_second": 3.761, "eval_wer": 0.4029793654504112, "step": 3000 }, { "epoch": 0.4697871008140263, "grad_norm": 0.5218053460121155, "learning_rate": 0.00023902737047898337, "loss": 0.2968, "step": 3001 }, { "epoch": 0.46994364433312463, "grad_norm": 0.8626418113708496, "learning_rate": 0.00023900293255131963, "loss": 0.363, "step": 3002 }, { "epoch": 0.4701001878522229, "grad_norm": 0.6451046466827393, "learning_rate": 0.0002389784946236559, "loss": 0.3505, "step": 3003 }, { "epoch": 0.47025673137132123, "grad_norm": 1.0618706941604614, "learning_rate": 0.00023895405669599216, "loss": 0.3542, "step": 3004 }, { "epoch": 0.47041327489041956, "grad_norm": 0.6961786150932312, "learning_rate": 0.0002389296187683284, "loss": 0.4272, "step": 3005 }, { "epoch": 0.47056981840951784, "grad_norm": 0.6767577528953552, "learning_rate": 0.00023890518084066469, "loss": 0.3825, "step": 3006 }, { "epoch": 0.47072636192861617, "grad_norm": 0.8151198029518127, "learning_rate": 0.00023888074291300096, "loss": 0.3995, "step": 3007 }, { "epoch": 0.47088290544771444, "grad_norm": 1.8509833812713623, "learning_rate": 0.00023885630498533722, "loss": 0.5605, "step": 3008 }, { "epoch": 0.47103944896681277, "grad_norm": 0.6328085660934448, "learning_rate": 0.0002388318670576735, "loss": 0.4549, "step": 3009 }, { "epoch": 0.4711959924859111, "grad_norm": 1.1745184659957886, "learning_rate": 0.00023880742913000977, "loss": 0.5468, "step": 3010 }, { "epoch": 0.47135253600500937, "grad_norm": 1.2673914432525635, "learning_rate": 0.000238782991202346, "loss": 0.6493, "step": 3011 }, { "epoch": 0.4715090795241077, "grad_norm": 0.6951119899749756, "learning_rate": 0.00023875855327468227, "loss": 0.3245, "step": 3012 }, { "epoch": 0.47166562304320603, "grad_norm": 1.6163688898086548, "learning_rate": 0.00023873411534701855, "loss": 0.4878, "step": 3013 }, { "epoch": 0.4718221665623043, "grad_norm": 1.3467936515808105, "learning_rate": 0.0002387096774193548, "loss": 0.4346, "step": 
3014 }, { "epoch": 0.47197871008140263, "grad_norm": 0.9435334205627441, "learning_rate": 0.00023868523949169108, "loss": 0.6272, "step": 3015 }, { "epoch": 0.47213525360050096, "grad_norm": 1.2836298942565918, "learning_rate": 0.00023866080156402736, "loss": 0.6137, "step": 3016 }, { "epoch": 0.47229179711959923, "grad_norm": 1.1199934482574463, "learning_rate": 0.0002386363636363636, "loss": 0.5952, "step": 3017 }, { "epoch": 0.47244834063869756, "grad_norm": 1.2082144021987915, "learning_rate": 0.0002386119257086999, "loss": 0.6759, "step": 3018 }, { "epoch": 0.4726048841577959, "grad_norm": 0.8209431171417236, "learning_rate": 0.00023858748778103617, "loss": 0.4364, "step": 3019 }, { "epoch": 0.47276142767689416, "grad_norm": 1.712845802307129, "learning_rate": 0.0002385630498533724, "loss": 0.7077, "step": 3020 }, { "epoch": 0.4729179711959925, "grad_norm": 2.234483242034912, "learning_rate": 0.00023853861192570867, "loss": 0.7401, "step": 3021 }, { "epoch": 0.4730745147150908, "grad_norm": 2.4135541915893555, "learning_rate": 0.00023851417399804495, "loss": 1.2215, "step": 3022 }, { "epoch": 0.4732310582341891, "grad_norm": 1.4404560327529907, "learning_rate": 0.0002384897360703812, "loss": 0.8349, "step": 3023 }, { "epoch": 0.4733876017532874, "grad_norm": 2.8168258666992188, "learning_rate": 0.00023846529814271748, "loss": 1.2226, "step": 3024 }, { "epoch": 0.4735441452723857, "grad_norm": 2.413266181945801, "learning_rate": 0.00023844086021505376, "loss": 0.7827, "step": 3025 }, { "epoch": 0.473700688791484, "grad_norm": 1.8607373237609863, "learning_rate": 0.00023841642228739, "loss": 0.9485, "step": 3026 }, { "epoch": 0.47385723231058235, "grad_norm": 2.2613914012908936, "learning_rate": 0.0002383919843597263, "loss": 0.6483, "step": 3027 }, { "epoch": 0.47401377582968063, "grad_norm": 1.8476412296295166, "learning_rate": 0.00023836754643206254, "loss": 0.9358, "step": 3028 }, { "epoch": 0.47417031934877896, "grad_norm": 5.6817851066589355, 
"learning_rate": 0.0002383431085043988, "loss": 1.1602, "step": 3029 }, { "epoch": 0.4743268628678773, "grad_norm": 1.4337266683578491, "learning_rate": 0.00023831867057673507, "loss": 0.5406, "step": 3030 }, { "epoch": 0.47448340638697556, "grad_norm": 1.3688822984695435, "learning_rate": 0.00023829423264907135, "loss": 0.6749, "step": 3031 }, { "epoch": 0.4746399499060739, "grad_norm": 1.8527987003326416, "learning_rate": 0.0002382697947214076, "loss": 0.6256, "step": 3032 }, { "epoch": 0.4747964934251722, "grad_norm": 2.1295547485351562, "learning_rate": 0.00023824535679374388, "loss": 0.8839, "step": 3033 }, { "epoch": 0.4749530369442705, "grad_norm": 7.561639308929443, "learning_rate": 0.00023822091886608015, "loss": 1.5188, "step": 3034 }, { "epoch": 0.4751095804633688, "grad_norm": 2.218928575515747, "learning_rate": 0.00023819648093841638, "loss": 1.0713, "step": 3035 }, { "epoch": 0.47526612398246715, "grad_norm": 1.8199081420898438, "learning_rate": 0.00023817204301075266, "loss": 0.9386, "step": 3036 }, { "epoch": 0.4754226675015654, "grad_norm": 2.755200147628784, "learning_rate": 0.00023814760508308893, "loss": 1.3609, "step": 3037 }, { "epoch": 0.47557921102066375, "grad_norm": 2.402803897857666, "learning_rate": 0.00023812316715542519, "loss": 0.9989, "step": 3038 }, { "epoch": 0.4757357545397621, "grad_norm": 2.486337184906006, "learning_rate": 0.00023809872922776146, "loss": 1.2102, "step": 3039 }, { "epoch": 0.47589229805886035, "grad_norm": 2.5728185176849365, "learning_rate": 0.00023807429130009774, "loss": 1.1949, "step": 3040 }, { "epoch": 0.4760488415779587, "grad_norm": 2.1376590728759766, "learning_rate": 0.000238049853372434, "loss": 1.2828, "step": 3041 }, { "epoch": 0.47620538509705695, "grad_norm": 1.944007158279419, "learning_rate": 0.00023802541544477027, "loss": 2.2113, "step": 3042 }, { "epoch": 0.4763619286161553, "grad_norm": 3.550218343734741, "learning_rate": 0.00023800097751710655, "loss": 0.9221, "step": 3043 }, { "epoch": 
0.4765184721352536, "grad_norm": 3.1462228298187256, "learning_rate": 0.00023797653958944277, "loss": 1.6563, "step": 3044 }, { "epoch": 0.4766750156543519, "grad_norm": 4.181352615356445, "learning_rate": 0.00023795210166177905, "loss": 1.7042, "step": 3045 }, { "epoch": 0.4768315591734502, "grad_norm": 2.818664312362671, "learning_rate": 0.00023792766373411533, "loss": 1.0479, "step": 3046 }, { "epoch": 0.47698810269254854, "grad_norm": 1.8017176389694214, "learning_rate": 0.00023790322580645158, "loss": 0.4576, "step": 3047 }, { "epoch": 0.4771446462116468, "grad_norm": 3.6153724193573, "learning_rate": 0.00023787878787878786, "loss": 1.3568, "step": 3048 }, { "epoch": 0.47730118973074515, "grad_norm": 2.630364418029785, "learning_rate": 0.00023785434995112414, "loss": 1.0744, "step": 3049 }, { "epoch": 0.4774577332498435, "grad_norm": 2.6814486980438232, "learning_rate": 0.0002378299120234604, "loss": 1.015, "step": 3050 }, { "epoch": 0.47761427676894175, "grad_norm": 0.47881409525871277, "learning_rate": 0.00023780547409579664, "loss": 0.4242, "step": 3051 }, { "epoch": 0.4777708202880401, "grad_norm": 0.47194862365722656, "learning_rate": 0.00023778103616813292, "loss": 0.3254, "step": 3052 }, { "epoch": 0.4779273638071384, "grad_norm": 1.0471508502960205, "learning_rate": 0.00023775659824046917, "loss": 0.5085, "step": 3053 }, { "epoch": 0.4780839073262367, "grad_norm": 0.921794593334198, "learning_rate": 0.00023773216031280545, "loss": 0.5577, "step": 3054 }, { "epoch": 0.478240450845335, "grad_norm": 0.5158277750015259, "learning_rate": 0.00023770772238514173, "loss": 0.3387, "step": 3055 }, { "epoch": 0.47839699436443334, "grad_norm": 0.7019078135490417, "learning_rate": 0.00023768328445747798, "loss": 0.3237, "step": 3056 }, { "epoch": 0.4785535378835316, "grad_norm": 0.46547719836235046, "learning_rate": 0.00023765884652981426, "loss": 0.2525, "step": 3057 }, { "epoch": 0.47871008140262994, "grad_norm": 1.2258836030960083, "learning_rate": 
0.00023763440860215054, "loss": 0.5739, "step": 3058 }, { "epoch": 0.47886662492172827, "grad_norm": 0.8825581073760986, "learning_rate": 0.00023760997067448676, "loss": 0.3721, "step": 3059 }, { "epoch": 0.47902316844082654, "grad_norm": 0.7724276185035706, "learning_rate": 0.00023758553274682304, "loss": 0.3145, "step": 3060 }, { "epoch": 0.47917971195992487, "grad_norm": 6.942146301269531, "learning_rate": 0.00023756109481915932, "loss": 0.716, "step": 3061 }, { "epoch": 0.47933625547902314, "grad_norm": 0.8972859978675842, "learning_rate": 0.00023753665689149557, "loss": 0.4538, "step": 3062 }, { "epoch": 0.47949279899812147, "grad_norm": 1.213957667350769, "learning_rate": 0.00023751221896383185, "loss": 0.6098, "step": 3063 }, { "epoch": 0.4796493425172198, "grad_norm": 1.26592218875885, "learning_rate": 0.00023748778103616813, "loss": 0.5618, "step": 3064 }, { "epoch": 0.4798058860363181, "grad_norm": 1.0946046113967896, "learning_rate": 0.00023746334310850438, "loss": 0.5241, "step": 3065 }, { "epoch": 0.4799624295554164, "grad_norm": 1.3569597005844116, "learning_rate": 0.00023743890518084065, "loss": 0.5417, "step": 3066 }, { "epoch": 0.48011897307451473, "grad_norm": 1.2722363471984863, "learning_rate": 0.00023741446725317693, "loss": 0.7591, "step": 3067 }, { "epoch": 0.480275516593613, "grad_norm": 2.4486443996429443, "learning_rate": 0.00023739002932551316, "loss": 0.7028, "step": 3068 }, { "epoch": 0.48043206011271133, "grad_norm": 1.7454280853271484, "learning_rate": 0.00023736559139784944, "loss": 0.7086, "step": 3069 }, { "epoch": 0.48058860363180966, "grad_norm": 1.4875236749649048, "learning_rate": 0.00023734115347018571, "loss": 0.7306, "step": 3070 }, { "epoch": 0.48074514715090794, "grad_norm": 1.795547366142273, "learning_rate": 0.00023731671554252197, "loss": 0.6128, "step": 3071 }, { "epoch": 0.48090169067000627, "grad_norm": 1.589440107345581, "learning_rate": 0.00023729227761485824, "loss": 0.5872, "step": 3072 }, { "epoch": 
0.4810582341891046, "grad_norm": 2.098665714263916, "learning_rate": 0.00023726783968719452, "loss": 0.7743, "step": 3073 }, { "epoch": 0.48121477770820287, "grad_norm": 1.5738600492477417, "learning_rate": 0.00023724340175953077, "loss": 0.8935, "step": 3074 }, { "epoch": 0.4813713212273012, "grad_norm": 1.6582045555114746, "learning_rate": 0.00023721896383186702, "loss": 0.9405, "step": 3075 }, { "epoch": 0.4815278647463995, "grad_norm": 1.581145167350769, "learning_rate": 0.0002371945259042033, "loss": 0.7447, "step": 3076 }, { "epoch": 0.4816844082654978, "grad_norm": 1.4934779405593872, "learning_rate": 0.00023717008797653955, "loss": 0.5348, "step": 3077 }, { "epoch": 0.4818409517845961, "grad_norm": 1.6471208333969116, "learning_rate": 0.00023714565004887583, "loss": 0.9434, "step": 3078 }, { "epoch": 0.4819974953036944, "grad_norm": 2.7180702686309814, "learning_rate": 0.0002371212121212121, "loss": 0.8578, "step": 3079 }, { "epoch": 0.48215403882279273, "grad_norm": 1.2886486053466797, "learning_rate": 0.00023709677419354836, "loss": 0.7375, "step": 3080 }, { "epoch": 0.48231058234189106, "grad_norm": 2.2205405235290527, "learning_rate": 0.00023707233626588464, "loss": 1.0982, "step": 3081 }, { "epoch": 0.48246712586098933, "grad_norm": 3.079237461090088, "learning_rate": 0.00023704789833822092, "loss": 0.9257, "step": 3082 }, { "epoch": 0.48262366938008766, "grad_norm": 2.3623974323272705, "learning_rate": 0.00023702346041055714, "loss": 1.1405, "step": 3083 }, { "epoch": 0.482780212899186, "grad_norm": 1.2514792680740356, "learning_rate": 0.00023699902248289342, "loss": 0.6099, "step": 3084 }, { "epoch": 0.48293675641828426, "grad_norm": 1.4864928722381592, "learning_rate": 0.0002369745845552297, "loss": 0.82, "step": 3085 }, { "epoch": 0.4830932999373826, "grad_norm": 1.9229499101638794, "learning_rate": 0.00023695014662756595, "loss": 1.0889, "step": 3086 }, { "epoch": 0.4832498434564809, "grad_norm": 2.564469575881958, "learning_rate": 
0.00023692570869990223, "loss": 0.9904, "step": 3087 }, { "epoch": 0.4834063869755792, "grad_norm": 3.783491611480713, "learning_rate": 0.0002369012707722385, "loss": 1.594, "step": 3088 }, { "epoch": 0.4835629304946775, "grad_norm": 2.4869840145111084, "learning_rate": 0.00023687683284457476, "loss": 0.8637, "step": 3089 }, { "epoch": 0.48371947401377585, "grad_norm": 2.5603604316711426, "learning_rate": 0.00023685239491691104, "loss": 1.1697, "step": 3090 }, { "epoch": 0.4838760175328741, "grad_norm": 3.291196823120117, "learning_rate": 0.00023682795698924732, "loss": 2.2811, "step": 3091 }, { "epoch": 0.48403256105197245, "grad_norm": 1.8274985551834106, "learning_rate": 0.00023680351906158354, "loss": 1.0696, "step": 3092 }, { "epoch": 0.4841891045710708, "grad_norm": 1.265817403793335, "learning_rate": 0.00023677908113391982, "loss": 0.9953, "step": 3093 }, { "epoch": 0.48434564809016906, "grad_norm": null, "learning_rate": 0.00023677908113391982, "loss": 0.0, "step": 3094 }, { "epoch": 0.4845021916092674, "grad_norm": 3.3963782787323, "learning_rate": 0.0002367546432062561, "loss": 1.2792, "step": 3095 }, { "epoch": 0.48465873512836566, "grad_norm": 1.3875449895858765, "learning_rate": 0.00023673020527859235, "loss": 0.7411, "step": 3096 }, { "epoch": 0.484815278647464, "grad_norm": 3.187683582305908, "learning_rate": 0.00023670576735092863, "loss": 0.9312, "step": 3097 }, { "epoch": 0.4849718221665623, "grad_norm": 3.2056405544281006, "learning_rate": 0.0002366813294232649, "loss": 1.432, "step": 3098 }, { "epoch": 0.4851283656856606, "grad_norm": 4.896152019500732, "learning_rate": 0.00023665689149560116, "loss": 0.9695, "step": 3099 }, { "epoch": 0.4852849092047589, "grad_norm": 2.358058452606201, "learning_rate": 0.0002366324535679374, "loss": 1.3928, "step": 3100 }, { "epoch": 0.48544145272385725, "grad_norm": 0.6357040405273438, "learning_rate": 0.00023660801564027369, "loss": 0.3465, "step": 3101 }, { "epoch": 0.4855979962429555, "grad_norm": 
0.48957034945487976, "learning_rate": 0.00023658357771260994, "loss": 0.4029, "step": 3102 }, { "epoch": 0.48575453976205385, "grad_norm": 0.7128604054450989, "learning_rate": 0.00023655913978494621, "loss": 0.3123, "step": 3103 }, { "epoch": 0.4859110832811522, "grad_norm": 0.5227282643318176, "learning_rate": 0.0002365347018572825, "loss": 0.3385, "step": 3104 }, { "epoch": 0.48606762680025045, "grad_norm": 0.5651166439056396, "learning_rate": 0.00023651026392961874, "loss": 0.3359, "step": 3105 }, { "epoch": 0.4862241703193488, "grad_norm": 0.8126301169395447, "learning_rate": 0.00023648582600195502, "loss": 0.4309, "step": 3106 }, { "epoch": 0.4863807138384471, "grad_norm": 1.4748018980026245, "learning_rate": 0.0002364613880742913, "loss": 0.4009, "step": 3107 }, { "epoch": 0.4865372573575454, "grad_norm": 1.2960294485092163, "learning_rate": 0.00023643695014662753, "loss": 0.4647, "step": 3108 }, { "epoch": 0.4866938008766437, "grad_norm": 1.0774697065353394, "learning_rate": 0.0002364125122189638, "loss": 0.4079, "step": 3109 }, { "epoch": 0.48685034439574204, "grad_norm": 1.326656460762024, "learning_rate": 0.00023638807429130008, "loss": 0.3332, "step": 3110 }, { "epoch": 0.4870068879148403, "grad_norm": 0.907184362411499, "learning_rate": 0.00023636363636363633, "loss": 0.4756, "step": 3111 }, { "epoch": 0.48716343143393864, "grad_norm": 0.7261903882026672, "learning_rate": 0.0002363391984359726, "loss": 0.3881, "step": 3112 }, { "epoch": 0.48731997495303697, "grad_norm": 1.3236558437347412, "learning_rate": 0.0002363147605083089, "loss": 0.4808, "step": 3113 }, { "epoch": 0.48747651847213525, "grad_norm": 1.082720160484314, "learning_rate": 0.00023629032258064514, "loss": 0.4554, "step": 3114 }, { "epoch": 0.4876330619912336, "grad_norm": 1.2888387441635132, "learning_rate": 0.00023626588465298142, "loss": 0.7315, "step": 3115 }, { "epoch": 0.48778960551033185, "grad_norm": 1.2054266929626465, "learning_rate": 0.0002362414467253177, "loss": 0.8247, 
"step": 3116 }, { "epoch": 0.4879461490294302, "grad_norm": 0.8966947197914124, "learning_rate": 0.00023621700879765392, "loss": 0.4858, "step": 3117 }, { "epoch": 0.4881026925485285, "grad_norm": 2.668046712875366, "learning_rate": 0.0002361925708699902, "loss": 0.7636, "step": 3118 }, { "epoch": 0.4882592360676268, "grad_norm": 2.526010513305664, "learning_rate": 0.00023616813294232648, "loss": 0.7442, "step": 3119 }, { "epoch": 0.4884157795867251, "grad_norm": 1.6158323287963867, "learning_rate": 0.00023614369501466273, "loss": 0.6907, "step": 3120 }, { "epoch": 0.48857232310582344, "grad_norm": 1.133370041847229, "learning_rate": 0.000236119257086999, "loss": 0.773, "step": 3121 }, { "epoch": 0.4887288666249217, "grad_norm": 1.4854271411895752, "learning_rate": 0.0002360948191593353, "loss": 0.9368, "step": 3122 }, { "epoch": 0.48888541014402004, "grad_norm": 2.469548463821411, "learning_rate": 0.0002360703812316715, "loss": 0.9644, "step": 3123 }, { "epoch": 0.48904195366311837, "grad_norm": 1.3509788513183594, "learning_rate": 0.0002360459433040078, "loss": 0.8894, "step": 3124 }, { "epoch": 0.48919849718221664, "grad_norm": 1.1007205247879028, "learning_rate": 0.00023602150537634407, "loss": 0.6317, "step": 3125 }, { "epoch": 0.48935504070131497, "grad_norm": 1.8395272493362427, "learning_rate": 0.00023599706744868032, "loss": 0.8419, "step": 3126 }, { "epoch": 0.4895115842204133, "grad_norm": 2.4748945236206055, "learning_rate": 0.0002359726295210166, "loss": 1.069, "step": 3127 }, { "epoch": 0.48966812773951157, "grad_norm": 2.535205602645874, "learning_rate": 0.00023594819159335288, "loss": 0.9937, "step": 3128 }, { "epoch": 0.4898246712586099, "grad_norm": 1.4972389936447144, "learning_rate": 0.00023592375366568913, "loss": 0.6853, "step": 3129 }, { "epoch": 0.48998121477770823, "grad_norm": 1.4133657217025757, "learning_rate": 0.0002358993157380254, "loss": 0.6998, "step": 3130 }, { "epoch": 0.4901377582968065, "grad_norm": 2.480767250061035, 
"learning_rate": 0.00023587487781036168, "loss": 1.6381, "step": 3131 }, { "epoch": 0.49029430181590483, "grad_norm": 2.512920618057251, "learning_rate": 0.0002358504398826979, "loss": 0.7757, "step": 3132 }, { "epoch": 0.4904508453350031, "grad_norm": 2.0442888736724854, "learning_rate": 0.00023582600195503419, "loss": 1.1028, "step": 3133 }, { "epoch": 0.49060738885410143, "grad_norm": 2.166085720062256, "learning_rate": 0.00023580156402737046, "loss": 1.0315, "step": 3134 }, { "epoch": 0.49076393237319976, "grad_norm": 2.705939292907715, "learning_rate": 0.00023577712609970672, "loss": 0.9559, "step": 3135 }, { "epoch": 0.49092047589229804, "grad_norm": 1.7874341011047363, "learning_rate": 0.000235752688172043, "loss": 0.719, "step": 3136 }, { "epoch": 0.49107701941139636, "grad_norm": 2.1287143230438232, "learning_rate": 0.00023572825024437927, "loss": 0.8894, "step": 3137 }, { "epoch": 0.4912335629304947, "grad_norm": 2.1680641174316406, "learning_rate": 0.00023570381231671552, "loss": 1.2279, "step": 3138 }, { "epoch": 0.49139010644959297, "grad_norm": 3.1743721961975098, "learning_rate": 0.0002356793743890518, "loss": 1.1262, "step": 3139 }, { "epoch": 0.4915466499686913, "grad_norm": 2.6501054763793945, "learning_rate": 0.00023565493646138805, "loss": 1.4381, "step": 3140 }, { "epoch": 0.4917031934877896, "grad_norm": 2.002523422241211, "learning_rate": 0.0002356304985337243, "loss": 0.9156, "step": 3141 }, { "epoch": 0.4918597370068879, "grad_norm": 3.3505101203918457, "learning_rate": 0.00023560606060606058, "loss": 1.7951, "step": 3142 }, { "epoch": 0.4920162805259862, "grad_norm": 2.2706339359283447, "learning_rate": 0.00023558162267839686, "loss": 0.957, "step": 3143 }, { "epoch": 0.49217282404508456, "grad_norm": 1.9583147764205933, "learning_rate": 0.0002355571847507331, "loss": 1.1053, "step": 3144 }, { "epoch": 0.49232936756418283, "grad_norm": 1.6880264282226562, "learning_rate": 0.0002355327468230694, "loss": 1.3319, "step": 3145 }, { "epoch": 
0.49248591108328116, "grad_norm": 2.1848926544189453, "learning_rate": 0.00023550830889540567, "loss": 1.1314, "step": 3146 }, { "epoch": 0.4926424546023795, "grad_norm": 2.028449773788452, "learning_rate": 0.0002354838709677419, "loss": 1.2468, "step": 3147 }, { "epoch": 0.49279899812147776, "grad_norm": 1.6452484130859375, "learning_rate": 0.00023545943304007817, "loss": 0.7077, "step": 3148 }, { "epoch": 0.4929555416405761, "grad_norm": 1.4108058214187622, "learning_rate": 0.00023543499511241445, "loss": 0.7043, "step": 3149 }, { "epoch": 0.49311208515967436, "grad_norm": 3.064290761947632, "learning_rate": 0.0002354105571847507, "loss": 0.8504, "step": 3150 }, { "epoch": 0.4932686286787727, "grad_norm": 0.6606544256210327, "learning_rate": 0.00023538611925708698, "loss": 0.3642, "step": 3151 }, { "epoch": 0.493425172197871, "grad_norm": 0.7985259890556335, "learning_rate": 0.00023536168132942326, "loss": 0.3771, "step": 3152 }, { "epoch": 0.4935817157169693, "grad_norm": 0.8636269569396973, "learning_rate": 0.0002353372434017595, "loss": 0.2973, "step": 3153 }, { "epoch": 0.4937382592360676, "grad_norm": 0.8866095542907715, "learning_rate": 0.0002353128054740958, "loss": 0.3708, "step": 3154 }, { "epoch": 0.49389480275516595, "grad_norm": 0.6658779978752136, "learning_rate": 0.00023528836754643207, "loss": 0.3621, "step": 3155 }, { "epoch": 0.4940513462742642, "grad_norm": 0.5546128749847412, "learning_rate": 0.0002352639296187683, "loss": 0.2645, "step": 3156 }, { "epoch": 0.49420788979336255, "grad_norm": 0.9286956191062927, "learning_rate": 0.00023523949169110457, "loss": 0.4566, "step": 3157 }, { "epoch": 0.4943644333124609, "grad_norm": 1.118327021598816, "learning_rate": 0.00023521505376344085, "loss": 0.3843, "step": 3158 }, { "epoch": 0.49452097683155916, "grad_norm": 1.691359281539917, "learning_rate": 0.0002351906158357771, "loss": 0.306, "step": 3159 }, { "epoch": 0.4946775203506575, "grad_norm": 1.1028999090194702, "learning_rate": 
0.00023516617790811338, "loss": 0.5754, "step": 3160 }, { "epoch": 0.4948340638697558, "grad_norm": 0.7714839577674866, "learning_rate": 0.00023514173998044965, "loss": 0.4509, "step": 3161 }, { "epoch": 0.4949906073888541, "grad_norm": 0.7560392022132874, "learning_rate": 0.0002351173020527859, "loss": 0.3924, "step": 3162 }, { "epoch": 0.4951471509079524, "grad_norm": 0.8828386664390564, "learning_rate": 0.00023509286412512218, "loss": 0.6857, "step": 3163 }, { "epoch": 0.49530369442705074, "grad_norm": 2.5070831775665283, "learning_rate": 0.00023506842619745844, "loss": 0.671, "step": 3164 }, { "epoch": 0.495460237946149, "grad_norm": 1.2520339488983154, "learning_rate": 0.0002350439882697947, "loss": 0.5964, "step": 3165 }, { "epoch": 0.49561678146524735, "grad_norm": 0.9470450282096863, "learning_rate": 0.00023501955034213096, "loss": 0.5922, "step": 3166 }, { "epoch": 0.4957733249843457, "grad_norm": 0.9970950484275818, "learning_rate": 0.00023499511241446724, "loss": 0.6786, "step": 3167 }, { "epoch": 0.49592986850344395, "grad_norm": 1.0567865371704102, "learning_rate": 0.0002349706744868035, "loss": 0.6266, "step": 3168 }, { "epoch": 0.4960864120225423, "grad_norm": 0.6511049866676331, "learning_rate": 0.00023494623655913977, "loss": 0.3637, "step": 3169 }, { "epoch": 0.49624295554164055, "grad_norm": 1.0755113363265991, "learning_rate": 0.00023492179863147605, "loss": 0.538, "step": 3170 }, { "epoch": 0.4963994990607389, "grad_norm": 1.8359781503677368, "learning_rate": 0.00023489736070381228, "loss": 0.8472, "step": 3171 }, { "epoch": 0.4965560425798372, "grad_norm": 0.943196713924408, "learning_rate": 0.00023487292277614855, "loss": 0.5028, "step": 3172 }, { "epoch": 0.4967125860989355, "grad_norm": 2.1813058853149414, "learning_rate": 0.00023484848484848483, "loss": 0.6239, "step": 3173 }, { "epoch": 0.4968691296180338, "grad_norm": 1.8853085041046143, "learning_rate": 0.00023482404692082108, "loss": 0.544, "step": 3174 }, { "epoch": 
0.49702567313713214, "grad_norm": 1.5820690393447876, "learning_rate": 0.00023479960899315736, "loss": 0.7816, "step": 3175 }, { "epoch": 0.4971822166562304, "grad_norm": 1.5415321588516235, "learning_rate": 0.00023477517106549364, "loss": 0.5894, "step": 3176 }, { "epoch": 0.49733876017532874, "grad_norm": 1.8562960624694824, "learning_rate": 0.0002347507331378299, "loss": 0.8592, "step": 3177 }, { "epoch": 0.49749530369442707, "grad_norm": 2.251617908477783, "learning_rate": 0.00023472629521016617, "loss": 1.0993, "step": 3178 }, { "epoch": 0.49765184721352534, "grad_norm": 3.1134870052337646, "learning_rate": 0.00023470185728250245, "loss": 0.9617, "step": 3179 }, { "epoch": 0.4978083907326237, "grad_norm": 2.8295364379882812, "learning_rate": 0.00023467741935483867, "loss": 0.7751, "step": 3180 }, { "epoch": 0.497964934251722, "grad_norm": 2.4342901706695557, "learning_rate": 0.00023465298142717495, "loss": 0.638, "step": 3181 }, { "epoch": 0.4981214777708203, "grad_norm": 2.19533371925354, "learning_rate": 0.00023462854349951123, "loss": 1.0993, "step": 3182 }, { "epoch": 0.4982780212899186, "grad_norm": 2.2838051319122314, "learning_rate": 0.00023460410557184748, "loss": 1.1031, "step": 3183 }, { "epoch": 0.49843456480901693, "grad_norm": 2.4727444648742676, "learning_rate": 0.00023457966764418376, "loss": 1.0324, "step": 3184 }, { "epoch": 0.4985911083281152, "grad_norm": 2.0869252681732178, "learning_rate": 0.00023455522971652004, "loss": 1.0483, "step": 3185 }, { "epoch": 0.49874765184721354, "grad_norm": 2.266732931137085, "learning_rate": 0.0002345307917888563, "loss": 0.8777, "step": 3186 }, { "epoch": 0.4989041953663118, "grad_norm": 2.169137954711914, "learning_rate": 0.00023450635386119257, "loss": 1.1718, "step": 3187 }, { "epoch": 0.49906073888541014, "grad_norm": 2.2153878211975098, "learning_rate": 0.00023448191593352882, "loss": 1.1595, "step": 3188 }, { "epoch": 0.49921728240450847, "grad_norm": 2.4565927982330322, "learning_rate": 
0.00023445747800586507, "loss": 1.2587, "step": 3189 }, { "epoch": 0.49937382592360674, "grad_norm": 1.9531141519546509, "learning_rate": 0.00023443304007820135, "loss": 0.9619, "step": 3190 }, { "epoch": 0.49953036944270507, "grad_norm": 3.107656478881836, "learning_rate": 0.00023440860215053763, "loss": 1.2016, "step": 3191 }, { "epoch": 0.4996869129618034, "grad_norm": 2.456040143966675, "learning_rate": 0.00023438416422287388, "loss": 1.7411, "step": 3192 }, { "epoch": 0.49984345648090167, "grad_norm": 2.5410706996917725, "learning_rate": 0.00023435972629521016, "loss": 1.5113, "step": 3193 }, { "epoch": 0.5, "grad_norm": 1.8548656702041626, "learning_rate": 0.00023433528836754643, "loss": 1.2314, "step": 3194 }, { "epoch": 0.5001565435190983, "grad_norm": 2.335625648498535, "learning_rate": 0.00023431085043988266, "loss": 0.9746, "step": 3195 }, { "epoch": 0.5003130870381967, "grad_norm": 1.950925350189209, "learning_rate": 0.00023428641251221894, "loss": 0.6818, "step": 3196 }, { "epoch": 0.5004696305572949, "grad_norm": 2.8675320148468018, "learning_rate": 0.00023426197458455521, "loss": 1.3223, "step": 3197 }, { "epoch": 0.5006261740763932, "grad_norm": 7.516252040863037, "learning_rate": 0.00023423753665689147, "loss": 0.8275, "step": 3198 }, { "epoch": 0.5007827175954915, "grad_norm": 3.5480682849884033, "learning_rate": 0.00023421309872922774, "loss": 0.8734, "step": 3199 }, { "epoch": 0.5009392611145899, "grad_norm": 1.8720403909683228, "learning_rate": 0.00023418866080156402, "loss": 0.9164, "step": 3200 }, { "epoch": 0.5010958046336882, "grad_norm": 0.5714183449745178, "learning_rate": 0.00023416422287390027, "loss": 0.3279, "step": 3201 }, { "epoch": 0.5012523481527865, "grad_norm": 0.5906309485435486, "learning_rate": 0.00023413978494623655, "loss": 0.4663, "step": 3202 }, { "epoch": 0.5014088916718847, "grad_norm": 0.49747294187545776, "learning_rate": 0.00023411534701857283, "loss": 0.3761, "step": 3203 }, { "epoch": 0.5015654351909831, 
"grad_norm": 0.6094551086425781, "learning_rate": 0.00023409090909090905, "loss": 0.3538, "step": 3204 }, { "epoch": 0.5017219787100814, "grad_norm": 0.549359142780304, "learning_rate": 0.00023406647116324533, "loss": 0.259, "step": 3205 }, { "epoch": 0.5018785222291797, "grad_norm": 0.6701558232307434, "learning_rate": 0.0002340420332355816, "loss": 0.3294, "step": 3206 }, { "epoch": 0.502035065748278, "grad_norm": 0.6899073719978333, "learning_rate": 0.00023401759530791786, "loss": 0.3143, "step": 3207 }, { "epoch": 0.5021916092673764, "grad_norm": 0.5624547004699707, "learning_rate": 0.00023399315738025414, "loss": 0.4073, "step": 3208 }, { "epoch": 0.5023481527864746, "grad_norm": 1.0639121532440186, "learning_rate": 0.00023396871945259042, "loss": 0.5554, "step": 3209 }, { "epoch": 0.5025046963055729, "grad_norm": 1.0245072841644287, "learning_rate": 0.00023394428152492667, "loss": 1.0342, "step": 3210 }, { "epoch": 0.5026612398246713, "grad_norm": 0.8923421502113342, "learning_rate": 0.00023391984359726292, "loss": 0.3688, "step": 3211 }, { "epoch": 0.5028177833437696, "grad_norm": 1.4009363651275635, "learning_rate": 0.0002338954056695992, "loss": 0.6074, "step": 3212 }, { "epoch": 0.5029743268628679, "grad_norm": 1.919011116027832, "learning_rate": 0.00023387096774193545, "loss": 0.5968, "step": 3213 }, { "epoch": 0.5031308703819661, "grad_norm": 1.0158270597457886, "learning_rate": 0.00023384652981427173, "loss": 0.5708, "step": 3214 }, { "epoch": 0.5032874139010645, "grad_norm": 1.5985990762710571, "learning_rate": 0.000233822091886608, "loss": 0.7346, "step": 3215 }, { "epoch": 0.5034439574201628, "grad_norm": 1.1101726293563843, "learning_rate": 0.00023379765395894426, "loss": 0.5624, "step": 3216 }, { "epoch": 0.5036005009392611, "grad_norm": 1.8938688039779663, "learning_rate": 0.00023377321603128054, "loss": 0.4229, "step": 3217 }, { "epoch": 0.5037570444583594, "grad_norm": 0.9973450303077698, "learning_rate": 0.00023374877810361682, "loss": 0.6207, 
"step": 3218 }, { "epoch": 0.5039135879774578, "grad_norm": 1.006881833076477, "learning_rate": 0.00023372434017595304, "loss": 0.4491, "step": 3219 }, { "epoch": 0.504070131496556, "grad_norm": 1.4391586780548096, "learning_rate": 0.00023369990224828932, "loss": 0.6227, "step": 3220 }, { "epoch": 0.5042266750156543, "grad_norm": 1.5591343641281128, "learning_rate": 0.0002336754643206256, "loss": 0.5966, "step": 3221 }, { "epoch": 0.5043832185347527, "grad_norm": 1.0018322467803955, "learning_rate": 0.00023365102639296185, "loss": 0.5168, "step": 3222 }, { "epoch": 0.504539762053851, "grad_norm": 1.1433907747268677, "learning_rate": 0.00023362658846529813, "loss": 0.6349, "step": 3223 }, { "epoch": 0.5046963055729493, "grad_norm": 1.683487057685852, "learning_rate": 0.0002336021505376344, "loss": 0.9885, "step": 3224 }, { "epoch": 0.5048528490920476, "grad_norm": 1.9962869882583618, "learning_rate": 0.00023357771260997066, "loss": 0.9242, "step": 3225 }, { "epoch": 0.5050093926111459, "grad_norm": 2.4794111251831055, "learning_rate": 0.00023355327468230693, "loss": 1.2199, "step": 3226 }, { "epoch": 0.5051659361302442, "grad_norm": 1.2889044284820557, "learning_rate": 0.0002335288367546432, "loss": 0.4802, "step": 3227 }, { "epoch": 0.5053224796493425, "grad_norm": 1.9017808437347412, "learning_rate": 0.00023350439882697944, "loss": 0.9971, "step": 3228 }, { "epoch": 0.5054790231684408, "grad_norm": 2.0930917263031006, "learning_rate": 0.00023347996089931572, "loss": 0.6098, "step": 3229 }, { "epoch": 0.5056355666875392, "grad_norm": 2.39898943901062, "learning_rate": 0.000233455522971652, "loss": 1.0619, "step": 3230 }, { "epoch": 0.5057921102066374, "grad_norm": 2.192613124847412, "learning_rate": 0.00023343108504398824, "loss": 0.9109, "step": 3231 }, { "epoch": 0.5059486537257357, "grad_norm": 2.8761684894561768, "learning_rate": 0.00023340664711632452, "loss": 0.8734, "step": 3232 }, { "epoch": 0.506105197244834, "grad_norm": 3.7933554649353027, 
"learning_rate": 0.0002333822091886608, "loss": 1.1158, "step": 3233 }, { "epoch": 0.5062617407639324, "grad_norm": 3.8589084148406982, "learning_rate": 0.00023335777126099705, "loss": 0.9443, "step": 3234 }, { "epoch": 0.5064182842830307, "grad_norm": 2.620635986328125, "learning_rate": 0.0002333333333333333, "loss": 1.1503, "step": 3235 }, { "epoch": 0.506574827802129, "grad_norm": 2.9023118019104004, "learning_rate": 0.00023330889540566958, "loss": 1.1161, "step": 3236 }, { "epoch": 0.5067313713212273, "grad_norm": 2.676131248474121, "learning_rate": 0.00023328445747800583, "loss": 1.3424, "step": 3237 }, { "epoch": 0.5068879148403256, "grad_norm": 5.856595516204834, "learning_rate": 0.0002332600195503421, "loss": 1.2241, "step": 3238 }, { "epoch": 0.5070444583594239, "grad_norm": 2.3947947025299072, "learning_rate": 0.0002332355816226784, "loss": 1.5444, "step": 3239 }, { "epoch": 0.5072010018785222, "grad_norm": 2.4484972953796387, "learning_rate": 0.00023321114369501464, "loss": 0.983, "step": 3240 }, { "epoch": 0.5073575453976206, "grad_norm": 1.774258017539978, "learning_rate": 0.00023318670576735092, "loss": 1.0836, "step": 3241 }, { "epoch": 0.5075140889167189, "grad_norm": 3.7993736267089844, "learning_rate": 0.0002331622678396872, "loss": 1.9356, "step": 3242 }, { "epoch": 0.5076706324358171, "grad_norm": 2.240050792694092, "learning_rate": 0.00023313782991202342, "loss": 1.4901, "step": 3243 }, { "epoch": 0.5078271759549154, "grad_norm": 2.437676429748535, "learning_rate": 0.0002331133919843597, "loss": 1.0782, "step": 3244 }, { "epoch": 0.5079837194740138, "grad_norm": 2.2974166870117188, "learning_rate": 0.00023308895405669598, "loss": 1.5938, "step": 3245 }, { "epoch": 0.5081402629931121, "grad_norm": 1.5529897212982178, "learning_rate": 0.00023306451612903223, "loss": 0.4601, "step": 3246 }, { "epoch": 0.5082968065122104, "grad_norm": 1.3996665477752686, "learning_rate": 0.0002330400782013685, "loss": 0.6735, "step": 3247 }, { "epoch": 
0.5084533500313086, "grad_norm": 1.607757329940796, "learning_rate": 0.0002330156402737048, "loss": 0.9503, "step": 3248 }, { "epoch": 0.508609893550407, "grad_norm": 1.984406590461731, "learning_rate": 0.00023299120234604104, "loss": 0.844, "step": 3249 }, { "epoch": 0.5087664370695053, "grad_norm": 2.105788230895996, "learning_rate": 0.00023296676441837732, "loss": 1.3768, "step": 3250 }, { "epoch": 0.5089229805886036, "grad_norm": 0.5248891115188599, "learning_rate": 0.0002329423264907136, "loss": 0.3916, "step": 3251 }, { "epoch": 0.509079524107702, "grad_norm": 0.6104065179824829, "learning_rate": 0.00023291788856304982, "loss": 0.3574, "step": 3252 }, { "epoch": 0.5092360676268003, "grad_norm": 0.3868466913700104, "learning_rate": 0.0002328934506353861, "loss": 0.2812, "step": 3253 }, { "epoch": 0.5093926111458985, "grad_norm": 0.5593306422233582, "learning_rate": 0.00023286901270772238, "loss": 0.3395, "step": 3254 }, { "epoch": 0.5095491546649968, "grad_norm": 0.5168365836143494, "learning_rate": 0.00023284457478005863, "loss": 0.2767, "step": 3255 }, { "epoch": 0.5097056981840952, "grad_norm": 0.5223838686943054, "learning_rate": 0.0002328201368523949, "loss": 0.4546, "step": 3256 }, { "epoch": 0.5098622417031935, "grad_norm": 0.9906821846961975, "learning_rate": 0.00023279569892473118, "loss": 0.4196, "step": 3257 }, { "epoch": 0.5100187852222918, "grad_norm": 1.1748069524765015, "learning_rate": 0.00023277126099706743, "loss": 0.6491, "step": 3258 }, { "epoch": 0.5101753287413902, "grad_norm": 0.8128048777580261, "learning_rate": 0.00023274682306940369, "loss": 0.3803, "step": 3259 }, { "epoch": 0.5103318722604884, "grad_norm": 1.0079333782196045, "learning_rate": 0.00023272238514173996, "loss": 0.4582, "step": 3260 }, { "epoch": 0.5104884157795867, "grad_norm": 1.1329032182693481, "learning_rate": 0.00023269794721407622, "loss": 0.5789, "step": 3261 }, { "epoch": 0.510644959298685, "grad_norm": 0.8500834703445435, "learning_rate": 0.0002326735092864125, 
"loss": 0.3144, "step": 3262 }, { "epoch": 0.5108015028177834, "grad_norm": 0.8295961022377014, "learning_rate": 0.00023264907135874877, "loss": 0.4592, "step": 3263 }, { "epoch": 0.5109580463368817, "grad_norm": 1.165689468383789, "learning_rate": 0.00023262463343108502, "loss": 0.4923, "step": 3264 }, { "epoch": 0.51111458985598, "grad_norm": 1.2372647523880005, "learning_rate": 0.0002326001955034213, "loss": 0.4771, "step": 3265 }, { "epoch": 0.5112711333750782, "grad_norm": 1.0081822872161865, "learning_rate": 0.00023257575757575758, "loss": 0.4397, "step": 3266 }, { "epoch": 0.5114276768941766, "grad_norm": 1.4410061836242676, "learning_rate": 0.0002325513196480938, "loss": 0.5526, "step": 3267 }, { "epoch": 0.5115842204132749, "grad_norm": 1.1677322387695312, "learning_rate": 0.00023252688172043008, "loss": 0.4355, "step": 3268 }, { "epoch": 0.5117407639323732, "grad_norm": 1.1747709512710571, "learning_rate": 0.00023250244379276636, "loss": 0.5189, "step": 3269 }, { "epoch": 0.5118973074514716, "grad_norm": 1.338614821434021, "learning_rate": 0.0002324780058651026, "loss": 0.7244, "step": 3270 }, { "epoch": 0.5120538509705698, "grad_norm": 1.5333219766616821, "learning_rate": 0.0002324535679374389, "loss": 0.727, "step": 3271 }, { "epoch": 0.5122103944896681, "grad_norm": 1.5182764530181885, "learning_rate": 0.00023242913000977517, "loss": 0.737, "step": 3272 }, { "epoch": 0.5123669380087664, "grad_norm": 1.514541745185852, "learning_rate": 0.00023240469208211142, "loss": 0.672, "step": 3273 }, { "epoch": 0.5125234815278648, "grad_norm": 1.7871018648147583, "learning_rate": 0.0002323802541544477, "loss": 0.8974, "step": 3274 }, { "epoch": 0.5126800250469631, "grad_norm": 3.3677568435668945, "learning_rate": 0.00023235581622678398, "loss": 0.7339, "step": 3275 }, { "epoch": 0.5128365685660614, "grad_norm": 1.6806715726852417, "learning_rate": 0.0002323313782991202, "loss": 0.6502, "step": 3276 }, { "epoch": 0.5129931120851596, "grad_norm": 3.0515427589416504, 
"learning_rate": 0.00023230694037145648, "loss": 0.7692, "step": 3277 }, { "epoch": 0.513149655604258, "grad_norm": 1.5666495561599731, "learning_rate": 0.00023228250244379276, "loss": 1.0262, "step": 3278 }, { "epoch": 0.5133061991233563, "grad_norm": 2.228578805923462, "learning_rate": 0.000232258064516129, "loss": 0.5212, "step": 3279 }, { "epoch": 0.5134627426424546, "grad_norm": 1.372183084487915, "learning_rate": 0.0002322336265884653, "loss": 0.5467, "step": 3280 }, { "epoch": 0.513619286161553, "grad_norm": 3.1377692222595215, "learning_rate": 0.00023220918866080157, "loss": 1.0146, "step": 3281 }, { "epoch": 0.5137758296806513, "grad_norm": 2.914731979370117, "learning_rate": 0.0002321847507331378, "loss": 0.6966, "step": 3282 }, { "epoch": 0.5139323731997495, "grad_norm": 1.5598711967468262, "learning_rate": 0.00023216031280547407, "loss": 0.9771, "step": 3283 }, { "epoch": 0.5140889167188478, "grad_norm": 2.859375238418579, "learning_rate": 0.00023213587487781035, "loss": 1.0935, "step": 3284 }, { "epoch": 0.5142454602379462, "grad_norm": NaN, "learning_rate": 0.00023213587487781035, "loss": 0.0, "step": 3285 }, { "epoch": 0.5144020037570445, "grad_norm": 3.383396625518799, "learning_rate": 0.0002321114369501466, "loss": 1.3124, "step": 3286 }, { "epoch": 0.5145585472761428, "grad_norm": 4.065854549407959, "learning_rate": 0.00023208699902248288, "loss": 0.9329, "step": 3287 }, { "epoch": 0.514715090795241, "grad_norm": 2.2295284271240234, "learning_rate": 0.00023206256109481915, "loss": 1.4921, "step": 3288 }, { "epoch": 0.5148716343143394, "grad_norm": 1.8836601972579956, "learning_rate": 0.0002320381231671554, "loss": 1.0573, "step": 3289 }, { "epoch": 0.5150281778334377, "grad_norm": 3.414989709854126, "learning_rate": 0.00023201368523949168, "loss": 1.5235, "step": 3290 }, { "epoch": 0.515184721352536, "grad_norm": 2.405909776687622, "learning_rate": 0.00023198924731182796, "loss": 1.2622, "step": 3291 }, { "epoch": 0.5153412648716343, "grad_norm": 
2.359362840652466, "learning_rate": 0.0002319648093841642, "loss": 1.4015, "step": 3292 }, { "epoch": 0.5154978083907327, "grad_norm": 1.3080377578735352, "learning_rate": 0.00023194037145650047, "loss": 1.2348, "step": 3293 }, { "epoch": 0.5156543519098309, "grad_norm": 3.4239389896392822, "learning_rate": 0.00023191593352883674, "loss": 1.8937, "step": 3294 }, { "epoch": 0.5158108954289292, "grad_norm": 2.4649817943573, "learning_rate": 0.000231891495601173, "loss": 1.7055, "step": 3295 }, { "epoch": 0.5159674389480275, "grad_norm": 2.2212533950805664, "learning_rate": 0.00023186705767350927, "loss": 0.9789, "step": 3296 }, { "epoch": 0.5161239824671259, "grad_norm": 1.765533685684204, "learning_rate": 0.00023184261974584555, "loss": 1.141, "step": 3297 }, { "epoch": 0.5162805259862242, "grad_norm": 2.404294013977051, "learning_rate": 0.0002318181818181818, "loss": 0.4344, "step": 3298 }, { "epoch": 0.5164370695053225, "grad_norm": 2.090888500213623, "learning_rate": 0.00023179374389051808, "loss": 0.9269, "step": 3299 }, { "epoch": 0.5165936130244208, "grad_norm": 1.3735568523406982, "learning_rate": 0.00023176930596285433, "loss": 1.101, "step": 3300 }, { "epoch": 0.5167501565435191, "grad_norm": 0.6953723430633545, "learning_rate": 0.00023174486803519058, "loss": 0.3792, "step": 3301 }, { "epoch": 0.5169067000626174, "grad_norm": 1.5250662565231323, "learning_rate": 0.00023172043010752686, "loss": 0.4269, "step": 3302 }, { "epoch": 0.5170632435817157, "grad_norm": 0.602741539478302, "learning_rate": 0.00023169599217986314, "loss": 0.3612, "step": 3303 }, { "epoch": 0.5172197871008141, "grad_norm": 2.0099756717681885, "learning_rate": 0.0002316715542521994, "loss": 0.5513, "step": 3304 }, { "epoch": 0.5173763306199123, "grad_norm": 0.6258875131607056, "learning_rate": 0.00023164711632453567, "loss": 0.3664, "step": 3305 }, { "epoch": 0.5175328741390106, "grad_norm": 0.8901511430740356, "learning_rate": 0.00023162267839687195, "loss": 0.5725, "step": 3306 }, { 
"epoch": 0.5176894176581089, "grad_norm": 0.6851283311843872, "learning_rate": 0.00023159824046920817, "loss": 0.6214, "step": 3307 }, { "epoch": 0.5178459611772073, "grad_norm": 0.9573870897293091, "learning_rate": 0.00023157380254154445, "loss": 0.4796, "step": 3308 }, { "epoch": 0.5180025046963056, "grad_norm": 1.0000059604644775, "learning_rate": 0.00023154936461388073, "loss": 0.3986, "step": 3309 }, { "epoch": 0.5181590482154039, "grad_norm": 2.725654125213623, "learning_rate": 0.00023152492668621698, "loss": 0.8138, "step": 3310 }, { "epoch": 0.5183155917345021, "grad_norm": 0.7232942581176758, "learning_rate": 0.00023150048875855326, "loss": 0.4892, "step": 3311 }, { "epoch": 0.5184721352536005, "grad_norm": 0.7372367978096008, "learning_rate": 0.00023147605083088954, "loss": 0.359, "step": 3312 }, { "epoch": 0.5186286787726988, "grad_norm": 0.6439573168754578, "learning_rate": 0.0002314516129032258, "loss": 0.4568, "step": 3313 }, { "epoch": 0.5187852222917971, "grad_norm": 0.9462945461273193, "learning_rate": 0.00023142717497556207, "loss": 0.4037, "step": 3314 }, { "epoch": 0.5189417658108955, "grad_norm": 1.0442487001419067, "learning_rate": 0.00023140273704789835, "loss": 0.5252, "step": 3315 }, { "epoch": 0.5190983093299938, "grad_norm": 1.7141872644424438, "learning_rate": 0.00023137829912023457, "loss": 0.6814, "step": 3316 }, { "epoch": 0.519254852849092, "grad_norm": 1.502058982849121, "learning_rate": 0.00023135386119257085, "loss": 0.7735, "step": 3317 }, { "epoch": 0.5194113963681903, "grad_norm": 1.0354453325271606, "learning_rate": 0.00023132942326490713, "loss": 0.4362, "step": 3318 }, { "epoch": 0.5195679398872887, "grad_norm": 0.9755420088768005, "learning_rate": 0.00023130498533724338, "loss": 0.5186, "step": 3319 }, { "epoch": 0.519724483406387, "grad_norm": 2.601315975189209, "learning_rate": 0.00023128054740957966, "loss": 0.5573, "step": 3320 }, { "epoch": 0.5198810269254853, "grad_norm": 0.9432253241539001, "learning_rate": 
0.00023125610948191593, "loss": 0.5652, "step": 3321 }, { "epoch": 0.5200375704445835, "grad_norm": 1.0160396099090576, "learning_rate": 0.00023123167155425219, "loss": 0.63, "step": 3322 }, { "epoch": 0.5201941139636819, "grad_norm": 1.788796067237854, "learning_rate": 0.00023120723362658846, "loss": 0.6753, "step": 3323 }, { "epoch": 0.5203506574827802, "grad_norm": 2.158668041229248, "learning_rate": 0.0002311827956989247, "loss": 0.5324, "step": 3324 }, { "epoch": 0.5205072010018785, "grad_norm": 2.5072875022888184, "learning_rate": 0.00023115835777126097, "loss": 0.8801, "step": 3325 }, { "epoch": 0.5206637445209769, "grad_norm": 2.151085138320923, "learning_rate": 0.00023113391984359724, "loss": 0.6469, "step": 3326 }, { "epoch": 0.5208202880400752, "grad_norm": 1.2307536602020264, "learning_rate": 0.0002311094819159335, "loss": 0.5898, "step": 3327 }, { "epoch": 0.5209768315591734, "grad_norm": 1.785718321800232, "learning_rate": 0.00023108504398826977, "loss": 0.498, "step": 3328 }, { "epoch": 0.5211333750782717, "grad_norm": 2.295013666152954, "learning_rate": 0.00023106060606060605, "loss": 0.9652, "step": 3329 }, { "epoch": 0.5212899185973701, "grad_norm": 2.417727470397949, "learning_rate": 0.00023103616813294228, "loss": 0.7332, "step": 3330 }, { "epoch": 0.5214464621164684, "grad_norm": 2.488974094390869, "learning_rate": 0.00023101173020527855, "loss": 0.6885, "step": 3331 }, { "epoch": 0.5216030056355667, "grad_norm": 1.3490437269210815, "learning_rate": 0.00023098729227761483, "loss": 0.5777, "step": 3332 }, { "epoch": 0.521759549154665, "grad_norm": 2.2661380767822266, "learning_rate": 0.00023096285434995108, "loss": 0.9452, "step": 3333 }, { "epoch": 0.5219160926737633, "grad_norm": 3.1678786277770996, "learning_rate": 0.00023093841642228736, "loss": 0.7481, "step": 3334 }, { "epoch": 0.5220726361928616, "grad_norm": 3.216134548187256, "learning_rate": 0.00023091397849462364, "loss": 1.22, "step": 3335 }, { "epoch": 0.5222291797119599, 
"grad_norm": 1.301865816116333, "learning_rate": 0.0002308895405669599, "loss": 0.9253, "step": 3336 }, { "epoch": 0.5223857232310583, "grad_norm": 2.037886142730713, "learning_rate": 0.00023086510263929617, "loss": 0.5437, "step": 3337 }, { "epoch": 0.5225422667501566, "grad_norm": 2.8060178756713867, "learning_rate": 0.00023084066471163245, "loss": 1.2142, "step": 3338 }, { "epoch": 0.5226988102692548, "grad_norm": 2.1127607822418213, "learning_rate": 0.00023081622678396867, "loss": 1.3183, "step": 3339 }, { "epoch": 0.5228553537883531, "grad_norm": 2.004124879837036, "learning_rate": 0.00023079178885630495, "loss": 1.2346, "step": 3340 }, { "epoch": 0.5230118973074515, "grad_norm": 3.113992691040039, "learning_rate": 0.00023076735092864123, "loss": 1.6289, "step": 3341 }, { "epoch": 0.5231684408265498, "grad_norm": 1.6076124906539917, "learning_rate": 0.00023074291300097748, "loss": 1.2212, "step": 3342 }, { "epoch": 0.5233249843456481, "grad_norm": 1.8852514028549194, "learning_rate": 0.00023071847507331376, "loss": 1.0095, "step": 3343 }, { "epoch": 0.5234815278647464, "grad_norm": 4.059223651885986, "learning_rate": 0.00023069403714565004, "loss": 1.042, "step": 3344 }, { "epoch": 0.5236380713838447, "grad_norm": 2.242157459259033, "learning_rate": 0.0002306695992179863, "loss": 1.6616, "step": 3345 }, { "epoch": 0.523794614902943, "grad_norm": 2.6829006671905518, "learning_rate": 0.00023064516129032257, "loss": 1.2571, "step": 3346 }, { "epoch": 0.5239511584220413, "grad_norm": 1.4247349500656128, "learning_rate": 0.00023062072336265885, "loss": 0.7588, "step": 3347 }, { "epoch": 0.5241077019411396, "grad_norm": 4.124013423919678, "learning_rate": 0.00023059628543499507, "loss": 0.9346, "step": 3348 }, { "epoch": 0.524264245460238, "grad_norm": 1.38236665725708, "learning_rate": 0.00023057184750733135, "loss": 0.7006, "step": 3349 }, { "epoch": 0.5244207889793363, "grad_norm": 1.1349596977233887, "learning_rate": 0.00023054740957966763, "loss": 0.7983, 
"step": 3350 }, { "epoch": 0.5245773324984345, "grad_norm": 0.6386610269546509, "learning_rate": 0.00023052297165200388, "loss": 0.4338, "step": 3351 }, { "epoch": 0.5247338760175329, "grad_norm": 0.4819841980934143, "learning_rate": 0.00023049853372434016, "loss": 0.2964, "step": 3352 }, { "epoch": 0.5248904195366312, "grad_norm": 0.8476313352584839, "learning_rate": 0.00023047409579667643, "loss": 0.4256, "step": 3353 }, { "epoch": 0.5250469630557295, "grad_norm": 0.8270666003227234, "learning_rate": 0.00023044965786901266, "loss": 0.3657, "step": 3354 }, { "epoch": 0.5252035065748278, "grad_norm": 0.8374744653701782, "learning_rate": 0.00023042521994134894, "loss": 0.4129, "step": 3355 }, { "epoch": 0.5253600500939261, "grad_norm": 0.6448666453361511, "learning_rate": 0.00023040078201368522, "loss": 0.3006, "step": 3356 }, { "epoch": 0.5255165936130244, "grad_norm": 1.3359466791152954, "learning_rate": 0.00023037634408602147, "loss": 0.5692, "step": 3357 }, { "epoch": 0.5256731371321227, "grad_norm": 0.5898628830909729, "learning_rate": 0.00023035190615835775, "loss": 0.3944, "step": 3358 }, { "epoch": 0.525829680651221, "grad_norm": 1.0883667469024658, "learning_rate": 0.00023032746823069402, "loss": 0.5376, "step": 3359 }, { "epoch": 0.5259862241703194, "grad_norm": 0.5729881525039673, "learning_rate": 0.00023030303030303027, "loss": 0.4935, "step": 3360 }, { "epoch": 0.5261427676894177, "grad_norm": 1.0635184049606323, "learning_rate": 0.00023027859237536655, "loss": 0.4492, "step": 3361 }, { "epoch": 0.5262993112085159, "grad_norm": 15.57529067993164, "learning_rate": 0.00023025415444770283, "loss": 3.5422, "step": 3362 }, { "epoch": 0.5264558547276142, "grad_norm": 0.9564080238342285, "learning_rate": 0.00023022971652003906, "loss": 0.3876, "step": 3363 }, { "epoch": 0.5266123982467126, "grad_norm": 1.117563247680664, "learning_rate": 0.00023020527859237533, "loss": 0.493, "step": 3364 }, { "epoch": 0.5267689417658109, "grad_norm": 1.619956135749817, 
"learning_rate": 0.0002301808406647116, "loss": 0.7056, "step": 3365 }, { "epoch": 0.5269254852849092, "grad_norm": 1.5382646322250366, "learning_rate": 0.00023015640273704786, "loss": 0.5172, "step": 3366 }, { "epoch": 0.5270820288040076, "grad_norm": 2.17067813873291, "learning_rate": 0.00023013196480938414, "loss": 0.8012, "step": 3367 }, { "epoch": 0.5272385723231058, "grad_norm": 1.4361052513122559, "learning_rate": 0.00023010752688172042, "loss": 0.6448, "step": 3368 }, { "epoch": 0.5273951158422041, "grad_norm": 1.2153189182281494, "learning_rate": 0.00023008308895405667, "loss": 0.5594, "step": 3369 }, { "epoch": 0.5275516593613024, "grad_norm": 1.1564387083053589, "learning_rate": 0.00023005865102639295, "loss": 0.6764, "step": 3370 }, { "epoch": 0.5277082028804008, "grad_norm": 1.91134512424469, "learning_rate": 0.0002300342130987292, "loss": 0.3817, "step": 3371 }, { "epoch": 0.5278647463994991, "grad_norm": 1.3591880798339844, "learning_rate": 0.00023000977517106545, "loss": 0.8981, "step": 3372 }, { "epoch": 0.5280212899185974, "grad_norm": 0.9462252259254456, "learning_rate": 0.00022998533724340173, "loss": 0.777, "step": 3373 }, { "epoch": 0.5281778334376956, "grad_norm": 1.3694474697113037, "learning_rate": 0.000229960899315738, "loss": 0.5385, "step": 3374 }, { "epoch": 0.528334376956794, "grad_norm": 1.7175570726394653, "learning_rate": 0.00022993646138807426, "loss": 0.7427, "step": 3375 }, { "epoch": 0.5284909204758923, "grad_norm": 1.3409452438354492, "learning_rate": 0.00022991202346041054, "loss": 0.8763, "step": 3376 }, { "epoch": 0.5286474639949906, "grad_norm": 2.1568386554718018, "learning_rate": 0.00022988758553274682, "loss": 0.8583, "step": 3377 }, { "epoch": 0.528804007514089, "grad_norm": 1.5670572519302368, "learning_rate": 0.00022986314760508304, "loss": 0.7728, "step": 3378 }, { "epoch": 0.5289605510331872, "grad_norm": 3.02644419670105, "learning_rate": 0.00022983870967741932, "loss": 1.4348, "step": 3379 }, { "epoch": 
0.5291170945522855, "grad_norm": 1.4505369663238525, "learning_rate": 0.0002298142717497556, "loss": 0.7838, "step": 3380 }, { "epoch": 0.5292736380713838, "grad_norm": 1.8097779750823975, "learning_rate": 0.00022978983382209185, "loss": 0.6987, "step": 3381 }, { "epoch": 0.5294301815904822, "grad_norm": 2.318065643310547, "learning_rate": 0.00022976539589442813, "loss": 0.9155, "step": 3382 }, { "epoch": 0.5295867251095805, "grad_norm": 1.7021375894546509, "learning_rate": 0.0002297409579667644, "loss": 0.4227, "step": 3383 }, { "epoch": 0.5297432686286788, "grad_norm": 1.7113087177276611, "learning_rate": 0.00022971652003910066, "loss": 0.9867, "step": 3384 }, { "epoch": 0.529899812147777, "grad_norm": 1.9517741203308105, "learning_rate": 0.00022969208211143694, "loss": 0.745, "step": 3385 }, { "epoch": 0.5300563556668754, "grad_norm": 2.208434820175171, "learning_rate": 0.00022966764418377321, "loss": 1.193, "step": 3386 }, { "epoch": 0.5302128991859737, "grad_norm": 1.7686792612075806, "learning_rate": 0.00022964320625610944, "loss": 1.2062, "step": 3387 }, { "epoch": 0.530369442705072, "grad_norm": 2.9696571826934814, "learning_rate": 0.00022961876832844572, "loss": 1.2893, "step": 3388 }, { "epoch": 0.5305259862241704, "grad_norm": 1.779247522354126, "learning_rate": 0.000229594330400782, "loss": 0.9809, "step": 3389 }, { "epoch": 0.5306825297432687, "grad_norm": 3.7535805702209473, "learning_rate": 0.00022956989247311825, "loss": 0.998, "step": 3390 }, { "epoch": 0.5308390732623669, "grad_norm": 4.040312767028809, "learning_rate": 0.00022954545454545452, "loss": 1.7128, "step": 3391 }, { "epoch": 0.5309956167814652, "grad_norm": 3.6977481842041016, "learning_rate": 0.0002295210166177908, "loss": 1.7015, "step": 3392 }, { "epoch": 0.5311521603005636, "grad_norm": 3.1999452114105225, "learning_rate": 0.00022949657869012705, "loss": 1.397, "step": 3393 }, { "epoch": 0.5313087038196619, "grad_norm": 1.5966978073120117, "learning_rate": 0.00022947214076246333, 
"loss": 0.6898, "step": 3394 }, { "epoch": 0.5314652473387602, "grad_norm": 3.1094870567321777, "learning_rate": 0.00022944770283479958, "loss": 1.1193, "step": 3395 }, { "epoch": 0.5316217908578584, "grad_norm": 4.34362268447876, "learning_rate": 0.00022942326490713583, "loss": 1.4771, "step": 3396 }, { "epoch": 0.5317783343769568, "grad_norm": 2.2344250679016113, "learning_rate": 0.0002293988269794721, "loss": 0.9172, "step": 3397 }, { "epoch": 0.5319348778960551, "grad_norm": 1.4617431163787842, "learning_rate": 0.0002293743890518084, "loss": 0.6987, "step": 3398 }, { "epoch": 0.5320914214151534, "grad_norm": 2.481635093688965, "learning_rate": 0.00022934995112414464, "loss": 1.653, "step": 3399 }, { "epoch": 0.5322479649342517, "grad_norm": 6.106346607208252, "learning_rate": 0.00022932551319648092, "loss": 1.5556, "step": 3400 }, { "epoch": 0.5324045084533501, "grad_norm": 0.7310137748718262, "learning_rate": 0.0002293010752688172, "loss": 0.4391, "step": 3401 }, { "epoch": 0.5325610519724483, "grad_norm": 0.5438380241394043, "learning_rate": 0.00022927663734115342, "loss": 0.3034, "step": 3402 }, { "epoch": 0.5327175954915466, "grad_norm": 0.5217984318733215, "learning_rate": 0.0002292521994134897, "loss": 0.3638, "step": 3403 }, { "epoch": 0.532874139010645, "grad_norm": 0.9906480312347412, "learning_rate": 0.00022922776148582598, "loss": 0.5653, "step": 3404 }, { "epoch": 0.5330306825297433, "grad_norm": 0.5368504524230957, "learning_rate": 0.00022920332355816223, "loss": 0.2615, "step": 3405 }, { "epoch": 0.5331872260488416, "grad_norm": 0.5763323903083801, "learning_rate": 0.0002291788856304985, "loss": 0.3562, "step": 3406 }, { "epoch": 0.5333437695679399, "grad_norm": 0.5000870227813721, "learning_rate": 0.0002291544477028348, "loss": 0.3222, "step": 3407 }, { "epoch": 0.5335003130870382, "grad_norm": 1.1395305395126343, "learning_rate": 0.00022913000977517104, "loss": 0.5258, "step": 3408 }, { "epoch": 0.5336568566061365, "grad_norm": 
0.6404957175254822, "learning_rate": 0.00022910557184750732, "loss": 0.394, "step": 3409 }, { "epoch": 0.5338134001252348, "grad_norm": 0.6041001677513123, "learning_rate": 0.0002290811339198436, "loss": 0.4798, "step": 3410 }, { "epoch": 0.5339699436443331, "grad_norm": 0.9585782289505005, "learning_rate": 0.00022905669599217982, "loss": 0.4944, "step": 3411 }, { "epoch": 0.5341264871634315, "grad_norm": 1.1818557977676392, "learning_rate": 0.0002290322580645161, "loss": 0.4911, "step": 3412 }, { "epoch": 0.5342830306825297, "grad_norm": 1.2576113939285278, "learning_rate": 0.00022900782013685238, "loss": 0.4152, "step": 3413 }, { "epoch": 0.534439574201628, "grad_norm": 1.7009164094924927, "learning_rate": 0.00022898338220918863, "loss": 0.5462, "step": 3414 }, { "epoch": 0.5345961177207263, "grad_norm": 2.0021321773529053, "learning_rate": 0.0002289589442815249, "loss": 0.5423, "step": 3415 }, { "epoch": 0.5347526612398247, "grad_norm": 1.420309066772461, "learning_rate": 0.00022893450635386118, "loss": 0.4347, "step": 3416 }, { "epoch": 0.534909204758923, "grad_norm": 1.0252100229263306, "learning_rate": 0.00022891006842619744, "loss": 0.5028, "step": 3417 }, { "epoch": 0.5350657482780213, "grad_norm": 1.3138428926467896, "learning_rate": 0.00022888563049853371, "loss": 0.7375, "step": 3418 }, { "epoch": 0.5352222917971196, "grad_norm": 0.9500388503074646, "learning_rate": 0.00022886119257086997, "loss": 0.3795, "step": 3419 }, { "epoch": 0.5353788353162179, "grad_norm": 0.8862390518188477, "learning_rate": 0.00022883675464320622, "loss": 0.3207, "step": 3420 }, { "epoch": 0.5355353788353162, "grad_norm": 1.376956582069397, "learning_rate": 0.0002288123167155425, "loss": 0.6341, "step": 3421 }, { "epoch": 0.5356919223544145, "grad_norm": 3.1740610599517822, "learning_rate": 0.00022878787878787877, "loss": 0.7873, "step": 3422 }, { "epoch": 0.5358484658735129, "grad_norm": 1.5563631057739258, "learning_rate": 0.00022876344086021502, "loss": 0.6119, "step": 3423 
}, { "epoch": 0.5360050093926112, "grad_norm": 2.3107411861419678, "learning_rate": 0.0002287390029325513, "loss": 0.9051, "step": 3424 }, { "epoch": 0.5361615529117094, "grad_norm": 1.220873236656189, "learning_rate": 0.00022871456500488758, "loss": 0.5476, "step": 3425 }, { "epoch": 0.5363180964308077, "grad_norm": 3.4480738639831543, "learning_rate": 0.0002286901270772238, "loss": 1.1999, "step": 3426 }, { "epoch": 0.5364746399499061, "grad_norm": 2.0392181873321533, "learning_rate": 0.00022866568914956008, "loss": 0.8199, "step": 3427 }, { "epoch": 0.5366311834690044, "grad_norm": 1.5251580476760864, "learning_rate": 0.00022864125122189636, "loss": 0.9086, "step": 3428 }, { "epoch": 0.5367877269881027, "grad_norm": 1.5712864398956299, "learning_rate": 0.00022861681329423261, "loss": 1.0547, "step": 3429 }, { "epoch": 0.536944270507201, "grad_norm": 2.3451268672943115, "learning_rate": 0.0002285923753665689, "loss": 0.6112, "step": 3430 }, { "epoch": 0.5371008140262993, "grad_norm": 2.3855226039886475, "learning_rate": 0.00022856793743890517, "loss": 1.3906, "step": 3431 }, { "epoch": 0.5372573575453976, "grad_norm": 2.014420747756958, "learning_rate": 0.00022854349951124142, "loss": 0.9376, "step": 3432 }, { "epoch": 0.5374139010644959, "grad_norm": 3.3102900981903076, "learning_rate": 0.0002285190615835777, "loss": 0.8464, "step": 3433 }, { "epoch": 0.5375704445835943, "grad_norm": 2.707771062850952, "learning_rate": 0.00022849462365591398, "loss": 0.8186, "step": 3434 }, { "epoch": 0.5377269881026926, "grad_norm": 1.4002277851104736, "learning_rate": 0.0002284701857282502, "loss": 1.0236, "step": 3435 }, { "epoch": 0.5378835316217908, "grad_norm": 2.523838996887207, "learning_rate": 0.00022844574780058648, "loss": 1.027, "step": 3436 }, { "epoch": 0.5380400751408891, "grad_norm": 2.5714972019195557, "learning_rate": 0.00022842130987292276, "loss": 1.0235, "step": 3437 }, { "epoch": 0.5381966186599875, "grad_norm": 3.162550926208496, "learning_rate": 
0.000228396871945259, "loss": 1.1276, "step": 3438 }, { "epoch": 0.5383531621790858, "grad_norm": 1.9457319974899292, "learning_rate": 0.0002283724340175953, "loss": 1.0137, "step": 3439 }, { "epoch": 0.5385097056981841, "grad_norm": 1.903373122215271, "learning_rate": 0.00022834799608993157, "loss": 1.1942, "step": 3440 }, { "epoch": 0.5386662492172825, "grad_norm": 2.4530985355377197, "learning_rate": 0.00022832355816226782, "loss": 0.9727, "step": 3441 }, { "epoch": 0.5388227927363807, "grad_norm": 3.6654627323150635, "learning_rate": 0.00022829912023460407, "loss": 1.8357, "step": 3442 }, { "epoch": 0.538979336255479, "grad_norm": 2.052656650543213, "learning_rate": 0.00022827468230694035, "loss": 1.2149, "step": 3443 }, { "epoch": 0.5391358797745773, "grad_norm": 2.6127777099609375, "learning_rate": 0.0002282502443792766, "loss": 1.7517, "step": 3444 }, { "epoch": 0.5392924232936757, "grad_norm": 1.7864444255828857, "learning_rate": 0.00022822580645161288, "loss": 2.0915, "step": 3445 }, { "epoch": 0.539448966812774, "grad_norm": 1.7597533464431763, "learning_rate": 0.00022820136852394916, "loss": 0.6787, "step": 3446 }, { "epoch": 0.5396055103318722, "grad_norm": 1.367319941520691, "learning_rate": 0.0002281769305962854, "loss": 0.7049, "step": 3447 }, { "epoch": 0.5397620538509705, "grad_norm": 3.0287587642669678, "learning_rate": 0.00022815249266862169, "loss": 1.3499, "step": 3448 }, { "epoch": 0.5399185973700689, "grad_norm": 1.6455621719360352, "learning_rate": 0.00022812805474095796, "loss": 0.6301, "step": 3449 }, { "epoch": 0.5400751408891672, "grad_norm": 1.5209710597991943, "learning_rate": 0.0002281036168132942, "loss": 1.35, "step": 3450 }, { "epoch": 0.5402316844082655, "grad_norm": 0.5051299333572388, "learning_rate": 0.00022807917888563047, "loss": 0.3548, "step": 3451 }, { "epoch": 0.5403882279273639, "grad_norm": 0.9242395162582397, "learning_rate": 0.00022805474095796674, "loss": 0.518, "step": 3452 }, { "epoch": 0.5405447714464621, 
"grad_norm": 0.676452100276947, "learning_rate": 0.000228030303030303, "loss": 0.2494, "step": 3453 }, { "epoch": 0.5407013149655604, "grad_norm": 0.5453509092330933, "learning_rate": 0.00022800586510263927, "loss": 0.3723, "step": 3454 }, { "epoch": 0.5408578584846587, "grad_norm": 0.7466689348220825, "learning_rate": 0.00022798142717497555, "loss": 0.3795, "step": 3455 }, { "epoch": 0.5410144020037571, "grad_norm": 2.0270116329193115, "learning_rate": 0.0002279569892473118, "loss": 0.5283, "step": 3456 }, { "epoch": 0.5411709455228554, "grad_norm": 0.8889615535736084, "learning_rate": 0.00022793255131964808, "loss": 0.4212, "step": 3457 }, { "epoch": 0.5413274890419537, "grad_norm": 0.9550113677978516, "learning_rate": 0.00022790811339198436, "loss": 0.4152, "step": 3458 }, { "epoch": 0.5414840325610519, "grad_norm": 0.8343170881271362, "learning_rate": 0.00022788367546432058, "loss": 0.298, "step": 3459 }, { "epoch": 0.5416405760801503, "grad_norm": 0.9619960784912109, "learning_rate": 0.00022785923753665686, "loss": 0.4314, "step": 3460 }, { "epoch": 0.5417971195992486, "grad_norm": 0.5965786576271057, "learning_rate": 0.00022783479960899314, "loss": 0.3688, "step": 3461 }, { "epoch": 0.5419536631183469, "grad_norm": 0.9296935200691223, "learning_rate": 0.0002278103616813294, "loss": 0.4797, "step": 3462 }, { "epoch": 0.5421102066374452, "grad_norm": 1.0688129663467407, "learning_rate": 0.00022778592375366567, "loss": 0.513, "step": 3463 }, { "epoch": 0.5422667501565435, "grad_norm": 1.701122522354126, "learning_rate": 0.00022776148582600195, "loss": 0.6811, "step": 3464 }, { "epoch": 0.5424232936756418, "grad_norm": 1.191488265991211, "learning_rate": 0.0002277370478983382, "loss": 0.5554, "step": 3465 }, { "epoch": 0.5425798371947401, "grad_norm": 0.9095986485481262, "learning_rate": 0.00022771260997067445, "loss": 0.5331, "step": 3466 }, { "epoch": 0.5427363807138385, "grad_norm": 1.1927762031555176, "learning_rate": 0.00022768817204301073, "loss": 0.4866, 
"step": 3467 }, { "epoch": 0.5428929242329368, "grad_norm": 1.4253158569335938, "learning_rate": 0.00022766373411534698, "loss": 0.9596, "step": 3468 }, { "epoch": 0.5430494677520351, "grad_norm": 1.2457616329193115, "learning_rate": 0.00022763929618768326, "loss": 0.7098, "step": 3469 }, { "epoch": 0.5432060112711333, "grad_norm": 1.5363194942474365, "learning_rate": 0.00022761485826001954, "loss": 0.9893, "step": 3470 }, { "epoch": 0.5433625547902317, "grad_norm": 1.4052916765213013, "learning_rate": 0.0002275904203323558, "loss": 0.671, "step": 3471 }, { "epoch": 0.54351909830933, "grad_norm": 1.5679124593734741, "learning_rate": 0.00022756598240469207, "loss": 0.9099, "step": 3472 }, { "epoch": 0.5436756418284283, "grad_norm": 1.642220139503479, "learning_rate": 0.00022754154447702835, "loss": 0.6937, "step": 3473 }, { "epoch": 0.5438321853475266, "grad_norm": 2.2005414962768555, "learning_rate": 0.00022751710654936457, "loss": 0.6677, "step": 3474 }, { "epoch": 0.543988728866625, "grad_norm": 1.1939611434936523, "learning_rate": 0.00022749266862170085, "loss": 0.6974, "step": 3475 }, { "epoch": 0.5441452723857232, "grad_norm": 1.181011438369751, "learning_rate": 0.00022746823069403713, "loss": 0.7005, "step": 3476 }, { "epoch": 0.5443018159048215, "grad_norm": 2.0277912616729736, "learning_rate": 0.00022744379276637338, "loss": 1.0437, "step": 3477 }, { "epoch": 0.5444583594239198, "grad_norm": 2.596615791320801, "learning_rate": 0.00022741935483870966, "loss": 1.2138, "step": 3478 }, { "epoch": 0.5446149029430182, "grad_norm": 1.2545539140701294, "learning_rate": 0.00022739491691104593, "loss": 0.6488, "step": 3479 }, { "epoch": 0.5447714464621165, "grad_norm": 1.7148044109344482, "learning_rate": 0.00022737047898338219, "loss": 0.8829, "step": 3480 }, { "epoch": 0.5449279899812148, "grad_norm": 3.2928473949432373, "learning_rate": 0.00022734604105571846, "loss": 1.189, "step": 3481 }, { "epoch": 0.545084533500313, "grad_norm": 2.6867687702178955, 
"learning_rate": 0.00022732160312805474, "loss": 1.0388, "step": 3482 }, { "epoch": 0.5452410770194114, "grad_norm": 2.231804132461548, "learning_rate": 0.00022729716520039097, "loss": 0.8676, "step": 3483 }, { "epoch": 0.5453976205385097, "grad_norm": 1.7448841333389282, "learning_rate": 0.00022727272727272725, "loss": 0.8428, "step": 3484 }, { "epoch": 0.545554164057608, "grad_norm": 1.843652606010437, "learning_rate": 0.00022724828934506352, "loss": 0.788, "step": 3485 }, { "epoch": 0.5457107075767064, "grad_norm": 2.4735724925994873, "learning_rate": 0.00022722385141739978, "loss": 1.2652, "step": 3486 }, { "epoch": 0.5458672510958046, "grad_norm": 1.6568492650985718, "learning_rate": 0.00022719941348973605, "loss": 1.159, "step": 3487 }, { "epoch": 0.5460237946149029, "grad_norm": 2.2103464603424072, "learning_rate": 0.00022717497556207233, "loss": 1.0109, "step": 3488 }, { "epoch": 0.5461803381340012, "grad_norm": 2.2817304134368896, "learning_rate": 0.00022715053763440856, "loss": 0.7408, "step": 3489 }, { "epoch": 0.5463368816530996, "grad_norm": 1.5361602306365967, "learning_rate": 0.00022712609970674483, "loss": 0.9899, "step": 3490 }, { "epoch": 0.5464934251721979, "grad_norm": 2.22003436088562, "learning_rate": 0.0002271016617790811, "loss": 1.565, "step": 3491 }, { "epoch": 0.5466499686912962, "grad_norm": 1.5563693046569824, "learning_rate": 0.00022707722385141736, "loss": 1.1689, "step": 3492 }, { "epoch": 0.5468065122103944, "grad_norm": 1.7934682369232178, "learning_rate": 0.00022705278592375364, "loss": 1.2611, "step": 3493 }, { "epoch": 0.5469630557294928, "grad_norm": 1.765387773513794, "learning_rate": 0.00022702834799608992, "loss": 1.3886, "step": 3494 }, { "epoch": 0.5471195992485911, "grad_norm": 4.801526069641113, "learning_rate": 0.00022700391006842617, "loss": 1.7589, "step": 3495 }, { "epoch": 0.5472761427676894, "grad_norm": NaN, "learning_rate": 0.00022700391006842617, "loss": 0.0, "step": 3496 }, { "epoch": 0.5474326862867878, 
"grad_norm": 2.799443244934082, "learning_rate": 0.00022697947214076245, "loss": 1.0594, "step": 3497 }, { "epoch": 0.5475892298058861, "grad_norm": 1.0865267515182495, "learning_rate": 0.00022695503421309873, "loss": 0.6357, "step": 3498 }, { "epoch": 0.5477457733249843, "grad_norm": 1.8002020120620728, "learning_rate": 0.00022693059628543495, "loss": 0.5283, "step": 3499 }, { "epoch": 0.5479023168440826, "grad_norm": 2.474811315536499, "learning_rate": 0.00022690615835777123, "loss": 1.3573, "step": 3500 }, { "epoch": 0.548058860363181, "grad_norm": 0.7308578491210938, "learning_rate": 0.0002268817204301075, "loss": 0.255, "step": 3501 }, { "epoch": 0.5482154038822793, "grad_norm": 0.9229421615600586, "learning_rate": 0.00022685728250244376, "loss": 0.5089, "step": 3502 }, { "epoch": 0.5483719474013776, "grad_norm": 0.6674167513847351, "learning_rate": 0.00022683284457478004, "loss": 0.3171, "step": 3503 }, { "epoch": 0.5485284909204758, "grad_norm": 0.6612511873245239, "learning_rate": 0.00022680840664711632, "loss": 0.3403, "step": 3504 }, { "epoch": 0.5486850344395742, "grad_norm": 0.6101376414299011, "learning_rate": 0.00022678396871945257, "loss": 0.3597, "step": 3505 }, { "epoch": 0.5488415779586725, "grad_norm": 0.6959841251373291, "learning_rate": 0.00022675953079178885, "loss": 0.4338, "step": 3506 }, { "epoch": 0.5489981214777708, "grad_norm": 0.6419839262962341, "learning_rate": 0.00022673509286412513, "loss": 0.3483, "step": 3507 }, { "epoch": 0.5491546649968692, "grad_norm": 0.5177487134933472, "learning_rate": 0.00022671065493646135, "loss": 0.2916, "step": 3508 }, { "epoch": 0.5493112085159675, "grad_norm": 0.5536925196647644, "learning_rate": 0.00022668621700879763, "loss": 0.2922, "step": 3509 }, { "epoch": 0.5494677520350657, "grad_norm": 0.7985596656799316, "learning_rate": 0.0002266617790811339, "loss": 0.4654, "step": 3510 }, { "epoch": 0.549624295554164, "grad_norm": 0.6217026710510254, "learning_rate": 0.00022663734115347016, "loss": 0.423, 
"step": 3511 }, { "epoch": 0.5497808390732624, "grad_norm": 0.8252955675125122, "learning_rate": 0.00022661290322580644, "loss": 0.3649, "step": 3512 }, { "epoch": 0.5499373825923607, "grad_norm": 0.7620217204093933, "learning_rate": 0.00022658846529814271, "loss": 0.2662, "step": 3513 }, { "epoch": 0.550093926111459, "grad_norm": 1.2428134679794312, "learning_rate": 0.00022656402737047894, "loss": 0.8678, "step": 3514 }, { "epoch": 0.5502504696305573, "grad_norm": 1.2475255727767944, "learning_rate": 0.00022653958944281522, "loss": 0.6157, "step": 3515 }, { "epoch": 0.5504070131496556, "grad_norm": 0.6180065870285034, "learning_rate": 0.0002265151515151515, "loss": 0.4179, "step": 3516 }, { "epoch": 0.5505635566687539, "grad_norm": 1.0421558618545532, "learning_rate": 0.00022649071358748775, "loss": 0.5168, "step": 3517 }, { "epoch": 0.5507201001878522, "grad_norm": 1.2510666847229004, "learning_rate": 0.00022646627565982402, "loss": 0.4618, "step": 3518 }, { "epoch": 0.5508766437069506, "grad_norm": 0.9947577118873596, "learning_rate": 0.0002264418377321603, "loss": 0.4542, "step": 3519 }, { "epoch": 0.5510331872260489, "grad_norm": 1.770212173461914, "learning_rate": 0.00022641739980449655, "loss": 0.5551, "step": 3520 }, { "epoch": 0.5511897307451471, "grad_norm": 1.2235960960388184, "learning_rate": 0.00022639296187683283, "loss": 0.5831, "step": 3521 }, { "epoch": 0.5513462742642454, "grad_norm": 3.5953123569488525, "learning_rate": 0.0002263685239491691, "loss": 0.8161, "step": 3522 }, { "epoch": 0.5515028177833438, "grad_norm": 2.296663761138916, "learning_rate": 0.00022634408602150533, "loss": 0.8925, "step": 3523 }, { "epoch": 0.5516593613024421, "grad_norm": 1.9417630434036255, "learning_rate": 0.0002263196480938416, "loss": 1.1062, "step": 3524 }, { "epoch": 0.5518159048215404, "grad_norm": 1.476485013961792, "learning_rate": 0.0002262952101661779, "loss": 0.7668, "step": 3525 }, { "epoch": 0.5519724483406387, "grad_norm": 2.0487520694732666, 
"learning_rate": 0.00022627077223851414, "loss": 0.9898, "step": 3526 }, { "epoch": 0.552128991859737, "grad_norm": 4.198084354400635, "learning_rate": 0.00022624633431085042, "loss": 1.3848, "step": 3527 }, { "epoch": 0.5522855353788353, "grad_norm": 2.556408166885376, "learning_rate": 0.0002262218963831867, "loss": 0.8235, "step": 3528 }, { "epoch": 0.5524420788979336, "grad_norm": 2.0827739238739014, "learning_rate": 0.00022619745845552295, "loss": 0.8973, "step": 3529 }, { "epoch": 0.552598622417032, "grad_norm": 3.2092297077178955, "learning_rate": 0.00022617302052785923, "loss": 1.2371, "step": 3530 }, { "epoch": 0.5527551659361303, "grad_norm": 2.5607619285583496, "learning_rate": 0.00022614858260019548, "loss": 0.9781, "step": 3531 }, { "epoch": 0.5529117094552286, "grad_norm": 1.599326729774475, "learning_rate": 0.00022612414467253173, "loss": 1.0107, "step": 3532 }, { "epoch": 0.5530682529743268, "grad_norm": 2.7297661304473877, "learning_rate": 0.000226099706744868, "loss": 0.766, "step": 3533 }, { "epoch": 0.5532247964934252, "grad_norm": 2.919910430908203, "learning_rate": 0.0002260752688172043, "loss": 0.9601, "step": 3534 }, { "epoch": 0.5533813400125235, "grad_norm": 2.319716691970825, "learning_rate": 0.00022605083088954054, "loss": 0.9165, "step": 3535 }, { "epoch": 0.5535378835316218, "grad_norm": 2.5673739910125732, "learning_rate": 0.00022602639296187682, "loss": 0.954, "step": 3536 }, { "epoch": 0.5536944270507201, "grad_norm": 3.195618152618408, "learning_rate": 0.0002260019550342131, "loss": 0.9416, "step": 3537 }, { "epoch": 0.5538509705698184, "grad_norm": 3.3584465980529785, "learning_rate": 0.00022597751710654932, "loss": 1.2491, "step": 3538 }, { "epoch": 0.5540075140889167, "grad_norm": 2.711599349975586, "learning_rate": 0.0002259530791788856, "loss": 0.9559, "step": 3539 }, { "epoch": 0.554164057608015, "grad_norm": 3.927182912826538, "learning_rate": 0.00022592864125122188, "loss": 1.0383, "step": 3540 }, { "epoch": 
0.5543206011271133, "grad_norm": 2.409851312637329, "learning_rate": 0.00022590420332355813, "loss": 1.2182, "step": 3541 }, { "epoch": 0.5544771446462117, "grad_norm": 2.19413423538208, "learning_rate": 0.0002258797653958944, "loss": 1.6216, "step": 3542 }, { "epoch": 0.55463368816531, "grad_norm": 2.6822988986968994, "learning_rate": 0.00022585532746823069, "loss": 1.5274, "step": 3543 }, { "epoch": 0.5547902316844082, "grad_norm": 1.415163516998291, "learning_rate": 0.00022583088954056694, "loss": 1.097, "step": 3544 }, { "epoch": 0.5549467752035065, "grad_norm": 2.0902304649353027, "learning_rate": 0.00022580645161290321, "loss": 0.7808, "step": 3545 }, { "epoch": 0.5551033187226049, "grad_norm": 2.3548595905303955, "learning_rate": 0.0002257820136852395, "loss": 0.4876, "step": 3546 }, { "epoch": 0.5552598622417032, "grad_norm": 1.715476632118225, "learning_rate": 0.00022575757575757572, "loss": 0.8252, "step": 3547 }, { "epoch": 0.5554164057608015, "grad_norm": 2.3043689727783203, "learning_rate": 0.000225733137829912, "loss": 0.8234, "step": 3548 }, { "epoch": 0.5555729492798999, "grad_norm": 1.6674572229385376, "learning_rate": 0.00022570869990224827, "loss": 1.091, "step": 3549 }, { "epoch": 0.5557294927989981, "grad_norm": 4.341742038726807, "learning_rate": 0.00022568426197458453, "loss": 1.4916, "step": 3550 }, { "epoch": 0.5558860363180964, "grad_norm": 0.5292091369628906, "learning_rate": 0.0002256598240469208, "loss": 0.3373, "step": 3551 }, { "epoch": 0.5560425798371947, "grad_norm": 0.5695773959159851, "learning_rate": 0.00022563538611925708, "loss": 0.316, "step": 3552 }, { "epoch": 0.5561991233562931, "grad_norm": 0.6100854277610779, "learning_rate": 0.00022561094819159333, "loss": 0.3825, "step": 3553 }, { "epoch": 0.5563556668753914, "grad_norm": 0.7489566802978516, "learning_rate": 0.0002255865102639296, "loss": 0.4018, "step": 3554 }, { "epoch": 0.5565122103944896, "grad_norm": 0.8366956114768982, "learning_rate": 0.00022556207233626586, 
"loss": 0.4573, "step": 3555 }, { "epoch": 0.5566687539135879, "grad_norm": 0.6405544877052307, "learning_rate": 0.00022553763440860211, "loss": 0.4246, "step": 3556 }, { "epoch": 0.5568252974326863, "grad_norm": 0.7330952882766724, "learning_rate": 0.0002255131964809384, "loss": 0.2276, "step": 3557 }, { "epoch": 0.5569818409517846, "grad_norm": 0.7685224413871765, "learning_rate": 0.00022548875855327467, "loss": 0.3543, "step": 3558 }, { "epoch": 0.5571383844708829, "grad_norm": 0.679145872592926, "learning_rate": 0.00022546432062561092, "loss": 0.506, "step": 3559 }, { "epoch": 0.5572949279899813, "grad_norm": 0.8586009740829468, "learning_rate": 0.0002254398826979472, "loss": 0.4348, "step": 3560 }, { "epoch": 0.5574514715090795, "grad_norm": 0.8567745685577393, "learning_rate": 0.00022541544477028348, "loss": 0.3119, "step": 3561 }, { "epoch": 0.5576080150281778, "grad_norm": 0.9251272082328796, "learning_rate": 0.0002253910068426197, "loss": 0.4098, "step": 3562 }, { "epoch": 0.5577645585472761, "grad_norm": 0.8240041136741638, "learning_rate": 0.00022536656891495598, "loss": 0.3483, "step": 3563 }, { "epoch": 0.5579211020663745, "grad_norm": 0.5420299172401428, "learning_rate": 0.00022534213098729226, "loss": 0.3981, "step": 3564 }, { "epoch": 0.5580776455854728, "grad_norm": NaN, "learning_rate": 0.00022534213098729226, "loss": 0.0, "step": 3565 }, { "epoch": 0.5582341891045711, "grad_norm": 5.230978488922119, "learning_rate": 0.0002253176930596285, "loss": 0.8704, "step": 3566 }, { "epoch": 0.5583907326236693, "grad_norm": 1.3879233598709106, "learning_rate": 0.0002252932551319648, "loss": 0.5912, "step": 3567 }, { "epoch": 0.5585472761427677, "grad_norm": 1.5150583982467651, "learning_rate": 0.00022526881720430107, "loss": 0.6871, "step": 3568 }, { "epoch": 0.558703819661866, "grad_norm": 1.0342755317687988, "learning_rate": 0.00022524437927663732, "loss": 0.6863, "step": 3569 }, { "epoch": 0.5588603631809643, "grad_norm": 1.3024189472198486, 
"learning_rate": 0.0002252199413489736, "loss": 0.7084, "step": 3570 }, { "epoch": 0.5590169067000627, "grad_norm": 1.985766887664795, "learning_rate": 0.00022519550342130988, "loss": 0.9109, "step": 3571 }, { "epoch": 0.5591734502191609, "grad_norm": 1.3010162115097046, "learning_rate": 0.0002251710654936461, "loss": 0.5658, "step": 3572 }, { "epoch": 0.5593299937382592, "grad_norm": 1.4666105508804321, "learning_rate": 0.00022514662756598238, "loss": 1.1083, "step": 3573 }, { "epoch": 0.5594865372573575, "grad_norm": 1.7869631052017212, "learning_rate": 0.00022512218963831866, "loss": 0.655, "step": 3574 }, { "epoch": 0.5596430807764559, "grad_norm": 1.4627180099487305, "learning_rate": 0.0002250977517106549, "loss": 0.62, "step": 3575 }, { "epoch": 0.5597996242955542, "grad_norm": 2.270118236541748, "learning_rate": 0.00022507331378299119, "loss": 1.0856, "step": 3576 }, { "epoch": 0.5599561678146525, "grad_norm": 1.2834758758544922, "learning_rate": 0.00022504887585532746, "loss": 0.3643, "step": 3577 }, { "epoch": 0.5601127113337507, "grad_norm": 2.207064628601074, "learning_rate": 0.00022502443792766372, "loss": 0.6055, "step": 3578 }, { "epoch": 0.5602692548528491, "grad_norm": 4.434436798095703, "learning_rate": 0.000225, "loss": 1.3798, "step": 3579 }, { "epoch": 0.5604257983719474, "grad_norm": 2.0381810665130615, "learning_rate": 0.00022497556207233625, "loss": 1.0831, "step": 3580 }, { "epoch": 0.5605823418910457, "grad_norm": 2.15852689743042, "learning_rate": 0.0002249511241446725, "loss": 0.7524, "step": 3581 }, { "epoch": 0.560738885410144, "grad_norm": 3.1960198879241943, "learning_rate": 0.00022492668621700877, "loss": 0.8807, "step": 3582 }, { "epoch": 0.5608954289292424, "grad_norm": 1.4990772008895874, "learning_rate": 0.00022490224828934505, "loss": 0.4864, "step": 3583 }, { "epoch": 0.5610519724483406, "grad_norm": 2.38069486618042, "learning_rate": 0.0002248778103616813, "loss": 0.9871, "step": 3584 }, { "epoch": 0.5612085159674389, 
"grad_norm": 3.0171408653259277, "learning_rate": 0.00022485337243401758, "loss": 1.1582, "step": 3585 }, { "epoch": 0.5613650594865373, "grad_norm": 2.1977977752685547, "learning_rate": 0.00022482893450635386, "loss": 0.6874, "step": 3586 }, { "epoch": 0.5615216030056356, "grad_norm": 2.109252452850342, "learning_rate": 0.00022480449657869009, "loss": 0.8322, "step": 3587 }, { "epoch": 0.5616781465247339, "grad_norm": 2.189072370529175, "learning_rate": 0.00022478005865102636, "loss": 1.0376, "step": 3588 }, { "epoch": 0.5618346900438321, "grad_norm": 3.6662800312042236, "learning_rate": 0.00022475562072336264, "loss": 1.7067, "step": 3589 }, { "epoch": 0.5619912335629305, "grad_norm": 2.0984466075897217, "learning_rate": 0.0002247311827956989, "loss": 0.9702, "step": 3590 }, { "epoch": 0.5621477770820288, "grad_norm": 1.8233792781829834, "learning_rate": 0.00022470674486803517, "loss": 1.1355, "step": 3591 }, { "epoch": 0.5623043206011271, "grad_norm": 1.7763501405715942, "learning_rate": 0.00022468230694037145, "loss": 1.3035, "step": 3592 }, { "epoch": 0.5624608641202254, "grad_norm": 3.240084171295166, "learning_rate": 0.0002246578690127077, "loss": 1.4573, "step": 3593 }, { "epoch": 0.5626174076393238, "grad_norm": 2.5310521125793457, "learning_rate": 0.00022463343108504398, "loss": 1.5207, "step": 3594 }, { "epoch": 0.562773951158422, "grad_norm": 1.6432092189788818, "learning_rate": 0.00022460899315738026, "loss": 1.0834, "step": 3595 }, { "epoch": 0.5629304946775203, "grad_norm": 0.8675611019134521, "learning_rate": 0.00022458455522971648, "loss": 0.5104, "step": 3596 }, { "epoch": 0.5630870381966186, "grad_norm": 1.3985061645507812, "learning_rate": 0.00022456011730205276, "loss": 0.7592, "step": 3597 }, { "epoch": 0.563243581715717, "grad_norm": 2.843593120574951, "learning_rate": 0.00022453567937438904, "loss": 0.8605, "step": 3598 }, { "epoch": 0.5634001252348153, "grad_norm": 2.444558620452881, "learning_rate": 0.0002245112414467253, "loss": 0.9727, 
"step": 3599 }, { "epoch": 0.5635566687539136, "grad_norm": 2.5446102619171143, "learning_rate": 0.00022448680351906157, "loss": 1.3655, "step": 3600 }, { "epoch": 0.5637132122730119, "grad_norm": 0.5711503624916077, "learning_rate": 0.00022446236559139785, "loss": 0.35, "step": 3601 }, { "epoch": 0.5638697557921102, "grad_norm": 0.39799460768699646, "learning_rate": 0.0002244379276637341, "loss": 0.238, "step": 3602 }, { "epoch": 0.5640262993112085, "grad_norm": 0.5093404054641724, "learning_rate": 0.00022441348973607035, "loss": 0.4052, "step": 3603 }, { "epoch": 0.5641828428303068, "grad_norm": 0.7445962429046631, "learning_rate": 0.00022438905180840663, "loss": 0.6028, "step": 3604 }, { "epoch": 0.5643393863494052, "grad_norm": 0.8046715259552002, "learning_rate": 0.00022436461388074288, "loss": 0.3982, "step": 3605 }, { "epoch": 0.5644959298685035, "grad_norm": 0.7545210123062134, "learning_rate": 0.00022434017595307916, "loss": 0.3914, "step": 3606 }, { "epoch": 0.5646524733876017, "grad_norm": 0.8643534183502197, "learning_rate": 0.00022431573802541544, "loss": 0.4382, "step": 3607 }, { "epoch": 0.5648090169067, "grad_norm": 0.7606629133224487, "learning_rate": 0.0002242913000977517, "loss": 0.3623, "step": 3608 }, { "epoch": 0.5649655604257984, "grad_norm": 1.1187993288040161, "learning_rate": 0.00022426686217008796, "loss": 0.5698, "step": 3609 }, { "epoch": 0.5651221039448967, "grad_norm": 0.6099269390106201, "learning_rate": 0.00022424242424242424, "loss": 0.319, "step": 3610 }, { "epoch": 0.565278647463995, "grad_norm": 1.3326719999313354, "learning_rate": 0.00022421798631476047, "loss": 0.6025, "step": 3611 }, { "epoch": 0.5654351909830932, "grad_norm": 2.37567400932312, "learning_rate": 0.00022419354838709675, "loss": 1.0935, "step": 3612 }, { "epoch": 0.5655917345021916, "grad_norm": 0.8539568185806274, "learning_rate": 0.00022416911045943302, "loss": 0.3043, "step": 3613 }, { "epoch": 0.5657482780212899, "grad_norm": 1.237491488456726, 
"learning_rate": 0.00022414467253176928, "loss": 0.4338, "step": 3614 }, { "epoch": 0.5659048215403882, "grad_norm": 0.9956717491149902, "learning_rate": 0.00022412023460410555, "loss": 0.4656, "step": 3615 }, { "epoch": 0.5660613650594866, "grad_norm": 1.1546032428741455, "learning_rate": 0.00022409579667644183, "loss": 0.7444, "step": 3616 }, { "epoch": 0.5662179085785849, "grad_norm": 1.377625823020935, "learning_rate": 0.00022407135874877808, "loss": 0.8117, "step": 3617 }, { "epoch": 0.5663744520976831, "grad_norm": 1.8123804330825806, "learning_rate": 0.00022404692082111436, "loss": 0.566, "step": 3618 }, { "epoch": 0.5665309956167814, "grad_norm": 1.1724187135696411, "learning_rate": 0.00022402248289345064, "loss": 0.5954, "step": 3619 }, { "epoch": 0.5666875391358798, "grad_norm": 1.4252662658691406, "learning_rate": 0.00022399804496578686, "loss": 0.6772, "step": 3620 }, { "epoch": 0.5668440826549781, "grad_norm": 2.668489456176758, "learning_rate": 0.00022397360703812314, "loss": 0.897, "step": 3621 }, { "epoch": 0.5670006261740764, "grad_norm": 1.0190789699554443, "learning_rate": 0.00022394916911045942, "loss": 0.6887, "step": 3622 }, { "epoch": 0.5671571696931748, "grad_norm": 1.3881101608276367, "learning_rate": 0.00022392473118279567, "loss": 0.7768, "step": 3623 }, { "epoch": 0.567313713212273, "grad_norm": 1.1338660717010498, "learning_rate": 0.00022390029325513195, "loss": 0.6853, "step": 3624 }, { "epoch": 0.5674702567313713, "grad_norm": 1.3985800743103027, "learning_rate": 0.00022387585532746823, "loss": 0.7319, "step": 3625 }, { "epoch": 0.5676268002504696, "grad_norm": 3.043156147003174, "learning_rate": 0.00022385141739980448, "loss": 1.2041, "step": 3626 }, { "epoch": 0.567783343769568, "grad_norm": 1.2043315172195435, "learning_rate": 0.00022382697947214073, "loss": 0.5941, "step": 3627 }, { "epoch": 0.5679398872886663, "grad_norm": 1.9357560873031616, "learning_rate": 0.000223802541544477, "loss": 1.1688, "step": 3628 }, { "epoch": 
0.5680964308077645, "grad_norm": 2.4211204051971436, "learning_rate": 0.00022377810361681326, "loss": 0.9962, "step": 3629 }, { "epoch": 0.5682529743268628, "grad_norm": 2.997868299484253, "learning_rate": 0.00022375366568914954, "loss": 0.7847, "step": 3630 }, { "epoch": 0.5684095178459612, "grad_norm": 2.5542614459991455, "learning_rate": 0.00022372922776148582, "loss": 1.0314, "step": 3631 }, { "epoch": 0.5685660613650595, "grad_norm": 2.0041849613189697, "learning_rate": 0.00022370478983382207, "loss": 1.4012, "step": 3632 }, { "epoch": 0.5687226048841578, "grad_norm": 2.1820318698883057, "learning_rate": 0.00022368035190615835, "loss": 1.2137, "step": 3633 }, { "epoch": 0.5688791484032562, "grad_norm": 2.45853853225708, "learning_rate": 0.00022365591397849463, "loss": 1.0993, "step": 3634 }, { "epoch": 0.5690356919223544, "grad_norm": 2.2817904949188232, "learning_rate": 0.00022363147605083085, "loss": 1.1108, "step": 3635 }, { "epoch": 0.5691922354414527, "grad_norm": 1.8414242267608643, "learning_rate": 0.00022360703812316713, "loss": 0.9754, "step": 3636 }, { "epoch": 0.569348778960551, "grad_norm": 1.4161016941070557, "learning_rate": 0.0002235826001955034, "loss": 0.6568, "step": 3637 }, { "epoch": 0.5695053224796494, "grad_norm": 1.995675802230835, "learning_rate": 0.00022355816226783966, "loss": 1.1306, "step": 3638 }, { "epoch": 0.5696618659987477, "grad_norm": 2.4192090034484863, "learning_rate": 0.00022353372434017594, "loss": 1.55, "step": 3639 }, { "epoch": 0.569818409517846, "grad_norm": 3.0786073207855225, "learning_rate": 0.00022350928641251221, "loss": 1.8578, "step": 3640 }, { "epoch": 0.5699749530369442, "grad_norm": 2.814448118209839, "learning_rate": 0.00022348484848484847, "loss": 1.5919, "step": 3641 }, { "epoch": 0.5701314965560426, "grad_norm": 1.6886667013168335, "learning_rate": 0.00022346041055718474, "loss": 0.9254, "step": 3642 }, { "epoch": 0.5702880400751409, "grad_norm": 3.294468879699707, "learning_rate": 
0.00022343597262952102, "loss": 1.1636, "step": 3643 }, { "epoch": 0.5704445835942392, "grad_norm": 2.3327720165252686, "learning_rate": 0.00022341153470185725, "loss": 1.2624, "step": 3644 }, { "epoch": 0.5706011271133375, "grad_norm": 1.5095564126968384, "learning_rate": 0.00022338709677419352, "loss": 0.4536, "step": 3645 }, { "epoch": 0.5707576706324358, "grad_norm": 3.851837635040283, "learning_rate": 0.0002233626588465298, "loss": 0.9293, "step": 3646 }, { "epoch": 0.5709142141515341, "grad_norm": 2.2999038696289062, "learning_rate": 0.00022333822091886605, "loss": 0.8068, "step": 3647 }, { "epoch": 0.5710707576706324, "grad_norm": 1.686329960823059, "learning_rate": 0.00022331378299120233, "loss": 0.3304, "step": 3648 }, { "epoch": 0.5712273011897308, "grad_norm": 2.073951244354248, "learning_rate": 0.0002232893450635386, "loss": 0.7134, "step": 3649 }, { "epoch": 0.5713838447088291, "grad_norm": 1.7170746326446533, "learning_rate": 0.00022326490713587484, "loss": 1.1497, "step": 3650 }, { "epoch": 0.5715403882279274, "grad_norm": 0.47634878754615784, "learning_rate": 0.00022324046920821111, "loss": 0.3378, "step": 3651 }, { "epoch": 0.5716969317470256, "grad_norm": 0.629563570022583, "learning_rate": 0.0002232160312805474, "loss": 0.4027, "step": 3652 }, { "epoch": 0.571853475266124, "grad_norm": 0.8603024482727051, "learning_rate": 0.00022319159335288364, "loss": 0.3611, "step": 3653 }, { "epoch": 0.5720100187852223, "grad_norm": 0.5923214554786682, "learning_rate": 0.00022316715542521992, "loss": 0.3809, "step": 3654 }, { "epoch": 0.5721665623043206, "grad_norm": 0.8817975521087646, "learning_rate": 0.0002231427174975562, "loss": 0.4392, "step": 3655 }, { "epoch": 0.5723231058234189, "grad_norm": 0.8190473914146423, "learning_rate": 0.00022311827956989245, "loss": 0.4117, "step": 3656 }, { "epoch": 0.5724796493425173, "grad_norm": 0.6798378229141235, "learning_rate": 0.00022309384164222873, "loss": 0.2774, "step": 3657 }, { "epoch": 0.5726361928616155, 
"grad_norm": 0.8386220932006836, "learning_rate": 0.000223069403714565, "loss": 0.4713, "step": 3658 }, { "epoch": 0.5727927363807138, "grad_norm": 0.8392332196235657, "learning_rate": 0.00022304496578690123, "loss": 0.5223, "step": 3659 }, { "epoch": 0.5729492798998121, "grad_norm": 1.1230872869491577, "learning_rate": 0.0002230205278592375, "loss": 0.4972, "step": 3660 }, { "epoch": 0.5731058234189105, "grad_norm": 1.0100561380386353, "learning_rate": 0.0002229960899315738, "loss": 0.3689, "step": 3661 }, { "epoch": 0.5732623669380088, "grad_norm": 0.7954285144805908, "learning_rate": 0.00022297165200391004, "loss": 0.3825, "step": 3662 }, { "epoch": 0.573418910457107, "grad_norm": 1.0235164165496826, "learning_rate": 0.00022294721407624632, "loss": 0.5164, "step": 3663 }, { "epoch": 0.5735754539762054, "grad_norm": 1.4720885753631592, "learning_rate": 0.0002229227761485826, "loss": 0.5941, "step": 3664 }, { "epoch": 0.5737319974953037, "grad_norm": 0.6552546620368958, "learning_rate": 0.00022289833822091885, "loss": 0.4673, "step": 3665 }, { "epoch": 0.573888541014402, "grad_norm": 0.9891802072525024, "learning_rate": 0.00022287390029325513, "loss": 0.5022, "step": 3666 }, { "epoch": 0.5740450845335003, "grad_norm": 1.503885269165039, "learning_rate": 0.0002228494623655914, "loss": 0.5776, "step": 3667 }, { "epoch": 0.5742016280525987, "grad_norm": 1.1407116651535034, "learning_rate": 0.00022282502443792763, "loss": 0.5049, "step": 3668 }, { "epoch": 0.5743581715716969, "grad_norm": 1.1594735383987427, "learning_rate": 0.0002228005865102639, "loss": 0.5241, "step": 3669 }, { "epoch": 0.5745147150907952, "grad_norm": 0.8916786313056946, "learning_rate": 0.00022277614858260019, "loss": 0.3553, "step": 3670 }, { "epoch": 0.5746712586098935, "grad_norm": 1.9808990955352783, "learning_rate": 0.00022275171065493644, "loss": 0.7108, "step": 3671 }, { "epoch": 0.5748278021289919, "grad_norm": 1.4254517555236816, "learning_rate": 0.00022272727272727272, "loss": 0.5996, 
"step": 3672 }, { "epoch": 0.5749843456480902, "grad_norm": 1.409542441368103, "learning_rate": 0.000222702834799609, "loss": 0.691, "step": 3673 }, { "epoch": 0.5751408891671885, "grad_norm": 1.5165317058563232, "learning_rate": 0.00022267839687194522, "loss": 0.5769, "step": 3674 }, { "epoch": 0.5752974326862867, "grad_norm": 2.6416120529174805, "learning_rate": 0.0002226539589442815, "loss": 0.7262, "step": 3675 }, { "epoch": 0.5754539762053851, "grad_norm": 2.1240227222442627, "learning_rate": 0.00022262952101661777, "loss": 0.6493, "step": 3676 }, { "epoch": 0.5756105197244834, "grad_norm": 1.722774624824524, "learning_rate": 0.00022260508308895403, "loss": 0.8552, "step": 3677 }, { "epoch": 0.5757670632435817, "grad_norm": 1.5990309715270996, "learning_rate": 0.0002225806451612903, "loss": 1.0542, "step": 3678 }, { "epoch": 0.5759236067626801, "grad_norm": 1.7411298751831055, "learning_rate": 0.00022255620723362658, "loss": 0.752, "step": 3679 }, { "epoch": 0.5760801502817783, "grad_norm": 2.393709182739258, "learning_rate": 0.00022253176930596283, "loss": 1.3362, "step": 3680 }, { "epoch": 0.5762366938008766, "grad_norm": 1.4573614597320557, "learning_rate": 0.0002225073313782991, "loss": 0.77, "step": 3681 }, { "epoch": 0.5763932373199749, "grad_norm": 1.8054333925247192, "learning_rate": 0.0002224828934506354, "loss": 0.9428, "step": 3682 }, { "epoch": 0.5765497808390733, "grad_norm": 2.158707618713379, "learning_rate": 0.00022245845552297161, "loss": 0.8786, "step": 3683 }, { "epoch": 0.5767063243581716, "grad_norm": 1.175864577293396, "learning_rate": 0.0002224340175953079, "loss": 0.7271, "step": 3684 }, { "epoch": 0.5768628678772699, "grad_norm": 3.2046947479248047, "learning_rate": 0.00022240957966764417, "loss": 1.1071, "step": 3685 }, { "epoch": 0.5770194113963681, "grad_norm": 2.6925206184387207, "learning_rate": 0.00022238514173998042, "loss": 1.247, "step": 3686 }, { "epoch": 0.5771759549154665, "grad_norm": 2.9383318424224854, "learning_rate": 
0.0002223607038123167, "loss": 0.9495, "step": 3687 }, { "epoch": 0.5773324984345648, "grad_norm": 2.484539031982422, "learning_rate": 0.00022233626588465298, "loss": 0.862, "step": 3688 }, { "epoch": 0.5774890419536631, "grad_norm": 3.069092035293579, "learning_rate": 0.00022231182795698923, "loss": 1.6457, "step": 3689 }, { "epoch": 0.5776455854727615, "grad_norm": 2.6385087966918945, "learning_rate": 0.0002222873900293255, "loss": 0.7699, "step": 3690 }, { "epoch": 0.5778021289918598, "grad_norm": 4.0282487869262695, "learning_rate": 0.00022226295210166176, "loss": 1.2982, "step": 3691 }, { "epoch": 0.577958672510958, "grad_norm": 1.7925729751586914, "learning_rate": 0.000222238514173998, "loss": 1.1737, "step": 3692 }, { "epoch": 0.5781152160300563, "grad_norm": 3.2553768157958984, "learning_rate": 0.0002222140762463343, "loss": 1.3953, "step": 3693 }, { "epoch": 0.5782717595491547, "grad_norm": 3.523599147796631, "learning_rate": 0.00022218963831867057, "loss": 0.9012, "step": 3694 }, { "epoch": 0.578428303068253, "grad_norm": 2.5558550357818604, "learning_rate": 0.00022216520039100682, "loss": 1.0458, "step": 3695 }, { "epoch": 0.5785848465873513, "grad_norm": 0.8834622502326965, "learning_rate": 0.0002221407624633431, "loss": 0.251, "step": 3696 }, { "epoch": 0.5787413901064495, "grad_norm": 1.9280049800872803, "learning_rate": 0.00022211632453567938, "loss": 0.653, "step": 3697 }, { "epoch": 0.5788979336255479, "grad_norm": 2.2830820083618164, "learning_rate": 0.0002220918866080156, "loss": 0.8649, "step": 3698 }, { "epoch": 0.5790544771446462, "grad_norm": 3.383026123046875, "learning_rate": 0.00022206744868035188, "loss": 0.7386, "step": 3699 }, { "epoch": 0.5792110206637445, "grad_norm": 2.898946523666382, "learning_rate": 0.00022204301075268816, "loss": 1.1781, "step": 3700 }, { "epoch": 0.5793675641828429, "grad_norm": 0.6618011593818665, "learning_rate": 0.0002220185728250244, "loss": 0.4284, "step": 3701 }, { "epoch": 0.5795241077019412, "grad_norm": 
0.7165639996528625, "learning_rate": 0.00022199413489736069, "loss": 0.3873, "step": 3702 }, { "epoch": 0.5796806512210394, "grad_norm": 0.8477781414985657, "learning_rate": 0.00022196969696969696, "loss": 0.5621, "step": 3703 }, { "epoch": 0.5798371947401377, "grad_norm": 1.0597972869873047, "learning_rate": 0.00022194525904203322, "loss": 0.5527, "step": 3704 }, { "epoch": 0.5799937382592361, "grad_norm": 0.9503958225250244, "learning_rate": 0.0002219208211143695, "loss": 0.4481, "step": 3705 }, { "epoch": 0.5801502817783344, "grad_norm": 1.0318113565444946, "learning_rate": 0.00022189638318670577, "loss": 0.4143, "step": 3706 }, { "epoch": 0.5803068252974327, "grad_norm": 0.5940424203872681, "learning_rate": 0.000221871945259042, "loss": 0.3822, "step": 3707 }, { "epoch": 0.580463368816531, "grad_norm": 0.8478888869285583, "learning_rate": 0.00022184750733137828, "loss": 0.3572, "step": 3708 }, { "epoch": 0.5806199123356293, "grad_norm": 0.69258052110672, "learning_rate": 0.00022182306940371455, "loss": 0.2797, "step": 3709 }, { "epoch": 0.5807764558547276, "grad_norm": 0.7424476742744446, "learning_rate": 0.0002217986314760508, "loss": 0.4239, "step": 3710 }, { "epoch": 0.5809329993738259, "grad_norm": 1.1544733047485352, "learning_rate": 0.00022177419354838708, "loss": 0.4964, "step": 3711 }, { "epoch": 0.5810895428929242, "grad_norm": 1.1875557899475098, "learning_rate": 0.00022174975562072336, "loss": 0.5127, "step": 3712 }, { "epoch": 0.5812460864120226, "grad_norm": 1.065439224243164, "learning_rate": 0.0002217253176930596, "loss": 0.4603, "step": 3713 }, { "epoch": 0.5814026299311209, "grad_norm": 1.477260708808899, "learning_rate": 0.0002217008797653959, "loss": 0.6937, "step": 3714 }, { "epoch": 0.5815591734502191, "grad_norm": 1.7115600109100342, "learning_rate": 0.00022167644183773214, "loss": 0.6814, "step": 3715 }, { "epoch": 0.5817157169693175, "grad_norm": 1.2147424221038818, "learning_rate": 0.0002216520039100684, "loss": 0.5137, "step": 3716 }, 
{ "epoch": 0.5818722604884158, "grad_norm": 1.544769287109375, "learning_rate": 0.00022162756598240467, "loss": 0.4772, "step": 3717 }, { "epoch": 0.5820288040075141, "grad_norm": 1.2717753648757935, "learning_rate": 0.00022160312805474095, "loss": 0.7419, "step": 3718 }, { "epoch": 0.5821853475266124, "grad_norm": 1.403295636177063, "learning_rate": 0.0002215786901270772, "loss": 0.8306, "step": 3719 }, { "epoch": 0.5823418910457107, "grad_norm": 2.2158901691436768, "learning_rate": 0.00022155425219941348, "loss": 0.8163, "step": 3720 }, { "epoch": 0.582498434564809, "grad_norm": 1.945855975151062, "learning_rate": 0.00022152981427174976, "loss": 0.7549, "step": 3721 }, { "epoch": 0.5826549780839073, "grad_norm": 2.006751775741577, "learning_rate": 0.00022150537634408598, "loss": 0.6222, "step": 3722 }, { "epoch": 0.5828115216030056, "grad_norm": 1.3016244173049927, "learning_rate": 0.00022148093841642226, "loss": 0.5719, "step": 3723 }, { "epoch": 0.582968065122104, "grad_norm": 1.7161877155303955, "learning_rate": 0.00022145650048875854, "loss": 0.8886, "step": 3724 }, { "epoch": 0.5831246086412023, "grad_norm": 1.5674386024475098, "learning_rate": 0.0002214320625610948, "loss": 0.8454, "step": 3725 }, { "epoch": 0.5832811521603005, "grad_norm": 2.3730225563049316, "learning_rate": 0.00022140762463343107, "loss": 1.0747, "step": 3726 }, { "epoch": 0.5834376956793988, "grad_norm": 2.1554417610168457, "learning_rate": 0.00022138318670576735, "loss": 0.6091, "step": 3727 }, { "epoch": 0.5835942391984972, "grad_norm": 1.7122255563735962, "learning_rate": 0.0002213587487781036, "loss": 0.4839, "step": 3728 }, { "epoch": 0.5837507827175955, "grad_norm": 1.168192744255066, "learning_rate": 0.00022133431085043988, "loss": 0.7182, "step": 3729 }, { "epoch": 0.5839073262366938, "grad_norm": 1.9943832159042358, "learning_rate": 0.00022130987292277615, "loss": 1.1607, "step": 3730 }, { "epoch": 0.5840638697557922, "grad_norm": 4.930220603942871, "learning_rate": 
0.00022128543499511238, "loss": 0.8025, "step": 3731 }, { "epoch": 0.5842204132748904, "grad_norm": 1.833145260810852, "learning_rate": 0.00022126099706744866, "loss": 0.7035, "step": 3732 }, { "epoch": 0.5843769567939887, "grad_norm": 2.07147479057312, "learning_rate": 0.00022123655913978494, "loss": 1.1647, "step": 3733 }, { "epoch": 0.584533500313087, "grad_norm": 2.026843309402466, "learning_rate": 0.0002212121212121212, "loss": 0.7464, "step": 3734 }, { "epoch": 0.5846900438321854, "grad_norm": 1.1421523094177246, "learning_rate": 0.00022118768328445747, "loss": 1.1607, "step": 3735 }, { "epoch": 0.5848465873512837, "grad_norm": 3.0231730937957764, "learning_rate": 0.00022116324535679374, "loss": 1.0897, "step": 3736 }, { "epoch": 0.5850031308703819, "grad_norm": 1.649326205253601, "learning_rate": 0.00022113880742913, "loss": 0.869, "step": 3737 }, { "epoch": 0.5851596743894802, "grad_norm": 3.129502296447754, "learning_rate": 0.00022111436950146625, "loss": 1.1095, "step": 3738 }, { "epoch": 0.5853162179085786, "grad_norm": 2.2908878326416016, "learning_rate": 0.00022108993157380252, "loss": 0.7454, "step": 3739 }, { "epoch": 0.5854727614276769, "grad_norm": 2.386479377746582, "learning_rate": 0.00022106549364613878, "loss": 1.3902, "step": 3740 }, { "epoch": 0.5856293049467752, "grad_norm": 2.256403923034668, "learning_rate": 0.00022104105571847505, "loss": 1.2762, "step": 3741 }, { "epoch": 0.5857858484658736, "grad_norm": 3.2702090740203857, "learning_rate": 0.00022101661779081133, "loss": 1.1909, "step": 3742 }, { "epoch": 0.5859423919849718, "grad_norm": 1.4588786363601685, "learning_rate": 0.00022099217986314758, "loss": 1.6311, "step": 3743 }, { "epoch": 0.5860989355040701, "grad_norm": 1.102078914642334, "learning_rate": 0.00022096774193548386, "loss": 0.6543, "step": 3744 }, { "epoch": 0.5862554790231684, "grad_norm": 2.0560879707336426, "learning_rate": 0.00022094330400782014, "loss": 1.4536, "step": 3745 }, { "epoch": 0.5864120225422668, 
"grad_norm": 1.8703267574310303, "learning_rate": 0.00022091886608015636, "loss": 1.5638, "step": 3746 }, { "epoch": 0.5865685660613651, "grad_norm": 1.5793174505233765, "learning_rate": 0.00022089442815249264, "loss": 0.919, "step": 3747 }, { "epoch": 0.5867251095804634, "grad_norm": 3.379425287246704, "learning_rate": 0.00022086999022482892, "loss": 1.7933, "step": 3748 }, { "epoch": 0.5868816530995616, "grad_norm": 2.630582571029663, "learning_rate": 0.00022084555229716517, "loss": 0.6506, "step": 3749 }, { "epoch": 0.58703819661866, "grad_norm": 1.3981788158416748, "learning_rate": 0.00022082111436950145, "loss": 1.0893, "step": 3750 }, { "epoch": 0.5871947401377583, "grad_norm": 0.4805038273334503, "learning_rate": 0.00022079667644183773, "loss": 0.3598, "step": 3751 }, { "epoch": 0.5873512836568566, "grad_norm": 0.3983316719532013, "learning_rate": 0.00022077223851417398, "loss": 0.3031, "step": 3752 }, { "epoch": 0.587507827175955, "grad_norm": 1.9718865156173706, "learning_rate": 0.00022074780058651026, "loss": 1.3055, "step": 3753 }, { "epoch": 0.5876643706950532, "grad_norm": 0.5567598938941956, "learning_rate": 0.00022072336265884654, "loss": 0.3184, "step": 3754 }, { "epoch": 0.5878209142141515, "grad_norm": 0.6025465130805969, "learning_rate": 0.00022069892473118276, "loss": 0.3816, "step": 3755 }, { "epoch": 0.5879774577332498, "grad_norm": 0.9688357710838318, "learning_rate": 0.00022067448680351904, "loss": 0.4014, "step": 3756 }, { "epoch": 0.5881340012523482, "grad_norm": 0.6536909341812134, "learning_rate": 0.00022065004887585532, "loss": 0.4133, "step": 3757 }, { "epoch": 0.5882905447714465, "grad_norm": 0.6324391961097717, "learning_rate": 0.00022062561094819157, "loss": 0.3517, "step": 3758 }, { "epoch": 0.5884470882905448, "grad_norm": 0.7064645886421204, "learning_rate": 0.00022060117302052785, "loss": 0.404, "step": 3759 }, { "epoch": 0.588603631809643, "grad_norm": 0.8982453942298889, "learning_rate": 0.00022057673509286413, "loss": 0.517, 
"step": 3760 }, { "epoch": 0.5887601753287414, "grad_norm": 1.20693838596344, "learning_rate": 0.00022055229716520038, "loss": 0.4526, "step": 3761 }, { "epoch": 0.5889167188478397, "grad_norm": 0.7581743597984314, "learning_rate": 0.00022052785923753663, "loss": 0.3104, "step": 3762 }, { "epoch": 0.589073262366938, "grad_norm": 0.5927846431732178, "learning_rate": 0.0002205034213098729, "loss": 0.2852, "step": 3763 }, { "epoch": 0.5892298058860364, "grad_norm": 1.2253514528274536, "learning_rate": 0.00022047898338220916, "loss": 0.3494, "step": 3764 }, { "epoch": 0.5893863494051347, "grad_norm": 0.9083871245384216, "learning_rate": 0.00022045454545454544, "loss": 0.3888, "step": 3765 }, { "epoch": 0.5895428929242329, "grad_norm": 0.8482705950737, "learning_rate": 0.00022043010752688171, "loss": 0.487, "step": 3766 }, { "epoch": 0.5896994364433312, "grad_norm": 0.903735876083374, "learning_rate": 0.00022040566959921797, "loss": 0.4058, "step": 3767 }, { "epoch": 0.5898559799624296, "grad_norm": 1.6391104459762573, "learning_rate": 0.00022038123167155424, "loss": 0.7215, "step": 3768 }, { "epoch": 0.5900125234815279, "grad_norm": 1.211521863937378, "learning_rate": 0.00022035679374389052, "loss": 0.3585, "step": 3769 }, { "epoch": 0.5901690670006262, "grad_norm": 1.2085306644439697, "learning_rate": 0.00022033235581622675, "loss": 0.5602, "step": 3770 }, { "epoch": 0.5903256105197244, "grad_norm": 1.251050591468811, "learning_rate": 0.00022030791788856303, "loss": 0.6018, "step": 3771 }, { "epoch": 0.5904821540388228, "grad_norm": 3.4225316047668457, "learning_rate": 0.0002202834799608993, "loss": 1.5824, "step": 3772 }, { "epoch": 0.5906386975579211, "grad_norm": 1.3945788145065308, "learning_rate": 0.00022025904203323555, "loss": 0.6043, "step": 3773 }, { "epoch": 0.5907952410770194, "grad_norm": 2.437675952911377, "learning_rate": 0.00022023460410557183, "loss": 0.7169, "step": 3774 }, { "epoch": 0.5909517845961177, "grad_norm": 1.3050737380981445, 
"learning_rate": 0.0002202101661779081, "loss": 0.7835, "step": 3775 }, { "epoch": 0.5911083281152161, "grad_norm": 2.816662549972534, "learning_rate": 0.00022018572825024436, "loss": 0.6321, "step": 3776 }, { "epoch": 0.5912648716343143, "grad_norm": 1.4117590188980103, "learning_rate": 0.00022016129032258064, "loss": 0.479, "step": 3777 }, { "epoch": 0.5914214151534126, "grad_norm": 1.9365350008010864, "learning_rate": 0.00022013685239491692, "loss": 0.8931, "step": 3778 }, { "epoch": 0.591577958672511, "grad_norm": 1.9675894975662231, "learning_rate": 0.00022011241446725314, "loss": 0.6348, "step": 3779 }, { "epoch": 0.5917345021916093, "grad_norm": 1.806617259979248, "learning_rate": 0.00022008797653958942, "loss": 0.7128, "step": 3780 }, { "epoch": 0.5918910457107076, "grad_norm": 2.15679669380188, "learning_rate": 0.0002200635386119257, "loss": 0.7107, "step": 3781 }, { "epoch": 0.5920475892298059, "grad_norm": 1.5504449605941772, "learning_rate": 0.00022003910068426195, "loss": 0.6923, "step": 3782 }, { "epoch": 0.5922041327489042, "grad_norm": 2.190402030944824, "learning_rate": 0.00022001466275659823, "loss": 0.9243, "step": 3783 }, { "epoch": 0.5923606762680025, "grad_norm": 2.572727918624878, "learning_rate": 0.0002199902248289345, "loss": 1.3498, "step": 3784 }, { "epoch": 0.5925172197871008, "grad_norm": 2.3958308696746826, "learning_rate": 0.00021996578690127076, "loss": 1.3511, "step": 3785 }, { "epoch": 0.5926737633061991, "grad_norm": 3.290600538253784, "learning_rate": 0.000219941348973607, "loss": 1.1582, "step": 3786 }, { "epoch": 0.5928303068252975, "grad_norm": 2.0067548751831055, "learning_rate": 0.0002199169110459433, "loss": 0.6797, "step": 3787 }, { "epoch": 0.5929868503443957, "grad_norm": 2.599518060684204, "learning_rate": 0.00021989247311827954, "loss": 1.1569, "step": 3788 }, { "epoch": 0.593143393863494, "grad_norm": 3.275362968444824, "learning_rate": 0.00021986803519061582, "loss": 1.0037, "step": 3789 }, { "epoch": 
0.5932999373825923, "grad_norm": 1.9998888969421387, "learning_rate": 0.0002198435972629521, "loss": 1.6154, "step": 3790 }, { "epoch": 0.5934564809016907, "grad_norm": 2.5467236042022705, "learning_rate": 0.00021981915933528835, "loss": 1.5551, "step": 3791 }, { "epoch": 0.593613024420789, "grad_norm": 5.1116042137146, "learning_rate": 0.00021979472140762463, "loss": 1.3565, "step": 3792 }, { "epoch": 0.5937695679398873, "grad_norm": 1.5216659307479858, "learning_rate": 0.0002197702834799609, "loss": 1.0188, "step": 3793 }, { "epoch": 0.5939261114589856, "grad_norm": 2.700352191925049, "learning_rate": 0.00021974584555229713, "loss": 1.9111, "step": 3794 }, { "epoch": 0.5940826549780839, "grad_norm": 2.896486759185791, "learning_rate": 0.0002197214076246334, "loss": 1.1349, "step": 3795 }, { "epoch": 0.5942391984971822, "grad_norm": 2.902263879776001, "learning_rate": 0.00021969696969696969, "loss": 0.7021, "step": 3796 }, { "epoch": 0.5943957420162805, "grad_norm": 1.9411492347717285, "learning_rate": 0.00021967253176930594, "loss": 0.9319, "step": 3797 }, { "epoch": 0.5945522855353789, "grad_norm": 1.5800654888153076, "learning_rate": 0.00021964809384164222, "loss": 0.6419, "step": 3798 }, { "epoch": 0.5947088290544772, "grad_norm": 2.5261833667755127, "learning_rate": 0.0002196236559139785, "loss": 1.0969, "step": 3799 }, { "epoch": 0.5948653725735754, "grad_norm": 3.4451301097869873, "learning_rate": 0.00021959921798631475, "loss": 1.2766, "step": 3800 }, { "epoch": 0.5950219160926737, "grad_norm": 0.6500614285469055, "learning_rate": 0.00021957478005865102, "loss": 0.461, "step": 3801 }, { "epoch": 0.5951784596117721, "grad_norm": 0.5970913767814636, "learning_rate": 0.0002195503421309873, "loss": 0.4256, "step": 3802 }, { "epoch": 0.5953350031308704, "grad_norm": 0.7132553458213806, "learning_rate": 0.00021952590420332353, "loss": 0.3076, "step": 3803 }, { "epoch": 0.5954915466499687, "grad_norm": 0.6507532000541687, "learning_rate": 0.0002195014662756598, 
"loss": 0.3146, "step": 3804 }, { "epoch": 0.595648090169067, "grad_norm": 0.8440534472465515, "learning_rate": 0.00021947702834799608, "loss": 0.3291, "step": 3805 }, { "epoch": 0.5958046336881653, "grad_norm": 0.7817156314849854, "learning_rate": 0.00021945259042033233, "loss": 0.2867, "step": 3806 }, { "epoch": 0.5959611772072636, "grad_norm": 0.5882556438446045, "learning_rate": 0.0002194281524926686, "loss": 0.3247, "step": 3807 }, { "epoch": 0.5961177207263619, "grad_norm": 2.025453805923462, "learning_rate": 0.0002194037145650049, "loss": 0.4633, "step": 3808 }, { "epoch": 0.5962742642454603, "grad_norm": 0.8439894318580627, "learning_rate": 0.00021937927663734111, "loss": 0.4273, "step": 3809 }, { "epoch": 0.5964308077645586, "grad_norm": 4.892188549041748, "learning_rate": 0.0002193548387096774, "loss": 0.6922, "step": 3810 }, { "epoch": 0.5965873512836568, "grad_norm": 1.2125036716461182, "learning_rate": 0.00021933040078201367, "loss": 0.4494, "step": 3811 }, { "epoch": 0.5967438948027551, "grad_norm": 1.2884728908538818, "learning_rate": 0.00021930596285434992, "loss": 0.4823, "step": 3812 }, { "epoch": 0.5969004383218535, "grad_norm": 1.1847399473190308, "learning_rate": 0.0002192815249266862, "loss": 0.5722, "step": 3813 }, { "epoch": 0.5970569818409518, "grad_norm": 0.9156528115272522, "learning_rate": 0.00021925708699902248, "loss": 0.3829, "step": 3814 }, { "epoch": 0.5972135253600501, "grad_norm": 1.0122822523117065, "learning_rate": 0.00021923264907135873, "loss": 0.364, "step": 3815 }, { "epoch": 0.5973700688791485, "grad_norm": 0.5517539978027344, "learning_rate": 0.000219208211143695, "loss": 0.2851, "step": 3816 }, { "epoch": 0.5975266123982467, "grad_norm": 0.777860164642334, "learning_rate": 0.0002191837732160313, "loss": 0.363, "step": 3817 }, { "epoch": 0.597683155917345, "grad_norm": 3.110654354095459, "learning_rate": 0.0002191593352883675, "loss": 0.9008, "step": 3818 }, { "epoch": 0.5978396994364433, "grad_norm": 1.7335413694381714, 
"learning_rate": 0.0002191348973607038, "loss": 0.5437, "step": 3819 }, { "epoch": 0.5979962429555417, "grad_norm": 1.2909818887710571, "learning_rate": 0.00021911045943304007, "loss": 0.3403, "step": 3820 }, { "epoch": 0.59815278647464, "grad_norm": 2.242083787918091, "learning_rate": 0.00021908602150537632, "loss": 0.6593, "step": 3821 }, { "epoch": 0.5983093299937383, "grad_norm": 1.45481538772583, "learning_rate": 0.0002190615835777126, "loss": 0.64, "step": 3822 }, { "epoch": 0.5984658735128365, "grad_norm": 1.841384768486023, "learning_rate": 0.00021903714565004888, "loss": 0.8033, "step": 3823 }, { "epoch": 0.5986224170319349, "grad_norm": 4.309696197509766, "learning_rate": 0.00021901270772238513, "loss": 0.9931, "step": 3824 }, { "epoch": 0.5987789605510332, "grad_norm": 2.3548316955566406, "learning_rate": 0.0002189882697947214, "loss": 0.7233, "step": 3825 }, { "epoch": 0.5989355040701315, "grad_norm": 2.9939208030700684, "learning_rate": 0.00021896383186705768, "loss": 1.0121, "step": 3826 }, { "epoch": 0.5990920475892298, "grad_norm": 1.7699570655822754, "learning_rate": 0.0002189393939393939, "loss": 0.6097, "step": 3827 }, { "epoch": 0.5992485911083281, "grad_norm": 2.700235605239868, "learning_rate": 0.0002189149560117302, "loss": 0.8177, "step": 3828 }, { "epoch": 0.5994051346274264, "grad_norm": 1.7766852378845215, "learning_rate": 0.00021889051808406647, "loss": 0.6307, "step": 3829 }, { "epoch": 0.5995616781465247, "grad_norm": 1.4954442977905273, "learning_rate": 0.00021886608015640272, "loss": 0.9373, "step": 3830 }, { "epoch": 0.599718221665623, "grad_norm": 1.9515974521636963, "learning_rate": 0.000218841642228739, "loss": 0.627, "step": 3831 }, { "epoch": 0.5998747651847214, "grad_norm": 1.922837734222412, "learning_rate": 0.00021881720430107527, "loss": 0.8141, "step": 3832 }, { "epoch": 0.6000313087038197, "grad_norm": 2.2533392906188965, "learning_rate": 0.0002187927663734115, "loss": 1.1225, "step": 3833 }, { "epoch": 
0.6001878522229179, "grad_norm": 1.5181680917739868, "learning_rate": 0.00021876832844574778, "loss": 0.8138, "step": 3834 }, { "epoch": 0.6003443957420163, "grad_norm": 2.555852174758911, "learning_rate": 0.00021874389051808403, "loss": 1.0635, "step": 3835 }, { "epoch": 0.6005009392611146, "grad_norm": 1.9086858034133911, "learning_rate": 0.0002187194525904203, "loss": 1.2145, "step": 3836 }, { "epoch": 0.6006574827802129, "grad_norm": 2.0855298042297363, "learning_rate": 0.00021869501466275658, "loss": 0.4878, "step": 3837 }, { "epoch": 0.6008140262993112, "grad_norm": 2.812436819076538, "learning_rate": 0.00021867057673509283, "loss": 1.3879, "step": 3838 }, { "epoch": 0.6009705698184096, "grad_norm": 2.317641258239746, "learning_rate": 0.0002186461388074291, "loss": 1.0922, "step": 3839 }, { "epoch": 0.6011271133375078, "grad_norm": 1.9752837419509888, "learning_rate": 0.0002186217008797654, "loss": 1.0511, "step": 3840 }, { "epoch": 0.6012836568566061, "grad_norm": 3.892122745513916, "learning_rate": 0.00021859726295210162, "loss": 1.2536, "step": 3841 }, { "epoch": 0.6014402003757044, "grad_norm": 3.568765640258789, "learning_rate": 0.0002185728250244379, "loss": 1.7708, "step": 3842 }, { "epoch": 0.6015967438948028, "grad_norm": 4.544713497161865, "learning_rate": 0.00021854838709677417, "loss": 1.8441, "step": 3843 }, { "epoch": 0.6017532874139011, "grad_norm": 3.862272024154663, "learning_rate": 0.00021852394916911042, "loss": 1.3045, "step": 3844 }, { "epoch": 0.6019098309329993, "grad_norm": 2.4139466285705566, "learning_rate": 0.0002184995112414467, "loss": 0.8241, "step": 3845 }, { "epoch": 0.6020663744520977, "grad_norm": 2.8001489639282227, "learning_rate": 0.00021847507331378298, "loss": 1.4736, "step": 3846 }, { "epoch": 0.602222917971196, "grad_norm": 1.7694708108901978, "learning_rate": 0.00021845063538611923, "loss": 0.8626, "step": 3847 }, { "epoch": 0.6023794614902943, "grad_norm": 1.846111536026001, "learning_rate": 0.0002184261974584555, 
"loss": 0.8587, "step": 3848 }, { "epoch": 0.6025360050093926, "grad_norm": 2.2559170722961426, "learning_rate": 0.0002184017595307918, "loss": 0.8416, "step": 3849 }, { "epoch": 0.602692548528491, "grad_norm": 3.0026137828826904, "learning_rate": 0.000218377321603128, "loss": 0.8497, "step": 3850 }, { "epoch": 0.6028490920475892, "grad_norm": 2.2647368907928467, "learning_rate": 0.0002183528836754643, "loss": 0.4743, "step": 3851 }, { "epoch": 0.6030056355666875, "grad_norm": 0.49257951974868774, "learning_rate": 0.00021832844574780057, "loss": 0.3079, "step": 3852 }, { "epoch": 0.6031621790857858, "grad_norm": 0.505824089050293, "learning_rate": 0.00021830400782013682, "loss": 0.4189, "step": 3853 }, { "epoch": 0.6033187226048842, "grad_norm": 0.44036567211151123, "learning_rate": 0.0002182795698924731, "loss": 0.2503, "step": 3854 }, { "epoch": 0.6034752661239825, "grad_norm": 0.5498443245887756, "learning_rate": 0.00021825513196480938, "loss": 0.2296, "step": 3855 }, { "epoch": 0.6036318096430808, "grad_norm": 0.8247083425521851, "learning_rate": 0.00021823069403714563, "loss": 0.4274, "step": 3856 }, { "epoch": 0.603788353162179, "grad_norm": 0.6773415207862854, "learning_rate": 0.00021820625610948188, "loss": 0.2763, "step": 3857 }, { "epoch": 0.6039448966812774, "grad_norm": 0.810204803943634, "learning_rate": 0.00021818181818181816, "loss": 0.4864, "step": 3858 }, { "epoch": 0.6041014402003757, "grad_norm": 0.6132627129554749, "learning_rate": 0.0002181573802541544, "loss": 0.3392, "step": 3859 }, { "epoch": 0.604257983719474, "grad_norm": 0.5890900492668152, "learning_rate": 0.0002181329423264907, "loss": 0.3409, "step": 3860 }, { "epoch": 0.6044145272385724, "grad_norm": 1.143919587135315, "learning_rate": 0.00021810850439882697, "loss": 0.5649, "step": 3861 }, { "epoch": 0.6045710707576706, "grad_norm": 0.9753417372703552, "learning_rate": 0.00021808406647116322, "loss": 0.354, "step": 3862 }, { "epoch": 0.6047276142767689, "grad_norm": 
0.9301658272743225, "learning_rate": 0.0002180596285434995, "loss": 0.2972, "step": 3863 }, { "epoch": 0.6048841577958672, "grad_norm": 0.7712864279747009, "learning_rate": 0.00021803519061583577, "loss": 0.3733, "step": 3864 }, { "epoch": 0.6050407013149656, "grad_norm": 1.1033275127410889, "learning_rate": 0.000218010752688172, "loss": 0.5233, "step": 3865 }, { "epoch": 0.6051972448340639, "grad_norm": 1.8046352863311768, "learning_rate": 0.00021798631476050828, "loss": 0.6654, "step": 3866 }, { "epoch": 0.6053537883531622, "grad_norm": 1.1659013032913208, "learning_rate": 0.00021796187683284455, "loss": 0.4882, "step": 3867 }, { "epoch": 0.6055103318722604, "grad_norm": 2.8485543727874756, "learning_rate": 0.0002179374389051808, "loss": 0.7751, "step": 3868 }, { "epoch": 0.6056668753913588, "grad_norm": 2.1419053077697754, "learning_rate": 0.00021791300097751708, "loss": 0.7052, "step": 3869 }, { "epoch": 0.6058234189104571, "grad_norm": 1.2547987699508667, "learning_rate": 0.00021788856304985336, "loss": 0.4797, "step": 3870 }, { "epoch": 0.6059799624295554, "grad_norm": 1.7469266653060913, "learning_rate": 0.00021786412512218961, "loss": 0.5729, "step": 3871 }, { "epoch": 0.6061365059486538, "grad_norm": 1.2163182497024536, "learning_rate": 0.0002178396871945259, "loss": 0.63, "step": 3872 }, { "epoch": 0.6062930494677521, "grad_norm": 1.283544898033142, "learning_rate": 0.00021781524926686217, "loss": 0.723, "step": 3873 }, { "epoch": 0.6064495929868503, "grad_norm": 1.9158495664596558, "learning_rate": 0.0002177908113391984, "loss": 0.7387, "step": 3874 }, { "epoch": 0.6066061365059486, "grad_norm": 2.14650821685791, "learning_rate": 0.00021776637341153467, "loss": 0.8682, "step": 3875 }, { "epoch": 0.606762680025047, "grad_norm": 1.1081241369247437, "learning_rate": 0.00021774193548387095, "loss": 0.5418, "step": 3876 }, { "epoch": 0.6069192235441453, "grad_norm": 1.6972672939300537, "learning_rate": 0.0002177174975562072, "loss": 0.9634, "step": 3877 }, { 
"epoch": 0.6070757670632436, "grad_norm": 2.2414941787719727, "learning_rate": 0.00021769305962854348, "loss": 0.9434, "step": 3878 }, { "epoch": 0.6072323105823418, "grad_norm": 1.9545202255249023, "learning_rate": 0.00021766862170087976, "loss": 0.5801, "step": 3879 }, { "epoch": 0.6073888541014402, "grad_norm": 2.0093443393707275, "learning_rate": 0.00021764418377321598, "loss": 0.9481, "step": 3880 }, { "epoch": 0.6075453976205385, "grad_norm": 1.2470927238464355, "learning_rate": 0.00021761974584555226, "loss": 0.3291, "step": 3881 }, { "epoch": 0.6077019411396368, "grad_norm": 3.9128541946411133, "learning_rate": 0.00021759530791788854, "loss": 0.4683, "step": 3882 }, { "epoch": 0.6078584846587352, "grad_norm": 1.703982949256897, "learning_rate": 0.0002175708699902248, "loss": 0.9931, "step": 3883 }, { "epoch": 0.6080150281778335, "grad_norm": 2.462613582611084, "learning_rate": 0.00021754643206256107, "loss": 1.0189, "step": 3884 }, { "epoch": 0.6081715716969317, "grad_norm": 5.5664591789245605, "learning_rate": 0.00021752199413489735, "loss": 0.7844, "step": 3885 }, { "epoch": 0.60832811521603, "grad_norm": 2.5077731609344482, "learning_rate": 0.0002174975562072336, "loss": 1.414, "step": 3886 }, { "epoch": 0.6084846587351284, "grad_norm": 2.240034341812134, "learning_rate": 0.00021747311827956988, "loss": 0.8179, "step": 3887 }, { "epoch": 0.6086412022542267, "grad_norm": 1.7230664491653442, "learning_rate": 0.00021744868035190616, "loss": 1.0263, "step": 3888 }, { "epoch": 0.608797745773325, "grad_norm": 3.0299770832061768, "learning_rate": 0.00021742424242424238, "loss": 1.0993, "step": 3889 }, { "epoch": 0.6089542892924233, "grad_norm": 4.0090413093566895, "learning_rate": 0.00021739980449657866, "loss": 1.0955, "step": 3890 }, { "epoch": 0.6091108328115216, "grad_norm": 3.151949644088745, "learning_rate": 0.00021737536656891494, "loss": 1.6831, "step": 3891 }, { "epoch": 0.6092673763306199, "grad_norm": 1.1991798877716064, "learning_rate": 
0.0002173509286412512, "loss": 1.0109, "step": 3892 }, { "epoch": 0.6094239198497182, "grad_norm": 3.1792802810668945, "learning_rate": 0.00021732649071358747, "loss": 1.4323, "step": 3893 }, { "epoch": 0.6095804633688165, "grad_norm": 1.9545060396194458, "learning_rate": 0.00021730205278592374, "loss": 1.1116, "step": 3894 }, { "epoch": 0.6097370068879149, "grad_norm": 2.42977237701416, "learning_rate": 0.00021727761485826, "loss": 1.1406, "step": 3895 }, { "epoch": 0.6098935504070131, "grad_norm": 1.8895028829574585, "learning_rate": 0.00021725317693059627, "loss": 0.7086, "step": 3896 }, { "epoch": 0.6100500939261114, "grad_norm": 4.542184829711914, "learning_rate": 0.00021722873900293253, "loss": 1.2922, "step": 3897 }, { "epoch": 0.6102066374452098, "grad_norm": 1.5227937698364258, "learning_rate": 0.00021720430107526878, "loss": 0.777, "step": 3898 }, { "epoch": 0.6103631809643081, "grad_norm": 2.370554208755493, "learning_rate": 0.00021717986314760506, "loss": 0.5624, "step": 3899 }, { "epoch": 0.6105197244834064, "grad_norm": 1.582291603088379, "learning_rate": 0.00021715542521994133, "loss": 0.91, "step": 3900 }, { "epoch": 0.6106762680025047, "grad_norm": 0.5427595973014832, "learning_rate": 0.00021713098729227758, "loss": 0.4112, "step": 3901 }, { "epoch": 0.610832811521603, "grad_norm": 0.9115118980407715, "learning_rate": 0.00021710654936461386, "loss": 0.3359, "step": 3902 }, { "epoch": 0.6109893550407013, "grad_norm": 0.6298912763595581, "learning_rate": 0.00021708211143695014, "loss": 0.3834, "step": 3903 }, { "epoch": 0.6111458985597996, "grad_norm": 0.669316828250885, "learning_rate": 0.00021705767350928637, "loss": 0.5009, "step": 3904 }, { "epoch": 0.611302442078898, "grad_norm": 0.608873188495636, "learning_rate": 0.00021703323558162264, "loss": 0.2752, "step": 3905 }, { "epoch": 0.6114589855979963, "grad_norm": 0.4760167598724365, "learning_rate": 0.00021700879765395892, "loss": 0.3135, "step": 3906 }, { "epoch": 0.6116155291170946, 
"grad_norm": 0.5999681353569031, "learning_rate": 0.00021698435972629517, "loss": 0.4159, "step": 3907 }, { "epoch": 0.6117720726361928, "grad_norm": 0.8838676810264587, "learning_rate": 0.00021695992179863145, "loss": 0.5722, "step": 3908 }, { "epoch": 0.6119286161552911, "grad_norm": 0.8558535575866699, "learning_rate": 0.00021693548387096773, "loss": 0.5293, "step": 3909 }, { "epoch": 0.6120851596743895, "grad_norm": 0.6933575868606567, "learning_rate": 0.00021691104594330398, "loss": 0.3242, "step": 3910 }, { "epoch": 0.6122417031934878, "grad_norm": 0.6868442893028259, "learning_rate": 0.00021688660801564026, "loss": 0.288, "step": 3911 }, { "epoch": 0.6123982467125861, "grad_norm": 0.9867046475410461, "learning_rate": 0.00021686217008797654, "loss": 0.49, "step": 3912 }, { "epoch": 0.6125547902316844, "grad_norm": 0.8089808225631714, "learning_rate": 0.00021683773216031276, "loss": 0.4821, "step": 3913 }, { "epoch": 0.6127113337507827, "grad_norm": 1.2595003843307495, "learning_rate": 0.00021681329423264904, "loss": 0.5644, "step": 3914 }, { "epoch": 0.612867877269881, "grad_norm": 1.136533260345459, "learning_rate": 0.00021678885630498532, "loss": 0.3876, "step": 3915 }, { "epoch": 0.6130244207889793, "grad_norm": 0.9469622373580933, "learning_rate": 0.00021676441837732157, "loss": 0.3581, "step": 3916 }, { "epoch": 0.6131809643080777, "grad_norm": 1.390121340751648, "learning_rate": 0.00021673998044965785, "loss": 0.9731, "step": 3917 }, { "epoch": 0.613337507827176, "grad_norm": 0.9338005185127258, "learning_rate": 0.00021671554252199413, "loss": 0.5445, "step": 3918 }, { "epoch": 0.6134940513462742, "grad_norm": 1.3326369524002075, "learning_rate": 0.00021669110459433038, "loss": 0.5307, "step": 3919 }, { "epoch": 0.6136505948653725, "grad_norm": 1.399505615234375, "learning_rate": 0.00021666666666666666, "loss": 0.7355, "step": 3920 }, { "epoch": 0.6138071383844709, "grad_norm": 0.6472836136817932, "learning_rate": 0.0002166422287390029, "loss": 0.3582, 
"step": 3921 }, { "epoch": 0.6139636819035692, "grad_norm": 1.1896679401397705, "learning_rate": 0.00021661779081133916, "loss": 0.5519, "step": 3922 }, { "epoch": 0.6141202254226675, "grad_norm": 1.0599162578582764, "learning_rate": 0.00021659335288367544, "loss": 0.5286, "step": 3923 }, { "epoch": 0.6142767689417659, "grad_norm": 4.264036178588867, "learning_rate": 0.00021656891495601172, "loss": 0.5916, "step": 3924 }, { "epoch": 0.6144333124608641, "grad_norm": 5.218554973602295, "learning_rate": 0.00021654447702834797, "loss": 1.2145, "step": 3925 }, { "epoch": 0.6145898559799624, "grad_norm": 1.6409800052642822, "learning_rate": 0.00021652003910068425, "loss": 0.6093, "step": 3926 }, { "epoch": 0.6147463994990607, "grad_norm": 1.286957025527954, "learning_rate": 0.00021649560117302052, "loss": 0.7161, "step": 3927 }, { "epoch": 0.6149029430181591, "grad_norm": 2.0003950595855713, "learning_rate": 0.00021647116324535675, "loss": 0.7036, "step": 3928 }, { "epoch": 0.6150594865372574, "grad_norm": 1.561766505241394, "learning_rate": 0.00021644672531769303, "loss": 0.683, "step": 3929 }, { "epoch": 0.6152160300563556, "grad_norm": 1.2200226783752441, "learning_rate": 0.0002164222873900293, "loss": 0.5201, "step": 3930 }, { "epoch": 0.6153725735754539, "grad_norm": 3.1129469871520996, "learning_rate": 0.00021639784946236556, "loss": 0.7908, "step": 3931 }, { "epoch": 0.6155291170945523, "grad_norm": 2.578989028930664, "learning_rate": 0.00021637341153470183, "loss": 1.0388, "step": 3932 }, { "epoch": 0.6156856606136506, "grad_norm": 1.8467810153961182, "learning_rate": 0.0002163489736070381, "loss": 0.6982, "step": 3933 }, { "epoch": 0.6158422041327489, "grad_norm": 2.9458441734313965, "learning_rate": 0.00021632453567937436, "loss": 0.8299, "step": 3934 }, { "epoch": 0.6159987476518473, "grad_norm": 4.411853313446045, "learning_rate": 0.00021630009775171064, "loss": 0.9103, "step": 3935 }, { "epoch": 0.6161552911709455, "grad_norm": 1.8267143964767456, 
"learning_rate": 0.00021627565982404692, "loss": 0.9272, "step": 3936 }, { "epoch": 0.6163118346900438, "grad_norm": 5.815080165863037, "learning_rate": 0.00021625122189638314, "loss": 2.282, "step": 3937 }, { "epoch": 0.6164683782091421, "grad_norm": 3.6911022663116455, "learning_rate": 0.00021622678396871942, "loss": 1.254, "step": 3938 }, { "epoch": 0.6166249217282405, "grad_norm": 2.4619340896606445, "learning_rate": 0.0002162023460410557, "loss": 1.1136, "step": 3939 }, { "epoch": 0.6167814652473388, "grad_norm": 6.795989036560059, "learning_rate": 0.00021617790811339195, "loss": 1.067, "step": 3940 }, { "epoch": 0.6169380087664371, "grad_norm": 4.4798126220703125, "learning_rate": 0.00021615347018572823, "loss": 1.8717, "step": 3941 }, { "epoch": 0.6170945522855353, "grad_norm": 5.152458667755127, "learning_rate": 0.0002161290322580645, "loss": 1.3237, "step": 3942 }, { "epoch": 0.6172510958046337, "grad_norm": 4.771958827972412, "learning_rate": 0.00021610459433040076, "loss": 1.6574, "step": 3943 }, { "epoch": 0.617407639323732, "grad_norm": 2.8596343994140625, "learning_rate": 0.00021608015640273704, "loss": 2.0387, "step": 3944 }, { "epoch": 0.6175641828428303, "grad_norm": 2.4196653366088867, "learning_rate": 0.0002160557184750733, "loss": 1.2219, "step": 3945 }, { "epoch": 0.6177207263619287, "grad_norm": 1.575811505317688, "learning_rate": 0.00021603128054740954, "loss": 0.9666, "step": 3946 }, { "epoch": 0.617877269881027, "grad_norm": 1.7998102903366089, "learning_rate": 0.00021600684261974582, "loss": 0.6596, "step": 3947 }, { "epoch": 0.6180338134001252, "grad_norm": 2.5774621963500977, "learning_rate": 0.0002159824046920821, "loss": 0.7692, "step": 3948 }, { "epoch": 0.6181903569192235, "grad_norm": 2.7812228202819824, "learning_rate": 0.00021595796676441835, "loss": 1.069, "step": 3949 }, { "epoch": 0.6183469004383219, "grad_norm": 3.195280075073242, "learning_rate": 0.00021593352883675463, "loss": 1.0517, "step": 3950 }, { "epoch": 
0.6185034439574202, "grad_norm": 0.4578677713871002, "learning_rate": 0.0002159090909090909, "loss": 0.2721, "step": 3951 }, { "epoch": 0.6186599874765185, "grad_norm": 0.7027708292007446, "learning_rate": 0.00021588465298142713, "loss": 0.3176, "step": 3952 }, { "epoch": 0.6188165309956167, "grad_norm": 0.6740389466285706, "learning_rate": 0.0002158602150537634, "loss": 0.416, "step": 3953 }, { "epoch": 0.6189730745147151, "grad_norm": 0.6556975245475769, "learning_rate": 0.0002158357771260997, "loss": 0.3664, "step": 3954 }, { "epoch": 0.6191296180338134, "grad_norm": 0.4339908957481384, "learning_rate": 0.00021581133919843594, "loss": 0.3161, "step": 3955 }, { "epoch": 0.6192861615529117, "grad_norm": 0.7102330923080444, "learning_rate": 0.00021578690127077222, "loss": 0.4315, "step": 3956 }, { "epoch": 0.61944270507201, "grad_norm": 0.7351087331771851, "learning_rate": 0.0002157624633431085, "loss": 0.3424, "step": 3957 }, { "epoch": 0.6195992485911084, "grad_norm": 0.7561622858047485, "learning_rate": 0.00021573802541544475, "loss": 0.4188, "step": 3958 }, { "epoch": 0.6197557921102066, "grad_norm": 1.3164188861846924, "learning_rate": 0.00021571358748778102, "loss": 0.4811, "step": 3959 }, { "epoch": 0.6199123356293049, "grad_norm": 0.9041202664375305, "learning_rate": 0.0002156891495601173, "loss": 0.3488, "step": 3960 }, { "epoch": 0.6200688791484033, "grad_norm": 1.003924012184143, "learning_rate": 0.00021566471163245353, "loss": 0.3205, "step": 3961 }, { "epoch": 0.6202254226675016, "grad_norm": 0.5604303479194641, "learning_rate": 0.0002156402737047898, "loss": 0.2906, "step": 3962 }, { "epoch": 0.6203819661865999, "grad_norm": 0.638725221157074, "learning_rate": 0.00021561583577712608, "loss": 0.3154, "step": 3963 }, { "epoch": 0.6205385097056982, "grad_norm": 1.5510947704315186, "learning_rate": 0.00021559139784946234, "loss": 0.5327, "step": 3964 }, { "epoch": 0.6206950532247965, "grad_norm": 1.3755923509597778, "learning_rate": 0.0002155669599217986, 
"loss": 0.5919, "step": 3965 }, { "epoch": 0.6208515967438948, "grad_norm": 0.9352520704269409, "learning_rate": 0.0002155425219941349, "loss": 0.2272, "step": 3966 }, { "epoch": 0.6210081402629931, "grad_norm": 1.7107229232788086, "learning_rate": 0.00021551808406647114, "loss": 0.352, "step": 3967 }, { "epoch": 0.6211646837820914, "grad_norm": 1.7082419395446777, "learning_rate": 0.0002154936461388074, "loss": 0.7203, "step": 3968 }, { "epoch": 0.6213212273011898, "grad_norm": 2.179602861404419, "learning_rate": 0.00021546920821114367, "loss": 0.8749, "step": 3969 }, { "epoch": 0.621477770820288, "grad_norm": 1.580485224723816, "learning_rate": 0.00021544477028347992, "loss": 0.6856, "step": 3970 }, { "epoch": 0.6216343143393863, "grad_norm": 1.842761754989624, "learning_rate": 0.0002154203323558162, "loss": 0.4401, "step": 3971 }, { "epoch": 0.6217908578584846, "grad_norm": 1.207093596458435, "learning_rate": 0.00021539589442815248, "loss": 0.6815, "step": 3972 }, { "epoch": 0.621947401377583, "grad_norm": 1.4717131853103638, "learning_rate": 0.00021537145650048873, "loss": 0.5138, "step": 3973 }, { "epoch": 0.6221039448966813, "grad_norm": 1.9231157302856445, "learning_rate": 0.000215347018572825, "loss": 0.6397, "step": 3974 }, { "epoch": 0.6222604884157796, "grad_norm": 1.4516040086746216, "learning_rate": 0.0002153225806451613, "loss": 0.6357, "step": 3975 }, { "epoch": 0.6224170319348779, "grad_norm": 1.3185113668441772, "learning_rate": 0.0002152981427174975, "loss": 0.497, "step": 3976 }, { "epoch": 0.6225735754539762, "grad_norm": 1.5022931098937988, "learning_rate": 0.0002152737047898338, "loss": 0.6394, "step": 3977 }, { "epoch": 0.6227301189730745, "grad_norm": 2.147803544998169, "learning_rate": 0.00021524926686217007, "loss": 0.6296, "step": 3978 }, { "epoch": 0.6228866624921728, "grad_norm": 2.3835220336914062, "learning_rate": 0.00021522482893450632, "loss": 1.1217, "step": 3979 }, { "epoch": 0.6230432060112712, "grad_norm": 2.3003957271575928, 
"learning_rate": 0.0002152003910068426, "loss": 0.9852, "step": 3980 }, { "epoch": 0.6231997495303695, "grad_norm": 2.8181819915771484, "learning_rate": 0.00021517595307917888, "loss": 0.9064, "step": 3981 }, { "epoch": 0.6233562930494677, "grad_norm": 1.4563791751861572, "learning_rate": 0.00021515151515151513, "loss": 0.8785, "step": 3982 }, { "epoch": 0.623512836568566, "grad_norm": 1.4717764854431152, "learning_rate": 0.0002151270772238514, "loss": 0.708, "step": 3983 }, { "epoch": 0.6236693800876644, "grad_norm": 1.8528285026550293, "learning_rate": 0.00021510263929618769, "loss": 1.0, "step": 3984 }, { "epoch": 0.6238259236067627, "grad_norm": 1.295715093612671, "learning_rate": 0.0002150782013685239, "loss": 0.6012, "step": 3985 }, { "epoch": 0.623982467125861, "grad_norm": 3.2727482318878174, "learning_rate": 0.0002150537634408602, "loss": 1.2052, "step": 3986 }, { "epoch": 0.6241390106449592, "grad_norm": 2.074563980102539, "learning_rate": 0.00021502932551319647, "loss": 1.0637, "step": 3987 }, { "epoch": 0.6242955541640576, "grad_norm": 2.350457191467285, "learning_rate": 0.00021500488758553272, "loss": 0.8989, "step": 3988 }, { "epoch": 0.6244520976831559, "grad_norm": 2.035271406173706, "learning_rate": 0.000214980449657869, "loss": 0.3382, "step": 3989 }, { "epoch": 0.6246086412022542, "grad_norm": 2.9534759521484375, "learning_rate": 0.00021495601173020527, "loss": 1.3783, "step": 3990 }, { "epoch": 0.6247651847213526, "grad_norm": 1.612892508506775, "learning_rate": 0.00021493157380254153, "loss": 1.3872, "step": 3991 }, { "epoch": 0.6249217282404509, "grad_norm": 1.9635756015777588, "learning_rate": 0.00021490713587487778, "loss": 1.1709, "step": 3992 }, { "epoch": 0.6250782717595491, "grad_norm": 3.8679616451263428, "learning_rate": 0.00021488269794721405, "loss": 1.7638, "step": 3993 }, { "epoch": 0.6252348152786474, "grad_norm": 2.8190555572509766, "learning_rate": 0.0002148582600195503, "loss": 1.1448, "step": 3994 }, { "epoch": 
0.6253913587977458, "grad_norm": 1.001844882965088, "learning_rate": 0.00021483382209188658, "loss": 0.749, "step": 3995 }, { "epoch": 0.6255479023168441, "grad_norm": 1.1124273538589478, "learning_rate": 0.00021480938416422286, "loss": 0.5163, "step": 3996 }, { "epoch": 0.6257044458359424, "grad_norm": 2.1196939945220947, "learning_rate": 0.00021478494623655911, "loss": 0.2587, "step": 3997 }, { "epoch": 0.6258609893550408, "grad_norm": 3.0413784980773926, "learning_rate": 0.0002147605083088954, "loss": 0.7465, "step": 3998 }, { "epoch": 0.626017532874139, "grad_norm": 2.728447437286377, "learning_rate": 0.00021473607038123167, "loss": 0.7421, "step": 3999 }, { "epoch": 0.6261740763932373, "grad_norm": 1.2172927856445312, "learning_rate": 0.0002147116324535679, "loss": 0.5418, "step": 4000 }, { "epoch": 0.6261740763932373, "eval_loss": 0.6170632839202881, "eval_runtime": 206.2566, "eval_samples_per_second": 60.037, "eval_steps_per_second": 3.753, "eval_wer": 0.37072061542384765, "step": 4000 }, { "epoch": 0.6263306199123356, "grad_norm": 0.6230515837669373, "learning_rate": 0.00021468719452590417, "loss": 0.3052, "step": 4001 }, { "epoch": 0.626487163431434, "grad_norm": 0.36038628220558167, "learning_rate": 0.00021466275659824045, "loss": 0.253, "step": 4002 }, { "epoch": 0.6266437069505323, "grad_norm": 0.40060994029045105, "learning_rate": 0.0002146383186705767, "loss": 0.1601, "step": 4003 }, { "epoch": 0.6268002504696305, "grad_norm": 0.9217357635498047, "learning_rate": 0.00021461388074291298, "loss": 0.3302, "step": 4004 }, { "epoch": 0.6269567939887288, "grad_norm": 0.5147327780723572, "learning_rate": 0.00021458944281524926, "loss": 0.3022, "step": 4005 }, { "epoch": 0.6271133375078272, "grad_norm": 0.6115538477897644, "learning_rate": 0.0002145650048875855, "loss": 0.3489, "step": 4006 }, { "epoch": 0.6272698810269255, "grad_norm": 0.9012541174888611, "learning_rate": 0.0002145405669599218, "loss": 0.2962, "step": 4007 }, { "epoch": 0.6274264245460238, 
"grad_norm": 0.6350597143173218, "learning_rate": 0.00021451612903225807, "loss": 0.3945, "step": 4008 }, { "epoch": 0.6275829680651221, "grad_norm": 0.8227857947349548, "learning_rate": 0.0002144916911045943, "loss": 0.411, "step": 4009 }, { "epoch": 0.6277395115842204, "grad_norm": 0.618043839931488, "learning_rate": 0.00021446725317693057, "loss": 0.3542, "step": 4010 }, { "epoch": 0.6278960551033187, "grad_norm": 1.9552290439605713, "learning_rate": 0.00021444281524926685, "loss": 0.9303, "step": 4011 }, { "epoch": 0.628052598622417, "grad_norm": 0.5457572937011719, "learning_rate": 0.0002144183773216031, "loss": 0.3111, "step": 4012 }, { "epoch": 0.6282091421415154, "grad_norm": 1.2322858572006226, "learning_rate": 0.00021439393939393938, "loss": 0.3717, "step": 4013 }, { "epoch": 0.6283656856606137, "grad_norm": 1.5767748355865479, "learning_rate": 0.00021436950146627566, "loss": 0.4199, "step": 4014 }, { "epoch": 0.628522229179712, "grad_norm": 1.3694690465927124, "learning_rate": 0.0002143450635386119, "loss": 0.416, "step": 4015 }, { "epoch": 0.6286787726988102, "grad_norm": 0.9982991814613342, "learning_rate": 0.00021432062561094816, "loss": 0.4201, "step": 4016 }, { "epoch": 0.6288353162179086, "grad_norm": 1.6237698793411255, "learning_rate": 0.00021429618768328444, "loss": 0.677, "step": 4017 }, { "epoch": 0.6289918597370069, "grad_norm": 1.1065876483917236, "learning_rate": 0.0002142717497556207, "loss": 0.5136, "step": 4018 }, { "epoch": 0.6291484032561052, "grad_norm": 0.8187892436981201, "learning_rate": 0.00021424731182795697, "loss": 0.4697, "step": 4019 }, { "epoch": 0.6293049467752035, "grad_norm": 0.7985676527023315, "learning_rate": 0.00021422287390029325, "loss": 0.3248, "step": 4020 }, { "epoch": 0.6294614902943018, "grad_norm": 2.245656967163086, "learning_rate": 0.0002141984359726295, "loss": 0.788, "step": 4021 }, { "epoch": 0.6296180338134001, "grad_norm": 2.0072994232177734, "learning_rate": 0.00021417399804496577, "loss": 0.9463, 
"step": 4022 }, { "epoch": 0.6297745773324984, "grad_norm": 2.0363926887512207, "learning_rate": 0.00021414956011730205, "loss": 0.7412, "step": 4023 }, { "epoch": 0.6299311208515967, "grad_norm": 2.1571948528289795, "learning_rate": 0.00021412512218963828, "loss": 0.435, "step": 4024 }, { "epoch": 0.6300876643706951, "grad_norm": 1.8495596647262573, "learning_rate": 0.00021410068426197456, "loss": 0.7009, "step": 4025 }, { "epoch": 0.6302442078897934, "grad_norm": 1.2187695503234863, "learning_rate": 0.00021407624633431083, "loss": 0.5252, "step": 4026 }, { "epoch": 0.6304007514088916, "grad_norm": 1.3533210754394531, "learning_rate": 0.00021405180840664709, "loss": 0.586, "step": 4027 }, { "epoch": 0.63055729492799, "grad_norm": 3.1232190132141113, "learning_rate": 0.00021402737047898336, "loss": 0.9532, "step": 4028 }, { "epoch": 0.6307138384470883, "grad_norm": 1.3464336395263672, "learning_rate": 0.00021400293255131964, "loss": 0.7114, "step": 4029 }, { "epoch": 0.6308703819661866, "grad_norm": 1.8784873485565186, "learning_rate": 0.0002139784946236559, "loss": 1.0843, "step": 4030 }, { "epoch": 0.6310269254852849, "grad_norm": 1.593939185142517, "learning_rate": 0.00021395405669599217, "loss": 0.8251, "step": 4031 }, { "epoch": 0.6311834690043833, "grad_norm": 3.432223320007324, "learning_rate": 0.00021392961876832845, "loss": 0.9525, "step": 4032 }, { "epoch": 0.6313400125234815, "grad_norm": 2.074753522872925, "learning_rate": 0.00021390518084066467, "loss": 0.5878, "step": 4033 }, { "epoch": 0.6314965560425798, "grad_norm": 1.5874582529067993, "learning_rate": 0.00021388074291300095, "loss": 0.6233, "step": 4034 }, { "epoch": 0.6316530995616781, "grad_norm": 1.6352641582489014, "learning_rate": 0.00021385630498533723, "loss": 0.9886, "step": 4035 }, { "epoch": 0.6318096430807765, "grad_norm": 2.6665916442871094, "learning_rate": 0.00021383186705767348, "loss": 1.6576, "step": 4036 }, { "epoch": 0.6319661865998748, "grad_norm": 1.7960261106491089, 
"learning_rate": 0.00021380742913000976, "loss": 0.6991, "step": 4037 }, { "epoch": 0.632122730118973, "grad_norm": 1.923484206199646, "learning_rate": 0.00021378299120234604, "loss": 0.8864, "step": 4038 }, { "epoch": 0.6322792736380713, "grad_norm": 1.8930014371871948, "learning_rate": 0.00021375855327468226, "loss": 0.9939, "step": 4039 }, { "epoch": 0.6324358171571697, "grad_norm": 3.678338050842285, "learning_rate": 0.00021373411534701854, "loss": 1.4059, "step": 4040 }, { "epoch": 0.632592360676268, "grad_norm": 2.632375955581665, "learning_rate": 0.00021370967741935482, "loss": 1.4417, "step": 4041 }, { "epoch": 0.6327489041953663, "grad_norm": 2.788484573364258, "learning_rate": 0.00021368523949169107, "loss": 0.9454, "step": 4042 }, { "epoch": 0.6329054477144647, "grad_norm": 2.319645643234253, "learning_rate": 0.00021366080156402735, "loss": 1.4575, "step": 4043 }, { "epoch": 0.6330619912335629, "grad_norm": 3.663972854614258, "learning_rate": 0.00021363636363636363, "loss": 1.7115, "step": 4044 }, { "epoch": 0.6332185347526612, "grad_norm": 1.7628999948501587, "learning_rate": 0.00021361192570869988, "loss": 0.8049, "step": 4045 }, { "epoch": 0.6333750782717595, "grad_norm": 1.7270902395248413, "learning_rate": 0.00021358748778103616, "loss": 0.9988, "step": 4046 }, { "epoch": 0.6335316217908579, "grad_norm": 2.860292911529541, "learning_rate": 0.00021356304985337244, "loss": 0.9895, "step": 4047 }, { "epoch": 0.6336881653099562, "grad_norm": 3.5345377922058105, "learning_rate": 0.00021353861192570866, "loss": 1.4816, "step": 4048 }, { "epoch": 0.6338447088290545, "grad_norm": 3.5948832035064697, "learning_rate": 0.00021351417399804494, "loss": 1.0884, "step": 4049 }, { "epoch": 0.6340012523481527, "grad_norm": 3.8039002418518066, "learning_rate": 0.00021348973607038122, "loss": 1.7915, "step": 4050 }, { "epoch": 0.6341577958672511, "grad_norm": 0.4440103769302368, "learning_rate": 0.00021346529814271747, "loss": 0.313, "step": 4051 }, { "epoch": 
0.6343143393863494, "grad_norm": 0.5781520009040833, "learning_rate": 0.00021344086021505375, "loss": 0.3249, "step": 4052 }, { "epoch": 0.6344708829054477, "grad_norm": 0.5911738276481628, "learning_rate": 0.00021341642228739002, "loss": 0.3927, "step": 4053 }, { "epoch": 0.6346274264245461, "grad_norm": 0.4731442332267761, "learning_rate": 0.00021339198435972628, "loss": 0.243, "step": 4054 }, { "epoch": 0.6347839699436444, "grad_norm": 0.5945417881011963, "learning_rate": 0.00021336754643206255, "loss": 0.3089, "step": 4055 }, { "epoch": 0.6349405134627426, "grad_norm": 0.6877188086509705, "learning_rate": 0.0002133431085043988, "loss": 0.4119, "step": 4056 }, { "epoch": 0.6350970569818409, "grad_norm": 0.788280725479126, "learning_rate": 0.00021331867057673506, "loss": 0.3965, "step": 4057 }, { "epoch": 0.6352536005009393, "grad_norm": 0.7950897216796875, "learning_rate": 0.00021329423264907133, "loss": 0.4293, "step": 4058 }, { "epoch": 0.6354101440200376, "grad_norm": 0.7185248732566833, "learning_rate": 0.0002132697947214076, "loss": 0.3425, "step": 4059 }, { "epoch": 0.6355666875391359, "grad_norm": 1.1782954931259155, "learning_rate": 0.00021324535679374386, "loss": 0.3744, "step": 4060 }, { "epoch": 0.6357232310582341, "grad_norm": 1.3076963424682617, "learning_rate": 0.00021322091886608014, "loss": 0.7161, "step": 4061 }, { "epoch": 0.6358797745773325, "grad_norm": 0.9288998246192932, "learning_rate": 0.00021319648093841642, "loss": 0.3778, "step": 4062 }, { "epoch": 0.6360363180964308, "grad_norm": 1.1923285722732544, "learning_rate": 0.00021317204301075265, "loss": 0.5499, "step": 4063 }, { "epoch": 0.6361928616155291, "grad_norm": 0.8304926156997681, "learning_rate": 0.00021314760508308892, "loss": 0.4468, "step": 4064 }, { "epoch": 0.6363494051346275, "grad_norm": 0.9042999744415283, "learning_rate": 0.0002131231671554252, "loss": 0.4214, "step": 4065 }, { "epoch": 0.6365059486537258, "grad_norm": 1.1802743673324585, "learning_rate": 
0.00021309872922776145, "loss": 0.4863, "step": 4066 }, { "epoch": 0.636662492172824, "grad_norm": 0.9650530219078064, "learning_rate": 0.00021307429130009773, "loss": 0.5456, "step": 4067 }, { "epoch": 0.6368190356919223, "grad_norm": 1.8889888525009155, "learning_rate": 0.000213049853372434, "loss": 0.4523, "step": 4068 }, { "epoch": 0.6369755792110207, "grad_norm": 1.3778254985809326, "learning_rate": 0.00021302541544477026, "loss": 0.5295, "step": 4069 }, { "epoch": 0.637132122730119, "grad_norm": 1.3689180612564087, "learning_rate": 0.00021300097751710654, "loss": 0.8532, "step": 4070 }, { "epoch": 0.6372886662492173, "grad_norm": 1.6118444204330444, "learning_rate": 0.00021297653958944282, "loss": 0.5106, "step": 4071 }, { "epoch": 0.6374452097683156, "grad_norm": 2.750725507736206, "learning_rate": 0.00021295210166177904, "loss": 0.8254, "step": 4072 }, { "epoch": 0.6376017532874139, "grad_norm": 2.30176043510437, "learning_rate": 0.00021292766373411532, "loss": 0.8185, "step": 4073 }, { "epoch": 0.6377582968065122, "grad_norm": 2.448899745941162, "learning_rate": 0.0002129032258064516, "loss": 0.7568, "step": 4074 }, { "epoch": 0.6379148403256105, "grad_norm": 1.9214074611663818, "learning_rate": 0.00021287878787878785, "loss": 0.7279, "step": 4075 }, { "epoch": 0.6380713838447089, "grad_norm": 1.4484351873397827, "learning_rate": 0.00021285434995112413, "loss": 0.8629, "step": 4076 }, { "epoch": 0.6382279273638072, "grad_norm": 1.9307438135147095, "learning_rate": 0.0002128299120234604, "loss": 0.7699, "step": 4077 }, { "epoch": 0.6383844708829054, "grad_norm": 1.6077808141708374, "learning_rate": 0.00021280547409579666, "loss": 0.5525, "step": 4078 }, { "epoch": 0.6385410144020037, "grad_norm": 2.4951558113098145, "learning_rate": 0.00021278103616813294, "loss": 0.688, "step": 4079 }, { "epoch": 0.638697557921102, "grad_norm": 3.6255581378936768, "learning_rate": 0.0002127565982404692, "loss": 1.2918, "step": 4080 }, { "epoch": 0.6388541014402004, 
"grad_norm": 2.62573504447937, "learning_rate": 0.00021273216031280544, "loss": 0.8204, "step": 4081 }, { "epoch": 0.6390106449592987, "grad_norm": 1.5834779739379883, "learning_rate": 0.00021270772238514172, "loss": 0.5305, "step": 4082 }, { "epoch": 0.639167188478397, "grad_norm": 2.354691505432129, "learning_rate": 0.000212683284457478, "loss": 1.1743, "step": 4083 }, { "epoch": 0.6393237319974953, "grad_norm": 1.5539332628250122, "learning_rate": 0.00021265884652981425, "loss": 0.5419, "step": 4084 }, { "epoch": 0.6394802755165936, "grad_norm": 1.5284450054168701, "learning_rate": 0.00021263440860215052, "loss": 1.0443, "step": 4085 }, { "epoch": 0.6396368190356919, "grad_norm": 3.700737476348877, "learning_rate": 0.0002126099706744868, "loss": 1.6458, "step": 4086 }, { "epoch": 0.6397933625547902, "grad_norm": 2.4430134296417236, "learning_rate": 0.00021258553274682303, "loss": 0.8026, "step": 4087 }, { "epoch": 0.6399499060738886, "grad_norm": 2.9838156700134277, "learning_rate": 0.0002125610948191593, "loss": 1.4549, "step": 4088 }, { "epoch": 0.6401064495929869, "grad_norm": 1.9823230504989624, "learning_rate": 0.00021253665689149558, "loss": 1.2308, "step": 4089 }, { "epoch": 0.6402629931120851, "grad_norm": 2.805058717727661, "learning_rate": 0.00021251221896383184, "loss": 1.7271, "step": 4090 }, { "epoch": 0.6404195366311835, "grad_norm": 3.0151147842407227, "learning_rate": 0.00021248778103616811, "loss": 1.0577, "step": 4091 }, { "epoch": 0.6405760801502818, "grad_norm": 2.8304717540740967, "learning_rate": 0.0002124633431085044, "loss": 0.9225, "step": 4092 }, { "epoch": 0.6407326236693801, "grad_norm": 1.2722463607788086, "learning_rate": 0.00021243890518084064, "loss": 1.0837, "step": 4093 }, { "epoch": 0.6408891671884784, "grad_norm": 2.478703498840332, "learning_rate": 0.00021241446725317692, "loss": 1.4426, "step": 4094 }, { "epoch": 0.6410457107075767, "grad_norm": 1.236244559288025, "learning_rate": 0.0002123900293255132, "loss": 0.5695, 
"step": 4095 }, { "epoch": 0.641202254226675, "grad_norm": 1.9802755117416382, "learning_rate": 0.00021236559139784942, "loss": 0.6294, "step": 4096 }, { "epoch": 0.6413587977457733, "grad_norm": 2.97894549369812, "learning_rate": 0.0002123411534701857, "loss": 1.5215, "step": 4097 }, { "epoch": 0.6415153412648716, "grad_norm": 7.575010776519775, "learning_rate": 0.00021231671554252198, "loss": 1.141, "step": 4098 }, { "epoch": 0.64167188478397, "grad_norm": 2.812551975250244, "learning_rate": 0.00021229227761485823, "loss": 1.2502, "step": 4099 }, { "epoch": 0.6418284283030683, "grad_norm": 1.490632176399231, "learning_rate": 0.0002122678396871945, "loss": 1.0142, "step": 4100 }, { "epoch": 0.6419849718221665, "grad_norm": 0.6560826897621155, "learning_rate": 0.0002122434017595308, "loss": 0.479, "step": 4101 }, { "epoch": 0.6421415153412648, "grad_norm": 0.4517360329627991, "learning_rate": 0.00021221896383186704, "loss": 0.2937, "step": 4102 }, { "epoch": 0.6422980588603632, "grad_norm": 0.5508907437324524, "learning_rate": 0.00021219452590420332, "loss": 0.2687, "step": 4103 }, { "epoch": 0.6424546023794615, "grad_norm": 0.5405100584030151, "learning_rate": 0.00021217008797653957, "loss": 0.3279, "step": 4104 }, { "epoch": 0.6426111458985598, "grad_norm": 0.8774932622909546, "learning_rate": 0.00021214565004887582, "loss": 0.4322, "step": 4105 }, { "epoch": 0.6427676894176582, "grad_norm": 0.5635179281234741, "learning_rate": 0.0002121212121212121, "loss": 0.2838, "step": 4106 }, { "epoch": 0.6429242329367564, "grad_norm": 2.6404831409454346, "learning_rate": 0.00021209677419354838, "loss": 0.3582, "step": 4107 }, { "epoch": 0.6430807764558547, "grad_norm": 0.9229952692985535, "learning_rate": 0.00021207233626588463, "loss": 0.4306, "step": 4108 }, { "epoch": 0.643237319974953, "grad_norm": 1.216314435005188, "learning_rate": 0.0002120478983382209, "loss": 0.4203, "step": 4109 }, { "epoch": 0.6433938634940514, "grad_norm": 0.6581313014030457, "learning_rate": 
0.00021202346041055719, "loss": 0.3948, "step": 4110 }, { "epoch": 0.6435504070131497, "grad_norm": 0.8522962331771851, "learning_rate": 0.0002119990224828934, "loss": 0.4591, "step": 4111 }, { "epoch": 0.6437069505322479, "grad_norm": 1.1675865650177002, "learning_rate": 0.0002119745845552297, "loss": 0.581, "step": 4112 }, { "epoch": 0.6438634940513462, "grad_norm": 0.7074406147003174, "learning_rate": 0.00021195014662756597, "loss": 0.3727, "step": 4113 }, { "epoch": 0.6440200375704446, "grad_norm": 1.105228066444397, "learning_rate": 0.00021192570869990222, "loss": 0.4141, "step": 4114 }, { "epoch": 0.6441765810895429, "grad_norm": 1.1589969396591187, "learning_rate": 0.0002119012707722385, "loss": 0.4892, "step": 4115 }, { "epoch": 0.6443331246086412, "grad_norm": 2.829336404800415, "learning_rate": 0.00021187683284457477, "loss": 0.861, "step": 4116 }, { "epoch": 0.6444896681277396, "grad_norm": 0.7126592993736267, "learning_rate": 0.00021185239491691103, "loss": 0.3913, "step": 4117 }, { "epoch": 0.6446462116468378, "grad_norm": 1.3239237070083618, "learning_rate": 0.0002118279569892473, "loss": 0.7084, "step": 4118 }, { "epoch": 0.6448027551659361, "grad_norm": 0.9522157311439514, "learning_rate": 0.00021180351906158358, "loss": 0.4494, "step": 4119 }, { "epoch": 0.6449592986850344, "grad_norm": 1.3508566617965698, "learning_rate": 0.0002117790811339198, "loss": 0.6281, "step": 4120 }, { "epoch": 0.6451158422041328, "grad_norm": 2.4964592456817627, "learning_rate": 0.00021175464320625608, "loss": 0.5852, "step": 4121 }, { "epoch": 0.6452723857232311, "grad_norm": 1.3371846675872803, "learning_rate": 0.00021173020527859236, "loss": 0.5809, "step": 4122 }, { "epoch": 0.6454289292423294, "grad_norm": 1.801464319229126, "learning_rate": 0.00021170576735092861, "loss": 0.5822, "step": 4123 }, { "epoch": 0.6455854727614276, "grad_norm": 1.4076706171035767, "learning_rate": 0.0002116813294232649, "loss": 0.6492, "step": 4124 }, { "epoch": 0.645742016280526, 
"grad_norm": 1.2247923612594604, "learning_rate": 0.00021165689149560117, "loss": 0.7113, "step": 4125 }, { "epoch": 0.6458985597996243, "grad_norm": 2.9736526012420654, "learning_rate": 0.00021163245356793742, "loss": 0.9173, "step": 4126 }, { "epoch": 0.6460551033187226, "grad_norm": 1.522207498550415, "learning_rate": 0.00021160801564027367, "loss": 0.717, "step": 4127 }, { "epoch": 0.646211646837821, "grad_norm": 2.8256654739379883, "learning_rate": 0.00021158357771260995, "loss": 0.7599, "step": 4128 }, { "epoch": 0.6463681903569192, "grad_norm": 2.5539469718933105, "learning_rate": 0.0002115591397849462, "loss": 0.9544, "step": 4129 }, { "epoch": 0.6465247338760175, "grad_norm": 1.8205986022949219, "learning_rate": 0.00021153470185728248, "loss": 0.9117, "step": 4130 }, { "epoch": 0.6466812773951158, "grad_norm": 3.9707062244415283, "learning_rate": 0.00021151026392961876, "loss": 1.1332, "step": 4131 }, { "epoch": 0.6468378209142142, "grad_norm": 1.7716293334960938, "learning_rate": 0.000211485826001955, "loss": 0.5995, "step": 4132 }, { "epoch": 0.6469943644333125, "grad_norm": 2.0093953609466553, "learning_rate": 0.0002114613880742913, "loss": 1.1797, "step": 4133 }, { "epoch": 0.6471509079524108, "grad_norm": 1.3525505065917969, "learning_rate": 0.00021143695014662757, "loss": 0.5744, "step": 4134 }, { "epoch": 0.647307451471509, "grad_norm": 0.9893065690994263, "learning_rate": 0.0002114125122189638, "loss": 0.8285, "step": 4135 }, { "epoch": 0.6474639949906074, "grad_norm": 3.0879127979278564, "learning_rate": 0.00021138807429130007, "loss": 1.8302, "step": 4136 }, { "epoch": 0.6476205385097057, "grad_norm": 1.6415681838989258, "learning_rate": 0.00021136363636363635, "loss": 0.8313, "step": 4137 }, { "epoch": 0.647777082028804, "grad_norm": 2.8045177459716797, "learning_rate": 0.0002113391984359726, "loss": 0.9433, "step": 4138 }, { "epoch": 0.6479336255479023, "grad_norm": 1.968576431274414, "learning_rate": 0.00021131476050830888, "loss": 0.7906, 
"step": 4139 }, { "epoch": 0.6480901690670007, "grad_norm": 1.4854947328567505, "learning_rate": 0.00021129032258064516, "loss": 1.2082, "step": 4140 }, { "epoch": 0.6482467125860989, "grad_norm": 3.7475380897521973, "learning_rate": 0.0002112658846529814, "loss": 1.9279, "step": 4141 }, { "epoch": 0.6484032561051972, "grad_norm": 1.9504555463790894, "learning_rate": 0.00021124144672531769, "loss": 1.4245, "step": 4142 }, { "epoch": 0.6485597996242956, "grad_norm": 2.5647873878479004, "learning_rate": 0.00021121700879765396, "loss": 1.1829, "step": 4143 }, { "epoch": 0.6487163431433939, "grad_norm": 1.7342371940612793, "learning_rate": 0.0002111925708699902, "loss": 1.759, "step": 4144 }, { "epoch": 0.6488728866624922, "grad_norm": 1.559617519378662, "learning_rate": 0.00021116813294232647, "loss": 1.0318, "step": 4145 }, { "epoch": 0.6490294301815904, "grad_norm": 1.5105706453323364, "learning_rate": 0.00021114369501466275, "loss": 1.1075, "step": 4146 }, { "epoch": 0.6491859737006888, "grad_norm": 1.054640769958496, "learning_rate": 0.000211119257086999, "loss": 0.7295, "step": 4147 }, { "epoch": 0.6493425172197871, "grad_norm": 1.3204190731048584, "learning_rate": 0.00021109481915933528, "loss": 0.6747, "step": 4148 }, { "epoch": 0.6494990607388854, "grad_norm": 1.6427069902420044, "learning_rate": 0.00021107038123167155, "loss": 1.0632, "step": 4149 }, { "epoch": 0.6496556042579837, "grad_norm": 2.6483640670776367, "learning_rate": 0.0002110459433040078, "loss": 1.724, "step": 4150 }, { "epoch": 0.6498121477770821, "grad_norm": 0.44321414828300476, "learning_rate": 0.00021102150537634406, "loss": 0.2836, "step": 4151 }, { "epoch": 0.6499686912961803, "grad_norm": 0.5823266506195068, "learning_rate": 0.00021099706744868033, "loss": 0.2743, "step": 4152 }, { "epoch": 0.6501252348152786, "grad_norm": 0.6924779415130615, "learning_rate": 0.00021097262952101659, "loss": 0.2912, "step": 4153 }, { "epoch": 0.650281778334377, "grad_norm": 0.7143043279647827, 
"learning_rate": 0.00021094819159335286, "loss": 0.4401, "step": 4154 }, { "epoch": 0.6504383218534753, "grad_norm": 1.3243781328201294, "learning_rate": 0.00021092375366568914, "loss": 0.3095, "step": 4155 }, { "epoch": 0.6505948653725736, "grad_norm": 0.8137784600257874, "learning_rate": 0.0002108993157380254, "loss": 0.3707, "step": 4156 }, { "epoch": 0.6507514088916719, "grad_norm": 0.5884976983070374, "learning_rate": 0.00021087487781036167, "loss": 0.2581, "step": 4157 }, { "epoch": 0.6509079524107702, "grad_norm": 0.9753175973892212, "learning_rate": 0.00021085043988269795, "loss": 0.4062, "step": 4158 }, { "epoch": 0.6510644959298685, "grad_norm": 0.8055758476257324, "learning_rate": 0.00021082600195503417, "loss": 0.4112, "step": 4159 }, { "epoch": 0.6512210394489668, "grad_norm": 0.7871150374412537, "learning_rate": 0.00021080156402737045, "loss": 0.5229, "step": 4160 }, { "epoch": 0.6513775829680651, "grad_norm": 0.666612982749939, "learning_rate": 0.00021077712609970673, "loss": 0.2359, "step": 4161 }, { "epoch": 0.6515341264871635, "grad_norm": 4.991723537445068, "learning_rate": 0.00021075268817204298, "loss": 1.1647, "step": 4162 }, { "epoch": 0.6516906700062617, "grad_norm": 0.6733570098876953, "learning_rate": 0.00021072825024437926, "loss": 0.2791, "step": 4163 }, { "epoch": 0.65184721352536, "grad_norm": 1.0828582048416138, "learning_rate": 0.00021070381231671554, "loss": 0.4028, "step": 4164 }, { "epoch": 0.6520037570444583, "grad_norm": 1.031664252281189, "learning_rate": 0.0002106793743890518, "loss": 0.4959, "step": 4165 }, { "epoch": 0.6521603005635567, "grad_norm": 1.7756761312484741, "learning_rate": 0.00021065493646138807, "loss": 0.5123, "step": 4166 }, { "epoch": 0.652316844082655, "grad_norm": 1.409739375114441, "learning_rate": 0.00021063049853372435, "loss": 0.5411, "step": 4167 }, { "epoch": 0.6524733876017533, "grad_norm": 0.8608100414276123, "learning_rate": 0.00021060606060606057, "loss": 0.5408, "step": 4168 }, { "epoch": 
0.6526299311208515, "grad_norm": 1.9020010232925415, "learning_rate": 0.00021058162267839685, "loss": 0.5228, "step": 4169 }, { "epoch": 0.6527864746399499, "grad_norm": 1.2698709964752197, "learning_rate": 0.00021055718475073313, "loss": 0.4451, "step": 4170 }, { "epoch": 0.6529430181590482, "grad_norm": 1.3573118448257446, "learning_rate": 0.00021053274682306938, "loss": 0.5889, "step": 4171 }, { "epoch": 0.6530995616781465, "grad_norm": 1.1787749528884888, "learning_rate": 0.00021050830889540566, "loss": 0.4531, "step": 4172 }, { "epoch": 0.6532561051972449, "grad_norm": 1.463240146636963, "learning_rate": 0.00021048387096774194, "loss": 0.6581, "step": 4173 }, { "epoch": 0.6534126487163432, "grad_norm": 1.1334830522537231, "learning_rate": 0.0002104594330400782, "loss": 0.6735, "step": 4174 }, { "epoch": 0.6535691922354414, "grad_norm": 0.9221059083938599, "learning_rate": 0.00021043499511241444, "loss": 0.5811, "step": 4175 }, { "epoch": 0.6537257357545397, "grad_norm": 0.7749282717704773, "learning_rate": 0.00021041055718475072, "loss": 0.3367, "step": 4176 }, { "epoch": 0.6538822792736381, "grad_norm": 2.028270721435547, "learning_rate": 0.00021038611925708697, "loss": 0.744, "step": 4177 }, { "epoch": 0.6540388227927364, "grad_norm": 2.1838300228118896, "learning_rate": 0.00021036168132942325, "loss": 0.7978, "step": 4178 }, { "epoch": 0.6541953663118347, "grad_norm": 2.014901876449585, "learning_rate": 0.00021033724340175952, "loss": 0.7741, "step": 4179 }, { "epoch": 0.654351909830933, "grad_norm": 2.365983009338379, "learning_rate": 0.00021031280547409578, "loss": 0.8107, "step": 4180 }, { "epoch": 0.6545084533500313, "grad_norm": 2.7603774070739746, "learning_rate": 0.00021028836754643205, "loss": 1.2278, "step": 4181 }, { "epoch": 0.6546649968691296, "grad_norm": 1.7093403339385986, "learning_rate": 0.00021026392961876833, "loss": 0.8463, "step": 4182 }, { "epoch": 0.6548215403882279, "grad_norm": 1.8434052467346191, "learning_rate": 
0.00021023949169110456, "loss": 0.8286, "step": 4183 }, { "epoch": 0.6549780839073263, "grad_norm": 1.2847257852554321, "learning_rate": 0.00021021505376344084, "loss": 0.7018, "step": 4184 }, { "epoch": 0.6551346274264246, "grad_norm": 1.887803077697754, "learning_rate": 0.0002101906158357771, "loss": 0.8022, "step": 4185 }, { "epoch": 0.6552911709455228, "grad_norm": 2.067678928375244, "learning_rate": 0.00021016617790811336, "loss": 0.5651, "step": 4186 }, { "epoch": 0.6554477144646211, "grad_norm": 1.9013502597808838, "learning_rate": 0.00021014173998044964, "loss": 0.9908, "step": 4187 }, { "epoch": 0.6556042579837195, "grad_norm": 2.773240566253662, "learning_rate": 0.00021011730205278592, "loss": 0.9834, "step": 4188 }, { "epoch": 0.6557608015028178, "grad_norm": 2.2640483379364014, "learning_rate": 0.00021009286412512217, "loss": 1.1264, "step": 4189 }, { "epoch": 0.6559173450219161, "grad_norm": 1.875777006149292, "learning_rate": 0.00021006842619745845, "loss": 1.1565, "step": 4190 }, { "epoch": 0.6560738885410144, "grad_norm": 4.020966529846191, "learning_rate": 0.00021004398826979473, "loss": 1.6043, "step": 4191 }, { "epoch": 0.6562304320601127, "grad_norm": 2.4041779041290283, "learning_rate": 0.00021001955034213095, "loss": 1.0613, "step": 4192 }, { "epoch": 0.656386975579211, "grad_norm": 2.9603583812713623, "learning_rate": 0.00020999511241446723, "loss": 1.2259, "step": 4193 }, { "epoch": 0.6565435190983093, "grad_norm": 3.0527658462524414, "learning_rate": 0.0002099706744868035, "loss": 0.9003, "step": 4194 }, { "epoch": 0.6567000626174077, "grad_norm": 2.1919569969177246, "learning_rate": 0.00020994623655913976, "loss": 1.3881, "step": 4195 }, { "epoch": 0.656856606136506, "grad_norm": 2.547144889831543, "learning_rate": 0.00020992179863147604, "loss": 0.6151, "step": 4196 }, { "epoch": 0.6570131496556043, "grad_norm": 1.940132975578308, "learning_rate": 0.00020989736070381232, "loss": 0.7662, "step": 4197 }, { "epoch": 0.6571696931747025, 
"grad_norm": 1.2711700201034546, "learning_rate": 0.00020987292277614854, "loss": 0.6982, "step": 4198 }, { "epoch": 0.6573262366938009, "grad_norm": 1.6666027307510376, "learning_rate": 0.00020984848484848482, "loss": 0.4992, "step": 4199 }, { "epoch": 0.6574827802128992, "grad_norm": 4.075043201446533, "learning_rate": 0.0002098240469208211, "loss": 1.2499, "step": 4200 }, { "epoch": 0.6576393237319975, "grad_norm": 0.5925846695899963, "learning_rate": 0.00020979960899315735, "loss": 0.2544, "step": 4201 }, { "epoch": 0.6577958672510958, "grad_norm": 0.90831458568573, "learning_rate": 0.00020977517106549363, "loss": 0.3609, "step": 4202 }, { "epoch": 0.6579524107701941, "grad_norm": 0.6981661319732666, "learning_rate": 0.0002097507331378299, "loss": 0.2574, "step": 4203 }, { "epoch": 0.6581089542892924, "grad_norm": 0.8402687311172485, "learning_rate": 0.00020972629521016616, "loss": 0.2842, "step": 4204 }, { "epoch": 0.6582654978083907, "grad_norm": 0.7427016496658325, "learning_rate": 0.00020970185728250244, "loss": 0.3959, "step": 4205 }, { "epoch": 0.658422041327489, "grad_norm": 0.7101203799247742, "learning_rate": 0.00020967741935483871, "loss": 0.4118, "step": 4206 }, { "epoch": 0.6585785848465874, "grad_norm": 0.7009015083312988, "learning_rate": 0.00020965298142717494, "loss": 0.3418, "step": 4207 }, { "epoch": 0.6587351283656857, "grad_norm": 0.8235505223274231, "learning_rate": 0.00020962854349951122, "loss": 0.3614, "step": 4208 }, { "epoch": 0.6588916718847839, "grad_norm": 0.49669182300567627, "learning_rate": 0.0002096041055718475, "loss": 0.3866, "step": 4209 }, { "epoch": 0.6590482154038823, "grad_norm": 8.812310218811035, "learning_rate": 0.00020957966764418375, "loss": 0.8613, "step": 4210 }, { "epoch": 0.6592047589229806, "grad_norm": 6.276821613311768, "learning_rate": 0.00020955522971652003, "loss": 1.7947, "step": 4211 }, { "epoch": 0.6593613024420789, "grad_norm": 2.7206177711486816, "learning_rate": 0.0002095307917888563, "loss": 0.5293, 
"step": 4212 }, { "epoch": 0.6595178459611772, "grad_norm": 2.726612091064453, "learning_rate": 0.00020950635386119255, "loss": 0.832, "step": 4213 }, { "epoch": 0.6596743894802756, "grad_norm": 0.86571204662323, "learning_rate": 0.00020948191593352883, "loss": 0.556, "step": 4214 }, { "epoch": 0.6598309329993738, "grad_norm": 1.1770457029342651, "learning_rate": 0.00020945747800586508, "loss": 0.5059, "step": 4215 }, { "epoch": 0.6599874765184721, "grad_norm": 1.2586264610290527, "learning_rate": 0.00020943304007820134, "loss": 0.6573, "step": 4216 }, { "epoch": 0.6601440200375704, "grad_norm": 1.2371019124984741, "learning_rate": 0.00020940860215053761, "loss": 0.471, "step": 4217 }, { "epoch": 0.6603005635566688, "grad_norm": 0.7235432863235474, "learning_rate": 0.0002093841642228739, "loss": 0.4126, "step": 4218 }, { "epoch": 0.6604571070757671, "grad_norm": 1.9548500776290894, "learning_rate": 0.00020935972629521014, "loss": 0.9866, "step": 4219 }, { "epoch": 0.6606136505948653, "grad_norm": 1.3150765895843506, "learning_rate": 0.00020933528836754642, "loss": 0.5777, "step": 4220 }, { "epoch": 0.6607701941139636, "grad_norm": 1.9192475080490112, "learning_rate": 0.0002093108504398827, "loss": 0.7398, "step": 4221 }, { "epoch": 0.660926737633062, "grad_norm": 2.385049819946289, "learning_rate": 0.00020928641251221892, "loss": 0.5958, "step": 4222 }, { "epoch": 0.6610832811521603, "grad_norm": 1.309512734413147, "learning_rate": 0.0002092619745845552, "loss": 0.6067, "step": 4223 }, { "epoch": 0.6612398246712586, "grad_norm": 1.5893750190734863, "learning_rate": 0.00020923753665689148, "loss": 0.7334, "step": 4224 }, { "epoch": 0.661396368190357, "grad_norm": 2.3424973487854004, "learning_rate": 0.00020921309872922773, "loss": 0.8073, "step": 4225 }, { "epoch": 0.6615529117094552, "grad_norm": 1.3689500093460083, "learning_rate": 0.000209188660801564, "loss": 0.6511, "step": 4226 }, { "epoch": 0.6617094552285535, "grad_norm": 1.4567039012908936, "learning_rate": 
0.0002091642228739003, "loss": 0.8244, "step": 4227 }, { "epoch": 0.6618659987476518, "grad_norm": 5.747929573059082, "learning_rate": 0.00020913978494623654, "loss": 0.774, "step": 4228 }, { "epoch": 0.6620225422667502, "grad_norm": 1.2225399017333984, "learning_rate": 0.00020911534701857282, "loss": 0.6109, "step": 4229 }, { "epoch": 0.6621790857858485, "grad_norm": 1.25154709815979, "learning_rate": 0.0002090909090909091, "loss": 0.8214, "step": 4230 }, { "epoch": 0.6623356293049468, "grad_norm": 2.5937745571136475, "learning_rate": 0.00020906647116324532, "loss": 0.9792, "step": 4231 }, { "epoch": 0.662492172824045, "grad_norm": 4.251043796539307, "learning_rate": 0.0002090420332355816, "loss": 0.6997, "step": 4232 }, { "epoch": 0.6626487163431434, "grad_norm": 2.7848551273345947, "learning_rate": 0.00020901759530791788, "loss": 0.6984, "step": 4233 }, { "epoch": 0.6628052598622417, "grad_norm": 3.419072151184082, "learning_rate": 0.00020899315738025413, "loss": 0.8251, "step": 4234 }, { "epoch": 0.66296180338134, "grad_norm": 4.448485374450684, "learning_rate": 0.0002089687194525904, "loss": 0.9583, "step": 4235 }, { "epoch": 0.6631183469004384, "grad_norm": 3.649228572845459, "learning_rate": 0.00020894428152492669, "loss": 1.222, "step": 4236 }, { "epoch": 0.6632748904195366, "grad_norm": 2.3916118144989014, "learning_rate": 0.00020891984359726294, "loss": 1.2783, "step": 4237 }, { "epoch": 0.6634314339386349, "grad_norm": 2.919701337814331, "learning_rate": 0.00020889540566959922, "loss": 0.9857, "step": 4238 }, { "epoch": 0.6635879774577332, "grad_norm": 2.3915016651153564, "learning_rate": 0.00020887096774193547, "loss": 1.1059, "step": 4239 }, { "epoch": 0.6637445209768316, "grad_norm": 4.138210773468018, "learning_rate": 0.00020884652981427172, "loss": 1.2545, "step": 4240 }, { "epoch": 0.6639010644959299, "grad_norm": 4.440970420837402, "learning_rate": 0.000208822091886608, "loss": 1.5261, "step": 4241 }, { "epoch": 0.6640576080150282, "grad_norm": 
3.668529748916626, "learning_rate": 0.00020879765395894427, "loss": 1.3974, "step": 4242 }, { "epoch": 0.6642141515341264, "grad_norm": 2.6934828758239746, "learning_rate": 0.00020877321603128053, "loss": 1.0031, "step": 4243 }, { "epoch": 0.6643706950532248, "grad_norm": 3.754547119140625, "learning_rate": 0.0002087487781036168, "loss": 1.5351, "step": 4244 }, { "epoch": 0.6645272385723231, "grad_norm": 2.1990721225738525, "learning_rate": 0.00020872434017595308, "loss": 1.2908, "step": 4245 }, { "epoch": 0.6646837820914214, "grad_norm": 1.8534988164901733, "learning_rate": 0.0002086999022482893, "loss": 0.7223, "step": 4246 }, { "epoch": 0.6648403256105198, "grad_norm": 1.668362021446228, "learning_rate": 0.00020867546432062559, "loss": 0.7743, "step": 4247 }, { "epoch": 0.6649968691296181, "grad_norm": 3.3219528198242188, "learning_rate": 0.00020865102639296186, "loss": 1.1719, "step": 4248 }, { "epoch": 0.6651534126487163, "grad_norm": 1.7605839967727661, "learning_rate": 0.00020862658846529811, "loss": 0.5382, "step": 4249 }, { "epoch": 0.6653099561678146, "grad_norm": 2.431262731552124, "learning_rate": 0.0002086021505376344, "loss": 1.0354, "step": 4250 }, { "epoch": 0.665466499686913, "grad_norm": 0.6644131541252136, "learning_rate": 0.00020857771260997067, "loss": 0.3289, "step": 4251 }, { "epoch": 0.6656230432060113, "grad_norm": 0.4004727303981781, "learning_rate": 0.00020855327468230692, "loss": 0.2793, "step": 4252 }, { "epoch": 0.6657795867251096, "grad_norm": 0.5396392345428467, "learning_rate": 0.0002085288367546432, "loss": 0.2703, "step": 4253 }, { "epoch": 0.6659361302442078, "grad_norm": 0.4889543056488037, "learning_rate": 0.00020850439882697948, "loss": 0.2565, "step": 4254 }, { "epoch": 0.6660926737633062, "grad_norm": 0.5937919616699219, "learning_rate": 0.0002084799608993157, "loss": 0.294, "step": 4255 }, { "epoch": 0.6662492172824045, "grad_norm": 1.0246959924697876, "learning_rate": 0.00020845552297165198, "loss": 0.4547, "step": 4256 }, 
{ "epoch": 0.6664057608015028, "grad_norm": 0.8385992646217346, "learning_rate": 0.00020843108504398826, "loss": 0.558, "step": 4257 }, { "epoch": 0.6665623043206012, "grad_norm": 0.6174805760383606, "learning_rate": 0.0002084066471163245, "loss": 0.3394, "step": 4258 }, { "epoch": 0.6667188478396995, "grad_norm": 0.82181715965271, "learning_rate": 0.0002083822091886608, "loss": 0.2792, "step": 4259 }, { "epoch": 0.6668753913587977, "grad_norm": 0.5891256332397461, "learning_rate": 0.00020835777126099707, "loss": 0.3115, "step": 4260 }, { "epoch": 0.667031934877896, "grad_norm": 0.9402687549591064, "learning_rate": 0.00020833333333333332, "loss": 0.3959, "step": 4261 }, { "epoch": 0.6671884783969944, "grad_norm": 0.887904942035675, "learning_rate": 0.0002083088954056696, "loss": 0.3962, "step": 4262 }, { "epoch": 0.6673450219160927, "grad_norm": 1.0026453733444214, "learning_rate": 0.00020828445747800585, "loss": 0.4687, "step": 4263 }, { "epoch": 0.667501565435191, "grad_norm": 0.8495490550994873, "learning_rate": 0.0002082600195503421, "loss": 0.3504, "step": 4264 }, { "epoch": 0.6676581089542893, "grad_norm": 1.067874789237976, "learning_rate": 0.00020823558162267838, "loss": 0.4712, "step": 4265 }, { "epoch": 0.6678146524733876, "grad_norm": 1.5945181846618652, "learning_rate": 0.00020821114369501466, "loss": 0.5911, "step": 4266 }, { "epoch": 0.6679711959924859, "grad_norm": 1.3359369039535522, "learning_rate": 0.0002081867057673509, "loss": 0.5162, "step": 4267 }, { "epoch": 0.6681277395115842, "grad_norm": 1.5492264032363892, "learning_rate": 0.0002081622678396872, "loss": 0.5094, "step": 4268 }, { "epoch": 0.6682842830306825, "grad_norm": 1.4912946224212646, "learning_rate": 0.00020813782991202347, "loss": 0.5471, "step": 4269 }, { "epoch": 0.6684408265497809, "grad_norm": 0.7569031715393066, "learning_rate": 0.0002081133919843597, "loss": 0.3852, "step": 4270 }, { "epoch": 0.6685973700688791, "grad_norm": 1.4911766052246094, "learning_rate": 
0.00020808895405669597, "loss": 0.5473, "step": 4271 }, { "epoch": 0.6687539135879774, "grad_norm": 1.7154449224472046, "learning_rate": 0.00020806451612903225, "loss": 0.4659, "step": 4272 }, { "epoch": 0.6689104571070758, "grad_norm": 1.293839693069458, "learning_rate": 0.0002080400782013685, "loss": 0.8146, "step": 4273 }, { "epoch": 0.6690670006261741, "grad_norm": 1.7432725429534912, "learning_rate": 0.00020801564027370478, "loss": 0.5831, "step": 4274 }, { "epoch": 0.6692235441452724, "grad_norm": 1.6460049152374268, "learning_rate": 0.00020799120234604105, "loss": 0.7064, "step": 4275 }, { "epoch": 0.6693800876643707, "grad_norm": 1.205126404762268, "learning_rate": 0.0002079667644183773, "loss": 0.6556, "step": 4276 }, { "epoch": 0.669536631183469, "grad_norm": 2.2884862422943115, "learning_rate": 0.00020794232649071358, "loss": 0.5786, "step": 4277 }, { "epoch": 0.6696931747025673, "grad_norm": 1.5183991193771362, "learning_rate": 0.00020791788856304986, "loss": 0.916, "step": 4278 }, { "epoch": 0.6698497182216656, "grad_norm": 1.8654146194458008, "learning_rate": 0.00020789345063538609, "loss": 0.6673, "step": 4279 }, { "epoch": 0.6700062617407639, "grad_norm": 1.886267066001892, "learning_rate": 0.00020786901270772236, "loss": 0.6066, "step": 4280 }, { "epoch": 0.6701628052598623, "grad_norm": 1.4289623498916626, "learning_rate": 0.00020784457478005864, "loss": 0.8144, "step": 4281 }, { "epoch": 0.6703193487789606, "grad_norm": 2.9784719944000244, "learning_rate": 0.0002078201368523949, "loss": 0.879, "step": 4282 }, { "epoch": 0.6704758922980588, "grad_norm": 3.7526068687438965, "learning_rate": 0.00020779569892473117, "loss": 1.1636, "step": 4283 }, { "epoch": 0.6706324358171571, "grad_norm": 2.4929449558258057, "learning_rate": 0.00020777126099706745, "loss": 0.9424, "step": 4284 }, { "epoch": 0.6707889793362555, "grad_norm": 3.227921962738037, "learning_rate": 0.0002077468230694037, "loss": 0.8243, "step": 4285 }, { "epoch": 0.6709455228553538, 
"grad_norm": 1.7560867071151733, "learning_rate": 0.00020772238514173995, "loss": 0.8561, "step": 4286 }, { "epoch": 0.6711020663744521, "grad_norm": 2.7342591285705566, "learning_rate": 0.00020769794721407623, "loss": 1.1016, "step": 4287 }, { "epoch": 0.6712586098935505, "grad_norm": 1.8622885942459106, "learning_rate": 0.00020767350928641248, "loss": 1.1822, "step": 4288 }, { "epoch": 0.6714151534126487, "grad_norm": 3.4083244800567627, "learning_rate": 0.00020764907135874876, "loss": 1.1509, "step": 4289 }, { "epoch": 0.671571696931747, "grad_norm": 2.9127471446990967, "learning_rate": 0.00020762463343108504, "loss": 0.8396, "step": 4290 }, { "epoch": 0.6717282404508453, "grad_norm": 2.4927799701690674, "learning_rate": 0.0002076001955034213, "loss": 1.2386, "step": 4291 }, { "epoch": 0.6718847839699437, "grad_norm": 2.8656435012817383, "learning_rate": 0.00020757575757575757, "loss": 1.2362, "step": 4292 }, { "epoch": 0.672041327489042, "grad_norm": 3.2088873386383057, "learning_rate": 0.00020755131964809385, "loss": 1.3128, "step": 4293 }, { "epoch": 0.6721978710081402, "grad_norm": 3.3077950477600098, "learning_rate": 0.00020752688172043007, "loss": 1.6932, "step": 4294 }, { "epoch": 0.6723544145272385, "grad_norm": 4.320693016052246, "learning_rate": 0.00020750244379276635, "loss": 1.554, "step": 4295 }, { "epoch": 0.6725109580463369, "grad_norm": 3.5774495601654053, "learning_rate": 0.00020747800586510263, "loss": 1.0525, "step": 4296 }, { "epoch": 0.6726675015654352, "grad_norm": 2.9771318435668945, "learning_rate": 0.00020745356793743888, "loss": 0.9957, "step": 4297 }, { "epoch": 0.6728240450845335, "grad_norm": 2.046910524368286, "learning_rate": 0.00020742913000977516, "loss": 0.903, "step": 4298 }, { "epoch": 0.6729805886036319, "grad_norm": 1.8642005920410156, "learning_rate": 0.00020740469208211144, "loss": 0.8665, "step": 4299 }, { "epoch": 0.6731371321227301, "grad_norm": 1.3706653118133545, "learning_rate": 0.0002073802541544477, "loss": 0.5211, 
"step": 4300 }, { "epoch": 0.6732936756418284, "grad_norm": 0.5348394513130188, "learning_rate": 0.00020735581622678397, "loss": 0.4051, "step": 4301 }, { "epoch": 0.6734502191609267, "grad_norm": 0.43285202980041504, "learning_rate": 0.00020733137829912024, "loss": 0.2932, "step": 4302 }, { "epoch": 0.6736067626800251, "grad_norm": 0.6725158095359802, "learning_rate": 0.00020730694037145647, "loss": 0.4606, "step": 4303 }, { "epoch": 0.6737633061991234, "grad_norm": 0.676227867603302, "learning_rate": 0.00020728250244379275, "loss": 0.3906, "step": 4304 }, { "epoch": 0.6739198497182217, "grad_norm": 0.6109701991081238, "learning_rate": 0.00020725806451612903, "loss": 0.5082, "step": 4305 }, { "epoch": 0.6740763932373199, "grad_norm": 0.6717798113822937, "learning_rate": 0.00020723362658846528, "loss": 0.3191, "step": 4306 }, { "epoch": 0.6742329367564183, "grad_norm": 0.5643997192382812, "learning_rate": 0.00020720918866080155, "loss": 0.5007, "step": 4307 }, { "epoch": 0.6743894802755166, "grad_norm": 0.6461310386657715, "learning_rate": 0.00020718475073313783, "loss": 0.3413, "step": 4308 }, { "epoch": 0.6745460237946149, "grad_norm": 0.7936927080154419, "learning_rate": 0.00020716031280547408, "loss": 0.4067, "step": 4309 }, { "epoch": 0.6747025673137133, "grad_norm": 0.7589799761772156, "learning_rate": 0.00020713587487781034, "loss": 0.4756, "step": 4310 }, { "epoch": 0.6748591108328115, "grad_norm": 0.9731733202934265, "learning_rate": 0.00020711143695014661, "loss": 0.3809, "step": 4311 }, { "epoch": 0.6750156543519098, "grad_norm": 0.774403989315033, "learning_rate": 0.00020708699902248287, "loss": 0.4063, "step": 4312 }, { "epoch": 0.6751721978710081, "grad_norm": 1.026542067527771, "learning_rate": 0.00020706256109481914, "loss": 0.3735, "step": 4313 }, { "epoch": 0.6753287413901065, "grad_norm": 1.497208595275879, "learning_rate": 0.00020703812316715542, "loss": 0.501, "step": 4314 }, { "epoch": 0.6754852849092048, "grad_norm": 1.3015997409820557, 
"learning_rate": 0.00020701368523949167, "loss": 0.607, "step": 4315 }, { "epoch": 0.6756418284283031, "grad_norm": 1.392624855041504, "learning_rate": 0.00020698924731182795, "loss": 0.7354, "step": 4316 }, { "epoch": 0.6757983719474013, "grad_norm": 1.2120251655578613, "learning_rate": 0.00020696480938416423, "loss": 0.3351, "step": 4317 }, { "epoch": 0.6759549154664997, "grad_norm": 1.3814586400985718, "learning_rate": 0.00020694037145650045, "loss": 0.5827, "step": 4318 }, { "epoch": 0.676111458985598, "grad_norm": 1.2726234197616577, "learning_rate": 0.00020691593352883673, "loss": 0.4963, "step": 4319 }, { "epoch": 0.6762680025046963, "grad_norm": 2.120060682296753, "learning_rate": 0.000206891495601173, "loss": 0.6605, "step": 4320 }, { "epoch": 0.6764245460237946, "grad_norm": 1.4398250579833984, "learning_rate": 0.00020686705767350926, "loss": 0.7302, "step": 4321 }, { "epoch": 0.676581089542893, "grad_norm": 1.0549432039260864, "learning_rate": 0.00020684261974584554, "loss": 0.5445, "step": 4322 }, { "epoch": 0.6767376330619912, "grad_norm": 1.3273248672485352, "learning_rate": 0.00020681818181818182, "loss": 0.6731, "step": 4323 }, { "epoch": 0.6768941765810895, "grad_norm": 1.9823793172836304, "learning_rate": 0.00020679374389051807, "loss": 1.0642, "step": 4324 }, { "epoch": 0.6770507201001879, "grad_norm": 1.5976402759552002, "learning_rate": 0.00020676930596285435, "loss": 0.897, "step": 4325 }, { "epoch": 0.6772072636192862, "grad_norm": 1.3136533498764038, "learning_rate": 0.00020674486803519063, "loss": 0.5817, "step": 4326 }, { "epoch": 0.6773638071383845, "grad_norm": 3.931715726852417, "learning_rate": 0.00020672043010752685, "loss": 0.67, "step": 4327 }, { "epoch": 0.6775203506574827, "grad_norm": 1.4402580261230469, "learning_rate": 0.00020669599217986313, "loss": 0.573, "step": 4328 }, { "epoch": 0.6776768941765811, "grad_norm": 3.5042459964752197, "learning_rate": 0.0002066715542521994, "loss": 0.6495, "step": 4329 }, { "epoch": 
0.6778334376956794, "grad_norm": 1.4907385110855103, "learning_rate": 0.00020664711632453566, "loss": 0.689, "step": 4330 }, { "epoch": 0.6779899812147777, "grad_norm": 1.5746352672576904, "learning_rate": 0.00020662267839687194, "loss": 0.8336, "step": 4331 }, { "epoch": 0.678146524733876, "grad_norm": 1.0189085006713867, "learning_rate": 0.00020659824046920822, "loss": 0.8657, "step": 4332 }, { "epoch": 0.6783030682529744, "grad_norm": 2.78194522857666, "learning_rate": 0.00020657380254154447, "loss": 1.1317, "step": 4333 }, { "epoch": 0.6784596117720726, "grad_norm": 1.732728362083435, "learning_rate": 0.00020654936461388072, "loss": 0.7439, "step": 4334 }, { "epoch": 0.6786161552911709, "grad_norm": 3.1215643882751465, "learning_rate": 0.000206524926686217, "loss": 1.0976, "step": 4335 }, { "epoch": 0.6787726988102692, "grad_norm": 2.303900957107544, "learning_rate": 0.00020650048875855325, "loss": 0.7992, "step": 4336 }, { "epoch": 0.6789292423293676, "grad_norm": 2.1387338638305664, "learning_rate": 0.00020647605083088953, "loss": 1.4244, "step": 4337 }, { "epoch": 0.6790857858484659, "grad_norm": 2.9875400066375732, "learning_rate": 0.0002064516129032258, "loss": 1.5699, "step": 4338 }, { "epoch": 0.6792423293675642, "grad_norm": 1.6865546703338623, "learning_rate": 0.00020642717497556206, "loss": 0.9713, "step": 4339 }, { "epoch": 0.6793988728866625, "grad_norm": 2.4787464141845703, "learning_rate": 0.00020640273704789833, "loss": 1.4294, "step": 4340 }, { "epoch": 0.6795554164057608, "grad_norm": 2.0415139198303223, "learning_rate": 0.0002063782991202346, "loss": 0.7225, "step": 4341 }, { "epoch": 0.6797119599248591, "grad_norm": 3.089674234390259, "learning_rate": 0.00020635386119257084, "loss": 1.3035, "step": 4342 }, { "epoch": 0.6798685034439574, "grad_norm": 1.6210919618606567, "learning_rate": 0.00020632942326490711, "loss": 1.1855, "step": 4343 }, { "epoch": 0.6800250469630558, "grad_norm": 3.708653450012207, "learning_rate": 0.0002063049853372434, 
"loss": 1.2739, "step": 4344 }, { "epoch": 0.680181590482154, "grad_norm": 2.1804747581481934, "learning_rate": 0.00020628054740957964, "loss": 1.0516, "step": 4345 }, { "epoch": 0.6803381340012523, "grad_norm": 2.3872623443603516, "learning_rate": 0.00020625610948191592, "loss": 0.8218, "step": 4346 }, { "epoch": 0.6804946775203506, "grad_norm": 2.274266242980957, "learning_rate": 0.00020623167155425217, "loss": 0.6364, "step": 4347 }, { "epoch": 0.680651221039449, "grad_norm": 4.519956588745117, "learning_rate": 0.00020620723362658845, "loss": 0.6342, "step": 4348 }, { "epoch": 0.6808077645585473, "grad_norm": 1.9020459651947021, "learning_rate": 0.00020618279569892473, "loss": 1.1602, "step": 4349 }, { "epoch": 0.6809643080776456, "grad_norm": 2.6886696815490723, "learning_rate": 0.00020615835777126095, "loss": 1.2329, "step": 4350 }, { "epoch": 0.6811208515967438, "grad_norm": 0.6094191670417786, "learning_rate": 0.00020613391984359723, "loss": 0.2924, "step": 4351 }, { "epoch": 0.6812773951158422, "grad_norm": 0.5536388158798218, "learning_rate": 0.0002061094819159335, "loss": 0.3504, "step": 4352 }, { "epoch": 0.6814339386349405, "grad_norm": 0.5976225137710571, "learning_rate": 0.00020608504398826976, "loss": 0.3809, "step": 4353 }, { "epoch": 0.6815904821540388, "grad_norm": 0.694050133228302, "learning_rate": 0.00020606060606060604, "loss": 0.4363, "step": 4354 }, { "epoch": 0.6817470256731372, "grad_norm": 0.606235682964325, "learning_rate": 0.00020603616813294232, "loss": 0.2675, "step": 4355 }, { "epoch": 0.6819035691922355, "grad_norm": 2.016716957092285, "learning_rate": 0.00020601173020527857, "loss": 0.4803, "step": 4356 }, { "epoch": 0.6820601127113337, "grad_norm": 0.9147260785102844, "learning_rate": 0.00020598729227761482, "loss": 0.5548, "step": 4357 }, { "epoch": 0.682216656230432, "grad_norm": 0.6617628931999207, "learning_rate": 0.0002059628543499511, "loss": 0.4761, "step": 4358 }, { "epoch": 0.6823731997495304, "grad_norm": 
0.9042291045188904, "learning_rate": 0.00020593841642228735, "loss": 0.3996, "step": 4359 }, { "epoch": 0.6825297432686287, "grad_norm": 1.5656142234802246, "learning_rate": 0.00020591397849462363, "loss": 0.6516, "step": 4360 }, { "epoch": 0.682686286787727, "grad_norm": 0.7753491401672363, "learning_rate": 0.0002058895405669599, "loss": 0.3016, "step": 4361 }, { "epoch": 0.6828428303068252, "grad_norm": 0.5845422148704529, "learning_rate": 0.00020586510263929616, "loss": 0.2986, "step": 4362 }, { "epoch": 0.6829993738259236, "grad_norm": 2.0732033252716064, "learning_rate": 0.00020584066471163244, "loss": 0.7511, "step": 4363 }, { "epoch": 0.6831559173450219, "grad_norm": 1.0156924724578857, "learning_rate": 0.00020581622678396872, "loss": 0.5104, "step": 4364 }, { "epoch": 0.6833124608641202, "grad_norm": 1.2725284099578857, "learning_rate": 0.00020579178885630494, "loss": 0.2841, "step": 4365 }, { "epoch": 0.6834690043832186, "grad_norm": 0.7375205159187317, "learning_rate": 0.00020576735092864122, "loss": 0.3947, "step": 4366 }, { "epoch": 0.6836255479023169, "grad_norm": 1.245803952217102, "learning_rate": 0.0002057429130009775, "loss": 0.5679, "step": 4367 }, { "epoch": 0.6837820914214151, "grad_norm": 1.113433599472046, "learning_rate": 0.00020571847507331375, "loss": 0.3995, "step": 4368 }, { "epoch": 0.6839386349405134, "grad_norm": 1.771336555480957, "learning_rate": 0.00020569403714565003, "loss": 0.5824, "step": 4369 }, { "epoch": 0.6840951784596118, "grad_norm": 0.9144507050514221, "learning_rate": 0.0002056695992179863, "loss": 0.576, "step": 4370 }, { "epoch": 0.6842517219787101, "grad_norm": 1.0565403699874878, "learning_rate": 0.00020564516129032256, "loss": 0.4776, "step": 4371 }, { "epoch": 0.6844082654978084, "grad_norm": 2.380849838256836, "learning_rate": 0.00020562072336265883, "loss": 0.9487, "step": 4372 }, { "epoch": 0.6845648090169068, "grad_norm": 1.3692196607589722, "learning_rate": 0.0002055962854349951, "loss": 0.5143, "step": 4373 
}, { "epoch": 0.684721352536005, "grad_norm": 2.105653762817383, "learning_rate": 0.00020557184750733134, "loss": 0.5443, "step": 4374 }, { "epoch": 0.6848778960551033, "grad_norm": 1.329337239265442, "learning_rate": 0.00020554740957966762, "loss": 0.7253, "step": 4375 }, { "epoch": 0.6850344395742016, "grad_norm": 0.9854122400283813, "learning_rate": 0.0002055229716520039, "loss": 0.5775, "step": 4376 }, { "epoch": 0.6851909830933, "grad_norm": 1.4902323484420776, "learning_rate": 0.00020549853372434014, "loss": 0.6427, "step": 4377 }, { "epoch": 0.6853475266123983, "grad_norm": 2.347221851348877, "learning_rate": 0.00020547409579667642, "loss": 0.8651, "step": 4378 }, { "epoch": 0.6855040701314965, "grad_norm": 1.5970897674560547, "learning_rate": 0.0002054496578690127, "loss": 0.8242, "step": 4379 }, { "epoch": 0.6856606136505948, "grad_norm": 1.8963264226913452, "learning_rate": 0.00020542521994134895, "loss": 0.6639, "step": 4380 }, { "epoch": 0.6858171571696932, "grad_norm": 1.861736536026001, "learning_rate": 0.0002054007820136852, "loss": 0.882, "step": 4381 }, { "epoch": 0.6859737006887915, "grad_norm": 2.6222732067108154, "learning_rate": 0.00020537634408602148, "loss": 0.5547, "step": 4382 }, { "epoch": 0.6861302442078898, "grad_norm": 7.720530033111572, "learning_rate": 0.00020535190615835773, "loss": 0.9057, "step": 4383 }, { "epoch": 0.6862867877269881, "grad_norm": 2.147437810897827, "learning_rate": 0.000205327468230694, "loss": 1.5026, "step": 4384 }, { "epoch": 0.6864433312460864, "grad_norm": 1.9895732402801514, "learning_rate": 0.0002053030303030303, "loss": 1.0872, "step": 4385 }, { "epoch": 0.6865998747651847, "grad_norm": 3.379164218902588, "learning_rate": 0.00020527859237536654, "loss": 1.4365, "step": 4386 }, { "epoch": 0.686756418284283, "grad_norm": 3.2199456691741943, "learning_rate": 0.00020525415444770282, "loss": 0.855, "step": 4387 }, { "epoch": 0.6869129618033814, "grad_norm": 4.4102911949157715, "learning_rate": 
0.0002052297165200391, "loss": 1.2534, "step": 4388 }, { "epoch": 0.6870695053224797, "grad_norm": 1.3978711366653442, "learning_rate": 0.00020520527859237532, "loss": 0.9027, "step": 4389 }, { "epoch": 0.687226048841578, "grad_norm": 2.3685648441314697, "learning_rate": 0.0002051808406647116, "loss": 0.8522, "step": 4390 }, { "epoch": 0.6873825923606762, "grad_norm": 2.080855369567871, "learning_rate": 0.00020515640273704788, "loss": 1.2799, "step": 4391 }, { "epoch": 0.6875391358797746, "grad_norm": 2.1453280448913574, "learning_rate": 0.00020513196480938413, "loss": 1.1435, "step": 4392 }, { "epoch": 0.6876956793988729, "grad_norm": 2.5470387935638428, "learning_rate": 0.0002051075268817204, "loss": 1.9692, "step": 4393 }, { "epoch": 0.6878522229179712, "grad_norm": 2.8885648250579834, "learning_rate": 0.0002050830889540567, "loss": 1.7749, "step": 4394 }, { "epoch": 0.6880087664370695, "grad_norm": 2.181378126144409, "learning_rate": 0.00020505865102639294, "loss": 1.5839, "step": 4395 }, { "epoch": 0.6881653099561679, "grad_norm": 4.625649452209473, "learning_rate": 0.00020503421309872922, "loss": 1.4834, "step": 4396 }, { "epoch": 0.6883218534752661, "grad_norm": 2.8574166297912598, "learning_rate": 0.0002050097751710655, "loss": 1.4019, "step": 4397 }, { "epoch": 0.6884783969943644, "grad_norm": 2.1024327278137207, "learning_rate": 0.00020498533724340172, "loss": 1.1237, "step": 4398 }, { "epoch": 0.6886349405134627, "grad_norm": 1.9445034265518188, "learning_rate": 0.000204960899315738, "loss": 0.9635, "step": 4399 }, { "epoch": 0.6887914840325611, "grad_norm": 2.038120746612549, "learning_rate": 0.00020493646138807428, "loss": 1.1126, "step": 4400 }, { "epoch": 0.6889480275516594, "grad_norm": 0.9593400955200195, "learning_rate": 0.00020491202346041053, "loss": 0.8911, "step": 4401 }, { "epoch": 0.6891045710707576, "grad_norm": 0.4626724421977997, "learning_rate": 0.0002048875855327468, "loss": 0.306, "step": 4402 }, { "epoch": 0.689261114589856, 
"grad_norm": 0.5017557144165039, "learning_rate": 0.00020486314760508308, "loss": 0.387, "step": 4403 }, { "epoch": 0.6894176581089543, "grad_norm": 0.7594949007034302, "learning_rate": 0.0002048387096774193, "loss": 0.2952, "step": 4404 }, { "epoch": 0.6895742016280526, "grad_norm": 0.5855520367622375, "learning_rate": 0.00020481427174975559, "loss": 0.3796, "step": 4405 }, { "epoch": 0.6897307451471509, "grad_norm": 0.7784630656242371, "learning_rate": 0.00020478983382209186, "loss": 0.3505, "step": 4406 }, { "epoch": 0.6898872886662493, "grad_norm": 0.7261280417442322, "learning_rate": 0.00020476539589442812, "loss": 0.3887, "step": 4407 }, { "epoch": 0.6900438321853475, "grad_norm": 0.5250700116157532, "learning_rate": 0.0002047409579667644, "loss": 0.3455, "step": 4408 }, { "epoch": 0.6902003757044458, "grad_norm": 1.4963864088058472, "learning_rate": 0.00020471652003910067, "loss": 0.4713, "step": 4409 }, { "epoch": 0.6903569192235441, "grad_norm": 0.7633784413337708, "learning_rate": 0.00020469208211143692, "loss": 0.3757, "step": 4410 }, { "epoch": 0.6905134627426425, "grad_norm": 0.6645981669425964, "learning_rate": 0.0002046676441837732, "loss": 0.3691, "step": 4411 }, { "epoch": 0.6906700062617408, "grad_norm": 0.977664589881897, "learning_rate": 0.00020464320625610948, "loss": 0.4197, "step": 4412 }, { "epoch": 0.6908265497808391, "grad_norm": 0.827433168888092, "learning_rate": 0.0002046187683284457, "loss": 0.4272, "step": 4413 }, { "epoch": 0.6909830932999373, "grad_norm": 1.0397074222564697, "learning_rate": 0.00020459433040078198, "loss": 0.546, "step": 4414 }, { "epoch": 0.6911396368190357, "grad_norm": 0.9832563400268555, "learning_rate": 0.00020456989247311826, "loss": 0.4227, "step": 4415 }, { "epoch": 0.691296180338134, "grad_norm": 1.1884939670562744, "learning_rate": 0.0002045454545454545, "loss": 0.4485, "step": 4416 }, { "epoch": 0.6914527238572323, "grad_norm": 1.7477940320968628, "learning_rate": 0.0002045210166177908, "loss": 0.5891, 
"step": 4417 }, { "epoch": 0.6916092673763307, "grad_norm": 1.1390234231948853, "learning_rate": 0.00020449657869012707, "loss": 0.4141, "step": 4418 }, { "epoch": 0.6917658108954289, "grad_norm": 2.4235641956329346, "learning_rate": 0.00020447214076246332, "loss": 0.5264, "step": 4419 }, { "epoch": 0.6919223544145272, "grad_norm": 2.1330161094665527, "learning_rate": 0.0002044477028347996, "loss": 0.7413, "step": 4420 }, { "epoch": 0.6920788979336255, "grad_norm": 1.950913429260254, "learning_rate": 0.00020442326490713588, "loss": 0.5941, "step": 4421 }, { "epoch": 0.6922354414527239, "grad_norm": 1.4877159595489502, "learning_rate": 0.0002043988269794721, "loss": 0.6298, "step": 4422 }, { "epoch": 0.6923919849718222, "grad_norm": 2.8081557750701904, "learning_rate": 0.00020437438905180838, "loss": 0.6594, "step": 4423 }, { "epoch": 0.6925485284909205, "grad_norm": 3.2152159214019775, "learning_rate": 0.00020434995112414466, "loss": 0.8644, "step": 4424 }, { "epoch": 0.6927050720100187, "grad_norm": 1.4229108095169067, "learning_rate": 0.0002043255131964809, "loss": 0.5714, "step": 4425 }, { "epoch": 0.6928616155291171, "grad_norm": 2.301750421524048, "learning_rate": 0.0002043010752688172, "loss": 0.7934, "step": 4426 }, { "epoch": 0.6930181590482154, "grad_norm": 1.235784888267517, "learning_rate": 0.00020427663734115347, "loss": 0.7241, "step": 4427 }, { "epoch": 0.6931747025673137, "grad_norm": 1.9009183645248413, "learning_rate": 0.0002042521994134897, "loss": 1.0616, "step": 4428 }, { "epoch": 0.6933312460864121, "grad_norm": 1.6725027561187744, "learning_rate": 0.00020422776148582597, "loss": 0.6872, "step": 4429 }, { "epoch": 0.6934877896055104, "grad_norm": 2.6750926971435547, "learning_rate": 0.00020420332355816225, "loss": 1.1431, "step": 4430 }, { "epoch": 0.6936443331246086, "grad_norm": 3.524231195449829, "learning_rate": 0.0002041788856304985, "loss": 1.205, "step": 4431 }, { "epoch": 0.6938008766437069, "grad_norm": 1.7456165552139282, 
"learning_rate": 0.00020415444770283478, "loss": 0.9005, "step": 4432 }, { "epoch": 0.6939574201628053, "grad_norm": 1.8678324222564697, "learning_rate": 0.00020413000977517106, "loss": 0.6964, "step": 4433 }, { "epoch": 0.6941139636819036, "grad_norm": 2.4247958660125732, "learning_rate": 0.0002041055718475073, "loss": 1.397, "step": 4434 }, { "epoch": 0.6942705072010019, "grad_norm": 3.6125667095184326, "learning_rate": 0.00020408113391984358, "loss": 0.9847, "step": 4435 }, { "epoch": 0.6944270507201001, "grad_norm": 2.9348018169403076, "learning_rate": 0.00020405669599217986, "loss": 1.3243, "step": 4436 }, { "epoch": 0.6945835942391985, "grad_norm": 1.7903168201446533, "learning_rate": 0.0002040322580645161, "loss": 1.0472, "step": 4437 }, { "epoch": 0.6947401377582968, "grad_norm": 1.7623295783996582, "learning_rate": 0.00020400782013685237, "loss": 0.8571, "step": 4438 }, { "epoch": 0.6948966812773951, "grad_norm": 1.318037986755371, "learning_rate": 0.00020398338220918864, "loss": 0.6425, "step": 4439 }, { "epoch": 0.6950532247964935, "grad_norm": 2.563493013381958, "learning_rate": 0.0002039589442815249, "loss": 1.0581, "step": 4440 }, { "epoch": 0.6952097683155918, "grad_norm": 2.8916120529174805, "learning_rate": 0.00020393450635386117, "loss": 1.1032, "step": 4441 }, { "epoch": 0.69536631183469, "grad_norm": 2.9763169288635254, "learning_rate": 0.00020391006842619745, "loss": 1.2579, "step": 4442 }, { "epoch": 0.6955228553537883, "grad_norm": 1.8792319297790527, "learning_rate": 0.0002038856304985337, "loss": 1.4755, "step": 4443 }, { "epoch": 0.6956793988728867, "grad_norm": 2.4958150386810303, "learning_rate": 0.00020386119257086998, "loss": 1.6926, "step": 4444 }, { "epoch": 0.695835942391985, "grad_norm": 3.446523904800415, "learning_rate": 0.00020383675464320623, "loss": 0.8777, "step": 4445 }, { "epoch": 0.6959924859110833, "grad_norm": 1.1154353618621826, "learning_rate": 0.00020381231671554248, "loss": 0.5229, "step": 4446 }, { "epoch": 
0.6961490294301816, "grad_norm": 1.8287882804870605, "learning_rate": 0.00020378787878787876, "loss": 0.7689, "step": 4447 }, { "epoch": 0.6963055729492799, "grad_norm": 2.325324535369873, "learning_rate": 0.00020376344086021504, "loss": 0.6508, "step": 4448 }, { "epoch": 0.6964621164683782, "grad_norm": 1.61253821849823, "learning_rate": 0.0002037390029325513, "loss": 0.4842, "step": 4449 }, { "epoch": 0.6966186599874765, "grad_norm": 3.248868703842163, "learning_rate": 0.00020371456500488757, "loss": 1.2176, "step": 4450 }, { "epoch": 0.6967752035065748, "grad_norm": 0.74549800157547, "learning_rate": 0.00020369012707722385, "loss": 0.4581, "step": 4451 }, { "epoch": 0.6969317470256732, "grad_norm": 0.5620818734169006, "learning_rate": 0.00020366568914956007, "loss": 0.2706, "step": 4452 }, { "epoch": 0.6970882905447714, "grad_norm": 0.6187224984169006, "learning_rate": 0.00020364125122189635, "loss": 0.2828, "step": 4453 }, { "epoch": 0.6972448340638697, "grad_norm": 1.0300192832946777, "learning_rate": 0.00020361681329423263, "loss": 0.5353, "step": 4454 }, { "epoch": 0.697401377582968, "grad_norm": 0.5869678854942322, "learning_rate": 0.00020359237536656888, "loss": 0.3123, "step": 4455 }, { "epoch": 0.6975579211020664, "grad_norm": 0.944622814655304, "learning_rate": 0.00020356793743890516, "loss": 0.4034, "step": 4456 }, { "epoch": 0.6977144646211647, "grad_norm": 0.7487533688545227, "learning_rate": 0.00020354349951124144, "loss": 0.5087, "step": 4457 }, { "epoch": 0.697871008140263, "grad_norm": 0.7087098956108093, "learning_rate": 0.0002035190615835777, "loss": 0.2688, "step": 4458 }, { "epoch": 0.6980275516593613, "grad_norm": 1.030664324760437, "learning_rate": 0.00020349462365591397, "loss": 0.4653, "step": 4459 }, { "epoch": 0.6981840951784596, "grad_norm": 1.0674512386322021, "learning_rate": 0.00020347018572825025, "loss": 0.4403, "step": 4460 }, { "epoch": 0.6983406386975579, "grad_norm": 0.9911973476409912, "learning_rate": 0.00020344574780058647, 
"loss": 0.322, "step": 4461 }, { "epoch": 0.6984971822166562, "grad_norm": 2.1932692527770996, "learning_rate": 0.00020342130987292275, "loss": 0.4332, "step": 4462 }, { "epoch": 0.6986537257357546, "grad_norm": 0.7170542478561401, "learning_rate": 0.00020339687194525903, "loss": 0.3078, "step": 4463 }, { "epoch": 0.6988102692548529, "grad_norm": 1.0266473293304443, "learning_rate": 0.00020337243401759528, "loss": 0.3968, "step": 4464 }, { "epoch": 0.6989668127739511, "grad_norm": 0.8614751100540161, "learning_rate": 0.00020334799608993156, "loss": 0.2991, "step": 4465 }, { "epoch": 0.6991233562930494, "grad_norm": 0.9565255641937256, "learning_rate": 0.00020332355816226783, "loss": 0.6496, "step": 4466 }, { "epoch": 0.6992798998121478, "grad_norm": 1.1029607057571411, "learning_rate": 0.00020329912023460409, "loss": 0.569, "step": 4467 }, { "epoch": 0.6994364433312461, "grad_norm": 1.1075776815414429, "learning_rate": 0.00020327468230694036, "loss": 0.5768, "step": 4468 }, { "epoch": 0.6995929868503444, "grad_norm": 1.6944200992584229, "learning_rate": 0.00020325024437927661, "loss": 0.9883, "step": 4469 }, { "epoch": 0.6997495303694427, "grad_norm": 1.856938362121582, "learning_rate": 0.00020322580645161287, "loss": 0.6218, "step": 4470 }, { "epoch": 0.699906073888541, "grad_norm": 1.5838960409164429, "learning_rate": 0.00020320136852394914, "loss": 0.6972, "step": 4471 }, { "epoch": 0.7000626174076393, "grad_norm": 1.5133699178695679, "learning_rate": 0.00020317693059628542, "loss": 0.8786, "step": 4472 }, { "epoch": 0.7002191609267376, "grad_norm": 0.9104510545730591, "learning_rate": 0.00020315249266862167, "loss": 0.4001, "step": 4473 }, { "epoch": 0.700375704445836, "grad_norm": 1.3532549142837524, "learning_rate": 0.00020312805474095795, "loss": 0.4596, "step": 4474 }, { "epoch": 0.7005322479649343, "grad_norm": 0.9898942708969116, "learning_rate": 0.00020310361681329423, "loss": 0.6143, "step": 4475 }, { "epoch": 0.7006887914840325, "grad_norm": 
1.7784004211425781, "learning_rate": 0.00020307917888563045, "loss": 0.9761, "step": 4476 }, { "epoch": 0.7008453350031308, "grad_norm": 2.9220640659332275, "learning_rate": 0.00020305474095796673, "loss": 1.0946, "step": 4477 }, { "epoch": 0.7010018785222292, "grad_norm": 1.6523720026016235, "learning_rate": 0.000203030303030303, "loss": 0.6745, "step": 4478 }, { "epoch": 0.7011584220413275, "grad_norm": 2.706207275390625, "learning_rate": 0.00020300586510263926, "loss": 0.5833, "step": 4479 }, { "epoch": 0.7013149655604258, "grad_norm": 1.879623532295227, "learning_rate": 0.00020298142717497554, "loss": 0.752, "step": 4480 }, { "epoch": 0.7014715090795242, "grad_norm": 2.1002285480499268, "learning_rate": 0.00020295698924731182, "loss": 0.7268, "step": 4481 }, { "epoch": 0.7016280525986224, "grad_norm": 2.6373350620269775, "learning_rate": 0.00020293255131964807, "loss": 0.9332, "step": 4482 }, { "epoch": 0.7017845961177207, "grad_norm": 3.0177090167999268, "learning_rate": 0.00020290811339198435, "loss": 1.4244, "step": 4483 }, { "epoch": 0.701941139636819, "grad_norm": 2.546877384185791, "learning_rate": 0.00020288367546432063, "loss": 0.6665, "step": 4484 }, { "epoch": 0.7020976831559174, "grad_norm": 1.9773567914962769, "learning_rate": 0.00020285923753665685, "loss": 1.2049, "step": 4485 }, { "epoch": 0.7022542266750157, "grad_norm": 3.617976188659668, "learning_rate": 0.00020283479960899313, "loss": 1.2953, "step": 4486 }, { "epoch": 0.7024107701941139, "grad_norm": 2.691190719604492, "learning_rate": 0.0002028103616813294, "loss": 0.722, "step": 4487 }, { "epoch": 0.7025673137132122, "grad_norm": 2.3003101348876953, "learning_rate": 0.00020278592375366566, "loss": 0.8476, "step": 4488 }, { "epoch": 0.7027238572323106, "grad_norm": 2.354837656021118, "learning_rate": 0.00020276148582600194, "loss": 0.6616, "step": 4489 }, { "epoch": 0.7028804007514089, "grad_norm": 2.914757013320923, "learning_rate": 0.00020273704789833822, "loss": 1.2246, "step": 4490 }, { 
"epoch": 0.7030369442705072, "grad_norm": 2.373718500137329, "learning_rate": 0.00020271260997067447, "loss": 1.2729, "step": 4491 }, { "epoch": 0.7031934877896056, "grad_norm": 2.7361257076263428, "learning_rate": 0.00020268817204301075, "loss": 1.2303, "step": 4492 }, { "epoch": 0.7033500313087038, "grad_norm": 1.035384178161621, "learning_rate": 0.000202663734115347, "loss": 0.6874, "step": 4493 }, { "epoch": 0.7035065748278021, "grad_norm": 3.0830559730529785, "learning_rate": 0.00020263929618768325, "loss": 1.0254, "step": 4494 }, { "epoch": 0.7036631183469004, "grad_norm": 5.6683478355407715, "learning_rate": 0.00020261485826001953, "loss": 0.7345, "step": 4495 }, { "epoch": 0.7038196618659988, "grad_norm": 2.4761593341827393, "learning_rate": 0.0002025904203323558, "loss": 0.9819, "step": 4496 }, { "epoch": 0.7039762053850971, "grad_norm": 4.851938724517822, "learning_rate": 0.00020256598240469206, "loss": 0.9002, "step": 4497 }, { "epoch": 0.7041327489041954, "grad_norm": 1.542232871055603, "learning_rate": 0.00020254154447702833, "loss": 0.7226, "step": 4498 }, { "epoch": 0.7042892924232936, "grad_norm": 1.4405412673950195, "learning_rate": 0.0002025171065493646, "loss": 1.0137, "step": 4499 }, { "epoch": 0.704445835942392, "grad_norm": 1.6948432922363281, "learning_rate": 0.00020249266862170084, "loss": 0.725, "step": 4500 }, { "epoch": 0.7046023794614903, "grad_norm": 0.5752367973327637, "learning_rate": 0.00020246823069403712, "loss": 0.2645, "step": 4501 }, { "epoch": 0.7047589229805886, "grad_norm": 0.5872207880020142, "learning_rate": 0.0002024437927663734, "loss": 0.3297, "step": 4502 }, { "epoch": 0.704915466499687, "grad_norm": 0.5640816688537598, "learning_rate": 0.00020241935483870965, "loss": 0.379, "step": 4503 }, { "epoch": 0.7050720100187852, "grad_norm": 0.9761353731155396, "learning_rate": 0.00020239491691104592, "loss": 0.3643, "step": 4504 }, { "epoch": 0.7052285535378835, "grad_norm": 0.6274861693382263, "learning_rate": 
0.0002023704789833822, "loss": 0.3448, "step": 4505 }, { "epoch": 0.7053850970569818, "grad_norm": 0.8410617709159851, "learning_rate": 0.00020234604105571845, "loss": 0.3694, "step": 4506 }, { "epoch": 0.7055416405760802, "grad_norm": 0.7062875032424927, "learning_rate": 0.00020232160312805473, "loss": 0.3699, "step": 4507 }, { "epoch": 0.7056981840951785, "grad_norm": 0.783191442489624, "learning_rate": 0.000202297165200391, "loss": 0.478, "step": 4508 }, { "epoch": 0.7058547276142768, "grad_norm": 0.8087450265884399, "learning_rate": 0.00020227272727272723, "loss": 0.3735, "step": 4509 }, { "epoch": 0.706011271133375, "grad_norm": 0.7015910148620605, "learning_rate": 0.0002022482893450635, "loss": 0.4296, "step": 4510 }, { "epoch": 0.7061678146524734, "grad_norm": 1.2119005918502808, "learning_rate": 0.0002022238514173998, "loss": 0.4157, "step": 4511 }, { "epoch": 0.7063243581715717, "grad_norm": 0.7165002226829529, "learning_rate": 0.00020219941348973604, "loss": 0.3681, "step": 4512 }, { "epoch": 0.70648090169067, "grad_norm": 0.9335975050926208, "learning_rate": 0.00020217497556207232, "loss": 0.472, "step": 4513 }, { "epoch": 0.7066374452097683, "grad_norm": 1.0434401035308838, "learning_rate": 0.0002021505376344086, "loss": 0.6244, "step": 4514 }, { "epoch": 0.7067939887288667, "grad_norm": 1.2325775623321533, "learning_rate": 0.00020212609970674485, "loss": 0.5675, "step": 4515 }, { "epoch": 0.7069505322479649, "grad_norm": 1.6948950290679932, "learning_rate": 0.0002021016617790811, "loss": 0.6639, "step": 4516 }, { "epoch": 0.7071070757670632, "grad_norm": 1.0508677959442139, "learning_rate": 0.00020207722385141738, "loss": 0.6657, "step": 4517 }, { "epoch": 0.7072636192861615, "grad_norm": 3.625051259994507, "learning_rate": 0.00020205278592375363, "loss": 0.5776, "step": 4518 }, { "epoch": 0.7074201628052599, "grad_norm": 3.57965087890625, "learning_rate": 0.0002020283479960899, "loss": 0.7766, "step": 4519 }, { "epoch": 0.7075767063243582, 
"grad_norm": 3.801496744155884, "learning_rate": 0.0002020039100684262, "loss": 1.1872, "step": 4520 }, { "epoch": 0.7077332498434565, "grad_norm": 1.3635119199752808, "learning_rate": 0.00020197947214076244, "loss": 0.9458, "step": 4521 }, { "epoch": 0.7078897933625548, "grad_norm": 3.277301788330078, "learning_rate": 0.00020195503421309872, "loss": 0.8063, "step": 4522 }, { "epoch": 0.7080463368816531, "grad_norm": 2.7568883895874023, "learning_rate": 0.000201930596285435, "loss": 0.552, "step": 4523 }, { "epoch": 0.7082028804007514, "grad_norm": 1.8860586881637573, "learning_rate": 0.00020190615835777122, "loss": 0.6122, "step": 4524 }, { "epoch": 0.7083594239198497, "grad_norm": 1.4238359928131104, "learning_rate": 0.0002018817204301075, "loss": 0.7285, "step": 4525 }, { "epoch": 0.7085159674389481, "grad_norm": 0.9961578845977783, "learning_rate": 0.00020185728250244378, "loss": 0.4278, "step": 4526 }, { "epoch": 0.7086725109580463, "grad_norm": 1.8736709356307983, "learning_rate": 0.00020183284457478003, "loss": 1.2713, "step": 4527 }, { "epoch": 0.7088290544771446, "grad_norm": 1.6112703084945679, "learning_rate": 0.0002018084066471163, "loss": 0.7608, "step": 4528 }, { "epoch": 0.7089855979962429, "grad_norm": 1.8438599109649658, "learning_rate": 0.00020178396871945258, "loss": 0.6969, "step": 4529 }, { "epoch": 0.7091421415153413, "grad_norm": 2.1331958770751953, "learning_rate": 0.00020175953079178884, "loss": 0.7564, "step": 4530 }, { "epoch": 0.7092986850344396, "grad_norm": 1.4046313762664795, "learning_rate": 0.00020173509286412511, "loss": 0.5279, "step": 4531 }, { "epoch": 0.7094552285535379, "grad_norm": 2.5683090686798096, "learning_rate": 0.0002017106549364614, "loss": 1.1941, "step": 4532 }, { "epoch": 0.7096117720726361, "grad_norm": 2.2568979263305664, "learning_rate": 0.00020168621700879762, "loss": 1.0081, "step": 4533 }, { "epoch": 0.7097683155917345, "grad_norm": 2.451312780380249, "learning_rate": 0.0002016617790811339, "loss": 1.0977, 
"step": 4534 }, { "epoch": 0.7099248591108328, "grad_norm": 2.566115379333496, "learning_rate": 0.00020163734115347017, "loss": 0.9091, "step": 4535 }, { "epoch": 0.7100814026299311, "grad_norm": 1.651110053062439, "learning_rate": 0.00020161290322580642, "loss": 1.1188, "step": 4536 }, { "epoch": 0.7102379461490295, "grad_norm": 1.3296421766281128, "learning_rate": 0.0002015884652981427, "loss": 0.9653, "step": 4537 }, { "epoch": 0.7103944896681278, "grad_norm": 4.0526862144470215, "learning_rate": 0.00020156402737047898, "loss": 1.0826, "step": 4538 }, { "epoch": 0.710551033187226, "grad_norm": 2.9230265617370605, "learning_rate": 0.00020153958944281523, "loss": 1.4499, "step": 4539 }, { "epoch": 0.7107075767063243, "grad_norm": 3.018975257873535, "learning_rate": 0.00020151515151515148, "loss": 0.9125, "step": 4540 }, { "epoch": 0.7108641202254227, "grad_norm": 1.6214097738265991, "learning_rate": 0.00020149071358748776, "loss": 1.3237, "step": 4541 }, { "epoch": 0.711020663744521, "grad_norm": 2.642343521118164, "learning_rate": 0.000201466275659824, "loss": 1.8457, "step": 4542 }, { "epoch": 0.7111772072636193, "grad_norm": 2.0636792182922363, "learning_rate": 0.0002014418377321603, "loss": 1.279, "step": 4543 }, { "epoch": 0.7113337507827175, "grad_norm": 3.2463300228118896, "learning_rate": 0.00020141739980449657, "loss": 1.4645, "step": 4544 }, { "epoch": 0.7114902943018159, "grad_norm": 2.8252651691436768, "learning_rate": 0.00020139296187683282, "loss": 1.3413, "step": 4545 }, { "epoch": 0.7116468378209142, "grad_norm": 0.8941448330879211, "learning_rate": 0.0002013685239491691, "loss": 0.7135, "step": 4546 }, { "epoch": 0.7118033813400125, "grad_norm": 2.6736679077148438, "learning_rate": 0.00020134408602150538, "loss": 0.9046, "step": 4547 }, { "epoch": 0.7119599248591109, "grad_norm": 2.5625529289245605, "learning_rate": 0.0002013196480938416, "loss": 1.2271, "step": 4548 }, { "epoch": 0.7121164683782092, "grad_norm": 0.8302935361862183, 
"learning_rate": 0.00020129521016617788, "loss": 0.3858, "step": 4549 }, { "epoch": 0.7122730118973074, "grad_norm": 3.529597043991089, "learning_rate": 0.00020127077223851416, "loss": 1.4059, "step": 4550 }, { "epoch": 0.7124295554164057, "grad_norm": 1.3789228200912476, "learning_rate": 0.0002012463343108504, "loss": 0.7232, "step": 4551 }, { "epoch": 0.7125860989355041, "grad_norm": 0.34966397285461426, "learning_rate": 0.0002012218963831867, "loss": 0.2367, "step": 4552 }, { "epoch": 0.7127426424546024, "grad_norm": 0.5851748585700989, "learning_rate": 0.00020119745845552297, "loss": 0.2883, "step": 4553 }, { "epoch": 0.7128991859737007, "grad_norm": 0.4715137779712677, "learning_rate": 0.00020117302052785922, "loss": 0.3685, "step": 4554 }, { "epoch": 0.713055729492799, "grad_norm": 0.7511829733848572, "learning_rate": 0.0002011485826001955, "loss": 0.3928, "step": 4555 }, { "epoch": 0.7132122730118973, "grad_norm": 0.8073253035545349, "learning_rate": 0.00020112414467253177, "loss": 0.3393, "step": 4556 }, { "epoch": 0.7133688165309956, "grad_norm": 1.0048470497131348, "learning_rate": 0.000201099706744868, "loss": 0.3951, "step": 4557 }, { "epoch": 0.7135253600500939, "grad_norm": 0.5629849433898926, "learning_rate": 0.00020107526881720428, "loss": 0.3419, "step": 4558 }, { "epoch": 0.7136819035691923, "grad_norm": 1.2211352586746216, "learning_rate": 0.00020105083088954056, "loss": 0.3908, "step": 4559 }, { "epoch": 0.7138384470882906, "grad_norm": 0.7062564492225647, "learning_rate": 0.0002010263929618768, "loss": 0.395, "step": 4560 }, { "epoch": 0.7139949906073888, "grad_norm": 1.036588430404663, "learning_rate": 0.00020100195503421309, "loss": 0.4042, "step": 4561 }, { "epoch": 0.7141515341264871, "grad_norm": 1.1843818426132202, "learning_rate": 0.00020097751710654936, "loss": 0.7025, "step": 4562 }, { "epoch": 0.7143080776455855, "grad_norm": 1.298744559288025, "learning_rate": 0.0002009530791788856, "loss": 0.6033, "step": 4563 }, { "epoch": 
0.7144646211646838, "grad_norm": 1.004136085510254, "learning_rate": 0.00020092864125122187, "loss": 0.4368, "step": 4564 }, { "epoch": 0.7146211646837821, "grad_norm": 0.6956773996353149, "learning_rate": 0.00020090420332355814, "loss": 0.2452, "step": 4565 }, { "epoch": 0.7147777082028804, "grad_norm": 1.565514087677002, "learning_rate": 0.0002008797653958944, "loss": 0.7385, "step": 4566 }, { "epoch": 0.7149342517219787, "grad_norm": 1.181185007095337, "learning_rate": 0.00020085532746823067, "loss": 0.5778, "step": 4567 }, { "epoch": 0.715090795241077, "grad_norm": 1.010599970817566, "learning_rate": 0.00020083088954056695, "loss": 0.6435, "step": 4568 }, { "epoch": 0.7152473387601753, "grad_norm": 1.560499668121338, "learning_rate": 0.0002008064516129032, "loss": 0.6996, "step": 4569 }, { "epoch": 0.7154038822792737, "grad_norm": 2.0916194915771484, "learning_rate": 0.00020078201368523948, "loss": 0.5439, "step": 4570 }, { "epoch": 0.715560425798372, "grad_norm": 2.5257811546325684, "learning_rate": 0.00020075757575757576, "loss": 0.8417, "step": 4571 }, { "epoch": 0.7157169693174703, "grad_norm": 1.5999330282211304, "learning_rate": 0.00020073313782991198, "loss": 0.7435, "step": 4572 }, { "epoch": 0.7158735128365685, "grad_norm": 1.6199560165405273, "learning_rate": 0.00020070869990224826, "loss": 0.5395, "step": 4573 }, { "epoch": 0.7160300563556669, "grad_norm": 6.264113426208496, "learning_rate": 0.00020068426197458454, "loss": 1.2339, "step": 4574 }, { "epoch": 0.7161865998747652, "grad_norm": 1.9628108739852905, "learning_rate": 0.0002006598240469208, "loss": 0.6285, "step": 4575 }, { "epoch": 0.7163431433938635, "grad_norm": 2.0946178436279297, "learning_rate": 0.00020063538611925707, "loss": 0.5511, "step": 4576 }, { "epoch": 0.7164996869129618, "grad_norm": 1.8806025981903076, "learning_rate": 0.00020061094819159335, "loss": 0.8856, "step": 4577 }, { "epoch": 0.7166562304320601, "grad_norm": 1.6844736337661743, "learning_rate": 0.0002005865102639296, 
"loss": 0.9052, "step": 4578 }, { "epoch": 0.7168127739511584, "grad_norm": 1.8848754167556763, "learning_rate": 0.00020056207233626588, "loss": 0.7984, "step": 4579 }, { "epoch": 0.7169693174702567, "grad_norm": 1.9630393981933594, "learning_rate": 0.00020053763440860216, "loss": 0.7998, "step": 4580 }, { "epoch": 0.717125860989355, "grad_norm": 1.6129907369613647, "learning_rate": 0.00020051319648093838, "loss": 0.6739, "step": 4581 }, { "epoch": 0.7172824045084534, "grad_norm": 1.1391911506652832, "learning_rate": 0.00020048875855327466, "loss": 0.8227, "step": 4582 }, { "epoch": 0.7174389480275517, "grad_norm": 2.1303088665008545, "learning_rate": 0.00020046432062561094, "loss": 0.717, "step": 4583 }, { "epoch": 0.7175954915466499, "grad_norm": 2.0801308155059814, "learning_rate": 0.0002004398826979472, "loss": 1.046, "step": 4584 }, { "epoch": 0.7177520350657483, "grad_norm": 3.0255331993103027, "learning_rate": 0.00020041544477028347, "loss": 1.8212, "step": 4585 }, { "epoch": 0.7179085785848466, "grad_norm": 1.748745322227478, "learning_rate": 0.00020039100684261975, "loss": 0.8383, "step": 4586 }, { "epoch": 0.7180651221039449, "grad_norm": 2.0003836154937744, "learning_rate": 0.00020036656891495597, "loss": 0.6246, "step": 4587 }, { "epoch": 0.7182216656230432, "grad_norm": 2.874161720275879, "learning_rate": 0.00020034213098729225, "loss": 0.9589, "step": 4588 }, { "epoch": 0.7183782091421416, "grad_norm": 3.0715625286102295, "learning_rate": 0.00020031769305962853, "loss": 1.201, "step": 4589 }, { "epoch": 0.7185347526612398, "grad_norm": 2.9870784282684326, "learning_rate": 0.00020029325513196478, "loss": 1.5703, "step": 4590 }, { "epoch": 0.7186912961803381, "grad_norm": 2.393979549407959, "learning_rate": 0.00020026881720430106, "loss": 0.8619, "step": 4591 }, { "epoch": 0.7188478396994364, "grad_norm": 3.002680540084839, "learning_rate": 0.00020024437927663733, "loss": 1.3913, "step": 4592 }, { "epoch": 0.7190043832185348, "grad_norm": 
2.3127334117889404, "learning_rate": 0.00020021994134897359, "loss": 1.3388, "step": 4593 }, { "epoch": 0.7191609267376331, "grad_norm": 1.3608214855194092, "learning_rate": 0.00020019550342130986, "loss": 0.948, "step": 4594 }, { "epoch": 0.7193174702567313, "grad_norm": 3.9520657062530518, "learning_rate": 0.00020017106549364614, "loss": 1.2869, "step": 4595 }, { "epoch": 0.7194740137758296, "grad_norm": 2.2881007194519043, "learning_rate": 0.00020014662756598237, "loss": 1.098, "step": 4596 }, { "epoch": 0.719630557294928, "grad_norm": 1.3503066301345825, "learning_rate": 0.00020012218963831864, "loss": 0.5111, "step": 4597 }, { "epoch": 0.7197871008140263, "grad_norm": 2.778672695159912, "learning_rate": 0.00020009775171065492, "loss": 0.8472, "step": 4598 }, { "epoch": 0.7199436443331246, "grad_norm": 3.3491392135620117, "learning_rate": 0.00020007331378299117, "loss": 0.7971, "step": 4599 }, { "epoch": 0.720100187852223, "grad_norm": 2.288565158843994, "learning_rate": 0.00020004887585532745, "loss": 0.8447, "step": 4600 }, { "epoch": 0.7202567313713212, "grad_norm": 0.5646255612373352, "learning_rate": 0.00020002443792766373, "loss": 0.3159, "step": 4601 }, { "epoch": 0.7204132748904195, "grad_norm": 0.8426042199134827, "learning_rate": 0.00019999999999999998, "loss": 0.3544, "step": 4602 }, { "epoch": 0.7205698184095178, "grad_norm": 0.6321278214454651, "learning_rate": 0.00019997556207233626, "loss": 0.2904, "step": 4603 }, { "epoch": 0.7207263619286162, "grad_norm": 0.8084971904754639, "learning_rate": 0.0001999511241446725, "loss": 0.274, "step": 4604 }, { "epoch": 0.7208829054477145, "grad_norm": 0.5742786526679993, "learning_rate": 0.00019992668621700876, "loss": 0.2366, "step": 4605 }, { "epoch": 0.7210394489668128, "grad_norm": 1.111324667930603, "learning_rate": 0.00019990224828934504, "loss": 0.324, "step": 4606 }, { "epoch": 0.721195992485911, "grad_norm": 0.9967480301856995, "learning_rate": 0.00019987781036168132, "loss": 0.3936, "step": 4607 }, 
{ "epoch": 0.7213525360050094, "grad_norm": 0.802562952041626, "learning_rate": 0.00019985337243401757, "loss": 0.3368, "step": 4608 }, { "epoch": 0.7215090795241077, "grad_norm": 0.7982587218284607, "learning_rate": 0.00019982893450635385, "loss": 0.342, "step": 4609 }, { "epoch": 0.721665623043206, "grad_norm": 0.8451456427574158, "learning_rate": 0.00019980449657869013, "loss": 0.3102, "step": 4610 }, { "epoch": 0.7218221665623044, "grad_norm": 2.590470314025879, "learning_rate": 0.00019978005865102635, "loss": 0.8298, "step": 4611 }, { "epoch": 0.7219787100814026, "grad_norm": 1.0624793767929077, "learning_rate": 0.00019975562072336263, "loss": 0.5124, "step": 4612 }, { "epoch": 0.7221352536005009, "grad_norm": 0.9378711581230164, "learning_rate": 0.0001997311827956989, "loss": 0.4405, "step": 4613 }, { "epoch": 0.7222917971195992, "grad_norm": 1.3992924690246582, "learning_rate": 0.00019970674486803516, "loss": 0.4996, "step": 4614 }, { "epoch": 0.7224483406386976, "grad_norm": 0.9326929450035095, "learning_rate": 0.00019968230694037144, "loss": 0.5062, "step": 4615 }, { "epoch": 0.7226048841577959, "grad_norm": 1.211406946182251, "learning_rate": 0.00019965786901270772, "loss": 0.5575, "step": 4616 }, { "epoch": 0.7227614276768942, "grad_norm": 0.5172481536865234, "learning_rate": 0.00019963343108504397, "loss": 0.2156, "step": 4617 }, { "epoch": 0.7229179711959924, "grad_norm": 1.6718294620513916, "learning_rate": 0.00019960899315738025, "loss": 0.8636, "step": 4618 }, { "epoch": 0.7230745147150908, "grad_norm": 1.0531017780303955, "learning_rate": 0.00019958455522971652, "loss": 0.4851, "step": 4619 }, { "epoch": 0.7232310582341891, "grad_norm": 1.2837929725646973, "learning_rate": 0.00019956011730205275, "loss": 0.4803, "step": 4620 }, { "epoch": 0.7233876017532874, "grad_norm": 1.3181400299072266, "learning_rate": 0.00019953567937438903, "loss": 0.5956, "step": 4621 }, { "epoch": 0.7235441452723858, "grad_norm": 1.248836874961853, "learning_rate": 
0.0001995112414467253, "loss": 0.6616, "step": 4622 }, { "epoch": 0.7237006887914841, "grad_norm": 1.2771812677383423, "learning_rate": 0.00019948680351906156, "loss": 0.2288, "step": 4623 }, { "epoch": 0.7238572323105823, "grad_norm": 1.3296245336532593, "learning_rate": 0.00019946236559139784, "loss": 0.6624, "step": 4624 }, { "epoch": 0.7240137758296806, "grad_norm": 1.7813029289245605, "learning_rate": 0.0001994379276637341, "loss": 0.6186, "step": 4625 }, { "epoch": 0.724170319348779, "grad_norm": 1.5180063247680664, "learning_rate": 0.00019941348973607036, "loss": 0.4384, "step": 4626 }, { "epoch": 0.7243268628678773, "grad_norm": 1.4584667682647705, "learning_rate": 0.00019938905180840664, "loss": 0.6051, "step": 4627 }, { "epoch": 0.7244834063869756, "grad_norm": 2.218289852142334, "learning_rate": 0.0001993646138807429, "loss": 0.5041, "step": 4628 }, { "epoch": 0.7246399499060739, "grad_norm": 2.1602261066436768, "learning_rate": 0.00019934017595307915, "loss": 0.7868, "step": 4629 }, { "epoch": 0.7247964934251722, "grad_norm": 1.7537661790847778, "learning_rate": 0.00019931573802541542, "loss": 0.72, "step": 4630 }, { "epoch": 0.7249530369442705, "grad_norm": 3.2519748210906982, "learning_rate": 0.0001992913000977517, "loss": 1.0954, "step": 4631 }, { "epoch": 0.7251095804633688, "grad_norm": 2.7691283226013184, "learning_rate": 0.00019926686217008795, "loss": 1.229, "step": 4632 }, { "epoch": 0.7252661239824671, "grad_norm": 2.6838536262512207, "learning_rate": 0.00019924242424242423, "loss": 0.9008, "step": 4633 }, { "epoch": 0.7254226675015655, "grad_norm": 2.501833915710449, "learning_rate": 0.0001992179863147605, "loss": 1.4006, "step": 4634 }, { "epoch": 0.7255792110206637, "grad_norm": 3.2722952365875244, "learning_rate": 0.00019919354838709673, "loss": 0.8523, "step": 4635 }, { "epoch": 0.725735754539762, "grad_norm": 2.0954771041870117, "learning_rate": 0.000199169110459433, "loss": 0.9759, "step": 4636 }, { "epoch": 0.7258922980588604, 
"grad_norm": 3.6169707775115967, "learning_rate": 0.0001991446725317693, "loss": 0.8356, "step": 4637 }, { "epoch": 0.7260488415779587, "grad_norm": 1.3619242906570435, "learning_rate": 0.00019912023460410554, "loss": 0.3973, "step": 4638 }, { "epoch": 0.726205385097057, "grad_norm": 2.543351650238037, "learning_rate": 0.00019909579667644182, "loss": 1.3629, "step": 4639 }, { "epoch": 0.7263619286161553, "grad_norm": 2.047996759414673, "learning_rate": 0.0001990713587487781, "loss": 0.9438, "step": 4640 }, { "epoch": 0.7265184721352536, "grad_norm": 1.984495997428894, "learning_rate": 0.00019904692082111435, "loss": 1.1399, "step": 4641 }, { "epoch": 0.7266750156543519, "grad_norm": 2.7449281215667725, "learning_rate": 0.00019902248289345063, "loss": 0.7897, "step": 4642 }, { "epoch": 0.7268315591734502, "grad_norm": 2.5032405853271484, "learning_rate": 0.0001989980449657869, "loss": 0.9229, "step": 4643 }, { "epoch": 0.7269881026925485, "grad_norm": 2.4411966800689697, "learning_rate": 0.00019897360703812313, "loss": 1.2118, "step": 4644 }, { "epoch": 0.7271446462116469, "grad_norm": 2.0347883701324463, "learning_rate": 0.0001989491691104594, "loss": 0.4972, "step": 4645 }, { "epoch": 0.7273011897307452, "grad_norm": 2.888296604156494, "learning_rate": 0.0001989247311827957, "loss": 1.3068, "step": 4646 }, { "epoch": 0.7274577332498434, "grad_norm": 1.6684255599975586, "learning_rate": 0.00019890029325513194, "loss": 0.9385, "step": 4647 }, { "epoch": 0.7276142767689417, "grad_norm": 3.1219706535339355, "learning_rate": 0.00019887585532746822, "loss": 1.1736, "step": 4648 }, { "epoch": 0.7277708202880401, "grad_norm": 2.7629895210266113, "learning_rate": 0.0001988514173998045, "loss": 0.7565, "step": 4649 }, { "epoch": 0.7279273638071384, "grad_norm": 2.8487377166748047, "learning_rate": 0.00019882697947214075, "loss": 1.5535, "step": 4650 }, { "epoch": 0.7280839073262367, "grad_norm": 0.5729645490646362, "learning_rate": 0.00019880254154447703, "loss": 0.3549, 
"step": 4651 }, { "epoch": 0.728240450845335, "grad_norm": 0.7285500764846802, "learning_rate": 0.00019877810361681328, "loss": 0.2756, "step": 4652 }, { "epoch": 0.7283969943644333, "grad_norm": 0.9222913384437561, "learning_rate": 0.00019875366568914953, "loss": 0.3588, "step": 4653 }, { "epoch": 0.7285535378835316, "grad_norm": 0.7123448848724365, "learning_rate": 0.0001987292277614858, "loss": 0.3541, "step": 4654 }, { "epoch": 0.7287100814026299, "grad_norm": 0.6087605357170105, "learning_rate": 0.00019870478983382208, "loss": 0.339, "step": 4655 }, { "epoch": 0.7288666249217283, "grad_norm": 0.6646626591682434, "learning_rate": 0.00019868035190615834, "loss": 0.5141, "step": 4656 }, { "epoch": 0.7290231684408266, "grad_norm": 0.6121235489845276, "learning_rate": 0.00019865591397849461, "loss": 0.3261, "step": 4657 }, { "epoch": 0.7291797119599248, "grad_norm": 0.7372736930847168, "learning_rate": 0.0001986314760508309, "loss": 0.3063, "step": 4658 }, { "epoch": 0.7293362554790231, "grad_norm": 2.8387651443481445, "learning_rate": 0.00019860703812316712, "loss": 0.4006, "step": 4659 }, { "epoch": 0.7294927989981215, "grad_norm": 0.7166696190834045, "learning_rate": 0.0001985826001955034, "loss": 0.3821, "step": 4660 }, { "epoch": 0.7296493425172198, "grad_norm": 0.9727258682250977, "learning_rate": 0.00019855816226783967, "loss": 0.3255, "step": 4661 }, { "epoch": 0.7298058860363181, "grad_norm": 0.5781895518302917, "learning_rate": 0.00019853372434017592, "loss": 0.3748, "step": 4662 }, { "epoch": 0.7299624295554165, "grad_norm": 0.8979054093360901, "learning_rate": 0.0001985092864125122, "loss": 0.3613, "step": 4663 }, { "epoch": 0.7301189730745147, "grad_norm": 1.2119108438491821, "learning_rate": 0.00019848484848484848, "loss": 0.5699, "step": 4664 }, { "epoch": 0.730275516593613, "grad_norm": 1.14756441116333, "learning_rate": 0.00019846041055718473, "loss": 0.4611, "step": 4665 }, { "epoch": 0.7304320601127113, "grad_norm": 0.9472888708114624, 
"learning_rate": 0.000198435972629521, "loss": 0.3666, "step": 4666 }, { "epoch": 0.7305886036318097, "grad_norm": 4.8855695724487305, "learning_rate": 0.0001984115347018573, "loss": 2.9419, "step": 4667 }, { "epoch": 0.730745147150908, "grad_norm": 1.7084994316101074, "learning_rate": 0.0001983870967741935, "loss": 0.7695, "step": 4668 }, { "epoch": 0.7309016906700062, "grad_norm": 1.1830832958221436, "learning_rate": 0.0001983626588465298, "loss": 0.5826, "step": 4669 }, { "epoch": 0.7310582341891045, "grad_norm": 1.2845669984817505, "learning_rate": 0.00019833822091886607, "loss": 0.6607, "step": 4670 }, { "epoch": 0.7312147777082029, "grad_norm": 1.136726975440979, "learning_rate": 0.00019831378299120232, "loss": 0.7845, "step": 4671 }, { "epoch": 0.7313713212273012, "grad_norm": 1.3713886737823486, "learning_rate": 0.0001982893450635386, "loss": 0.5098, "step": 4672 }, { "epoch": 0.7315278647463995, "grad_norm": 1.7736725807189941, "learning_rate": 0.00019826490713587488, "loss": 0.6415, "step": 4673 }, { "epoch": 0.7316844082654979, "grad_norm": 1.0796916484832764, "learning_rate": 0.00019824046920821113, "loss": 0.5017, "step": 4674 }, { "epoch": 0.7318409517845961, "grad_norm": 1.7601935863494873, "learning_rate": 0.00019821603128054738, "loss": 0.7534, "step": 4675 }, { "epoch": 0.7319974953036944, "grad_norm": 1.5599554777145386, "learning_rate": 0.00019819159335288366, "loss": 0.7588, "step": 4676 }, { "epoch": 0.7321540388227927, "grad_norm": 3.366999626159668, "learning_rate": 0.0001981671554252199, "loss": 0.9686, "step": 4677 }, { "epoch": 0.7323105823418911, "grad_norm": 2.5471243858337402, "learning_rate": 0.0001981427174975562, "loss": 0.8723, "step": 4678 }, { "epoch": 0.7324671258609894, "grad_norm": 1.7258960008621216, "learning_rate": 0.00019811827956989247, "loss": 0.6776, "step": 4679 }, { "epoch": 0.7326236693800877, "grad_norm": 2.3678998947143555, "learning_rate": 0.00019809384164222872, "loss": 0.821, "step": 4680 }, { "epoch": 
0.7327802128991859, "grad_norm": 2.2252848148345947, "learning_rate": 0.000198069403714565, "loss": 1.0205, "step": 4681 }, { "epoch": 0.7329367564182843, "grad_norm": 1.9169201850891113, "learning_rate": 0.00019804496578690127, "loss": 0.8085, "step": 4682 }, { "epoch": 0.7330932999373826, "grad_norm": 2.3197524547576904, "learning_rate": 0.0001980205278592375, "loss": 0.6308, "step": 4683 }, { "epoch": 0.7332498434564809, "grad_norm": 2.232663631439209, "learning_rate": 0.00019799608993157378, "loss": 0.9189, "step": 4684 }, { "epoch": 0.7334063869755792, "grad_norm": 3.014984130859375, "learning_rate": 0.00019797165200391006, "loss": 0.8978, "step": 4685 }, { "epoch": 0.7335629304946775, "grad_norm": 2.799757480621338, "learning_rate": 0.0001979472140762463, "loss": 0.7204, "step": 4686 }, { "epoch": 0.7337194740137758, "grad_norm": 4.892414569854736, "learning_rate": 0.00019792277614858259, "loss": 0.8103, "step": 4687 }, { "epoch": 0.7338760175328741, "grad_norm": 2.7434020042419434, "learning_rate": 0.00019789833822091886, "loss": 1.2881, "step": 4688 }, { "epoch": 0.7340325610519725, "grad_norm": 2.714355945587158, "learning_rate": 0.00019787390029325511, "loss": 1.6406, "step": 4689 }, { "epoch": 0.7341891045710708, "grad_norm": 2.0118868350982666, "learning_rate": 0.0001978494623655914, "loss": 1.2144, "step": 4690 }, { "epoch": 0.7343456480901691, "grad_norm": 1.1020734310150146, "learning_rate": 0.00019782502443792767, "loss": 0.742, "step": 4691 }, { "epoch": 0.7345021916092673, "grad_norm": 4.144432544708252, "learning_rate": 0.0001978005865102639, "loss": 1.6325, "step": 4692 }, { "epoch": 0.7346587351283657, "grad_norm": 1.8533861637115479, "learning_rate": 0.00019777614858260017, "loss": 0.9883, "step": 4693 }, { "epoch": 0.734815278647464, "grad_norm": 2.328321695327759, "learning_rate": 0.00019775171065493645, "loss": 1.4588, "step": 4694 }, { "epoch": 0.7349718221665623, "grad_norm": 1.688111424446106, "learning_rate": 0.0001977272727272727, 
"loss": 0.7339, "step": 4695 }, { "epoch": 0.7351283656856606, "grad_norm": 1.9149725437164307, "learning_rate": 0.00019770283479960898, "loss": 0.7413, "step": 4696 }, { "epoch": 0.735284909204759, "grad_norm": 1.6234050989151, "learning_rate": 0.00019767839687194526, "loss": 0.5104, "step": 4697 }, { "epoch": 0.7354414527238572, "grad_norm": 2.461130142211914, "learning_rate": 0.0001976539589442815, "loss": 1.3501, "step": 4698 }, { "epoch": 0.7355979962429555, "grad_norm": 1.6503095626831055, "learning_rate": 0.00019762952101661776, "loss": 0.9382, "step": 4699 }, { "epoch": 0.7357545397620538, "grad_norm": 3.062802314758301, "learning_rate": 0.00019760508308895404, "loss": 1.4864, "step": 4700 }, { "epoch": 0.7359110832811522, "grad_norm": 0.5734681487083435, "learning_rate": 0.0001975806451612903, "loss": 0.2212, "step": 4701 }, { "epoch": 0.7360676268002505, "grad_norm": 0.5626726150512695, "learning_rate": 0.00019755620723362657, "loss": 0.2945, "step": 4702 }, { "epoch": 0.7362241703193487, "grad_norm": 0.5201683044433594, "learning_rate": 0.00019753176930596285, "loss": 0.2841, "step": 4703 }, { "epoch": 0.736380713838447, "grad_norm": 0.7159972190856934, "learning_rate": 0.0001975073313782991, "loss": 0.5189, "step": 4704 }, { "epoch": 0.7365372573575454, "grad_norm": 0.732814610004425, "learning_rate": 0.00019748289345063538, "loss": 0.3791, "step": 4705 }, { "epoch": 0.7366938008766437, "grad_norm": 0.7638669610023499, "learning_rate": 0.00019745845552297166, "loss": 0.4011, "step": 4706 }, { "epoch": 0.736850344395742, "grad_norm": 0.7000142931938171, "learning_rate": 0.00019743401759530788, "loss": 0.3322, "step": 4707 }, { "epoch": 0.7370068879148404, "grad_norm": 0.7838113307952881, "learning_rate": 0.00019740957966764416, "loss": 0.2646, "step": 4708 }, { "epoch": 0.7371634314339386, "grad_norm": 0.7795481085777283, "learning_rate": 0.00019738514173998044, "loss": 0.4024, "step": 4709 }, { "epoch": 0.7373199749530369, "grad_norm": 
0.6289814114570618, "learning_rate": 0.0001973607038123167, "loss": 0.381, "step": 4710 }, { "epoch": 0.7374765184721352, "grad_norm": 1.3623909950256348, "learning_rate": 0.00019733626588465297, "loss": 0.4522, "step": 4711 }, { "epoch": 0.7376330619912336, "grad_norm": 1.1188793182373047, "learning_rate": 0.00019731182795698925, "loss": 0.4841, "step": 4712 }, { "epoch": 0.7377896055103319, "grad_norm": 1.1815077066421509, "learning_rate": 0.0001972873900293255, "loss": 0.8086, "step": 4713 }, { "epoch": 0.7379461490294302, "grad_norm": 1.2694467306137085, "learning_rate": 0.00019726295210166178, "loss": 0.6665, "step": 4714 }, { "epoch": 0.7381026925485284, "grad_norm": 1.3122878074645996, "learning_rate": 0.00019723851417399805, "loss": 0.5785, "step": 4715 }, { "epoch": 0.7382592360676268, "grad_norm": 1.089698314666748, "learning_rate": 0.00019721407624633428, "loss": 0.4062, "step": 4716 }, { "epoch": 0.7384157795867251, "grad_norm": 0.9487230181694031, "learning_rate": 0.00019718963831867056, "loss": 0.455, "step": 4717 }, { "epoch": 0.7385723231058234, "grad_norm": 1.9743801355361938, "learning_rate": 0.00019716520039100683, "loss": 0.6051, "step": 4718 }, { "epoch": 0.7387288666249218, "grad_norm": 1.2036941051483154, "learning_rate": 0.00019714076246334309, "loss": 0.5583, "step": 4719 }, { "epoch": 0.73888541014402, "grad_norm": 1.425595760345459, "learning_rate": 0.00019711632453567936, "loss": 0.562, "step": 4720 }, { "epoch": 0.7390419536631183, "grad_norm": 0.8784242272377014, "learning_rate": 0.00019709188660801564, "loss": 0.4964, "step": 4721 }, { "epoch": 0.7391984971822166, "grad_norm": 3.1630759239196777, "learning_rate": 0.00019706744868035187, "loss": 0.411, "step": 4722 }, { "epoch": 0.739355040701315, "grad_norm": 1.366860270500183, "learning_rate": 0.00019704301075268815, "loss": 0.2949, "step": 4723 }, { "epoch": 0.7395115842204133, "grad_norm": 1.7974003553390503, "learning_rate": 0.00019701857282502442, "loss": 0.7963, "step": 4724 }, 
{ "epoch": 0.7396681277395116, "grad_norm": 2.272477388381958, "learning_rate": 0.00019699413489736067, "loss": 0.7553, "step": 4725 }, { "epoch": 0.7398246712586098, "grad_norm": 1.5582921504974365, "learning_rate": 0.00019696969696969695, "loss": 0.7692, "step": 4726 }, { "epoch": 0.7399812147777082, "grad_norm": 2.499539613723755, "learning_rate": 0.00019694525904203323, "loss": 0.6417, "step": 4727 }, { "epoch": 0.7401377582968065, "grad_norm": 2.7339725494384766, "learning_rate": 0.00019692082111436948, "loss": 0.8532, "step": 4728 }, { "epoch": 0.7402943018159048, "grad_norm": 1.615174651145935, "learning_rate": 0.00019689638318670576, "loss": 0.6366, "step": 4729 }, { "epoch": 0.7404508453350032, "grad_norm": 2.133688449859619, "learning_rate": 0.00019687194525904204, "loss": 1.0497, "step": 4730 }, { "epoch": 0.7406073888541015, "grad_norm": 1.7480201721191406, "learning_rate": 0.00019684750733137826, "loss": 0.7695, "step": 4731 }, { "epoch": 0.7407639323731997, "grad_norm": 3.059267282485962, "learning_rate": 0.00019682306940371454, "loss": 1.0978, "step": 4732 }, { "epoch": 0.740920475892298, "grad_norm": 2.2188549041748047, "learning_rate": 0.00019679863147605082, "loss": 1.1363, "step": 4733 }, { "epoch": 0.7410770194113964, "grad_norm": 1.8795181512832642, "learning_rate": 0.00019677419354838707, "loss": 0.6612, "step": 4734 }, { "epoch": 0.7412335629304947, "grad_norm": 1.7549196481704712, "learning_rate": 0.00019674975562072335, "loss": 0.987, "step": 4735 }, { "epoch": 0.741390106449593, "grad_norm": 2.692260265350342, "learning_rate": 0.00019672531769305963, "loss": 1.1045, "step": 4736 }, { "epoch": 0.7415466499686914, "grad_norm": 2.230609893798828, "learning_rate": 0.00019670087976539588, "loss": 1.2261, "step": 4737 }, { "epoch": 0.7417031934877896, "grad_norm": 3.0220465660095215, "learning_rate": 0.00019667644183773216, "loss": 1.0071, "step": 4738 }, { "epoch": 0.7418597370068879, "grad_norm": 1.864869236946106, "learning_rate": 
0.00019665200391006844, "loss": 1.1151, "step": 4739 }, { "epoch": 0.7420162805259862, "grad_norm": 5.262318134307861, "learning_rate": 0.00019662756598240466, "loss": 1.2847, "step": 4740 }, { "epoch": 0.7421728240450846, "grad_norm": 2.782365083694458, "learning_rate": 0.00019660312805474094, "loss": 1.131, "step": 4741 }, { "epoch": 0.7423293675641829, "grad_norm": 1.8691052198410034, "learning_rate": 0.00019657869012707722, "loss": 1.2404, "step": 4742 }, { "epoch": 0.7424859110832811, "grad_norm": 1.3026301860809326, "learning_rate": 0.00019655425219941347, "loss": 0.7896, "step": 4743 }, { "epoch": 0.7426424546023794, "grad_norm": 2.631027936935425, "learning_rate": 0.00019652981427174975, "loss": 1.0242, "step": 4744 }, { "epoch": 0.7427989981214778, "grad_norm": 2.3272483348846436, "learning_rate": 0.00019650537634408603, "loss": 1.4005, "step": 4745 }, { "epoch": 0.7429555416405761, "grad_norm": 2.3887293338775635, "learning_rate": 0.00019648093841642225, "loss": 0.7136, "step": 4746 }, { "epoch": 0.7431120851596744, "grad_norm": 1.8265565633773804, "learning_rate": 0.00019645650048875853, "loss": 0.6281, "step": 4747 }, { "epoch": 0.7432686286787727, "grad_norm": 2.172302007675171, "learning_rate": 0.0001964320625610948, "loss": 1.017, "step": 4748 }, { "epoch": 0.743425172197871, "grad_norm": 2.7405848503112793, "learning_rate": 0.00019640762463343106, "loss": 1.1234, "step": 4749 }, { "epoch": 0.7435817157169693, "grad_norm": 1.8919093608856201, "learning_rate": 0.00019638318670576734, "loss": 0.8493, "step": 4750 }, { "epoch": 0.7437382592360676, "grad_norm": 0.5198003053665161, "learning_rate": 0.00019635874877810361, "loss": 0.338, "step": 4751 }, { "epoch": 0.743894802755166, "grad_norm": 0.49822479486465454, "learning_rate": 0.00019633431085043987, "loss": 0.253, "step": 4752 }, { "epoch": 0.7440513462742643, "grad_norm": 0.4993680417537689, "learning_rate": 0.00019630987292277614, "loss": 0.2911, "step": 4753 }, { "epoch": 0.7442078897933626, 
"grad_norm": 0.7572706341743469, "learning_rate": 0.00019628543499511242, "loss": 0.3687, "step": 4754 }, { "epoch": 0.7443644333124608, "grad_norm": 0.4129246175289154, "learning_rate": 0.00019626099706744865, "loss": 0.2211, "step": 4755 }, { "epoch": 0.7445209768315592, "grad_norm": 0.4937713146209717, "learning_rate": 0.00019623655913978492, "loss": 0.3346, "step": 4756 }, { "epoch": 0.7446775203506575, "grad_norm": 0.7381402850151062, "learning_rate": 0.0001962121212121212, "loss": 0.3343, "step": 4757 }, { "epoch": 0.7448340638697558, "grad_norm": 0.9552004933357239, "learning_rate": 0.00019618768328445745, "loss": 0.3558, "step": 4758 }, { "epoch": 0.7449906073888541, "grad_norm": 0.7461534142494202, "learning_rate": 0.00019616324535679373, "loss": 0.3339, "step": 4759 }, { "epoch": 0.7451471509079524, "grad_norm": 0.7386569976806641, "learning_rate": 0.00019613880742913, "loss": 0.4365, "step": 4760 }, { "epoch": 0.7453036944270507, "grad_norm": 0.824906587600708, "learning_rate": 0.00019611436950146626, "loss": 0.3719, "step": 4761 }, { "epoch": 0.745460237946149, "grad_norm": 2.553926467895508, "learning_rate": 0.00019608993157380254, "loss": 0.3857, "step": 4762 }, { "epoch": 0.7456167814652473, "grad_norm": 1.7398136854171753, "learning_rate": 0.0001960654936461388, "loss": 0.6648, "step": 4763 }, { "epoch": 0.7457733249843457, "grad_norm": 1.5790472030639648, "learning_rate": 0.00019604105571847504, "loss": 0.5358, "step": 4764 }, { "epoch": 0.745929868503444, "grad_norm": 0.9220511317253113, "learning_rate": 0.00019601661779081132, "loss": 0.3689, "step": 4765 }, { "epoch": 0.7460864120225422, "grad_norm": 2.0262274742126465, "learning_rate": 0.0001959921798631476, "loss": 1.2123, "step": 4766 }, { "epoch": 0.7462429555416406, "grad_norm": 0.8574962615966797, "learning_rate": 0.00019596774193548385, "loss": 0.5389, "step": 4767 }, { "epoch": 0.7463994990607389, "grad_norm": 0.822786271572113, "learning_rate": 0.00019594330400782013, "loss": 0.5364, 
"step": 4768 }, { "epoch": 0.7465560425798372, "grad_norm": 1.0409291982650757, "learning_rate": 0.0001959188660801564, "loss": 0.8335, "step": 4769 }, { "epoch": 0.7467125860989355, "grad_norm": 0.9173699617385864, "learning_rate": 0.00019589442815249263, "loss": 0.5052, "step": 4770 }, { "epoch": 0.7468691296180339, "grad_norm": 1.4677929878234863, "learning_rate": 0.0001958699902248289, "loss": 0.6807, "step": 4771 }, { "epoch": 0.7470256731371321, "grad_norm": 1.5798442363739014, "learning_rate": 0.0001958455522971652, "loss": 0.6056, "step": 4772 }, { "epoch": 0.7471822166562304, "grad_norm": 1.009047508239746, "learning_rate": 0.00019582111436950144, "loss": 0.5881, "step": 4773 }, { "epoch": 0.7473387601753287, "grad_norm": 2.4256644248962402, "learning_rate": 0.00019579667644183772, "loss": 0.5304, "step": 4774 }, { "epoch": 0.7474953036944271, "grad_norm": 2.2748332023620605, "learning_rate": 0.000195772238514174, "loss": 0.6864, "step": 4775 }, { "epoch": 0.7476518472135254, "grad_norm": 2.184377431869507, "learning_rate": 0.00019574780058651025, "loss": 0.7061, "step": 4776 }, { "epoch": 0.7478083907326236, "grad_norm": 2.4698145389556885, "learning_rate": 0.00019572336265884653, "loss": 0.8436, "step": 4777 }, { "epoch": 0.747964934251722, "grad_norm": 0.8937560319900513, "learning_rate": 0.0001956989247311828, "loss": 0.4022, "step": 4778 }, { "epoch": 0.7481214777708203, "grad_norm": 1.8450487852096558, "learning_rate": 0.00019567448680351903, "loss": 0.6108, "step": 4779 }, { "epoch": 0.7482780212899186, "grad_norm": 1.5226956605911255, "learning_rate": 0.0001956500488758553, "loss": 0.7764, "step": 4780 }, { "epoch": 0.7484345648090169, "grad_norm": 3.9673829078674316, "learning_rate": 0.00019562561094819159, "loss": 0.9701, "step": 4781 }, { "epoch": 0.7485911083281153, "grad_norm": 1.7222696542739868, "learning_rate": 0.00019560117302052784, "loss": 0.887, "step": 4782 }, { "epoch": 0.7487476518472135, "grad_norm": 3.377755641937256, 
"learning_rate": 0.00019557673509286411, "loss": 0.993, "step": 4783 }, { "epoch": 0.7489041953663118, "grad_norm": 2.456941604614258, "learning_rate": 0.0001955522971652004, "loss": 1.0556, "step": 4784 }, { "epoch": 0.7490607388854101, "grad_norm": 3.0926427841186523, "learning_rate": 0.00019552785923753664, "loss": 1.115, "step": 4785 }, { "epoch": 0.7492172824045085, "grad_norm": 5.375258445739746, "learning_rate": 0.00019550342130987292, "loss": 1.5124, "step": 4786 }, { "epoch": 0.7493738259236068, "grad_norm": 2.7964589595794678, "learning_rate": 0.00019547898338220917, "loss": 1.1732, "step": 4787 }, { "epoch": 0.7495303694427051, "grad_norm": 1.498062014579773, "learning_rate": 0.00019545454545454543, "loss": 1.0657, "step": 4788 }, { "epoch": 0.7496869129618033, "grad_norm": 6.085139751434326, "learning_rate": 0.0001954301075268817, "loss": 1.1077, "step": 4789 }, { "epoch": 0.7498434564809017, "grad_norm": 3.144690990447998, "learning_rate": 0.00019540566959921798, "loss": 1.4877, "step": 4790 }, { "epoch": 0.75, "grad_norm": 3.460711717605591, "learning_rate": 0.00019538123167155423, "loss": 1.4845, "step": 4791 }, { "epoch": 0.7501565435190983, "grad_norm": 2.4659714698791504, "learning_rate": 0.0001953567937438905, "loss": 1.4173, "step": 4792 }, { "epoch": 0.7503130870381967, "grad_norm": 2.4354169368743896, "learning_rate": 0.0001953323558162268, "loss": 1.5278, "step": 4793 }, { "epoch": 0.7504696305572949, "grad_norm": 4.406135559082031, "learning_rate": 0.00019530791788856301, "loss": 1.5933, "step": 4794 }, { "epoch": 0.7506261740763932, "grad_norm": 2.1102359294891357, "learning_rate": 0.0001952834799608993, "loss": 1.278, "step": 4795 }, { "epoch": 0.7507827175954915, "grad_norm": 1.0757032632827759, "learning_rate": 0.00019525904203323557, "loss": 0.439, "step": 4796 }, { "epoch": 0.7509392611145899, "grad_norm": 1.6576645374298096, "learning_rate": 0.00019523460410557182, "loss": 0.5876, "step": 4797 }, { "epoch": 0.7510958046336882, 
"grad_norm": 2.0612480640411377, "learning_rate": 0.0001952101661779081, "loss": 1.1348, "step": 4798 }, { "epoch": 0.7512523481527865, "grad_norm": 4.78619384765625, "learning_rate": 0.00019518572825024438, "loss": 1.1104, "step": 4799 }, { "epoch": 0.7514088916718847, "grad_norm": 2.2526986598968506, "learning_rate": 0.00019516129032258063, "loss": 1.3444, "step": 4800 }, { "epoch": 0.7515654351909831, "grad_norm": 0.41874295473098755, "learning_rate": 0.0001951368523949169, "loss": 0.322, "step": 4801 }, { "epoch": 0.7517219787100814, "grad_norm": 0.5169926285743713, "learning_rate": 0.0001951124144672532, "loss": 0.2551, "step": 4802 }, { "epoch": 0.7518785222291797, "grad_norm": 0.8094984292984009, "learning_rate": 0.0001950879765395894, "loss": 0.3832, "step": 4803 }, { "epoch": 0.752035065748278, "grad_norm": 0.5564207434654236, "learning_rate": 0.0001950635386119257, "loss": 0.2254, "step": 4804 }, { "epoch": 0.7521916092673764, "grad_norm": 0.42472612857818604, "learning_rate": 0.00019503910068426197, "loss": 0.3473, "step": 4805 }, { "epoch": 0.7523481527864746, "grad_norm": 0.7204293012619019, "learning_rate": 0.00019501466275659822, "loss": 0.3752, "step": 4806 }, { "epoch": 0.7525046963055729, "grad_norm": 1.2864282131195068, "learning_rate": 0.0001949902248289345, "loss": 0.4892, "step": 4807 }, { "epoch": 0.7526612398246713, "grad_norm": 0.8378409743309021, "learning_rate": 0.00019496578690127078, "loss": 0.4192, "step": 4808 }, { "epoch": 0.7528177833437696, "grad_norm": 0.6147698163986206, "learning_rate": 0.00019494134897360703, "loss": 0.3945, "step": 4809 }, { "epoch": 0.7529743268628679, "grad_norm": 0.4717334508895874, "learning_rate": 0.0001949169110459433, "loss": 0.2257, "step": 4810 }, { "epoch": 0.7531308703819661, "grad_norm": 1.3184202909469604, "learning_rate": 0.00019489247311827956, "loss": 0.597, "step": 4811 }, { "epoch": 0.7532874139010645, "grad_norm": 1.2259258031845093, "learning_rate": 0.0001948680351906158, "loss": 0.4335, 
"step": 4812 }, { "epoch": 0.7534439574201628, "grad_norm": 0.6783806085586548, "learning_rate": 0.00019484359726295209, "loss": 0.3843, "step": 4813 }, { "epoch": 0.7536005009392611, "grad_norm": 1.3408708572387695, "learning_rate": 0.00019481915933528836, "loss": 0.3306, "step": 4814 }, { "epoch": 0.7537570444583594, "grad_norm": 0.9002304673194885, "learning_rate": 0.00019479472140762462, "loss": 0.4586, "step": 4815 }, { "epoch": 0.7539135879774578, "grad_norm": 1.12655770778656, "learning_rate": 0.0001947702834799609, "loss": 0.5046, "step": 4816 }, { "epoch": 0.754070131496556, "grad_norm": 0.9550591111183167, "learning_rate": 0.00019474584555229717, "loss": 0.5049, "step": 4817 }, { "epoch": 0.7542266750156543, "grad_norm": 1.1619162559509277, "learning_rate": 0.0001947214076246334, "loss": 0.5001, "step": 4818 }, { "epoch": 0.7543832185347527, "grad_norm": 0.7153265476226807, "learning_rate": 0.00019469696969696967, "loss": 0.3228, "step": 4819 }, { "epoch": 0.754539762053851, "grad_norm": 0.8718234300613403, "learning_rate": 0.00019467253176930595, "loss": 0.3079, "step": 4820 }, { "epoch": 0.7546963055729493, "grad_norm": 1.3940988779067993, "learning_rate": 0.0001946480938416422, "loss": 0.5131, "step": 4821 }, { "epoch": 0.7548528490920476, "grad_norm": 1.4768911600112915, "learning_rate": 0.00019462365591397848, "loss": 0.6426, "step": 4822 }, { "epoch": 0.7550093926111459, "grad_norm": 1.1347852945327759, "learning_rate": 0.00019459921798631476, "loss": 0.5, "step": 4823 }, { "epoch": 0.7551659361302442, "grad_norm": 2.5851755142211914, "learning_rate": 0.000194574780058651, "loss": 0.7089, "step": 4824 }, { "epoch": 0.7553224796493425, "grad_norm": 1.3372689485549927, "learning_rate": 0.0001945503421309873, "loss": 0.7053, "step": 4825 }, { "epoch": 0.7554790231684408, "grad_norm": 2.5522773265838623, "learning_rate": 0.00019452590420332357, "loss": 0.6847, "step": 4826 }, { "epoch": 0.7556355666875392, "grad_norm": 1.1774206161499023, 
"learning_rate": 0.0001945014662756598, "loss": 0.5898, "step": 4827 }, { "epoch": 0.7557921102066374, "grad_norm": 1.0767072439193726, "learning_rate": 0.00019447702834799607, "loss": 0.529, "step": 4828 }, { "epoch": 0.7559486537257357, "grad_norm": 1.9729124307632446, "learning_rate": 0.00019445259042033235, "loss": 0.4727, "step": 4829 }, { "epoch": 0.756105197244834, "grad_norm": 5.122827053070068, "learning_rate": 0.0001944281524926686, "loss": 1.1988, "step": 4830 }, { "epoch": 0.7562617407639324, "grad_norm": 1.9116922616958618, "learning_rate": 0.00019440371456500488, "loss": 0.8946, "step": 4831 }, { "epoch": 0.7564182842830307, "grad_norm": 2.4748942852020264, "learning_rate": 0.00019437927663734116, "loss": 1.0248, "step": 4832 }, { "epoch": 0.756574827802129, "grad_norm": 2.4819397926330566, "learning_rate": 0.0001943548387096774, "loss": 1.1779, "step": 4833 }, { "epoch": 0.7567313713212273, "grad_norm": 2.278400421142578, "learning_rate": 0.00019433040078201366, "loss": 0.7033, "step": 4834 }, { "epoch": 0.7568879148403256, "grad_norm": 2.6924045085906982, "learning_rate": 0.00019430596285434994, "loss": 0.8813, "step": 4835 }, { "epoch": 0.7570444583594239, "grad_norm": 1.8537712097167969, "learning_rate": 0.0001942815249266862, "loss": 0.9923, "step": 4836 }, { "epoch": 0.7572010018785222, "grad_norm": 3.0504987239837646, "learning_rate": 0.00019425708699902247, "loss": 0.8471, "step": 4837 }, { "epoch": 0.7573575453976206, "grad_norm": 3.1439967155456543, "learning_rate": 0.00019423264907135875, "loss": 1.0216, "step": 4838 }, { "epoch": 0.7575140889167189, "grad_norm": 4.802983283996582, "learning_rate": 0.000194208211143695, "loss": 0.8951, "step": 4839 }, { "epoch": 0.7576706324358171, "grad_norm": 2.6184773445129395, "learning_rate": 0.00019418377321603128, "loss": 1.1434, "step": 4840 }, { "epoch": 0.7578271759549154, "grad_norm": 4.851845741271973, "learning_rate": 0.00019415933528836755, "loss": 1.3546, "step": 4841 }, { "epoch": 
0.7579837194740138, "grad_norm": 2.406771421432495, "learning_rate": 0.00019413489736070378, "loss": 1.5022, "step": 4842 }, { "epoch": 0.7581402629931121, "grad_norm": 3.594590663909912, "learning_rate": 0.00019411045943304006, "loss": 1.6779, "step": 4843 }, { "epoch": 0.7582968065122104, "grad_norm": 2.279587984085083, "learning_rate": 0.00019408602150537634, "loss": 1.8704, "step": 4844 }, { "epoch": 0.7584533500313086, "grad_norm": 2.240976333618164, "learning_rate": 0.0001940615835777126, "loss": 1.3369, "step": 4845 }, { "epoch": 0.758609893550407, "grad_norm": 2.4125685691833496, "learning_rate": 0.00019403714565004886, "loss": 0.6641, "step": 4846 }, { "epoch": 0.7587664370695053, "grad_norm": 3.437535285949707, "learning_rate": 0.00019401270772238514, "loss": 0.9713, "step": 4847 }, { "epoch": 0.7589229805886036, "grad_norm": 5.112056732177734, "learning_rate": 0.0001939882697947214, "loss": 0.6326, "step": 4848 }, { "epoch": 0.759079524107702, "grad_norm": 3.5413787364959717, "learning_rate": 0.00019396383186705767, "loss": 1.0366, "step": 4849 }, { "epoch": 0.7592360676268003, "grad_norm": 1.493313193321228, "learning_rate": 0.00019393939393939395, "loss": 0.7756, "step": 4850 }, { "epoch": 0.7593926111458985, "grad_norm": 0.4888933598995209, "learning_rate": 0.00019391495601173018, "loss": 0.2853, "step": 4851 }, { "epoch": 0.7595491546649968, "grad_norm": 0.5343114137649536, "learning_rate": 0.00019389051808406645, "loss": 0.3158, "step": 4852 }, { "epoch": 0.7597056981840952, "grad_norm": 0.571465790271759, "learning_rate": 0.00019386608015640273, "loss": 0.322, "step": 4853 }, { "epoch": 0.7598622417031935, "grad_norm": 0.4364553689956665, "learning_rate": 0.00019384164222873898, "loss": 0.2126, "step": 4854 }, { "epoch": 0.7600187852222918, "grad_norm": 1.6458876132965088, "learning_rate": 0.00019381720430107526, "loss": 0.5031, "step": 4855 }, { "epoch": 0.7601753287413902, "grad_norm": 0.8052377104759216, "learning_rate": 0.00019379276637341154, 
"loss": 0.3538, "step": 4856 }, { "epoch": 0.7603318722604884, "grad_norm": 0.7143148183822632, "learning_rate": 0.0001937683284457478, "loss": 0.2654, "step": 4857 }, { "epoch": 0.7604884157795867, "grad_norm": 1.215345859527588, "learning_rate": 0.00019374389051808404, "loss": 0.3274, "step": 4858 }, { "epoch": 0.760644959298685, "grad_norm": 1.0219749212265015, "learning_rate": 0.0001937194525904203, "loss": 0.4571, "step": 4859 }, { "epoch": 0.7608015028177834, "grad_norm": 1.1042121648788452, "learning_rate": 0.00019369501466275657, "loss": 0.469, "step": 4860 }, { "epoch": 0.7609580463368817, "grad_norm": 0.9635404944419861, "learning_rate": 0.00019367057673509285, "loss": 0.5121, "step": 4861 }, { "epoch": 0.76111458985598, "grad_norm": 1.2368090152740479, "learning_rate": 0.0001936461388074291, "loss": 0.4804, "step": 4862 }, { "epoch": 0.7612711333750782, "grad_norm": 1.2510402202606201, "learning_rate": 0.00019362170087976538, "loss": 0.5622, "step": 4863 }, { "epoch": 0.7614276768941766, "grad_norm": 0.9782634973526001, "learning_rate": 0.00019359726295210166, "loss": 0.495, "step": 4864 }, { "epoch": 0.7615842204132749, "grad_norm": 1.6762943267822266, "learning_rate": 0.00019357282502443788, "loss": 0.5429, "step": 4865 }, { "epoch": 0.7617407639323732, "grad_norm": 0.6857059597969055, "learning_rate": 0.00019354838709677416, "loss": 0.3212, "step": 4866 }, { "epoch": 0.7618973074514716, "grad_norm": 1.0604544878005981, "learning_rate": 0.00019352394916911044, "loss": 0.4868, "step": 4867 }, { "epoch": 0.7620538509705698, "grad_norm": 1.5608930587768555, "learning_rate": 0.0001934995112414467, "loss": 0.7897, "step": 4868 }, { "epoch": 0.7622103944896681, "grad_norm": 1.5711655616760254, "learning_rate": 0.00019347507331378297, "loss": 0.6437, "step": 4869 }, { "epoch": 0.7623669380087664, "grad_norm": 1.5553734302520752, "learning_rate": 0.00019345063538611925, "loss": 0.4996, "step": 4870 }, { "epoch": 0.7625234815278648, "grad_norm": 
1.692755937576294, "learning_rate": 0.0001934261974584555, "loss": 0.5887, "step": 4871 }, { "epoch": 0.7626800250469631, "grad_norm": 1.0537468194961548, "learning_rate": 0.00019340175953079178, "loss": 0.4608, "step": 4872 }, { "epoch": 0.7628365685660614, "grad_norm": 1.4170693159103394, "learning_rate": 0.00019337732160312806, "loss": 0.4771, "step": 4873 }, { "epoch": 0.7629931120851596, "grad_norm": 2.4024274349212646, "learning_rate": 0.00019335288367546428, "loss": 0.8474, "step": 4874 }, { "epoch": 0.763149655604258, "grad_norm": 1.5059177875518799, "learning_rate": 0.00019332844574780056, "loss": 0.559, "step": 4875 }, { "epoch": 0.7633061991233563, "grad_norm": 1.0502533912658691, "learning_rate": 0.00019330400782013684, "loss": 0.4221, "step": 4876 }, { "epoch": 0.7634627426424546, "grad_norm": 2.7585763931274414, "learning_rate": 0.0001932795698924731, "loss": 0.8188, "step": 4877 }, { "epoch": 0.763619286161553, "grad_norm": 1.459269642829895, "learning_rate": 0.00019325513196480937, "loss": 0.584, "step": 4878 }, { "epoch": 0.7637758296806513, "grad_norm": 3.582939624786377, "learning_rate": 0.00019323069403714564, "loss": 1.2836, "step": 4879 }, { "epoch": 0.7639323731997495, "grad_norm": 2.4551584720611572, "learning_rate": 0.0001932062561094819, "loss": 1.0656, "step": 4880 }, { "epoch": 0.7640889167188478, "grad_norm": 1.6925396919250488, "learning_rate": 0.00019318181818181815, "loss": 0.8687, "step": 4881 }, { "epoch": 0.7642454602379462, "grad_norm": 2.231828451156616, "learning_rate": 0.00019315738025415442, "loss": 1.13, "step": 4882 }, { "epoch": 0.7644020037570445, "grad_norm": 2.1356468200683594, "learning_rate": 0.00019313294232649068, "loss": 1.1292, "step": 4883 }, { "epoch": 0.7645585472761428, "grad_norm": 2.5488100051879883, "learning_rate": 0.00019310850439882695, "loss": 0.9482, "step": 4884 }, { "epoch": 0.764715090795241, "grad_norm": 2.787428617477417, "learning_rate": 0.00019308406647116323, "loss": 1.275, "step": 4885 }, { 
"epoch": 0.7648716343143394, "grad_norm": 2.909212112426758, "learning_rate": 0.00019305962854349948, "loss": 1.1905, "step": 4886 }, { "epoch": 0.7650281778334377, "grad_norm": 1.801330804824829, "learning_rate": 0.00019303519061583576, "loss": 0.9273, "step": 4887 }, { "epoch": 0.765184721352536, "grad_norm": 1.8224399089813232, "learning_rate": 0.00019301075268817204, "loss": 1.3595, "step": 4888 }, { "epoch": 0.7653412648716343, "grad_norm": 1.63428795337677, "learning_rate": 0.00019298631476050826, "loss": 1.1251, "step": 4889 }, { "epoch": 0.7654978083907327, "grad_norm": 2.024421215057373, "learning_rate": 0.00019296187683284454, "loss": 1.0573, "step": 4890 }, { "epoch": 0.7656543519098309, "grad_norm": 2.4601943492889404, "learning_rate": 0.00019293743890518082, "loss": 1.2419, "step": 4891 }, { "epoch": 0.7658108954289292, "grad_norm": 1.7658562660217285, "learning_rate": 0.00019291300097751707, "loss": 0.7137, "step": 4892 }, { "epoch": 0.7659674389480275, "grad_norm": 2.124553680419922, "learning_rate": 0.00019288856304985335, "loss": 1.0605, "step": 4893 }, { "epoch": 0.7661239824671259, "grad_norm": 1.8370262384414673, "learning_rate": 0.00019286412512218963, "loss": 1.3344, "step": 4894 }, { "epoch": 0.7662805259862242, "grad_norm": 1.633594036102295, "learning_rate": 0.00019283968719452588, "loss": 1.9377, "step": 4895 }, { "epoch": 0.7664370695053225, "grad_norm": 1.5041015148162842, "learning_rate": 0.00019281524926686216, "loss": 0.7699, "step": 4896 }, { "epoch": 0.7665936130244208, "grad_norm": NaN, "learning_rate": 0.00019281524926686216, "loss": 0.0, "step": 4897 }, { "epoch": 0.7667501565435191, "grad_norm": 2.5734617710113525, "learning_rate": 0.00019279081133919844, "loss": 0.6776, "step": 4898 }, { "epoch": 0.7669067000626174, "grad_norm": 2.468716859817505, "learning_rate": 0.00019276637341153466, "loss": 0.8875, "step": 4899 }, { "epoch": 0.7670632435817157, "grad_norm": 2.735874891281128, "learning_rate": 0.00019274193548387094, 
"loss": 0.6044, "step": 4900 }, { "epoch": 0.7672197871008141, "grad_norm": 0.6753793954849243, "learning_rate": 0.00019271749755620722, "loss": 0.3667, "step": 4901 }, { "epoch": 0.7673763306199123, "grad_norm": 0.5195480585098267, "learning_rate": 0.00019269305962854347, "loss": 0.3415, "step": 4902 }, { "epoch": 0.7675328741390106, "grad_norm": 0.5045526623725891, "learning_rate": 0.00019266862170087975, "loss": 0.3182, "step": 4903 }, { "epoch": 0.7676894176581089, "grad_norm": 2.755859136581421, "learning_rate": 0.00019264418377321603, "loss": 0.5985, "step": 4904 }, { "epoch": 0.7678459611772073, "grad_norm": 0.4260087013244629, "learning_rate": 0.00019261974584555228, "loss": 0.2805, "step": 4905 }, { "epoch": 0.7680025046963056, "grad_norm": 0.8395310044288635, "learning_rate": 0.00019259530791788853, "loss": 0.4325, "step": 4906 }, { "epoch": 0.7681590482154039, "grad_norm": 0.5676664710044861, "learning_rate": 0.0001925708699902248, "loss": 0.2764, "step": 4907 }, { "epoch": 0.7683155917345021, "grad_norm": 0.5876914262771606, "learning_rate": 0.00019254643206256106, "loss": 0.3626, "step": 4908 }, { "epoch": 0.7684721352536005, "grad_norm": 0.6870219707489014, "learning_rate": 0.00019252199413489734, "loss": 0.3014, "step": 4909 }, { "epoch": 0.7686286787726988, "grad_norm": 0.8017184138298035, "learning_rate": 0.00019249755620723362, "loss": 0.5405, "step": 4910 }, { "epoch": 0.7687852222917971, "grad_norm": 0.7471238374710083, "learning_rate": 0.00019247311827956987, "loss": 0.4282, "step": 4911 }, { "epoch": 0.7689417658108955, "grad_norm": 0.7291784286499023, "learning_rate": 0.00019244868035190614, "loss": 0.466, "step": 4912 }, { "epoch": 0.7690983093299938, "grad_norm": 0.8841153383255005, "learning_rate": 0.00019242424242424242, "loss": 0.4488, "step": 4913 }, { "epoch": 0.769254852849092, "grad_norm": 0.8288737535476685, "learning_rate": 0.00019239980449657865, "loss": 0.3224, "step": 4914 }, { "epoch": 0.7694113963681903, "grad_norm": 
2.8311057090759277, "learning_rate": 0.00019237536656891493, "loss": 0.5345, "step": 4915 }, { "epoch": 0.7695679398872887, "grad_norm": 0.9499149322509766, "learning_rate": 0.0001923509286412512, "loss": 0.3726, "step": 4916 }, { "epoch": 0.769724483406387, "grad_norm": 1.6338462829589844, "learning_rate": 0.00019232649071358746, "loss": 0.4571, "step": 4917 }, { "epoch": 0.7698810269254853, "grad_norm": 1.347562313079834, "learning_rate": 0.00019230205278592373, "loss": 0.6984, "step": 4918 }, { "epoch": 0.7700375704445835, "grad_norm": 1.349605679512024, "learning_rate": 0.00019227761485826, "loss": 0.5362, "step": 4919 }, { "epoch": 0.7701941139636819, "grad_norm": 0.9548114538192749, "learning_rate": 0.00019225317693059626, "loss": 0.3536, "step": 4920 }, { "epoch": 0.7703506574827802, "grad_norm": 1.3998003005981445, "learning_rate": 0.00019222873900293254, "loss": 0.593, "step": 4921 }, { "epoch": 0.7705072010018785, "grad_norm": 2.3617327213287354, "learning_rate": 0.00019220430107526882, "loss": 0.652, "step": 4922 }, { "epoch": 0.7706637445209769, "grad_norm": 2.4933969974517822, "learning_rate": 0.00019217986314760504, "loss": 0.8062, "step": 4923 }, { "epoch": 0.7708202880400752, "grad_norm": 1.7823599576950073, "learning_rate": 0.00019215542521994132, "loss": 0.545, "step": 4924 }, { "epoch": 0.7709768315591734, "grad_norm": 1.9176454544067383, "learning_rate": 0.0001921309872922776, "loss": 0.4175, "step": 4925 }, { "epoch": 0.7711333750782717, "grad_norm": 1.5695958137512207, "learning_rate": 0.00019210654936461385, "loss": 0.7757, "step": 4926 }, { "epoch": 0.7712899185973701, "grad_norm": 1.9228142499923706, "learning_rate": 0.00019208211143695013, "loss": 0.9986, "step": 4927 }, { "epoch": 0.7714464621164684, "grad_norm": 2.260345458984375, "learning_rate": 0.0001920576735092864, "loss": 1.1804, "step": 4928 }, { "epoch": 0.7716030056355667, "grad_norm": 1.4471839666366577, "learning_rate": 0.00019203323558162266, "loss": 0.7815, "step": 4929 }, { 
"epoch": 0.771759549154665, "grad_norm": 2.359945774078369, "learning_rate": 0.0001920087976539589, "loss": 0.6585, "step": 4930 }, { "epoch": 0.7719160926737633, "grad_norm": 2.4294509887695312, "learning_rate": 0.0001919843597262952, "loss": 0.8588, "step": 4931 }, { "epoch": 0.7720726361928616, "grad_norm": 2.964313268661499, "learning_rate": 0.00019195992179863144, "loss": 1.2107, "step": 4932 }, { "epoch": 0.7722291797119599, "grad_norm": 2.60548996925354, "learning_rate": 0.00019193548387096772, "loss": 0.5851, "step": 4933 }, { "epoch": 0.7723857232310583, "grad_norm": 2.0704550743103027, "learning_rate": 0.000191911045943304, "loss": 0.9809, "step": 4934 }, { "epoch": 0.7725422667501566, "grad_norm": 3.5938777923583984, "learning_rate": 0.00019188660801564025, "loss": 0.8783, "step": 4935 }, { "epoch": 0.7726988102692548, "grad_norm": 1.8894193172454834, "learning_rate": 0.00019186217008797653, "loss": 0.856, "step": 4936 }, { "epoch": 0.7728553537883531, "grad_norm": 2.43099308013916, "learning_rate": 0.0001918377321603128, "loss": 1.0566, "step": 4937 }, { "epoch": 0.7730118973074515, "grad_norm": 2.183492660522461, "learning_rate": 0.00019181329423264903, "loss": 1.2289, "step": 4938 }, { "epoch": 0.7731684408265498, "grad_norm": 1.2107994556427002, "learning_rate": 0.0001917888563049853, "loss": 0.7877, "step": 4939 }, { "epoch": 0.7733249843456481, "grad_norm": 1.4150274991989136, "learning_rate": 0.00019176441837732159, "loss": 0.9841, "step": 4940 }, { "epoch": 0.7734815278647464, "grad_norm": 1.461196780204773, "learning_rate": 0.00019173998044965784, "loss": 1.1294, "step": 4941 }, { "epoch": 0.7736380713838447, "grad_norm": 2.6352620124816895, "learning_rate": 0.00019171554252199412, "loss": 1.2065, "step": 4942 }, { "epoch": 0.773794614902943, "grad_norm": 2.3676352500915527, "learning_rate": 0.0001916911045943304, "loss": 1.1227, "step": 4943 }, { "epoch": 0.7739511584220413, "grad_norm": 2.5080063343048096, "learning_rate": 
0.00019166666666666665, "loss": 1.377, "step": 4944 }, { "epoch": 0.7741077019411396, "grad_norm": 2.701669216156006, "learning_rate": 0.00019164222873900292, "loss": 1.71, "step": 4945 }, { "epoch": 0.774264245460238, "grad_norm": 1.4313629865646362, "learning_rate": 0.0001916177908113392, "loss": 0.5902, "step": 4946 }, { "epoch": 0.7744207889793363, "grad_norm": 2.563730478286743, "learning_rate": 0.00019159335288367543, "loss": 1.0311, "step": 4947 }, { "epoch": 0.7745773324984345, "grad_norm": 2.521282434463501, "learning_rate": 0.0001915689149560117, "loss": 0.8272, "step": 4948 }, { "epoch": 0.7747338760175329, "grad_norm": 2.894930601119995, "learning_rate": 0.00019154447702834798, "loss": 0.8662, "step": 4949 }, { "epoch": 0.7748904195366312, "grad_norm": 1.747538685798645, "learning_rate": 0.00019152003910068423, "loss": 0.5643, "step": 4950 }, { "epoch": 0.7750469630557295, "grad_norm": 0.750394880771637, "learning_rate": 0.0001914956011730205, "loss": 0.3981, "step": 4951 }, { "epoch": 0.7752035065748278, "grad_norm": 0.8471237421035767, "learning_rate": 0.0001914711632453568, "loss": 0.711, "step": 4952 }, { "epoch": 0.7753600500939261, "grad_norm": 0.6519607901573181, "learning_rate": 0.00019144672531769302, "loss": 0.4321, "step": 4953 }, { "epoch": 0.7755165936130244, "grad_norm": 0.5569426417350769, "learning_rate": 0.0001914222873900293, "loss": 0.2508, "step": 4954 }, { "epoch": 0.7756731371321227, "grad_norm": 0.8768931031227112, "learning_rate": 0.00019139784946236557, "loss": 0.5589, "step": 4955 }, { "epoch": 0.775829680651221, "grad_norm": 0.6277607083320618, "learning_rate": 0.00019137341153470182, "loss": 0.4621, "step": 4956 }, { "epoch": 0.7759862241703194, "grad_norm": 0.6822944283485413, "learning_rate": 0.0001913489736070381, "loss": 0.4333, "step": 4957 }, { "epoch": 0.7761427676894177, "grad_norm": 0.45079341530799866, "learning_rate": 0.00019132453567937438, "loss": 0.3288, "step": 4958 }, { "epoch": 0.7762993112085159, 
"grad_norm": 0.6848064661026001, "learning_rate": 0.00019130009775171063, "loss": 0.4235, "step": 4959 }, { "epoch": 0.7764558547276142, "grad_norm": 0.524940013885498, "learning_rate": 0.0001912756598240469, "loss": 0.2753, "step": 4960 }, { "epoch": 0.7766123982467126, "grad_norm": 1.9908454418182373, "learning_rate": 0.0001912512218963832, "loss": 0.421, "step": 4961 }, { "epoch": 0.7767689417658109, "grad_norm": 0.7081311345100403, "learning_rate": 0.0001912267839687194, "loss": 0.383, "step": 4962 }, { "epoch": 0.7769254852849092, "grad_norm": 1.0572980642318726, "learning_rate": 0.0001912023460410557, "loss": 0.5341, "step": 4963 }, { "epoch": 0.7770820288040076, "grad_norm": 7.10352087020874, "learning_rate": 0.00019117790811339197, "loss": 1.4755, "step": 4964 }, { "epoch": 0.7772385723231058, "grad_norm": 1.0377283096313477, "learning_rate": 0.00019115347018572822, "loss": 0.5192, "step": 4965 }, { "epoch": 0.7773951158422041, "grad_norm": 1.1436519622802734, "learning_rate": 0.0001911290322580645, "loss": 0.426, "step": 4966 }, { "epoch": 0.7775516593613024, "grad_norm": 2.1015119552612305, "learning_rate": 0.00019110459433040078, "loss": 0.6584, "step": 4967 }, { "epoch": 0.7777082028804008, "grad_norm": 0.8442532420158386, "learning_rate": 0.00019108015640273703, "loss": 0.3944, "step": 4968 }, { "epoch": 0.7778647463994991, "grad_norm": 1.5063536167144775, "learning_rate": 0.0001910557184750733, "loss": 0.5206, "step": 4969 }, { "epoch": 0.7780212899185974, "grad_norm": 1.2549412250518799, "learning_rate": 0.00019103128054740958, "loss": 0.492, "step": 4970 }, { "epoch": 0.7781778334376956, "grad_norm": 1.9004099369049072, "learning_rate": 0.0001910068426197458, "loss": 0.6607, "step": 4971 }, { "epoch": 0.778334376956794, "grad_norm": 0.9570836424827576, "learning_rate": 0.0001909824046920821, "loss": 0.4599, "step": 4972 }, { "epoch": 0.7784909204758923, "grad_norm": 1.3211475610733032, "learning_rate": 0.00019095796676441837, "loss": 0.4858, "step": 
4973 }, { "epoch": 0.7786474639949906, "grad_norm": 1.0831687450408936, "learning_rate": 0.00019093352883675462, "loss": 0.4007, "step": 4974 }, { "epoch": 0.778804007514089, "grad_norm": 1.4682847261428833, "learning_rate": 0.0001909090909090909, "loss": 0.5252, "step": 4975 }, { "epoch": 0.7789605510331872, "grad_norm": 3.620468854904175, "learning_rate": 0.00019088465298142717, "loss": 1.4791, "step": 4976 }, { "epoch": 0.7791170945522855, "grad_norm": 2.0373001098632812, "learning_rate": 0.0001908602150537634, "loss": 0.7857, "step": 4977 }, { "epoch": 0.7792736380713838, "grad_norm": 2.083617925643921, "learning_rate": 0.00019083577712609968, "loss": 0.8352, "step": 4978 }, { "epoch": 0.7794301815904822, "grad_norm": 1.7250630855560303, "learning_rate": 0.00019081133919843595, "loss": 1.4002, "step": 4979 }, { "epoch": 0.7795867251095805, "grad_norm": 1.6049273014068604, "learning_rate": 0.0001907869012707722, "loss": 0.588, "step": 4980 }, { "epoch": 0.7797432686286788, "grad_norm": 1.8046834468841553, "learning_rate": 0.00019076246334310848, "loss": 0.9482, "step": 4981 }, { "epoch": 0.779899812147777, "grad_norm": 2.0542006492614746, "learning_rate": 0.00019073802541544476, "loss": 0.9811, "step": 4982 }, { "epoch": 0.7800563556668754, "grad_norm": 1.8561007976531982, "learning_rate": 0.000190713587487781, "loss": 0.5584, "step": 4983 }, { "epoch": 0.7802128991859737, "grad_norm": 4.412578582763672, "learning_rate": 0.0001906891495601173, "loss": 1.2283, "step": 4984 }, { "epoch": 0.780369442705072, "grad_norm": 2.7626380920410156, "learning_rate": 0.00019066471163245357, "loss": 0.8703, "step": 4985 }, { "epoch": 0.7805259862241704, "grad_norm": 3.2905735969543457, "learning_rate": 0.0001906402737047898, "loss": 1.096, "step": 4986 }, { "epoch": 0.7806825297432687, "grad_norm": 1.68156099319458, "learning_rate": 0.00019061583577712607, "loss": 1.0584, "step": 4987 }, { "epoch": 0.7808390732623669, "grad_norm": 3.365527629852295, "learning_rate": 
0.00019059139784946235, "loss": 1.3791, "step": 4988 }, { "epoch": 0.7809956167814652, "grad_norm": 3.2215206623077393, "learning_rate": 0.0001905669599217986, "loss": 1.0837, "step": 4989 }, { "epoch": 0.7811521603005636, "grad_norm": 2.5578112602233887, "learning_rate": 0.00019054252199413488, "loss": 1.0125, "step": 4990 }, { "epoch": 0.7813087038196619, "grad_norm": 4.676730632781982, "learning_rate": 0.00019051808406647116, "loss": 1.3229, "step": 4991 }, { "epoch": 0.7814652473387602, "grad_norm": 3.1872496604919434, "learning_rate": 0.0001904936461388074, "loss": 1.6003, "step": 4992 }, { "epoch": 0.7816217908578584, "grad_norm": 2.7677407264709473, "learning_rate": 0.0001904692082111437, "loss": 0.9255, "step": 4993 }, { "epoch": 0.7817783343769568, "grad_norm": 1.499570608139038, "learning_rate": 0.00019044477028347994, "loss": 1.177, "step": 4994 }, { "epoch": 0.7819348778960551, "grad_norm": 2.0705745220184326, "learning_rate": 0.0001904203323558162, "loss": 1.2654, "step": 4995 }, { "epoch": 0.7820914214151534, "grad_norm": 3.193979263305664, "learning_rate": 0.00019039589442815247, "loss": 0.7452, "step": 4996 }, { "epoch": 0.7822479649342517, "grad_norm": 2.0789194107055664, "learning_rate": 0.00019037145650048875, "loss": 0.5481, "step": 4997 }, { "epoch": 0.7824045084533501, "grad_norm": 2.25919771194458, "learning_rate": 0.000190347018572825, "loss": 1.0294, "step": 4998 }, { "epoch": 0.7825610519724483, "grad_norm": 1.2901616096496582, "learning_rate": 0.00019032258064516128, "loss": 1.025, "step": 4999 }, { "epoch": 0.7827175954915466, "grad_norm": 1.560736894607544, "learning_rate": 0.00019029814271749756, "loss": 0.8492, "step": 5000 }, { "epoch": 0.7827175954915466, "eval_loss": 0.5757540464401245, "eval_runtime": 205.5697, "eval_samples_per_second": 60.237, "eval_steps_per_second": 3.765, "eval_wer": 0.36954119472129543, "step": 5000 }, { "epoch": 0.782874139010645, "grad_norm": 1.0069044828414917, "learning_rate": 0.00019027370478983378, 
"loss": 0.5492, "step": 5001 }, { "epoch": 0.7830306825297433, "grad_norm": 0.5260990262031555, "learning_rate": 0.00019024926686217006, "loss": 0.2823, "step": 5002 }, { "epoch": 0.7831872260488416, "grad_norm": 0.48100632429122925, "learning_rate": 0.00019022482893450634, "loss": 0.2471, "step": 5003 }, { "epoch": 0.7833437695679399, "grad_norm": 0.6281126141548157, "learning_rate": 0.0001902003910068426, "loss": 0.2494, "step": 5004 }, { "epoch": 0.7835003130870382, "grad_norm": 0.869832456111908, "learning_rate": 0.00019017595307917887, "loss": 0.3811, "step": 5005 }, { "epoch": 0.7836568566061365, "grad_norm": 0.6189180612564087, "learning_rate": 0.00019015151515151514, "loss": 0.2062, "step": 5006 }, { "epoch": 0.7838134001252348, "grad_norm": 1.0378501415252686, "learning_rate": 0.0001901270772238514, "loss": 0.3194, "step": 5007 }, { "epoch": 0.7839699436443331, "grad_norm": 0.7968530654907227, "learning_rate": 0.00019010263929618767, "loss": 0.3416, "step": 5008 }, { "epoch": 0.7841264871634315, "grad_norm": 1.4084452390670776, "learning_rate": 0.00019007820136852395, "loss": 0.4465, "step": 5009 }, { "epoch": 0.7842830306825297, "grad_norm": 0.9723837375640869, "learning_rate": 0.00019005376344086018, "loss": 0.3285, "step": 5010 }, { "epoch": 0.784439574201628, "grad_norm": 1.0934810638427734, "learning_rate": 0.00019002932551319645, "loss": 0.3684, "step": 5011 }, { "epoch": 0.7845961177207263, "grad_norm": 1.6922805309295654, "learning_rate": 0.00019000488758553273, "loss": 0.7612, "step": 5012 }, { "epoch": 0.7847526612398247, "grad_norm": 1.0072715282440186, "learning_rate": 0.00018998044965786898, "loss": 0.7402, "step": 5013 }, { "epoch": 0.784909204758923, "grad_norm": 1.367684245109558, "learning_rate": 0.00018995601173020526, "loss": 0.4451, "step": 5014 }, { "epoch": 0.7850657482780213, "grad_norm": 1.189140796661377, "learning_rate": 0.00018993157380254154, "loss": 0.4766, "step": 5015 }, { "epoch": 0.7852222917971196, "grad_norm": 
1.3974438905715942, "learning_rate": 0.0001899071358748778, "loss": 0.6411, "step": 5016 }, { "epoch": 0.7853788353162179, "grad_norm": 1.6327006816864014, "learning_rate": 0.00018988269794721407, "loss": 0.3786, "step": 5017 }, { "epoch": 0.7855353788353162, "grad_norm": 0.9266005754470825, "learning_rate": 0.00018985826001955032, "loss": 0.4677, "step": 5018 }, { "epoch": 0.7856919223544145, "grad_norm": 0.8187036514282227, "learning_rate": 0.00018983382209188657, "loss": 0.5805, "step": 5019 }, { "epoch": 0.7858484658735129, "grad_norm": 1.5598969459533691, "learning_rate": 0.00018980938416422285, "loss": 0.6537, "step": 5020 }, { "epoch": 0.7860050093926112, "grad_norm": 1.5645040273666382, "learning_rate": 0.00018978494623655913, "loss": 0.6621, "step": 5021 }, { "epoch": 0.7861615529117094, "grad_norm": 1.342915654182434, "learning_rate": 0.00018976050830889538, "loss": 0.4445, "step": 5022 }, { "epoch": 0.7863180964308077, "grad_norm": 1.3394932746887207, "learning_rate": 0.00018973607038123166, "loss": 0.7038, "step": 5023 }, { "epoch": 0.7864746399499061, "grad_norm": 1.4908233880996704, "learning_rate": 0.00018971163245356794, "loss": 0.5493, "step": 5024 }, { "epoch": 0.7866311834690044, "grad_norm": 1.5220710039138794, "learning_rate": 0.00018968719452590416, "loss": 0.6794, "step": 5025 }, { "epoch": 0.7867877269881027, "grad_norm": 6.862061023712158, "learning_rate": 0.00018966275659824044, "loss": 1.0358, "step": 5026 }, { "epoch": 0.786944270507201, "grad_norm": 1.3390004634857178, "learning_rate": 0.00018963831867057672, "loss": 0.6504, "step": 5027 }, { "epoch": 0.7871008140262993, "grad_norm": 1.7340364456176758, "learning_rate": 0.00018961388074291297, "loss": 0.9927, "step": 5028 }, { "epoch": 0.7872573575453976, "grad_norm": 2.199751377105713, "learning_rate": 0.00018958944281524925, "loss": 0.7016, "step": 5029 }, { "epoch": 0.7874139010644959, "grad_norm": 1.6952979564666748, "learning_rate": 0.00018956500488758553, "loss": 0.3266, "step": 
5030 }, { "epoch": 0.7875704445835943, "grad_norm": 2.159717082977295, "learning_rate": 0.00018954056695992178, "loss": 0.6416, "step": 5031 }, { "epoch": 0.7877269881026926, "grad_norm": 1.6389358043670654, "learning_rate": 0.00018951612903225806, "loss": 0.8744, "step": 5032 }, { "epoch": 0.7878835316217908, "grad_norm": 2.425839424133301, "learning_rate": 0.00018949169110459433, "loss": 1.0329, "step": 5033 }, { "epoch": 0.7880400751408891, "grad_norm": 3.5907609462738037, "learning_rate": 0.00018946725317693056, "loss": 0.8958, "step": 5034 }, { "epoch": 0.7881966186599875, "grad_norm": 1.7229177951812744, "learning_rate": 0.00018944281524926684, "loss": 0.7585, "step": 5035 }, { "epoch": 0.7883531621790858, "grad_norm": 5.789181232452393, "learning_rate": 0.00018941837732160312, "loss": 1.1827, "step": 5036 }, { "epoch": 0.7885097056981841, "grad_norm": 6.507972717285156, "learning_rate": 0.00018939393939393937, "loss": 1.1366, "step": 5037 }, { "epoch": 0.7886662492172825, "grad_norm": 2.509685754776001, "learning_rate": 0.00018936950146627565, "loss": 0.904, "step": 5038 }, { "epoch": 0.7888227927363807, "grad_norm": 3.2579421997070312, "learning_rate": 0.00018934506353861192, "loss": 0.7319, "step": 5039 }, { "epoch": 0.788979336255479, "grad_norm": 2.354316234588623, "learning_rate": 0.00018932062561094817, "loss": 1.0652, "step": 5040 }, { "epoch": 0.7891358797745773, "grad_norm": 1.7672300338745117, "learning_rate": 0.00018929618768328443, "loss": 1.0612, "step": 5041 }, { "epoch": 0.7892924232936757, "grad_norm": 2.5716187953948975, "learning_rate": 0.0001892717497556207, "loss": 1.1822, "step": 5042 }, { "epoch": 0.789448966812774, "grad_norm": 1.6928919553756714, "learning_rate": 0.00018924731182795696, "loss": 1.7569, "step": 5043 }, { "epoch": 0.7896055103318722, "grad_norm": 2.918978452682495, "learning_rate": 0.00018922287390029323, "loss": 1.1707, "step": 5044 }, { "epoch": 0.7897620538509705, "grad_norm": 2.933793783187866, "learning_rate": 
0.0001891984359726295, "loss": 1.4186, "step": 5045 }, { "epoch": 0.7899185973700689, "grad_norm": 3.004098653793335, "learning_rate": 0.00018917399804496576, "loss": 1.0802, "step": 5046 }, { "epoch": 0.7900751408891672, "grad_norm": 4.242359638214111, "learning_rate": 0.00018914956011730204, "loss": 1.2112, "step": 5047 }, { "epoch": 0.7902316844082655, "grad_norm": 1.2052658796310425, "learning_rate": 0.00018912512218963832, "loss": 0.6445, "step": 5048 }, { "epoch": 0.7903882279273639, "grad_norm": 2.339385986328125, "learning_rate": 0.00018910068426197454, "loss": 1.21, "step": 5049 }, { "epoch": 0.7905447714464621, "grad_norm": 1.4080181121826172, "learning_rate": 0.00018907624633431082, "loss": 0.8448, "step": 5050 }, { "epoch": 0.7907013149655604, "grad_norm": 0.7067837119102478, "learning_rate": 0.0001890518084066471, "loss": 0.2988, "step": 5051 }, { "epoch": 0.7908578584846587, "grad_norm": 0.7016892433166504, "learning_rate": 0.00018902737047898335, "loss": 0.2903, "step": 5052 }, { "epoch": 0.7910144020037571, "grad_norm": 0.7686540484428406, "learning_rate": 0.00018900293255131963, "loss": 0.3338, "step": 5053 }, { "epoch": 0.7911709455228554, "grad_norm": 0.7467711567878723, "learning_rate": 0.0001889784946236559, "loss": 0.4192, "step": 5054 }, { "epoch": 0.7913274890419537, "grad_norm": 1.1654859781265259, "learning_rate": 0.00018895405669599216, "loss": 0.3853, "step": 5055 }, { "epoch": 0.7914840325610519, "grad_norm": 0.7265309691429138, "learning_rate": 0.00018892961876832844, "loss": 0.3844, "step": 5056 }, { "epoch": 0.7916405760801503, "grad_norm": 0.5267289280891418, "learning_rate": 0.00018890518084066472, "loss": 0.3022, "step": 5057 }, { "epoch": 0.7917971195992486, "grad_norm": 0.6257277727127075, "learning_rate": 0.00018888074291300094, "loss": 0.4393, "step": 5058 }, { "epoch": 0.7919536631183469, "grad_norm": 2.329874277114868, "learning_rate": 0.00018885630498533722, "loss": 0.8329, "step": 5059 }, { "epoch": 0.7921102066374452, 
"grad_norm": 0.7630550861358643, "learning_rate": 0.0001888318670576735, "loss": 0.3971, "step": 5060 }, { "epoch": 0.7922667501565435, "grad_norm": 0.726796567440033, "learning_rate": 0.00018880742913000975, "loss": 0.3248, "step": 5061 }, { "epoch": 0.7924232936756418, "grad_norm": 0.9759939312934875, "learning_rate": 0.00018878299120234603, "loss": 0.3606, "step": 5062 }, { "epoch": 0.7925798371947401, "grad_norm": 1.23695969581604, "learning_rate": 0.0001887585532746823, "loss": 0.5576, "step": 5063 }, { "epoch": 0.7927363807138385, "grad_norm": 1.5264012813568115, "learning_rate": 0.00018873411534701856, "loss": 0.7945, "step": 5064 }, { "epoch": 0.7928929242329368, "grad_norm": 0.8853788375854492, "learning_rate": 0.0001887096774193548, "loss": 0.5012, "step": 5065 }, { "epoch": 0.7930494677520351, "grad_norm": 0.9085557460784912, "learning_rate": 0.0001886852394916911, "loss": 0.4011, "step": 5066 }, { "epoch": 0.7932060112711333, "grad_norm": 0.9665614366531372, "learning_rate": 0.00018866080156402734, "loss": 0.3467, "step": 5067 }, { "epoch": 0.7933625547902317, "grad_norm": 1.1219550371170044, "learning_rate": 0.00018863636363636362, "loss": 0.6318, "step": 5068 }, { "epoch": 0.79351909830933, "grad_norm": 1.8163543939590454, "learning_rate": 0.0001886119257086999, "loss": 0.5886, "step": 5069 }, { "epoch": 0.7936756418284283, "grad_norm": 1.5584337711334229, "learning_rate": 0.00018858748778103615, "loss": 0.764, "step": 5070 }, { "epoch": 0.7938321853475266, "grad_norm": 2.26932692527771, "learning_rate": 0.00018856304985337242, "loss": 1.0698, "step": 5071 }, { "epoch": 0.793988728866625, "grad_norm": 1.0103601217269897, "learning_rate": 0.0001885386119257087, "loss": 0.4875, "step": 5072 }, { "epoch": 0.7941452723857232, "grad_norm": 2.160799980163574, "learning_rate": 0.00018851417399804493, "loss": 0.7125, "step": 5073 }, { "epoch": 0.7943018159048215, "grad_norm": 1.7695913314819336, "learning_rate": 0.0001884897360703812, "loss": 0.7405, "step": 
5074 }, { "epoch": 0.7944583594239198, "grad_norm": 1.6188089847564697, "learning_rate": 0.00018846529814271748, "loss": 0.6548, "step": 5075 }, { "epoch": 0.7946149029430182, "grad_norm": 2.61811900138855, "learning_rate": 0.00018844086021505373, "loss": 1.2027, "step": 5076 }, { "epoch": 0.7947714464621165, "grad_norm": 2.0806033611297607, "learning_rate": 0.00018841642228739, "loss": 0.9647, "step": 5077 }, { "epoch": 0.7949279899812148, "grad_norm": 1.6380743980407715, "learning_rate": 0.0001883919843597263, "loss": 0.73, "step": 5078 }, { "epoch": 0.795084533500313, "grad_norm": 1.2052991390228271, "learning_rate": 0.00018836754643206254, "loss": 0.5505, "step": 5079 }, { "epoch": 0.7952410770194114, "grad_norm": 1.2171603441238403, "learning_rate": 0.00018834310850439882, "loss": 0.6914, "step": 5080 }, { "epoch": 0.7953976205385097, "grad_norm": 1.6889578104019165, "learning_rate": 0.0001883186705767351, "loss": 0.6407, "step": 5081 }, { "epoch": 0.795554164057608, "grad_norm": 2.487717866897583, "learning_rate": 0.00018829423264907132, "loss": 0.9845, "step": 5082 }, { "epoch": 0.7957107075767064, "grad_norm": 2.156898021697998, "learning_rate": 0.0001882697947214076, "loss": 0.7382, "step": 5083 }, { "epoch": 0.7958672510958046, "grad_norm": 2.150857448577881, "learning_rate": 0.00018824535679374388, "loss": 0.6193, "step": 5084 }, { "epoch": 0.7960237946149029, "grad_norm": 2.0828707218170166, "learning_rate": 0.00018822091886608013, "loss": 0.6914, "step": 5085 }, { "epoch": 0.7961803381340012, "grad_norm": 2.405583381652832, "learning_rate": 0.0001881964809384164, "loss": 0.9627, "step": 5086 }, { "epoch": 0.7963368816530996, "grad_norm": 2.8909096717834473, "learning_rate": 0.0001881720430107527, "loss": 1.0168, "step": 5087 }, { "epoch": 0.7964934251721979, "grad_norm": 2.1869802474975586, "learning_rate": 0.00018814760508308894, "loss": 0.8169, "step": 5088 }, { "epoch": 0.7966499686912962, "grad_norm": 2.3138229846954346, "learning_rate": 
0.0001881231671554252, "loss": 1.6036, "step": 5089 }, { "epoch": 0.7968065122103944, "grad_norm": 5.03582763671875, "learning_rate": 0.00018809872922776147, "loss": 1.9535, "step": 5090 }, { "epoch": 0.7969630557294928, "grad_norm": 2.7218527793884277, "learning_rate": 0.00018807429130009772, "loss": 1.4739, "step": 5091 }, { "epoch": 0.7971195992485911, "grad_norm": 2.0012664794921875, "learning_rate": 0.000188049853372434, "loss": 0.9925, "step": 5092 }, { "epoch": 0.7972761427676894, "grad_norm": 3.092926502227783, "learning_rate": 0.00018802541544477028, "loss": 1.289, "step": 5093 }, { "epoch": 0.7974326862867878, "grad_norm": 1.7731401920318604, "learning_rate": 0.00018800097751710653, "loss": 1.1165, "step": 5094 }, { "epoch": 0.7975892298058861, "grad_norm": 3.9922702312469482, "learning_rate": 0.0001879765395894428, "loss": 1.2458, "step": 5095 }, { "epoch": 0.7977457733249843, "grad_norm": 1.2292232513427734, "learning_rate": 0.00018795210166177908, "loss": 0.6469, "step": 5096 }, { "epoch": 0.7979023168440826, "grad_norm": 1.4839969873428345, "learning_rate": 0.0001879276637341153, "loss": 0.4321, "step": 5097 }, { "epoch": 0.798058860363181, "grad_norm": 1.8727073669433594, "learning_rate": 0.0001879032258064516, "loss": 0.7269, "step": 5098 }, { "epoch": 0.7982154038822793, "grad_norm": 1.444756269454956, "learning_rate": 0.00018787878787878787, "loss": 0.4632, "step": 5099 }, { "epoch": 0.7983719474013776, "grad_norm": 1.8207427263259888, "learning_rate": 0.00018785434995112412, "loss": 0.7119, "step": 5100 }, { "epoch": 0.7985284909204758, "grad_norm": 2.3908092975616455, "learning_rate": 0.0001878299120234604, "loss": 0.7385, "step": 5101 }, { "epoch": 0.7986850344395742, "grad_norm": 0.8618811964988708, "learning_rate": 0.00018780547409579667, "loss": 0.4004, "step": 5102 }, { "epoch": 0.7988415779586725, "grad_norm": 0.7508620023727417, "learning_rate": 0.00018778103616813292, "loss": 0.3438, "step": 5103 }, { "epoch": 0.7989981214777708, 
"grad_norm": 0.8002655506134033, "learning_rate": 0.0001877565982404692, "loss": 0.3142, "step": 5104 }, { "epoch": 0.7991546649968692, "grad_norm": 0.8202148675918579, "learning_rate": 0.00018773216031280548, "loss": 0.2769, "step": 5105 }, { "epoch": 0.7993112085159675, "grad_norm": 0.5834996700286865, "learning_rate": 0.0001877077223851417, "loss": 0.2992, "step": 5106 }, { "epoch": 0.7994677520350657, "grad_norm": 0.6148617267608643, "learning_rate": 0.00018768328445747798, "loss": 0.2415, "step": 5107 }, { "epoch": 0.799624295554164, "grad_norm": 1.0197445154190063, "learning_rate": 0.00018765884652981426, "loss": 0.3005, "step": 5108 }, { "epoch": 0.7997808390732624, "grad_norm": 0.8099470138549805, "learning_rate": 0.00018763440860215051, "loss": 0.4335, "step": 5109 }, { "epoch": 0.7999373825923607, "grad_norm": 0.9061375260353088, "learning_rate": 0.0001876099706744868, "loss": 0.4103, "step": 5110 }, { "epoch": 0.800093926111459, "grad_norm": 0.8547300696372986, "learning_rate": 0.00018758553274682307, "loss": 0.4025, "step": 5111 }, { "epoch": 0.8002504696305573, "grad_norm": 5.366735935211182, "learning_rate": 0.0001875610948191593, "loss": 0.8325, "step": 5112 }, { "epoch": 0.8004070131496556, "grad_norm": 0.9143383502960205, "learning_rate": 0.00018753665689149557, "loss": 0.3426, "step": 5113 }, { "epoch": 0.8005635566687539, "grad_norm": 0.7643350958824158, "learning_rate": 0.00018751221896383185, "loss": 0.3594, "step": 5114 }, { "epoch": 0.8007201001878522, "grad_norm": 1.4406952857971191, "learning_rate": 0.0001874877810361681, "loss": 0.3918, "step": 5115 }, { "epoch": 0.8008766437069506, "grad_norm": 1.629372477531433, "learning_rate": 0.00018746334310850438, "loss": 0.441, "step": 5116 }, { "epoch": 0.8010331872260489, "grad_norm": 2.073367118835449, "learning_rate": 0.00018743890518084066, "loss": 0.5043, "step": 5117 }, { "epoch": 0.8011897307451471, "grad_norm": 1.0843485593795776, "learning_rate": 0.0001874144672531769, "loss": 0.5772, 
"step": 5118 }, { "epoch": 0.8013462742642454, "grad_norm": 1.2174686193466187, "learning_rate": 0.0001873900293255132, "loss": 0.4383, "step": 5119 }, { "epoch": 0.8015028177833438, "grad_norm": 1.9818603992462158, "learning_rate": 0.00018736559139784947, "loss": 0.9853, "step": 5120 }, { "epoch": 0.8016593613024421, "grad_norm": 1.621567726135254, "learning_rate": 0.0001873411534701857, "loss": 0.7447, "step": 5121 }, { "epoch": 0.8018159048215404, "grad_norm": 1.1117850542068481, "learning_rate": 0.00018731671554252197, "loss": 0.745, "step": 5122 }, { "epoch": 0.8019724483406387, "grad_norm": 1.9802541732788086, "learning_rate": 0.00018729227761485825, "loss": 0.673, "step": 5123 }, { "epoch": 0.802128991859737, "grad_norm": 2.39848256111145, "learning_rate": 0.0001872678396871945, "loss": 0.7147, "step": 5124 }, { "epoch": 0.8022855353788353, "grad_norm": 2.0519936084747314, "learning_rate": 0.00018724340175953078, "loss": 0.8749, "step": 5125 }, { "epoch": 0.8024420788979336, "grad_norm": 3.485672950744629, "learning_rate": 0.00018721896383186706, "loss": 0.6292, "step": 5126 }, { "epoch": 0.802598622417032, "grad_norm": 1.4344536066055298, "learning_rate": 0.0001871945259042033, "loss": 0.6966, "step": 5127 }, { "epoch": 0.8027551659361303, "grad_norm": 1.8065955638885498, "learning_rate": 0.00018717008797653959, "loss": 0.7884, "step": 5128 }, { "epoch": 0.8029117094552286, "grad_norm": 3.6765334606170654, "learning_rate": 0.00018714565004887586, "loss": 1.0263, "step": 5129 }, { "epoch": 0.8030682529743268, "grad_norm": 2.0539987087249756, "learning_rate": 0.0001871212121212121, "loss": 0.5238, "step": 5130 }, { "epoch": 0.8032247964934252, "grad_norm": 2.6825201511383057, "learning_rate": 0.00018709677419354837, "loss": 1.0184, "step": 5131 }, { "epoch": 0.8033813400125235, "grad_norm": 3.3649113178253174, "learning_rate": 0.00018707233626588464, "loss": 0.8083, "step": 5132 }, { "epoch": 0.8035378835316218, "grad_norm": 2.283419370651245, 
"learning_rate": 0.0001870478983382209, "loss": 0.7746, "step": 5133 }, { "epoch": 0.8036944270507201, "grad_norm": 2.9016916751861572, "learning_rate": 0.00018702346041055717, "loss": 0.9586, "step": 5134 }, { "epoch": 0.8038509705698184, "grad_norm": 3.5919530391693115, "learning_rate": 0.00018699902248289345, "loss": 1.083, "step": 5135 }, { "epoch": 0.8040075140889167, "grad_norm": 5.343000888824463, "learning_rate": 0.00018697458455522968, "loss": 1.187, "step": 5136 }, { "epoch": 0.804164057608015, "grad_norm": 2.6350221633911133, "learning_rate": 0.00018695014662756596, "loss": 1.5941, "step": 5137 }, { "epoch": 0.8043206011271133, "grad_norm": 1.6021567583084106, "learning_rate": 0.00018692570869990223, "loss": 0.6295, "step": 5138 }, { "epoch": 0.8044771446462117, "grad_norm": 4.3423075675964355, "learning_rate": 0.00018690127077223848, "loss": 1.3454, "step": 5139 }, { "epoch": 0.80463368816531, "grad_norm": 3.026441812515259, "learning_rate": 0.00018687683284457476, "loss": 1.0692, "step": 5140 }, { "epoch": 0.8047902316844082, "grad_norm": 2.199367046356201, "learning_rate": 0.00018685239491691104, "loss": 0.7581, "step": 5141 }, { "epoch": 0.8049467752035065, "grad_norm": 1.787598967552185, "learning_rate": 0.0001868279569892473, "loss": 0.8282, "step": 5142 }, { "epoch": 0.8051033187226049, "grad_norm": 3.308527708053589, "learning_rate": 0.00018680351906158357, "loss": 1.3213, "step": 5143 }, { "epoch": 0.8052598622417032, "grad_norm": 2.4407522678375244, "learning_rate": 0.00018677908113391985, "loss": 1.6773, "step": 5144 }, { "epoch": 0.8054164057608015, "grad_norm": 0.9789965748786926, "learning_rate": 0.00018675464320625607, "loss": 0.6245, "step": 5145 }, { "epoch": 0.8055729492798999, "grad_norm": 1.3967782258987427, "learning_rate": 0.00018673020527859235, "loss": 0.976, "step": 5146 }, { "epoch": 0.8057294927989981, "grad_norm": 1.6715110540390015, "learning_rate": 0.00018670576735092863, "loss": 0.8646, "step": 5147 }, { "epoch": 
0.8058860363180964, "grad_norm": 2.1324195861816406, "learning_rate": 0.00018668132942326488, "loss": 0.6652, "step": 5148 }, { "epoch": 0.8060425798371947, "grad_norm": 1.6442357301712036, "learning_rate": 0.00018665689149560116, "loss": 0.8102, "step": 5149 }, { "epoch": 0.8061991233562931, "grad_norm": 2.52543044090271, "learning_rate": 0.00018663245356793744, "loss": 0.9856, "step": 5150 }, { "epoch": 0.8063556668753914, "grad_norm": 0.6359297633171082, "learning_rate": 0.0001866080156402737, "loss": 0.284, "step": 5151 }, { "epoch": 0.8065122103944896, "grad_norm": 0.6601283550262451, "learning_rate": 0.00018658357771260997, "loss": 0.4191, "step": 5152 }, { "epoch": 0.8066687539135879, "grad_norm": 0.5493868589401245, "learning_rate": 0.00018655913978494622, "loss": 0.3558, "step": 5153 }, { "epoch": 0.8068252974326863, "grad_norm": 0.6157817244529724, "learning_rate": 0.00018653470185728247, "loss": 0.355, "step": 5154 }, { "epoch": 0.8069818409517846, "grad_norm": 0.5704903602600098, "learning_rate": 0.00018651026392961875, "loss": 0.3663, "step": 5155 }, { "epoch": 0.8071383844708829, "grad_norm": 1.178697943687439, "learning_rate": 0.00018648582600195503, "loss": 0.5071, "step": 5156 }, { "epoch": 0.8072949279899813, "grad_norm": 0.8343028426170349, "learning_rate": 0.00018646138807429128, "loss": 0.4386, "step": 5157 }, { "epoch": 0.8074514715090795, "grad_norm": 0.592948317527771, "learning_rate": 0.00018643695014662756, "loss": 0.2441, "step": 5158 }, { "epoch": 0.8076080150281778, "grad_norm": 0.7243725061416626, "learning_rate": 0.00018641251221896383, "loss": 0.3678, "step": 5159 }, { "epoch": 0.8077645585472761, "grad_norm": 0.8186553120613098, "learning_rate": 0.00018638807429130006, "loss": 0.3722, "step": 5160 }, { "epoch": 0.8079211020663745, "grad_norm": 1.4765008687973022, "learning_rate": 0.00018636363636363634, "loss": 0.2819, "step": 5161 }, { "epoch": 0.8080776455854728, "grad_norm": 1.4610421657562256, "learning_rate": 
0.00018633919843597262, "loss": 0.3065, "step": 5162 }, { "epoch": 0.8082341891045711, "grad_norm": 1.0413181781768799, "learning_rate": 0.00018631476050830887, "loss": 0.3565, "step": 5163 }, { "epoch": 0.8083907326236693, "grad_norm": 9.378120422363281, "learning_rate": 0.00018629032258064515, "loss": 0.9926, "step": 5164 }, { "epoch": 0.8085472761427677, "grad_norm": 1.3265831470489502, "learning_rate": 0.00018626588465298142, "loss": 0.5953, "step": 5165 }, { "epoch": 0.808703819661866, "grad_norm": 2.425917863845825, "learning_rate": 0.00018624144672531768, "loss": 0.4746, "step": 5166 }, { "epoch": 0.8088603631809643, "grad_norm": 1.6301259994506836, "learning_rate": 0.00018621700879765395, "loss": 0.4585, "step": 5167 }, { "epoch": 0.8090169067000627, "grad_norm": 3.910301446914673, "learning_rate": 0.00018619257086999023, "loss": 1.0197, "step": 5168 }, { "epoch": 0.8091734502191609, "grad_norm": 1.4134447574615479, "learning_rate": 0.00018616813294232646, "loss": 0.5156, "step": 5169 }, { "epoch": 0.8093299937382592, "grad_norm": 1.712280035018921, "learning_rate": 0.00018614369501466273, "loss": 1.0762, "step": 5170 }, { "epoch": 0.8094865372573575, "grad_norm": 1.478226900100708, "learning_rate": 0.000186119257086999, "loss": 0.4157, "step": 5171 }, { "epoch": 0.8096430807764559, "grad_norm": 1.3933120965957642, "learning_rate": 0.00018609481915933526, "loss": 0.4726, "step": 5172 }, { "epoch": 0.8097996242955542, "grad_norm": 1.6714564561843872, "learning_rate": 0.00018607038123167154, "loss": 0.4534, "step": 5173 }, { "epoch": 0.8099561678146525, "grad_norm": 1.5796337127685547, "learning_rate": 0.00018604594330400782, "loss": 0.689, "step": 5174 }, { "epoch": 0.8101127113337507, "grad_norm": 1.966103196144104, "learning_rate": 0.00018602150537634407, "loss": 0.8875, "step": 5175 }, { "epoch": 0.8102692548528491, "grad_norm": 1.9266483783721924, "learning_rate": 0.00018599706744868035, "loss": 0.7517, "step": 5176 }, { "epoch": 0.8104257983719474, 
"grad_norm": 2.138370990753174, "learning_rate": 0.0001859726295210166, "loss": 0.6377, "step": 5177 }, { "epoch": 0.8105823418910457, "grad_norm": 2.1694908142089844, "learning_rate": 0.00018594819159335285, "loss": 0.7177, "step": 5178 }, { "epoch": 0.810738885410144, "grad_norm": 2.8311069011688232, "learning_rate": 0.00018592375366568913, "loss": 0.7425, "step": 5179 }, { "epoch": 0.8108954289292424, "grad_norm": 3.3369836807250977, "learning_rate": 0.0001858993157380254, "loss": 1.0659, "step": 5180 }, { "epoch": 0.8110519724483406, "grad_norm": 1.3343548774719238, "learning_rate": 0.00018587487781036166, "loss": 0.6171, "step": 5181 }, { "epoch": 0.8112085159674389, "grad_norm": 3.1208863258361816, "learning_rate": 0.00018585043988269794, "loss": 1.0402, "step": 5182 }, { "epoch": 0.8113650594865373, "grad_norm": 2.674014091491699, "learning_rate": 0.00018582600195503422, "loss": 0.8502, "step": 5183 }, { "epoch": 0.8115216030056356, "grad_norm": 2.035822868347168, "learning_rate": 0.00018580156402737044, "loss": 0.7894, "step": 5184 }, { "epoch": 0.8116781465247339, "grad_norm": 3.007331371307373, "learning_rate": 0.00018577712609970672, "loss": 1.1385, "step": 5185 }, { "epoch": 0.8118346900438321, "grad_norm": 1.874494194984436, "learning_rate": 0.000185752688172043, "loss": 0.7571, "step": 5186 }, { "epoch": 0.8119912335629305, "grad_norm": 2.905369758605957, "learning_rate": 0.00018572825024437925, "loss": 1.2572, "step": 5187 }, { "epoch": 0.8121477770820288, "grad_norm": 3.7571399211883545, "learning_rate": 0.00018570381231671553, "loss": 1.2583, "step": 5188 }, { "epoch": 0.8123043206011271, "grad_norm": 2.9104902744293213, "learning_rate": 0.0001856793743890518, "loss": 1.8115, "step": 5189 }, { "epoch": 0.8124608641202254, "grad_norm": 2.058563709259033, "learning_rate": 0.00018565493646138806, "loss": 1.3185, "step": 5190 }, { "epoch": 0.8126174076393238, "grad_norm": 3.4244906902313232, "learning_rate": 0.00018563049853372434, "loss": 1.1396, 
"step": 5191 }, { "epoch": 0.812773951158422, "grad_norm": 1.9940974712371826, "learning_rate": 0.00018560606060606061, "loss": 1.322, "step": 5192 }, { "epoch": 0.8129304946775203, "grad_norm": 2.131615161895752, "learning_rate": 0.00018558162267839684, "loss": 1.2332, "step": 5193 }, { "epoch": 0.8130870381966186, "grad_norm": 2.615813970565796, "learning_rate": 0.00018555718475073312, "loss": 0.9671, "step": 5194 }, { "epoch": 0.813243581715717, "grad_norm": 1.4775357246398926, "learning_rate": 0.0001855327468230694, "loss": 1.0884, "step": 5195 }, { "epoch": 0.8134001252348153, "grad_norm": 5.091252326965332, "learning_rate": 0.00018550830889540565, "loss": 0.7112, "step": 5196 }, { "epoch": 0.8135566687539136, "grad_norm": 1.498961329460144, "learning_rate": 0.00018548387096774192, "loss": 0.4947, "step": 5197 }, { "epoch": 0.8137132122730119, "grad_norm": 2.671917676925659, "learning_rate": 0.0001854594330400782, "loss": 0.8125, "step": 5198 }, { "epoch": 0.8138697557921102, "grad_norm": 1.2073392868041992, "learning_rate": 0.00018543499511241445, "loss": 0.6217, "step": 5199 }, { "epoch": 0.8140262993112085, "grad_norm": 2.6718733310699463, "learning_rate": 0.0001854105571847507, "loss": 1.5128, "step": 5200 }, { "epoch": 0.8141828428303068, "grad_norm": 0.4015403687953949, "learning_rate": 0.00018538611925708698, "loss": 0.2958, "step": 5201 }, { "epoch": 0.8143393863494052, "grad_norm": 0.7443885803222656, "learning_rate": 0.00018536168132942323, "loss": 0.211, "step": 5202 }, { "epoch": 0.8144959298685035, "grad_norm": 0.7921191453933716, "learning_rate": 0.0001853372434017595, "loss": 0.3133, "step": 5203 }, { "epoch": 0.8146524733876017, "grad_norm": 0.7802867293357849, "learning_rate": 0.0001853128054740958, "loss": 0.2662, "step": 5204 }, { "epoch": 0.8148090169067, "grad_norm": 0.7072981595993042, "learning_rate": 0.00018528836754643204, "loss": 0.3204, "step": 5205 }, { "epoch": 0.8149655604257984, "grad_norm": 0.6935706734657288, "learning_rate": 
0.00018526392961876832, "loss": 0.2573, "step": 5206 }, { "epoch": 0.8151221039448967, "grad_norm": 0.6448320746421814, "learning_rate": 0.0001852394916911046, "loss": 0.305, "step": 5207 }, { "epoch": 0.815278647463995, "grad_norm": 0.5979820489883423, "learning_rate": 0.00018521505376344082, "loss": 0.2683, "step": 5208 }, { "epoch": 0.8154351909830932, "grad_norm": 0.9508888125419617, "learning_rate": 0.0001851906158357771, "loss": 0.3316, "step": 5209 }, { "epoch": 0.8155917345021916, "grad_norm": 0.6390425562858582, "learning_rate": 0.00018516617790811338, "loss": 0.2239, "step": 5210 }, { "epoch": 0.8157482780212899, "grad_norm": 0.9599819779396057, "learning_rate": 0.00018514173998044963, "loss": 0.4099, "step": 5211 }, { "epoch": 0.8159048215403882, "grad_norm": 1.198017954826355, "learning_rate": 0.0001851173020527859, "loss": 0.3733, "step": 5212 }, { "epoch": 0.8160613650594866, "grad_norm": 1.6387499570846558, "learning_rate": 0.0001850928641251222, "loss": 0.3879, "step": 5213 }, { "epoch": 0.8162179085785849, "grad_norm": 1.1071934700012207, "learning_rate": 0.00018506842619745844, "loss": 0.6261, "step": 5214 }, { "epoch": 0.8163744520976831, "grad_norm": 0.7644991278648376, "learning_rate": 0.00018504398826979472, "loss": 0.2503, "step": 5215 }, { "epoch": 0.8165309956167814, "grad_norm": 0.9139405488967896, "learning_rate": 0.000185019550342131, "loss": 0.5133, "step": 5216 }, { "epoch": 0.8166875391358798, "grad_norm": 0.9907679557800293, "learning_rate": 0.00018499511241446722, "loss": 0.4415, "step": 5217 }, { "epoch": 0.8168440826549781, "grad_norm": 0.9434964060783386, "learning_rate": 0.0001849706744868035, "loss": 0.491, "step": 5218 }, { "epoch": 0.8170006261740764, "grad_norm": 1.1994105577468872, "learning_rate": 0.00018494623655913978, "loss": 0.673, "step": 5219 }, { "epoch": 0.8171571696931748, "grad_norm": 1.1028125286102295, "learning_rate": 0.00018492179863147603, "loss": 0.5795, "step": 5220 }, { "epoch": 0.817313713212273, 
"grad_norm": 0.9780545234680176, "learning_rate": 0.0001848973607038123, "loss": 0.5664, "step": 5221 }, { "epoch": 0.8174702567313713, "grad_norm": 2.6875715255737305, "learning_rate": 0.00018487292277614859, "loss": 0.6478, "step": 5222 }, { "epoch": 0.8176268002504696, "grad_norm": 1.439847469329834, "learning_rate": 0.00018484848484848484, "loss": 0.6378, "step": 5223 }, { "epoch": 0.817783343769568, "grad_norm": 1.5552678108215332, "learning_rate": 0.0001848240469208211, "loss": 0.8491, "step": 5224 }, { "epoch": 0.8179398872886663, "grad_norm": 2.3292577266693115, "learning_rate": 0.00018479960899315737, "loss": 0.8801, "step": 5225 }, { "epoch": 0.8180964308077645, "grad_norm": 1.8641091585159302, "learning_rate": 0.00018477517106549362, "loss": 0.7684, "step": 5226 }, { "epoch": 0.8182529743268628, "grad_norm": 1.5867455005645752, "learning_rate": 0.0001847507331378299, "loss": 0.5417, "step": 5227 }, { "epoch": 0.8184095178459612, "grad_norm": 3.227484941482544, "learning_rate": 0.00018472629521016617, "loss": 1.1114, "step": 5228 }, { "epoch": 0.8185660613650595, "grad_norm": 1.3463890552520752, "learning_rate": 0.00018470185728250243, "loss": 0.6013, "step": 5229 }, { "epoch": 0.8187226048841578, "grad_norm": 2.245208501815796, "learning_rate": 0.0001846774193548387, "loss": 0.82, "step": 5230 }, { "epoch": 0.8188791484032562, "grad_norm": 1.4095137119293213, "learning_rate": 0.00018465298142717498, "loss": 0.6866, "step": 5231 }, { "epoch": 0.8190356919223544, "grad_norm": 2.2786121368408203, "learning_rate": 0.0001846285434995112, "loss": 0.7222, "step": 5232 }, { "epoch": 0.8191922354414527, "grad_norm": 1.5390690565109253, "learning_rate": 0.00018460410557184748, "loss": 0.7086, "step": 5233 }, { "epoch": 0.819348778960551, "grad_norm": 1.4006104469299316, "learning_rate": 0.00018457966764418376, "loss": 0.9285, "step": 5234 }, { "epoch": 0.8195053224796494, "grad_norm": 1.5036606788635254, "learning_rate": 0.00018455522971652001, "loss": 0.7367, 
"step": 5235 }, { "epoch": 0.8196618659987477, "grad_norm": 1.538991928100586, "learning_rate": 0.0001845307917888563, "loss": 0.7435, "step": 5236 }, { "epoch": 0.819818409517846, "grad_norm": 1.5713046789169312, "learning_rate": 0.00018450635386119257, "loss": 0.7751, "step": 5237 }, { "epoch": 0.8199749530369442, "grad_norm": 2.501213788986206, "learning_rate": 0.00018448191593352882, "loss": 1.1718, "step": 5238 }, { "epoch": 0.8201314965560426, "grad_norm": 2.6545050144195557, "learning_rate": 0.0001844574780058651, "loss": 1.362, "step": 5239 }, { "epoch": 0.8202880400751409, "grad_norm": 2.392634153366089, "learning_rate": 0.00018443304007820138, "loss": 1.4286, "step": 5240 }, { "epoch": 0.8204445835942392, "grad_norm": 1.6317548751831055, "learning_rate": 0.0001844086021505376, "loss": 1.2579, "step": 5241 }, { "epoch": 0.8206011271133375, "grad_norm": 8.814602851867676, "learning_rate": 0.00018438416422287388, "loss": 1.1427, "step": 5242 }, { "epoch": 0.8207576706324358, "grad_norm": 3.1028459072113037, "learning_rate": 0.00018435972629521016, "loss": 1.6293, "step": 5243 }, { "epoch": 0.8209142141515341, "grad_norm": 2.7763304710388184, "learning_rate": 0.0001843352883675464, "loss": 1.5323, "step": 5244 }, { "epoch": 0.8210707576706324, "grad_norm": 3.5093159675598145, "learning_rate": 0.0001843108504398827, "loss": 2.2438, "step": 5245 }, { "epoch": 0.8212273011897308, "grad_norm": 2.848104953765869, "learning_rate": 0.00018428641251221897, "loss": 1.2866, "step": 5246 }, { "epoch": 0.8213838447088291, "grad_norm": 1.3957146406173706, "learning_rate": 0.00018426197458455522, "loss": 0.6522, "step": 5247 }, { "epoch": 0.8215403882279274, "grad_norm": 4.863888740539551, "learning_rate": 0.00018423753665689147, "loss": 0.663, "step": 5248 }, { "epoch": 0.8216969317470256, "grad_norm": 2.6501758098602295, "learning_rate": 0.00018421309872922775, "loss": 1.2547, "step": 5249 }, { "epoch": 0.821853475266124, "grad_norm": 1.6998775005340576, "learning_rate": 
0.000184188660801564, "loss": 1.0807, "step": 5250 }, { "epoch": 0.8220100187852223, "grad_norm": 0.7901161909103394, "learning_rate": 0.00018416422287390028, "loss": 0.3135, "step": 5251 }, { "epoch": 0.8221665623043206, "grad_norm": 0.7049028277397156, "learning_rate": 0.00018413978494623656, "loss": 0.4211, "step": 5252 }, { "epoch": 0.8223231058234189, "grad_norm": 0.6811177134513855, "learning_rate": 0.0001841153470185728, "loss": 0.2857, "step": 5253 }, { "epoch": 0.8224796493425173, "grad_norm": 0.7868577241897583, "learning_rate": 0.00018409090909090909, "loss": 0.35, "step": 5254 }, { "epoch": 0.8226361928616155, "grad_norm": 0.7949536442756653, "learning_rate": 0.00018406647116324536, "loss": 0.4374, "step": 5255 }, { "epoch": 0.8227927363807138, "grad_norm": 0.6903027296066284, "learning_rate": 0.0001840420332355816, "loss": 0.427, "step": 5256 }, { "epoch": 0.8229492798998121, "grad_norm": 0.6659723520278931, "learning_rate": 0.00018401759530791787, "loss": 0.5036, "step": 5257 }, { "epoch": 0.8231058234189105, "grad_norm": 0.5568656325340271, "learning_rate": 0.00018399315738025415, "loss": 0.3561, "step": 5258 }, { "epoch": 0.8232623669380088, "grad_norm": 0.8612244129180908, "learning_rate": 0.0001839687194525904, "loss": 0.3908, "step": 5259 }, { "epoch": 0.823418910457107, "grad_norm": 0.7384498119354248, "learning_rate": 0.00018394428152492667, "loss": 0.5201, "step": 5260 }, { "epoch": 0.8235754539762054, "grad_norm": 0.9855079054832458, "learning_rate": 0.00018391984359726295, "loss": 0.413, "step": 5261 }, { "epoch": 0.8237319974953037, "grad_norm": 0.768484890460968, "learning_rate": 0.0001838954056695992, "loss": 0.4253, "step": 5262 }, { "epoch": 0.823888541014402, "grad_norm": 0.6024149060249329, "learning_rate": 0.00018387096774193548, "loss": 0.2572, "step": 5263 }, { "epoch": 0.8240450845335003, "grad_norm": 0.5036283135414124, "learning_rate": 0.00018384652981427176, "loss": 0.2692, "step": 5264 }, { "epoch": 0.8242016280525987, 
"grad_norm": 1.0418510437011719, "learning_rate": 0.00018382209188660799, "loss": 0.4799, "step": 5265 }, { "epoch": 0.8243581715716969, "grad_norm": 0.663507342338562, "learning_rate": 0.00018379765395894426, "loss": 0.3185, "step": 5266 }, { "epoch": 0.8245147150907952, "grad_norm": 0.8213881254196167, "learning_rate": 0.00018377321603128054, "loss": 0.337, "step": 5267 }, { "epoch": 0.8246712586098935, "grad_norm": 1.1728672981262207, "learning_rate": 0.0001837487781036168, "loss": 0.4151, "step": 5268 }, { "epoch": 0.8248278021289919, "grad_norm": 1.2628549337387085, "learning_rate": 0.00018372434017595307, "loss": 0.6478, "step": 5269 }, { "epoch": 0.8249843456480902, "grad_norm": 1.121236801147461, "learning_rate": 0.00018369990224828935, "loss": 0.4379, "step": 5270 }, { "epoch": 0.8251408891671885, "grad_norm": 1.1426113843917847, "learning_rate": 0.00018367546432062557, "loss": 0.461, "step": 5271 }, { "epoch": 0.8252974326862867, "grad_norm": 1.555210828781128, "learning_rate": 0.00018365102639296185, "loss": 0.493, "step": 5272 }, { "epoch": 0.8254539762053851, "grad_norm": 1.8274154663085938, "learning_rate": 0.00018362658846529813, "loss": 0.5785, "step": 5273 }, { "epoch": 0.8256105197244834, "grad_norm": 1.771406888961792, "learning_rate": 0.00018360215053763438, "loss": 0.71, "step": 5274 }, { "epoch": 0.8257670632435817, "grad_norm": 3.0321593284606934, "learning_rate": 0.00018357771260997066, "loss": 0.8284, "step": 5275 }, { "epoch": 0.8259236067626801, "grad_norm": 4.053110599517822, "learning_rate": 0.00018355327468230694, "loss": 1.0345, "step": 5276 }, { "epoch": 0.8260801502817783, "grad_norm": 1.7022113800048828, "learning_rate": 0.0001835288367546432, "loss": 0.8215, "step": 5277 }, { "epoch": 0.8262366938008766, "grad_norm": 2.7974987030029297, "learning_rate": 0.00018350439882697947, "loss": 1.186, "step": 5278 }, { "epoch": 0.8263932373199749, "grad_norm": 2.123547077178955, "learning_rate": 0.00018347996089931575, "loss": 0.7569, 
"step": 5279 }, { "epoch": 0.8265497808390733, "grad_norm": 2.535295009613037, "learning_rate": 0.00018345552297165197, "loss": 1.1509, "step": 5280 }, { "epoch": 0.8267063243581716, "grad_norm": 2.005593776702881, "learning_rate": 0.00018343108504398825, "loss": 0.6985, "step": 5281 }, { "epoch": 0.8268628678772699, "grad_norm": 2.139389753341675, "learning_rate": 0.00018340664711632453, "loss": 1.401, "step": 5282 }, { "epoch": 0.8270194113963681, "grad_norm": 1.5043542385101318, "learning_rate": 0.00018338220918866078, "loss": 0.829, "step": 5283 }, { "epoch": 0.8271759549154665, "grad_norm": 1.6007672548294067, "learning_rate": 0.00018335777126099706, "loss": 0.8851, "step": 5284 }, { "epoch": 0.8273324984345648, "grad_norm": 2.1892812252044678, "learning_rate": 0.00018333333333333334, "loss": 1.3312, "step": 5285 }, { "epoch": 0.8274890419536631, "grad_norm": 1.2320849895477295, "learning_rate": 0.0001833088954056696, "loss": 0.6038, "step": 5286 }, { "epoch": 0.8276455854727615, "grad_norm": 4.166670322418213, "learning_rate": 0.00018328445747800586, "loss": 0.669, "step": 5287 }, { "epoch": 0.8278021289918598, "grad_norm": 2.1780524253845215, "learning_rate": 0.00018326001955034214, "loss": 1.1425, "step": 5288 }, { "epoch": 0.827958672510958, "grad_norm": 2.1687676906585693, "learning_rate": 0.00018323558162267837, "loss": 1.1492, "step": 5289 }, { "epoch": 0.8281152160300563, "grad_norm": 1.6996123790740967, "learning_rate": 0.00018321114369501465, "loss": 1.0186, "step": 5290 }, { "epoch": 0.8282717595491547, "grad_norm": 3.4165749549865723, "learning_rate": 0.00018318670576735092, "loss": 1.1482, "step": 5291 }, { "epoch": 0.828428303068253, "grad_norm": 2.1775505542755127, "learning_rate": 0.00018316226783968718, "loss": 1.3908, "step": 5292 }, { "epoch": 0.8285848465873513, "grad_norm": 2.812617063522339, "learning_rate": 0.00018313782991202345, "loss": 1.1503, "step": 5293 }, { "epoch": 0.8287413901064495, "grad_norm": 1.5233439207077026, 
"learning_rate": 0.00018311339198435973, "loss": 1.3862, "step": 5294 }, { "epoch": 0.8288979336255479, "grad_norm": 1.8722327947616577, "learning_rate": 0.00018308895405669596, "loss": 1.5704, "step": 5295 }, { "epoch": 0.8290544771446462, "grad_norm": 2.6162047386169434, "learning_rate": 0.00018306451612903223, "loss": 1.0679, "step": 5296 }, { "epoch": 0.8292110206637445, "grad_norm": 1.2465320825576782, "learning_rate": 0.0001830400782013685, "loss": 0.5608, "step": 5297 }, { "epoch": 0.8293675641828429, "grad_norm": 1.294814109802246, "learning_rate": 0.00018301564027370476, "loss": 0.8224, "step": 5298 }, { "epoch": 0.8295241077019412, "grad_norm": 2.261251926422119, "learning_rate": 0.00018299120234604104, "loss": 1.3591, "step": 5299 }, { "epoch": 0.8296806512210394, "grad_norm": 1.4905081987380981, "learning_rate": 0.00018296676441837732, "loss": 1.4874, "step": 5300 }, { "epoch": 0.8298371947401377, "grad_norm": 0.5138626098632812, "learning_rate": 0.00018294232649071357, "loss": 0.2785, "step": 5301 }, { "epoch": 0.8299937382592361, "grad_norm": 0.5530992150306702, "learning_rate": 0.00018291788856304985, "loss": 0.3083, "step": 5302 }, { "epoch": 0.8301502817783344, "grad_norm": 0.40813249349594116, "learning_rate": 0.00018289345063538613, "loss": 0.3063, "step": 5303 }, { "epoch": 0.8303068252974327, "grad_norm": 0.5321076512336731, "learning_rate": 0.00018286901270772235, "loss": 0.3811, "step": 5304 }, { "epoch": 0.830463368816531, "grad_norm": 0.5852525234222412, "learning_rate": 0.00018284457478005863, "loss": 0.3492, "step": 5305 }, { "epoch": 0.8306199123356293, "grad_norm": 0.7372064590454102, "learning_rate": 0.0001828201368523949, "loss": 0.4506, "step": 5306 }, { "epoch": 0.8307764558547276, "grad_norm": 0.6820610761642456, "learning_rate": 0.00018279569892473116, "loss": 0.3694, "step": 5307 }, { "epoch": 0.8309329993738259, "grad_norm": 1.2214698791503906, "learning_rate": 0.00018277126099706744, "loss": 0.4984, "step": 5308 }, { "epoch": 
0.8310895428929242, "grad_norm": 1.288681983947754, "learning_rate": 0.00018274682306940372, "loss": 0.6551, "step": 5309 }, { "epoch": 0.8312460864120226, "grad_norm": 1.0131791830062866, "learning_rate": 0.00018272238514173997, "loss": 0.3613, "step": 5310 }, { "epoch": 0.8314026299311209, "grad_norm": 2.0954971313476562, "learning_rate": 0.00018269794721407625, "loss": 0.5802, "step": 5311 }, { "epoch": 0.8315591734502191, "grad_norm": 0.8063324093818665, "learning_rate": 0.0001826735092864125, "loss": 0.4414, "step": 5312 }, { "epoch": 0.8317157169693175, "grad_norm": 1.2960774898529053, "learning_rate": 0.00018264907135874875, "loss": 0.485, "step": 5313 }, { "epoch": 0.8318722604884158, "grad_norm": 0.9982731342315674, "learning_rate": 0.00018262463343108503, "loss": 0.5702, "step": 5314 }, { "epoch": 0.8320288040075141, "grad_norm": 2.1627659797668457, "learning_rate": 0.0001826001955034213, "loss": 0.5258, "step": 5315 }, { "epoch": 0.8321853475266124, "grad_norm": 1.2846755981445312, "learning_rate": 0.00018257575757575756, "loss": 0.6247, "step": 5316 }, { "epoch": 0.8323418910457107, "grad_norm": 1.6041959524154663, "learning_rate": 0.00018255131964809384, "loss": 0.5156, "step": 5317 }, { "epoch": 0.832498434564809, "grad_norm": 0.9407545328140259, "learning_rate": 0.00018252688172043011, "loss": 0.3831, "step": 5318 }, { "epoch": 0.8326549780839073, "grad_norm": 1.5402854681015015, "learning_rate": 0.00018250244379276634, "loss": 0.9489, "step": 5319 }, { "epoch": 0.8328115216030056, "grad_norm": 0.9646233916282654, "learning_rate": 0.00018247800586510262, "loss": 0.5643, "step": 5320 }, { "epoch": 0.832968065122104, "grad_norm": 1.2722687721252441, "learning_rate": 0.0001824535679374389, "loss": 0.5555, "step": 5321 }, { "epoch": 0.8331246086412023, "grad_norm": 2.5088601112365723, "learning_rate": 0.00018242913000977515, "loss": 0.904, "step": 5322 }, { "epoch": 0.8332811521603005, "grad_norm": 1.9677549600601196, "learning_rate": 
0.00018240469208211142, "loss": 0.7769, "step": 5323 }, { "epoch": 0.8334376956793988, "grad_norm": 2.1917471885681152, "learning_rate": 0.0001823802541544477, "loss": 0.8852, "step": 5324 }, { "epoch": 0.8335942391984972, "grad_norm": 2.5052521228790283, "learning_rate": 0.00018235581622678395, "loss": 0.8834, "step": 5325 }, { "epoch": 0.8337507827175955, "grad_norm": 1.5879862308502197, "learning_rate": 0.00018233137829912023, "loss": 0.8438, "step": 5326 }, { "epoch": 0.8339073262366938, "grad_norm": 2.1139187812805176, "learning_rate": 0.0001823069403714565, "loss": 0.5548, "step": 5327 }, { "epoch": 0.8340638697557922, "grad_norm": 1.9790080785751343, "learning_rate": 0.00018228250244379274, "loss": 0.9604, "step": 5328 }, { "epoch": 0.8342204132748904, "grad_norm": 2.8384530544281006, "learning_rate": 0.00018225806451612901, "loss": 0.7413, "step": 5329 }, { "epoch": 0.8343769567939887, "grad_norm": 2.2223446369171143, "learning_rate": 0.0001822336265884653, "loss": 0.5664, "step": 5330 }, { "epoch": 0.834533500313087, "grad_norm": 2.0191867351531982, "learning_rate": 0.00018220918866080154, "loss": 0.6546, "step": 5331 }, { "epoch": 0.8346900438321854, "grad_norm": 2.5596179962158203, "learning_rate": 0.00018218475073313782, "loss": 0.7337, "step": 5332 }, { "epoch": 0.8348465873512837, "grad_norm": 1.8105648756027222, "learning_rate": 0.0001821603128054741, "loss": 0.6803, "step": 5333 }, { "epoch": 0.8350031308703819, "grad_norm": 1.6906538009643555, "learning_rate": 0.00018213587487781035, "loss": 0.8126, "step": 5334 }, { "epoch": 0.8351596743894802, "grad_norm": 1.9950919151306152, "learning_rate": 0.00018211143695014663, "loss": 0.7731, "step": 5335 }, { "epoch": 0.8353162179085786, "grad_norm": 1.5140913724899292, "learning_rate": 0.00018208699902248288, "loss": 0.7505, "step": 5336 }, { "epoch": 0.8354727614276769, "grad_norm": 4.230485439300537, "learning_rate": 0.00018206256109481913, "loss": 1.1235, "step": 5337 }, { "epoch": 0.8356293049467752, 
"grad_norm": 2.215449094772339, "learning_rate": 0.0001820381231671554, "loss": 1.1191, "step": 5338 }, { "epoch": 0.8357858484658736, "grad_norm": 2.6537933349609375, "learning_rate": 0.0001820136852394917, "loss": 0.9407, "step": 5339 }, { "epoch": 0.8359423919849718, "grad_norm": 1.676081895828247, "learning_rate": 0.00018198924731182794, "loss": 0.6755, "step": 5340 }, { "epoch": 0.8360989355040701, "grad_norm": 3.5831100940704346, "learning_rate": 0.00018196480938416422, "loss": 0.9189, "step": 5341 }, { "epoch": 0.8362554790231684, "grad_norm": 2.116856575012207, "learning_rate": 0.0001819403714565005, "loss": 0.8016, "step": 5342 }, { "epoch": 0.8364120225422668, "grad_norm": 4.248983383178711, "learning_rate": 0.00018191593352883672, "loss": 1.1484, "step": 5343 }, { "epoch": 0.8365685660613651, "grad_norm": 2.514638900756836, "learning_rate": 0.000181891495601173, "loss": 1.1032, "step": 5344 }, { "epoch": 0.8367251095804634, "grad_norm": 2.494845390319824, "learning_rate": 0.00018186705767350928, "loss": 0.8824, "step": 5345 }, { "epoch": 0.8368816530995616, "grad_norm": 1.30543053150177, "learning_rate": 0.00018184261974584553, "loss": 0.596, "step": 5346 }, { "epoch": 0.83703819661866, "grad_norm": 2.425416946411133, "learning_rate": 0.0001818181818181818, "loss": 1.4802, "step": 5347 }, { "epoch": 0.8371947401377583, "grad_norm": 1.7893593311309814, "learning_rate": 0.00018179374389051809, "loss": 0.4148, "step": 5348 }, { "epoch": 0.8373512836568566, "grad_norm": 3.8236687183380127, "learning_rate": 0.00018176930596285434, "loss": 0.7683, "step": 5349 }, { "epoch": 0.837507827175955, "grad_norm": 1.503875494003296, "learning_rate": 0.00018174486803519062, "loss": 0.886, "step": 5350 }, { "epoch": 0.8376643706950532, "grad_norm": 0.4786723256111145, "learning_rate": 0.0001817204301075269, "loss": 0.2861, "step": 5351 }, { "epoch": 0.8378209142141515, "grad_norm": 0.8675291538238525, "learning_rate": 0.00018169599217986312, "loss": 0.4324, "step": 5352 
}, { "epoch": 0.8379774577332498, "grad_norm": 0.8914163708686829, "learning_rate": 0.0001816715542521994, "loss": 0.3441, "step": 5353 }, { "epoch": 0.8381340012523482, "grad_norm": 0.8245097994804382, "learning_rate": 0.00018164711632453567, "loss": 0.3934, "step": 5354 }, { "epoch": 0.8382905447714465, "grad_norm": 0.8394196033477783, "learning_rate": 0.00018162267839687193, "loss": 0.2701, "step": 5355 }, { "epoch": 0.8384470882905448, "grad_norm": 1.792810320854187, "learning_rate": 0.0001815982404692082, "loss": 0.4637, "step": 5356 }, { "epoch": 0.838603631809643, "grad_norm": 0.979446291923523, "learning_rate": 0.00018157380254154448, "loss": 0.2882, "step": 5357 }, { "epoch": 0.8387601753287414, "grad_norm": 0.9786679148674011, "learning_rate": 0.00018154936461388073, "loss": 0.3115, "step": 5358 }, { "epoch": 0.8389167188478397, "grad_norm": 0.618453860282898, "learning_rate": 0.00018152492668621698, "loss": 0.3731, "step": 5359 }, { "epoch": 0.839073262366938, "grad_norm": 0.9989930391311646, "learning_rate": 0.00018150048875855326, "loss": 0.3479, "step": 5360 }, { "epoch": 0.8392298058860364, "grad_norm": 0.7567367553710938, "learning_rate": 0.00018147605083088951, "loss": 0.3715, "step": 5361 }, { "epoch": 0.8393863494051347, "grad_norm": 1.233569622039795, "learning_rate": 0.0001814516129032258, "loss": 0.4816, "step": 5362 }, { "epoch": 0.8395428929242329, "grad_norm": 1.9397759437561035, "learning_rate": 0.00018142717497556207, "loss": 0.6499, "step": 5363 }, { "epoch": 0.8396994364433312, "grad_norm": 0.8263518810272217, "learning_rate": 0.00018140273704789832, "loss": 0.3907, "step": 5364 }, { "epoch": 0.8398559799624296, "grad_norm": 0.7921581864356995, "learning_rate": 0.0001813782991202346, "loss": 0.3535, "step": 5365 }, { "epoch": 0.8400125234815279, "grad_norm": 1.2115155458450317, "learning_rate": 0.00018135386119257088, "loss": 0.5176, "step": 5366 }, { "epoch": 0.8401690670006262, "grad_norm": 1.769970417022705, "learning_rate": 
0.0001813294232649071, "loss": 0.6016, "step": 5367 }, { "epoch": 0.8403256105197244, "grad_norm": 0.8580262660980225, "learning_rate": 0.00018130498533724338, "loss": 0.4667, "step": 5368 }, { "epoch": 0.8404821540388228, "grad_norm": 0.9332250356674194, "learning_rate": 0.00018128054740957966, "loss": 0.4062, "step": 5369 }, { "epoch": 0.8406386975579211, "grad_norm": 2.4739444255828857, "learning_rate": 0.0001812561094819159, "loss": 0.5315, "step": 5370 }, { "epoch": 0.8407952410770194, "grad_norm": 1.3356553316116333, "learning_rate": 0.0001812316715542522, "loss": 0.4582, "step": 5371 }, { "epoch": 0.8409517845961177, "grad_norm": 1.6975020170211792, "learning_rate": 0.00018120723362658844, "loss": 0.7177, "step": 5372 }, { "epoch": 0.8411083281152161, "grad_norm": 1.566121220588684, "learning_rate": 0.00018118279569892472, "loss": 0.6598, "step": 5373 }, { "epoch": 0.8412648716343143, "grad_norm": 1.5858402252197266, "learning_rate": 0.000181158357771261, "loss": 1.0747, "step": 5374 }, { "epoch": 0.8414214151534126, "grad_norm": 0.9164276719093323, "learning_rate": 0.00018113391984359722, "loss": 0.4249, "step": 5375 }, { "epoch": 0.841577958672511, "grad_norm": 1.1517704725265503, "learning_rate": 0.0001811094819159335, "loss": 0.4706, "step": 5376 }, { "epoch": 0.8417345021916093, "grad_norm": 1.679603099822998, "learning_rate": 0.00018108504398826978, "loss": 0.6945, "step": 5377 }, { "epoch": 0.8418910457107076, "grad_norm": 1.76631760597229, "learning_rate": 0.00018106060606060603, "loss": 0.8951, "step": 5378 }, { "epoch": 0.8420475892298059, "grad_norm": 1.7927947044372559, "learning_rate": 0.0001810361681329423, "loss": 0.4552, "step": 5379 }, { "epoch": 0.8422041327489042, "grad_norm": 2.7323668003082275, "learning_rate": 0.00018101173020527859, "loss": 0.6823, "step": 5380 }, { "epoch": 0.8423606762680025, "grad_norm": 1.3663641214370728, "learning_rate": 0.00018098729227761484, "loss": 0.9579, "step": 5381 }, { "epoch": 0.8425172197871008, 
"grad_norm": 1.6697648763656616, "learning_rate": 0.00018096285434995112, "loss": 0.8778, "step": 5382 }, { "epoch": 0.8426737633061991, "grad_norm": 1.3440797328948975, "learning_rate": 0.00018093841642228737, "loss": 0.7617, "step": 5383 }, { "epoch": 0.8428303068252975, "grad_norm": 2.0381476879119873, "learning_rate": 0.00018091397849462362, "loss": 0.6647, "step": 5384 }, { "epoch": 0.8429868503443957, "grad_norm": 2.4340994358062744, "learning_rate": 0.0001808895405669599, "loss": 0.8858, "step": 5385 }, { "epoch": 0.843143393863494, "grad_norm": 3.9241204261779785, "learning_rate": 0.00018086510263929618, "loss": 1.9136, "step": 5386 }, { "epoch": 0.8432999373825923, "grad_norm": 1.6787124872207642, "learning_rate": 0.00018084066471163243, "loss": 1.0138, "step": 5387 }, { "epoch": 0.8434564809016907, "grad_norm": 2.1270811557769775, "learning_rate": 0.0001808162267839687, "loss": 1.1926, "step": 5388 }, { "epoch": 0.843613024420789, "grad_norm": 2.4386167526245117, "learning_rate": 0.00018079178885630498, "loss": 0.8631, "step": 5389 }, { "epoch": 0.8437695679398873, "grad_norm": 2.620981216430664, "learning_rate": 0.0001807673509286412, "loss": 1.6441, "step": 5390 }, { "epoch": 0.8439261114589856, "grad_norm": 2.2242493629455566, "learning_rate": 0.00018074291300097749, "loss": 1.1166, "step": 5391 }, { "epoch": 0.8440826549780839, "grad_norm": 2.703012466430664, "learning_rate": 0.00018071847507331376, "loss": 1.421, "step": 5392 }, { "epoch": 0.8442391984971822, "grad_norm": 2.006281852722168, "learning_rate": 0.00018069403714565002, "loss": 1.2213, "step": 5393 }, { "epoch": 0.8443957420162805, "grad_norm": 2.2452259063720703, "learning_rate": 0.0001806695992179863, "loss": 0.8721, "step": 5394 }, { "epoch": 0.8445522855353789, "grad_norm": 2.2399497032165527, "learning_rate": 0.00018064516129032257, "loss": 0.7445, "step": 5395 }, { "epoch": 0.8447088290544772, "grad_norm": 1.1072652339935303, "learning_rate": 0.00018062072336265882, "loss": 0.4617, 
"step": 5396 }, { "epoch": 0.8448653725735754, "grad_norm": 4.509149074554443, "learning_rate": 0.0001805962854349951, "loss": 1.5535, "step": 5397 }, { "epoch": 0.8450219160926737, "grad_norm": 4.23785924911499, "learning_rate": 0.00018057184750733138, "loss": 0.9141, "step": 5398 }, { "epoch": 0.8451784596117721, "grad_norm": 2.642498254776001, "learning_rate": 0.0001805474095796676, "loss": 1.5931, "step": 5399 }, { "epoch": 0.8453350031308704, "grad_norm": 2.1803855895996094, "learning_rate": 0.00018052297165200388, "loss": 0.9882, "step": 5400 }, { "epoch": 0.8454915466499687, "grad_norm": 0.598574161529541, "learning_rate": 0.00018049853372434016, "loss": 0.2871, "step": 5401 }, { "epoch": 0.845648090169067, "grad_norm": 0.48999595642089844, "learning_rate": 0.0001804740957966764, "loss": 0.2676, "step": 5402 }, { "epoch": 0.8458046336881653, "grad_norm": 0.6198676228523254, "learning_rate": 0.0001804496578690127, "loss": 0.2758, "step": 5403 }, { "epoch": 0.8459611772072636, "grad_norm": 1.454820990562439, "learning_rate": 0.00018042521994134897, "loss": 0.3911, "step": 5404 }, { "epoch": 0.8461177207263619, "grad_norm": 0.7595102787017822, "learning_rate": 0.00018040078201368522, "loss": 0.2041, "step": 5405 }, { "epoch": 0.8462742642454603, "grad_norm": 0.6085957288742065, "learning_rate": 0.0001803763440860215, "loss": 0.3308, "step": 5406 }, { "epoch": 0.8464308077645586, "grad_norm": 0.7562193274497986, "learning_rate": 0.00018035190615835775, "loss": 0.2946, "step": 5407 }, { "epoch": 0.8465873512836568, "grad_norm": 0.44967907667160034, "learning_rate": 0.000180327468230694, "loss": 0.2354, "step": 5408 }, { "epoch": 0.8467438948027551, "grad_norm": 0.9210385680198669, "learning_rate": 0.00018030303030303028, "loss": 0.5657, "step": 5409 }, { "epoch": 0.8469004383218535, "grad_norm": 0.6817724704742432, "learning_rate": 0.00018027859237536656, "loss": 0.3728, "step": 5410 }, { "epoch": 0.8470569818409518, "grad_norm": 0.7915209531784058, 
"learning_rate": 0.0001802541544477028, "loss": 0.322, "step": 5411 }, { "epoch": 0.8472135253600501, "grad_norm": 1.467758297920227, "learning_rate": 0.0001802297165200391, "loss": 0.3134, "step": 5412 }, { "epoch": 0.8473700688791485, "grad_norm": 1.2615087032318115, "learning_rate": 0.00018020527859237537, "loss": 0.4713, "step": 5413 }, { "epoch": 0.8475266123982467, "grad_norm": 0.9758779406547546, "learning_rate": 0.0001801808406647116, "loss": 0.3469, "step": 5414 }, { "epoch": 0.847683155917345, "grad_norm": 1.8404748439788818, "learning_rate": 0.00018015640273704787, "loss": 0.3852, "step": 5415 }, { "epoch": 0.8478396994364433, "grad_norm": 1.1955705881118774, "learning_rate": 0.00018013196480938415, "loss": 0.9143, "step": 5416 }, { "epoch": 0.8479962429555417, "grad_norm": 1.832085132598877, "learning_rate": 0.0001801075268817204, "loss": 0.6122, "step": 5417 }, { "epoch": 0.84815278647464, "grad_norm": 1.7228481769561768, "learning_rate": 0.00018008308895405668, "loss": 0.6208, "step": 5418 }, { "epoch": 0.8483093299937383, "grad_norm": 1.7654926776885986, "learning_rate": 0.00018005865102639295, "loss": 0.8098, "step": 5419 }, { "epoch": 0.8484658735128365, "grad_norm": 0.9912778735160828, "learning_rate": 0.0001800342130987292, "loss": 0.4749, "step": 5420 }, { "epoch": 0.8486224170319349, "grad_norm": 0.9740948677062988, "learning_rate": 0.00018000977517106548, "loss": 0.456, "step": 5421 }, { "epoch": 0.8487789605510332, "grad_norm": 1.4612542390823364, "learning_rate": 0.00017998533724340176, "loss": 0.6931, "step": 5422 }, { "epoch": 0.8489355040701315, "grad_norm": 1.1655532121658325, "learning_rate": 0.00017996089931573799, "loss": 0.4385, "step": 5423 }, { "epoch": 0.8490920475892298, "grad_norm": 1.642591953277588, "learning_rate": 0.00017993646138807426, "loss": 0.7765, "step": 5424 }, { "epoch": 0.8492485911083281, "grad_norm": 4.441413879394531, "learning_rate": 0.00017991202346041054, "loss": 0.8041, "step": 5425 }, { "epoch": 
0.8494051346274264, "grad_norm": 4.761545181274414, "learning_rate": 0.0001798875855327468, "loss": 1.3471, "step": 5426 }, { "epoch": 0.8495616781465247, "grad_norm": 1.85892915725708, "learning_rate": 0.00017986314760508307, "loss": 0.9073, "step": 5427 }, { "epoch": 0.849718221665623, "grad_norm": 1.2642240524291992, "learning_rate": 0.00017983870967741935, "loss": 0.5756, "step": 5428 }, { "epoch": 0.8498747651847214, "grad_norm": 1.6355515718460083, "learning_rate": 0.0001798142717497556, "loss": 0.622, "step": 5429 }, { "epoch": 0.8500313087038197, "grad_norm": 2.2661969661712646, "learning_rate": 0.00017978983382209185, "loss": 0.9791, "step": 5430 }, { "epoch": 0.8501878522229179, "grad_norm": 2.119992256164551, "learning_rate": 0.00017976539589442813, "loss": 0.9954, "step": 5431 }, { "epoch": 0.8503443957420163, "grad_norm": 1.942277431488037, "learning_rate": 0.00017974095796676438, "loss": 0.7139, "step": 5432 }, { "epoch": 0.8505009392611146, "grad_norm": 2.0111358165740967, "learning_rate": 0.00017971652003910066, "loss": 1.0877, "step": 5433 }, { "epoch": 0.8506574827802129, "grad_norm": 20.57202911376953, "learning_rate": 0.00017969208211143694, "loss": 1.0568, "step": 5434 }, { "epoch": 0.8508140262993112, "grad_norm": 6.058287143707275, "learning_rate": 0.0001796676441837732, "loss": 1.0449, "step": 5435 }, { "epoch": 0.8509705698184096, "grad_norm": 2.6762993335723877, "learning_rate": 0.00017964320625610947, "loss": 0.7661, "step": 5436 }, { "epoch": 0.8511271133375078, "grad_norm": 3.0614171028137207, "learning_rate": 0.00017961876832844575, "loss": 0.7657, "step": 5437 }, { "epoch": 0.8512836568566061, "grad_norm": 2.681861162185669, "learning_rate": 0.00017959433040078197, "loss": 1.1165, "step": 5438 }, { "epoch": 0.8514402003757044, "grad_norm": 3.7926366329193115, "learning_rate": 0.00017956989247311825, "loss": 1.3606, "step": 5439 }, { "epoch": 0.8515967438948028, "grad_norm": 2.6977744102478027, "learning_rate": 0.00017954545454545453, 
"loss": 1.2335, "step": 5440 }, { "epoch": 0.8517532874139011, "grad_norm": 1.4805278778076172, "learning_rate": 0.00017952101661779078, "loss": 0.9316, "step": 5441 }, { "epoch": 0.8519098309329993, "grad_norm": 2.5783205032348633, "learning_rate": 0.00017949657869012706, "loss": 1.1716, "step": 5442 }, { "epoch": 0.8520663744520977, "grad_norm": 2.976900100708008, "learning_rate": 0.00017947214076246334, "loss": 1.5495, "step": 5443 }, { "epoch": 0.852222917971196, "grad_norm": 3.093869924545288, "learning_rate": 0.0001794477028347996, "loss": 1.321, "step": 5444 }, { "epoch": 0.8523794614902943, "grad_norm": 1.297680139541626, "learning_rate": 0.00017942326490713587, "loss": 0.7588, "step": 5445 }, { "epoch": 0.8525360050093926, "grad_norm": 3.9038898944854736, "learning_rate": 0.00017939882697947214, "loss": 1.2392, "step": 5446 }, { "epoch": 0.852692548528491, "grad_norm": 1.2751731872558594, "learning_rate": 0.00017937438905180837, "loss": 0.3811, "step": 5447 }, { "epoch": 0.8528490920475892, "grad_norm": 1.0307965278625488, "learning_rate": 0.00017934995112414465, "loss": 0.382, "step": 5448 }, { "epoch": 0.8530056355666875, "grad_norm": 2.1279537677764893, "learning_rate": 0.00017932551319648093, "loss": 0.8235, "step": 5449 }, { "epoch": 0.8531621790857858, "grad_norm": 2.3851094245910645, "learning_rate": 0.00017930107526881718, "loss": 1.0698, "step": 5450 }, { "epoch": 0.8533187226048842, "grad_norm": 0.3719303011894226, "learning_rate": 0.00017927663734115345, "loss": 0.2337, "step": 5451 }, { "epoch": 0.8534752661239825, "grad_norm": 0.48134690523147583, "learning_rate": 0.00017925219941348973, "loss": 0.4041, "step": 5452 }, { "epoch": 0.8536318096430808, "grad_norm": 0.40173661708831787, "learning_rate": 0.00017922776148582598, "loss": 0.2524, "step": 5453 }, { "epoch": 0.853788353162179, "grad_norm": 0.5400435328483582, "learning_rate": 0.00017920332355816224, "loss": 0.2702, "step": 5454 }, { "epoch": 0.8539448966812774, "grad_norm": 
0.8261874318122864, "learning_rate": 0.00017917888563049851, "loss": 0.4001, "step": 5455 }, { "epoch": 0.8541014402003757, "grad_norm": 0.846582293510437, "learning_rate": 0.00017915444770283477, "loss": 0.5371, "step": 5456 }, { "epoch": 0.854257983719474, "grad_norm": 0.961149275302887, "learning_rate": 0.00017913000977517104, "loss": 0.5499, "step": 5457 }, { "epoch": 0.8544145272385724, "grad_norm": 0.5829175710678101, "learning_rate": 0.00017910557184750732, "loss": 0.4273, "step": 5458 }, { "epoch": 0.8545710707576706, "grad_norm": 0.8650907874107361, "learning_rate": 0.00017908113391984357, "loss": 0.2947, "step": 5459 }, { "epoch": 0.8547276142767689, "grad_norm": 0.7858892679214478, "learning_rate": 0.00017905669599217985, "loss": 0.4431, "step": 5460 }, { "epoch": 0.8548841577958672, "grad_norm": 0.6087132692337036, "learning_rate": 0.00017903225806451613, "loss": 0.4506, "step": 5461 }, { "epoch": 0.8550407013149656, "grad_norm": 0.840796709060669, "learning_rate": 0.00017900782013685235, "loss": 0.4651, "step": 5462 }, { "epoch": 0.8551972448340639, "grad_norm": 2.8522396087646484, "learning_rate": 0.00017898338220918863, "loss": 0.5114, "step": 5463 }, { "epoch": 0.8553537883531622, "grad_norm": 1.8388530015945435, "learning_rate": 0.0001789589442815249, "loss": 0.7611, "step": 5464 }, { "epoch": 0.8555103318722604, "grad_norm": 1.0927659273147583, "learning_rate": 0.00017893450635386116, "loss": 0.4343, "step": 5465 }, { "epoch": 0.8556668753913588, "grad_norm": 1.3488267660140991, "learning_rate": 0.00017891006842619744, "loss": 0.591, "step": 5466 }, { "epoch": 0.8558234189104571, "grad_norm": 2.569788932800293, "learning_rate": 0.00017888563049853372, "loss": 0.5916, "step": 5467 }, { "epoch": 0.8559799624295554, "grad_norm": 1.4221092462539673, "learning_rate": 0.00017886119257086997, "loss": 0.7243, "step": 5468 }, { "epoch": 0.8561365059486538, "grad_norm": 0.8458523750305176, "learning_rate": 0.00017883675464320625, "loss": 0.3696, "step": 
5469 }, { "epoch": 0.8562930494677521, "grad_norm": 1.2293727397918701, "learning_rate": 0.00017881231671554253, "loss": 0.6373, "step": 5470 }, { "epoch": 0.8564495929868503, "grad_norm": 1.3807203769683838, "learning_rate": 0.00017878787878787875, "loss": 0.6213, "step": 5471 }, { "epoch": 0.8566061365059486, "grad_norm": 3.1636455059051514, "learning_rate": 0.00017876344086021503, "loss": 1.1287, "step": 5472 }, { "epoch": 0.856762680025047, "grad_norm": 1.5414478778839111, "learning_rate": 0.0001787390029325513, "loss": 0.6642, "step": 5473 }, { "epoch": 0.8569192235441453, "grad_norm": 1.7993489503860474, "learning_rate": 0.00017871456500488756, "loss": 0.5733, "step": 5474 }, { "epoch": 0.8570757670632436, "grad_norm": 2.5660245418548584, "learning_rate": 0.00017869012707722384, "loss": 0.976, "step": 5475 }, { "epoch": 0.8572323105823418, "grad_norm": 1.549904465675354, "learning_rate": 0.00017866568914956012, "loss": 1.0276, "step": 5476 }, { "epoch": 0.8573888541014402, "grad_norm": 1.6813697814941406, "learning_rate": 0.00017864125122189634, "loss": 0.6649, "step": 5477 }, { "epoch": 0.8575453976205385, "grad_norm": 1.3475943803787231, "learning_rate": 0.00017861681329423262, "loss": 0.7519, "step": 5478 }, { "epoch": 0.8577019411396368, "grad_norm": 2.2717173099517822, "learning_rate": 0.0001785923753665689, "loss": 0.4629, "step": 5479 }, { "epoch": 0.8578584846587352, "grad_norm": 1.826530933380127, "learning_rate": 0.00017856793743890515, "loss": 0.8551, "step": 5480 }, { "epoch": 0.8580150281778335, "grad_norm": 2.976663112640381, "learning_rate": 0.00017854349951124143, "loss": 0.8756, "step": 5481 }, { "epoch": 0.8581715716969317, "grad_norm": 1.6091177463531494, "learning_rate": 0.0001785190615835777, "loss": 0.8143, "step": 5482 }, { "epoch": 0.85832811521603, "grad_norm": 6.574666500091553, "learning_rate": 0.00017849462365591396, "loss": 1.8195, "step": 5483 }, { "epoch": 0.8584846587351284, "grad_norm": 4.467514514923096, "learning_rate": 
0.00017847018572825023, "loss": 1.5207, "step": 5484 }, { "epoch": 0.8586412022542267, "grad_norm": 2.5957164764404297, "learning_rate": 0.0001784457478005865, "loss": 0.6295, "step": 5485 }, { "epoch": 0.858797745773325, "grad_norm": 1.4115712642669678, "learning_rate": 0.00017842130987292274, "loss": 1.1869, "step": 5486 }, { "epoch": 0.8589542892924233, "grad_norm": 3.7182257175445557, "learning_rate": 0.00017839687194525901, "loss": 1.3938, "step": 5487 }, { "epoch": 0.8591108328115216, "grad_norm": 2.587322235107422, "learning_rate": 0.0001783724340175953, "loss": 1.2754, "step": 5488 }, { "epoch": 0.8592673763306199, "grad_norm": 2.736600160598755, "learning_rate": 0.00017834799608993154, "loss": 1.1982, "step": 5489 }, { "epoch": 0.8594239198497182, "grad_norm": 5.915489673614502, "learning_rate": 0.00017832355816226782, "loss": 1.8069, "step": 5490 }, { "epoch": 0.8595804633688165, "grad_norm": 4.513766765594482, "learning_rate": 0.0001782991202346041, "loss": 1.2607, "step": 5491 }, { "epoch": 0.8597370068879149, "grad_norm": 2.3064467906951904, "learning_rate": 0.00017827468230694035, "loss": 0.6809, "step": 5492 }, { "epoch": 0.8598935504070131, "grad_norm": 1.9814224243164062, "learning_rate": 0.00017825024437927663, "loss": 0.4819, "step": 5493 }, { "epoch": 0.8600500939261114, "grad_norm": 2.532914400100708, "learning_rate": 0.0001782258064516129, "loss": 1.5882, "step": 5494 }, { "epoch": 0.8602066374452098, "grad_norm": 1.8483891487121582, "learning_rate": 0.00017820136852394913, "loss": 0.8876, "step": 5495 }, { "epoch": 0.8603631809643081, "grad_norm": 3.369330644607544, "learning_rate": 0.0001781769305962854, "loss": 1.2741, "step": 5496 }, { "epoch": 0.8605197244834064, "grad_norm": 2.431166648864746, "learning_rate": 0.0001781524926686217, "loss": 0.5309, "step": 5497 }, { "epoch": 0.8606762680025047, "grad_norm": 2.442338466644287, "learning_rate": 0.00017812805474095794, "loss": 0.8135, "step": 5498 }, { "epoch": 0.860832811521603, 
"grad_norm": 2.9823551177978516, "learning_rate": 0.00017810361681329422, "loss": 0.6185, "step": 5499 }, { "epoch": 0.8609893550407013, "grad_norm": 3.414311170578003, "learning_rate": 0.0001780791788856305, "loss": 1.6136, "step": 5500 }, { "epoch": 0.8611458985597996, "grad_norm": 0.6202065348625183, "learning_rate": 0.00017805474095796672, "loss": 0.3127, "step": 5501 }, { "epoch": 0.861302442078898, "grad_norm": 0.6588072776794434, "learning_rate": 0.000178030303030303, "loss": 0.2606, "step": 5502 }, { "epoch": 0.8614589855979963, "grad_norm": 0.9525992274284363, "learning_rate": 0.00017800586510263928, "loss": 0.267, "step": 5503 }, { "epoch": 0.8616155291170946, "grad_norm": 0.8494582176208496, "learning_rate": 0.00017798142717497553, "loss": 0.501, "step": 5504 }, { "epoch": 0.8617720726361928, "grad_norm": 0.632400393486023, "learning_rate": 0.0001779569892473118, "loss": 0.4369, "step": 5505 }, { "epoch": 0.8619286161552911, "grad_norm": 0.5401912331581116, "learning_rate": 0.0001779325513196481, "loss": 0.3133, "step": 5506 }, { "epoch": 0.8620851596743895, "grad_norm": 0.6302811503410339, "learning_rate": 0.00017790811339198434, "loss": 0.3823, "step": 5507 }, { "epoch": 0.8622417031934878, "grad_norm": 0.8073775768280029, "learning_rate": 0.00017788367546432062, "loss": 0.2837, "step": 5508 }, { "epoch": 0.8623982467125861, "grad_norm": 0.7132818698883057, "learning_rate": 0.0001778592375366569, "loss": 0.2525, "step": 5509 }, { "epoch": 0.8625547902316844, "grad_norm": 1.0062077045440674, "learning_rate": 0.00017783479960899312, "loss": 0.3663, "step": 5510 }, { "epoch": 0.8627113337507827, "grad_norm": 0.8359341621398926, "learning_rate": 0.0001778103616813294, "loss": 0.351, "step": 5511 }, { "epoch": 0.862867877269881, "grad_norm": 0.8639108538627625, "learning_rate": 0.00017778592375366568, "loss": 0.3803, "step": 5512 }, { "epoch": 0.8630244207889793, "grad_norm": 0.7320135831832886, "learning_rate": 0.00017776148582600193, "loss": 0.3996, 
"step": 5513 }, { "epoch": 0.8631809643080777, "grad_norm": 0.9680222868919373, "learning_rate": 0.0001777370478983382, "loss": 0.3478, "step": 5514 }, { "epoch": 0.863337507827176, "grad_norm": 0.8415281176567078, "learning_rate": 0.00017771260997067448, "loss": 0.4855, "step": 5515 }, { "epoch": 0.8634940513462742, "grad_norm": 1.6651570796966553, "learning_rate": 0.00017768817204301073, "loss": 0.6531, "step": 5516 }, { "epoch": 0.8636505948653725, "grad_norm": 1.7974225282669067, "learning_rate": 0.000177663734115347, "loss": 0.7548, "step": 5517 }, { "epoch": 0.8638071383844709, "grad_norm": 1.254573106765747, "learning_rate": 0.00017763929618768326, "loss": 0.6231, "step": 5518 }, { "epoch": 0.8639636819035692, "grad_norm": 0.7575573921203613, "learning_rate": 0.00017761485826001952, "loss": 0.4347, "step": 5519 }, { "epoch": 0.8641202254226675, "grad_norm": 1.291746973991394, "learning_rate": 0.0001775904203323558, "loss": 0.6743, "step": 5520 }, { "epoch": 0.8642767689417659, "grad_norm": 1.3523181676864624, "learning_rate": 0.00017756598240469207, "loss": 0.5223, "step": 5521 }, { "epoch": 0.8644333124608641, "grad_norm": 1.1703637838363647, "learning_rate": 0.00017754154447702832, "loss": 0.6335, "step": 5522 }, { "epoch": 0.8645898559799624, "grad_norm": 2.5771422386169434, "learning_rate": 0.0001775171065493646, "loss": 0.5819, "step": 5523 }, { "epoch": 0.8647463994990607, "grad_norm": 1.401025414466858, "learning_rate": 0.00017749266862170088, "loss": 0.7858, "step": 5524 }, { "epoch": 0.8649029430181591, "grad_norm": 1.5256321430206299, "learning_rate": 0.0001774682306940371, "loss": 0.55, "step": 5525 }, { "epoch": 0.8650594865372574, "grad_norm": 1.255598545074463, "learning_rate": 0.00017744379276637338, "loss": 0.4361, "step": 5526 }, { "epoch": 0.8652160300563556, "grad_norm": 2.342947483062744, "learning_rate": 0.00017741935483870966, "loss": 1.0666, "step": 5527 }, { "epoch": 0.8653725735754539, "grad_norm": 2.2198994159698486, 
"learning_rate": 0.0001773949169110459, "loss": 1.0796, "step": 5528 }, { "epoch": 0.8655291170945523, "grad_norm": 2.544025182723999, "learning_rate": 0.0001773704789833822, "loss": 1.0962, "step": 5529 }, { "epoch": 0.8656856606136506, "grad_norm": 3.0264079570770264, "learning_rate": 0.00017734604105571847, "loss": 0.7408, "step": 5530 }, { "epoch": 0.8658422041327489, "grad_norm": 1.2119104862213135, "learning_rate": 0.00017732160312805472, "loss": 0.618, "step": 5531 }, { "epoch": 0.8659987476518473, "grad_norm": 1.5257172584533691, "learning_rate": 0.000177297165200391, "loss": 0.631, "step": 5532 }, { "epoch": 0.8661552911709455, "grad_norm": 2.0830912590026855, "learning_rate": 0.00017727272727272728, "loss": 0.7618, "step": 5533 }, { "epoch": 0.8663118346900438, "grad_norm": 2.5756418704986572, "learning_rate": 0.0001772482893450635, "loss": 0.8881, "step": 5534 }, { "epoch": 0.8664683782091421, "grad_norm": 2.78877329826355, "learning_rate": 0.00017722385141739978, "loss": 0.7217, "step": 5535 }, { "epoch": 0.8666249217282405, "grad_norm": 2.5432016849517822, "learning_rate": 0.00017719941348973606, "loss": 0.9808, "step": 5536 }, { "epoch": 0.8667814652473388, "grad_norm": 1.7844754457473755, "learning_rate": 0.0001771749755620723, "loss": 0.8685, "step": 5537 }, { "epoch": 0.8669380087664371, "grad_norm": 2.5685477256774902, "learning_rate": 0.0001771505376344086, "loss": 1.4534, "step": 5538 }, { "epoch": 0.8670945522855353, "grad_norm": 2.9484968185424805, "learning_rate": 0.00017712609970674487, "loss": 1.3919, "step": 5539 }, { "epoch": 0.8672510958046337, "grad_norm": 3.510503053665161, "learning_rate": 0.00017710166177908112, "loss": 0.8989, "step": 5540 }, { "epoch": 0.867407639323732, "grad_norm": 3.30690860748291, "learning_rate": 0.0001770772238514174, "loss": 1.7287, "step": 5541 }, { "epoch": 0.8675641828428303, "grad_norm": 4.295806407928467, "learning_rate": 0.00017705278592375365, "loss": 1.2677, "step": 5542 }, { "epoch": 
0.8677207263619287, "grad_norm": 1.898603081703186, "learning_rate": 0.0001770283479960899, "loss": 1.0414, "step": 5543 }, { "epoch": 0.867877269881027, "grad_norm": 1.397929310798645, "learning_rate": 0.00017700391006842618, "loss": 1.0474, "step": 5544 }, { "epoch": 0.8680338134001252, "grad_norm": 3.208662986755371, "learning_rate": 0.00017697947214076245, "loss": 1.4236, "step": 5545 }, { "epoch": 0.8681903569192235, "grad_norm": 4.296112537384033, "learning_rate": 0.0001769550342130987, "loss": 1.4281, "step": 5546 }, { "epoch": 0.8683469004383219, "grad_norm": 5.432612895965576, "learning_rate": 0.00017693059628543498, "loss": 0.9672, "step": 5547 }, { "epoch": 0.8685034439574202, "grad_norm": 2.7205517292022705, "learning_rate": 0.00017690615835777126, "loss": 0.863, "step": 5548 }, { "epoch": 0.8686599874765185, "grad_norm": 2.6513023376464844, "learning_rate": 0.0001768817204301075, "loss": 0.9332, "step": 5549 }, { "epoch": 0.8688165309956167, "grad_norm": 1.974501132965088, "learning_rate": 0.00017685728250244376, "loss": 0.7272, "step": 5550 }, { "epoch": 0.8689730745147151, "grad_norm": 0.6594939827919006, "learning_rate": 0.00017683284457478004, "loss": 0.3761, "step": 5551 }, { "epoch": 0.8691296180338134, "grad_norm": 1.849006175994873, "learning_rate": 0.0001768084066471163, "loss": 0.4447, "step": 5552 }, { "epoch": 0.8692861615529117, "grad_norm": 0.555202841758728, "learning_rate": 0.00017678396871945257, "loss": 0.2471, "step": 5553 }, { "epoch": 0.86944270507201, "grad_norm": 0.5816833972930908, "learning_rate": 0.00017675953079178885, "loss": 0.3585, "step": 5554 }, { "epoch": 0.8695992485911084, "grad_norm": 0.6719066500663757, "learning_rate": 0.0001767350928641251, "loss": 0.363, "step": 5555 }, { "epoch": 0.8697557921102066, "grad_norm": 0.6604121327400208, "learning_rate": 0.00017671065493646138, "loss": 0.3488, "step": 5556 }, { "epoch": 0.8699123356293049, "grad_norm": 0.6314492225646973, "learning_rate": 0.00017668621700879766, 
"loss": 0.2469, "step": 5557 }, { "epoch": 0.8700688791484033, "grad_norm": 0.6827051639556885, "learning_rate": 0.00017666177908113388, "loss": 0.3692, "step": 5558 }, { "epoch": 0.8702254226675016, "grad_norm": 0.6794304251670837, "learning_rate": 0.00017663734115347016, "loss": 0.2522, "step": 5559 }, { "epoch": 0.8703819661865999, "grad_norm": 0.6768459677696228, "learning_rate": 0.00017661290322580644, "loss": 0.4427, "step": 5560 }, { "epoch": 0.8705385097056982, "grad_norm": 0.9314690232276917, "learning_rate": 0.0001765884652981427, "loss": 0.2897, "step": 5561 }, { "epoch": 0.8706950532247965, "grad_norm": 0.586393415927887, "learning_rate": 0.00017656402737047897, "loss": 0.2953, "step": 5562 }, { "epoch": 0.8708515967438948, "grad_norm": 1.349017858505249, "learning_rate": 0.00017653958944281525, "loss": 0.4265, "step": 5563 }, { "epoch": 0.8710081402629931, "grad_norm": 0.8151659965515137, "learning_rate": 0.0001765151515151515, "loss": 0.4198, "step": 5564 }, { "epoch": 0.8711646837820914, "grad_norm": 0.9918164014816284, "learning_rate": 0.00017649071358748778, "loss": 0.6098, "step": 5565 }, { "epoch": 0.8713212273011898, "grad_norm": 0.7317390441894531, "learning_rate": 0.00017646627565982403, "loss": 0.4211, "step": 5566 }, { "epoch": 0.871477770820288, "grad_norm": 1.2176004648208618, "learning_rate": 0.00017644183773216028, "loss": 0.5503, "step": 5567 }, { "epoch": 0.8716343143393863, "grad_norm": 1.647218108177185, "learning_rate": 0.00017641739980449656, "loss": 0.8489, "step": 5568 }, { "epoch": 0.8717908578584846, "grad_norm": 0.9232958555221558, "learning_rate": 0.00017639296187683284, "loss": 0.4466, "step": 5569 }, { "epoch": 0.871947401377583, "grad_norm": 0.800988495349884, "learning_rate": 0.0001763685239491691, "loss": 0.511, "step": 5570 }, { "epoch": 0.8721039448966813, "grad_norm": 1.9028470516204834, "learning_rate": 0.00017634408602150537, "loss": 0.6131, "step": 5571 }, { "epoch": 0.8722604884157796, "grad_norm": 
1.8957113027572632, "learning_rate": 0.00017631964809384164, "loss": 0.5967, "step": 5572 }, { "epoch": 0.8724170319348779, "grad_norm": 2.135235071182251, "learning_rate": 0.00017629521016617787, "loss": 0.7269, "step": 5573 }, { "epoch": 0.8725735754539762, "grad_norm": 3.0502326488494873, "learning_rate": 0.00017627077223851415, "loss": 0.482, "step": 5574 }, { "epoch": 0.8727301189730745, "grad_norm": 1.7193087339401245, "learning_rate": 0.00017624633431085043, "loss": 0.7517, "step": 5575 }, { "epoch": 0.8728866624921728, "grad_norm": 2.7776575088500977, "learning_rate": 0.00017622189638318668, "loss": 0.5461, "step": 5576 }, { "epoch": 0.8730432060112712, "grad_norm": 1.1250964403152466, "learning_rate": 0.00017619745845552296, "loss": 0.4677, "step": 5577 }, { "epoch": 0.8731997495303695, "grad_norm": 2.022542953491211, "learning_rate": 0.00017617302052785923, "loss": 0.7262, "step": 5578 }, { "epoch": 0.8733562930494677, "grad_norm": 2.4576961994171143, "learning_rate": 0.00017614858260019548, "loss": 1.2791, "step": 5579 }, { "epoch": 0.873512836568566, "grad_norm": 2.275444984436035, "learning_rate": 0.00017612414467253176, "loss": 0.6484, "step": 5580 }, { "epoch": 0.8736693800876644, "grad_norm": 2.2250964641571045, "learning_rate": 0.00017609970674486804, "loss": 0.6692, "step": 5581 }, { "epoch": 0.8738259236067627, "grad_norm": 3.6571662425994873, "learning_rate": 0.00017607526881720427, "loss": 1.1964, "step": 5582 }, { "epoch": 0.873982467125861, "grad_norm": 2.21502947807312, "learning_rate": 0.00017605083088954054, "loss": 0.9287, "step": 5583 }, { "epoch": 0.8741390106449592, "grad_norm": 1.8925524950027466, "learning_rate": 0.00017602639296187682, "loss": 1.0618, "step": 5584 }, { "epoch": 0.8742955541640576, "grad_norm": 1.7226258516311646, "learning_rate": 0.00017600195503421307, "loss": 0.9194, "step": 5585 }, { "epoch": 0.8744520976831559, "grad_norm": 2.176206111907959, "learning_rate": 0.00017597751710654935, "loss": 0.6395, "step": 5586 
}, { "epoch": 0.8746086412022542, "grad_norm": 2.134561538696289, "learning_rate": 0.00017595307917888563, "loss": 1.4345, "step": 5587 }, { "epoch": 0.8747651847213526, "grad_norm": 3.149474859237671, "learning_rate": 0.00017592864125122188, "loss": 0.8548, "step": 5588 }, { "epoch": 0.8749217282404509, "grad_norm": 1.696314811706543, "learning_rate": 0.00017590420332355813, "loss": 0.9873, "step": 5589 }, { "epoch": 0.8750782717595491, "grad_norm": 2.3394696712493896, "learning_rate": 0.0001758797653958944, "loss": 1.0674, "step": 5590 }, { "epoch": 0.8752348152786474, "grad_norm": 1.807666301727295, "learning_rate": 0.00017585532746823066, "loss": 1.3712, "step": 5591 }, { "epoch": 0.8753913587977458, "grad_norm": 4.91036319732666, "learning_rate": 0.00017583088954056694, "loss": 1.8955, "step": 5592 }, { "epoch": 0.8755479023168441, "grad_norm": 3.2543954849243164, "learning_rate": 0.00017580645161290322, "loss": 1.4988, "step": 5593 }, { "epoch": 0.8757044458359424, "grad_norm": 1.3064311742782593, "learning_rate": 0.00017578201368523947, "loss": 0.6692, "step": 5594 }, { "epoch": 0.8758609893550408, "grad_norm": 3.3625075817108154, "learning_rate": 0.00017575757575757575, "loss": 1.0202, "step": 5595 }, { "epoch": 0.876017532874139, "grad_norm": 1.9790335893630981, "learning_rate": 0.00017573313782991203, "loss": 1.3143, "step": 5596 }, { "epoch": 0.8761740763932373, "grad_norm": 3.206354856491089, "learning_rate": 0.00017570869990224825, "loss": 0.8278, "step": 5597 }, { "epoch": 0.8763306199123356, "grad_norm": 3.371476650238037, "learning_rate": 0.00017568426197458453, "loss": 0.5511, "step": 5598 }, { "epoch": 0.876487163431434, "grad_norm": 3.493743419647217, "learning_rate": 0.0001756598240469208, "loss": 1.5164, "step": 5599 }, { "epoch": 0.8766437069505323, "grad_norm": 2.634359359741211, "learning_rate": 0.00017563538611925706, "loss": 1.186, "step": 5600 }, { "epoch": 0.8768002504696305, "grad_norm": 0.7865440845489502, "learning_rate": 
0.00017561094819159334, "loss": 0.3571, "step": 5601 }, { "epoch": 0.8769567939887288, "grad_norm": 0.7957772612571716, "learning_rate": 0.00017558651026392962, "loss": 0.3469, "step": 5602 }, { "epoch": 0.8771133375078272, "grad_norm": 0.6495577692985535, "learning_rate": 0.00017556207233626587, "loss": 0.3274, "step": 5603 }, { "epoch": 0.8772698810269255, "grad_norm": 0.5228533744812012, "learning_rate": 0.00017553763440860215, "loss": 0.2298, "step": 5604 }, { "epoch": 0.8774264245460238, "grad_norm": 1.175572156906128, "learning_rate": 0.00017551319648093842, "loss": 0.5251, "step": 5605 }, { "epoch": 0.8775829680651221, "grad_norm": 0.658734142780304, "learning_rate": 0.00017548875855327465, "loss": 0.3617, "step": 5606 }, { "epoch": 0.8777395115842204, "grad_norm": 0.7342488765716553, "learning_rate": 0.00017546432062561093, "loss": 0.4297, "step": 5607 }, { "epoch": 0.8778960551033187, "grad_norm": 1.1647714376449585, "learning_rate": 0.0001754398826979472, "loss": 0.4839, "step": 5608 }, { "epoch": 0.878052598622417, "grad_norm": 1.1591604948043823, "learning_rate": 0.00017541544477028346, "loss": 0.4779, "step": 5609 }, { "epoch": 0.8782091421415154, "grad_norm": 0.5184983015060425, "learning_rate": 0.00017539100684261973, "loss": 0.2418, "step": 5610 }, { "epoch": 0.8783656856606137, "grad_norm": 0.9003580808639526, "learning_rate": 0.000175366568914956, "loss": 0.4329, "step": 5611 }, { "epoch": 0.878522229179712, "grad_norm": 0.6690883040428162, "learning_rate": 0.00017534213098729226, "loss": 0.337, "step": 5612 }, { "epoch": 0.8786787726988102, "grad_norm": 1.1468678712844849, "learning_rate": 0.00017531769305962852, "loss": 0.3636, "step": 5613 }, { "epoch": 0.8788353162179086, "grad_norm": 0.7906423807144165, "learning_rate": 0.0001752932551319648, "loss": 0.3264, "step": 5614 }, { "epoch": 0.8789918597370069, "grad_norm": 0.82657390832901, "learning_rate": 0.00017526881720430104, "loss": 0.4039, "step": 5615 }, { "epoch": 0.8791484032561052, 
"grad_norm": 3.476700782775879, "learning_rate": 0.00017524437927663732, "loss": 0.411, "step": 5616 }, { "epoch": 0.8793049467752035, "grad_norm": 1.0398757457733154, "learning_rate": 0.0001752199413489736, "loss": 0.6646, "step": 5617 }, { "epoch": 0.8794614902943018, "grad_norm": 1.3956527709960938, "learning_rate": 0.00017519550342130985, "loss": 0.7714, "step": 5618 }, { "epoch": 0.8796180338134001, "grad_norm": 1.2822397947311401, "learning_rate": 0.00017517106549364613, "loss": 0.5916, "step": 5619 }, { "epoch": 0.8797745773324984, "grad_norm": 1.0409700870513916, "learning_rate": 0.0001751466275659824, "loss": 0.4558, "step": 5620 }, { "epoch": 0.8799311208515967, "grad_norm": 1.218854308128357, "learning_rate": 0.00017512218963831863, "loss": 0.4444, "step": 5621 }, { "epoch": 0.8800876643706951, "grad_norm": 2.0905308723449707, "learning_rate": 0.0001750977517106549, "loss": 0.336, "step": 5622 }, { "epoch": 0.8802442078897934, "grad_norm": 1.4038474559783936, "learning_rate": 0.0001750733137829912, "loss": 0.5637, "step": 5623 }, { "epoch": 0.8804007514088916, "grad_norm": 1.5574206113815308, "learning_rate": 0.00017504887585532744, "loss": 0.6929, "step": 5624 }, { "epoch": 0.88055729492799, "grad_norm": 2.9178993701934814, "learning_rate": 0.00017502443792766372, "loss": 0.572, "step": 5625 }, { "epoch": 0.8807138384470883, "grad_norm": 1.9534040689468384, "learning_rate": 0.000175, "loss": 0.538, "step": 5626 }, { "epoch": 0.8808703819661866, "grad_norm": 2.420355796813965, "learning_rate": 0.00017497556207233625, "loss": 0.7121, "step": 5627 }, { "epoch": 0.8810269254852849, "grad_norm": 2.449112892150879, "learning_rate": 0.00017495112414467253, "loss": 0.8366, "step": 5628 }, { "epoch": 0.8811834690043833, "grad_norm": 1.790299415588379, "learning_rate": 0.0001749266862170088, "loss": 0.3996, "step": 5629 }, { "epoch": 0.8813400125234815, "grad_norm": 2.5560543537139893, "learning_rate": 0.00017490224828934503, "loss": 0.7785, "step": 5630 }, { 
"epoch": 0.8814965560425798, "grad_norm": 1.6959267854690552, "learning_rate": 0.0001748778103616813, "loss": 0.6992, "step": 5631 }, { "epoch": 0.8816530995616781, "grad_norm": 2.7933099269866943, "learning_rate": 0.0001748533724340176, "loss": 1.0714, "step": 5632 }, { "epoch": 0.8818096430807765, "grad_norm": 3.0920727252960205, "learning_rate": 0.00017482893450635384, "loss": 0.8542, "step": 5633 }, { "epoch": 0.8819661865998748, "grad_norm": 1.7544466257095337, "learning_rate": 0.00017480449657869012, "loss": 0.8824, "step": 5634 }, { "epoch": 0.882122730118973, "grad_norm": 3.3472537994384766, "learning_rate": 0.0001747800586510264, "loss": 0.7097, "step": 5635 }, { "epoch": 0.8822792736380713, "grad_norm": 2.1331112384796143, "learning_rate": 0.00017475562072336262, "loss": 1.0795, "step": 5636 }, { "epoch": 0.8824358171571697, "grad_norm": 3.316182851791382, "learning_rate": 0.0001747311827956989, "loss": 0.8523, "step": 5637 }, { "epoch": 0.882592360676268, "grad_norm": 3.732465982437134, "learning_rate": 0.00017470674486803518, "loss": 0.9487, "step": 5638 }, { "epoch": 0.8827489041953663, "grad_norm": 2.216749668121338, "learning_rate": 0.00017468230694037143, "loss": 0.7443, "step": 5639 }, { "epoch": 0.8829054477144647, "grad_norm": 2.096297025680542, "learning_rate": 0.0001746578690127077, "loss": 1.0188, "step": 5640 }, { "epoch": 0.8830619912335629, "grad_norm": 1.9539072513580322, "learning_rate": 0.00017463343108504398, "loss": 1.7533, "step": 5641 }, { "epoch": 0.8832185347526612, "grad_norm": 1.9574966430664062, "learning_rate": 0.00017460899315738024, "loss": 1.3721, "step": 5642 }, { "epoch": 0.8833750782717595, "grad_norm": 2.3340044021606445, "learning_rate": 0.0001745845552297165, "loss": 1.3333, "step": 5643 }, { "epoch": 0.8835316217908579, "grad_norm": 2.5659821033477783, "learning_rate": 0.0001745601173020528, "loss": 1.2622, "step": 5644 }, { "epoch": 0.8836881653099562, "grad_norm": 2.5627105236053467, "learning_rate": 
0.00017453567937438902, "loss": 1.5452, "step": 5645 }, { "epoch": 0.8838447088290545, "grad_norm": 2.1331045627593994, "learning_rate": 0.0001745112414467253, "loss": 0.7612, "step": 5646 }, { "epoch": 0.8840012523481527, "grad_norm": 1.764351725578308, "learning_rate": 0.00017448680351906157, "loss": 0.5008, "step": 5647 }, { "epoch": 0.8841577958672511, "grad_norm": 1.369174838066101, "learning_rate": 0.00017446236559139782, "loss": 0.4137, "step": 5648 }, { "epoch": 0.8843143393863494, "grad_norm": 2.9028494358062744, "learning_rate": 0.0001744379276637341, "loss": 0.7086, "step": 5649 }, { "epoch": 0.8844708829054477, "grad_norm": 3.7896909713745117, "learning_rate": 0.00017441348973607038, "loss": 0.9732, "step": 5650 }, { "epoch": 0.8846274264245461, "grad_norm": 1.121634840965271, "learning_rate": 0.00017438905180840663, "loss": 0.2748, "step": 5651 }, { "epoch": 0.8847839699436444, "grad_norm": 0.823630154132843, "learning_rate": 0.0001743646138807429, "loss": 0.2495, "step": 5652 }, { "epoch": 0.8849405134627426, "grad_norm": 0.7519258260726929, "learning_rate": 0.0001743401759530792, "loss": 0.2669, "step": 5653 }, { "epoch": 0.8850970569818409, "grad_norm": 0.6291525363922119, "learning_rate": 0.0001743157380254154, "loss": 0.3282, "step": 5654 }, { "epoch": 0.8852536005009393, "grad_norm": 0.8792591691017151, "learning_rate": 0.0001742913000977517, "loss": 0.5075, "step": 5655 }, { "epoch": 0.8854101440200376, "grad_norm": 0.6888756155967712, "learning_rate": 0.00017426686217008797, "loss": 0.4798, "step": 5656 }, { "epoch": 0.8855666875391359, "grad_norm": 0.6959285140037537, "learning_rate": 0.00017424242424242422, "loss": 0.2821, "step": 5657 }, { "epoch": 0.8857232310582341, "grad_norm": 0.9070468544960022, "learning_rate": 0.0001742179863147605, "loss": 0.2513, "step": 5658 }, { "epoch": 0.8858797745773325, "grad_norm": 1.2020875215530396, "learning_rate": 0.00017419354838709678, "loss": 0.4034, "step": 5659 }, { "epoch": 0.8860363180964308, 
"grad_norm": 0.8414081931114197, "learning_rate": 0.000174169110459433, "loss": 0.2479, "step": 5660 }, { "epoch": 0.8861928616155291, "grad_norm": 0.5406526327133179, "learning_rate": 0.00017414467253176928, "loss": 0.3122, "step": 5661 }, { "epoch": 0.8863494051346275, "grad_norm": 0.6893888711929321, "learning_rate": 0.00017412023460410556, "loss": 0.3185, "step": 5662 }, { "epoch": 0.8865059486537258, "grad_norm": 0.9592669606208801, "learning_rate": 0.0001740957966764418, "loss": 0.4614, "step": 5663 }, { "epoch": 0.886662492172824, "grad_norm": 1.71309232711792, "learning_rate": 0.0001740713587487781, "loss": 0.5796, "step": 5664 }, { "epoch": 0.8868190356919223, "grad_norm": 1.557362675666809, "learning_rate": 0.00017404692082111437, "loss": 0.5306, "step": 5665 }, { "epoch": 0.8869755792110207, "grad_norm": 1.2218095064163208, "learning_rate": 0.00017402248289345062, "loss": 0.532, "step": 5666 }, { "epoch": 0.887132122730119, "grad_norm": 1.2000401020050049, "learning_rate": 0.0001739980449657869, "loss": 0.5144, "step": 5667 }, { "epoch": 0.8872886662492173, "grad_norm": 1.0709697008132935, "learning_rate": 0.00017397360703812317, "loss": 0.5698, "step": 5668 }, { "epoch": 0.8874452097683156, "grad_norm": 0.7280417680740356, "learning_rate": 0.0001739491691104594, "loss": 0.4509, "step": 5669 }, { "epoch": 0.8876017532874139, "grad_norm": 1.5254422426223755, "learning_rate": 0.00017392473118279568, "loss": 0.6166, "step": 5670 }, { "epoch": 0.8877582968065122, "grad_norm": 1.4855180978775024, "learning_rate": 0.00017390029325513195, "loss": 0.4885, "step": 5671 }, { "epoch": 0.8879148403256105, "grad_norm": 1.855657935142517, "learning_rate": 0.0001738758553274682, "loss": 0.7829, "step": 5672 }, { "epoch": 0.8880713838447089, "grad_norm": 1.7714046239852905, "learning_rate": 0.00017385141739980448, "loss": 1.2026, "step": 5673 }, { "epoch": 0.8882279273638072, "grad_norm": 2.0689687728881836, "learning_rate": 0.00017382697947214076, "loss": 0.8146, 
"step": 5674 }, { "epoch": 0.8883844708829054, "grad_norm": 2.669292688369751, "learning_rate": 0.00017380254154447701, "loss": 0.8094, "step": 5675 }, { "epoch": 0.8885410144020037, "grad_norm": 1.2255752086639404, "learning_rate": 0.0001737781036168133, "loss": 0.486, "step": 5676 }, { "epoch": 0.888697557921102, "grad_norm": 0.9265427589416504, "learning_rate": 0.00017375366568914954, "loss": 0.5691, "step": 5677 }, { "epoch": 0.8888541014402004, "grad_norm": 1.7718168497085571, "learning_rate": 0.0001737292277614858, "loss": 0.6208, "step": 5678 }, { "epoch": 0.8890106449592987, "grad_norm": 3.1587021350860596, "learning_rate": 0.00017370478983382207, "loss": 1.0041, "step": 5679 }, { "epoch": 0.889167188478397, "grad_norm": 1.6209098100662231, "learning_rate": 0.00017368035190615835, "loss": 0.4903, "step": 5680 }, { "epoch": 0.8893237319974953, "grad_norm": 1.9299261569976807, "learning_rate": 0.0001736559139784946, "loss": 0.6506, "step": 5681 }, { "epoch": 0.8894802755165936, "grad_norm": 1.872786521911621, "learning_rate": 0.00017363147605083088, "loss": 1.0693, "step": 5682 }, { "epoch": 0.8896368190356919, "grad_norm": 2.118117094039917, "learning_rate": 0.00017360703812316716, "loss": 0.9645, "step": 5683 }, { "epoch": 0.8897933625547902, "grad_norm": 2.271125078201294, "learning_rate": 0.00017358260019550338, "loss": 0.895, "step": 5684 }, { "epoch": 0.8899499060738886, "grad_norm": 2.594882011413574, "learning_rate": 0.00017355816226783966, "loss": 1.2066, "step": 5685 }, { "epoch": 0.8901064495929869, "grad_norm": 1.8685270547866821, "learning_rate": 0.00017353372434017594, "loss": 0.6995, "step": 5686 }, { "epoch": 0.8902629931120851, "grad_norm": 3.1582865715026855, "learning_rate": 0.0001735092864125122, "loss": 1.3999, "step": 5687 }, { "epoch": 0.8904195366311835, "grad_norm": 8.269997596740723, "learning_rate": 0.00017348484848484847, "loss": 1.6178, "step": 5688 }, { "epoch": 0.8905760801502818, "grad_norm": 4.720138072967529, "learning_rate": 
0.00017346041055718475, "loss": 1.1089, "step": 5689 }, { "epoch": 0.8907326236693801, "grad_norm": 1.907230257987976, "learning_rate": 0.000173435972629521, "loss": 1.2031, "step": 5690 }, { "epoch": 0.8908891671884784, "grad_norm": 2.7072951793670654, "learning_rate": 0.00017341153470185728, "loss": 1.6695, "step": 5691 }, { "epoch": 0.8910457107075767, "grad_norm": 4.086122989654541, "learning_rate": 0.00017338709677419356, "loss": 1.0969, "step": 5692 }, { "epoch": 0.891202254226675, "grad_norm": 1.9636390209197998, "learning_rate": 0.00017336265884652978, "loss": 0.7329, "step": 5693 }, { "epoch": 0.8913587977457733, "grad_norm": 2.4966297149658203, "learning_rate": 0.00017333822091886606, "loss": 0.7581, "step": 5694 }, { "epoch": 0.8915153412648716, "grad_norm": 1.2184866666793823, "learning_rate": 0.00017331378299120234, "loss": 0.4329, "step": 5695 }, { "epoch": 0.89167188478397, "grad_norm": 3.0518410205841064, "learning_rate": 0.0001732893450635386, "loss": 0.9643, "step": 5696 }, { "epoch": 0.8918284283030683, "grad_norm": 2.205357074737549, "learning_rate": 0.00017326490713587487, "loss": 1.076, "step": 5697 }, { "epoch": 0.8919849718221665, "grad_norm": 1.6067478656768799, "learning_rate": 0.00017324046920821115, "loss": 0.2366, "step": 5698 }, { "epoch": 0.8921415153412648, "grad_norm": 1.8827284574508667, "learning_rate": 0.0001732160312805474, "loss": 0.9772, "step": 5699 }, { "epoch": 0.8922980588603632, "grad_norm": 3.9518826007843018, "learning_rate": 0.00017319159335288367, "loss": 1.1912, "step": 5700 }, { "epoch": 0.8924546023794615, "grad_norm": 0.5631269812583923, "learning_rate": 0.00017316715542521993, "loss": 0.2854, "step": 5701 }, { "epoch": 0.8926111458985598, "grad_norm": 0.7066730856895447, "learning_rate": 0.00017314271749755618, "loss": 0.4003, "step": 5702 }, { "epoch": 0.8927676894176582, "grad_norm": 0.7470924854278564, "learning_rate": 0.00017311827956989246, "loss": 0.2347, "step": 5703 }, { "epoch": 0.8929242329367564, 
"grad_norm": 0.9745728969573975, "learning_rate": 0.00017309384164222873, "loss": 0.4271, "step": 5704 }, { "epoch": 0.8930807764558547, "grad_norm": 0.6492823362350464, "learning_rate": 0.00017306940371456499, "loss": 0.3031, "step": 5705 }, { "epoch": 0.893237319974953, "grad_norm": 0.7017116546630859, "learning_rate": 0.00017304496578690126, "loss": 0.2764, "step": 5706 }, { "epoch": 0.8933938634940514, "grad_norm": 0.9077380895614624, "learning_rate": 0.00017302052785923754, "loss": 0.3132, "step": 5707 }, { "epoch": 0.8935504070131497, "grad_norm": 0.9337749481201172, "learning_rate": 0.00017299608993157377, "loss": 0.4168, "step": 5708 }, { "epoch": 0.8937069505322479, "grad_norm": 1.3660385608673096, "learning_rate": 0.00017297165200391004, "loss": 0.2644, "step": 5709 }, { "epoch": 0.8938634940513462, "grad_norm": 2.1170074939727783, "learning_rate": 0.00017294721407624632, "loss": 0.4336, "step": 5710 }, { "epoch": 0.8940200375704446, "grad_norm": 0.9623836278915405, "learning_rate": 0.00017292277614858257, "loss": 0.5736, "step": 5711 }, { "epoch": 0.8941765810895429, "grad_norm": 1.502590298652649, "learning_rate": 0.00017289833822091885, "loss": 0.4649, "step": 5712 }, { "epoch": 0.8943331246086412, "grad_norm": 1.0726498365402222, "learning_rate": 0.00017287390029325513, "loss": 0.3547, "step": 5713 }, { "epoch": 0.8944896681277396, "grad_norm": 1.2182965278625488, "learning_rate": 0.00017284946236559138, "loss": 0.4567, "step": 5714 }, { "epoch": 0.8946462116468378, "grad_norm": 1.0065919160842896, "learning_rate": 0.00017282502443792766, "loss": 0.5393, "step": 5715 }, { "epoch": 0.8948027551659361, "grad_norm": 0.693427562713623, "learning_rate": 0.00017280058651026394, "loss": 0.3155, "step": 5716 }, { "epoch": 0.8949592986850344, "grad_norm": 1.1155869960784912, "learning_rate": 0.00017277614858260016, "loss": 0.5297, "step": 5717 }, { "epoch": 0.8951158422041328, "grad_norm": 4.776678562164307, "learning_rate": 0.00017275171065493644, "loss": 
1.0583, "step": 5718 }, { "epoch": 0.8952723857232311, "grad_norm": 3.4916136264801025, "learning_rate": 0.00017272727272727272, "loss": 0.7256, "step": 5719 }, { "epoch": 0.8954289292423294, "grad_norm": 1.9992774724960327, "learning_rate": 0.00017270283479960897, "loss": 0.5441, "step": 5720 }, { "epoch": 0.8955854727614276, "grad_norm": 1.5791343450546265, "learning_rate": 0.00017267839687194525, "loss": 0.8787, "step": 5721 }, { "epoch": 0.895742016280526, "grad_norm": 1.3216490745544434, "learning_rate": 0.00017265395894428153, "loss": 0.4736, "step": 5722 }, { "epoch": 0.8958985597996243, "grad_norm": 1.6248879432678223, "learning_rate": 0.00017262952101661778, "loss": 0.4765, "step": 5723 }, { "epoch": 0.8960551033187226, "grad_norm": 1.6858633756637573, "learning_rate": 0.00017260508308895406, "loss": 0.8183, "step": 5724 }, { "epoch": 0.896211646837821, "grad_norm": 1.9334176778793335, "learning_rate": 0.0001725806451612903, "loss": 0.622, "step": 5725 }, { "epoch": 0.8963681903569192, "grad_norm": 1.8685994148254395, "learning_rate": 0.00017255620723362656, "loss": 0.5704, "step": 5726 }, { "epoch": 0.8965247338760175, "grad_norm": 2.087904453277588, "learning_rate": 0.00017253176930596284, "loss": 0.8175, "step": 5727 }, { "epoch": 0.8966812773951158, "grad_norm": 1.4752204418182373, "learning_rate": 0.00017250733137829912, "loss": 0.509, "step": 5728 }, { "epoch": 0.8968378209142142, "grad_norm": 1.6020748615264893, "learning_rate": 0.00017248289345063537, "loss": 0.9958, "step": 5729 }, { "epoch": 0.8969943644333125, "grad_norm": 2.548856258392334, "learning_rate": 0.00017245845552297165, "loss": 0.8474, "step": 5730 }, { "epoch": 0.8971509079524108, "grad_norm": 1.2991943359375, "learning_rate": 0.00017243401759530792, "loss": 0.7504, "step": 5731 }, { "epoch": 0.897307451471509, "grad_norm": 3.308283805847168, "learning_rate": 0.00017240957966764415, "loss": 0.9691, "step": 5732 }, { "epoch": 0.8974639949906074, "grad_norm": 2.927663803100586, 
"learning_rate": 0.00017238514173998043, "loss": 1.2162, "step": 5733 }, { "epoch": 0.8976205385097057, "grad_norm": 1.6149358749389648, "learning_rate": 0.0001723607038123167, "loss": 0.6097, "step": 5734 }, { "epoch": 0.897777082028804, "grad_norm": 2.015613555908203, "learning_rate": 0.00017233626588465296, "loss": 0.9498, "step": 5735 }, { "epoch": 0.8979336255479023, "grad_norm": 3.3470332622528076, "learning_rate": 0.00017231182795698923, "loss": 1.2403, "step": 5736 }, { "epoch": 0.8980901690670007, "grad_norm": 3.9462878704071045, "learning_rate": 0.0001722873900293255, "loss": 0.6295, "step": 5737 }, { "epoch": 0.8982467125860989, "grad_norm": 1.829472303390503, "learning_rate": 0.00017226295210166176, "loss": 0.6039, "step": 5738 }, { "epoch": 0.8984032561051972, "grad_norm": 1.9444429874420166, "learning_rate": 0.00017223851417399804, "loss": 1.0498, "step": 5739 }, { "epoch": 0.8985597996242956, "grad_norm": 2.516277551651001, "learning_rate": 0.00017221407624633432, "loss": 1.4808, "step": 5740 }, { "epoch": 0.8987163431433939, "grad_norm": 2.0812556743621826, "learning_rate": 0.00017218963831867055, "loss": 1.1102, "step": 5741 }, { "epoch": 0.8988728866624922, "grad_norm": 3.566479206085205, "learning_rate": 0.00017216520039100682, "loss": 1.3883, "step": 5742 }, { "epoch": 0.8990294301815904, "grad_norm": 4.617666244506836, "learning_rate": 0.0001721407624633431, "loss": 1.3953, "step": 5743 }, { "epoch": 0.8991859737006888, "grad_norm": 2.720006227493286, "learning_rate": 0.00017211632453567935, "loss": 1.2931, "step": 5744 }, { "epoch": 0.8993425172197871, "grad_norm": 3.672116756439209, "learning_rate": 0.00017209188660801563, "loss": 1.467, "step": 5745 }, { "epoch": 0.8994990607388854, "grad_norm": 5.463393211364746, "learning_rate": 0.0001720674486803519, "loss": 0.8112, "step": 5746 }, { "epoch": 0.8996556042579837, "grad_norm": 3.226026773452759, "learning_rate": 0.00017204301075268816, "loss": 0.3882, "step": 5747 }, { "epoch": 
0.8998121477770821, "grad_norm": 1.810815453529358, "learning_rate": 0.0001720185728250244, "loss": 0.3974, "step": 5748 }, { "epoch": 0.8999686912961803, "grad_norm": 4.585860729217529, "learning_rate": 0.0001719941348973607, "loss": 1.265, "step": 5749 }, { "epoch": 0.9001252348152786, "grad_norm": 1.762209415435791, "learning_rate": 0.00017196969696969694, "loss": 0.8456, "step": 5750 }, { "epoch": 0.900281778334377, "grad_norm": 0.5263707041740417, "learning_rate": 0.00017194525904203322, "loss": 0.3196, "step": 5751 }, { "epoch": 0.9004383218534753, "grad_norm": 0.41388189792633057, "learning_rate": 0.0001719208211143695, "loss": 0.1791, "step": 5752 }, { "epoch": 0.9005948653725736, "grad_norm": 1.194480061531067, "learning_rate": 0.00017189638318670575, "loss": 0.2162, "step": 5753 }, { "epoch": 0.9007514088916719, "grad_norm": 0.5579673647880554, "learning_rate": 0.00017187194525904203, "loss": 0.3029, "step": 5754 }, { "epoch": 0.9009079524107702, "grad_norm": 0.6641569137573242, "learning_rate": 0.0001718475073313783, "loss": 0.3157, "step": 5755 }, { "epoch": 0.9010644959298685, "grad_norm": 0.6336182355880737, "learning_rate": 0.00017182306940371453, "loss": 0.3372, "step": 5756 }, { "epoch": 0.9012210394489668, "grad_norm": 0.9077296853065491, "learning_rate": 0.0001717986314760508, "loss": 0.4165, "step": 5757 }, { "epoch": 0.9013775829680651, "grad_norm": 1.123456597328186, "learning_rate": 0.0001717741935483871, "loss": 0.3048, "step": 5758 }, { "epoch": 0.9015341264871635, "grad_norm": 0.6466776132583618, "learning_rate": 0.00017174975562072334, "loss": 0.4485, "step": 5759 }, { "epoch": 0.9016906700062617, "grad_norm": 0.5841704607009888, "learning_rate": 0.00017172531769305962, "loss": 0.3242, "step": 5760 }, { "epoch": 0.90184721352536, "grad_norm": 0.8340890407562256, "learning_rate": 0.0001717008797653959, "loss": 0.4687, "step": 5761 }, { "epoch": 0.9020037570444583, "grad_norm": 1.2464579343795776, "learning_rate": 0.00017167644183773215, 
"loss": 0.4741, "step": 5762 }, { "epoch": 0.9021603005635567, "grad_norm": 0.7806826233863831, "learning_rate": 0.00017165200391006842, "loss": 0.4531, "step": 5763 }, { "epoch": 0.902316844082655, "grad_norm": 1.0296310186386108, "learning_rate": 0.0001716275659824047, "loss": 0.4653, "step": 5764 }, { "epoch": 0.9024733876017533, "grad_norm": 0.7613905072212219, "learning_rate": 0.00017160312805474093, "loss": 0.5479, "step": 5765 }, { "epoch": 0.9026299311208515, "grad_norm": 1.586656093597412, "learning_rate": 0.0001715786901270772, "loss": 0.7166, "step": 5766 }, { "epoch": 0.9027864746399499, "grad_norm": 1.1600213050842285, "learning_rate": 0.00017155425219941348, "loss": 0.4473, "step": 5767 }, { "epoch": 0.9029430181590482, "grad_norm": 1.4801439046859741, "learning_rate": 0.00017152981427174974, "loss": 0.5886, "step": 5768 }, { "epoch": 0.9030995616781465, "grad_norm": 1.5481951236724854, "learning_rate": 0.00017150537634408601, "loss": 0.5857, "step": 5769 }, { "epoch": 0.9032561051972449, "grad_norm": 1.2954734563827515, "learning_rate": 0.0001714809384164223, "loss": 0.8743, "step": 5770 }, { "epoch": 0.9034126487163432, "grad_norm": 2.4921019077301025, "learning_rate": 0.00017145650048875854, "loss": 0.6394, "step": 5771 }, { "epoch": 0.9035691922354414, "grad_norm": 1.4190541505813599, "learning_rate": 0.0001714320625610948, "loss": 0.6264, "step": 5772 }, { "epoch": 0.9037257357545397, "grad_norm": 0.9357963800430298, "learning_rate": 0.00017140762463343107, "loss": 0.4692, "step": 5773 }, { "epoch": 0.9038822792736381, "grad_norm": 3.0093657970428467, "learning_rate": 0.00017138318670576732, "loss": 0.6029, "step": 5774 }, { "epoch": 0.9040388227927364, "grad_norm": 2.1367123126983643, "learning_rate": 0.0001713587487781036, "loss": 0.5638, "step": 5775 }, { "epoch": 0.9041953663118347, "grad_norm": 3.115868091583252, "learning_rate": 0.00017133431085043988, "loss": 0.9128, "step": 5776 }, { "epoch": 0.904351909830933, "grad_norm": 
1.7553256750106812, "learning_rate": 0.00017130987292277613, "loss": 0.893, "step": 5777 }, { "epoch": 0.9045084533500313, "grad_norm": 1.6324225664138794, "learning_rate": 0.0001712854349951124, "loss": 0.8314, "step": 5778 }, { "epoch": 0.9046649968691296, "grad_norm": 2.084900379180908, "learning_rate": 0.0001712609970674487, "loss": 0.4471, "step": 5779 }, { "epoch": 0.9048215403882279, "grad_norm": 1.5276941061019897, "learning_rate": 0.0001712365591397849, "loss": 0.8395, "step": 5780 }, { "epoch": 0.9049780839073263, "grad_norm": 1.490938663482666, "learning_rate": 0.0001712121212121212, "loss": 0.8451, "step": 5781 }, { "epoch": 0.9051346274264246, "grad_norm": 2.2541961669921875, "learning_rate": 0.00017118768328445747, "loss": 1.2903, "step": 5782 }, { "epoch": 0.9052911709455228, "grad_norm": 2.1898505687713623, "learning_rate": 0.00017116324535679372, "loss": 1.0065, "step": 5783 }, { "epoch": 0.9054477144646211, "grad_norm": 1.505751609802246, "learning_rate": 0.00017113880742913, "loss": 0.8832, "step": 5784 }, { "epoch": 0.9056042579837195, "grad_norm": 2.1820976734161377, "learning_rate": 0.00017111436950146628, "loss": 0.6737, "step": 5785 }, { "epoch": 0.9057608015028178, "grad_norm": 2.6969125270843506, "learning_rate": 0.00017108993157380253, "loss": 0.9483, "step": 5786 }, { "epoch": 0.9059173450219161, "grad_norm": 4.440044403076172, "learning_rate": 0.0001710654936461388, "loss": 1.0961, "step": 5787 }, { "epoch": 0.9060738885410144, "grad_norm": 2.3744637966156006, "learning_rate": 0.00017104105571847509, "loss": 1.0894, "step": 5788 }, { "epoch": 0.9062304320601127, "grad_norm": 2.1773595809936523, "learning_rate": 0.0001710166177908113, "loss": 0.8032, "step": 5789 }, { "epoch": 0.906386975579211, "grad_norm": 2.6673295497894287, "learning_rate": 0.0001709921798631476, "loss": 1.3428, "step": 5790 }, { "epoch": 0.9065435190983093, "grad_norm": 2.424062967300415, "learning_rate": 0.00017096774193548387, "loss": 1.0168, "step": 5791 }, { 
"epoch": 0.9067000626174077, "grad_norm": 1.6261483430862427, "learning_rate": 0.00017094330400782012, "loss": 0.843, "step": 5792 }, { "epoch": 0.906856606136506, "grad_norm": 2.254560947418213, "learning_rate": 0.0001709188660801564, "loss": 1.4431, "step": 5793 }, { "epoch": 0.9070131496556043, "grad_norm": 1.4824719429016113, "learning_rate": 0.00017089442815249267, "loss": 0.788, "step": 5794 }, { "epoch": 0.9071696931747025, "grad_norm": 1.547086477279663, "learning_rate": 0.0001708699902248289, "loss": 0.9419, "step": 5795 }, { "epoch": 0.9073262366938009, "grad_norm": 2.4336373805999756, "learning_rate": 0.00017084555229716518, "loss": 0.899, "step": 5796 }, { "epoch": 0.9074827802128992, "grad_norm": 1.5645617246627808, "learning_rate": 0.00017082111436950146, "loss": 0.3773, "step": 5797 }, { "epoch": 0.9076393237319975, "grad_norm": 3.626685380935669, "learning_rate": 0.0001707966764418377, "loss": 1.37, "step": 5798 }, { "epoch": 0.9077958672510958, "grad_norm": 2.090294122695923, "learning_rate": 0.00017077223851417398, "loss": 0.4822, "step": 5799 }, { "epoch": 0.9079524107701941, "grad_norm": 3.3169608116149902, "learning_rate": 0.00017074780058651026, "loss": 1.7751, "step": 5800 }, { "epoch": 0.9081089542892924, "grad_norm": 0.6831493377685547, "learning_rate": 0.00017072336265884651, "loss": 0.4204, "step": 5801 }, { "epoch": 0.9082654978083907, "grad_norm": 0.5136266350746155, "learning_rate": 0.0001706989247311828, "loss": 0.2246, "step": 5802 }, { "epoch": 0.908422041327489, "grad_norm": 0.6040349006652832, "learning_rate": 0.00017067448680351907, "loss": 0.2548, "step": 5803 }, { "epoch": 0.9085785848465874, "grad_norm": 0.5405697226524353, "learning_rate": 0.0001706500488758553, "loss": 0.3375, "step": 5804 }, { "epoch": 0.9087351283656857, "grad_norm": 0.4443010985851288, "learning_rate": 0.00017062561094819157, "loss": 0.3044, "step": 5805 }, { "epoch": 0.9088916718847839, "grad_norm": 0.9515902400016785, "learning_rate": 
0.00017060117302052785, "loss": 0.5221, "step": 5806 }, { "epoch": 0.9090482154038823, "grad_norm": 0.5636407732963562, "learning_rate": 0.0001705767350928641, "loss": 0.3327, "step": 5807 }, { "epoch": 0.9092047589229806, "grad_norm": 1.611080288887024, "learning_rate": 0.00017055229716520038, "loss": 0.3531, "step": 5808 }, { "epoch": 0.9093613024420789, "grad_norm": 0.7491126656532288, "learning_rate": 0.00017052785923753666, "loss": 0.3976, "step": 5809 }, { "epoch": 0.9095178459611772, "grad_norm": 0.7487491965293884, "learning_rate": 0.0001705034213098729, "loss": 0.2023, "step": 5810 }, { "epoch": 0.9096743894802756, "grad_norm": 0.7374340295791626, "learning_rate": 0.0001704789833822092, "loss": 0.4256, "step": 5811 }, { "epoch": 0.9098309329993738, "grad_norm": 0.7710371017456055, "learning_rate": 0.00017045454545454547, "loss": 0.3024, "step": 5812 }, { "epoch": 0.9099874765184721, "grad_norm": 0.7972769737243652, "learning_rate": 0.0001704301075268817, "loss": 0.4539, "step": 5813 }, { "epoch": 0.9101440200375704, "grad_norm": 2.1603457927703857, "learning_rate": 0.00017040566959921797, "loss": 0.4388, "step": 5814 }, { "epoch": 0.9103005635566688, "grad_norm": 1.0259324312210083, "learning_rate": 0.00017038123167155425, "loss": 0.5987, "step": 5815 }, { "epoch": 0.9104571070757671, "grad_norm": 1.267693281173706, "learning_rate": 0.0001703567937438905, "loss": 0.6287, "step": 5816 }, { "epoch": 0.9106136505948653, "grad_norm": 0.7310298085212708, "learning_rate": 0.00017033235581622678, "loss": 0.3516, "step": 5817 }, { "epoch": 0.9107701941139636, "grad_norm": 0.9882366061210632, "learning_rate": 0.00017030791788856306, "loss": 0.6303, "step": 5818 }, { "epoch": 0.910926737633062, "grad_norm": 1.0292433500289917, "learning_rate": 0.00017028347996089928, "loss": 0.508, "step": 5819 }, { "epoch": 0.9110832811521603, "grad_norm": 2.08266019821167, "learning_rate": 0.00017025904203323556, "loss": 0.986, "step": 5820 }, { "epoch": 0.9112398246712586, 
"grad_norm": 1.249509572982788, "learning_rate": 0.00017023460410557184, "loss": 0.558, "step": 5821 }, { "epoch": 0.911396368190357, "grad_norm": 1.4662624597549438, "learning_rate": 0.0001702101661779081, "loss": 0.5545, "step": 5822 }, { "epoch": 0.9115529117094552, "grad_norm": 1.311246395111084, "learning_rate": 0.00017018572825024437, "loss": 0.5318, "step": 5823 }, { "epoch": 0.9117094552285535, "grad_norm": 1.9841166734695435, "learning_rate": 0.00017016129032258065, "loss": 0.8669, "step": 5824 }, { "epoch": 0.9118659987476518, "grad_norm": 1.4273552894592285, "learning_rate": 0.0001701368523949169, "loss": 0.3847, "step": 5825 }, { "epoch": 0.9120225422667502, "grad_norm": 1.7364933490753174, "learning_rate": 0.00017011241446725318, "loss": 0.5714, "step": 5826 }, { "epoch": 0.9121790857858485, "grad_norm": 1.4556832313537598, "learning_rate": 0.00017008797653958945, "loss": 0.7064, "step": 5827 }, { "epoch": 0.9123356293049468, "grad_norm": 1.4327338933944702, "learning_rate": 0.00017006353861192568, "loss": 0.6752, "step": 5828 }, { "epoch": 0.912492172824045, "grad_norm": 1.3399438858032227, "learning_rate": 0.00017003910068426196, "loss": 0.6105, "step": 5829 }, { "epoch": 0.9126487163431434, "grad_norm": 1.6405707597732544, "learning_rate": 0.00017001466275659823, "loss": 0.954, "step": 5830 }, { "epoch": 0.9128052598622417, "grad_norm": 2.615856170654297, "learning_rate": 0.00016999022482893449, "loss": 0.7187, "step": 5831 }, { "epoch": 0.91296180338134, "grad_norm": 2.1250553131103516, "learning_rate": 0.00016996578690127076, "loss": 1.097, "step": 5832 }, { "epoch": 0.9131183469004384, "grad_norm": 1.6402782201766968, "learning_rate": 0.00016994134897360704, "loss": 0.6268, "step": 5833 }, { "epoch": 0.9132748904195366, "grad_norm": 2.8018393516540527, "learning_rate": 0.0001699169110459433, "loss": 0.8648, "step": 5834 }, { "epoch": 0.9134314339386349, "grad_norm": 2.0957236289978027, "learning_rate": 0.00016989247311827957, "loss": 0.9006, 
"step": 5835 }, { "epoch": 0.9135879774577332, "grad_norm": 4.053585529327393, "learning_rate": 0.00016986803519061582, "loss": 1.3675, "step": 5836 }, { "epoch": 0.9137445209768316, "grad_norm": 1.946283221244812, "learning_rate": 0.00016984359726295207, "loss": 0.8759, "step": 5837 }, { "epoch": 0.9139010644959299, "grad_norm": 1.3934121131896973, "learning_rate": 0.00016981915933528835, "loss": 1.0044, "step": 5838 }, { "epoch": 0.9140576080150282, "grad_norm": 4.167563438415527, "learning_rate": 0.00016979472140762463, "loss": 0.9486, "step": 5839 }, { "epoch": 0.9142141515341264, "grad_norm": 3.6018738746643066, "learning_rate": 0.00016977028347996088, "loss": 0.9403, "step": 5840 }, { "epoch": 0.9143706950532248, "grad_norm": 2.0300357341766357, "learning_rate": 0.00016974584555229716, "loss": 1.152, "step": 5841 }, { "epoch": 0.9145272385723231, "grad_norm": 2.6860861778259277, "learning_rate": 0.00016972140762463344, "loss": 0.8827, "step": 5842 }, { "epoch": 0.9146837820914214, "grad_norm": 1.8871678113937378, "learning_rate": 0.00016969696969696966, "loss": 1.57, "step": 5843 }, { "epoch": 0.9148403256105198, "grad_norm": 2.2716727256774902, "learning_rate": 0.00016967253176930594, "loss": 1.4632, "step": 5844 }, { "epoch": 0.9149968691296181, "grad_norm": 3.0800204277038574, "learning_rate": 0.00016964809384164222, "loss": 1.4531, "step": 5845 }, { "epoch": 0.9151534126487163, "grad_norm": 4.430184841156006, "learning_rate": 0.00016962365591397847, "loss": 0.8666, "step": 5846 }, { "epoch": 0.9153099561678146, "grad_norm": 2.232022285461426, "learning_rate": 0.00016959921798631475, "loss": 0.6021, "step": 5847 }, { "epoch": 0.915466499686913, "grad_norm": 2.0096168518066406, "learning_rate": 0.00016957478005865103, "loss": 0.9608, "step": 5848 }, { "epoch": 0.9156230432060113, "grad_norm": 3.4531753063201904, "learning_rate": 0.00016955034213098728, "loss": 0.7093, "step": 5849 }, { "epoch": 0.9157795867251096, "grad_norm": 5.444746971130371, 
"learning_rate": 0.00016952590420332356, "loss": 0.777, "step": 5850 }, { "epoch": 0.9159361302442078, "grad_norm": 0.5307621359825134, "learning_rate": 0.00016950146627565984, "loss": 0.3685, "step": 5851 }, { "epoch": 0.9160926737633062, "grad_norm": 0.5665816068649292, "learning_rate": 0.00016947702834799606, "loss": 0.3451, "step": 5852 }, { "epoch": 0.9162492172824045, "grad_norm": 1.0723793506622314, "learning_rate": 0.00016945259042033234, "loss": 0.5412, "step": 5853 }, { "epoch": 0.9164057608015028, "grad_norm": 0.6314266324043274, "learning_rate": 0.00016942815249266862, "loss": 0.3838, "step": 5854 }, { "epoch": 0.9165623043206012, "grad_norm": 0.65800541639328, "learning_rate": 0.00016940371456500487, "loss": 0.4242, "step": 5855 }, { "epoch": 0.9167188478396995, "grad_norm": 0.8351601958274841, "learning_rate": 0.00016937927663734115, "loss": 0.3196, "step": 5856 }, { "epoch": 0.9168753913587977, "grad_norm": 0.6536497473716736, "learning_rate": 0.00016935483870967742, "loss": 0.3413, "step": 5857 }, { "epoch": 0.917031934877896, "grad_norm": 0.8502795696258545, "learning_rate": 0.00016933040078201368, "loss": 0.4184, "step": 5858 }, { "epoch": 0.9171884783969944, "grad_norm": 0.9803659319877625, "learning_rate": 0.00016930596285434995, "loss": 0.3015, "step": 5859 }, { "epoch": 0.9173450219160927, "grad_norm": 1.2949978113174438, "learning_rate": 0.0001692815249266862, "loss": 0.3153, "step": 5860 }, { "epoch": 0.917501565435191, "grad_norm": 0.9877235889434814, "learning_rate": 0.00016925708699902246, "loss": 0.3463, "step": 5861 }, { "epoch": 0.9176581089542893, "grad_norm": 1.161659598350525, "learning_rate": 0.00016923264907135874, "loss": 0.5856, "step": 5862 }, { "epoch": 0.9178146524733876, "grad_norm": 0.7833372950553894, "learning_rate": 0.000169208211143695, "loss": 0.3127, "step": 5863 }, { "epoch": 0.9179711959924859, "grad_norm": 1.466927170753479, "learning_rate": 0.00016918377321603126, "loss": 0.4847, "step": 5864 }, { "epoch": 
0.9181277395115842, "grad_norm": 1.4383978843688965, "learning_rate": 0.00016915933528836754, "loss": 0.5227, "step": 5865 }, { "epoch": 0.9182842830306825, "grad_norm": 1.0085314512252808, "learning_rate": 0.00016913489736070382, "loss": 0.3953, "step": 5866 }, { "epoch": 0.9184408265497809, "grad_norm": 1.5321907997131348, "learning_rate": 0.00016911045943304005, "loss": 0.6083, "step": 5867 }, { "epoch": 0.9185973700688791, "grad_norm": 4.02199649810791, "learning_rate": 0.00016908602150537632, "loss": 0.7558, "step": 5868 }, { "epoch": 0.9187539135879774, "grad_norm": 1.296752691268921, "learning_rate": 0.0001690615835777126, "loss": 0.3596, "step": 5869 }, { "epoch": 0.9189104571070758, "grad_norm": 2.0699713230133057, "learning_rate": 0.00016903714565004885, "loss": 0.6962, "step": 5870 }, { "epoch": 0.9190670006261741, "grad_norm": 1.0956624746322632, "learning_rate": 0.00016901270772238513, "loss": 0.3088, "step": 5871 }, { "epoch": 0.9192235441452724, "grad_norm": 1.2984445095062256, "learning_rate": 0.0001689882697947214, "loss": 0.5232, "step": 5872 }, { "epoch": 0.9193800876643707, "grad_norm": 1.7365597486495972, "learning_rate": 0.00016896383186705766, "loss": 0.9313, "step": 5873 }, { "epoch": 0.919536631183469, "grad_norm": 1.6211953163146973, "learning_rate": 0.00016893939393939394, "loss": 0.5194, "step": 5874 }, { "epoch": 0.9196931747025673, "grad_norm": 1.4481335878372192, "learning_rate": 0.00016891495601173022, "loss": 0.4233, "step": 5875 }, { "epoch": 0.9198497182216656, "grad_norm": 2.125072956085205, "learning_rate": 0.00016889051808406644, "loss": 0.5753, "step": 5876 }, { "epoch": 0.9200062617407639, "grad_norm": 4.071976184844971, "learning_rate": 0.00016886608015640272, "loss": 0.6559, "step": 5877 }, { "epoch": 0.9201628052598623, "grad_norm": 2.427960157394409, "learning_rate": 0.000168841642228739, "loss": 1.0354, "step": 5878 }, { "epoch": 0.9203193487789606, "grad_norm": 1.5165985822677612, "learning_rate": 
0.00016881720430107525, "loss": 0.813, "step": 5879 }, { "epoch": 0.9204758922980588, "grad_norm": 2.0573408603668213, "learning_rate": 0.00016879276637341153, "loss": 1.2233, "step": 5880 }, { "epoch": 0.9206324358171571, "grad_norm": 2.256502866744995, "learning_rate": 0.0001687683284457478, "loss": 0.6152, "step": 5881 }, { "epoch": 0.9207889793362555, "grad_norm": 2.5055789947509766, "learning_rate": 0.00016874389051808406, "loss": 0.7555, "step": 5882 }, { "epoch": 0.9209455228553538, "grad_norm": 1.9413427114486694, "learning_rate": 0.00016871945259042034, "loss": 1.0083, "step": 5883 }, { "epoch": 0.9211020663744521, "grad_norm": 1.8647050857543945, "learning_rate": 0.00016869501466275656, "loss": 1.0279, "step": 5884 }, { "epoch": 0.9212586098935505, "grad_norm": 2.933109998703003, "learning_rate": 0.00016867057673509284, "loss": 0.8053, "step": 5885 }, { "epoch": 0.9214151534126487, "grad_norm": 2.2728209495544434, "learning_rate": 0.00016864613880742912, "loss": 0.9038, "step": 5886 }, { "epoch": 0.921571696931747, "grad_norm": 2.305570125579834, "learning_rate": 0.00016862170087976537, "loss": 0.9049, "step": 5887 }, { "epoch": 0.9217282404508453, "grad_norm": 3.410182476043701, "learning_rate": 0.00016859726295210165, "loss": 1.1697, "step": 5888 }, { "epoch": 0.9218847839699437, "grad_norm": 3.4939680099487305, "learning_rate": 0.00016857282502443793, "loss": 1.5584, "step": 5889 }, { "epoch": 0.922041327489042, "grad_norm": 1.5268841981887817, "learning_rate": 0.00016854838709677415, "loss": 1.1331, "step": 5890 }, { "epoch": 0.9221978710081402, "grad_norm": 1.2586132287979126, "learning_rate": 0.00016852394916911043, "loss": 1.0516, "step": 5891 }, { "epoch": 0.9223544145272385, "grad_norm": 2.3908731937408447, "learning_rate": 0.0001684995112414467, "loss": 1.0746, "step": 5892 }, { "epoch": 0.9225109580463369, "grad_norm": 2.6460394859313965, "learning_rate": 0.00016847507331378296, "loss": 1.5005, "step": 5893 }, { "epoch": 0.9226675015654352, 
"grad_norm": 1.6901088953018188, "learning_rate": 0.00016845063538611924, "loss": 0.7368, "step": 5894 }, { "epoch": 0.9228240450845335, "grad_norm": 1.7683452367782593, "learning_rate": 0.00016842619745845551, "loss": 0.6092, "step": 5895 }, { "epoch": 0.9229805886036319, "grad_norm": 3.026430606842041, "learning_rate": 0.00016840175953079177, "loss": 0.4762, "step": 5896 }, { "epoch": 0.9231371321227301, "grad_norm": 2.5244085788726807, "learning_rate": 0.00016837732160312804, "loss": 1.0575, "step": 5897 }, { "epoch": 0.9232936756418284, "grad_norm": 1.5700312852859497, "learning_rate": 0.00016835288367546432, "loss": 0.9384, "step": 5898 }, { "epoch": 0.9234502191609267, "grad_norm": 1.4944273233413696, "learning_rate": 0.00016832844574780055, "loss": 0.3906, "step": 5899 }, { "epoch": 0.9236067626800251, "grad_norm": 4.99465799331665, "learning_rate": 0.00016830400782013682, "loss": 0.9438, "step": 5900 }, { "epoch": 0.9237633061991234, "grad_norm": 0.4812981188297272, "learning_rate": 0.0001682795698924731, "loss": 0.3414, "step": 5901 }, { "epoch": 0.9239198497182217, "grad_norm": 0.5931546092033386, "learning_rate": 0.00016825513196480935, "loss": 0.3613, "step": 5902 }, { "epoch": 0.9240763932373199, "grad_norm": 0.6139108538627625, "learning_rate": 0.00016823069403714563, "loss": 0.2214, "step": 5903 }, { "epoch": 0.9242329367564183, "grad_norm": 0.5874446630477905, "learning_rate": 0.0001682062561094819, "loss": 0.3274, "step": 5904 }, { "epoch": 0.9243894802755166, "grad_norm": 0.4940461218357086, "learning_rate": 0.00016818181818181816, "loss": 0.2925, "step": 5905 }, { "epoch": 0.9245460237946149, "grad_norm": 0.5036855340003967, "learning_rate": 0.00016815738025415444, "loss": 0.2816, "step": 5906 }, { "epoch": 0.9247025673137133, "grad_norm": 0.6490256190299988, "learning_rate": 0.0001681329423264907, "loss": 0.2061, "step": 5907 }, { "epoch": 0.9248591108328115, "grad_norm": 1.324135184288025, "learning_rate": 0.00016810850439882694, "loss": 
0.5117, "step": 5908 }, { "epoch": 0.9250156543519098, "grad_norm": 0.9895930290222168, "learning_rate": 0.00016808406647116322, "loss": 0.3738, "step": 5909 }, { "epoch": 0.9251721978710081, "grad_norm": 0.761587917804718, "learning_rate": 0.0001680596285434995, "loss": 0.3548, "step": 5910 }, { "epoch": 0.9253287413901065, "grad_norm": 0.905089259147644, "learning_rate": 0.00016803519061583575, "loss": 0.3428, "step": 5911 }, { "epoch": 0.9254852849092048, "grad_norm": 1.7721465826034546, "learning_rate": 0.00016801075268817203, "loss": 0.3537, "step": 5912 }, { "epoch": 0.9256418284283031, "grad_norm": 0.9065170288085938, "learning_rate": 0.0001679863147605083, "loss": 0.7074, "step": 5913 }, { "epoch": 0.9257983719474013, "grad_norm": 1.3769489526748657, "learning_rate": 0.00016796187683284453, "loss": 0.4449, "step": 5914 }, { "epoch": 0.9259549154664997, "grad_norm": 0.8921677470207214, "learning_rate": 0.0001679374389051808, "loss": 0.4508, "step": 5915 }, { "epoch": 0.926111458985598, "grad_norm": 0.827818751335144, "learning_rate": 0.0001679130009775171, "loss": 0.6338, "step": 5916 }, { "epoch": 0.9262680025046963, "grad_norm": 1.2946507930755615, "learning_rate": 0.00016788856304985334, "loss": 0.6249, "step": 5917 }, { "epoch": 0.9264245460237946, "grad_norm": 0.9976163506507874, "learning_rate": 0.00016786412512218962, "loss": 0.2552, "step": 5918 }, { "epoch": 0.926581089542893, "grad_norm": 0.9220026731491089, "learning_rate": 0.0001678396871945259, "loss": 0.3495, "step": 5919 }, { "epoch": 0.9267376330619912, "grad_norm": 1.1707324981689453, "learning_rate": 0.00016781524926686215, "loss": 0.5967, "step": 5920 }, { "epoch": 0.9268941765810895, "grad_norm": 1.8587265014648438, "learning_rate": 0.00016779081133919843, "loss": 0.512, "step": 5921 }, { "epoch": 0.9270507201001879, "grad_norm": 1.1185016632080078, "learning_rate": 0.0001677663734115347, "loss": 0.4552, "step": 5922 }, { "epoch": 0.9272072636192862, "grad_norm": 7.0849127769470215, 
"learning_rate": 0.00016774193548387093, "loss": 0.5696, "step": 5923 }, { "epoch": 0.9273638071383845, "grad_norm": 1.0725510120391846, "learning_rate": 0.0001677174975562072, "loss": 0.5536, "step": 5924 }, { "epoch": 0.9275203506574827, "grad_norm": 1.6228716373443604, "learning_rate": 0.00016769305962854349, "loss": 0.4286, "step": 5925 }, { "epoch": 0.9276768941765811, "grad_norm": 1.596377968788147, "learning_rate": 0.00016766862170087974, "loss": 0.6547, "step": 5926 }, { "epoch": 0.9278334376956794, "grad_norm": 1.7780789136886597, "learning_rate": 0.00016764418377321601, "loss": 0.8426, "step": 5927 }, { "epoch": 0.9279899812147777, "grad_norm": 1.8917478322982788, "learning_rate": 0.0001676197458455523, "loss": 0.6604, "step": 5928 }, { "epoch": 0.928146524733876, "grad_norm": 1.4411269426345825, "learning_rate": 0.00016759530791788854, "loss": 0.5667, "step": 5929 }, { "epoch": 0.9283030682529744, "grad_norm": 2.5890650749206543, "learning_rate": 0.00016757086999022482, "loss": 0.4491, "step": 5930 }, { "epoch": 0.9284596117720726, "grad_norm": 1.4896984100341797, "learning_rate": 0.00016754643206256107, "loss": 0.7193, "step": 5931 }, { "epoch": 0.9286161552911709, "grad_norm": 2.9723289012908936, "learning_rate": 0.00016752199413489733, "loss": 0.7311, "step": 5932 }, { "epoch": 0.9287726988102692, "grad_norm": 1.3109147548675537, "learning_rate": 0.0001674975562072336, "loss": 0.9025, "step": 5933 }, { "epoch": 0.9289292423293676, "grad_norm": 2.1195662021636963, "learning_rate": 0.00016747311827956988, "loss": 0.8071, "step": 5934 }, { "epoch": 0.9290857858484659, "grad_norm": 2.0129497051239014, "learning_rate": 0.00016744868035190613, "loss": 0.6614, "step": 5935 }, { "epoch": 0.9292423293675642, "grad_norm": 3.3154754638671875, "learning_rate": 0.0001674242424242424, "loss": 1.3507, "step": 5936 }, { "epoch": 0.9293988728866625, "grad_norm": 2.2003567218780518, "learning_rate": 0.0001673998044965787, "loss": 1.2579, "step": 5937 }, { "epoch": 
0.9295554164057608, "grad_norm": 2.3545401096343994, "learning_rate": 0.00016737536656891491, "loss": 0.9919, "step": 5938 }, { "epoch": 0.9297119599248591, "grad_norm": 3.617964744567871, "learning_rate": 0.0001673509286412512, "loss": 1.6852, "step": 5939 }, { "epoch": 0.9298685034439574, "grad_norm": 2.4754271507263184, "learning_rate": 0.00016732649071358747, "loss": 0.9846, "step": 5940 }, { "epoch": 0.9300250469630558, "grad_norm": 2.2454872131347656, "learning_rate": 0.00016730205278592372, "loss": 0.7251, "step": 5941 }, { "epoch": 0.930181590482154, "grad_norm": 2.1622064113616943, "learning_rate": 0.00016727761485826, "loss": 0.998, "step": 5942 }, { "epoch": 0.9303381340012523, "grad_norm": 3.354268789291382, "learning_rate": 0.00016725317693059628, "loss": 1.5063, "step": 5943 }, { "epoch": 0.9304946775203506, "grad_norm": 1.39380943775177, "learning_rate": 0.00016722873900293253, "loss": 1.1028, "step": 5944 }, { "epoch": 0.930651221039449, "grad_norm": 2.941927194595337, "learning_rate": 0.0001672043010752688, "loss": 1.2247, "step": 5945 }, { "epoch": 0.9308077645585473, "grad_norm": 6.12858772277832, "learning_rate": 0.0001671798631476051, "loss": 1.3599, "step": 5946 }, { "epoch": 0.9309643080776456, "grad_norm": 1.9864230155944824, "learning_rate": 0.0001671554252199413, "loss": 1.2749, "step": 5947 }, { "epoch": 0.9311208515967438, "grad_norm": 1.012682318687439, "learning_rate": 0.0001671309872922776, "loss": 0.5183, "step": 5948 }, { "epoch": 0.9312773951158422, "grad_norm": 3.2158727645874023, "learning_rate": 0.00016710654936461387, "loss": 0.7957, "step": 5949 }, { "epoch": 0.9314339386349405, "grad_norm": 2.2417471408843994, "learning_rate": 0.00016708211143695012, "loss": 0.8408, "step": 5950 }, { "epoch": 0.9315904821540388, "grad_norm": 0.5746961236000061, "learning_rate": 0.0001670576735092864, "loss": 0.4067, "step": 5951 }, { "epoch": 0.9317470256731372, "grad_norm": 0.5741134881973267, "learning_rate": 0.00016703323558162268, "loss": 
0.2959, "step": 5952 }, { "epoch": 0.9319035691922355, "grad_norm": 0.4729560911655426, "learning_rate": 0.00016700879765395893, "loss": 0.232, "step": 5953 }, { "epoch": 0.9320601127113337, "grad_norm": 0.6859723329544067, "learning_rate": 0.00016698435972629518, "loss": 0.3221, "step": 5954 }, { "epoch": 0.932216656230432, "grad_norm": 0.8597524166107178, "learning_rate": 0.00016695992179863146, "loss": 0.4589, "step": 5955 }, { "epoch": 0.9323731997495304, "grad_norm": 0.4495220184326172, "learning_rate": 0.0001669354838709677, "loss": 0.3401, "step": 5956 }, { "epoch": 0.9325297432686287, "grad_norm": 3.076122999191284, "learning_rate": 0.00016691104594330399, "loss": 1.0121, "step": 5957 }, { "epoch": 0.932686286787727, "grad_norm": 0.5101608633995056, "learning_rate": 0.00016688660801564026, "loss": 0.2799, "step": 5958 }, { "epoch": 0.9328428303068252, "grad_norm": 0.4766397476196289, "learning_rate": 0.00016686217008797652, "loss": 0.2636, "step": 5959 }, { "epoch": 0.9329993738259236, "grad_norm": 1.034203052520752, "learning_rate": 0.0001668377321603128, "loss": 0.4569, "step": 5960 }, { "epoch": 0.9331559173450219, "grad_norm": 0.8217408061027527, "learning_rate": 0.00016681329423264907, "loss": 0.3696, "step": 5961 }, { "epoch": 0.9333124608641202, "grad_norm": 1.199652075767517, "learning_rate": 0.0001667888563049853, "loss": 0.5895, "step": 5962 }, { "epoch": 0.9334690043832186, "grad_norm": 0.9428247809410095, "learning_rate": 0.00016676441837732157, "loss": 0.3166, "step": 5963 }, { "epoch": 0.9336255479023169, "grad_norm": 1.7076395750045776, "learning_rate": 0.00016673998044965785, "loss": 0.663, "step": 5964 }, { "epoch": 0.9337820914214151, "grad_norm": 3.6879310607910156, "learning_rate": 0.0001667155425219941, "loss": 1.2153, "step": 5965 }, { "epoch": 0.9339386349405134, "grad_norm": 0.7371928691864014, "learning_rate": 0.00016669110459433038, "loss": 0.2916, "step": 5966 }, { "epoch": 0.9340951784596118, "grad_norm": 0.8231880068778992, 
"learning_rate": 0.00016666666666666666, "loss": 0.5749, "step": 5967 }, { "epoch": 0.9342517219787101, "grad_norm": 0.9370611310005188, "learning_rate": 0.0001666422287390029, "loss": 0.3539, "step": 5968 }, { "epoch": 0.9344082654978084, "grad_norm": 1.1584241390228271, "learning_rate": 0.0001666177908113392, "loss": 0.5032, "step": 5969 }, { "epoch": 0.9345648090169068, "grad_norm": 1.1886309385299683, "learning_rate": 0.00016659335288367547, "loss": 0.4978, "step": 5970 }, { "epoch": 0.934721352536005, "grad_norm": 1.277079463005066, "learning_rate": 0.0001665689149560117, "loss": 0.5764, "step": 5971 }, { "epoch": 0.9348778960551033, "grad_norm": 1.9561655521392822, "learning_rate": 0.00016654447702834797, "loss": 0.478, "step": 5972 }, { "epoch": 0.9350344395742016, "grad_norm": 2.3838415145874023, "learning_rate": 0.00016652003910068425, "loss": 0.6815, "step": 5973 }, { "epoch": 0.9351909830933, "grad_norm": 1.1179418563842773, "learning_rate": 0.0001664956011730205, "loss": 0.6301, "step": 5974 }, { "epoch": 0.9353475266123983, "grad_norm": 1.8033316135406494, "learning_rate": 0.00016647116324535678, "loss": 0.6859, "step": 5975 }, { "epoch": 0.9355040701314965, "grad_norm": 1.8072056770324707, "learning_rate": 0.00016644672531769306, "loss": 0.9525, "step": 5976 }, { "epoch": 0.9356606136505948, "grad_norm": 1.693436861038208, "learning_rate": 0.0001664222873900293, "loss": 0.645, "step": 5977 }, { "epoch": 0.9358171571696932, "grad_norm": 2.3530519008636475, "learning_rate": 0.00016639784946236556, "loss": 0.8476, "step": 5978 }, { "epoch": 0.9359737006887915, "grad_norm": 1.2091078758239746, "learning_rate": 0.00016637341153470184, "loss": 0.6442, "step": 5979 }, { "epoch": 0.9361302442078898, "grad_norm": 2.406554937362671, "learning_rate": 0.0001663489736070381, "loss": 0.937, "step": 5980 }, { "epoch": 0.9362867877269881, "grad_norm": 1.2704511880874634, "learning_rate": 0.00016632453567937437, "loss": 0.8456, "step": 5981 }, { "epoch": 
0.9364433312460864, "grad_norm": 1.3322428464889526, "learning_rate": 0.00016630009775171065, "loss": 0.4806, "step": 5982 }, { "epoch": 0.9365998747651847, "grad_norm": 1.9257659912109375, "learning_rate": 0.0001662756598240469, "loss": 1.2242, "step": 5983 }, { "epoch": 0.936756418284283, "grad_norm": 5.305508136749268, "learning_rate": 0.00016625122189638318, "loss": 0.8894, "step": 5984 }, { "epoch": 0.9369129618033814, "grad_norm": 2.0064687728881836, "learning_rate": 0.00016622678396871945, "loss": 1.1084, "step": 5985 }, { "epoch": 0.9370695053224797, "grad_norm": 0.8927270174026489, "learning_rate": 0.00016620234604105568, "loss": 0.3418, "step": 5986 }, { "epoch": 0.937226048841578, "grad_norm": 2.652418851852417, "learning_rate": 0.00016617790811339196, "loss": 1.2848, "step": 5987 }, { "epoch": 0.9373825923606762, "grad_norm": 1.884332537651062, "learning_rate": 0.00016615347018572824, "loss": 0.8169, "step": 5988 }, { "epoch": 0.9375391358797746, "grad_norm": 2.335824728012085, "learning_rate": 0.0001661290322580645, "loss": 1.0861, "step": 5989 }, { "epoch": 0.9376956793988729, "grad_norm": 2.0490598678588867, "learning_rate": 0.00016610459433040077, "loss": 0.7576, "step": 5990 }, { "epoch": 0.9378522229179712, "grad_norm": 1.8792589902877808, "learning_rate": 0.00016608015640273704, "loss": 1.1315, "step": 5991 }, { "epoch": 0.9380087664370695, "grad_norm": 2.0781798362731934, "learning_rate": 0.0001660557184750733, "loss": 0.4345, "step": 5992 }, { "epoch": 0.9381653099561679, "grad_norm": 1.9264535903930664, "learning_rate": 0.00016603128054740957, "loss": 0.8727, "step": 5993 }, { "epoch": 0.9383218534752661, "grad_norm": 2.6917431354522705, "learning_rate": 0.00016600684261974585, "loss": 0.7562, "step": 5994 }, { "epoch": 0.9384783969943644, "grad_norm": 2.0042667388916016, "learning_rate": 0.00016598240469208208, "loss": 0.9416, "step": 5995 }, { "epoch": 0.9386349405134627, "grad_norm": 3.084071397781372, "learning_rate": 
0.00016595796676441835, "loss": 0.9873, "step": 5996 }, { "epoch": 0.9387914840325611, "grad_norm": 2.0100440979003906, "learning_rate": 0.00016593352883675463, "loss": 0.7261, "step": 5997 }, { "epoch": 0.9389480275516594, "grad_norm": 2.626487970352173, "learning_rate": 0.00016590909090909088, "loss": 0.9508, "step": 5998 }, { "epoch": 0.9391045710707576, "grad_norm": 4.288865089416504, "learning_rate": 0.00016588465298142716, "loss": 1.564, "step": 5999 }, { "epoch": 0.939261114589856, "grad_norm": 6.611445903778076, "learning_rate": 0.00016586021505376344, "loss": 1.4826, "step": 6000 }, { "epoch": 0.939261114589856, "eval_loss": 0.580058753490448, "eval_runtime": 206.0189, "eval_samples_per_second": 60.106, "eval_steps_per_second": 3.757, "eval_wer": 0.35448498629321884, "step": 6000 }, { "epoch": 0.9394176581089543, "grad_norm": 0.8797839283943176, "learning_rate": 0.0001658357771260997, "loss": 0.2883, "step": 6001 }, { "epoch": 0.9395742016280526, "grad_norm": 0.6887585520744324, "learning_rate": 0.00016581133919843594, "loss": 0.3189, "step": 6002 }, { "epoch": 0.9397307451471509, "grad_norm": 0.4749625325202942, "learning_rate": 0.00016578690127077222, "loss": 0.3, "step": 6003 }, { "epoch": 0.9398872886662493, "grad_norm": 0.7343231439590454, "learning_rate": 0.00016576246334310847, "loss": 0.2879, "step": 6004 }, { "epoch": 0.9400438321853475, "grad_norm": 0.631679892539978, "learning_rate": 0.00016573802541544475, "loss": 0.4164, "step": 6005 }, { "epoch": 0.9402003757044458, "grad_norm": 1.4770556688308716, "learning_rate": 0.00016571358748778103, "loss": 0.3442, "step": 6006 }, { "epoch": 0.9403569192235441, "grad_norm": 0.5342082977294922, "learning_rate": 0.00016568914956011728, "loss": 0.2752, "step": 6007 }, { "epoch": 0.9405134627426425, "grad_norm": 0.7217116355895996, "learning_rate": 0.00016566471163245356, "loss": 0.3402, "step": 6008 }, { "epoch": 0.9406700062617408, "grad_norm": 0.3949846625328064, "learning_rate": 0.00016564027370478984, 
"loss": 0.249, "step": 6009 }, { "epoch": 0.9408265497808391, "grad_norm": 0.6596114635467529, "learning_rate": 0.00016561583577712606, "loss": 0.3487, "step": 6010 }, { "epoch": 0.9409830932999373, "grad_norm": 0.5770506858825684, "learning_rate": 0.00016559139784946234, "loss": 0.3152, "step": 6011 }, { "epoch": 0.9411396368190357, "grad_norm": 0.6533299088478088, "learning_rate": 0.00016556695992179862, "loss": 0.3005, "step": 6012 }, { "epoch": 0.941296180338134, "grad_norm": 1.0162473917007446, "learning_rate": 0.00016554252199413487, "loss": 0.5086, "step": 6013 }, { "epoch": 0.9414527238572323, "grad_norm": 4.207003116607666, "learning_rate": 0.00016551808406647115, "loss": 0.4517, "step": 6014 }, { "epoch": 0.9416092673763307, "grad_norm": 1.691664695739746, "learning_rate": 0.00016549364613880743, "loss": 0.4643, "step": 6015 }, { "epoch": 0.9417658108954289, "grad_norm": 1.8534256219863892, "learning_rate": 0.00016546920821114368, "loss": 0.4732, "step": 6016 }, { "epoch": 0.9419223544145272, "grad_norm": 1.1564279794692993, "learning_rate": 0.00016544477028347996, "loss": 0.4391, "step": 6017 }, { "epoch": 0.9420788979336255, "grad_norm": 1.8195252418518066, "learning_rate": 0.00016542033235581623, "loss": 0.7114, "step": 6018 }, { "epoch": 0.9422354414527239, "grad_norm": 1.6135766506195068, "learning_rate": 0.00016539589442815246, "loss": 0.6024, "step": 6019 }, { "epoch": 0.9423919849718222, "grad_norm": 2.4532477855682373, "learning_rate": 0.00016537145650048874, "loss": 0.5552, "step": 6020 }, { "epoch": 0.9425485284909205, "grad_norm": 0.8328799605369568, "learning_rate": 0.00016534701857282501, "loss": 0.3148, "step": 6021 }, { "epoch": 0.9427050720100187, "grad_norm": 1.1934763193130493, "learning_rate": 0.00016532258064516127, "loss": 0.4579, "step": 6022 }, { "epoch": 0.9428616155291171, "grad_norm": 1.3018206357955933, "learning_rate": 0.00016529814271749754, "loss": 0.4106, "step": 6023 }, { "epoch": 0.9430181590482154, "grad_norm": 
2.117647647857666, "learning_rate": 0.00016527370478983382, "loss": 0.8004, "step": 6024 }, { "epoch": 0.9431747025673137, "grad_norm": 1.9799567461013794, "learning_rate": 0.00016524926686217005, "loss": 0.4452, "step": 6025 }, { "epoch": 0.9433312460864121, "grad_norm": 1.8844330310821533, "learning_rate": 0.00016522482893450632, "loss": 0.7205, "step": 6026 }, { "epoch": 0.9434877896055104, "grad_norm": 2.3716540336608887, "learning_rate": 0.0001652003910068426, "loss": 0.7866, "step": 6027 }, { "epoch": 0.9436443331246086, "grad_norm": 1.5887513160705566, "learning_rate": 0.00016517595307917885, "loss": 0.8434, "step": 6028 }, { "epoch": 0.9438008766437069, "grad_norm": 1.6907641887664795, "learning_rate": 0.00016515151515151513, "loss": 0.8462, "step": 6029 }, { "epoch": 0.9439574201628053, "grad_norm": 4.261603355407715, "learning_rate": 0.0001651270772238514, "loss": 0.732, "step": 6030 }, { "epoch": 0.9441139636819036, "grad_norm": 2.2813000679016113, "learning_rate": 0.00016510263929618766, "loss": 1.302, "step": 6031 }, { "epoch": 0.9442705072010019, "grad_norm": 1.3129147291183472, "learning_rate": 0.00016507820136852394, "loss": 0.7006, "step": 6032 }, { "epoch": 0.9444270507201001, "grad_norm": 3.804840564727783, "learning_rate": 0.00016505376344086022, "loss": 1.5246, "step": 6033 }, { "epoch": 0.9445835942391985, "grad_norm": 6.360110759735107, "learning_rate": 0.00016502932551319644, "loss": 0.651, "step": 6034 }, { "epoch": 0.9447401377582968, "grad_norm": 1.6431469917297363, "learning_rate": 0.00016500488758553272, "loss": 0.8854, "step": 6035 }, { "epoch": 0.9448966812773951, "grad_norm": 2.868042469024658, "learning_rate": 0.000164980449657869, "loss": 1.4713, "step": 6036 }, { "epoch": 0.9450532247964935, "grad_norm": 1.8416922092437744, "learning_rate": 0.00016495601173020525, "loss": 1.0, "step": 6037 }, { "epoch": 0.9452097683155918, "grad_norm": 2.1547200679779053, "learning_rate": 0.00016493157380254153, "loss": 1.1594, "step": 6038 }, { 
"epoch": 0.94536631183469, "grad_norm": 3.3095543384552, "learning_rate": 0.0001649071358748778, "loss": 1.5753, "step": 6039 }, { "epoch": 0.9455228553537883, "grad_norm": 3.2236099243164062, "learning_rate": 0.00016488269794721406, "loss": 1.079, "step": 6040 }, { "epoch": 0.9456793988728867, "grad_norm": 1.9828205108642578, "learning_rate": 0.00016485826001955034, "loss": 0.7609, "step": 6041 }, { "epoch": 0.945835942391985, "grad_norm": 2.30930757522583, "learning_rate": 0.00016483382209188662, "loss": 1.2285, "step": 6042 }, { "epoch": 0.9459924859110833, "grad_norm": 2.148881435394287, "learning_rate": 0.00016480938416422284, "loss": 1.3376, "step": 6043 }, { "epoch": 0.9461490294301816, "grad_norm": 2.2145028114318848, "learning_rate": 0.00016478494623655912, "loss": 1.3568, "step": 6044 }, { "epoch": 0.9463055729492799, "grad_norm": 3.270275831222534, "learning_rate": 0.0001647605083088954, "loss": 1.349, "step": 6045 }, { "epoch": 0.9464621164683782, "grad_norm": 1.849869728088379, "learning_rate": 0.00016473607038123165, "loss": 0.9733, "step": 6046 }, { "epoch": 0.9466186599874765, "grad_norm": 2.6626498699188232, "learning_rate": 0.00016471163245356793, "loss": 0.6026, "step": 6047 }, { "epoch": 0.9467752035065748, "grad_norm": 2.1792116165161133, "learning_rate": 0.0001646871945259042, "loss": 0.9302, "step": 6048 }, { "epoch": 0.9469317470256732, "grad_norm": 4.216753959655762, "learning_rate": 0.00016466275659824043, "loss": 1.1798, "step": 6049 }, { "epoch": 0.9470882905447714, "grad_norm": 1.9251540899276733, "learning_rate": 0.0001646383186705767, "loss": 0.8717, "step": 6050 }, { "epoch": 0.9472448340638697, "grad_norm": 0.6738594174385071, "learning_rate": 0.00016461388074291299, "loss": 0.4177, "step": 6051 }, { "epoch": 0.947401377582968, "grad_norm": 0.584973156452179, "learning_rate": 0.00016458944281524924, "loss": 0.3434, "step": 6052 }, { "epoch": 0.9475579211020664, "grad_norm": 0.5951725840568542, "learning_rate": 
0.00016456500488758552, "loss": 0.3325, "step": 6053 }, { "epoch": 0.9477144646211647, "grad_norm": 0.6660220623016357, "learning_rate": 0.0001645405669599218, "loss": 0.2946, "step": 6054 }, { "epoch": 0.947871008140263, "grad_norm": 0.7421219348907471, "learning_rate": 0.00016451612903225804, "loss": 0.416, "step": 6055 }, { "epoch": 0.9480275516593613, "grad_norm": 0.5771187543869019, "learning_rate": 0.00016449169110459432, "loss": 0.288, "step": 6056 }, { "epoch": 0.9481840951784596, "grad_norm": 0.9346206784248352, "learning_rate": 0.0001644672531769306, "loss": 0.4272, "step": 6057 }, { "epoch": 0.9483406386975579, "grad_norm": 0.5371845364570618, "learning_rate": 0.00016444281524926683, "loss": 0.3013, "step": 6058 }, { "epoch": 0.9484971822166562, "grad_norm": 0.6031778454780579, "learning_rate": 0.0001644183773216031, "loss": 0.2977, "step": 6059 }, { "epoch": 0.9486537257357546, "grad_norm": 0.7967322468757629, "learning_rate": 0.00016439393939393938, "loss": 0.2708, "step": 6060 }, { "epoch": 0.9488102692548529, "grad_norm": 0.7594047784805298, "learning_rate": 0.00016436950146627563, "loss": 0.3233, "step": 6061 }, { "epoch": 0.9489668127739511, "grad_norm": 0.7103455066680908, "learning_rate": 0.0001643450635386119, "loss": 0.4047, "step": 6062 }, { "epoch": 0.9491233562930494, "grad_norm": 0.8710353970527649, "learning_rate": 0.0001643206256109482, "loss": 0.3425, "step": 6063 }, { "epoch": 0.9492798998121478, "grad_norm": 0.7795736193656921, "learning_rate": 0.00016429618768328444, "loss": 0.4108, "step": 6064 }, { "epoch": 0.9494364433312461, "grad_norm": 1.0512408018112183, "learning_rate": 0.00016427174975562072, "loss": 0.4573, "step": 6065 }, { "epoch": 0.9495929868503444, "grad_norm": 2.8175127506256104, "learning_rate": 0.00016424731182795697, "loss": 0.5165, "step": 6066 }, { "epoch": 0.9497495303694427, "grad_norm": 1.3486829996109009, "learning_rate": 0.00016422287390029322, "loss": 0.5704, "step": 6067 }, { "epoch": 0.949906073888541, 
"grad_norm": 1.54037344455719, "learning_rate": 0.0001641984359726295, "loss": 0.4442, "step": 6068 }, { "epoch": 0.9500626174076393, "grad_norm": 0.9929664134979248, "learning_rate": 0.00016417399804496578, "loss": 0.5064, "step": 6069 }, { "epoch": 0.9502191609267376, "grad_norm": 1.431089997291565, "learning_rate": 0.00016414956011730203, "loss": 0.5722, "step": 6070 }, { "epoch": 0.950375704445836, "grad_norm": 1.4068636894226074, "learning_rate": 0.0001641251221896383, "loss": 0.6155, "step": 6071 }, { "epoch": 0.9505322479649343, "grad_norm": 1.600339651107788, "learning_rate": 0.0001641006842619746, "loss": 0.5878, "step": 6072 }, { "epoch": 0.9506887914840325, "grad_norm": 1.6295877695083618, "learning_rate": 0.0001640762463343108, "loss": 0.6764, "step": 6073 }, { "epoch": 0.9508453350031308, "grad_norm": 1.2888362407684326, "learning_rate": 0.0001640518084066471, "loss": 0.5213, "step": 6074 }, { "epoch": 0.9510018785222292, "grad_norm": 1.4205604791641235, "learning_rate": 0.00016402737047898337, "loss": 0.6027, "step": 6075 }, { "epoch": 0.9511584220413275, "grad_norm": 1.4100985527038574, "learning_rate": 0.00016400293255131962, "loss": 0.6991, "step": 6076 }, { "epoch": 0.9513149655604258, "grad_norm": 2.3864524364471436, "learning_rate": 0.0001639784946236559, "loss": 0.6062, "step": 6077 }, { "epoch": 0.9514715090795242, "grad_norm": 1.8597694635391235, "learning_rate": 0.00016395405669599218, "loss": 0.5013, "step": 6078 }, { "epoch": 0.9516280525986224, "grad_norm": 1.8252570629119873, "learning_rate": 0.00016392961876832843, "loss": 0.8579, "step": 6079 }, { "epoch": 0.9517845961177207, "grad_norm": 1.7800219058990479, "learning_rate": 0.0001639051808406647, "loss": 0.6946, "step": 6080 }, { "epoch": 0.951941139636819, "grad_norm": 1.8645308017730713, "learning_rate": 0.00016388074291300098, "loss": 1.0379, "step": 6081 }, { "epoch": 0.9520976831559174, "grad_norm": 2.3539228439331055, "learning_rate": 0.0001638563049853372, "loss": 0.9241, 
"step": 6082 }, { "epoch": 0.9522542266750157, "grad_norm": 5.200006484985352, "learning_rate": 0.00016383186705767349, "loss": 1.2359, "step": 6083 }, { "epoch": 0.9524107701941139, "grad_norm": 1.8422434329986572, "learning_rate": 0.00016380742913000976, "loss": 1.3175, "step": 6084 }, { "epoch": 0.9525673137132122, "grad_norm": 3.778296709060669, "learning_rate": 0.00016378299120234602, "loss": 0.979, "step": 6085 }, { "epoch": 0.9527238572323106, "grad_norm": 2.5928831100463867, "learning_rate": 0.0001637585532746823, "loss": 1.2131, "step": 6086 }, { "epoch": 0.9528804007514089, "grad_norm": 0.75013267993927, "learning_rate": 0.00016373411534701857, "loss": 0.6146, "step": 6087 }, { "epoch": 0.9530369442705072, "grad_norm": 3.9112038612365723, "learning_rate": 0.00016370967741935482, "loss": 0.7088, "step": 6088 }, { "epoch": 0.9531934877896056, "grad_norm": 1.6564382314682007, "learning_rate": 0.0001636852394916911, "loss": 1.0111, "step": 6089 }, { "epoch": 0.9533500313087038, "grad_norm": 4.030745506286621, "learning_rate": 0.00016366080156402735, "loss": 1.3784, "step": 6090 }, { "epoch": 0.9535065748278021, "grad_norm": 3.7477824687957764, "learning_rate": 0.0001636363636363636, "loss": 1.2618, "step": 6091 }, { "epoch": 0.9536631183469004, "grad_norm": 4.694358825683594, "learning_rate": 0.00016361192570869988, "loss": 1.029, "step": 6092 }, { "epoch": 0.9538196618659988, "grad_norm": 2.626904010772705, "learning_rate": 0.00016358748778103616, "loss": 1.1996, "step": 6093 }, { "epoch": 0.9539762053850971, "grad_norm": 1.2721118927001953, "learning_rate": 0.0001635630498533724, "loss": 0.6772, "step": 6094 }, { "epoch": 0.9541327489041954, "grad_norm": 2.771449327468872, "learning_rate": 0.0001635386119257087, "loss": 1.4234, "step": 6095 }, { "epoch": 0.9542892924232936, "grad_norm": 3.96138858795166, "learning_rate": 0.00016351417399804497, "loss": 1.236, "step": 6096 }, { "epoch": 0.954445835942392, "grad_norm": 1.7881155014038086, "learning_rate": 
0.0001634897360703812, "loss": 0.3081, "step": 6097 }, { "epoch": 0.9546023794614903, "grad_norm": 3.0272154808044434, "learning_rate": 0.00016346529814271747, "loss": 1.5357, "step": 6098 }, { "epoch": 0.9547589229805886, "grad_norm": 1.5907299518585205, "learning_rate": 0.00016344086021505375, "loss": 0.485, "step": 6099 }, { "epoch": 0.954915466499687, "grad_norm": 2.836285352706909, "learning_rate": 0.00016341642228739, "loss": 1.5678, "step": 6100 }, { "epoch": 0.9550720100187852, "grad_norm": 0.5653170347213745, "learning_rate": 0.00016339198435972628, "loss": 0.3355, "step": 6101 }, { "epoch": 0.9552285535378835, "grad_norm": 0.646906316280365, "learning_rate": 0.00016336754643206256, "loss": 0.3155, "step": 6102 }, { "epoch": 0.9553850970569818, "grad_norm": 0.7139442563056946, "learning_rate": 0.0001633431085043988, "loss": 0.3738, "step": 6103 }, { "epoch": 0.9555416405760802, "grad_norm": 0.612605094909668, "learning_rate": 0.0001633186705767351, "loss": 0.2725, "step": 6104 }, { "epoch": 0.9556981840951785, "grad_norm": 0.4651452898979187, "learning_rate": 0.00016329423264907137, "loss": 0.291, "step": 6105 }, { "epoch": 0.9558547276142768, "grad_norm": 0.6952741146087646, "learning_rate": 0.0001632697947214076, "loss": 0.2871, "step": 6106 }, { "epoch": 0.956011271133375, "grad_norm": 0.5448809266090393, "learning_rate": 0.00016324535679374387, "loss": 0.2545, "step": 6107 }, { "epoch": 0.9561678146524734, "grad_norm": 0.9530249834060669, "learning_rate": 0.00016322091886608015, "loss": 0.3625, "step": 6108 }, { "epoch": 0.9563243581715717, "grad_norm": 0.5093350410461426, "learning_rate": 0.0001631964809384164, "loss": 0.3335, "step": 6109 }, { "epoch": 0.95648090169067, "grad_norm": 0.7554910778999329, "learning_rate": 0.00016317204301075268, "loss": 0.3436, "step": 6110 }, { "epoch": 0.9566374452097683, "grad_norm": 0.7633286118507385, "learning_rate": 0.00016314760508308895, "loss": 0.3616, "step": 6111 }, { "epoch": 0.9567939887288667, 
"grad_norm": 0.7461150288581848, "learning_rate": 0.0001631231671554252, "loss": 0.3472, "step": 6112 }, { "epoch": 0.9569505322479649, "grad_norm": 1.1075865030288696, "learning_rate": 0.00016309872922776146, "loss": 0.4187, "step": 6113 }, { "epoch": 0.9571070757670632, "grad_norm": 1.0741170644760132, "learning_rate": 0.00016307429130009774, "loss": 0.5136, "step": 6114 }, { "epoch": 0.9572636192861615, "grad_norm": 0.8717575073242188, "learning_rate": 0.000163049853372434, "loss": 0.317, "step": 6115 }, { "epoch": 0.9574201628052599, "grad_norm": 2.2658355236053467, "learning_rate": 0.00016302541544477027, "loss": 0.4035, "step": 6116 }, { "epoch": 0.9575767063243582, "grad_norm": 1.5720932483673096, "learning_rate": 0.00016300097751710654, "loss": 0.7772, "step": 6117 }, { "epoch": 0.9577332498434565, "grad_norm": 2.046311140060425, "learning_rate": 0.0001629765395894428, "loss": 0.4958, "step": 6118 }, { "epoch": 0.9578897933625548, "grad_norm": 1.5623672008514404, "learning_rate": 0.00016295210166177907, "loss": 0.6284, "step": 6119 }, { "epoch": 0.9580463368816531, "grad_norm": 2.6061389446258545, "learning_rate": 0.00016292766373411535, "loss": 0.9757, "step": 6120 }, { "epoch": 0.9582028804007514, "grad_norm": 1.6166800260543823, "learning_rate": 0.00016290322580645158, "loss": 0.5318, "step": 6121 }, { "epoch": 0.9583594239198497, "grad_norm": 1.0401281118392944, "learning_rate": 0.00016287878787878785, "loss": 0.6959, "step": 6122 }, { "epoch": 0.9585159674389481, "grad_norm": 2.1870529651641846, "learning_rate": 0.00016285434995112413, "loss": 0.6678, "step": 6123 }, { "epoch": 0.9586725109580463, "grad_norm": 0.9855921864509583, "learning_rate": 0.00016282991202346038, "loss": 0.3948, "step": 6124 }, { "epoch": 0.9588290544771446, "grad_norm": 1.3813424110412598, "learning_rate": 0.00016280547409579666, "loss": 1.0385, "step": 6125 }, { "epoch": 0.9589855979962429, "grad_norm": 1.2955299615859985, "learning_rate": 0.00016278103616813294, "loss": 
0.4331, "step": 6126 }, { "epoch": 0.9591421415153413, "grad_norm": 2.7574148178100586, "learning_rate": 0.0001627565982404692, "loss": 1.1984, "step": 6127 }, { "epoch": 0.9592986850344396, "grad_norm": 2.4210774898529053, "learning_rate": 0.00016273216031280547, "loss": 0.7977, "step": 6128 }, { "epoch": 0.9594552285535379, "grad_norm": 4.516207695007324, "learning_rate": 0.00016270772238514175, "loss": 0.9785, "step": 6129 }, { "epoch": 0.9596117720726361, "grad_norm": 1.7034456729888916, "learning_rate": 0.00016268328445747797, "loss": 0.8274, "step": 6130 }, { "epoch": 0.9597683155917345, "grad_norm": 1.6179447174072266, "learning_rate": 0.00016265884652981425, "loss": 0.8648, "step": 6131 }, { "epoch": 0.9599248591108328, "grad_norm": 1.179610013961792, "learning_rate": 0.00016263440860215053, "loss": 0.49, "step": 6132 }, { "epoch": 0.9600814026299311, "grad_norm": 2.1187167167663574, "learning_rate": 0.00016260997067448678, "loss": 0.9443, "step": 6133 }, { "epoch": 0.9602379461490295, "grad_norm": 2.0016579627990723, "learning_rate": 0.00016258553274682306, "loss": 0.9851, "step": 6134 }, { "epoch": 0.9603944896681278, "grad_norm": 2.0684750080108643, "learning_rate": 0.00016256109481915934, "loss": 0.9574, "step": 6135 }, { "epoch": 0.960551033187226, "grad_norm": 2.0303995609283447, "learning_rate": 0.0001625366568914956, "loss": 1.1024, "step": 6136 }, { "epoch": 0.9607075767063243, "grad_norm": 3.4601995944976807, "learning_rate": 0.00016251221896383184, "loss": 0.6025, "step": 6137 }, { "epoch": 0.9608641202254227, "grad_norm": 3.948913097381592, "learning_rate": 0.00016248778103616812, "loss": 0.6272, "step": 6138 }, { "epoch": 0.961020663744521, "grad_norm": 9.379945755004883, "learning_rate": 0.00016246334310850437, "loss": 1.3707, "step": 6139 }, { "epoch": 0.9611772072636193, "grad_norm": 1.712647557258606, "learning_rate": 0.00016243890518084065, "loss": 1.0695, "step": 6140 }, { "epoch": 0.9613337507827175, "grad_norm": 2.5404181480407715, 
"learning_rate": 0.00016241446725317693, "loss": 1.3814, "step": 6141 }, { "epoch": 0.9614902943018159, "grad_norm": 1.7662317752838135, "learning_rate": 0.00016239002932551318, "loss": 1.2451, "step": 6142 }, { "epoch": 0.9616468378209142, "grad_norm": 3.7080023288726807, "learning_rate": 0.00016236559139784946, "loss": 1.4876, "step": 6143 }, { "epoch": 0.9618033813400125, "grad_norm": 2.5112531185150146, "learning_rate": 0.00016234115347018573, "loss": 1.0143, "step": 6144 }, { "epoch": 0.9619599248591109, "grad_norm": 2.770322322845459, "learning_rate": 0.00016231671554252196, "loss": 1.2868, "step": 6145 }, { "epoch": 0.9621164683782092, "grad_norm": 1.875613808631897, "learning_rate": 0.00016229227761485824, "loss": 1.1641, "step": 6146 }, { "epoch": 0.9622730118973074, "grad_norm": 3.305671215057373, "learning_rate": 0.00016226783968719451, "loss": 0.8963, "step": 6147 }, { "epoch": 0.9624295554164057, "grad_norm": 1.5809119939804077, "learning_rate": 0.00016224340175953077, "loss": 0.6262, "step": 6148 }, { "epoch": 0.9625860989355041, "grad_norm": 2.6327903270721436, "learning_rate": 0.00016221896383186704, "loss": 0.4588, "step": 6149 }, { "epoch": 0.9627426424546024, "grad_norm": 1.2948060035705566, "learning_rate": 0.00016219452590420332, "loss": 1.0183, "step": 6150 }, { "epoch": 0.9628991859737007, "grad_norm": 0.6153829097747803, "learning_rate": 0.00016217008797653957, "loss": 0.2905, "step": 6151 }, { "epoch": 0.963055729492799, "grad_norm": 0.7002934813499451, "learning_rate": 0.00016214565004887585, "loss": 0.3681, "step": 6152 }, { "epoch": 0.9632122730118973, "grad_norm": 0.4247320294380188, "learning_rate": 0.00016212121212121213, "loss": 0.2522, "step": 6153 }, { "epoch": 0.9633688165309956, "grad_norm": 1.023521900177002, "learning_rate": 0.00016209677419354835, "loss": 0.4101, "step": 6154 }, { "epoch": 0.9635253600500939, "grad_norm": 0.9346298575401306, "learning_rate": 0.00016207233626588463, "loss": 0.4846, "step": 6155 }, { "epoch": 
0.9636819035691923, "grad_norm": 1.02970290184021, "learning_rate": 0.0001620478983382209, "loss": 0.526, "step": 6156 }, { "epoch": 0.9638384470882906, "grad_norm": 0.6798651218414307, "learning_rate": 0.00016202346041055716, "loss": 0.346, "step": 6157 }, { "epoch": 0.9639949906073888, "grad_norm": 0.7402242422103882, "learning_rate": 0.00016199902248289344, "loss": 0.3458, "step": 6158 }, { "epoch": 0.9641515341264871, "grad_norm": 0.9456111788749695, "learning_rate": 0.00016197458455522972, "loss": 0.328, "step": 6159 }, { "epoch": 0.9643080776455855, "grad_norm": 0.9947827458381653, "learning_rate": 0.00016195014662756597, "loss": 0.4049, "step": 6160 }, { "epoch": 0.9644646211646838, "grad_norm": 0.9443894028663635, "learning_rate": 0.00016192570869990222, "loss": 0.2766, "step": 6161 }, { "epoch": 0.9646211646837821, "grad_norm": 0.9167144298553467, "learning_rate": 0.0001619012707722385, "loss": 0.4539, "step": 6162 }, { "epoch": 0.9647777082028804, "grad_norm": 1.195816993713379, "learning_rate": 0.00016187683284457475, "loss": 0.608, "step": 6163 }, { "epoch": 0.9649342517219787, "grad_norm": 1.4440490007400513, "learning_rate": 0.00016185239491691103, "loss": 0.5446, "step": 6164 }, { "epoch": 0.965090795241077, "grad_norm": 0.7405814528465271, "learning_rate": 0.0001618279569892473, "loss": 0.3242, "step": 6165 }, { "epoch": 0.9652473387601753, "grad_norm": 1.427616000175476, "learning_rate": 0.00016180351906158356, "loss": 0.4028, "step": 6166 }, { "epoch": 0.9654038822792737, "grad_norm": 1.6844470500946045, "learning_rate": 0.00016177908113391984, "loss": 0.6549, "step": 6167 }, { "epoch": 0.965560425798372, "grad_norm": 2.0406758785247803, "learning_rate": 0.00016175464320625612, "loss": 0.4393, "step": 6168 }, { "epoch": 0.9657169693174703, "grad_norm": 1.3769197463989258, "learning_rate": 0.00016173020527859234, "loss": 0.4588, "step": 6169 }, { "epoch": 0.9658735128365685, "grad_norm": 1.0385569334030151, "learning_rate": 0.00016170576735092862, 
"loss": 0.4175, "step": 6170 }, { "epoch": 0.9660300563556669, "grad_norm": 1.2535983324050903, "learning_rate": 0.0001616813294232649, "loss": 0.4402, "step": 6171 }, { "epoch": 0.9661865998747652, "grad_norm": 1.0669677257537842, "learning_rate": 0.00016165689149560115, "loss": 0.3781, "step": 6172 }, { "epoch": 0.9663431433938635, "grad_norm": 1.9178048372268677, "learning_rate": 0.00016163245356793743, "loss": 0.5489, "step": 6173 }, { "epoch": 0.9664996869129618, "grad_norm": 1.401336908340454, "learning_rate": 0.0001616080156402737, "loss": 0.6525, "step": 6174 }, { "epoch": 0.9666562304320601, "grad_norm": 1.5251128673553467, "learning_rate": 0.00016158357771260996, "loss": 0.8831, "step": 6175 }, { "epoch": 0.9668127739511584, "grad_norm": 2.3394248485565186, "learning_rate": 0.00016155913978494623, "loss": 0.906, "step": 6176 }, { "epoch": 0.9669693174702567, "grad_norm": 1.7218315601348877, "learning_rate": 0.0001615347018572825, "loss": 0.7642, "step": 6177 }, { "epoch": 0.967125860989355, "grad_norm": 1.3168402910232544, "learning_rate": 0.00016151026392961874, "loss": 0.6152, "step": 6178 }, { "epoch": 0.9672824045084534, "grad_norm": 1.864134430885315, "learning_rate": 0.00016148582600195502, "loss": 1.0541, "step": 6179 }, { "epoch": 0.9674389480275517, "grad_norm": 2.145655870437622, "learning_rate": 0.0001614613880742913, "loss": 0.9664, "step": 6180 }, { "epoch": 0.9675954915466499, "grad_norm": 1.7807810306549072, "learning_rate": 0.00016143695014662755, "loss": 0.7563, "step": 6181 }, { "epoch": 0.9677520350657483, "grad_norm": 2.374579906463623, "learning_rate": 0.00016141251221896382, "loss": 0.7937, "step": 6182 }, { "epoch": 0.9679085785848466, "grad_norm": 2.7262957096099854, "learning_rate": 0.0001613880742913001, "loss": 1.1843, "step": 6183 }, { "epoch": 0.9680651221039449, "grad_norm": 2.082878589630127, "learning_rate": 0.00016136363636363633, "loss": 1.443, "step": 6184 }, { "epoch": 0.9682216656230432, "grad_norm": 
3.2135493755340576, "learning_rate": 0.0001613391984359726, "loss": 0.9447, "step": 6185 }, { "epoch": 0.9683782091421416, "grad_norm": 1.5901567935943604, "learning_rate": 0.00016131476050830888, "loss": 0.7122, "step": 6186 }, { "epoch": 0.9685347526612398, "grad_norm": 1.4855965375900269, "learning_rate": 0.00016129032258064513, "loss": 1.1293, "step": 6187 }, { "epoch": 0.9686912961803381, "grad_norm": 3.177372694015503, "learning_rate": 0.0001612658846529814, "loss": 1.2107, "step": 6188 }, { "epoch": 0.9688478396994364, "grad_norm": 2.5584800243377686, "learning_rate": 0.0001612414467253177, "loss": 0.9807, "step": 6189 }, { "epoch": 0.9690043832185348, "grad_norm": 1.333678960800171, "learning_rate": 0.00016121700879765394, "loss": 0.876, "step": 6190 }, { "epoch": 0.9691609267376331, "grad_norm": 2.623502254486084, "learning_rate": 0.00016119257086999022, "loss": 0.8509, "step": 6191 }, { "epoch": 0.9693174702567313, "grad_norm": 1.4758764505386353, "learning_rate": 0.0001611681329423265, "loss": 1.3429, "step": 6192 }, { "epoch": 0.9694740137758296, "grad_norm": 1.8458406925201416, "learning_rate": 0.00016114369501466272, "loss": 1.4386, "step": 6193 }, { "epoch": 0.969630557294928, "grad_norm": 1.940486192703247, "learning_rate": 0.000161119257086999, "loss": 0.7314, "step": 6194 }, { "epoch": 0.9697871008140263, "grad_norm": 2.4906039237976074, "learning_rate": 0.00016109481915933528, "loss": 1.3775, "step": 6195 }, { "epoch": 0.9699436443331246, "grad_norm": 1.3360508680343628, "learning_rate": 0.00016107038123167153, "loss": 0.4396, "step": 6196 }, { "epoch": 0.970100187852223, "grad_norm": 3.2190940380096436, "learning_rate": 0.0001610459433040078, "loss": 0.7913, "step": 6197 }, { "epoch": 0.9702567313713212, "grad_norm": 1.5043747425079346, "learning_rate": 0.0001610215053763441, "loss": 0.5736, "step": 6198 }, { "epoch": 0.9704132748904195, "grad_norm": 2.0739946365356445, "learning_rate": 0.00016099706744868034, "loss": 0.775, "step": 6199 }, { 
"epoch": 0.9705698184095178, "grad_norm": 3.4089341163635254, "learning_rate": 0.00016097262952101662, "loss": 1.0023, "step": 6200 }, { "epoch": 0.9707263619286162, "grad_norm": 0.49759766459465027, "learning_rate": 0.0001609481915933529, "loss": 0.3349, "step": 6201 }, { "epoch": 0.9708829054477145, "grad_norm": 0.44160133600234985, "learning_rate": 0.00016092375366568912, "loss": 0.2845, "step": 6202 }, { "epoch": 0.9710394489668128, "grad_norm": 0.6713477969169617, "learning_rate": 0.0001608993157380254, "loss": 0.3082, "step": 6203 }, { "epoch": 0.971195992485911, "grad_norm": 0.48395052552223206, "learning_rate": 0.00016087487781036168, "loss": 0.2274, "step": 6204 }, { "epoch": 0.9713525360050094, "grad_norm": 0.5824487805366516, "learning_rate": 0.00016085043988269793, "loss": 0.3094, "step": 6205 }, { "epoch": 0.9715090795241077, "grad_norm": 0.6694998145103455, "learning_rate": 0.0001608260019550342, "loss": 0.2379, "step": 6206 }, { "epoch": 0.971665623043206, "grad_norm": 0.701755166053772, "learning_rate": 0.00016080156402737048, "loss": 0.2673, "step": 6207 }, { "epoch": 0.9718221665623044, "grad_norm": 0.9201617240905762, "learning_rate": 0.0001607771260997067, "loss": 0.403, "step": 6208 }, { "epoch": 0.9719787100814026, "grad_norm": 0.9466022849082947, "learning_rate": 0.000160752688172043, "loss": 0.3029, "step": 6209 }, { "epoch": 0.9721352536005009, "grad_norm": 1.3907926082611084, "learning_rate": 0.00016072825024437927, "loss": 0.3822, "step": 6210 }, { "epoch": 0.9722917971195992, "grad_norm": 0.9519656300544739, "learning_rate": 0.00016070381231671552, "loss": 0.5414, "step": 6211 }, { "epoch": 0.9724483406386976, "grad_norm": 1.2506569623947144, "learning_rate": 0.0001606793743890518, "loss": 0.5634, "step": 6212 }, { "epoch": 0.9726048841577959, "grad_norm": 0.7912557125091553, "learning_rate": 0.00016065493646138807, "loss": 0.4921, "step": 6213 }, { "epoch": 0.9727614276768942, "grad_norm": 1.3480660915374756, "learning_rate": 
0.00016063049853372432, "loss": 0.3606, "step": 6214 }, { "epoch": 0.9729179711959924, "grad_norm": 1.24032461643219, "learning_rate": 0.0001606060606060606, "loss": 0.4046, "step": 6215 }, { "epoch": 0.9730745147150908, "grad_norm": 1.4148228168487549, "learning_rate": 0.00016058162267839688, "loss": 0.6278, "step": 6216 }, { "epoch": 0.9732310582341891, "grad_norm": 2.0846927165985107, "learning_rate": 0.0001605571847507331, "loss": 0.6703, "step": 6217 }, { "epoch": 0.9733876017532874, "grad_norm": 2.4335579872131348, "learning_rate": 0.00016053274682306938, "loss": 0.6913, "step": 6218 }, { "epoch": 0.9735441452723858, "grad_norm": 1.5219314098358154, "learning_rate": 0.00016050830889540566, "loss": 0.4337, "step": 6219 }, { "epoch": 0.9737006887914841, "grad_norm": 3.503244400024414, "learning_rate": 0.0001604838709677419, "loss": 0.3213, "step": 6220 }, { "epoch": 0.9738572323105823, "grad_norm": 1.0507615804672241, "learning_rate": 0.0001604594330400782, "loss": 0.3756, "step": 6221 }, { "epoch": 0.9740137758296806, "grad_norm": 1.3683404922485352, "learning_rate": 0.00016043499511241447, "loss": 0.808, "step": 6222 }, { "epoch": 0.974170319348779, "grad_norm": 3.0311946868896484, "learning_rate": 0.00016041055718475072, "loss": 0.7998, "step": 6223 }, { "epoch": 0.9743268628678773, "grad_norm": 1.0792346000671387, "learning_rate": 0.000160386119257087, "loss": 0.473, "step": 6224 }, { "epoch": 0.9744834063869756, "grad_norm": 1.5082753896713257, "learning_rate": 0.00016036168132942325, "loss": 0.4845, "step": 6225 }, { "epoch": 0.9746399499060739, "grad_norm": 1.629244089126587, "learning_rate": 0.0001603372434017595, "loss": 0.4439, "step": 6226 }, { "epoch": 0.9747964934251722, "grad_norm": 2.6280176639556885, "learning_rate": 0.00016031280547409578, "loss": 0.7095, "step": 6227 }, { "epoch": 0.9749530369442705, "grad_norm": 1.1485744714736938, "learning_rate": 0.00016028836754643206, "loss": 0.6459, "step": 6228 }, { "epoch": 0.9751095804633688, 
"grad_norm": 3.294517755508423, "learning_rate": 0.0001602639296187683, "loss": 0.4601, "step": 6229 }, { "epoch": 0.9752661239824671, "grad_norm": 2.4806206226348877, "learning_rate": 0.0001602394916911046, "loss": 0.9412, "step": 6230 }, { "epoch": 0.9754226675015655, "grad_norm": 1.6524327993392944, "learning_rate": 0.00016021505376344087, "loss": 0.6169, "step": 6231 }, { "epoch": 0.9755792110206637, "grad_norm": 1.4262551069259644, "learning_rate": 0.0001601906158357771, "loss": 0.7836, "step": 6232 }, { "epoch": 0.975735754539762, "grad_norm": 1.37894868850708, "learning_rate": 0.00016016617790811337, "loss": 0.3791, "step": 6233 }, { "epoch": 0.9758922980588604, "grad_norm": 3.260524272918701, "learning_rate": 0.00016014173998044965, "loss": 1.2778, "step": 6234 }, { "epoch": 0.9760488415779587, "grad_norm": 1.1274797916412354, "learning_rate": 0.0001601173020527859, "loss": 0.5008, "step": 6235 }, { "epoch": 0.976205385097057, "grad_norm": 3.6673662662506104, "learning_rate": 0.00016009286412512218, "loss": 1.2082, "step": 6236 }, { "epoch": 0.9763619286161553, "grad_norm": 2.9804086685180664, "learning_rate": 0.00016006842619745846, "loss": 0.8466, "step": 6237 }, { "epoch": 0.9765184721352536, "grad_norm": 2.351480007171631, "learning_rate": 0.0001600439882697947, "loss": 1.1366, "step": 6238 }, { "epoch": 0.9766750156543519, "grad_norm": 2.855189561843872, "learning_rate": 0.00016001955034213098, "loss": 0.8895, "step": 6239 }, { "epoch": 0.9768315591734502, "grad_norm": 3.6381757259368896, "learning_rate": 0.00015999511241446726, "loss": 1.5563, "step": 6240 }, { "epoch": 0.9769881026925485, "grad_norm": 1.6015832424163818, "learning_rate": 0.0001599706744868035, "loss": 1.0543, "step": 6241 }, { "epoch": 0.9771446462116469, "grad_norm": 1.6288787126541138, "learning_rate": 0.00015994623655913977, "loss": 0.8863, "step": 6242 }, { "epoch": 0.9773011897307452, "grad_norm": 3.075251579284668, "learning_rate": 0.00015992179863147604, "loss": 1.1518, 
"step": 6243 }, { "epoch": 0.9774577332498434, "grad_norm": 1.8009350299835205, "learning_rate": 0.0001598973607038123, "loss": 1.1322, "step": 6244 }, { "epoch": 0.9776142767689417, "grad_norm": 1.083701491355896, "learning_rate": 0.00015987292277614857, "loss": 0.5735, "step": 6245 }, { "epoch": 0.9777708202880401, "grad_norm": 3.801417589187622, "learning_rate": 0.00015984848484848485, "loss": 1.359, "step": 6246 }, { "epoch": 0.9779273638071384, "grad_norm": 3.566204309463501, "learning_rate": 0.0001598240469208211, "loss": 1.027, "step": 6247 }, { "epoch": 0.9780839073262367, "grad_norm": 1.9416583776474, "learning_rate": 0.00015979960899315738, "loss": 1.3133, "step": 6248 }, { "epoch": 0.978240450845335, "grad_norm": 4.796366214752197, "learning_rate": 0.00015977517106549363, "loss": 1.1721, "step": 6249 }, { "epoch": 0.9783969943644333, "grad_norm": 2.206047296524048, "learning_rate": 0.00015975073313782988, "loss": 1.2208, "step": 6250 }, { "epoch": 0.9785535378835316, "grad_norm": 0.7884498834609985, "learning_rate": 0.00015972629521016616, "loss": 0.3513, "step": 6251 }, { "epoch": 0.9787100814026299, "grad_norm": 0.5849615931510925, "learning_rate": 0.00015970185728250244, "loss": 0.2797, "step": 6252 }, { "epoch": 0.9788666249217283, "grad_norm": 0.5109837651252747, "learning_rate": 0.0001596774193548387, "loss": 0.3183, "step": 6253 }, { "epoch": 0.9790231684408266, "grad_norm": 0.5798192620277405, "learning_rate": 0.00015965298142717497, "loss": 0.2323, "step": 6254 }, { "epoch": 0.9791797119599248, "grad_norm": 0.5868884921073914, "learning_rate": 0.00015962854349951125, "loss": 0.259, "step": 6255 }, { "epoch": 0.9793362554790231, "grad_norm": 0.7878360748291016, "learning_rate": 0.00015960410557184747, "loss": 0.2945, "step": 6256 }, { "epoch": 0.9794927989981215, "grad_norm": 0.6286665201187134, "learning_rate": 0.00015957966764418375, "loss": 0.2745, "step": 6257 }, { "epoch": 0.9796493425172198, "grad_norm": 1.0530362129211426, "learning_rate": 
0.00015955522971652003, "loss": 0.4147, "step": 6258 }, { "epoch": 0.9798058860363181, "grad_norm": 1.6230087280273438, "learning_rate": 0.00015953079178885628, "loss": 0.3675, "step": 6259 }, { "epoch": 0.9799624295554165, "grad_norm": 0.9370027184486389, "learning_rate": 0.00015950635386119256, "loss": 0.4494, "step": 6260 }, { "epoch": 0.9801189730745147, "grad_norm": 0.6508410573005676, "learning_rate": 0.00015948191593352884, "loss": 0.3247, "step": 6261 }, { "epoch": 0.980275516593613, "grad_norm": 0.7678089141845703, "learning_rate": 0.0001594574780058651, "loss": 0.4053, "step": 6262 }, { "epoch": 0.9804320601127113, "grad_norm": 1.5006393194198608, "learning_rate": 0.00015943304007820137, "loss": 0.4773, "step": 6263 }, { "epoch": 0.9805886036318097, "grad_norm": 1.076756238937378, "learning_rate": 0.00015940860215053765, "loss": 0.4695, "step": 6264 }, { "epoch": 0.980745147150908, "grad_norm": 1.24697744846344, "learning_rate": 0.00015938416422287387, "loss": 0.4956, "step": 6265 }, { "epoch": 0.9809016906700062, "grad_norm": 1.1420774459838867, "learning_rate": 0.00015935972629521015, "loss": 0.4614, "step": 6266 }, { "epoch": 0.9810582341891045, "grad_norm": 1.9122599363327026, "learning_rate": 0.00015933528836754643, "loss": 0.512, "step": 6267 }, { "epoch": 0.9812147777082029, "grad_norm": 1.2293574810028076, "learning_rate": 0.00015931085043988268, "loss": 0.4518, "step": 6268 }, { "epoch": 0.9813713212273012, "grad_norm": 2.1564345359802246, "learning_rate": 0.00015928641251221896, "loss": 0.5496, "step": 6269 }, { "epoch": 0.9815278647463995, "grad_norm": 1.1697052717208862, "learning_rate": 0.00015926197458455523, "loss": 0.6363, "step": 6270 }, { "epoch": 0.9816844082654979, "grad_norm": 1.1160368919372559, "learning_rate": 0.00015923753665689149, "loss": 0.3766, "step": 6271 }, { "epoch": 0.9818409517845961, "grad_norm": 1.3110209703445435, "learning_rate": 0.00015921309872922774, "loss": 0.7845, "step": 6272 }, { "epoch": 0.9819974953036944, 
"grad_norm": 1.1179189682006836, "learning_rate": 0.00015918866080156402, "loss": 0.3716, "step": 6273 }, { "epoch": 0.9821540388227927, "grad_norm": 0.8737648129463196, "learning_rate": 0.00015916422287390027, "loss": 0.4372, "step": 6274 }, { "epoch": 0.9823105823418911, "grad_norm": 1.6135969161987305, "learning_rate": 0.00015913978494623654, "loss": 0.7372, "step": 6275 }, { "epoch": 0.9824671258609894, "grad_norm": 2.5550239086151123, "learning_rate": 0.00015911534701857282, "loss": 0.713, "step": 6276 }, { "epoch": 0.9826236693800877, "grad_norm": 2.2706096172332764, "learning_rate": 0.00015909090909090907, "loss": 0.967, "step": 6277 }, { "epoch": 0.9827802128991859, "grad_norm": 2.025991678237915, "learning_rate": 0.00015906647116324535, "loss": 0.706, "step": 6278 }, { "epoch": 0.9829367564182843, "grad_norm": 1.7579374313354492, "learning_rate": 0.00015904203323558163, "loss": 0.5235, "step": 6279 }, { "epoch": 0.9830932999373826, "grad_norm": 1.4392457008361816, "learning_rate": 0.00015901759530791786, "loss": 0.608, "step": 6280 }, { "epoch": 0.9832498434564809, "grad_norm": 1.8196216821670532, "learning_rate": 0.00015899315738025413, "loss": 1.2177, "step": 6281 }, { "epoch": 0.9834063869755792, "grad_norm": 4.727586269378662, "learning_rate": 0.0001589687194525904, "loss": 0.6056, "step": 6282 }, { "epoch": 0.9835629304946775, "grad_norm": 1.8476710319519043, "learning_rate": 0.00015894428152492666, "loss": 0.7133, "step": 6283 }, { "epoch": 0.9837194740137758, "grad_norm": 2.2918593883514404, "learning_rate": 0.00015891984359726294, "loss": 1.0066, "step": 6284 }, { "epoch": 0.9838760175328741, "grad_norm": 1.7023167610168457, "learning_rate": 0.00015889540566959922, "loss": 0.7832, "step": 6285 }, { "epoch": 0.9840325610519725, "grad_norm": 1.6587579250335693, "learning_rate": 0.00015887096774193547, "loss": 0.8247, "step": 6286 }, { "epoch": 0.9841891045710708, "grad_norm": 2.7378196716308594, "learning_rate": 0.00015884652981427175, "loss": 
1.1478, "step": 6287 }, { "epoch": 0.9843456480901691, "grad_norm": 5.176706314086914, "learning_rate": 0.00015882209188660803, "loss": 0.6136, "step": 6288 }, { "epoch": 0.9845021916092673, "grad_norm": 2.3430936336517334, "learning_rate": 0.00015879765395894425, "loss": 0.8779, "step": 6289 }, { "epoch": 0.9846587351283657, "grad_norm": 2.579249382019043, "learning_rate": 0.00015877321603128053, "loss": 1.3216, "step": 6290 }, { "epoch": 0.984815278647464, "grad_norm": 2.4976139068603516, "learning_rate": 0.0001587487781036168, "loss": 1.1508, "step": 6291 }, { "epoch": 0.9849718221665623, "grad_norm": 1.357665777206421, "learning_rate": 0.00015872434017595306, "loss": 0.8853, "step": 6292 }, { "epoch": 0.9851283656856606, "grad_norm": 2.1828815937042236, "learning_rate": 0.00015869990224828934, "loss": 0.814, "step": 6293 }, { "epoch": 0.985284909204759, "grad_norm": 3.8649942874908447, "learning_rate": 0.00015867546432062562, "loss": 1.0242, "step": 6294 }, { "epoch": 0.9854414527238572, "grad_norm": 2.397716760635376, "learning_rate": 0.00015865102639296187, "loss": 1.4216, "step": 6295 }, { "epoch": 0.9855979962429555, "grad_norm": 2.9856514930725098, "learning_rate": 0.00015862658846529812, "loss": 0.9754, "step": 6296 }, { "epoch": 0.9857545397620538, "grad_norm": 4.637380123138428, "learning_rate": 0.0001586021505376344, "loss": 1.3687, "step": 6297 }, { "epoch": 0.9859110832811522, "grad_norm": 3.106740713119507, "learning_rate": 0.00015857771260997065, "loss": 1.0291, "step": 6298 }, { "epoch": 0.9860676268002505, "grad_norm": 2.850269317626953, "learning_rate": 0.00015855327468230693, "loss": 0.6318, "step": 6299 }, { "epoch": 0.9862241703193487, "grad_norm": 1.4052451848983765, "learning_rate": 0.0001585288367546432, "loss": 0.6223, "step": 6300 }, { "epoch": 0.986380713838447, "grad_norm": 0.5287259817123413, "learning_rate": 0.00015850439882697946, "loss": 0.3596, "step": 6301 }, { "epoch": 0.9865372573575454, "grad_norm": 0.48303574323654175, 
"learning_rate": 0.00015847996089931574, "loss": 0.2857, "step": 6302 }, { "epoch": 0.9866938008766437, "grad_norm": 0.5090639591217041, "learning_rate": 0.000158455522971652, "loss": 0.3439, "step": 6303 }, { "epoch": 0.986850344395742, "grad_norm": 0.6524388790130615, "learning_rate": 0.00015843108504398824, "loss": 0.2502, "step": 6304 }, { "epoch": 0.9870068879148404, "grad_norm": 0.5750242471694946, "learning_rate": 0.00015840664711632452, "loss": 0.2383, "step": 6305 }, { "epoch": 0.9871634314339386, "grad_norm": 1.1469411849975586, "learning_rate": 0.0001583822091886608, "loss": 0.3987, "step": 6306 }, { "epoch": 0.9873199749530369, "grad_norm": 0.6706929206848145, "learning_rate": 0.00015835777126099705, "loss": 0.4182, "step": 6307 }, { "epoch": 0.9874765184721352, "grad_norm": 0.9753531813621521, "learning_rate": 0.00015833333333333332, "loss": 0.3501, "step": 6308 }, { "epoch": 0.9876330619912336, "grad_norm": 0.6613764762878418, "learning_rate": 0.0001583088954056696, "loss": 0.3439, "step": 6309 }, { "epoch": 0.9877896055103319, "grad_norm": 0.5925769209861755, "learning_rate": 0.00015828445747800585, "loss": 0.3267, "step": 6310 }, { "epoch": 0.9879461490294302, "grad_norm": 1.1225662231445312, "learning_rate": 0.00015826001955034213, "loss": 0.2879, "step": 6311 }, { "epoch": 0.9881026925485284, "grad_norm": 1.3221330642700195, "learning_rate": 0.0001582355816226784, "loss": 0.517, "step": 6312 }, { "epoch": 0.9882592360676268, "grad_norm": 0.802402675151825, "learning_rate": 0.00015821114369501463, "loss": 0.4133, "step": 6313 }, { "epoch": 0.9884157795867251, "grad_norm": 1.222743272781372, "learning_rate": 0.0001581867057673509, "loss": 0.54, "step": 6314 }, { "epoch": 0.9885723231058234, "grad_norm": 0.8106583952903748, "learning_rate": 0.0001581622678396872, "loss": 0.4293, "step": 6315 }, { "epoch": 0.9887288666249218, "grad_norm": 1.7479103803634644, "learning_rate": 0.00015813782991202344, "loss": 0.5579, "step": 6316 }, { "epoch": 
0.98888541014402, "grad_norm": 0.8788287043571472, "learning_rate": 0.00015811339198435972, "loss": 0.2549, "step": 6317 }, { "epoch": 0.9890419536631183, "grad_norm": 4.599166393280029, "learning_rate": 0.000158088954056696, "loss": 1.0898, "step": 6318 }, { "epoch": 0.9891984971822166, "grad_norm": 1.6982215642929077, "learning_rate": 0.00015806451612903225, "loss": 0.5713, "step": 6319 }, { "epoch": 0.989355040701315, "grad_norm": 1.5406105518341064, "learning_rate": 0.0001580400782013685, "loss": 0.3639, "step": 6320 }, { "epoch": 0.9895115842204133, "grad_norm": 0.9149010181427002, "learning_rate": 0.00015801564027370478, "loss": 0.4535, "step": 6321 }, { "epoch": 0.9896681277395116, "grad_norm": 2.5321741104125977, "learning_rate": 0.00015799120234604103, "loss": 1.0454, "step": 6322 }, { "epoch": 0.9898246712586098, "grad_norm": 1.6757642030715942, "learning_rate": 0.0001579667644183773, "loss": 0.6215, "step": 6323 }, { "epoch": 0.9899812147777082, "grad_norm": 4.210154056549072, "learning_rate": 0.0001579423264907136, "loss": 0.5722, "step": 6324 }, { "epoch": 0.9901377582968065, "grad_norm": 2.43436598777771, "learning_rate": 0.00015791788856304984, "loss": 0.558, "step": 6325 }, { "epoch": 0.9902943018159048, "grad_norm": 1.8142644166946411, "learning_rate": 0.00015789345063538612, "loss": 0.4534, "step": 6326 }, { "epoch": 0.9904508453350032, "grad_norm": 1.3764431476593018, "learning_rate": 0.0001578690127077224, "loss": 0.4666, "step": 6327 }, { "epoch": 0.9906073888541015, "grad_norm": 1.3100895881652832, "learning_rate": 0.00015784457478005862, "loss": 0.8198, "step": 6328 }, { "epoch": 0.9907639323731997, "grad_norm": 1.911555528640747, "learning_rate": 0.0001578201368523949, "loss": 0.7068, "step": 6329 }, { "epoch": 0.990920475892298, "grad_norm": 2.208777666091919, "learning_rate": 0.00015779569892473118, "loss": 0.8239, "step": 6330 }, { "epoch": 0.9910770194113964, "grad_norm": 2.607349157333374, "learning_rate": 0.00015777126099706743, 
"loss": 0.3357, "step": 6331 }, { "epoch": 0.9912335629304947, "grad_norm": 2.01904559135437, "learning_rate": 0.0001577468230694037, "loss": 0.6867, "step": 6332 }, { "epoch": 0.991390106449593, "grad_norm": 3.4520325660705566, "learning_rate": 0.00015772238514173998, "loss": 1.0985, "step": 6333 }, { "epoch": 0.9915466499686914, "grad_norm": 1.0999062061309814, "learning_rate": 0.00015769794721407624, "loss": 0.4234, "step": 6334 }, { "epoch": 0.9917031934877896, "grad_norm": 6.374632358551025, "learning_rate": 0.00015767350928641251, "loss": 0.865, "step": 6335 }, { "epoch": 0.9918597370068879, "grad_norm": 2.246000051498413, "learning_rate": 0.0001576490713587488, "loss": 0.8057, "step": 6336 }, { "epoch": 0.9920162805259862, "grad_norm": 2.388260841369629, "learning_rate": 0.00015762463343108502, "loss": 0.8103, "step": 6337 }, { "epoch": 0.9921728240450846, "grad_norm": 2.392380952835083, "learning_rate": 0.0001576001955034213, "loss": 1.3118, "step": 6338 }, { "epoch": 0.9923293675641829, "grad_norm": 2.8762900829315186, "learning_rate": 0.00015757575757575757, "loss": 1.4997, "step": 6339 }, { "epoch": 0.9924859110832811, "grad_norm": 2.6236648559570312, "learning_rate": 0.00015755131964809382, "loss": 1.0189, "step": 6340 }, { "epoch": 0.9926424546023794, "grad_norm": 2.0896835327148438, "learning_rate": 0.0001575268817204301, "loss": 1.0106, "step": 6341 }, { "epoch": 0.9927989981214778, "grad_norm": 1.5402086973190308, "learning_rate": 0.00015750244379276638, "loss": 0.7675, "step": 6342 }, { "epoch": 0.9929555416405761, "grad_norm": 5.9841179847717285, "learning_rate": 0.0001574780058651026, "loss": 1.0019, "step": 6343 }, { "epoch": 0.9931120851596744, "grad_norm": 2.565718412399292, "learning_rate": 0.00015745356793743888, "loss": 1.1758, "step": 6344 }, { "epoch": 0.9932686286787727, "grad_norm": 2.391500473022461, "learning_rate": 0.00015742913000977516, "loss": 1.1052, "step": 6345 }, { "epoch": 0.993425172197871, "grad_norm": 1.9955416917800903, 
"learning_rate": 0.0001574046920821114, "loss": 0.9472, "step": 6346 }, { "epoch": 0.9935817157169693, "grad_norm": 2.492854356765747, "learning_rate": 0.0001573802541544477, "loss": 0.6311, "step": 6347 }, { "epoch": 0.9937382592360676, "grad_norm": 1.4435681104660034, "learning_rate": 0.00015735581622678397, "loss": 0.3773, "step": 6348 }, { "epoch": 0.993894802755166, "grad_norm": 3.8596134185791016, "learning_rate": 0.00015733137829912022, "loss": 0.9125, "step": 6349 }, { "epoch": 0.9940513462742643, "grad_norm": 2.726219415664673, "learning_rate": 0.0001573069403714565, "loss": 1.5958, "step": 6350 }, { "epoch": 0.9942078897933626, "grad_norm": 0.686896562576294, "learning_rate": 0.00015728250244379278, "loss": 0.3087, "step": 6351 }, { "epoch": 0.9943644333124608, "grad_norm": 0.6529414057731628, "learning_rate": 0.000157258064516129, "loss": 0.3721, "step": 6352 }, { "epoch": 0.9945209768315592, "grad_norm": 0.6401044726371765, "learning_rate": 0.00015723362658846528, "loss": 0.3123, "step": 6353 }, { "epoch": 0.9946775203506575, "grad_norm": 0.6423803567886353, "learning_rate": 0.00015720918866080156, "loss": 0.2896, "step": 6354 }, { "epoch": 0.9948340638697558, "grad_norm": 0.576648473739624, "learning_rate": 0.0001571847507331378, "loss": 0.2859, "step": 6355 }, { "epoch": 0.9949906073888541, "grad_norm": 0.7672856450080872, "learning_rate": 0.0001571603128054741, "loss": 0.3422, "step": 6356 }, { "epoch": 0.9951471509079524, "grad_norm": 1.0555219650268555, "learning_rate": 0.00015713587487781037, "loss": 0.294, "step": 6357 }, { "epoch": 0.9953036944270507, "grad_norm": 0.8917555212974548, "learning_rate": 0.00015711143695014662, "loss": 0.3567, "step": 6358 }, { "epoch": 0.995460237946149, "grad_norm": 0.863348126411438, "learning_rate": 0.0001570869990224829, "loss": 0.3465, "step": 6359 }, { "epoch": 0.9956167814652473, "grad_norm": 1.0300931930541992, "learning_rate": 0.00015706256109481917, "loss": 0.4107, "step": 6360 }, { "epoch": 
0.9957733249843457, "grad_norm": 2.140244483947754, "learning_rate": 0.0001570381231671554, "loss": 0.5715, "step": 6361 }, { "epoch": 0.995929868503444, "grad_norm": 1.4137368202209473, "learning_rate": 0.00015701368523949168, "loss": 0.6483, "step": 6362 }, { "epoch": 0.9960864120225422, "grad_norm": 1.0269410610198975, "learning_rate": 0.00015698924731182796, "loss": 0.4882, "step": 6363 }, { "epoch": 0.9962429555416406, "grad_norm": 2.0102436542510986, "learning_rate": 0.0001569648093841642, "loss": 0.4823, "step": 6364 }, { "epoch": 0.9963994990607389, "grad_norm": 2.118197441101074, "learning_rate": 0.00015694037145650049, "loss": 0.5051, "step": 6365 }, { "epoch": 0.9965560425798372, "grad_norm": 1.0090569257736206, "learning_rate": 0.00015691593352883676, "loss": 0.2953, "step": 6366 }, { "epoch": 0.9967125860989355, "grad_norm": 2.38740873336792, "learning_rate": 0.000156891495601173, "loss": 0.6598, "step": 6367 }, { "epoch": 0.9968691296180339, "grad_norm": 1.2782179117202759, "learning_rate": 0.00015686705767350927, "loss": 0.5072, "step": 6368 }, { "epoch": 0.9970256731371321, "grad_norm": 1.9059410095214844, "learning_rate": 0.00015684261974584554, "loss": 0.5702, "step": 6369 }, { "epoch": 0.9971822166562304, "grad_norm": 2.062696933746338, "learning_rate": 0.0001568181818181818, "loss": 0.6943, "step": 6370 }, { "epoch": 0.9973387601753287, "grad_norm": 1.2728642225265503, "learning_rate": 0.00015679374389051807, "loss": 0.5178, "step": 6371 }, { "epoch": 0.9974953036944271, "grad_norm": 2.6865758895874023, "learning_rate": 0.00015676930596285435, "loss": 1.1807, "step": 6372 }, { "epoch": 0.9976518472135254, "grad_norm": 2.504348039627075, "learning_rate": 0.0001567448680351906, "loss": 1.0252, "step": 6373 }, { "epoch": 0.9978083907326236, "grad_norm": 3.697204351425171, "learning_rate": 0.00015672043010752688, "loss": 1.1543, "step": 6374 }, { "epoch": 0.997964934251722, "grad_norm": 1.612819790840149, "learning_rate": 0.00015669599217986316, 
"loss": 0.8737, "step": 6375 }, { "epoch": 0.9981214777708203, "grad_norm": 3.1941633224487305, "learning_rate": 0.00015667155425219938, "loss": 0.8725, "step": 6376 }, { "epoch": 0.9982780212899186, "grad_norm": 2.2976832389831543, "learning_rate": 0.00015664711632453566, "loss": 1.1326, "step": 6377 }, { "epoch": 0.9984345648090169, "grad_norm": 3.410414457321167, "learning_rate": 0.00015662267839687194, "loss": 0.5624, "step": 6378 }, { "epoch": 0.9985911083281153, "grad_norm": 3.318864107131958, "learning_rate": 0.0001565982404692082, "loss": 0.9388, "step": 6379 }, { "epoch": 0.9987476518472135, "grad_norm": 1.7484426498413086, "learning_rate": 0.00015657380254154447, "loss": 0.9887, "step": 6380 }, { "epoch": 0.9989041953663118, "grad_norm": 3.5230112075805664, "learning_rate": 0.00015654936461388075, "loss": 1.4972, "step": 6381 }, { "epoch": 0.9990607388854101, "grad_norm": 5.785534381866455, "learning_rate": 0.000156524926686217, "loss": 1.1754, "step": 6382 }, { "epoch": 0.9992172824045085, "grad_norm": 2.6626009941101074, "learning_rate": 0.00015650048875855328, "loss": 1.2539, "step": 6383 }, { "epoch": 0.9993738259236068, "grad_norm": 1.2295653820037842, "learning_rate": 0.00015647605083088953, "loss": 1.0544, "step": 6384 }, { "epoch": 0.9995303694427051, "grad_norm": 1.510327935218811, "learning_rate": 0.00015645161290322578, "loss": 0.5378, "step": 6385 }, { "epoch": 0.9996869129618033, "grad_norm": 1.7747565507888794, "learning_rate": 0.00015642717497556206, "loss": 0.8904, "step": 6386 }, { "epoch": 0.9998434564809017, "grad_norm": 6.381943225860596, "learning_rate": 0.00015640273704789834, "loss": 1.7167, "step": 6387 }, { "epoch": 1.0, "grad_norm": 2.6135759353637695, "learning_rate": 0.0001563782991202346, "loss": 1.0733, "step": 6388 }, { "epoch": 1.0001565435190982, "grad_norm": 0.6401867866516113, "learning_rate": 0.00015635386119257087, "loss": 0.3242, "step": 6389 }, { "epoch": 1.0003130870381967, "grad_norm": 0.46278223395347595, 
"learning_rate": 0.00015632942326490715, "loss": 0.2188, "step": 6390 }, { "epoch": 1.0004696305572949, "grad_norm": 0.5220637917518616, "learning_rate": 0.00015630498533724337, "loss": 0.2602, "step": 6391 }, { "epoch": 1.0006261740763933, "grad_norm": 0.6111396551132202, "learning_rate": 0.00015628054740957965, "loss": 0.3363, "step": 6392 }, { "epoch": 1.0007827175954915, "grad_norm": 0.6467208862304688, "learning_rate": 0.00015625610948191593, "loss": 0.2684, "step": 6393 }, { "epoch": 1.0009392611145898, "grad_norm": 0.5766490697860718, "learning_rate": 0.00015623167155425218, "loss": 0.2703, "step": 6394 }, { "epoch": 1.0010958046336882, "grad_norm": 0.6601141095161438, "learning_rate": 0.00015620723362658846, "loss": 0.3272, "step": 6395 }, { "epoch": 1.0012523481527864, "grad_norm": 0.61875981092453, "learning_rate": 0.0001561827956989247, "loss": 0.2942, "step": 6396 }, { "epoch": 1.0014088916718848, "grad_norm": 0.9664865732192993, "learning_rate": 0.00015615835777126099, "loss": 0.3894, "step": 6397 }, { "epoch": 1.001565435190983, "grad_norm": 0.5463569760322571, "learning_rate": 0.00015613391984359726, "loss": 0.1922, "step": 6398 }, { "epoch": 1.0017219787100815, "grad_norm": 1.0034434795379639, "learning_rate": 0.0001561094819159335, "loss": 0.3852, "step": 6399 }, { "epoch": 1.0018785222291797, "grad_norm": 0.9108190536499023, "learning_rate": 0.00015608504398826977, "loss": 0.3026, "step": 6400 }, { "epoch": 1.002035065748278, "grad_norm": 1.1288667917251587, "learning_rate": 0.00015606060606060605, "loss": 0.4889, "step": 6401 }, { "epoch": 1.0021916092673764, "grad_norm": 0.8430051207542419, "learning_rate": 0.0001560361681329423, "loss": 0.5495, "step": 6402 }, { "epoch": 1.0023481527864746, "grad_norm": 1.0893449783325195, "learning_rate": 0.00015601173020527857, "loss": 0.3109, "step": 6403 }, { "epoch": 1.002504696305573, "grad_norm": 1.9297972917556763, "learning_rate": 0.00015598729227761485, "loss": 0.5138, "step": 6404 }, { "epoch": 
1.0026612398246713, "grad_norm": 1.0670981407165527, "learning_rate": 0.0001559628543499511, "loss": 0.3637, "step": 6405 }, { "epoch": 1.0028177833437695, "grad_norm": 1.307648777961731, "learning_rate": 0.00015593841642228738, "loss": 0.4379, "step": 6406 }, { "epoch": 1.002974326862868, "grad_norm": 1.2096155881881714, "learning_rate": 0.00015591397849462366, "loss": 0.4889, "step": 6407 }, { "epoch": 1.0031308703819661, "grad_norm": 1.7205982208251953, "learning_rate": 0.00015588954056695989, "loss": 0.2895, "step": 6408 }, { "epoch": 1.0032874139010646, "grad_norm": 1.0032813549041748, "learning_rate": 0.00015586510263929616, "loss": 0.4063, "step": 6409 }, { "epoch": 1.0034439574201628, "grad_norm": 2.1352264881134033, "learning_rate": 0.00015584066471163244, "loss": 0.4581, "step": 6410 }, { "epoch": 1.003600500939261, "grad_norm": 1.522994041442871, "learning_rate": 0.0001558162267839687, "loss": 0.4092, "step": 6411 }, { "epoch": 1.0037570444583594, "grad_norm": 1.0274169445037842, "learning_rate": 0.00015579178885630497, "loss": 0.3602, "step": 6412 }, { "epoch": 1.0039135879774577, "grad_norm": 1.5904802083969116, "learning_rate": 0.00015576735092864125, "loss": 0.7179, "step": 6413 }, { "epoch": 1.004070131496556, "grad_norm": 1.4016789197921753, "learning_rate": 0.00015574291300097747, "loss": 0.5017, "step": 6414 }, { "epoch": 1.0042266750156543, "grad_norm": 5.12330436706543, "learning_rate": 0.00015571847507331375, "loss": 0.8605, "step": 6415 }, { "epoch": 1.0043832185347528, "grad_norm": 2.253363609313965, "learning_rate": 0.00015569403714565003, "loss": 0.49, "step": 6416 }, { "epoch": 1.004539762053851, "grad_norm": 8.99915599822998, "learning_rate": 0.00015566959921798628, "loss": 0.6846, "step": 6417 }, { "epoch": 1.0046963055729492, "grad_norm": 2.108546018600464, "learning_rate": 0.00015564516129032256, "loss": 0.7329, "step": 6418 }, { "epoch": 1.0048528490920476, "grad_norm": 2.5911080837249756, "learning_rate": 0.00015562072336265884, 
"loss": 0.7537, "step": 6419 }, { "epoch": 1.0050093926111459, "grad_norm": 9.908422470092773, "learning_rate": 0.0001555962854349951, "loss": 1.2488, "step": 6420 }, { "epoch": 1.0051659361302443, "grad_norm": 2.6049554347991943, "learning_rate": 0.00015557184750733137, "loss": 0.872, "step": 6421 }, { "epoch": 1.0053224796493425, "grad_norm": 4.485533237457275, "learning_rate": 0.00015554740957966765, "loss": 1.3471, "step": 6422 }, { "epoch": 1.0054790231684407, "grad_norm": 3.122943162918091, "learning_rate": 0.00015552297165200387, "loss": 0.7273, "step": 6423 }, { "epoch": 1.0056355666875392, "grad_norm": 2.615659475326538, "learning_rate": 0.00015549853372434015, "loss": 1.014, "step": 6424 }, { "epoch": 1.0057921102066374, "grad_norm": 1.3930341005325317, "learning_rate": 0.00015547409579667643, "loss": 0.3932, "step": 6425 }, { "epoch": 1.0059486537257358, "grad_norm": 2.3153088092803955, "learning_rate": 0.00015544965786901268, "loss": 1.1018, "step": 6426 }, { "epoch": 1.006105197244834, "grad_norm": 4.314839839935303, "learning_rate": 0.00015542521994134896, "loss": 1.1748, "step": 6427 }, { "epoch": 1.0062617407639323, "grad_norm": 3.0916011333465576, "learning_rate": 0.00015540078201368524, "loss": 1.2124, "step": 6428 }, { "epoch": 1.0064182842830307, "grad_norm": 4.510741233825684, "learning_rate": 0.0001553763440860215, "loss": 1.0817, "step": 6429 }, { "epoch": 1.006574827802129, "grad_norm": 5.305693626403809, "learning_rate": 0.00015535190615835777, "loss": 1.6207, "step": 6430 }, { "epoch": 1.0067313713212274, "grad_norm": 2.222895383834839, "learning_rate": 0.00015532746823069402, "loss": 1.512, "step": 6431 }, { "epoch": 1.0068879148403256, "grad_norm": 3.7936456203460693, "learning_rate": 0.00015530303030303027, "loss": 1.712, "step": 6432 }, { "epoch": 1.007044458359424, "grad_norm": 1.699407696723938, "learning_rate": 0.00015527859237536655, "loss": 1.0667, "step": 6433 }, { "epoch": 1.0072010018785222, "grad_norm": 2.3237552642822266, 
"learning_rate": 0.00015525415444770282, "loss": 1.1835, "step": 6434 }, { "epoch": 1.0073575453976205, "grad_norm": 0.950505793094635, "learning_rate": 0.00015522971652003908, "loss": 0.3155, "step": 6435 }, { "epoch": 1.007514088916719, "grad_norm": 3.0012030601501465, "learning_rate": 0.00015520527859237535, "loss": 0.5834, "step": 6436 }, { "epoch": 1.0076706324358171, "grad_norm": 1.9200234413146973, "learning_rate": 0.00015518084066471163, "loss": 0.8096, "step": 6437 }, { "epoch": 1.0078271759549156, "grad_norm": 3.209449291229248, "learning_rate": 0.00015515640273704786, "loss": 1.6, "step": 6438 }, { "epoch": 1.0079837194740138, "grad_norm": 0.4958963990211487, "learning_rate": 0.00015513196480938413, "loss": 0.2785, "step": 6439 }, { "epoch": 1.008140262993112, "grad_norm": 0.461821585893631, "learning_rate": 0.0001551075268817204, "loss": 0.2305, "step": 6440 }, { "epoch": 1.0082968065122104, "grad_norm": 0.5686135292053223, "learning_rate": 0.00015508308895405666, "loss": 0.1765, "step": 6441 }, { "epoch": 1.0084533500313086, "grad_norm": 0.5753898024559021, "learning_rate": 0.00015505865102639294, "loss": 0.1702, "step": 6442 }, { "epoch": 1.008609893550407, "grad_norm": 0.2971111536026001, "learning_rate": 0.00015503421309872922, "loss": 0.1378, "step": 6443 }, { "epoch": 1.0087664370695053, "grad_norm": 0.6010518074035645, "learning_rate": 0.00015500977517106547, "loss": 0.2316, "step": 6444 }, { "epoch": 1.0089229805886035, "grad_norm": 0.7052372097969055, "learning_rate": 0.00015498533724340175, "loss": 0.1997, "step": 6445 }, { "epoch": 1.009079524107702, "grad_norm": 0.728542685508728, "learning_rate": 0.00015496089931573803, "loss": 0.2774, "step": 6446 }, { "epoch": 1.0092360676268002, "grad_norm": 0.6369313597679138, "learning_rate": 0.00015493646138807425, "loss": 0.2283, "step": 6447 }, { "epoch": 1.0093926111458986, "grad_norm": 0.7698948383331299, "learning_rate": 0.00015491202346041053, "loss": 0.336, "step": 6448 }, { "epoch": 
1.0095491546649968, "grad_norm": 0.579069197177887, "learning_rate": 0.0001548875855327468, "loss": 0.275, "step": 6449 }, { "epoch": 1.0097056981840953, "grad_norm": 0.8608638644218445, "learning_rate": 0.00015486314760508306, "loss": 0.2888, "step": 6450 }, { "epoch": 1.0098622417031935, "grad_norm": 0.651512622833252, "learning_rate": 0.00015483870967741934, "loss": 0.2552, "step": 6451 }, { "epoch": 1.0100187852222917, "grad_norm": 1.6202661991119385, "learning_rate": 0.00015481427174975562, "loss": 0.5846, "step": 6452 }, { "epoch": 1.0101753287413902, "grad_norm": 1.3742421865463257, "learning_rate": 0.00015478983382209187, "loss": 0.29, "step": 6453 }, { "epoch": 1.0103318722604884, "grad_norm": 1.502272605895996, "learning_rate": 0.00015476539589442815, "loss": 0.4051, "step": 6454 }, { "epoch": 1.0104884157795868, "grad_norm": 0.8512781262397766, "learning_rate": 0.0001547409579667644, "loss": 0.4372, "step": 6455 }, { "epoch": 1.010644959298685, "grad_norm": 1.3594355583190918, "learning_rate": 0.00015471652003910065, "loss": 0.4414, "step": 6456 }, { "epoch": 1.0108015028177832, "grad_norm": 1.8934544324874878, "learning_rate": 0.00015469208211143693, "loss": 0.5294, "step": 6457 }, { "epoch": 1.0109580463368817, "grad_norm": 1.089806318283081, "learning_rate": 0.0001546676441837732, "loss": 0.2968, "step": 6458 }, { "epoch": 1.01111458985598, "grad_norm": 1.3797799348831177, "learning_rate": 0.00015464320625610946, "loss": 0.2608, "step": 6459 }, { "epoch": 1.0112711333750783, "grad_norm": 1.3458391427993774, "learning_rate": 0.00015461876832844574, "loss": 0.4816, "step": 6460 }, { "epoch": 1.0114276768941766, "grad_norm": 1.3416376113891602, "learning_rate": 0.00015459433040078201, "loss": 0.5628, "step": 6461 }, { "epoch": 1.0115842204132748, "grad_norm": 1.862665057182312, "learning_rate": 0.00015456989247311824, "loss": 0.4349, "step": 6462 }, { "epoch": 1.0117407639323732, "grad_norm": 1.254166603088379, "learning_rate": 0.00015454545454545452, 
"loss": 0.4393, "step": 6463 }, { "epoch": 1.0118973074514714, "grad_norm": 1.7121856212615967, "learning_rate": 0.0001545210166177908, "loss": 0.5046, "step": 6464 }, { "epoch": 1.0120538509705699, "grad_norm": 1.5799607038497925, "learning_rate": 0.00015449657869012705, "loss": 0.5843, "step": 6465 }, { "epoch": 1.012210394489668, "grad_norm": 2.453345775604248, "learning_rate": 0.00015447214076246333, "loss": 1.143, "step": 6466 }, { "epoch": 1.0123669380087665, "grad_norm": 2.5280091762542725, "learning_rate": 0.0001544477028347996, "loss": 0.6528, "step": 6467 }, { "epoch": 1.0125234815278648, "grad_norm": 1.8263561725616455, "learning_rate": 0.00015442326490713585, "loss": 0.8534, "step": 6468 }, { "epoch": 1.012680025046963, "grad_norm": 1.4700437784194946, "learning_rate": 0.00015439882697947213, "loss": 0.7371, "step": 6469 }, { "epoch": 1.0128365685660614, "grad_norm": 2.2002007961273193, "learning_rate": 0.0001543743890518084, "loss": 0.9834, "step": 6470 }, { "epoch": 1.0129931120851596, "grad_norm": 2.333889961242676, "learning_rate": 0.00015434995112414464, "loss": 0.7919, "step": 6471 }, { "epoch": 1.013149655604258, "grad_norm": 5.423836708068848, "learning_rate": 0.00015432551319648091, "loss": 0.8916, "step": 6472 }, { "epoch": 1.0133061991233563, "grad_norm": 1.438730001449585, "learning_rate": 0.0001543010752688172, "loss": 0.7162, "step": 6473 }, { "epoch": 1.0134627426424545, "grad_norm": 1.9986294507980347, "learning_rate": 0.00015427663734115344, "loss": 1.0373, "step": 6474 }, { "epoch": 1.013619286161553, "grad_norm": 4.634669303894043, "learning_rate": 0.00015425219941348972, "loss": 1.1194, "step": 6475 }, { "epoch": 1.0137758296806512, "grad_norm": 2.2568159103393555, "learning_rate": 0.000154227761485826, "loss": 0.9643, "step": 6476 }, { "epoch": 1.0139323731997496, "grad_norm": 1.3911081552505493, "learning_rate": 0.00015420332355816225, "loss": 0.6398, "step": 6477 }, { "epoch": 1.0140889167188478, "grad_norm": 2.0544090270996094, 
"learning_rate": 0.00015417888563049853, "loss": 1.4445, "step": 6478 }, { "epoch": 1.014245460237946, "grad_norm": 1.972231388092041, "learning_rate": 0.00015415444770283478, "loss": 0.7207, "step": 6479 }, { "epoch": 1.0144020037570445, "grad_norm": 2.4303290843963623, "learning_rate": 0.00015413000977517103, "loss": 1.3453, "step": 6480 }, { "epoch": 1.0145585472761427, "grad_norm": 5.570343017578125, "learning_rate": 0.0001541055718475073, "loss": 1.5838, "step": 6481 }, { "epoch": 1.0147150907952411, "grad_norm": 1.2938313484191895, "learning_rate": 0.0001540811339198436, "loss": 1.1335, "step": 6482 }, { "epoch": 1.0148716343143394, "grad_norm": 2.1745591163635254, "learning_rate": 0.00015405669599217984, "loss": 0.6899, "step": 6483 }, { "epoch": 1.0150281778334378, "grad_norm": 3.9206926822662354, "learning_rate": 0.00015403225806451612, "loss": 0.8181, "step": 6484 }, { "epoch": 1.015184721352536, "grad_norm": 3.452662706375122, "learning_rate": 0.0001540078201368524, "loss": 0.2675, "step": 6485 }, { "epoch": 1.0153412648716342, "grad_norm": 1.481506109237671, "learning_rate": 0.00015398338220918862, "loss": 0.7693, "step": 6486 }, { "epoch": 1.0154978083907327, "grad_norm": 3.136730670928955, "learning_rate": 0.0001539589442815249, "loss": 0.4043, "step": 6487 }, { "epoch": 1.0156543519098309, "grad_norm": 4.050405502319336, "learning_rate": 0.00015393450635386118, "loss": 1.0601, "step": 6488 }, { "epoch": 1.0158108954289293, "grad_norm": 0.4689655601978302, "learning_rate": 0.00015391006842619743, "loss": 0.312, "step": 6489 }, { "epoch": 1.0159674389480275, "grad_norm": 0.32348453998565674, "learning_rate": 0.0001538856304985337, "loss": 0.1755, "step": 6490 }, { "epoch": 1.0161239824671258, "grad_norm": 0.456193745136261, "learning_rate": 0.00015386119257086999, "loss": 0.2209, "step": 6491 }, { "epoch": 1.0162805259862242, "grad_norm": 0.6613138914108276, "learning_rate": 0.00015383675464320624, "loss": 0.3865, "step": 6492 }, { "epoch": 
1.0164370695053224, "grad_norm": 0.45760709047317505, "learning_rate": 0.00015381231671554252, "loss": 0.2318, "step": 6493 }, { "epoch": 1.0165936130244209, "grad_norm": 0.4187608063220978, "learning_rate": 0.0001537878787878788, "loss": 0.2009, "step": 6494 }, { "epoch": 1.016750156543519, "grad_norm": 0.9235534071922302, "learning_rate": 0.00015376344086021502, "loss": 0.3943, "step": 6495 }, { "epoch": 1.0169067000626173, "grad_norm": 0.9163877367973328, "learning_rate": 0.0001537390029325513, "loss": 0.3704, "step": 6496 }, { "epoch": 1.0170632435817157, "grad_norm": 0.5193113684654236, "learning_rate": 0.00015371456500488757, "loss": 0.2252, "step": 6497 }, { "epoch": 1.017219787100814, "grad_norm": 0.8574435710906982, "learning_rate": 0.00015369012707722383, "loss": 0.3135, "step": 6498 }, { "epoch": 1.0173763306199124, "grad_norm": 1.2782654762268066, "learning_rate": 0.0001536656891495601, "loss": 0.3573, "step": 6499 }, { "epoch": 1.0175328741390106, "grad_norm": 4.555233955383301, "learning_rate": 0.00015364125122189638, "loss": 0.9878, "step": 6500 }, { "epoch": 1.017689417658109, "grad_norm": 0.6440590620040894, "learning_rate": 0.00015361681329423263, "loss": 0.212, "step": 6501 }, { "epoch": 1.0178459611772073, "grad_norm": 1.6863186359405518, "learning_rate": 0.00015359237536656889, "loss": 0.5993, "step": 6502 }, { "epoch": 1.0180025046963055, "grad_norm": 1.924282431602478, "learning_rate": 0.00015356793743890516, "loss": 0.3741, "step": 6503 }, { "epoch": 1.018159048215404, "grad_norm": 0.9348188638687134, "learning_rate": 0.00015354349951124141, "loss": 0.4833, "step": 6504 }, { "epoch": 1.0183155917345021, "grad_norm": 1.7575799226760864, "learning_rate": 0.0001535190615835777, "loss": 0.3112, "step": 6505 }, { "epoch": 1.0184721352536006, "grad_norm": 1.014967679977417, "learning_rate": 0.00015349462365591397, "loss": 0.4143, "step": 6506 }, { "epoch": 1.0186286787726988, "grad_norm": 1.0853968858718872, "learning_rate": 
0.00015347018572825022, "loss": 0.4754, "step": 6507 }, { "epoch": 1.018785222291797, "grad_norm": 1.5835812091827393, "learning_rate": 0.0001534457478005865, "loss": 0.3803, "step": 6508 }, { "epoch": 1.0189417658108955, "grad_norm": 0.9677704572677612, "learning_rate": 0.00015342130987292278, "loss": 0.4094, "step": 6509 }, { "epoch": 1.0190983093299937, "grad_norm": 1.162556767463684, "learning_rate": 0.000153396871945259, "loss": 0.4622, "step": 6510 }, { "epoch": 1.0192548528490921, "grad_norm": 1.3809646368026733, "learning_rate": 0.00015337243401759528, "loss": 0.361, "step": 6511 }, { "epoch": 1.0194113963681903, "grad_norm": 1.8124253749847412, "learning_rate": 0.00015334799608993156, "loss": 0.4641, "step": 6512 }, { "epoch": 1.0195679398872888, "grad_norm": 1.3627941608428955, "learning_rate": 0.0001533235581622678, "loss": 0.3851, "step": 6513 }, { "epoch": 1.019724483406387, "grad_norm": 4.081453800201416, "learning_rate": 0.0001532991202346041, "loss": 1.1882, "step": 6514 }, { "epoch": 1.0198810269254852, "grad_norm": 1.3441665172576904, "learning_rate": 0.00015327468230694037, "loss": 0.4868, "step": 6515 }, { "epoch": 1.0200375704445837, "grad_norm": 3.412243366241455, "learning_rate": 0.00015325024437927662, "loss": 0.6887, "step": 6516 }, { "epoch": 1.0201941139636819, "grad_norm": 0.8700874447822571, "learning_rate": 0.0001532258064516129, "loss": 0.3311, "step": 6517 }, { "epoch": 1.0203506574827803, "grad_norm": 1.8043177127838135, "learning_rate": 0.00015320136852394918, "loss": 0.607, "step": 6518 }, { "epoch": 1.0205072010018785, "grad_norm": 2.9217605590820312, "learning_rate": 0.0001531769305962854, "loss": 0.8962, "step": 6519 }, { "epoch": 1.0206637445209767, "grad_norm": 1.854831337928772, "learning_rate": 0.00015315249266862168, "loss": 0.4919, "step": 6520 }, { "epoch": 1.0208202880400752, "grad_norm": 2.2908477783203125, "learning_rate": 0.00015312805474095796, "loss": 0.9187, "step": 6521 }, { "epoch": 1.0209768315591734, 
"grad_norm": 3.0967938899993896, "learning_rate": 0.0001531036168132942, "loss": 0.4282, "step": 6522 }, { "epoch": 1.0211333750782718, "grad_norm": 1.6227751970291138, "learning_rate": 0.0001530791788856305, "loss": 0.6401, "step": 6523 }, { "epoch": 1.02128991859737, "grad_norm": 1.9775352478027344, "learning_rate": 0.00015305474095796676, "loss": 1.1405, "step": 6524 }, { "epoch": 1.0214464621164683, "grad_norm": 2.77632212638855, "learning_rate": 0.00015303030303030302, "loss": 0.6276, "step": 6525 }, { "epoch": 1.0216030056355667, "grad_norm": 5.503850936889648, "learning_rate": 0.00015300586510263927, "loss": 1.3446, "step": 6526 }, { "epoch": 1.021759549154665, "grad_norm": 4.683707237243652, "learning_rate": 0.00015298142717497555, "loss": 1.1199, "step": 6527 }, { "epoch": 1.0219160926737634, "grad_norm": 2.9078924655914307, "learning_rate": 0.0001529569892473118, "loss": 1.3954, "step": 6528 }, { "epoch": 1.0220726361928616, "grad_norm": 1.9573535919189453, "learning_rate": 0.00015293255131964808, "loss": 0.5525, "step": 6529 }, { "epoch": 1.0222291797119598, "grad_norm": 2.339132308959961, "learning_rate": 0.00015290811339198435, "loss": 1.9003, "step": 6530 }, { "epoch": 1.0223857232310583, "grad_norm": 2.6952342987060547, "learning_rate": 0.0001528836754643206, "loss": 1.2449, "step": 6531 }, { "epoch": 1.0225422667501565, "grad_norm": 6.4942216873168945, "learning_rate": 0.00015285923753665688, "loss": 2.0955, "step": 6532 }, { "epoch": 1.022698810269255, "grad_norm": 2.6404001712799072, "learning_rate": 0.00015283479960899316, "loss": 1.3895, "step": 6533 }, { "epoch": 1.0228553537883531, "grad_norm": 1.230292558670044, "learning_rate": 0.00015281036168132939, "loss": 0.4467, "step": 6534 }, { "epoch": 1.0230118973074516, "grad_norm": 1.6380767822265625, "learning_rate": 0.00015278592375366566, "loss": 0.5228, "step": 6535 }, { "epoch": 1.0231684408265498, "grad_norm": 1.788313388824463, "learning_rate": 0.00015276148582600194, "loss": 0.4542, 
"step": 6536 }, { "epoch": 1.023324984345648, "grad_norm": 2.3561313152313232, "learning_rate": 0.0001527370478983382, "loss": 0.8112, "step": 6537 }, { "epoch": 1.0234815278647464, "grad_norm": 2.4350478649139404, "learning_rate": 0.00015271260997067447, "loss": 1.1279, "step": 6538 }, { "epoch": 1.0236380713838447, "grad_norm": 1.0181922912597656, "learning_rate": 0.00015268817204301075, "loss": 0.3027, "step": 6539 }, { "epoch": 1.023794614902943, "grad_norm": 1.0185344219207764, "learning_rate": 0.000152663734115347, "loss": 0.2241, "step": 6540 }, { "epoch": 1.0239511584220413, "grad_norm": 1.063295841217041, "learning_rate": 0.00015263929618768328, "loss": 0.3597, "step": 6541 }, { "epoch": 1.0241077019411395, "grad_norm": 0.785956621170044, "learning_rate": 0.00015261485826001956, "loss": 0.252, "step": 6542 }, { "epoch": 1.024264245460238, "grad_norm": 0.5662931203842163, "learning_rate": 0.00015259042033235578, "loss": 0.2251, "step": 6543 }, { "epoch": 1.0244207889793362, "grad_norm": 0.4908407926559448, "learning_rate": 0.00015256598240469206, "loss": 0.1643, "step": 6544 }, { "epoch": 1.0245773324984346, "grad_norm": 0.8578366041183472, "learning_rate": 0.00015254154447702834, "loss": 0.2976, "step": 6545 }, { "epoch": 1.0247338760175329, "grad_norm": 0.5953052639961243, "learning_rate": 0.0001525171065493646, "loss": 0.2358, "step": 6546 }, { "epoch": 1.0248904195366313, "grad_norm": 0.401744544506073, "learning_rate": 0.00015249266862170087, "loss": 0.1725, "step": 6547 }, { "epoch": 1.0250469630557295, "grad_norm": 0.6543681621551514, "learning_rate": 0.00015246823069403715, "loss": 0.3137, "step": 6548 }, { "epoch": 1.0252035065748277, "grad_norm": 0.9384623765945435, "learning_rate": 0.0001524437927663734, "loss": 0.2601, "step": 6549 }, { "epoch": 1.0253600500939262, "grad_norm": 1.2204899787902832, "learning_rate": 0.00015241935483870965, "loss": 0.3343, "step": 6550 }, { "epoch": 1.0255165936130244, "grad_norm": 0.8273910880088806, 
"learning_rate": 0.00015239491691104593, "loss": 0.3626, "step": 6551 }, { "epoch": 1.0256731371321228, "grad_norm": 0.8227601051330566, "learning_rate": 0.00015237047898338218, "loss": 0.2757, "step": 6552 }, { "epoch": 1.025829680651221, "grad_norm": 0.7043795585632324, "learning_rate": 0.00015234604105571846, "loss": 0.3225, "step": 6553 }, { "epoch": 1.0259862241703193, "grad_norm": 1.460331916809082, "learning_rate": 0.00015232160312805474, "loss": 0.2981, "step": 6554 }, { "epoch": 1.0261427676894177, "grad_norm": 1.7429531812667847, "learning_rate": 0.000152297165200391, "loss": 0.7302, "step": 6555 }, { "epoch": 1.026299311208516, "grad_norm": 0.9695984125137329, "learning_rate": 0.00015227272727272727, "loss": 0.4463, "step": 6556 }, { "epoch": 1.0264558547276144, "grad_norm": 0.9565237164497375, "learning_rate": 0.00015224828934506354, "loss": 0.4064, "step": 6557 }, { "epoch": 1.0266123982467126, "grad_norm": 1.8992421627044678, "learning_rate": 0.00015222385141739977, "loss": 0.3796, "step": 6558 }, { "epoch": 1.0267689417658108, "grad_norm": 1.4258291721343994, "learning_rate": 0.00015219941348973605, "loss": 0.3354, "step": 6559 }, { "epoch": 1.0269254852849092, "grad_norm": 2.1398561000823975, "learning_rate": 0.00015217497556207232, "loss": 0.4635, "step": 6560 }, { "epoch": 1.0270820288040075, "grad_norm": 1.383025884628296, "learning_rate": 0.00015215053763440858, "loss": 0.5687, "step": 6561 }, { "epoch": 1.027238572323106, "grad_norm": 1.5760375261306763, "learning_rate": 0.00015212609970674485, "loss": 0.5766, "step": 6562 }, { "epoch": 1.027395115842204, "grad_norm": 1.6190121173858643, "learning_rate": 0.00015210166177908113, "loss": 0.5502, "step": 6563 }, { "epoch": 1.0275516593613025, "grad_norm": 1.2204431295394897, "learning_rate": 0.00015207722385141738, "loss": 0.6046, "step": 6564 }, { "epoch": 1.0277082028804008, "grad_norm": 1.0577452182769775, "learning_rate": 0.00015205278592375366, "loss": 0.3985, "step": 6565 }, { "epoch": 
1.027864746399499, "grad_norm": 1.111060619354248, "learning_rate": 0.00015202834799608994, "loss": 0.4528, "step": 6566 }, { "epoch": 1.0280212899185974, "grad_norm": 2.2486462593078613, "learning_rate": 0.00015200391006842616, "loss": 0.8267, "step": 6567 }, { "epoch": 1.0281778334376956, "grad_norm": 1.4202734231948853, "learning_rate": 0.00015197947214076244, "loss": 0.8305, "step": 6568 }, { "epoch": 1.028334376956794, "grad_norm": 2.8072428703308105, "learning_rate": 0.00015195503421309872, "loss": 0.7312, "step": 6569 }, { "epoch": 1.0284909204758923, "grad_norm": 3.127408504486084, "learning_rate": 0.00015193059628543497, "loss": 0.8986, "step": 6570 }, { "epoch": 1.0286474639949905, "grad_norm": 1.911171793937683, "learning_rate": 0.00015190615835777125, "loss": 0.8522, "step": 6571 }, { "epoch": 1.028804007514089, "grad_norm": 2.793549060821533, "learning_rate": 0.00015188172043010753, "loss": 0.9098, "step": 6572 }, { "epoch": 1.0289605510331872, "grad_norm": 1.8251924514770508, "learning_rate": 0.00015185728250244375, "loss": 1.2318, "step": 6573 }, { "epoch": 1.0291170945522856, "grad_norm": 7.607204437255859, "learning_rate": 0.00015183284457478003, "loss": 0.9744, "step": 6574 }, { "epoch": 1.0292736380713838, "grad_norm": 3.5871646404266357, "learning_rate": 0.0001518084066471163, "loss": 1.8618, "step": 6575 }, { "epoch": 1.029430181590482, "grad_norm": 3.6757612228393555, "learning_rate": 0.00015178396871945256, "loss": 1.4547, "step": 6576 }, { "epoch": 1.0295867251095805, "grad_norm": 2.2011477947235107, "learning_rate": 0.00015175953079178884, "loss": 0.8764, "step": 6577 }, { "epoch": 1.0297432686286787, "grad_norm": 2.396167278289795, "learning_rate": 0.00015173509286412512, "loss": 0.9784, "step": 6578 }, { "epoch": 1.0298998121477771, "grad_norm": 2.8914072513580322, "learning_rate": 0.00015171065493646137, "loss": 1.4503, "step": 6579 }, { "epoch": 1.0300563556668754, "grad_norm": 2.1105411052703857, "learning_rate": 
0.00015168621700879765, "loss": 1.3224, "step": 6580 }, { "epoch": 1.0302128991859738, "grad_norm": 1.6223315000534058, "learning_rate": 0.00015166177908113393, "loss": 0.9473, "step": 6581 }, { "epoch": 1.030369442705072, "grad_norm": 2.1070399284362793, "learning_rate": 0.00015163734115347015, "loss": 1.2726, "step": 6582 }, { "epoch": 1.0305259862241702, "grad_norm": 2.866163492202759, "learning_rate": 0.00015161290322580643, "loss": 1.6022, "step": 6583 }, { "epoch": 1.0306825297432687, "grad_norm": 1.343387484550476, "learning_rate": 0.0001515884652981427, "loss": 0.6449, "step": 6584 }, { "epoch": 1.030839073262367, "grad_norm": 2.003796339035034, "learning_rate": 0.00015156402737047896, "loss": 0.6767, "step": 6585 }, { "epoch": 1.0309956167814653, "grad_norm": 1.329639196395874, "learning_rate": 0.00015153958944281524, "loss": 0.8415, "step": 6586 }, { "epoch": 1.0311521603005636, "grad_norm": 2.692793607711792, "learning_rate": 0.00015151515151515152, "loss": 0.717, "step": 6587 }, { "epoch": 1.0313087038196618, "grad_norm": 1.7069381475448608, "learning_rate": 0.00015149071358748777, "loss": 0.7249, "step": 6588 }, { "epoch": 1.0314652473387602, "grad_norm": 0.498267263174057, "learning_rate": 0.00015146627565982404, "loss": 0.2442, "step": 6589 }, { "epoch": 1.0316217908578584, "grad_norm": 0.7013705372810364, "learning_rate": 0.0001514418377321603, "loss": 0.3577, "step": 6590 }, { "epoch": 1.0317783343769569, "grad_norm": 1.0541874170303345, "learning_rate": 0.00015141739980449655, "loss": 0.2867, "step": 6591 }, { "epoch": 1.031934877896055, "grad_norm": 1.2763069868087769, "learning_rate": 0.00015139296187683283, "loss": 0.2876, "step": 6592 }, { "epoch": 1.0320914214151533, "grad_norm": 0.702283501625061, "learning_rate": 0.0001513685239491691, "loss": 0.3681, "step": 6593 }, { "epoch": 1.0322479649342517, "grad_norm": 0.6756613850593567, "learning_rate": 0.00015134408602150536, "loss": 0.2794, "step": 6594 }, { "epoch": 1.03240450845335, 
"grad_norm": 1.0558266639709473, "learning_rate": 0.00015131964809384163, "loss": 0.2589, "step": 6595 }, { "epoch": 1.0325610519724484, "grad_norm": 0.8309182524681091, "learning_rate": 0.0001512952101661779, "loss": 0.4377, "step": 6596 }, { "epoch": 1.0327175954915466, "grad_norm": 0.7502772212028503, "learning_rate": 0.00015127077223851414, "loss": 0.2772, "step": 6597 }, { "epoch": 1.032874139010645, "grad_norm": 1.0841580629348755, "learning_rate": 0.00015124633431085041, "loss": 0.3705, "step": 6598 }, { "epoch": 1.0330306825297433, "grad_norm": 0.5633625388145447, "learning_rate": 0.0001512218963831867, "loss": 0.2433, "step": 6599 }, { "epoch": 1.0331872260488415, "grad_norm": 0.6718013882637024, "learning_rate": 0.00015119745845552294, "loss": 0.2835, "step": 6600 }, { "epoch": 1.03334376956794, "grad_norm": 0.8712942600250244, "learning_rate": 0.00015117302052785922, "loss": 0.3426, "step": 6601 }, { "epoch": 1.0335003130870382, "grad_norm": 0.9348687529563904, "learning_rate": 0.0001511485826001955, "loss": 0.311, "step": 6602 }, { "epoch": 1.0336568566061366, "grad_norm": 0.8929564356803894, "learning_rate": 0.00015112414467253175, "loss": 0.3267, "step": 6603 }, { "epoch": 1.0338134001252348, "grad_norm": 1.1071451902389526, "learning_rate": 0.00015109970674486803, "loss": 0.491, "step": 6604 }, { "epoch": 1.033969943644333, "grad_norm": 0.7876406908035278, "learning_rate": 0.0001510752688172043, "loss": 0.2884, "step": 6605 }, { "epoch": 1.0341264871634315, "grad_norm": 1.126440405845642, "learning_rate": 0.00015105083088954053, "loss": 0.4125, "step": 6606 }, { "epoch": 1.0342830306825297, "grad_norm": 0.6488103866577148, "learning_rate": 0.0001510263929618768, "loss": 0.1574, "step": 6607 }, { "epoch": 1.0344395742016281, "grad_norm": 2.044353485107422, "learning_rate": 0.0001510019550342131, "loss": 0.3864, "step": 6608 }, { "epoch": 1.0345961177207263, "grad_norm": 1.4567679166793823, "learning_rate": 0.00015097751710654934, "loss": 0.4484, 
"step": 6609 }, { "epoch": 1.0347526612398246, "grad_norm": 2.9722204208374023, "learning_rate": 0.00015095307917888562, "loss": 0.9196, "step": 6610 }, { "epoch": 1.034909204758923, "grad_norm": 1.4675703048706055, "learning_rate": 0.0001509286412512219, "loss": 0.7483, "step": 6611 }, { "epoch": 1.0350657482780212, "grad_norm": 1.8821780681610107, "learning_rate": 0.00015090420332355815, "loss": 0.586, "step": 6612 }, { "epoch": 1.0352222917971197, "grad_norm": 2.960568428039551, "learning_rate": 0.00015087976539589443, "loss": 0.6357, "step": 6613 }, { "epoch": 1.0353788353162179, "grad_norm": 1.3695021867752075, "learning_rate": 0.00015085532746823068, "loss": 0.5007, "step": 6614 }, { "epoch": 1.0355353788353163, "grad_norm": 1.4479131698608398, "learning_rate": 0.00015083088954056693, "loss": 0.5556, "step": 6615 }, { "epoch": 1.0356919223544145, "grad_norm": 1.7650444507598877, "learning_rate": 0.0001508064516129032, "loss": 0.9825, "step": 6616 }, { "epoch": 1.0358484658735128, "grad_norm": 2.19905424118042, "learning_rate": 0.00015078201368523949, "loss": 0.6728, "step": 6617 }, { "epoch": 1.0360050093926112, "grad_norm": 1.4833353757858276, "learning_rate": 0.00015075757575757574, "loss": 0.7472, "step": 6618 }, { "epoch": 1.0361615529117094, "grad_norm": 1.978433609008789, "learning_rate": 0.00015073313782991202, "loss": 0.7654, "step": 6619 }, { "epoch": 1.0363180964308079, "grad_norm": 1.5188682079315186, "learning_rate": 0.0001507086999022483, "loss": 1.1002, "step": 6620 }, { "epoch": 1.036474639949906, "grad_norm": 2.915623188018799, "learning_rate": 0.00015068426197458452, "loss": 0.746, "step": 6621 }, { "epoch": 1.0366311834690043, "grad_norm": 3.499290704727173, "learning_rate": 0.0001506598240469208, "loss": 0.6476, "step": 6622 }, { "epoch": 1.0367877269881027, "grad_norm": 2.9197161197662354, "learning_rate": 0.00015063538611925707, "loss": 0.8943, "step": 6623 }, { "epoch": 1.036944270507201, "grad_norm": 1.2932711839675903, "learning_rate": 
0.00015061094819159333, "loss": 0.7917, "step": 6624 }, { "epoch": 1.0371008140262994, "grad_norm": 2.556731939315796, "learning_rate": 0.0001505865102639296, "loss": 0.6509, "step": 6625 }, { "epoch": 1.0372573575453976, "grad_norm": 3.067978620529175, "learning_rate": 0.00015056207233626588, "loss": 1.0172, "step": 6626 }, { "epoch": 1.0374139010644958, "grad_norm": 2.2281928062438965, "learning_rate": 0.00015053763440860213, "loss": 0.5996, "step": 6627 }, { "epoch": 1.0375704445835943, "grad_norm": 2.0016701221466064, "learning_rate": 0.0001505131964809384, "loss": 0.6033, "step": 6628 }, { "epoch": 1.0377269881026925, "grad_norm": 2.9594852924346924, "learning_rate": 0.0001504887585532747, "loss": 1.0168, "step": 6629 }, { "epoch": 1.037883531621791, "grad_norm": 2.735733985900879, "learning_rate": 0.00015046432062561091, "loss": 1.1409, "step": 6630 }, { "epoch": 1.0380400751408891, "grad_norm": 4.281361103057861, "learning_rate": 0.0001504398826979472, "loss": 1.7845, "step": 6631 }, { "epoch": 1.0381966186599876, "grad_norm": 4.471887111663818, "learning_rate": 0.00015041544477028347, "loss": 1.0803, "step": 6632 }, { "epoch": 1.0383531621790858, "grad_norm": 3.5582237243652344, "learning_rate": 0.00015039100684261972, "loss": 1.1523, "step": 6633 }, { "epoch": 1.038509705698184, "grad_norm": 2.951169490814209, "learning_rate": 0.000150366568914956, "loss": 0.6606, "step": 6634 }, { "epoch": 1.0386662492172825, "grad_norm": 1.8436496257781982, "learning_rate": 0.00015034213098729228, "loss": 0.6368, "step": 6635 }, { "epoch": 1.0388227927363807, "grad_norm": 2.94608473777771, "learning_rate": 0.00015031769305962853, "loss": 0.4588, "step": 6636 }, { "epoch": 1.0389793362554791, "grad_norm": 2.0353431701660156, "learning_rate": 0.0001502932551319648, "loss": 0.4628, "step": 6637 }, { "epoch": 1.0391358797745773, "grad_norm": 2.008845329284668, "learning_rate": 0.00015026881720430106, "loss": 0.7833, "step": 6638 }, { "epoch": 1.0392924232936755, "grad_norm": 
0.664910078048706, "learning_rate": 0.0001502443792766373, "loss": 0.194, "step": 6639 }, { "epoch": 1.039448966812774, "grad_norm": 0.388094037771225, "learning_rate": 0.0001502199413489736, "loss": 0.1953, "step": 6640 }, { "epoch": 1.0396055103318722, "grad_norm": 0.3977845311164856, "learning_rate": 0.00015019550342130987, "loss": 0.1952, "step": 6641 }, { "epoch": 1.0397620538509706, "grad_norm": 0.4224265515804291, "learning_rate": 0.00015017106549364612, "loss": 0.1599, "step": 6642 }, { "epoch": 1.0399185973700689, "grad_norm": 0.7730469703674316, "learning_rate": 0.0001501466275659824, "loss": 0.2271, "step": 6643 }, { "epoch": 1.040075140889167, "grad_norm": 0.7396877408027649, "learning_rate": 0.00015012218963831868, "loss": 0.328, "step": 6644 }, { "epoch": 1.0402316844082655, "grad_norm": 0.7163776755332947, "learning_rate": 0.0001500977517106549, "loss": 0.2924, "step": 6645 }, { "epoch": 1.0403882279273637, "grad_norm": 0.7537462115287781, "learning_rate": 0.00015007331378299118, "loss": 0.2665, "step": 6646 }, { "epoch": 1.0405447714464622, "grad_norm": 4.295088768005371, "learning_rate": 0.00015004887585532746, "loss": 1.0396, "step": 6647 }, { "epoch": 1.0407013149655604, "grad_norm": 0.830767810344696, "learning_rate": 0.0001500244379276637, "loss": 0.2891, "step": 6648 }, { "epoch": 1.0408578584846588, "grad_norm": 0.9357022643089294, "learning_rate": 0.00015, "loss": 0.3978, "step": 6649 }, { "epoch": 1.041014402003757, "grad_norm": 0.6923136115074158, "learning_rate": 0.00014997556207233624, "loss": 0.2655, "step": 6650 }, { "epoch": 1.0411709455228553, "grad_norm": 1.0531424283981323, "learning_rate": 0.00014995112414467252, "loss": 0.2939, "step": 6651 }, { "epoch": 1.0413274890419537, "grad_norm": 0.8230469822883606, "learning_rate": 0.0001499266862170088, "loss": 0.4025, "step": 6652 }, { "epoch": 1.041484032561052, "grad_norm": 0.7676156163215637, "learning_rate": 0.00014990224828934505, "loss": 0.3075, "step": 6653 }, { "epoch": 
1.0416405760801504, "grad_norm": 2.29850697517395, "learning_rate": 0.00014987781036168132, "loss": 0.5837, "step": 6654 }, { "epoch": 1.0417971195992486, "grad_norm": 0.798516571521759, "learning_rate": 0.00014985337243401758, "loss": 0.4561, "step": 6655 }, { "epoch": 1.0419536631183468, "grad_norm": 1.210540771484375, "learning_rate": 0.00014982893450635385, "loss": 0.5438, "step": 6656 }, { "epoch": 1.0421102066374452, "grad_norm": 1.6626873016357422, "learning_rate": 0.00014980449657869013, "loss": 0.6722, "step": 6657 }, { "epoch": 1.0422667501565435, "grad_norm": 0.6938304305076599, "learning_rate": 0.00014978005865102638, "loss": 0.5357, "step": 6658 }, { "epoch": 1.042423293675642, "grad_norm": 0.9663745164871216, "learning_rate": 0.00014975562072336263, "loss": 0.4106, "step": 6659 }, { "epoch": 1.0425798371947401, "grad_norm": 0.8333467245101929, "learning_rate": 0.0001497311827956989, "loss": 0.3207, "step": 6660 }, { "epoch": 1.0427363807138383, "grad_norm": 0.7671467065811157, "learning_rate": 0.00014970674486803516, "loss": 0.3437, "step": 6661 }, { "epoch": 1.0428929242329368, "grad_norm": 0.9945407509803772, "learning_rate": 0.00014968230694037144, "loss": 0.4651, "step": 6662 }, { "epoch": 1.043049467752035, "grad_norm": 1.5871999263763428, "learning_rate": 0.00014965786901270772, "loss": 0.5176, "step": 6663 }, { "epoch": 1.0432060112711334, "grad_norm": 3.3692855834960938, "learning_rate": 0.00014963343108504397, "loss": 0.7892, "step": 6664 }, { "epoch": 1.0433625547902317, "grad_norm": 2.7460215091705322, "learning_rate": 0.00014960899315738022, "loss": 0.4972, "step": 6665 }, { "epoch": 1.04351909830933, "grad_norm": 2.0710816383361816, "learning_rate": 0.0001495845552297165, "loss": 0.4056, "step": 6666 }, { "epoch": 1.0436756418284283, "grad_norm": 2.1053054332733154, "learning_rate": 0.00014956011730205278, "loss": 0.6009, "step": 6667 }, { "epoch": 1.0438321853475265, "grad_norm": 2.5301668643951416, "learning_rate": 
0.00014953567937438903, "loss": 0.8748, "step": 6668 }, { "epoch": 1.043988728866625, "grad_norm": 1.9505361318588257, "learning_rate": 0.0001495112414467253, "loss": 0.6859, "step": 6669 }, { "epoch": 1.0441452723857232, "grad_norm": 1.7739278078079224, "learning_rate": 0.00014948680351906156, "loss": 0.4152, "step": 6670 }, { "epoch": 1.0443018159048216, "grad_norm": 2.2372188568115234, "learning_rate": 0.00014946236559139784, "loss": 0.5335, "step": 6671 }, { "epoch": 1.0444583594239198, "grad_norm": 3.2507851123809814, "learning_rate": 0.00014943792766373412, "loss": 0.9134, "step": 6672 }, { "epoch": 1.044614902943018, "grad_norm": 1.7486919164657593, "learning_rate": 0.00014941348973607037, "loss": 0.9072, "step": 6673 }, { "epoch": 1.0447714464621165, "grad_norm": 1.5472897291183472, "learning_rate": 0.00014938905180840662, "loss": 0.8984, "step": 6674 }, { "epoch": 1.0449279899812147, "grad_norm": 2.7646539211273193, "learning_rate": 0.0001493646138807429, "loss": 0.6659, "step": 6675 }, { "epoch": 1.0450845335003132, "grad_norm": 2.3693439960479736, "learning_rate": 0.00014934017595307918, "loss": 1.1551, "step": 6676 }, { "epoch": 1.0452410770194114, "grad_norm": 4.565879821777344, "learning_rate": 0.00014931573802541543, "loss": 1.4091, "step": 6677 }, { "epoch": 1.0453976205385098, "grad_norm": 2.8540971279144287, "learning_rate": 0.0001492913000977517, "loss": 1.1402, "step": 6678 }, { "epoch": 1.045554164057608, "grad_norm": 3.433985948562622, "learning_rate": 0.00014926686217008796, "loss": 1.2866, "step": 6679 }, { "epoch": 1.0457107075767063, "grad_norm": 1.5474061965942383, "learning_rate": 0.00014924242424242424, "loss": 0.4375, "step": 6680 }, { "epoch": 1.0458672510958047, "grad_norm": 1.9934204816818237, "learning_rate": 0.00014921798631476051, "loss": 0.9397, "step": 6681 }, { "epoch": 1.046023794614903, "grad_norm": 4.180722236633301, "learning_rate": 0.00014919354838709677, "loss": 1.5848, "step": 6682 }, { "epoch": 1.0461803381340014, 
"grad_norm": 2.291654348373413, "learning_rate": 0.00014916911045943302, "loss": 1.2605, "step": 6683 }, { "epoch": 1.0463368816530996, "grad_norm": 2.387073516845703, "learning_rate": 0.0001491446725317693, "loss": 0.4326, "step": 6684 }, { "epoch": 1.0464934251721978, "grad_norm": 2.3532493114471436, "learning_rate": 0.00014912023460410555, "loss": 0.4884, "step": 6685 }, { "epoch": 1.0466499686912962, "grad_norm": 2.2486379146575928, "learning_rate": 0.00014909579667644183, "loss": 1.0124, "step": 6686 }, { "epoch": 1.0468065122103944, "grad_norm": 2.661993980407715, "learning_rate": 0.0001490713587487781, "loss": 1.0389, "step": 6687 }, { "epoch": 1.0469630557294929, "grad_norm": 2.130023717880249, "learning_rate": 0.00014904692082111435, "loss": 0.4506, "step": 6688 }, { "epoch": 1.047119599248591, "grad_norm": 0.6266974806785583, "learning_rate": 0.0001490224828934506, "loss": 0.2949, "step": 6689 }, { "epoch": 1.0472761427676893, "grad_norm": 0.5856244564056396, "learning_rate": 0.00014899804496578688, "loss": 0.2782, "step": 6690 }, { "epoch": 1.0474326862867878, "grad_norm": 0.578836977481842, "learning_rate": 0.00014897360703812316, "loss": 0.244, "step": 6691 }, { "epoch": 1.047589229805886, "grad_norm": 0.5507057905197144, "learning_rate": 0.00014894916911045941, "loss": 0.2312, "step": 6692 }, { "epoch": 1.0477457733249844, "grad_norm": 0.4590769112110138, "learning_rate": 0.0001489247311827957, "loss": 0.2004, "step": 6693 }, { "epoch": 1.0479023168440826, "grad_norm": 0.5535408854484558, "learning_rate": 0.00014890029325513194, "loss": 0.2628, "step": 6694 }, { "epoch": 1.0480588603631809, "grad_norm": 2.307926654815674, "learning_rate": 0.00014887585532746822, "loss": 0.2208, "step": 6695 }, { "epoch": 1.0482154038822793, "grad_norm": 0.5684087872505188, "learning_rate": 0.0001488514173998045, "loss": 0.2184, "step": 6696 }, { "epoch": 1.0483719474013775, "grad_norm": 0.6311387419700623, "learning_rate": 0.00014882697947214075, "loss": 0.2584, 
"step": 6697 }, { "epoch": 1.048528490920476, "grad_norm": 0.6801841259002686, "learning_rate": 0.000148802541544477, "loss": 0.2507, "step": 6698 }, { "epoch": 1.0486850344395742, "grad_norm": 0.8191403150558472, "learning_rate": 0.00014877810361681328, "loss": 0.3224, "step": 6699 }, { "epoch": 1.0488415779586726, "grad_norm": 0.9720447063446045, "learning_rate": 0.00014875366568914956, "loss": 0.3435, "step": 6700 }, { "epoch": 1.0489981214777708, "grad_norm": 0.380145400762558, "learning_rate": 0.0001487292277614858, "loss": 0.1308, "step": 6701 }, { "epoch": 1.049154664996869, "grad_norm": 0.5885724425315857, "learning_rate": 0.0001487047898338221, "loss": 0.3108, "step": 6702 }, { "epoch": 1.0493112085159675, "grad_norm": 0.8687223792076111, "learning_rate": 0.00014868035190615834, "loss": 0.3114, "step": 6703 }, { "epoch": 1.0494677520350657, "grad_norm": 1.1362780332565308, "learning_rate": 0.00014865591397849462, "loss": 0.3741, "step": 6704 }, { "epoch": 1.0496242955541641, "grad_norm": 1.4467766284942627, "learning_rate": 0.00014863147605083087, "loss": 0.2473, "step": 6705 }, { "epoch": 1.0497808390732624, "grad_norm": 1.3377172946929932, "learning_rate": 0.00014860703812316715, "loss": 0.5426, "step": 6706 }, { "epoch": 1.0499373825923606, "grad_norm": 1.023105502128601, "learning_rate": 0.0001485826001955034, "loss": 0.4587, "step": 6707 }, { "epoch": 1.050093926111459, "grad_norm": 2.0178208351135254, "learning_rate": 0.00014855816226783968, "loss": 0.3052, "step": 6708 }, { "epoch": 1.0502504696305572, "grad_norm": 1.5888885259628296, "learning_rate": 0.00014853372434017593, "loss": 0.4834, "step": 6709 }, { "epoch": 1.0504070131496557, "grad_norm": 2.150106430053711, "learning_rate": 0.0001485092864125122, "loss": 0.6655, "step": 6710 }, { "epoch": 1.050563556668754, "grad_norm": 1.21843421459198, "learning_rate": 0.00014848484848484849, "loss": 0.6639, "step": 6711 }, { "epoch": 1.0507201001878523, "grad_norm": 1.0294426679611206, "learning_rate": 
0.00014846041055718474, "loss": 0.3775, "step": 6712 }, { "epoch": 1.0508766437069506, "grad_norm": 1.3102911710739136, "learning_rate": 0.000148435972629521, "loss": 0.6612, "step": 6713 }, { "epoch": 1.0510331872260488, "grad_norm": 2.4257891178131104, "learning_rate": 0.00014841153470185727, "loss": 0.6148, "step": 6714 }, { "epoch": 1.0511897307451472, "grad_norm": 1.66291344165802, "learning_rate": 0.00014838709677419355, "loss": 0.7202, "step": 6715 }, { "epoch": 1.0513462742642454, "grad_norm": 2.0738868713378906, "learning_rate": 0.0001483626588465298, "loss": 0.7751, "step": 6716 }, { "epoch": 1.0515028177833439, "grad_norm": 1.5332388877868652, "learning_rate": 0.00014833822091886607, "loss": 0.5269, "step": 6717 }, { "epoch": 1.051659361302442, "grad_norm": 1.8209433555603027, "learning_rate": 0.00014831378299120233, "loss": 0.8477, "step": 6718 }, { "epoch": 1.0518159048215403, "grad_norm": 1.7813162803649902, "learning_rate": 0.0001482893450635386, "loss": 1.1148, "step": 6719 }, { "epoch": 1.0519724483406387, "grad_norm": 1.9501174688339233, "learning_rate": 0.00014826490713587488, "loss": 0.7127, "step": 6720 }, { "epoch": 1.052128991859737, "grad_norm": 2.0499088764190674, "learning_rate": 0.00014824046920821113, "loss": 1.0263, "step": 6721 }, { "epoch": 1.0522855353788354, "grad_norm": 2.545484781265259, "learning_rate": 0.00014821603128054739, "loss": 0.8959, "step": 6722 }, { "epoch": 1.0524420788979336, "grad_norm": 2.9357028007507324, "learning_rate": 0.00014819159335288366, "loss": 1.346, "step": 6723 }, { "epoch": 1.0525986224170318, "grad_norm": 1.6298185586929321, "learning_rate": 0.00014816715542521994, "loss": 1.0074, "step": 6724 }, { "epoch": 1.0527551659361303, "grad_norm": 2.8814504146575928, "learning_rate": 0.0001481427174975562, "loss": 0.636, "step": 6725 }, { "epoch": 1.0529117094552285, "grad_norm": 3.723740339279175, "learning_rate": 0.00014811827956989247, "loss": 0.7553, "step": 6726 }, { "epoch": 1.053068252974327, 
"grad_norm": 1.4765031337738037, "learning_rate": 0.00014809384164222872, "loss": 0.9195, "step": 6727 }, { "epoch": 1.0532247964934252, "grad_norm": 1.9450255632400513, "learning_rate": 0.000148069403714565, "loss": 1.3745, "step": 6728 }, { "epoch": 1.0533813400125234, "grad_norm": 2.2991535663604736, "learning_rate": 0.00014804496578690125, "loss": 1.0113, "step": 6729 }, { "epoch": 1.0535378835316218, "grad_norm": 2.7434890270233154, "learning_rate": 0.00014802052785923753, "loss": 1.1969, "step": 6730 }, { "epoch": 1.05369442705072, "grad_norm": 2.5354650020599365, "learning_rate": 0.00014799608993157378, "loss": 1.1237, "step": 6731 }, { "epoch": 1.0538509705698185, "grad_norm": 1.4999096393585205, "learning_rate": 0.00014797165200391006, "loss": 1.2223, "step": 6732 }, { "epoch": 1.0540075140889167, "grad_norm": 2.2378246784210205, "learning_rate": 0.0001479472140762463, "loss": 1.1544, "step": 6733 }, { "epoch": 1.0541640576080151, "grad_norm": 1.6880450248718262, "learning_rate": 0.0001479227761485826, "loss": 0.7526, "step": 6734 }, { "epoch": 1.0543206011271133, "grad_norm": 5.19423246383667, "learning_rate": 0.00014789833822091887, "loss": 1.2331, "step": 6735 }, { "epoch": 1.0544771446462116, "grad_norm": 3.7082395553588867, "learning_rate": 0.00014787390029325512, "loss": 0.4293, "step": 6736 }, { "epoch": 1.05463368816531, "grad_norm": 2.9954659938812256, "learning_rate": 0.00014784946236559137, "loss": 1.0767, "step": 6737 }, { "epoch": 1.0547902316844082, "grad_norm": 1.1557728052139282, "learning_rate": 0.00014782502443792765, "loss": 0.4366, "step": 6738 }, { "epoch": 1.0549467752035067, "grad_norm": 0.6128056049346924, "learning_rate": 0.00014780058651026393, "loss": 0.2985, "step": 6739 }, { "epoch": 1.0551033187226049, "grad_norm": 0.6102817058563232, "learning_rate": 0.00014777614858260018, "loss": 0.3021, "step": 6740 }, { "epoch": 1.055259862241703, "grad_norm": 0.8309397101402283, "learning_rate": 0.00014775171065493646, "loss": 0.2417, 
"step": 6741 }, { "epoch": 1.0554164057608015, "grad_norm": 0.7619971632957458, "learning_rate": 0.0001477272727272727, "loss": 0.2599, "step": 6742 }, { "epoch": 1.0555729492798998, "grad_norm": 0.6904311180114746, "learning_rate": 0.000147702834799609, "loss": 0.3468, "step": 6743 }, { "epoch": 1.0557294927989982, "grad_norm": 1.885140061378479, "learning_rate": 0.00014767839687194526, "loss": 0.412, "step": 6744 }, { "epoch": 1.0558860363180964, "grad_norm": 0.6122669577598572, "learning_rate": 0.00014765395894428152, "loss": 0.2116, "step": 6745 }, { "epoch": 1.0560425798371949, "grad_norm": 0.754728376865387, "learning_rate": 0.00014762952101661777, "loss": 0.3224, "step": 6746 }, { "epoch": 1.056199123356293, "grad_norm": 2.336873769760132, "learning_rate": 0.00014760508308895405, "loss": 0.3732, "step": 6747 }, { "epoch": 1.0563556668753913, "grad_norm": 1.1394752264022827, "learning_rate": 0.00014758064516129032, "loss": 0.4202, "step": 6748 }, { "epoch": 1.0565122103944897, "grad_norm": 0.6358504891395569, "learning_rate": 0.00014755620723362658, "loss": 0.2911, "step": 6749 }, { "epoch": 1.056668753913588, "grad_norm": 0.8128040432929993, "learning_rate": 0.00014753176930596285, "loss": 0.2091, "step": 6750 }, { "epoch": 1.0568252974326864, "grad_norm": 1.233589768409729, "learning_rate": 0.0001475073313782991, "loss": 0.2296, "step": 6751 }, { "epoch": 1.0569818409517846, "grad_norm": 1.3525103330612183, "learning_rate": 0.00014748289345063538, "loss": 0.4545, "step": 6752 }, { "epoch": 1.0571383844708828, "grad_norm": 1.5476621389389038, "learning_rate": 0.00014745845552297163, "loss": 0.4035, "step": 6753 }, { "epoch": 1.0572949279899813, "grad_norm": 1.2331606149673462, "learning_rate": 0.0001474340175953079, "loss": 0.2726, "step": 6754 }, { "epoch": 1.0574514715090795, "grad_norm": 1.3619499206542969, "learning_rate": 0.00014740957966764416, "loss": 0.5273, "step": 6755 }, { "epoch": 1.057608015028178, "grad_norm": 3.523498773574829, 
"learning_rate": 0.00014738514173998044, "loss": 0.5354, "step": 6756 }, { "epoch": 1.0577645585472761, "grad_norm": 1.602575659751892, "learning_rate": 0.0001473607038123167, "loss": 0.6084, "step": 6757 }, { "epoch": 1.0579211020663744, "grad_norm": 2.1576313972473145, "learning_rate": 0.00014733626588465297, "loss": 0.3558, "step": 6758 }, { "epoch": 1.0580776455854728, "grad_norm": 1.1257455348968506, "learning_rate": 0.00014731182795698925, "loss": 0.4306, "step": 6759 }, { "epoch": 1.058234189104571, "grad_norm": 1.3478970527648926, "learning_rate": 0.0001472873900293255, "loss": 0.5438, "step": 6760 }, { "epoch": 1.0583907326236695, "grad_norm": 2.616065263748169, "learning_rate": 0.00014726295210166175, "loss": 0.8583, "step": 6761 }, { "epoch": 1.0585472761427677, "grad_norm": 1.401318907737732, "learning_rate": 0.00014723851417399803, "loss": 0.2882, "step": 6762 }, { "epoch": 1.0587038196618659, "grad_norm": 2.02484130859375, "learning_rate": 0.0001472140762463343, "loss": 0.9521, "step": 6763 }, { "epoch": 1.0588603631809643, "grad_norm": 1.5256311893463135, "learning_rate": 0.00014718963831867056, "loss": 0.4258, "step": 6764 }, { "epoch": 1.0590169067000625, "grad_norm": 1.3191006183624268, "learning_rate": 0.00014716520039100684, "loss": 0.4773, "step": 6765 }, { "epoch": 1.059173450219161, "grad_norm": 2.7704293727874756, "learning_rate": 0.0001471407624633431, "loss": 0.6198, "step": 6766 }, { "epoch": 1.0593299937382592, "grad_norm": 1.2429702281951904, "learning_rate": 0.00014711632453567937, "loss": 0.646, "step": 6767 }, { "epoch": 1.0594865372573576, "grad_norm": 1.98096764087677, "learning_rate": 0.00014709188660801565, "loss": 0.6263, "step": 6768 }, { "epoch": 1.0596430807764559, "grad_norm": 5.404338836669922, "learning_rate": 0.0001470674486803519, "loss": 1.4979, "step": 6769 }, { "epoch": 1.059799624295554, "grad_norm": 2.1490466594696045, "learning_rate": 0.00014704301075268815, "loss": 0.7869, "step": 6770 }, { "epoch": 
1.0599561678146525, "grad_norm": 1.6337779760360718, "learning_rate": 0.00014701857282502443, "loss": 0.8592, "step": 6771 }, { "epoch": 1.0601127113337507, "grad_norm": 3.1310315132141113, "learning_rate": 0.0001469941348973607, "loss": 0.9336, "step": 6772 }, { "epoch": 1.0602692548528492, "grad_norm": 2.745962619781494, "learning_rate": 0.00014696969696969696, "loss": 1.043, "step": 6773 }, { "epoch": 1.0604257983719474, "grad_norm": 2.7593672275543213, "learning_rate": 0.00014694525904203324, "loss": 1.1677, "step": 6774 }, { "epoch": 1.0605823418910456, "grad_norm": 5.965583801269531, "learning_rate": 0.0001469208211143695, "loss": 1.777, "step": 6775 }, { "epoch": 1.060738885410144, "grad_norm": 3.3984930515289307, "learning_rate": 0.00014689638318670574, "loss": 1.0561, "step": 6776 }, { "epoch": 1.0608954289292423, "grad_norm": 2.305185317993164, "learning_rate": 0.00014687194525904202, "loss": 0.6857, "step": 6777 }, { "epoch": 1.0610519724483407, "grad_norm": 2.9663422107696533, "learning_rate": 0.0001468475073313783, "loss": 0.8298, "step": 6778 }, { "epoch": 1.061208515967439, "grad_norm": 2.1952407360076904, "learning_rate": 0.00014682306940371455, "loss": 0.9201, "step": 6779 }, { "epoch": 1.0613650594865374, "grad_norm": 2.043056011199951, "learning_rate": 0.0001467986314760508, "loss": 1.1013, "step": 6780 }, { "epoch": 1.0615216030056356, "grad_norm": 1.94023597240448, "learning_rate": 0.00014677419354838708, "loss": 1.3418, "step": 6781 }, { "epoch": 1.0616781465247338, "grad_norm": 2.6357297897338867, "learning_rate": 0.00014674975562072335, "loss": 0.8855, "step": 6782 }, { "epoch": 1.0618346900438322, "grad_norm": 2.302753448486328, "learning_rate": 0.0001467253176930596, "loss": 0.9809, "step": 6783 }, { "epoch": 1.0619912335629305, "grad_norm": 2.690652370452881, "learning_rate": 0.00014670087976539588, "loss": 0.4587, "step": 6784 }, { "epoch": 1.062147777082029, "grad_norm": 3.5731654167175293, "learning_rate": 0.00014667644183773214, 
"loss": 1.0801, "step": 6785 }, { "epoch": 1.0623043206011271, "grad_norm": 2.150663375854492, "learning_rate": 0.00014665200391006841, "loss": 0.7417, "step": 6786 }, { "epoch": 1.0624608641202253, "grad_norm": 2.761242151260376, "learning_rate": 0.0001466275659824047, "loss": 1.2934, "step": 6787 }, { "epoch": 1.0626174076393238, "grad_norm": 2.148921251296997, "learning_rate": 0.00014660312805474094, "loss": 0.5649, "step": 6788 }, { "epoch": 1.062773951158422, "grad_norm": 0.691116452217102, "learning_rate": 0.0001465786901270772, "loss": 0.287, "step": 6789 }, { "epoch": 1.0629304946775204, "grad_norm": 0.4150255024433136, "learning_rate": 0.00014655425219941347, "loss": 0.3013, "step": 6790 }, { "epoch": 1.0630870381966186, "grad_norm": 0.48139479756355286, "learning_rate": 0.00014652981427174975, "loss": 0.3082, "step": 6791 }, { "epoch": 1.0632435817157169, "grad_norm": 0.808285117149353, "learning_rate": 0.000146505376344086, "loss": 0.2616, "step": 6792 }, { "epoch": 1.0634001252348153, "grad_norm": 0.7357711791992188, "learning_rate": 0.00014648093841642228, "loss": 0.2752, "step": 6793 }, { "epoch": 1.0635566687539135, "grad_norm": 0.6319572329521179, "learning_rate": 0.00014645650048875853, "loss": 0.26, "step": 6794 }, { "epoch": 1.063713212273012, "grad_norm": 0.698928952217102, "learning_rate": 0.0001464320625610948, "loss": 0.2896, "step": 6795 }, { "epoch": 1.0638697557921102, "grad_norm": 1.5302014350891113, "learning_rate": 0.0001464076246334311, "loss": 0.3453, "step": 6796 }, { "epoch": 1.0640262993112084, "grad_norm": 1.11781907081604, "learning_rate": 0.00014638318670576734, "loss": 0.3171, "step": 6797 }, { "epoch": 1.0641828428303068, "grad_norm": 1.107818365097046, "learning_rate": 0.0001463587487781036, "loss": 0.2568, "step": 6798 }, { "epoch": 1.064339386349405, "grad_norm": 1.2923407554626465, "learning_rate": 0.00014633431085043987, "loss": 0.2953, "step": 6799 }, { "epoch": 1.0644959298685035, "grad_norm": 1.0053560733795166, 
"learning_rate": 0.00014630987292277612, "loss": 0.4314, "step": 6800 }, { "epoch": 1.0646524733876017, "grad_norm": 0.73367840051651, "learning_rate": 0.0001462854349951124, "loss": 0.1903, "step": 6801 }, { "epoch": 1.0648090169067002, "grad_norm": 0.6966777443885803, "learning_rate": 0.00014626099706744868, "loss": 0.2807, "step": 6802 }, { "epoch": 1.0649655604257984, "grad_norm": 1.2056246995925903, "learning_rate": 0.00014623655913978493, "loss": 0.5498, "step": 6803 }, { "epoch": 1.0651221039448966, "grad_norm": 1.8048405647277832, "learning_rate": 0.00014621212121212118, "loss": 0.3544, "step": 6804 }, { "epoch": 1.065278647463995, "grad_norm": 1.2983676195144653, "learning_rate": 0.00014618768328445746, "loss": 0.3607, "step": 6805 }, { "epoch": 1.0654351909830932, "grad_norm": 0.5953453183174133, "learning_rate": 0.00014616324535679374, "loss": 0.2121, "step": 6806 }, { "epoch": 1.0655917345021917, "grad_norm": 1.4588236808776855, "learning_rate": 0.00014613880742913, "loss": 0.7027, "step": 6807 }, { "epoch": 1.06574827802129, "grad_norm": 1.0992096662521362, "learning_rate": 0.00014611436950146627, "loss": 0.4823, "step": 6808 }, { "epoch": 1.0659048215403881, "grad_norm": 1.8514833450317383, "learning_rate": 0.00014608993157380252, "loss": 0.5958, "step": 6809 }, { "epoch": 1.0660613650594866, "grad_norm": 1.5322425365447998, "learning_rate": 0.0001460654936461388, "loss": 0.495, "step": 6810 }, { "epoch": 1.0662179085785848, "grad_norm": 0.9982278347015381, "learning_rate": 0.00014604105571847507, "loss": 0.3159, "step": 6811 }, { "epoch": 1.0663744520976832, "grad_norm": 1.4766974449157715, "learning_rate": 0.00014601661779081133, "loss": 0.5013, "step": 6812 }, { "epoch": 1.0665309956167814, "grad_norm": 1.6511269807815552, "learning_rate": 0.00014599217986314758, "loss": 0.5875, "step": 6813 }, { "epoch": 1.0666875391358799, "grad_norm": 1.9279791116714478, "learning_rate": 0.00014596774193548386, "loss": 0.7228, "step": 6814 }, { "epoch": 
1.066844082654978, "grad_norm": 2.1245806217193604, "learning_rate": 0.00014594330400782013, "loss": 0.6166, "step": 6815 }, { "epoch": 1.0670006261740763, "grad_norm": 2.9913034439086914, "learning_rate": 0.00014591886608015638, "loss": 0.6317, "step": 6816 }, { "epoch": 1.0671571696931748, "grad_norm": 1.126991868019104, "learning_rate": 0.00014589442815249266, "loss": 0.352, "step": 6817 }, { "epoch": 1.067313713212273, "grad_norm": 1.3619285821914673, "learning_rate": 0.00014586999022482891, "loss": 0.5466, "step": 6818 }, { "epoch": 1.0674702567313714, "grad_norm": 1.5446709394454956, "learning_rate": 0.0001458455522971652, "loss": 0.7798, "step": 6819 }, { "epoch": 1.0676268002504696, "grad_norm": 1.6549172401428223, "learning_rate": 0.00014582111436950144, "loss": 0.7226, "step": 6820 }, { "epoch": 1.0677833437695678, "grad_norm": 1.4883651733398438, "learning_rate": 0.00014579667644183772, "loss": 0.8854, "step": 6821 }, { "epoch": 1.0679398872886663, "grad_norm": 1.6428464651107788, "learning_rate": 0.00014577223851417397, "loss": 0.9569, "step": 6822 }, { "epoch": 1.0680964308077645, "grad_norm": 2.9931986331939697, "learning_rate": 0.00014574780058651025, "loss": 1.1195, "step": 6823 }, { "epoch": 1.068252974326863, "grad_norm": 1.3941583633422852, "learning_rate": 0.0001457233626588465, "loss": 0.6068, "step": 6824 }, { "epoch": 1.0684095178459612, "grad_norm": 1.757436752319336, "learning_rate": 0.00014569892473118278, "loss": 0.5655, "step": 6825 }, { "epoch": 1.0685660613650594, "grad_norm": 2.191127300262451, "learning_rate": 0.00014567448680351906, "loss": 0.8291, "step": 6826 }, { "epoch": 1.0687226048841578, "grad_norm": 3.134359121322632, "learning_rate": 0.0001456500488758553, "loss": 0.9857, "step": 6827 }, { "epoch": 1.068879148403256, "grad_norm": 1.7476389408111572, "learning_rate": 0.00014562561094819156, "loss": 0.7704, "step": 6828 }, { "epoch": 1.0690356919223545, "grad_norm": 2.1934890747070312, "learning_rate": 0.00014560117302052784, 
"loss": 1.404, "step": 6829 }, { "epoch": 1.0691922354414527, "grad_norm": 3.2194011211395264, "learning_rate": 0.00014557673509286412, "loss": 0.8811, "step": 6830 }, { "epoch": 1.069348778960551, "grad_norm": 2.342510461807251, "learning_rate": 0.00014555229716520037, "loss": 0.781, "step": 6831 }, { "epoch": 1.0695053224796494, "grad_norm": 2.317560911178589, "learning_rate": 0.00014552785923753665, "loss": 0.8468, "step": 6832 }, { "epoch": 1.0696618659987476, "grad_norm": 2.0507400035858154, "learning_rate": 0.0001455034213098729, "loss": 0.8117, "step": 6833 }, { "epoch": 1.069818409517846, "grad_norm": 2.625714063644409, "learning_rate": 0.00014547898338220918, "loss": 0.7835, "step": 6834 }, { "epoch": 1.0699749530369442, "grad_norm": 2.597598075866699, "learning_rate": 0.00014545454545454546, "loss": 0.5083, "step": 6835 }, { "epoch": 1.0701314965560427, "grad_norm": 2.5991411209106445, "learning_rate": 0.0001454301075268817, "loss": 1.1845, "step": 6836 }, { "epoch": 1.070288040075141, "grad_norm": 1.9454537630081177, "learning_rate": 0.00014540566959921796, "loss": 0.414, "step": 6837 }, { "epoch": 1.070444583594239, "grad_norm": 1.6982367038726807, "learning_rate": 0.00014538123167155424, "loss": 0.6621, "step": 6838 }, { "epoch": 1.0706011271133375, "grad_norm": 0.6477040648460388, "learning_rate": 0.00014535679374389052, "loss": 0.3462, "step": 6839 }, { "epoch": 1.0707576706324358, "grad_norm": 0.3390127420425415, "learning_rate": 0.00014533235581622677, "loss": 0.1706, "step": 6840 }, { "epoch": 1.0709142141515342, "grad_norm": 0.8520225882530212, "learning_rate": 0.00014530791788856305, "loss": 0.3586, "step": 6841 }, { "epoch": 1.0710707576706324, "grad_norm": 0.6063798069953918, "learning_rate": 0.0001452834799608993, "loss": 0.1433, "step": 6842 }, { "epoch": 1.0712273011897309, "grad_norm": 0.7696285247802734, "learning_rate": 0.00014525904203323557, "loss": 0.5062, "step": 6843 }, { "epoch": 1.071383844708829, "grad_norm": 0.47626444697380066, 
"learning_rate": 0.00014523460410557183, "loss": 0.2092, "step": 6844 }, { "epoch": 1.0715403882279273, "grad_norm": 0.7897009253501892, "learning_rate": 0.0001452101661779081, "loss": 0.295, "step": 6845 }, { "epoch": 1.0716969317470257, "grad_norm": 0.6091282963752747, "learning_rate": 0.00014518572825024436, "loss": 0.2313, "step": 6846 }, { "epoch": 1.071853475266124, "grad_norm": 0.9005686044692993, "learning_rate": 0.00014516129032258063, "loss": 0.2965, "step": 6847 }, { "epoch": 1.0720100187852224, "grad_norm": 0.9107126593589783, "learning_rate": 0.00014513685239491689, "loss": 0.2936, "step": 6848 }, { "epoch": 1.0721665623043206, "grad_norm": 0.5607098340988159, "learning_rate": 0.00014511241446725316, "loss": 0.2276, "step": 6849 }, { "epoch": 1.0723231058234188, "grad_norm": 13.283991813659668, "learning_rate": 0.00014508797653958944, "loss": 0.3193, "step": 6850 }, { "epoch": 1.0724796493425173, "grad_norm": 1.3404076099395752, "learning_rate": 0.0001450635386119257, "loss": 0.422, "step": 6851 }, { "epoch": 1.0726361928616155, "grad_norm": 0.9615288376808167, "learning_rate": 0.00014503910068426194, "loss": 0.4161, "step": 6852 }, { "epoch": 1.072792736380714, "grad_norm": 1.6815721988677979, "learning_rate": 0.00014501466275659822, "loss": 0.3811, "step": 6853 }, { "epoch": 1.0729492798998121, "grad_norm": 0.926059901714325, "learning_rate": 0.0001449902248289345, "loss": 0.5096, "step": 6854 }, { "epoch": 1.0731058234189104, "grad_norm": 0.8995077610015869, "learning_rate": 0.00014496578690127075, "loss": 0.4286, "step": 6855 }, { "epoch": 1.0732623669380088, "grad_norm": 0.9982707500457764, "learning_rate": 0.00014494134897360703, "loss": 0.5821, "step": 6856 }, { "epoch": 1.073418910457107, "grad_norm": 1.031527042388916, "learning_rate": 0.00014491691104594328, "loss": 0.3978, "step": 6857 }, { "epoch": 1.0735754539762055, "grad_norm": 0.7758069038391113, "learning_rate": 0.00014489247311827956, "loss": 0.3313, "step": 6858 }, { "epoch": 
1.0737319974953037, "grad_norm": 1.3925272226333618, "learning_rate": 0.00014486803519061584, "loss": 0.2895, "step": 6859 }, { "epoch": 1.073888541014402, "grad_norm": 1.40436851978302, "learning_rate": 0.0001448435972629521, "loss": 0.4096, "step": 6860 }, { "epoch": 1.0740450845335003, "grad_norm": 1.4296326637268066, "learning_rate": 0.00014481915933528834, "loss": 0.4963, "step": 6861 }, { "epoch": 1.0742016280525986, "grad_norm": 1.4773507118225098, "learning_rate": 0.00014479472140762462, "loss": 0.4817, "step": 6862 }, { "epoch": 1.074358171571697, "grad_norm": 1.4051315784454346, "learning_rate": 0.0001447702834799609, "loss": 0.6741, "step": 6863 }, { "epoch": 1.0745147150907952, "grad_norm": 1.041042447090149, "learning_rate": 0.00014474584555229715, "loss": 0.2722, "step": 6864 }, { "epoch": 1.0746712586098937, "grad_norm": 2.3967373371124268, "learning_rate": 0.00014472140762463343, "loss": 0.764, "step": 6865 }, { "epoch": 1.0748278021289919, "grad_norm": 4.08719539642334, "learning_rate": 0.00014469696969696968, "loss": 0.4523, "step": 6866 }, { "epoch": 1.07498434564809, "grad_norm": 1.987392783164978, "learning_rate": 0.00014467253176930596, "loss": 0.5414, "step": 6867 }, { "epoch": 1.0751408891671885, "grad_norm": 3.570604085922241, "learning_rate": 0.0001446480938416422, "loss": 0.8434, "step": 6868 }, { "epoch": 1.0752974326862867, "grad_norm": 7.099319934844971, "learning_rate": 0.0001446236559139785, "loss": 0.7103, "step": 6869 }, { "epoch": 1.0754539762053852, "grad_norm": 3.188019037246704, "learning_rate": 0.00014459921798631474, "loss": 0.5604, "step": 6870 }, { "epoch": 1.0756105197244834, "grad_norm": 1.420037865638733, "learning_rate": 0.00014457478005865102, "loss": 0.5358, "step": 6871 }, { "epoch": 1.0757670632435816, "grad_norm": 1.5740822553634644, "learning_rate": 0.00014455034213098727, "loss": 0.6757, "step": 6872 }, { "epoch": 1.07592360676268, "grad_norm": 2.2425143718719482, "learning_rate": 0.00014452590420332355, "loss": 
1.1542, "step": 6873 }, { "epoch": 1.0760801502817783, "grad_norm": 3.6810855865478516, "learning_rate": 0.00014450146627565982, "loss": 0.8133, "step": 6874 }, { "epoch": 1.0762366938008767, "grad_norm": 2.337358236312866, "learning_rate": 0.00014447702834799608, "loss": 0.8714, "step": 6875 }, { "epoch": 1.076393237319975, "grad_norm": 3.2725279331207275, "learning_rate": 0.00014445259042033233, "loss": 0.9102, "step": 6876 }, { "epoch": 1.0765497808390734, "grad_norm": 5.1807355880737305, "learning_rate": 0.0001444281524926686, "loss": 1.4275, "step": 6877 }, { "epoch": 1.0767063243581716, "grad_norm": 3.2198193073272705, "learning_rate": 0.00014440371456500488, "loss": 0.6962, "step": 6878 }, { "epoch": 1.0768628678772698, "grad_norm": 5.173696041107178, "learning_rate": 0.00014437927663734113, "loss": 1.4533, "step": 6879 }, { "epoch": 1.0770194113963683, "grad_norm": 2.3174338340759277, "learning_rate": 0.0001443548387096774, "loss": 1.139, "step": 6880 }, { "epoch": 1.0771759549154665, "grad_norm": 2.363914966583252, "learning_rate": 0.00014433040078201366, "loss": 1.0105, "step": 6881 }, { "epoch": 1.077332498434565, "grad_norm": 2.0799217224121094, "learning_rate": 0.00014430596285434994, "loss": 0.8286, "step": 6882 }, { "epoch": 1.0774890419536631, "grad_norm": 2.2222182750701904, "learning_rate": 0.00014428152492668622, "loss": 0.6723, "step": 6883 }, { "epoch": 1.0776455854727613, "grad_norm": 2.70257568359375, "learning_rate": 0.00014425708699902247, "loss": 0.8287, "step": 6884 }, { "epoch": 1.0778021289918598, "grad_norm": 2.3015530109405518, "learning_rate": 0.00014423264907135872, "loss": 0.3816, "step": 6885 }, { "epoch": 1.077958672510958, "grad_norm": 2.944084644317627, "learning_rate": 0.000144208211143695, "loss": 0.9525, "step": 6886 }, { "epoch": 1.0781152160300564, "grad_norm": 2.2739789485931396, "learning_rate": 0.00014418377321603128, "loss": 0.489, "step": 6887 }, { "epoch": 1.0782717595491547, "grad_norm": 6.7248969078063965, 
"learning_rate": 0.00014415933528836753, "loss": 0.9746, "step": 6888 }, { "epoch": 1.0784283030682529, "grad_norm": 0.4254947006702423, "learning_rate": 0.0001441348973607038, "loss": 0.2798, "step": 6889 }, { "epoch": 1.0785848465873513, "grad_norm": 0.3661378026008606, "learning_rate": 0.00014411045943304006, "loss": 0.1879, "step": 6890 }, { "epoch": 1.0787413901064495, "grad_norm": 0.5312639474868774, "learning_rate": 0.0001440860215053763, "loss": 0.1684, "step": 6891 }, { "epoch": 1.078897933625548, "grad_norm": 0.9988065361976624, "learning_rate": 0.0001440615835777126, "loss": 0.2838, "step": 6892 }, { "epoch": 1.0790544771446462, "grad_norm": 0.5143894553184509, "learning_rate": 0.00014403714565004887, "loss": 0.2015, "step": 6893 }, { "epoch": 1.0792110206637444, "grad_norm": 0.6396801471710205, "learning_rate": 0.00014401270772238512, "loss": 0.2814, "step": 6894 }, { "epoch": 1.0793675641828429, "grad_norm": 0.8477953672409058, "learning_rate": 0.0001439882697947214, "loss": 0.3326, "step": 6895 }, { "epoch": 1.079524107701941, "grad_norm": 0.47463440895080566, "learning_rate": 0.00014396383186705765, "loss": 0.1656, "step": 6896 }, { "epoch": 1.0796806512210395, "grad_norm": 0.7857781052589417, "learning_rate": 0.00014393939393939393, "loss": 0.2643, "step": 6897 }, { "epoch": 1.0798371947401377, "grad_norm": 0.7944388389587402, "learning_rate": 0.0001439149560117302, "loss": 0.3092, "step": 6898 }, { "epoch": 1.0799937382592362, "grad_norm": 1.1828808784484863, "learning_rate": 0.00014389051808406646, "loss": 0.2997, "step": 6899 }, { "epoch": 1.0801502817783344, "grad_norm": 0.6055023074150085, "learning_rate": 0.0001438660801564027, "loss": 0.2784, "step": 6900 }, { "epoch": 1.0803068252974326, "grad_norm": 0.6134934425354004, "learning_rate": 0.000143841642228739, "loss": 0.3366, "step": 6901 }, { "epoch": 1.080463368816531, "grad_norm": 1.3209537267684937, "learning_rate": 0.00014381720430107527, "loss": 0.5705, "step": 6902 }, { "epoch": 
1.0806199123356293, "grad_norm": 0.7764045000076294, "learning_rate": 0.00014379276637341152, "loss": 0.3105, "step": 6903 }, { "epoch": 1.0807764558547277, "grad_norm": 1.349913477897644, "learning_rate": 0.0001437683284457478, "loss": 0.2681, "step": 6904 }, { "epoch": 1.080932999373826, "grad_norm": 0.920647919178009, "learning_rate": 0.00014374389051808405, "loss": 0.3003, "step": 6905 }, { "epoch": 1.0810895428929241, "grad_norm": 1.2609316110610962, "learning_rate": 0.00014371945259042033, "loss": 0.3485, "step": 6906 }, { "epoch": 1.0812460864120226, "grad_norm": 1.3429254293441772, "learning_rate": 0.0001436950146627566, "loss": 0.4673, "step": 6907 }, { "epoch": 1.0814026299311208, "grad_norm": 1.207157850265503, "learning_rate": 0.00014367057673509285, "loss": 0.4167, "step": 6908 }, { "epoch": 1.0815591734502192, "grad_norm": 0.8418388366699219, "learning_rate": 0.0001436461388074291, "loss": 0.4355, "step": 6909 }, { "epoch": 1.0817157169693175, "grad_norm": 1.242262363433838, "learning_rate": 0.00014362170087976538, "loss": 0.627, "step": 6910 }, { "epoch": 1.081872260488416, "grad_norm": 2.378530979156494, "learning_rate": 0.00014359726295210166, "loss": 0.9752, "step": 6911 }, { "epoch": 1.0820288040075141, "grad_norm": 2.7604053020477295, "learning_rate": 0.00014357282502443791, "loss": 0.8568, "step": 6912 }, { "epoch": 1.0821853475266123, "grad_norm": 1.2572295665740967, "learning_rate": 0.0001435483870967742, "loss": 0.6182, "step": 6913 }, { "epoch": 1.0823418910457108, "grad_norm": 1.6974389553070068, "learning_rate": 0.00014352394916911044, "loss": 0.6046, "step": 6914 }, { "epoch": 1.082498434564809, "grad_norm": 2.8349547386169434, "learning_rate": 0.0001434995112414467, "loss": 0.7052, "step": 6915 }, { "epoch": 1.0826549780839074, "grad_norm": 3.3643503189086914, "learning_rate": 0.00014347507331378297, "loss": 0.8496, "step": 6916 }, { "epoch": 1.0828115216030056, "grad_norm": 2.5238869190216064, "learning_rate": 0.00014345063538611925, 
"loss": 0.9502, "step": 6917 }, { "epoch": 1.0829680651221039, "grad_norm": 2.205315113067627, "learning_rate": 0.0001434261974584555, "loss": 1.0775, "step": 6918 }, { "epoch": 1.0831246086412023, "grad_norm": 5.002822399139404, "learning_rate": 0.00014340175953079178, "loss": 0.9507, "step": 6919 }, { "epoch": 1.0832811521603005, "grad_norm": 3.2781922817230225, "learning_rate": 0.00014337732160312803, "loss": 0.8607, "step": 6920 }, { "epoch": 1.083437695679399, "grad_norm": 1.792444109916687, "learning_rate": 0.0001433528836754643, "loss": 0.9261, "step": 6921 }, { "epoch": 1.0835942391984972, "grad_norm": 2.4402613639831543, "learning_rate": 0.0001433284457478006, "loss": 0.9687, "step": 6922 }, { "epoch": 1.0837507827175954, "grad_norm": 2.88875412940979, "learning_rate": 0.00014330400782013684, "loss": 1.0102, "step": 6923 }, { "epoch": 1.0839073262366938, "grad_norm": 2.4586126804351807, "learning_rate": 0.0001432795698924731, "loss": 0.962, "step": 6924 }, { "epoch": 1.084063869755792, "grad_norm": 2.9678328037261963, "learning_rate": 0.00014325513196480937, "loss": 1.0995, "step": 6925 }, { "epoch": 1.0842204132748905, "grad_norm": 2.3805899620056152, "learning_rate": 0.00014323069403714565, "loss": 0.9321, "step": 6926 }, { "epoch": 1.0843769567939887, "grad_norm": 2.5837199687957764, "learning_rate": 0.0001432062561094819, "loss": 0.7621, "step": 6927 }, { "epoch": 1.084533500313087, "grad_norm": 2.366360664367676, "learning_rate": 0.00014318181818181818, "loss": 0.9827, "step": 6928 }, { "epoch": 1.0846900438321854, "grad_norm": 3.093599796295166, "learning_rate": 0.00014315738025415443, "loss": 1.1201, "step": 6929 }, { "epoch": 1.0848465873512836, "grad_norm": 3.7076659202575684, "learning_rate": 0.0001431329423264907, "loss": 1.1055, "step": 6930 }, { "epoch": 1.085003130870382, "grad_norm": 3.0820627212524414, "learning_rate": 0.00014310850439882699, "loss": 1.3536, "step": 6931 }, { "epoch": 1.0851596743894802, "grad_norm": 8.682129859924316, 
"learning_rate": 0.00014308406647116324, "loss": 1.7647, "step": 6932 }, { "epoch": 1.0853162179085787, "grad_norm": 2.9856128692626953, "learning_rate": 0.0001430596285434995, "loss": 1.6023, "step": 6933 }, { "epoch": 1.085472761427677, "grad_norm": 2.074152708053589, "learning_rate": 0.00014303519061583577, "loss": 0.2357, "step": 6934 }, { "epoch": 1.0856293049467751, "grad_norm": 3.0086069107055664, "learning_rate": 0.00014301075268817202, "loss": 0.9179, "step": 6935 }, { "epoch": 1.0857858484658736, "grad_norm": 4.519247531890869, "learning_rate": 0.0001429863147605083, "loss": 1.0155, "step": 6936 }, { "epoch": 1.0859423919849718, "grad_norm": 1.4745041131973267, "learning_rate": 0.00014296187683284457, "loss": 0.6405, "step": 6937 }, { "epoch": 1.0860989355040702, "grad_norm": 3.711125135421753, "learning_rate": 0.00014293743890518083, "loss": 0.9687, "step": 6938 }, { "epoch": 1.0862554790231684, "grad_norm": 0.41256627440452576, "learning_rate": 0.00014291300097751708, "loss": 0.2144, "step": 6939 }, { "epoch": 1.0864120225422667, "grad_norm": 0.39318832755088806, "learning_rate": 0.00014288856304985336, "loss": 0.2536, "step": 6940 }, { "epoch": 1.086568566061365, "grad_norm": 0.5901644229888916, "learning_rate": 0.00014286412512218963, "loss": 0.2521, "step": 6941 }, { "epoch": 1.0867251095804633, "grad_norm": 0.5370203852653503, "learning_rate": 0.00014283968719452589, "loss": 0.2123, "step": 6942 }, { "epoch": 1.0868816530995618, "grad_norm": 0.546731173992157, "learning_rate": 0.00014281524926686216, "loss": 0.3049, "step": 6943 }, { "epoch": 1.08703819661866, "grad_norm": 0.36624133586883545, "learning_rate": 0.00014279081133919841, "loss": 0.1984, "step": 6944 }, { "epoch": 1.0871947401377584, "grad_norm": 0.6180028319358826, "learning_rate": 0.0001427663734115347, "loss": 0.2255, "step": 6945 }, { "epoch": 1.0873512836568566, "grad_norm": 0.9784983396530151, "learning_rate": 0.00014274193548387097, "loss": 0.4502, "step": 6946 }, { "epoch": 
1.0875078271759548, "grad_norm": 0.5127384066581726, "learning_rate": 0.00014271749755620722, "loss": 0.27, "step": 6947 }, { "epoch": 1.0876643706950533, "grad_norm": 0.6251503825187683, "learning_rate": 0.00014269305962854347, "loss": 0.2674, "step": 6948 }, { "epoch": 1.0878209142141515, "grad_norm": 0.6730063557624817, "learning_rate": 0.00014266862170087975, "loss": 0.31, "step": 6949 }, { "epoch": 1.08797745773325, "grad_norm": 0.7769110798835754, "learning_rate": 0.00014264418377321603, "loss": 0.302, "step": 6950 }, { "epoch": 1.0881340012523482, "grad_norm": 1.0139259099960327, "learning_rate": 0.00014261974584555228, "loss": 0.3961, "step": 6951 }, { "epoch": 1.0882905447714464, "grad_norm": 0.7795376777648926, "learning_rate": 0.00014259530791788856, "loss": 0.2495, "step": 6952 }, { "epoch": 1.0884470882905448, "grad_norm": 0.770647406578064, "learning_rate": 0.0001425708699902248, "loss": 0.4041, "step": 6953 }, { "epoch": 1.088603631809643, "grad_norm": 0.9551957845687866, "learning_rate": 0.0001425464320625611, "loss": 0.2077, "step": 6954 }, { "epoch": 1.0887601753287415, "grad_norm": 0.8338924646377563, "learning_rate": 0.00014252199413489737, "loss": 0.4206, "step": 6955 }, { "epoch": 1.0889167188478397, "grad_norm": 0.7971686720848083, "learning_rate": 0.00014249755620723362, "loss": 0.31, "step": 6956 }, { "epoch": 1.089073262366938, "grad_norm": 1.553481101989746, "learning_rate": 0.00014247311827956987, "loss": 0.6022, "step": 6957 }, { "epoch": 1.0892298058860364, "grad_norm": 1.3536430597305298, "learning_rate": 0.00014244868035190615, "loss": 0.4746, "step": 6958 }, { "epoch": 1.0893863494051346, "grad_norm": 0.8568630814552307, "learning_rate": 0.0001424242424242424, "loss": 0.6675, "step": 6959 }, { "epoch": 1.089542892924233, "grad_norm": 0.9841766357421875, "learning_rate": 0.00014239980449657868, "loss": 0.56, "step": 6960 }, { "epoch": 1.0896994364433312, "grad_norm": 1.8160009384155273, "learning_rate": 0.00014237536656891496, 
"loss": 0.6362, "step": 6961 }, { "epoch": 1.0898559799624294, "grad_norm": 1.0871392488479614, "learning_rate": 0.0001423509286412512, "loss": 0.387, "step": 6962 }, { "epoch": 1.0900125234815279, "grad_norm": 2.788846731185913, "learning_rate": 0.00014232649071358746, "loss": 0.5088, "step": 6963 }, { "epoch": 1.090169067000626, "grad_norm": 2.411051034927368, "learning_rate": 0.00014230205278592374, "loss": 0.6828, "step": 6964 }, { "epoch": 1.0903256105197245, "grad_norm": 1.6987364292144775, "learning_rate": 0.00014227761485826002, "loss": 0.7937, "step": 6965 }, { "epoch": 1.0904821540388228, "grad_norm": 1.3906645774841309, "learning_rate": 0.00014225317693059627, "loss": 0.3764, "step": 6966 }, { "epoch": 1.0906386975579212, "grad_norm": 1.8853800296783447, "learning_rate": 0.00014222873900293255, "loss": 0.4234, "step": 6967 }, { "epoch": 1.0907952410770194, "grad_norm": 4.4653639793396, "learning_rate": 0.0001422043010752688, "loss": 1.001, "step": 6968 }, { "epoch": 1.0909517845961176, "grad_norm": 1.9520578384399414, "learning_rate": 0.00014217986314760508, "loss": 0.6155, "step": 6969 }, { "epoch": 1.091108328115216, "grad_norm": 2.5212900638580322, "learning_rate": 0.00014215542521994135, "loss": 0.5802, "step": 6970 }, { "epoch": 1.0912648716343143, "grad_norm": 3.541121006011963, "learning_rate": 0.0001421309872922776, "loss": 0.819, "step": 6971 }, { "epoch": 1.0914214151534127, "grad_norm": 4.24415922164917, "learning_rate": 0.00014210654936461386, "loss": 1.0289, "step": 6972 }, { "epoch": 1.091577958672511, "grad_norm": 2.449824094772339, "learning_rate": 0.00014208211143695013, "loss": 1.1641, "step": 6973 }, { "epoch": 1.0917345021916092, "grad_norm": 1.3512892723083496, "learning_rate": 0.0001420576735092864, "loss": 0.4655, "step": 6974 }, { "epoch": 1.0918910457107076, "grad_norm": 3.9181694984436035, "learning_rate": 0.00014203323558162266, "loss": 1.4388, "step": 6975 }, { "epoch": 1.0920475892298058, "grad_norm": 3.194889545440674, 
"learning_rate": 0.00014200879765395894, "loss": 1.5037, "step": 6976 }, { "epoch": 1.0922041327489043, "grad_norm": 3.483785629272461, "learning_rate": 0.0001419843597262952, "loss": 0.8793, "step": 6977 }, { "epoch": 1.0923606762680025, "grad_norm": 2.2053303718566895, "learning_rate": 0.00014195992179863147, "loss": 0.8187, "step": 6978 }, { "epoch": 1.092517219787101, "grad_norm": 3.390990734100342, "learning_rate": 0.00014193548387096772, "loss": 0.9644, "step": 6979 }, { "epoch": 1.0926737633061991, "grad_norm": 3.826531171798706, "learning_rate": 0.000141911045943304, "loss": 1.2942, "step": 6980 }, { "epoch": 1.0928303068252974, "grad_norm": 2.1854097843170166, "learning_rate": 0.00014188660801564025, "loss": 1.08, "step": 6981 }, { "epoch": 1.0929868503443958, "grad_norm": 1.7453972101211548, "learning_rate": 0.00014186217008797653, "loss": 1.066, "step": 6982 }, { "epoch": 1.093143393863494, "grad_norm": 1.322583556175232, "learning_rate": 0.00014183773216031278, "loss": 1.0875, "step": 6983 }, { "epoch": 1.0932999373825925, "grad_norm": 3.8100578784942627, "learning_rate": 0.00014181329423264906, "loss": 0.9858, "step": 6984 }, { "epoch": 1.0934564809016907, "grad_norm": 1.9491938352584839, "learning_rate": 0.00014178885630498534, "loss": 1.0154, "step": 6985 }, { "epoch": 1.093613024420789, "grad_norm": 1.2058466672897339, "learning_rate": 0.0001417644183773216, "loss": 0.5821, "step": 6986 }, { "epoch": 1.0937695679398873, "grad_norm": 1.4509241580963135, "learning_rate": 0.00014173998044965784, "loss": 0.3841, "step": 6987 }, { "epoch": 1.0939261114589856, "grad_norm": 1.7737778425216675, "learning_rate": 0.00014171554252199412, "loss": 0.772, "step": 6988 }, { "epoch": 1.094082654978084, "grad_norm": 0.6553102135658264, "learning_rate": 0.0001416911045943304, "loss": 0.2943, "step": 6989 }, { "epoch": 1.0942391984971822, "grad_norm": 0.4766181409358978, "learning_rate": 0.00014166666666666665, "loss": 0.1739, "step": 6990 }, { "epoch": 
1.0943957420162804, "grad_norm": 0.475190132856369, "learning_rate": 0.00014164222873900293, "loss": 0.229, "step": 6991 }, { "epoch": 1.0945522855353789, "grad_norm": 0.4964744448661804, "learning_rate": 0.00014161779081133918, "loss": 0.2992, "step": 6992 }, { "epoch": 1.094708829054477, "grad_norm": 0.7098869681358337, "learning_rate": 0.00014159335288367546, "loss": 0.3533, "step": 6993 }, { "epoch": 1.0948653725735755, "grad_norm": 0.6949512958526611, "learning_rate": 0.00014156891495601174, "loss": 0.3115, "step": 6994 }, { "epoch": 1.0950219160926737, "grad_norm": 1.074985384941101, "learning_rate": 0.000141544477028348, "loss": 0.2106, "step": 6995 }, { "epoch": 1.095178459611772, "grad_norm": 1.048966884613037, "learning_rate": 0.00014152003910068424, "loss": 0.2574, "step": 6996 }, { "epoch": 1.0953350031308704, "grad_norm": 0.6164217591285706, "learning_rate": 0.00014149560117302052, "loss": 0.2964, "step": 6997 }, { "epoch": 1.0954915466499686, "grad_norm": 0.6958305239677429, "learning_rate": 0.0001414711632453568, "loss": 0.2955, "step": 6998 }, { "epoch": 1.095648090169067, "grad_norm": 0.7299469709396362, "learning_rate": 0.00014144672531769305, "loss": 0.332, "step": 6999 }, { "epoch": 1.0958046336881653, "grad_norm": 2.961721181869507, "learning_rate": 0.00014142228739002932, "loss": 0.3274, "step": 7000 }, { "epoch": 1.0958046336881653, "eval_loss": 0.524387538433075, "eval_runtime": 205.3671, "eval_samples_per_second": 60.297, "eval_steps_per_second": 3.769, "eval_wer": 0.33754807998809955, "step": 7000 }, { "epoch": 1.0959611772072637, "grad_norm": 0.8560186624526978, "learning_rate": 0.00014139784946236558, "loss": 0.2684, "step": 7001 }, { "epoch": 1.096117720726362, "grad_norm": 0.9131744503974915, "learning_rate": 0.00014137341153470185, "loss": 0.3631, "step": 7002 }, { "epoch": 1.0962742642454602, "grad_norm": 2.9472861289978027, "learning_rate": 0.0001413489736070381, "loss": 0.7318, "step": 7003 }, { "epoch": 1.0964308077645586, 
"grad_norm": 1.0932270288467407, "learning_rate": 0.00014132453567937438, "loss": 0.3798, "step": 7004 }, { "epoch": 1.0965873512836568, "grad_norm": 1.729344367980957, "learning_rate": 0.00014130009775171064, "loss": 0.6468, "step": 7005 }, { "epoch": 1.0967438948027552, "grad_norm": 2.079954147338867, "learning_rate": 0.00014127565982404691, "loss": 0.4306, "step": 7006 }, { "epoch": 1.0969004383218535, "grad_norm": 0.8679769039154053, "learning_rate": 0.00014125122189638316, "loss": 0.4454, "step": 7007 }, { "epoch": 1.0970569818409517, "grad_norm": 0.751891553401947, "learning_rate": 0.00014122678396871944, "loss": 0.3538, "step": 7008 }, { "epoch": 1.0972135253600501, "grad_norm": 2.451910972595215, "learning_rate": 0.00014120234604105572, "loss": 0.3732, "step": 7009 }, { "epoch": 1.0973700688791483, "grad_norm": 1.3220970630645752, "learning_rate": 0.00014117790811339197, "loss": 0.6134, "step": 7010 }, { "epoch": 1.0975266123982468, "grad_norm": 2.3990230560302734, "learning_rate": 0.00014115347018572822, "loss": 0.5123, "step": 7011 }, { "epoch": 1.097683155917345, "grad_norm": 3.5617446899414062, "learning_rate": 0.0001411290322580645, "loss": 0.8965, "step": 7012 }, { "epoch": 1.0978396994364434, "grad_norm": 1.2367515563964844, "learning_rate": 0.00014110459433040078, "loss": 0.4453, "step": 7013 }, { "epoch": 1.0979962429555417, "grad_norm": 1.6141263246536255, "learning_rate": 0.00014108015640273703, "loss": 0.6857, "step": 7014 }, { "epoch": 1.0981527864746399, "grad_norm": 1.6750285625457764, "learning_rate": 0.0001410557184750733, "loss": 0.8078, "step": 7015 }, { "epoch": 1.0983093299937383, "grad_norm": 1.4285156726837158, "learning_rate": 0.00014103128054740956, "loss": 0.3836, "step": 7016 }, { "epoch": 1.0984658735128365, "grad_norm": 2.9705681800842285, "learning_rate": 0.00014100684261974584, "loss": 0.4486, "step": 7017 }, { "epoch": 1.098622417031935, "grad_norm": 1.3870073556900024, "learning_rate": 0.00014098240469208212, "loss": 0.5095, 
"step": 7018 }, { "epoch": 1.0987789605510332, "grad_norm": 1.9252225160598755, "learning_rate": 0.00014095796676441837, "loss": 0.3421, "step": 7019 }, { "epoch": 1.0989355040701314, "grad_norm": 3.1899898052215576, "learning_rate": 0.00014093352883675462, "loss": 0.9082, "step": 7020 }, { "epoch": 1.0990920475892298, "grad_norm": 2.3937432765960693, "learning_rate": 0.0001409090909090909, "loss": 1.0056, "step": 7021 }, { "epoch": 1.099248591108328, "grad_norm": 1.9584646224975586, "learning_rate": 0.00014088465298142718, "loss": 0.7778, "step": 7022 }, { "epoch": 1.0994051346274265, "grad_norm": 1.8999119997024536, "learning_rate": 0.00014086021505376343, "loss": 0.7928, "step": 7023 }, { "epoch": 1.0995616781465247, "grad_norm": 1.100942611694336, "learning_rate": 0.0001408357771260997, "loss": 0.3812, "step": 7024 }, { "epoch": 1.099718221665623, "grad_norm": 3.0417444705963135, "learning_rate": 0.00014081133919843596, "loss": 0.8766, "step": 7025 }, { "epoch": 1.0998747651847214, "grad_norm": 3.2357630729675293, "learning_rate": 0.00014078690127077224, "loss": 1.1982, "step": 7026 }, { "epoch": 1.1000313087038196, "grad_norm": 1.8465913534164429, "learning_rate": 0.0001407624633431085, "loss": 1.1896, "step": 7027 }, { "epoch": 1.100187852222918, "grad_norm": 1.5905226469039917, "learning_rate": 0.00014073802541544477, "loss": 0.9382, "step": 7028 }, { "epoch": 1.1003443957420163, "grad_norm": 2.8563072681427, "learning_rate": 0.00014071358748778102, "loss": 1.0176, "step": 7029 }, { "epoch": 1.1005009392611145, "grad_norm": 3.9410860538482666, "learning_rate": 0.0001406891495601173, "loss": 1.497, "step": 7030 }, { "epoch": 1.100657482780213, "grad_norm": 2.426844358444214, "learning_rate": 0.00014066471163245355, "loss": 1.0797, "step": 7031 }, { "epoch": 1.1008140262993111, "grad_norm": 2.4825949668884277, "learning_rate": 0.00014064027370478983, "loss": 1.0159, "step": 7032 }, { "epoch": 1.1009705698184096, "grad_norm": 2.973639965057373, "learning_rate": 
0.00014061583577712608, "loss": 1.1596, "step": 7033 }, { "epoch": 1.1011271133375078, "grad_norm": 2.815019369125366, "learning_rate": 0.00014059139784946236, "loss": 0.3454, "step": 7034 }, { "epoch": 1.1012836568566062, "grad_norm": 1.925430178642273, "learning_rate": 0.0001405669599217986, "loss": 0.6438, "step": 7035 }, { "epoch": 1.1014402003757044, "grad_norm": 4.631162166595459, "learning_rate": 0.00014054252199413488, "loss": 0.5383, "step": 7036 }, { "epoch": 1.1015967438948027, "grad_norm": 3.7617523670196533, "learning_rate": 0.00014051808406647116, "loss": 1.449, "step": 7037 }, { "epoch": 1.101753287413901, "grad_norm": 2.4548864364624023, "learning_rate": 0.00014049364613880741, "loss": 1.1796, "step": 7038 }, { "epoch": 1.1019098309329993, "grad_norm": 0.4076089560985565, "learning_rate": 0.00014046920821114367, "loss": 0.2382, "step": 7039 }, { "epoch": 1.1020663744520978, "grad_norm": 0.7186834216117859, "learning_rate": 0.00014044477028347994, "loss": 0.1617, "step": 7040 }, { "epoch": 1.102222917971196, "grad_norm": 0.419718861579895, "learning_rate": 0.00014042033235581622, "loss": 0.1957, "step": 7041 }, { "epoch": 1.1023794614902942, "grad_norm": 0.6931477189064026, "learning_rate": 0.00014039589442815247, "loss": 0.2296, "step": 7042 }, { "epoch": 1.1025360050093926, "grad_norm": 1.0758944749832153, "learning_rate": 0.00014037145650048875, "loss": 0.4824, "step": 7043 }, { "epoch": 1.1026925485284909, "grad_norm": 0.721367359161377, "learning_rate": 0.000140347018572825, "loss": 0.3532, "step": 7044 }, { "epoch": 1.1028490920475893, "grad_norm": 0.8995819091796875, "learning_rate": 0.00014032258064516128, "loss": 0.2786, "step": 7045 }, { "epoch": 1.1030056355666875, "grad_norm": 0.5277887582778931, "learning_rate": 0.00014029814271749756, "loss": 0.2787, "step": 7046 }, { "epoch": 1.103162179085786, "grad_norm": 0.6943791508674622, "learning_rate": 0.0001402737047898338, "loss": 0.2092, "step": 7047 }, { "epoch": 1.1033187226048842, 
"grad_norm": 1.1070524454116821, "learning_rate": 0.00014024926686217006, "loss": 0.3526, "step": 7048 }, { "epoch": 1.1034752661239824, "grad_norm": 1.660226583480835, "learning_rate": 0.00014022482893450634, "loss": 0.3538, "step": 7049 }, { "epoch": 1.1036318096430808, "grad_norm": 1.7828032970428467, "learning_rate": 0.0001402003910068426, "loss": 0.6886, "step": 7050 }, { "epoch": 1.103788353162179, "grad_norm": 1.3736459016799927, "learning_rate": 0.00014017595307917887, "loss": 0.3213, "step": 7051 }, { "epoch": 1.1039448966812775, "grad_norm": 1.7011991739273071, "learning_rate": 0.00014015151515151515, "loss": 0.3819, "step": 7052 }, { "epoch": 1.1041014402003757, "grad_norm": 1.1513324975967407, "learning_rate": 0.0001401270772238514, "loss": 0.3103, "step": 7053 }, { "epoch": 1.104257983719474, "grad_norm": 0.6520640254020691, "learning_rate": 0.00014010263929618765, "loss": 0.2689, "step": 7054 }, { "epoch": 1.1044145272385724, "grad_norm": 1.3298591375350952, "learning_rate": 0.00014007820136852393, "loss": 0.7632, "step": 7055 }, { "epoch": 1.1045710707576706, "grad_norm": 1.7201464176177979, "learning_rate": 0.0001400537634408602, "loss": 0.3654, "step": 7056 }, { "epoch": 1.104727614276769, "grad_norm": 0.8514449596405029, "learning_rate": 0.00014002932551319646, "loss": 0.3845, "step": 7057 }, { "epoch": 1.1048841577958672, "grad_norm": 1.0151549577713013, "learning_rate": 0.00014000488758553274, "loss": 0.4584, "step": 7058 }, { "epoch": 1.1050407013149655, "grad_norm": 0.9856629967689514, "learning_rate": 0.000139980449657869, "loss": 0.5458, "step": 7059 }, { "epoch": 1.105197244834064, "grad_norm": 0.9040552377700806, "learning_rate": 0.00013995601173020527, "loss": 0.2539, "step": 7060 }, { "epoch": 1.1053537883531621, "grad_norm": 1.2553309202194214, "learning_rate": 0.00013993157380254155, "loss": 0.6513, "step": 7061 }, { "epoch": 1.1055103318722606, "grad_norm": 3.3496036529541016, "learning_rate": 0.0001399071358748778, "loss": 0.6279, 
"step": 7062 }, { "epoch": 1.1056668753913588, "grad_norm": 2.5765726566314697, "learning_rate": 0.00013988269794721405, "loss": 0.5166, "step": 7063 }, { "epoch": 1.105823418910457, "grad_norm": 1.883778691291809, "learning_rate": 0.00013985826001955033, "loss": 0.4737, "step": 7064 }, { "epoch": 1.1059799624295554, "grad_norm": 0.8956629037857056, "learning_rate": 0.0001398338220918866, "loss": 0.3679, "step": 7065 }, { "epoch": 1.1061365059486536, "grad_norm": 1.2945466041564941, "learning_rate": 0.00013980938416422286, "loss": 0.5382, "step": 7066 }, { "epoch": 1.106293049467752, "grad_norm": 3.4074881076812744, "learning_rate": 0.00013978494623655913, "loss": 0.3423, "step": 7067 }, { "epoch": 1.1064495929868503, "grad_norm": 1.626989722251892, "learning_rate": 0.00013976050830889539, "loss": 0.6275, "step": 7068 }, { "epoch": 1.1066061365059487, "grad_norm": 1.9460794925689697, "learning_rate": 0.00013973607038123166, "loss": 0.7302, "step": 7069 }, { "epoch": 1.106762680025047, "grad_norm": 1.4970815181732178, "learning_rate": 0.00013971163245356794, "loss": 0.6552, "step": 7070 }, { "epoch": 1.1069192235441452, "grad_norm": 1.4219391345977783, "learning_rate": 0.0001396871945259042, "loss": 0.6406, "step": 7071 }, { "epoch": 1.1070757670632436, "grad_norm": 2.354487180709839, "learning_rate": 0.00013966275659824044, "loss": 0.8659, "step": 7072 }, { "epoch": 1.1072323105823418, "grad_norm": 3.323763370513916, "learning_rate": 0.00013963831867057672, "loss": 0.7318, "step": 7073 }, { "epoch": 1.1073888541014403, "grad_norm": 2.702415943145752, "learning_rate": 0.00013961388074291297, "loss": 1.239, "step": 7074 }, { "epoch": 1.1075453976205385, "grad_norm": 2.1730268001556396, "learning_rate": 0.00013958944281524925, "loss": 1.0215, "step": 7075 }, { "epoch": 1.107701941139637, "grad_norm": 1.8554517030715942, "learning_rate": 0.00013956500488758553, "loss": 0.7656, "step": 7076 }, { "epoch": 1.1078584846587352, "grad_norm": 3.0490665435791016, 
"learning_rate": 0.00013954056695992178, "loss": 0.793, "step": 7077 }, { "epoch": 1.1080150281778334, "grad_norm": 2.6447458267211914, "learning_rate": 0.00013951612903225803, "loss": 1.0286, "step": 7078 }, { "epoch": 1.1081715716969318, "grad_norm": 0.89915931224823, "learning_rate": 0.0001394916911045943, "loss": 0.5665, "step": 7079 }, { "epoch": 1.10832811521603, "grad_norm": 1.9024105072021484, "learning_rate": 0.0001394672531769306, "loss": 0.6443, "step": 7080 }, { "epoch": 1.1084846587351285, "grad_norm": 1.5853981971740723, "learning_rate": 0.00013944281524926684, "loss": 1.179, "step": 7081 }, { "epoch": 1.1086412022542267, "grad_norm": 3.08502197265625, "learning_rate": 0.00013941837732160312, "loss": 0.9776, "step": 7082 }, { "epoch": 1.108797745773325, "grad_norm": 3.0830800533294678, "learning_rate": 0.00013939393939393937, "loss": 1.2178, "step": 7083 }, { "epoch": 1.1089542892924233, "grad_norm": 2.8586432933807373, "learning_rate": 0.00013936950146627565, "loss": 1.0084, "step": 7084 }, { "epoch": 1.1091108328115216, "grad_norm": 1.6677206754684448, "learning_rate": 0.00013934506353861193, "loss": 0.5648, "step": 7085 }, { "epoch": 1.10926737633062, "grad_norm": 2.1359286308288574, "learning_rate": 0.00013932062561094818, "loss": 0.6297, "step": 7086 }, { "epoch": 1.1094239198497182, "grad_norm": 6.148825168609619, "learning_rate": 0.00013929618768328443, "loss": 0.9579, "step": 7087 }, { "epoch": 1.1095804633688164, "grad_norm": 1.7311344146728516, "learning_rate": 0.0001392717497556207, "loss": 0.6473, "step": 7088 }, { "epoch": 1.1097370068879149, "grad_norm": 0.3622504472732544, "learning_rate": 0.000139247311827957, "loss": 0.1864, "step": 7089 }, { "epoch": 1.109893550407013, "grad_norm": 0.5219933986663818, "learning_rate": 0.00013922287390029324, "loss": 0.189, "step": 7090 }, { "epoch": 1.1100500939261115, "grad_norm": 0.3946225345134735, "learning_rate": 0.00013919843597262952, "loss": 0.2183, "step": 7091 }, { "epoch": 
1.1102066374452098, "grad_norm": 0.47833681106567383, "learning_rate": 0.00013917399804496577, "loss": 0.155, "step": 7092 }, { "epoch": 1.110363180964308, "grad_norm": 0.4084630608558655, "learning_rate": 0.00013914956011730205, "loss": 0.1816, "step": 7093 }, { "epoch": 1.1105197244834064, "grad_norm": 0.45835641026496887, "learning_rate": 0.0001391251221896383, "loss": 0.2926, "step": 7094 }, { "epoch": 1.1106762680025046, "grad_norm": 0.5102660655975342, "learning_rate": 0.00013910068426197458, "loss": 0.2767, "step": 7095 }, { "epoch": 1.110832811521603, "grad_norm": 0.914391040802002, "learning_rate": 0.00013907624633431083, "loss": 0.2147, "step": 7096 }, { "epoch": 1.1109893550407013, "grad_norm": 0.6715912222862244, "learning_rate": 0.0001390518084066471, "loss": 0.2518, "step": 7097 }, { "epoch": 1.1111458985597997, "grad_norm": 0.5033890604972839, "learning_rate": 0.00013902737047898336, "loss": 0.3206, "step": 7098 }, { "epoch": 1.111302442078898, "grad_norm": 0.7809054851531982, "learning_rate": 0.00013900293255131963, "loss": 0.2376, "step": 7099 }, { "epoch": 1.1114589855979962, "grad_norm": 0.5782076120376587, "learning_rate": 0.0001389784946236559, "loss": 0.1966, "step": 7100 }, { "epoch": 1.1116155291170946, "grad_norm": 1.2228553295135498, "learning_rate": 0.00013895405669599216, "loss": 0.3316, "step": 7101 }, { "epoch": 1.1117720726361928, "grad_norm": 2.8982632160186768, "learning_rate": 0.00013892961876832842, "loss": 0.5064, "step": 7102 }, { "epoch": 1.1119286161552913, "grad_norm": 1.0576456785202026, "learning_rate": 0.0001389051808406647, "loss": 0.3983, "step": 7103 }, { "epoch": 1.1120851596743895, "grad_norm": 1.5845671892166138, "learning_rate": 0.00013888074291300097, "loss": 0.5427, "step": 7104 }, { "epoch": 1.1122417031934877, "grad_norm": 0.8153006434440613, "learning_rate": 0.00013885630498533722, "loss": 0.4551, "step": 7105 }, { "epoch": 1.1123982467125861, "grad_norm": 1.2537267208099365, "learning_rate": 
0.0001388318670576735, "loss": 0.5381, "step": 7106 }, { "epoch": 1.1125547902316844, "grad_norm": 1.0054357051849365, "learning_rate": 0.00013880742913000975, "loss": 0.3709, "step": 7107 }, { "epoch": 1.1127113337507828, "grad_norm": 3.644198179244995, "learning_rate": 0.00013878299120234603, "loss": 0.7605, "step": 7108 }, { "epoch": 1.112867877269881, "grad_norm": 1.2919872999191284, "learning_rate": 0.0001387585532746823, "loss": 0.2972, "step": 7109 }, { "epoch": 1.1130244207889795, "grad_norm": 1.0998269319534302, "learning_rate": 0.00013873411534701856, "loss": 0.4665, "step": 7110 }, { "epoch": 1.1131809643080777, "grad_norm": 3.0610947608947754, "learning_rate": 0.0001387096774193548, "loss": 0.6518, "step": 7111 }, { "epoch": 1.1133375078271759, "grad_norm": 2.023725986480713, "learning_rate": 0.0001386852394916911, "loss": 0.5192, "step": 7112 }, { "epoch": 1.1134940513462743, "grad_norm": 2.0220577716827393, "learning_rate": 0.00013866080156402737, "loss": 0.5305, "step": 7113 }, { "epoch": 1.1136505948653725, "grad_norm": 1.242210030555725, "learning_rate": 0.00013863636363636362, "loss": 0.4925, "step": 7114 }, { "epoch": 1.113807138384471, "grad_norm": 1.3182028532028198, "learning_rate": 0.0001386119257086999, "loss": 0.4868, "step": 7115 }, { "epoch": 1.1139636819035692, "grad_norm": 1.934834361076355, "learning_rate": 0.00013858748778103615, "loss": 0.4217, "step": 7116 }, { "epoch": 1.1141202254226674, "grad_norm": 1.3865692615509033, "learning_rate": 0.00013856304985337243, "loss": 0.5035, "step": 7117 }, { "epoch": 1.1142767689417659, "grad_norm": 1.6283689737319946, "learning_rate": 0.00013853861192570868, "loss": 1.0412, "step": 7118 }, { "epoch": 1.114433312460864, "grad_norm": 1.3820260763168335, "learning_rate": 0.00013851417399804496, "loss": 1.0473, "step": 7119 }, { "epoch": 1.1145898559799625, "grad_norm": 1.8468633890151978, "learning_rate": 0.0001384897360703812, "loss": 1.0301, "step": 7120 }, { "epoch": 1.1147463994990607, 
"grad_norm": 1.0981626510620117, "learning_rate": 0.0001384652981427175, "loss": 0.4976, "step": 7121 }, { "epoch": 1.114902943018159, "grad_norm": 1.9592245817184448, "learning_rate": 0.00013844086021505374, "loss": 0.6123, "step": 7122 }, { "epoch": 1.1150594865372574, "grad_norm": 4.474161624908447, "learning_rate": 0.00013841642228739002, "loss": 1.0584, "step": 7123 }, { "epoch": 1.1152160300563556, "grad_norm": 2.5803914070129395, "learning_rate": 0.0001383919843597263, "loss": 0.6644, "step": 7124 }, { "epoch": 1.115372573575454, "grad_norm": 1.8294001817703247, "learning_rate": 0.00013836754643206255, "loss": 0.8667, "step": 7125 }, { "epoch": 1.1155291170945523, "grad_norm": 3.3992698192596436, "learning_rate": 0.0001383431085043988, "loss": 0.8291, "step": 7126 }, { "epoch": 1.1156856606136505, "grad_norm": 2.8989360332489014, "learning_rate": 0.00013831867057673508, "loss": 0.9821, "step": 7127 }, { "epoch": 1.115842204132749, "grad_norm": 2.251101016998291, "learning_rate": 0.00013829423264907135, "loss": 0.6455, "step": 7128 }, { "epoch": 1.1159987476518471, "grad_norm": 4.921138763427734, "learning_rate": 0.0001382697947214076, "loss": 1.1246, "step": 7129 }, { "epoch": 1.1161552911709456, "grad_norm": 1.7035528421401978, "learning_rate": 0.00013824535679374388, "loss": 0.997, "step": 7130 }, { "epoch": 1.1163118346900438, "grad_norm": 5.489228248596191, "learning_rate": 0.00013822091886608014, "loss": 0.9107, "step": 7131 }, { "epoch": 1.1164683782091422, "grad_norm": 3.2178499698638916, "learning_rate": 0.00013819648093841641, "loss": 0.7538, "step": 7132 }, { "epoch": 1.1166249217282405, "grad_norm": 2.4438555240631104, "learning_rate": 0.0001381720430107527, "loss": 0.8375, "step": 7133 }, { "epoch": 1.1167814652473387, "grad_norm": 3.7935683727264404, "learning_rate": 0.00013814760508308894, "loss": 1.1526, "step": 7134 }, { "epoch": 1.1169380087664371, "grad_norm": 1.034727931022644, "learning_rate": 0.0001381231671554252, "loss": 0.2834, 
"step": 7135 }, { "epoch": 1.1170945522855353, "grad_norm": 5.797002792358398, "learning_rate": 0.00013809872922776147, "loss": 0.4101, "step": 7136 }, { "epoch": 1.1172510958046338, "grad_norm": 1.3776600360870361, "learning_rate": 0.00013807429130009775, "loss": 0.589, "step": 7137 }, { "epoch": 1.117407639323732, "grad_norm": 2.727193832397461, "learning_rate": 0.000138049853372434, "loss": 1.4797, "step": 7138 }, { "epoch": 1.1175641828428302, "grad_norm": 0.7956806421279907, "learning_rate": 0.00013802541544477028, "loss": 0.3785, "step": 7139 }, { "epoch": 1.1177207263619287, "grad_norm": 0.5914304256439209, "learning_rate": 0.00013800097751710653, "loss": 0.2568, "step": 7140 }, { "epoch": 1.1178772698810269, "grad_norm": 0.49332305788993835, "learning_rate": 0.00013797653958944278, "loss": 0.2745, "step": 7141 }, { "epoch": 1.1180338134001253, "grad_norm": 0.7752807140350342, "learning_rate": 0.00013795210166177906, "loss": 0.1904, "step": 7142 }, { "epoch": 1.1181903569192235, "grad_norm": 0.6126279234886169, "learning_rate": 0.00013792766373411534, "loss": 0.2937, "step": 7143 }, { "epoch": 1.118346900438322, "grad_norm": 0.710114061832428, "learning_rate": 0.0001379032258064516, "loss": 0.2494, "step": 7144 }, { "epoch": 1.1185034439574202, "grad_norm": 0.5817550420761108, "learning_rate": 0.00013787878787878787, "loss": 0.1989, "step": 7145 }, { "epoch": 1.1186599874765184, "grad_norm": 0.6137755513191223, "learning_rate": 0.00013785434995112412, "loss": 0.2376, "step": 7146 }, { "epoch": 1.1188165309956168, "grad_norm": 0.6869461536407471, "learning_rate": 0.0001378299120234604, "loss": 0.2195, "step": 7147 }, { "epoch": 1.118973074514715, "grad_norm": 0.5993041396141052, "learning_rate": 0.00013780547409579668, "loss": 0.1858, "step": 7148 }, { "epoch": 1.1191296180338135, "grad_norm": 0.7191832065582275, "learning_rate": 0.00013778103616813293, "loss": 0.3344, "step": 7149 }, { "epoch": 1.1192861615529117, "grad_norm": 8.964902877807617, 
"learning_rate": 0.00013775659824046918, "loss": 2.0161, "step": 7150 }, { "epoch": 1.11944270507201, "grad_norm": 0.8494324088096619, "learning_rate": 0.00013773216031280546, "loss": 0.3409, "step": 7151 }, { "epoch": 1.1195992485911084, "grad_norm": 2.661954402923584, "learning_rate": 0.00013770772238514174, "loss": 0.3471, "step": 7152 }, { "epoch": 1.1197557921102066, "grad_norm": 1.1184935569763184, "learning_rate": 0.000137683284457478, "loss": 0.3072, "step": 7153 }, { "epoch": 1.119912335629305, "grad_norm": 0.8442760109901428, "learning_rate": 0.00013765884652981427, "loss": 0.3564, "step": 7154 }, { "epoch": 1.1200688791484033, "grad_norm": 1.7543659210205078, "learning_rate": 0.00013763440860215052, "loss": 0.537, "step": 7155 }, { "epoch": 1.1202254226675015, "grad_norm": 4.0782060623168945, "learning_rate": 0.0001376099706744868, "loss": 0.5696, "step": 7156 }, { "epoch": 1.1203819661866, "grad_norm": 1.1545088291168213, "learning_rate": 0.00013758553274682307, "loss": 0.4484, "step": 7157 }, { "epoch": 1.1205385097056981, "grad_norm": 1.2286568880081177, "learning_rate": 0.00013756109481915933, "loss": 0.5164, "step": 7158 }, { "epoch": 1.1206950532247966, "grad_norm": 1.0465517044067383, "learning_rate": 0.00013753665689149558, "loss": 0.3714, "step": 7159 }, { "epoch": 1.1208515967438948, "grad_norm": 1.1569772958755493, "learning_rate": 0.00013751221896383186, "loss": 0.2982, "step": 7160 }, { "epoch": 1.121008140262993, "grad_norm": 2.364917755126953, "learning_rate": 0.00013748778103616813, "loss": 0.5142, "step": 7161 }, { "epoch": 1.1211646837820914, "grad_norm": 1.6103262901306152, "learning_rate": 0.00013746334310850439, "loss": 0.5519, "step": 7162 }, { "epoch": 1.1213212273011897, "grad_norm": 2.0225682258605957, "learning_rate": 0.00013743890518084066, "loss": 0.4566, "step": 7163 }, { "epoch": 1.121477770820288, "grad_norm": 1.9558186531066895, "learning_rate": 0.00013741446725317691, "loss": 0.4267, "step": 7164 }, { "epoch": 
1.1216343143393863, "grad_norm": 3.2678587436676025, "learning_rate": 0.00013739002932551317, "loss": 0.6969, "step": 7165 }, { "epoch": 1.1217908578584848, "grad_norm": 1.560784101486206, "learning_rate": 0.00013736559139784944, "loss": 0.6032, "step": 7166 }, { "epoch": 1.121947401377583, "grad_norm": 1.4639748334884644, "learning_rate": 0.00013734115347018572, "loss": 0.3596, "step": 7167 }, { "epoch": 1.1221039448966812, "grad_norm": 1.3413069248199463, "learning_rate": 0.00013731671554252197, "loss": 0.7597, "step": 7168 }, { "epoch": 1.1222604884157796, "grad_norm": 2.400970935821533, "learning_rate": 0.00013729227761485825, "loss": 0.8581, "step": 7169 }, { "epoch": 1.1224170319348779, "grad_norm": 1.9984264373779297, "learning_rate": 0.0001372678396871945, "loss": 0.5273, "step": 7170 }, { "epoch": 1.1225735754539763, "grad_norm": 2.334470510482788, "learning_rate": 0.00013724340175953078, "loss": 1.0934, "step": 7171 }, { "epoch": 1.1227301189730745, "grad_norm": 6.798165321350098, "learning_rate": 0.00013721896383186706, "loss": 1.0502, "step": 7172 }, { "epoch": 1.1228866624921727, "grad_norm": 3.1501338481903076, "learning_rate": 0.0001371945259042033, "loss": 0.806, "step": 7173 }, { "epoch": 1.1230432060112712, "grad_norm": 5.227148056030273, "learning_rate": 0.00013717008797653956, "loss": 1.0714, "step": 7174 }, { "epoch": 1.1231997495303694, "grad_norm": 3.071563482284546, "learning_rate": 0.00013714565004887584, "loss": 1.4736, "step": 7175 }, { "epoch": 1.1233562930494678, "grad_norm": 4.112018585205078, "learning_rate": 0.00013712121212121212, "loss": 1.5653, "step": 7176 }, { "epoch": 1.123512836568566, "grad_norm": 3.3567934036254883, "learning_rate": 0.00013709677419354837, "loss": 0.8757, "step": 7177 }, { "epoch": 1.1236693800876645, "grad_norm": 2.7976365089416504, "learning_rate": 0.00013707233626588465, "loss": 0.7167, "step": 7178 }, { "epoch": 1.1238259236067627, "grad_norm": 3.194066286087036, "learning_rate": 0.0001370478983382209, 
"loss": 1.4257, "step": 7179 }, { "epoch": 1.123982467125861, "grad_norm": 5.53701114654541, "learning_rate": 0.00013702346041055718, "loss": 0.8894, "step": 7180 }, { "epoch": 1.1241390106449594, "grad_norm": 1.579353928565979, "learning_rate": 0.00013699902248289346, "loss": 1.0472, "step": 7181 }, { "epoch": 1.1242955541640576, "grad_norm": 2.461728811264038, "learning_rate": 0.0001369745845552297, "loss": 1.0859, "step": 7182 }, { "epoch": 1.124452097683156, "grad_norm": 4.658527851104736, "learning_rate": 0.00013695014662756596, "loss": 0.7208, "step": 7183 }, { "epoch": 1.1246086412022542, "grad_norm": 2.3477227687835693, "learning_rate": 0.00013692570869990224, "loss": 1.3767, "step": 7184 }, { "epoch": 1.1247651847213525, "grad_norm": 1.8970081806182861, "learning_rate": 0.00013690127077223852, "loss": 1.0941, "step": 7185 }, { "epoch": 1.124921728240451, "grad_norm": 5.8194355964660645, "learning_rate": 0.00013687683284457477, "loss": 0.7754, "step": 7186 }, { "epoch": 1.125078271759549, "grad_norm": 2.781679153442383, "learning_rate": 0.00013685239491691105, "loss": 1.0529, "step": 7187 }, { "epoch": 1.1252348152786475, "grad_norm": 3.368130683898926, "learning_rate": 0.0001368279569892473, "loss": 1.2293, "step": 7188 }, { "epoch": 1.1253913587977458, "grad_norm": 0.35694655776023865, "learning_rate": 0.00013680351906158355, "loss": 0.1819, "step": 7189 }, { "epoch": 1.125547902316844, "grad_norm": 0.6090511083602905, "learning_rate": 0.00013677908113391983, "loss": 0.4191, "step": 7190 }, { "epoch": 1.1257044458359424, "grad_norm": 0.5560696721076965, "learning_rate": 0.0001367546432062561, "loss": 0.202, "step": 7191 }, { "epoch": 1.1258609893550406, "grad_norm": 0.4044315218925476, "learning_rate": 0.00013673020527859236, "loss": 0.1424, "step": 7192 }, { "epoch": 1.126017532874139, "grad_norm": 0.5974329113960266, "learning_rate": 0.00013670576735092863, "loss": 0.2354, "step": 7193 }, { "epoch": 1.1261740763932373, "grad_norm": 0.4592570960521698, 
"learning_rate": 0.00013668132942326489, "loss": 0.3103, "step": 7194 }, { "epoch": 1.1263306199123355, "grad_norm": 0.8667699694633484, "learning_rate": 0.00013665689149560116, "loss": 0.6261, "step": 7195 }, { "epoch": 1.126487163431434, "grad_norm": 0.5696028470993042, "learning_rate": 0.00013663245356793744, "loss": 0.2512, "step": 7196 }, { "epoch": 1.1266437069505322, "grad_norm": 0.41011011600494385, "learning_rate": 0.0001366080156402737, "loss": 0.1274, "step": 7197 }, { "epoch": 1.1268002504696306, "grad_norm": 0.686440646648407, "learning_rate": 0.00013658357771260995, "loss": 0.3106, "step": 7198 }, { "epoch": 1.1269567939887288, "grad_norm": 0.823204755783081, "learning_rate": 0.00013655913978494622, "loss": 0.2809, "step": 7199 }, { "epoch": 1.127113337507827, "grad_norm": 0.6141254901885986, "learning_rate": 0.0001365347018572825, "loss": 0.263, "step": 7200 }, { "epoch": 1.1272698810269255, "grad_norm": 0.6794694066047668, "learning_rate": 0.00013651026392961875, "loss": 0.2794, "step": 7201 }, { "epoch": 1.1274264245460237, "grad_norm": 0.7313904166221619, "learning_rate": 0.00013648582600195503, "loss": 0.2181, "step": 7202 }, { "epoch": 1.1275829680651221, "grad_norm": 0.7755028009414673, "learning_rate": 0.00013646138807429128, "loss": 0.3791, "step": 7203 }, { "epoch": 1.1277395115842204, "grad_norm": 0.8282299041748047, "learning_rate": 0.00013643695014662756, "loss": 0.3037, "step": 7204 }, { "epoch": 1.1278960551033188, "grad_norm": 1.4426270723342896, "learning_rate": 0.00013641251221896384, "loss": 0.5826, "step": 7205 }, { "epoch": 1.128052598622417, "grad_norm": 1.3686167001724243, "learning_rate": 0.0001363880742913001, "loss": 0.3046, "step": 7206 }, { "epoch": 1.1282091421415155, "grad_norm": 1.07740318775177, "learning_rate": 0.00013636363636363634, "loss": 0.3509, "step": 7207 }, { "epoch": 1.1283656856606137, "grad_norm": 1.3811142444610596, "learning_rate": 0.00013633919843597262, "loss": 0.4518, "step": 7208 }, { "epoch": 
1.128522229179712, "grad_norm": 1.2326154708862305, "learning_rate": 0.00013631476050830887, "loss": 0.4252, "step": 7209 }, { "epoch": 1.1286787726988103, "grad_norm": 1.8412114381790161, "learning_rate": 0.00013629032258064515, "loss": 0.5223, "step": 7210 }, { "epoch": 1.1288353162179086, "grad_norm": 1.9933035373687744, "learning_rate": 0.00013626588465298143, "loss": 0.5177, "step": 7211 }, { "epoch": 1.128991859737007, "grad_norm": 1.4402484893798828, "learning_rate": 0.00013624144672531768, "loss": 0.5853, "step": 7212 }, { "epoch": 1.1291484032561052, "grad_norm": 1.115515947341919, "learning_rate": 0.00013621700879765393, "loss": 0.4662, "step": 7213 }, { "epoch": 1.1293049467752034, "grad_norm": 2.486177444458008, "learning_rate": 0.0001361925708699902, "loss": 0.7741, "step": 7214 }, { "epoch": 1.1294614902943019, "grad_norm": 1.2071272134780884, "learning_rate": 0.0001361681329423265, "loss": 0.2729, "step": 7215 }, { "epoch": 1.1296180338134, "grad_norm": 5.3013529777526855, "learning_rate": 0.00013614369501466274, "loss": 0.9455, "step": 7216 }, { "epoch": 1.1297745773324985, "grad_norm": 2.959777593612671, "learning_rate": 0.00013611925708699902, "loss": 0.9704, "step": 7217 }, { "epoch": 1.1299311208515967, "grad_norm": 3.2113773822784424, "learning_rate": 0.00013609481915933527, "loss": 0.8733, "step": 7218 }, { "epoch": 1.130087664370695, "grad_norm": 1.5459614992141724, "learning_rate": 0.00013607038123167155, "loss": 0.616, "step": 7219 }, { "epoch": 1.1302442078897934, "grad_norm": 2.0902748107910156, "learning_rate": 0.00013604594330400782, "loss": 0.2964, "step": 7220 }, { "epoch": 1.1304007514088916, "grad_norm": 3.8055615425109863, "learning_rate": 0.00013602150537634408, "loss": 0.5847, "step": 7221 }, { "epoch": 1.13055729492799, "grad_norm": 2.048724412918091, "learning_rate": 0.00013599706744868033, "loss": 1.0193, "step": 7222 }, { "epoch": 1.1307138384470883, "grad_norm": 2.0475356578826904, "learning_rate": 0.0001359726295210166, 
"loss": 0.8073, "step": 7223 }, { "epoch": 1.1308703819661865, "grad_norm": 2.0801010131835938, "learning_rate": 0.00013594819159335288, "loss": 0.6983, "step": 7224 }, { "epoch": 1.131026925485285, "grad_norm": 2.2826006412506104, "learning_rate": 0.00013592375366568914, "loss": 0.78, "step": 7225 }, { "epoch": 1.1311834690043832, "grad_norm": 2.933180093765259, "learning_rate": 0.00013589931573802541, "loss": 0.6017, "step": 7226 }, { "epoch": 1.1313400125234816, "grad_norm": 2.438169240951538, "learning_rate": 0.00013587487781036166, "loss": 1.0778, "step": 7227 }, { "epoch": 1.1314965560425798, "grad_norm": 1.5362167358398438, "learning_rate": 0.00013585043988269794, "loss": 0.851, "step": 7228 }, { "epoch": 1.131653099561678, "grad_norm": 3.1869149208068848, "learning_rate": 0.00013582600195503422, "loss": 1.0545, "step": 7229 }, { "epoch": 1.1318096430807765, "grad_norm": 1.230353593826294, "learning_rate": 0.00013580156402737047, "loss": 0.9157, "step": 7230 }, { "epoch": 1.1319661865998747, "grad_norm": 1.9790470600128174, "learning_rate": 0.00013577712609970672, "loss": 1.0721, "step": 7231 }, { "epoch": 1.1321227301189731, "grad_norm": 2.1077675819396973, "learning_rate": 0.000135752688172043, "loss": 1.4632, "step": 7232 }, { "epoch": 1.1322792736380713, "grad_norm": 2.5034217834472656, "learning_rate": 0.00013572825024437925, "loss": 2.0202, "step": 7233 }, { "epoch": 1.1324358171571698, "grad_norm": 1.6274192333221436, "learning_rate": 0.00013570381231671553, "loss": 0.8832, "step": 7234 }, { "epoch": 1.132592360676268, "grad_norm": 1.7369862794876099, "learning_rate": 0.0001356793743890518, "loss": 0.5584, "step": 7235 }, { "epoch": 1.1327489041953662, "grad_norm": 2.9159839153289795, "learning_rate": 0.00013565493646138806, "loss": 0.8328, "step": 7236 }, { "epoch": 1.1329054477144647, "grad_norm": 1.95712411403656, "learning_rate": 0.0001356304985337243, "loss": 0.6125, "step": 7237 }, { "epoch": 1.1330619912335629, "grad_norm": 2.354888916015625, 
"learning_rate": 0.0001356060606060606, "loss": 1.3286, "step": 7238 }, { "epoch": 1.1332185347526613, "grad_norm": 0.7385460734367371, "learning_rate": 0.00013558162267839687, "loss": 0.303, "step": 7239 }, { "epoch": 1.1333750782717595, "grad_norm": 0.645324170589447, "learning_rate": 0.00013555718475073312, "loss": 0.2112, "step": 7240 }, { "epoch": 1.133531621790858, "grad_norm": 0.979313313961029, "learning_rate": 0.0001355327468230694, "loss": 0.4106, "step": 7241 }, { "epoch": 1.1336881653099562, "grad_norm": 0.601364254951477, "learning_rate": 0.00013550830889540565, "loss": 0.2252, "step": 7242 }, { "epoch": 1.1338447088290544, "grad_norm": 0.45826420187950134, "learning_rate": 0.00013548387096774193, "loss": 0.1746, "step": 7243 }, { "epoch": 1.1340012523481529, "grad_norm": 0.6122409105300903, "learning_rate": 0.0001354594330400782, "loss": 0.3237, "step": 7244 }, { "epoch": 1.134157795867251, "grad_norm": 0.7006223201751709, "learning_rate": 0.00013543499511241446, "loss": 0.2608, "step": 7245 }, { "epoch": 1.1343143393863495, "grad_norm": 0.7755613923072815, "learning_rate": 0.0001354105571847507, "loss": 0.2501, "step": 7246 }, { "epoch": 1.1344708829054477, "grad_norm": 0.7348597645759583, "learning_rate": 0.000135386119257087, "loss": 0.1775, "step": 7247 }, { "epoch": 1.134627426424546, "grad_norm": 1.9953992366790771, "learning_rate": 0.00013536168132942327, "loss": 0.3969, "step": 7248 }, { "epoch": 1.1347839699436444, "grad_norm": 0.8354098796844482, "learning_rate": 0.00013533724340175952, "loss": 0.3183, "step": 7249 }, { "epoch": 1.1349405134627426, "grad_norm": 0.8279335498809814, "learning_rate": 0.0001353128054740958, "loss": 0.3691, "step": 7250 }, { "epoch": 1.135097056981841, "grad_norm": 0.7575170397758484, "learning_rate": 0.00013528836754643205, "loss": 0.2994, "step": 7251 }, { "epoch": 1.1352536005009393, "grad_norm": 0.6239116787910461, "learning_rate": 0.00013526392961876833, "loss": 0.1918, "step": 7252 }, { "epoch": 
1.1354101440200375, "grad_norm": 1.2877776622772217, "learning_rate": 0.00013523949169110458, "loss": 0.45, "step": 7253 }, { "epoch": 1.135566687539136, "grad_norm": 1.0643291473388672, "learning_rate": 0.00013521505376344086, "loss": 0.3337, "step": 7254 }, { "epoch": 1.1357232310582341, "grad_norm": 1.3853017091751099, "learning_rate": 0.0001351906158357771, "loss": 0.4557, "step": 7255 }, { "epoch": 1.1358797745773326, "grad_norm": 1.043006181716919, "learning_rate": 0.00013516617790811338, "loss": 0.3404, "step": 7256 }, { "epoch": 1.1360363180964308, "grad_norm": 1.343376636505127, "learning_rate": 0.00013514173998044964, "loss": 0.3246, "step": 7257 }, { "epoch": 1.136192861615529, "grad_norm": 1.1656047105789185, "learning_rate": 0.00013511730205278591, "loss": 0.3422, "step": 7258 }, { "epoch": 1.1363494051346275, "grad_norm": 2.2045726776123047, "learning_rate": 0.0001350928641251222, "loss": 0.7908, "step": 7259 }, { "epoch": 1.1365059486537257, "grad_norm": 1.6047229766845703, "learning_rate": 0.00013506842619745844, "loss": 0.3087, "step": 7260 }, { "epoch": 1.1366624921728241, "grad_norm": 1.924752116203308, "learning_rate": 0.0001350439882697947, "loss": 0.6155, "step": 7261 }, { "epoch": 1.1368190356919223, "grad_norm": 1.1636427640914917, "learning_rate": 0.00013501955034213097, "loss": 0.4651, "step": 7262 }, { "epoch": 1.1369755792110205, "grad_norm": 1.3303437232971191, "learning_rate": 0.00013499511241446725, "loss": 0.3138, "step": 7263 }, { "epoch": 1.137132122730119, "grad_norm": 1.4854735136032104, "learning_rate": 0.0001349706744868035, "loss": 0.5431, "step": 7264 }, { "epoch": 1.1372886662492172, "grad_norm": 1.4238706827163696, "learning_rate": 0.00013494623655913978, "loss": 0.7749, "step": 7265 }, { "epoch": 1.1374452097683156, "grad_norm": 1.6186683177947998, "learning_rate": 0.00013492179863147603, "loss": 0.4627, "step": 7266 }, { "epoch": 1.1376017532874139, "grad_norm": 2.7942728996276855, "learning_rate": 0.0001348973607038123, 
"loss": 0.6011, "step": 7267 }, { "epoch": 1.1377582968065123, "grad_norm": 1.4344183206558228, "learning_rate": 0.0001348729227761486, "loss": 0.5155, "step": 7268 }, { "epoch": 1.1379148403256105, "grad_norm": 1.8099091053009033, "learning_rate": 0.00013484848484848484, "loss": 0.5645, "step": 7269 }, { "epoch": 1.1380713838447087, "grad_norm": 2.336829900741577, "learning_rate": 0.0001348240469208211, "loss": 0.6922, "step": 7270 }, { "epoch": 1.1382279273638072, "grad_norm": 2.197136640548706, "learning_rate": 0.00013479960899315737, "loss": 0.6654, "step": 7271 }, { "epoch": 1.1383844708829054, "grad_norm": 1.8803507089614868, "learning_rate": 0.00013477517106549365, "loss": 0.6727, "step": 7272 }, { "epoch": 1.1385410144020038, "grad_norm": 2.4235281944274902, "learning_rate": 0.0001347507331378299, "loss": 0.6877, "step": 7273 }, { "epoch": 1.138697557921102, "grad_norm": 6.200526237487793, "learning_rate": 0.00013472629521016618, "loss": 0.9574, "step": 7274 }, { "epoch": 1.1388541014402005, "grad_norm": 4.244660377502441, "learning_rate": 0.00013470185728250243, "loss": 1.4685, "step": 7275 }, { "epoch": 1.1390106449592987, "grad_norm": 1.6314693689346313, "learning_rate": 0.0001346774193548387, "loss": 0.6895, "step": 7276 }, { "epoch": 1.139167188478397, "grad_norm": 2.2845335006713867, "learning_rate": 0.00013465298142717496, "loss": 1.0343, "step": 7277 }, { "epoch": 1.1393237319974954, "grad_norm": 2.063843250274658, "learning_rate": 0.00013462854349951124, "loss": 1.037, "step": 7278 }, { "epoch": 1.1394802755165936, "grad_norm": 3.8249752521514893, "learning_rate": 0.0001346041055718475, "loss": 1.1499, "step": 7279 }, { "epoch": 1.139636819035692, "grad_norm": 2.1687839031219482, "learning_rate": 0.00013457966764418377, "loss": 1.3537, "step": 7280 }, { "epoch": 1.1397933625547902, "grad_norm": 1.7228095531463623, "learning_rate": 0.00013455522971652002, "loss": 1.3049, "step": 7281 }, { "epoch": 1.1399499060738885, "grad_norm": 3.0871424674987793, 
"learning_rate": 0.0001345307917888563, "loss": 1.2728, "step": 7282 }, { "epoch": 1.140106449592987, "grad_norm": 1.9572391510009766, "learning_rate": 0.00013450635386119258, "loss": 0.8322, "step": 7283 }, { "epoch": 1.1402629931120851, "grad_norm": 1.1564244031906128, "learning_rate": 0.00013448191593352883, "loss": 0.7581, "step": 7284 }, { "epoch": 1.1404195366311836, "grad_norm": 2.321030378341675, "learning_rate": 0.00013445747800586508, "loss": 0.7736, "step": 7285 }, { "epoch": 1.1405760801502818, "grad_norm": 3.072721481323242, "learning_rate": 0.00013443304007820136, "loss": 0.3721, "step": 7286 }, { "epoch": 1.14073262366938, "grad_norm": 2.0024454593658447, "learning_rate": 0.00013440860215053763, "loss": 0.6595, "step": 7287 }, { "epoch": 1.1408891671884784, "grad_norm": 0.9508635401725769, "learning_rate": 0.00013438416422287389, "loss": 0.3166, "step": 7288 }, { "epoch": 1.1410457107075767, "grad_norm": 0.390267014503479, "learning_rate": 0.00013435972629521014, "loss": 0.2544, "step": 7289 }, { "epoch": 1.141202254226675, "grad_norm": 0.6170450448989868, "learning_rate": 0.00013433528836754642, "loss": 0.2775, "step": 7290 }, { "epoch": 1.1413587977457733, "grad_norm": 0.7190653681755066, "learning_rate": 0.0001343108504398827, "loss": 0.2613, "step": 7291 }, { "epoch": 1.1415153412648715, "grad_norm": 0.6273028254508972, "learning_rate": 0.00013428641251221894, "loss": 0.1957, "step": 7292 }, { "epoch": 1.14167188478397, "grad_norm": 1.515903115272522, "learning_rate": 0.00013426197458455522, "loss": 0.3157, "step": 7293 }, { "epoch": 1.1418284283030682, "grad_norm": 0.493855744600296, "learning_rate": 0.00013423753665689147, "loss": 0.2462, "step": 7294 }, { "epoch": 1.1419849718221666, "grad_norm": 1.0583348274230957, "learning_rate": 0.00013421309872922775, "loss": 0.3651, "step": 7295 }, { "epoch": 1.1421415153412648, "grad_norm": 0.5995838642120361, "learning_rate": 0.00013418866080156403, "loss": 0.2172, "step": 7296 }, { "epoch": 
1.142298058860363, "grad_norm": 1.2257310152053833, "learning_rate": 0.00013416422287390028, "loss": 0.2177, "step": 7297 }, { "epoch": 1.1424546023794615, "grad_norm": 0.7221239805221558, "learning_rate": 0.00013413978494623653, "loss": 0.1958, "step": 7298 }, { "epoch": 1.1426111458985597, "grad_norm": 0.5078455209732056, "learning_rate": 0.0001341153470185728, "loss": 0.1939, "step": 7299 }, { "epoch": 1.1427676894176582, "grad_norm": 0.8579664826393127, "learning_rate": 0.00013409090909090906, "loss": 0.3449, "step": 7300 }, { "epoch": 1.1429242329367564, "grad_norm": 1.03110933303833, "learning_rate": 0.00013406647116324534, "loss": 0.3881, "step": 7301 }, { "epoch": 1.1430807764558548, "grad_norm": 0.700508713722229, "learning_rate": 0.00013404203323558162, "loss": 0.2653, "step": 7302 }, { "epoch": 1.143237319974953, "grad_norm": 1.0958391427993774, "learning_rate": 0.00013401759530791787, "loss": 0.4662, "step": 7303 }, { "epoch": 1.1433938634940513, "grad_norm": 1.1270815134048462, "learning_rate": 0.00013399315738025412, "loss": 0.2924, "step": 7304 }, { "epoch": 1.1435504070131497, "grad_norm": 1.0125340223312378, "learning_rate": 0.0001339687194525904, "loss": 0.4216, "step": 7305 }, { "epoch": 1.143706950532248, "grad_norm": 3.5537991523742676, "learning_rate": 0.00013394428152492668, "loss": 0.5891, "step": 7306 }, { "epoch": 1.1438634940513464, "grad_norm": 1.405709147453308, "learning_rate": 0.00013391984359726293, "loss": 0.4454, "step": 7307 }, { "epoch": 1.1440200375704446, "grad_norm": 1.4942262172698975, "learning_rate": 0.0001338954056695992, "loss": 0.6456, "step": 7308 }, { "epoch": 1.144176581089543, "grad_norm": 3.732208490371704, "learning_rate": 0.00013387096774193546, "loss": 0.3922, "step": 7309 }, { "epoch": 1.1443331246086412, "grad_norm": 1.3866475820541382, "learning_rate": 0.00013384652981427174, "loss": 0.7081, "step": 7310 }, { "epoch": 1.1444896681277394, "grad_norm": 0.8793286085128784, "learning_rate": 0.00013382209188660802, 
"loss": 0.2748, "step": 7311 }, { "epoch": 1.1446462116468379, "grad_norm": 1.271528959274292, "learning_rate": 0.00013379765395894427, "loss": 0.6008, "step": 7312 }, { "epoch": 1.144802755165936, "grad_norm": 1.1284598112106323, "learning_rate": 0.00013377321603128052, "loss": 0.5822, "step": 7313 }, { "epoch": 1.1449592986850345, "grad_norm": 1.5115302801132202, "learning_rate": 0.0001337487781036168, "loss": 0.4663, "step": 7314 }, { "epoch": 1.1451158422041328, "grad_norm": 1.522741675376892, "learning_rate": 0.00013372434017595308, "loss": 0.6408, "step": 7315 }, { "epoch": 1.145272385723231, "grad_norm": 3.165752649307251, "learning_rate": 0.00013369990224828933, "loss": 1.0592, "step": 7316 }, { "epoch": 1.1454289292423294, "grad_norm": 1.4550795555114746, "learning_rate": 0.0001336754643206256, "loss": 0.4573, "step": 7317 }, { "epoch": 1.1455854727614276, "grad_norm": 1.3216426372528076, "learning_rate": 0.00013365102639296186, "loss": 0.7879, "step": 7318 }, { "epoch": 1.145742016280526, "grad_norm": 3.195171356201172, "learning_rate": 0.00013362658846529814, "loss": 0.52, "step": 7319 }, { "epoch": 1.1458985597996243, "grad_norm": 2.3996787071228027, "learning_rate": 0.0001336021505376344, "loss": 0.7727, "step": 7320 }, { "epoch": 1.1460551033187225, "grad_norm": 1.751764178276062, "learning_rate": 0.00013357771260997066, "loss": 0.8789, "step": 7321 }, { "epoch": 1.146211646837821, "grad_norm": 1.3786290884017944, "learning_rate": 0.00013355327468230692, "loss": 0.3855, "step": 7322 }, { "epoch": 1.1463681903569192, "grad_norm": 1.4035483598709106, "learning_rate": 0.0001335288367546432, "loss": 0.7553, "step": 7323 }, { "epoch": 1.1465247338760176, "grad_norm": 2.2080776691436768, "learning_rate": 0.00013350439882697945, "loss": 1.1153, "step": 7324 }, { "epoch": 1.1466812773951158, "grad_norm": 3.5789425373077393, "learning_rate": 0.00013347996089931572, "loss": 0.7042, "step": 7325 }, { "epoch": 1.146837820914214, "grad_norm": 2.5934054851531982, 
"learning_rate": 0.000133455522971652, "loss": 1.1949, "step": 7326 }, { "epoch": 1.1469943644333125, "grad_norm": 1.6251215934753418, "learning_rate": 0.00013343108504398825, "loss": 0.7631, "step": 7327 }, { "epoch": 1.1471509079524107, "grad_norm": 2.1703507900238037, "learning_rate": 0.0001334066471163245, "loss": 0.8629, "step": 7328 }, { "epoch": 1.1473074514715091, "grad_norm": 2.7989585399627686, "learning_rate": 0.00013338220918866078, "loss": 1.0605, "step": 7329 }, { "epoch": 1.1474639949906074, "grad_norm": 2.1508922576904297, "learning_rate": 0.00013335777126099706, "loss": 0.7024, "step": 7330 }, { "epoch": 1.1476205385097056, "grad_norm": 3.7469875812530518, "learning_rate": 0.0001333333333333333, "loss": 1.3555, "step": 7331 }, { "epoch": 1.147777082028804, "grad_norm": 2.172651767730713, "learning_rate": 0.0001333088954056696, "loss": 1.0349, "step": 7332 }, { "epoch": 1.1479336255479022, "grad_norm": 3.3047091960906982, "learning_rate": 0.00013328445747800584, "loss": 1.2347, "step": 7333 }, { "epoch": 1.1480901690670007, "grad_norm": 2.5102274417877197, "learning_rate": 0.00013326001955034212, "loss": 1.2959, "step": 7334 }, { "epoch": 1.148246712586099, "grad_norm": 3.89743971824646, "learning_rate": 0.0001332355816226784, "loss": 0.487, "step": 7335 }, { "epoch": 1.1484032561051973, "grad_norm": 2.789970874786377, "learning_rate": 0.00013321114369501465, "loss": 0.7017, "step": 7336 }, { "epoch": 1.1485597996242956, "grad_norm": 2.094162702560425, "learning_rate": 0.0001331867057673509, "loss": 0.3889, "step": 7337 }, { "epoch": 1.1487163431433938, "grad_norm": 2.4462733268737793, "learning_rate": 0.00013316226783968718, "loss": 1.0959, "step": 7338 }, { "epoch": 1.1488728866624922, "grad_norm": 0.5095207095146179, "learning_rate": 0.00013313782991202346, "loss": 0.2746, "step": 7339 }, { "epoch": 1.1490294301815904, "grad_norm": 0.5467092394828796, "learning_rate": 0.0001331133919843597, "loss": 0.196, "step": 7340 }, { "epoch": 
1.1491859737006889, "grad_norm": 0.3943612277507782, "learning_rate": 0.000133088954056696, "loss": 0.2072, "step": 7341 }, { "epoch": 1.149342517219787, "grad_norm": 0.613156259059906, "learning_rate": 0.00013306451612903224, "loss": 0.2318, "step": 7342 }, { "epoch": 1.1494990607388855, "grad_norm": 0.4345967173576355, "learning_rate": 0.00013304007820136852, "loss": 0.1638, "step": 7343 }, { "epoch": 1.1496556042579837, "grad_norm": 0.9504840970039368, "learning_rate": 0.0001330156402737048, "loss": 0.2764, "step": 7344 }, { "epoch": 1.149812147777082, "grad_norm": 0.6306407451629639, "learning_rate": 0.00013299120234604105, "loss": 0.2428, "step": 7345 }, { "epoch": 1.1499686912961804, "grad_norm": 0.6337870955467224, "learning_rate": 0.0001329667644183773, "loss": 0.315, "step": 7346 }, { "epoch": 1.1501252348152786, "grad_norm": 0.47224870324134827, "learning_rate": 0.00013294232649071358, "loss": 0.2152, "step": 7347 }, { "epoch": 1.150281778334377, "grad_norm": 0.8735085725784302, "learning_rate": 0.00013291788856304983, "loss": 0.3225, "step": 7348 }, { "epoch": 1.1504383218534753, "grad_norm": 0.5820073485374451, "learning_rate": 0.0001328934506353861, "loss": 0.1715, "step": 7349 }, { "epoch": 1.1505948653725735, "grad_norm": 1.1216922998428345, "learning_rate": 0.00013286901270772238, "loss": 0.3605, "step": 7350 }, { "epoch": 1.150751408891672, "grad_norm": 1.0515015125274658, "learning_rate": 0.00013284457478005864, "loss": 0.381, "step": 7351 }, { "epoch": 1.1509079524107702, "grad_norm": 0.727737545967102, "learning_rate": 0.0001328201368523949, "loss": 0.3203, "step": 7352 }, { "epoch": 1.1510644959298686, "grad_norm": 1.1194623708724976, "learning_rate": 0.00013279569892473117, "loss": 0.3155, "step": 7353 }, { "epoch": 1.1512210394489668, "grad_norm": 0.822720468044281, "learning_rate": 0.00013277126099706744, "loss": 0.3207, "step": 7354 }, { "epoch": 1.151377582968065, "grad_norm": 1.256633996963501, "learning_rate": 0.0001327468230694037, 
"loss": 0.4093, "step": 7355 }, { "epoch": 1.1515341264871635, "grad_norm": 1.5676660537719727, "learning_rate": 0.00013272238514173997, "loss": 0.4382, "step": 7356 }, { "epoch": 1.1516906700062617, "grad_norm": 1.2648556232452393, "learning_rate": 0.00013269794721407622, "loss": 0.5514, "step": 7357 }, { "epoch": 1.1518472135253601, "grad_norm": 1.1596871614456177, "learning_rate": 0.0001326735092864125, "loss": 0.5204, "step": 7358 }, { "epoch": 1.1520037570444583, "grad_norm": 1.1163748502731323, "learning_rate": 0.00013264907135874878, "loss": 0.3169, "step": 7359 }, { "epoch": 1.1521603005635566, "grad_norm": 1.4104461669921875, "learning_rate": 0.00013262463343108503, "loss": 0.5897, "step": 7360 }, { "epoch": 1.152316844082655, "grad_norm": 0.9608981013298035, "learning_rate": 0.00013260019550342128, "loss": 0.5129, "step": 7361 }, { "epoch": 1.1524733876017532, "grad_norm": 1.5088484287261963, "learning_rate": 0.00013257575757575756, "loss": 0.6277, "step": 7362 }, { "epoch": 1.1526299311208517, "grad_norm": 1.8139734268188477, "learning_rate": 0.00013255131964809384, "loss": 0.6778, "step": 7363 }, { "epoch": 1.1527864746399499, "grad_norm": 1.7767802476882935, "learning_rate": 0.0001325268817204301, "loss": 0.6887, "step": 7364 }, { "epoch": 1.152943018159048, "grad_norm": 1.6628837585449219, "learning_rate": 0.00013250244379276637, "loss": 0.3635, "step": 7365 }, { "epoch": 1.1530995616781465, "grad_norm": 1.664116621017456, "learning_rate": 0.00013247800586510262, "loss": 0.4019, "step": 7366 }, { "epoch": 1.1532561051972448, "grad_norm": 1.989309549331665, "learning_rate": 0.0001324535679374389, "loss": 0.9246, "step": 7367 }, { "epoch": 1.1534126487163432, "grad_norm": 9.230467796325684, "learning_rate": 0.00013242913000977515, "loss": 0.8514, "step": 7368 }, { "epoch": 1.1535691922354414, "grad_norm": 1.3691836595535278, "learning_rate": 0.00013240469208211143, "loss": 0.4943, "step": 7369 }, { "epoch": 1.1537257357545398, "grad_norm": 
1.0850152969360352, "learning_rate": 0.00013238025415444768, "loss": 0.5152, "step": 7370 }, { "epoch": 1.153882279273638, "grad_norm": 2.6206274032592773, "learning_rate": 0.00013235581622678396, "loss": 0.5313, "step": 7371 }, { "epoch": 1.1540388227927363, "grad_norm": 1.9368460178375244, "learning_rate": 0.0001323313782991202, "loss": 0.5789, "step": 7372 }, { "epoch": 1.1541953663118347, "grad_norm": 3.7106120586395264, "learning_rate": 0.0001323069403714565, "loss": 1.0362, "step": 7373 }, { "epoch": 1.154351909830933, "grad_norm": 1.4057819843292236, "learning_rate": 0.00013228250244379277, "loss": 0.5185, "step": 7374 }, { "epoch": 1.1545084533500314, "grad_norm": 2.9499166011810303, "learning_rate": 0.00013225806451612902, "loss": 1.1545, "step": 7375 }, { "epoch": 1.1546649968691296, "grad_norm": 2.847531318664551, "learning_rate": 0.00013223362658846527, "loss": 0.9092, "step": 7376 }, { "epoch": 1.154821540388228, "grad_norm": 2.4017632007598877, "learning_rate": 0.00013220918866080155, "loss": 1.0395, "step": 7377 }, { "epoch": 1.1549780839073263, "grad_norm": 3.083125591278076, "learning_rate": 0.00013218475073313783, "loss": 0.9163, "step": 7378 }, { "epoch": 1.1551346274264245, "grad_norm": 1.6642545461654663, "learning_rate": 0.00013216031280547408, "loss": 0.7357, "step": 7379 }, { "epoch": 1.155291170945523, "grad_norm": 2.6173577308654785, "learning_rate": 0.00013213587487781036, "loss": 1.3048, "step": 7380 }, { "epoch": 1.1554477144646211, "grad_norm": 4.377566337585449, "learning_rate": 0.0001321114369501466, "loss": 1.3639, "step": 7381 }, { "epoch": 1.1556042579837196, "grad_norm": 2.4152894020080566, "learning_rate": 0.00013208699902248289, "loss": 1.1259, "step": 7382 }, { "epoch": 1.1557608015028178, "grad_norm": 2.7679460048675537, "learning_rate": 0.00013206256109481916, "loss": 0.9502, "step": 7383 }, { "epoch": 1.155917345021916, "grad_norm": 1.6410295963287354, "learning_rate": 0.00013203812316715541, "loss": 0.563, "step": 7384 }, 
{ "epoch": 1.1560738885410144, "grad_norm": 1.5366709232330322, "learning_rate": 0.00013201368523949167, "loss": 0.3714, "step": 7385 }, { "epoch": 1.1562304320601127, "grad_norm": 2.130624771118164, "learning_rate": 0.00013198924731182794, "loss": 0.5333, "step": 7386 }, { "epoch": 1.156386975579211, "grad_norm": 0.982833743095398, "learning_rate": 0.00013196480938416422, "loss": 0.1698, "step": 7387 }, { "epoch": 1.1565435190983093, "grad_norm": 3.6173794269561768, "learning_rate": 0.00013194037145650047, "loss": 1.0673, "step": 7388 }, { "epoch": 1.1567000626174075, "grad_norm": 0.45585983991622925, "learning_rate": 0.00013191593352883675, "loss": 0.2432, "step": 7389 }, { "epoch": 1.156856606136506, "grad_norm": 0.5601482391357422, "learning_rate": 0.000131891495601173, "loss": 0.2334, "step": 7390 }, { "epoch": 1.1570131496556042, "grad_norm": 0.5899141430854797, "learning_rate": 0.00013186705767350928, "loss": 0.2739, "step": 7391 }, { "epoch": 1.1571696931747026, "grad_norm": 0.6862410306930542, "learning_rate": 0.00013184261974584553, "loss": 0.1941, "step": 7392 }, { "epoch": 1.1573262366938009, "grad_norm": 0.8341695070266724, "learning_rate": 0.0001318181818181818, "loss": 0.2567, "step": 7393 }, { "epoch": 1.157482780212899, "grad_norm": 0.7754533290863037, "learning_rate": 0.00013179374389051806, "loss": 0.3397, "step": 7394 }, { "epoch": 1.1576393237319975, "grad_norm": 1.1983238458633423, "learning_rate": 0.00013176930596285434, "loss": 0.3212, "step": 7395 }, { "epoch": 1.1577958672510957, "grad_norm": 0.5611344575881958, "learning_rate": 0.0001317448680351906, "loss": 0.2555, "step": 7396 }, { "epoch": 1.1579524107701942, "grad_norm": 0.4834452271461487, "learning_rate": 0.00013172043010752687, "loss": 0.2091, "step": 7397 }, { "epoch": 1.1581089542892924, "grad_norm": 1.169364333152771, "learning_rate": 0.00013169599217986315, "loss": 0.3318, "step": 7398 }, { "epoch": 1.1582654978083906, "grad_norm": 0.8401133418083191, "learning_rate": 
0.0001316715542521994, "loss": 0.1914, "step": 7399 }, { "epoch": 1.158422041327489, "grad_norm": 1.0948725938796997, "learning_rate": 0.00013164711632453565, "loss": 0.4167, "step": 7400 }, { "epoch": 1.1585785848465873, "grad_norm": 0.6502231955528259, "learning_rate": 0.00013162267839687193, "loss": 0.3204, "step": 7401 }, { "epoch": 1.1587351283656857, "grad_norm": 0.8095880746841431, "learning_rate": 0.0001315982404692082, "loss": 0.2374, "step": 7402 }, { "epoch": 1.158891671884784, "grad_norm": 1.0704963207244873, "learning_rate": 0.00013157380254154446, "loss": 0.3654, "step": 7403 }, { "epoch": 1.1590482154038824, "grad_norm": 1.0541001558303833, "learning_rate": 0.00013154936461388074, "loss": 0.3836, "step": 7404 }, { "epoch": 1.1592047589229806, "grad_norm": 0.9816923141479492, "learning_rate": 0.000131524926686217, "loss": 0.4202, "step": 7405 }, { "epoch": 1.159361302442079, "grad_norm": 1.2711502313613892, "learning_rate": 0.00013150048875855327, "loss": 0.2639, "step": 7406 }, { "epoch": 1.1595178459611772, "grad_norm": 1.2869318723678589, "learning_rate": 0.00013147605083088955, "loss": 0.3089, "step": 7407 }, { "epoch": 1.1596743894802755, "grad_norm": 1.1554820537567139, "learning_rate": 0.0001314516129032258, "loss": 0.3307, "step": 7408 }, { "epoch": 1.159830932999374, "grad_norm": 1.7759408950805664, "learning_rate": 0.00013142717497556205, "loss": 0.8516, "step": 7409 }, { "epoch": 1.1599874765184721, "grad_norm": 1.0223851203918457, "learning_rate": 0.00013140273704789833, "loss": 0.3851, "step": 7410 }, { "epoch": 1.1601440200375706, "grad_norm": 1.761975646018982, "learning_rate": 0.0001313782991202346, "loss": 0.4526, "step": 7411 }, { "epoch": 1.1603005635566688, "grad_norm": 0.7343161702156067, "learning_rate": 0.00013135386119257086, "loss": 0.43, "step": 7412 }, { "epoch": 1.160457107075767, "grad_norm": 1.6459717750549316, "learning_rate": 0.00013132942326490713, "loss": 0.7205, "step": 7413 }, { "epoch": 1.1606136505948654, 
"grad_norm": 1.6329326629638672, "learning_rate": 0.00013130498533724339, "loss": 0.5562, "step": 7414 }, { "epoch": 1.1607701941139636, "grad_norm": 2.7363922595977783, "learning_rate": 0.00013128054740957964, "loss": 0.7984, "step": 7415 }, { "epoch": 1.160926737633062, "grad_norm": 1.381791114807129, "learning_rate": 0.00013125610948191592, "loss": 0.5795, "step": 7416 }, { "epoch": 1.1610832811521603, "grad_norm": 1.932962417602539, "learning_rate": 0.0001312316715542522, "loss": 0.3651, "step": 7417 }, { "epoch": 1.1612398246712585, "grad_norm": 1.733339786529541, "learning_rate": 0.00013120723362658845, "loss": 0.4857, "step": 7418 }, { "epoch": 1.161396368190357, "grad_norm": 1.2550915479660034, "learning_rate": 0.00013118279569892472, "loss": 0.5785, "step": 7419 }, { "epoch": 1.1615529117094552, "grad_norm": 1.603405475616455, "learning_rate": 0.00013115835777126097, "loss": 0.4028, "step": 7420 }, { "epoch": 1.1617094552285536, "grad_norm": 0.9606602191925049, "learning_rate": 0.00013113391984359725, "loss": 0.4517, "step": 7421 }, { "epoch": 1.1618659987476518, "grad_norm": 2.063321590423584, "learning_rate": 0.00013110948191593353, "loss": 0.6706, "step": 7422 }, { "epoch": 1.16202254226675, "grad_norm": 2.597672462463379, "learning_rate": 0.00013108504398826978, "loss": 0.7549, "step": 7423 }, { "epoch": 1.1621790857858485, "grad_norm": 3.000110149383545, "learning_rate": 0.00013106060606060603, "loss": 0.9992, "step": 7424 }, { "epoch": 1.1623356293049467, "grad_norm": 2.6195647716522217, "learning_rate": 0.0001310361681329423, "loss": 1.199, "step": 7425 }, { "epoch": 1.1624921728240452, "grad_norm": 3.0307538509368896, "learning_rate": 0.0001310117302052786, "loss": 0.9195, "step": 7426 }, { "epoch": 1.1626487163431434, "grad_norm": 3.3168554306030273, "learning_rate": 0.00013098729227761484, "loss": 1.1316, "step": 7427 }, { "epoch": 1.1628052598622416, "grad_norm": 2.105064630508423, "learning_rate": 0.00013096285434995112, "loss": 1.2571, "step": 
7428 }, { "epoch": 1.16296180338134, "grad_norm": 3.6035208702087402, "learning_rate": 0.00013093841642228737, "loss": 0.9023, "step": 7429 }, { "epoch": 1.1631183469004382, "grad_norm": 2.2905807495117188, "learning_rate": 0.00013091397849462365, "loss": 1.6656, "step": 7430 }, { "epoch": 1.1632748904195367, "grad_norm": 2.3140320777893066, "learning_rate": 0.00013088954056695993, "loss": 1.6509, "step": 7431 }, { "epoch": 1.163431433938635, "grad_norm": 4.755015850067139, "learning_rate": 0.00013086510263929618, "loss": 1.0378, "step": 7432 }, { "epoch": 1.1635879774577333, "grad_norm": 1.8031630516052246, "learning_rate": 0.00013084066471163243, "loss": 0.926, "step": 7433 }, { "epoch": 1.1637445209768316, "grad_norm": 1.6042096614837646, "learning_rate": 0.0001308162267839687, "loss": 0.3366, "step": 7434 }, { "epoch": 1.1639010644959298, "grad_norm": 1.8584673404693604, "learning_rate": 0.000130791788856305, "loss": 0.8145, "step": 7435 }, { "epoch": 1.1640576080150282, "grad_norm": 2.789856433868408, "learning_rate": 0.00013076735092864124, "loss": 0.6222, "step": 7436 }, { "epoch": 1.1642141515341264, "grad_norm": 2.3871476650238037, "learning_rate": 0.00013074291300097752, "loss": 0.7207, "step": 7437 }, { "epoch": 1.1643706950532249, "grad_norm": 2.0751960277557373, "learning_rate": 0.00013071847507331377, "loss": 0.6828, "step": 7438 }, { "epoch": 1.164527238572323, "grad_norm": 0.985443115234375, "learning_rate": 0.00013069403714565002, "loss": 0.183, "step": 7439 }, { "epoch": 1.1646837820914215, "grad_norm": 0.46885955333709717, "learning_rate": 0.0001306695992179863, "loss": 0.2147, "step": 7440 }, { "epoch": 1.1648403256105198, "grad_norm": 0.5241273641586304, "learning_rate": 0.00013064516129032258, "loss": 0.2076, "step": 7441 }, { "epoch": 1.164996869129618, "grad_norm": 0.5375959873199463, "learning_rate": 0.00013062072336265883, "loss": 0.1665, "step": 7442 }, { "epoch": 1.1651534126487164, "grad_norm": 0.7745464444160461, "learning_rate": 
0.0001305962854349951, "loss": 0.2586, "step": 7443 }, { "epoch": 1.1653099561678146, "grad_norm": 0.511017382144928, "learning_rate": 0.00013057184750733136, "loss": 0.2376, "step": 7444 }, { "epoch": 1.165466499686913, "grad_norm": 0.8513729572296143, "learning_rate": 0.00013054740957966764, "loss": 0.3418, "step": 7445 }, { "epoch": 1.1656230432060113, "grad_norm": 0.9605493545532227, "learning_rate": 0.00013052297165200391, "loss": 0.3008, "step": 7446 }, { "epoch": 1.1657795867251095, "grad_norm": 0.7531507015228271, "learning_rate": 0.00013049853372434016, "loss": 0.2765, "step": 7447 }, { "epoch": 1.165936130244208, "grad_norm": 0.8994906544685364, "learning_rate": 0.00013047409579667642, "loss": 0.3855, "step": 7448 }, { "epoch": 1.1660926737633062, "grad_norm": 0.48256492614746094, "learning_rate": 0.0001304496578690127, "loss": 0.2462, "step": 7449 }, { "epoch": 1.1662492172824046, "grad_norm": 1.1383583545684814, "learning_rate": 0.00013042521994134897, "loss": 0.3641, "step": 7450 }, { "epoch": 1.1664057608015028, "grad_norm": 1.2914742231369019, "learning_rate": 0.00013040078201368522, "loss": 0.2236, "step": 7451 }, { "epoch": 1.166562304320601, "grad_norm": 0.5207766890525818, "learning_rate": 0.0001303763440860215, "loss": 0.2485, "step": 7452 }, { "epoch": 1.1667188478396995, "grad_norm": 1.1940803527832031, "learning_rate": 0.00013035190615835775, "loss": 0.3618, "step": 7453 }, { "epoch": 1.1668753913587977, "grad_norm": 2.0108392238616943, "learning_rate": 0.00013032746823069403, "loss": 0.7458, "step": 7454 }, { "epoch": 1.1670319348778961, "grad_norm": 1.687880277633667, "learning_rate": 0.0001303030303030303, "loss": 0.22, "step": 7455 }, { "epoch": 1.1671884783969944, "grad_norm": 2.061474084854126, "learning_rate": 0.00013027859237536656, "loss": 0.3396, "step": 7456 }, { "epoch": 1.1673450219160926, "grad_norm": 0.789428174495697, "learning_rate": 0.0001302541544477028, "loss": 0.3342, "step": 7457 }, { "epoch": 1.167501565435191, 
"grad_norm": 1.0013548135757446, "learning_rate": 0.0001302297165200391, "loss": 0.4941, "step": 7458 }, { "epoch": 1.1676581089542892, "grad_norm": 1.2764099836349487, "learning_rate": 0.00013020527859237534, "loss": 0.4534, "step": 7459 }, { "epoch": 1.1678146524733877, "grad_norm": 1.8446581363677979, "learning_rate": 0.00013018084066471162, "loss": 0.9874, "step": 7460 }, { "epoch": 1.1679711959924859, "grad_norm": 1.0535941123962402, "learning_rate": 0.0001301564027370479, "loss": 0.2257, "step": 7461 }, { "epoch": 1.168127739511584, "grad_norm": 2.2027220726013184, "learning_rate": 0.00013013196480938415, "loss": 0.9646, "step": 7462 }, { "epoch": 1.1682842830306825, "grad_norm": 1.3150688409805298, "learning_rate": 0.0001301075268817204, "loss": 0.5224, "step": 7463 }, { "epoch": 1.1684408265497808, "grad_norm": 1.6204612255096436, "learning_rate": 0.00013008308895405668, "loss": 0.5533, "step": 7464 }, { "epoch": 1.1685973700688792, "grad_norm": 7.107356548309326, "learning_rate": 0.00013005865102639296, "loss": 1.438, "step": 7465 }, { "epoch": 1.1687539135879774, "grad_norm": 2.1435375213623047, "learning_rate": 0.0001300342130987292, "loss": 0.5204, "step": 7466 }, { "epoch": 1.1689104571070759, "grad_norm": 2.206997871398926, "learning_rate": 0.0001300097751710655, "loss": 0.8244, "step": 7467 }, { "epoch": 1.169067000626174, "grad_norm": 2.2694783210754395, "learning_rate": 0.00012998533724340174, "loss": 0.5618, "step": 7468 }, { "epoch": 1.1692235441452723, "grad_norm": 2.4387333393096924, "learning_rate": 0.00012996089931573802, "loss": 1.1019, "step": 7469 }, { "epoch": 1.1693800876643707, "grad_norm": 1.3861206769943237, "learning_rate": 0.0001299364613880743, "loss": 0.4208, "step": 7470 }, { "epoch": 1.169536631183469, "grad_norm": 1.4845746755599976, "learning_rate": 0.00012991202346041055, "loss": 0.6122, "step": 7471 }, { "epoch": 1.1696931747025674, "grad_norm": 3.262490749359131, "learning_rate": 0.0001298875855327468, "loss": 0.7805, 
"step": 7472 }, { "epoch": 1.1698497182216656, "grad_norm": 2.262986421585083, "learning_rate": 0.00012986314760508308, "loss": 0.5199, "step": 7473 }, { "epoch": 1.170006261740764, "grad_norm": 1.8733512163162231, "learning_rate": 0.00012983870967741936, "loss": 0.8374, "step": 7474 }, { "epoch": 1.1701628052598623, "grad_norm": 1.8209384679794312, "learning_rate": 0.0001298142717497556, "loss": 0.7151, "step": 7475 }, { "epoch": 1.1703193487789605, "grad_norm": 1.9471930265426636, "learning_rate": 0.00012978983382209188, "loss": 0.9015, "step": 7476 }, { "epoch": 1.170475892298059, "grad_norm": 2.2029271125793457, "learning_rate": 0.00012976539589442814, "loss": 0.5801, "step": 7477 }, { "epoch": 1.1706324358171571, "grad_norm": 2.9882936477661133, "learning_rate": 0.00012974095796676441, "loss": 1.2584, "step": 7478 }, { "epoch": 1.1707889793362556, "grad_norm": 2.2298552989959717, "learning_rate": 0.0001297165200391007, "loss": 0.8488, "step": 7479 }, { "epoch": 1.1709455228553538, "grad_norm": 2.1507680416107178, "learning_rate": 0.00012969208211143694, "loss": 0.9486, "step": 7480 }, { "epoch": 1.171102066374452, "grad_norm": 2.448796033859253, "learning_rate": 0.0001296676441837732, "loss": 1.1718, "step": 7481 }, { "epoch": 1.1712586098935505, "grad_norm": 1.807196021080017, "learning_rate": 0.00012964320625610947, "loss": 1.2157, "step": 7482 }, { "epoch": 1.1714151534126487, "grad_norm": 1.9326443672180176, "learning_rate": 0.00012961876832844572, "loss": 1.6794, "step": 7483 }, { "epoch": 1.1715716969317471, "grad_norm": 2.4775540828704834, "learning_rate": 0.000129594330400782, "loss": 1.0942, "step": 7484 }, { "epoch": 1.1717282404508453, "grad_norm": 3.0577571392059326, "learning_rate": 0.00012956989247311828, "loss": 0.9137, "step": 7485 }, { "epoch": 1.1718847839699436, "grad_norm": 1.4351956844329834, "learning_rate": 0.00012954545454545453, "loss": 1.0002, "step": 7486 }, { "epoch": 1.172041327489042, "grad_norm": 2.7734296321868896, 
"learning_rate": 0.00012952101661779078, "loss": 0.6121, "step": 7487 }, { "epoch": 1.1721978710081402, "grad_norm": 3.1361334323883057, "learning_rate": 0.00012949657869012706, "loss": 0.6366, "step": 7488 }, { "epoch": 1.1723544145272387, "grad_norm": 0.458670049905777, "learning_rate": 0.00012947214076246334, "loss": 0.2406, "step": 7489 }, { "epoch": 1.1725109580463369, "grad_norm": 0.4541315734386444, "learning_rate": 0.0001294477028347996, "loss": 0.1887, "step": 7490 }, { "epoch": 1.172667501565435, "grad_norm": 1.0763859748840332, "learning_rate": 0.00012942326490713587, "loss": 0.6726, "step": 7491 }, { "epoch": 1.1728240450845335, "grad_norm": 0.6392644643783569, "learning_rate": 0.00012939882697947212, "loss": 0.2505, "step": 7492 }, { "epoch": 1.1729805886036317, "grad_norm": 0.6221238374710083, "learning_rate": 0.0001293743890518084, "loss": 0.2844, "step": 7493 }, { "epoch": 1.1731371321227302, "grad_norm": 0.6850442886352539, "learning_rate": 0.00012934995112414468, "loss": 0.2483, "step": 7494 }, { "epoch": 1.1732936756418284, "grad_norm": 0.808659553527832, "learning_rate": 0.00012932551319648093, "loss": 0.2442, "step": 7495 }, { "epoch": 1.1734502191609266, "grad_norm": 0.7983716130256653, "learning_rate": 0.00012930107526881718, "loss": 0.3581, "step": 7496 }, { "epoch": 1.173606762680025, "grad_norm": 0.5840036273002625, "learning_rate": 0.00012927663734115346, "loss": 0.4542, "step": 7497 }, { "epoch": 1.1737633061991233, "grad_norm": 0.5525220036506653, "learning_rate": 0.00012925219941348974, "loss": 0.238, "step": 7498 }, { "epoch": 1.1739198497182217, "grad_norm": 1.0121021270751953, "learning_rate": 0.000129227761485826, "loss": 0.4005, "step": 7499 }, { "epoch": 1.17407639323732, "grad_norm": 2.506730556488037, "learning_rate": 0.00012920332355816227, "loss": 0.3244, "step": 7500 }, { "epoch": 1.1742329367564184, "grad_norm": 0.6981834173202515, "learning_rate": 0.00012917888563049852, "loss": 0.2825, "step": 7501 }, { "epoch": 
1.1743894802755166, "grad_norm": 1.0318002700805664, "learning_rate": 0.0001291544477028348, "loss": 0.2646, "step": 7502 }, { "epoch": 1.1745460237946148, "grad_norm": 1.0581806898117065, "learning_rate": 0.00012913000977517108, "loss": 0.3837, "step": 7503 }, { "epoch": 1.1747025673137133, "grad_norm": 1.221384048461914, "learning_rate": 0.00012910557184750733, "loss": 0.3138, "step": 7504 }, { "epoch": 1.1748591108328115, "grad_norm": 0.9835034608840942, "learning_rate": 0.00012908113391984358, "loss": 0.241, "step": 7505 }, { "epoch": 1.17501565435191, "grad_norm": 0.9940537810325623, "learning_rate": 0.00012905669599217986, "loss": 0.2297, "step": 7506 }, { "epoch": 1.1751721978710081, "grad_norm": 0.7861664891242981, "learning_rate": 0.0001290322580645161, "loss": 0.4561, "step": 7507 }, { "epoch": 1.1753287413901066, "grad_norm": 1.2852720022201538, "learning_rate": 0.00012900782013685239, "loss": 0.6284, "step": 7508 }, { "epoch": 1.1754852849092048, "grad_norm": 1.645680546760559, "learning_rate": 0.00012898338220918866, "loss": 0.5136, "step": 7509 }, { "epoch": 1.175641828428303, "grad_norm": 1.9410187005996704, "learning_rate": 0.00012895894428152492, "loss": 0.4755, "step": 7510 }, { "epoch": 1.1757983719474014, "grad_norm": 0.807033360004425, "learning_rate": 0.00012893450635386117, "loss": 0.3838, "step": 7511 }, { "epoch": 1.1759549154664997, "grad_norm": 3.5870234966278076, "learning_rate": 0.00012891006842619744, "loss": 0.3806, "step": 7512 }, { "epoch": 1.176111458985598, "grad_norm": 1.55286705493927, "learning_rate": 0.00012888563049853372, "loss": 0.443, "step": 7513 }, { "epoch": 1.1762680025046963, "grad_norm": 1.928011417388916, "learning_rate": 0.00012886119257086997, "loss": 0.8595, "step": 7514 }, { "epoch": 1.1764245460237945, "grad_norm": 1.085798978805542, "learning_rate": 0.00012883675464320625, "loss": 0.503, "step": 7515 }, { "epoch": 1.176581089542893, "grad_norm": 1.993884801864624, "learning_rate": 0.0001288123167155425, 
"loss": 0.4457, "step": 7516 }, { "epoch": 1.1767376330619912, "grad_norm": 1.3057135343551636, "learning_rate": 0.00012878787878787878, "loss": 0.6351, "step": 7517 }, { "epoch": 1.1768941765810896, "grad_norm": 2.9822328090667725, "learning_rate": 0.00012876344086021506, "loss": 0.8021, "step": 7518 }, { "epoch": 1.1770507201001879, "grad_norm": 1.463084101676941, "learning_rate": 0.0001287390029325513, "loss": 0.5327, "step": 7519 }, { "epoch": 1.177207263619286, "grad_norm": 1.026001214981079, "learning_rate": 0.00012871456500488756, "loss": 0.4669, "step": 7520 }, { "epoch": 1.1773638071383845, "grad_norm": 0.9538169503211975, "learning_rate": 0.00012869012707722384, "loss": 0.4046, "step": 7521 }, { "epoch": 1.1775203506574827, "grad_norm": 1.1642636060714722, "learning_rate": 0.00012866568914956012, "loss": 0.5993, "step": 7522 }, { "epoch": 1.1776768941765812, "grad_norm": 1.9186804294586182, "learning_rate": 0.00012864125122189637, "loss": 0.7352, "step": 7523 }, { "epoch": 1.1778334376956794, "grad_norm": 2.1181206703186035, "learning_rate": 0.00012861681329423265, "loss": 0.8638, "step": 7524 }, { "epoch": 1.1779899812147776, "grad_norm": 2.4149346351623535, "learning_rate": 0.0001285923753665689, "loss": 0.9885, "step": 7525 }, { "epoch": 1.178146524733876, "grad_norm": 1.8316506147384644, "learning_rate": 0.00012856793743890518, "loss": 0.8638, "step": 7526 }, { "epoch": 1.1783030682529743, "grad_norm": 1.9527243375778198, "learning_rate": 0.00012854349951124143, "loss": 0.8473, "step": 7527 }, { "epoch": 1.1784596117720727, "grad_norm": 2.2546331882476807, "learning_rate": 0.0001285190615835777, "loss": 1.134, "step": 7528 }, { "epoch": 1.178616155291171, "grad_norm": 1.679229497909546, "learning_rate": 0.00012849462365591396, "loss": 1.0515, "step": 7529 }, { "epoch": 1.1787726988102691, "grad_norm": 7.857295036315918, "learning_rate": 0.00012847018572825024, "loss": 1.3671, "step": 7530 }, { "epoch": 1.1789292423293676, "grad_norm": 
2.6357522010803223, "learning_rate": 0.0001284457478005865, "loss": 1.3619, "step": 7531 }, { "epoch": 1.1790857858484658, "grad_norm": 4.2092084884643555, "learning_rate": 0.00012842130987292277, "loss": 1.5904, "step": 7532 }, { "epoch": 1.1792423293675642, "grad_norm": 2.7293174266815186, "learning_rate": 0.00012839687194525905, "loss": 1.5906, "step": 7533 }, { "epoch": 1.1793988728866625, "grad_norm": 1.7189364433288574, "learning_rate": 0.0001283724340175953, "loss": 1.0352, "step": 7534 }, { "epoch": 1.179555416405761, "grad_norm": 3.9607129096984863, "learning_rate": 0.00012834799608993155, "loss": 0.4995, "step": 7535 }, { "epoch": 1.179711959924859, "grad_norm": 3.564509391784668, "learning_rate": 0.00012832355816226783, "loss": 0.8067, "step": 7536 }, { "epoch": 1.1798685034439573, "grad_norm": 2.912269115447998, "learning_rate": 0.0001282991202346041, "loss": 0.8116, "step": 7537 }, { "epoch": 1.1800250469630558, "grad_norm": 1.9242192506790161, "learning_rate": 0.00012827468230694036, "loss": 0.7333, "step": 7538 }, { "epoch": 1.180181590482154, "grad_norm": 0.3993604779243469, "learning_rate": 0.00012825024437927664, "loss": 0.1944, "step": 7539 }, { "epoch": 1.1803381340012524, "grad_norm": 0.4738137722015381, "learning_rate": 0.00012822580645161289, "loss": 0.1609, "step": 7540 }, { "epoch": 1.1804946775203506, "grad_norm": 0.41736552119255066, "learning_rate": 0.00012820136852394916, "loss": 0.2162, "step": 7541 }, { "epoch": 1.180651221039449, "grad_norm": 0.5973852276802063, "learning_rate": 0.00012817693059628544, "loss": 0.221, "step": 7542 }, { "epoch": 1.1808077645585473, "grad_norm": 0.42182105779647827, "learning_rate": 0.0001281524926686217, "loss": 0.1776, "step": 7543 }, { "epoch": 1.1809643080776455, "grad_norm": 1.2317596673965454, "learning_rate": 0.00012812805474095795, "loss": 0.2, "step": 7544 }, { "epoch": 1.181120851596744, "grad_norm": 0.7421743869781494, "learning_rate": 0.00012810361681329422, "loss": 0.2841, "step": 7545 }, { 
"epoch": 1.1812773951158422, "grad_norm": 0.7382256388664246, "learning_rate": 0.0001280791788856305, "loss": 0.2966, "step": 7546 }, { "epoch": 1.1814339386349406, "grad_norm": 0.6662998199462891, "learning_rate": 0.00012805474095796675, "loss": 0.2194, "step": 7547 }, { "epoch": 1.1815904821540388, "grad_norm": 0.5631325840950012, "learning_rate": 0.000128030303030303, "loss": 0.3308, "step": 7548 }, { "epoch": 1.181747025673137, "grad_norm": 0.8521823883056641, "learning_rate": 0.00012800586510263928, "loss": 0.2426, "step": 7549 }, { "epoch": 1.1819035691922355, "grad_norm": 0.6082648634910583, "learning_rate": 0.00012798142717497556, "loss": 0.2999, "step": 7550 }, { "epoch": 1.1820601127113337, "grad_norm": 1.2765244245529175, "learning_rate": 0.0001279569892473118, "loss": 0.5316, "step": 7551 }, { "epoch": 1.1822166562304322, "grad_norm": 1.1634889841079712, "learning_rate": 0.0001279325513196481, "loss": 0.5345, "step": 7552 }, { "epoch": 1.1823731997495304, "grad_norm": 1.5110682249069214, "learning_rate": 0.00012790811339198434, "loss": 0.3129, "step": 7553 }, { "epoch": 1.1825297432686286, "grad_norm": 0.8883544206619263, "learning_rate": 0.0001278836754643206, "loss": 0.2753, "step": 7554 }, { "epoch": 1.182686286787727, "grad_norm": 1.2819592952728271, "learning_rate": 0.00012785923753665687, "loss": 0.312, "step": 7555 }, { "epoch": 1.1828428303068252, "grad_norm": 1.9334875345230103, "learning_rate": 0.00012783479960899315, "loss": 0.5772, "step": 7556 }, { "epoch": 1.1829993738259237, "grad_norm": 1.7190932035446167, "learning_rate": 0.0001278103616813294, "loss": 0.4345, "step": 7557 }, { "epoch": 1.183155917345022, "grad_norm": 1.4696842432022095, "learning_rate": 0.00012778592375366568, "loss": 0.4088, "step": 7558 }, { "epoch": 1.1833124608641201, "grad_norm": 0.6554337739944458, "learning_rate": 0.00012776148582600193, "loss": 0.3475, "step": 7559 }, { "epoch": 1.1834690043832186, "grad_norm": 0.9007388353347778, "learning_rate": 
0.0001277370478983382, "loss": 0.3618, "step": 7560 }, { "epoch": 1.1836255479023168, "grad_norm": 2.4028732776641846, "learning_rate": 0.0001277126099706745, "loss": 0.511, "step": 7561 }, { "epoch": 1.1837820914214152, "grad_norm": 1.1445221900939941, "learning_rate": 0.00012768817204301074, "loss": 0.4563, "step": 7562 }, { "epoch": 1.1839386349405134, "grad_norm": 1.3807297945022583, "learning_rate": 0.000127663734115347, "loss": 0.3692, "step": 7563 }, { "epoch": 1.1840951784596117, "grad_norm": 1.8694292306900024, "learning_rate": 0.00012763929618768327, "loss": 0.7251, "step": 7564 }, { "epoch": 1.18425172197871, "grad_norm": 2.2322914600372314, "learning_rate": 0.00012761485826001955, "loss": 0.6297, "step": 7565 }, { "epoch": 1.1844082654978083, "grad_norm": 1.5895439386367798, "learning_rate": 0.0001275904203323558, "loss": 0.4646, "step": 7566 }, { "epoch": 1.1845648090169068, "grad_norm": 1.2254077196121216, "learning_rate": 0.00012756598240469208, "loss": 0.4732, "step": 7567 }, { "epoch": 1.184721352536005, "grad_norm": 2.8192131519317627, "learning_rate": 0.00012754154447702833, "loss": 0.4586, "step": 7568 }, { "epoch": 1.1848778960551034, "grad_norm": 1.3876596689224243, "learning_rate": 0.0001275171065493646, "loss": 0.5003, "step": 7569 }, { "epoch": 1.1850344395742016, "grad_norm": 3.00423264503479, "learning_rate": 0.00012749266862170088, "loss": 1.1688, "step": 7570 }, { "epoch": 1.1851909830932998, "grad_norm": 2.323437213897705, "learning_rate": 0.00012746823069403714, "loss": 0.6379, "step": 7571 }, { "epoch": 1.1853475266123983, "grad_norm": 1.7432076930999756, "learning_rate": 0.0001274437927663734, "loss": 0.8426, "step": 7572 }, { "epoch": 1.1855040701314965, "grad_norm": 3.104215621948242, "learning_rate": 0.00012741935483870967, "loss": 0.9046, "step": 7573 }, { "epoch": 1.185660613650595, "grad_norm": 1.629520058631897, "learning_rate": 0.00012739491691104592, "loss": 0.7857, "step": 7574 }, { "epoch": 1.1858171571696932, 
"grad_norm": 1.4630589485168457, "learning_rate": 0.0001273704789833822, "loss": 0.7239, "step": 7575 }, { "epoch": 1.1859737006887916, "grad_norm": 1.9049055576324463, "learning_rate": 0.00012734604105571847, "loss": 0.8086, "step": 7576 }, { "epoch": 1.1861302442078898, "grad_norm": 2.1848504543304443, "learning_rate": 0.00012732160312805472, "loss": 1.3258, "step": 7577 }, { "epoch": 1.186286787726988, "grad_norm": 3.515286922454834, "learning_rate": 0.00012729716520039098, "loss": 0.7483, "step": 7578 }, { "epoch": 1.1864433312460865, "grad_norm": 2.0993247032165527, "learning_rate": 0.00012727272727272725, "loss": 0.9094, "step": 7579 }, { "epoch": 1.1865998747651847, "grad_norm": 2.1793994903564453, "learning_rate": 0.00012724828934506353, "loss": 0.8514, "step": 7580 }, { "epoch": 1.1867564182842831, "grad_norm": 2.337986469268799, "learning_rate": 0.00012722385141739978, "loss": 0.89, "step": 7581 }, { "epoch": 1.1869129618033814, "grad_norm": 1.6112979650497437, "learning_rate": 0.00012719941348973606, "loss": 0.8589, "step": 7582 }, { "epoch": 1.1870695053224796, "grad_norm": 3.4834136962890625, "learning_rate": 0.0001271749755620723, "loss": 1.0901, "step": 7583 }, { "epoch": 1.187226048841578, "grad_norm": 2.0947000980377197, "learning_rate": 0.0001271505376344086, "loss": 1.0381, "step": 7584 }, { "epoch": 1.1873825923606762, "grad_norm": 3.770942211151123, "learning_rate": 0.00012712609970674487, "loss": 0.812, "step": 7585 }, { "epoch": 1.1875391358797747, "grad_norm": 3.0660061836242676, "learning_rate": 0.00012710166177908112, "loss": 0.4694, "step": 7586 }, { "epoch": 1.1876956793988729, "grad_norm": 2.4887964725494385, "learning_rate": 0.00012707722385141737, "loss": 0.5581, "step": 7587 }, { "epoch": 1.187852222917971, "grad_norm": 1.3472387790679932, "learning_rate": 0.00012705278592375365, "loss": 0.3082, "step": 7588 }, { "epoch": 1.1880087664370695, "grad_norm": 0.712585985660553, "learning_rate": 0.00012702834799608993, "loss": 0.2746, 
"step": 7589 }, { "epoch": 1.1881653099561678, "grad_norm": 0.5410864353179932, "learning_rate": 0.00012700391006842618, "loss": 0.2139, "step": 7590 }, { "epoch": 1.1883218534752662, "grad_norm": 0.34346261620521545, "learning_rate": 0.00012697947214076246, "loss": 0.1462, "step": 7591 }, { "epoch": 1.1884783969943644, "grad_norm": 0.6002309918403625, "learning_rate": 0.0001269550342130987, "loss": 0.2067, "step": 7592 }, { "epoch": 1.1886349405134626, "grad_norm": 0.6561062335968018, "learning_rate": 0.000126930596285435, "loss": 0.186, "step": 7593 }, { "epoch": 1.188791484032561, "grad_norm": 0.6932880878448486, "learning_rate": 0.00012690615835777127, "loss": 0.233, "step": 7594 }, { "epoch": 1.1889480275516593, "grad_norm": 1.0030970573425293, "learning_rate": 0.00012688172043010752, "loss": 0.409, "step": 7595 }, { "epoch": 1.1891045710707577, "grad_norm": 0.5043971538543701, "learning_rate": 0.00012685728250244377, "loss": 0.1951, "step": 7596 }, { "epoch": 1.189261114589856, "grad_norm": 0.5543043613433838, "learning_rate": 0.00012683284457478005, "loss": 0.2255, "step": 7597 }, { "epoch": 1.1894176581089542, "grad_norm": 0.7690470814704895, "learning_rate": 0.0001268084066471163, "loss": 0.324, "step": 7598 }, { "epoch": 1.1895742016280526, "grad_norm": 0.9051692485809326, "learning_rate": 0.00012678396871945258, "loss": 0.3889, "step": 7599 }, { "epoch": 1.1897307451471508, "grad_norm": 1.1886712312698364, "learning_rate": 0.00012675953079178886, "loss": 0.279, "step": 7600 }, { "epoch": 1.1898872886662493, "grad_norm": 0.8935725688934326, "learning_rate": 0.0001267350928641251, "loss": 0.2286, "step": 7601 }, { "epoch": 1.1900438321853475, "grad_norm": 0.8272036910057068, "learning_rate": 0.00012671065493646136, "loss": 0.2867, "step": 7602 }, { "epoch": 1.190200375704446, "grad_norm": 0.7367708086967468, "learning_rate": 0.00012668621700879764, "loss": 0.1458, "step": 7603 }, { "epoch": 1.1903569192235441, "grad_norm": 0.7323951721191406, 
"learning_rate": 0.00012666177908113391, "loss": 0.2471, "step": 7604 }, { "epoch": 1.1905134627426424, "grad_norm": 2.0071024894714355, "learning_rate": 0.00012663734115347017, "loss": 0.8892, "step": 7605 }, { "epoch": 1.1906700062617408, "grad_norm": 2.2433035373687744, "learning_rate": 0.00012661290322580644, "loss": 0.5891, "step": 7606 }, { "epoch": 1.190826549780839, "grad_norm": 2.180143356323242, "learning_rate": 0.0001265884652981427, "loss": 0.2786, "step": 7607 }, { "epoch": 1.1909830932999375, "grad_norm": 1.3844753503799438, "learning_rate": 0.00012656402737047897, "loss": 0.4072, "step": 7608 }, { "epoch": 1.1911396368190357, "grad_norm": 1.1671643257141113, "learning_rate": 0.00012653958944281525, "loss": 0.3149, "step": 7609 }, { "epoch": 1.1912961803381341, "grad_norm": 1.9654929637908936, "learning_rate": 0.0001265151515151515, "loss": 0.4545, "step": 7610 }, { "epoch": 1.1914527238572323, "grad_norm": 1.6887761354446411, "learning_rate": 0.00012649071358748775, "loss": 0.6904, "step": 7611 }, { "epoch": 1.1916092673763305, "grad_norm": 1.5617834329605103, "learning_rate": 0.00012646627565982403, "loss": 0.4628, "step": 7612 }, { "epoch": 1.191765810895429, "grad_norm": 4.053790092468262, "learning_rate": 0.0001264418377321603, "loss": 0.9632, "step": 7613 }, { "epoch": 1.1919223544145272, "grad_norm": 1.2538058757781982, "learning_rate": 0.00012641739980449656, "loss": 0.3495, "step": 7614 }, { "epoch": 1.1920788979336256, "grad_norm": 2.1369194984436035, "learning_rate": 0.00012639296187683284, "loss": 0.8079, "step": 7615 }, { "epoch": 1.1922354414527239, "grad_norm": 5.438715934753418, "learning_rate": 0.0001263685239491691, "loss": 0.7607, "step": 7616 }, { "epoch": 1.192391984971822, "grad_norm": 4.414690971374512, "learning_rate": 0.00012634408602150537, "loss": 0.5004, "step": 7617 }, { "epoch": 1.1925485284909205, "grad_norm": 3.980987310409546, "learning_rate": 0.00012631964809384162, "loss": 0.7119, "step": 7618 }, { "epoch": 
1.1927050720100187, "grad_norm": 2.606283664703369, "learning_rate": 0.0001262952101661779, "loss": 0.5477, "step": 7619 }, { "epoch": 1.1928616155291172, "grad_norm": 1.2772033214569092, "learning_rate": 0.00012627077223851415, "loss": 0.4294, "step": 7620 }, { "epoch": 1.1930181590482154, "grad_norm": 2.65388822555542, "learning_rate": 0.00012624633431085043, "loss": 0.9099, "step": 7621 }, { "epoch": 1.1931747025673136, "grad_norm": 2.885216236114502, "learning_rate": 0.00012622189638318668, "loss": 1.0008, "step": 7622 }, { "epoch": 1.193331246086412, "grad_norm": 1.257573127746582, "learning_rate": 0.00012619745845552296, "loss": 0.4982, "step": 7623 }, { "epoch": 1.1934877896055103, "grad_norm": 6.712513446807861, "learning_rate": 0.00012617302052785924, "loss": 1.2771, "step": 7624 }, { "epoch": 1.1936443331246087, "grad_norm": 2.6554524898529053, "learning_rate": 0.0001261485826001955, "loss": 0.7371, "step": 7625 }, { "epoch": 1.193800876643707, "grad_norm": 2.5295774936676025, "learning_rate": 0.00012612414467253174, "loss": 1.43, "step": 7626 }, { "epoch": 1.1939574201628051, "grad_norm": 3.8119919300079346, "learning_rate": 0.00012609970674486802, "loss": 1.3552, "step": 7627 }, { "epoch": 1.1941139636819036, "grad_norm": 3.1920182704925537, "learning_rate": 0.0001260752688172043, "loss": 1.7003, "step": 7628 }, { "epoch": 1.1942705072010018, "grad_norm": 3.4198391437530518, "learning_rate": 0.00012605083088954055, "loss": 0.8041, "step": 7629 }, { "epoch": 1.1944270507201002, "grad_norm": 2.1979782581329346, "learning_rate": 0.00012602639296187683, "loss": 0.9902, "step": 7630 }, { "epoch": 1.1945835942391985, "grad_norm": 2.4883174896240234, "learning_rate": 0.00012600195503421308, "loss": 1.122, "step": 7631 }, { "epoch": 1.1947401377582967, "grad_norm": 2.1847050189971924, "learning_rate": 0.00012597751710654936, "loss": 0.9887, "step": 7632 }, { "epoch": 1.1948966812773951, "grad_norm": 2.9857428073883057, "learning_rate": 0.00012595307917888563, 
"loss": 1.2614, "step": 7633 }, { "epoch": 1.1950532247964933, "grad_norm": 3.8710601329803467, "learning_rate": 0.00012592864125122189, "loss": 1.1563, "step": 7634 }, { "epoch": 1.1952097683155918, "grad_norm": 1.628251314163208, "learning_rate": 0.00012590420332355814, "loss": 0.281, "step": 7635 }, { "epoch": 1.19536631183469, "grad_norm": 2.4681708812713623, "learning_rate": 0.00012587976539589442, "loss": 0.8694, "step": 7636 }, { "epoch": 1.1955228553537884, "grad_norm": 1.361156940460205, "learning_rate": 0.0001258553274682307, "loss": 0.6151, "step": 7637 }, { "epoch": 1.1956793988728867, "grad_norm": 2.5139424800872803, "learning_rate": 0.00012583088954056695, "loss": 0.4191, "step": 7638 }, { "epoch": 1.195835942391985, "grad_norm": 0.4481685161590576, "learning_rate": 0.00012580645161290322, "loss": 0.266, "step": 7639 }, { "epoch": 1.1959924859110833, "grad_norm": 0.5807831287384033, "learning_rate": 0.00012578201368523947, "loss": 0.3183, "step": 7640 }, { "epoch": 1.1961490294301815, "grad_norm": 0.433891236782074, "learning_rate": 0.00012575757575757575, "loss": 0.203, "step": 7641 }, { "epoch": 1.19630557294928, "grad_norm": 0.3798646628856659, "learning_rate": 0.000125733137829912, "loss": 0.2185, "step": 7642 }, { "epoch": 1.1964621164683782, "grad_norm": 0.5597085952758789, "learning_rate": 0.00012570869990224828, "loss": 0.2084, "step": 7643 }, { "epoch": 1.1966186599874766, "grad_norm": 0.8188731670379639, "learning_rate": 0.00012568426197458453, "loss": 0.3176, "step": 7644 }, { "epoch": 1.1967752035065748, "grad_norm": 0.44461318850517273, "learning_rate": 0.0001256598240469208, "loss": 0.2583, "step": 7645 }, { "epoch": 1.196931747025673, "grad_norm": 0.6256333589553833, "learning_rate": 0.00012563538611925706, "loss": 0.2882, "step": 7646 }, { "epoch": 1.1970882905447715, "grad_norm": 0.5847500562667847, "learning_rate": 0.00012561094819159334, "loss": 0.2987, "step": 7647 }, { "epoch": 1.1972448340638697, "grad_norm": 0.4308290183544159, 
"learning_rate": 0.00012558651026392962, "loss": 0.2004, "step": 7648 }, { "epoch": 1.1974013775829682, "grad_norm": 0.6642153859138489, "learning_rate": 0.00012556207233626587, "loss": 0.2055, "step": 7649 }, { "epoch": 1.1975579211020664, "grad_norm": 1.537541151046753, "learning_rate": 0.00012553763440860212, "loss": 0.5965, "step": 7650 }, { "epoch": 1.1977144646211646, "grad_norm": 0.6113720536231995, "learning_rate": 0.0001255131964809384, "loss": 0.2567, "step": 7651 }, { "epoch": 1.197871008140263, "grad_norm": 1.1813939809799194, "learning_rate": 0.00012548875855327468, "loss": 0.3958, "step": 7652 }, { "epoch": 1.1980275516593613, "grad_norm": 1.415676474571228, "learning_rate": 0.00012546432062561093, "loss": 0.4031, "step": 7653 }, { "epoch": 1.1981840951784597, "grad_norm": 2.827056884765625, "learning_rate": 0.0001254398826979472, "loss": 0.39, "step": 7654 }, { "epoch": 1.198340638697558, "grad_norm": 1.0784145593643188, "learning_rate": 0.00012541544477028346, "loss": 0.4073, "step": 7655 }, { "epoch": 1.1984971822166561, "grad_norm": 1.3074734210968018, "learning_rate": 0.00012539100684261974, "loss": 0.3409, "step": 7656 }, { "epoch": 1.1986537257357546, "grad_norm": 5.250173091888428, "learning_rate": 0.00012536656891495602, "loss": 0.7839, "step": 7657 }, { "epoch": 1.1988102692548528, "grad_norm": 0.9771109223365784, "learning_rate": 0.00012534213098729227, "loss": 0.4488, "step": 7658 }, { "epoch": 1.1989668127739512, "grad_norm": 2.0896804332733154, "learning_rate": 0.00012531769305962852, "loss": 0.5873, "step": 7659 }, { "epoch": 1.1991233562930494, "grad_norm": 1.340659260749817, "learning_rate": 0.0001252932551319648, "loss": 0.3258, "step": 7660 }, { "epoch": 1.1992798998121477, "grad_norm": 2.5258636474609375, "learning_rate": 0.00012526881720430108, "loss": 0.434, "step": 7661 }, { "epoch": 1.199436443331246, "grad_norm": 0.8065431714057922, "learning_rate": 0.00012524437927663733, "loss": 0.4309, "step": 7662 }, { "epoch": 
1.1995929868503443, "grad_norm": 2.020216464996338, "learning_rate": 0.0001252199413489736, "loss": 0.8626, "step": 7663 }, { "epoch": 1.1997495303694428, "grad_norm": 1.528834581375122, "learning_rate": 0.00012519550342130986, "loss": 0.4851, "step": 7664 }, { "epoch": 1.199906073888541, "grad_norm": 1.1906168460845947, "learning_rate": 0.00012517106549364614, "loss": 0.4344, "step": 7665 }, { "epoch": 1.2000626174076394, "grad_norm": 1.7082940340042114, "learning_rate": 0.0001251466275659824, "loss": 0.9714, "step": 7666 }, { "epoch": 1.2002191609267376, "grad_norm": 1.5816830396652222, "learning_rate": 0.00012512218963831867, "loss": 0.4113, "step": 7667 }, { "epoch": 1.2003757044458359, "grad_norm": 1.9569361209869385, "learning_rate": 0.00012509775171065492, "loss": 0.9074, "step": 7668 }, { "epoch": 1.2005322479649343, "grad_norm": 2.3706414699554443, "learning_rate": 0.0001250733137829912, "loss": 0.6514, "step": 7669 }, { "epoch": 1.2006887914840325, "grad_norm": 2.0549864768981934, "learning_rate": 0.00012504887585532745, "loss": 0.6741, "step": 7670 }, { "epoch": 1.200845335003131, "grad_norm": 2.067312002182007, "learning_rate": 0.00012502443792766372, "loss": 0.7198, "step": 7671 }, { "epoch": 1.2010018785222292, "grad_norm": 2.430595874786377, "learning_rate": 0.000125, "loss": 0.9955, "step": 7672 }, { "epoch": 1.2011584220413276, "grad_norm": 1.7246347665786743, "learning_rate": 0.00012497556207233625, "loss": 0.8613, "step": 7673 }, { "epoch": 1.2013149655604258, "grad_norm": 3.665044069290161, "learning_rate": 0.0001249511241446725, "loss": 0.8444, "step": 7674 }, { "epoch": 1.201471509079524, "grad_norm": 3.734281539916992, "learning_rate": 0.00012492668621700878, "loss": 0.8613, "step": 7675 }, { "epoch": 1.2016280525986225, "grad_norm": 2.5038020610809326, "learning_rate": 0.00012490224828934506, "loss": 1.4506, "step": 7676 }, { "epoch": 1.2017845961177207, "grad_norm": 2.5142405033111572, "learning_rate": 0.0001248778103616813, "loss": 0.982, 
"step": 7677 }, { "epoch": 1.2019411396368191, "grad_norm": 4.540086269378662, "learning_rate": 0.0001248533724340176, "loss": 0.9359, "step": 7678 }, { "epoch": 1.2020976831559174, "grad_norm": 1.838870644569397, "learning_rate": 0.00012482893450635384, "loss": 0.6585, "step": 7679 }, { "epoch": 1.2022542266750156, "grad_norm": 3.0553319454193115, "learning_rate": 0.00012480449657869012, "loss": 1.318, "step": 7680 }, { "epoch": 1.202410770194114, "grad_norm": 3.3202075958251953, "learning_rate": 0.0001247800586510264, "loss": 1.5615, "step": 7681 }, { "epoch": 1.2025673137132122, "grad_norm": 5.532286643981934, "learning_rate": 0.00012475562072336265, "loss": 1.1045, "step": 7682 }, { "epoch": 1.2027238572323107, "grad_norm": 2.526785135269165, "learning_rate": 0.0001247311827956989, "loss": 0.7781, "step": 7683 }, { "epoch": 1.202880400751409, "grad_norm": 5.095958709716797, "learning_rate": 0.00012470674486803518, "loss": 0.9252, "step": 7684 }, { "epoch": 1.2030369442705071, "grad_norm": 2.2262609004974365, "learning_rate": 0.00012468230694037146, "loss": 0.793, "step": 7685 }, { "epoch": 1.2031934877896056, "grad_norm": 4.54476261138916, "learning_rate": 0.0001246578690127077, "loss": 1.4444, "step": 7686 }, { "epoch": 1.2033500313087038, "grad_norm": 3.232273578643799, "learning_rate": 0.000124633431085044, "loss": 1.1172, "step": 7687 }, { "epoch": 1.2035065748278022, "grad_norm": 2.0127787590026855, "learning_rate": 0.00012460899315738024, "loss": 0.6069, "step": 7688 }, { "epoch": 1.2036631183469004, "grad_norm": 0.4151703417301178, "learning_rate": 0.0001245845552297165, "loss": 0.2224, "step": 7689 }, { "epoch": 1.2038196618659986, "grad_norm": 0.4115196168422699, "learning_rate": 0.00012456011730205277, "loss": 0.2122, "step": 7690 }, { "epoch": 1.203976205385097, "grad_norm": 0.49626508355140686, "learning_rate": 0.00012453567937438905, "loss": 0.2164, "step": 7691 }, { "epoch": 1.2041327489041953, "grad_norm": 0.6475088000297546, "learning_rate": 
0.0001245112414467253, "loss": 0.1638, "step": 7692 }, { "epoch": 1.2042892924232937, "grad_norm": 0.5242400765419006, "learning_rate": 0.00012448680351906158, "loss": 0.1996, "step": 7693 }, { "epoch": 1.204445835942392, "grad_norm": 0.5632511973381042, "learning_rate": 0.00012446236559139783, "loss": 0.2789, "step": 7694 }, { "epoch": 1.2046023794614902, "grad_norm": 0.5035088658332825, "learning_rate": 0.0001244379276637341, "loss": 0.251, "step": 7695 }, { "epoch": 1.2047589229805886, "grad_norm": 0.42443281412124634, "learning_rate": 0.00012441348973607038, "loss": 0.2104, "step": 7696 }, { "epoch": 1.2049154664996868, "grad_norm": 4.329174518585205, "learning_rate": 0.00012438905180840664, "loss": 0.4027, "step": 7697 }, { "epoch": 1.2050720100187853, "grad_norm": 0.7891638875007629, "learning_rate": 0.0001243646138807429, "loss": 0.3523, "step": 7698 }, { "epoch": 1.2052285535378835, "grad_norm": 0.9422610998153687, "learning_rate": 0.00012434017595307917, "loss": 0.3149, "step": 7699 }, { "epoch": 1.205385097056982, "grad_norm": 1.680687427520752, "learning_rate": 0.00012431573802541544, "loss": 0.3606, "step": 7700 }, { "epoch": 1.2055416405760802, "grad_norm": 1.3018105030059814, "learning_rate": 0.0001242913000977517, "loss": 0.2848, "step": 7701 }, { "epoch": 1.2056981840951784, "grad_norm": 2.048955202102661, "learning_rate": 0.00012426686217008797, "loss": 0.2945, "step": 7702 }, { "epoch": 1.2058547276142768, "grad_norm": 1.392334222793579, "learning_rate": 0.00012424242424242422, "loss": 0.4641, "step": 7703 }, { "epoch": 1.206011271133375, "grad_norm": 1.0475993156433105, "learning_rate": 0.0001242179863147605, "loss": 0.4634, "step": 7704 }, { "epoch": 1.2061678146524735, "grad_norm": 1.2346774339675903, "learning_rate": 0.00012419354838709678, "loss": 0.5416, "step": 7705 }, { "epoch": 1.2063243581715717, "grad_norm": 1.159487009048462, "learning_rate": 0.00012416911045943303, "loss": 0.2803, "step": 7706 }, { "epoch": 1.2064809016906701, 
"grad_norm": 1.400073528289795, "learning_rate": 0.00012414467253176928, "loss": 0.2959, "step": 7707 }, { "epoch": 1.2066374452097683, "grad_norm": 0.6044692397117615, "learning_rate": 0.00012412023460410556, "loss": 0.2158, "step": 7708 }, { "epoch": 1.2067939887288666, "grad_norm": 1.8610830307006836, "learning_rate": 0.00012409579667644184, "loss": 0.6542, "step": 7709 }, { "epoch": 1.206950532247965, "grad_norm": 1.094446063041687, "learning_rate": 0.0001240713587487781, "loss": 0.4033, "step": 7710 }, { "epoch": 1.2071070757670632, "grad_norm": 4.96532678604126, "learning_rate": 0.00012404692082111437, "loss": 0.4836, "step": 7711 }, { "epoch": 1.2072636192861617, "grad_norm": 1.7114042043685913, "learning_rate": 0.00012402248289345062, "loss": 0.4183, "step": 7712 }, { "epoch": 1.2074201628052599, "grad_norm": 1.576427936553955, "learning_rate": 0.00012399804496578687, "loss": 0.4931, "step": 7713 }, { "epoch": 1.207576706324358, "grad_norm": 1.6242084503173828, "learning_rate": 0.00012397360703812315, "loss": 0.3988, "step": 7714 }, { "epoch": 1.2077332498434565, "grad_norm": 6.9016499519348145, "learning_rate": 0.00012394916911045943, "loss": 1.0594, "step": 7715 }, { "epoch": 1.2078897933625548, "grad_norm": 1.4505754709243774, "learning_rate": 0.00012392473118279568, "loss": 0.5999, "step": 7716 }, { "epoch": 1.2080463368816532, "grad_norm": 1.9598108530044556, "learning_rate": 0.00012390029325513196, "loss": 0.4765, "step": 7717 }, { "epoch": 1.2082028804007514, "grad_norm": 1.3153584003448486, "learning_rate": 0.0001238758553274682, "loss": 0.5586, "step": 7718 }, { "epoch": 1.2083594239198496, "grad_norm": 1.5149145126342773, "learning_rate": 0.0001238514173998045, "loss": 0.4036, "step": 7719 }, { "epoch": 1.208515967438948, "grad_norm": 2.801050901412964, "learning_rate": 0.00012382697947214077, "loss": 0.8044, "step": 7720 }, { "epoch": 1.2086725109580463, "grad_norm": 2.656320095062256, "learning_rate": 0.00012380254154447702, "loss": 0.7702, 
"step": 7721 }, { "epoch": 1.2088290544771447, "grad_norm": 3.1893441677093506, "learning_rate": 0.00012377810361681327, "loss": 0.8827, "step": 7722 }, { "epoch": 1.208985597996243, "grad_norm": 4.1985063552856445, "learning_rate": 0.00012375366568914955, "loss": 1.0204, "step": 7723 }, { "epoch": 1.2091421415153412, "grad_norm": 2.559026002883911, "learning_rate": 0.00012372922776148583, "loss": 0.9891, "step": 7724 }, { "epoch": 1.2092986850344396, "grad_norm": 1.904913067817688, "learning_rate": 0.00012370478983382208, "loss": 1.0455, "step": 7725 }, { "epoch": 1.2094552285535378, "grad_norm": 3.1764469146728516, "learning_rate": 0.00012368035190615836, "loss": 1.2292, "step": 7726 }, { "epoch": 1.2096117720726363, "grad_norm": 1.6916859149932861, "learning_rate": 0.0001236559139784946, "loss": 0.7425, "step": 7727 }, { "epoch": 1.2097683155917345, "grad_norm": 1.8501296043395996, "learning_rate": 0.00012363147605083089, "loss": 0.9403, "step": 7728 }, { "epoch": 1.2099248591108327, "grad_norm": 2.4505608081817627, "learning_rate": 0.00012360703812316716, "loss": 0.6932, "step": 7729 }, { "epoch": 1.2100814026299311, "grad_norm": 2.6362080574035645, "learning_rate": 0.00012358260019550342, "loss": 0.9876, "step": 7730 }, { "epoch": 1.2102379461490294, "grad_norm": 3.168712854385376, "learning_rate": 0.00012355816226783967, "loss": 1.1326, "step": 7731 }, { "epoch": 1.2103944896681278, "grad_norm": 1.8203378915786743, "learning_rate": 0.00012353372434017594, "loss": 0.9867, "step": 7732 }, { "epoch": 1.210551033187226, "grad_norm": 1.6555746793746948, "learning_rate": 0.0001235092864125122, "loss": 1.2667, "step": 7733 }, { "epoch": 1.2107075767063245, "grad_norm": 1.094726324081421, "learning_rate": 0.00012348484848484847, "loss": 0.3442, "step": 7734 }, { "epoch": 1.2108641202254227, "grad_norm": 2.5407097339630127, "learning_rate": 0.00012346041055718475, "loss": 0.5379, "step": 7735 }, { "epoch": 1.2110206637445209, "grad_norm": 1.3805251121520996, 
"learning_rate": 0.000123435972629521, "loss": 0.1715, "step": 7736 }, { "epoch": 1.2111772072636193, "grad_norm": 3.8366167545318604, "learning_rate": 0.00012341153470185726, "loss": 0.7459, "step": 7737 }, { "epoch": 1.2113337507827175, "grad_norm": 2.2765462398529053, "learning_rate": 0.00012338709677419353, "loss": 0.9848, "step": 7738 }, { "epoch": 1.211490294301816, "grad_norm": 0.41554513573646545, "learning_rate": 0.0001233626588465298, "loss": 0.2307, "step": 7739 }, { "epoch": 1.2116468378209142, "grad_norm": 0.5446311235427856, "learning_rate": 0.00012333822091886606, "loss": 0.1887, "step": 7740 }, { "epoch": 1.2118033813400126, "grad_norm": 0.42583855986595154, "learning_rate": 0.00012331378299120234, "loss": 0.2161, "step": 7741 }, { "epoch": 1.2119599248591109, "grad_norm": 0.40142467617988586, "learning_rate": 0.0001232893450635386, "loss": 0.2454, "step": 7742 }, { "epoch": 1.212116468378209, "grad_norm": 0.6083351969718933, "learning_rate": 0.00012326490713587487, "loss": 0.2714, "step": 7743 }, { "epoch": 1.2122730118973075, "grad_norm": 0.5035018920898438, "learning_rate": 0.00012324046920821115, "loss": 0.3219, "step": 7744 }, { "epoch": 1.2124295554164057, "grad_norm": 0.8749467730522156, "learning_rate": 0.0001232160312805474, "loss": 0.2845, "step": 7745 }, { "epoch": 1.2125860989355042, "grad_norm": 0.8860273957252502, "learning_rate": 0.00012319159335288365, "loss": 0.2443, "step": 7746 }, { "epoch": 1.2127426424546024, "grad_norm": 0.9518566727638245, "learning_rate": 0.00012316715542521993, "loss": 0.3225, "step": 7747 }, { "epoch": 1.2128991859737006, "grad_norm": 0.7895660996437073, "learning_rate": 0.0001231427174975562, "loss": 0.2441, "step": 7748 }, { "epoch": 1.213055729492799, "grad_norm": 0.9704890847206116, "learning_rate": 0.00012311827956989246, "loss": 0.3121, "step": 7749 }, { "epoch": 1.2132122730118973, "grad_norm": 0.8872887492179871, "learning_rate": 0.00012309384164222874, "loss": 0.302, "step": 7750 }, { "epoch": 
1.2133688165309957, "grad_norm": 0.5776874423027039, "learning_rate": 0.000123069403714565, "loss": 0.2676, "step": 7751 }, { "epoch": 1.213525360050094, "grad_norm": 0.9891446232795715, "learning_rate": 0.00012304496578690127, "loss": 0.3352, "step": 7752 }, { "epoch": 1.2136819035691921, "grad_norm": 1.1194006204605103, "learning_rate": 0.00012302052785923755, "loss": 0.3109, "step": 7753 }, { "epoch": 1.2138384470882906, "grad_norm": 1.1497498750686646, "learning_rate": 0.0001229960899315738, "loss": 0.3955, "step": 7754 }, { "epoch": 1.2139949906073888, "grad_norm": 2.879631519317627, "learning_rate": 0.00012297165200391005, "loss": 0.8381, "step": 7755 }, { "epoch": 1.2141515341264872, "grad_norm": 2.2817280292510986, "learning_rate": 0.00012294721407624633, "loss": 0.5012, "step": 7756 }, { "epoch": 1.2143080776455855, "grad_norm": 2.1993658542633057, "learning_rate": 0.00012292277614858258, "loss": 0.4131, "step": 7757 }, { "epoch": 1.2144646211646837, "grad_norm": 0.96397864818573, "learning_rate": 0.00012289833822091886, "loss": 0.3529, "step": 7758 }, { "epoch": 1.2146211646837821, "grad_norm": 1.8400605916976929, "learning_rate": 0.00012287390029325514, "loss": 0.3834, "step": 7759 }, { "epoch": 1.2147777082028803, "grad_norm": 2.0213780403137207, "learning_rate": 0.00012284946236559139, "loss": 0.6518, "step": 7760 }, { "epoch": 1.2149342517219788, "grad_norm": 1.1717514991760254, "learning_rate": 0.00012282502443792764, "loss": 0.5768, "step": 7761 }, { "epoch": 1.215090795241077, "grad_norm": 1.5490142107009888, "learning_rate": 0.00012280058651026392, "loss": 0.6203, "step": 7762 }, { "epoch": 1.2152473387601752, "grad_norm": 2.197330951690674, "learning_rate": 0.0001227761485826002, "loss": 0.8325, "step": 7763 }, { "epoch": 1.2154038822792737, "grad_norm": 1.72406804561615, "learning_rate": 0.00012275171065493645, "loss": 0.449, "step": 7764 }, { "epoch": 1.2155604257983719, "grad_norm": 2.904944658279419, "learning_rate": 0.00012272727272727272, 
"loss": 0.7901, "step": 7765 }, { "epoch": 1.2157169693174703, "grad_norm": 1.9773554801940918, "learning_rate": 0.00012270283479960898, "loss": 0.8409, "step": 7766 }, { "epoch": 1.2158735128365685, "grad_norm": 6.0343337059021, "learning_rate": 0.00012267839687194525, "loss": 1.4507, "step": 7767 }, { "epoch": 1.216030056355667, "grad_norm": 2.157370090484619, "learning_rate": 0.00012265395894428153, "loss": 0.9013, "step": 7768 }, { "epoch": 1.2161865998747652, "grad_norm": 2.05031418800354, "learning_rate": 0.00012262952101661778, "loss": 0.5475, "step": 7769 }, { "epoch": 1.2163431433938634, "grad_norm": 2.67240834236145, "learning_rate": 0.00012260508308895403, "loss": 1.015, "step": 7770 }, { "epoch": 1.2164996869129618, "grad_norm": 4.982744216918945, "learning_rate": 0.0001225806451612903, "loss": 1.03, "step": 7771 }, { "epoch": 1.21665623043206, "grad_norm": 2.2275023460388184, "learning_rate": 0.0001225562072336266, "loss": 0.9372, "step": 7772 }, { "epoch": 1.2168127739511585, "grad_norm": 1.2302268743515015, "learning_rate": 0.00012253176930596284, "loss": 0.4652, "step": 7773 }, { "epoch": 1.2169693174702567, "grad_norm": 2.873462200164795, "learning_rate": 0.00012250733137829912, "loss": 0.8191, "step": 7774 }, { "epoch": 1.2171258609893552, "grad_norm": 2.2558248043060303, "learning_rate": 0.00012248289345063537, "loss": 0.885, "step": 7775 }, { "epoch": 1.2172824045084534, "grad_norm": 2.1223719120025635, "learning_rate": 0.00012245845552297165, "loss": 0.7907, "step": 7776 }, { "epoch": 1.2174389480275516, "grad_norm": 2.519007682800293, "learning_rate": 0.0001224340175953079, "loss": 0.9959, "step": 7777 }, { "epoch": 1.21759549154665, "grad_norm": 2.0962483882904053, "learning_rate": 0.00012240957966764418, "loss": 0.7831, "step": 7778 }, { "epoch": 1.2177520350657483, "grad_norm": 2.0779409408569336, "learning_rate": 0.00012238514173998043, "loss": 0.5034, "step": 7779 }, { "epoch": 1.2179085785848467, "grad_norm": 1.5359145402908325, 
"learning_rate": 0.0001223607038123167, "loss": 0.5842, "step": 7780 }, { "epoch": 1.218065122103945, "grad_norm": 4.660392761230469, "learning_rate": 0.00012233626588465296, "loss": 1.1284, "step": 7781 }, { "epoch": 1.2182216656230431, "grad_norm": 2.8180553913116455, "learning_rate": 0.00012231182795698924, "loss": 0.5375, "step": 7782 }, { "epoch": 1.2183782091421416, "grad_norm": 2.4774529933929443, "learning_rate": 0.00012228739002932552, "loss": 1.0843, "step": 7783 }, { "epoch": 1.2185347526612398, "grad_norm": 4.466963768005371, "learning_rate": 0.00012226295210166177, "loss": 1.9087, "step": 7784 }, { "epoch": 1.2186912961803382, "grad_norm": 3.7716944217681885, "learning_rate": 0.00012223851417399802, "loss": 0.9215, "step": 7785 }, { "epoch": 1.2188478396994364, "grad_norm": 1.5880441665649414, "learning_rate": 0.0001222140762463343, "loss": 0.4391, "step": 7786 }, { "epoch": 1.2190043832185347, "grad_norm": 1.5129741430282593, "learning_rate": 0.00012218963831867058, "loss": 0.2122, "step": 7787 }, { "epoch": 1.219160926737633, "grad_norm": 2.9316365718841553, "learning_rate": 0.00012216520039100683, "loss": 0.9607, "step": 7788 }, { "epoch": 1.2193174702567313, "grad_norm": 0.47794780135154724, "learning_rate": 0.0001221407624633431, "loss": 0.2167, "step": 7789 }, { "epoch": 1.2194740137758298, "grad_norm": 0.9191654324531555, "learning_rate": 0.00012211632453567936, "loss": 0.2389, "step": 7790 }, { "epoch": 1.219630557294928, "grad_norm": 0.43953564763069153, "learning_rate": 0.00012209188660801564, "loss": 0.1648, "step": 7791 }, { "epoch": 1.2197871008140262, "grad_norm": 0.49689897894859314, "learning_rate": 0.0001220674486803519, "loss": 0.2088, "step": 7792 }, { "epoch": 1.2199436443331246, "grad_norm": 0.5962256193161011, "learning_rate": 0.00012204301075268817, "loss": 0.2505, "step": 7793 }, { "epoch": 1.2201001878522229, "grad_norm": 0.6489814519882202, "learning_rate": 0.00012201857282502443, "loss": 0.2664, "step": 7794 }, { "epoch": 
1.2202567313713213, "grad_norm": 0.46907347440719604, "learning_rate": 0.0001219941348973607, "loss": 0.2782, "step": 7795 }, { "epoch": 1.2204132748904195, "grad_norm": 0.5828919410705566, "learning_rate": 0.00012196969696969696, "loss": 0.1701, "step": 7796 }, { "epoch": 1.2205698184095177, "grad_norm": 0.8440871238708496, "learning_rate": 0.00012194525904203322, "loss": 0.2443, "step": 7797 }, { "epoch": 1.2207263619286162, "grad_norm": 0.6275129914283752, "learning_rate": 0.0001219208211143695, "loss": 0.193, "step": 7798 }, { "epoch": 1.2208829054477144, "grad_norm": 0.6567816138267517, "learning_rate": 0.00012189638318670575, "loss": 0.2657, "step": 7799 }, { "epoch": 1.2210394489668128, "grad_norm": 0.7370538711547852, "learning_rate": 0.00012187194525904202, "loss": 0.3339, "step": 7800 }, { "epoch": 1.221195992485911, "grad_norm": 0.6013770699501038, "learning_rate": 0.00012184750733137828, "loss": 0.2557, "step": 7801 }, { "epoch": 1.2213525360050095, "grad_norm": 2.4509990215301514, "learning_rate": 0.00012182306940371456, "loss": 0.7065, "step": 7802 }, { "epoch": 1.2215090795241077, "grad_norm": 0.5421009659767151, "learning_rate": 0.00012179863147605081, "loss": 0.1996, "step": 7803 }, { "epoch": 1.221665623043206, "grad_norm": 1.131841778755188, "learning_rate": 0.00012177419354838708, "loss": 0.2315, "step": 7804 }, { "epoch": 1.2218221665623044, "grad_norm": 1.3324615955352783, "learning_rate": 0.00012174975562072336, "loss": 0.4474, "step": 7805 }, { "epoch": 1.2219787100814026, "grad_norm": 0.9257820248603821, "learning_rate": 0.00012172531769305962, "loss": 0.4004, "step": 7806 }, { "epoch": 1.222135253600501, "grad_norm": 2.541618585586548, "learning_rate": 0.00012170087976539587, "loss": 0.8599, "step": 7807 }, { "epoch": 1.2222917971195992, "grad_norm": 1.269856333732605, "learning_rate": 0.00012167644183773215, "loss": 0.3127, "step": 7808 }, { "epoch": 1.2224483406386977, "grad_norm": 1.0655205249786377, "learning_rate": 
0.00012165200391006842, "loss": 0.3841, "step": 7809 }, { "epoch": 1.222604884157796, "grad_norm": 1.21709406375885, "learning_rate": 0.00012162756598240467, "loss": 0.472, "step": 7810 }, { "epoch": 1.222761427676894, "grad_norm": 1.1578439474105835, "learning_rate": 0.00012160312805474095, "loss": 0.4834, "step": 7811 }, { "epoch": 1.2229179711959925, "grad_norm": 1.1873836517333984, "learning_rate": 0.00012157869012707721, "loss": 0.462, "step": 7812 }, { "epoch": 1.2230745147150908, "grad_norm": 1.573827862739563, "learning_rate": 0.00012155425219941347, "loss": 0.7357, "step": 7813 }, { "epoch": 1.2232310582341892, "grad_norm": 2.879338502883911, "learning_rate": 0.00012152981427174975, "loss": 0.8596, "step": 7814 }, { "epoch": 1.2233876017532874, "grad_norm": 1.6422525644302368, "learning_rate": 0.000121505376344086, "loss": 0.2993, "step": 7815 }, { "epoch": 1.2235441452723856, "grad_norm": 1.4364542961120605, "learning_rate": 0.00012148093841642227, "loss": 0.3512, "step": 7816 }, { "epoch": 1.223700688791484, "grad_norm": 2.8140549659729004, "learning_rate": 0.00012145650048875855, "loss": 0.4175, "step": 7817 }, { "epoch": 1.2238572323105823, "grad_norm": 1.8453247547149658, "learning_rate": 0.00012143206256109481, "loss": 0.3645, "step": 7818 }, { "epoch": 1.2240137758296807, "grad_norm": 5.20889139175415, "learning_rate": 0.00012140762463343106, "loss": 0.8623, "step": 7819 }, { "epoch": 1.224170319348779, "grad_norm": 2.917222261428833, "learning_rate": 0.00012138318670576734, "loss": 0.7932, "step": 7820 }, { "epoch": 1.2243268628678772, "grad_norm": 3.251565933227539, "learning_rate": 0.00012135874877810361, "loss": 1.2039, "step": 7821 }, { "epoch": 1.2244834063869756, "grad_norm": 2.457594871520996, "learning_rate": 0.00012133431085043986, "loss": 0.6937, "step": 7822 }, { "epoch": 1.2246399499060738, "grad_norm": 1.1662980318069458, "learning_rate": 0.00012130987292277614, "loss": 0.5691, "step": 7823 }, { "epoch": 1.2247964934251723, 
"grad_norm": 4.571430206298828, "learning_rate": 0.0001212854349951124, "loss": 1.1899, "step": 7824 }, { "epoch": 1.2249530369442705, "grad_norm": 3.9596447944641113, "learning_rate": 0.00012126099706744867, "loss": 0.8965, "step": 7825 }, { "epoch": 1.2251095804633687, "grad_norm": 3.642796039581299, "learning_rate": 0.00012123655913978494, "loss": 1.2272, "step": 7826 }, { "epoch": 1.2252661239824671, "grad_norm": 2.4847941398620605, "learning_rate": 0.0001212121212121212, "loss": 1.4294, "step": 7827 }, { "epoch": 1.2254226675015654, "grad_norm": 5.294003963470459, "learning_rate": 0.00012118768328445746, "loss": 1.0586, "step": 7828 }, { "epoch": 1.2255792110206638, "grad_norm": 4.286756992340088, "learning_rate": 0.00012116324535679374, "loss": 1.0392, "step": 7829 }, { "epoch": 1.225735754539762, "grad_norm": 3.1308891773223877, "learning_rate": 0.00012113880742913, "loss": 0.8807, "step": 7830 }, { "epoch": 1.2258922980588602, "grad_norm": 4.852659702301025, "learning_rate": 0.00012111436950146625, "loss": 1.1662, "step": 7831 }, { "epoch": 1.2260488415779587, "grad_norm": 4.063992500305176, "learning_rate": 0.00012108993157380253, "loss": 1.4974, "step": 7832 }, { "epoch": 1.226205385097057, "grad_norm": 3.3738880157470703, "learning_rate": 0.0001210654936461388, "loss": 1.2586, "step": 7833 }, { "epoch": 1.2263619286161553, "grad_norm": 2.590223550796509, "learning_rate": 0.00012104105571847505, "loss": 0.4073, "step": 7834 }, { "epoch": 1.2265184721352536, "grad_norm": 2.9616923332214355, "learning_rate": 0.00012101661779081133, "loss": 0.8075, "step": 7835 }, { "epoch": 1.226675015654352, "grad_norm": 3.8399484157562256, "learning_rate": 0.00012099217986314759, "loss": 0.8251, "step": 7836 }, { "epoch": 1.2268315591734502, "grad_norm": 2.232974052429199, "learning_rate": 0.00012096774193548386, "loss": 0.8066, "step": 7837 }, { "epoch": 1.2269881026925484, "grad_norm": 2.5134973526000977, "learning_rate": 0.00012094330400782014, "loss": 0.7184, "step": 
7838 }, { "epoch": 1.2271446462116469, "grad_norm": 0.7725585699081421, "learning_rate": 0.00012091886608015639, "loss": 0.2581, "step": 7839 }, { "epoch": 1.227301189730745, "grad_norm": 0.4113602936267853, "learning_rate": 0.00012089442815249265, "loss": 0.21, "step": 7840 }, { "epoch": 1.2274577332498435, "grad_norm": 0.737152636051178, "learning_rate": 0.00012086999022482893, "loss": 0.2286, "step": 7841 }, { "epoch": 1.2276142767689417, "grad_norm": 0.8976015448570251, "learning_rate": 0.00012084555229716518, "loss": 0.197, "step": 7842 }, { "epoch": 1.2277708202880402, "grad_norm": 0.47844988107681274, "learning_rate": 0.00012082111436950145, "loss": 0.2104, "step": 7843 }, { "epoch": 1.2279273638071384, "grad_norm": 0.49797824025154114, "learning_rate": 0.00012079667644183772, "loss": 0.2007, "step": 7844 }, { "epoch": 1.2280839073262366, "grad_norm": 0.6173357963562012, "learning_rate": 0.00012077223851417399, "loss": 0.2887, "step": 7845 }, { "epoch": 1.228240450845335, "grad_norm": 0.406225323677063, "learning_rate": 0.00012074780058651024, "loss": 0.1982, "step": 7846 }, { "epoch": 1.2283969943644333, "grad_norm": 0.7921119332313538, "learning_rate": 0.00012072336265884652, "loss": 0.3479, "step": 7847 }, { "epoch": 1.2285535378835317, "grad_norm": 1.66916024684906, "learning_rate": 0.00012069892473118278, "loss": 0.3086, "step": 7848 }, { "epoch": 1.22871008140263, "grad_norm": 0.6646075248718262, "learning_rate": 0.00012067448680351905, "loss": 0.3681, "step": 7849 }, { "epoch": 1.2288666249217282, "grad_norm": 1.726301670074463, "learning_rate": 0.00012065004887585533, "loss": 0.6128, "step": 7850 }, { "epoch": 1.2290231684408266, "grad_norm": 0.8818532824516296, "learning_rate": 0.00012062561094819158, "loss": 0.3387, "step": 7851 }, { "epoch": 1.2291797119599248, "grad_norm": 0.7736045718193054, "learning_rate": 0.00012060117302052784, "loss": 0.2012, "step": 7852 }, { "epoch": 1.2293362554790233, "grad_norm": 0.7957243919372559, "learning_rate": 
0.00012057673509286412, "loss": 0.3497, "step": 7853 }, { "epoch": 1.2294927989981215, "grad_norm": 1.0294607877731323, "learning_rate": 0.00012055229716520037, "loss": 0.3375, "step": 7854 }, { "epoch": 1.2296493425172197, "grad_norm": 3.3992855548858643, "learning_rate": 0.00012052785923753664, "loss": 0.4712, "step": 7855 }, { "epoch": 1.2298058860363181, "grad_norm": 2.0325255393981934, "learning_rate": 0.00012050342130987292, "loss": 0.208, "step": 7856 }, { "epoch": 1.2299624295554163, "grad_norm": 1.33167564868927, "learning_rate": 0.00012047898338220918, "loss": 0.3885, "step": 7857 }, { "epoch": 1.2301189730745148, "grad_norm": 2.1962788105010986, "learning_rate": 0.00012045454545454543, "loss": 0.3926, "step": 7858 }, { "epoch": 1.230275516593613, "grad_norm": 1.1187773942947388, "learning_rate": 0.00012043010752688171, "loss": 0.5303, "step": 7859 }, { "epoch": 1.2304320601127112, "grad_norm": 1.26564621925354, "learning_rate": 0.00012040566959921797, "loss": 0.373, "step": 7860 }, { "epoch": 1.2305886036318097, "grad_norm": 1.3402997255325317, "learning_rate": 0.00012038123167155424, "loss": 0.36, "step": 7861 }, { "epoch": 1.2307451471509079, "grad_norm": 1.738292932510376, "learning_rate": 0.00012035679374389052, "loss": 0.5391, "step": 7862 }, { "epoch": 1.2309016906700063, "grad_norm": 2.344097137451172, "learning_rate": 0.00012033235581622677, "loss": 0.619, "step": 7863 }, { "epoch": 1.2310582341891045, "grad_norm": 1.4428097009658813, "learning_rate": 0.00012030791788856303, "loss": 0.3952, "step": 7864 }, { "epoch": 1.2312147777082028, "grad_norm": 2.1929662227630615, "learning_rate": 0.00012028347996089931, "loss": 0.578, "step": 7865 }, { "epoch": 1.2313713212273012, "grad_norm": 1.8685600757598877, "learning_rate": 0.00012025904203323556, "loss": 0.8856, "step": 7866 }, { "epoch": 1.2315278647463994, "grad_norm": 2.1994404792785645, "learning_rate": 0.00012023460410557183, "loss": 0.4865, "step": 7867 }, { "epoch": 1.2316844082654979, 
"grad_norm": 8.819302558898926, "learning_rate": 0.00012021016617790811, "loss": 1.14, "step": 7868 }, { "epoch": 1.231840951784596, "grad_norm": 2.020968437194824, "learning_rate": 0.00012018572825024437, "loss": 0.8263, "step": 7869 }, { "epoch": 1.2319974953036945, "grad_norm": 1.5616576671600342, "learning_rate": 0.00012016129032258062, "loss": 0.5087, "step": 7870 }, { "epoch": 1.2321540388227927, "grad_norm": 1.2136770486831665, "learning_rate": 0.0001201368523949169, "loss": 0.6024, "step": 7871 }, { "epoch": 1.2323105823418912, "grad_norm": 1.3263853788375854, "learning_rate": 0.00012011241446725317, "loss": 0.585, "step": 7872 }, { "epoch": 1.2324671258609894, "grad_norm": 3.187013864517212, "learning_rate": 0.00012008797653958943, "loss": 0.9524, "step": 7873 }, { "epoch": 1.2326236693800876, "grad_norm": 4.589766979217529, "learning_rate": 0.00012006353861192571, "loss": 0.8727, "step": 7874 }, { "epoch": 1.232780212899186, "grad_norm": 1.6198341846466064, "learning_rate": 0.00012003910068426196, "loss": 0.5955, "step": 7875 }, { "epoch": 1.2329367564182843, "grad_norm": 2.6360957622528076, "learning_rate": 0.00012001466275659823, "loss": 0.9213, "step": 7876 }, { "epoch": 1.2330932999373827, "grad_norm": 2.5727317333221436, "learning_rate": 0.0001199902248289345, "loss": 0.9638, "step": 7877 }, { "epoch": 1.233249843456481, "grad_norm": 3.1968390941619873, "learning_rate": 0.00011996578690127075, "loss": 1.1984, "step": 7878 }, { "epoch": 1.2334063869755791, "grad_norm": 5.276712417602539, "learning_rate": 0.00011994134897360702, "loss": 0.7227, "step": 7879 }, { "epoch": 1.2335629304946776, "grad_norm": 2.5714964866638184, "learning_rate": 0.0001199169110459433, "loss": 1.0927, "step": 7880 }, { "epoch": 1.2337194740137758, "grad_norm": 2.9321186542510986, "learning_rate": 0.00011989247311827956, "loss": 0.8852, "step": 7881 }, { "epoch": 1.2338760175328742, "grad_norm": 3.1245834827423096, "learning_rate": 0.00011986803519061581, "loss": 1.2498, 
"step": 7882 }, { "epoch": 1.2340325610519725, "grad_norm": 3.5027828216552734, "learning_rate": 0.00011984359726295209, "loss": 1.1943, "step": 7883 }, { "epoch": 1.2341891045710707, "grad_norm": 2.125910520553589, "learning_rate": 0.00011981915933528836, "loss": 1.0542, "step": 7884 }, { "epoch": 1.2343456480901691, "grad_norm": 1.8235660791397095, "learning_rate": 0.00011979472140762462, "loss": 0.5816, "step": 7885 }, { "epoch": 1.2345021916092673, "grad_norm": 3.679835796356201, "learning_rate": 0.00011977028347996089, "loss": 1.4135, "step": 7886 }, { "epoch": 1.2346587351283658, "grad_norm": 6.588844299316406, "learning_rate": 0.00011974584555229715, "loss": 1.178, "step": 7887 }, { "epoch": 1.234815278647464, "grad_norm": 2.3670153617858887, "learning_rate": 0.00011972140762463342, "loss": 1.2308, "step": 7888 }, { "epoch": 1.2349718221665622, "grad_norm": 0.49170297384262085, "learning_rate": 0.0001196969696969697, "loss": 0.282, "step": 7889 }, { "epoch": 1.2351283656856606, "grad_norm": 0.5877807140350342, "learning_rate": 0.00011967253176930595, "loss": 0.2686, "step": 7890 }, { "epoch": 1.2352849092047589, "grad_norm": 0.6519326567649841, "learning_rate": 0.00011964809384164221, "loss": 0.2191, "step": 7891 }, { "epoch": 1.2354414527238573, "grad_norm": 0.7324579954147339, "learning_rate": 0.00011962365591397849, "loss": 0.2397, "step": 7892 }, { "epoch": 1.2355979962429555, "grad_norm": 0.5463114976882935, "learning_rate": 0.00011959921798631475, "loss": 0.159, "step": 7893 }, { "epoch": 1.2357545397620537, "grad_norm": 0.6586346626281738, "learning_rate": 0.000119574780058651, "loss": 0.2038, "step": 7894 }, { "epoch": 1.2359110832811522, "grad_norm": 0.4430273175239563, "learning_rate": 0.00011955034213098728, "loss": 0.2114, "step": 7895 }, { "epoch": 1.2360676268002504, "grad_norm": 0.7554447054862976, "learning_rate": 0.00011952590420332355, "loss": 0.2591, "step": 7896 }, { "epoch": 1.2362241703193488, "grad_norm": 0.531594455242157, 
"learning_rate": 0.00011950146627565981, "loss": 0.2403, "step": 7897 }, { "epoch": 1.236380713838447, "grad_norm": 0.8028531670570374, "learning_rate": 0.00011947702834799608, "loss": 0.2436, "step": 7898 }, { "epoch": 1.2365372573575455, "grad_norm": 0.8752454519271851, "learning_rate": 0.00011945259042033234, "loss": 0.3462, "step": 7899 }, { "epoch": 1.2366938008766437, "grad_norm": 1.4788440465927124, "learning_rate": 0.00011942815249266861, "loss": 0.621, "step": 7900 }, { "epoch": 1.236850344395742, "grad_norm": 1.0307644605636597, "learning_rate": 0.00011940371456500489, "loss": 0.6322, "step": 7901 }, { "epoch": 1.2370068879148404, "grad_norm": 0.6076186299324036, "learning_rate": 0.00011937927663734114, "loss": 0.3076, "step": 7902 }, { "epoch": 1.2371634314339386, "grad_norm": 1.0006450414657593, "learning_rate": 0.0001193548387096774, "loss": 0.2975, "step": 7903 }, { "epoch": 1.237319974953037, "grad_norm": 1.008905053138733, "learning_rate": 0.00011933040078201368, "loss": 0.3799, "step": 7904 }, { "epoch": 1.2374765184721352, "grad_norm": 3.882817506790161, "learning_rate": 0.00011930596285434995, "loss": 0.9816, "step": 7905 }, { "epoch": 1.2376330619912337, "grad_norm": 1.3156336545944214, "learning_rate": 0.0001192815249266862, "loss": 0.4128, "step": 7906 }, { "epoch": 1.237789605510332, "grad_norm": 1.1643537282943726, "learning_rate": 0.00011925708699902247, "loss": 0.5488, "step": 7907 }, { "epoch": 1.2379461490294301, "grad_norm": 1.2683813571929932, "learning_rate": 0.00011923264907135874, "loss": 0.595, "step": 7908 }, { "epoch": 1.2381026925485286, "grad_norm": 1.6245311498641968, "learning_rate": 0.000119208211143695, "loss": 0.3329, "step": 7909 }, { "epoch": 1.2382592360676268, "grad_norm": 1.5488073825836182, "learning_rate": 0.00011918377321603127, "loss": 0.6928, "step": 7910 }, { "epoch": 1.2384157795867252, "grad_norm": 1.1169195175170898, "learning_rate": 0.00011915933528836753, "loss": 0.3563, "step": 7911 }, { "epoch": 
1.2385723231058234, "grad_norm": 1.5224010944366455, "learning_rate": 0.0001191348973607038, "loss": 0.7176, "step": 7912 }, { "epoch": 1.2387288666249217, "grad_norm": 0.7106965780258179, "learning_rate": 0.00011911045943304008, "loss": 0.3416, "step": 7913 }, { "epoch": 1.23888541014402, "grad_norm": 1.0206400156021118, "learning_rate": 0.00011908602150537633, "loss": 0.3449, "step": 7914 }, { "epoch": 1.2390419536631183, "grad_norm": 1.643692135810852, "learning_rate": 0.00011906158357771259, "loss": 0.744, "step": 7915 }, { "epoch": 1.2391984971822168, "grad_norm": 2.5953471660614014, "learning_rate": 0.00011903714565004887, "loss": 0.9874, "step": 7916 }, { "epoch": 1.239355040701315, "grad_norm": 2.029824733734131, "learning_rate": 0.00011901270772238514, "loss": 0.7063, "step": 7917 }, { "epoch": 1.2395115842204132, "grad_norm": 2.1811952590942383, "learning_rate": 0.00011898826979472139, "loss": 0.6537, "step": 7918 }, { "epoch": 1.2396681277395116, "grad_norm": 2.453786611557007, "learning_rate": 0.00011896383186705767, "loss": 0.9151, "step": 7919 }, { "epoch": 1.2398246712586098, "grad_norm": 2.0153915882110596, "learning_rate": 0.00011893939393939393, "loss": 1.1769, "step": 7920 }, { "epoch": 1.2399812147777083, "grad_norm": 2.056471824645996, "learning_rate": 0.0001189149560117302, "loss": 0.6084, "step": 7921 }, { "epoch": 1.2401377582968065, "grad_norm": 1.4912649393081665, "learning_rate": 0.00011889051808406646, "loss": 1.0018, "step": 7922 }, { "epoch": 1.2402943018159047, "grad_norm": 1.68226158618927, "learning_rate": 0.00011886608015640273, "loss": 1.2432, "step": 7923 }, { "epoch": 1.2404508453350032, "grad_norm": 1.0560473203659058, "learning_rate": 0.00011884164222873899, "loss": 0.5132, "step": 7924 }, { "epoch": 1.2406073888541014, "grad_norm": 1.9787427186965942, "learning_rate": 0.00011881720430107527, "loss": 1.3016, "step": 7925 }, { "epoch": 1.2407639323731998, "grad_norm": 1.528519868850708, "learning_rate": 0.00011879276637341152, 
"loss": 0.4644, "step": 7926 }, { "epoch": 1.240920475892298, "grad_norm": 1.8350005149841309, "learning_rate": 0.00011876832844574778, "loss": 1.06, "step": 7927 }, { "epoch": 1.2410770194113963, "grad_norm": 2.0662553310394287, "learning_rate": 0.00011874389051808406, "loss": 0.6492, "step": 7928 }, { "epoch": 1.2412335629304947, "grad_norm": 2.514481544494629, "learning_rate": 0.00011871945259042033, "loss": 1.7245, "step": 7929 }, { "epoch": 1.241390106449593, "grad_norm": 1.8698464632034302, "learning_rate": 0.00011869501466275658, "loss": 1.2266, "step": 7930 }, { "epoch": 1.2415466499686914, "grad_norm": 1.368855595588684, "learning_rate": 0.00011867057673509286, "loss": 0.7572, "step": 7931 }, { "epoch": 1.2417031934877896, "grad_norm": 2.3699071407318115, "learning_rate": 0.00011864613880742912, "loss": 0.8298, "step": 7932 }, { "epoch": 1.241859737006888, "grad_norm": 1.3850302696228027, "learning_rate": 0.00011862170087976539, "loss": 0.4536, "step": 7933 }, { "epoch": 1.2420162805259862, "grad_norm": 4.977390289306641, "learning_rate": 0.00011859726295210165, "loss": 1.0689, "step": 7934 }, { "epoch": 1.2421728240450844, "grad_norm": 2.6507201194763184, "learning_rate": 0.00011857282502443792, "loss": 0.8295, "step": 7935 }, { "epoch": 1.2423293675641829, "grad_norm": 1.9919297695159912, "learning_rate": 0.00011854838709677418, "loss": 0.767, "step": 7936 }, { "epoch": 1.242485911083281, "grad_norm": 2.1812164783477783, "learning_rate": 0.00011852394916911046, "loss": 0.7606, "step": 7937 }, { "epoch": 1.2426424546023795, "grad_norm": 4.220646381378174, "learning_rate": 0.00011849951124144671, "loss": 1.381, "step": 7938 }, { "epoch": 1.2427989981214778, "grad_norm": 0.48588910698890686, "learning_rate": 0.00011847507331378298, "loss": 0.2814, "step": 7939 }, { "epoch": 1.2429555416405762, "grad_norm": 0.5703171491622925, "learning_rate": 0.00011845063538611925, "loss": 0.2328, "step": 7940 }, { "epoch": 1.2431120851596744, "grad_norm": 
0.4970242977142334, "learning_rate": 0.00011842619745845552, "loss": 0.1882, "step": 7941 }, { "epoch": 1.2432686286787726, "grad_norm": 0.5462607741355896, "learning_rate": 0.00011840175953079177, "loss": 0.1734, "step": 7942 }, { "epoch": 1.243425172197871, "grad_norm": 0.7907721400260925, "learning_rate": 0.00011837732160312805, "loss": 0.2375, "step": 7943 }, { "epoch": 1.2435817157169693, "grad_norm": 0.4387291669845581, "learning_rate": 0.00011835288367546431, "loss": 0.1791, "step": 7944 }, { "epoch": 1.2437382592360677, "grad_norm": 0.5500348210334778, "learning_rate": 0.00011832844574780058, "loss": 0.2238, "step": 7945 }, { "epoch": 1.243894802755166, "grad_norm": 0.6439167261123657, "learning_rate": 0.00011830400782013684, "loss": 0.2178, "step": 7946 }, { "epoch": 1.2440513462742642, "grad_norm": 0.8734773397445679, "learning_rate": 0.00011827956989247311, "loss": 0.2993, "step": 7947 }, { "epoch": 1.2442078897933626, "grad_norm": 1.8835079669952393, "learning_rate": 0.00011825513196480937, "loss": 0.3273, "step": 7948 }, { "epoch": 1.2443644333124608, "grad_norm": 0.723751425743103, "learning_rate": 0.00011823069403714565, "loss": 0.1828, "step": 7949 }, { "epoch": 1.2445209768315593, "grad_norm": 1.6862190961837769, "learning_rate": 0.0001182062561094819, "loss": 0.3214, "step": 7950 }, { "epoch": 1.2446775203506575, "grad_norm": 0.7363371849060059, "learning_rate": 0.00011818181818181817, "loss": 0.2662, "step": 7951 }, { "epoch": 1.2448340638697557, "grad_norm": 1.2656689882278442, "learning_rate": 0.00011815738025415444, "loss": 0.3986, "step": 7952 }, { "epoch": 1.2449906073888541, "grad_norm": 0.9024664759635925, "learning_rate": 0.00011813294232649071, "loss": 0.405, "step": 7953 }, { "epoch": 1.2451471509079524, "grad_norm": 0.8185746669769287, "learning_rate": 0.00011810850439882696, "loss": 0.4005, "step": 7954 }, { "epoch": 1.2453036944270508, "grad_norm": 0.8885353207588196, "learning_rate": 0.00011808406647116324, "loss": 0.1425, "step": 
7955 }, { "epoch": 1.245460237946149, "grad_norm": 6.204992294311523, "learning_rate": 0.0001180596285434995, "loss": 0.3497, "step": 7956 }, { "epoch": 1.2456167814652472, "grad_norm": 1.7547203302383423, "learning_rate": 0.00011803519061583576, "loss": 0.3987, "step": 7957 }, { "epoch": 1.2457733249843457, "grad_norm": 1.6890156269073486, "learning_rate": 0.00011801075268817203, "loss": 0.2755, "step": 7958 }, { "epoch": 1.245929868503444, "grad_norm": 1.954634189605713, "learning_rate": 0.0001179863147605083, "loss": 0.6202, "step": 7959 }, { "epoch": 1.2460864120225423, "grad_norm": 1.5542676448822021, "learning_rate": 0.00011796187683284456, "loss": 0.2909, "step": 7960 }, { "epoch": 1.2462429555416406, "grad_norm": 2.495922565460205, "learning_rate": 0.00011793743890518084, "loss": 0.6644, "step": 7961 }, { "epoch": 1.2463994990607388, "grad_norm": 1.6161011457443237, "learning_rate": 0.00011791300097751709, "loss": 0.3718, "step": 7962 }, { "epoch": 1.2465560425798372, "grad_norm": 1.5443997383117676, "learning_rate": 0.00011788856304985336, "loss": 0.397, "step": 7963 }, { "epoch": 1.2467125860989354, "grad_norm": 1.664036750793457, "learning_rate": 0.00011786412512218964, "loss": 0.3962, "step": 7964 }, { "epoch": 1.2468691296180339, "grad_norm": 1.7987858057022095, "learning_rate": 0.0001178396871945259, "loss": 0.4728, "step": 7965 }, { "epoch": 1.247025673137132, "grad_norm": 2.9872450828552246, "learning_rate": 0.00011781524926686215, "loss": 0.5981, "step": 7966 }, { "epoch": 1.2471822166562305, "grad_norm": 1.4099814891815186, "learning_rate": 0.00011779081133919843, "loss": 0.8175, "step": 7967 }, { "epoch": 1.2473387601753287, "grad_norm": 2.3960330486297607, "learning_rate": 0.0001177663734115347, "loss": 0.937, "step": 7968 }, { "epoch": 1.247495303694427, "grad_norm": 1.7411472797393799, "learning_rate": 0.00011774193548387095, "loss": 0.8408, "step": 7969 }, { "epoch": 1.2476518472135254, "grad_norm": 1.6002928018569946, "learning_rate": 
0.00011771749755620722, "loss": 0.6916, "step": 7970 }, { "epoch": 1.2478083907326236, "grad_norm": 2.1395416259765625, "learning_rate": 0.00011769305962854349, "loss": 0.6554, "step": 7971 }, { "epoch": 1.247964934251722, "grad_norm": 2.8727903366088867, "learning_rate": 0.00011766862170087975, "loss": 0.5139, "step": 7972 }, { "epoch": 1.2481214777708203, "grad_norm": 1.354569435119629, "learning_rate": 0.00011764418377321603, "loss": 0.5492, "step": 7973 }, { "epoch": 1.2482780212899187, "grad_norm": 4.999713897705078, "learning_rate": 0.00011761974584555228, "loss": 0.7196, "step": 7974 }, { "epoch": 1.248434564809017, "grad_norm": 3.1970131397247314, "learning_rate": 0.00011759530791788855, "loss": 1.0452, "step": 7975 }, { "epoch": 1.2485911083281152, "grad_norm": 3.8125860691070557, "learning_rate": 0.00011757086999022483, "loss": 1.3657, "step": 7976 }, { "epoch": 1.2487476518472136, "grad_norm": 2.1403331756591797, "learning_rate": 0.00011754643206256109, "loss": 0.9489, "step": 7977 }, { "epoch": 1.2489041953663118, "grad_norm": 1.500003457069397, "learning_rate": 0.00011752199413489734, "loss": 0.7548, "step": 7978 }, { "epoch": 1.2490607388854102, "grad_norm": 5.040395259857178, "learning_rate": 0.00011749755620723362, "loss": 0.5617, "step": 7979 }, { "epoch": 1.2492172824045085, "grad_norm": 2.7591681480407715, "learning_rate": 0.00011747311827956989, "loss": 1.0901, "step": 7980 }, { "epoch": 1.2493738259236067, "grad_norm": 1.9304205179214478, "learning_rate": 0.00011744868035190614, "loss": 0.8664, "step": 7981 }, { "epoch": 1.2495303694427051, "grad_norm": 3.9833483695983887, "learning_rate": 0.00011742424242424242, "loss": 0.7079, "step": 7982 }, { "epoch": 1.2496869129618033, "grad_norm": 2.4207115173339844, "learning_rate": 0.00011739980449657868, "loss": 1.1423, "step": 7983 }, { "epoch": 1.2498434564809018, "grad_norm": 2.918473958969116, "learning_rate": 0.00011737536656891495, "loss": 0.9182, "step": 7984 }, { "epoch": 1.25, "grad_norm": 
1.54889714717865, "learning_rate": 0.00011735092864125122, "loss": 0.3946, "step": 7985 }, { "epoch": 1.2501565435190982, "grad_norm": 2.440833806991577, "learning_rate": 0.00011732649071358748, "loss": 0.7028, "step": 7986 }, { "epoch": 1.2503130870381967, "grad_norm": 3.6599295139312744, "learning_rate": 0.00011730205278592374, "loss": 0.6834, "step": 7987 }, { "epoch": 1.2504696305572949, "grad_norm": 2.046987533569336, "learning_rate": 0.00011727761485826002, "loss": 0.59, "step": 7988 }, { "epoch": 1.2506261740763933, "grad_norm": 0.6683407425880432, "learning_rate": 0.00011725317693059628, "loss": 0.1788, "step": 7989 }, { "epoch": 1.2507827175954915, "grad_norm": 0.6008917093276978, "learning_rate": 0.00011722873900293253, "loss": 0.1947, "step": 7990 }, { "epoch": 1.2509392611145898, "grad_norm": 0.8003541827201843, "learning_rate": 0.00011720430107526881, "loss": 0.2844, "step": 7991 }, { "epoch": 1.2510958046336882, "grad_norm": 0.4843752980232239, "learning_rate": 0.00011717986314760508, "loss": 0.3327, "step": 7992 }, { "epoch": 1.2512523481527864, "grad_norm": 0.6065008640289307, "learning_rate": 0.00011715542521994133, "loss": 0.2476, "step": 7993 }, { "epoch": 1.2514088916718848, "grad_norm": 0.4822901785373688, "learning_rate": 0.00011713098729227761, "loss": 0.2134, "step": 7994 }, { "epoch": 1.251565435190983, "grad_norm": 0.6061071753501892, "learning_rate": 0.00011710654936461387, "loss": 0.198, "step": 7995 }, { "epoch": 1.2517219787100813, "grad_norm": 0.7697739005088806, "learning_rate": 0.00011708211143695014, "loss": 0.3114, "step": 7996 }, { "epoch": 1.2518785222291797, "grad_norm": 0.9554542899131775, "learning_rate": 0.00011705767350928642, "loss": 0.5542, "step": 7997 }, { "epoch": 1.252035065748278, "grad_norm": 0.5352123379707336, "learning_rate": 0.00011703323558162267, "loss": 0.2086, "step": 7998 }, { "epoch": 1.2521916092673764, "grad_norm": 1.2916967868804932, "learning_rate": 0.00011700879765395893, "loss": 0.1956, "step": 7999 
}, { "epoch": 1.2523481527864746, "grad_norm": 0.7141245603561401, "learning_rate": 0.00011698435972629521, "loss": 0.2089, "step": 8000 }, { "epoch": 1.2523481527864746, "eval_loss": 0.5047087669372559, "eval_runtime": 206.5621, "eval_samples_per_second": 59.948, "eval_steps_per_second": 3.747, "eval_wer": 0.3239156767324734, "step": 8000 }, { "epoch": 1.2525046963055728, "grad_norm": 0.8334164023399353, "learning_rate": 0.00011695992179863146, "loss": 0.4006, "step": 8001 }, { "epoch": 1.2526612398246713, "grad_norm": 0.8113241195678711, "learning_rate": 0.00011693548387096773, "loss": 0.3454, "step": 8002 }, { "epoch": 1.2528177833437697, "grad_norm": 0.6484245657920837, "learning_rate": 0.000116911045943304, "loss": 0.2453, "step": 8003 }, { "epoch": 1.252974326862868, "grad_norm": 0.7791106700897217, "learning_rate": 0.00011688660801564027, "loss": 0.2726, "step": 8004 }, { "epoch": 1.2531308703819661, "grad_norm": 1.1766725778579712, "learning_rate": 0.00011686217008797652, "loss": 0.4168, "step": 8005 }, { "epoch": 1.2532874139010646, "grad_norm": 1.342408537864685, "learning_rate": 0.0001168377321603128, "loss": 0.3907, "step": 8006 }, { "epoch": 1.2534439574201628, "grad_norm": 1.233473300933838, "learning_rate": 0.00011681329423264906, "loss": 0.3619, "step": 8007 }, { "epoch": 1.2536005009392612, "grad_norm": 1.4168598651885986, "learning_rate": 0.00011678885630498533, "loss": 0.4103, "step": 8008 }, { "epoch": 1.2537570444583594, "grad_norm": 1.5982956886291504, "learning_rate": 0.0001167644183773216, "loss": 0.433, "step": 8009 }, { "epoch": 1.2539135879774577, "grad_norm": 1.1311919689178467, "learning_rate": 0.00011673998044965786, "loss": 0.4682, "step": 8010 }, { "epoch": 1.254070131496556, "grad_norm": 1.4813308715820312, "learning_rate": 0.00011671554252199412, "loss": 0.3652, "step": 8011 }, { "epoch": 1.2542266750156543, "grad_norm": 1.0279951095581055, "learning_rate": 0.0001166911045943304, "loss": 0.5578, "step": 8012 }, { "epoch": 
1.2543832185347528, "grad_norm": 1.014884352684021, "learning_rate": 0.00011666666666666665, "loss": 0.3986, "step": 8013 }, { "epoch": 1.254539762053851, "grad_norm": 2.5004680156707764, "learning_rate": 0.00011664222873900292, "loss": 0.5565, "step": 8014 }, { "epoch": 1.2546963055729492, "grad_norm": 1.6078928709030151, "learning_rate": 0.0001166177908113392, "loss": 0.5358, "step": 8015 }, { "epoch": 1.2548528490920476, "grad_norm": 1.8332239389419556, "learning_rate": 0.00011659335288367546, "loss": 0.2927, "step": 8016 }, { "epoch": 1.2550093926111459, "grad_norm": 1.8943898677825928, "learning_rate": 0.00011656891495601171, "loss": 0.5876, "step": 8017 }, { "epoch": 1.2551659361302443, "grad_norm": 1.8188652992248535, "learning_rate": 0.00011654447702834799, "loss": 0.4997, "step": 8018 }, { "epoch": 1.2553224796493425, "grad_norm": 2.3401331901550293, "learning_rate": 0.00011652003910068425, "loss": 0.3398, "step": 8019 }, { "epoch": 1.2554790231684407, "grad_norm": 1.9932831525802612, "learning_rate": 0.00011649560117302052, "loss": 0.7564, "step": 8020 }, { "epoch": 1.2556355666875392, "grad_norm": 2.9455413818359375, "learning_rate": 0.0001164711632453568, "loss": 0.6872, "step": 8021 }, { "epoch": 1.2557921102066374, "grad_norm": 1.4435310363769531, "learning_rate": 0.00011644672531769305, "loss": 0.6771, "step": 8022 }, { "epoch": 1.2559486537257358, "grad_norm": 3.489488363265991, "learning_rate": 0.00011642228739002931, "loss": 0.7648, "step": 8023 }, { "epoch": 1.256105197244834, "grad_norm": 1.4743152856826782, "learning_rate": 0.00011639784946236559, "loss": 0.4852, "step": 8024 }, { "epoch": 1.2562617407639323, "grad_norm": 3.376526355743408, "learning_rate": 0.00011637341153470184, "loss": 0.6416, "step": 8025 }, { "epoch": 1.2564182842830307, "grad_norm": 1.8703612089157104, "learning_rate": 0.00011634897360703811, "loss": 1.1278, "step": 8026 }, { "epoch": 1.256574827802129, "grad_norm": 2.682764768600464, "learning_rate": 
0.00011632453567937439, "loss": 1.1146, "step": 8027 }, { "epoch": 1.2567313713212274, "grad_norm": 2.28507399559021, "learning_rate": 0.00011630009775171065, "loss": 1.0425, "step": 8028 }, { "epoch": 1.2568879148403256, "grad_norm": 4.47053861618042, "learning_rate": 0.0001162756598240469, "loss": 0.8179, "step": 8029 }, { "epoch": 1.2570444583594238, "grad_norm": 3.5144686698913574, "learning_rate": 0.00011625122189638318, "loss": 1.1578, "step": 8030 }, { "epoch": 1.2572010018785222, "grad_norm": 3.6762499809265137, "learning_rate": 0.00011622678396871945, "loss": 1.1386, "step": 8031 }, { "epoch": 1.2573575453976205, "grad_norm": 2.404191017150879, "learning_rate": 0.00011620234604105571, "loss": 0.9114, "step": 8032 }, { "epoch": 1.257514088916719, "grad_norm": 2.5697121620178223, "learning_rate": 0.00011617790811339199, "loss": 0.5878, "step": 8033 }, { "epoch": 1.2576706324358171, "grad_norm": 2.2942793369293213, "learning_rate": 0.00011615347018572824, "loss": 0.6517, "step": 8034 }, { "epoch": 1.2578271759549153, "grad_norm": 1.6695375442504883, "learning_rate": 0.0001161290322580645, "loss": 0.5267, "step": 8035 }, { "epoch": 1.2579837194740138, "grad_norm": 1.7396913766860962, "learning_rate": 0.00011610459433040078, "loss": 0.5404, "step": 8036 }, { "epoch": 1.2581402629931122, "grad_norm": 2.4069020748138428, "learning_rate": 0.00011608015640273703, "loss": 0.7826, "step": 8037 }, { "epoch": 1.2582968065122104, "grad_norm": 2.687602996826172, "learning_rate": 0.0001160557184750733, "loss": 0.7537, "step": 8038 }, { "epoch": 1.2584533500313086, "grad_norm": 0.9946150779724121, "learning_rate": 0.00011603128054740958, "loss": 0.3249, "step": 8039 }, { "epoch": 1.258609893550407, "grad_norm": 0.7107114791870117, "learning_rate": 0.00011600684261974584, "loss": 0.364, "step": 8040 }, { "epoch": 1.2587664370695053, "grad_norm": 1.1093772649765015, "learning_rate": 0.0001159824046920821, "loss": 0.247, "step": 8041 }, { "epoch": 1.2589229805886037, 
"grad_norm": 0.6720324754714966, "learning_rate": 0.00011595796676441837, "loss": 0.209, "step": 8042 }, { "epoch": 1.259079524107702, "grad_norm": 0.6645235419273376, "learning_rate": 0.00011593352883675464, "loss": 0.2063, "step": 8043 }, { "epoch": 1.2592360676268002, "grad_norm": 0.49136972427368164, "learning_rate": 0.0001159090909090909, "loss": 0.2311, "step": 8044 }, { "epoch": 1.2593926111458986, "grad_norm": 1.026901364326477, "learning_rate": 0.00011588465298142717, "loss": 0.2226, "step": 8045 }, { "epoch": 1.2595491546649968, "grad_norm": 0.6816409826278687, "learning_rate": 0.00011586021505376343, "loss": 0.2241, "step": 8046 }, { "epoch": 1.2597056981840953, "grad_norm": 0.4918442666530609, "learning_rate": 0.0001158357771260997, "loss": 0.2395, "step": 8047 }, { "epoch": 1.2598622417031935, "grad_norm": 0.6494612097740173, "learning_rate": 0.00011581133919843597, "loss": 0.2683, "step": 8048 }, { "epoch": 1.2600187852222917, "grad_norm": 0.875194787979126, "learning_rate": 0.00011578690127077223, "loss": 0.363, "step": 8049 }, { "epoch": 1.2601753287413902, "grad_norm": 0.8091587424278259, "learning_rate": 0.00011576246334310849, "loss": 0.2729, "step": 8050 }, { "epoch": 1.2603318722604884, "grad_norm": 0.5551338791847229, "learning_rate": 0.00011573802541544477, "loss": 0.2604, "step": 8051 }, { "epoch": 1.2604884157795868, "grad_norm": 0.5007133483886719, "learning_rate": 0.00011571358748778103, "loss": 0.2483, "step": 8052 }, { "epoch": 1.260644959298685, "grad_norm": 0.773539662361145, "learning_rate": 0.00011568914956011728, "loss": 0.175, "step": 8053 }, { "epoch": 1.2608015028177832, "grad_norm": 1.5341578722000122, "learning_rate": 0.00011566471163245356, "loss": 0.6749, "step": 8054 }, { "epoch": 1.2609580463368817, "grad_norm": 0.6845079660415649, "learning_rate": 0.00011564027370478983, "loss": 0.2875, "step": 8055 }, { "epoch": 1.26111458985598, "grad_norm": 1.1050750017166138, "learning_rate": 0.00011561583577712609, "loss": 0.2767, 
"step": 8056 }, { "epoch": 1.2612711333750783, "grad_norm": 0.9473204612731934, "learning_rate": 0.00011559139784946234, "loss": 0.3132, "step": 8057 }, { "epoch": 1.2614276768941766, "grad_norm": 2.2619035243988037, "learning_rate": 0.00011556695992179862, "loss": 0.4922, "step": 8058 }, { "epoch": 1.2615842204132748, "grad_norm": 2.7712318897247314, "learning_rate": 0.00011554252199413489, "loss": 0.9514, "step": 8059 }, { "epoch": 1.2617407639323732, "grad_norm": 1.5762767791748047, "learning_rate": 0.00011551808406647114, "loss": 0.6567, "step": 8060 }, { "epoch": 1.2618973074514714, "grad_norm": 0.8980954885482788, "learning_rate": 0.00011549364613880742, "loss": 0.3136, "step": 8061 }, { "epoch": 1.2620538509705699, "grad_norm": 5.263506889343262, "learning_rate": 0.00011546920821114368, "loss": 0.8192, "step": 8062 }, { "epoch": 1.262210394489668, "grad_norm": 1.3415788412094116, "learning_rate": 0.00011544477028347995, "loss": 0.3902, "step": 8063 }, { "epoch": 1.2623669380087663, "grad_norm": 1.4701817035675049, "learning_rate": 0.00011542033235581622, "loss": 0.7186, "step": 8064 }, { "epoch": 1.2625234815278648, "grad_norm": 1.1412750482559204, "learning_rate": 0.00011539589442815248, "loss": 0.5271, "step": 8065 }, { "epoch": 1.262680025046963, "grad_norm": 1.3679181337356567, "learning_rate": 0.00011537145650048874, "loss": 0.7291, "step": 8066 }, { "epoch": 1.2628365685660614, "grad_norm": 2.3653368949890137, "learning_rate": 0.00011534701857282502, "loss": 0.9745, "step": 8067 }, { "epoch": 1.2629931120851596, "grad_norm": 3.5362980365753174, "learning_rate": 0.00011532258064516128, "loss": 0.8932, "step": 8068 }, { "epoch": 1.2631496556042578, "grad_norm": 1.438018560409546, "learning_rate": 0.00011529814271749753, "loss": 0.5735, "step": 8069 }, { "epoch": 1.2633061991233563, "grad_norm": 1.334162950515747, "learning_rate": 0.00011527370478983381, "loss": 0.4908, "step": 8070 }, { "epoch": 1.2634627426424547, "grad_norm": 1.1316155195236206, 
"learning_rate": 0.00011524926686217008, "loss": 0.3825, "step": 8071 }, { "epoch": 1.263619286161553, "grad_norm": 3.4603209495544434, "learning_rate": 0.00011522482893450633, "loss": 1.0631, "step": 8072 }, { "epoch": 1.2637758296806512, "grad_norm": 3.4349160194396973, "learning_rate": 0.00011520039100684261, "loss": 0.6785, "step": 8073 }, { "epoch": 1.2639323731997496, "grad_norm": 2.098545789718628, "learning_rate": 0.00011517595307917887, "loss": 0.6231, "step": 8074 }, { "epoch": 1.2640889167188478, "grad_norm": 1.599894642829895, "learning_rate": 0.00011515151515151514, "loss": 0.4584, "step": 8075 }, { "epoch": 1.2642454602379463, "grad_norm": 2.5117294788360596, "learning_rate": 0.00011512707722385142, "loss": 0.7141, "step": 8076 }, { "epoch": 1.2644020037570445, "grad_norm": 3.738943576812744, "learning_rate": 0.00011510263929618767, "loss": 1.4734, "step": 8077 }, { "epoch": 1.2645585472761427, "grad_norm": 2.5964126586914062, "learning_rate": 0.00011507820136852393, "loss": 0.6161, "step": 8078 }, { "epoch": 1.2647150907952411, "grad_norm": 3.135176420211792, "learning_rate": 0.00011505376344086021, "loss": 1.5048, "step": 8079 }, { "epoch": 1.2648716343143394, "grad_norm": 2.939114570617676, "learning_rate": 0.00011502932551319647, "loss": 1.0791, "step": 8080 }, { "epoch": 1.2650281778334378, "grad_norm": 2.810537815093994, "learning_rate": 0.00011500488758553273, "loss": 2.0609, "step": 8081 }, { "epoch": 1.265184721352536, "grad_norm": 3.116837978363037, "learning_rate": 0.000114980449657869, "loss": 0.956, "step": 8082 }, { "epoch": 1.2653412648716342, "grad_norm": 2.0274899005889893, "learning_rate": 0.00011495601173020527, "loss": 1.196, "step": 8083 }, { "epoch": 1.2654978083907327, "grad_norm": 2.4018070697784424, "learning_rate": 0.00011493157380254152, "loss": 1.0615, "step": 8084 }, { "epoch": 1.2656543519098309, "grad_norm": 3.216063976287842, "learning_rate": 0.0001149071358748778, "loss": 0.4063, "step": 8085 }, { "epoch": 
1.2658108954289293, "grad_norm": 1.749650001525879, "learning_rate": 0.00011488269794721406, "loss": 0.4202, "step": 8086 }, { "epoch": 1.2659674389480275, "grad_norm": 3.246669292449951, "learning_rate": 0.00011485826001955033, "loss": 0.8565, "step": 8087 }, { "epoch": 1.2661239824671258, "grad_norm": 1.4922568798065186, "learning_rate": 0.00011483382209188661, "loss": 0.634, "step": 8088 }, { "epoch": 1.2662805259862242, "grad_norm": 0.5845685601234436, "learning_rate": 0.00011480938416422286, "loss": 0.2766, "step": 8089 }, { "epoch": 1.2664370695053224, "grad_norm": 0.3810208737850189, "learning_rate": 0.00011478494623655912, "loss": 0.2173, "step": 8090 }, { "epoch": 1.2665936130244209, "grad_norm": 0.5567788481712341, "learning_rate": 0.0001147605083088954, "loss": 0.2469, "step": 8091 }, { "epoch": 1.266750156543519, "grad_norm": 0.5596190690994263, "learning_rate": 0.00011473607038123167, "loss": 0.2548, "step": 8092 }, { "epoch": 1.2669067000626173, "grad_norm": 0.5146242380142212, "learning_rate": 0.00011471163245356792, "loss": 0.2395, "step": 8093 }, { "epoch": 1.2670632435817157, "grad_norm": 0.6236430406570435, "learning_rate": 0.0001146871945259042, "loss": 0.3531, "step": 8094 }, { "epoch": 1.267219787100814, "grad_norm": 0.6425842046737671, "learning_rate": 0.00011466275659824046, "loss": 0.3415, "step": 8095 }, { "epoch": 1.2673763306199124, "grad_norm": 0.39682844281196594, "learning_rate": 0.00011463831867057671, "loss": 0.2194, "step": 8096 }, { "epoch": 1.2675328741390106, "grad_norm": 2.4693057537078857, "learning_rate": 0.00011461388074291299, "loss": 0.3667, "step": 8097 }, { "epoch": 1.2676894176581088, "grad_norm": 0.5568252801895142, "learning_rate": 0.00011458944281524925, "loss": 0.3198, "step": 8098 }, { "epoch": 1.2678459611772073, "grad_norm": 0.7685470581054688, "learning_rate": 0.00011456500488758552, "loss": 0.3658, "step": 8099 }, { "epoch": 1.2680025046963057, "grad_norm": 1.0343455076217651, "learning_rate": 
0.0001145405669599218, "loss": 0.2678, "step": 8100 }, { "epoch": 1.268159048215404, "grad_norm": 2.833596706390381, "learning_rate": 0.00011451612903225805, "loss": 0.6284, "step": 8101 }, { "epoch": 1.2683155917345021, "grad_norm": 0.7539536952972412, "learning_rate": 0.00011449169110459431, "loss": 0.2395, "step": 8102 }, { "epoch": 1.2684721352536004, "grad_norm": 1.2098023891448975, "learning_rate": 0.00011446725317693059, "loss": 0.2926, "step": 8103 }, { "epoch": 1.2686286787726988, "grad_norm": 1.0733698606491089, "learning_rate": 0.00011444281524926686, "loss": 0.4996, "step": 8104 }, { "epoch": 1.2687852222917972, "grad_norm": 1.2377598285675049, "learning_rate": 0.00011441837732160311, "loss": 0.5157, "step": 8105 }, { "epoch": 1.2689417658108955, "grad_norm": 0.8723770976066589, "learning_rate": 0.00011439393939393939, "loss": 0.3019, "step": 8106 }, { "epoch": 1.2690983093299937, "grad_norm": 1.6629414558410645, "learning_rate": 0.00011436950146627565, "loss": 0.5106, "step": 8107 }, { "epoch": 1.2692548528490921, "grad_norm": 1.0047154426574707, "learning_rate": 0.0001143450635386119, "loss": 0.4231, "step": 8108 }, { "epoch": 1.2694113963681903, "grad_norm": 1.7095948457717896, "learning_rate": 0.00011432062561094818, "loss": 0.6328, "step": 8109 }, { "epoch": 1.2695679398872888, "grad_norm": 1.4480046033859253, "learning_rate": 0.00011429618768328445, "loss": 0.535, "step": 8110 }, { "epoch": 1.269724483406387, "grad_norm": 0.7682520151138306, "learning_rate": 0.00011427174975562071, "loss": 0.2345, "step": 8111 }, { "epoch": 1.2698810269254852, "grad_norm": 1.8508092164993286, "learning_rate": 0.00011424731182795699, "loss": 0.7018, "step": 8112 }, { "epoch": 1.2700375704445837, "grad_norm": 1.5471041202545166, "learning_rate": 0.00011422287390029324, "loss": 0.5689, "step": 8113 }, { "epoch": 1.2701941139636819, "grad_norm": 1.868023157119751, "learning_rate": 0.0001141984359726295, "loss": 0.5738, "step": 8114 }, { "epoch": 1.2703506574827803, 
"grad_norm": 5.055905818939209, "learning_rate": 0.00011417399804496578, "loss": 0.5171, "step": 8115 }, { "epoch": 1.2705072010018785, "grad_norm": 1.255279779434204, "learning_rate": 0.00011414956011730203, "loss": 0.6595, "step": 8116 }, { "epoch": 1.2706637445209767, "grad_norm": 2.3022544384002686, "learning_rate": 0.0001141251221896383, "loss": 0.5065, "step": 8117 }, { "epoch": 1.2708202880400752, "grad_norm": 2.671542167663574, "learning_rate": 0.00011410068426197458, "loss": 1.1003, "step": 8118 }, { "epoch": 1.2709768315591734, "grad_norm": 2.1620357036590576, "learning_rate": 0.00011407624633431084, "loss": 0.6959, "step": 8119 }, { "epoch": 1.2711333750782718, "grad_norm": 2.031935214996338, "learning_rate": 0.0001140518084066471, "loss": 0.9013, "step": 8120 }, { "epoch": 1.27128991859737, "grad_norm": 2.194514751434326, "learning_rate": 0.00011402737047898337, "loss": 0.5662, "step": 8121 }, { "epoch": 1.2714464621164683, "grad_norm": 1.3475483655929565, "learning_rate": 0.00011400293255131964, "loss": 0.5597, "step": 8122 }, { "epoch": 1.2716030056355667, "grad_norm": 4.436418533325195, "learning_rate": 0.0001139784946236559, "loss": 0.9418, "step": 8123 }, { "epoch": 1.271759549154665, "grad_norm": 5.266567230224609, "learning_rate": 0.00011395405669599218, "loss": 1.1798, "step": 8124 }, { "epoch": 1.2719160926737634, "grad_norm": 3.8170948028564453, "learning_rate": 0.00011392961876832843, "loss": 0.9634, "step": 8125 }, { "epoch": 1.2720726361928616, "grad_norm": 2.858017921447754, "learning_rate": 0.0001139051808406647, "loss": 1.2201, "step": 8126 }, { "epoch": 1.2722291797119598, "grad_norm": 3.335280418395996, "learning_rate": 0.00011388074291300097, "loss": 0.9677, "step": 8127 }, { "epoch": 1.2723857232310583, "grad_norm": 2.3671767711639404, "learning_rate": 0.00011385630498533723, "loss": 1.3408, "step": 8128 }, { "epoch": 1.2725422667501565, "grad_norm": 1.9784801006317139, "learning_rate": 0.00011383186705767349, "loss": 0.6781, "step": 
8129 }, { "epoch": 1.272698810269255, "grad_norm": 2.1394803524017334, "learning_rate": 0.00011380742913000977, "loss": 1.1412, "step": 8130 }, { "epoch": 1.2728553537883531, "grad_norm": 4.030261039733887, "learning_rate": 0.00011378299120234603, "loss": 1.4167, "step": 8131 }, { "epoch": 1.2730118973074513, "grad_norm": 2.028381824493408, "learning_rate": 0.00011375855327468229, "loss": 0.9049, "step": 8132 }, { "epoch": 1.2731684408265498, "grad_norm": 3.869459629058838, "learning_rate": 0.00011373411534701856, "loss": 1.1694, "step": 8133 }, { "epoch": 1.2733249843456482, "grad_norm": 2.090665340423584, "learning_rate": 0.00011370967741935483, "loss": 0.9433, "step": 8134 }, { "epoch": 1.2734815278647464, "grad_norm": 3.8290891647338867, "learning_rate": 0.00011368523949169109, "loss": 1.0372, "step": 8135 }, { "epoch": 1.2736380713838447, "grad_norm": 1.362321138381958, "learning_rate": 0.00011366080156402737, "loss": 0.7292, "step": 8136 }, { "epoch": 1.273794614902943, "grad_norm": 2.492807149887085, "learning_rate": 0.00011363636363636362, "loss": 1.0427, "step": 8137 }, { "epoch": 1.2739511584220413, "grad_norm": 2.186556339263916, "learning_rate": 0.00011361192570869989, "loss": 0.6872, "step": 8138 }, { "epoch": 1.2741077019411398, "grad_norm": 0.4808526933193207, "learning_rate": 0.00011358748778103617, "loss": 0.2192, "step": 8139 }, { "epoch": 1.274264245460238, "grad_norm": 0.7778594493865967, "learning_rate": 0.00011356304985337242, "loss": 0.4221, "step": 8140 }, { "epoch": 1.2744207889793362, "grad_norm": 0.5027886629104614, "learning_rate": 0.00011353861192570868, "loss": 0.2751, "step": 8141 }, { "epoch": 1.2745773324984346, "grad_norm": 0.5371960997581482, "learning_rate": 0.00011351417399804496, "loss": 0.2394, "step": 8142 }, { "epoch": 1.2747338760175329, "grad_norm": 0.42323970794677734, "learning_rate": 0.00011348973607038123, "loss": 0.2188, "step": 8143 }, { "epoch": 1.2748904195366313, "grad_norm": 0.8892855644226074, "learning_rate": 
0.00011346529814271748, "loss": 0.2642, "step": 8144 }, { "epoch": 1.2750469630557295, "grad_norm": 0.6918807625770569, "learning_rate": 0.00011344086021505375, "loss": 0.204, "step": 8145 }, { "epoch": 1.2752035065748277, "grad_norm": 0.6264479756355286, "learning_rate": 0.00011341642228739002, "loss": 0.2568, "step": 8146 }, { "epoch": 1.2753600500939262, "grad_norm": 0.6950996518135071, "learning_rate": 0.00011339198435972628, "loss": 0.244, "step": 8147 }, { "epoch": 1.2755165936130244, "grad_norm": 0.6539076566696167, "learning_rate": 0.00011336754643206256, "loss": 0.2628, "step": 8148 }, { "epoch": 1.2756731371321228, "grad_norm": 0.7629345655441284, "learning_rate": 0.00011334310850439881, "loss": 0.2214, "step": 8149 }, { "epoch": 1.275829680651221, "grad_norm": 0.7511631846427917, "learning_rate": 0.00011331867057673508, "loss": 0.2655, "step": 8150 }, { "epoch": 1.2759862241703193, "grad_norm": 3.1061666011810303, "learning_rate": 0.00011329423264907136, "loss": 0.3632, "step": 8151 }, { "epoch": 1.2761427676894177, "grad_norm": 0.8534873127937317, "learning_rate": 0.00011326979472140761, "loss": 0.2821, "step": 8152 }, { "epoch": 1.276299311208516, "grad_norm": 0.879711389541626, "learning_rate": 0.00011324535679374387, "loss": 0.2113, "step": 8153 }, { "epoch": 1.2764558547276144, "grad_norm": 1.0535318851470947, "learning_rate": 0.00011322091886608015, "loss": 0.471, "step": 8154 }, { "epoch": 1.2766123982467126, "grad_norm": 3.926427125930786, "learning_rate": 0.00011319648093841642, "loss": 0.7819, "step": 8155 }, { "epoch": 1.2767689417658108, "grad_norm": 0.8711987137794495, "learning_rate": 0.00011317204301075267, "loss": 0.2567, "step": 8156 }, { "epoch": 1.2769254852849092, "grad_norm": 1.3189749717712402, "learning_rate": 0.00011314760508308895, "loss": 0.5032, "step": 8157 }, { "epoch": 1.2770820288040075, "grad_norm": 2.5916378498077393, "learning_rate": 0.00011312316715542521, "loss": 0.4017, "step": 8158 }, { "epoch": 1.277238572323106, 
"grad_norm": 2.361572504043579, "learning_rate": 0.00011309872922776148, "loss": 0.5681, "step": 8159 }, { "epoch": 1.277395115842204, "grad_norm": 3.2034995555877686, "learning_rate": 0.00011307429130009774, "loss": 0.8003, "step": 8160 }, { "epoch": 1.2775516593613023, "grad_norm": 0.9315476417541504, "learning_rate": 0.000113049853372434, "loss": 0.3008, "step": 8161 }, { "epoch": 1.2777082028804008, "grad_norm": 1.9534274339675903, "learning_rate": 0.00011302541544477027, "loss": 0.6728, "step": 8162 }, { "epoch": 1.277864746399499, "grad_norm": 1.1545753479003906, "learning_rate": 0.00011300097751710655, "loss": 0.4329, "step": 8163 }, { "epoch": 1.2780212899185974, "grad_norm": 3.3171403408050537, "learning_rate": 0.0001129765395894428, "loss": 0.7543, "step": 8164 }, { "epoch": 1.2781778334376956, "grad_norm": 1.9916654825210571, "learning_rate": 0.00011295210166177906, "loss": 0.7833, "step": 8165 }, { "epoch": 1.2783343769567939, "grad_norm": 1.3301039934158325, "learning_rate": 0.00011292766373411534, "loss": 0.5321, "step": 8166 }, { "epoch": 1.2784909204758923, "grad_norm": 1.8553197383880615, "learning_rate": 0.00011290322580645161, "loss": 0.7389, "step": 8167 }, { "epoch": 1.2786474639949907, "grad_norm": 3.052807092666626, "learning_rate": 0.00011287878787878786, "loss": 0.7611, "step": 8168 }, { "epoch": 1.278804007514089, "grad_norm": 3.283430814743042, "learning_rate": 0.00011285434995112414, "loss": 1.0834, "step": 8169 }, { "epoch": 1.2789605510331872, "grad_norm": 1.6933223009109497, "learning_rate": 0.0001128299120234604, "loss": 0.747, "step": 8170 }, { "epoch": 1.2791170945522856, "grad_norm": 2.0889194011688232, "learning_rate": 0.00011280547409579667, "loss": 0.7694, "step": 8171 }, { "epoch": 1.2792736380713838, "grad_norm": 1.8516852855682373, "learning_rate": 0.00011278103616813293, "loss": 0.7245, "step": 8172 }, { "epoch": 1.2794301815904823, "grad_norm": 2.9009687900543213, "learning_rate": 0.0001127565982404692, "loss": 1.0895, 
"step": 8173 }, { "epoch": 1.2795867251095805, "grad_norm": 2.16554856300354, "learning_rate": 0.00011273216031280546, "loss": 0.9004, "step": 8174 }, { "epoch": 1.2797432686286787, "grad_norm": 1.9977914094924927, "learning_rate": 0.00011270772238514174, "loss": 0.9605, "step": 8175 }, { "epoch": 1.2798998121477771, "grad_norm": 2.656644344329834, "learning_rate": 0.00011268328445747799, "loss": 0.9607, "step": 8176 }, { "epoch": 1.2800563556668754, "grad_norm": 2.0683369636535645, "learning_rate": 0.00011265884652981426, "loss": 0.866, "step": 8177 }, { "epoch": 1.2802128991859738, "grad_norm": 3.113068103790283, "learning_rate": 0.00011263440860215053, "loss": 1.2594, "step": 8178 }, { "epoch": 1.280369442705072, "grad_norm": 2.857494831085205, "learning_rate": 0.0001126099706744868, "loss": 1.3643, "step": 8179 }, { "epoch": 1.2805259862241702, "grad_norm": 2.787754535675049, "learning_rate": 0.00011258553274682305, "loss": 1.0681, "step": 8180 }, { "epoch": 1.2806825297432687, "grad_norm": 2.6079978942871094, "learning_rate": 0.00011256109481915933, "loss": 0.9953, "step": 8181 }, { "epoch": 1.280839073262367, "grad_norm": 1.9221597909927368, "learning_rate": 0.00011253665689149559, "loss": 0.7956, "step": 8182 }, { "epoch": 1.2809956167814653, "grad_norm": 3.6456334590911865, "learning_rate": 0.00011251221896383186, "loss": 0.6788, "step": 8183 }, { "epoch": 1.2811521603005636, "grad_norm": 4.128708839416504, "learning_rate": 0.00011248778103616812, "loss": 0.808, "step": 8184 }, { "epoch": 1.2813087038196618, "grad_norm": 1.7657461166381836, "learning_rate": 0.00011246334310850439, "loss": 0.5298, "step": 8185 }, { "epoch": 1.2814652473387602, "grad_norm": 2.3742294311523438, "learning_rate": 0.00011243890518084065, "loss": 0.5269, "step": 8186 }, { "epoch": 1.2816217908578584, "grad_norm": 2.828449249267578, "learning_rate": 0.00011241446725317693, "loss": 0.8815, "step": 8187 }, { "epoch": 1.2817783343769569, "grad_norm": 1.5230042934417725, 
"learning_rate": 0.00011239002932551318, "loss": 0.553, "step": 8188 }, { "epoch": 1.281934877896055, "grad_norm": 0.6481024622917175, "learning_rate": 0.00011236559139784945, "loss": 0.2289, "step": 8189 }, { "epoch": 1.2820914214151533, "grad_norm": 2.089646577835083, "learning_rate": 0.00011234115347018572, "loss": 0.8718, "step": 8190 }, { "epoch": 1.2822479649342517, "grad_norm": 0.42590638995170593, "learning_rate": 0.00011231671554252199, "loss": 0.1615, "step": 8191 }, { "epoch": 1.28240450845335, "grad_norm": 0.3601533770561218, "learning_rate": 0.00011229227761485824, "loss": 0.164, "step": 8192 }, { "epoch": 1.2825610519724484, "grad_norm": 0.6453453898429871, "learning_rate": 0.00011226783968719452, "loss": 0.3217, "step": 8193 }, { "epoch": 1.2827175954915466, "grad_norm": 0.7831723093986511, "learning_rate": 0.00011224340175953078, "loss": 0.2657, "step": 8194 }, { "epoch": 1.2828741390106448, "grad_norm": 0.6272794008255005, "learning_rate": 0.00011221896383186705, "loss": 0.278, "step": 8195 }, { "epoch": 1.2830306825297433, "grad_norm": 0.6463831067085266, "learning_rate": 0.00011219452590420331, "loss": 0.3111, "step": 8196 }, { "epoch": 1.2831872260488415, "grad_norm": 0.7817267775535583, "learning_rate": 0.00011217008797653958, "loss": 0.2155, "step": 8197 }, { "epoch": 1.28334376956794, "grad_norm": 0.6414055824279785, "learning_rate": 0.00011214565004887584, "loss": 0.2571, "step": 8198 }, { "epoch": 1.2835003130870382, "grad_norm": 1.242760181427002, "learning_rate": 0.00011212121212121212, "loss": 0.2441, "step": 8199 }, { "epoch": 1.2836568566061364, "grad_norm": 0.43037867546081543, "learning_rate": 0.00011209677419354837, "loss": 0.206, "step": 8200 }, { "epoch": 1.2838134001252348, "grad_norm": 0.5707399845123291, "learning_rate": 0.00011207233626588464, "loss": 0.2067, "step": 8201 }, { "epoch": 1.2839699436443333, "grad_norm": 1.2979316711425781, "learning_rate": 0.00011204789833822092, "loss": 0.3798, "step": 8202 }, { "epoch": 
1.2841264871634315, "grad_norm": 0.5706567764282227, "learning_rate": 0.00011202346041055718, "loss": 0.162, "step": 8203 }, { "epoch": 1.2842830306825297, "grad_norm": 1.7582261562347412, "learning_rate": 0.00011199902248289343, "loss": 0.5603, "step": 8204 }, { "epoch": 1.2844395742016281, "grad_norm": 2.591653823852539, "learning_rate": 0.00011197458455522971, "loss": 0.4609, "step": 8205 }, { "epoch": 1.2845961177207263, "grad_norm": 0.9483798146247864, "learning_rate": 0.00011195014662756598, "loss": 0.3353, "step": 8206 }, { "epoch": 1.2847526612398248, "grad_norm": 1.1728459596633911, "learning_rate": 0.00011192570869990224, "loss": 0.3398, "step": 8207 }, { "epoch": 1.284909204758923, "grad_norm": 0.952708899974823, "learning_rate": 0.0001119012707722385, "loss": 0.3597, "step": 8208 }, { "epoch": 1.2850657482780212, "grad_norm": 1.8662762641906738, "learning_rate": 0.00011187683284457477, "loss": 0.6664, "step": 8209 }, { "epoch": 1.2852222917971197, "grad_norm": 1.1493111848831177, "learning_rate": 0.00011185239491691103, "loss": 0.8306, "step": 8210 }, { "epoch": 1.2853788353162179, "grad_norm": 1.7580465078353882, "learning_rate": 0.00011182795698924731, "loss": 0.4859, "step": 8211 }, { "epoch": 1.2855353788353163, "grad_norm": 0.9127596020698547, "learning_rate": 0.00011180351906158356, "loss": 0.2457, "step": 8212 }, { "epoch": 1.2856919223544145, "grad_norm": 1.9063149690628052, "learning_rate": 0.00011177908113391983, "loss": 0.4663, "step": 8213 }, { "epoch": 1.2858484658735128, "grad_norm": 1.1802544593811035, "learning_rate": 0.00011175464320625611, "loss": 0.3457, "step": 8214 }, { "epoch": 1.2860050093926112, "grad_norm": 2.804399013519287, "learning_rate": 0.00011173020527859237, "loss": 0.6875, "step": 8215 }, { "epoch": 1.2861615529117094, "grad_norm": 1.3575066328048706, "learning_rate": 0.00011170576735092862, "loss": 0.5672, "step": 8216 }, { "epoch": 1.2863180964308079, "grad_norm": 1.3274577856063843, "learning_rate": 
0.0001116813294232649, "loss": 0.731, "step": 8217 }, { "epoch": 1.286474639949906, "grad_norm": 1.7587782144546509, "learning_rate": 0.00011165689149560117, "loss": 0.6624, "step": 8218 }, { "epoch": 1.2866311834690043, "grad_norm": 1.3552849292755127, "learning_rate": 0.00011163245356793742, "loss": 0.5267, "step": 8219 }, { "epoch": 1.2867877269881027, "grad_norm": 1.7056719064712524, "learning_rate": 0.0001116080156402737, "loss": 0.8283, "step": 8220 }, { "epoch": 1.286944270507201, "grad_norm": 2.418220281600952, "learning_rate": 0.00011158357771260996, "loss": 0.7403, "step": 8221 }, { "epoch": 1.2871008140262994, "grad_norm": 1.3776272535324097, "learning_rate": 0.00011155913978494623, "loss": 0.4315, "step": 8222 }, { "epoch": 1.2872573575453976, "grad_norm": 1.3834933042526245, "learning_rate": 0.0001115347018572825, "loss": 0.4908, "step": 8223 }, { "epoch": 1.2874139010644958, "grad_norm": 1.5205992460250854, "learning_rate": 0.00011151026392961876, "loss": 0.8942, "step": 8224 }, { "epoch": 1.2875704445835943, "grad_norm": 3.1065549850463867, "learning_rate": 0.00011148582600195502, "loss": 1.2169, "step": 8225 }, { "epoch": 1.2877269881026925, "grad_norm": 3.65362811088562, "learning_rate": 0.0001114613880742913, "loss": 1.0125, "step": 8226 }, { "epoch": 1.287883531621791, "grad_norm": 1.9371777772903442, "learning_rate": 0.00011143695014662756, "loss": 1.1199, "step": 8227 }, { "epoch": 1.2880400751408891, "grad_norm": 3.518394947052002, "learning_rate": 0.00011141251221896381, "loss": 0.8579, "step": 8228 }, { "epoch": 1.2881966186599874, "grad_norm": 2.363788604736328, "learning_rate": 0.00011138807429130009, "loss": 0.6714, "step": 8229 }, { "epoch": 1.2883531621790858, "grad_norm": 2.800300359725952, "learning_rate": 0.00011136363636363636, "loss": 0.9438, "step": 8230 }, { "epoch": 1.288509705698184, "grad_norm": 2.6638309955596924, "learning_rate": 0.00011133919843597261, "loss": 1.5732, "step": 8231 }, { "epoch": 1.2886662492172825, 
"grad_norm": 1.8096959590911865, "learning_rate": 0.00011131476050830889, "loss": 0.8746, "step": 8232 }, { "epoch": 1.2888227927363807, "grad_norm": 2.5477139949798584, "learning_rate": 0.00011129032258064515, "loss": 0.7608, "step": 8233 }, { "epoch": 1.288979336255479, "grad_norm": 5.085765361785889, "learning_rate": 0.00011126588465298142, "loss": 0.746, "step": 8234 }, { "epoch": 1.2891358797745773, "grad_norm": 2.019724130630493, "learning_rate": 0.0001112414467253177, "loss": 0.3566, "step": 8235 }, { "epoch": 1.2892924232936758, "grad_norm": 6.809519290924072, "learning_rate": 0.00011121700879765395, "loss": 0.8774, "step": 8236 }, { "epoch": 1.289448966812774, "grad_norm": 4.453648090362549, "learning_rate": 0.00011119257086999021, "loss": 0.3884, "step": 8237 }, { "epoch": 1.2896055103318722, "grad_norm": 2.842747449874878, "learning_rate": 0.00011116813294232649, "loss": 1.12, "step": 8238 }, { "epoch": 1.2897620538509706, "grad_norm": 0.7022001147270203, "learning_rate": 0.00011114369501466275, "loss": 0.2758, "step": 8239 }, { "epoch": 1.2899185973700689, "grad_norm": 0.49633824825286865, "learning_rate": 0.000111119257086999, "loss": 0.2534, "step": 8240 }, { "epoch": 1.2900751408891673, "grad_norm": 0.6422197818756104, "learning_rate": 0.00011109481915933528, "loss": 0.2632, "step": 8241 }, { "epoch": 1.2902316844082655, "grad_norm": 1.4231148958206177, "learning_rate": 0.00011107038123167155, "loss": 0.4564, "step": 8242 }, { "epoch": 1.2903882279273637, "grad_norm": 0.8207333087921143, "learning_rate": 0.0001110459433040078, "loss": 0.2579, "step": 8243 }, { "epoch": 1.2905447714464622, "grad_norm": 0.8601287603378296, "learning_rate": 0.00011102150537634408, "loss": 0.3219, "step": 8244 }, { "epoch": 1.2907013149655604, "grad_norm": 0.8240009546279907, "learning_rate": 0.00011099706744868034, "loss": 0.3404, "step": 8245 }, { "epoch": 1.2908578584846588, "grad_norm": 0.8633431792259216, "learning_rate": 0.00011097262952101661, "loss": 0.2921, 
"step": 8246 }, { "epoch": 1.291014402003757, "grad_norm": 1.7753422260284424, "learning_rate": 0.00011094819159335289, "loss": 0.4439, "step": 8247 }, { "epoch": 1.2911709455228553, "grad_norm": 1.0025125741958618, "learning_rate": 0.00011092375366568914, "loss": 0.317, "step": 8248 }, { "epoch": 1.2913274890419537, "grad_norm": 1.528899073600769, "learning_rate": 0.0001108993157380254, "loss": 0.3887, "step": 8249 }, { "epoch": 1.291484032561052, "grad_norm": 0.7538073062896729, "learning_rate": 0.00011087487781036168, "loss": 0.3759, "step": 8250 }, { "epoch": 1.2916405760801504, "grad_norm": 0.7805300951004028, "learning_rate": 0.00011085043988269795, "loss": 0.2921, "step": 8251 }, { "epoch": 1.2917971195992486, "grad_norm": 1.1637154817581177, "learning_rate": 0.0001108260019550342, "loss": 0.4664, "step": 8252 }, { "epoch": 1.2919536631183468, "grad_norm": 0.8898148536682129, "learning_rate": 0.00011080156402737048, "loss": 0.3022, "step": 8253 }, { "epoch": 1.2921102066374452, "grad_norm": 1.7075831890106201, "learning_rate": 0.00011077712609970674, "loss": 0.4828, "step": 8254 }, { "epoch": 1.2922667501565435, "grad_norm": 1.2227857112884521, "learning_rate": 0.00011075268817204299, "loss": 0.3136, "step": 8255 }, { "epoch": 1.292423293675642, "grad_norm": 1.10588800907135, "learning_rate": 0.00011072825024437927, "loss": 0.209, "step": 8256 }, { "epoch": 1.2925798371947401, "grad_norm": 1.42483389377594, "learning_rate": 0.00011070381231671553, "loss": 0.6039, "step": 8257 }, { "epoch": 1.2927363807138383, "grad_norm": 4.606034755706787, "learning_rate": 0.0001106793743890518, "loss": 1.3905, "step": 8258 }, { "epoch": 1.2928929242329368, "grad_norm": 1.765781283378601, "learning_rate": 0.00011065493646138808, "loss": 0.5845, "step": 8259 }, { "epoch": 1.293049467752035, "grad_norm": 1.1531269550323486, "learning_rate": 0.00011063049853372433, "loss": 0.339, "step": 8260 }, { "epoch": 1.2932060112711334, "grad_norm": 2.018428087234497, "learning_rate": 
0.0001106060606060606, "loss": 0.5406, "step": 8261 }, { "epoch": 1.2933625547902317, "grad_norm": 1.3222421407699585, "learning_rate": 0.00011058162267839687, "loss": 0.3733, "step": 8262 }, { "epoch": 1.2935190983093299, "grad_norm": 1.9479771852493286, "learning_rate": 0.00011055718475073312, "loss": 0.5531, "step": 8263 }, { "epoch": 1.2936756418284283, "grad_norm": 0.9404929876327515, "learning_rate": 0.00011053274682306939, "loss": 0.456, "step": 8264 }, { "epoch": 1.2938321853475265, "grad_norm": 2.357956886291504, "learning_rate": 0.00011050830889540567, "loss": 0.6238, "step": 8265 }, { "epoch": 1.293988728866625, "grad_norm": 1.7230677604675293, "learning_rate": 0.00011048387096774193, "loss": 0.565, "step": 8266 }, { "epoch": 1.2941452723857232, "grad_norm": 4.3762526512146, "learning_rate": 0.00011045943304007818, "loss": 0.9608, "step": 8267 }, { "epoch": 1.2943018159048214, "grad_norm": 3.0639162063598633, "learning_rate": 0.00011043499511241446, "loss": 0.7233, "step": 8268 }, { "epoch": 1.2944583594239198, "grad_norm": 1.96674644947052, "learning_rate": 0.00011041055718475073, "loss": 0.7643, "step": 8269 }, { "epoch": 1.2946149029430183, "grad_norm": 4.862483978271484, "learning_rate": 0.00011038611925708699, "loss": 0.6065, "step": 8270 }, { "epoch": 1.2947714464621165, "grad_norm": 2.0144357681274414, "learning_rate": 0.00011036168132942327, "loss": 0.8087, "step": 8271 }, { "epoch": 1.2949279899812147, "grad_norm": 3.6097030639648438, "learning_rate": 0.00011033724340175952, "loss": 1.2482, "step": 8272 }, { "epoch": 1.2950845335003132, "grad_norm": 1.5838818550109863, "learning_rate": 0.00011031280547409578, "loss": 0.5976, "step": 8273 }, { "epoch": 1.2952410770194114, "grad_norm": 2.3606481552124023, "learning_rate": 0.00011028836754643206, "loss": 0.7551, "step": 8274 }, { "epoch": 1.2953976205385098, "grad_norm": 1.6453297138214111, "learning_rate": 0.00011026392961876831, "loss": 0.6356, "step": 8275 }, { "epoch": 1.295554164057608, 
"grad_norm": 2.8421103954315186, "learning_rate": 0.00011023949169110458, "loss": 0.8155, "step": 8276 }, { "epoch": 1.2957107075767063, "grad_norm": 2.4521589279174805, "learning_rate": 0.00011021505376344086, "loss": 0.813, "step": 8277 }, { "epoch": 1.2958672510958047, "grad_norm": 2.380380868911743, "learning_rate": 0.00011019061583577712, "loss": 0.981, "step": 8278 }, { "epoch": 1.296023794614903, "grad_norm": 2.3057072162628174, "learning_rate": 0.00011016617790811337, "loss": 1.051, "step": 8279 }, { "epoch": 1.2961803381340014, "grad_norm": 2.9905688762664795, "learning_rate": 0.00011014173998044965, "loss": 1.2693, "step": 8280 }, { "epoch": 1.2963368816530996, "grad_norm": 9.889979362487793, "learning_rate": 0.00011011730205278592, "loss": 1.336, "step": 8281 }, { "epoch": 1.2964934251721978, "grad_norm": 2.7968623638153076, "learning_rate": 0.00011009286412512218, "loss": 1.5213, "step": 8282 }, { "epoch": 1.2966499686912962, "grad_norm": 2.403857469558716, "learning_rate": 0.00011006842619745846, "loss": 0.7344, "step": 8283 }, { "epoch": 1.2968065122103944, "grad_norm": 1.8447998762130737, "learning_rate": 0.00011004398826979471, "loss": 0.5804, "step": 8284 }, { "epoch": 1.2969630557294929, "grad_norm": 1.6647576093673706, "learning_rate": 0.00011001955034213098, "loss": 0.6722, "step": 8285 }, { "epoch": 1.297119599248591, "grad_norm": 3.1341192722320557, "learning_rate": 0.00010999511241446725, "loss": 0.8471, "step": 8286 }, { "epoch": 1.2972761427676893, "grad_norm": 2.227797746658325, "learning_rate": 0.0001099706744868035, "loss": 0.7715, "step": 8287 }, { "epoch": 1.2974326862867878, "grad_norm": 1.991193413734436, "learning_rate": 0.00010994623655913977, "loss": 0.7457, "step": 8288 }, { "epoch": 1.297589229805886, "grad_norm": 0.5508354902267456, "learning_rate": 0.00010992179863147605, "loss": 0.2327, "step": 8289 }, { "epoch": 1.2977457733249844, "grad_norm": 0.48610156774520874, "learning_rate": 0.00010989736070381231, "loss": 0.1992, 
"step": 8290 }, { "epoch": 1.2979023168440826, "grad_norm": 0.6914536356925964, "learning_rate": 0.00010987292277614856, "loss": 0.2157, "step": 8291 }, { "epoch": 1.2980588603631809, "grad_norm": 0.6526252627372742, "learning_rate": 0.00010984848484848484, "loss": 0.2804, "step": 8292 }, { "epoch": 1.2982154038822793, "grad_norm": 0.7797197103500366, "learning_rate": 0.00010982404692082111, "loss": 0.2843, "step": 8293 }, { "epoch": 1.2983719474013775, "grad_norm": 1.6517852544784546, "learning_rate": 0.00010979960899315737, "loss": 0.4454, "step": 8294 }, { "epoch": 1.298528490920476, "grad_norm": 0.5789437294006348, "learning_rate": 0.00010977517106549365, "loss": 0.1868, "step": 8295 }, { "epoch": 1.2986850344395742, "grad_norm": 1.5858449935913086, "learning_rate": 0.0001097507331378299, "loss": 0.3895, "step": 8296 }, { "epoch": 1.2988415779586724, "grad_norm": 0.5253259539604187, "learning_rate": 0.00010972629521016617, "loss": 0.2817, "step": 8297 }, { "epoch": 1.2989981214777708, "grad_norm": 0.7501121163368225, "learning_rate": 0.00010970185728250245, "loss": 0.279, "step": 8298 }, { "epoch": 1.2991546649968693, "grad_norm": 0.6583108305931091, "learning_rate": 0.0001096774193548387, "loss": 0.2974, "step": 8299 }, { "epoch": 1.2993112085159675, "grad_norm": 0.7893651723861694, "learning_rate": 0.00010965298142717496, "loss": 0.1879, "step": 8300 }, { "epoch": 1.2994677520350657, "grad_norm": 0.6830184459686279, "learning_rate": 0.00010962854349951124, "loss": 0.2392, "step": 8301 }, { "epoch": 1.299624295554164, "grad_norm": 0.9842543601989746, "learning_rate": 0.0001096041055718475, "loss": 0.3198, "step": 8302 }, { "epoch": 1.2997808390732624, "grad_norm": 1.0459553003311157, "learning_rate": 0.00010957966764418376, "loss": 0.3716, "step": 8303 }, { "epoch": 1.2999373825923608, "grad_norm": 0.7785957455635071, "learning_rate": 0.00010955522971652003, "loss": 0.2841, "step": 8304 }, { "epoch": 1.300093926111459, "grad_norm": 1.5206092596054077, 
"learning_rate": 0.0001095307917888563, "loss": 0.2479, "step": 8305 }, { "epoch": 1.3002504696305572, "grad_norm": 1.0379140377044678, "learning_rate": 0.00010950635386119256, "loss": 0.2994, "step": 8306 }, { "epoch": 1.3004070131496557, "grad_norm": 2.3565833568573, "learning_rate": 0.00010948191593352884, "loss": 0.6375, "step": 8307 }, { "epoch": 1.300563556668754, "grad_norm": 1.7786818742752075, "learning_rate": 0.0001094574780058651, "loss": 0.653, "step": 8308 }, { "epoch": 1.3007201001878523, "grad_norm": 1.0531212091445923, "learning_rate": 0.00010943304007820136, "loss": 0.3824, "step": 8309 }, { "epoch": 1.3008766437069506, "grad_norm": 1.700603723526001, "learning_rate": 0.00010940860215053764, "loss": 0.6215, "step": 8310 }, { "epoch": 1.3010331872260488, "grad_norm": 1.701421856880188, "learning_rate": 0.00010938416422287389, "loss": 0.4508, "step": 8311 }, { "epoch": 1.3011897307451472, "grad_norm": 1.6104190349578857, "learning_rate": 0.00010935972629521015, "loss": 0.5921, "step": 8312 }, { "epoch": 1.3013462742642454, "grad_norm": 0.7885456681251526, "learning_rate": 0.00010933528836754642, "loss": 0.3565, "step": 8313 }, { "epoch": 1.3015028177833439, "grad_norm": 1.718679666519165, "learning_rate": 0.0001093108504398827, "loss": 0.6124, "step": 8314 }, { "epoch": 1.301659361302442, "grad_norm": 4.118200302124023, "learning_rate": 0.00010928641251221895, "loss": 0.8574, "step": 8315 }, { "epoch": 1.3018159048215403, "grad_norm": 1.17429518699646, "learning_rate": 0.00010926197458455521, "loss": 0.5231, "step": 8316 }, { "epoch": 1.3019724483406387, "grad_norm": 3.1723527908325195, "learning_rate": 0.00010923753665689149, "loss": 0.3513, "step": 8317 }, { "epoch": 1.302128991859737, "grad_norm": 2.3258113861083984, "learning_rate": 0.00010921309872922775, "loss": 0.5292, "step": 8318 }, { "epoch": 1.3022855353788354, "grad_norm": 3.5120906829833984, "learning_rate": 0.000109188660801564, "loss": 0.2251, "step": 8319 }, { "epoch": 
1.3024420788979336, "grad_norm": 1.4291130304336548, "learning_rate": 0.00010916422287390028, "loss": 0.4773, "step": 8320 }, { "epoch": 1.3025986224170318, "grad_norm": 3.090890884399414, "learning_rate": 0.00010913978494623655, "loss": 0.7817, "step": 8321 }, { "epoch": 1.3027551659361303, "grad_norm": 3.119511842727661, "learning_rate": 0.00010911534701857281, "loss": 0.8859, "step": 8322 }, { "epoch": 1.3029117094552285, "grad_norm": 1.6605695486068726, "learning_rate": 0.00010909090909090908, "loss": 0.2614, "step": 8323 }, { "epoch": 1.303068252974327, "grad_norm": 3.2734925746917725, "learning_rate": 0.00010906647116324534, "loss": 0.7882, "step": 8324 }, { "epoch": 1.3032247964934252, "grad_norm": 1.581061601638794, "learning_rate": 0.00010904203323558161, "loss": 0.5519, "step": 8325 }, { "epoch": 1.3033813400125234, "grad_norm": 1.6887787580490112, "learning_rate": 0.00010901759530791789, "loss": 1.1315, "step": 8326 }, { "epoch": 1.3035378835316218, "grad_norm": 5.0255937576293945, "learning_rate": 0.00010899315738025414, "loss": 1.2345, "step": 8327 }, { "epoch": 1.30369442705072, "grad_norm": 3.7134711742401123, "learning_rate": 0.0001089687194525904, "loss": 1.2284, "step": 8328 }, { "epoch": 1.3038509705698185, "grad_norm": 2.470165252685547, "learning_rate": 0.00010894428152492668, "loss": 0.588, "step": 8329 }, { "epoch": 1.3040075140889167, "grad_norm": 2.310760021209717, "learning_rate": 0.00010891984359726295, "loss": 0.8248, "step": 8330 }, { "epoch": 1.304164057608015, "grad_norm": 4.917720794677734, "learning_rate": 0.0001088954056695992, "loss": 1.843, "step": 8331 }, { "epoch": 1.3043206011271133, "grad_norm": 3.521848440170288, "learning_rate": 0.00010887096774193548, "loss": 1.4091, "step": 8332 }, { "epoch": 1.3044771446462118, "grad_norm": 1.4144970178604126, "learning_rate": 0.00010884652981427174, "loss": 1.0417, "step": 8333 }, { "epoch": 1.30463368816531, "grad_norm": 2.08368182182312, "learning_rate": 0.00010882209188660799, 
"loss": 0.7129, "step": 8334 }, { "epoch": 1.3047902316844082, "grad_norm": 2.0993666648864746, "learning_rate": 0.00010879765395894427, "loss": 1.2135, "step": 8335 }, { "epoch": 1.3049467752035064, "grad_norm": 4.826048374176025, "learning_rate": 0.00010877321603128053, "loss": 0.8379, "step": 8336 }, { "epoch": 1.3051033187226049, "grad_norm": 2.809587001800537, "learning_rate": 0.0001087487781036168, "loss": 0.603, "step": 8337 }, { "epoch": 1.3052598622417033, "grad_norm": 5.488523483276367, "learning_rate": 0.00010872434017595308, "loss": 1.2882, "step": 8338 }, { "epoch": 1.3054164057608015, "grad_norm": 1.1666176319122314, "learning_rate": 0.00010869990224828933, "loss": 0.3117, "step": 8339 }, { "epoch": 1.3055729492798998, "grad_norm": 0.6575944423675537, "learning_rate": 0.0001086754643206256, "loss": 0.2904, "step": 8340 }, { "epoch": 1.3057294927989982, "grad_norm": 0.6943413615226746, "learning_rate": 0.00010865102639296187, "loss": 0.202, "step": 8341 }, { "epoch": 1.3058860363180964, "grad_norm": 0.5365055799484253, "learning_rate": 0.00010862658846529814, "loss": 0.2215, "step": 8342 }, { "epoch": 1.3060425798371949, "grad_norm": 0.6395205855369568, "learning_rate": 0.00010860215053763439, "loss": 0.2398, "step": 8343 }, { "epoch": 1.306199123356293, "grad_norm": 0.6453742980957031, "learning_rate": 0.00010857771260997067, "loss": 0.2191, "step": 8344 }, { "epoch": 1.3063556668753913, "grad_norm": 0.4960818290710449, "learning_rate": 0.00010855327468230693, "loss": 0.1847, "step": 8345 }, { "epoch": 1.3065122103944897, "grad_norm": 0.8823752403259277, "learning_rate": 0.00010852883675464318, "loss": 0.2645, "step": 8346 }, { "epoch": 1.306668753913588, "grad_norm": 0.6216477751731873, "learning_rate": 0.00010850439882697946, "loss": 0.1919, "step": 8347 }, { "epoch": 1.3068252974326864, "grad_norm": 0.8468354344367981, "learning_rate": 0.00010847996089931573, "loss": 0.2694, "step": 8348 }, { "epoch": 1.3069818409517846, "grad_norm": 
1.344497799873352, "learning_rate": 0.00010845552297165199, "loss": 0.3727, "step": 8349 }, { "epoch": 1.3071383844708828, "grad_norm": 1.8707456588745117, "learning_rate": 0.00010843108504398827, "loss": 0.3696, "step": 8350 }, { "epoch": 1.3072949279899813, "grad_norm": 1.084493637084961, "learning_rate": 0.00010840664711632452, "loss": 0.2772, "step": 8351 }, { "epoch": 1.3074514715090795, "grad_norm": 0.9700431823730469, "learning_rate": 0.00010838220918866079, "loss": 0.2514, "step": 8352 }, { "epoch": 1.307608015028178, "grad_norm": 1.6982876062393188, "learning_rate": 0.00010835777126099706, "loss": 0.2902, "step": 8353 }, { "epoch": 1.3077645585472761, "grad_norm": 1.004989504814148, "learning_rate": 0.00010833333333333333, "loss": 0.3883, "step": 8354 }, { "epoch": 1.3079211020663744, "grad_norm": 1.6661001443862915, "learning_rate": 0.00010830889540566958, "loss": 0.5643, "step": 8355 }, { "epoch": 1.3080776455854728, "grad_norm": 1.4825246334075928, "learning_rate": 0.00010828445747800586, "loss": 0.5259, "step": 8356 }, { "epoch": 1.308234189104571, "grad_norm": 1.141077995300293, "learning_rate": 0.00010826001955034212, "loss": 0.2891, "step": 8357 }, { "epoch": 1.3083907326236695, "grad_norm": 2.092963695526123, "learning_rate": 0.00010823558162267837, "loss": 0.7122, "step": 8358 }, { "epoch": 1.3085472761427677, "grad_norm": 0.9098227024078369, "learning_rate": 0.00010821114369501465, "loss": 0.3163, "step": 8359 }, { "epoch": 1.3087038196618659, "grad_norm": 2.1590492725372314, "learning_rate": 0.00010818670576735092, "loss": 0.6714, "step": 8360 }, { "epoch": 1.3088603631809643, "grad_norm": 1.1368892192840576, "learning_rate": 0.00010816226783968718, "loss": 0.5353, "step": 8361 }, { "epoch": 1.3090169067000625, "grad_norm": 1.050818681716919, "learning_rate": 0.00010813782991202346, "loss": 0.4366, "step": 8362 }, { "epoch": 1.309173450219161, "grad_norm": 0.9604326486587524, "learning_rate": 0.00010811339198435971, "loss": 0.377, "step": 8363 
}, { "epoch": 1.3093299937382592, "grad_norm": 2.909843683242798, "learning_rate": 0.00010808895405669598, "loss": 0.709, "step": 8364 }, { "epoch": 1.3094865372573574, "grad_norm": 2.03232741355896, "learning_rate": 0.00010806451612903225, "loss": 0.5225, "step": 8365 }, { "epoch": 1.3096430807764559, "grad_norm": 1.5411139726638794, "learning_rate": 0.00010804007820136852, "loss": 0.7947, "step": 8366 }, { "epoch": 1.3097996242955543, "grad_norm": 1.4965941905975342, "learning_rate": 0.00010801564027370477, "loss": 0.5839, "step": 8367 }, { "epoch": 1.3099561678146525, "grad_norm": 2.01226806640625, "learning_rate": 0.00010799120234604105, "loss": 0.6892, "step": 8368 }, { "epoch": 1.3101127113337507, "grad_norm": 4.478280067443848, "learning_rate": 0.00010796676441837731, "loss": 0.437, "step": 8369 }, { "epoch": 1.3102692548528492, "grad_norm": 2.566091299057007, "learning_rate": 0.00010794232649071357, "loss": 0.5926, "step": 8370 }, { "epoch": 1.3104257983719474, "grad_norm": 2.0691118240356445, "learning_rate": 0.00010791788856304984, "loss": 0.6444, "step": 8371 }, { "epoch": 1.3105823418910458, "grad_norm": 2.3462331295013428, "learning_rate": 0.00010789345063538611, "loss": 0.8449, "step": 8372 }, { "epoch": 1.310738885410144, "grad_norm": 1.7684667110443115, "learning_rate": 0.00010786901270772237, "loss": 0.6608, "step": 8373 }, { "epoch": 1.3108954289292423, "grad_norm": 2.054476022720337, "learning_rate": 0.00010784457478005865, "loss": 1.0546, "step": 8374 }, { "epoch": 1.3110519724483407, "grad_norm": 2.8694870471954346, "learning_rate": 0.0001078201368523949, "loss": 0.9144, "step": 8375 }, { "epoch": 1.311208515967439, "grad_norm": 1.2306618690490723, "learning_rate": 0.00010779569892473117, "loss": 0.5151, "step": 8376 }, { "epoch": 1.3113650594865374, "grad_norm": 1.6525115966796875, "learning_rate": 0.00010777126099706745, "loss": 0.8821, "step": 8377 }, { "epoch": 1.3115216030056356, "grad_norm": 3.0999720096588135, "learning_rate": 
0.0001077468230694037, "loss": 1.6076, "step": 8378 }, { "epoch": 1.3116781465247338, "grad_norm": 1.6865127086639404, "learning_rate": 0.00010772238514173996, "loss": 0.8059, "step": 8379 }, { "epoch": 1.3118346900438322, "grad_norm": 2.4270031452178955, "learning_rate": 0.00010769794721407624, "loss": 0.5467, "step": 8380 }, { "epoch": 1.3119912335629305, "grad_norm": 2.8954577445983887, "learning_rate": 0.0001076735092864125, "loss": 0.532, "step": 8381 }, { "epoch": 1.312147777082029, "grad_norm": 4.275869846343994, "learning_rate": 0.00010764907135874876, "loss": 1.7656, "step": 8382 }, { "epoch": 1.3123043206011271, "grad_norm": 1.7701374292373657, "learning_rate": 0.00010762463343108503, "loss": 1.1036, "step": 8383 }, { "epoch": 1.3124608641202253, "grad_norm": 2.1359615325927734, "learning_rate": 0.0001076001955034213, "loss": 0.4755, "step": 8384 }, { "epoch": 1.3126174076393238, "grad_norm": 1.1913822889328003, "learning_rate": 0.00010757575757575756, "loss": 0.349, "step": 8385 }, { "epoch": 1.312773951158422, "grad_norm": 1.3649957180023193, "learning_rate": 0.00010755131964809384, "loss": 0.4667, "step": 8386 }, { "epoch": 1.3129304946775204, "grad_norm": 2.1878700256347656, "learning_rate": 0.0001075268817204301, "loss": 0.7366, "step": 8387 }, { "epoch": 1.3130870381966186, "grad_norm": 2.3514275550842285, "learning_rate": 0.00010750244379276636, "loss": 1.0322, "step": 8388 }, { "epoch": 1.3132435817157169, "grad_norm": 0.49473220109939575, "learning_rate": 0.00010747800586510264, "loss": 0.211, "step": 8389 }, { "epoch": 1.3134001252348153, "grad_norm": 1.2258360385894775, "learning_rate": 0.00010745356793743889, "loss": 0.2018, "step": 8390 }, { "epoch": 1.3135566687539135, "grad_norm": 0.7705320119857788, "learning_rate": 0.00010742913000977515, "loss": 0.2365, "step": 8391 }, { "epoch": 1.313713212273012, "grad_norm": 0.46400439739227295, "learning_rate": 0.00010740469208211143, "loss": 0.168, "step": 8392 }, { "epoch": 1.3138697557921102, 
"grad_norm": 0.7878824472427368, "learning_rate": 0.0001073802541544477, "loss": 0.3099, "step": 8393 }, { "epoch": 1.3140262993112084, "grad_norm": 0.9498413801193237, "learning_rate": 0.00010735581622678395, "loss": 0.1739, "step": 8394 }, { "epoch": 1.3141828428303068, "grad_norm": 0.9091798663139343, "learning_rate": 0.00010733137829912023, "loss": 0.2746, "step": 8395 }, { "epoch": 1.314339386349405, "grad_norm": 0.8814572691917419, "learning_rate": 0.00010730694037145649, "loss": 0.2913, "step": 8396 }, { "epoch": 1.3144959298685035, "grad_norm": 0.8799420595169067, "learning_rate": 0.00010728250244379276, "loss": 0.3373, "step": 8397 }, { "epoch": 1.3146524733876017, "grad_norm": 0.6307176351547241, "learning_rate": 0.00010725806451612903, "loss": 0.3035, "step": 8398 }, { "epoch": 1.3148090169067, "grad_norm": 0.962165355682373, "learning_rate": 0.00010723362658846529, "loss": 0.2801, "step": 8399 }, { "epoch": 1.3149655604257984, "grad_norm": 0.4829339385032654, "learning_rate": 0.00010720918866080155, "loss": 0.1575, "step": 8400 }, { "epoch": 1.3151221039448968, "grad_norm": 0.9093053936958313, "learning_rate": 0.00010718475073313783, "loss": 0.3151, "step": 8401 }, { "epoch": 1.315278647463995, "grad_norm": 1.592719316482544, "learning_rate": 0.00010716031280547408, "loss": 0.3769, "step": 8402 }, { "epoch": 1.3154351909830932, "grad_norm": 1.357019066810608, "learning_rate": 0.00010713587487781034, "loss": 0.3612, "step": 8403 }, { "epoch": 1.3155917345021917, "grad_norm": 4.118210315704346, "learning_rate": 0.00010711143695014662, "loss": 0.3585, "step": 8404 }, { "epoch": 1.31574827802129, "grad_norm": 1.2734813690185547, "learning_rate": 0.00010708699902248289, "loss": 0.4046, "step": 8405 }, { "epoch": 1.3159048215403883, "grad_norm": 0.6813839077949524, "learning_rate": 0.00010706256109481914, "loss": 0.2412, "step": 8406 }, { "epoch": 1.3160613650594866, "grad_norm": 1.974395751953125, "learning_rate": 0.00010703812316715542, "loss": 0.3266, 
"step": 8407 }, { "epoch": 1.3162179085785848, "grad_norm": 1.261323094367981, "learning_rate": 0.00010701368523949168, "loss": 0.3412, "step": 8408 }, { "epoch": 1.3163744520976832, "grad_norm": 1.1508007049560547, "learning_rate": 0.00010698924731182795, "loss": 0.3782, "step": 8409 }, { "epoch": 1.3165309956167814, "grad_norm": 1.8937280178070068, "learning_rate": 0.00010696480938416422, "loss": 0.5881, "step": 8410 }, { "epoch": 1.3166875391358799, "grad_norm": 1.3329272270202637, "learning_rate": 0.00010694037145650048, "loss": 0.3392, "step": 8411 }, { "epoch": 1.316844082654978, "grad_norm": 0.9232223033905029, "learning_rate": 0.00010691593352883674, "loss": 0.5811, "step": 8412 }, { "epoch": 1.3170006261740763, "grad_norm": 1.0369586944580078, "learning_rate": 0.00010689149560117302, "loss": 0.4383, "step": 8413 }, { "epoch": 1.3171571696931748, "grad_norm": 2.1007254123687744, "learning_rate": 0.00010686705767350927, "loss": 0.554, "step": 8414 }, { "epoch": 1.317313713212273, "grad_norm": 1.1402606964111328, "learning_rate": 0.00010684261974584554, "loss": 0.4968, "step": 8415 }, { "epoch": 1.3174702567313714, "grad_norm": 1.1033568382263184, "learning_rate": 0.00010681818181818181, "loss": 0.3661, "step": 8416 }, { "epoch": 1.3176268002504696, "grad_norm": 1.5588769912719727, "learning_rate": 0.00010679374389051808, "loss": 0.3178, "step": 8417 }, { "epoch": 1.3177833437695678, "grad_norm": 1.2382311820983887, "learning_rate": 0.00010676930596285433, "loss": 0.5225, "step": 8418 }, { "epoch": 1.3179398872886663, "grad_norm": 2.5015268325805664, "learning_rate": 0.00010674486803519061, "loss": 0.7478, "step": 8419 }, { "epoch": 1.3180964308077645, "grad_norm": 3.2753753662109375, "learning_rate": 0.00010672043010752687, "loss": 0.5885, "step": 8420 }, { "epoch": 1.318252974326863, "grad_norm": 2.250828742980957, "learning_rate": 0.00010669599217986314, "loss": 0.6413, "step": 8421 }, { "epoch": 1.3184095178459612, "grad_norm": 1.6455849409103394, 
"learning_rate": 0.0001066715542521994, "loss": 0.8944, "step": 8422 }, { "epoch": 1.3185660613650594, "grad_norm": 1.4225467443466187, "learning_rate": 0.00010664711632453567, "loss": 1.0937, "step": 8423 }, { "epoch": 1.3187226048841578, "grad_norm": 3.475102663040161, "learning_rate": 0.00010662267839687193, "loss": 1.155, "step": 8424 }, { "epoch": 1.318879148403256, "grad_norm": 1.6069629192352295, "learning_rate": 0.00010659824046920821, "loss": 1.1532, "step": 8425 }, { "epoch": 1.3190356919223545, "grad_norm": 2.740570306777954, "learning_rate": 0.00010657380254154446, "loss": 0.9993, "step": 8426 }, { "epoch": 1.3191922354414527, "grad_norm": 1.2884479761123657, "learning_rate": 0.00010654936461388073, "loss": 0.342, "step": 8427 }, { "epoch": 1.319348778960551, "grad_norm": 5.032875061035156, "learning_rate": 0.000106524926686217, "loss": 0.8971, "step": 8428 }, { "epoch": 1.3195053224796494, "grad_norm": 3.1008434295654297, "learning_rate": 0.00010650048875855327, "loss": 1.1503, "step": 8429 }, { "epoch": 1.3196618659987476, "grad_norm": 3.7066233158111572, "learning_rate": 0.00010647605083088952, "loss": 1.0131, "step": 8430 }, { "epoch": 1.319818409517846, "grad_norm": 2.721331834793091, "learning_rate": 0.0001064516129032258, "loss": 1.4015, "step": 8431 }, { "epoch": 1.3199749530369442, "grad_norm": 2.37866473197937, "learning_rate": 0.00010642717497556206, "loss": 0.7506, "step": 8432 }, { "epoch": 1.3201314965560424, "grad_norm": 1.654268503189087, "learning_rate": 0.00010640273704789833, "loss": 1.0727, "step": 8433 }, { "epoch": 1.320288040075141, "grad_norm": 0.7855532765388489, "learning_rate": 0.0001063782991202346, "loss": 0.427, "step": 8434 }, { "epoch": 1.3204445835942393, "grad_norm": 2.7681164741516113, "learning_rate": 0.00010635386119257086, "loss": 0.6105, "step": 8435 }, { "epoch": 1.3206011271133375, "grad_norm": 4.213181495666504, "learning_rate": 0.00010632942326490712, "loss": 0.7922, "step": 8436 }, { "epoch": 
1.3207576706324358, "grad_norm": 4.588217258453369, "learning_rate": 0.0001063049853372434, "loss": 1.3575, "step": 8437 }, { "epoch": 1.3209142141515342, "grad_norm": 1.3220864534378052, "learning_rate": 0.00010628054740957965, "loss": 0.9862, "step": 8438 }, { "epoch": 1.3210707576706324, "grad_norm": 0.4366258680820465, "learning_rate": 0.00010625610948191592, "loss": 0.2151, "step": 8439 }, { "epoch": 1.3212273011897309, "grad_norm": 0.3557334244251251, "learning_rate": 0.0001062316715542522, "loss": 0.1961, "step": 8440 }, { "epoch": 1.321383844708829, "grad_norm": 0.6270286440849304, "learning_rate": 0.00010620723362658846, "loss": 0.3202, "step": 8441 }, { "epoch": 1.3215403882279273, "grad_norm": 0.5191538333892822, "learning_rate": 0.00010618279569892471, "loss": 0.1946, "step": 8442 }, { "epoch": 1.3216969317470257, "grad_norm": 0.35942304134368896, "learning_rate": 0.00010615835777126099, "loss": 0.2023, "step": 8443 }, { "epoch": 1.321853475266124, "grad_norm": 0.5088504552841187, "learning_rate": 0.00010613391984359726, "loss": 0.2216, "step": 8444 }, { "epoch": 1.3220100187852224, "grad_norm": 0.6313852667808533, "learning_rate": 0.00010610948191593352, "loss": 0.2147, "step": 8445 }, { "epoch": 1.3221665623043206, "grad_norm": 1.02388596534729, "learning_rate": 0.00010608504398826978, "loss": 0.3264, "step": 8446 }, { "epoch": 1.3223231058234188, "grad_norm": 0.7219918370246887, "learning_rate": 0.00010606060606060605, "loss": 0.3412, "step": 8447 }, { "epoch": 1.3224796493425173, "grad_norm": 1.3611302375793457, "learning_rate": 0.00010603616813294231, "loss": 0.2682, "step": 8448 }, { "epoch": 1.3226361928616155, "grad_norm": 0.6783427596092224, "learning_rate": 0.00010601173020527859, "loss": 0.2816, "step": 8449 }, { "epoch": 1.322792736380714, "grad_norm": 1.3119559288024902, "learning_rate": 0.00010598729227761484, "loss": 0.5017, "step": 8450 }, { "epoch": 1.3229492798998121, "grad_norm": 2.026618719100952, "learning_rate": 
0.00010596285434995111, "loss": 0.6098, "step": 8451 }, { "epoch": 1.3231058234189104, "grad_norm": 1.1035137176513672, "learning_rate": 0.00010593841642228739, "loss": 0.3054, "step": 8452 }, { "epoch": 1.3232623669380088, "grad_norm": 1.0470503568649292, "learning_rate": 0.00010591397849462365, "loss": 0.322, "step": 8453 }, { "epoch": 1.323418910457107, "grad_norm": 1.4040778875350952, "learning_rate": 0.0001058895405669599, "loss": 0.4344, "step": 8454 }, { "epoch": 1.3235754539762055, "grad_norm": 1.1632486581802368, "learning_rate": 0.00010586510263929618, "loss": 0.4741, "step": 8455 }, { "epoch": 1.3237319974953037, "grad_norm": 2.275049924850464, "learning_rate": 0.00010584066471163245, "loss": 0.4954, "step": 8456 }, { "epoch": 1.323888541014402, "grad_norm": 0.912585973739624, "learning_rate": 0.00010581622678396871, "loss": 0.2482, "step": 8457 }, { "epoch": 1.3240450845335003, "grad_norm": 2.720503568649292, "learning_rate": 0.00010579178885630498, "loss": 0.563, "step": 8458 }, { "epoch": 1.3242016280525986, "grad_norm": 1.2635301351547241, "learning_rate": 0.00010576735092864124, "loss": 0.3343, "step": 8459 }, { "epoch": 1.324358171571697, "grad_norm": 2.1451432704925537, "learning_rate": 0.0001057429130009775, "loss": 0.4257, "step": 8460 }, { "epoch": 1.3245147150907952, "grad_norm": 0.8080310225486755, "learning_rate": 0.00010571847507331378, "loss": 0.3141, "step": 8461 }, { "epoch": 1.3246712586098934, "grad_norm": 2.107248544692993, "learning_rate": 0.00010569403714565004, "loss": 0.3324, "step": 8462 }, { "epoch": 1.3248278021289919, "grad_norm": 1.0356974601745605, "learning_rate": 0.0001056695992179863, "loss": 0.3303, "step": 8463 }, { "epoch": 1.32498434564809, "grad_norm": 1.8852458000183105, "learning_rate": 0.00010564516129032258, "loss": 0.4119, "step": 8464 }, { "epoch": 1.3251408891671885, "grad_norm": 1.1586437225341797, "learning_rate": 0.00010562072336265884, "loss": 0.4388, "step": 8465 }, { "epoch": 1.3252974326862867, 
"grad_norm": 2.8293962478637695, "learning_rate": 0.0001055962854349951, "loss": 0.7354, "step": 8466 }, { "epoch": 1.325453976205385, "grad_norm": 3.1717278957366943, "learning_rate": 0.00010557184750733137, "loss": 0.6096, "step": 8467 }, { "epoch": 1.3256105197244834, "grad_norm": 3.8848044872283936, "learning_rate": 0.00010554740957966764, "loss": 1.0067, "step": 8468 }, { "epoch": 1.3257670632435818, "grad_norm": 1.52760648727417, "learning_rate": 0.0001055229716520039, "loss": 0.5702, "step": 8469 }, { "epoch": 1.32592360676268, "grad_norm": 1.8875513076782227, "learning_rate": 0.00010549853372434017, "loss": 0.8501, "step": 8470 }, { "epoch": 1.3260801502817783, "grad_norm": 4.692626953125, "learning_rate": 0.00010547409579667643, "loss": 1.1365, "step": 8471 }, { "epoch": 1.3262366938008767, "grad_norm": 3.195230484008789, "learning_rate": 0.0001054496578690127, "loss": 1.165, "step": 8472 }, { "epoch": 1.326393237319975, "grad_norm": 2.567884683609009, "learning_rate": 0.00010542521994134898, "loss": 1.016, "step": 8473 }, { "epoch": 1.3265497808390734, "grad_norm": 2.327993392944336, "learning_rate": 0.00010540078201368523, "loss": 1.1238, "step": 8474 }, { "epoch": 1.3267063243581716, "grad_norm": 4.008900165557861, "learning_rate": 0.00010537634408602149, "loss": 1.3329, "step": 8475 }, { "epoch": 1.3268628678772698, "grad_norm": 3.4523065090179443, "learning_rate": 0.00010535190615835777, "loss": 1.2717, "step": 8476 }, { "epoch": 1.3270194113963683, "grad_norm": 2.4509525299072266, "learning_rate": 0.00010532746823069403, "loss": 1.3055, "step": 8477 }, { "epoch": 1.3271759549154665, "grad_norm": 2.2657418251037598, "learning_rate": 0.00010530303030303029, "loss": 1.0867, "step": 8478 }, { "epoch": 1.327332498434565, "grad_norm": 10.114274024963379, "learning_rate": 0.00010527859237536656, "loss": 1.2424, "step": 8479 }, { "epoch": 1.3274890419536631, "grad_norm": 2.6086230278015137, "learning_rate": 0.00010525415444770283, "loss": 1.5289, "step": 
8480 }, { "epoch": 1.3276455854727613, "grad_norm": 7.500784397125244, "learning_rate": 0.0001052297165200391, "loss": 0.8191, "step": 8481 }, { "epoch": 1.3278021289918598, "grad_norm": 2.1050162315368652, "learning_rate": 0.00010520527859237536, "loss": 0.9757, "step": 8482 }, { "epoch": 1.327958672510958, "grad_norm": 1.964867353439331, "learning_rate": 0.00010518084066471162, "loss": 1.1642, "step": 8483 }, { "epoch": 1.3281152160300564, "grad_norm": 2.479874849319458, "learning_rate": 0.00010515640273704789, "loss": 0.9598, "step": 8484 }, { "epoch": 1.3282717595491547, "grad_norm": 1.8766109943389893, "learning_rate": 0.00010513196480938417, "loss": 0.3375, "step": 8485 }, { "epoch": 1.3284283030682529, "grad_norm": 2.781697988510132, "learning_rate": 0.00010510752688172042, "loss": 1.2596, "step": 8486 }, { "epoch": 1.3285848465873513, "grad_norm": 4.228986740112305, "learning_rate": 0.00010508308895405668, "loss": 1.0103, "step": 8487 }, { "epoch": 1.3287413901064495, "grad_norm": 1.7900171279907227, "learning_rate": 0.00010505865102639296, "loss": 0.83, "step": 8488 }, { "epoch": 1.328897933625548, "grad_norm": 0.6766242384910583, "learning_rate": 0.00010503421309872923, "loss": 0.2403, "step": 8489 }, { "epoch": 1.3290544771446462, "grad_norm": 0.5489671230316162, "learning_rate": 0.00010500977517106548, "loss": 0.3599, "step": 8490 }, { "epoch": 1.3292110206637444, "grad_norm": 0.7693419456481934, "learning_rate": 0.00010498533724340176, "loss": 0.2344, "step": 8491 }, { "epoch": 1.3293675641828429, "grad_norm": 0.5848720073699951, "learning_rate": 0.00010496089931573802, "loss": 0.2692, "step": 8492 }, { "epoch": 1.329524107701941, "grad_norm": 0.4316113591194153, "learning_rate": 0.00010493646138807427, "loss": 0.1891, "step": 8493 }, { "epoch": 1.3296806512210395, "grad_norm": 0.5180829167366028, "learning_rate": 0.00010491202346041055, "loss": 0.2097, "step": 8494 }, { "epoch": 1.3298371947401377, "grad_norm": 0.31930309534072876, "learning_rate": 
0.00010488758553274681, "loss": 0.1346, "step": 8495 }, { "epoch": 1.329993738259236, "grad_norm": 0.5279285907745361, "learning_rate": 0.00010486314760508308, "loss": 0.3029, "step": 8496 }, { "epoch": 1.3301502817783344, "grad_norm": 0.6613864898681641, "learning_rate": 0.00010483870967741936, "loss": 0.319, "step": 8497 }, { "epoch": 1.3303068252974326, "grad_norm": 0.5798127055168152, "learning_rate": 0.00010481427174975561, "loss": 0.2005, "step": 8498 }, { "epoch": 1.330463368816531, "grad_norm": 0.7351884245872498, "learning_rate": 0.00010478983382209187, "loss": 0.2422, "step": 8499 }, { "epoch": 1.3306199123356293, "grad_norm": 0.8078888058662415, "learning_rate": 0.00010476539589442815, "loss": 0.3467, "step": 8500 }, { "epoch": 1.3307764558547275, "grad_norm": 0.8279627561569214, "learning_rate": 0.00010474095796676442, "loss": 0.2722, "step": 8501 }, { "epoch": 1.330932999373826, "grad_norm": 1.2416898012161255, "learning_rate": 0.00010471652003910067, "loss": 0.4529, "step": 8502 }, { "epoch": 1.3310895428929244, "grad_norm": 1.2886146306991577, "learning_rate": 0.00010469208211143695, "loss": 0.4516, "step": 8503 }, { "epoch": 1.3312460864120226, "grad_norm": 0.4999758303165436, "learning_rate": 0.00010466764418377321, "loss": 0.1717, "step": 8504 }, { "epoch": 1.3314026299311208, "grad_norm": 1.2985860109329224, "learning_rate": 0.00010464320625610946, "loss": 0.3384, "step": 8505 }, { "epoch": 1.3315591734502192, "grad_norm": 2.1126205921173096, "learning_rate": 0.00010461876832844574, "loss": 0.4438, "step": 8506 }, { "epoch": 1.3317157169693175, "grad_norm": 2.8582603931427, "learning_rate": 0.000104594330400782, "loss": 0.2766, "step": 8507 }, { "epoch": 1.331872260488416, "grad_norm": 1.146053433418274, "learning_rate": 0.00010456989247311827, "loss": 0.3095, "step": 8508 }, { "epoch": 1.3320288040075141, "grad_norm": 1.1559828519821167, "learning_rate": 0.00010454545454545455, "loss": 0.5287, "step": 8509 }, { "epoch": 1.3321853475266123, 
"grad_norm": 1.217333436012268, "learning_rate": 0.0001045210166177908, "loss": 0.3, "step": 8510 }, { "epoch": 1.3323418910457108, "grad_norm": 1.9483610391616821, "learning_rate": 0.00010449657869012706, "loss": 0.6234, "step": 8511 }, { "epoch": 1.332498434564809, "grad_norm": 1.2724167108535767, "learning_rate": 0.00010447214076246334, "loss": 0.6622, "step": 8512 }, { "epoch": 1.3326549780839074, "grad_norm": 3.2737345695495605, "learning_rate": 0.00010444770283479961, "loss": 0.8264, "step": 8513 }, { "epoch": 1.3328115216030056, "grad_norm": 2.594571352005005, "learning_rate": 0.00010442326490713586, "loss": 0.649, "step": 8514 }, { "epoch": 1.3329680651221039, "grad_norm": 2.1215741634368896, "learning_rate": 0.00010439882697947214, "loss": 0.4745, "step": 8515 }, { "epoch": 1.3331246086412023, "grad_norm": 2.0820720195770264, "learning_rate": 0.0001043743890518084, "loss": 0.8717, "step": 8516 }, { "epoch": 1.3332811521603005, "grad_norm": 1.5317957401275635, "learning_rate": 0.00010434995112414465, "loss": 0.6137, "step": 8517 }, { "epoch": 1.333437695679399, "grad_norm": 2.0376477241516113, "learning_rate": 0.00010432551319648093, "loss": 0.9649, "step": 8518 }, { "epoch": 1.3335942391984972, "grad_norm": 2.7941699028015137, "learning_rate": 0.0001043010752688172, "loss": 0.963, "step": 8519 }, { "epoch": 1.3337507827175954, "grad_norm": 2.3799190521240234, "learning_rate": 0.00010427663734115346, "loss": 0.5505, "step": 8520 }, { "epoch": 1.3339073262366938, "grad_norm": 1.9923593997955322, "learning_rate": 0.00010425219941348974, "loss": 0.4311, "step": 8521 }, { "epoch": 1.334063869755792, "grad_norm": 1.5143426656723022, "learning_rate": 0.00010422776148582599, "loss": 0.8483, "step": 8522 }, { "epoch": 1.3342204132748905, "grad_norm": 1.211293339729309, "learning_rate": 0.00010420332355816226, "loss": 0.7053, "step": 8523 }, { "epoch": 1.3343769567939887, "grad_norm": 2.1427128314971924, "learning_rate": 0.00010417888563049853, "loss": 0.7195, 
"step": 8524 }, { "epoch": 1.334533500313087, "grad_norm": 2.3068511486053467, "learning_rate": 0.0001041544477028348, "loss": 1.0497, "step": 8525 }, { "epoch": 1.3346900438321854, "grad_norm": 1.6837023496627808, "learning_rate": 0.00010413000977517105, "loss": 0.8406, "step": 8526 }, { "epoch": 1.3348465873512836, "grad_norm": 2.0553340911865234, "learning_rate": 0.00010410557184750733, "loss": 1.1097, "step": 8527 }, { "epoch": 1.335003130870382, "grad_norm": 2.150421619415283, "learning_rate": 0.0001040811339198436, "loss": 1.5506, "step": 8528 }, { "epoch": 1.3351596743894802, "grad_norm": 1.8495726585388184, "learning_rate": 0.00010405669599217984, "loss": 1.0281, "step": 8529 }, { "epoch": 1.3353162179085785, "grad_norm": 5.749996662139893, "learning_rate": 0.00010403225806451612, "loss": 1.2466, "step": 8530 }, { "epoch": 1.335472761427677, "grad_norm": 3.338156223297119, "learning_rate": 0.00010400782013685239, "loss": 0.9421, "step": 8531 }, { "epoch": 1.3356293049467753, "grad_norm": 1.7294635772705078, "learning_rate": 0.00010398338220918865, "loss": 0.594, "step": 8532 }, { "epoch": 1.3357858484658736, "grad_norm": 1.9416723251342773, "learning_rate": 0.00010395894428152493, "loss": 0.5398, "step": 8533 }, { "epoch": 1.3359423919849718, "grad_norm": 4.1962785720825195, "learning_rate": 0.00010393450635386118, "loss": 0.8419, "step": 8534 }, { "epoch": 1.33609893550407, "grad_norm": 1.9734654426574707, "learning_rate": 0.00010391006842619745, "loss": 0.3254, "step": 8535 }, { "epoch": 1.3362554790231684, "grad_norm": 2.3827269077301025, "learning_rate": 0.00010388563049853373, "loss": 0.8437, "step": 8536 }, { "epoch": 1.3364120225422669, "grad_norm": 1.408588171005249, "learning_rate": 0.00010386119257086998, "loss": 0.6051, "step": 8537 }, { "epoch": 1.336568566061365, "grad_norm": 2.1959314346313477, "learning_rate": 0.00010383675464320624, "loss": 0.855, "step": 8538 }, { "epoch": 1.3367251095804633, "grad_norm": 0.5544465780258179, 
"learning_rate": 0.00010381231671554252, "loss": 0.2188, "step": 8539 }, { "epoch": 1.3368816530995618, "grad_norm": 1.3485573530197144, "learning_rate": 0.00010378787878787878, "loss": 0.2731, "step": 8540 }, { "epoch": 1.33703819661866, "grad_norm": 0.4418382942676544, "learning_rate": 0.00010376344086021504, "loss": 0.2697, "step": 8541 }, { "epoch": 1.3371947401377584, "grad_norm": 0.8457216024398804, "learning_rate": 0.00010373900293255131, "loss": 0.3345, "step": 8542 }, { "epoch": 1.3373512836568566, "grad_norm": 0.6509329080581665, "learning_rate": 0.00010371456500488758, "loss": 0.2579, "step": 8543 }, { "epoch": 1.3375078271759548, "grad_norm": 0.7329632043838501, "learning_rate": 0.00010369012707722384, "loss": 0.3396, "step": 8544 }, { "epoch": 1.3376643706950533, "grad_norm": 0.5292937755584717, "learning_rate": 0.00010366568914956012, "loss": 0.1596, "step": 8545 }, { "epoch": 1.3378209142141515, "grad_norm": 0.620823085308075, "learning_rate": 0.00010364125122189637, "loss": 0.3351, "step": 8546 }, { "epoch": 1.33797745773325, "grad_norm": 0.5701507329940796, "learning_rate": 0.00010361681329423264, "loss": 0.2396, "step": 8547 }, { "epoch": 1.3381340012523482, "grad_norm": 0.6084917783737183, "learning_rate": 0.00010359237536656892, "loss": 0.2507, "step": 8548 }, { "epoch": 1.3382905447714464, "grad_norm": 0.8983578085899353, "learning_rate": 0.00010356793743890517, "loss": 0.305, "step": 8549 }, { "epoch": 1.3384470882905448, "grad_norm": 0.9507225751876831, "learning_rate": 0.00010354349951124143, "loss": 0.3077, "step": 8550 }, { "epoch": 1.338603631809643, "grad_norm": 1.0135985612869263, "learning_rate": 0.00010351906158357771, "loss": 0.2864, "step": 8551 }, { "epoch": 1.3387601753287415, "grad_norm": 1.2849130630493164, "learning_rate": 0.00010349462365591398, "loss": 0.2869, "step": 8552 }, { "epoch": 1.3389167188478397, "grad_norm": 1.036739706993103, "learning_rate": 0.00010347018572825023, "loss": 0.2438, "step": 8553 }, { "epoch": 
1.339073262366938, "grad_norm": 0.8827303051948547, "learning_rate": 0.0001034457478005865, "loss": 0.4037, "step": 8554 }, { "epoch": 1.3392298058860364, "grad_norm": 0.9790602326393127, "learning_rate": 0.00010342130987292277, "loss": 0.4922, "step": 8555 }, { "epoch": 1.3393863494051346, "grad_norm": 1.958410382270813, "learning_rate": 0.00010339687194525903, "loss": 0.4532, "step": 8556 }, { "epoch": 1.339542892924233, "grad_norm": 1.1825295686721802, "learning_rate": 0.00010337243401759531, "loss": 0.3544, "step": 8557 }, { "epoch": 1.3396994364433312, "grad_norm": 0.9426168203353882, "learning_rate": 0.00010334799608993156, "loss": 0.1968, "step": 8558 }, { "epoch": 1.3398559799624294, "grad_norm": 0.9069138765335083, "learning_rate": 0.00010332355816226783, "loss": 0.3628, "step": 8559 }, { "epoch": 1.3400125234815279, "grad_norm": 1.00847589969635, "learning_rate": 0.00010329912023460411, "loss": 0.1958, "step": 8560 }, { "epoch": 1.340169067000626, "grad_norm": 2.047116994857788, "learning_rate": 0.00010327468230694036, "loss": 0.3903, "step": 8561 }, { "epoch": 1.3403256105197245, "grad_norm": 2.307471513748169, "learning_rate": 0.00010325024437927662, "loss": 0.6619, "step": 8562 }, { "epoch": 1.3404821540388228, "grad_norm": 2.9408271312713623, "learning_rate": 0.0001032258064516129, "loss": 0.7681, "step": 8563 }, { "epoch": 1.340638697557921, "grad_norm": 1.2898457050323486, "learning_rate": 0.00010320136852394917, "loss": 0.5818, "step": 8564 }, { "epoch": 1.3407952410770194, "grad_norm": 1.0917787551879883, "learning_rate": 0.00010317693059628542, "loss": 0.2375, "step": 8565 }, { "epoch": 1.3409517845961179, "grad_norm": 5.130285739898682, "learning_rate": 0.0001031524926686217, "loss": 0.9421, "step": 8566 }, { "epoch": 1.341108328115216, "grad_norm": 3.367882013320923, "learning_rate": 0.00010312805474095796, "loss": 0.9799, "step": 8567 }, { "epoch": 1.3412648716343143, "grad_norm": 1.5562324523925781, "learning_rate": 0.00010310361681329423, 
"loss": 0.4556, "step": 8568 }, { "epoch": 1.3414214151534125, "grad_norm": 2.622732400894165, "learning_rate": 0.00010307917888563048, "loss": 0.7766, "step": 8569 }, { "epoch": 1.341577958672511, "grad_norm": 6.2125630378723145, "learning_rate": 0.00010305474095796676, "loss": 0.9861, "step": 8570 }, { "epoch": 1.3417345021916094, "grad_norm": 2.0387191772460938, "learning_rate": 0.00010303030303030302, "loss": 1.042, "step": 8571 }, { "epoch": 1.3418910457107076, "grad_norm": 2.0245776176452637, "learning_rate": 0.00010300586510263929, "loss": 0.9906, "step": 8572 }, { "epoch": 1.3420475892298058, "grad_norm": 1.5084283351898193, "learning_rate": 0.00010298142717497555, "loss": 0.3743, "step": 8573 }, { "epoch": 1.3422041327489043, "grad_norm": 2.843535900115967, "learning_rate": 0.00010295698924731181, "loss": 0.8406, "step": 8574 }, { "epoch": 1.3423606762680025, "grad_norm": 3.4681601524353027, "learning_rate": 0.00010293255131964808, "loss": 0.788, "step": 8575 }, { "epoch": 1.342517219787101, "grad_norm": 2.2604522705078125, "learning_rate": 0.00010290811339198436, "loss": 0.7895, "step": 8576 }, { "epoch": 1.3426737633061991, "grad_norm": 3.683725357055664, "learning_rate": 0.00010288367546432061, "loss": 0.685, "step": 8577 }, { "epoch": 1.3428303068252974, "grad_norm": 2.207420825958252, "learning_rate": 0.00010285923753665687, "loss": 0.9342, "step": 8578 }, { "epoch": 1.3429868503443958, "grad_norm": 2.529273748397827, "learning_rate": 0.00010283479960899315, "loss": 1.3186, "step": 8579 }, { "epoch": 1.343143393863494, "grad_norm": 2.481981039047241, "learning_rate": 0.00010281036168132942, "loss": 1.0946, "step": 8580 }, { "epoch": 1.3432999373825925, "grad_norm": 1.3697443008422852, "learning_rate": 0.00010278592375366567, "loss": 0.6584, "step": 8581 }, { "epoch": 1.3434564809016907, "grad_norm": 2.0801591873168945, "learning_rate": 0.00010276148582600195, "loss": 0.5824, "step": 8582 }, { "epoch": 1.343613024420789, "grad_norm": 2.740255832672119, 
"learning_rate": 0.00010273704789833821, "loss": 0.7734, "step": 8583 }, { "epoch": 1.3437695679398873, "grad_norm": 2.066756010055542, "learning_rate": 0.00010271260997067448, "loss": 0.8381, "step": 8584 }, { "epoch": 1.3439261114589856, "grad_norm": 2.6519453525543213, "learning_rate": 0.00010268817204301074, "loss": 0.7212, "step": 8585 }, { "epoch": 1.344082654978084, "grad_norm": 2.464167833328247, "learning_rate": 0.000102663734115347, "loss": 0.7086, "step": 8586 }, { "epoch": 1.3442391984971822, "grad_norm": 2.8775999546051025, "learning_rate": 0.00010263929618768327, "loss": 0.8188, "step": 8587 }, { "epoch": 1.3443957420162804, "grad_norm": 2.8111610412597656, "learning_rate": 0.00010261485826001955, "loss": 0.6777, "step": 8588 }, { "epoch": 1.3445522855353789, "grad_norm": 0.4312553405761719, "learning_rate": 0.0001025904203323558, "loss": 0.1694, "step": 8589 }, { "epoch": 1.344708829054477, "grad_norm": 0.5284144282341003, "learning_rate": 0.00010256598240469207, "loss": 0.1899, "step": 8590 }, { "epoch": 1.3448653725735755, "grad_norm": 0.9777967929840088, "learning_rate": 0.00010254154447702834, "loss": 0.3319, "step": 8591 }, { "epoch": 1.3450219160926737, "grad_norm": 1.211936116218567, "learning_rate": 0.00010251710654936461, "loss": 0.1933, "step": 8592 }, { "epoch": 1.345178459611772, "grad_norm": 0.5879170894622803, "learning_rate": 0.00010249266862170086, "loss": 0.2897, "step": 8593 }, { "epoch": 1.3453350031308704, "grad_norm": 1.0071542263031006, "learning_rate": 0.00010246823069403714, "loss": 0.2997, "step": 8594 }, { "epoch": 1.3454915466499686, "grad_norm": 0.840803325176239, "learning_rate": 0.0001024437927663734, "loss": 0.2348, "step": 8595 }, { "epoch": 1.345648090169067, "grad_norm": 0.6389973163604736, "learning_rate": 0.00010241935483870965, "loss": 0.1762, "step": 8596 }, { "epoch": 1.3458046336881653, "grad_norm": 0.771003007888794, "learning_rate": 0.00010239491691104593, "loss": 0.278, "step": 8597 }, { "epoch": 
1.3459611772072635, "grad_norm": 1.348070740699768, "learning_rate": 0.0001023704789833822, "loss": 0.3955, "step": 8598 }, { "epoch": 1.346117720726362, "grad_norm": 0.9808512926101685, "learning_rate": 0.00010234604105571846, "loss": 0.3047, "step": 8599 }, { "epoch": 1.3462742642454604, "grad_norm": 1.0971183776855469, "learning_rate": 0.00010232160312805474, "loss": 0.4669, "step": 8600 }, { "epoch": 1.3464308077645586, "grad_norm": 2.088040351867676, "learning_rate": 0.00010229716520039099, "loss": 0.5127, "step": 8601 }, { "epoch": 1.3465873512836568, "grad_norm": 0.7336698770523071, "learning_rate": 0.00010227272727272726, "loss": 0.3575, "step": 8602 }, { "epoch": 1.3467438948027552, "grad_norm": 0.7726986408233643, "learning_rate": 0.00010224828934506353, "loss": 0.2192, "step": 8603 }, { "epoch": 1.3469004383218535, "grad_norm": 1.5100910663604736, "learning_rate": 0.0001022238514173998, "loss": 0.3309, "step": 8604 }, { "epoch": 1.347056981840952, "grad_norm": 1.1917885541915894, "learning_rate": 0.00010219941348973605, "loss": 0.4132, "step": 8605 }, { "epoch": 1.3472135253600501, "grad_norm": 1.4236356019973755, "learning_rate": 0.00010217497556207233, "loss": 0.455, "step": 8606 }, { "epoch": 1.3473700688791483, "grad_norm": 1.214121699333191, "learning_rate": 0.0001021505376344086, "loss": 0.4558, "step": 8607 }, { "epoch": 1.3475266123982468, "grad_norm": 1.2391715049743652, "learning_rate": 0.00010212609970674485, "loss": 0.3848, "step": 8608 }, { "epoch": 1.347683155917345, "grad_norm": 1.3483924865722656, "learning_rate": 0.00010210166177908112, "loss": 0.5066, "step": 8609 }, { "epoch": 1.3478396994364434, "grad_norm": 1.2172237634658813, "learning_rate": 0.00010207722385141739, "loss": 0.373, "step": 8610 }, { "epoch": 1.3479962429555417, "grad_norm": 0.9454227685928345, "learning_rate": 0.00010205278592375365, "loss": 0.3206, "step": 8611 }, { "epoch": 1.3481527864746399, "grad_norm": 2.5474867820739746, "learning_rate": 
0.00010202834799608993, "loss": 0.4893, "step": 8612 }, { "epoch": 1.3483093299937383, "grad_norm": 0.9093128442764282, "learning_rate": 0.00010200391006842618, "loss": 0.404, "step": 8613 }, { "epoch": 1.3484658735128365, "grad_norm": 2.522016763687134, "learning_rate": 0.00010197947214076245, "loss": 0.8356, "step": 8614 }, { "epoch": 1.348622417031935, "grad_norm": 1.7662311792373657, "learning_rate": 0.00010195503421309873, "loss": 0.395, "step": 8615 }, { "epoch": 1.3487789605510332, "grad_norm": 1.433812141418457, "learning_rate": 0.00010193059628543499, "loss": 0.5021, "step": 8616 }, { "epoch": 1.3489355040701314, "grad_norm": 1.4101475477218628, "learning_rate": 0.00010190615835777124, "loss": 0.5901, "step": 8617 }, { "epoch": 1.3490920475892298, "grad_norm": 4.594160556793213, "learning_rate": 0.00010188172043010752, "loss": 0.8687, "step": 8618 }, { "epoch": 1.349248591108328, "grad_norm": 1.1630065441131592, "learning_rate": 0.00010185728250244379, "loss": 0.4597, "step": 8619 }, { "epoch": 1.3494051346274265, "grad_norm": 2.0793771743774414, "learning_rate": 0.00010183284457478004, "loss": 0.6061, "step": 8620 }, { "epoch": 1.3495616781465247, "grad_norm": 2.358942985534668, "learning_rate": 0.00010180840664711631, "loss": 0.8025, "step": 8621 }, { "epoch": 1.349718221665623, "grad_norm": 2.1486661434173584, "learning_rate": 0.00010178396871945258, "loss": 0.6873, "step": 8622 }, { "epoch": 1.3498747651847214, "grad_norm": 4.260776519775391, "learning_rate": 0.00010175953079178884, "loss": 1.0033, "step": 8623 }, { "epoch": 1.3500313087038196, "grad_norm": 1.1536997556686401, "learning_rate": 0.00010173509286412512, "loss": 0.3625, "step": 8624 }, { "epoch": 1.350187852222918, "grad_norm": 2.115208625793457, "learning_rate": 0.00010171065493646137, "loss": 0.7172, "step": 8625 }, { "epoch": 1.3503443957420163, "grad_norm": 2.6683943271636963, "learning_rate": 0.00010168621700879764, "loss": 1.299, "step": 8626 }, { "epoch": 1.3505009392611145, 
"grad_norm": 1.838832139968872, "learning_rate": 0.00010166177908113392, "loss": 1.0832, "step": 8627 }, { "epoch": 1.350657482780213, "grad_norm": 3.921957492828369, "learning_rate": 0.00010163734115347018, "loss": 1.1096, "step": 8628 }, { "epoch": 1.3508140262993111, "grad_norm": 2.774120569229126, "learning_rate": 0.00010161290322580643, "loss": 1.1012, "step": 8629 }, { "epoch": 1.3509705698184096, "grad_norm": 2.269116163253784, "learning_rate": 0.00010158846529814271, "loss": 0.8941, "step": 8630 }, { "epoch": 1.3511271133375078, "grad_norm": 2.243638038635254, "learning_rate": 0.00010156402737047898, "loss": 1.0635, "step": 8631 }, { "epoch": 1.351283656856606, "grad_norm": 1.6823785305023193, "learning_rate": 0.00010153958944281523, "loss": 0.4038, "step": 8632 }, { "epoch": 1.3514402003757044, "grad_norm": 3.375223159790039, "learning_rate": 0.0001015151515151515, "loss": 0.9487, "step": 8633 }, { "epoch": 1.3515967438948029, "grad_norm": 2.887037754058838, "learning_rate": 0.00010149071358748777, "loss": 1.2298, "step": 8634 }, { "epoch": 1.351753287413901, "grad_norm": 2.0357108116149902, "learning_rate": 0.00010146627565982404, "loss": 0.7477, "step": 8635 }, { "epoch": 1.3519098309329993, "grad_norm": 2.5809435844421387, "learning_rate": 0.00010144183773216031, "loss": 1.3611, "step": 8636 }, { "epoch": 1.3520663744520978, "grad_norm": 1.5694135427474976, "learning_rate": 0.00010141739980449657, "loss": 0.5658, "step": 8637 }, { "epoch": 1.352222917971196, "grad_norm": 2.64839243888855, "learning_rate": 0.00010139296187683283, "loss": 0.9227, "step": 8638 }, { "epoch": 1.3523794614902944, "grad_norm": 0.7386291027069092, "learning_rate": 0.00010136852394916911, "loss": 0.2149, "step": 8639 }, { "epoch": 1.3525360050093926, "grad_norm": 0.43771645426750183, "learning_rate": 0.00010134408602150537, "loss": 0.2091, "step": 8640 }, { "epoch": 1.3526925485284909, "grad_norm": 0.7924376726150513, "learning_rate": 0.00010131964809384162, "loss": 0.3522, 
"step": 8641 }, { "epoch": 1.3528490920475893, "grad_norm": 0.5517721176147461, "learning_rate": 0.0001012952101661779, "loss": 0.264, "step": 8642 }, { "epoch": 1.3530056355666875, "grad_norm": 0.5062223672866821, "learning_rate": 0.00010127077223851417, "loss": 0.1477, "step": 8643 }, { "epoch": 1.353162179085786, "grad_norm": 0.5685814619064331, "learning_rate": 0.00010124633431085042, "loss": 0.3525, "step": 8644 }, { "epoch": 1.3533187226048842, "grad_norm": 0.5402782559394836, "learning_rate": 0.0001012218963831867, "loss": 0.2294, "step": 8645 }, { "epoch": 1.3534752661239824, "grad_norm": 0.5032253861427307, "learning_rate": 0.00010119745845552296, "loss": 0.2081, "step": 8646 }, { "epoch": 1.3536318096430808, "grad_norm": 1.4722563028335571, "learning_rate": 0.00010117302052785923, "loss": 0.3869, "step": 8647 }, { "epoch": 1.353788353162179, "grad_norm": 0.9200348854064941, "learning_rate": 0.0001011485826001955, "loss": 0.3021, "step": 8648 }, { "epoch": 1.3539448966812775, "grad_norm": 0.6962469816207886, "learning_rate": 0.00010112414467253176, "loss": 0.1811, "step": 8649 }, { "epoch": 1.3541014402003757, "grad_norm": 0.8069591522216797, "learning_rate": 0.00010109970674486802, "loss": 0.3023, "step": 8650 }, { "epoch": 1.354257983719474, "grad_norm": 0.669252336025238, "learning_rate": 0.0001010752688172043, "loss": 0.1662, "step": 8651 }, { "epoch": 1.3544145272385724, "grad_norm": 0.6303135752677917, "learning_rate": 0.00010105083088954055, "loss": 0.3415, "step": 8652 }, { "epoch": 1.3545710707576706, "grad_norm": 0.9292386770248413, "learning_rate": 0.00010102639296187682, "loss": 0.284, "step": 8653 }, { "epoch": 1.354727614276769, "grad_norm": 1.1860474348068237, "learning_rate": 0.0001010019550342131, "loss": 0.2997, "step": 8654 }, { "epoch": 1.3548841577958672, "grad_norm": 1.6328331232070923, "learning_rate": 0.00010097751710654936, "loss": 0.5064, "step": 8655 }, { "epoch": 1.3550407013149655, "grad_norm": 1.2533385753631592, 
"learning_rate": 0.00010095307917888561, "loss": 0.4723, "step": 8656 }, { "epoch": 1.355197244834064, "grad_norm": 1.2162647247314453, "learning_rate": 0.00010092864125122189, "loss": 0.3477, "step": 8657 }, { "epoch": 1.3553537883531621, "grad_norm": 1.2495274543762207, "learning_rate": 0.00010090420332355815, "loss": 0.2559, "step": 8658 }, { "epoch": 1.3555103318722606, "grad_norm": 1.11675226688385, "learning_rate": 0.00010087976539589442, "loss": 0.3749, "step": 8659 }, { "epoch": 1.3556668753913588, "grad_norm": 1.302672028541565, "learning_rate": 0.0001008553274682307, "loss": 0.2769, "step": 8660 }, { "epoch": 1.355823418910457, "grad_norm": 1.504712462425232, "learning_rate": 0.00010083088954056695, "loss": 0.7427, "step": 8661 }, { "epoch": 1.3559799624295554, "grad_norm": 1.3615977764129639, "learning_rate": 0.00010080645161290321, "loss": 0.824, "step": 8662 }, { "epoch": 1.3561365059486536, "grad_norm": 1.5800176858901978, "learning_rate": 0.00010078201368523949, "loss": 0.4904, "step": 8663 }, { "epoch": 1.356293049467752, "grad_norm": 1.6747968196868896, "learning_rate": 0.00010075757575757574, "loss": 0.4018, "step": 8664 }, { "epoch": 1.3564495929868503, "grad_norm": 1.6093438863754272, "learning_rate": 0.000100733137829912, "loss": 0.4234, "step": 8665 }, { "epoch": 1.3566061365059485, "grad_norm": 1.9799742698669434, "learning_rate": 0.00010070869990224828, "loss": 0.6212, "step": 8666 }, { "epoch": 1.356762680025047, "grad_norm": 1.8439103364944458, "learning_rate": 0.00010068426197458455, "loss": 0.5132, "step": 8667 }, { "epoch": 1.3569192235441454, "grad_norm": 2.6742753982543945, "learning_rate": 0.0001006598240469208, "loss": 0.6544, "step": 8668 }, { "epoch": 1.3570757670632436, "grad_norm": 2.446819543838501, "learning_rate": 0.00010063538611925708, "loss": 0.9793, "step": 8669 }, { "epoch": 1.3572323105823418, "grad_norm": 1.334675669670105, "learning_rate": 0.00010061094819159334, "loss": 0.5525, "step": 8670 }, { "epoch": 
1.3573888541014403, "grad_norm": 1.5871644020080566, "learning_rate": 0.00010058651026392961, "loss": 0.4494, "step": 8671 }, { "epoch": 1.3575453976205385, "grad_norm": 1.5029113292694092, "learning_rate": 0.00010056207233626589, "loss": 0.8743, "step": 8672 }, { "epoch": 1.357701941139637, "grad_norm": 2.237409830093384, "learning_rate": 0.00010053763440860214, "loss": 0.806, "step": 8673 }, { "epoch": 1.3578584846587352, "grad_norm": 2.4241881370544434, "learning_rate": 0.0001005131964809384, "loss": 0.8528, "step": 8674 }, { "epoch": 1.3580150281778334, "grad_norm": 2.1619865894317627, "learning_rate": 0.00010048875855327468, "loss": 0.5835, "step": 8675 }, { "epoch": 1.3581715716969318, "grad_norm": 2.61045503616333, "learning_rate": 0.00010046432062561093, "loss": 1.6964, "step": 8676 }, { "epoch": 1.35832811521603, "grad_norm": 1.533729910850525, "learning_rate": 0.0001004398826979472, "loss": 0.8914, "step": 8677 }, { "epoch": 1.3584846587351285, "grad_norm": 2.500321388244629, "learning_rate": 0.00010041544477028348, "loss": 0.9521, "step": 8678 }, { "epoch": 1.3586412022542267, "grad_norm": 5.0488080978393555, "learning_rate": 0.00010039100684261974, "loss": 1.4087, "step": 8679 }, { "epoch": 1.358797745773325, "grad_norm": 2.802337408065796, "learning_rate": 0.00010036656891495599, "loss": 0.6997, "step": 8680 }, { "epoch": 1.3589542892924233, "grad_norm": 1.9060883522033691, "learning_rate": 0.00010034213098729227, "loss": 0.6674, "step": 8681 }, { "epoch": 1.3591108328115216, "grad_norm": 6.120393753051758, "learning_rate": 0.00010031769305962854, "loss": 1.335, "step": 8682 }, { "epoch": 1.35926737633062, "grad_norm": 1.4589465856552124, "learning_rate": 0.0001002932551319648, "loss": 0.6943, "step": 8683 }, { "epoch": 1.3594239198497182, "grad_norm": 3.765908718109131, "learning_rate": 0.00010026881720430108, "loss": 0.6131, "step": 8684 }, { "epoch": 1.3595804633688164, "grad_norm": 2.0184710025787354, "learning_rate": 0.00010024437927663733, 
"loss": 0.577, "step": 8685 }, { "epoch": 1.3597370068879149, "grad_norm": 1.7017927169799805, "learning_rate": 0.0001002199413489736, "loss": 0.6143, "step": 8686 }, { "epoch": 1.359893550407013, "grad_norm": 3.1961417198181152, "learning_rate": 0.00010019550342130987, "loss": 0.6772, "step": 8687 }, { "epoch": 1.3600500939261115, "grad_norm": 4.469157695770264, "learning_rate": 0.00010017106549364612, "loss": 1.4173, "step": 8688 }, { "epoch": 1.3602066374452098, "grad_norm": 0.6359954476356506, "learning_rate": 0.00010014662756598239, "loss": 0.316, "step": 8689 }, { "epoch": 1.360363180964308, "grad_norm": 1.053896427154541, "learning_rate": 0.00010012218963831867, "loss": 0.1854, "step": 8690 }, { "epoch": 1.3605197244834064, "grad_norm": 0.57310950756073, "learning_rate": 0.00010009775171065493, "loss": 0.2186, "step": 8691 }, { "epoch": 1.3606762680025046, "grad_norm": 0.5779302716255188, "learning_rate": 0.00010007331378299118, "loss": 0.2643, "step": 8692 }, { "epoch": 1.360832811521603, "grad_norm": 0.4520057439804077, "learning_rate": 0.00010004887585532746, "loss": 0.2138, "step": 8693 }, { "epoch": 1.3609893550407013, "grad_norm": 1.1857097148895264, "learning_rate": 0.00010002443792766373, "loss": 0.2805, "step": 8694 }, { "epoch": 1.3611458985597995, "grad_norm": 0.5992613434791565, "learning_rate": 9.999999999999999e-05, "loss": 0.3539, "step": 8695 }, { "epoch": 1.361302442078898, "grad_norm": 0.8540019392967224, "learning_rate": 9.997556207233626e-05, "loss": 0.2893, "step": 8696 }, { "epoch": 1.3614589855979962, "grad_norm": 0.5429804921150208, "learning_rate": 9.995112414467252e-05, "loss": 0.2457, "step": 8697 }, { "epoch": 1.3616155291170946, "grad_norm": 1.0506290197372437, "learning_rate": 9.992668621700879e-05, "loss": 0.2567, "step": 8698 }, { "epoch": 1.3617720726361928, "grad_norm": 0.7274707555770874, "learning_rate": 9.990224828934506e-05, "loss": 0.1966, "step": 8699 }, { "epoch": 1.361928616155291, "grad_norm": 1.6726579666137695, 
"learning_rate": 9.987781036168132e-05, "loss": 0.3843, "step": 8700 }, { "epoch": 1.3620851596743895, "grad_norm": 1.3097379207611084, "learning_rate": 9.985337243401758e-05, "loss": 0.304, "step": 8701 }, { "epoch": 1.362241703193488, "grad_norm": 1.2650363445281982, "learning_rate": 9.982893450635386e-05, "loss": 0.453, "step": 8702 }, { "epoch": 1.3623982467125861, "grad_norm": 0.9016621112823486, "learning_rate": 9.980449657869012e-05, "loss": 0.3309, "step": 8703 }, { "epoch": 1.3625547902316844, "grad_norm": 1.0004808902740479, "learning_rate": 9.978005865102637e-05, "loss": 0.3763, "step": 8704 }, { "epoch": 1.3627113337507828, "grad_norm": 1.2892861366271973, "learning_rate": 9.975562072336265e-05, "loss": 0.4991, "step": 8705 }, { "epoch": 1.362867877269881, "grad_norm": 1.6277682781219482, "learning_rate": 9.973118279569892e-05, "loss": 0.3803, "step": 8706 }, { "epoch": 1.3630244207889795, "grad_norm": 1.4325026273727417, "learning_rate": 9.970674486803518e-05, "loss": 0.4247, "step": 8707 }, { "epoch": 1.3631809643080777, "grad_norm": 1.4070262908935547, "learning_rate": 9.968230694037145e-05, "loss": 0.4286, "step": 8708 }, { "epoch": 1.3633375078271759, "grad_norm": 2.1237456798553467, "learning_rate": 9.965786901270771e-05, "loss": 0.65, "step": 8709 }, { "epoch": 1.3634940513462743, "grad_norm": 2.7738819122314453, "learning_rate": 9.963343108504398e-05, "loss": 0.858, "step": 8710 }, { "epoch": 1.3636505948653725, "grad_norm": 1.125198483467102, "learning_rate": 9.960899315738026e-05, "loss": 0.3916, "step": 8711 }, { "epoch": 1.363807138384471, "grad_norm": 1.3788397312164307, "learning_rate": 9.95845552297165e-05, "loss": 0.3602, "step": 8712 }, { "epoch": 1.3639636819035692, "grad_norm": 1.1802959442138672, "learning_rate": 9.956011730205277e-05, "loss": 0.3027, "step": 8713 }, { "epoch": 1.3641202254226674, "grad_norm": 1.0281505584716797, "learning_rate": 9.953567937438905e-05, "loss": 0.3809, "step": 8714 }, { "epoch": 1.3642767689417659, 
"grad_norm": 2.2165725231170654, "learning_rate": 9.951124144672531e-05, "loss": 0.8278, "step": 8715 }, { "epoch": 1.364433312460864, "grad_norm": 2.219921350479126, "learning_rate": 9.948680351906157e-05, "loss": 0.944, "step": 8716 }, { "epoch": 1.3645898559799625, "grad_norm": 2.5214898586273193, "learning_rate": 9.946236559139784e-05, "loss": 0.9238, "step": 8717 }, { "epoch": 1.3647463994990607, "grad_norm": 2.641808032989502, "learning_rate": 9.943792766373411e-05, "loss": 0.6352, "step": 8718 }, { "epoch": 1.364902943018159, "grad_norm": 1.4690858125686646, "learning_rate": 9.941348973607037e-05, "loss": 0.7535, "step": 8719 }, { "epoch": 1.3650594865372574, "grad_norm": 1.5977320671081543, "learning_rate": 9.938905180840664e-05, "loss": 0.6958, "step": 8720 }, { "epoch": 1.3652160300563556, "grad_norm": 1.7913562059402466, "learning_rate": 9.93646138807429e-05, "loss": 0.846, "step": 8721 }, { "epoch": 1.365372573575454, "grad_norm": 2.0344135761260986, "learning_rate": 9.934017595307917e-05, "loss": 0.7296, "step": 8722 }, { "epoch": 1.3655291170945523, "grad_norm": 3.8352715969085693, "learning_rate": 9.931573802541545e-05, "loss": 0.7046, "step": 8723 }, { "epoch": 1.3656856606136505, "grad_norm": 1.1829397678375244, "learning_rate": 9.92913000977517e-05, "loss": 0.5948, "step": 8724 }, { "epoch": 1.365842204132749, "grad_norm": 3.249732732772827, "learning_rate": 9.926686217008796e-05, "loss": 1.1317, "step": 8725 }, { "epoch": 1.3659987476518471, "grad_norm": 2.0440311431884766, "learning_rate": 9.924242424242424e-05, "loss": 0.8155, "step": 8726 }, { "epoch": 1.3661552911709456, "grad_norm": 2.4997398853302, "learning_rate": 9.92179863147605e-05, "loss": 0.8181, "step": 8727 }, { "epoch": 1.3663118346900438, "grad_norm": 4.196403503417969, "learning_rate": 9.919354838709676e-05, "loss": 0.573, "step": 8728 }, { "epoch": 1.366468378209142, "grad_norm": 1.9175196886062622, "learning_rate": 9.916911045943304e-05, "loss": 0.5787, "step": 8729 }, { 
"epoch": 1.3666249217282405, "grad_norm": 1.8270318508148193, "learning_rate": 9.91446725317693e-05, "loss": 0.7122, "step": 8730 }, { "epoch": 1.3667814652473387, "grad_norm": 2.314758539199829, "learning_rate": 9.912023460410556e-05, "loss": 0.9239, "step": 8731 }, { "epoch": 1.3669380087664371, "grad_norm": 1.4689182043075562, "learning_rate": 9.909579667644183e-05, "loss": 0.8547, "step": 8732 }, { "epoch": 1.3670945522855353, "grad_norm": 2.735502004623413, "learning_rate": 9.90713587487781e-05, "loss": 0.6393, "step": 8733 }, { "epoch": 1.3672510958046336, "grad_norm": 4.347554683685303, "learning_rate": 9.904692082111436e-05, "loss": 1.0392, "step": 8734 }, { "epoch": 1.367407639323732, "grad_norm": 1.6307176351547241, "learning_rate": 9.902248289345064e-05, "loss": 0.525, "step": 8735 }, { "epoch": 1.3675641828428304, "grad_norm": 2.497677803039551, "learning_rate": 9.899804496578689e-05, "loss": 0.6347, "step": 8736 }, { "epoch": 1.3677207263619287, "grad_norm": 2.2993502616882324, "learning_rate": 9.897360703812315e-05, "loss": 0.6741, "step": 8737 }, { "epoch": 1.3678772698810269, "grad_norm": 1.1915621757507324, "learning_rate": 9.894916911045943e-05, "loss": 0.4096, "step": 8738 }, { "epoch": 1.3680338134001253, "grad_norm": 0.5528977513313293, "learning_rate": 9.89247311827957e-05, "loss": 0.2614, "step": 8739 }, { "epoch": 1.3681903569192235, "grad_norm": 0.7073773145675659, "learning_rate": 9.890029325513195e-05, "loss": 0.2172, "step": 8740 }, { "epoch": 1.368346900438322, "grad_norm": 0.5945557355880737, "learning_rate": 9.887585532746823e-05, "loss": 0.2084, "step": 8741 }, { "epoch": 1.3685034439574202, "grad_norm": 0.41675809025764465, "learning_rate": 9.885141739980449e-05, "loss": 0.1236, "step": 8742 }, { "epoch": 1.3686599874765184, "grad_norm": 0.6224748492240906, "learning_rate": 9.882697947214076e-05, "loss": 0.2195, "step": 8743 }, { "epoch": 1.3688165309956168, "grad_norm": 1.0293275117874146, "learning_rate": 9.880254154447702e-05, 
"loss": 0.5121, "step": 8744 }, { "epoch": 1.368973074514715, "grad_norm": 0.8710348606109619, "learning_rate": 9.877810361681329e-05, "loss": 0.2933, "step": 8745 }, { "epoch": 1.3691296180338135, "grad_norm": 0.6916895508766174, "learning_rate": 9.875366568914955e-05, "loss": 0.1909, "step": 8746 }, { "epoch": 1.3692861615529117, "grad_norm": 0.8933185338973999, "learning_rate": 9.872922776148583e-05, "loss": 0.2403, "step": 8747 }, { "epoch": 1.36944270507201, "grad_norm": 1.0398600101470947, "learning_rate": 9.870478983382208e-05, "loss": 0.3148, "step": 8748 }, { "epoch": 1.3695992485911084, "grad_norm": 1.0829379558563232, "learning_rate": 9.868035190615834e-05, "loss": 0.3194, "step": 8749 }, { "epoch": 1.3697557921102066, "grad_norm": 0.7273138761520386, "learning_rate": 9.865591397849462e-05, "loss": 0.2394, "step": 8750 }, { "epoch": 1.369912335629305, "grad_norm": 2.8995516300201416, "learning_rate": 9.863147605083089e-05, "loss": 0.7665, "step": 8751 }, { "epoch": 1.3700688791484033, "grad_norm": 1.2653355598449707, "learning_rate": 9.860703812316714e-05, "loss": 0.3913, "step": 8752 }, { "epoch": 1.3702254226675015, "grad_norm": 4.484793663024902, "learning_rate": 9.858260019550342e-05, "loss": 0.4381, "step": 8753 }, { "epoch": 1.3703819661866, "grad_norm": 0.5917729735374451, "learning_rate": 9.855816226783968e-05, "loss": 0.1318, "step": 8754 }, { "epoch": 1.3705385097056981, "grad_norm": 1.4675835371017456, "learning_rate": 9.853372434017593e-05, "loss": 0.3232, "step": 8755 }, { "epoch": 1.3706950532247966, "grad_norm": 1.0259408950805664, "learning_rate": 9.850928641251221e-05, "loss": 0.3359, "step": 8756 }, { "epoch": 1.3708515967438948, "grad_norm": 1.2516374588012695, "learning_rate": 9.848484848484848e-05, "loss": 0.5625, "step": 8757 }, { "epoch": 1.371008140262993, "grad_norm": 3.212458372116089, "learning_rate": 9.846041055718474e-05, "loss": 0.378, "step": 8758 }, { "epoch": 1.3711646837820914, "grad_norm": 2.0374770164489746, 
"learning_rate": 9.843597262952102e-05, "loss": 0.4768, "step": 8759 }, { "epoch": 1.3713212273011897, "grad_norm": 2.4900288581848145, "learning_rate": 9.841153470185727e-05, "loss": 0.4226, "step": 8760 }, { "epoch": 1.371477770820288, "grad_norm": 1.8129522800445557, "learning_rate": 9.838709677419354e-05, "loss": 0.5211, "step": 8761 }, { "epoch": 1.3716343143393863, "grad_norm": 1.4478105306625366, "learning_rate": 9.836265884652981e-05, "loss": 0.7839, "step": 8762 }, { "epoch": 1.3717908578584845, "grad_norm": 1.799777626991272, "learning_rate": 9.833822091886608e-05, "loss": 0.4166, "step": 8763 }, { "epoch": 1.371947401377583, "grad_norm": 3.246655225753784, "learning_rate": 9.831378299120233e-05, "loss": 0.8867, "step": 8764 }, { "epoch": 1.3721039448966814, "grad_norm": 1.721030354499817, "learning_rate": 9.828934506353861e-05, "loss": 0.5577, "step": 8765 }, { "epoch": 1.3722604884157796, "grad_norm": 1.6854079961776733, "learning_rate": 9.826490713587487e-05, "loss": 0.573, "step": 8766 }, { "epoch": 1.3724170319348779, "grad_norm": 2.2403340339660645, "learning_rate": 9.824046920821112e-05, "loss": 0.7653, "step": 8767 }, { "epoch": 1.372573575453976, "grad_norm": 1.6862674951553345, "learning_rate": 9.82160312805474e-05, "loss": 0.5029, "step": 8768 }, { "epoch": 1.3727301189730745, "grad_norm": 2.492246627807617, "learning_rate": 9.819159335288367e-05, "loss": 0.772, "step": 8769 }, { "epoch": 1.372886662492173, "grad_norm": 3.606316328048706, "learning_rate": 9.816715542521993e-05, "loss": 1.0772, "step": 8770 }, { "epoch": 1.3730432060112712, "grad_norm": 1.919001579284668, "learning_rate": 9.814271749755621e-05, "loss": 0.3547, "step": 8771 }, { "epoch": 1.3731997495303694, "grad_norm": 1.8840322494506836, "learning_rate": 9.811827956989246e-05, "loss": 0.7453, "step": 8772 }, { "epoch": 1.3733562930494678, "grad_norm": 1.7198199033737183, "learning_rate": 9.809384164222873e-05, "loss": 0.516, "step": 8773 }, { "epoch": 1.373512836568566, 
"grad_norm": 3.6644818782806396, "learning_rate": 9.8069403714565e-05, "loss": 1.4235, "step": 8774 }, { "epoch": 1.3736693800876645, "grad_norm": 2.1743552684783936, "learning_rate": 9.804496578690127e-05, "loss": 1.0742, "step": 8775 }, { "epoch": 1.3738259236067627, "grad_norm": 4.740699291229248, "learning_rate": 9.802052785923752e-05, "loss": 1.6689, "step": 8776 }, { "epoch": 1.373982467125861, "grad_norm": 3.387509822845459, "learning_rate": 9.79960899315738e-05, "loss": 1.5587, "step": 8777 }, { "epoch": 1.3741390106449594, "grad_norm": 1.966965675354004, "learning_rate": 9.797165200391006e-05, "loss": 0.8358, "step": 8778 }, { "epoch": 1.3742955541640576, "grad_norm": 2.8340470790863037, "learning_rate": 9.794721407624632e-05, "loss": 1.0338, "step": 8779 }, { "epoch": 1.374452097683156, "grad_norm": 3.0556154251098633, "learning_rate": 9.79227761485826e-05, "loss": 1.4119, "step": 8780 }, { "epoch": 1.3746086412022542, "grad_norm": 3.300295829772949, "learning_rate": 9.789833822091886e-05, "loss": 1.1295, "step": 8781 }, { "epoch": 1.3747651847213525, "grad_norm": 2.788999557495117, "learning_rate": 9.787390029325512e-05, "loss": 0.8346, "step": 8782 }, { "epoch": 1.374921728240451, "grad_norm": 1.3150262832641602, "learning_rate": 9.78494623655914e-05, "loss": 0.6381, "step": 8783 }, { "epoch": 1.375078271759549, "grad_norm": 3.5385401248931885, "learning_rate": 9.782502443792765e-05, "loss": 0.5895, "step": 8784 }, { "epoch": 1.3752348152786475, "grad_norm": 1.0753779411315918, "learning_rate": 9.780058651026392e-05, "loss": 0.2589, "step": 8785 }, { "epoch": 1.3753913587977458, "grad_norm": 2.1634578704833984, "learning_rate": 9.77761485826002e-05, "loss": 0.4002, "step": 8786 }, { "epoch": 1.375547902316844, "grad_norm": 2.8193857669830322, "learning_rate": 9.775171065493646e-05, "loss": 1.104, "step": 8787 }, { "epoch": 1.3757044458359424, "grad_norm": 2.0298500061035156, "learning_rate": 9.772727272727271e-05, "loss": 0.8568, "step": 8788 }, { 
"epoch": 1.3758609893550406, "grad_norm": 0.7730556130409241, "learning_rate": 9.770283479960899e-05, "loss": 0.3035, "step": 8789 }, { "epoch": 1.376017532874139, "grad_norm": 0.873766303062439, "learning_rate": 9.767839687194526e-05, "loss": 0.2784, "step": 8790 }, { "epoch": 1.3761740763932373, "grad_norm": 0.5062800049781799, "learning_rate": 9.765395894428151e-05, "loss": 0.242, "step": 8791 }, { "epoch": 1.3763306199123355, "grad_norm": 0.6650285124778748, "learning_rate": 9.762952101661779e-05, "loss": 0.2121, "step": 8792 }, { "epoch": 1.376487163431434, "grad_norm": 0.42068031430244446, "learning_rate": 9.760508308895405e-05, "loss": 0.2907, "step": 8793 }, { "epoch": 1.3766437069505322, "grad_norm": 0.5812735557556152, "learning_rate": 9.758064516129031e-05, "loss": 0.1929, "step": 8794 }, { "epoch": 1.3768002504696306, "grad_norm": 0.9358962178230286, "learning_rate": 9.75562072336266e-05, "loss": 0.1967, "step": 8795 }, { "epoch": 1.3769567939887288, "grad_norm": 0.5094256401062012, "learning_rate": 9.753176930596284e-05, "loss": 0.1991, "step": 8796 }, { "epoch": 1.377113337507827, "grad_norm": 4.9580302238464355, "learning_rate": 9.750733137829911e-05, "loss": 0.9328, "step": 8797 }, { "epoch": 1.3772698810269255, "grad_norm": 0.7062557935714722, "learning_rate": 9.748289345063539e-05, "loss": 0.2952, "step": 8798 }, { "epoch": 1.377426424546024, "grad_norm": 1.7369906902313232, "learning_rate": 9.745845552297165e-05, "loss": 0.3241, "step": 8799 }, { "epoch": 1.3775829680651221, "grad_norm": 0.8181784152984619, "learning_rate": 9.74340175953079e-05, "loss": 0.3769, "step": 8800 }, { "epoch": 1.3777395115842204, "grad_norm": 0.6165406107902527, "learning_rate": 9.740957966764418e-05, "loss": 0.2526, "step": 8801 }, { "epoch": 1.3778960551033186, "grad_norm": 0.9449729919433594, "learning_rate": 9.738514173998045e-05, "loss": 0.4735, "step": 8802 }, { "epoch": 1.378052598622417, "grad_norm": 1.4198623895645142, "learning_rate": 9.73607038123167e-05, 
"loss": 0.3983, "step": 8803 }, { "epoch": 1.3782091421415155, "grad_norm": 1.2503142356872559, "learning_rate": 9.733626588465298e-05, "loss": 0.3594, "step": 8804 }, { "epoch": 1.3783656856606137, "grad_norm": 1.537210464477539, "learning_rate": 9.731182795698924e-05, "loss": 0.3658, "step": 8805 }, { "epoch": 1.378522229179712, "grad_norm": 0.8660850524902344, "learning_rate": 9.72873900293255e-05, "loss": 0.2598, "step": 8806 }, { "epoch": 1.3786787726988103, "grad_norm": 0.7644106149673462, "learning_rate": 9.726295210166178e-05, "loss": 0.3427, "step": 8807 }, { "epoch": 1.3788353162179086, "grad_norm": 2.4255013465881348, "learning_rate": 9.723851417399804e-05, "loss": 0.5187, "step": 8808 }, { "epoch": 1.378991859737007, "grad_norm": 0.9475809931755066, "learning_rate": 9.72140762463343e-05, "loss": 0.172, "step": 8809 }, { "epoch": 1.3791484032561052, "grad_norm": 1.3600800037384033, "learning_rate": 9.718963831867058e-05, "loss": 0.6254, "step": 8810 }, { "epoch": 1.3793049467752034, "grad_norm": 1.693412184715271, "learning_rate": 9.716520039100683e-05, "loss": 0.3988, "step": 8811 }, { "epoch": 1.3794614902943019, "grad_norm": 1.2597637176513672, "learning_rate": 9.71407624633431e-05, "loss": 0.3481, "step": 8812 }, { "epoch": 1.3796180338134, "grad_norm": 1.089922308921814, "learning_rate": 9.711632453567937e-05, "loss": 0.3203, "step": 8813 }, { "epoch": 1.3797745773324985, "grad_norm": 1.767945647239685, "learning_rate": 9.709188660801564e-05, "loss": 0.5356, "step": 8814 }, { "epoch": 1.3799311208515967, "grad_norm": 1.7489302158355713, "learning_rate": 9.706744868035189e-05, "loss": 0.8449, "step": 8815 }, { "epoch": 1.380087664370695, "grad_norm": 2.965064525604248, "learning_rate": 9.704301075268817e-05, "loss": 0.4853, "step": 8816 }, { "epoch": 1.3802442078897934, "grad_norm": 3.0038321018218994, "learning_rate": 9.701857282502443e-05, "loss": 0.993, "step": 8817 }, { "epoch": 1.3804007514088916, "grad_norm": 3.7745330333709717, 
"learning_rate": 9.69941348973607e-05, "loss": 0.7505, "step": 8818 }, { "epoch": 1.38055729492799, "grad_norm": 1.8408567905426025, "learning_rate": 9.696969696969698e-05, "loss": 0.5765, "step": 8819 }, { "epoch": 1.3807138384470883, "grad_norm": 1.4488403797149658, "learning_rate": 9.694525904203323e-05, "loss": 0.7841, "step": 8820 }, { "epoch": 1.3808703819661865, "grad_norm": 2.437166690826416, "learning_rate": 9.692082111436949e-05, "loss": 0.9474, "step": 8821 }, { "epoch": 1.381026925485285, "grad_norm": 1.6625169515609741, "learning_rate": 9.689638318670577e-05, "loss": 0.6246, "step": 8822 }, { "epoch": 1.3811834690043832, "grad_norm": 2.3555285930633545, "learning_rate": 9.687194525904202e-05, "loss": 1.0428, "step": 8823 }, { "epoch": 1.3813400125234816, "grad_norm": 2.821054220199585, "learning_rate": 9.684750733137829e-05, "loss": 0.5075, "step": 8824 }, { "epoch": 1.3814965560425798, "grad_norm": 1.759642243385315, "learning_rate": 9.682306940371455e-05, "loss": 0.6551, "step": 8825 }, { "epoch": 1.381653099561678, "grad_norm": 1.687735676765442, "learning_rate": 9.679863147605083e-05, "loss": 1.2204, "step": 8826 }, { "epoch": 1.3818096430807765, "grad_norm": 1.8050616979599, "learning_rate": 9.677419354838708e-05, "loss": 1.0335, "step": 8827 }, { "epoch": 1.3819661865998747, "grad_norm": 7.936594009399414, "learning_rate": 9.674975562072335e-05, "loss": 0.9699, "step": 8828 }, { "epoch": 1.3821227301189731, "grad_norm": 2.0747787952423096, "learning_rate": 9.672531769305962e-05, "loss": 0.9377, "step": 8829 }, { "epoch": 1.3822792736380713, "grad_norm": 2.84826922416687, "learning_rate": 9.670087976539589e-05, "loss": 1.1424, "step": 8830 }, { "epoch": 1.3824358171571696, "grad_norm": 1.3028104305267334, "learning_rate": 9.667644183773214e-05, "loss": 0.5916, "step": 8831 }, { "epoch": 1.382592360676268, "grad_norm": 2.0030343532562256, "learning_rate": 9.665200391006842e-05, "loss": 1.2034, "step": 8832 }, { "epoch": 1.3827489041953664, 
"grad_norm": 3.1390602588653564, "learning_rate": 9.662756598240468e-05, "loss": 0.9957, "step": 8833 }, { "epoch": 1.3829054477144647, "grad_norm": 1.9023643732070923, "learning_rate": 9.660312805474095e-05, "loss": 0.4849, "step": 8834 }, { "epoch": 1.3830619912335629, "grad_norm": 5.415816307067871, "learning_rate": 9.657869012707721e-05, "loss": 1.2315, "step": 8835 }, { "epoch": 1.3832185347526613, "grad_norm": 2.7929747104644775, "learning_rate": 9.655425219941348e-05, "loss": 1.195, "step": 8836 }, { "epoch": 1.3833750782717595, "grad_norm": 2.5922513008117676, "learning_rate": 9.652981427174974e-05, "loss": 0.97, "step": 8837 }, { "epoch": 1.383531621790858, "grad_norm": 2.4513044357299805, "learning_rate": 9.650537634408602e-05, "loss": 0.9625, "step": 8838 }, { "epoch": 1.3836881653099562, "grad_norm": 0.5424944758415222, "learning_rate": 9.648093841642227e-05, "loss": 0.2183, "step": 8839 }, { "epoch": 1.3838447088290544, "grad_norm": 1.09577214717865, "learning_rate": 9.645650048875854e-05, "loss": 0.2732, "step": 8840 }, { "epoch": 1.3840012523481529, "grad_norm": 0.6740018129348755, "learning_rate": 9.643206256109481e-05, "loss": 0.1935, "step": 8841 }, { "epoch": 1.384157795867251, "grad_norm": 0.7105699181556702, "learning_rate": 9.640762463343108e-05, "loss": 0.2475, "step": 8842 }, { "epoch": 1.3843143393863495, "grad_norm": 0.4238963723182678, "learning_rate": 9.638318670576733e-05, "loss": 0.273, "step": 8843 }, { "epoch": 1.3844708829054477, "grad_norm": 0.680479884147644, "learning_rate": 9.635874877810361e-05, "loss": 0.2303, "step": 8844 }, { "epoch": 1.384627426424546, "grad_norm": 0.9009299874305725, "learning_rate": 9.633431085043987e-05, "loss": 0.3086, "step": 8845 }, { "epoch": 1.3847839699436444, "grad_norm": 0.7667379379272461, "learning_rate": 9.630987292277614e-05, "loss": 0.2803, "step": 8846 }, { "epoch": 1.3849405134627426, "grad_norm": 0.854888379573822, "learning_rate": 9.62854349951124e-05, "loss": 0.3391, "step": 8847 }, { 
"epoch": 1.385097056981841, "grad_norm": 0.6643977761268616, "learning_rate": 9.626099706744867e-05, "loss": 0.2158, "step": 8848 }, { "epoch": 1.3852536005009393, "grad_norm": 0.7794937491416931, "learning_rate": 9.623655913978493e-05, "loss": 0.257, "step": 8849 }, { "epoch": 1.3854101440200375, "grad_norm": 0.7044182419776917, "learning_rate": 9.621212121212121e-05, "loss": 0.2123, "step": 8850 }, { "epoch": 1.385566687539136, "grad_norm": 1.4797452688217163, "learning_rate": 9.618768328445746e-05, "loss": 0.5418, "step": 8851 }, { "epoch": 1.3857232310582341, "grad_norm": 0.6202751398086548, "learning_rate": 9.616324535679373e-05, "loss": 0.2076, "step": 8852 }, { "epoch": 1.3858797745773326, "grad_norm": 0.7685756087303162, "learning_rate": 9.613880742913e-05, "loss": 0.2865, "step": 8853 }, { "epoch": 1.3860363180964308, "grad_norm": 0.6632737517356873, "learning_rate": 9.611436950146627e-05, "loss": 0.3327, "step": 8854 }, { "epoch": 1.386192861615529, "grad_norm": 1.1949931383132935, "learning_rate": 9.608993157380252e-05, "loss": 0.4345, "step": 8855 }, { "epoch": 1.3863494051346275, "grad_norm": 0.9757672548294067, "learning_rate": 9.60654936461388e-05, "loss": 0.477, "step": 8856 }, { "epoch": 1.3865059486537257, "grad_norm": 1.91054105758667, "learning_rate": 9.604105571847507e-05, "loss": 0.5074, "step": 8857 }, { "epoch": 1.3866624921728241, "grad_norm": 1.4399696588516235, "learning_rate": 9.601661779081133e-05, "loss": 0.3122, "step": 8858 }, { "epoch": 1.3868190356919223, "grad_norm": 2.84247088432312, "learning_rate": 9.59921798631476e-05, "loss": 0.7512, "step": 8859 }, { "epoch": 1.3869755792110205, "grad_norm": 1.674396276473999, "learning_rate": 9.596774193548386e-05, "loss": 0.4991, "step": 8860 }, { "epoch": 1.387132122730119, "grad_norm": 2.1071906089782715, "learning_rate": 9.594330400782012e-05, "loss": 0.8064, "step": 8861 }, { "epoch": 1.3872886662492172, "grad_norm": 1.3319578170776367, "learning_rate": 9.59188660801564e-05, "loss": 
0.3922, "step": 8862 }, { "epoch": 1.3874452097683156, "grad_norm": 1.4761604070663452, "learning_rate": 9.589442815249265e-05, "loss": 0.5648, "step": 8863 }, { "epoch": 1.3876017532874139, "grad_norm": 2.2004549503326416, "learning_rate": 9.586999022482892e-05, "loss": 0.5432, "step": 8864 }, { "epoch": 1.387758296806512, "grad_norm": 1.39911949634552, "learning_rate": 9.58455522971652e-05, "loss": 0.576, "step": 8865 }, { "epoch": 1.3879148403256105, "grad_norm": 1.3971905708312988, "learning_rate": 9.582111436950146e-05, "loss": 0.6516, "step": 8866 }, { "epoch": 1.388071383844709, "grad_norm": 1.2392330169677734, "learning_rate": 9.579667644183771e-05, "loss": 0.457, "step": 8867 }, { "epoch": 1.3882279273638072, "grad_norm": 3.725346088409424, "learning_rate": 9.577223851417399e-05, "loss": 0.6318, "step": 8868 }, { "epoch": 1.3883844708829054, "grad_norm": 4.512824058532715, "learning_rate": 9.574780058651026e-05, "loss": 0.9208, "step": 8869 }, { "epoch": 1.3885410144020038, "grad_norm": 2.2564964294433594, "learning_rate": 9.572336265884651e-05, "loss": 0.852, "step": 8870 }, { "epoch": 1.388697557921102, "grad_norm": 1.9278607368469238, "learning_rate": 9.569892473118279e-05, "loss": 0.7657, "step": 8871 }, { "epoch": 1.3888541014402005, "grad_norm": 2.015615940093994, "learning_rate": 9.567448680351905e-05, "loss": 0.4866, "step": 8872 }, { "epoch": 1.3890106449592987, "grad_norm": 2.1916158199310303, "learning_rate": 9.565004887585532e-05, "loss": 0.7778, "step": 8873 }, { "epoch": 1.389167188478397, "grad_norm": 3.4717800617218018, "learning_rate": 9.56256109481916e-05, "loss": 0.7985, "step": 8874 }, { "epoch": 1.3893237319974954, "grad_norm": 1.8821927309036255, "learning_rate": 9.560117302052785e-05, "loss": 0.7681, "step": 8875 }, { "epoch": 1.3894802755165936, "grad_norm": 1.9250900745391846, "learning_rate": 9.557673509286411e-05, "loss": 0.9574, "step": 8876 }, { "epoch": 1.389636819035692, "grad_norm": 2.0036773681640625, "learning_rate": 
9.555229716520039e-05, "loss": 0.8076, "step": 8877 }, { "epoch": 1.3897933625547902, "grad_norm": 1.968286395072937, "learning_rate": 9.552785923753665e-05, "loss": 0.7549, "step": 8878 }, { "epoch": 1.3899499060738885, "grad_norm": 2.4910616874694824, "learning_rate": 9.55034213098729e-05, "loss": 0.7469, "step": 8879 }, { "epoch": 1.390106449592987, "grad_norm": 2.2749216556549072, "learning_rate": 9.547898338220918e-05, "loss": 1.0438, "step": 8880 }, { "epoch": 1.3902629931120851, "grad_norm": 1.8309866189956665, "learning_rate": 9.545454545454545e-05, "loss": 0.8261, "step": 8881 }, { "epoch": 1.3904195366311836, "grad_norm": 3.1002039909362793, "learning_rate": 9.54301075268817e-05, "loss": 0.6295, "step": 8882 }, { "epoch": 1.3905760801502818, "grad_norm": 3.578477144241333, "learning_rate": 9.540566959921798e-05, "loss": 1.5787, "step": 8883 }, { "epoch": 1.39073262366938, "grad_norm": 0.985260009765625, "learning_rate": 9.538123167155424e-05, "loss": 0.5352, "step": 8884 }, { "epoch": 1.3908891671884784, "grad_norm": 1.0070950984954834, "learning_rate": 9.53567937438905e-05, "loss": 0.5997, "step": 8885 }, { "epoch": 1.3910457107075767, "grad_norm": 3.7554800510406494, "learning_rate": 9.533235581622678e-05, "loss": 0.7481, "step": 8886 }, { "epoch": 1.391202254226675, "grad_norm": 1.8878397941589355, "learning_rate": 9.530791788856304e-05, "loss": 1.1695, "step": 8887 }, { "epoch": 1.3913587977457733, "grad_norm": 3.70890474319458, "learning_rate": 9.52834799608993e-05, "loss": 1.3039, "step": 8888 }, { "epoch": 1.3915153412648715, "grad_norm": 0.3560074269771576, "learning_rate": 9.525904203323558e-05, "loss": 0.1757, "step": 8889 }, { "epoch": 1.39167188478397, "grad_norm": 0.8983970880508423, "learning_rate": 9.523460410557184e-05, "loss": 0.2835, "step": 8890 }, { "epoch": 1.3918284283030682, "grad_norm": 0.8294395804405212, "learning_rate": 9.52101661779081e-05, "loss": 0.1884, "step": 8891 }, { "epoch": 1.3919849718221666, "grad_norm": 
1.0664957761764526, "learning_rate": 9.518572825024437e-05, "loss": 0.2084, "step": 8892 }, { "epoch": 1.3921415153412648, "grad_norm": 1.1653019189834595, "learning_rate": 9.516129032258064e-05, "loss": 0.2246, "step": 8893 }, { "epoch": 1.392298058860363, "grad_norm": 0.8518573641777039, "learning_rate": 9.513685239491689e-05, "loss": 0.6462, "step": 8894 }, { "epoch": 1.3924546023794615, "grad_norm": 0.7155981063842773, "learning_rate": 9.511241446725317e-05, "loss": 0.278, "step": 8895 }, { "epoch": 1.3926111458985597, "grad_norm": 0.8205424547195435, "learning_rate": 9.508797653958943e-05, "loss": 0.2204, "step": 8896 }, { "epoch": 1.3927676894176582, "grad_norm": 0.9535171389579773, "learning_rate": 9.50635386119257e-05, "loss": 0.1775, "step": 8897 }, { "epoch": 1.3929242329367564, "grad_norm": 0.6753984689712524, "learning_rate": 9.503910068426198e-05, "loss": 0.2603, "step": 8898 }, { "epoch": 1.3930807764558546, "grad_norm": 0.8725956082344055, "learning_rate": 9.501466275659823e-05, "loss": 0.3896, "step": 8899 }, { "epoch": 1.393237319974953, "grad_norm": 0.7949926257133484, "learning_rate": 9.499022482893449e-05, "loss": 0.3452, "step": 8900 }, { "epoch": 1.3933938634940515, "grad_norm": 5.083495140075684, "learning_rate": 9.496578690127077e-05, "loss": 0.6093, "step": 8901 }, { "epoch": 1.3935504070131497, "grad_norm": 0.8620098233222961, "learning_rate": 9.494134897360704e-05, "loss": 0.1909, "step": 8902 }, { "epoch": 1.393706950532248, "grad_norm": 1.0833512544631958, "learning_rate": 9.491691104594329e-05, "loss": 0.4185, "step": 8903 }, { "epoch": 1.3938634940513464, "grad_norm": 1.1511584520339966, "learning_rate": 9.489247311827956e-05, "loss": 0.3882, "step": 8904 }, { "epoch": 1.3940200375704446, "grad_norm": 1.5453732013702393, "learning_rate": 9.486803519061583e-05, "loss": 0.4503, "step": 8905 }, { "epoch": 1.394176581089543, "grad_norm": 1.2637474536895752, "learning_rate": 9.484359726295208e-05, "loss": 0.422, "step": 8906 }, { "epoch": 
1.3943331246086412, "grad_norm": 1.7331559658050537, "learning_rate": 9.481915933528836e-05, "loss": 0.349, "step": 8907 }, { "epoch": 1.3944896681277394, "grad_norm": 1.1923069953918457, "learning_rate": 9.479472140762462e-05, "loss": 0.3536, "step": 8908 }, { "epoch": 1.3946462116468379, "grad_norm": 3.23207950592041, "learning_rate": 9.477028347996089e-05, "loss": 0.3895, "step": 8909 }, { "epoch": 1.394802755165936, "grad_norm": 1.1913764476776123, "learning_rate": 9.474584555229717e-05, "loss": 0.6068, "step": 8910 }, { "epoch": 1.3949592986850345, "grad_norm": 1.4866349697113037, "learning_rate": 9.472140762463342e-05, "loss": 0.3515, "step": 8911 }, { "epoch": 1.3951158422041328, "grad_norm": 1.2634825706481934, "learning_rate": 9.469696969696968e-05, "loss": 0.5826, "step": 8912 }, { "epoch": 1.395272385723231, "grad_norm": 1.2857826948165894, "learning_rate": 9.467253176930596e-05, "loss": 0.54, "step": 8913 }, { "epoch": 1.3954289292423294, "grad_norm": 2.951294422149658, "learning_rate": 9.464809384164221e-05, "loss": 0.4376, "step": 8914 }, { "epoch": 1.3955854727614276, "grad_norm": 2.0184364318847656, "learning_rate": 9.462365591397848e-05, "loss": 0.5928, "step": 8915 }, { "epoch": 1.395742016280526, "grad_norm": 4.42757511138916, "learning_rate": 9.459921798631476e-05, "loss": 0.8246, "step": 8916 }, { "epoch": 1.3958985597996243, "grad_norm": 1.6247820854187012, "learning_rate": 9.457478005865102e-05, "loss": 0.658, "step": 8917 }, { "epoch": 1.3960551033187225, "grad_norm": 1.0989845991134644, "learning_rate": 9.455034213098727e-05, "loss": 0.5058, "step": 8918 }, { "epoch": 1.396211646837821, "grad_norm": 3.0606327056884766, "learning_rate": 9.452590420332355e-05, "loss": 0.722, "step": 8919 }, { "epoch": 1.3963681903569192, "grad_norm": 2.1190545558929443, "learning_rate": 9.450146627565982e-05, "loss": 0.8267, "step": 8920 }, { "epoch": 1.3965247338760176, "grad_norm": 2.202465057373047, "learning_rate": 9.447702834799608e-05, "loss": 0.8312, 
"step": 8921 }, { "epoch": 1.3966812773951158, "grad_norm": 1.6959621906280518, "learning_rate": 9.445259042033236e-05, "loss": 0.8328, "step": 8922 }, { "epoch": 1.396837820914214, "grad_norm": 2.8653564453125, "learning_rate": 9.442815249266861e-05, "loss": 0.6202, "step": 8923 }, { "epoch": 1.3969943644333125, "grad_norm": 1.8655325174331665, "learning_rate": 9.440371456500487e-05, "loss": 0.7007, "step": 8924 }, { "epoch": 1.3971509079524107, "grad_norm": 1.3610215187072754, "learning_rate": 9.437927663734115e-05, "loss": 0.42, "step": 8925 }, { "epoch": 1.3973074514715091, "grad_norm": 2.2759883403778076, "learning_rate": 9.43548387096774e-05, "loss": 0.9368, "step": 8926 }, { "epoch": 1.3974639949906074, "grad_norm": 2.666337728500366, "learning_rate": 9.433040078201367e-05, "loss": 1.1112, "step": 8927 }, { "epoch": 1.3976205385097056, "grad_norm": 3.074618101119995, "learning_rate": 9.430596285434995e-05, "loss": 1.377, "step": 8928 }, { "epoch": 1.397777082028804, "grad_norm": 1.411201000213623, "learning_rate": 9.428152492668621e-05, "loss": 0.9021, "step": 8929 }, { "epoch": 1.3979336255479022, "grad_norm": 2.165524482727051, "learning_rate": 9.425708699902246e-05, "loss": 1.0022, "step": 8930 }, { "epoch": 1.3980901690670007, "grad_norm": 1.9300085306167603, "learning_rate": 9.423264907135874e-05, "loss": 1.6909, "step": 8931 }, { "epoch": 1.398246712586099, "grad_norm": 2.606879711151123, "learning_rate": 9.4208211143695e-05, "loss": 0.7395, "step": 8932 }, { "epoch": 1.3984032561051971, "grad_norm": 2.456814765930176, "learning_rate": 9.418377321603127e-05, "loss": 1.3278, "step": 8933 }, { "epoch": 1.3985597996242956, "grad_norm": 1.9723178148269653, "learning_rate": 9.415933528836755e-05, "loss": 0.3545, "step": 8934 }, { "epoch": 1.398716343143394, "grad_norm": 1.6386150121688843, "learning_rate": 9.41348973607038e-05, "loss": 0.4407, "step": 8935 }, { "epoch": 1.3988728866624922, "grad_norm": 1.4594956636428833, "learning_rate": 
9.411045943304007e-05, "loss": 0.6822, "step": 8936 }, { "epoch": 1.3990294301815904, "grad_norm": 2.2683069705963135, "learning_rate": 9.408602150537634e-05, "loss": 0.9911, "step": 8937 }, { "epoch": 1.3991859737006889, "grad_norm": 3.4247801303863525, "learning_rate": 9.40615835777126e-05, "loss": 1.4219, "step": 8938 }, { "epoch": 1.399342517219787, "grad_norm": 0.4289674758911133, "learning_rate": 9.403714565004886e-05, "loss": 0.2495, "step": 8939 }, { "epoch": 1.3994990607388855, "grad_norm": 0.4746815860271454, "learning_rate": 9.401270772238514e-05, "loss": 0.2006, "step": 8940 }, { "epoch": 1.3996556042579837, "grad_norm": 0.42531949281692505, "learning_rate": 9.39882697947214e-05, "loss": 0.1976, "step": 8941 }, { "epoch": 1.399812147777082, "grad_norm": 0.8540092706680298, "learning_rate": 9.396383186705765e-05, "loss": 0.2068, "step": 8942 }, { "epoch": 1.3999686912961804, "grad_norm": 0.6693518757820129, "learning_rate": 9.393939393939393e-05, "loss": 0.242, "step": 8943 }, { "epoch": 1.4001252348152786, "grad_norm": 0.9643524289131165, "learning_rate": 9.39149560117302e-05, "loss": 0.335, "step": 8944 }, { "epoch": 1.400281778334377, "grad_norm": 0.5744693279266357, "learning_rate": 9.389051808406646e-05, "loss": 0.222, "step": 8945 }, { "epoch": 1.4004383218534753, "grad_norm": 1.0014822483062744, "learning_rate": 9.386608015640274e-05, "loss": 0.3107, "step": 8946 }, { "epoch": 1.4005948653725735, "grad_norm": 0.8832296133041382, "learning_rate": 9.384164222873899e-05, "loss": 0.3322, "step": 8947 }, { "epoch": 1.400751408891672, "grad_norm": 0.46709567308425903, "learning_rate": 9.381720430107526e-05, "loss": 0.254, "step": 8948 }, { "epoch": 1.4009079524107702, "grad_norm": 1.0916463136672974, "learning_rate": 9.379276637341154e-05, "loss": 0.3261, "step": 8949 }, { "epoch": 1.4010644959298686, "grad_norm": 1.3483251333236694, "learning_rate": 9.376832844574779e-05, "loss": 0.4645, "step": 8950 }, { "epoch": 1.4012210394489668, "grad_norm": 
1.411783218383789, "learning_rate": 9.374389051808405e-05, "loss": 0.3502, "step": 8951 }, { "epoch": 1.401377582968065, "grad_norm": 0.6709024310112, "learning_rate": 9.371945259042033e-05, "loss": 0.2547, "step": 8952 }, { "epoch": 1.4015341264871635, "grad_norm": 0.7980188727378845, "learning_rate": 9.36950146627566e-05, "loss": 0.2641, "step": 8953 }, { "epoch": 1.4016906700062617, "grad_norm": 1.5928226709365845, "learning_rate": 9.367057673509285e-05, "loss": 0.5429, "step": 8954 }, { "epoch": 1.4018472135253601, "grad_norm": 2.243115186691284, "learning_rate": 9.364613880742912e-05, "loss": 1.0404, "step": 8955 }, { "epoch": 1.4020037570444583, "grad_norm": 2.163041591644287, "learning_rate": 9.362170087976539e-05, "loss": 0.5766, "step": 8956 }, { "epoch": 1.4021603005635566, "grad_norm": 1.891203761100769, "learning_rate": 9.359726295210165e-05, "loss": 0.5016, "step": 8957 }, { "epoch": 1.402316844082655, "grad_norm": 0.8651189208030701, "learning_rate": 9.357282502443793e-05, "loss": 0.3276, "step": 8958 }, { "epoch": 1.4024733876017532, "grad_norm": 2.448225736618042, "learning_rate": 9.354838709677418e-05, "loss": 0.6193, "step": 8959 }, { "epoch": 1.4026299311208517, "grad_norm": 2.657435178756714, "learning_rate": 9.352394916911045e-05, "loss": 0.4568, "step": 8960 }, { "epoch": 1.4027864746399499, "grad_norm": 3.460101842880249, "learning_rate": 9.349951124144673e-05, "loss": 1.0414, "step": 8961 }, { "epoch": 1.402943018159048, "grad_norm": 1.3931571245193481, "learning_rate": 9.347507331378298e-05, "loss": 0.6641, "step": 8962 }, { "epoch": 1.4030995616781465, "grad_norm": 1.7879291772842407, "learning_rate": 9.345063538611924e-05, "loss": 0.8282, "step": 8963 }, { "epoch": 1.4032561051972448, "grad_norm": 1.4838361740112305, "learning_rate": 9.342619745845552e-05, "loss": 0.7365, "step": 8964 }, { "epoch": 1.4034126487163432, "grad_norm": 1.6955534219741821, "learning_rate": 9.340175953079179e-05, "loss": 0.24, "step": 8965 }, { "epoch": 
1.4035691922354414, "grad_norm": 2.0134198665618896, "learning_rate": 9.337732160312804e-05, "loss": 0.7978, "step": 8966 }, { "epoch": 1.4037257357545396, "grad_norm": 1.6322916746139526, "learning_rate": 9.335288367546432e-05, "loss": 0.6308, "step": 8967 }, { "epoch": 1.403882279273638, "grad_norm": 1.2652108669281006, "learning_rate": 9.332844574780058e-05, "loss": 0.504, "step": 8968 }, { "epoch": 1.4040388227927365, "grad_norm": 1.0733211040496826, "learning_rate": 9.330400782013684e-05, "loss": 0.3422, "step": 8969 }, { "epoch": 1.4041953663118347, "grad_norm": 1.4503707885742188, "learning_rate": 9.327956989247311e-05, "loss": 0.3383, "step": 8970 }, { "epoch": 1.404351909830933, "grad_norm": 1.633813738822937, "learning_rate": 9.325513196480937e-05, "loss": 0.7733, "step": 8971 }, { "epoch": 1.4045084533500314, "grad_norm": 1.3477745056152344, "learning_rate": 9.323069403714564e-05, "loss": 0.3886, "step": 8972 }, { "epoch": 1.4046649968691296, "grad_norm": 2.0735232830047607, "learning_rate": 9.320625610948192e-05, "loss": 0.7784, "step": 8973 }, { "epoch": 1.404821540388228, "grad_norm": 2.603829860687256, "learning_rate": 9.318181818181817e-05, "loss": 0.88, "step": 8974 }, { "epoch": 1.4049780839073263, "grad_norm": 1.9070336818695068, "learning_rate": 9.315738025415443e-05, "loss": 0.9435, "step": 8975 }, { "epoch": 1.4051346274264245, "grad_norm": 1.9831008911132812, "learning_rate": 9.313294232649071e-05, "loss": 0.9925, "step": 8976 }, { "epoch": 1.405291170945523, "grad_norm": 2.742797374725342, "learning_rate": 9.310850439882698e-05, "loss": 1.2224, "step": 8977 }, { "epoch": 1.4054477144646211, "grad_norm": 2.2004201412200928, "learning_rate": 9.308406647116323e-05, "loss": 0.9814, "step": 8978 }, { "epoch": 1.4056042579837196, "grad_norm": 2.0755558013916016, "learning_rate": 9.30596285434995e-05, "loss": 1.2202, "step": 8979 }, { "epoch": 1.4057608015028178, "grad_norm": 2.546623706817627, "learning_rate": 9.303519061583577e-05, "loss": 
0.8416, "step": 8980 }, { "epoch": 1.405917345021916, "grad_norm": 4.5670485496521, "learning_rate": 9.301075268817204e-05, "loss": 1.7262, "step": 8981 }, { "epoch": 1.4060738885410144, "grad_norm": 1.8530974388122559, "learning_rate": 9.29863147605083e-05, "loss": 1.209, "step": 8982 }, { "epoch": 1.4062304320601127, "grad_norm": 1.714417815208435, "learning_rate": 9.296187683284457e-05, "loss": 1.0305, "step": 8983 }, { "epoch": 1.406386975579211, "grad_norm": 3.0805559158325195, "learning_rate": 9.293743890518083e-05, "loss": 0.7601, "step": 8984 }, { "epoch": 1.4065435190983093, "grad_norm": 1.3468403816223145, "learning_rate": 9.291300097751711e-05, "loss": 0.1582, "step": 8985 }, { "epoch": 1.4067000626174075, "grad_norm": 1.245559573173523, "learning_rate": 9.288856304985336e-05, "loss": 0.3851, "step": 8986 }, { "epoch": 1.406856606136506, "grad_norm": 2.107041358947754, "learning_rate": 9.286412512218962e-05, "loss": 0.5471, "step": 8987 }, { "epoch": 1.4070131496556042, "grad_norm": 1.9285342693328857, "learning_rate": 9.28396871945259e-05, "loss": 0.5434, "step": 8988 }, { "epoch": 1.4071696931747026, "grad_norm": 0.39765217900276184, "learning_rate": 9.281524926686217e-05, "loss": 0.222, "step": 8989 }, { "epoch": 1.4073262366938009, "grad_norm": 0.596780002117157, "learning_rate": 9.279081133919842e-05, "loss": 0.2276, "step": 8990 }, { "epoch": 1.407482780212899, "grad_norm": 0.4721631705760956, "learning_rate": 9.27663734115347e-05, "loss": 0.196, "step": 8991 }, { "epoch": 1.4076393237319975, "grad_norm": 2.310553550720215, "learning_rate": 9.274193548387096e-05, "loss": 0.4844, "step": 8992 }, { "epoch": 1.4077958672510957, "grad_norm": 0.47364363074302673, "learning_rate": 9.271749755620723e-05, "loss": 0.2752, "step": 8993 }, { "epoch": 1.4079524107701942, "grad_norm": 1.2700831890106201, "learning_rate": 9.269305962854349e-05, "loss": 0.4011, "step": 8994 }, { "epoch": 1.4081089542892924, "grad_norm": 0.6444479823112488, "learning_rate": 
9.266862170087976e-05, "loss": 0.2654, "step": 8995 }, { "epoch": 1.4082654978083906, "grad_norm": 0.6023876667022705, "learning_rate": 9.264418377321602e-05, "loss": 0.3546, "step": 8996 }, { "epoch": 1.408422041327489, "grad_norm": 0.7151503562927246, "learning_rate": 9.26197458455523e-05, "loss": 0.249, "step": 8997 }, { "epoch": 1.4085785848465875, "grad_norm": 0.5194156765937805, "learning_rate": 9.259530791788855e-05, "loss": 0.2368, "step": 8998 }, { "epoch": 1.4087351283656857, "grad_norm": 0.761722207069397, "learning_rate": 9.257086999022482e-05, "loss": 0.2769, "step": 8999 }, { "epoch": 1.408891671884784, "grad_norm": 0.7770941853523254, "learning_rate": 9.25464320625611e-05, "loss": 0.2916, "step": 9000 }, { "epoch": 1.408891671884784, "eval_loss": 0.49010393023490906, "eval_runtime": 205.5553, "eval_samples_per_second": 60.242, "eval_steps_per_second": 3.765, "eval_wer": 0.3170729115753235, "step": 9000 }, { "epoch": 1.4090482154038821, "grad_norm": 1.107610821723938, "learning_rate": 9.252199413489736e-05, "loss": 0.4199, "step": 9001 }, { "epoch": 1.4092047589229806, "grad_norm": 1.4401216506958008, "learning_rate": 9.249755620723361e-05, "loss": 0.4704, "step": 9002 }, { "epoch": 1.409361302442079, "grad_norm": 0.8990141749382019, "learning_rate": 9.247311827956989e-05, "loss": 0.325, "step": 9003 }, { "epoch": 1.4095178459611772, "grad_norm": 1.2181795835494995, "learning_rate": 9.244868035190615e-05, "loss": 0.5246, "step": 9004 }, { "epoch": 1.4096743894802755, "grad_norm": 0.9682489037513733, "learning_rate": 9.242424242424242e-05, "loss": 0.3604, "step": 9005 }, { "epoch": 1.409830932999374, "grad_norm": 1.354854702949524, "learning_rate": 9.239980449657868e-05, "loss": 0.5106, "step": 9006 }, { "epoch": 1.4099874765184721, "grad_norm": 0.8772504925727844, "learning_rate": 9.237536656891495e-05, "loss": 0.3421, "step": 9007 }, { "epoch": 1.4101440200375706, "grad_norm": 0.9204466938972473, "learning_rate": 9.235092864125121e-05, "loss": 
0.3441, "step": 9008 }, { "epoch": 1.4103005635566688, "grad_norm": 1.3267632722854614, "learning_rate": 9.232649071358749e-05, "loss": 0.5436, "step": 9009 }, { "epoch": 1.410457107075767, "grad_norm": 1.8505562543869019, "learning_rate": 9.230205278592374e-05, "loss": 0.5114, "step": 9010 }, { "epoch": 1.4106136505948654, "grad_norm": 2.1749753952026367, "learning_rate": 9.227761485826001e-05, "loss": 0.4848, "step": 9011 }, { "epoch": 1.4107701941139636, "grad_norm": 1.2745190858840942, "learning_rate": 9.225317693059629e-05, "loss": 0.5289, "step": 9012 }, { "epoch": 1.410926737633062, "grad_norm": 0.8753221035003662, "learning_rate": 9.222873900293255e-05, "loss": 0.2345, "step": 9013 }, { "epoch": 1.4110832811521603, "grad_norm": 1.8300694227218628, "learning_rate": 9.22043010752688e-05, "loss": 0.7186, "step": 9014 }, { "epoch": 1.4112398246712585, "grad_norm": 2.5237860679626465, "learning_rate": 9.217986314760508e-05, "loss": 0.7047, "step": 9015 }, { "epoch": 1.411396368190357, "grad_norm": 1.3644801378250122, "learning_rate": 9.215542521994134e-05, "loss": 0.5634, "step": 9016 }, { "epoch": 1.4115529117094552, "grad_norm": 3.8021445274353027, "learning_rate": 9.213098729227761e-05, "loss": 0.3977, "step": 9017 }, { "epoch": 1.4117094552285536, "grad_norm": 2.972228527069092, "learning_rate": 9.210654936461387e-05, "loss": 1.2041, "step": 9018 }, { "epoch": 1.4118659987476518, "grad_norm": 2.1545019149780273, "learning_rate": 9.208211143695014e-05, "loss": 0.5613, "step": 9019 }, { "epoch": 1.41202254226675, "grad_norm": 2.832612991333008, "learning_rate": 9.20576735092864e-05, "loss": 0.5319, "step": 9020 }, { "epoch": 1.4121790857858485, "grad_norm": 2.0338563919067383, "learning_rate": 9.203323558162268e-05, "loss": 0.7196, "step": 9021 }, { "epoch": 1.4123356293049467, "grad_norm": 3.1499016284942627, "learning_rate": 9.200879765395893e-05, "loss": 1.0724, "step": 9022 }, { "epoch": 1.4124921728240452, "grad_norm": 3.166935443878174, "learning_rate": 
9.19843597262952e-05, "loss": 0.8717, "step": 9023 }, { "epoch": 1.4126487163431434, "grad_norm": 4.040381908416748, "learning_rate": 9.195992179863148e-05, "loss": 1.0294, "step": 9024 }, { "epoch": 1.4128052598622416, "grad_norm": 3.9483299255371094, "learning_rate": 9.193548387096774e-05, "loss": 0.8536, "step": 9025 }, { "epoch": 1.41296180338134, "grad_norm": 7.188598155975342, "learning_rate": 9.191104594330399e-05, "loss": 0.9649, "step": 9026 }, { "epoch": 1.4131183469004382, "grad_norm": 2.007357597351074, "learning_rate": 9.188660801564027e-05, "loss": 1.0681, "step": 9027 }, { "epoch": 1.4132748904195367, "grad_norm": 3.2346351146698, "learning_rate": 9.186217008797654e-05, "loss": 0.9114, "step": 9028 }, { "epoch": 1.413431433938635, "grad_norm": 3.834306240081787, "learning_rate": 9.183773216031279e-05, "loss": 0.883, "step": 9029 }, { "epoch": 1.4135879774577331, "grad_norm": 4.527556419372559, "learning_rate": 9.181329423264907e-05, "loss": 0.8806, "step": 9030 }, { "epoch": 1.4137445209768316, "grad_norm": 3.6661274433135986, "learning_rate": 9.178885630498533e-05, "loss": 1.2201, "step": 9031 }, { "epoch": 1.41390106449593, "grad_norm": 1.937548279762268, "learning_rate": 9.17644183773216e-05, "loss": 0.7024, "step": 9032 }, { "epoch": 1.4140576080150282, "grad_norm": 2.842707872390747, "learning_rate": 9.173998044965787e-05, "loss": 0.8705, "step": 9033 }, { "epoch": 1.4142141515341264, "grad_norm": 1.4602514505386353, "learning_rate": 9.171554252199412e-05, "loss": 0.4965, "step": 9034 }, { "epoch": 1.4143706950532247, "grad_norm": 2.891000747680664, "learning_rate": 9.169110459433039e-05, "loss": 1.2006, "step": 9035 }, { "epoch": 1.414527238572323, "grad_norm": 1.664858102798462, "learning_rate": 9.166666666666667e-05, "loss": 0.4995, "step": 9036 }, { "epoch": 1.4146837820914215, "grad_norm": 3.580497980117798, "learning_rate": 9.164222873900293e-05, "loss": 1.5147, "step": 9037 }, { "epoch": 1.4148403256105198, "grad_norm": 2.788262367248535, 
"learning_rate": 9.161779081133918e-05, "loss": 1.297, "step": 9038 }, { "epoch": 1.414996869129618, "grad_norm": 0.46237197518348694, "learning_rate": 9.159335288367546e-05, "loss": 0.2044, "step": 9039 }, { "epoch": 1.4151534126487164, "grad_norm": 0.7125651836395264, "learning_rate": 9.156891495601173e-05, "loss": 0.2271, "step": 9040 }, { "epoch": 1.4153099561678146, "grad_norm": 0.6166035532951355, "learning_rate": 9.154447702834798e-05, "loss": 0.2674, "step": 9041 }, { "epoch": 1.415466499686913, "grad_norm": 0.3849860727787018, "learning_rate": 9.152003910068426e-05, "loss": 0.2302, "step": 9042 }, { "epoch": 1.4156230432060113, "grad_norm": 0.6451732516288757, "learning_rate": 9.149560117302052e-05, "loss": 0.2397, "step": 9043 }, { "epoch": 1.4157795867251095, "grad_norm": 0.41967400908470154, "learning_rate": 9.147116324535679e-05, "loss": 0.229, "step": 9044 }, { "epoch": 1.415936130244208, "grad_norm": 0.9496431946754456, "learning_rate": 9.144672531769306e-05, "loss": 0.3821, "step": 9045 }, { "epoch": 1.4160926737633062, "grad_norm": 0.634460985660553, "learning_rate": 9.142228739002932e-05, "loss": 0.1948, "step": 9046 }, { "epoch": 1.4162492172824046, "grad_norm": 0.9624339938163757, "learning_rate": 9.139784946236558e-05, "loss": 0.2305, "step": 9047 }, { "epoch": 1.4164057608015028, "grad_norm": 1.082798719406128, "learning_rate": 9.137341153470186e-05, "loss": 0.2501, "step": 9048 }, { "epoch": 1.416562304320601, "grad_norm": 0.5590087175369263, "learning_rate": 9.134897360703812e-05, "loss": 0.1449, "step": 9049 }, { "epoch": 1.4167188478396995, "grad_norm": 1.2364987134933472, "learning_rate": 9.132453567937437e-05, "loss": 0.1856, "step": 9050 }, { "epoch": 1.4168753913587977, "grad_norm": 0.5167357325553894, "learning_rate": 9.130009775171065e-05, "loss": 0.2644, "step": 9051 }, { "epoch": 1.4170319348778961, "grad_norm": 2.4510698318481445, "learning_rate": 9.127565982404692e-05, "loss": 0.4774, "step": 9052 }, { "epoch": 
1.4171884783969944, "grad_norm": 1.2611892223358154, "learning_rate": 9.125122189638317e-05, "loss": 0.5563, "step": 9053 }, { "epoch": 1.4173450219160926, "grad_norm": 1.279375672340393, "learning_rate": 9.122678396871945e-05, "loss": 0.4933, "step": 9054 }, { "epoch": 1.417501565435191, "grad_norm": 1.3161873817443848, "learning_rate": 9.120234604105571e-05, "loss": 0.5997, "step": 9055 }, { "epoch": 1.4176581089542892, "grad_norm": 0.7085786461830139, "learning_rate": 9.117790811339198e-05, "loss": 0.2048, "step": 9056 }, { "epoch": 1.4178146524733877, "grad_norm": 1.8899716138839722, "learning_rate": 9.115347018572826e-05, "loss": 0.3395, "step": 9057 }, { "epoch": 1.4179711959924859, "grad_norm": 2.1545331478118896, "learning_rate": 9.112903225806451e-05, "loss": 0.5075, "step": 9058 }, { "epoch": 1.418127739511584, "grad_norm": 1.0476807355880737, "learning_rate": 9.110459433040077e-05, "loss": 0.2844, "step": 9059 }, { "epoch": 1.4182842830306825, "grad_norm": 1.534098505973816, "learning_rate": 9.108015640273705e-05, "loss": 0.481, "step": 9060 }, { "epoch": 1.4184408265497808, "grad_norm": 1.6640297174453735, "learning_rate": 9.105571847507331e-05, "loss": 0.3992, "step": 9061 }, { "epoch": 1.4185973700688792, "grad_norm": 1.0398399829864502, "learning_rate": 9.103128054740957e-05, "loss": 0.484, "step": 9062 }, { "epoch": 1.4187539135879774, "grad_norm": 1.172825574874878, "learning_rate": 9.100684261974584e-05, "loss": 0.5006, "step": 9063 }, { "epoch": 1.4189104571070756, "grad_norm": 1.707702875137329, "learning_rate": 9.098240469208211e-05, "loss": 0.3896, "step": 9064 }, { "epoch": 1.419067000626174, "grad_norm": 1.5226316452026367, "learning_rate": 9.095796676441836e-05, "loss": 0.689, "step": 9065 }, { "epoch": 1.4192235441452725, "grad_norm": 1.6208080053329468, "learning_rate": 9.093352883675464e-05, "loss": 0.4796, "step": 9066 }, { "epoch": 1.4193800876643707, "grad_norm": 2.61185622215271, "learning_rate": 9.09090909090909e-05, "loss": 0.5755, 
"step": 9067 }, { "epoch": 1.419536631183469, "grad_norm": 2.0593795776367188, "learning_rate": 9.088465298142717e-05, "loss": 0.7231, "step": 9068 }, { "epoch": 1.4196931747025674, "grad_norm": 1.8883817195892334, "learning_rate": 9.086021505376345e-05, "loss": 0.7263, "step": 9069 }, { "epoch": 1.4198497182216656, "grad_norm": 3.10384464263916, "learning_rate": 9.08357771260997e-05, "loss": 0.7273, "step": 9070 }, { "epoch": 1.420006261740764, "grad_norm": 1.468448281288147, "learning_rate": 9.081133919843596e-05, "loss": 0.7773, "step": 9071 }, { "epoch": 1.4201628052598623, "grad_norm": 1.079703450202942, "learning_rate": 9.078690127077224e-05, "loss": 0.4677, "step": 9072 }, { "epoch": 1.4203193487789605, "grad_norm": 2.0892856121063232, "learning_rate": 9.076246334310849e-05, "loss": 0.5249, "step": 9073 }, { "epoch": 1.420475892298059, "grad_norm": 2.025650978088379, "learning_rate": 9.073802541544476e-05, "loss": 0.4734, "step": 9074 }, { "epoch": 1.4206324358171571, "grad_norm": 2.4783313274383545, "learning_rate": 9.071358748778104e-05, "loss": 0.776, "step": 9075 }, { "epoch": 1.4207889793362556, "grad_norm": 2.0602166652679443, "learning_rate": 9.06891495601173e-05, "loss": 0.772, "step": 9076 }, { "epoch": 1.4209455228553538, "grad_norm": 2.0721070766448975, "learning_rate": 9.066471163245355e-05, "loss": 0.5761, "step": 9077 }, { "epoch": 1.421102066374452, "grad_norm": 2.9403793811798096, "learning_rate": 9.064027370478983e-05, "loss": 0.7641, "step": 9078 }, { "epoch": 1.4212586098935505, "grad_norm": 2.754152536392212, "learning_rate": 9.06158357771261e-05, "loss": 1.1597, "step": 9079 }, { "epoch": 1.4214151534126487, "grad_norm": 3.737293243408203, "learning_rate": 9.059139784946236e-05, "loss": 1.0165, "step": 9080 }, { "epoch": 1.4215716969317471, "grad_norm": 4.649268627166748, "learning_rate": 9.056695992179861e-05, "loss": 0.8176, "step": 9081 }, { "epoch": 1.4217282404508453, "grad_norm": 4.204807758331299, "learning_rate": 
9.054252199413489e-05, "loss": 1.1525, "step": 9082 }, { "epoch": 1.4218847839699436, "grad_norm": 1.6178537607192993, "learning_rate": 9.051808406647115e-05, "loss": 1.4486, "step": 9083 }, { "epoch": 1.422041327489042, "grad_norm": 1.8884669542312622, "learning_rate": 9.049364613880742e-05, "loss": 0.4542, "step": 9084 }, { "epoch": 1.4221978710081402, "grad_norm": 2.976574182510376, "learning_rate": 9.046920821114368e-05, "loss": 0.7788, "step": 9085 }, { "epoch": 1.4223544145272387, "grad_norm": 2.2067227363586426, "learning_rate": 9.044477028347995e-05, "loss": 0.642, "step": 9086 }, { "epoch": 1.4225109580463369, "grad_norm": 1.5230540037155151, "learning_rate": 9.042033235581621e-05, "loss": 0.4552, "step": 9087 }, { "epoch": 1.422667501565435, "grad_norm": 1.6714625358581543, "learning_rate": 9.039589442815249e-05, "loss": 0.8108, "step": 9088 }, { "epoch": 1.4228240450845335, "grad_norm": 0.5419172644615173, "learning_rate": 9.037145650048874e-05, "loss": 0.2396, "step": 9089 }, { "epoch": 1.4229805886036317, "grad_norm": 0.5144709944725037, "learning_rate": 9.034701857282501e-05, "loss": 0.2611, "step": 9090 }, { "epoch": 1.4231371321227302, "grad_norm": 0.7084149718284607, "learning_rate": 9.032258064516129e-05, "loss": 0.3145, "step": 9091 }, { "epoch": 1.4232936756418284, "grad_norm": 0.7008088827133179, "learning_rate": 9.029814271749755e-05, "loss": 0.265, "step": 9092 }, { "epoch": 1.4234502191609266, "grad_norm": 2.3117635250091553, "learning_rate": 9.02737047898338e-05, "loss": 0.4497, "step": 9093 }, { "epoch": 1.423606762680025, "grad_norm": 0.9773268103599548, "learning_rate": 9.024926686217008e-05, "loss": 0.3003, "step": 9094 }, { "epoch": 1.4237633061991233, "grad_norm": 0.8467937111854553, "learning_rate": 9.022482893450635e-05, "loss": 0.2749, "step": 9095 }, { "epoch": 1.4239198497182217, "grad_norm": 0.7267017960548401, "learning_rate": 9.020039100684261e-05, "loss": 0.2498, "step": 9096 }, { "epoch": 1.42407639323732, "grad_norm": 
0.7348589301109314, "learning_rate": 9.017595307917887e-05, "loss": 0.2563, "step": 9097 }, { "epoch": 1.4242329367564182, "grad_norm": 1.283211350440979, "learning_rate": 9.015151515151514e-05, "loss": 0.3757, "step": 9098 }, { "epoch": 1.4243894802755166, "grad_norm": 0.4818941652774811, "learning_rate": 9.01270772238514e-05, "loss": 0.177, "step": 9099 }, { "epoch": 1.424546023794615, "grad_norm": 0.830980122089386, "learning_rate": 9.010263929618768e-05, "loss": 0.2567, "step": 9100 }, { "epoch": 1.4247025673137133, "grad_norm": 1.4470046758651733, "learning_rate": 9.007820136852393e-05, "loss": 0.4034, "step": 9101 }, { "epoch": 1.4248591108328115, "grad_norm": 1.1172550916671753, "learning_rate": 9.00537634408602e-05, "loss": 0.377, "step": 9102 }, { "epoch": 1.42501565435191, "grad_norm": 1.1227866411209106, "learning_rate": 9.002932551319648e-05, "loss": 0.2965, "step": 9103 }, { "epoch": 1.4251721978710081, "grad_norm": 0.9577900767326355, "learning_rate": 9.000488758553274e-05, "loss": 0.3678, "step": 9104 }, { "epoch": 1.4253287413901066, "grad_norm": 1.0543158054351807, "learning_rate": 8.998044965786899e-05, "loss": 0.2968, "step": 9105 }, { "epoch": 1.4254852849092048, "grad_norm": 0.9723239541053772, "learning_rate": 8.995601173020527e-05, "loss": 0.4421, "step": 9106 }, { "epoch": 1.425641828428303, "grad_norm": 1.2686231136322021, "learning_rate": 8.993157380254154e-05, "loss": 0.5202, "step": 9107 }, { "epoch": 1.4257983719474014, "grad_norm": 1.9442496299743652, "learning_rate": 8.99071358748778e-05, "loss": 0.4751, "step": 9108 }, { "epoch": 1.4259549154664997, "grad_norm": 2.063786745071411, "learning_rate": 8.988269794721407e-05, "loss": 0.517, "step": 9109 }, { "epoch": 1.426111458985598, "grad_norm": 0.9845725893974304, "learning_rate": 8.985826001955033e-05, "loss": 0.2919, "step": 9110 }, { "epoch": 1.4262680025046963, "grad_norm": 2.66135311126709, "learning_rate": 8.98338220918866e-05, "loss": 0.9835, "step": 9111 }, { "epoch": 
1.4264245460237945, "grad_norm": 2.083529233932495, "learning_rate": 8.980938416422287e-05, "loss": 0.7316, "step": 9112 }, { "epoch": 1.426581089542893, "grad_norm": 1.5067460536956787, "learning_rate": 8.978494623655913e-05, "loss": 0.5985, "step": 9113 }, { "epoch": 1.4267376330619912, "grad_norm": 2.6055350303649902, "learning_rate": 8.976050830889539e-05, "loss": 0.4876, "step": 9114 }, { "epoch": 1.4268941765810896, "grad_norm": 1.7742613554000854, "learning_rate": 8.973607038123167e-05, "loss": 0.4211, "step": 9115 }, { "epoch": 1.4270507201001879, "grad_norm": 0.9075111746788025, "learning_rate": 8.971163245356793e-05, "loss": 0.2719, "step": 9116 }, { "epoch": 1.427207263619286, "grad_norm": 1.751970648765564, "learning_rate": 8.968719452590418e-05, "loss": 0.6691, "step": 9117 }, { "epoch": 1.4273638071383845, "grad_norm": 1.8061598539352417, "learning_rate": 8.966275659824046e-05, "loss": 0.4919, "step": 9118 }, { "epoch": 1.4275203506574827, "grad_norm": 5.433608055114746, "learning_rate": 8.963831867057673e-05, "loss": 1.2975, "step": 9119 }, { "epoch": 1.4276768941765812, "grad_norm": 1.150429129600525, "learning_rate": 8.961388074291299e-05, "loss": 0.3322, "step": 9120 }, { "epoch": 1.4278334376956794, "grad_norm": 1.8451645374298096, "learning_rate": 8.958944281524926e-05, "loss": 0.5617, "step": 9121 }, { "epoch": 1.4279899812147776, "grad_norm": 2.325820207595825, "learning_rate": 8.956500488758552e-05, "loss": 0.9685, "step": 9122 }, { "epoch": 1.428146524733876, "grad_norm": 2.732015609741211, "learning_rate": 8.954056695992179e-05, "loss": 0.6398, "step": 9123 }, { "epoch": 1.4283030682529743, "grad_norm": 2.809544086456299, "learning_rate": 8.951612903225806e-05, "loss": 0.8187, "step": 9124 }, { "epoch": 1.4284596117720727, "grad_norm": 2.3526062965393066, "learning_rate": 8.949169110459432e-05, "loss": 0.9776, "step": 9125 }, { "epoch": 1.428616155291171, "grad_norm": 3.0225517749786377, "learning_rate": 8.946725317693058e-05, "loss": 
1.0524, "step": 9126 }, { "epoch": 1.4287726988102691, "grad_norm": 9.465794563293457, "learning_rate": 8.944281524926686e-05, "loss": 0.8393, "step": 9127 }, { "epoch": 1.4289292423293676, "grad_norm": 2.5339739322662354, "learning_rate": 8.941837732160312e-05, "loss": 0.9948, "step": 9128 }, { "epoch": 1.4290857858484658, "grad_norm": 1.795351266860962, "learning_rate": 8.939393939393938e-05, "loss": 0.988, "step": 9129 }, { "epoch": 1.4292423293675642, "grad_norm": 2.1344945430755615, "learning_rate": 8.936950146627565e-05, "loss": 1.1098, "step": 9130 }, { "epoch": 1.4293988728866625, "grad_norm": 2.079683542251587, "learning_rate": 8.934506353861192e-05, "loss": 0.7107, "step": 9131 }, { "epoch": 1.4295554164057607, "grad_norm": 2.6614019870758057, "learning_rate": 8.932062561094817e-05, "loss": 1.8157, "step": 9132 }, { "epoch": 1.429711959924859, "grad_norm": 2.139328718185425, "learning_rate": 8.929618768328445e-05, "loss": 1.1497, "step": 9133 }, { "epoch": 1.4298685034439576, "grad_norm": 1.177253246307373, "learning_rate": 8.927174975562071e-05, "loss": 0.3898, "step": 9134 }, { "epoch": 1.4300250469630558, "grad_norm": 1.5603402853012085, "learning_rate": 8.924731182795698e-05, "loss": 0.8206, "step": 9135 }, { "epoch": 1.430181590482154, "grad_norm": 3.4352011680603027, "learning_rate": 8.922287390029326e-05, "loss": 0.4282, "step": 9136 }, { "epoch": 1.4303381340012524, "grad_norm": 1.563423752784729, "learning_rate": 8.919843597262951e-05, "loss": 0.8339, "step": 9137 }, { "epoch": 1.4304946775203506, "grad_norm": 3.0659782886505127, "learning_rate": 8.917399804496577e-05, "loss": 1.0363, "step": 9138 }, { "epoch": 1.430651221039449, "grad_norm": 0.4259863793849945, "learning_rate": 8.914956011730205e-05, "loss": 0.1943, "step": 9139 }, { "epoch": 1.4308077645585473, "grad_norm": 0.8139044046401978, "learning_rate": 8.912512218963832e-05, "loss": 0.3264, "step": 9140 }, { "epoch": 1.4309643080776455, "grad_norm": 0.38787421584129333, "learning_rate": 
8.910068426197457e-05, "loss": 0.2079, "step": 9141 }, { "epoch": 1.431120851596744, "grad_norm": 1.820959210395813, "learning_rate": 8.907624633431084e-05, "loss": 0.2371, "step": 9142 }, { "epoch": 1.4312773951158422, "grad_norm": 0.6413818597793579, "learning_rate": 8.905180840664711e-05, "loss": 0.2075, "step": 9143 }, { "epoch": 1.4314339386349406, "grad_norm": 0.7421493530273438, "learning_rate": 8.902737047898336e-05, "loss": 0.3897, "step": 9144 }, { "epoch": 1.4315904821540388, "grad_norm": 1.2470545768737793, "learning_rate": 8.900293255131964e-05, "loss": 0.2849, "step": 9145 }, { "epoch": 1.431747025673137, "grad_norm": 0.5670220255851746, "learning_rate": 8.89784946236559e-05, "loss": 0.2254, "step": 9146 }, { "epoch": 1.4319035691922355, "grad_norm": 0.7760573625564575, "learning_rate": 8.895405669599217e-05, "loss": 0.2496, "step": 9147 }, { "epoch": 1.4320601127113337, "grad_norm": 1.6587246656417847, "learning_rate": 8.892961876832845e-05, "loss": 0.5412, "step": 9148 }, { "epoch": 1.4322166562304322, "grad_norm": 0.6467307806015015, "learning_rate": 8.89051808406647e-05, "loss": 0.1799, "step": 9149 }, { "epoch": 1.4323731997495304, "grad_norm": 1.049241542816162, "learning_rate": 8.888074291300096e-05, "loss": 0.2824, "step": 9150 }, { "epoch": 1.4325297432686286, "grad_norm": 1.2838311195373535, "learning_rate": 8.885630498533724e-05, "loss": 0.4649, "step": 9151 }, { "epoch": 1.432686286787727, "grad_norm": 0.9091600775718689, "learning_rate": 8.88318670576735e-05, "loss": 0.392, "step": 9152 }, { "epoch": 1.4328428303068252, "grad_norm": 1.10474693775177, "learning_rate": 8.880742913000976e-05, "loss": 0.4008, "step": 9153 }, { "epoch": 1.4329993738259237, "grad_norm": 1.1482359170913696, "learning_rate": 8.878299120234604e-05, "loss": 0.3551, "step": 9154 }, { "epoch": 1.433155917345022, "grad_norm": 0.6834793090820312, "learning_rate": 8.87585532746823e-05, "loss": 0.2164, "step": 9155 }, { "epoch": 1.4333124608641201, "grad_norm": 
0.9541566371917725, "learning_rate": 8.873411534701855e-05, "loss": 0.3163, "step": 9156 }, { "epoch": 1.4334690043832186, "grad_norm": 2.6311590671539307, "learning_rate": 8.870967741935483e-05, "loss": 0.3442, "step": 9157 }, { "epoch": 1.4336255479023168, "grad_norm": 1.168099045753479, "learning_rate": 8.86852394916911e-05, "loss": 0.3939, "step": 9158 }, { "epoch": 1.4337820914214152, "grad_norm": 1.946874737739563, "learning_rate": 8.866080156402736e-05, "loss": 0.4121, "step": 9159 }, { "epoch": 1.4339386349405134, "grad_norm": 1.7437493801116943, "learning_rate": 8.863636363636364e-05, "loss": 0.5132, "step": 9160 }, { "epoch": 1.4340951784596117, "grad_norm": 1.0139257907867432, "learning_rate": 8.861192570869989e-05, "loss": 0.2326, "step": 9161 }, { "epoch": 1.43425172197871, "grad_norm": 1.9671473503112793, "learning_rate": 8.858748778103615e-05, "loss": 0.5857, "step": 9162 }, { "epoch": 1.4344082654978083, "grad_norm": 7.848803520202637, "learning_rate": 8.856304985337243e-05, "loss": 0.4004, "step": 9163 }, { "epoch": 1.4345648090169068, "grad_norm": 1.3776273727416992, "learning_rate": 8.85386119257087e-05, "loss": 0.4384, "step": 9164 }, { "epoch": 1.434721352536005, "grad_norm": 1.2949081659317017, "learning_rate": 8.851417399804495e-05, "loss": 0.4875, "step": 9165 }, { "epoch": 1.4348778960551032, "grad_norm": 3.020263433456421, "learning_rate": 8.848973607038123e-05, "loss": 0.7237, "step": 9166 }, { "epoch": 1.4350344395742016, "grad_norm": 2.61968994140625, "learning_rate": 8.846529814271749e-05, "loss": 0.6773, "step": 9167 }, { "epoch": 1.4351909830933, "grad_norm": 1.7759416103363037, "learning_rate": 8.844086021505374e-05, "loss": 0.903, "step": 9168 }, { "epoch": 1.4353475266123983, "grad_norm": 1.5969222784042358, "learning_rate": 8.841642228739002e-05, "loss": 0.4451, "step": 9169 }, { "epoch": 1.4355040701314965, "grad_norm": 1.7365254163742065, "learning_rate": 8.839198435972629e-05, "loss": 0.4141, "step": 9170 }, { "epoch": 
1.435660613650595, "grad_norm": 3.0730156898498535, "learning_rate": 8.836754643206255e-05, "loss": 1.1615, "step": 9171 }, { "epoch": 1.4358171571696932, "grad_norm": 2.1355032920837402, "learning_rate": 8.834310850439883e-05, "loss": 0.6218, "step": 9172 }, { "epoch": 1.4359737006887916, "grad_norm": 2.897294759750366, "learning_rate": 8.831867057673508e-05, "loss": 1.368, "step": 9173 }, { "epoch": 1.4361302442078898, "grad_norm": 1.462666630744934, "learning_rate": 8.829423264907135e-05, "loss": 0.4797, "step": 9174 }, { "epoch": 1.436286787726988, "grad_norm": 3.690739631652832, "learning_rate": 8.826979472140762e-05, "loss": 0.8913, "step": 9175 }, { "epoch": 1.4364433312460865, "grad_norm": 3.024446964263916, "learning_rate": 8.824535679374389e-05, "loss": 0.573, "step": 9176 }, { "epoch": 1.4365998747651847, "grad_norm": 1.713948130607605, "learning_rate": 8.822091886608014e-05, "loss": 0.5313, "step": 9177 }, { "epoch": 1.4367564182842831, "grad_norm": 3.0209193229675293, "learning_rate": 8.819648093841642e-05, "loss": 1.3517, "step": 9178 }, { "epoch": 1.4369129618033814, "grad_norm": 4.99892520904541, "learning_rate": 8.817204301075268e-05, "loss": 1.1816, "step": 9179 }, { "epoch": 1.4370695053224796, "grad_norm": 2.933112144470215, "learning_rate": 8.814760508308893e-05, "loss": 0.6138, "step": 9180 }, { "epoch": 1.437226048841578, "grad_norm": 1.887498378753662, "learning_rate": 8.812316715542521e-05, "loss": 1.1627, "step": 9181 }, { "epoch": 1.4373825923606762, "grad_norm": 2.869887351989746, "learning_rate": 8.809872922776148e-05, "loss": 0.8495, "step": 9182 }, { "epoch": 1.4375391358797747, "grad_norm": 2.865903854370117, "learning_rate": 8.807429130009774e-05, "loss": 0.8784, "step": 9183 }, { "epoch": 1.4376956793988729, "grad_norm": 1.4047309160232544, "learning_rate": 8.804985337243402e-05, "loss": 0.5029, "step": 9184 }, { "epoch": 1.437852222917971, "grad_norm": 1.5280976295471191, "learning_rate": 8.802541544477027e-05, "loss": 0.5816, 
"step": 9185 }, { "epoch": 1.4380087664370695, "grad_norm": 2.952573776245117, "learning_rate": 8.800097751710654e-05, "loss": 0.6115, "step": 9186 }, { "epoch": 1.4381653099561678, "grad_norm": 0.848155677318573, "learning_rate": 8.797653958944282e-05, "loss": 0.3277, "step": 9187 }, { "epoch": 1.4383218534752662, "grad_norm": 4.988794326782227, "learning_rate": 8.795210166177907e-05, "loss": 0.8794, "step": 9188 }, { "epoch": 1.4384783969943644, "grad_norm": 0.3419736325740814, "learning_rate": 8.792766373411533e-05, "loss": 0.2078, "step": 9189 }, { "epoch": 1.4386349405134626, "grad_norm": 0.519394040107727, "learning_rate": 8.790322580645161e-05, "loss": 0.2801, "step": 9190 }, { "epoch": 1.438791484032561, "grad_norm": 0.9486970901489258, "learning_rate": 8.787878787878787e-05, "loss": 0.3893, "step": 9191 }, { "epoch": 1.4389480275516593, "grad_norm": 0.5619981288909912, "learning_rate": 8.785434995112413e-05, "loss": 0.2653, "step": 9192 }, { "epoch": 1.4391045710707577, "grad_norm": 0.7176588773727417, "learning_rate": 8.78299120234604e-05, "loss": 0.3212, "step": 9193 }, { "epoch": 1.439261114589856, "grad_norm": 0.6982567310333252, "learning_rate": 8.780547409579667e-05, "loss": 0.1653, "step": 9194 }, { "epoch": 1.4394176581089542, "grad_norm": 0.5017626881599426, "learning_rate": 8.778103616813293e-05, "loss": 0.2405, "step": 9195 }, { "epoch": 1.4395742016280526, "grad_norm": 0.9920409917831421, "learning_rate": 8.775659824046921e-05, "loss": 0.3396, "step": 9196 }, { "epoch": 1.4397307451471508, "grad_norm": 0.9404519200325012, "learning_rate": 8.773216031280546e-05, "loss": 0.1906, "step": 9197 }, { "epoch": 1.4398872886662493, "grad_norm": 0.6449622511863708, "learning_rate": 8.770772238514173e-05, "loss": 0.2486, "step": 9198 }, { "epoch": 1.4400438321853475, "grad_norm": 0.9190917611122131, "learning_rate": 8.7683284457478e-05, "loss": 0.3537, "step": 9199 }, { "epoch": 1.4402003757044457, "grad_norm": 0.9570406079292297, "learning_rate": 
8.765884652981426e-05, "loss": 0.2703, "step": 9200 }, { "epoch": 1.4403569192235441, "grad_norm": 1.3468104600906372, "learning_rate": 8.763440860215052e-05, "loss": 0.4503, "step": 9201 }, { "epoch": 1.4405134627426426, "grad_norm": 0.8499281406402588, "learning_rate": 8.76099706744868e-05, "loss": 0.3656, "step": 9202 }, { "epoch": 1.4406700062617408, "grad_norm": 1.0467718839645386, "learning_rate": 8.758553274682307e-05, "loss": 0.2962, "step": 9203 }, { "epoch": 1.440826549780839, "grad_norm": 0.8589369654655457, "learning_rate": 8.756109481915932e-05, "loss": 0.3295, "step": 9204 }, { "epoch": 1.4409830932999375, "grad_norm": 1.2273787260055542, "learning_rate": 8.75366568914956e-05, "loss": 0.4164, "step": 9205 }, { "epoch": 1.4411396368190357, "grad_norm": 1.0226304531097412, "learning_rate": 8.751221896383186e-05, "loss": 0.5016, "step": 9206 }, { "epoch": 1.4412961803381341, "grad_norm": 1.2805366516113281, "learning_rate": 8.748778103616812e-05, "loss": 0.482, "step": 9207 }, { "epoch": 1.4414527238572323, "grad_norm": 1.0432116985321045, "learning_rate": 8.74633431085044e-05, "loss": 0.3297, "step": 9208 }, { "epoch": 1.4416092673763305, "grad_norm": 3.9404122829437256, "learning_rate": 8.743890518084065e-05, "loss": 0.52, "step": 9209 }, { "epoch": 1.441765810895429, "grad_norm": 5.121649742126465, "learning_rate": 8.741446725317692e-05, "loss": 0.6131, "step": 9210 }, { "epoch": 1.4419223544145272, "grad_norm": 1.2252410650253296, "learning_rate": 8.73900293255132e-05, "loss": 0.4629, "step": 9211 }, { "epoch": 1.4420788979336256, "grad_norm": 1.7832077741622925, "learning_rate": 8.736559139784945e-05, "loss": 0.6523, "step": 9212 }, { "epoch": 1.4422354414527239, "grad_norm": 3.1595590114593506, "learning_rate": 8.734115347018571e-05, "loss": 0.3546, "step": 9213 }, { "epoch": 1.442391984971822, "grad_norm": 1.8645236492156982, "learning_rate": 8.731671554252199e-05, "loss": 0.9762, "step": 9214 }, { "epoch": 1.4425485284909205, "grad_norm": 
2.1606290340423584, "learning_rate": 8.729227761485826e-05, "loss": 0.6703, "step": 9215 }, { "epoch": 1.4427050720100187, "grad_norm": 3.289787530899048, "learning_rate": 8.726783968719451e-05, "loss": 0.5665, "step": 9216 }, { "epoch": 1.4428616155291172, "grad_norm": 2.60479736328125, "learning_rate": 8.724340175953079e-05, "loss": 0.864, "step": 9217 }, { "epoch": 1.4430181590482154, "grad_norm": 2.7393736839294434, "learning_rate": 8.721896383186705e-05, "loss": 0.4926, "step": 9218 }, { "epoch": 1.4431747025673136, "grad_norm": 2.50663161277771, "learning_rate": 8.719452590420332e-05, "loss": 0.9697, "step": 9219 }, { "epoch": 1.443331246086412, "grad_norm": 1.2371330261230469, "learning_rate": 8.71700879765396e-05, "loss": 0.3924, "step": 9220 }, { "epoch": 1.4434877896055103, "grad_norm": 2.1638879776000977, "learning_rate": 8.714565004887585e-05, "loss": 0.6051, "step": 9221 }, { "epoch": 1.4436443331246087, "grad_norm": 1.8525105714797974, "learning_rate": 8.712121212121211e-05, "loss": 0.8727, "step": 9222 }, { "epoch": 1.443800876643707, "grad_norm": 4.00303840637207, "learning_rate": 8.709677419354839e-05, "loss": 0.9543, "step": 9223 }, { "epoch": 1.4439574201628051, "grad_norm": 1.3205608129501343, "learning_rate": 8.707233626588464e-05, "loss": 0.4322, "step": 9224 }, { "epoch": 1.4441139636819036, "grad_norm": 2.571690559387207, "learning_rate": 8.70478983382209e-05, "loss": 0.7875, "step": 9225 }, { "epoch": 1.4442705072010018, "grad_norm": 3.207707166671753, "learning_rate": 8.702346041055718e-05, "loss": 1.0932, "step": 9226 }, { "epoch": 1.4444270507201002, "grad_norm": 1.89205002784729, "learning_rate": 8.699902248289345e-05, "loss": 0.7563, "step": 9227 }, { "epoch": 1.4445835942391985, "grad_norm": 1.761757493019104, "learning_rate": 8.69745845552297e-05, "loss": 0.8936, "step": 9228 }, { "epoch": 1.4447401377582967, "grad_norm": 4.034841060638428, "learning_rate": 8.695014662756598e-05, "loss": 1.1292, "step": 9229 }, { "epoch": 
1.4448966812773951, "grad_norm": 5.492170333862305, "learning_rate": 8.692570869990224e-05, "loss": 2.0323, "step": 9230 }, { "epoch": 1.4450532247964936, "grad_norm": 3.997959613800049, "learning_rate": 8.690127077223851e-05, "loss": 0.9711, "step": 9231 }, { "epoch": 1.4452097683155918, "grad_norm": 3.610450267791748, "learning_rate": 8.687683284457477e-05, "loss": 1.0842, "step": 9232 }, { "epoch": 1.44536631183469, "grad_norm": 2.2445812225341797, "learning_rate": 8.685239491691104e-05, "loss": 1.499, "step": 9233 }, { "epoch": 1.4455228553537882, "grad_norm": 4.479135513305664, "learning_rate": 8.68279569892473e-05, "loss": 0.4075, "step": 9234 }, { "epoch": 1.4456793988728867, "grad_norm": 1.9963476657867432, "learning_rate": 8.680351906158358e-05, "loss": 0.9783, "step": 9235 }, { "epoch": 1.445835942391985, "grad_norm": 1.3088605403900146, "learning_rate": 8.677908113391983e-05, "loss": 0.4627, "step": 9236 }, { "epoch": 1.4459924859110833, "grad_norm": 3.202507257461548, "learning_rate": 8.67546432062561e-05, "loss": 1.1461, "step": 9237 }, { "epoch": 1.4461490294301815, "grad_norm": 2.1418967247009277, "learning_rate": 8.673020527859237e-05, "loss": 0.9054, "step": 9238 }, { "epoch": 1.44630557294928, "grad_norm": 0.5720778703689575, "learning_rate": 8.670576735092864e-05, "loss": 0.27, "step": 9239 }, { "epoch": 1.4464621164683782, "grad_norm": 0.45177343487739563, "learning_rate": 8.668132942326489e-05, "loss": 0.1783, "step": 9240 }, { "epoch": 1.4466186599874766, "grad_norm": 1.081150770187378, "learning_rate": 8.665689149560117e-05, "loss": 0.4301, "step": 9241 }, { "epoch": 1.4467752035065748, "grad_norm": 0.7842183113098145, "learning_rate": 8.663245356793743e-05, "loss": 0.2019, "step": 9242 }, { "epoch": 1.446931747025673, "grad_norm": 0.7052162885665894, "learning_rate": 8.66080156402737e-05, "loss": 0.2977, "step": 9243 }, { "epoch": 1.4470882905447715, "grad_norm": 0.5708596110343933, "learning_rate": 8.658357771260996e-05, "loss": 0.1986, 
"step": 9244 }, { "epoch": 1.4472448340638697, "grad_norm": 0.4889955222606659, "learning_rate": 8.655913978494623e-05, "loss": 0.1634, "step": 9245 }, { "epoch": 1.4474013775829682, "grad_norm": 0.5861791372299194, "learning_rate": 8.653470185728249e-05, "loss": 0.156, "step": 9246 }, { "epoch": 1.4475579211020664, "grad_norm": 0.8749104142189026, "learning_rate": 8.651026392961877e-05, "loss": 0.2483, "step": 9247 }, { "epoch": 1.4477144646211646, "grad_norm": 0.7886466383934021, "learning_rate": 8.648582600195502e-05, "loss": 0.339, "step": 9248 }, { "epoch": 1.447871008140263, "grad_norm": 1.1284881830215454, "learning_rate": 8.646138807429129e-05, "loss": 0.3371, "step": 9249 }, { "epoch": 1.4480275516593613, "grad_norm": 11.110546112060547, "learning_rate": 8.643695014662757e-05, "loss": 0.2101, "step": 9250 }, { "epoch": 1.4481840951784597, "grad_norm": 0.8709673881530762, "learning_rate": 8.641251221896383e-05, "loss": 0.3903, "step": 9251 }, { "epoch": 1.448340638697558, "grad_norm": 7.446573257446289, "learning_rate": 8.638807429130008e-05, "loss": 0.495, "step": 9252 }, { "epoch": 1.4484971822166561, "grad_norm": 1.0395238399505615, "learning_rate": 8.636363636363636e-05, "loss": 0.3358, "step": 9253 }, { "epoch": 1.4486537257357546, "grad_norm": 1.0077224969863892, "learning_rate": 8.633919843597262e-05, "loss": 0.3986, "step": 9254 }, { "epoch": 1.4488102692548528, "grad_norm": 1.1264681816101074, "learning_rate": 8.631476050830889e-05, "loss": 0.2916, "step": 9255 }, { "epoch": 1.4489668127739512, "grad_norm": 1.2706413269042969, "learning_rate": 8.629032258064515e-05, "loss": 0.3287, "step": 9256 }, { "epoch": 1.4491233562930494, "grad_norm": 1.2557499408721924, "learning_rate": 8.626588465298142e-05, "loss": 0.3033, "step": 9257 }, { "epoch": 1.4492798998121477, "grad_norm": 0.9736500382423401, "learning_rate": 8.624144672531768e-05, "loss": 0.5071, "step": 9258 }, { "epoch": 1.449436443331246, "grad_norm": 1.106518030166626, "learning_rate": 
8.621700879765396e-05, "loss": 0.3498, "step": 9259 }, { "epoch": 1.4495929868503443, "grad_norm": 1.3853764533996582, "learning_rate": 8.619257086999021e-05, "loss": 0.462, "step": 9260 }, { "epoch": 1.4497495303694428, "grad_norm": 1.618614912033081, "learning_rate": 8.616813294232648e-05, "loss": 0.5758, "step": 9261 }, { "epoch": 1.449906073888541, "grad_norm": 1.4383054971694946, "learning_rate": 8.614369501466276e-05, "loss": 0.4159, "step": 9262 }, { "epoch": 1.4500626174076392, "grad_norm": 1.0663392543792725, "learning_rate": 8.611925708699902e-05, "loss": 0.4319, "step": 9263 }, { "epoch": 1.4502191609267376, "grad_norm": 2.9339816570281982, "learning_rate": 8.609481915933527e-05, "loss": 0.6587, "step": 9264 }, { "epoch": 1.450375704445836, "grad_norm": 2.176449775695801, "learning_rate": 8.607038123167155e-05, "loss": 0.6003, "step": 9265 }, { "epoch": 1.4505322479649343, "grad_norm": 2.8206968307495117, "learning_rate": 8.604594330400782e-05, "loss": 0.6367, "step": 9266 }, { "epoch": 1.4506887914840325, "grad_norm": 1.6528189182281494, "learning_rate": 8.602150537634408e-05, "loss": 0.4951, "step": 9267 }, { "epoch": 1.4508453350031307, "grad_norm": 2.3281638622283936, "learning_rate": 8.599706744868035e-05, "loss": 0.7283, "step": 9268 }, { "epoch": 1.4510018785222292, "grad_norm": 1.1274622678756714, "learning_rate": 8.597262952101661e-05, "loss": 0.3834, "step": 9269 }, { "epoch": 1.4511584220413276, "grad_norm": 2.182931423187256, "learning_rate": 8.594819159335287e-05, "loss": 0.7657, "step": 9270 }, { "epoch": 1.4513149655604258, "grad_norm": 2.473872423171997, "learning_rate": 8.592375366568915e-05, "loss": 0.8096, "step": 9271 }, { "epoch": 1.451471509079524, "grad_norm": 1.8091752529144287, "learning_rate": 8.58993157380254e-05, "loss": 0.6692, "step": 9272 }, { "epoch": 1.4516280525986225, "grad_norm": 3.2494728565216064, "learning_rate": 8.587487781036167e-05, "loss": 0.5206, "step": 9273 }, { "epoch": 1.4517845961177207, "grad_norm": 
6.569478511810303, "learning_rate": 8.585043988269795e-05, "loss": 0.8174, "step": 9274 }, { "epoch": 1.4519411396368191, "grad_norm": 3.5665347576141357, "learning_rate": 8.582600195503421e-05, "loss": 1.2162, "step": 9275 }, { "epoch": 1.4520976831559174, "grad_norm": 3.3499755859375, "learning_rate": 8.580156402737046e-05, "loss": 1.2148, "step": 9276 }, { "epoch": 1.4522542266750156, "grad_norm": 1.8471848964691162, "learning_rate": 8.577712609970674e-05, "loss": 0.6484, "step": 9277 }, { "epoch": 1.452410770194114, "grad_norm": 3.943901777267456, "learning_rate": 8.575268817204301e-05, "loss": 0.8956, "step": 9278 }, { "epoch": 1.4525673137132122, "grad_norm": 3.428492784500122, "learning_rate": 8.572825024437927e-05, "loss": 1.4366, "step": 9279 }, { "epoch": 1.4527238572323107, "grad_norm": 2.345398426055908, "learning_rate": 8.570381231671554e-05, "loss": 0.7291, "step": 9280 }, { "epoch": 1.452880400751409, "grad_norm": 2.542097806930542, "learning_rate": 8.56793743890518e-05, "loss": 0.9401, "step": 9281 }, { "epoch": 1.4530369442705071, "grad_norm": 1.3730517625808716, "learning_rate": 8.565493646138807e-05, "loss": 1.1182, "step": 9282 }, { "epoch": 1.4531934877896056, "grad_norm": 3.189399003982544, "learning_rate": 8.563049853372434e-05, "loss": 1.2957, "step": 9283 }, { "epoch": 1.4533500313087038, "grad_norm": 2.0311431884765625, "learning_rate": 8.56060606060606e-05, "loss": 0.4811, "step": 9284 }, { "epoch": 1.4535065748278022, "grad_norm": 1.2809655666351318, "learning_rate": 8.558162267839686e-05, "loss": 0.7376, "step": 9285 }, { "epoch": 1.4536631183469004, "grad_norm": 1.898149013519287, "learning_rate": 8.555718475073314e-05, "loss": 0.5024, "step": 9286 }, { "epoch": 1.4538196618659986, "grad_norm": 1.5057889223098755, "learning_rate": 8.55327468230694e-05, "loss": 0.6526, "step": 9287 }, { "epoch": 1.453976205385097, "grad_norm": 6.357172012329102, "learning_rate": 8.550830889540565e-05, "loss": 1.1135, "step": 9288 }, { "epoch": 
1.4541327489041953, "grad_norm": 0.4889274537563324, "learning_rate": 8.548387096774193e-05, "loss": 0.1894, "step": 9289 }, { "epoch": 1.4542892924232937, "grad_norm": 0.653469443321228, "learning_rate": 8.54594330400782e-05, "loss": 0.1703, "step": 9290 }, { "epoch": 1.454445835942392, "grad_norm": 0.6301447153091431, "learning_rate": 8.543499511241445e-05, "loss": 0.2752, "step": 9291 }, { "epoch": 1.4546023794614902, "grad_norm": 0.5737555027008057, "learning_rate": 8.541055718475073e-05, "loss": 0.2306, "step": 9292 }, { "epoch": 1.4547589229805886, "grad_norm": 0.48317751288414, "learning_rate": 8.538611925708699e-05, "loss": 0.2238, "step": 9293 }, { "epoch": 1.4549154664996868, "grad_norm": 0.660159170627594, "learning_rate": 8.536168132942326e-05, "loss": 0.25, "step": 9294 }, { "epoch": 1.4550720100187853, "grad_norm": 0.5787570476531982, "learning_rate": 8.533724340175954e-05, "loss": 0.1323, "step": 9295 }, { "epoch": 1.4552285535378835, "grad_norm": 0.7087740898132324, "learning_rate": 8.531280547409579e-05, "loss": 0.2394, "step": 9296 }, { "epoch": 1.4553850970569817, "grad_norm": 1.1337648630142212, "learning_rate": 8.528836754643205e-05, "loss": 0.2902, "step": 9297 }, { "epoch": 1.4555416405760802, "grad_norm": 0.6737481951713562, "learning_rate": 8.526392961876833e-05, "loss": 0.2308, "step": 9298 }, { "epoch": 1.4556981840951786, "grad_norm": 0.6452515721321106, "learning_rate": 8.52394916911046e-05, "loss": 0.1904, "step": 9299 }, { "epoch": 1.4558547276142768, "grad_norm": 0.5262974500656128, "learning_rate": 8.521505376344085e-05, "loss": 0.1995, "step": 9300 }, { "epoch": 1.456011271133375, "grad_norm": 1.1509872674942017, "learning_rate": 8.519061583577712e-05, "loss": 0.4979, "step": 9301 }, { "epoch": 1.4561678146524735, "grad_norm": 0.8436789512634277, "learning_rate": 8.516617790811339e-05, "loss": 0.2421, "step": 9302 }, { "epoch": 1.4563243581715717, "grad_norm": 1.1247527599334717, "learning_rate": 8.514173998044964e-05, "loss": 
0.2879, "step": 9303 }, { "epoch": 1.4564809016906701, "grad_norm": 1.1355880498886108, "learning_rate": 8.511730205278592e-05, "loss": 0.328, "step": 9304 }, { "epoch": 1.4566374452097683, "grad_norm": 4.993597984313965, "learning_rate": 8.509286412512218e-05, "loss": 0.7011, "step": 9305 }, { "epoch": 1.4567939887288666, "grad_norm": 0.7840385437011719, "learning_rate": 8.506842619745845e-05, "loss": 0.1718, "step": 9306 }, { "epoch": 1.456950532247965, "grad_norm": 1.4984678030014038, "learning_rate": 8.504398826979473e-05, "loss": 0.5557, "step": 9307 }, { "epoch": 1.4571070757670632, "grad_norm": 1.5178111791610718, "learning_rate": 8.501955034213098e-05, "loss": 0.548, "step": 9308 }, { "epoch": 1.4572636192861617, "grad_norm": 2.9656150341033936, "learning_rate": 8.499511241446724e-05, "loss": 0.5248, "step": 9309 }, { "epoch": 1.4574201628052599, "grad_norm": 1.327226161956787, "learning_rate": 8.497067448680352e-05, "loss": 0.7294, "step": 9310 }, { "epoch": 1.457576706324358, "grad_norm": 1.0923349857330322, "learning_rate": 8.494623655913979e-05, "loss": 0.4204, "step": 9311 }, { "epoch": 1.4577332498434565, "grad_norm": 2.7213759422302246, "learning_rate": 8.492179863147604e-05, "loss": 0.7072, "step": 9312 }, { "epoch": 1.4578897933625548, "grad_norm": 1.255927324295044, "learning_rate": 8.489736070381232e-05, "loss": 0.7492, "step": 9313 }, { "epoch": 1.4580463368816532, "grad_norm": 2.039738416671753, "learning_rate": 8.487292277614858e-05, "loss": 0.5131, "step": 9314 }, { "epoch": 1.4582028804007514, "grad_norm": 2.313304901123047, "learning_rate": 8.484848484848483e-05, "loss": 0.4055, "step": 9315 }, { "epoch": 1.4583594239198496, "grad_norm": 1.3143826723098755, "learning_rate": 8.482404692082111e-05, "loss": 0.5852, "step": 9316 }, { "epoch": 1.458515967438948, "grad_norm": 2.0478479862213135, "learning_rate": 8.479960899315737e-05, "loss": 0.8262, "step": 9317 }, { "epoch": 1.4586725109580463, "grad_norm": 3.0443527698516846, "learning_rate": 
8.477517106549364e-05, "loss": 0.9234, "step": 9318 }, { "epoch": 1.4588290544771447, "grad_norm": 5.6164727210998535, "learning_rate": 8.475073313782992e-05, "loss": 0.708, "step": 9319 }, { "epoch": 1.458985597996243, "grad_norm": 1.4889097213745117, "learning_rate": 8.472629521016617e-05, "loss": 0.9464, "step": 9320 }, { "epoch": 1.4591421415153412, "grad_norm": 2.5142250061035156, "learning_rate": 8.470185728250243e-05, "loss": 1.1254, "step": 9321 }, { "epoch": 1.4592986850344396, "grad_norm": 3.2503488063812256, "learning_rate": 8.467741935483871e-05, "loss": 1.037, "step": 9322 }, { "epoch": 1.4594552285535378, "grad_norm": 2.1030449867248535, "learning_rate": 8.465298142717498e-05, "loss": 0.8223, "step": 9323 }, { "epoch": 1.4596117720726363, "grad_norm": 1.6335690021514893, "learning_rate": 8.462854349951123e-05, "loss": 0.5398, "step": 9324 }, { "epoch": 1.4597683155917345, "grad_norm": 2.1362364292144775, "learning_rate": 8.46041055718475e-05, "loss": 0.3341, "step": 9325 }, { "epoch": 1.4599248591108327, "grad_norm": 2.837796449661255, "learning_rate": 8.457966764418377e-05, "loss": 1.4995, "step": 9326 }, { "epoch": 1.4600814026299311, "grad_norm": 4.29352331161499, "learning_rate": 8.455522971652002e-05, "loss": 1.3477, "step": 9327 }, { "epoch": 1.4602379461490294, "grad_norm": 1.990387201309204, "learning_rate": 8.45307917888563e-05, "loss": 0.8274, "step": 9328 }, { "epoch": 1.4603944896681278, "grad_norm": 1.3266398906707764, "learning_rate": 8.450635386119257e-05, "loss": 0.6768, "step": 9329 }, { "epoch": 1.460551033187226, "grad_norm": 4.102444171905518, "learning_rate": 8.448191593352883e-05, "loss": 0.9489, "step": 9330 }, { "epoch": 1.4607075767063242, "grad_norm": 2.765338897705078, "learning_rate": 8.445747800586511e-05, "loss": 0.8909, "step": 9331 }, { "epoch": 1.4608641202254227, "grad_norm": 2.984898805618286, "learning_rate": 8.443304007820136e-05, "loss": 1.2414, "step": 9332 }, { "epoch": 1.461020663744521, "grad_norm": 
4.269297122955322, "learning_rate": 8.440860215053763e-05, "loss": 1.6595, "step": 9333 }, { "epoch": 1.4611772072636193, "grad_norm": 1.1461013555526733, "learning_rate": 8.43841642228739e-05, "loss": 0.1954, "step": 9334 }, { "epoch": 1.4613337507827175, "grad_norm": 2.7744386196136475, "learning_rate": 8.435972629521017e-05, "loss": 0.7527, "step": 9335 }, { "epoch": 1.461490294301816, "grad_norm": 1.470504641532898, "learning_rate": 8.433528836754642e-05, "loss": 0.7368, "step": 9336 }, { "epoch": 1.4616468378209142, "grad_norm": 2.8413960933685303, "learning_rate": 8.431085043988268e-05, "loss": 0.3352, "step": 9337 }, { "epoch": 1.4618033813400126, "grad_norm": 1.2316049337387085, "learning_rate": 8.428641251221896e-05, "loss": 0.5112, "step": 9338 }, { "epoch": 1.4619599248591109, "grad_norm": 0.4800587594509125, "learning_rate": 8.426197458455521e-05, "loss": 0.2659, "step": 9339 }, { "epoch": 1.462116468378209, "grad_norm": 0.5300424695014954, "learning_rate": 8.423753665689148e-05, "loss": 0.2848, "step": 9340 }, { "epoch": 1.4622730118973075, "grad_norm": 0.4099821448326111, "learning_rate": 8.421309872922776e-05, "loss": 0.1509, "step": 9341 }, { "epoch": 1.4624295554164057, "grad_norm": 0.5445888042449951, "learning_rate": 8.418866080156402e-05, "loss": 0.2769, "step": 9342 }, { "epoch": 1.4625860989355042, "grad_norm": 0.7307244539260864, "learning_rate": 8.416422287390027e-05, "loss": 0.24, "step": 9343 }, { "epoch": 1.4627426424546024, "grad_norm": 0.590489387512207, "learning_rate": 8.413978494623655e-05, "loss": 0.174, "step": 9344 }, { "epoch": 1.4628991859737006, "grad_norm": 0.8075463175773621, "learning_rate": 8.411534701857282e-05, "loss": 0.2945, "step": 9345 }, { "epoch": 1.463055729492799, "grad_norm": 0.6793466806411743, "learning_rate": 8.409090909090908e-05, "loss": 0.2182, "step": 9346 }, { "epoch": 1.4632122730118973, "grad_norm": 0.9996579885482788, "learning_rate": 8.406647116324535e-05, "loss": 0.347, "step": 9347 }, { "epoch": 
1.4633688165309957, "grad_norm": 0.5806265473365784, "learning_rate": 8.404203323558161e-05, "loss": 0.2452, "step": 9348 }, { "epoch": 1.463525360050094, "grad_norm": 0.9214386343955994, "learning_rate": 8.401759530791788e-05, "loss": 0.3277, "step": 9349 }, { "epoch": 1.4636819035691921, "grad_norm": 2.849910020828247, "learning_rate": 8.399315738025415e-05, "loss": 0.7389, "step": 9350 }, { "epoch": 1.4638384470882906, "grad_norm": 0.5131305456161499, "learning_rate": 8.39687194525904e-05, "loss": 0.2119, "step": 9351 }, { "epoch": 1.4639949906073888, "grad_norm": 2.853576898574829, "learning_rate": 8.394428152492667e-05, "loss": 0.3357, "step": 9352 }, { "epoch": 1.4641515341264872, "grad_norm": 1.252537488937378, "learning_rate": 8.391984359726295e-05, "loss": 0.2544, "step": 9353 }, { "epoch": 1.4643080776455855, "grad_norm": 1.4162060022354126, "learning_rate": 8.389540566959921e-05, "loss": 0.4257, "step": 9354 }, { "epoch": 1.4644646211646837, "grad_norm": 1.189596176147461, "learning_rate": 8.387096774193546e-05, "loss": 0.4705, "step": 9355 }, { "epoch": 1.4646211646837821, "grad_norm": 0.7394349575042725, "learning_rate": 8.384652981427174e-05, "loss": 0.411, "step": 9356 }, { "epoch": 1.4647777082028803, "grad_norm": 1.2858307361602783, "learning_rate": 8.382209188660801e-05, "loss": 0.5221, "step": 9357 }, { "epoch": 1.4649342517219788, "grad_norm": 1.020027756690979, "learning_rate": 8.379765395894427e-05, "loss": 0.407, "step": 9358 }, { "epoch": 1.465090795241077, "grad_norm": 1.9283796548843384, "learning_rate": 8.377321603128054e-05, "loss": 0.4782, "step": 9359 }, { "epoch": 1.4652473387601752, "grad_norm": 1.3848600387573242, "learning_rate": 8.37487781036168e-05, "loss": 0.4688, "step": 9360 }, { "epoch": 1.4654038822792737, "grad_norm": 1.769733190536499, "learning_rate": 8.372434017595307e-05, "loss": 0.8337, "step": 9361 }, { "epoch": 1.4655604257983719, "grad_norm": 1.7718392610549927, "learning_rate": 8.369990224828934e-05, "loss": 
0.1812, "step": 9362 }, { "epoch": 1.4657169693174703, "grad_norm": 2.2144126892089844, "learning_rate": 8.36754643206256e-05, "loss": 0.5035, "step": 9363 }, { "epoch": 1.4658735128365685, "grad_norm": 0.7767239212989807, "learning_rate": 8.365102639296186e-05, "loss": 0.3048, "step": 9364 }, { "epoch": 1.4660300563556667, "grad_norm": 3.264582872390747, "learning_rate": 8.362658846529814e-05, "loss": 0.7307, "step": 9365 }, { "epoch": 1.4661865998747652, "grad_norm": 1.8177357912063599, "learning_rate": 8.36021505376344e-05, "loss": 0.6826, "step": 9366 }, { "epoch": 1.4663431433938636, "grad_norm": 3.0217385292053223, "learning_rate": 8.357771260997066e-05, "loss": 0.4975, "step": 9367 }, { "epoch": 1.4664996869129618, "grad_norm": 1.1318516731262207, "learning_rate": 8.355327468230693e-05, "loss": 0.3554, "step": 9368 }, { "epoch": 1.46665623043206, "grad_norm": 1.6963642835617065, "learning_rate": 8.35288367546432e-05, "loss": 0.579, "step": 9369 }, { "epoch": 1.4668127739511585, "grad_norm": 2.232435941696167, "learning_rate": 8.350439882697946e-05, "loss": 0.6147, "step": 9370 }, { "epoch": 1.4669693174702567, "grad_norm": 3.5765178203582764, "learning_rate": 8.347996089931573e-05, "loss": 1.0373, "step": 9371 }, { "epoch": 1.4671258609893552, "grad_norm": 1.6168148517608643, "learning_rate": 8.345552297165199e-05, "loss": 0.3683, "step": 9372 }, { "epoch": 1.4672824045084534, "grad_norm": 1.8201894760131836, "learning_rate": 8.343108504398826e-05, "loss": 0.5754, "step": 9373 }, { "epoch": 1.4674389480275516, "grad_norm": 2.91982364654541, "learning_rate": 8.340664711632454e-05, "loss": 0.6643, "step": 9374 }, { "epoch": 1.46759549154665, "grad_norm": 2.993435859680176, "learning_rate": 8.338220918866079e-05, "loss": 0.624, "step": 9375 }, { "epoch": 1.4677520350657483, "grad_norm": 4.31162691116333, "learning_rate": 8.335777126099705e-05, "loss": 0.7056, "step": 9376 }, { "epoch": 1.4679085785848467, "grad_norm": 3.5542516708374023, "learning_rate": 
8.333333333333333e-05, "loss": 1.2868, "step": 9377 }, { "epoch": 1.468065122103945, "grad_norm": 2.0428109169006348, "learning_rate": 8.33088954056696e-05, "loss": 0.9045, "step": 9378 }, { "epoch": 1.4682216656230431, "grad_norm": 2.306626796722412, "learning_rate": 8.328445747800585e-05, "loss": 0.8775, "step": 9379 }, { "epoch": 1.4683782091421416, "grad_norm": 2.654895544052124, "learning_rate": 8.326001955034212e-05, "loss": 1.4198, "step": 9380 }, { "epoch": 1.4685347526612398, "grad_norm": 2.5380771160125732, "learning_rate": 8.323558162267839e-05, "loss": 0.9698, "step": 9381 }, { "epoch": 1.4686912961803382, "grad_norm": 3.0304949283599854, "learning_rate": 8.321114369501465e-05, "loss": 1.164, "step": 9382 }, { "epoch": 1.4688478396994364, "grad_norm": 2.77571177482605, "learning_rate": 8.318670576735092e-05, "loss": 1.1842, "step": 9383 }, { "epoch": 1.4690043832185347, "grad_norm": 2.4832112789154053, "learning_rate": 8.316226783968718e-05, "loss": 1.0286, "step": 9384 }, { "epoch": 1.469160926737633, "grad_norm": 4.166812419891357, "learning_rate": 8.313782991202345e-05, "loss": 1.3073, "step": 9385 }, { "epoch": 1.4693174702567313, "grad_norm": 1.6507247686386108, "learning_rate": 8.311339198435973e-05, "loss": 0.5474, "step": 9386 }, { "epoch": 1.4694740137758298, "grad_norm": 4.904731273651123, "learning_rate": 8.308895405669598e-05, "loss": 0.8368, "step": 9387 }, { "epoch": 1.469630557294928, "grad_norm": 1.261414647102356, "learning_rate": 8.306451612903224e-05, "loss": 0.5796, "step": 9388 }, { "epoch": 1.4697871008140262, "grad_norm": 0.3774799704551697, "learning_rate": 8.304007820136852e-05, "loss": 0.2298, "step": 9389 }, { "epoch": 1.4699436443331246, "grad_norm": 0.3805764615535736, "learning_rate": 8.301564027370479e-05, "loss": 0.1553, "step": 9390 }, { "epoch": 1.4701001878522229, "grad_norm": 0.48448646068573, "learning_rate": 8.299120234604104e-05, "loss": 0.2439, "step": 9391 }, { "epoch": 1.4702567313713213, "grad_norm": 
0.5146775841712952, "learning_rate": 8.296676441837732e-05, "loss": 0.2297, "step": 9392 }, { "epoch": 1.4704132748904195, "grad_norm": 0.5531075596809387, "learning_rate": 8.294232649071358e-05, "loss": 0.2025, "step": 9393 }, { "epoch": 1.4705698184095177, "grad_norm": 0.606508195400238, "learning_rate": 8.291788856304985e-05, "loss": 0.2208, "step": 9394 }, { "epoch": 1.4707263619286162, "grad_norm": 0.7570547461509705, "learning_rate": 8.289345063538611e-05, "loss": 0.253, "step": 9395 }, { "epoch": 1.4708829054477144, "grad_norm": 0.5803053379058838, "learning_rate": 8.286901270772238e-05, "loss": 0.2676, "step": 9396 }, { "epoch": 1.4710394489668128, "grad_norm": 0.5528594255447388, "learning_rate": 8.284457478005864e-05, "loss": 0.1975, "step": 9397 }, { "epoch": 1.471195992485911, "grad_norm": 0.7934523820877075, "learning_rate": 8.282013685239492e-05, "loss": 0.2068, "step": 9398 }, { "epoch": 1.4713525360050093, "grad_norm": 0.7662394046783447, "learning_rate": 8.279569892473117e-05, "loss": 0.2537, "step": 9399 }, { "epoch": 1.4715090795241077, "grad_norm": 1.507385492324829, "learning_rate": 8.277126099706743e-05, "loss": 0.247, "step": 9400 }, { "epoch": 1.4716656230432061, "grad_norm": 0.87679523229599, "learning_rate": 8.274682306940371e-05, "loss": 0.2944, "step": 9401 }, { "epoch": 1.4718221665623044, "grad_norm": 1.0118069648742676, "learning_rate": 8.272238514173998e-05, "loss": 0.3211, "step": 9402 }, { "epoch": 1.4719787100814026, "grad_norm": 0.34879270195961, "learning_rate": 8.269794721407623e-05, "loss": 0.1825, "step": 9403 }, { "epoch": 1.472135253600501, "grad_norm": 0.8371734619140625, "learning_rate": 8.267350928641251e-05, "loss": 0.3664, "step": 9404 }, { "epoch": 1.4722917971195992, "grad_norm": 1.5805662870407104, "learning_rate": 8.264907135874877e-05, "loss": 0.3631, "step": 9405 }, { "epoch": 1.4724483406386977, "grad_norm": 0.7317862510681152, "learning_rate": 8.262463343108502e-05, "loss": 0.309, "step": 9406 }, { "epoch": 
1.472604884157796, "grad_norm": 1.2464789152145386, "learning_rate": 8.26001955034213e-05, "loss": 0.3384, "step": 9407 }, { "epoch": 1.472761427676894, "grad_norm": 0.7848882079124451, "learning_rate": 8.257575757575757e-05, "loss": 0.4214, "step": 9408 }, { "epoch": 1.4729179711959925, "grad_norm": 3.3078129291534424, "learning_rate": 8.255131964809383e-05, "loss": 0.4749, "step": 9409 }, { "epoch": 1.4730745147150908, "grad_norm": 6.444918155670166, "learning_rate": 8.252688172043011e-05, "loss": 0.5161, "step": 9410 }, { "epoch": 1.4732310582341892, "grad_norm": 1.534546136856079, "learning_rate": 8.250244379276636e-05, "loss": 0.5021, "step": 9411 }, { "epoch": 1.4733876017532874, "grad_norm": 1.4558238983154297, "learning_rate": 8.247800586510263e-05, "loss": 0.4859, "step": 9412 }, { "epoch": 1.4735441452723856, "grad_norm": 0.8588922023773193, "learning_rate": 8.24535679374389e-05, "loss": 0.262, "step": 9413 }, { "epoch": 1.473700688791484, "grad_norm": 1.7501471042633057, "learning_rate": 8.242913000977517e-05, "loss": 0.7627, "step": 9414 }, { "epoch": 1.4738572323105823, "grad_norm": 4.139252185821533, "learning_rate": 8.240469208211142e-05, "loss": 0.3825, "step": 9415 }, { "epoch": 1.4740137758296807, "grad_norm": 3.472306489944458, "learning_rate": 8.23802541544477e-05, "loss": 0.9586, "step": 9416 }, { "epoch": 1.474170319348779, "grad_norm": 1.744738221168518, "learning_rate": 8.235581622678396e-05, "loss": 0.6018, "step": 9417 }, { "epoch": 1.4743268628678772, "grad_norm": 2.3839869499206543, "learning_rate": 8.233137829912021e-05, "loss": 0.5255, "step": 9418 }, { "epoch": 1.4744834063869756, "grad_norm": 1.5559438467025757, "learning_rate": 8.230694037145649e-05, "loss": 0.3971, "step": 9419 }, { "epoch": 1.4746399499060738, "grad_norm": 7.917959690093994, "learning_rate": 8.228250244379276e-05, "loss": 0.783, "step": 9420 }, { "epoch": 1.4747964934251723, "grad_norm": 2.138312816619873, "learning_rate": 8.225806451612902e-05, "loss": 0.6701, 
"step": 9421 }, { "epoch": 1.4749530369442705, "grad_norm": 3.2708122730255127, "learning_rate": 8.22336265884653e-05, "loss": 0.8102, "step": 9422 }, { "epoch": 1.4751095804633687, "grad_norm": 2.629610300064087, "learning_rate": 8.220918866080155e-05, "loss": 0.9962, "step": 9423 }, { "epoch": 1.4752661239824671, "grad_norm": 2.9563984870910645, "learning_rate": 8.218475073313782e-05, "loss": 0.6238, "step": 9424 }, { "epoch": 1.4754226675015654, "grad_norm": 3.6667652130126953, "learning_rate": 8.21603128054741e-05, "loss": 0.965, "step": 9425 }, { "epoch": 1.4755792110206638, "grad_norm": 2.6509387493133545, "learning_rate": 8.213587487781036e-05, "loss": 0.9697, "step": 9426 }, { "epoch": 1.475735754539762, "grad_norm": 2.9789719581604004, "learning_rate": 8.211143695014661e-05, "loss": 0.9831, "step": 9427 }, { "epoch": 1.4758922980588602, "grad_norm": 3.1920371055603027, "learning_rate": 8.208699902248289e-05, "loss": 1.0215, "step": 9428 }, { "epoch": 1.4760488415779587, "grad_norm": 2.6322896480560303, "learning_rate": 8.206256109481915e-05, "loss": 0.941, "step": 9429 }, { "epoch": 1.476205385097057, "grad_norm": 3.8062565326690674, "learning_rate": 8.20381231671554e-05, "loss": 1.4716, "step": 9430 }, { "epoch": 1.4763619286161553, "grad_norm": 1.7152727842330933, "learning_rate": 8.201368523949168e-05, "loss": 1.0996, "step": 9431 }, { "epoch": 1.4765184721352536, "grad_norm": 3.0590524673461914, "learning_rate": 8.198924731182795e-05, "loss": 1.2918, "step": 9432 }, { "epoch": 1.4766750156543518, "grad_norm": 2.010657548904419, "learning_rate": 8.196480938416421e-05, "loss": 0.8202, "step": 9433 }, { "epoch": 1.4768315591734502, "grad_norm": 1.9447054862976074, "learning_rate": 8.194037145650049e-05, "loss": 0.5172, "step": 9434 }, { "epoch": 1.4769881026925487, "grad_norm": 1.379649043083191, "learning_rate": 8.191593352883674e-05, "loss": 0.4932, "step": 9435 }, { "epoch": 1.4771446462116469, "grad_norm": 2.9022107124328613, "learning_rate": 
8.189149560117301e-05, "loss": 0.8522, "step": 9436 }, { "epoch": 1.477301189730745, "grad_norm": 2.831977128982544, "learning_rate": 8.186705767350929e-05, "loss": 1.4347, "step": 9437 }, { "epoch": 1.4774577332498435, "grad_norm": 1.9703986644744873, "learning_rate": 8.184261974584555e-05, "loss": 1.0414, "step": 9438 }, { "epoch": 1.4776142767689417, "grad_norm": 0.4396578371524811, "learning_rate": 8.18181818181818e-05, "loss": 0.2257, "step": 9439 }, { "epoch": 1.4777708202880402, "grad_norm": 0.6509549617767334, "learning_rate": 8.179374389051808e-05, "loss": 0.3489, "step": 9440 }, { "epoch": 1.4779273638071384, "grad_norm": 0.587127685546875, "learning_rate": 8.176930596285435e-05, "loss": 0.1851, "step": 9441 }, { "epoch": 1.4780839073262366, "grad_norm": 0.4554985463619232, "learning_rate": 8.17448680351906e-05, "loss": 0.2927, "step": 9442 }, { "epoch": 1.478240450845335, "grad_norm": 0.3806717097759247, "learning_rate": 8.172043010752688e-05, "loss": 0.154, "step": 9443 }, { "epoch": 1.4783969943644333, "grad_norm": 0.5629563331604004, "learning_rate": 8.169599217986314e-05, "loss": 0.2549, "step": 9444 }, { "epoch": 1.4785535378835317, "grad_norm": 0.7409935593605042, "learning_rate": 8.16715542521994e-05, "loss": 0.2599, "step": 9445 }, { "epoch": 1.47871008140263, "grad_norm": 1.7930724620819092, "learning_rate": 8.164711632453568e-05, "loss": 0.3104, "step": 9446 }, { "epoch": 1.4788666249217282, "grad_norm": 0.7437885999679565, "learning_rate": 8.162267839687193e-05, "loss": 0.3755, "step": 9447 }, { "epoch": 1.4790231684408266, "grad_norm": 0.7157233357429504, "learning_rate": 8.15982404692082e-05, "loss": 0.2964, "step": 9448 }, { "epoch": 1.4791797119599248, "grad_norm": 0.8939926028251648, "learning_rate": 8.157380254154448e-05, "loss": 0.1982, "step": 9449 }, { "epoch": 1.4793362554790233, "grad_norm": 1.4006277322769165, "learning_rate": 8.154936461388073e-05, "loss": 0.4599, "step": 9450 }, { "epoch": 1.4794927989981215, "grad_norm": 
1.421652913093567, "learning_rate": 8.1524926686217e-05, "loss": 0.4006, "step": 9451 }, { "epoch": 1.4796493425172197, "grad_norm": 1.1265709400177002, "learning_rate": 8.150048875855327e-05, "loss": 0.229, "step": 9452 }, { "epoch": 1.4798058860363181, "grad_norm": 1.248371958732605, "learning_rate": 8.147605083088954e-05, "loss": 0.3313, "step": 9453 }, { "epoch": 1.4799624295554163, "grad_norm": 1.0506163835525513, "learning_rate": 8.145161290322579e-05, "loss": 0.526, "step": 9454 }, { "epoch": 1.4801189730745148, "grad_norm": 0.9769994616508484, "learning_rate": 8.142717497556207e-05, "loss": 0.3684, "step": 9455 }, { "epoch": 1.480275516593613, "grad_norm": 1.5080924034118652, "learning_rate": 8.140273704789833e-05, "loss": 0.4004, "step": 9456 }, { "epoch": 1.4804320601127112, "grad_norm": 2.587855339050293, "learning_rate": 8.13782991202346e-05, "loss": 0.5374, "step": 9457 }, { "epoch": 1.4805886036318097, "grad_norm": 1.3442978858947754, "learning_rate": 8.135386119257087e-05, "loss": 0.4843, "step": 9458 }, { "epoch": 1.4807451471509079, "grad_norm": 1.6153483390808105, "learning_rate": 8.132942326490713e-05, "loss": 0.3719, "step": 9459 }, { "epoch": 1.4809016906700063, "grad_norm": 2.865086317062378, "learning_rate": 8.130498533724339e-05, "loss": 1.0182, "step": 9460 }, { "epoch": 1.4810582341891045, "grad_norm": 1.1315138339996338, "learning_rate": 8.128054740957967e-05, "loss": 0.4096, "step": 9461 }, { "epoch": 1.4812147777082028, "grad_norm": 1.3342171907424927, "learning_rate": 8.125610948191592e-05, "loss": 0.5152, "step": 9462 }, { "epoch": 1.4813713212273012, "grad_norm": 1.2070798873901367, "learning_rate": 8.123167155425218e-05, "loss": 0.4946, "step": 9463 }, { "epoch": 1.4815278647463996, "grad_norm": 1.9415396451950073, "learning_rate": 8.120723362658846e-05, "loss": 0.4527, "step": 9464 }, { "epoch": 1.4816844082654979, "grad_norm": 1.080844759941101, "learning_rate": 8.118279569892473e-05, "loss": 0.2465, "step": 9465 }, { "epoch": 
1.481840951784596, "grad_norm": 1.6477303504943848, "learning_rate": 8.115835777126098e-05, "loss": 0.6034, "step": 9466 }, { "epoch": 1.4819974953036943, "grad_norm": 2.1239099502563477, "learning_rate": 8.113391984359726e-05, "loss": 0.6707, "step": 9467 }, { "epoch": 1.4821540388227927, "grad_norm": 1.421504259109497, "learning_rate": 8.110948191593352e-05, "loss": 0.5054, "step": 9468 }, { "epoch": 1.4823105823418912, "grad_norm": 3.8904707431793213, "learning_rate": 8.108504398826979e-05, "loss": 0.4603, "step": 9469 }, { "epoch": 1.4824671258609894, "grad_norm": 1.8012278079986572, "learning_rate": 8.106060606060607e-05, "loss": 0.5213, "step": 9470 }, { "epoch": 1.4826236693800876, "grad_norm": 3.616649627685547, "learning_rate": 8.103616813294232e-05, "loss": 1.315, "step": 9471 }, { "epoch": 1.482780212899186, "grad_norm": 2.4311721324920654, "learning_rate": 8.101173020527858e-05, "loss": 0.9852, "step": 9472 }, { "epoch": 1.4829367564182843, "grad_norm": 1.9556763172149658, "learning_rate": 8.098729227761486e-05, "loss": 0.8551, "step": 9473 }, { "epoch": 1.4830932999373827, "grad_norm": 2.610199213027954, "learning_rate": 8.096285434995111e-05, "loss": 0.4723, "step": 9474 }, { "epoch": 1.483249843456481, "grad_norm": 1.7846730947494507, "learning_rate": 8.093841642228738e-05, "loss": 1.0211, "step": 9475 }, { "epoch": 1.4834063869755791, "grad_norm": 2.926453113555908, "learning_rate": 8.091397849462365e-05, "loss": 1.0827, "step": 9476 }, { "epoch": 1.4835629304946776, "grad_norm": 2.9660816192626953, "learning_rate": 8.088954056695992e-05, "loss": 1.0163, "step": 9477 }, { "epoch": 1.4837194740137758, "grad_norm": 2.5968966484069824, "learning_rate": 8.086510263929617e-05, "loss": 0.6645, "step": 9478 }, { "epoch": 1.4838760175328742, "grad_norm": 4.448347091674805, "learning_rate": 8.084066471163245e-05, "loss": 1.0715, "step": 9479 }, { "epoch": 1.4840325610519725, "grad_norm": 2.39528226852417, "learning_rate": 8.081622678396871e-05, "loss": 
1.1425, "step": 9480 }, { "epoch": 1.4841891045710707, "grad_norm": 2.856279134750366, "learning_rate": 8.079178885630498e-05, "loss": 0.9069, "step": 9481 }, { "epoch": 1.4843456480901691, "grad_norm": 2.2662487030029297, "learning_rate": 8.076735092864126e-05, "loss": 0.7697, "step": 9482 }, { "epoch": 1.4845021916092673, "grad_norm": 2.555561065673828, "learning_rate": 8.074291300097751e-05, "loss": 1.2625, "step": 9483 }, { "epoch": 1.4846587351283658, "grad_norm": 1.9698431491851807, "learning_rate": 8.071847507331377e-05, "loss": 1.1531, "step": 9484 }, { "epoch": 1.484815278647464, "grad_norm": 2.155409336090088, "learning_rate": 8.069403714565005e-05, "loss": 0.41, "step": 9485 }, { "epoch": 1.4849718221665622, "grad_norm": 2.2174770832061768, "learning_rate": 8.06695992179863e-05, "loss": 0.8686, "step": 9486 }, { "epoch": 1.4851283656856606, "grad_norm": 3.0642926692962646, "learning_rate": 8.064516129032257e-05, "loss": 1.2679, "step": 9487 }, { "epoch": 1.4852849092047589, "grad_norm": 3.7173335552215576, "learning_rate": 8.062072336265885e-05, "loss": 0.8235, "step": 9488 }, { "epoch": 1.4854414527238573, "grad_norm": 0.439689964056015, "learning_rate": 8.059628543499511e-05, "loss": 0.2204, "step": 9489 }, { "epoch": 1.4855979962429555, "grad_norm": 0.3609512448310852, "learning_rate": 8.057184750733136e-05, "loss": 0.2014, "step": 9490 }, { "epoch": 1.4857545397620537, "grad_norm": 0.530227541923523, "learning_rate": 8.054740957966764e-05, "loss": 0.1972, "step": 9491 }, { "epoch": 1.4859110832811522, "grad_norm": 0.6065565943717957, "learning_rate": 8.05229716520039e-05, "loss": 0.2325, "step": 9492 }, { "epoch": 1.4860676268002504, "grad_norm": 0.9413346648216248, "learning_rate": 8.049853372434017e-05, "loss": 0.3771, "step": 9493 }, { "epoch": 1.4862241703193488, "grad_norm": 0.57166588306427, "learning_rate": 8.047409579667645e-05, "loss": 0.2029, "step": 9494 }, { "epoch": 1.486380713838447, "grad_norm": 0.8557118773460388, "learning_rate": 
8.04496578690127e-05, "loss": 0.2793, "step": 9495 }, { "epoch": 1.4865372573575453, "grad_norm": 3.641554117202759, "learning_rate": 8.042521994134896e-05, "loss": 0.764, "step": 9496 }, { "epoch": 1.4866938008766437, "grad_norm": 0.5618619918823242, "learning_rate": 8.040078201368524e-05, "loss": 0.1924, "step": 9497 }, { "epoch": 1.4868503443957422, "grad_norm": 1.0576132535934448, "learning_rate": 8.03763440860215e-05, "loss": 0.3731, "step": 9498 }, { "epoch": 1.4870068879148404, "grad_norm": 0.5164195895195007, "learning_rate": 8.035190615835776e-05, "loss": 0.2424, "step": 9499 }, { "epoch": 1.4871634314339386, "grad_norm": 0.5563727617263794, "learning_rate": 8.032746823069404e-05, "loss": 0.1442, "step": 9500 }, { "epoch": 1.487319974953037, "grad_norm": 0.9167290329933167, "learning_rate": 8.03030303030303e-05, "loss": 0.3146, "step": 9501 }, { "epoch": 1.4874765184721352, "grad_norm": 0.6889357566833496, "learning_rate": 8.027859237536655e-05, "loss": 0.259, "step": 9502 }, { "epoch": 1.4876330619912337, "grad_norm": NaN, "learning_rate": 8.027859237536655e-05, "loss": 0.0, "step": 9503 }, { "epoch": 1.487789605510332, "grad_norm": 0.6606603860855103, "learning_rate": 8.025415444770283e-05, "loss": 0.3823, "step": 9504 }, { "epoch": 1.4879461490294301, "grad_norm": 1.8341822624206543, "learning_rate": 8.02297165200391e-05, "loss": 0.4172, "step": 9505 }, { "epoch": 1.4881026925485286, "grad_norm": 1.2621349096298218, "learning_rate": 8.020527859237536e-05, "loss": 0.3575, "step": 9506 }, { "epoch": 1.4882592360676268, "grad_norm": 0.7620284557342529, "learning_rate": 8.018084066471163e-05, "loss": 0.3535, "step": 9507 }, { "epoch": 1.4884157795867252, "grad_norm": 1.7038427591323853, "learning_rate": 8.015640273704789e-05, "loss": 0.6989, "step": 9508 }, { "epoch": 1.4885723231058234, "grad_norm": 1.006224274635315, "learning_rate": 8.013196480938415e-05, "loss": 0.4143, "step": 9509 }, { "epoch": 1.4887288666249217, "grad_norm": 1.3613799810409546, 
"learning_rate": 8.010752688172043e-05, "loss": 0.3454, "step": 9510 }, { "epoch": 1.48888541014402, "grad_norm": 2.3779265880584717, "learning_rate": 8.008308895405668e-05, "loss": 0.3727, "step": 9511 }, { "epoch": 1.4890419536631183, "grad_norm": 1.3316359519958496, "learning_rate": 8.005865102639295e-05, "loss": 0.4661, "step": 9512 }, { "epoch": 1.4891984971822168, "grad_norm": 2.452564239501953, "learning_rate": 8.003421309872923e-05, "loss": 0.6581, "step": 9513 }, { "epoch": 1.489355040701315, "grad_norm": 2.7504520416259766, "learning_rate": 8.000977517106549e-05, "loss": 0.7966, "step": 9514 }, { "epoch": 1.4895115842204132, "grad_norm": 2.999328851699829, "learning_rate": 7.998533724340174e-05, "loss": 0.9771, "step": 9515 }, { "epoch": 1.4896681277395116, "grad_norm": 2.237630605697632, "learning_rate": 7.996089931573802e-05, "loss": 0.6239, "step": 9516 }, { "epoch": 1.4898246712586098, "grad_norm": 1.3302862644195557, "learning_rate": 7.993646138807429e-05, "loss": 0.421, "step": 9517 }, { "epoch": 1.4899812147777083, "grad_norm": 1.9129620790481567, "learning_rate": 7.991202346041055e-05, "loss": 0.6495, "step": 9518 }, { "epoch": 1.4901377582968065, "grad_norm": 1.6199758052825928, "learning_rate": 7.988758553274682e-05, "loss": 0.607, "step": 9519 }, { "epoch": 1.4902943018159047, "grad_norm": 2.3590195178985596, "learning_rate": 7.986314760508308e-05, "loss": 1.0782, "step": 9520 }, { "epoch": 1.4904508453350032, "grad_norm": 2.1550941467285156, "learning_rate": 7.983870967741935e-05, "loss": 0.9664, "step": 9521 }, { "epoch": 1.4906073888541014, "grad_norm": 4.303733825683594, "learning_rate": 7.981427174975562e-05, "loss": 0.9356, "step": 9522 }, { "epoch": 1.4907639323731998, "grad_norm": 2.1720597743988037, "learning_rate": 7.978983382209188e-05, "loss": 0.6391, "step": 9523 }, { "epoch": 1.490920475892298, "grad_norm": 2.204709053039551, "learning_rate": 7.976539589442814e-05, "loss": 0.6076, "step": 9524 }, { "epoch": 1.4910770194113963, 
"grad_norm": 2.43810772895813, "learning_rate": 7.974095796676442e-05, "loss": 1.1823, "step": 9525 }, { "epoch": 1.4912335629304947, "grad_norm": 1.9804140329360962, "learning_rate": 7.971652003910068e-05, "loss": 0.3912, "step": 9526 }, { "epoch": 1.491390106449593, "grad_norm": 2.124976396560669, "learning_rate": 7.969208211143693e-05, "loss": 0.8703, "step": 9527 }, { "epoch": 1.4915466499686914, "grad_norm": 1.996584415435791, "learning_rate": 7.966764418377321e-05, "loss": 0.9466, "step": 9528 }, { "epoch": 1.4917031934877896, "grad_norm": 1.8637217283248901, "learning_rate": 7.964320625610948e-05, "loss": 0.7035, "step": 9529 }, { "epoch": 1.4918597370068878, "grad_norm": 4.413573741912842, "learning_rate": 7.961876832844574e-05, "loss": 0.8354, "step": 9530 }, { "epoch": 1.4920162805259862, "grad_norm": 2.3614120483398438, "learning_rate": 7.959433040078201e-05, "loss": 1.1676, "step": 9531 }, { "epoch": 1.4921728240450847, "grad_norm": 2.0499792098999023, "learning_rate": 7.956989247311827e-05, "loss": 1.3798, "step": 9532 }, { "epoch": 1.4923293675641829, "grad_norm": 2.0810813903808594, "learning_rate": 7.954545454545454e-05, "loss": 0.8426, "step": 9533 }, { "epoch": 1.492485911083281, "grad_norm": 2.218729257583618, "learning_rate": 7.952101661779082e-05, "loss": 1.0149, "step": 9534 }, { "epoch": 1.4926424546023795, "grad_norm": 1.0656377077102661, "learning_rate": 7.949657869012707e-05, "loss": 0.1697, "step": 9535 }, { "epoch": 1.4927989981214778, "grad_norm": 2.4055192470550537, "learning_rate": 7.947214076246333e-05, "loss": 0.6699, "step": 9536 }, { "epoch": 1.4929555416405762, "grad_norm": 1.910409927368164, "learning_rate": 7.944770283479961e-05, "loss": 0.7338, "step": 9537 }, { "epoch": 1.4931120851596744, "grad_norm": 1.1563485860824585, "learning_rate": 7.942326490713587e-05, "loss": 0.3543, "step": 9538 }, { "epoch": 1.4932686286787726, "grad_norm": 0.43213382363319397, "learning_rate": 7.939882697947213e-05, "loss": 0.1907, "step": 9539 
}, { "epoch": 1.493425172197871, "grad_norm": 0.5200718641281128, "learning_rate": 7.93743890518084e-05, "loss": 0.2515, "step": 9540 }, { "epoch": 1.4935817157169693, "grad_norm": 1.2368117570877075, "learning_rate": 7.934995112414467e-05, "loss": 0.2597, "step": 9541 }, { "epoch": 1.4937382592360677, "grad_norm": 0.9348035454750061, "learning_rate": 7.932551319648093e-05, "loss": 0.3166, "step": 9542 }, { "epoch": 1.493894802755166, "grad_norm": 0.7543662190437317, "learning_rate": 7.93010752688172e-05, "loss": 0.2521, "step": 9543 }, { "epoch": 1.4940513462742642, "grad_norm": 0.44818371534347534, "learning_rate": 7.927663734115346e-05, "loss": 0.1899, "step": 9544 }, { "epoch": 1.4942078897933626, "grad_norm": 0.6756463050842285, "learning_rate": 7.925219941348973e-05, "loss": 0.2408, "step": 9545 }, { "epoch": 1.4943644333124608, "grad_norm": 1.1319537162780762, "learning_rate": 7.9227761485826e-05, "loss": 0.2446, "step": 9546 }, { "epoch": 1.4945209768315593, "grad_norm": 1.639535903930664, "learning_rate": 7.920332355816226e-05, "loss": 0.2676, "step": 9547 }, { "epoch": 1.4946775203506575, "grad_norm": 0.6942873001098633, "learning_rate": 7.917888563049852e-05, "loss": 0.4021, "step": 9548 }, { "epoch": 1.4948340638697557, "grad_norm": 0.61109858751297, "learning_rate": 7.91544477028348e-05, "loss": 0.2719, "step": 9549 }, { "epoch": 1.4949906073888541, "grad_norm": 0.8826609253883362, "learning_rate": 7.913000977517107e-05, "loss": 0.4091, "step": 9550 }, { "epoch": 1.4951471509079524, "grad_norm": 2.2055704593658447, "learning_rate": 7.910557184750732e-05, "loss": 0.3353, "step": 9551 }, { "epoch": 1.4953036944270508, "grad_norm": 4.32928991317749, "learning_rate": 7.90811339198436e-05, "loss": 0.5081, "step": 9552 }, { "epoch": 1.495460237946149, "grad_norm": 0.8008466958999634, "learning_rate": 7.905669599217986e-05, "loss": 0.2946, "step": 9553 }, { "epoch": 1.4956167814652472, "grad_norm": 1.2389719486236572, "learning_rate": 7.903225806451613e-05, 
"loss": 0.3579, "step": 9554 }, { "epoch": 1.4957733249843457, "grad_norm": 1.683296799659729, "learning_rate": 7.900782013685239e-05, "loss": 0.4454, "step": 9555 }, { "epoch": 1.495929868503444, "grad_norm": 1.3349992036819458, "learning_rate": 7.898338220918865e-05, "loss": 0.3898, "step": 9556 }, { "epoch": 1.4960864120225423, "grad_norm": 0.86898273229599, "learning_rate": 7.895894428152492e-05, "loss": 0.4127, "step": 9557 }, { "epoch": 1.4962429555416406, "grad_norm": 1.2158864736557007, "learning_rate": 7.89345063538612e-05, "loss": 0.262, "step": 9558 }, { "epoch": 1.4963994990607388, "grad_norm": 1.0197027921676636, "learning_rate": 7.891006842619745e-05, "loss": 0.3522, "step": 9559 }, { "epoch": 1.4965560425798372, "grad_norm": 2.6261444091796875, "learning_rate": 7.888563049853371e-05, "loss": 0.4608, "step": 9560 }, { "epoch": 1.4967125860989354, "grad_norm": 1.4743295907974243, "learning_rate": 7.886119257086999e-05, "loss": 0.4477, "step": 9561 }, { "epoch": 1.4968691296180339, "grad_norm": 1.690913438796997, "learning_rate": 7.883675464320626e-05, "loss": 0.2981, "step": 9562 }, { "epoch": 1.497025673137132, "grad_norm": 1.879437804222107, "learning_rate": 7.881231671554251e-05, "loss": 0.5513, "step": 9563 }, { "epoch": 1.4971822166562303, "grad_norm": 2.7271459102630615, "learning_rate": 7.878787878787879e-05, "loss": 0.677, "step": 9564 }, { "epoch": 1.4973387601753287, "grad_norm": 3.2245678901672363, "learning_rate": 7.876344086021505e-05, "loss": 0.787, "step": 9565 }, { "epoch": 1.4974953036944272, "grad_norm": 1.3379689455032349, "learning_rate": 7.87390029325513e-05, "loss": 0.7679, "step": 9566 }, { "epoch": 1.4976518472135254, "grad_norm": 1.8764911890029907, "learning_rate": 7.871456500488758e-05, "loss": 0.613, "step": 9567 }, { "epoch": 1.4978083907326236, "grad_norm": 3.4302899837493896, "learning_rate": 7.869012707722385e-05, "loss": 0.643, "step": 9568 }, { "epoch": 1.497964934251722, "grad_norm": 2.4290006160736084, 
"learning_rate": 7.866568914956011e-05, "loss": 0.4512, "step": 9569 }, { "epoch": 1.4981214777708203, "grad_norm": 2.3394429683685303, "learning_rate": 7.864125122189639e-05, "loss": 0.7265, "step": 9570 }, { "epoch": 1.4982780212899187, "grad_norm": 3.2177608013153076, "learning_rate": 7.861681329423264e-05, "loss": 0.7219, "step": 9571 }, { "epoch": 1.498434564809017, "grad_norm": 2.652704954147339, "learning_rate": 7.85923753665689e-05, "loss": 0.921, "step": 9572 }, { "epoch": 1.4985911083281152, "grad_norm": 1.5639492273330688, "learning_rate": 7.856793743890518e-05, "loss": 0.8148, "step": 9573 }, { "epoch": 1.4987476518472136, "grad_norm": 1.7422173023223877, "learning_rate": 7.854349951124145e-05, "loss": 0.429, "step": 9574 }, { "epoch": 1.4989041953663118, "grad_norm": 2.2326202392578125, "learning_rate": 7.85190615835777e-05, "loss": 0.9982, "step": 9575 }, { "epoch": 1.4990607388854102, "grad_norm": 2.4067230224609375, "learning_rate": 7.849462365591398e-05, "loss": 0.8497, "step": 9576 }, { "epoch": 1.4992172824045085, "grad_norm": 5.408649444580078, "learning_rate": 7.847018572825024e-05, "loss": 1.1441, "step": 9577 }, { "epoch": 1.4993738259236067, "grad_norm": 2.413092613220215, "learning_rate": 7.84457478005865e-05, "loss": 1.1049, "step": 9578 }, { "epoch": 1.4995303694427051, "grad_norm": 2.305614471435547, "learning_rate": 7.842130987292277e-05, "loss": 1.061, "step": 9579 }, { "epoch": 1.4996869129618033, "grad_norm": 2.732689619064331, "learning_rate": 7.839687194525904e-05, "loss": 0.5431, "step": 9580 }, { "epoch": 1.4998434564809018, "grad_norm": 2.2175774574279785, "learning_rate": 7.83724340175953e-05, "loss": 1.5274, "step": 9581 }, { "epoch": 1.5, "grad_norm": 3.7100279331207275, "learning_rate": 7.834799608993158e-05, "loss": 1.4016, "step": 9582 }, { "epoch": 1.5001565435190982, "grad_norm": 2.078692674636841, "learning_rate": 7.832355816226783e-05, "loss": 0.6064, "step": 9583 }, { "epoch": 1.5003130870381967, "grad_norm": 
2.7273752689361572, "learning_rate": 7.82991202346041e-05, "loss": 0.5273, "step": 9584 }, { "epoch": 1.5004696305572949, "grad_norm": 1.3551000356674194, "learning_rate": 7.827468230694037e-05, "loss": 0.3828, "step": 9585 }, { "epoch": 1.5006261740763933, "grad_norm": 3.331238269805908, "learning_rate": 7.825024437927664e-05, "loss": 1.221, "step": 9586 }, { "epoch": 1.5007827175954915, "grad_norm": 2.849250555038452, "learning_rate": 7.822580645161289e-05, "loss": 0.799, "step": 9587 }, { "epoch": 1.5009392611145898, "grad_norm": 2.9601728916168213, "learning_rate": 7.820136852394917e-05, "loss": 1.6444, "step": 9588 }, { "epoch": 1.5010958046336882, "grad_norm": 0.5723337531089783, "learning_rate": 7.817693059628543e-05, "loss": 0.2622, "step": 9589 }, { "epoch": 1.5012523481527866, "grad_norm": 0.8948503732681274, "learning_rate": 7.815249266862169e-05, "loss": 0.2687, "step": 9590 }, { "epoch": 1.5014088916718848, "grad_norm": 0.6991376876831055, "learning_rate": 7.812805474095796e-05, "loss": 0.2752, "step": 9591 }, { "epoch": 1.501565435190983, "grad_norm": 0.6468504667282104, "learning_rate": 7.810361681329423e-05, "loss": 0.1794, "step": 9592 }, { "epoch": 1.5017219787100813, "grad_norm": 0.5929017066955566, "learning_rate": 7.807917888563049e-05, "loss": 0.2153, "step": 9593 }, { "epoch": 1.5018785222291797, "grad_norm": 7.306670188903809, "learning_rate": 7.805474095796674e-05, "loss": 0.2172, "step": 9594 }, { "epoch": 1.5020350657482782, "grad_norm": 0.5348293781280518, "learning_rate": 7.803030303030302e-05, "loss": 0.2575, "step": 9595 }, { "epoch": 1.5021916092673764, "grad_norm": 0.5054227709770203, "learning_rate": 7.800586510263929e-05, "loss": 0.229, "step": 9596 }, { "epoch": 1.5023481527864746, "grad_norm": 0.5968636274337769, "learning_rate": 7.798142717497555e-05, "loss": 0.1961, "step": 9597 }, { "epoch": 1.5025046963055728, "grad_norm": 1.1959178447723389, "learning_rate": 7.795698924731183e-05, "loss": 0.328, "step": 9598 }, { "epoch": 
1.5026612398246713, "grad_norm": 1.3676921129226685, "learning_rate": 7.793255131964808e-05, "loss": 0.294, "step": 9599 }, { "epoch": 1.5028177833437697, "grad_norm": 0.5510010123252869, "learning_rate": 7.790811339198435e-05, "loss": 0.2935, "step": 9600 }, { "epoch": 1.502974326862868, "grad_norm": 1.0297011137008667, "learning_rate": 7.788367546432063e-05, "loss": 0.2587, "step": 9601 }, { "epoch": 1.5031308703819661, "grad_norm": 2.1265127658843994, "learning_rate": 7.785923753665688e-05, "loss": 0.4379, "step": 9602 }, { "epoch": 1.5032874139010644, "grad_norm": 1.283608317375183, "learning_rate": 7.783479960899314e-05, "loss": 0.5179, "step": 9603 }, { "epoch": 1.5034439574201628, "grad_norm": 0.9307227730751038, "learning_rate": 7.781036168132942e-05, "loss": 0.3469, "step": 9604 }, { "epoch": 1.5036005009392612, "grad_norm": 1.4115785360336304, "learning_rate": 7.778592375366568e-05, "loss": 0.3251, "step": 9605 }, { "epoch": 1.5037570444583594, "grad_norm": 1.2950360774993896, "learning_rate": 7.776148582600194e-05, "loss": 0.4412, "step": 9606 }, { "epoch": 1.5039135879774577, "grad_norm": 1.8613760471343994, "learning_rate": 7.773704789833821e-05, "loss": 0.5847, "step": 9607 }, { "epoch": 1.5040701314965559, "grad_norm": 1.611189365386963, "learning_rate": 7.771260997067448e-05, "loss": 0.4761, "step": 9608 }, { "epoch": 1.5042266750156543, "grad_norm": 1.2871978282928467, "learning_rate": 7.768817204301074e-05, "loss": 0.3882, "step": 9609 }, { "epoch": 1.5043832185347528, "grad_norm": 0.9030357599258423, "learning_rate": 7.766373411534701e-05, "loss": 0.3907, "step": 9610 }, { "epoch": 1.504539762053851, "grad_norm": 1.4141883850097656, "learning_rate": 7.763929618768327e-05, "loss": 0.4686, "step": 9611 }, { "epoch": 1.5046963055729492, "grad_norm": 2.3717682361602783, "learning_rate": 7.761485826001954e-05, "loss": 0.5278, "step": 9612 }, { "epoch": 1.5048528490920476, "grad_norm": 0.8933207988739014, "learning_rate": 7.759042033235582e-05, "loss": 
0.3828, "step": 9613 }, { "epoch": 1.5050093926111459, "grad_norm": 1.469948649406433, "learning_rate": 7.756598240469207e-05, "loss": 0.6257, "step": 9614 }, { "epoch": 1.5051659361302443, "grad_norm": 2.732593297958374, "learning_rate": 7.754154447702833e-05, "loss": 0.5167, "step": 9615 }, { "epoch": 1.5053224796493425, "grad_norm": 2.5369060039520264, "learning_rate": 7.751710654936461e-05, "loss": 0.4295, "step": 9616 }, { "epoch": 1.5054790231684407, "grad_norm": 3.549316883087158, "learning_rate": 7.749266862170088e-05, "loss": 0.3771, "step": 9617 }, { "epoch": 1.5056355666875392, "grad_norm": 1.7720410823822021, "learning_rate": 7.746823069403713e-05, "loss": 0.6011, "step": 9618 }, { "epoch": 1.5057921102066374, "grad_norm": 0.6939887404441833, "learning_rate": 7.74437927663734e-05, "loss": 0.237, "step": 9619 }, { "epoch": 1.5059486537257358, "grad_norm": 2.6376233100891113, "learning_rate": 7.741935483870967e-05, "loss": 1.1664, "step": 9620 }, { "epoch": 1.506105197244834, "grad_norm": 2.496608018875122, "learning_rate": 7.739491691104593e-05, "loss": 0.7487, "step": 9621 }, { "epoch": 1.5062617407639323, "grad_norm": 2.7778053283691406, "learning_rate": 7.73704789833822e-05, "loss": 0.8468, "step": 9622 }, { "epoch": 1.5064182842830307, "grad_norm": 2.1602282524108887, "learning_rate": 7.734604105571846e-05, "loss": 0.9794, "step": 9623 }, { "epoch": 1.5065748278021291, "grad_norm": 2.9261624813079834, "learning_rate": 7.732160312805473e-05, "loss": 0.953, "step": 9624 }, { "epoch": 1.5067313713212274, "grad_norm": 3.67445969581604, "learning_rate": 7.729716520039101e-05, "loss": 0.8946, "step": 9625 }, { "epoch": 1.5068879148403256, "grad_norm": 2.395833730697632, "learning_rate": 7.727272727272726e-05, "loss": 1.5854, "step": 9626 }, { "epoch": 1.5070444583594238, "grad_norm": 2.6899259090423584, "learning_rate": 7.724828934506352e-05, "loss": 0.5556, "step": 9627 }, { "epoch": 1.5072010018785222, "grad_norm": 1.9388134479522705, "learning_rate": 
7.72238514173998e-05, "loss": 1.3441, "step": 9628 }, { "epoch": 1.5073575453976207, "grad_norm": 3.582058906555176, "learning_rate": 7.719941348973607e-05, "loss": 1.3142, "step": 9629 }, { "epoch": 1.507514088916719, "grad_norm": 1.9698890447616577, "learning_rate": 7.717497556207232e-05, "loss": 0.64, "step": 9630 }, { "epoch": 1.5076706324358171, "grad_norm": 2.8033182621002197, "learning_rate": 7.71505376344086e-05, "loss": 1.1611, "step": 9631 }, { "epoch": 1.5078271759549153, "grad_norm": 3.156467914581299, "learning_rate": 7.712609970674486e-05, "loss": 0.9295, "step": 9632 }, { "epoch": 1.5079837194740138, "grad_norm": 1.1335583925247192, "learning_rate": 7.710166177908113e-05, "loss": 0.5743, "step": 9633 }, { "epoch": 1.5081402629931122, "grad_norm": 2.90801739692688, "learning_rate": 7.707722385141739e-05, "loss": 0.6852, "step": 9634 }, { "epoch": 1.5082968065122104, "grad_norm": 2.2356724739074707, "learning_rate": 7.705278592375366e-05, "loss": 0.8531, "step": 9635 }, { "epoch": 1.5084533500313086, "grad_norm": 1.6373522281646729, "learning_rate": 7.702834799608992e-05, "loss": 0.524, "step": 9636 }, { "epoch": 1.5086098935504069, "grad_norm": 1.7506439685821533, "learning_rate": 7.70039100684262e-05, "loss": 0.5397, "step": 9637 }, { "epoch": 1.5087664370695053, "grad_norm": 1.3206775188446045, "learning_rate": 7.697947214076245e-05, "loss": 0.6136, "step": 9638 }, { "epoch": 1.5089229805886037, "grad_norm": 0.5027822256088257, "learning_rate": 7.695503421309871e-05, "loss": 0.2406, "step": 9639 }, { "epoch": 1.509079524107702, "grad_norm": 0.5172881484031677, "learning_rate": 7.693059628543499e-05, "loss": 0.2308, "step": 9640 }, { "epoch": 1.5092360676268002, "grad_norm": 0.5118494033813477, "learning_rate": 7.690615835777126e-05, "loss": 0.2369, "step": 9641 }, { "epoch": 1.5093926111458984, "grad_norm": 0.42256832122802734, "learning_rate": 7.688172043010751e-05, "loss": 0.131, "step": 9642 }, { "epoch": 1.5095491546649968, "grad_norm": 
0.41296517848968506, "learning_rate": 7.685728250244379e-05, "loss": 0.1728, "step": 9643 }, { "epoch": 1.5097056981840953, "grad_norm": 1.4930837154388428, "learning_rate": 7.683284457478005e-05, "loss": 0.1832, "step": 9644 }, { "epoch": 1.5098622417031935, "grad_norm": 0.46577295660972595, "learning_rate": 7.680840664711632e-05, "loss": 0.2564, "step": 9645 }, { "epoch": 1.5100187852222917, "grad_norm": 0.6412208676338196, "learning_rate": 7.678396871945258e-05, "loss": 0.3131, "step": 9646 }, { "epoch": 1.5101753287413902, "grad_norm": 1.1731911897659302, "learning_rate": 7.675953079178885e-05, "loss": 0.4227, "step": 9647 }, { "epoch": 1.5103318722604884, "grad_norm": 0.7713680267333984, "learning_rate": 7.673509286412511e-05, "loss": 0.3043, "step": 9648 }, { "epoch": 1.5104884157795868, "grad_norm": 0.8555247187614441, "learning_rate": 7.671065493646139e-05, "loss": 0.3916, "step": 9649 }, { "epoch": 1.510644959298685, "grad_norm": 1.737226128578186, "learning_rate": 7.668621700879764e-05, "loss": 0.2323, "step": 9650 }, { "epoch": 1.5108015028177832, "grad_norm": 2.419440746307373, "learning_rate": 7.66617790811339e-05, "loss": 0.3248, "step": 9651 }, { "epoch": 1.5109580463368817, "grad_norm": 1.3268059492111206, "learning_rate": 7.663734115347018e-05, "loss": 0.37, "step": 9652 }, { "epoch": 1.5111145898559801, "grad_norm": 0.7393986582756042, "learning_rate": 7.661290322580645e-05, "loss": 0.5048, "step": 9653 }, { "epoch": 1.5112711333750783, "grad_norm": 1.3429745435714722, "learning_rate": 7.65884652981427e-05, "loss": 0.3139, "step": 9654 }, { "epoch": 1.5114276768941766, "grad_norm": 1.2537013292312622, "learning_rate": 7.656402737047898e-05, "loss": 0.5404, "step": 9655 }, { "epoch": 1.5115842204132748, "grad_norm": 1.820609211921692, "learning_rate": 7.653958944281524e-05, "loss": 0.4524, "step": 9656 }, { "epoch": 1.5117407639323732, "grad_norm": 1.4393458366394043, "learning_rate": 7.651515151515151e-05, "loss": 0.2998, "step": 9657 }, { 
"epoch": 1.5118973074514717, "grad_norm": 0.7819269895553589, "learning_rate": 7.649071358748777e-05, "loss": 0.2722, "step": 9658 }, { "epoch": 1.5120538509705699, "grad_norm": 1.3116579055786133, "learning_rate": 7.646627565982404e-05, "loss": 0.3758, "step": 9659 }, { "epoch": 1.512210394489668, "grad_norm": 1.4176701307296753, "learning_rate": 7.64418377321603e-05, "loss": 0.4362, "step": 9660 }, { "epoch": 1.5123669380087663, "grad_norm": 1.270081877708435, "learning_rate": 7.641739980449658e-05, "loss": 0.4902, "step": 9661 }, { "epoch": 1.5125234815278648, "grad_norm": 1.4358556270599365, "learning_rate": 7.639296187683283e-05, "loss": 0.5373, "step": 9662 }, { "epoch": 1.5126800250469632, "grad_norm": 2.2279257774353027, "learning_rate": 7.63685239491691e-05, "loss": 0.6467, "step": 9663 }, { "epoch": 1.5128365685660614, "grad_norm": 1.7583202123641968, "learning_rate": 7.634408602150538e-05, "loss": 0.6748, "step": 9664 }, { "epoch": 1.5129931120851596, "grad_norm": 1.953381896018982, "learning_rate": 7.631964809384164e-05, "loss": 0.7467, "step": 9665 }, { "epoch": 1.5131496556042578, "grad_norm": 2.358065605163574, "learning_rate": 7.629521016617789e-05, "loss": 0.5326, "step": 9666 }, { "epoch": 1.5133061991233563, "grad_norm": 2.178169012069702, "learning_rate": 7.627077223851417e-05, "loss": 0.6585, "step": 9667 }, { "epoch": 1.5134627426424547, "grad_norm": 2.588186502456665, "learning_rate": 7.624633431085043e-05, "loss": 0.7107, "step": 9668 }, { "epoch": 1.513619286161553, "grad_norm": 3.952768087387085, "learning_rate": 7.62218963831867e-05, "loss": 0.578, "step": 9669 }, { "epoch": 1.5137758296806512, "grad_norm": 1.8726907968521118, "learning_rate": 7.619745845552296e-05, "loss": 0.3684, "step": 9670 }, { "epoch": 1.5139323731997494, "grad_norm": 11.03661823272705, "learning_rate": 7.617302052785923e-05, "loss": 0.8064, "step": 9671 }, { "epoch": 1.5140889167188478, "grad_norm": 1.7992969751358032, "learning_rate": 7.61485826001955e-05, "loss": 
0.6892, "step": 9672 }, { "epoch": 1.5142454602379463, "grad_norm": 1.6284939050674438, "learning_rate": 7.612414467253177e-05, "loss": 0.4726, "step": 9673 }, { "epoch": 1.5144020037570445, "grad_norm": 4.366304874420166, "learning_rate": 7.609970674486802e-05, "loss": 1.0102, "step": 9674 }, { "epoch": 1.5145585472761427, "grad_norm": 2.8104560375213623, "learning_rate": 7.607526881720429e-05, "loss": 1.1905, "step": 9675 }, { "epoch": 1.514715090795241, "grad_norm": 3.4192862510681152, "learning_rate": 7.605083088954057e-05, "loss": 0.7504, "step": 9676 }, { "epoch": 1.5148716343143394, "grad_norm": 3.793952465057373, "learning_rate": 7.602639296187683e-05, "loss": 0.7833, "step": 9677 }, { "epoch": 1.5150281778334378, "grad_norm": 3.155576229095459, "learning_rate": 7.600195503421308e-05, "loss": 0.9845, "step": 9678 }, { "epoch": 1.515184721352536, "grad_norm": 2.9574239253997803, "learning_rate": 7.597751710654936e-05, "loss": 1.1077, "step": 9679 }, { "epoch": 1.5153412648716342, "grad_norm": 2.5134990215301514, "learning_rate": 7.595307917888563e-05, "loss": 0.9307, "step": 9680 }, { "epoch": 1.5154978083907327, "grad_norm": 2.6495211124420166, "learning_rate": 7.592864125122188e-05, "loss": 1.0685, "step": 9681 }, { "epoch": 1.5156543519098309, "grad_norm": 2.684701919555664, "learning_rate": 7.590420332355816e-05, "loss": 0.8099, "step": 9682 }, { "epoch": 1.5158108954289293, "grad_norm": 2.9182863235473633, "learning_rate": 7.587976539589442e-05, "loss": 1.5933, "step": 9683 }, { "epoch": 1.5159674389480275, "grad_norm": 3.108677387237549, "learning_rate": 7.585532746823068e-05, "loss": 0.9618, "step": 9684 }, { "epoch": 1.5161239824671258, "grad_norm": 1.0323936939239502, "learning_rate": 7.583088954056696e-05, "loss": 0.213, "step": 9685 }, { "epoch": 1.5162805259862242, "grad_norm": 2.6111085414886475, "learning_rate": 7.580645161290321e-05, "loss": 0.8856, "step": 9686 }, { "epoch": 1.5164370695053226, "grad_norm": 0.9904088973999023, 
"learning_rate": 7.578201368523948e-05, "loss": 0.4987, "step": 9687 }, { "epoch": 1.5165936130244209, "grad_norm": 1.8084644079208374, "learning_rate": 7.575757575757576e-05, "loss": 0.7905, "step": 9688 }, { "epoch": 1.516750156543519, "grad_norm": 0.8530622720718384, "learning_rate": 7.573313782991202e-05, "loss": 0.2475, "step": 9689 }, { "epoch": 1.5169067000626173, "grad_norm": 0.5058773756027222, "learning_rate": 7.570869990224827e-05, "loss": 0.2024, "step": 9690 }, { "epoch": 1.5170632435817157, "grad_norm": 0.4346686005592346, "learning_rate": 7.568426197458455e-05, "loss": 0.185, "step": 9691 }, { "epoch": 1.5172197871008142, "grad_norm": 0.6962853074073792, "learning_rate": 7.565982404692082e-05, "loss": 0.2676, "step": 9692 }, { "epoch": 1.5173763306199124, "grad_norm": 0.5338491201400757, "learning_rate": 7.563538611925707e-05, "loss": 0.243, "step": 9693 }, { "epoch": 1.5175328741390106, "grad_norm": 0.5298436284065247, "learning_rate": 7.561094819159335e-05, "loss": 0.2168, "step": 9694 }, { "epoch": 1.5176894176581088, "grad_norm": 0.6006895899772644, "learning_rate": 7.558651026392961e-05, "loss": 0.2639, "step": 9695 }, { "epoch": 1.5178459611772073, "grad_norm": 1.1772099733352661, "learning_rate": 7.556207233626588e-05, "loss": 0.2938, "step": 9696 }, { "epoch": 1.5180025046963057, "grad_norm": 2.0881564617156982, "learning_rate": 7.553763440860215e-05, "loss": 0.2997, "step": 9697 }, { "epoch": 1.518159048215404, "grad_norm": 0.7094396352767944, "learning_rate": 7.55131964809384e-05, "loss": 0.3687, "step": 9698 }, { "epoch": 1.5183155917345021, "grad_norm": 0.6079855561256409, "learning_rate": 7.548875855327467e-05, "loss": 0.2226, "step": 9699 }, { "epoch": 1.5184721352536004, "grad_norm": 0.6672441363334656, "learning_rate": 7.546432062561095e-05, "loss": 0.2565, "step": 9700 }, { "epoch": 1.5186286787726988, "grad_norm": 0.7931745648384094, "learning_rate": 7.543988269794721e-05, "loss": 0.3589, "step": 9701 }, { "epoch": 
1.5187852222917972, "grad_norm": 0.5384275913238525, "learning_rate": 7.541544477028346e-05, "loss": 0.1624, "step": 9702 }, { "epoch": 1.5189417658108955, "grad_norm": 0.7884646058082581, "learning_rate": 7.539100684261974e-05, "loss": 0.2575, "step": 9703 }, { "epoch": 1.5190983093299937, "grad_norm": 0.8177517652511597, "learning_rate": 7.536656891495601e-05, "loss": 0.3014, "step": 9704 }, { "epoch": 1.519254852849092, "grad_norm": 1.0651932954788208, "learning_rate": 7.534213098729226e-05, "loss": 0.1904, "step": 9705 }, { "epoch": 1.5194113963681903, "grad_norm": 1.055517554283142, "learning_rate": 7.531769305962854e-05, "loss": 0.2615, "step": 9706 }, { "epoch": 1.5195679398872888, "grad_norm": 1.7985464334487915, "learning_rate": 7.52932551319648e-05, "loss": 0.5792, "step": 9707 }, { "epoch": 1.519724483406387, "grad_norm": 3.7123966217041016, "learning_rate": 7.526881720430107e-05, "loss": 0.5109, "step": 9708 }, { "epoch": 1.5198810269254852, "grad_norm": 1.6406701803207397, "learning_rate": 7.524437927663735e-05, "loss": 0.4718, "step": 9709 }, { "epoch": 1.5200375704445834, "grad_norm": 1.475159764289856, "learning_rate": 7.52199413489736e-05, "loss": 0.4324, "step": 9710 }, { "epoch": 1.5201941139636819, "grad_norm": 3.5248284339904785, "learning_rate": 7.519550342130986e-05, "loss": 0.663, "step": 9711 }, { "epoch": 1.5203506574827803, "grad_norm": 2.4344213008880615, "learning_rate": 7.517106549364614e-05, "loss": 0.4683, "step": 9712 }, { "epoch": 1.5205072010018785, "grad_norm": 1.7357611656188965, "learning_rate": 7.51466275659824e-05, "loss": 0.6168, "step": 9713 }, { "epoch": 1.5206637445209767, "grad_norm": 8.591681480407715, "learning_rate": 7.512218963831866e-05, "loss": 0.779, "step": 9714 }, { "epoch": 1.5208202880400752, "grad_norm": 3.0469913482666016, "learning_rate": 7.509775171065493e-05, "loss": 0.513, "step": 9715 }, { "epoch": 1.5209768315591734, "grad_norm": 4.075806617736816, "learning_rate": 7.50733137829912e-05, "loss": 0.5417, 
"step": 9716 }, { "epoch": 1.5211333750782718, "grad_norm": 1.6552000045776367, "learning_rate": 7.504887585532745e-05, "loss": 0.5785, "step": 9717 }, { "epoch": 1.52128991859737, "grad_norm": 2.410032033920288, "learning_rate": 7.502443792766373e-05, "loss": 0.8276, "step": 9718 }, { "epoch": 1.5214464621164683, "grad_norm": 1.1762977838516235, "learning_rate": 7.5e-05, "loss": 0.654, "step": 9719 }, { "epoch": 1.5216030056355667, "grad_norm": 5.700626373291016, "learning_rate": 7.497556207233626e-05, "loss": 0.7655, "step": 9720 }, { "epoch": 1.5217595491546652, "grad_norm": 3.678863763809204, "learning_rate": 7.495112414467252e-05, "loss": 1.2639, "step": 9721 }, { "epoch": 1.5219160926737634, "grad_norm": 2.683408737182617, "learning_rate": 7.492668621700879e-05, "loss": 0.81, "step": 9722 }, { "epoch": 1.5220726361928616, "grad_norm": 2.2185420989990234, "learning_rate": 7.490224828934507e-05, "loss": 0.763, "step": 9723 }, { "epoch": 1.5222291797119598, "grad_norm": 2.0188112258911133, "learning_rate": 7.487781036168132e-05, "loss": 0.7293, "step": 9724 }, { "epoch": 1.5223857232310583, "grad_norm": 2.3393964767456055, "learning_rate": 7.485337243401758e-05, "loss": 1.0011, "step": 9725 }, { "epoch": 1.5225422667501567, "grad_norm": 2.7806484699249268, "learning_rate": 7.482893450635386e-05, "loss": 0.8906, "step": 9726 }, { "epoch": 1.522698810269255, "grad_norm": 2.0687015056610107, "learning_rate": 7.480449657869011e-05, "loss": 0.8827, "step": 9727 }, { "epoch": 1.5228553537883531, "grad_norm": 2.678903818130493, "learning_rate": 7.478005865102639e-05, "loss": 1.36, "step": 9728 }, { "epoch": 1.5230118973074513, "grad_norm": 2.200491428375244, "learning_rate": 7.475562072336265e-05, "loss": 0.89, "step": 9729 }, { "epoch": 1.5231684408265498, "grad_norm": 2.768153429031372, "learning_rate": 7.473118279569892e-05, "loss": 1.4945, "step": 9730 }, { "epoch": 1.5233249843456482, "grad_norm": 6.975987434387207, "learning_rate": 7.470674486803518e-05, "loss": 
1.177, "step": 9731 }, { "epoch": 1.5234815278647464, "grad_norm": 2.4341022968292236, "learning_rate": 7.468230694037145e-05, "loss": 1.1726, "step": 9732 }, { "epoch": 1.5236380713838447, "grad_norm": 4.012300491333008, "learning_rate": 7.465786901270771e-05, "loss": 0.6998, "step": 9733 }, { "epoch": 1.5237946149029429, "grad_norm": 2.198310136795044, "learning_rate": 7.463343108504398e-05, "loss": 0.8953, "step": 9734 }, { "epoch": 1.5239511584220413, "grad_norm": 1.1857575178146362, "learning_rate": 7.460899315738026e-05, "loss": 0.5788, "step": 9735 }, { "epoch": 1.5241077019411398, "grad_norm": 2.221917152404785, "learning_rate": 7.458455522971651e-05, "loss": 0.6252, "step": 9736 }, { "epoch": 1.524264245460238, "grad_norm": 4.886264801025391, "learning_rate": 7.456011730205277e-05, "loss": 0.6908, "step": 9737 }, { "epoch": 1.5244207889793362, "grad_norm": 2.3509812355041504, "learning_rate": 7.453567937438905e-05, "loss": 0.8521, "step": 9738 }, { "epoch": 1.5245773324984344, "grad_norm": 0.35476207733154297, "learning_rate": 7.45112414467253e-05, "loss": 0.1878, "step": 9739 }, { "epoch": 1.5247338760175329, "grad_norm": 3.2828149795532227, "learning_rate": 7.448680351906158e-05, "loss": 0.2251, "step": 9740 }, { "epoch": 1.5248904195366313, "grad_norm": 0.5039660334587097, "learning_rate": 7.446236559139785e-05, "loss": 0.19, "step": 9741 }, { "epoch": 1.5250469630557295, "grad_norm": 0.5465099215507507, "learning_rate": 7.443792766373411e-05, "loss": 0.2284, "step": 9742 }, { "epoch": 1.5252035065748277, "grad_norm": 1.618552803993225, "learning_rate": 7.441348973607038e-05, "loss": 0.326, "step": 9743 }, { "epoch": 1.525360050093926, "grad_norm": 0.585069477558136, "learning_rate": 7.438905180840664e-05, "loss": 0.3094, "step": 9744 }, { "epoch": 1.5255165936130244, "grad_norm": 1.64920175075531, "learning_rate": 7.43646138807429e-05, "loss": 0.3384, "step": 9745 }, { "epoch": 1.5256731371321228, "grad_norm": 1.8453181982040405, "learning_rate": 
7.434017595307917e-05, "loss": 0.538, "step": 9746 }, { "epoch": 1.525829680651221, "grad_norm": 0.8853111267089844, "learning_rate": 7.431573802541543e-05, "loss": 0.255, "step": 9747 }, { "epoch": 1.5259862241703193, "grad_norm": 0.9123733043670654, "learning_rate": 7.42913000977517e-05, "loss": 0.1581, "step": 9748 }, { "epoch": 1.5261427676894177, "grad_norm": 0.7811870574951172, "learning_rate": 7.426686217008796e-05, "loss": 0.2773, "step": 9749 }, { "epoch": 1.526299311208516, "grad_norm": 0.824766993522644, "learning_rate": 7.424242424242424e-05, "loss": 0.4379, "step": 9750 }, { "epoch": 1.5264558547276144, "grad_norm": 1.7148675918579102, "learning_rate": 7.42179863147605e-05, "loss": 0.4431, "step": 9751 }, { "epoch": 1.5266123982467126, "grad_norm": 1.5956602096557617, "learning_rate": 7.419354838709677e-05, "loss": 0.3849, "step": 9752 }, { "epoch": 1.5267689417658108, "grad_norm": 1.4418418407440186, "learning_rate": 7.416911045943304e-05, "loss": 0.4567, "step": 9753 }, { "epoch": 1.5269254852849092, "grad_norm": 1.1421548128128052, "learning_rate": 7.41446725317693e-05, "loss": 0.5558, "step": 9754 }, { "epoch": 1.5270820288040077, "grad_norm": 2.0118582248687744, "learning_rate": 7.412023460410557e-05, "loss": 0.3335, "step": 9755 }, { "epoch": 1.527238572323106, "grad_norm": 1.003547191619873, "learning_rate": 7.409579667644183e-05, "loss": 0.4836, "step": 9756 }, { "epoch": 1.527395115842204, "grad_norm": 1.8095251321792603, "learning_rate": 7.40713587487781e-05, "loss": 0.4633, "step": 9757 }, { "epoch": 1.5275516593613023, "grad_norm": 0.8199507594108582, "learning_rate": 7.404692082111436e-05, "loss": 0.2844, "step": 9758 }, { "epoch": 1.5277082028804008, "grad_norm": 1.0319750308990479, "learning_rate": 7.402248289345063e-05, "loss": 0.5131, "step": 9759 }, { "epoch": 1.5278647463994992, "grad_norm": 1.8906883001327515, "learning_rate": 7.399804496578689e-05, "loss": 0.5979, "step": 9760 }, { "epoch": 1.5280212899185974, "grad_norm": 
0.7006099820137024, "learning_rate": 7.397360703812316e-05, "loss": 0.288, "step": 9761 }, { "epoch": 1.5281778334376956, "grad_norm": 2.609555721282959, "learning_rate": 7.394916911045943e-05, "loss": 0.7689, "step": 9762 }, { "epoch": 1.5283343769567939, "grad_norm": 1.5572997331619263, "learning_rate": 7.392473118279569e-05, "loss": 0.6709, "step": 9763 }, { "epoch": 1.5284909204758923, "grad_norm": 1.1742448806762695, "learning_rate": 7.390029325513196e-05, "loss": 0.4997, "step": 9764 }, { "epoch": 1.5286474639949907, "grad_norm": 1.8424162864685059, "learning_rate": 7.387585532746823e-05, "loss": 0.636, "step": 9765 }, { "epoch": 1.528804007514089, "grad_norm": 1.5619405508041382, "learning_rate": 7.38514173998045e-05, "loss": 0.7927, "step": 9766 }, { "epoch": 1.5289605510331872, "grad_norm": 3.71563982963562, "learning_rate": 7.382697947214076e-05, "loss": 1.1046, "step": 9767 }, { "epoch": 1.5291170945522854, "grad_norm": 1.4069294929504395, "learning_rate": 7.380254154447702e-05, "loss": 0.3665, "step": 9768 }, { "epoch": 1.5292736380713838, "grad_norm": 2.09309983253479, "learning_rate": 7.377810361681329e-05, "loss": 0.6521, "step": 9769 }, { "epoch": 1.5294301815904823, "grad_norm": 1.4793744087219238, "learning_rate": 7.375366568914955e-05, "loss": 0.8215, "step": 9770 }, { "epoch": 1.5295867251095805, "grad_norm": 1.229730248451233, "learning_rate": 7.372922776148582e-05, "loss": 0.4851, "step": 9771 }, { "epoch": 1.5297432686286787, "grad_norm": 1.6996830701828003, "learning_rate": 7.370478983382208e-05, "loss": 0.5851, "step": 9772 }, { "epoch": 1.529899812147777, "grad_norm": 1.705766201019287, "learning_rate": 7.368035190615835e-05, "loss": 0.881, "step": 9773 }, { "epoch": 1.5300563556668754, "grad_norm": 1.5375521183013916, "learning_rate": 7.365591397849463e-05, "loss": 0.3521, "step": 9774 }, { "epoch": 1.5302128991859738, "grad_norm": 2.4411001205444336, "learning_rate": 7.363147605083088e-05, "loss": 0.9904, "step": 9775 }, { "epoch": 
1.530369442705072, "grad_norm": 2.59539794921875, "learning_rate": 7.360703812316715e-05, "loss": 1.0197, "step": 9776 }, { "epoch": 1.5305259862241702, "grad_norm": 1.2721023559570312, "learning_rate": 7.358260019550342e-05, "loss": 0.8143, "step": 9777 }, { "epoch": 1.5306825297432687, "grad_norm": 3.4059696197509766, "learning_rate": 7.355816226783968e-05, "loss": 1.3697, "step": 9778 }, { "epoch": 1.530839073262367, "grad_norm": 1.2390133142471313, "learning_rate": 7.353372434017595e-05, "loss": 0.8997, "step": 9779 }, { "epoch": 1.5309956167814653, "grad_norm": 2.6192235946655273, "learning_rate": 7.350928641251221e-05, "loss": 1.2236, "step": 9780 }, { "epoch": 1.5311521603005636, "grad_norm": 2.4125397205352783, "learning_rate": 7.348484848484848e-05, "loss": 0.9419, "step": 9781 }, { "epoch": 1.5313087038196618, "grad_norm": 3.147789716720581, "learning_rate": 7.346041055718474e-05, "loss": 1.8505, "step": 9782 }, { "epoch": 1.5314652473387602, "grad_norm": 2.7218902111053467, "learning_rate": 7.343597262952101e-05, "loss": 0.5833, "step": 9783 }, { "epoch": 1.5316217908578584, "grad_norm": 1.0626882314682007, "learning_rate": 7.341153470185727e-05, "loss": 0.3275, "step": 9784 }, { "epoch": 1.5317783343769569, "grad_norm": 2.7859811782836914, "learning_rate": 7.338709677419354e-05, "loss": 0.8926, "step": 9785 }, { "epoch": 1.531934877896055, "grad_norm": 2.831791877746582, "learning_rate": 7.33626588465298e-05, "loss": 0.5686, "step": 9786 }, { "epoch": 1.5320914214151533, "grad_norm": 2.355695962905884, "learning_rate": 7.333822091886607e-05, "loss": 0.5434, "step": 9787 }, { "epoch": 1.5322479649342517, "grad_norm": 1.321400761604309, "learning_rate": 7.331378299120235e-05, "loss": 0.5117, "step": 9788 }, { "epoch": 1.5324045084533502, "grad_norm": 0.5803260207176208, "learning_rate": 7.32893450635386e-05, "loss": 0.2592, "step": 9789 }, { "epoch": 1.5325610519724484, "grad_norm": 0.40602564811706543, "learning_rate": 7.326490713587488e-05, "loss": 
0.1622, "step": 9790 }, { "epoch": 1.5327175954915466, "grad_norm": 1.1273137331008911, "learning_rate": 7.324046920821114e-05, "loss": 0.2955, "step": 9791 }, { "epoch": 1.5328741390106448, "grad_norm": 0.4123140573501587, "learning_rate": 7.32160312805474e-05, "loss": 0.2276, "step": 9792 }, { "epoch": 1.5330306825297433, "grad_norm": 0.5335753560066223, "learning_rate": 7.319159335288367e-05, "loss": 0.1589, "step": 9793 }, { "epoch": 1.5331872260488417, "grad_norm": 0.8474439382553101, "learning_rate": 7.316715542521993e-05, "loss": 0.3083, "step": 9794 }, { "epoch": 1.53334376956794, "grad_norm": 0.48397138714790344, "learning_rate": 7.31427174975562e-05, "loss": 0.1776, "step": 9795 }, { "epoch": 1.5335003130870382, "grad_norm": 0.5532123446464539, "learning_rate": 7.311827956989246e-05, "loss": 0.2416, "step": 9796 }, { "epoch": 1.5336568566061364, "grad_norm": 0.656062126159668, "learning_rate": 7.309384164222873e-05, "loss": 0.2122, "step": 9797 }, { "epoch": 1.5338134001252348, "grad_norm": 1.0843610763549805, "learning_rate": 7.3069403714565e-05, "loss": 0.2524, "step": 9798 }, { "epoch": 1.5339699436443333, "grad_norm": 0.891477644443512, "learning_rate": 7.304496578690126e-05, "loss": 0.437, "step": 9799 }, { "epoch": 1.5341264871634315, "grad_norm": 1.0225049257278442, "learning_rate": 7.302052785923754e-05, "loss": 0.2953, "step": 9800 }, { "epoch": 1.5342830306825297, "grad_norm": 1.2247625589370728, "learning_rate": 7.299608993157379e-05, "loss": 0.492, "step": 9801 }, { "epoch": 1.534439574201628, "grad_norm": 0.9728368520736694, "learning_rate": 7.297165200391007e-05, "loss": 0.3037, "step": 9802 }, { "epoch": 1.5345961177207263, "grad_norm": 1.27720046043396, "learning_rate": 7.294721407624633e-05, "loss": 0.1679, "step": 9803 }, { "epoch": 1.5347526612398248, "grad_norm": 0.6740818023681641, "learning_rate": 7.29227761485826e-05, "loss": 0.1985, "step": 9804 }, { "epoch": 1.534909204758923, "grad_norm": 1.0346983671188354, "learning_rate": 
7.289833822091886e-05, "loss": 0.4624, "step": 9805 }, { "epoch": 1.5350657482780212, "grad_norm": 1.6507031917572021, "learning_rate": 7.287390029325513e-05, "loss": 0.1886, "step": 9806 }, { "epoch": 1.5352222917971194, "grad_norm": 1.1130856275558472, "learning_rate": 7.284946236559139e-05, "loss": 0.3239, "step": 9807 }, { "epoch": 1.5353788353162179, "grad_norm": 1.4110175371170044, "learning_rate": 7.282502443792766e-05, "loss": 0.5766, "step": 9808 }, { "epoch": 1.5355353788353163, "grad_norm": 15.8782377243042, "learning_rate": 7.280058651026392e-05, "loss": 0.4304, "step": 9809 }, { "epoch": 1.5356919223544145, "grad_norm": 2.349193811416626, "learning_rate": 7.277614858260019e-05, "loss": 0.4212, "step": 9810 }, { "epoch": 1.5358484658735128, "grad_norm": 2.3084702491760254, "learning_rate": 7.275171065493645e-05, "loss": 0.7621, "step": 9811 }, { "epoch": 1.5360050093926112, "grad_norm": 2.6022043228149414, "learning_rate": 7.272727272727273e-05, "loss": 0.6079, "step": 9812 }, { "epoch": 1.5361615529117094, "grad_norm": 1.9148122072219849, "learning_rate": 7.270283479960898e-05, "loss": 0.505, "step": 9813 }, { "epoch": 1.5363180964308079, "grad_norm": 3.0425705909729004, "learning_rate": 7.267839687194526e-05, "loss": 0.9097, "step": 9814 }, { "epoch": 1.536474639949906, "grad_norm": 2.4766037464141846, "learning_rate": 7.265395894428152e-05, "loss": 0.6503, "step": 9815 }, { "epoch": 1.5366311834690043, "grad_norm": 2.7663185596466064, "learning_rate": 7.262952101661779e-05, "loss": 0.8135, "step": 9816 }, { "epoch": 1.5367877269881027, "grad_norm": 1.486636996269226, "learning_rate": 7.260508308895405e-05, "loss": 0.4896, "step": 9817 }, { "epoch": 1.536944270507201, "grad_norm": 1.2840673923492432, "learning_rate": 7.258064516129032e-05, "loss": 0.3703, "step": 9818 }, { "epoch": 1.5371008140262994, "grad_norm": 1.9025120735168457, "learning_rate": 7.255620723362658e-05, "loss": 0.7038, "step": 9819 }, { "epoch": 1.5372573575453976, "grad_norm": 
3.2480320930480957, "learning_rate": 7.253176930596285e-05, "loss": 0.5779, "step": 9820 }, { "epoch": 1.5374139010644958, "grad_norm": 2.224262237548828, "learning_rate": 7.250733137829911e-05, "loss": 1.0415, "step": 9821 }, { "epoch": 1.5375704445835943, "grad_norm": 2.9639852046966553, "learning_rate": 7.248289345063538e-05, "loss": 1.0646, "step": 9822 }, { "epoch": 1.5377269881026927, "grad_norm": 2.493807077407837, "learning_rate": 7.245845552297164e-05, "loss": 0.7018, "step": 9823 }, { "epoch": 1.537883531621791, "grad_norm": 2.37644624710083, "learning_rate": 7.243401759530792e-05, "loss": 0.4942, "step": 9824 }, { "epoch": 1.5380400751408891, "grad_norm": 2.5154953002929688, "learning_rate": 7.240957966764417e-05, "loss": 1.0169, "step": 9825 }, { "epoch": 1.5381966186599874, "grad_norm": 1.9995393753051758, "learning_rate": 7.238514173998045e-05, "loss": 1.0743, "step": 9826 }, { "epoch": 1.5383531621790858, "grad_norm": 2.441619396209717, "learning_rate": 7.236070381231671e-05, "loss": 1.4467, "step": 9827 }, { "epoch": 1.5385097056981842, "grad_norm": 2.3574626445770264, "learning_rate": 7.233626588465298e-05, "loss": 0.5267, "step": 9828 }, { "epoch": 1.5386662492172825, "grad_norm": 1.9852875471115112, "learning_rate": 7.231182795698924e-05, "loss": 0.769, "step": 9829 }, { "epoch": 1.5388227927363807, "grad_norm": 5.952073574066162, "learning_rate": 7.228739002932551e-05, "loss": 1.0293, "step": 9830 }, { "epoch": 1.538979336255479, "grad_norm": 3.0914673805236816, "learning_rate": 7.226295210166177e-05, "loss": 1.2532, "step": 9831 }, { "epoch": 1.5391358797745773, "grad_norm": 2.5825436115264893, "learning_rate": 7.223851417399804e-05, "loss": 0.9199, "step": 9832 }, { "epoch": 1.5392924232936758, "grad_norm": 2.3879377841949463, "learning_rate": 7.22140762463343e-05, "loss": 1.0303, "step": 9833 }, { "epoch": 1.539448966812774, "grad_norm": 0.8389413356781006, "learning_rate": 7.218963831867057e-05, "loss": 0.1337, "step": 9834 }, { "epoch": 
1.5396055103318722, "grad_norm": 1.7144027948379517, "learning_rate": 7.216520039100683e-05, "loss": 0.8003, "step": 9835 }, { "epoch": 1.5397620538509704, "grad_norm": 3.8174052238464355, "learning_rate": 7.214076246334311e-05, "loss": 1.1162, "step": 9836 }, { "epoch": 1.5399185973700689, "grad_norm": 3.3473262786865234, "learning_rate": 7.211632453567936e-05, "loss": 1.6102, "step": 9837 }, { "epoch": 1.5400751408891673, "grad_norm": 1.6419070959091187, "learning_rate": 7.209188660801564e-05, "loss": 0.6261, "step": 9838 }, { "epoch": 1.5402316844082655, "grad_norm": 0.6053900718688965, "learning_rate": 7.20674486803519e-05, "loss": 0.198, "step": 9839 }, { "epoch": 1.5403882279273637, "grad_norm": 0.49573662877082825, "learning_rate": 7.204301075268816e-05, "loss": 0.2027, "step": 9840 }, { "epoch": 1.540544771446462, "grad_norm": 0.4826977849006653, "learning_rate": 7.201857282502443e-05, "loss": 0.1637, "step": 9841 }, { "epoch": 1.5407013149655604, "grad_norm": 0.7587850689888, "learning_rate": 7.19941348973607e-05, "loss": 0.1888, "step": 9842 }, { "epoch": 1.5408578584846588, "grad_norm": 0.565876841545105, "learning_rate": 7.196969696969696e-05, "loss": 0.1634, "step": 9843 }, { "epoch": 1.541014402003757, "grad_norm": 0.4626244604587555, "learning_rate": 7.194525904203323e-05, "loss": 0.2195, "step": 9844 }, { "epoch": 1.5411709455228553, "grad_norm": 1.5018632411956787, "learning_rate": 7.19208211143695e-05, "loss": 0.3624, "step": 9845 }, { "epoch": 1.5413274890419537, "grad_norm": 0.6011160016059875, "learning_rate": 7.189638318670576e-05, "loss": 0.2698, "step": 9846 }, { "epoch": 1.541484032561052, "grad_norm": 1.015252947807312, "learning_rate": 7.187194525904202e-05, "loss": 0.3386, "step": 9847 }, { "epoch": 1.5416405760801504, "grad_norm": 1.5964446067810059, "learning_rate": 7.18475073313783e-05, "loss": 0.1781, "step": 9848 }, { "epoch": 1.5417971195992486, "grad_norm": 1.3437423706054688, "learning_rate": 7.182306940371455e-05, "loss": 0.237, 
"step": 9849 }, { "epoch": 1.5419536631183468, "grad_norm": 0.8205925822257996, "learning_rate": 7.179863147605083e-05, "loss": 0.2588, "step": 9850 }, { "epoch": 1.5421102066374452, "grad_norm": 1.1015372276306152, "learning_rate": 7.17741935483871e-05, "loss": 0.3411, "step": 9851 }, { "epoch": 1.5422667501565435, "grad_norm": 3.2911665439605713, "learning_rate": 7.174975562072335e-05, "loss": 0.2773, "step": 9852 }, { "epoch": 1.542423293675642, "grad_norm": 6.5218186378479, "learning_rate": 7.172531769305963e-05, "loss": 0.4853, "step": 9853 }, { "epoch": 1.5425798371947401, "grad_norm": 1.0145297050476074, "learning_rate": 7.170087976539589e-05, "loss": 0.233, "step": 9854 }, { "epoch": 1.5427363807138383, "grad_norm": 1.0070481300354004, "learning_rate": 7.167644183773216e-05, "loss": 0.3493, "step": 9855 }, { "epoch": 1.5428929242329368, "grad_norm": 2.540808916091919, "learning_rate": 7.165200391006842e-05, "loss": 0.5796, "step": 9856 }, { "epoch": 1.5430494677520352, "grad_norm": 2.2027969360351562, "learning_rate": 7.162756598240468e-05, "loss": 0.4986, "step": 9857 }, { "epoch": 1.5432060112711334, "grad_norm": 2.4842472076416016, "learning_rate": 7.160312805474095e-05, "loss": 0.427, "step": 9858 }, { "epoch": 1.5433625547902317, "grad_norm": 2.618431568145752, "learning_rate": 7.157869012707721e-05, "loss": 0.4044, "step": 9859 }, { "epoch": 1.5435190983093299, "grad_norm": 2.222273349761963, "learning_rate": 7.155425219941349e-05, "loss": 0.5528, "step": 9860 }, { "epoch": 1.5436756418284283, "grad_norm": 1.5825259685516357, "learning_rate": 7.152981427174974e-05, "loss": 0.5638, "step": 9861 }, { "epoch": 1.5438321853475268, "grad_norm": 12.67103099822998, "learning_rate": 7.150537634408601e-05, "loss": 0.4018, "step": 9862 }, { "epoch": 1.543988728866625, "grad_norm": 1.7103662490844727, "learning_rate": 7.148093841642229e-05, "loss": 0.5984, "step": 9863 }, { "epoch": 1.5441452723857232, "grad_norm": 1.5723588466644287, "learning_rate": 
7.145650048875854e-05, "loss": 0.6249, "step": 9864 }, { "epoch": 1.5443018159048214, "grad_norm": 1.4810919761657715, "learning_rate": 7.143206256109482e-05, "loss": 0.6729, "step": 9865 }, { "epoch": 1.5444583594239198, "grad_norm": 2.4991166591644287, "learning_rate": 7.140762463343108e-05, "loss": 0.373, "step": 9866 }, { "epoch": 1.5446149029430183, "grad_norm": 1.0830026865005493, "learning_rate": 7.138318670576735e-05, "loss": 0.3133, "step": 9867 }, { "epoch": 1.5447714464621165, "grad_norm": 1.9214036464691162, "learning_rate": 7.135874877810361e-05, "loss": 0.5774, "step": 9868 }, { "epoch": 1.5449279899812147, "grad_norm": 1.623348355293274, "learning_rate": 7.133431085043988e-05, "loss": 0.7693, "step": 9869 }, { "epoch": 1.545084533500313, "grad_norm": 2.111285924911499, "learning_rate": 7.130987292277614e-05, "loss": 0.7588, "step": 9870 }, { "epoch": 1.5452410770194114, "grad_norm": 4.0085248947143555, "learning_rate": 7.12854349951124e-05, "loss": 0.633, "step": 9871 }, { "epoch": 1.5453976205385098, "grad_norm": 1.885757327079773, "learning_rate": 7.126099706744868e-05, "loss": 0.5439, "step": 9872 }, { "epoch": 1.545554164057608, "grad_norm": 2.1260712146759033, "learning_rate": 7.123655913978494e-05, "loss": 0.5721, "step": 9873 }, { "epoch": 1.5457107075767063, "grad_norm": 1.1092246770858765, "learning_rate": 7.12121212121212e-05, "loss": 0.598, "step": 9874 }, { "epoch": 1.5458672510958045, "grad_norm": 2.1099350452423096, "learning_rate": 7.118768328445748e-05, "loss": 1.0518, "step": 9875 }, { "epoch": 1.546023794614903, "grad_norm": 2.708494186401367, "learning_rate": 7.116324535679373e-05, "loss": 0.974, "step": 9876 }, { "epoch": 1.5461803381340014, "grad_norm": 2.1030566692352295, "learning_rate": 7.113880742913001e-05, "loss": 1.064, "step": 9877 }, { "epoch": 1.5463368816530996, "grad_norm": 1.5087107419967651, "learning_rate": 7.111436950146627e-05, "loss": 0.6343, "step": 9878 }, { "epoch": 1.5464934251721978, "grad_norm": 
2.708134651184082, "learning_rate": 7.108993157380254e-05, "loss": 1.0731, "step": 9879 }, { "epoch": 1.5466499686912962, "grad_norm": 2.4557900428771973, "learning_rate": 7.10654936461388e-05, "loss": 1.0076, "step": 9880 }, { "epoch": 1.5468065122103944, "grad_norm": 4.119977951049805, "learning_rate": 7.104105571847507e-05, "loss": 0.6316, "step": 9881 }, { "epoch": 1.5469630557294929, "grad_norm": 9.010337829589844, "learning_rate": 7.101661779081133e-05, "loss": 1.0986, "step": 9882 }, { "epoch": 1.547119599248591, "grad_norm": 3.3070311546325684, "learning_rate": 7.09921798631476e-05, "loss": 1.4029, "step": 9883 }, { "epoch": 1.5472761427676893, "grad_norm": 1.8002490997314453, "learning_rate": 7.096774193548386e-05, "loss": 0.467, "step": 9884 }, { "epoch": 1.5474326862867878, "grad_norm": 1.5535470247268677, "learning_rate": 7.094330400782013e-05, "loss": 0.6966, "step": 9885 }, { "epoch": 1.5475892298058862, "grad_norm": 1.3492341041564941, "learning_rate": 7.091886608015639e-05, "loss": 0.4247, "step": 9886 }, { "epoch": 1.5477457733249844, "grad_norm": 1.93979811668396, "learning_rate": 7.089442815249267e-05, "loss": 0.8019, "step": 9887 }, { "epoch": 1.5479023168440826, "grad_norm": 1.897113561630249, "learning_rate": 7.086999022482892e-05, "loss": 0.8177, "step": 9888 }, { "epoch": 1.5480588603631809, "grad_norm": 0.4913039207458496, "learning_rate": 7.08455522971652e-05, "loss": 0.2577, "step": 9889 }, { "epoch": 1.5482154038822793, "grad_norm": 0.9754254221916199, "learning_rate": 7.082111436950146e-05, "loss": 0.2678, "step": 9890 }, { "epoch": 1.5483719474013777, "grad_norm": 0.5141871571540833, "learning_rate": 7.079667644183773e-05, "loss": 0.2229, "step": 9891 }, { "epoch": 1.548528490920476, "grad_norm": 0.7318794131278992, "learning_rate": 7.0772238514174e-05, "loss": 0.2285, "step": 9892 }, { "epoch": 1.5486850344395742, "grad_norm": 1.011592984199524, "learning_rate": 7.074780058651026e-05, "loss": 0.2582, "step": 9893 }, { "epoch": 
1.5488415779586724, "grad_norm": 0.5550967454910278, "learning_rate": 7.072336265884652e-05, "loss": 0.3078, "step": 9894 }, { "epoch": 1.5489981214777708, "grad_norm": 0.6541218757629395, "learning_rate": 7.069892473118279e-05, "loss": 0.1527, "step": 9895 }, { "epoch": 1.5491546649968693, "grad_norm": 0.6758529543876648, "learning_rate": 7.067448680351905e-05, "loss": 0.309, "step": 9896 }, { "epoch": 1.5493112085159675, "grad_norm": 0.5618512630462646, "learning_rate": 7.065004887585532e-05, "loss": 0.234, "step": 9897 }, { "epoch": 1.5494677520350657, "grad_norm": 0.8279595971107483, "learning_rate": 7.062561094819158e-05, "loss": 0.3533, "step": 9898 }, { "epoch": 1.549624295554164, "grad_norm": 0.7879864573478699, "learning_rate": 7.060117302052786e-05, "loss": 0.1606, "step": 9899 }, { "epoch": 1.5497808390732624, "grad_norm": 0.8414984941482544, "learning_rate": 7.057673509286411e-05, "loss": 0.33, "step": 9900 }, { "epoch": 1.5499373825923608, "grad_norm": 0.6734075546264648, "learning_rate": 7.055229716520039e-05, "loss": 0.3478, "step": 9901 }, { "epoch": 1.550093926111459, "grad_norm": 0.876607358455658, "learning_rate": 7.052785923753666e-05, "loss": 0.4642, "step": 9902 }, { "epoch": 1.5502504696305572, "grad_norm": 1.32351553440094, "learning_rate": 7.050342130987292e-05, "loss": 0.4359, "step": 9903 }, { "epoch": 1.5504070131496555, "grad_norm": 0.7632149457931519, "learning_rate": 7.047898338220918e-05, "loss": 0.3068, "step": 9904 }, { "epoch": 1.550563556668754, "grad_norm": 1.024189829826355, "learning_rate": 7.045454545454545e-05, "loss": 0.2885, "step": 9905 }, { "epoch": 1.5507201001878523, "grad_norm": 1.50118887424469, "learning_rate": 7.043010752688171e-05, "loss": 0.3361, "step": 9906 }, { "epoch": 1.5508766437069506, "grad_norm": 3.2080376148223877, "learning_rate": 7.040566959921798e-05, "loss": 0.7564, "step": 9907 }, { "epoch": 1.5510331872260488, "grad_norm": 1.7170158624649048, "learning_rate": 7.038123167155424e-05, "loss": 0.471, 
"step": 9908 }, { "epoch": 1.551189730745147, "grad_norm": 2.007455825805664, "learning_rate": 7.035679374389051e-05, "loss": 0.4193, "step": 9909 }, { "epoch": 1.5513462742642454, "grad_norm": 1.438209891319275, "learning_rate": 7.033235581622677e-05, "loss": 0.4327, "step": 9910 }, { "epoch": 1.5515028177833439, "grad_norm": 1.6576300859451294, "learning_rate": 7.030791788856304e-05, "loss": 0.5447, "step": 9911 }, { "epoch": 1.551659361302442, "grad_norm": 1.656591534614563, "learning_rate": 7.02834799608993e-05, "loss": 0.9359, "step": 9912 }, { "epoch": 1.5518159048215403, "grad_norm": 2.055999279022217, "learning_rate": 7.025904203323558e-05, "loss": 0.7214, "step": 9913 }, { "epoch": 1.5519724483406387, "grad_norm": 2.960740089416504, "learning_rate": 7.023460410557183e-05, "loss": 0.8004, "step": 9914 }, { "epoch": 1.552128991859737, "grad_norm": 1.1211111545562744, "learning_rate": 7.021016617790811e-05, "loss": 0.2811, "step": 9915 }, { "epoch": 1.5522855353788354, "grad_norm": 0.9293329119682312, "learning_rate": 7.018572825024438e-05, "loss": 0.4153, "step": 9916 }, { "epoch": 1.5524420788979336, "grad_norm": 1.6393110752105713, "learning_rate": 7.016129032258064e-05, "loss": 0.6008, "step": 9917 }, { "epoch": 1.5525986224170318, "grad_norm": 2.427534341812134, "learning_rate": 7.01368523949169e-05, "loss": 0.3159, "step": 9918 }, { "epoch": 1.5527551659361303, "grad_norm": 2.0107951164245605, "learning_rate": 7.011241446725317e-05, "loss": 0.677, "step": 9919 }, { "epoch": 1.5529117094552287, "grad_norm": 1.3930466175079346, "learning_rate": 7.008797653958944e-05, "loss": 0.4769, "step": 9920 }, { "epoch": 1.553068252974327, "grad_norm": 3.030660390853882, "learning_rate": 7.00635386119257e-05, "loss": 0.6934, "step": 9921 }, { "epoch": 1.5532247964934252, "grad_norm": 1.1675511598587036, "learning_rate": 7.003910068426196e-05, "loss": 0.3575, "step": 9922 }, { "epoch": 1.5533813400125234, "grad_norm": 2.445157527923584, "learning_rate": 
7.001466275659823e-05, "loss": 0.7557, "step": 9923 }, { "epoch": 1.5535378835316218, "grad_norm": 2.884352445602417, "learning_rate": 6.99902248289345e-05, "loss": 0.7685, "step": 9924 }, { "epoch": 1.5536944270507203, "grad_norm": 1.9358322620391846, "learning_rate": 6.996578690127077e-05, "loss": 0.7295, "step": 9925 }, { "epoch": 1.5538509705698185, "grad_norm": 2.95656681060791, "learning_rate": 6.994134897360702e-05, "loss": 0.6324, "step": 9926 }, { "epoch": 1.5540075140889167, "grad_norm": 1.8009204864501953, "learning_rate": 6.99169110459433e-05, "loss": 0.4526, "step": 9927 }, { "epoch": 1.554164057608015, "grad_norm": 3.0422096252441406, "learning_rate": 6.989247311827957e-05, "loss": 0.9872, "step": 9928 }, { "epoch": 1.5543206011271133, "grad_norm": 4.271030902862549, "learning_rate": 6.986803519061583e-05, "loss": 0.8607, "step": 9929 }, { "epoch": 1.5544771446462118, "grad_norm": 2.318971633911133, "learning_rate": 6.98435972629521e-05, "loss": 0.8247, "step": 9930 }, { "epoch": 1.55463368816531, "grad_norm": 1.7466161251068115, "learning_rate": 6.981915933528836e-05, "loss": 0.9417, "step": 9931 }, { "epoch": 1.5547902316844082, "grad_norm": 2.927077531814575, "learning_rate": 6.979472140762463e-05, "loss": 1.8184, "step": 9932 }, { "epoch": 1.5549467752035064, "grad_norm": 2.963963747024536, "learning_rate": 6.977028347996089e-05, "loss": 0.8251, "step": 9933 }, { "epoch": 1.5551033187226049, "grad_norm": 2.6687653064727783, "learning_rate": 6.974584555229716e-05, "loss": 0.8453, "step": 9934 }, { "epoch": 1.5552598622417033, "grad_norm": 3.3215959072113037, "learning_rate": 6.972140762463342e-05, "loss": 0.7546, "step": 9935 }, { "epoch": 1.5554164057608015, "grad_norm": 2.8623807430267334, "learning_rate": 6.969696969696969e-05, "loss": 1.5532, "step": 9936 }, { "epoch": 1.5555729492798998, "grad_norm": 2.0836265087127686, "learning_rate": 6.967253176930596e-05, "loss": 0.8139, "step": 9937 }, { "epoch": 1.555729492798998, "grad_norm": 
1.388414978981018, "learning_rate": 6.964809384164222e-05, "loss": 0.8304, "step": 9938 }, { "epoch": 1.5558860363180964, "grad_norm": 0.9456003904342651, "learning_rate": 6.96236559139785e-05, "loss": 0.1613, "step": 9939 }, { "epoch": 1.5560425798371949, "grad_norm": 0.5043039917945862, "learning_rate": 6.959921798631476e-05, "loss": 0.121, "step": 9940 }, { "epoch": 1.556199123356293, "grad_norm": 0.7678887248039246, "learning_rate": 6.957478005865102e-05, "loss": 0.2142, "step": 9941 }, { "epoch": 1.5563556668753913, "grad_norm": 0.7166454792022705, "learning_rate": 6.955034213098729e-05, "loss": 0.2479, "step": 9942 }, { "epoch": 1.5565122103944895, "grad_norm": 0.4818023145198822, "learning_rate": 6.952590420332355e-05, "loss": 0.2025, "step": 9943 }, { "epoch": 1.556668753913588, "grad_norm": 0.3814961314201355, "learning_rate": 6.950146627565982e-05, "loss": 0.1756, "step": 9944 }, { "epoch": 1.5568252974326864, "grad_norm": 0.6317948698997498, "learning_rate": 6.947702834799608e-05, "loss": 0.2258, "step": 9945 }, { "epoch": 1.5569818409517846, "grad_norm": 0.6837334036827087, "learning_rate": 6.945259042033235e-05, "loss": 0.2127, "step": 9946 }, { "epoch": 1.5571383844708828, "grad_norm": 0.6503742337226868, "learning_rate": 6.942815249266861e-05, "loss": 0.2307, "step": 9947 }, { "epoch": 1.5572949279899813, "grad_norm": 0.9390753507614136, "learning_rate": 6.940371456500488e-05, "loss": 0.3621, "step": 9948 }, { "epoch": 1.5574514715090795, "grad_norm": 1.152475118637085, "learning_rate": 6.937927663734116e-05, "loss": 0.2944, "step": 9949 }, { "epoch": 1.557608015028178, "grad_norm": 0.7496026754379272, "learning_rate": 6.93548387096774e-05, "loss": 0.1876, "step": 9950 }, { "epoch": 1.5577645585472761, "grad_norm": 0.6168410778045654, "learning_rate": 6.933040078201368e-05, "loss": 0.2536, "step": 9951 }, { "epoch": 1.5579211020663744, "grad_norm": 1.393839716911316, "learning_rate": 6.930596285434995e-05, "loss": 0.3867, "step": 9952 }, { "epoch": 
1.5580776455854728, "grad_norm": 0.946195125579834, "learning_rate": 6.928152492668621e-05, "loss": 0.1936, "step": 9953 }, { "epoch": 1.5582341891045712, "grad_norm": 1.4753316640853882, "learning_rate": 6.925708699902248e-05, "loss": 0.3014, "step": 9954 }, { "epoch": 1.5583907326236695, "grad_norm": 1.1015839576721191, "learning_rate": 6.923264907135874e-05, "loss": 0.4375, "step": 9955 }, { "epoch": 1.5585472761427677, "grad_norm": 1.402594804763794, "learning_rate": 6.920821114369501e-05, "loss": 0.701, "step": 9956 }, { "epoch": 1.5587038196618659, "grad_norm": 2.3376095294952393, "learning_rate": 6.918377321603127e-05, "loss": 0.3449, "step": 9957 }, { "epoch": 1.5588603631809643, "grad_norm": 0.5967291593551636, "learning_rate": 6.915933528836754e-05, "loss": 0.2373, "step": 9958 }, { "epoch": 1.5590169067000628, "grad_norm": 2.35986065864563, "learning_rate": 6.91348973607038e-05, "loss": 0.4976, "step": 9959 }, { "epoch": 1.559173450219161, "grad_norm": 1.496276617050171, "learning_rate": 6.911045943304007e-05, "loss": 0.4235, "step": 9960 }, { "epoch": 1.5593299937382592, "grad_norm": 3.5515880584716797, "learning_rate": 6.908602150537635e-05, "loss": 0.5994, "step": 9961 }, { "epoch": 1.5594865372573574, "grad_norm": 1.0382581949234009, "learning_rate": 6.90615835777126e-05, "loss": 0.3619, "step": 9962 }, { "epoch": 1.5596430807764559, "grad_norm": 1.326045274734497, "learning_rate": 6.903714565004888e-05, "loss": 0.7072, "step": 9963 }, { "epoch": 1.5597996242955543, "grad_norm": 3.173861265182495, "learning_rate": 6.901270772238514e-05, "loss": 0.9478, "step": 9964 }, { "epoch": 1.5599561678146525, "grad_norm": 1.280656099319458, "learning_rate": 6.898826979472139e-05, "loss": 0.5995, "step": 9965 }, { "epoch": 1.5601127113337507, "grad_norm": 1.5048918724060059, "learning_rate": 6.896383186705767e-05, "loss": 0.415, "step": 9966 }, { "epoch": 1.560269254852849, "grad_norm": 1.7540141344070435, "learning_rate": 6.893939393939393e-05, "loss": 0.7435, 
"step": 9967 }, { "epoch": 1.5604257983719474, "grad_norm": 1.4080053567886353, "learning_rate": 6.89149560117302e-05, "loss": 0.6369, "step": 9968 }, { "epoch": 1.5605823418910458, "grad_norm": 1.5890631675720215, "learning_rate": 6.889051808406646e-05, "loss": 0.4578, "step": 9969 }, { "epoch": 1.560738885410144, "grad_norm": 1.6475311517715454, "learning_rate": 6.886608015640273e-05, "loss": 0.6443, "step": 9970 }, { "epoch": 1.5608954289292423, "grad_norm": 1.769564151763916, "learning_rate": 6.8841642228739e-05, "loss": 0.5241, "step": 9971 }, { "epoch": 1.5610519724483405, "grad_norm": 2.2985165119171143, "learning_rate": 6.881720430107526e-05, "loss": 0.7585, "step": 9972 }, { "epoch": 1.561208515967439, "grad_norm": 1.865506649017334, "learning_rate": 6.879276637341154e-05, "loss": 0.8987, "step": 9973 }, { "epoch": 1.5613650594865374, "grad_norm": 2.5502965450286865, "learning_rate": 6.876832844574779e-05, "loss": 1.1079, "step": 9974 }, { "epoch": 1.5615216030056356, "grad_norm": 3.633234977722168, "learning_rate": 6.874389051808407e-05, "loss": 1.0968, "step": 9975 }, { "epoch": 1.5616781465247338, "grad_norm": 2.496279239654541, "learning_rate": 6.871945259042033e-05, "loss": 0.5893, "step": 9976 }, { "epoch": 1.561834690043832, "grad_norm": 2.3446414470672607, "learning_rate": 6.869501466275658e-05, "loss": 0.8225, "step": 9977 }, { "epoch": 1.5619912335629305, "grad_norm": 1.9929825067520142, "learning_rate": 6.867057673509286e-05, "loss": 0.819, "step": 9978 }, { "epoch": 1.562147777082029, "grad_norm": 5.4597086906433105, "learning_rate": 6.864613880742913e-05, "loss": 1.3806, "step": 9979 }, { "epoch": 1.5623043206011271, "grad_norm": 1.8970288038253784, "learning_rate": 6.862170087976539e-05, "loss": 0.9163, "step": 9980 }, { "epoch": 1.5624608641202253, "grad_norm": 2.849325656890869, "learning_rate": 6.859726295210166e-05, "loss": 0.9537, "step": 9981 }, { "epoch": 1.5626174076393238, "grad_norm": 1.5763049125671387, "learning_rate": 
6.857282502443792e-05, "loss": 0.5998, "step": 9982 }, { "epoch": 1.562773951158422, "grad_norm": 2.1057538986206055, "learning_rate": 6.854838709677419e-05, "loss": 1.1581, "step": 9983 }, { "epoch": 1.5629304946775204, "grad_norm": 3.0501420497894287, "learning_rate": 6.852394916911045e-05, "loss": 1.0438, "step": 9984 }, { "epoch": 1.5630870381966186, "grad_norm": 1.4863300323486328, "learning_rate": 6.849951124144673e-05, "loss": 0.4397, "step": 9985 }, { "epoch": 1.5632435817157169, "grad_norm": 1.986098289489746, "learning_rate": 6.847507331378298e-05, "loss": 0.5282, "step": 9986 }, { "epoch": 1.5634001252348153, "grad_norm": 3.5367484092712402, "learning_rate": 6.845063538611926e-05, "loss": 0.4203, "step": 9987 }, { "epoch": 1.5635566687539137, "grad_norm": 3.17246413230896, "learning_rate": 6.842619745845552e-05, "loss": 0.8021, "step": 9988 }, { "epoch": 1.563713212273012, "grad_norm": 0.6066386103630066, "learning_rate": 6.840175953079177e-05, "loss": 0.1862, "step": 9989 }, { "epoch": 1.5638697557921102, "grad_norm": 0.8987689018249512, "learning_rate": 6.837732160312805e-05, "loss": 0.1316, "step": 9990 }, { "epoch": 1.5640262993112084, "grad_norm": 0.6625165343284607, "learning_rate": 6.835288367546432e-05, "loss": 0.2883, "step": 9991 }, { "epoch": 1.5641828428303068, "grad_norm": 0.5179951786994934, "learning_rate": 6.832844574780058e-05, "loss": 0.2005, "step": 9992 }, { "epoch": 1.5643393863494053, "grad_norm": 0.7913885116577148, "learning_rate": 6.830400782013685e-05, "loss": 0.3602, "step": 9993 }, { "epoch": 1.5644959298685035, "grad_norm": 0.5309231281280518, "learning_rate": 6.827956989247311e-05, "loss": 0.3084, "step": 9994 }, { "epoch": 1.5646524733876017, "grad_norm": 1.3907285928726196, "learning_rate": 6.825513196480938e-05, "loss": 0.4261, "step": 9995 }, { "epoch": 1.5648090169067, "grad_norm": 0.567585289478302, "learning_rate": 6.823069403714564e-05, "loss": 0.2173, "step": 9996 }, { "epoch": 1.5649655604257984, "grad_norm": 
0.8944092392921448, "learning_rate": 6.820625610948192e-05, "loss": 0.2504, "step": 9997 }, { "epoch": 1.5651221039448968, "grad_norm": 0.9722375273704529, "learning_rate": 6.818181818181817e-05, "loss": 0.3839, "step": 9998 }, { "epoch": 1.565278647463995, "grad_norm": 0.5953250527381897, "learning_rate": 6.815738025415444e-05, "loss": 0.1622, "step": 9999 }, { "epoch": 1.5654351909830932, "grad_norm": 0.7407553195953369, "learning_rate": 6.813294232649071e-05, "loss": 0.1617, "step": 10000 }, { "epoch": 1.5654351909830932, "eval_loss": 0.5069952011108398, "eval_runtime": 205.9415, "eval_samples_per_second": 60.129, "eval_steps_per_second": 3.758, "eval_wer": 0.3151178358161379, "step": 10000 }, { "epoch": 1.5655917345021915, "grad_norm": 0.744092583656311, "learning_rate": 6.810850439882697e-05, "loss": 0.3372, "step": 10001 }, { "epoch": 1.56574827802129, "grad_norm": 2.9647679328918457, "learning_rate": 6.808406647116324e-05, "loss": 0.3241, "step": 10002 }, { "epoch": 1.5659048215403883, "grad_norm": 1.3153102397918701, "learning_rate": 6.805962854349951e-05, "loss": 0.2338, "step": 10003 }, { "epoch": 1.5660613650594866, "grad_norm": 1.2516891956329346, "learning_rate": 6.803519061583577e-05, "loss": 0.2711, "step": 10004 }, { "epoch": 1.5662179085785848, "grad_norm": 0.8349717259407043, "learning_rate": 6.801075268817204e-05, "loss": 0.2158, "step": 10005 }, { "epoch": 1.566374452097683, "grad_norm": 1.2376327514648438, "learning_rate": 6.79863147605083e-05, "loss": 0.5239, "step": 10006 }, { "epoch": 1.5665309956167814, "grad_norm": 1.2116237878799438, "learning_rate": 6.796187683284457e-05, "loss": 0.3544, "step": 10007 }, { "epoch": 1.5666875391358799, "grad_norm": 1.2293322086334229, "learning_rate": 6.793743890518083e-05, "loss": 0.478, "step": 10008 }, { "epoch": 1.566844082654978, "grad_norm": 1.6404024362564087, "learning_rate": 6.791300097751711e-05, "loss": 0.4453, "step": 10009 }, { "epoch": 1.5670006261740763, "grad_norm": 1.6513690948486328, 
"learning_rate": 6.788856304985336e-05, "loss": 0.4273, "step": 10010 }, { "epoch": 1.5671571696931748, "grad_norm": 1.3088352680206299, "learning_rate": 6.786412512218963e-05, "loss": 0.3235, "step": 10011 }, { "epoch": 1.567313713212273, "grad_norm": 1.2101068496704102, "learning_rate": 6.78396871945259e-05, "loss": 0.2902, "step": 10012 }, { "epoch": 1.5674702567313714, "grad_norm": 2.1694164276123047, "learning_rate": 6.781524926686216e-05, "loss": 0.6311, "step": 10013 }, { "epoch": 1.5676268002504696, "grad_norm": 1.8107420206069946, "learning_rate": 6.779081133919843e-05, "loss": 0.4426, "step": 10014 }, { "epoch": 1.5677833437695678, "grad_norm": 1.5845452547073364, "learning_rate": 6.77663734115347e-05, "loss": 0.3915, "step": 10015 }, { "epoch": 1.5679398872886663, "grad_norm": 4.756518840789795, "learning_rate": 6.774193548387096e-05, "loss": 0.7383, "step": 10016 }, { "epoch": 1.5680964308077645, "grad_norm": 2.210839033126831, "learning_rate": 6.771749755620723e-05, "loss": 0.7217, "step": 10017 }, { "epoch": 1.568252974326863, "grad_norm": 2.380188226699829, "learning_rate": 6.76930596285435e-05, "loss": 0.7003, "step": 10018 }, { "epoch": 1.5684095178459612, "grad_norm": 3.6436707973480225, "learning_rate": 6.766862170087976e-05, "loss": 1.0092, "step": 10019 }, { "epoch": 1.5685660613650594, "grad_norm": 1.1202057600021362, "learning_rate": 6.764418377321602e-05, "loss": 0.6157, "step": 10020 }, { "epoch": 1.5687226048841578, "grad_norm": 3.5373764038085938, "learning_rate": 6.761974584555229e-05, "loss": 1.0375, "step": 10021 }, { "epoch": 1.5688791484032563, "grad_norm": 2.415952205657959, "learning_rate": 6.759530791788855e-05, "loss": 0.6472, "step": 10022 }, { "epoch": 1.5690356919223545, "grad_norm": 2.597670316696167, "learning_rate": 6.757086999022482e-05, "loss": 0.8034, "step": 10023 }, { "epoch": 1.5691922354414527, "grad_norm": 1.0793561935424805, "learning_rate": 6.75464320625611e-05, "loss": 0.4365, "step": 10024 }, { "epoch": 
1.569348778960551, "grad_norm": 4.046724796295166, "learning_rate": 6.752199413489735e-05, "loss": 0.825, "step": 10025 }, { "epoch": 1.5695053224796494, "grad_norm": 2.050435781478882, "learning_rate": 6.749755620723363e-05, "loss": 0.833, "step": 10026 }, { "epoch": 1.5696618659987478, "grad_norm": 2.141474723815918, "learning_rate": 6.747311827956989e-05, "loss": 0.5743, "step": 10027 }, { "epoch": 1.569818409517846, "grad_norm": 2.143913507461548, "learning_rate": 6.744868035190616e-05, "loss": 0.8483, "step": 10028 }, { "epoch": 1.5699749530369442, "grad_norm": 3.70343017578125, "learning_rate": 6.742424242424242e-05, "loss": 1.5895, "step": 10029 }, { "epoch": 1.5701314965560424, "grad_norm": 1.8668042421340942, "learning_rate": 6.739980449657869e-05, "loss": 0.7103, "step": 10030 }, { "epoch": 1.570288040075141, "grad_norm": 3.584521770477295, "learning_rate": 6.737536656891495e-05, "loss": 1.2124, "step": 10031 }, { "epoch": 1.5704445835942393, "grad_norm": 1.4927868843078613, "learning_rate": 6.735092864125121e-05, "loss": 0.7392, "step": 10032 }, { "epoch": 1.5706011271133375, "grad_norm": 2.586317300796509, "learning_rate": 6.732649071358748e-05, "loss": 1.2404, "step": 10033 }, { "epoch": 1.5707576706324358, "grad_norm": 1.917419672012329, "learning_rate": 6.730205278592374e-05, "loss": 1.218, "step": 10034 }, { "epoch": 1.570914214151534, "grad_norm": 2.0818874835968018, "learning_rate": 6.727761485826001e-05, "loss": 1.2963, "step": 10035 }, { "epoch": 1.5710707576706324, "grad_norm": 2.2003262042999268, "learning_rate": 6.725317693059629e-05, "loss": 0.6938, "step": 10036 }, { "epoch": 1.5712273011897309, "grad_norm": 2.3637146949768066, "learning_rate": 6.722873900293254e-05, "loss": 0.6664, "step": 10037 }, { "epoch": 1.571383844708829, "grad_norm": 2.3716087341308594, "learning_rate": 6.720430107526882e-05, "loss": 1.2521, "step": 10038 }, { "epoch": 1.5715403882279273, "grad_norm": 0.47229862213134766, "learning_rate": 6.717986314760507e-05, 
"loss": 0.1909, "step": 10039 }, { "epoch": 1.5716969317470255, "grad_norm": 0.536920428276062, "learning_rate": 6.715542521994135e-05, "loss": 0.2017, "step": 10040 }, { "epoch": 1.571853475266124, "grad_norm": 0.5747900009155273, "learning_rate": 6.713098729227761e-05, "loss": 0.1635, "step": 10041 }, { "epoch": 1.5720100187852224, "grad_norm": 0.4652441740036011, "learning_rate": 6.710654936461388e-05, "loss": 0.1387, "step": 10042 }, { "epoch": 1.5721665623043206, "grad_norm": 0.32238516211509705, "learning_rate": 6.708211143695014e-05, "loss": 0.1682, "step": 10043 }, { "epoch": 1.5723231058234188, "grad_norm": 0.5064279437065125, "learning_rate": 6.70576735092864e-05, "loss": 0.2486, "step": 10044 }, { "epoch": 1.5724796493425173, "grad_norm": 0.5283740758895874, "learning_rate": 6.703323558162267e-05, "loss": 0.2012, "step": 10045 }, { "epoch": 1.5726361928616155, "grad_norm": 0.55204176902771, "learning_rate": 6.700879765395894e-05, "loss": 0.2433, "step": 10046 }, { "epoch": 1.572792736380714, "grad_norm": 0.5996967554092407, "learning_rate": 6.69843597262952e-05, "loss": 0.2243, "step": 10047 }, { "epoch": 1.5729492798998121, "grad_norm": 0.5109484791755676, "learning_rate": 6.695992179863147e-05, "loss": 0.1715, "step": 10048 }, { "epoch": 1.5731058234189104, "grad_norm": 0.9667737483978271, "learning_rate": 6.693548387096773e-05, "loss": 0.2348, "step": 10049 }, { "epoch": 1.5732623669380088, "grad_norm": 1.0587562322616577, "learning_rate": 6.691104594330401e-05, "loss": 0.2966, "step": 10050 }, { "epoch": 1.573418910457107, "grad_norm": 1.2635822296142578, "learning_rate": 6.688660801564026e-05, "loss": 0.1986, "step": 10051 }, { "epoch": 1.5735754539762055, "grad_norm": 0.7891488075256348, "learning_rate": 6.686217008797654e-05, "loss": 0.2691, "step": 10052 }, { "epoch": 1.5737319974953037, "grad_norm": 0.8126229643821716, "learning_rate": 6.68377321603128e-05, "loss": 0.3324, "step": 10053 }, { "epoch": 1.573888541014402, "grad_norm": 
1.0948699712753296, "learning_rate": 6.681329423264907e-05, "loss": 0.3169, "step": 10054 }, { "epoch": 1.5740450845335003, "grad_norm": 1.4431370496749878, "learning_rate": 6.678885630498533e-05, "loss": 0.5378, "step": 10055 }, { "epoch": 1.5742016280525988, "grad_norm": 2.1338889598846436, "learning_rate": 6.67644183773216e-05, "loss": 0.4985, "step": 10056 }, { "epoch": 1.574358171571697, "grad_norm": 1.3593388795852661, "learning_rate": 6.673998044965786e-05, "loss": 0.4749, "step": 10057 }, { "epoch": 1.5745147150907952, "grad_norm": 1.260514259338379, "learning_rate": 6.671554252199413e-05, "loss": 0.431, "step": 10058 }, { "epoch": 1.5746712586098934, "grad_norm": 1.087146282196045, "learning_rate": 6.669110459433039e-05, "loss": 0.4252, "step": 10059 }, { "epoch": 1.5748278021289919, "grad_norm": 0.7750207185745239, "learning_rate": 6.666666666666666e-05, "loss": 0.3251, "step": 10060 }, { "epoch": 1.5749843456480903, "grad_norm": 1.3115235567092896, "learning_rate": 6.664222873900292e-05, "loss": 0.3326, "step": 10061 }, { "epoch": 1.5751408891671885, "grad_norm": 1.4149150848388672, "learning_rate": 6.66177908113392e-05, "loss": 0.5962, "step": 10062 }, { "epoch": 1.5752974326862867, "grad_norm": 3.154900550842285, "learning_rate": 6.659335288367545e-05, "loss": 0.4921, "step": 10063 }, { "epoch": 1.575453976205385, "grad_norm": 1.6714290380477905, "learning_rate": 6.656891495601173e-05, "loss": 0.8742, "step": 10064 }, { "epoch": 1.5756105197244834, "grad_norm": 2.3452184200286865, "learning_rate": 6.6544477028348e-05, "loss": 0.767, "step": 10065 }, { "epoch": 1.5757670632435818, "grad_norm": 1.208871603012085, "learning_rate": 6.652003910068426e-05, "loss": 0.3151, "step": 10066 }, { "epoch": 1.57592360676268, "grad_norm": 3.192185878753662, "learning_rate": 6.649560117302052e-05, "loss": 0.8238, "step": 10067 }, { "epoch": 1.5760801502817783, "grad_norm": 3.4253957271575928, "learning_rate": 6.647116324535679e-05, "loss": 0.8813, "step": 10068 }, { 
"epoch": 1.5762366938008765, "grad_norm": 1.4279910326004028, "learning_rate": 6.644672531769305e-05, "loss": 0.6148, "step": 10069 }, { "epoch": 1.576393237319975, "grad_norm": 3.070469856262207, "learning_rate": 6.642228739002932e-05, "loss": 0.3901, "step": 10070 }, { "epoch": 1.5765497808390734, "grad_norm": 2.3599345684051514, "learning_rate": 6.639784946236558e-05, "loss": 0.4485, "step": 10071 }, { "epoch": 1.5767063243581716, "grad_norm": 1.7810369729995728, "learning_rate": 6.637341153470185e-05, "loss": 0.7455, "step": 10072 }, { "epoch": 1.5768628678772698, "grad_norm": 2.3326375484466553, "learning_rate": 6.634897360703811e-05, "loss": 0.5618, "step": 10073 }, { "epoch": 1.577019411396368, "grad_norm": 2.5533432960510254, "learning_rate": 6.632453567937439e-05, "loss": 0.9591, "step": 10074 }, { "epoch": 1.5771759549154665, "grad_norm": 3.2034530639648438, "learning_rate": 6.630009775171064e-05, "loss": 0.8351, "step": 10075 }, { "epoch": 1.577332498434565, "grad_norm": 2.8962016105651855, "learning_rate": 6.627565982404692e-05, "loss": 1.478, "step": 10076 }, { "epoch": 1.5774890419536631, "grad_norm": 2.253652572631836, "learning_rate": 6.625122189638319e-05, "loss": 1.1017, "step": 10077 }, { "epoch": 1.5776455854727613, "grad_norm": 4.773320198059082, "learning_rate": 6.622678396871945e-05, "loss": 1.4581, "step": 10078 }, { "epoch": 1.5778021289918598, "grad_norm": 4.403872013092041, "learning_rate": 6.620234604105571e-05, "loss": 0.7407, "step": 10079 }, { "epoch": 1.577958672510958, "grad_norm": 2.4393060207366943, "learning_rate": 6.617790811339198e-05, "loss": 0.9164, "step": 10080 }, { "epoch": 1.5781152160300564, "grad_norm": 3.306149959564209, "learning_rate": 6.615347018572824e-05, "loss": 1.3664, "step": 10081 }, { "epoch": 1.5782717595491547, "grad_norm": 2.284524917602539, "learning_rate": 6.612903225806451e-05, "loss": 0.9943, "step": 10082 }, { "epoch": 1.5784283030682529, "grad_norm": 6.304023265838623, "learning_rate": 
6.610459433040077e-05, "loss": 0.3365, "step": 10083 }, { "epoch": 1.5785848465873513, "grad_norm": 1.4609630107879639, "learning_rate": 6.608015640273704e-05, "loss": 0.3204, "step": 10084 }, { "epoch": 1.5787413901064495, "grad_norm": 1.2958881855010986, "learning_rate": 6.60557184750733e-05, "loss": 0.5147, "step": 10085 }, { "epoch": 1.578897933625548, "grad_norm": 2.1635255813598633, "learning_rate": 6.603128054740958e-05, "loss": 0.5687, "step": 10086 }, { "epoch": 1.5790544771446462, "grad_norm": 3.585665225982666, "learning_rate": 6.600684261974583e-05, "loss": 1.3545, "step": 10087 }, { "epoch": 1.5792110206637444, "grad_norm": 4.3810834884643555, "learning_rate": 6.598240469208211e-05, "loss": 1.403, "step": 10088 }, { "epoch": 1.5793675641828429, "grad_norm": 1.2125346660614014, "learning_rate": 6.595796676441838e-05, "loss": 0.195, "step": 10089 }, { "epoch": 1.5795241077019413, "grad_norm": 1.367098331451416, "learning_rate": 6.593352883675464e-05, "loss": 0.303, "step": 10090 }, { "epoch": 1.5796806512210395, "grad_norm": 1.04794442653656, "learning_rate": 6.59090909090909e-05, "loss": 0.3611, "step": 10091 }, { "epoch": 1.5798371947401377, "grad_norm": 0.518174946308136, "learning_rate": 6.588465298142717e-05, "loss": 0.1909, "step": 10092 }, { "epoch": 1.579993738259236, "grad_norm": 0.5261414051055908, "learning_rate": 6.586021505376344e-05, "loss": 0.2269, "step": 10093 }, { "epoch": 1.5801502817783344, "grad_norm": 0.4940567910671234, "learning_rate": 6.58357771260997e-05, "loss": 0.1931, "step": 10094 }, { "epoch": 1.5803068252974328, "grad_norm": 0.8084196448326111, "learning_rate": 6.581133919843596e-05, "loss": 0.3157, "step": 10095 }, { "epoch": 1.580463368816531, "grad_norm": 0.5676218867301941, "learning_rate": 6.578690127077223e-05, "loss": 0.2145, "step": 10096 }, { "epoch": 1.5806199123356293, "grad_norm": 0.6355946660041809, "learning_rate": 6.57624633431085e-05, "loss": 0.1708, "step": 10097 }, { "epoch": 1.5807764558547275, 
"grad_norm": 0.8469789028167725, "learning_rate": 6.573802541544477e-05, "loss": 0.3097, "step": 10098 }, { "epoch": 1.580932999373826, "grad_norm": 2.256359338760376, "learning_rate": 6.571358748778102e-05, "loss": 0.2384, "step": 10099 }, { "epoch": 1.5810895428929244, "grad_norm": 0.8068323731422424, "learning_rate": 6.56891495601173e-05, "loss": 0.2843, "step": 10100 }, { "epoch": 1.5812460864120226, "grad_norm": 0.44950971007347107, "learning_rate": 6.566471163245357e-05, "loss": 0.1872, "step": 10101 }, { "epoch": 1.5814026299311208, "grad_norm": 1.0343793630599976, "learning_rate": 6.564027370478982e-05, "loss": 0.3186, "step": 10102 }, { "epoch": 1.581559173450219, "grad_norm": 2.219566822052002, "learning_rate": 6.56158357771261e-05, "loss": 0.5353, "step": 10103 }, { "epoch": 1.5817157169693175, "grad_norm": 1.2421504259109497, "learning_rate": 6.559139784946236e-05, "loss": 0.4237, "step": 10104 }, { "epoch": 1.581872260488416, "grad_norm": 0.9300140738487244, "learning_rate": 6.556695992179863e-05, "loss": 0.4712, "step": 10105 }, { "epoch": 1.5820288040075141, "grad_norm": 1.3351165056228638, "learning_rate": 6.554252199413489e-05, "loss": 0.4134, "step": 10106 }, { "epoch": 1.5821853475266123, "grad_norm": 1.0536439418792725, "learning_rate": 6.551808406647116e-05, "loss": 0.337, "step": 10107 }, { "epoch": 1.5823418910457105, "grad_norm": 0.8616892695426941, "learning_rate": 6.549364613880742e-05, "loss": 0.4153, "step": 10108 }, { "epoch": 1.582498434564809, "grad_norm": 1.845602035522461, "learning_rate": 6.546920821114369e-05, "loss": 0.3483, "step": 10109 }, { "epoch": 1.5826549780839074, "grad_norm": 4.453863143920898, "learning_rate": 6.544477028347996e-05, "loss": 0.9881, "step": 10110 }, { "epoch": 1.5828115216030056, "grad_norm": 1.3871952295303345, "learning_rate": 6.542033235581622e-05, "loss": 0.5901, "step": 10111 }, { "epoch": 1.5829680651221039, "grad_norm": 1.2875733375549316, "learning_rate": 6.53958944281525e-05, "loss": 0.5883, 
"step": 10112 }, { "epoch": 1.5831246086412023, "grad_norm": 1.413844108581543, "learning_rate": 6.537145650048876e-05, "loss": 0.3296, "step": 10113 }, { "epoch": 1.5832811521603005, "grad_norm": 1.2688345909118652, "learning_rate": 6.534701857282501e-05, "loss": 0.4407, "step": 10114 }, { "epoch": 1.583437695679399, "grad_norm": 1.8131461143493652, "learning_rate": 6.532258064516129e-05, "loss": 0.2713, "step": 10115 }, { "epoch": 1.5835942391984972, "grad_norm": 1.4916858673095703, "learning_rate": 6.529814271749755e-05, "loss": 0.5562, "step": 10116 }, { "epoch": 1.5837507827175954, "grad_norm": 2.313715696334839, "learning_rate": 6.527370478983382e-05, "loss": 0.5763, "step": 10117 }, { "epoch": 1.5839073262366938, "grad_norm": 1.6103403568267822, "learning_rate": 6.524926686217008e-05, "loss": 0.562, "step": 10118 }, { "epoch": 1.5840638697557923, "grad_norm": 0.9578543901443481, "learning_rate": 6.522482893450635e-05, "loss": 0.475, "step": 10119 }, { "epoch": 1.5842204132748905, "grad_norm": 1.8726333379745483, "learning_rate": 6.520039100684261e-05, "loss": 0.8251, "step": 10120 }, { "epoch": 1.5843769567939887, "grad_norm": 1.8902461528778076, "learning_rate": 6.517595307917888e-05, "loss": 1.235, "step": 10121 }, { "epoch": 1.584533500313087, "grad_norm": 3.1188371181488037, "learning_rate": 6.515151515151516e-05, "loss": 0.5179, "step": 10122 }, { "epoch": 1.5846900438321854, "grad_norm": 4.811651706695557, "learning_rate": 6.51270772238514e-05, "loss": 0.6082, "step": 10123 }, { "epoch": 1.5848465873512838, "grad_norm": 2.434835910797119, "learning_rate": 6.510263929618767e-05, "loss": 0.6315, "step": 10124 }, { "epoch": 1.585003130870382, "grad_norm": 1.2945551872253418, "learning_rate": 6.507820136852395e-05, "loss": 0.2202, "step": 10125 }, { "epoch": 1.5851596743894802, "grad_norm": 1.9833664894104004, "learning_rate": 6.50537634408602e-05, "loss": 0.7926, "step": 10126 }, { "epoch": 1.5853162179085785, "grad_norm": 3.4194037914276123, 
"learning_rate": 6.502932551319648e-05, "loss": 1.2268, "step": 10127 }, { "epoch": 1.585472761427677, "grad_norm": 2.057821035385132, "learning_rate": 6.500488758553274e-05, "loss": 1.0213, "step": 10128 }, { "epoch": 1.5856293049467753, "grad_norm": 7.126612186431885, "learning_rate": 6.498044965786901e-05, "loss": 1.0572, "step": 10129 }, { "epoch": 1.5857858484658736, "grad_norm": 3.2313385009765625, "learning_rate": 6.495601173020527e-05, "loss": 1.4866, "step": 10130 }, { "epoch": 1.5859423919849718, "grad_norm": 2.4898273944854736, "learning_rate": 6.493157380254154e-05, "loss": 0.851, "step": 10131 }, { "epoch": 1.58609893550407, "grad_norm": 2.793358087539673, "learning_rate": 6.49071358748778e-05, "loss": 1.1641, "step": 10132 }, { "epoch": 1.5862554790231684, "grad_norm": 2.09865665435791, "learning_rate": 6.488269794721407e-05, "loss": 1.1646, "step": 10133 }, { "epoch": 1.5864120225422669, "grad_norm": 1.7419707775115967, "learning_rate": 6.485826001955035e-05, "loss": 0.6815, "step": 10134 }, { "epoch": 1.586568566061365, "grad_norm": 1.2431154251098633, "learning_rate": 6.48338220918866e-05, "loss": 0.5228, "step": 10135 }, { "epoch": 1.5867251095804633, "grad_norm": 0.5367014408111572, "learning_rate": 6.480938416422286e-05, "loss": 0.1294, "step": 10136 }, { "epoch": 1.5868816530995615, "grad_norm": 1.9725390672683716, "learning_rate": 6.478494623655914e-05, "loss": 0.6302, "step": 10137 }, { "epoch": 1.58703819661866, "grad_norm": 2.6568782329559326, "learning_rate": 6.476050830889539e-05, "loss": 0.6514, "step": 10138 }, { "epoch": 1.5871947401377584, "grad_norm": 0.5387908220291138, "learning_rate": 6.473607038123167e-05, "loss": 0.2569, "step": 10139 }, { "epoch": 1.5873512836568566, "grad_norm": 0.6417441964149475, "learning_rate": 6.471163245356794e-05, "loss": 0.2113, "step": 10140 }, { "epoch": 1.5875078271759548, "grad_norm": 0.7953237295150757, "learning_rate": 6.46871945259042e-05, "loss": 0.2751, "step": 10141 }, { "epoch": 
1.587664370695053, "grad_norm": 0.5870561003684998, "learning_rate": 6.466275659824046e-05, "loss": 0.274, "step": 10142 }, { "epoch": 1.5878209142141515, "grad_norm": 0.416892409324646, "learning_rate": 6.463831867057673e-05, "loss": 0.2782, "step": 10143 }, { "epoch": 1.58797745773325, "grad_norm": 1.2958848476409912, "learning_rate": 6.4613880742913e-05, "loss": 0.2186, "step": 10144 }, { "epoch": 1.5881340012523482, "grad_norm": 0.7092198133468628, "learning_rate": 6.458944281524926e-05, "loss": 0.3302, "step": 10145 }, { "epoch": 1.5882905447714464, "grad_norm": 1.2143514156341553, "learning_rate": 6.456500488758554e-05, "loss": 0.4434, "step": 10146 }, { "epoch": 1.5884470882905448, "grad_norm": 0.6722570657730103, "learning_rate": 6.454056695992179e-05, "loss": 0.3993, "step": 10147 }, { "epoch": 1.588603631809643, "grad_norm": 0.7443752884864807, "learning_rate": 6.451612903225805e-05, "loss": 0.4719, "step": 10148 }, { "epoch": 1.5887601753287415, "grad_norm": 1.1258270740509033, "learning_rate": 6.449169110459433e-05, "loss": 0.2445, "step": 10149 }, { "epoch": 1.5889167188478397, "grad_norm": 0.5991902351379395, "learning_rate": 6.446725317693058e-05, "loss": 0.3133, "step": 10150 }, { "epoch": 1.589073262366938, "grad_norm": 1.0533815622329712, "learning_rate": 6.444281524926686e-05, "loss": 0.2713, "step": 10151 }, { "epoch": 1.5892298058860364, "grad_norm": 1.2451895475387573, "learning_rate": 6.441837732160313e-05, "loss": 0.2822, "step": 10152 }, { "epoch": 1.5893863494051348, "grad_norm": 1.2398431301116943, "learning_rate": 6.439393939393939e-05, "loss": 0.4126, "step": 10153 }, { "epoch": 1.589542892924233, "grad_norm": 7.360801696777344, "learning_rate": 6.436950146627566e-05, "loss": 0.4618, "step": 10154 }, { "epoch": 1.5896994364433312, "grad_norm": 1.0283269882202148, "learning_rate": 6.434506353861192e-05, "loss": 0.5885, "step": 10155 }, { "epoch": 1.5898559799624294, "grad_norm": 2.743352174758911, "learning_rate": 6.432062561094819e-05, 
"loss": 0.4484, "step": 10156 }, { "epoch": 1.5900125234815279, "grad_norm": 0.7910766005516052, "learning_rate": 6.429618768328445e-05, "loss": 0.3717, "step": 10157 }, { "epoch": 1.5901690670006263, "grad_norm": 0.8904502987861633, "learning_rate": 6.427174975562072e-05, "loss": 0.372, "step": 10158 }, { "epoch": 1.5903256105197245, "grad_norm": 1.1738675832748413, "learning_rate": 6.424731182795698e-05, "loss": 0.3387, "step": 10159 }, { "epoch": 1.5904821540388228, "grad_norm": 1.5055691003799438, "learning_rate": 6.422287390029324e-05, "loss": 0.4907, "step": 10160 }, { "epoch": 1.590638697557921, "grad_norm": 1.081289291381836, "learning_rate": 6.419843597262952e-05, "loss": 0.3105, "step": 10161 }, { "epoch": 1.5907952410770194, "grad_norm": 1.3567743301391602, "learning_rate": 6.417399804496577e-05, "loss": 0.3565, "step": 10162 }, { "epoch": 1.5909517845961179, "grad_norm": 1.7186039686203003, "learning_rate": 6.414956011730205e-05, "loss": 0.7406, "step": 10163 }, { "epoch": 1.591108328115216, "grad_norm": 1.8988852500915527, "learning_rate": 6.412512218963832e-05, "loss": 0.3946, "step": 10164 }, { "epoch": 1.5912648716343143, "grad_norm": 2.2571020126342773, "learning_rate": 6.410068426197458e-05, "loss": 0.5579, "step": 10165 }, { "epoch": 1.5914214151534125, "grad_norm": 1.9459624290466309, "learning_rate": 6.407624633431085e-05, "loss": 0.7901, "step": 10166 }, { "epoch": 1.591577958672511, "grad_norm": 2.4512932300567627, "learning_rate": 6.405180840664711e-05, "loss": 1.0994, "step": 10167 }, { "epoch": 1.5917345021916094, "grad_norm": 1.6560828685760498, "learning_rate": 6.402737047898338e-05, "loss": 0.4887, "step": 10168 }, { "epoch": 1.5918910457107076, "grad_norm": 1.7962381839752197, "learning_rate": 6.400293255131964e-05, "loss": 0.649, "step": 10169 }, { "epoch": 1.5920475892298058, "grad_norm": 2.8789329528808594, "learning_rate": 6.39784946236559e-05, "loss": 0.7007, "step": 10170 }, { "epoch": 1.592204132748904, "grad_norm": 
2.5336639881134033, "learning_rate": 6.395405669599217e-05, "loss": 1.0201, "step": 10171 }, { "epoch": 1.5923606762680025, "grad_norm": 2.5551042556762695, "learning_rate": 6.392961876832844e-05, "loss": 0.7936, "step": 10172 }, { "epoch": 1.592517219787101, "grad_norm": 1.407386064529419, "learning_rate": 6.39051808406647e-05, "loss": 0.6436, "step": 10173 }, { "epoch": 1.5926737633061991, "grad_norm": 2.690272331237793, "learning_rate": 6.388074291300097e-05, "loss": 0.7574, "step": 10174 }, { "epoch": 1.5928303068252974, "grad_norm": 1.8954501152038574, "learning_rate": 6.385630498533724e-05, "loss": 0.7914, "step": 10175 }, { "epoch": 1.5929868503443956, "grad_norm": 2.0154988765716553, "learning_rate": 6.38318670576735e-05, "loss": 0.6738, "step": 10176 }, { "epoch": 1.593143393863494, "grad_norm": 2.260967969894409, "learning_rate": 6.380742913000977e-05, "loss": 0.5325, "step": 10177 }, { "epoch": 1.5932999373825925, "grad_norm": 3.078644037246704, "learning_rate": 6.378299120234604e-05, "loss": 0.6965, "step": 10178 }, { "epoch": 1.5934564809016907, "grad_norm": 2.3561620712280273, "learning_rate": 6.37585532746823e-05, "loss": 0.9808, "step": 10179 }, { "epoch": 1.593613024420789, "grad_norm": 3.07482647895813, "learning_rate": 6.373411534701857e-05, "loss": 0.9701, "step": 10180 }, { "epoch": 1.5937695679398873, "grad_norm": 5.131535530090332, "learning_rate": 6.370967741935483e-05, "loss": 0.8996, "step": 10181 }, { "epoch": 1.5939261114589856, "grad_norm": 2.536735773086548, "learning_rate": 6.36852394916911e-05, "loss": 1.0249, "step": 10182 }, { "epoch": 1.594082654978084, "grad_norm": 1.7344963550567627, "learning_rate": 6.366080156402736e-05, "loss": 0.9431, "step": 10183 }, { "epoch": 1.5942391984971822, "grad_norm": 1.4525513648986816, "learning_rate": 6.363636363636363e-05, "loss": 0.3811, "step": 10184 }, { "epoch": 1.5943957420162804, "grad_norm": 11.553523063659668, "learning_rate": 6.361192570869989e-05, "loss": 0.7413, "step": 10185 }, { 
"epoch": 1.5945522855353789, "grad_norm": 2.428168535232544, "learning_rate": 6.358748778103616e-05, "loss": 0.6394, "step": 10186 }, { "epoch": 1.5947088290544773, "grad_norm": 5.859124183654785, "learning_rate": 6.356304985337244e-05, "loss": 0.5436, "step": 10187 }, { "epoch": 1.5948653725735755, "grad_norm": 2.319378614425659, "learning_rate": 6.353861192570869e-05, "loss": 1.0681, "step": 10188 }, { "epoch": 1.5950219160926737, "grad_norm": 0.3972637355327606, "learning_rate": 6.351417399804496e-05, "loss": 0.2068, "step": 10189 }, { "epoch": 1.595178459611772, "grad_norm": 0.3770846128463745, "learning_rate": 6.348973607038123e-05, "loss": 0.1627, "step": 10190 }, { "epoch": 1.5953350031308704, "grad_norm": 1.2007943391799927, "learning_rate": 6.34652981427175e-05, "loss": 0.254, "step": 10191 }, { "epoch": 1.5954915466499688, "grad_norm": 2.273597002029419, "learning_rate": 6.344086021505376e-05, "loss": 0.3272, "step": 10192 }, { "epoch": 1.595648090169067, "grad_norm": 0.8018514513969421, "learning_rate": 6.341642228739002e-05, "loss": 0.2878, "step": 10193 }, { "epoch": 1.5958046336881653, "grad_norm": 0.5968719124794006, "learning_rate": 6.339198435972629e-05, "loss": 0.2637, "step": 10194 }, { "epoch": 1.5959611772072635, "grad_norm": 0.6231215000152588, "learning_rate": 6.336754643206255e-05, "loss": 0.2629, "step": 10195 }, { "epoch": 1.596117720726362, "grad_norm": 0.4936104416847229, "learning_rate": 6.334310850439882e-05, "loss": 0.207, "step": 10196 }, { "epoch": 1.5962742642454604, "grad_norm": 0.6326175928115845, "learning_rate": 6.331867057673508e-05, "loss": 0.4249, "step": 10197 }, { "epoch": 1.5964308077645586, "grad_norm": 0.5844799876213074, "learning_rate": 6.329423264907135e-05, "loss": 0.2245, "step": 10198 }, { "epoch": 1.5965873512836568, "grad_norm": 2.547029495239258, "learning_rate": 6.326979472140763e-05, "loss": 0.4301, "step": 10199 }, { "epoch": 1.596743894802755, "grad_norm": 1.4274916648864746, "learning_rate": 
6.324535679374388e-05, "loss": 0.4086, "step": 10200 }, { "epoch": 1.5969004383218535, "grad_norm": 1.0012903213500977, "learning_rate": 6.322091886608016e-05, "loss": 0.2683, "step": 10201 }, { "epoch": 1.597056981840952, "grad_norm": 2.6661782264709473, "learning_rate": 6.319648093841642e-05, "loss": 0.5077, "step": 10202 }, { "epoch": 1.5972135253600501, "grad_norm": 0.7351189851760864, "learning_rate": 6.317204301075269e-05, "loss": 0.3245, "step": 10203 }, { "epoch": 1.5973700688791483, "grad_norm": 0.7789361476898193, "learning_rate": 6.314760508308895e-05, "loss": 0.3436, "step": 10204 }, { "epoch": 1.5975266123982466, "grad_norm": 1.3661255836486816, "learning_rate": 6.312316715542522e-05, "loss": 0.381, "step": 10205 }, { "epoch": 1.597683155917345, "grad_norm": 0.8813636898994446, "learning_rate": 6.309872922776148e-05, "loss": 0.3987, "step": 10206 }, { "epoch": 1.5978396994364434, "grad_norm": 0.6111112236976624, "learning_rate": 6.307429130009774e-05, "loss": 0.1548, "step": 10207 }, { "epoch": 1.5979962429555417, "grad_norm": 1.8574801683425903, "learning_rate": 6.304985337243401e-05, "loss": 0.5964, "step": 10208 }, { "epoch": 1.5981527864746399, "grad_norm": 3.280104160308838, "learning_rate": 6.302541544477027e-05, "loss": 0.4009, "step": 10209 }, { "epoch": 1.5983093299937383, "grad_norm": 4.668222427368164, "learning_rate": 6.300097751710654e-05, "loss": 0.4519, "step": 10210 }, { "epoch": 1.5984658735128365, "grad_norm": 2.0994975566864014, "learning_rate": 6.297653958944282e-05, "loss": 0.5817, "step": 10211 }, { "epoch": 1.598622417031935, "grad_norm": 2.334909200668335, "learning_rate": 6.295210166177907e-05, "loss": 0.5803, "step": 10212 }, { "epoch": 1.5987789605510332, "grad_norm": 1.9847476482391357, "learning_rate": 6.292766373411535e-05, "loss": 0.3882, "step": 10213 }, { "epoch": 1.5989355040701314, "grad_norm": 1.3854622840881348, "learning_rate": 6.290322580645161e-05, "loss": 0.477, "step": 10214 }, { "epoch": 1.5990920475892298, 
"grad_norm": 1.641670823097229, "learning_rate": 6.287878787878788e-05, "loss": 0.3697, "step": 10215 }, { "epoch": 1.599248591108328, "grad_norm": 2.7601287364959717, "learning_rate": 6.285434995112414e-05, "loss": 0.438, "step": 10216 }, { "epoch": 1.5994051346274265, "grad_norm": 2.510101318359375, "learning_rate": 6.28299120234604e-05, "loss": 0.8074, "step": 10217 }, { "epoch": 1.5995616781465247, "grad_norm": 2.204482316970825, "learning_rate": 6.280547409579667e-05, "loss": 0.5739, "step": 10218 }, { "epoch": 1.599718221665623, "grad_norm": 2.250614643096924, "learning_rate": 6.278103616813294e-05, "loss": 0.9905, "step": 10219 }, { "epoch": 1.5998747651847214, "grad_norm": 2.100642204284668, "learning_rate": 6.27565982404692e-05, "loss": 0.7706, "step": 10220 }, { "epoch": 1.6000313087038198, "grad_norm": 2.480836868286133, "learning_rate": 6.273216031280547e-05, "loss": 0.9495, "step": 10221 }, { "epoch": 1.600187852222918, "grad_norm": 2.637479305267334, "learning_rate": 6.270772238514173e-05, "loss": 0.5965, "step": 10222 }, { "epoch": 1.6003443957420163, "grad_norm": 5.914449691772461, "learning_rate": 6.268328445747801e-05, "loss": 1.0819, "step": 10223 }, { "epoch": 1.6005009392611145, "grad_norm": 3.239971876144409, "learning_rate": 6.265884652981426e-05, "loss": 0.9483, "step": 10224 }, { "epoch": 1.600657482780213, "grad_norm": 2.788769483566284, "learning_rate": 6.263440860215054e-05, "loss": 0.8619, "step": 10225 }, { "epoch": 1.6008140262993114, "grad_norm": 3.059751033782959, "learning_rate": 6.26099706744868e-05, "loss": 0.8397, "step": 10226 }, { "epoch": 1.6009705698184096, "grad_norm": 3.003505229949951, "learning_rate": 6.258553274682307e-05, "loss": 0.8119, "step": 10227 }, { "epoch": 1.6011271133375078, "grad_norm": 2.3189423084259033, "learning_rate": 6.256109481915933e-05, "loss": 0.9308, "step": 10228 }, { "epoch": 1.601283656856606, "grad_norm": 5.253555774688721, "learning_rate": 6.25366568914956e-05, "loss": 0.7128, "step": 10229 
}, { "epoch": 1.6014402003757044, "grad_norm": 4.00673770904541, "learning_rate": 6.251221896383186e-05, "loss": 1.3895, "step": 10230 }, { "epoch": 1.6015967438948029, "grad_norm": 4.583085060119629, "learning_rate": 6.248778103616813e-05, "loss": 0.7943, "step": 10231 }, { "epoch": 1.601753287413901, "grad_norm": 0.6882913112640381, "learning_rate": 6.246334310850439e-05, "loss": 0.3133, "step": 10232 }, { "epoch": 1.6019098309329993, "grad_norm": 1.9263126850128174, "learning_rate": 6.243890518084066e-05, "loss": 0.4667, "step": 10233 }, { "epoch": 1.6020663744520975, "grad_norm": 13.986974716186523, "learning_rate": 6.241446725317692e-05, "loss": 0.7877, "step": 10234 }, { "epoch": 1.602222917971196, "grad_norm": 0.6357542872428894, "learning_rate": 6.23900293255132e-05, "loss": 0.2021, "step": 10235 }, { "epoch": 1.6023794614902944, "grad_norm": 6.546449661254883, "learning_rate": 6.236559139784945e-05, "loss": 0.728, "step": 10236 }, { "epoch": 1.6025360050093926, "grad_norm": 1.615708351135254, "learning_rate": 6.234115347018573e-05, "loss": 0.843, "step": 10237 }, { "epoch": 1.6026925485284909, "grad_norm": 2.3978271484375, "learning_rate": 6.2316715542522e-05, "loss": 1.1529, "step": 10238 }, { "epoch": 1.602849092047589, "grad_norm": 0.8120617866516113, "learning_rate": 6.229227761485825e-05, "loss": 0.2389, "step": 10239 }, { "epoch": 1.6030056355666875, "grad_norm": 0.506904125213623, "learning_rate": 6.226783968719452e-05, "loss": 0.2102, "step": 10240 }, { "epoch": 1.603162179085786, "grad_norm": 0.43618541955947876, "learning_rate": 6.224340175953079e-05, "loss": 0.1446, "step": 10241 }, { "epoch": 1.6033187226048842, "grad_norm": 0.7896573543548584, "learning_rate": 6.221896383186705e-05, "loss": 0.3323, "step": 10242 }, { "epoch": 1.6034752661239824, "grad_norm": 0.5736374855041504, "learning_rate": 6.219452590420332e-05, "loss": 0.2047, "step": 10243 }, { "epoch": 1.6036318096430808, "grad_norm": 0.6807544231414795, "learning_rate": 
6.217008797653958e-05, "loss": 0.3968, "step": 10244 }, { "epoch": 1.603788353162179, "grad_norm": 0.7900474071502686, "learning_rate": 6.214565004887585e-05, "loss": 0.3243, "step": 10245 }, { "epoch": 1.6039448966812775, "grad_norm": 0.6970692873001099, "learning_rate": 6.212121212121211e-05, "loss": 0.1766, "step": 10246 }, { "epoch": 1.6041014402003757, "grad_norm": 2.053403854370117, "learning_rate": 6.209677419354839e-05, "loss": 0.2549, "step": 10247 }, { "epoch": 1.604257983719474, "grad_norm": 2.547222375869751, "learning_rate": 6.207233626588464e-05, "loss": 0.3394, "step": 10248 }, { "epoch": 1.6044145272385724, "grad_norm": 0.9715545773506165, "learning_rate": 6.204789833822092e-05, "loss": 0.2039, "step": 10249 }, { "epoch": 1.6045710707576706, "grad_norm": 0.9468798637390137, "learning_rate": 6.202346041055719e-05, "loss": 0.3098, "step": 10250 }, { "epoch": 1.604727614276769, "grad_norm": 0.8663145303726196, "learning_rate": 6.199902248289344e-05, "loss": 0.1379, "step": 10251 }, { "epoch": 1.6048841577958672, "grad_norm": 0.8937069177627563, "learning_rate": 6.197458455522971e-05, "loss": 0.3302, "step": 10252 }, { "epoch": 1.6050407013149655, "grad_norm": 1.2698081731796265, "learning_rate": 6.195014662756598e-05, "loss": 0.3953, "step": 10253 }, { "epoch": 1.605197244834064, "grad_norm": 0.827766478061676, "learning_rate": 6.192570869990224e-05, "loss": 0.2489, "step": 10254 }, { "epoch": 1.6053537883531623, "grad_norm": 3.880368947982788, "learning_rate": 6.190127077223851e-05, "loss": 0.6099, "step": 10255 }, { "epoch": 1.6055103318722606, "grad_norm": 2.3780124187469482, "learning_rate": 6.187683284457477e-05, "loss": 0.5125, "step": 10256 }, { "epoch": 1.6056668753913588, "grad_norm": 2.895963191986084, "learning_rate": 6.185239491691104e-05, "loss": 0.2951, "step": 10257 }, { "epoch": 1.605823418910457, "grad_norm": 0.9345550537109375, "learning_rate": 6.18279569892473e-05, "loss": 0.397, "step": 10258 }, { "epoch": 1.6059799624295554, 
"grad_norm": 1.1102604866027832, "learning_rate": 6.180351906158358e-05, "loss": 0.51, "step": 10259 }, { "epoch": 1.6061365059486539, "grad_norm": 0.9499253630638123, "learning_rate": 6.177908113391983e-05, "loss": 0.2253, "step": 10260 }, { "epoch": 1.606293049467752, "grad_norm": 0.8864089250564575, "learning_rate": 6.17546432062561e-05, "loss": 0.2358, "step": 10261 }, { "epoch": 1.6064495929868503, "grad_norm": 1.9460018873214722, "learning_rate": 6.173020527859238e-05, "loss": 0.5664, "step": 10262 }, { "epoch": 1.6066061365059485, "grad_norm": 1.625748634338379, "learning_rate": 6.170576735092863e-05, "loss": 0.4159, "step": 10263 }, { "epoch": 1.606762680025047, "grad_norm": 2.510746479034424, "learning_rate": 6.16813294232649e-05, "loss": 0.7013, "step": 10264 }, { "epoch": 1.6069192235441454, "grad_norm": 3.4259462356567383, "learning_rate": 6.165689149560117e-05, "loss": 0.326, "step": 10265 }, { "epoch": 1.6070757670632436, "grad_norm": 1.4860423803329468, "learning_rate": 6.163245356793744e-05, "loss": 0.3353, "step": 10266 }, { "epoch": 1.6072323105823418, "grad_norm": 1.9358034133911133, "learning_rate": 6.16080156402737e-05, "loss": 0.6318, "step": 10267 }, { "epoch": 1.60738885410144, "grad_norm": 2.1862995624542236, "learning_rate": 6.158357771260997e-05, "loss": 0.5148, "step": 10268 }, { "epoch": 1.6075453976205385, "grad_norm": 1.741811990737915, "learning_rate": 6.155913978494623e-05, "loss": 0.8787, "step": 10269 }, { "epoch": 1.607701941139637, "grad_norm": 1.7040098905563354, "learning_rate": 6.15347018572825e-05, "loss": 0.4273, "step": 10270 }, { "epoch": 1.6078584846587352, "grad_norm": 1.6641523838043213, "learning_rate": 6.151026392961877e-05, "loss": 0.5667, "step": 10271 }, { "epoch": 1.6080150281778334, "grad_norm": 1.784555435180664, "learning_rate": 6.148582600195502e-05, "loss": 0.69, "step": 10272 }, { "epoch": 1.6081715716969316, "grad_norm": 4.743298053741455, "learning_rate": 6.146138807429129e-05, "loss": 0.6946, "step": 
10273 }, { "epoch": 1.60832811521603, "grad_norm": 5.469465732574463, "learning_rate": 6.143695014662757e-05, "loss": 1.1488, "step": 10274 }, { "epoch": 1.6084846587351285, "grad_norm": 4.396662712097168, "learning_rate": 6.141251221896382e-05, "loss": 1.2612, "step": 10275 }, { "epoch": 1.6086412022542267, "grad_norm": 2.4530532360076904, "learning_rate": 6.13880742913001e-05, "loss": 0.768, "step": 10276 }, { "epoch": 1.608797745773325, "grad_norm": NaN, "learning_rate": 6.13880742913001e-05, "loss": 0.0, "step": 10277 }, { "epoch": 1.6089542892924233, "grad_norm": 3.0336153507232666, "learning_rate": 6.136363636363636e-05, "loss": 0.9859, "step": 10278 }, { "epoch": 1.6091108328115216, "grad_norm": 2.3858444690704346, "learning_rate": 6.133919843597263e-05, "loss": 0.8229, "step": 10279 }, { "epoch": 1.60926737633062, "grad_norm": 3.8104684352874756, "learning_rate": 6.131476050830889e-05, "loss": 1.1625, "step": 10280 }, { "epoch": 1.6094239198497182, "grad_norm": 1.7261332273483276, "learning_rate": 6.129032258064516e-05, "loss": 0.8723, "step": 10281 }, { "epoch": 1.6095804633688164, "grad_norm": 1.7524932622909546, "learning_rate": 6.126588465298142e-05, "loss": 0.995, "step": 10282 }, { "epoch": 1.6097370068879149, "grad_norm": 2.208996057510376, "learning_rate": 6.124144672531769e-05, "loss": 1.0345, "step": 10283 }, { "epoch": 1.609893550407013, "grad_norm": 9.19688892364502, "learning_rate": 6.121700879765395e-05, "loss": 0.5331, "step": 10284 }, { "epoch": 1.6100500939261115, "grad_norm": 4.053257465362549, "learning_rate": 6.119257086999022e-05, "loss": 0.705, "step": 10285 }, { "epoch": 1.6102066374452098, "grad_norm": 2.6395530700683594, "learning_rate": 6.116813294232648e-05, "loss": 0.8349, "step": 10286 }, { "epoch": 1.610363180964308, "grad_norm": 5.501692771911621, "learning_rate": 6.114369501466276e-05, "loss": 0.9976, "step": 10287 }, { "epoch": 1.6105197244834064, "grad_norm": 2.47517466545105, "learning_rate": 6.111925708699901e-05, "loss": 
1.3165, "step": 10288 }, { "epoch": 1.6106762680025049, "grad_norm": 0.6524291634559631, "learning_rate": 6.109481915933529e-05, "loss": 0.1867, "step": 10289 }, { "epoch": 1.610832811521603, "grad_norm": 0.988003134727478, "learning_rate": 6.107038123167155e-05, "loss": 0.2824, "step": 10290 }, { "epoch": 1.6109893550407013, "grad_norm": 0.47198671102523804, "learning_rate": 6.104594330400782e-05, "loss": 0.1858, "step": 10291 }, { "epoch": 1.6111458985597995, "grad_norm": 0.9640408754348755, "learning_rate": 6.102150537634408e-05, "loss": 0.1501, "step": 10292 }, { "epoch": 1.611302442078898, "grad_norm": 1.5908207893371582, "learning_rate": 6.099706744868035e-05, "loss": 0.2762, "step": 10293 }, { "epoch": 1.6114589855979964, "grad_norm": 0.730593740940094, "learning_rate": 6.097262952101661e-05, "loss": 0.1985, "step": 10294 }, { "epoch": 1.6116155291170946, "grad_norm": 0.45398107171058655, "learning_rate": 6.094819159335288e-05, "loss": 0.2736, "step": 10295 }, { "epoch": 1.6117720726361928, "grad_norm": 1.1660237312316895, "learning_rate": 6.092375366568914e-05, "loss": 0.3241, "step": 10296 }, { "epoch": 1.611928616155291, "grad_norm": 0.7459734678268433, "learning_rate": 6.089931573802541e-05, "loss": 0.2403, "step": 10297 }, { "epoch": 1.6120851596743895, "grad_norm": 1.4114768505096436, "learning_rate": 6.087487781036168e-05, "loss": 0.3583, "step": 10298 }, { "epoch": 1.612241703193488, "grad_norm": 0.7342216968536377, "learning_rate": 6.0850439882697936e-05, "loss": 0.2025, "step": 10299 }, { "epoch": 1.6123982467125861, "grad_norm": 0.8763503432273865, "learning_rate": 6.082600195503421e-05, "loss": 0.2146, "step": 10300 }, { "epoch": 1.6125547902316844, "grad_norm": 1.2437353134155273, "learning_rate": 6.080156402737047e-05, "loss": 0.2597, "step": 10301 }, { "epoch": 1.6127113337507826, "grad_norm": 0.5917118191719055, "learning_rate": 6.077712609970674e-05, "loss": 0.1298, "step": 10302 }, { "epoch": 1.612867877269881, "grad_norm": 
1.689225196838379, "learning_rate": 6.0752688172043e-05, "loss": 0.2692, "step": 10303 }, { "epoch": 1.6130244207889795, "grad_norm": 1.53922438621521, "learning_rate": 6.0728250244379274e-05, "loss": 0.3584, "step": 10304 }, { "epoch": 1.6131809643080777, "grad_norm": 1.4097189903259277, "learning_rate": 6.070381231671553e-05, "loss": 0.4676, "step": 10305 }, { "epoch": 1.6133375078271759, "grad_norm": 2.491358757019043, "learning_rate": 6.0679374389051803e-05, "loss": 0.4225, "step": 10306 }, { "epoch": 1.613494051346274, "grad_norm": 2.754809617996216, "learning_rate": 6.065493646138807e-05, "loss": 0.5883, "step": 10307 }, { "epoch": 1.6136505948653725, "grad_norm": 1.1778233051300049, "learning_rate": 6.063049853372433e-05, "loss": 0.6693, "step": 10308 }, { "epoch": 1.613807138384471, "grad_norm": 0.8539136052131653, "learning_rate": 6.06060606060606e-05, "loss": 0.2029, "step": 10309 }, { "epoch": 1.6139636819035692, "grad_norm": 2.6600699424743652, "learning_rate": 6.058162267839687e-05, "loss": 0.3794, "step": 10310 }, { "epoch": 1.6141202254226674, "grad_norm": 1.350770354270935, "learning_rate": 6.055718475073313e-05, "loss": 0.3923, "step": 10311 }, { "epoch": 1.6142767689417659, "grad_norm": 2.732351303100586, "learning_rate": 6.05327468230694e-05, "loss": 0.4274, "step": 10312 }, { "epoch": 1.614433312460864, "grad_norm": 1.7970898151397705, "learning_rate": 6.0508308895405664e-05, "loss": 0.5387, "step": 10313 }, { "epoch": 1.6145898559799625, "grad_norm": 2.37106990814209, "learning_rate": 6.048387096774193e-05, "loss": 0.4457, "step": 10314 }, { "epoch": 1.6147463994990607, "grad_norm": 1.6102174520492554, "learning_rate": 6.0459433040078193e-05, "loss": 0.6682, "step": 10315 }, { "epoch": 1.614902943018159, "grad_norm": 1.9735941886901855, "learning_rate": 6.0434995112414465e-05, "loss": 0.7914, "step": 10316 }, { "epoch": 1.6150594865372574, "grad_norm": 1.6379317045211792, "learning_rate": 6.041055718475072e-05, "loss": 0.6588, "step": 10317 }, 
{ "epoch": 1.6152160300563556, "grad_norm": 1.541721224784851, "learning_rate": 6.0386119257086995e-05, "loss": 0.6006, "step": 10318 }, { "epoch": 1.615372573575454, "grad_norm": 5.82126522064209, "learning_rate": 6.036168132942326e-05, "loss": 0.6219, "step": 10319 }, { "epoch": 1.6155291170945523, "grad_norm": 3.0988450050354004, "learning_rate": 6.0337243401759524e-05, "loss": 0.4162, "step": 10320 }, { "epoch": 1.6156856606136505, "grad_norm": 3.262086868286133, "learning_rate": 6.031280547409579e-05, "loss": 1.2557, "step": 10321 }, { "epoch": 1.615842204132749, "grad_norm": 1.9713597297668457, "learning_rate": 6.028836754643206e-05, "loss": 0.8396, "step": 10322 }, { "epoch": 1.6159987476518474, "grad_norm": 2.0286524295806885, "learning_rate": 6.026392961876832e-05, "loss": 0.5553, "step": 10323 }, { "epoch": 1.6161552911709456, "grad_norm": 2.3841817378997803, "learning_rate": 6.023949169110459e-05, "loss": 0.5284, "step": 10324 }, { "epoch": 1.6163118346900438, "grad_norm": 2.634639024734497, "learning_rate": 6.0215053763440855e-05, "loss": 0.7772, "step": 10325 }, { "epoch": 1.616468378209142, "grad_norm": 3.448430299758911, "learning_rate": 6.019061583577712e-05, "loss": 0.5422, "step": 10326 }, { "epoch": 1.6166249217282405, "grad_norm": 1.8632174730300903, "learning_rate": 6.0166177908113385e-05, "loss": 1.0043, "step": 10327 }, { "epoch": 1.616781465247339, "grad_norm": 3.0576517581939697, "learning_rate": 6.0141739980449656e-05, "loss": 0.906, "step": 10328 }, { "epoch": 1.6169380087664371, "grad_norm": 6.012516975402832, "learning_rate": 6.0117302052785914e-05, "loss": 1.7748, "step": 10329 }, { "epoch": 1.6170945522855353, "grad_norm": 1.4931806325912476, "learning_rate": 6.0092864125122186e-05, "loss": 0.441, "step": 10330 }, { "epoch": 1.6172510958046336, "grad_norm": 2.6197221279144287, "learning_rate": 6.006842619745845e-05, "loss": 1.212, "step": 10331 }, { "epoch": 1.617407639323732, "grad_norm": 2.8146133422851562, "learning_rate": 
6.0043988269794715e-05, "loss": 0.9955, "step": 10332 }, { "epoch": 1.6175641828428304, "grad_norm": 2.633820056915283, "learning_rate": 6.001955034213098e-05, "loss": 1.4292, "step": 10333 }, { "epoch": 1.6177207263619287, "grad_norm": 5.142744541168213, "learning_rate": 5.999511241446725e-05, "loss": 1.3371, "step": 10334 }, { "epoch": 1.6178772698810269, "grad_norm": 1.9300106763839722, "learning_rate": 5.997067448680351e-05, "loss": 0.5765, "step": 10335 }, { "epoch": 1.618033813400125, "grad_norm": 3.921834945678711, "learning_rate": 5.994623655913978e-05, "loss": 0.6505, "step": 10336 }, { "epoch": 1.6181903569192235, "grad_norm": 2.213167905807495, "learning_rate": 5.9921798631476046e-05, "loss": 0.8817, "step": 10337 }, { "epoch": 1.618346900438322, "grad_norm": 2.0147557258605957, "learning_rate": 5.989736070381231e-05, "loss": 0.6968, "step": 10338 }, { "epoch": 1.6185034439574202, "grad_norm": 0.6969068646430969, "learning_rate": 5.9872922776148576e-05, "loss": 0.1943, "step": 10339 }, { "epoch": 1.6186599874765184, "grad_norm": 0.5584667921066284, "learning_rate": 5.984848484848485e-05, "loss": 0.18, "step": 10340 }, { "epoch": 1.6188165309956166, "grad_norm": 0.6967960596084595, "learning_rate": 5.9824046920821105e-05, "loss": 0.2321, "step": 10341 }, { "epoch": 1.618973074514715, "grad_norm": 0.3680100440979004, "learning_rate": 5.979960899315738e-05, "loss": 0.135, "step": 10342 }, { "epoch": 1.6191296180338135, "grad_norm": 0.755924642086029, "learning_rate": 5.977517106549364e-05, "loss": 0.2955, "step": 10343 }, { "epoch": 1.6192861615529117, "grad_norm": 2.2289376258850098, "learning_rate": 5.9750733137829907e-05, "loss": 0.2681, "step": 10344 }, { "epoch": 1.61944270507201, "grad_norm": 1.0998948812484741, "learning_rate": 5.972629521016617e-05, "loss": 0.1985, "step": 10345 }, { "epoch": 1.6195992485911084, "grad_norm": 0.6350347399711609, "learning_rate": 5.970185728250244e-05, "loss": 0.266, "step": 10346 }, { "epoch": 1.6197557921102066, 
"grad_norm": 0.7343830466270447, "learning_rate": 5.96774193548387e-05, "loss": 0.1386, "step": 10347 }, { "epoch": 1.619912335629305, "grad_norm": 1.0701186656951904, "learning_rate": 5.965298142717497e-05, "loss": 0.4067, "step": 10348 }, { "epoch": 1.6200688791484033, "grad_norm": 1.645283818244934, "learning_rate": 5.962854349951124e-05, "loss": 0.5609, "step": 10349 }, { "epoch": 1.6202254226675015, "grad_norm": 1.122719168663025, "learning_rate": 5.96041055718475e-05, "loss": 0.2413, "step": 10350 }, { "epoch": 1.6203819661866, "grad_norm": 0.7150799036026001, "learning_rate": 5.957966764418377e-05, "loss": 0.3426, "step": 10351 }, { "epoch": 1.6205385097056983, "grad_norm": 0.598060131072998, "learning_rate": 5.955522971652004e-05, "loss": 0.1765, "step": 10352 }, { "epoch": 1.6206950532247966, "grad_norm": 0.8176664710044861, "learning_rate": 5.9530791788856297e-05, "loss": 0.29, "step": 10353 }, { "epoch": 1.6208515967438948, "grad_norm": 1.5242834091186523, "learning_rate": 5.950635386119257e-05, "loss": 0.3729, "step": 10354 }, { "epoch": 1.621008140262993, "grad_norm": 1.9225590229034424, "learning_rate": 5.948191593352883e-05, "loss": 0.7854, "step": 10355 }, { "epoch": 1.6211646837820914, "grad_norm": 1.2514513731002808, "learning_rate": 5.94574780058651e-05, "loss": 0.5056, "step": 10356 }, { "epoch": 1.6213212273011899, "grad_norm": 1.7647795677185059, "learning_rate": 5.943304007820136e-05, "loss": 0.4021, "step": 10357 }, { "epoch": 1.621477770820288, "grad_norm": 0.9567508697509766, "learning_rate": 5.9408602150537634e-05, "loss": 0.3619, "step": 10358 }, { "epoch": 1.6216343143393863, "grad_norm": 1.5597784519195557, "learning_rate": 5.938416422287389e-05, "loss": 0.4833, "step": 10359 }, { "epoch": 1.6217908578584845, "grad_norm": 2.9853146076202393, "learning_rate": 5.9359726295210164e-05, "loss": 0.4786, "step": 10360 }, { "epoch": 1.621947401377583, "grad_norm": 1.8324689865112305, "learning_rate": 5.933528836754643e-05, "loss": 0.4453, 
"step": 10361 }, { "epoch": 1.6221039448966814, "grad_norm": 2.3607141971588135, "learning_rate": 5.931085043988269e-05, "loss": 0.5207, "step": 10362 }, { "epoch": 1.6222604884157796, "grad_norm": 1.914950966835022, "learning_rate": 5.928641251221896e-05, "loss": 0.5193, "step": 10363 }, { "epoch": 1.6224170319348779, "grad_norm": 2.1781463623046875, "learning_rate": 5.926197458455523e-05, "loss": 0.6317, "step": 10364 }, { "epoch": 1.622573575453976, "grad_norm": 4.128535270690918, "learning_rate": 5.923753665689149e-05, "loss": 0.7715, "step": 10365 }, { "epoch": 1.6227301189730745, "grad_norm": 3.1875927448272705, "learning_rate": 5.921309872922776e-05, "loss": 0.7828, "step": 10366 }, { "epoch": 1.622886662492173, "grad_norm": 1.828016996383667, "learning_rate": 5.9188660801564024e-05, "loss": 0.6662, "step": 10367 }, { "epoch": 1.6230432060112712, "grad_norm": 2.3392691612243652, "learning_rate": 5.916422287390029e-05, "loss": 0.647, "step": 10368 }, { "epoch": 1.6231997495303694, "grad_norm": 2.4134435653686523, "learning_rate": 5.9139784946236554e-05, "loss": 0.8108, "step": 10369 }, { "epoch": 1.6233562930494676, "grad_norm": 0.9342465996742249, "learning_rate": 5.9115347018572825e-05, "loss": 0.326, "step": 10370 }, { "epoch": 1.623512836568566, "grad_norm": 2.0880491733551025, "learning_rate": 5.909090909090908e-05, "loss": 0.6187, "step": 10371 }, { "epoch": 1.6236693800876645, "grad_norm": 1.288293719291687, "learning_rate": 5.9066471163245355e-05, "loss": 0.5137, "step": 10372 }, { "epoch": 1.6238259236067627, "grad_norm": 2.43259596824646, "learning_rate": 5.904203323558162e-05, "loss": 0.7306, "step": 10373 }, { "epoch": 1.623982467125861, "grad_norm": 1.6936664581298828, "learning_rate": 5.901759530791788e-05, "loss": 0.7173, "step": 10374 }, { "epoch": 1.6241390106449591, "grad_norm": 1.1513222455978394, "learning_rate": 5.899315738025415e-05, "loss": 0.3325, "step": 10375 }, { "epoch": 1.6242955541640576, "grad_norm": 2.4946534633636475, 
"learning_rate": 5.896871945259042e-05, "loss": 0.8307, "step": 10376 }, { "epoch": 1.624452097683156, "grad_norm": 3.35575270652771, "learning_rate": 5.894428152492668e-05, "loss": 1.3256, "step": 10377 }, { "epoch": 1.6246086412022542, "grad_norm": 8.07103157043457, "learning_rate": 5.891984359726295e-05, "loss": 0.6945, "step": 10378 }, { "epoch": 1.6247651847213525, "grad_norm": 2.7447383403778076, "learning_rate": 5.8895405669599215e-05, "loss": 0.9647, "step": 10379 }, { "epoch": 1.624921728240451, "grad_norm": 2.4406001567840576, "learning_rate": 5.887096774193547e-05, "loss": 1.3698, "step": 10380 }, { "epoch": 1.625078271759549, "grad_norm": 6.9520978927612305, "learning_rate": 5.8846529814271745e-05, "loss": 0.7408, "step": 10381 }, { "epoch": 1.6252348152786475, "grad_norm": 2.9394330978393555, "learning_rate": 5.8822091886608016e-05, "loss": 1.0021, "step": 10382 }, { "epoch": 1.6253913587977458, "grad_norm": 3.4917502403259277, "learning_rate": 5.8797653958944274e-05, "loss": 1.1361, "step": 10383 }, { "epoch": 1.625547902316844, "grad_norm": 1.3461923599243164, "learning_rate": 5.8773216031280546e-05, "loss": 0.2525, "step": 10384 }, { "epoch": 1.6257044458359424, "grad_norm": 1.8824841976165771, "learning_rate": 5.874877810361681e-05, "loss": 0.7553, "step": 10385 }, { "epoch": 1.6258609893550409, "grad_norm": 1.4790881872177124, "learning_rate": 5.872434017595307e-05, "loss": 0.5558, "step": 10386 }, { "epoch": 1.626017532874139, "grad_norm": 1.9646244049072266, "learning_rate": 5.869990224828934e-05, "loss": 0.4602, "step": 10387 }, { "epoch": 1.6261740763932373, "grad_norm": 1.7830438613891602, "learning_rate": 5.867546432062561e-05, "loss": 0.5251, "step": 10388 }, { "epoch": 1.6263306199123355, "grad_norm": 0.5768091678619385, "learning_rate": 5.865102639296187e-05, "loss": 0.2012, "step": 10389 }, { "epoch": 1.626487163431434, "grad_norm": 1.113883137702942, "learning_rate": 5.862658846529814e-05, "loss": 0.1778, "step": 10390 }, { "epoch": 
1.6266437069505324, "grad_norm": 0.5060340762138367, "learning_rate": 5.8602150537634406e-05, "loss": 0.1855, "step": 10391 }, { "epoch": 1.6268002504696306, "grad_norm": 0.514045000076294, "learning_rate": 5.8577712609970664e-05, "loss": 0.1701, "step": 10392 }, { "epoch": 1.6269567939887288, "grad_norm": 0.860876202583313, "learning_rate": 5.8553274682306936e-05, "loss": 0.2594, "step": 10393 }, { "epoch": 1.627113337507827, "grad_norm": 1.2746233940124512, "learning_rate": 5.852883675464321e-05, "loss": 0.1743, "step": 10394 }, { "epoch": 1.6272698810269255, "grad_norm": 0.68976891040802, "learning_rate": 5.8504398826979466e-05, "loss": 0.203, "step": 10395 }, { "epoch": 1.627426424546024, "grad_norm": 0.48356834053993225, "learning_rate": 5.847996089931573e-05, "loss": 0.226, "step": 10396 }, { "epoch": 1.6275829680651221, "grad_norm": 1.8807826042175293, "learning_rate": 5.8455522971652e-05, "loss": 0.1366, "step": 10397 }, { "epoch": 1.6277395115842204, "grad_norm": 0.7366307377815247, "learning_rate": 5.843108504398826e-05, "loss": 0.3315, "step": 10398 }, { "epoch": 1.6278960551033186, "grad_norm": 0.8928070068359375, "learning_rate": 5.840664711632453e-05, "loss": 0.2295, "step": 10399 }, { "epoch": 1.628052598622417, "grad_norm": 0.8314988613128662, "learning_rate": 5.83822091886608e-05, "loss": 0.2468, "step": 10400 }, { "epoch": 1.6282091421415155, "grad_norm": 1.0625923871994019, "learning_rate": 5.835777126099706e-05, "loss": 0.2625, "step": 10401 }, { "epoch": 1.6283656856606137, "grad_norm": 3.7548062801361084, "learning_rate": 5.8333333333333326e-05, "loss": 0.4308, "step": 10402 }, { "epoch": 1.628522229179712, "grad_norm": 2.1772689819335938, "learning_rate": 5.83088954056696e-05, "loss": 0.3888, "step": 10403 }, { "epoch": 1.6286787726988101, "grad_norm": 1.233253002166748, "learning_rate": 5.8284457478005856e-05, "loss": 0.2004, "step": 10404 }, { "epoch": 1.6288353162179086, "grad_norm": 1.6462030410766602, "learning_rate": 
5.826001955034213e-05, "loss": 0.3962, "step": 10405 }, { "epoch": 1.628991859737007, "grad_norm": 7.115745544433594, "learning_rate": 5.82355816226784e-05, "loss": 0.3691, "step": 10406 }, { "epoch": 1.6291484032561052, "grad_norm": 1.0875389575958252, "learning_rate": 5.821114369501466e-05, "loss": 0.2721, "step": 10407 }, { "epoch": 1.6293049467752034, "grad_norm": 1.3013725280761719, "learning_rate": 5.818670576735092e-05, "loss": 0.4565, "step": 10408 }, { "epoch": 1.6294614902943017, "grad_norm": 2.092606782913208, "learning_rate": 5.816226783968719e-05, "loss": 0.5847, "step": 10409 }, { "epoch": 1.6296180338134, "grad_norm": 1.729488492012024, "learning_rate": 5.813782991202345e-05, "loss": 0.32, "step": 10410 }, { "epoch": 1.6297745773324985, "grad_norm": 1.737029790878296, "learning_rate": 5.811339198435972e-05, "loss": 0.2449, "step": 10411 }, { "epoch": 1.6299311208515967, "grad_norm": 2.1841983795166016, "learning_rate": 5.8088954056695994e-05, "loss": 0.7863, "step": 10412 }, { "epoch": 1.630087664370695, "grad_norm": 0.9990864992141724, "learning_rate": 5.806451612903225e-05, "loss": 0.2027, "step": 10413 }, { "epoch": 1.6302442078897934, "grad_norm": 2.165163040161133, "learning_rate": 5.804007820136852e-05, "loss": 0.5477, "step": 10414 }, { "epoch": 1.6304007514088916, "grad_norm": 5.074351787567139, "learning_rate": 5.801564027370479e-05, "loss": 0.5501, "step": 10415 }, { "epoch": 1.63055729492799, "grad_norm": 1.8900859355926514, "learning_rate": 5.799120234604105e-05, "loss": 0.4008, "step": 10416 }, { "epoch": 1.6307138384470883, "grad_norm": 1.9944469928741455, "learning_rate": 5.796676441837732e-05, "loss": 0.4468, "step": 10417 }, { "epoch": 1.6308703819661865, "grad_norm": 2.274146795272827, "learning_rate": 5.794232649071358e-05, "loss": 0.7135, "step": 10418 }, { "epoch": 1.631026925485285, "grad_norm": 3.8034324645996094, "learning_rate": 5.791788856304985e-05, "loss": 0.4055, "step": 10419 }, { "epoch": 1.6311834690043834, 
"grad_norm": 2.4638800621032715, "learning_rate": 5.789345063538611e-05, "loss": 0.8925, "step": 10420 }, { "epoch": 1.6313400125234816, "grad_norm": 5.541257858276367, "learning_rate": 5.7869012707722384e-05, "loss": 0.8966, "step": 10421 }, { "epoch": 1.6314965560425798, "grad_norm": 6.215266704559326, "learning_rate": 5.784457478005864e-05, "loss": 0.6739, "step": 10422 }, { "epoch": 1.631653099561678, "grad_norm": 1.26706862449646, "learning_rate": 5.7820136852394914e-05, "loss": 0.5802, "step": 10423 }, { "epoch": 1.6318096430807765, "grad_norm": 2.1300575733184814, "learning_rate": 5.779569892473117e-05, "loss": 0.6051, "step": 10424 }, { "epoch": 1.631966186599875, "grad_norm": 4.257119178771973, "learning_rate": 5.7771260997067443e-05, "loss": 1.0206, "step": 10425 }, { "epoch": 1.6321227301189731, "grad_norm": 2.297468662261963, "learning_rate": 5.774682306940371e-05, "loss": 0.7068, "step": 10426 }, { "epoch": 1.6322792736380713, "grad_norm": 3.372777223587036, "learning_rate": 5.772238514173997e-05, "loss": 0.7299, "step": 10427 }, { "epoch": 1.6324358171571696, "grad_norm": 2.6699585914611816, "learning_rate": 5.769794721407624e-05, "loss": 1.2506, "step": 10428 }, { "epoch": 1.632592360676268, "grad_norm": 2.306000232696533, "learning_rate": 5.767350928641251e-05, "loss": 0.8118, "step": 10429 }, { "epoch": 1.6327489041953664, "grad_norm": 3.001568078994751, "learning_rate": 5.764907135874877e-05, "loss": 0.8326, "step": 10430 }, { "epoch": 1.6329054477144647, "grad_norm": 2.3206942081451416, "learning_rate": 5.762463343108504e-05, "loss": 1.0572, "step": 10431 }, { "epoch": 1.6330619912335629, "grad_norm": 9.964920043945312, "learning_rate": 5.7600195503421304e-05, "loss": 1.1929, "step": 10432 }, { "epoch": 1.633218534752661, "grad_norm": 2.1716456413269043, "learning_rate": 5.757575757575757e-05, "loss": 0.7854, "step": 10433 }, { "epoch": 1.6333750782717595, "grad_norm": 3.3193044662475586, "learning_rate": 5.7551319648093833e-05, "loss": 0.4832, 
"step": 10434 }, { "epoch": 1.633531621790858, "grad_norm": 3.8049163818359375, "learning_rate": 5.7526881720430105e-05, "loss": 0.343, "step": 10435 }, { "epoch": 1.6336881653099562, "grad_norm": 3.3288490772247314, "learning_rate": 5.750244379276636e-05, "loss": 0.7935, "step": 10436 }, { "epoch": 1.6338447088290544, "grad_norm": 1.9719550609588623, "learning_rate": 5.7478005865102635e-05, "loss": 1.2361, "step": 10437 }, { "epoch": 1.6340012523481526, "grad_norm": 2.7374227046966553, "learning_rate": 5.74535679374389e-05, "loss": 1.1997, "step": 10438 }, { "epoch": 1.634157795867251, "grad_norm": 0.5598339438438416, "learning_rate": 5.7429130009775164e-05, "loss": 0.1796, "step": 10439 }, { "epoch": 1.6343143393863495, "grad_norm": 0.9065269231796265, "learning_rate": 5.740469208211143e-05, "loss": 0.2172, "step": 10440 }, { "epoch": 1.6344708829054477, "grad_norm": 1.6582931280136108, "learning_rate": 5.73802541544477e-05, "loss": 0.1974, "step": 10441 }, { "epoch": 1.634627426424546, "grad_norm": 0.5348918437957764, "learning_rate": 5.735581622678396e-05, "loss": 0.2214, "step": 10442 }, { "epoch": 1.6347839699436444, "grad_norm": 2.297356367111206, "learning_rate": 5.733137829912023e-05, "loss": 0.1988, "step": 10443 }, { "epoch": 1.6349405134627426, "grad_norm": 0.5752388834953308, "learning_rate": 5.7306940371456495e-05, "loss": 0.1346, "step": 10444 }, { "epoch": 1.635097056981841, "grad_norm": 0.8799678683280945, "learning_rate": 5.728250244379276e-05, "loss": 0.2363, "step": 10445 }, { "epoch": 1.6352536005009393, "grad_norm": 0.6415383219718933, "learning_rate": 5.7258064516129025e-05, "loss": 0.2131, "step": 10446 }, { "epoch": 1.6354101440200375, "grad_norm": 17.69931411743164, "learning_rate": 5.7233626588465296e-05, "loss": 0.344, "step": 10447 }, { "epoch": 1.635566687539136, "grad_norm": 1.341117262840271, "learning_rate": 5.7209188660801554e-05, "loss": 0.2848, "step": 10448 }, { "epoch": 1.6357232310582341, "grad_norm": 1.4106292724609375, 
"learning_rate": 5.7184750733137826e-05, "loss": 0.238, "step": 10449 }, { "epoch": 1.6358797745773326, "grad_norm": 1.0851185321807861, "learning_rate": 5.716031280547409e-05, "loss": 0.3487, "step": 10450 }, { "epoch": 1.6360363180964308, "grad_norm": 1.2558451890945435, "learning_rate": 5.7135874877810355e-05, "loss": 0.4459, "step": 10451 }, { "epoch": 1.636192861615529, "grad_norm": 2.4315550327301025, "learning_rate": 5.711143695014662e-05, "loss": 0.3275, "step": 10452 }, { "epoch": 1.6363494051346275, "grad_norm": 1.1903713941574097, "learning_rate": 5.708699902248289e-05, "loss": 0.5321, "step": 10453 }, { "epoch": 1.636505948653726, "grad_norm": 2.3874330520629883, "learning_rate": 5.706256109481915e-05, "loss": 0.5209, "step": 10454 }, { "epoch": 1.6366624921728241, "grad_norm": 2.2533371448516846, "learning_rate": 5.703812316715542e-05, "loss": 0.4226, "step": 10455 }, { "epoch": 1.6368190356919223, "grad_norm": 1.0902347564697266, "learning_rate": 5.7013685239491686e-05, "loss": 0.205, "step": 10456 }, { "epoch": 1.6369755792110205, "grad_norm": 2.033475875854492, "learning_rate": 5.698924731182795e-05, "loss": 0.1742, "step": 10457 }, { "epoch": 1.637132122730119, "grad_norm": 1.488540530204773, "learning_rate": 5.6964809384164216e-05, "loss": 0.2724, "step": 10458 }, { "epoch": 1.6372886662492174, "grad_norm": 3.625598192214966, "learning_rate": 5.694037145650049e-05, "loss": 0.6245, "step": 10459 }, { "epoch": 1.6374452097683156, "grad_norm": 3.725924015045166, "learning_rate": 5.6915933528836745e-05, "loss": 0.3794, "step": 10460 }, { "epoch": 1.6376017532874139, "grad_norm": 1.230906367301941, "learning_rate": 5.689149560117302e-05, "loss": 0.2737, "step": 10461 }, { "epoch": 1.637758296806512, "grad_norm": 3.53603196144104, "learning_rate": 5.686705767350928e-05, "loss": 0.5026, "step": 10462 }, { "epoch": 1.6379148403256105, "grad_norm": 1.3804330825805664, "learning_rate": 5.6842619745845547e-05, "loss": 0.2446, "step": 10463 }, { "epoch": 
1.638071383844709, "grad_norm": 2.4402577877044678, "learning_rate": 5.681818181818181e-05, "loss": 0.4187, "step": 10464 }, { "epoch": 1.6382279273638072, "grad_norm": 1.448628306388855, "learning_rate": 5.679374389051808e-05, "loss": 0.5199, "step": 10465 }, { "epoch": 1.6383844708829054, "grad_norm": 3.0513761043548584, "learning_rate": 5.676930596285434e-05, "loss": 0.7834, "step": 10466 }, { "epoch": 1.6385410144020036, "grad_norm": 2.2523458003997803, "learning_rate": 5.674486803519061e-05, "loss": 0.8243, "step": 10467 }, { "epoch": 1.638697557921102, "grad_norm": 2.0097694396972656, "learning_rate": 5.672043010752688e-05, "loss": 0.3212, "step": 10468 }, { "epoch": 1.6388541014402005, "grad_norm": 4.3316874504089355, "learning_rate": 5.669599217986314e-05, "loss": 0.6163, "step": 10469 }, { "epoch": 1.6390106449592987, "grad_norm": 1.4377896785736084, "learning_rate": 5.667155425219941e-05, "loss": 0.5379, "step": 10470 }, { "epoch": 1.639167188478397, "grad_norm": 3.63924241065979, "learning_rate": 5.664711632453568e-05, "loss": 0.8601, "step": 10471 }, { "epoch": 1.6393237319974951, "grad_norm": 2.5112884044647217, "learning_rate": 5.6622678396871937e-05, "loss": 0.6937, "step": 10472 }, { "epoch": 1.6394802755165936, "grad_norm": 3.881232500076294, "learning_rate": 5.659824046920821e-05, "loss": 1.0227, "step": 10473 }, { "epoch": 1.639636819035692, "grad_norm": 1.790979027748108, "learning_rate": 5.657380254154447e-05, "loss": 0.9926, "step": 10474 }, { "epoch": 1.6397933625547902, "grad_norm": 4.465099811553955, "learning_rate": 5.654936461388074e-05, "loss": 0.9959, "step": 10475 }, { "epoch": 1.6399499060738885, "grad_norm": 1.5608214139938354, "learning_rate": 5.6524926686217e-05, "loss": 0.4801, "step": 10476 }, { "epoch": 1.640106449592987, "grad_norm": 3.728848457336426, "learning_rate": 5.6500488758553274e-05, "loss": 0.6256, "step": 10477 }, { "epoch": 1.6402629931120851, "grad_norm": 1.7102699279785156, "learning_rate": 5.647605083088953e-05, 
"loss": 1.2323, "step": 10478 }, { "epoch": 1.6404195366311836, "grad_norm": 3.236166477203369, "learning_rate": 5.6451612903225804e-05, "loss": 1.4178, "step": 10479 }, { "epoch": 1.6405760801502818, "grad_norm": 6.140420436859131, "learning_rate": 5.642717497556207e-05, "loss": 1.1856, "step": 10480 }, { "epoch": 1.64073262366938, "grad_norm": 2.2286741733551025, "learning_rate": 5.640273704789833e-05, "loss": 0.6144, "step": 10481 }, { "epoch": 1.6408891671884784, "grad_norm": 2.5856056213378906, "learning_rate": 5.63782991202346e-05, "loss": 1.2648, "step": 10482 }, { "epoch": 1.6410457107075767, "grad_norm": 3.3307247161865234, "learning_rate": 5.635386119257087e-05, "loss": 0.9104, "step": 10483 }, { "epoch": 1.641202254226675, "grad_norm": 2.219688892364502, "learning_rate": 5.632942326490713e-05, "loss": 0.9191, "step": 10484 }, { "epoch": 1.6413587977457733, "grad_norm": 3.4340312480926514, "learning_rate": 5.63049853372434e-05, "loss": 0.5855, "step": 10485 }, { "epoch": 1.6415153412648715, "grad_norm": 1.018791913986206, "learning_rate": 5.6280547409579664e-05, "loss": 0.3031, "step": 10486 }, { "epoch": 1.64167188478397, "grad_norm": 3.0899922847747803, "learning_rate": 5.625610948191593e-05, "loss": 1.0541, "step": 10487 }, { "epoch": 1.6418284283030684, "grad_norm": 2.5007576942443848, "learning_rate": 5.6231671554252194e-05, "loss": 1.1517, "step": 10488 }, { "epoch": 1.6419849718221666, "grad_norm": 0.45454585552215576, "learning_rate": 5.6207233626588465e-05, "loss": 0.1584, "step": 10489 }, { "epoch": 1.6421415153412648, "grad_norm": 0.7792069911956787, "learning_rate": 5.618279569892472e-05, "loss": 0.2598, "step": 10490 }, { "epoch": 1.642298058860363, "grad_norm": 1.4265599250793457, "learning_rate": 5.6158357771260995e-05, "loss": 0.2881, "step": 10491 }, { "epoch": 1.6424546023794615, "grad_norm": 1.610418438911438, "learning_rate": 5.613391984359726e-05, "loss": 0.2933, "step": 10492 }, { "epoch": 1.64261114589856, "grad_norm": 
0.5651922225952148, "learning_rate": 5.6109481915933524e-05, "loss": 0.2019, "step": 10493 }, { "epoch": 1.6427676894176582, "grad_norm": 0.757182240486145, "learning_rate": 5.608504398826979e-05, "loss": 0.1858, "step": 10494 }, { "epoch": 1.6429242329367564, "grad_norm": 0.4835294783115387, "learning_rate": 5.606060606060606e-05, "loss": 0.1283, "step": 10495 }, { "epoch": 1.6430807764558546, "grad_norm": 0.6997655034065247, "learning_rate": 5.603616813294232e-05, "loss": 0.224, "step": 10496 }, { "epoch": 1.643237319974953, "grad_norm": 1.8372862339019775, "learning_rate": 5.601173020527859e-05, "loss": 0.4684, "step": 10497 }, { "epoch": 1.6433938634940515, "grad_norm": 1.1879760026931763, "learning_rate": 5.5987292277614855e-05, "loss": 0.2866, "step": 10498 }, { "epoch": 1.6435504070131497, "grad_norm": 0.4589191973209381, "learning_rate": 5.596285434995112e-05, "loss": 0.2133, "step": 10499 }, { "epoch": 1.643706950532248, "grad_norm": 0.8576350212097168, "learning_rate": 5.5938416422287385e-05, "loss": 0.4608, "step": 10500 }, { "epoch": 1.6438634940513461, "grad_norm": 0.8671181201934814, "learning_rate": 5.5913978494623656e-05, "loss": 0.2109, "step": 10501 }, { "epoch": 1.6440200375704446, "grad_norm": 1.038217306137085, "learning_rate": 5.5889540566959914e-05, "loss": 0.3122, "step": 10502 }, { "epoch": 1.644176581089543, "grad_norm": 2.1841461658477783, "learning_rate": 5.5865102639296186e-05, "loss": 0.5782, "step": 10503 }, { "epoch": 1.6443331246086412, "grad_norm": 1.437780737876892, "learning_rate": 5.584066471163245e-05, "loss": 0.4222, "step": 10504 }, { "epoch": 1.6444896681277394, "grad_norm": 1.98343825340271, "learning_rate": 5.581622678396871e-05, "loss": 0.3952, "step": 10505 }, { "epoch": 1.6446462116468377, "grad_norm": 1.1477174758911133, "learning_rate": 5.579178885630498e-05, "loss": 0.42, "step": 10506 }, { "epoch": 1.644802755165936, "grad_norm": 1.4632834196090698, "learning_rate": 5.576735092864125e-05, "loss": 0.4185, "step": 
10507 }, { "epoch": 1.6449592986850345, "grad_norm": 1.2096794843673706, "learning_rate": 5.574291300097751e-05, "loss": 0.3385, "step": 10508 }, { "epoch": 1.6451158422041328, "grad_norm": 1.2012349367141724, "learning_rate": 5.571847507331378e-05, "loss": 0.3774, "step": 10509 }, { "epoch": 1.645272385723231, "grad_norm": 1.8027079105377197, "learning_rate": 5.5694037145650046e-05, "loss": 0.7032, "step": 10510 }, { "epoch": 1.6454289292423294, "grad_norm": 2.5102810859680176, "learning_rate": 5.5669599217986304e-05, "loss": 0.7242, "step": 10511 }, { "epoch": 1.6455854727614276, "grad_norm": 2.101835250854492, "learning_rate": 5.5645161290322576e-05, "loss": 0.5148, "step": 10512 }, { "epoch": 1.645742016280526, "grad_norm": 1.309965968132019, "learning_rate": 5.562072336265885e-05, "loss": 0.3692, "step": 10513 }, { "epoch": 1.6458985597996243, "grad_norm": 2.4885432720184326, "learning_rate": 5.5596285434995106e-05, "loss": 0.6005, "step": 10514 }, { "epoch": 1.6460551033187225, "grad_norm": 2.1940715312957764, "learning_rate": 5.557184750733138e-05, "loss": 0.6397, "step": 10515 }, { "epoch": 1.646211646837821, "grad_norm": 4.189990520477295, "learning_rate": 5.554740957966764e-05, "loss": 0.8497, "step": 10516 }, { "epoch": 1.6463681903569192, "grad_norm": 1.0179821252822876, "learning_rate": 5.55229716520039e-05, "loss": 0.3263, "step": 10517 }, { "epoch": 1.6465247338760176, "grad_norm": 2.343964099884033, "learning_rate": 5.549853372434017e-05, "loss": 0.2546, "step": 10518 }, { "epoch": 1.6466812773951158, "grad_norm": 2.5637285709381104, "learning_rate": 5.547409579667644e-05, "loss": 0.4887, "step": 10519 }, { "epoch": 1.646837820914214, "grad_norm": 1.819643497467041, "learning_rate": 5.54496578690127e-05, "loss": 0.623, "step": 10520 }, { "epoch": 1.6469943644333125, "grad_norm": 2.0592472553253174, "learning_rate": 5.542521994134897e-05, "loss": 0.3924, "step": 10521 }, { "epoch": 1.647150907952411, "grad_norm": 1.9924129247665405, "learning_rate": 
5.540078201368524e-05, "loss": 0.4991, "step": 10522 }, { "epoch": 1.6473074514715091, "grad_norm": 1.941512942314148, "learning_rate": 5.5376344086021496e-05, "loss": 0.857, "step": 10523 }, { "epoch": 1.6474639949906074, "grad_norm": 1.9502965211868286, "learning_rate": 5.535190615835777e-05, "loss": 1.019, "step": 10524 }, { "epoch": 1.6476205385097056, "grad_norm": 2.0973851680755615, "learning_rate": 5.532746823069404e-05, "loss": 0.5463, "step": 10525 }, { "epoch": 1.647777082028804, "grad_norm": 2.2591187953948975, "learning_rate": 5.53030303030303e-05, "loss": 0.5572, "step": 10526 }, { "epoch": 1.6479336255479025, "grad_norm": 2.8180580139160156, "learning_rate": 5.527859237536656e-05, "loss": 1.1269, "step": 10527 }, { "epoch": 1.6480901690670007, "grad_norm": 3.1859171390533447, "learning_rate": 5.525415444770283e-05, "loss": 1.1379, "step": 10528 }, { "epoch": 1.648246712586099, "grad_norm": 2.108909845352173, "learning_rate": 5.522971652003909e-05, "loss": 0.3368, "step": 10529 }, { "epoch": 1.6484032561051971, "grad_norm": 9.457693099975586, "learning_rate": 5.520527859237536e-05, "loss": 1.0132, "step": 10530 }, { "epoch": 1.6485597996242956, "grad_norm": 4.089739799499512, "learning_rate": 5.5180840664711634e-05, "loss": 1.6879, "step": 10531 }, { "epoch": 1.648716343143394, "grad_norm": 1.7401306629180908, "learning_rate": 5.515640273704789e-05, "loss": 1.105, "step": 10532 }, { "epoch": 1.6488728866624922, "grad_norm": 2.562828540802002, "learning_rate": 5.513196480938416e-05, "loss": 1.4396, "step": 10533 }, { "epoch": 1.6490294301815904, "grad_norm": 3.0045950412750244, "learning_rate": 5.510752688172043e-05, "loss": 0.8768, "step": 10534 }, { "epoch": 1.6491859737006886, "grad_norm": 5.166981220245361, "learning_rate": 5.508308895405669e-05, "loss": 0.6006, "step": 10535 }, { "epoch": 1.649342517219787, "grad_norm": 4.5421929359436035, "learning_rate": 5.505865102639296e-05, "loss": 0.7751, "step": 10536 }, { "epoch": 1.6494990607388855, 
"grad_norm": 1.448534369468689, "learning_rate": 5.503421309872923e-05, "loss": 0.5581, "step": 10537 }, { "epoch": 1.6496556042579837, "grad_norm": 1.7535744905471802, "learning_rate": 5.500977517106549e-05, "loss": 0.8802, "step": 10538 }, { "epoch": 1.649812147777082, "grad_norm": 0.6873712539672852, "learning_rate": 5.498533724340175e-05, "loss": 0.1992, "step": 10539 }, { "epoch": 1.6499686912961802, "grad_norm": 0.7089383006095886, "learning_rate": 5.4960899315738024e-05, "loss": 0.195, "step": 10540 }, { "epoch": 1.6501252348152786, "grad_norm": 0.3983263671398163, "learning_rate": 5.493646138807428e-05, "loss": 0.1743, "step": 10541 }, { "epoch": 1.650281778334377, "grad_norm": 0.44565579295158386, "learning_rate": 5.4912023460410554e-05, "loss": 0.1656, "step": 10542 }, { "epoch": 1.6504383218534753, "grad_norm": 0.6502916216850281, "learning_rate": 5.4887585532746825e-05, "loss": 0.245, "step": 10543 }, { "epoch": 1.6505948653725735, "grad_norm": 0.8741419315338135, "learning_rate": 5.4863147605083083e-05, "loss": 0.275, "step": 10544 }, { "epoch": 1.650751408891672, "grad_norm": 0.5584045052528381, "learning_rate": 5.483870967741935e-05, "loss": 0.2486, "step": 10545 }, { "epoch": 1.6509079524107702, "grad_norm": 0.7461156845092773, "learning_rate": 5.481427174975562e-05, "loss": 0.3131, "step": 10546 }, { "epoch": 1.6510644959298686, "grad_norm": 0.6761148571968079, "learning_rate": 5.478983382209188e-05, "loss": 0.1941, "step": 10547 }, { "epoch": 1.6512210394489668, "grad_norm": 1.4514672756195068, "learning_rate": 5.476539589442815e-05, "loss": 0.2663, "step": 10548 }, { "epoch": 1.651377582968065, "grad_norm": 0.8751680850982666, "learning_rate": 5.474095796676442e-05, "loss": 0.2844, "step": 10549 }, { "epoch": 1.6515341264871635, "grad_norm": 1.7713919878005981, "learning_rate": 5.471652003910068e-05, "loss": 0.2972, "step": 10550 }, { "epoch": 1.6516906700062617, "grad_norm": 1.0611058473587036, "learning_rate": 5.4692082111436944e-05, "loss": 
0.3436, "step": 10551 }, { "epoch": 1.6518472135253601, "grad_norm": 0.7453222274780273, "learning_rate": 5.466764418377321e-05, "loss": 0.2195, "step": 10552 }, { "epoch": 1.6520037570444583, "grad_norm": 2.404334783554077, "learning_rate": 5.4643206256109473e-05, "loss": 0.3868, "step": 10553 }, { "epoch": 1.6521603005635566, "grad_norm": 0.9054138660430908, "learning_rate": 5.4618768328445745e-05, "loss": 0.21, "step": 10554 }, { "epoch": 1.652316844082655, "grad_norm": 2.539930820465088, "learning_rate": 5.4594330400782e-05, "loss": 0.3469, "step": 10555 }, { "epoch": 1.6524733876017534, "grad_norm": 1.4532172679901123, "learning_rate": 5.4569892473118275e-05, "loss": 0.3711, "step": 10556 }, { "epoch": 1.6526299311208517, "grad_norm": 1.0003594160079956, "learning_rate": 5.454545454545454e-05, "loss": 0.2647, "step": 10557 }, { "epoch": 1.6527864746399499, "grad_norm": 1.0202223062515259, "learning_rate": 5.4521016617790804e-05, "loss": 0.3655, "step": 10558 }, { "epoch": 1.652943018159048, "grad_norm": 2.1398379802703857, "learning_rate": 5.449657869012707e-05, "loss": 0.4682, "step": 10559 }, { "epoch": 1.6530995616781465, "grad_norm": 1.0124636888504028, "learning_rate": 5.447214076246334e-05, "loss": 0.273, "step": 10560 }, { "epoch": 1.653256105197245, "grad_norm": 1.2546247243881226, "learning_rate": 5.44477028347996e-05, "loss": 0.3883, "step": 10561 }, { "epoch": 1.6534126487163432, "grad_norm": 1.4561783075332642, "learning_rate": 5.442326490713587e-05, "loss": 0.397, "step": 10562 }, { "epoch": 1.6535691922354414, "grad_norm": 0.9833201169967651, "learning_rate": 5.4398826979472135e-05, "loss": 0.3122, "step": 10563 }, { "epoch": 1.6537257357545396, "grad_norm": 10.254325866699219, "learning_rate": 5.43743890518084e-05, "loss": 0.4702, "step": 10564 }, { "epoch": 1.653882279273638, "grad_norm": 1.9881863594055176, "learning_rate": 5.4349951124144665e-05, "loss": 0.7521, "step": 10565 }, { "epoch": 1.6540388227927365, "grad_norm": 3.919692277908325, 
"learning_rate": 5.4325513196480936e-05, "loss": 0.5512, "step": 10566 }, { "epoch": 1.6541953663118347, "grad_norm": 2.473768711090088, "learning_rate": 5.4301075268817194e-05, "loss": 0.4922, "step": 10567 }, { "epoch": 1.654351909830933, "grad_norm": 2.2449424266815186, "learning_rate": 5.4276637341153466e-05, "loss": 0.5504, "step": 10568 }, { "epoch": 1.6545084533500312, "grad_norm": 2.9586992263793945, "learning_rate": 5.425219941348973e-05, "loss": 0.8247, "step": 10569 }, { "epoch": 1.6546649968691296, "grad_norm": 1.8599929809570312, "learning_rate": 5.4227761485825995e-05, "loss": 0.461, "step": 10570 }, { "epoch": 1.654821540388228, "grad_norm": 1.9860762357711792, "learning_rate": 5.420332355816226e-05, "loss": 0.958, "step": 10571 }, { "epoch": 1.6549780839073263, "grad_norm": 3.286160707473755, "learning_rate": 5.417888563049853e-05, "loss": 1.1638, "step": 10572 }, { "epoch": 1.6551346274264245, "grad_norm": 1.1726208925247192, "learning_rate": 5.415444770283479e-05, "loss": 0.4416, "step": 10573 }, { "epoch": 1.6552911709455227, "grad_norm": 2.7897346019744873, "learning_rate": 5.413000977517106e-05, "loss": 0.8697, "step": 10574 }, { "epoch": 1.6554477144646211, "grad_norm": 4.233922958374023, "learning_rate": 5.4105571847507326e-05, "loss": 0.8906, "step": 10575 }, { "epoch": 1.6556042579837196, "grad_norm": 1.2990089654922485, "learning_rate": 5.408113391984359e-05, "loss": 0.6652, "step": 10576 }, { "epoch": 1.6557608015028178, "grad_norm": 3.66686749458313, "learning_rate": 5.4056695992179856e-05, "loss": 1.022, "step": 10577 }, { "epoch": 1.655917345021916, "grad_norm": 4.231387615203857, "learning_rate": 5.403225806451613e-05, "loss": 0.9791, "step": 10578 }, { "epoch": 1.6560738885410144, "grad_norm": 2.5381076335906982, "learning_rate": 5.4007820136852385e-05, "loss": 1.2046, "step": 10579 }, { "epoch": 1.6562304320601127, "grad_norm": 3.6664905548095703, "learning_rate": 5.398338220918866e-05, "loss": 1.5159, "step": 10580 }, { "epoch": 
1.656386975579211, "grad_norm": 2.639780282974243, "learning_rate": 5.395894428152492e-05, "loss": 1.4945, "step": 10581 }, { "epoch": 1.6565435190983093, "grad_norm": 1.567789912223816, "learning_rate": 5.3934506353861187e-05, "loss": 0.7653, "step": 10582 }, { "epoch": 1.6567000626174075, "grad_norm": 4.89553689956665, "learning_rate": 5.391006842619745e-05, "loss": 1.0403, "step": 10583 }, { "epoch": 1.656856606136506, "grad_norm": NaN, "learning_rate": 5.391006842619745e-05, "loss": 0.0, "step": 10584 }, { "epoch": 1.6570131496556044, "grad_norm": 3.5072648525238037, "learning_rate": 5.388563049853372e-05, "loss": 0.5282, "step": 10585 }, { "epoch": 1.6571696931747026, "grad_norm": 3.3043320178985596, "learning_rate": 5.386119257086998e-05, "loss": 0.6242, "step": 10586 }, { "epoch": 1.6573262366938009, "grad_norm": 2.609529733657837, "learning_rate": 5.383675464320625e-05, "loss": 1.0082, "step": 10587 }, { "epoch": 1.657482780212899, "grad_norm": 1.4475938081741333, "learning_rate": 5.381231671554252e-05, "loss": 0.9866, "step": 10588 }, { "epoch": 1.6576393237319975, "grad_norm": 0.5289618372917175, "learning_rate": 5.378787878787878e-05, "loss": 0.2334, "step": 10589 }, { "epoch": 1.657795867251096, "grad_norm": 0.5513238310813904, "learning_rate": 5.376344086021505e-05, "loss": 0.2082, "step": 10590 }, { "epoch": 1.6579524107701942, "grad_norm": 0.7842631936073303, "learning_rate": 5.373900293255132e-05, "loss": 0.2386, "step": 10591 }, { "epoch": 1.6581089542892924, "grad_norm": 0.7243189811706543, "learning_rate": 5.3714565004887577e-05, "loss": 0.1653, "step": 10592 }, { "epoch": 1.6582654978083906, "grad_norm": 2.165306329727173, "learning_rate": 5.369012707722385e-05, "loss": 0.208, "step": 10593 }, { "epoch": 1.658422041327489, "grad_norm": 0.661629319190979, "learning_rate": 5.366568914956011e-05, "loss": 0.2278, "step": 10594 }, { "epoch": 1.6585785848465875, "grad_norm": 0.931580662727356, "learning_rate": 5.364125122189638e-05, "loss": 0.3416, 
"step": 10595 }, { "epoch": 1.6587351283656857, "grad_norm": 0.9427894949913025, "learning_rate": 5.361681329423264e-05, "loss": 0.3323, "step": 10596 }, { "epoch": 1.658891671884784, "grad_norm": 0.6458982229232788, "learning_rate": 5.3592375366568914e-05, "loss": 0.2057, "step": 10597 }, { "epoch": 1.6590482154038821, "grad_norm": 1.2524142265319824, "learning_rate": 5.356793743890517e-05, "loss": 0.3066, "step": 10598 }, { "epoch": 1.6592047589229806, "grad_norm": 1.944617509841919, "learning_rate": 5.3543499511241444e-05, "loss": 0.4275, "step": 10599 }, { "epoch": 1.659361302442079, "grad_norm": 3.7578859329223633, "learning_rate": 5.351906158357771e-05, "loss": 0.1748, "step": 10600 }, { "epoch": 1.6595178459611772, "grad_norm": 0.9285878539085388, "learning_rate": 5.349462365591397e-05, "loss": 0.2722, "step": 10601 }, { "epoch": 1.6596743894802755, "grad_norm": 3.483341693878174, "learning_rate": 5.347018572825024e-05, "loss": 0.295, "step": 10602 }, { "epoch": 1.6598309329993737, "grad_norm": 1.6603342294692993, "learning_rate": 5.344574780058651e-05, "loss": 0.2591, "step": 10603 }, { "epoch": 1.6599874765184721, "grad_norm": 2.1465036869049072, "learning_rate": 5.342130987292277e-05, "loss": 0.3515, "step": 10604 }, { "epoch": 1.6601440200375706, "grad_norm": 1.9516934156417847, "learning_rate": 5.339687194525904e-05, "loss": 0.4868, "step": 10605 }, { "epoch": 1.6603005635566688, "grad_norm": 0.8558928966522217, "learning_rate": 5.3372434017595304e-05, "loss": 0.2049, "step": 10606 }, { "epoch": 1.660457107075767, "grad_norm": 1.204277515411377, "learning_rate": 5.334799608993157e-05, "loss": 0.2129, "step": 10607 }, { "epoch": 1.6606136505948652, "grad_norm": 1.400564193725586, "learning_rate": 5.3323558162267834e-05, "loss": 0.2187, "step": 10608 }, { "epoch": 1.6607701941139636, "grad_norm": 1.5953516960144043, "learning_rate": 5.3299120234604105e-05, "loss": 0.3519, "step": 10609 }, { "epoch": 1.660926737633062, "grad_norm": 1.5663790702819824, 
"learning_rate": 5.327468230694036e-05, "loss": 0.4418, "step": 10610 }, { "epoch": 1.6610832811521603, "grad_norm": 2.3011529445648193, "learning_rate": 5.3250244379276635e-05, "loss": 0.4294, "step": 10611 }, { "epoch": 1.6612398246712585, "grad_norm": 2.8383572101593018, "learning_rate": 5.32258064516129e-05, "loss": 0.6262, "step": 10612 }, { "epoch": 1.661396368190357, "grad_norm": 2.6488254070281982, "learning_rate": 5.3201368523949164e-05, "loss": 0.5056, "step": 10613 }, { "epoch": 1.6615529117094552, "grad_norm": 0.9980300068855286, "learning_rate": 5.317693059628543e-05, "loss": 0.3669, "step": 10614 }, { "epoch": 1.6617094552285536, "grad_norm": 1.887178897857666, "learning_rate": 5.31524926686217e-05, "loss": 0.5514, "step": 10615 }, { "epoch": 1.6618659987476518, "grad_norm": 2.6637489795684814, "learning_rate": 5.312805474095796e-05, "loss": 0.8874, "step": 10616 }, { "epoch": 1.66202254226675, "grad_norm": 2.64481520652771, "learning_rate": 5.310361681329423e-05, "loss": 0.5448, "step": 10617 }, { "epoch": 1.6621790857858485, "grad_norm": 2.7849998474121094, "learning_rate": 5.3079178885630495e-05, "loss": 0.9028, "step": 10618 }, { "epoch": 1.662335629304947, "grad_norm": 3.2674617767333984, "learning_rate": 5.305474095796676e-05, "loss": 0.9225, "step": 10619 }, { "epoch": 1.6624921728240452, "grad_norm": 2.544890880584717, "learning_rate": 5.3030303030303025e-05, "loss": 0.9553, "step": 10620 }, { "epoch": 1.6626487163431434, "grad_norm": 1.3422220945358276, "learning_rate": 5.3005865102639296e-05, "loss": 0.3589, "step": 10621 }, { "epoch": 1.6628052598622416, "grad_norm": 1.9164516925811768, "learning_rate": 5.2981427174975554e-05, "loss": 0.6147, "step": 10622 }, { "epoch": 1.66296180338134, "grad_norm": 5.187346458435059, "learning_rate": 5.2956989247311826e-05, "loss": 0.6961, "step": 10623 }, { "epoch": 1.6631183469004385, "grad_norm": 2.5986785888671875, "learning_rate": 5.293255131964809e-05, "loss": 0.8906, "step": 10624 }, { "epoch": 
1.6632748904195367, "grad_norm": 3.034003734588623, "learning_rate": 5.2908113391984356e-05, "loss": 1.0949, "step": 10625 }, { "epoch": 1.663431433938635, "grad_norm": 2.1828017234802246, "learning_rate": 5.288367546432062e-05, "loss": 0.5176, "step": 10626 }, { "epoch": 1.6635879774577331, "grad_norm": 2.277587890625, "learning_rate": 5.285923753665689e-05, "loss": 0.899, "step": 10627 }, { "epoch": 1.6637445209768316, "grad_norm": 3.5474190711975098, "learning_rate": 5.283479960899315e-05, "loss": 0.6992, "step": 10628 }, { "epoch": 1.66390106449593, "grad_norm": 4.0805888175964355, "learning_rate": 5.281036168132942e-05, "loss": 1.1746, "step": 10629 }, { "epoch": 1.6640576080150282, "grad_norm": 3.748154640197754, "learning_rate": 5.2785923753665686e-05, "loss": 0.922, "step": 10630 }, { "epoch": 1.6642141515341264, "grad_norm": 4.198906898498535, "learning_rate": 5.276148582600195e-05, "loss": 1.751, "step": 10631 }, { "epoch": 1.6643706950532247, "grad_norm": 4.466783046722412, "learning_rate": 5.2737047898338216e-05, "loss": 1.1014, "step": 10632 }, { "epoch": 1.664527238572323, "grad_norm": 1.4127699136734009, "learning_rate": 5.271260997067449e-05, "loss": 0.8603, "step": 10633 }, { "epoch": 1.6646837820914215, "grad_norm": 2.8340721130371094, "learning_rate": 5.2688172043010746e-05, "loss": 1.191, "step": 10634 }, { "epoch": 1.6648403256105198, "grad_norm": 2.287118673324585, "learning_rate": 5.266373411534702e-05, "loss": 0.5875, "step": 10635 }, { "epoch": 1.664996869129618, "grad_norm": 2.4103381633758545, "learning_rate": 5.263929618768328e-05, "loss": 0.7849, "step": 10636 }, { "epoch": 1.6651534126487162, "grad_norm": 2.8598053455352783, "learning_rate": 5.261485826001955e-05, "loss": 0.8766, "step": 10637 }, { "epoch": 1.6653099561678146, "grad_norm": 3.413485288619995, "learning_rate": 5.259042033235581e-05, "loss": 1.239, "step": 10638 }, { "epoch": 1.665466499686913, "grad_norm": 1.0679550170898438, "learning_rate": 5.256598240469208e-05, 
"loss": 0.6029, "step": 10639 }, { "epoch": 1.6656230432060113, "grad_norm": 0.44509389996528625, "learning_rate": 5.254154447702834e-05, "loss": 0.1905, "step": 10640 }, { "epoch": 1.6657795867251095, "grad_norm": 1.0144532918930054, "learning_rate": 5.251710654936461e-05, "loss": 0.269, "step": 10641 }, { "epoch": 1.6659361302442077, "grad_norm": 1.4316686391830444, "learning_rate": 5.249266862170088e-05, "loss": 0.2504, "step": 10642 }, { "epoch": 1.6660926737633062, "grad_norm": 0.9818958044052124, "learning_rate": 5.2468230694037136e-05, "loss": 0.3228, "step": 10643 }, { "epoch": 1.6662492172824046, "grad_norm": 1.124721884727478, "learning_rate": 5.244379276637341e-05, "loss": 0.3437, "step": 10644 }, { "epoch": 1.6664057608015028, "grad_norm": 0.6059370040893555, "learning_rate": 5.241935483870968e-05, "loss": 0.1469, "step": 10645 }, { "epoch": 1.666562304320601, "grad_norm": 1.5121008157730103, "learning_rate": 5.239491691104594e-05, "loss": 0.2796, "step": 10646 }, { "epoch": 1.6667188478396995, "grad_norm": 0.9613409638404846, "learning_rate": 5.237047898338221e-05, "loss": 0.2226, "step": 10647 }, { "epoch": 1.6668753913587977, "grad_norm": 1.1612027883529663, "learning_rate": 5.234604105571847e-05, "loss": 0.3174, "step": 10648 }, { "epoch": 1.6670319348778961, "grad_norm": 2.4976742267608643, "learning_rate": 5.232160312805473e-05, "loss": 0.3316, "step": 10649 }, { "epoch": 1.6671884783969944, "grad_norm": 1.2498027086257935, "learning_rate": 5.2297165200391e-05, "loss": 0.4277, "step": 10650 }, { "epoch": 1.6673450219160926, "grad_norm": 1.24776291847229, "learning_rate": 5.2272727272727274e-05, "loss": 0.2936, "step": 10651 }, { "epoch": 1.667501565435191, "grad_norm": 1.3939722776412964, "learning_rate": 5.224828934506353e-05, "loss": 0.4268, "step": 10652 }, { "epoch": 1.6676581089542895, "grad_norm": 1.675716519355774, "learning_rate": 5.2223851417399804e-05, "loss": 0.4775, "step": 10653 }, { "epoch": 1.6678146524733877, "grad_norm": 
1.3907341957092285, "learning_rate": 5.219941348973607e-05, "loss": 0.3869, "step": 10654 }, { "epoch": 1.6679711959924859, "grad_norm": 1.2454923391342163, "learning_rate": 5.217497556207233e-05, "loss": 0.3018, "step": 10655 }, { "epoch": 1.668127739511584, "grad_norm": 3.0145490169525146, "learning_rate": 5.21505376344086e-05, "loss": 0.3795, "step": 10656 }, { "epoch": 1.6682842830306825, "grad_norm": 1.352417230606079, "learning_rate": 5.212609970674487e-05, "loss": 0.3482, "step": 10657 }, { "epoch": 1.668440826549781, "grad_norm": 1.3013248443603516, "learning_rate": 5.210166177908113e-05, "loss": 0.5697, "step": 10658 }, { "epoch": 1.6685973700688792, "grad_norm": 3.581904888153076, "learning_rate": 5.20772238514174e-05, "loss": 0.8786, "step": 10659 }, { "epoch": 1.6687539135879774, "grad_norm": 1.4342955350875854, "learning_rate": 5.2052785923753664e-05, "loss": 0.4929, "step": 10660 }, { "epoch": 1.6689104571070756, "grad_norm": 1.4663060903549194, "learning_rate": 5.202834799608992e-05, "loss": 0.4167, "step": 10661 }, { "epoch": 1.669067000626174, "grad_norm": 1.2736845016479492, "learning_rate": 5.2003910068426194e-05, "loss": 0.44, "step": 10662 }, { "epoch": 1.6692235441452725, "grad_norm": 3.1659364700317383, "learning_rate": 5.1979472140762465e-05, "loss": 0.406, "step": 10663 }, { "epoch": 1.6693800876643707, "grad_norm": 4.501870155334473, "learning_rate": 5.1955034213098723e-05, "loss": 1.0747, "step": 10664 }, { "epoch": 1.669536631183469, "grad_norm": 2.149975299835205, "learning_rate": 5.193059628543499e-05, "loss": 0.5949, "step": 10665 }, { "epoch": 1.6696931747025672, "grad_norm": 1.4252465963363647, "learning_rate": 5.190615835777126e-05, "loss": 0.4715, "step": 10666 }, { "epoch": 1.6698497182216656, "grad_norm": 1.7069036960601807, "learning_rate": 5.188172043010752e-05, "loss": 0.3242, "step": 10667 }, { "epoch": 1.670006261740764, "grad_norm": 2.188441753387451, "learning_rate": 5.185728250244379e-05, "loss": 0.4015, "step": 10668 }, 
{ "epoch": 1.6701628052598623, "grad_norm": 1.5735689401626587, "learning_rate": 5.183284457478006e-05, "loss": 0.4151, "step": 10669 }, { "epoch": 1.6703193487789605, "grad_norm": 4.588582515716553, "learning_rate": 5.180840664711632e-05, "loss": 0.6561, "step": 10670 }, { "epoch": 1.6704758922980587, "grad_norm": 2.366029739379883, "learning_rate": 5.1783968719452584e-05, "loss": 0.48, "step": 10671 }, { "epoch": 1.6706324358171571, "grad_norm": 1.6738721132278442, "learning_rate": 5.1759530791788855e-05, "loss": 0.5077, "step": 10672 }, { "epoch": 1.6707889793362556, "grad_norm": 2.501746892929077, "learning_rate": 5.1735092864125113e-05, "loss": 0.6517, "step": 10673 }, { "epoch": 1.6709455228553538, "grad_norm": 3.7587740421295166, "learning_rate": 5.1710654936461385e-05, "loss": 1.1346, "step": 10674 }, { "epoch": 1.671102066374452, "grad_norm": 15.480533599853516, "learning_rate": 5.168621700879766e-05, "loss": 0.6876, "step": 10675 }, { "epoch": 1.6712586098935505, "grad_norm": 2.9390463829040527, "learning_rate": 5.1661779081133915e-05, "loss": 0.6573, "step": 10676 }, { "epoch": 1.6714151534126487, "grad_norm": 5.4329304695129395, "learning_rate": 5.163734115347018e-05, "loss": 0.788, "step": 10677 }, { "epoch": 1.6715716969317471, "grad_norm": 9.223847389221191, "learning_rate": 5.161290322580645e-05, "loss": 1.1849, "step": 10678 }, { "epoch": 1.6717282404508453, "grad_norm": 2.515810966491699, "learning_rate": 5.158846529814271e-05, "loss": 0.621, "step": 10679 }, { "epoch": 1.6718847839699436, "grad_norm": 3.640105962753296, "learning_rate": 5.156402737047898e-05, "loss": 1.1932, "step": 10680 }, { "epoch": 1.672041327489042, "grad_norm": 5.372037887573242, "learning_rate": 5.153958944281524e-05, "loss": 0.586, "step": 10681 }, { "epoch": 1.6721978710081402, "grad_norm": 1.5914926528930664, "learning_rate": 5.151515151515151e-05, "loss": 1.0401, "step": 10682 }, { "epoch": 1.6723544145272387, "grad_norm": 3.534508466720581, "learning_rate": 
5.1490713587487775e-05, "loss": 1.3516, "step": 10683 }, { "epoch": 1.6725109580463369, "grad_norm": 1.6528741121292114, "learning_rate": 5.146627565982404e-05, "loss": 0.6955, "step": 10684 }, { "epoch": 1.672667501565435, "grad_norm": 2.1529715061187744, "learning_rate": 5.1441837732160305e-05, "loss": 0.8107, "step": 10685 }, { "epoch": 1.6728240450845335, "grad_norm": 2.8191683292388916, "learning_rate": 5.1417399804496576e-05, "loss": 0.3083, "step": 10686 }, { "epoch": 1.672980588603632, "grad_norm": 1.5679564476013184, "learning_rate": 5.1392961876832834e-05, "loss": 0.6602, "step": 10687 }, { "epoch": 1.6731371321227302, "grad_norm": 1.8020987510681152, "learning_rate": 5.1368523949169106e-05, "loss": 0.7978, "step": 10688 }, { "epoch": 1.6732936756418284, "grad_norm": 0.6797225475311279, "learning_rate": 5.134408602150537e-05, "loss": 0.1784, "step": 10689 }, { "epoch": 1.6734502191609266, "grad_norm": 1.2861469984054565, "learning_rate": 5.1319648093841635e-05, "loss": 0.1494, "step": 10690 }, { "epoch": 1.673606762680025, "grad_norm": 0.7776259183883667, "learning_rate": 5.12952101661779e-05, "loss": 0.2241, "step": 10691 }, { "epoch": 1.6737633061991235, "grad_norm": 0.7680147290229797, "learning_rate": 5.127077223851417e-05, "loss": 0.1937, "step": 10692 }, { "epoch": 1.6739198497182217, "grad_norm": 0.7081788182258606, "learning_rate": 5.124633431085043e-05, "loss": 0.1732, "step": 10693 }, { "epoch": 1.67407639323732, "grad_norm": 0.6900548338890076, "learning_rate": 5.12218963831867e-05, "loss": 0.2248, "step": 10694 }, { "epoch": 1.6742329367564182, "grad_norm": 1.274192452430725, "learning_rate": 5.1197458455522966e-05, "loss": 0.3333, "step": 10695 }, { "epoch": 1.6743894802755166, "grad_norm": 0.7436234354972839, "learning_rate": 5.117302052785923e-05, "loss": 0.3009, "step": 10696 }, { "epoch": 1.674546023794615, "grad_norm": 1.424438714981079, "learning_rate": 5.1148582600195496e-05, "loss": 0.3789, "step": 10697 }, { "epoch": 
1.6747025673137133, "grad_norm": 0.85284423828125, "learning_rate": 5.112414467253177e-05, "loss": 0.3426, "step": 10698 }, { "epoch": 1.6748591108328115, "grad_norm": 1.165490984916687, "learning_rate": 5.1099706744868025e-05, "loss": 0.2127, "step": 10699 }, { "epoch": 1.6750156543519097, "grad_norm": 1.005594253540039, "learning_rate": 5.10752688172043e-05, "loss": 0.3583, "step": 10700 }, { "epoch": 1.6751721978710081, "grad_norm": 2.8059756755828857, "learning_rate": 5.105083088954056e-05, "loss": 0.3293, "step": 10701 }, { "epoch": 1.6753287413901066, "grad_norm": 0.9310165643692017, "learning_rate": 5.1026392961876827e-05, "loss": 0.2471, "step": 10702 }, { "epoch": 1.6754852849092048, "grad_norm": 1.2037992477416992, "learning_rate": 5.100195503421309e-05, "loss": 0.5662, "step": 10703 }, { "epoch": 1.675641828428303, "grad_norm": 2.170868396759033, "learning_rate": 5.097751710654936e-05, "loss": 0.3375, "step": 10704 }, { "epoch": 1.6757983719474012, "grad_norm": 1.1550534963607788, "learning_rate": 5.095307917888562e-05, "loss": 0.3144, "step": 10705 }, { "epoch": 1.6759549154664997, "grad_norm": 2.387791633605957, "learning_rate": 5.092864125122189e-05, "loss": 0.3797, "step": 10706 }, { "epoch": 1.676111458985598, "grad_norm": 1.3175456523895264, "learning_rate": 5.090420332355816e-05, "loss": 0.2326, "step": 10707 }, { "epoch": 1.6762680025046963, "grad_norm": 1.391885757446289, "learning_rate": 5.087976539589442e-05, "loss": 0.3776, "step": 10708 }, { "epoch": 1.6764245460237945, "grad_norm": 3.515990734100342, "learning_rate": 5.085532746823069e-05, "loss": 0.254, "step": 10709 }, { "epoch": 1.676581089542893, "grad_norm": 2.301178455352783, "learning_rate": 5.083088954056696e-05, "loss": 0.3833, "step": 10710 }, { "epoch": 1.6767376330619912, "grad_norm": 0.8507611751556396, "learning_rate": 5.0806451612903217e-05, "loss": 0.3777, "step": 10711 }, { "epoch": 1.6768941765810896, "grad_norm": 1.7067499160766602, "learning_rate": 5.078201368523949e-05, 
"loss": 0.4903, "step": 10712 }, { "epoch": 1.6770507201001879, "grad_norm": 4.127223014831543, "learning_rate": 5.075757575757575e-05, "loss": 0.7006, "step": 10713 }, { "epoch": 1.677207263619286, "grad_norm": 1.4126865863800049, "learning_rate": 5.073313782991202e-05, "loss": 0.3373, "step": 10714 }, { "epoch": 1.6773638071383845, "grad_norm": 1.455224871635437, "learning_rate": 5.070869990224828e-05, "loss": 0.9498, "step": 10715 }, { "epoch": 1.6775203506574827, "grad_norm": 4.076616287231445, "learning_rate": 5.0684261974584554e-05, "loss": 0.8621, "step": 10716 }, { "epoch": 1.6776768941765812, "grad_norm": 2.2663486003875732, "learning_rate": 5.065982404692081e-05, "loss": 0.461, "step": 10717 }, { "epoch": 1.6778334376956794, "grad_norm": 2.508690118789673, "learning_rate": 5.0635386119257084e-05, "loss": 0.7204, "step": 10718 }, { "epoch": 1.6779899812147776, "grad_norm": 1.673777461051941, "learning_rate": 5.061094819159335e-05, "loss": 0.6898, "step": 10719 }, { "epoch": 1.678146524733876, "grad_norm": 1.676172137260437, "learning_rate": 5.058651026392961e-05, "loss": 0.5936, "step": 10720 }, { "epoch": 1.6783030682529745, "grad_norm": 7.18748664855957, "learning_rate": 5.056207233626588e-05, "loss": 1.0243, "step": 10721 }, { "epoch": 1.6784596117720727, "grad_norm": 2.1801810264587402, "learning_rate": 5.053763440860215e-05, "loss": 0.5048, "step": 10722 }, { "epoch": 1.678616155291171, "grad_norm": 2.0111351013183594, "learning_rate": 5.051319648093841e-05, "loss": 0.8976, "step": 10723 }, { "epoch": 1.6787726988102691, "grad_norm": 5.509698867797852, "learning_rate": 5.048875855327468e-05, "loss": 1.0209, "step": 10724 }, { "epoch": 1.6789292423293676, "grad_norm": 2.151179552078247, "learning_rate": 5.0464320625610944e-05, "loss": 1.3053, "step": 10725 }, { "epoch": 1.679085785848466, "grad_norm": 3.7033746242523193, "learning_rate": 5.043988269794721e-05, "loss": 1.1283, "step": 10726 }, { "epoch": 1.6792423293675642, "grad_norm": 
2.599567174911499, "learning_rate": 5.0415444770283474e-05, "loss": 0.6676, "step": 10727 }, { "epoch": 1.6793988728866625, "grad_norm": 2.8762729167938232, "learning_rate": 5.0391006842619745e-05, "loss": 0.5331, "step": 10728 }, { "epoch": 1.6795554164057607, "grad_norm": 4.5754265785217285, "learning_rate": 5.0366568914956e-05, "loss": 0.6954, "step": 10729 }, { "epoch": 1.679711959924859, "grad_norm": 4.394883155822754, "learning_rate": 5.0342130987292275e-05, "loss": 0.8285, "step": 10730 }, { "epoch": 1.6798685034439576, "grad_norm": 3.905444383621216, "learning_rate": 5.031769305962854e-05, "loss": 1.6435, "step": 10731 }, { "epoch": 1.6800250469630558, "grad_norm": 2.222259521484375, "learning_rate": 5.0293255131964804e-05, "loss": 0.625, "step": 10732 }, { "epoch": 1.680181590482154, "grad_norm": 2.369340658187866, "learning_rate": 5.026881720430107e-05, "loss": 1.2802, "step": 10733 }, { "epoch": 1.6803381340012522, "grad_norm": 8.684860229492188, "learning_rate": 5.024437927663734e-05, "loss": 0.7457, "step": 10734 }, { "epoch": 1.6804946775203506, "grad_norm": 0.9671575427055359, "learning_rate": 5.02199413489736e-05, "loss": 0.3532, "step": 10735 }, { "epoch": 1.680651221039449, "grad_norm": 3.1766295433044434, "learning_rate": 5.019550342130987e-05, "loss": 1.1237, "step": 10736 }, { "epoch": 1.6808077645585473, "grad_norm": 4.267437934875488, "learning_rate": 5.0171065493646135e-05, "loss": 0.5706, "step": 10737 }, { "epoch": 1.6809643080776455, "grad_norm": 3.1225411891937256, "learning_rate": 5.01466275659824e-05, "loss": 0.8392, "step": 10738 }, { "epoch": 1.6811208515967437, "grad_norm": 1.9683570861816406, "learning_rate": 5.0122189638318665e-05, "loss": 0.2465, "step": 10739 }, { "epoch": 1.6812773951158422, "grad_norm": 0.6025456190109253, "learning_rate": 5.0097751710654936e-05, "loss": 0.2046, "step": 10740 }, { "epoch": 1.6814339386349406, "grad_norm": 0.7359471321105957, "learning_rate": 5.0073313782991194e-05, "loss": 0.1829, "step": 
10741 }, { "epoch": 1.6815904821540388, "grad_norm": 0.568925142288208, "learning_rate": 5.0048875855327466e-05, "loss": 0.2044, "step": 10742 }, { "epoch": 1.681747025673137, "grad_norm": 0.593213677406311, "learning_rate": 5.002443792766373e-05, "loss": 0.3406, "step": 10743 }, { "epoch": 1.6819035691922355, "grad_norm": 0.6833570599555969, "learning_rate": 4.9999999999999996e-05, "loss": 0.2111, "step": 10744 }, { "epoch": 1.6820601127113337, "grad_norm": 1.2374072074890137, "learning_rate": 4.997556207233626e-05, "loss": 0.3462, "step": 10745 }, { "epoch": 1.6822166562304322, "grad_norm": 0.7850791811943054, "learning_rate": 4.995112414467253e-05, "loss": 0.3209, "step": 10746 }, { "epoch": 1.6823731997495304, "grad_norm": 0.6952258944511414, "learning_rate": 4.992668621700879e-05, "loss": 0.3351, "step": 10747 }, { "epoch": 1.6825297432686286, "grad_norm": 1.0042572021484375, "learning_rate": 4.990224828934506e-05, "loss": 0.2355, "step": 10748 }, { "epoch": 1.682686286787727, "grad_norm": 1.752544641494751, "learning_rate": 4.9877810361681326e-05, "loss": 0.2027, "step": 10749 }, { "epoch": 1.6828428303068252, "grad_norm": 1.0626980066299438, "learning_rate": 4.985337243401759e-05, "loss": 0.2959, "step": 10750 }, { "epoch": 1.6829993738259237, "grad_norm": 1.8501675128936768, "learning_rate": 4.9828934506353856e-05, "loss": 0.3076, "step": 10751 }, { "epoch": 1.683155917345022, "grad_norm": 1.9613275527954102, "learning_rate": 4.980449657869013e-05, "loss": 0.4469, "step": 10752 }, { "epoch": 1.6833124608641201, "grad_norm": 0.9938980340957642, "learning_rate": 4.9780058651026386e-05, "loss": 0.3296, "step": 10753 }, { "epoch": 1.6834690043832186, "grad_norm": 3.928609609603882, "learning_rate": 4.975562072336266e-05, "loss": 0.3944, "step": 10754 }, { "epoch": 1.683625547902317, "grad_norm": 1.8537261486053467, "learning_rate": 4.973118279569892e-05, "loss": 0.4749, "step": 10755 }, { "epoch": 1.6837820914214152, "grad_norm": 1.3114187717437744, 
"learning_rate": 4.970674486803519e-05, "loss": 0.3248, "step": 10756 }, { "epoch": 1.6839386349405134, "grad_norm": 2.2068116664886475, "learning_rate": 4.968230694037145e-05, "loss": 0.3514, "step": 10757 }, { "epoch": 1.6840951784596117, "grad_norm": 1.8806259632110596, "learning_rate": 4.965786901270772e-05, "loss": 0.4841, "step": 10758 }, { "epoch": 1.68425172197871, "grad_norm": 2.068279504776001, "learning_rate": 4.963343108504398e-05, "loss": 0.7084, "step": 10759 }, { "epoch": 1.6844082654978085, "grad_norm": 2.2462806701660156, "learning_rate": 4.960899315738025e-05, "loss": 0.5013, "step": 10760 }, { "epoch": 1.6845648090169068, "grad_norm": 7.449122905731201, "learning_rate": 4.958455522971652e-05, "loss": 0.4172, "step": 10761 }, { "epoch": 1.684721352536005, "grad_norm": 2.3347396850585938, "learning_rate": 4.956011730205278e-05, "loss": 0.5593, "step": 10762 }, { "epoch": 1.6848778960551032, "grad_norm": 1.909669280052185, "learning_rate": 4.953567937438905e-05, "loss": 0.3513, "step": 10763 }, { "epoch": 1.6850344395742016, "grad_norm": 2.685105085372925, "learning_rate": 4.951124144672532e-05, "loss": 0.641, "step": 10764 }, { "epoch": 1.6851909830933, "grad_norm": 1.1838382482528687, "learning_rate": 4.948680351906158e-05, "loss": 0.5653, "step": 10765 }, { "epoch": 1.6853475266123983, "grad_norm": 1.5641990900039673, "learning_rate": 4.946236559139785e-05, "loss": 0.5987, "step": 10766 }, { "epoch": 1.6855040701314965, "grad_norm": 2.0659899711608887, "learning_rate": 4.943792766373411e-05, "loss": 0.651, "step": 10767 }, { "epoch": 1.6856606136505947, "grad_norm": 2.0896155834198, "learning_rate": 4.941348973607038e-05, "loss": 0.7361, "step": 10768 }, { "epoch": 1.6858171571696932, "grad_norm": 4.1929426193237305, "learning_rate": 4.938905180840664e-05, "loss": 0.5407, "step": 10769 }, { "epoch": 1.6859737006887916, "grad_norm": 1.9943493604660034, "learning_rate": 4.9364613880742914e-05, "loss": 0.6625, "step": 10770 }, { "epoch": 
1.6861302442078898, "grad_norm": 2.7732839584350586, "learning_rate": 4.934017595307917e-05, "loss": 1.2895, "step": 10771 }, { "epoch": 1.686286787726988, "grad_norm": 2.526482582092285, "learning_rate": 4.9315738025415444e-05, "loss": 1.0281, "step": 10772 }, { "epoch": 1.6864433312460863, "grad_norm": 1.9940377473831177, "learning_rate": 4.929130009775171e-05, "loss": 1.0371, "step": 10773 }, { "epoch": 1.6865998747651847, "grad_norm": 2.744659662246704, "learning_rate": 4.926686217008797e-05, "loss": 0.6451, "step": 10774 }, { "epoch": 1.6867564182842831, "grad_norm": 4.247622013092041, "learning_rate": 4.924242424242424e-05, "loss": 1.0116, "step": 10775 }, { "epoch": 1.6869129618033814, "grad_norm": 2.5660741329193115, "learning_rate": 4.921798631476051e-05, "loss": 0.9087, "step": 10776 }, { "epoch": 1.6870695053224796, "grad_norm": 3.1664459705352783, "learning_rate": 4.919354838709677e-05, "loss": 0.9292, "step": 10777 }, { "epoch": 1.687226048841578, "grad_norm": 1.8890631198883057, "learning_rate": 4.916911045943304e-05, "loss": 0.9163, "step": 10778 }, { "epoch": 1.6873825923606762, "grad_norm": 3.557894706726074, "learning_rate": 4.9144672531769304e-05, "loss": 0.7879, "step": 10779 }, { "epoch": 1.6875391358797747, "grad_norm": 3.2021689414978027, "learning_rate": 4.912023460410556e-05, "loss": 0.7324, "step": 10780 }, { "epoch": 1.6876956793988729, "grad_norm": 3.014575242996216, "learning_rate": 4.9095796676441834e-05, "loss": 0.7799, "step": 10781 }, { "epoch": 1.687852222917971, "grad_norm": 3.13476824760437, "learning_rate": 4.9071358748778105e-05, "loss": 1.1459, "step": 10782 }, { "epoch": 1.6880087664370695, "grad_norm": 2.8570799827575684, "learning_rate": 4.9046920821114363e-05, "loss": 1.0614, "step": 10783 }, { "epoch": 1.688165309956168, "grad_norm": 1.858405351638794, "learning_rate": 4.9022482893450635e-05, "loss": 0.5261, "step": 10784 }, { "epoch": 1.6883218534752662, "grad_norm": 2.642915964126587, "learning_rate": 
4.89980449657869e-05, "loss": 0.8285, "step": 10785 }, { "epoch": 1.6884783969943644, "grad_norm": 6.898956775665283, "learning_rate": 4.897360703812316e-05, "loss": 0.6748, "step": 10786 }, { "epoch": 1.6886349405134626, "grad_norm": 2.4896328449249268, "learning_rate": 4.894916911045943e-05, "loss": 0.7534, "step": 10787 }, { "epoch": 1.688791484032561, "grad_norm": 7.32418155670166, "learning_rate": 4.89247311827957e-05, "loss": 1.3809, "step": 10788 }, { "epoch": 1.6889480275516595, "grad_norm": 1.3984144926071167, "learning_rate": 4.890029325513196e-05, "loss": 0.2564, "step": 10789 }, { "epoch": 1.6891045710707577, "grad_norm": 0.826632559299469, "learning_rate": 4.887585532746823e-05, "loss": 0.2788, "step": 10790 }, { "epoch": 1.689261114589856, "grad_norm": 0.395866721868515, "learning_rate": 4.8851417399804495e-05, "loss": 0.1724, "step": 10791 }, { "epoch": 1.6894176581089542, "grad_norm": 0.7708562016487122, "learning_rate": 4.8826979472140753e-05, "loss": 0.2389, "step": 10792 }, { "epoch": 1.6895742016280526, "grad_norm": 0.6404639482498169, "learning_rate": 4.8802541544477025e-05, "loss": 0.2478, "step": 10793 }, { "epoch": 1.689730745147151, "grad_norm": 0.717633068561554, "learning_rate": 4.87781036168133e-05, "loss": 0.3287, "step": 10794 }, { "epoch": 1.6898872886662493, "grad_norm": 1.3549281358718872, "learning_rate": 4.8753665689149555e-05, "loss": 0.3062, "step": 10795 }, { "epoch": 1.6900438321853475, "grad_norm": 1.1032696962356567, "learning_rate": 4.8729227761485826e-05, "loss": 0.3737, "step": 10796 }, { "epoch": 1.6902003757044457, "grad_norm": 1.0608580112457275, "learning_rate": 4.870478983382209e-05, "loss": 0.3077, "step": 10797 }, { "epoch": 1.6903569192235441, "grad_norm": 1.941976547241211, "learning_rate": 4.868035190615835e-05, "loss": 0.3382, "step": 10798 }, { "epoch": 1.6905134627426426, "grad_norm": 1.254552960395813, "learning_rate": 4.865591397849462e-05, "loss": 0.3143, "step": 10799 }, { "epoch": 1.6906700062617408, 
"grad_norm": 0.9301863312721252, "learning_rate": 4.863147605083089e-05, "loss": 0.2226, "step": 10800 }, { "epoch": 1.690826549780839, "grad_norm": 0.8874600529670715, "learning_rate": 4.860703812316715e-05, "loss": 0.4288, "step": 10801 }, { "epoch": 1.6909830932999372, "grad_norm": 1.8583930730819702, "learning_rate": 4.8582600195503415e-05, "loss": 0.3001, "step": 10802 }, { "epoch": 1.6911396368190357, "grad_norm": 1.5552978515625, "learning_rate": 4.8558162267839687e-05, "loss": 0.2913, "step": 10803 }, { "epoch": 1.6912961803381341, "grad_norm": 0.9442487359046936, "learning_rate": 4.8533724340175945e-05, "loss": 0.2821, "step": 10804 }, { "epoch": 1.6914527238572323, "grad_norm": 1.6942837238311768, "learning_rate": 4.8509286412512216e-05, "loss": 0.4196, "step": 10805 }, { "epoch": 1.6916092673763305, "grad_norm": 1.1128238439559937, "learning_rate": 4.848484848484849e-05, "loss": 0.3276, "step": 10806 }, { "epoch": 1.6917658108954288, "grad_norm": 1.4044594764709473, "learning_rate": 4.8460410557184746e-05, "loss": 0.3271, "step": 10807 }, { "epoch": 1.6919223544145272, "grad_norm": 2.1907525062561035, "learning_rate": 4.843597262952101e-05, "loss": 0.5816, "step": 10808 }, { "epoch": 1.6920788979336256, "grad_norm": 1.5539323091506958, "learning_rate": 4.8411534701857275e-05, "loss": 0.3358, "step": 10809 }, { "epoch": 1.6922354414527239, "grad_norm": 2.5822982788085938, "learning_rate": 4.838709677419354e-05, "loss": 0.53, "step": 10810 }, { "epoch": 1.692391984971822, "grad_norm": 4.984518527984619, "learning_rate": 4.836265884652981e-05, "loss": 0.8552, "step": 10811 }, { "epoch": 1.6925485284909205, "grad_norm": 1.6038222312927246, "learning_rate": 4.833822091886607e-05, "loss": 0.5728, "step": 10812 }, { "epoch": 1.6927050720100187, "grad_norm": 2.4173617362976074, "learning_rate": 4.831378299120234e-05, "loss": 0.6206, "step": 10813 }, { "epoch": 1.6928616155291172, "grad_norm": 3.724741220474243, "learning_rate": 4.8289345063538606e-05, "loss": 
0.6327, "step": 10814 }, { "epoch": 1.6930181590482154, "grad_norm": 1.6614710092544556, "learning_rate": 4.826490713587487e-05, "loss": 0.6322, "step": 10815 }, { "epoch": 1.6931747025673136, "grad_norm": 2.336488723754883, "learning_rate": 4.8240469208211136e-05, "loss": 0.6787, "step": 10816 }, { "epoch": 1.693331246086412, "grad_norm": 1.9805657863616943, "learning_rate": 4.821603128054741e-05, "loss": 0.5463, "step": 10817 }, { "epoch": 1.6934877896055105, "grad_norm": 0.8958491086959839, "learning_rate": 4.8191593352883665e-05, "loss": 0.2454, "step": 10818 }, { "epoch": 1.6936443331246087, "grad_norm": 4.5509562492370605, "learning_rate": 4.816715542521994e-05, "loss": 0.7812, "step": 10819 }, { "epoch": 1.693800876643707, "grad_norm": 2.46221923828125, "learning_rate": 4.81427174975562e-05, "loss": 0.7259, "step": 10820 }, { "epoch": 1.6939574201628051, "grad_norm": 2.2861831188201904, "learning_rate": 4.8118279569892467e-05, "loss": 0.6677, "step": 10821 }, { "epoch": 1.6941139636819036, "grad_norm": 2.7029225826263428, "learning_rate": 4.809384164222873e-05, "loss": 0.4636, "step": 10822 }, { "epoch": 1.694270507201002, "grad_norm": 1.2040103673934937, "learning_rate": 4.8069403714565e-05, "loss": 0.5958, "step": 10823 }, { "epoch": 1.6944270507201002, "grad_norm": 5.391923904418945, "learning_rate": 4.804496578690126e-05, "loss": 0.7894, "step": 10824 }, { "epoch": 1.6945835942391985, "grad_norm": 4.4691972732543945, "learning_rate": 4.802052785923753e-05, "loss": 1.516, "step": 10825 }, { "epoch": 1.6947401377582967, "grad_norm": 2.4495632648468018, "learning_rate": 4.79960899315738e-05, "loss": 1.0442, "step": 10826 }, { "epoch": 1.6948966812773951, "grad_norm": 6.339310169219971, "learning_rate": 4.797165200391006e-05, "loss": 0.9353, "step": 10827 }, { "epoch": 1.6950532247964936, "grad_norm": 3.8489506244659424, "learning_rate": 4.794721407624633e-05, "loss": 1.0026, "step": 10828 }, { "epoch": 1.6952097683155918, "grad_norm": 2.871527671813965, 
"learning_rate": 4.79227761485826e-05, "loss": 0.6062, "step": 10829 }, { "epoch": 1.69536631183469, "grad_norm": 1.528554081916809, "learning_rate": 4.7898338220918857e-05, "loss": 0.4849, "step": 10830 }, { "epoch": 1.6955228553537882, "grad_norm": 4.001394748687744, "learning_rate": 4.787390029325513e-05, "loss": 1.0743, "step": 10831 }, { "epoch": 1.6956793988728867, "grad_norm": 3.781374454498291, "learning_rate": 4.784946236559139e-05, "loss": 1.147, "step": 10832 }, { "epoch": 1.695835942391985, "grad_norm": 4.306796550750732, "learning_rate": 4.782502443792766e-05, "loss": 0.9814, "step": 10833 }, { "epoch": 1.6959924859110833, "grad_norm": 2.652988910675049, "learning_rate": 4.780058651026392e-05, "loss": 0.9608, "step": 10834 }, { "epoch": 1.6961490294301815, "grad_norm": 2.375042676925659, "learning_rate": 4.7776148582600194e-05, "loss": 0.694, "step": 10835 }, { "epoch": 1.6963055729492797, "grad_norm": 1.6284605264663696, "learning_rate": 4.775171065493645e-05, "loss": 0.4705, "step": 10836 }, { "epoch": 1.6964621164683782, "grad_norm": 2.3897979259490967, "learning_rate": 4.7727272727272724e-05, "loss": 0.7331, "step": 10837 }, { "epoch": 1.6966186599874766, "grad_norm": 1.4052478075027466, "learning_rate": 4.770283479960899e-05, "loss": 0.6741, "step": 10838 }, { "epoch": 1.6967752035065748, "grad_norm": 0.5717368125915527, "learning_rate": 4.767839687194525e-05, "loss": 0.2161, "step": 10839 }, { "epoch": 1.696931747025673, "grad_norm": 0.46546775102615356, "learning_rate": 4.765395894428152e-05, "loss": 0.2037, "step": 10840 }, { "epoch": 1.6970882905447713, "grad_norm": 0.6907462477684021, "learning_rate": 4.762952101661779e-05, "loss": 0.2561, "step": 10841 }, { "epoch": 1.6972448340638697, "grad_norm": 0.85224848985672, "learning_rate": 4.760508308895405e-05, "loss": 0.2443, "step": 10842 }, { "epoch": 1.6974013775829682, "grad_norm": 0.8416429162025452, "learning_rate": 4.758064516129032e-05, "loss": 0.1929, "step": 10843 }, { "epoch": 
1.6975579211020664, "grad_norm": 0.6139851808547974, "learning_rate": 4.7556207233626584e-05, "loss": 0.2167, "step": 10844 }, { "epoch": 1.6977144646211646, "grad_norm": 0.6043062210083008, "learning_rate": 4.753176930596285e-05, "loss": 0.2006, "step": 10845 }, { "epoch": 1.697871008140263, "grad_norm": 0.43999460339546204, "learning_rate": 4.7507331378299114e-05, "loss": 0.1848, "step": 10846 }, { "epoch": 1.6980275516593613, "grad_norm": 2.4954354763031006, "learning_rate": 4.7482893450635385e-05, "loss": 0.2923, "step": 10847 }, { "epoch": 1.6981840951784597, "grad_norm": 1.6755168437957764, "learning_rate": 4.745845552297164e-05, "loss": 0.365, "step": 10848 }, { "epoch": 1.698340638697558, "grad_norm": 0.7627212405204773, "learning_rate": 4.7434017595307915e-05, "loss": 0.1334, "step": 10849 }, { "epoch": 1.6984971822166561, "grad_norm": 0.7243126034736633, "learning_rate": 4.740957966764418e-05, "loss": 0.206, "step": 10850 }, { "epoch": 1.6986537257357546, "grad_norm": 2.446509599685669, "learning_rate": 4.7385141739980444e-05, "loss": 0.3525, "step": 10851 }, { "epoch": 1.698810269254853, "grad_norm": 2.082062005996704, "learning_rate": 4.736070381231671e-05, "loss": 0.3959, "step": 10852 }, { "epoch": 1.6989668127739512, "grad_norm": 1.239433765411377, "learning_rate": 4.733626588465298e-05, "loss": 0.36, "step": 10853 }, { "epoch": 1.6991233562930494, "grad_norm": 1.6544456481933594, "learning_rate": 4.731182795698924e-05, "loss": 0.3264, "step": 10854 }, { "epoch": 1.6992798998121477, "grad_norm": 16.526798248291016, "learning_rate": 4.728739002932551e-05, "loss": 0.4697, "step": 10855 }, { "epoch": 1.699436443331246, "grad_norm": 1.5879408121109009, "learning_rate": 4.7262952101661775e-05, "loss": 0.4604, "step": 10856 }, { "epoch": 1.6995929868503445, "grad_norm": 1.414926528930664, "learning_rate": 4.723851417399804e-05, "loss": 0.2591, "step": 10857 }, { "epoch": 1.6997495303694428, "grad_norm": 1.0178431272506714, "learning_rate": 
4.7214076246334305e-05, "loss": 0.2829, "step": 10858 }, { "epoch": 1.699906073888541, "grad_norm": 1.5842969417572021, "learning_rate": 4.7189638318670576e-05, "loss": 0.2904, "step": 10859 }, { "epoch": 1.7000626174076392, "grad_norm": 1.6290944814682007, "learning_rate": 4.7165200391006834e-05, "loss": 0.3683, "step": 10860 }, { "epoch": 1.7002191609267376, "grad_norm": 1.115478754043579, "learning_rate": 4.7140762463343106e-05, "loss": 0.4134, "step": 10861 }, { "epoch": 1.700375704445836, "grad_norm": 1.0377562046051025, "learning_rate": 4.711632453567937e-05, "loss": 0.3749, "step": 10862 }, { "epoch": 1.7005322479649343, "grad_norm": 3.567615032196045, "learning_rate": 4.7091886608015636e-05, "loss": 0.6543, "step": 10863 }, { "epoch": 1.7006887914840325, "grad_norm": 1.049473524093628, "learning_rate": 4.70674486803519e-05, "loss": 0.4259, "step": 10864 }, { "epoch": 1.7008453350031307, "grad_norm": 0.785376250743866, "learning_rate": 4.704301075268817e-05, "loss": 0.2459, "step": 10865 }, { "epoch": 1.7010018785222292, "grad_norm": 3.4430458545684814, "learning_rate": 4.701857282502443e-05, "loss": 0.7491, "step": 10866 }, { "epoch": 1.7011584220413276, "grad_norm": 0.9417991638183594, "learning_rate": 4.69941348973607e-05, "loss": 0.4102, "step": 10867 }, { "epoch": 1.7013149655604258, "grad_norm": 1.8139441013336182, "learning_rate": 4.6969696969696966e-05, "loss": 0.6608, "step": 10868 }, { "epoch": 1.701471509079524, "grad_norm": 6.10275411605835, "learning_rate": 4.694525904203323e-05, "loss": 0.6622, "step": 10869 }, { "epoch": 1.7016280525986223, "grad_norm": 1.916246771812439, "learning_rate": 4.6920821114369496e-05, "loss": 0.5263, "step": 10870 }, { "epoch": 1.7017845961177207, "grad_norm": 2.4708452224731445, "learning_rate": 4.689638318670577e-05, "loss": 0.7636, "step": 10871 }, { "epoch": 1.7019411396368191, "grad_norm": 1.3258744478225708, "learning_rate": 4.6871945259042026e-05, "loss": 0.3469, "step": 10872 }, { "epoch": 
1.7020976831559174, "grad_norm": 1.8512464761734009, "learning_rate": 4.68475073313783e-05, "loss": 0.7012, "step": 10873 }, { "epoch": 1.7022542266750156, "grad_norm": 3.0667710304260254, "learning_rate": 4.682306940371456e-05, "loss": 1.1852, "step": 10874 }, { "epoch": 1.7024107701941138, "grad_norm": 7.509494781494141, "learning_rate": 4.679863147605083e-05, "loss": 0.9591, "step": 10875 }, { "epoch": 1.7025673137132122, "grad_norm": 2.958660364151001, "learning_rate": 4.677419354838709e-05, "loss": 1.3427, "step": 10876 }, { "epoch": 1.7027238572323107, "grad_norm": 2.816685914993286, "learning_rate": 4.674975562072336e-05, "loss": 0.9676, "step": 10877 }, { "epoch": 1.702880400751409, "grad_norm": 3.361295700073242, "learning_rate": 4.672531769305962e-05, "loss": 0.7556, "step": 10878 }, { "epoch": 1.7030369442705071, "grad_norm": 1.7162786722183228, "learning_rate": 4.670087976539589e-05, "loss": 1.2321, "step": 10879 }, { "epoch": 1.7031934877896056, "grad_norm": 9.585728645324707, "learning_rate": 4.667644183773216e-05, "loss": 0.9282, "step": 10880 }, { "epoch": 1.7033500313087038, "grad_norm": 1.799238681793213, "learning_rate": 4.665200391006842e-05, "loss": 0.8398, "step": 10881 }, { "epoch": 1.7035065748278022, "grad_norm": 5.488026142120361, "learning_rate": 4.662756598240469e-05, "loss": 1.5273, "step": 10882 }, { "epoch": 1.7036631183469004, "grad_norm": 3.029956817626953, "learning_rate": 4.660312805474096e-05, "loss": 0.9112, "step": 10883 }, { "epoch": 1.7038196618659986, "grad_norm": 3.336357831954956, "learning_rate": 4.657869012707722e-05, "loss": 1.0213, "step": 10884 }, { "epoch": 1.703976205385097, "grad_norm": 1.5135464668273926, "learning_rate": 4.655425219941349e-05, "loss": 0.6799, "step": 10885 }, { "epoch": 1.7041327489041955, "grad_norm": 2.607713460922241, "learning_rate": 4.652981427174975e-05, "loss": 0.7304, "step": 10886 }, { "epoch": 1.7042892924232937, "grad_norm": 4.478157043457031, "learning_rate": 4.650537634408602e-05, 
"loss": 1.1594, "step": 10887 }, { "epoch": 1.704445835942392, "grad_norm": 1.5213872194290161, "learning_rate": 4.648093841642228e-05, "loss": 0.6976, "step": 10888 }, { "epoch": 1.7046023794614902, "grad_norm": 1.294400930404663, "learning_rate": 4.6456500488758554e-05, "loss": 0.3431, "step": 10889 }, { "epoch": 1.7047589229805886, "grad_norm": 0.9387235641479492, "learning_rate": 4.643206256109481e-05, "loss": 0.2044, "step": 10890 }, { "epoch": 1.704915466499687, "grad_norm": 0.8334453105926514, "learning_rate": 4.6407624633431084e-05, "loss": 0.2973, "step": 10891 }, { "epoch": 1.7050720100187853, "grad_norm": 0.7536923289299011, "learning_rate": 4.638318670576735e-05, "loss": 0.3179, "step": 10892 }, { "epoch": 1.7052285535378835, "grad_norm": 1.139009714126587, "learning_rate": 4.6358748778103614e-05, "loss": 0.1864, "step": 10893 }, { "epoch": 1.7053850970569817, "grad_norm": 0.825655996799469, "learning_rate": 4.633431085043988e-05, "loss": 0.1953, "step": 10894 }, { "epoch": 1.7055416405760802, "grad_norm": 1.221582055091858, "learning_rate": 4.630987292277615e-05, "loss": 0.2487, "step": 10895 }, { "epoch": 1.7056981840951786, "grad_norm": 0.9275577068328857, "learning_rate": 4.628543499511241e-05, "loss": 0.2307, "step": 10896 }, { "epoch": 1.7058547276142768, "grad_norm": 0.8654758930206299, "learning_rate": 4.626099706744868e-05, "loss": 0.3036, "step": 10897 }, { "epoch": 1.706011271133375, "grad_norm": 5.584901809692383, "learning_rate": 4.6236559139784944e-05, "loss": 0.2112, "step": 10898 }, { "epoch": 1.7061678146524732, "grad_norm": 1.3160961866378784, "learning_rate": 4.621212121212121e-05, "loss": 0.257, "step": 10899 }, { "epoch": 1.7063243581715717, "grad_norm": 0.8160209655761719, "learning_rate": 4.6187683284457474e-05, "loss": 0.1725, "step": 10900 }, { "epoch": 1.7064809016906701, "grad_norm": 1.1387146711349487, "learning_rate": 4.6163245356793745e-05, "loss": 0.3657, "step": 10901 }, { "epoch": 1.7066374452097683, "grad_norm": 
2.705634117126465, "learning_rate": 4.6138807429130004e-05, "loss": 0.5964, "step": 10902 }, { "epoch": 1.7067939887288666, "grad_norm": 1.0726594924926758, "learning_rate": 4.6114369501466275e-05, "loss": 0.3722, "step": 10903 }, { "epoch": 1.7069505322479648, "grad_norm": 2.15543794631958, "learning_rate": 4.608993157380254e-05, "loss": 0.6121, "step": 10904 }, { "epoch": 1.7071070757670632, "grad_norm": 2.3426249027252197, "learning_rate": 4.6065493646138805e-05, "loss": 0.3009, "step": 10905 }, { "epoch": 1.7072636192861617, "grad_norm": 1.6663450002670288, "learning_rate": 4.604105571847507e-05, "loss": 0.366, "step": 10906 }, { "epoch": 1.7074201628052599, "grad_norm": 4.950168609619141, "learning_rate": 4.601661779081134e-05, "loss": 0.392, "step": 10907 }, { "epoch": 1.707576706324358, "grad_norm": 2.3819642066955566, "learning_rate": 4.59921798631476e-05, "loss": 0.3112, "step": 10908 }, { "epoch": 1.7077332498434565, "grad_norm": 1.7315618991851807, "learning_rate": 4.596774193548387e-05, "loss": 0.4806, "step": 10909 }, { "epoch": 1.7078897933625548, "grad_norm": 1.356067419052124, "learning_rate": 4.5943304007820135e-05, "loss": 0.4802, "step": 10910 }, { "epoch": 1.7080463368816532, "grad_norm": 1.5837355852127075, "learning_rate": 4.5918866080156393e-05, "loss": 0.282, "step": 10911 }, { "epoch": 1.7082028804007514, "grad_norm": 1.9646797180175781, "learning_rate": 4.5894428152492665e-05, "loss": 0.3698, "step": 10912 }, { "epoch": 1.7083594239198496, "grad_norm": 3.7682149410247803, "learning_rate": 4.586999022482894e-05, "loss": 0.7863, "step": 10913 }, { "epoch": 1.708515967438948, "grad_norm": 2.856004476547241, "learning_rate": 4.5845552297165195e-05, "loss": 0.5196, "step": 10914 }, { "epoch": 1.7086725109580463, "grad_norm": 4.119771957397461, "learning_rate": 4.5821114369501466e-05, "loss": 0.6147, "step": 10915 }, { "epoch": 1.7088290544771447, "grad_norm": 3.8047308921813965, "learning_rate": 4.579667644183773e-05, "loss": 0.5624, "step": 
10916 }, { "epoch": 1.708985597996243, "grad_norm": 1.4393452405929565, "learning_rate": 4.577223851417399e-05, "loss": 1.135, "step": 10917 }, { "epoch": 1.7091421415153412, "grad_norm": 2.7042551040649414, "learning_rate": 4.574780058651026e-05, "loss": 0.7294, "step": 10918 }, { "epoch": 1.7092986850344396, "grad_norm": 1.8399097919464111, "learning_rate": 4.572336265884653e-05, "loss": 0.2943, "step": 10919 }, { "epoch": 1.709455228553538, "grad_norm": 3.5511062145233154, "learning_rate": 4.569892473118279e-05, "loss": 0.5965, "step": 10920 }, { "epoch": 1.7096117720726363, "grad_norm": 1.2243882417678833, "learning_rate": 4.567448680351906e-05, "loss": 0.2229, "step": 10921 }, { "epoch": 1.7097683155917345, "grad_norm": 2.0039992332458496, "learning_rate": 4.565004887585533e-05, "loss": 0.9775, "step": 10922 }, { "epoch": 1.7099248591108327, "grad_norm": 2.668609857559204, "learning_rate": 4.5625610948191585e-05, "loss": 0.749, "step": 10923 }, { "epoch": 1.7100814026299311, "grad_norm": 3.119973659515381, "learning_rate": 4.5601173020527856e-05, "loss": 0.6062, "step": 10924 }, { "epoch": 1.7102379461490296, "grad_norm": 4.085299491882324, "learning_rate": 4.557673509286413e-05, "loss": 1.0235, "step": 10925 }, { "epoch": 1.7103944896681278, "grad_norm": 1.9524751901626587, "learning_rate": 4.5552297165200386e-05, "loss": 0.746, "step": 10926 }, { "epoch": 1.710551033187226, "grad_norm": 2.2046310901641846, "learning_rate": 4.552785923753666e-05, "loss": 0.8188, "step": 10927 }, { "epoch": 1.7107075767063242, "grad_norm": 3.634552001953125, "learning_rate": 4.550342130987292e-05, "loss": 1.0935, "step": 10928 }, { "epoch": 1.7108641202254227, "grad_norm": 4.985217571258545, "learning_rate": 4.547898338220918e-05, "loss": 1.4068, "step": 10929 }, { "epoch": 1.711020663744521, "grad_norm": 4.0953288078308105, "learning_rate": 4.545454545454545e-05, "loss": 0.9341, "step": 10930 }, { "epoch": 1.7111772072636193, "grad_norm": 2.7812812328338623, "learning_rate": 
4.543010752688172e-05, "loss": 0.7642, "step": 10931 }, { "epoch": 1.7113337507827175, "grad_norm": 2.699666738510132, "learning_rate": 4.540566959921798e-05, "loss": 1.6983, "step": 10932 }, { "epoch": 1.7114902943018158, "grad_norm": 2.4366767406463623, "learning_rate": 4.5381231671554246e-05, "loss": 1.0511, "step": 10933 }, { "epoch": 1.7116468378209142, "grad_norm": 1.5458954572677612, "learning_rate": 4.535679374389052e-05, "loss": 0.9385, "step": 10934 }, { "epoch": 1.7118033813400126, "grad_norm": 0.7985953688621521, "learning_rate": 4.5332355816226776e-05, "loss": 0.1675, "step": 10935 }, { "epoch": 1.7119599248591109, "grad_norm": 2.136017084121704, "learning_rate": 4.530791788856305e-05, "loss": 0.7345, "step": 10936 }, { "epoch": 1.712116468378209, "grad_norm": 1.3035306930541992, "learning_rate": 4.5283479960899305e-05, "loss": 0.3092, "step": 10937 }, { "epoch": 1.7122730118973073, "grad_norm": 2.315929412841797, "learning_rate": 4.525904203323558e-05, "loss": 1.2992, "step": 10938 }, { "epoch": 1.7124295554164057, "grad_norm": 0.8758809566497803, "learning_rate": 4.523460410557184e-05, "loss": 0.2215, "step": 10939 }, { "epoch": 1.7125860989355042, "grad_norm": 0.9055795669555664, "learning_rate": 4.5210166177908107e-05, "loss": 0.2701, "step": 10940 }, { "epoch": 1.7127426424546024, "grad_norm": 0.5900222659111023, "learning_rate": 4.518572825024437e-05, "loss": 0.2285, "step": 10941 }, { "epoch": 1.7128991859737006, "grad_norm": 0.6624120473861694, "learning_rate": 4.516129032258064e-05, "loss": 0.2342, "step": 10942 }, { "epoch": 1.713055729492799, "grad_norm": 0.567440927028656, "learning_rate": 4.51368523949169e-05, "loss": 0.258, "step": 10943 }, { "epoch": 1.7132122730118973, "grad_norm": 0.45192110538482666, "learning_rate": 4.511241446725317e-05, "loss": 0.1772, "step": 10944 }, { "epoch": 1.7133688165309957, "grad_norm": 1.0923038721084595, "learning_rate": 4.508797653958944e-05, "loss": 0.2843, "step": 10945 }, { "epoch": 
1.713525360050094, "grad_norm": 0.739136278629303, "learning_rate": 4.50635386119257e-05, "loss": 0.2207, "step": 10946 }, { "epoch": 1.7136819035691921, "grad_norm": 0.9049036502838135, "learning_rate": 4.503910068426197e-05, "loss": 0.1736, "step": 10947 }, { "epoch": 1.7138384470882906, "grad_norm": 1.2949659824371338, "learning_rate": 4.501466275659824e-05, "loss": 0.2189, "step": 10948 }, { "epoch": 1.7139949906073888, "grad_norm": 0.8264017701148987, "learning_rate": 4.4990224828934497e-05, "loss": 0.2894, "step": 10949 }, { "epoch": 1.7141515341264872, "grad_norm": 0.9105465412139893, "learning_rate": 4.496578690127077e-05, "loss": 0.2483, "step": 10950 }, { "epoch": 1.7143080776455855, "grad_norm": 0.6218265891075134, "learning_rate": 4.494134897360703e-05, "loss": 0.2536, "step": 10951 }, { "epoch": 1.7144646211646837, "grad_norm": 1.0677177906036377, "learning_rate": 4.49169110459433e-05, "loss": 0.464, "step": 10952 }, { "epoch": 1.7146211646837821, "grad_norm": 1.5835673809051514, "learning_rate": 4.489247311827956e-05, "loss": 0.4199, "step": 10953 }, { "epoch": 1.7147777082028806, "grad_norm": 3.1192257404327393, "learning_rate": 4.4868035190615834e-05, "loss": 0.4786, "step": 10954 }, { "epoch": 1.7149342517219788, "grad_norm": 2.4409453868865967, "learning_rate": 4.484359726295209e-05, "loss": 0.3151, "step": 10955 }, { "epoch": 1.715090795241077, "grad_norm": 1.3552123308181763, "learning_rate": 4.4819159335288364e-05, "loss": 0.4947, "step": 10956 }, { "epoch": 1.7152473387601752, "grad_norm": 1.606695294380188, "learning_rate": 4.479472140762463e-05, "loss": 0.6383, "step": 10957 }, { "epoch": 1.7154038822792737, "grad_norm": 3.5459699630737305, "learning_rate": 4.477028347996089e-05, "loss": 0.5953, "step": 10958 }, { "epoch": 1.715560425798372, "grad_norm": 1.4051164388656616, "learning_rate": 4.474584555229716e-05, "loss": 0.5194, "step": 10959 }, { "epoch": 1.7157169693174703, "grad_norm": 1.265329122543335, "learning_rate": 
4.472140762463343e-05, "loss": 0.3916, "step": 10960 }, { "epoch": 1.7158735128365685, "grad_norm": 2.5221071243286133, "learning_rate": 4.469696969696969e-05, "loss": 0.5158, "step": 10961 }, { "epoch": 1.7160300563556667, "grad_norm": 2.5884549617767334, "learning_rate": 4.467253176930596e-05, "loss": 0.3095, "step": 10962 }, { "epoch": 1.7161865998747652, "grad_norm": 2.0343172550201416, "learning_rate": 4.4648093841642224e-05, "loss": 0.6869, "step": 10963 }, { "epoch": 1.7163431433938636, "grad_norm": 2.2614247798919678, "learning_rate": 4.462365591397849e-05, "loss": 0.7452, "step": 10964 }, { "epoch": 1.7164996869129618, "grad_norm": 1.4859260320663452, "learning_rate": 4.4599217986314754e-05, "loss": 0.4323, "step": 10965 }, { "epoch": 1.71665623043206, "grad_norm": 2.1878414154052734, "learning_rate": 4.4574780058651025e-05, "loss": 0.6757, "step": 10966 }, { "epoch": 1.7168127739511583, "grad_norm": 2.8746695518493652, "learning_rate": 4.455034213098728e-05, "loss": 0.6629, "step": 10967 }, { "epoch": 1.7169693174702567, "grad_norm": 2.4773285388946533, "learning_rate": 4.4525904203323555e-05, "loss": 0.526, "step": 10968 }, { "epoch": 1.7171258609893552, "grad_norm": 4.917586803436279, "learning_rate": 4.450146627565982e-05, "loss": 0.735, "step": 10969 }, { "epoch": 1.7172824045084534, "grad_norm": 3.6827588081359863, "learning_rate": 4.4477028347996084e-05, "loss": 0.9001, "step": 10970 }, { "epoch": 1.7174389480275516, "grad_norm": 3.3840487003326416, "learning_rate": 4.445259042033235e-05, "loss": 0.8196, "step": 10971 }, { "epoch": 1.7175954915466498, "grad_norm": 3.665562391281128, "learning_rate": 4.442815249266862e-05, "loss": 0.8158, "step": 10972 }, { "epoch": 1.7177520350657483, "grad_norm": 1.8126392364501953, "learning_rate": 4.440371456500488e-05, "loss": 0.8043, "step": 10973 }, { "epoch": 1.7179085785848467, "grad_norm": 2.7795164585113525, "learning_rate": 4.437927663734115e-05, "loss": 0.9601, "step": 10974 }, { "epoch": 
1.718065122103945, "grad_norm": 3.174985647201538, "learning_rate": 4.4354838709677415e-05, "loss": 0.6358, "step": 10975 }, { "epoch": 1.7182216656230431, "grad_norm": 2.6935548782348633, "learning_rate": 4.433040078201368e-05, "loss": 0.6868, "step": 10976 }, { "epoch": 1.7183782091421416, "grad_norm": 3.725315809249878, "learning_rate": 4.4305962854349945e-05, "loss": 1.1501, "step": 10977 }, { "epoch": 1.7185347526612398, "grad_norm": 3.672032356262207, "learning_rate": 4.4281524926686216e-05, "loss": 1.0729, "step": 10978 }, { "epoch": 1.7186912961803382, "grad_norm": 3.4297823905944824, "learning_rate": 4.4257086999022474e-05, "loss": 0.9444, "step": 10979 }, { "epoch": 1.7188478396994364, "grad_norm": 2.7439005374908447, "learning_rate": 4.4232649071358746e-05, "loss": 0.7806, "step": 10980 }, { "epoch": 1.7190043832185347, "grad_norm": 1.5056949853897095, "learning_rate": 4.420821114369501e-05, "loss": 0.8998, "step": 10981 }, { "epoch": 1.719160926737633, "grad_norm": 4.065816879272461, "learning_rate": 4.4183773216031276e-05, "loss": 0.5133, "step": 10982 }, { "epoch": 1.7193174702567313, "grad_norm": 2.949634552001953, "learning_rate": 4.415933528836754e-05, "loss": 1.242, "step": 10983 }, { "epoch": 1.7194740137758298, "grad_norm": 3.2532920837402344, "learning_rate": 4.413489736070381e-05, "loss": 0.8277, "step": 10984 }, { "epoch": 1.719630557294928, "grad_norm": 2.431811809539795, "learning_rate": 4.411045943304007e-05, "loss": 0.4927, "step": 10985 }, { "epoch": 1.7197871008140262, "grad_norm": 1.749243140220642, "learning_rate": 4.408602150537634e-05, "loss": 0.6917, "step": 10986 }, { "epoch": 1.7199436443331246, "grad_norm": 3.768820285797119, "learning_rate": 4.4061583577712606e-05, "loss": 0.5891, "step": 10987 }, { "epoch": 1.720100187852223, "grad_norm": 1.9826332330703735, "learning_rate": 4.403714565004887e-05, "loss": 0.6472, "step": 10988 }, { "epoch": 1.7202567313713213, "grad_norm": 1.206260323524475, "learning_rate": 
4.4012707722385136e-05, "loss": 0.4265, "step": 10989 }, { "epoch": 1.7204132748904195, "grad_norm": 0.634954571723938, "learning_rate": 4.398826979472141e-05, "loss": 0.1923, "step": 10990 }, { "epoch": 1.7205698184095177, "grad_norm": 0.9732500910758972, "learning_rate": 4.3963831867057666e-05, "loss": 0.1833, "step": 10991 }, { "epoch": 1.7207263619286162, "grad_norm": 0.934144139289856, "learning_rate": 4.393939393939394e-05, "loss": 0.2087, "step": 10992 }, { "epoch": 1.7208829054477146, "grad_norm": 1.1597187519073486, "learning_rate": 4.39149560117302e-05, "loss": 0.3764, "step": 10993 }, { "epoch": 1.7210394489668128, "grad_norm": 0.7562986612319946, "learning_rate": 4.389051808406647e-05, "loss": 0.1977, "step": 10994 }, { "epoch": 1.721195992485911, "grad_norm": 0.9375834465026855, "learning_rate": 4.386608015640273e-05, "loss": 0.3025, "step": 10995 }, { "epoch": 1.7213525360050093, "grad_norm": 0.608613908290863, "learning_rate": 4.3841642228739e-05, "loss": 0.2857, "step": 10996 }, { "epoch": 1.7215090795241077, "grad_norm": 0.8662186861038208, "learning_rate": 4.381720430107526e-05, "loss": 0.2124, "step": 10997 }, { "epoch": 1.7216656230432061, "grad_norm": 0.936297595500946, "learning_rate": 4.379276637341153e-05, "loss": 0.25, "step": 10998 }, { "epoch": 1.7218221665623044, "grad_norm": 3.5861268043518066, "learning_rate": 4.37683284457478e-05, "loss": 0.3277, "step": 10999 }, { "epoch": 1.7219787100814026, "grad_norm": 1.0144635438919067, "learning_rate": 4.374389051808406e-05, "loss": 0.3815, "step": 11000 }, { "epoch": 1.7219787100814026, "eval_loss": 0.49481523036956787, "eval_runtime": 205.6846, "eval_samples_per_second": 60.204, "eval_steps_per_second": 3.763, "eval_wer": 0.31803982404318165, "step": 11000 }, { "epoch": 1.7221352536005008, "grad_norm": 1.0101224184036255, "learning_rate": 4.371945259042033e-05, "loss": 0.4074, "step": 11001 }, { "epoch": 1.7222917971195992, "grad_norm": 1.773930549621582, "learning_rate": 
4.36950146627566e-05, "loss": 0.3936, "step": 11002 }, { "epoch": 1.7224483406386977, "grad_norm": 1.4694632291793823, "learning_rate": 4.367057673509286e-05, "loss": 0.4899, "step": 11003 }, { "epoch": 1.722604884157796, "grad_norm": 2.8984897136688232, "learning_rate": 4.364613880742913e-05, "loss": 0.4263, "step": 11004 }, { "epoch": 1.722761427676894, "grad_norm": 1.7322542667388916, "learning_rate": 4.362170087976539e-05, "loss": 0.4388, "step": 11005 }, { "epoch": 1.7229179711959923, "grad_norm": 1.0889065265655518, "learning_rate": 4.359726295210166e-05, "loss": 0.441, "step": 11006 }, { "epoch": 1.7230745147150908, "grad_norm": 0.9822616577148438, "learning_rate": 4.357282502443792e-05, "loss": 0.3078, "step": 11007 }, { "epoch": 1.7232310582341892, "grad_norm": 0.8187744617462158, "learning_rate": 4.3548387096774194e-05, "loss": 0.3078, "step": 11008 }, { "epoch": 1.7233876017532874, "grad_norm": 2.007962465286255, "learning_rate": 4.352394916911045e-05, "loss": 0.5249, "step": 11009 }, { "epoch": 1.7235441452723856, "grad_norm": 1.7840535640716553, "learning_rate": 4.3499511241446724e-05, "loss": 0.7433, "step": 11010 }, { "epoch": 1.723700688791484, "grad_norm": 1.7337597608566284, "learning_rate": 4.347507331378299e-05, "loss": 0.5195, "step": 11011 }, { "epoch": 1.7238572323105823, "grad_norm": 1.91448175907135, "learning_rate": 4.3450635386119254e-05, "loss": 0.3144, "step": 11012 }, { "epoch": 1.7240137758296807, "grad_norm": 2.5847277641296387, "learning_rate": 4.342619745845552e-05, "loss": 0.5275, "step": 11013 }, { "epoch": 1.724170319348779, "grad_norm": 8.203601837158203, "learning_rate": 4.340175953079179e-05, "loss": 0.6911, "step": 11014 }, { "epoch": 1.7243268628678772, "grad_norm": 2.416170835494995, "learning_rate": 4.337732160312805e-05, "loss": 0.6947, "step": 11015 }, { "epoch": 1.7244834063869756, "grad_norm": 2.0026931762695312, "learning_rate": 4.335288367546432e-05, "loss": 0.4616, "step": 11016 }, { "epoch": 1.724639949906074, 
"grad_norm": 2.050572633743286, "learning_rate": 4.3328445747800584e-05, "loss": 0.4845, "step": 11017 }, { "epoch": 1.7247964934251723, "grad_norm": 2.2032740116119385, "learning_rate": 4.330400782013685e-05, "loss": 0.469, "step": 11018 }, { "epoch": 1.7249530369442705, "grad_norm": 2.604659080505371, "learning_rate": 4.3279569892473114e-05, "loss": 0.8241, "step": 11019 }, { "epoch": 1.7251095804633687, "grad_norm": 1.5613411664962769, "learning_rate": 4.3255131964809385e-05, "loss": 0.5109, "step": 11020 }, { "epoch": 1.7252661239824671, "grad_norm": 1.821577548980713, "learning_rate": 4.3230694037145644e-05, "loss": 0.5093, "step": 11021 }, { "epoch": 1.7254226675015656, "grad_norm": 2.0643904209136963, "learning_rate": 4.3206256109481915e-05, "loss": 0.5367, "step": 11022 }, { "epoch": 1.7255792110206638, "grad_norm": 2.7602663040161133, "learning_rate": 4.318181818181818e-05, "loss": 1.0387, "step": 11023 }, { "epoch": 1.725735754539762, "grad_norm": 5.380650520324707, "learning_rate": 4.3157380254154445e-05, "loss": 1.1233, "step": 11024 }, { "epoch": 1.7258922980588602, "grad_norm": 3.1562740802764893, "learning_rate": 4.313294232649071e-05, "loss": 1.0676, "step": 11025 }, { "epoch": 1.7260488415779587, "grad_norm": 3.140995502471924, "learning_rate": 4.310850439882698e-05, "loss": 0.8957, "step": 11026 }, { "epoch": 1.7262053850970571, "grad_norm": 2.0646467208862305, "learning_rate": 4.308406647116324e-05, "loss": 0.5783, "step": 11027 }, { "epoch": 1.7263619286161553, "grad_norm": 3.1891887187957764, "learning_rate": 4.305962854349951e-05, "loss": 1.0697, "step": 11028 }, { "epoch": 1.7265184721352536, "grad_norm": 4.30599308013916, "learning_rate": 4.3035190615835775e-05, "loss": 1.599, "step": 11029 }, { "epoch": 1.7266750156543518, "grad_norm": 1.5007469654083252, "learning_rate": 4.301075268817204e-05, "loss": 0.8014, "step": 11030 }, { "epoch": 1.7268315591734502, "grad_norm": 3.0806431770324707, "learning_rate": 4.2986314760508305e-05, "loss": 
1.201, "step": 11031 }, { "epoch": 1.7269881026925487, "grad_norm": 3.5166988372802734, "learning_rate": 4.296187683284458e-05, "loss": 0.8214, "step": 11032 }, { "epoch": 1.7271446462116469, "grad_norm": 2.085771083831787, "learning_rate": 4.2937438905180835e-05, "loss": 1.237, "step": 11033 }, { "epoch": 1.727301189730745, "grad_norm": 1.5638984441757202, "learning_rate": 4.2913000977517106e-05, "loss": 0.512, "step": 11034 }, { "epoch": 1.7274577332498433, "grad_norm": 1.8894531726837158, "learning_rate": 4.288856304985337e-05, "loss": 0.7843, "step": 11035 }, { "epoch": 1.7276142767689417, "grad_norm": 3.0840094089508057, "learning_rate": 4.2864125122189636e-05, "loss": 0.777, "step": 11036 }, { "epoch": 1.7277708202880402, "grad_norm": 3.5215682983398438, "learning_rate": 4.28396871945259e-05, "loss": 0.9629, "step": 11037 }, { "epoch": 1.7279273638071384, "grad_norm": 2.8310680389404297, "learning_rate": 4.281524926686217e-05, "loss": 0.923, "step": 11038 }, { "epoch": 1.7280839073262366, "grad_norm": 1.4804656505584717, "learning_rate": 4.279081133919843e-05, "loss": 0.2002, "step": 11039 }, { "epoch": 1.7282404508453348, "grad_norm": 0.7842070460319519, "learning_rate": 4.27663734115347e-05, "loss": 0.2244, "step": 11040 }, { "epoch": 1.7283969943644333, "grad_norm": 0.7354947328567505, "learning_rate": 4.274193548387097e-05, "loss": 0.277, "step": 11041 }, { "epoch": 1.7285535378835317, "grad_norm": 0.7232000827789307, "learning_rate": 4.2717497556207225e-05, "loss": 0.1575, "step": 11042 }, { "epoch": 1.72871008140263, "grad_norm": 2.255171537399292, "learning_rate": 4.2693059628543496e-05, "loss": 0.2009, "step": 11043 }, { "epoch": 1.7288666249217282, "grad_norm": 1.2567336559295654, "learning_rate": 4.266862170087977e-05, "loss": 0.3623, "step": 11044 }, { "epoch": 1.7290231684408266, "grad_norm": 0.5677285194396973, "learning_rate": 4.2644183773216026e-05, "loss": 0.1847, "step": 11045 }, { "epoch": 1.7291797119599248, "grad_norm": 4.918628215789795, 
"learning_rate": 4.26197458455523e-05, "loss": 0.2151, "step": 11046 }, { "epoch": 1.7293362554790233, "grad_norm": 0.9489184617996216, "learning_rate": 4.259530791788856e-05, "loss": 0.3459, "step": 11047 }, { "epoch": 1.7294927989981215, "grad_norm": 3.799226999282837, "learning_rate": 4.257086999022482e-05, "loss": 0.3196, "step": 11048 }, { "epoch": 1.7296493425172197, "grad_norm": 0.8148688673973083, "learning_rate": 4.254643206256109e-05, "loss": 0.2697, "step": 11049 }, { "epoch": 1.7298058860363181, "grad_norm": 0.8470799922943115, "learning_rate": 4.252199413489736e-05, "loss": 0.3325, "step": 11050 }, { "epoch": 1.7299624295554166, "grad_norm": 0.811160147190094, "learning_rate": 4.249755620723362e-05, "loss": 0.3461, "step": 11051 }, { "epoch": 1.7301189730745148, "grad_norm": 1.2158924341201782, "learning_rate": 4.247311827956989e-05, "loss": 0.5058, "step": 11052 }, { "epoch": 1.730275516593613, "grad_norm": 7.7890825271606445, "learning_rate": 4.244868035190616e-05, "loss": 0.3067, "step": 11053 }, { "epoch": 1.7304320601127112, "grad_norm": 2.8912837505340576, "learning_rate": 4.2424242424242416e-05, "loss": 0.6897, "step": 11054 }, { "epoch": 1.7305886036318097, "grad_norm": 1.3283599615097046, "learning_rate": 4.239980449657869e-05, "loss": 0.533, "step": 11055 }, { "epoch": 1.730745147150908, "grad_norm": 0.8418616056442261, "learning_rate": 4.237536656891496e-05, "loss": 0.2678, "step": 11056 }, { "epoch": 1.7309016906700063, "grad_norm": 1.1012041568756104, "learning_rate": 4.235092864125122e-05, "loss": 0.2165, "step": 11057 }, { "epoch": 1.7310582341891045, "grad_norm": 1.7594144344329834, "learning_rate": 4.232649071358749e-05, "loss": 0.4197, "step": 11058 }, { "epoch": 1.7312147777082028, "grad_norm": 2.9779410362243652, "learning_rate": 4.230205278592375e-05, "loss": 0.7293, "step": 11059 }, { "epoch": 1.7313713212273012, "grad_norm": 2.907015085220337, "learning_rate": 4.227761485826001e-05, "loss": 0.3366, "step": 11060 }, { "epoch": 
1.7315278647463996, "grad_norm": 1.4411675930023193, "learning_rate": 4.225317693059628e-05, "loss": 0.3136, "step": 11061 }, { "epoch": 1.7316844082654979, "grad_norm": 1.9844144582748413, "learning_rate": 4.2228739002932555e-05, "loss": 0.6951, "step": 11062 }, { "epoch": 1.731840951784596, "grad_norm": 1.4029967784881592, "learning_rate": 4.220430107526881e-05, "loss": 0.4267, "step": 11063 }, { "epoch": 1.7319974953036943, "grad_norm": 2.417787551879883, "learning_rate": 4.2179863147605084e-05, "loss": 0.7369, "step": 11064 }, { "epoch": 1.7321540388227927, "grad_norm": 2.0894277095794678, "learning_rate": 4.215542521994134e-05, "loss": 0.7088, "step": 11065 }, { "epoch": 1.7323105823418912, "grad_norm": 3.367473840713501, "learning_rate": 4.213098729227761e-05, "loss": 0.5431, "step": 11066 }, { "epoch": 1.7324671258609894, "grad_norm": 1.4022886753082275, "learning_rate": 4.210654936461388e-05, "loss": 0.3418, "step": 11067 }, { "epoch": 1.7326236693800876, "grad_norm": 3.337224245071411, "learning_rate": 4.2082111436950137e-05, "loss": 0.7644, "step": 11068 }, { "epoch": 1.7327802128991858, "grad_norm": 3.8841137886047363, "learning_rate": 4.205767350928641e-05, "loss": 0.7558, "step": 11069 }, { "epoch": 1.7329367564182843, "grad_norm": 1.932528018951416, "learning_rate": 4.203323558162267e-05, "loss": 0.4611, "step": 11070 }, { "epoch": 1.7330932999373827, "grad_norm": 2.1921749114990234, "learning_rate": 4.200879765395894e-05, "loss": 0.3621, "step": 11071 }, { "epoch": 1.733249843456481, "grad_norm": 1.8706334829330444, "learning_rate": 4.19843597262952e-05, "loss": 0.8474, "step": 11072 }, { "epoch": 1.7334063869755791, "grad_norm": 2.5191290378570557, "learning_rate": 4.1959921798631474e-05, "loss": 0.6197, "step": 11073 }, { "epoch": 1.7335629304946774, "grad_norm": 2.9892024993896484, "learning_rate": 4.193548387096773e-05, "loss": 0.5236, "step": 11074 }, { "epoch": 1.7337194740137758, "grad_norm": 18.517911911010742, "learning_rate": 
4.1911045943304004e-05, "loss": 1.1508, "step": 11075 }, { "epoch": 1.7338760175328742, "grad_norm": 2.092811346054077, "learning_rate": 4.188660801564027e-05, "loss": 0.9638, "step": 11076 }, { "epoch": 1.7340325610519725, "grad_norm": 2.0851621627807617, "learning_rate": 4.186217008797653e-05, "loss": 0.8178, "step": 11077 }, { "epoch": 1.7341891045710707, "grad_norm": 2.2361598014831543, "learning_rate": 4.18377321603128e-05, "loss": 0.7663, "step": 11078 }, { "epoch": 1.7343456480901691, "grad_norm": 2.4493370056152344, "learning_rate": 4.181329423264907e-05, "loss": 1.1463, "step": 11079 }, { "epoch": 1.7345021916092673, "grad_norm": 2.9259376525878906, "learning_rate": 4.178885630498533e-05, "loss": 1.4037, "step": 11080 }, { "epoch": 1.7346587351283658, "grad_norm": 2.476630926132202, "learning_rate": 4.17644183773216e-05, "loss": 1.0048, "step": 11081 }, { "epoch": 1.734815278647464, "grad_norm": 2.6673030853271484, "learning_rate": 4.1739980449657864e-05, "loss": 1.3103, "step": 11082 }, { "epoch": 1.7349718221665622, "grad_norm": 2.157806873321533, "learning_rate": 4.171554252199413e-05, "loss": 0.9964, "step": 11083 }, { "epoch": 1.7351283656856606, "grad_norm": 1.6744898557662964, "learning_rate": 4.1691104594330394e-05, "loss": 0.4646, "step": 11084 }, { "epoch": 1.735284909204759, "grad_norm": 2.1040098667144775, "learning_rate": 4.1666666666666665e-05, "loss": 0.6487, "step": 11085 }, { "epoch": 1.7354414527238573, "grad_norm": 1.4487780332565308, "learning_rate": 4.164222873900292e-05, "loss": 0.4765, "step": 11086 }, { "epoch": 1.7355979962429555, "grad_norm": 1.442169189453125, "learning_rate": 4.1617790811339195e-05, "loss": 0.5098, "step": 11087 }, { "epoch": 1.7357545397620537, "grad_norm": 1.7279036045074463, "learning_rate": 4.159335288367546e-05, "loss": 1.1097, "step": 11088 }, { "epoch": 1.7359110832811522, "grad_norm": 0.6806561350822449, "learning_rate": 4.1568914956011724e-05, "loss": 0.2139, "step": 11089 }, { "epoch": 
1.7360676268002506, "grad_norm": 0.7717947959899902, "learning_rate": 4.154447702834799e-05, "loss": 0.201, "step": 11090 }, { "epoch": 1.7362241703193488, "grad_norm": 1.283336877822876, "learning_rate": 4.152003910068426e-05, "loss": 0.2105, "step": 11091 }, { "epoch": 1.736380713838447, "grad_norm": 0.7973412275314331, "learning_rate": 4.149560117302052e-05, "loss": 0.2479, "step": 11092 }, { "epoch": 1.7365372573575453, "grad_norm": 1.4070724248886108, "learning_rate": 4.147116324535679e-05, "loss": 0.2806, "step": 11093 }, { "epoch": 1.7366938008766437, "grad_norm": 0.8974490761756897, "learning_rate": 4.1446725317693055e-05, "loss": 0.1904, "step": 11094 }, { "epoch": 1.7368503443957422, "grad_norm": 1.0805352926254272, "learning_rate": 4.142228739002932e-05, "loss": 0.3231, "step": 11095 }, { "epoch": 1.7370068879148404, "grad_norm": 0.7345172762870789, "learning_rate": 4.1397849462365585e-05, "loss": 0.2135, "step": 11096 }, { "epoch": 1.7371634314339386, "grad_norm": 1.2740095853805542, "learning_rate": 4.1373411534701856e-05, "loss": 0.3516, "step": 11097 }, { "epoch": 1.7373199749530368, "grad_norm": 0.7698444724082947, "learning_rate": 4.1348973607038114e-05, "loss": 0.362, "step": 11098 }, { "epoch": 1.7374765184721352, "grad_norm": 1.8207945823669434, "learning_rate": 4.1324535679374386e-05, "loss": 0.4615, "step": 11099 }, { "epoch": 1.7376330619912337, "grad_norm": 2.351137638092041, "learning_rate": 4.130009775171065e-05, "loss": 0.3479, "step": 11100 }, { "epoch": 1.737789605510332, "grad_norm": 0.9402967095375061, "learning_rate": 4.1275659824046916e-05, "loss": 0.2351, "step": 11101 }, { "epoch": 1.7379461490294301, "grad_norm": 0.9055925011634827, "learning_rate": 4.125122189638318e-05, "loss": 0.2323, "step": 11102 }, { "epoch": 1.7381026925485283, "grad_norm": 1.6312155723571777, "learning_rate": 4.122678396871945e-05, "loss": 0.3386, "step": 11103 }, { "epoch": 1.7382592360676268, "grad_norm": 1.4077454805374146, "learning_rate": 
4.120234604105571e-05, "loss": 0.4023, "step": 11104 }, { "epoch": 1.7384157795867252, "grad_norm": 1.1839890480041504, "learning_rate": 4.117790811339198e-05, "loss": 0.3056, "step": 11105 }, { "epoch": 1.7385723231058234, "grad_norm": 1.0270860195159912, "learning_rate": 4.1153470185728246e-05, "loss": 0.4321, "step": 11106 }, { "epoch": 1.7387288666249217, "grad_norm": 1.704751968383789, "learning_rate": 4.112903225806451e-05, "loss": 0.3378, "step": 11107 }, { "epoch": 1.7388854101440199, "grad_norm": 1.4541360139846802, "learning_rate": 4.1104594330400776e-05, "loss": 0.3107, "step": 11108 }, { "epoch": 1.7390419536631183, "grad_norm": 2.142528772354126, "learning_rate": 4.108015640273705e-05, "loss": 0.5536, "step": 11109 }, { "epoch": 1.7391984971822168, "grad_norm": 1.4981656074523926, "learning_rate": 4.1055718475073306e-05, "loss": 0.5362, "step": 11110 }, { "epoch": 1.739355040701315, "grad_norm": 2.808462381362915, "learning_rate": 4.103128054740958e-05, "loss": 0.5691, "step": 11111 }, { "epoch": 1.7395115842204132, "grad_norm": 1.8647723197937012, "learning_rate": 4.100684261974584e-05, "loss": 0.6461, "step": 11112 }, { "epoch": 1.7396681277395116, "grad_norm": 1.9372211694717407, "learning_rate": 4.098240469208211e-05, "loss": 0.3173, "step": 11113 }, { "epoch": 1.7398246712586098, "grad_norm": 1.7311444282531738, "learning_rate": 4.095796676441837e-05, "loss": 0.3582, "step": 11114 }, { "epoch": 1.7399812147777083, "grad_norm": 3.7500319480895996, "learning_rate": 4.093352883675464e-05, "loss": 0.7594, "step": 11115 }, { "epoch": 1.7401377582968065, "grad_norm": 1.9873162508010864, "learning_rate": 4.09090909090909e-05, "loss": 0.6123, "step": 11116 }, { "epoch": 1.7402943018159047, "grad_norm": 15.171030044555664, "learning_rate": 4.088465298142717e-05, "loss": 0.719, "step": 11117 }, { "epoch": 1.7404508453350032, "grad_norm": 6.604526996612549, "learning_rate": 4.086021505376344e-05, "loss": 0.5115, "step": 11118 }, { "epoch": 
1.7406073888541016, "grad_norm": 1.5956652164459229, "learning_rate": 4.08357771260997e-05, "loss": 0.4616, "step": 11119 }, { "epoch": 1.7407639323731998, "grad_norm": 2.011253833770752, "learning_rate": 4.081133919843597e-05, "loss": 0.4544, "step": 11120 }, { "epoch": 1.740920475892298, "grad_norm": 1.8588051795959473, "learning_rate": 4.078690127077224e-05, "loss": 0.6543, "step": 11121 }, { "epoch": 1.7410770194113963, "grad_norm": 3.617205858230591, "learning_rate": 4.07624633431085e-05, "loss": 0.8809, "step": 11122 }, { "epoch": 1.7412335629304947, "grad_norm": 5.1503825187683105, "learning_rate": 4.073802541544477e-05, "loss": 0.3365, "step": 11123 }, { "epoch": 1.7413901064495931, "grad_norm": 4.6813154220581055, "learning_rate": 4.071358748778103e-05, "loss": 0.8173, "step": 11124 }, { "epoch": 1.7415466499686914, "grad_norm": 2.9903922080993652, "learning_rate": 4.06891495601173e-05, "loss": 0.7053, "step": 11125 }, { "epoch": 1.7417031934877896, "grad_norm": 3.1063003540039062, "learning_rate": 4.066471163245356e-05, "loss": 0.7405, "step": 11126 }, { "epoch": 1.7418597370068878, "grad_norm": 2.323392868041992, "learning_rate": 4.0640273704789834e-05, "loss": 0.837, "step": 11127 }, { "epoch": 1.7420162805259862, "grad_norm": 4.709715843200684, "learning_rate": 4.061583577712609e-05, "loss": 1.2962, "step": 11128 }, { "epoch": 1.7421728240450847, "grad_norm": 3.221708059310913, "learning_rate": 4.0591397849462364e-05, "loss": 1.254, "step": 11129 }, { "epoch": 1.7423293675641829, "grad_norm": 2.867579936981201, "learning_rate": 4.056695992179863e-05, "loss": 1.2712, "step": 11130 }, { "epoch": 1.742485911083281, "grad_norm": 2.7349681854248047, "learning_rate": 4.0542521994134894e-05, "loss": 1.0635, "step": 11131 }, { "epoch": 1.7426424546023793, "grad_norm": 1.872054100036621, "learning_rate": 4.051808406647116e-05, "loss": 0.8331, "step": 11132 }, { "epoch": 1.7427989981214778, "grad_norm": 4.403456211090088, "learning_rate": 4.049364613880743e-05, 
"loss": 1.7437, "step": 11133 }, { "epoch": 1.7429555416405762, "grad_norm": 2.841325283050537, "learning_rate": 4.046920821114369e-05, "loss": 0.5395, "step": 11134 }, { "epoch": 1.7431120851596744, "grad_norm": NaN, "learning_rate": 4.046920821114369e-05, "loss": 0.0, "step": 11135 }, { "epoch": 1.7432686286787726, "grad_norm": 1.3489454984664917, "learning_rate": 4.044477028347996e-05, "loss": 0.3682, "step": 11136 }, { "epoch": 1.7434251721978709, "grad_norm": 2.201267719268799, "learning_rate": 4.0420332355816224e-05, "loss": 0.7724, "step": 11137 }, { "epoch": 1.7435817157169693, "grad_norm": 2.0831868648529053, "learning_rate": 4.039589442815249e-05, "loss": 1.2445, "step": 11138 }, { "epoch": 1.7437382592360677, "grad_norm": 1.2250694036483765, "learning_rate": 4.0371456500488754e-05, "loss": 0.2516, "step": 11139 }, { "epoch": 1.743894802755166, "grad_norm": 1.6923754215240479, "learning_rate": 4.0347018572825025e-05, "loss": 0.3832, "step": 11140 }, { "epoch": 1.7440513462742642, "grad_norm": 1.450895071029663, "learning_rate": 4.0322580645161284e-05, "loss": 0.1989, "step": 11141 }, { "epoch": 1.7442078897933626, "grad_norm": 2.482814311981201, "learning_rate": 4.0298142717497555e-05, "loss": 0.1985, "step": 11142 }, { "epoch": 1.7443644333124608, "grad_norm": 0.9089357852935791, "learning_rate": 4.027370478983382e-05, "loss": 0.2036, "step": 11143 }, { "epoch": 1.7445209768315593, "grad_norm": 0.5683241486549377, "learning_rate": 4.0249266862170085e-05, "loss": 0.2729, "step": 11144 }, { "epoch": 1.7446775203506575, "grad_norm": 1.6889972686767578, "learning_rate": 4.022482893450635e-05, "loss": 0.279, "step": 11145 }, { "epoch": 1.7448340638697557, "grad_norm": 0.7691648602485657, "learning_rate": 4.020039100684262e-05, "loss": 0.2574, "step": 11146 }, { "epoch": 1.7449906073888541, "grad_norm": 3.3021767139434814, "learning_rate": 4.017595307917888e-05, "loss": 0.2245, "step": 11147 }, { "epoch": 1.7451471509079524, "grad_norm": 1.7656270265579224, 
"learning_rate": 4.015151515151515e-05, "loss": 0.327, "step": 11148 }, { "epoch": 1.7453036944270508, "grad_norm": 1.371148705482483, "learning_rate": 4.0127077223851415e-05, "loss": 0.2726, "step": 11149 }, { "epoch": 1.745460237946149, "grad_norm": 0.8461998701095581, "learning_rate": 4.010263929618768e-05, "loss": 0.2061, "step": 11150 }, { "epoch": 1.7456167814652472, "grad_norm": 1.068411111831665, "learning_rate": 4.0078201368523945e-05, "loss": 0.1667, "step": 11151 }, { "epoch": 1.7457733249843457, "grad_norm": 1.8549538850784302, "learning_rate": 4.005376344086022e-05, "loss": 0.3076, "step": 11152 }, { "epoch": 1.7459298685034441, "grad_norm": 1.2925175428390503, "learning_rate": 4.0029325513196475e-05, "loss": 0.4524, "step": 11153 }, { "epoch": 1.7460864120225423, "grad_norm": 1.1557775735855103, "learning_rate": 4.0004887585532746e-05, "loss": 0.2918, "step": 11154 }, { "epoch": 1.7462429555416406, "grad_norm": 1.3997137546539307, "learning_rate": 3.998044965786901e-05, "loss": 0.4363, "step": 11155 }, { "epoch": 1.7463994990607388, "grad_norm": 1.8607534170150757, "learning_rate": 3.9956011730205276e-05, "loss": 0.6176, "step": 11156 }, { "epoch": 1.7465560425798372, "grad_norm": 2.017686605453491, "learning_rate": 3.993157380254154e-05, "loss": 0.6712, "step": 11157 }, { "epoch": 1.7467125860989356, "grad_norm": 2.3117191791534424, "learning_rate": 3.990713587487781e-05, "loss": 0.553, "step": 11158 }, { "epoch": 1.7468691296180339, "grad_norm": 2.349550247192383, "learning_rate": 3.988269794721407e-05, "loss": 0.3159, "step": 11159 }, { "epoch": 1.747025673137132, "grad_norm": 2.04817533493042, "learning_rate": 3.985826001955034e-05, "loss": 0.5679, "step": 11160 }, { "epoch": 1.7471822166562303, "grad_norm": 7.92338228225708, "learning_rate": 3.983382209188661e-05, "loss": 0.4161, "step": 11161 }, { "epoch": 1.7473387601753287, "grad_norm": 2.4229795932769775, "learning_rate": 3.980938416422287e-05, "loss": 0.4948, "step": 11162 }, { "epoch": 
1.7474953036944272, "grad_norm": 3.308713436126709, "learning_rate": 3.9784946236559136e-05, "loss": 0.7086, "step": 11163 }, { "epoch": 1.7476518472135254, "grad_norm": 1.2768535614013672, "learning_rate": 3.976050830889541e-05, "loss": 0.3026, "step": 11164 }, { "epoch": 1.7478083907326236, "grad_norm": 2.1575565338134766, "learning_rate": 3.9736070381231666e-05, "loss": 0.6669, "step": 11165 }, { "epoch": 1.7479649342517218, "grad_norm": 2.513392448425293, "learning_rate": 3.971163245356794e-05, "loss": 0.5661, "step": 11166 }, { "epoch": 1.7481214777708203, "grad_norm": 2.0377068519592285, "learning_rate": 3.96871945259042e-05, "loss": 0.3623, "step": 11167 }, { "epoch": 1.7482780212899187, "grad_norm": 3.4843387603759766, "learning_rate": 3.966275659824047e-05, "loss": 1.0925, "step": 11168 }, { "epoch": 1.748434564809017, "grad_norm": 2.729309320449829, "learning_rate": 3.963831867057673e-05, "loss": 0.7483, "step": 11169 }, { "epoch": 1.7485911083281152, "grad_norm": 3.7597618103027344, "learning_rate": 3.9613880742913e-05, "loss": 0.5983, "step": 11170 }, { "epoch": 1.7487476518472134, "grad_norm": 3.6038522720336914, "learning_rate": 3.958944281524926e-05, "loss": 0.7542, "step": 11171 }, { "epoch": 1.7489041953663118, "grad_norm": 2.91158390045166, "learning_rate": 3.956500488758553e-05, "loss": 0.9845, "step": 11172 }, { "epoch": 1.7490607388854102, "grad_norm": 5.084097385406494, "learning_rate": 3.95405669599218e-05, "loss": 0.6904, "step": 11173 }, { "epoch": 1.7492172824045085, "grad_norm": 2.9743316173553467, "learning_rate": 3.951612903225806e-05, "loss": 0.6184, "step": 11174 }, { "epoch": 1.7493738259236067, "grad_norm": 5.7044172286987305, "learning_rate": 3.949169110459433e-05, "loss": 0.5974, "step": 11175 }, { "epoch": 1.7495303694427051, "grad_norm": 2.698913097381592, "learning_rate": 3.94672531769306e-05, "loss": 1.2483, "step": 11176 }, { "epoch": 1.7496869129618033, "grad_norm": 3.227285623550415, "learning_rate": 3.944281524926686e-05, 
"loss": 0.9042, "step": 11177 }, { "epoch": 1.7498434564809018, "grad_norm": 3.3017385005950928, "learning_rate": 3.941837732160313e-05, "loss": 0.9025, "step": 11178 }, { "epoch": 1.75, "grad_norm": 3.2290198802948, "learning_rate": 3.939393939393939e-05, "loss": 0.437, "step": 11179 }, { "epoch": 1.7501565435190982, "grad_norm": 3.120434522628784, "learning_rate": 3.936950146627565e-05, "loss": 0.7402, "step": 11180 }, { "epoch": 1.7503130870381967, "grad_norm": 2.314493417739868, "learning_rate": 3.934506353861192e-05, "loss": 0.5906, "step": 11181 }, { "epoch": 1.7504696305572949, "grad_norm": 1.895832896232605, "learning_rate": 3.9320625610948195e-05, "loss": 1.206, "step": 11182 }, { "epoch": 1.7506261740763933, "grad_norm": 1.9516853094100952, "learning_rate": 3.929618768328445e-05, "loss": 1.0012, "step": 11183 }, { "epoch": 1.7507827175954915, "grad_norm": 1.7214974164962769, "learning_rate": 3.9271749755620724e-05, "loss": 0.6269, "step": 11184 }, { "epoch": 1.7509392611145898, "grad_norm": 2.6578586101531982, "learning_rate": 3.924731182795699e-05, "loss": 0.8477, "step": 11185 }, { "epoch": 1.7510958046336882, "grad_norm": 2.1132657527923584, "learning_rate": 3.922287390029325e-05, "loss": 0.7678, "step": 11186 }, { "epoch": 1.7512523481527866, "grad_norm": 3.0268445014953613, "learning_rate": 3.919843597262952e-05, "loss": 0.6208, "step": 11187 }, { "epoch": 1.7514088916718848, "grad_norm": 4.13357400894165, "learning_rate": 3.917399804496579e-05, "loss": 0.8431, "step": 11188 }, { "epoch": 1.751565435190983, "grad_norm": 0.46277809143066406, "learning_rate": 3.914956011730205e-05, "loss": 0.1721, "step": 11189 }, { "epoch": 1.7517219787100813, "grad_norm": 0.6381204724311829, "learning_rate": 3.912512218963832e-05, "loss": 0.271, "step": 11190 }, { "epoch": 1.7518785222291797, "grad_norm": 1.2064273357391357, "learning_rate": 3.9100684261974585e-05, "loss": 0.3409, "step": 11191 }, { "epoch": 1.7520350657482782, "grad_norm": 0.6451768279075623, 
"learning_rate": 3.907624633431084e-05, "loss": 0.2178, "step": 11192 }, { "epoch": 1.7521916092673764, "grad_norm": 0.9377066493034363, "learning_rate": 3.9051808406647114e-05, "loss": 0.2548, "step": 11193 }, { "epoch": 1.7523481527864746, "grad_norm": 1.069899320602417, "learning_rate": 3.902737047898337e-05, "loss": 0.2966, "step": 11194 }, { "epoch": 1.7525046963055728, "grad_norm": 1.6345460414886475, "learning_rate": 3.9002932551319644e-05, "loss": 0.211, "step": 11195 }, { "epoch": 1.7526612398246713, "grad_norm": 1.789792537689209, "learning_rate": 3.8978494623655915e-05, "loss": 0.2923, "step": 11196 }, { "epoch": 1.7528177833437697, "grad_norm": 1.5872666835784912, "learning_rate": 3.895405669599217e-05, "loss": 0.5062, "step": 11197 }, { "epoch": 1.752974326862868, "grad_norm": 2.3181369304656982, "learning_rate": 3.892961876832844e-05, "loss": 0.2508, "step": 11198 }, { "epoch": 1.7531308703819661, "grad_norm": 2.8838610649108887, "learning_rate": 3.890518084066471e-05, "loss": 0.3586, "step": 11199 }, { "epoch": 1.7532874139010644, "grad_norm": 1.250127911567688, "learning_rate": 3.888074291300097e-05, "loss": 0.2989, "step": 11200 }, { "epoch": 1.7534439574201628, "grad_norm": 0.9054256677627563, "learning_rate": 3.885630498533724e-05, "loss": 0.3618, "step": 11201 }, { "epoch": 1.7536005009392612, "grad_norm": 3.5090181827545166, "learning_rate": 3.8831867057673504e-05, "loss": 0.6206, "step": 11202 }, { "epoch": 1.7537570444583594, "grad_norm": 1.8899184465408325, "learning_rate": 3.880742913000977e-05, "loss": 0.26, "step": 11203 }, { "epoch": 1.7539135879774577, "grad_norm": 1.9521454572677612, "learning_rate": 3.8782991202346034e-05, "loss": 0.4928, "step": 11204 }, { "epoch": 1.7540701314965559, "grad_norm": 1.03826904296875, "learning_rate": 3.8758553274682305e-05, "loss": 0.3366, "step": 11205 }, { "epoch": 1.7542266750156543, "grad_norm": 0.876063346862793, "learning_rate": 3.873411534701856e-05, "loss": 0.3439, "step": 11206 }, { "epoch": 
1.7543832185347528, "grad_norm": 3.456061363220215, "learning_rate": 3.8709677419354835e-05, "loss": 0.2939, "step": 11207 }, { "epoch": 1.754539762053851, "grad_norm": 1.792235016822815, "learning_rate": 3.86852394916911e-05, "loss": 0.5492, "step": 11208 }, { "epoch": 1.7546963055729492, "grad_norm": 8.215913772583008, "learning_rate": 3.8660801564027364e-05, "loss": 0.6838, "step": 11209 }, { "epoch": 1.7548528490920476, "grad_norm": 8.348455429077148, "learning_rate": 3.863636363636363e-05, "loss": 0.3709, "step": 11210 }, { "epoch": 1.7550093926111459, "grad_norm": 1.567761778831482, "learning_rate": 3.86119257086999e-05, "loss": 0.3818, "step": 11211 }, { "epoch": 1.7551659361302443, "grad_norm": 1.7908146381378174, "learning_rate": 3.858748778103616e-05, "loss": 0.606, "step": 11212 }, { "epoch": 1.7553224796493425, "grad_norm": 2.4007041454315186, "learning_rate": 3.856304985337243e-05, "loss": 0.5006, "step": 11213 }, { "epoch": 1.7554790231684407, "grad_norm": 1.7256523370742798, "learning_rate": 3.8538611925708695e-05, "loss": 0.5744, "step": 11214 }, { "epoch": 1.7556355666875392, "grad_norm": 2.6474239826202393, "learning_rate": 3.851417399804496e-05, "loss": 0.4488, "step": 11215 }, { "epoch": 1.7557921102066374, "grad_norm": 6.710626125335693, "learning_rate": 3.8489736070381225e-05, "loss": 0.4047, "step": 11216 }, { "epoch": 1.7559486537257358, "grad_norm": 2.4664697647094727, "learning_rate": 3.8465298142717496e-05, "loss": 0.735, "step": 11217 }, { "epoch": 1.756105197244834, "grad_norm": 1.39060640335083, "learning_rate": 3.8440860215053754e-05, "loss": 0.4921, "step": 11218 }, { "epoch": 1.7562617407639323, "grad_norm": 3.047032117843628, "learning_rate": 3.8416422287390026e-05, "loss": 0.5779, "step": 11219 }, { "epoch": 1.7564182842830307, "grad_norm": 1.9049490690231323, "learning_rate": 3.839198435972629e-05, "loss": 0.6209, "step": 11220 }, { "epoch": 1.7565748278021291, "grad_norm": 2.976374626159668, "learning_rate": 
3.8367546432062556e-05, "loss": 0.4992, "step": 11221 }, { "epoch": 1.7567313713212274, "grad_norm": 2.006279706954956, "learning_rate": 3.834310850439882e-05, "loss": 0.8037, "step": 11222 }, { "epoch": 1.7568879148403256, "grad_norm": 4.789150238037109, "learning_rate": 3.831867057673509e-05, "loss": 0.9672, "step": 11223 }, { "epoch": 1.7570444583594238, "grad_norm": 4.674681186676025, "learning_rate": 3.829423264907135e-05, "loss": 0.5118, "step": 11224 }, { "epoch": 1.7572010018785222, "grad_norm": 8.977636337280273, "learning_rate": 3.826979472140762e-05, "loss": 0.7725, "step": 11225 }, { "epoch": 1.7573575453976207, "grad_norm": 3.718141794204712, "learning_rate": 3.8245356793743886e-05, "loss": 0.5044, "step": 11226 }, { "epoch": 1.757514088916719, "grad_norm": 2.4552061557769775, "learning_rate": 3.822091886608015e-05, "loss": 0.8362, "step": 11227 }, { "epoch": 1.7576706324358171, "grad_norm": 4.80755615234375, "learning_rate": 3.8196480938416416e-05, "loss": 1.0312, "step": 11228 }, { "epoch": 1.7578271759549153, "grad_norm": 5.233382225036621, "learning_rate": 3.817204301075269e-05, "loss": 1.6852, "step": 11229 }, { "epoch": 1.7579837194740138, "grad_norm": 5.555087566375732, "learning_rate": 3.8147605083088946e-05, "loss": 1.3879, "step": 11230 }, { "epoch": 1.7581402629931122, "grad_norm": 5.3640923500061035, "learning_rate": 3.812316715542522e-05, "loss": 1.2419, "step": 11231 }, { "epoch": 1.7582968065122104, "grad_norm": 1.4658650159835815, "learning_rate": 3.809872922776148e-05, "loss": 0.566, "step": 11232 }, { "epoch": 1.7584533500313086, "grad_norm": 10.554597854614258, "learning_rate": 3.807429130009775e-05, "loss": 1.0166, "step": 11233 }, { "epoch": 1.7586098935504069, "grad_norm": 2.0296871662139893, "learning_rate": 3.804985337243401e-05, "loss": 0.2771, "step": 11234 }, { "epoch": 1.7587664370695053, "grad_norm": 2.8788039684295654, "learning_rate": 3.802541544477028e-05, "loss": 0.7007, "step": 11235 }, { "epoch": 1.7589229805886037, 
"grad_norm": 3.588125228881836, "learning_rate": 3.800097751710654e-05, "loss": 0.7984, "step": 11236 }, { "epoch": 1.759079524107702, "grad_norm": 3.308217763900757, "learning_rate": 3.797653958944281e-05, "loss": 1.3304, "step": 11237 }, { "epoch": 1.7592360676268002, "grad_norm": 2.5939760208129883, "learning_rate": 3.795210166177908e-05, "loss": 1.4925, "step": 11238 }, { "epoch": 1.7593926111458984, "grad_norm": 0.596777617931366, "learning_rate": 3.792766373411534e-05, "loss": 0.2275, "step": 11239 }, { "epoch": 1.7595491546649968, "grad_norm": 1.5214952230453491, "learning_rate": 3.790322580645161e-05, "loss": 0.2228, "step": 11240 }, { "epoch": 1.7597056981840953, "grad_norm": 3.9668772220611572, "learning_rate": 3.787878787878788e-05, "loss": 0.2029, "step": 11241 }, { "epoch": 1.7598622417031935, "grad_norm": 2.6096763610839844, "learning_rate": 3.785434995112414e-05, "loss": 0.6679, "step": 11242 }, { "epoch": 1.7600187852222917, "grad_norm": 0.46722492575645447, "learning_rate": 3.782991202346041e-05, "loss": 0.2772, "step": 11243 }, { "epoch": 1.7601753287413902, "grad_norm": 1.0145384073257446, "learning_rate": 3.780547409579667e-05, "loss": 0.2544, "step": 11244 }, { "epoch": 1.7603318722604884, "grad_norm": 0.5798178315162659, "learning_rate": 3.778103616813294e-05, "loss": 0.2445, "step": 11245 }, { "epoch": 1.7604884157795868, "grad_norm": 0.9099991917610168, "learning_rate": 3.77565982404692e-05, "loss": 0.2651, "step": 11246 }, { "epoch": 1.760644959298685, "grad_norm": 0.7325129508972168, "learning_rate": 3.7732160312805474e-05, "loss": 0.2738, "step": 11247 }, { "epoch": 1.7608015028177832, "grad_norm": 0.7348865866661072, "learning_rate": 3.770772238514173e-05, "loss": 0.2302, "step": 11248 }, { "epoch": 1.7609580463368817, "grad_norm": 0.6817787885665894, "learning_rate": 3.7683284457478004e-05, "loss": 0.2498, "step": 11249 }, { "epoch": 1.7611145898559801, "grad_norm": 5.162353038787842, "learning_rate": 3.765884652981427e-05, "loss": 
0.3367, "step": 11250 }, { "epoch": 1.7612711333750783, "grad_norm": 0.7174679636955261, "learning_rate": 3.7634408602150534e-05, "loss": 0.24, "step": 11251 }, { "epoch": 1.7614276768941766, "grad_norm": 1.1641637086868286, "learning_rate": 3.76099706744868e-05, "loss": 0.3054, "step": 11252 }, { "epoch": 1.7615842204132748, "grad_norm": 1.7706217765808105, "learning_rate": 3.758553274682307e-05, "loss": 0.3698, "step": 11253 }, { "epoch": 1.7617407639323732, "grad_norm": 0.8290618658065796, "learning_rate": 3.756109481915933e-05, "loss": 0.2319, "step": 11254 }, { "epoch": 1.7618973074514717, "grad_norm": 1.1751283407211304, "learning_rate": 3.75366568914956e-05, "loss": 0.3196, "step": 11255 }, { "epoch": 1.7620538509705699, "grad_norm": 5.826601982116699, "learning_rate": 3.7512218963831864e-05, "loss": 1.5936, "step": 11256 }, { "epoch": 1.762210394489668, "grad_norm": 1.5065512657165527, "learning_rate": 3.748778103616813e-05, "loss": 0.3376, "step": 11257 }, { "epoch": 1.7623669380087663, "grad_norm": 1.3743765354156494, "learning_rate": 3.7463343108504394e-05, "loss": 0.3552, "step": 11258 }, { "epoch": 1.7625234815278648, "grad_norm": 1.3303420543670654, "learning_rate": 3.743890518084066e-05, "loss": 0.3617, "step": 11259 }, { "epoch": 1.7626800250469632, "grad_norm": 0.8232090473175049, "learning_rate": 3.741446725317693e-05, "loss": 0.3999, "step": 11260 }, { "epoch": 1.7628365685660614, "grad_norm": 1.9334932565689087, "learning_rate": 3.7390029325513195e-05, "loss": 0.4174, "step": 11261 }, { "epoch": 1.7629931120851596, "grad_norm": 1.5762377977371216, "learning_rate": 3.736559139784946e-05, "loss": 0.3955, "step": 11262 }, { "epoch": 1.7631496556042578, "grad_norm": 1.6700406074523926, "learning_rate": 3.7341153470185725e-05, "loss": 0.5943, "step": 11263 }, { "epoch": 1.7633061991233563, "grad_norm": 1.3183702230453491, "learning_rate": 3.731671554252199e-05, "loss": 0.4755, "step": 11264 }, { "epoch": 1.7634627426424547, "grad_norm": 
1.5598750114440918, "learning_rate": 3.7292277614858254e-05, "loss": 0.4606, "step": 11265 }, { "epoch": 1.763619286161553, "grad_norm": 1.2456644773483276, "learning_rate": 3.7267839687194526e-05, "loss": 0.4197, "step": 11266 }, { "epoch": 1.7637758296806512, "grad_norm": 2.95483660697937, "learning_rate": 3.724340175953079e-05, "loss": 0.6294, "step": 11267 }, { "epoch": 1.7639323731997494, "grad_norm": 1.7271779775619507, "learning_rate": 3.7218963831867055e-05, "loss": 0.3793, "step": 11268 }, { "epoch": 1.7640889167188478, "grad_norm": 3.223212957382202, "learning_rate": 3.719452590420332e-05, "loss": 0.7018, "step": 11269 }, { "epoch": 1.7642454602379463, "grad_norm": 7.701069355010986, "learning_rate": 3.7170087976539585e-05, "loss": 0.6636, "step": 11270 }, { "epoch": 1.7644020037570445, "grad_norm": 1.763765573501587, "learning_rate": 3.714565004887585e-05, "loss": 0.8444, "step": 11271 }, { "epoch": 1.7645585472761427, "grad_norm": 4.53816032409668, "learning_rate": 3.712121212121212e-05, "loss": 0.8607, "step": 11272 }, { "epoch": 1.764715090795241, "grad_norm": 3.1749961376190186, "learning_rate": 3.7096774193548386e-05, "loss": 0.611, "step": 11273 }, { "epoch": 1.7648716343143394, "grad_norm": 5.895051956176758, "learning_rate": 3.707233626588465e-05, "loss": 1.0795, "step": 11274 }, { "epoch": 1.7650281778334378, "grad_norm": 4.479997634887695, "learning_rate": 3.7047898338220916e-05, "loss": 0.9543, "step": 11275 }, { "epoch": 1.765184721352536, "grad_norm": 4.240951061248779, "learning_rate": 3.702346041055718e-05, "loss": 0.9561, "step": 11276 }, { "epoch": 1.7653412648716342, "grad_norm": 3.595991849899292, "learning_rate": 3.6999022482893445e-05, "loss": 0.8473, "step": 11277 }, { "epoch": 1.7654978083907327, "grad_norm": 3.00730037689209, "learning_rate": 3.697458455522972e-05, "loss": 1.0942, "step": 11278 }, { "epoch": 1.7656543519098309, "grad_norm": 2.618523359298706, "learning_rate": 3.695014662756598e-05, "loss": 0.9541, "step": 11279 }, 
{ "epoch": 1.7658108954289293, "grad_norm": 2.935112237930298, "learning_rate": 3.692570869990225e-05, "loss": 1.0638, "step": 11280 }, { "epoch": 1.7659674389480275, "grad_norm": 3.7480924129486084, "learning_rate": 3.690127077223851e-05, "loss": 1.0236, "step": 11281 }, { "epoch": 1.7661239824671258, "grad_norm": 3.3669211864471436, "learning_rate": 3.6876832844574776e-05, "loss": 1.3881, "step": 11282 }, { "epoch": 1.7662805259862242, "grad_norm": 2.294576406478882, "learning_rate": 3.685239491691104e-05, "loss": 1.0829, "step": 11283 }, { "epoch": 1.7664370695053226, "grad_norm": 2.1476564407348633, "learning_rate": 3.682795698924731e-05, "loss": 1.1662, "step": 11284 }, { "epoch": 1.7665936130244209, "grad_norm": 1.508748173713684, "learning_rate": 3.680351906158358e-05, "loss": 0.613, "step": 11285 }, { "epoch": 1.766750156543519, "grad_norm": 1.9185620546340942, "learning_rate": 3.677908113391984e-05, "loss": 0.6305, "step": 11286 }, { "epoch": 1.7669067000626173, "grad_norm": 2.450026750564575, "learning_rate": 3.675464320625611e-05, "loss": 0.6495, "step": 11287 }, { "epoch": 1.7670632435817157, "grad_norm": 2.664174795150757, "learning_rate": 3.673020527859237e-05, "loss": 0.5036, "step": 11288 }, { "epoch": 1.7672197871008142, "grad_norm": 1.3441131114959717, "learning_rate": 3.6705767350928637e-05, "loss": 0.2408, "step": 11289 }, { "epoch": 1.7673763306199124, "grad_norm": 1.2940044403076172, "learning_rate": 3.66813294232649e-05, "loss": 0.2059, "step": 11290 }, { "epoch": 1.7675328741390106, "grad_norm": 0.5093015432357788, "learning_rate": 3.665689149560117e-05, "loss": 0.1938, "step": 11291 }, { "epoch": 1.7676894176581088, "grad_norm": 0.7823761105537415, "learning_rate": 3.663245356793744e-05, "loss": 0.213, "step": 11292 }, { "epoch": 1.7678459611772073, "grad_norm": 1.0145026445388794, "learning_rate": 3.66080156402737e-05, "loss": 0.3456, "step": 11293 }, { "epoch": 1.7680025046963057, "grad_norm": 1.1629821062088013, "learning_rate": 
3.658357771260997e-05, "loss": 0.1895, "step": 11294 }, { "epoch": 1.768159048215404, "grad_norm": 1.3927007913589478, "learning_rate": 3.655913978494623e-05, "loss": 0.3128, "step": 11295 }, { "epoch": 1.7683155917345021, "grad_norm": 1.2417762279510498, "learning_rate": 3.65347018572825e-05, "loss": 0.4427, "step": 11296 }, { "epoch": 1.7684721352536004, "grad_norm": 1.2290184497833252, "learning_rate": 3.651026392961877e-05, "loss": 0.3086, "step": 11297 }, { "epoch": 1.7686286787726988, "grad_norm": 1.231251835823059, "learning_rate": 3.648582600195503e-05, "loss": 0.3525, "step": 11298 }, { "epoch": 1.7687852222917972, "grad_norm": 1.0766758918762207, "learning_rate": 3.64613880742913e-05, "loss": 0.2957, "step": 11299 }, { "epoch": 1.7689417658108955, "grad_norm": 0.900638222694397, "learning_rate": 3.643695014662756e-05, "loss": 0.2731, "step": 11300 }, { "epoch": 1.7690983093299937, "grad_norm": 5.631997108459473, "learning_rate": 3.641251221896383e-05, "loss": 0.4606, "step": 11301 }, { "epoch": 1.769254852849092, "grad_norm": 1.9502328634262085, "learning_rate": 3.638807429130009e-05, "loss": 0.4096, "step": 11302 }, { "epoch": 1.7694113963681903, "grad_norm": 2.7071378231048584, "learning_rate": 3.6363636363636364e-05, "loss": 0.3263, "step": 11303 }, { "epoch": 1.7695679398872888, "grad_norm": 2.8107943534851074, "learning_rate": 3.633919843597263e-05, "loss": 0.3564, "step": 11304 }, { "epoch": 1.769724483406387, "grad_norm": 1.432214617729187, "learning_rate": 3.6314760508308894e-05, "loss": 0.3068, "step": 11305 }, { "epoch": 1.7698810269254852, "grad_norm": 6.236049652099609, "learning_rate": 3.629032258064516e-05, "loss": 0.6622, "step": 11306 }, { "epoch": 1.7700375704445834, "grad_norm": 1.3720581531524658, "learning_rate": 3.626588465298142e-05, "loss": 0.4399, "step": 11307 }, { "epoch": 1.7701941139636819, "grad_norm": 6.22260856628418, "learning_rate": 3.624144672531769e-05, "loss": 0.6246, "step": 11308 }, { "epoch": 1.7703506574827803, 
"grad_norm": 1.9137059450149536, "learning_rate": 3.621700879765396e-05, "loss": 0.3889, "step": 11309 }, { "epoch": 1.7705072010018785, "grad_norm": 2.993480682373047, "learning_rate": 3.6192570869990225e-05, "loss": 0.5285, "step": 11310 }, { "epoch": 1.7706637445209767, "grad_norm": 4.049930095672607, "learning_rate": 3.616813294232649e-05, "loss": 0.484, "step": 11311 }, { "epoch": 1.7708202880400752, "grad_norm": 3.0351674556732178, "learning_rate": 3.6143695014662754e-05, "loss": 0.4673, "step": 11312 }, { "epoch": 1.7709768315591734, "grad_norm": 3.3933756351470947, "learning_rate": 3.611925708699902e-05, "loss": 0.42, "step": 11313 }, { "epoch": 1.7711333750782718, "grad_norm": 2.0746243000030518, "learning_rate": 3.6094819159335284e-05, "loss": 0.5587, "step": 11314 }, { "epoch": 1.77128991859737, "grad_norm": 2.525590658187866, "learning_rate": 3.6070381231671555e-05, "loss": 0.4801, "step": 11315 }, { "epoch": 1.7714464621164683, "grad_norm": 3.3657305240631104, "learning_rate": 3.604594330400782e-05, "loss": 0.6689, "step": 11316 }, { "epoch": 1.7716030056355667, "grad_norm": 2.2926595211029053, "learning_rate": 3.602150537634408e-05, "loss": 0.5339, "step": 11317 }, { "epoch": 1.7717595491546652, "grad_norm": 2.5583808422088623, "learning_rate": 3.599706744868035e-05, "loss": 0.4416, "step": 11318 }, { "epoch": 1.7719160926737634, "grad_norm": 3.0211122035980225, "learning_rate": 3.5972629521016615e-05, "loss": 0.4616, "step": 11319 }, { "epoch": 1.7720726361928616, "grad_norm": 5.442112445831299, "learning_rate": 3.594819159335288e-05, "loss": 0.6978, "step": 11320 }, { "epoch": 1.7722291797119598, "grad_norm": 2.6855411529541016, "learning_rate": 3.592375366568915e-05, "loss": 0.6472, "step": 11321 }, { "epoch": 1.7723857232310583, "grad_norm": 1.714129090309143, "learning_rate": 3.5899315738025416e-05, "loss": 0.5912, "step": 11322 }, { "epoch": 1.7725422667501567, "grad_norm": 5.199019908905029, "learning_rate": 3.5874877810361674e-05, "loss": 
0.7321, "step": 11323 }, { "epoch": 1.772698810269255, "grad_norm": 2.025068759918213, "learning_rate": 3.5850439882697945e-05, "loss": 0.9314, "step": 11324 }, { "epoch": 1.7728553537883531, "grad_norm": 3.1110622882843018, "learning_rate": 3.582600195503421e-05, "loss": 0.6754, "step": 11325 }, { "epoch": 1.7730118973074513, "grad_norm": 1.5338470935821533, "learning_rate": 3.5801564027370475e-05, "loss": 0.6682, "step": 11326 }, { "epoch": 1.7731684408265498, "grad_norm": 1.7983545064926147, "learning_rate": 3.5777126099706746e-05, "loss": 0.6782, "step": 11327 }, { "epoch": 1.7733249843456482, "grad_norm": 8.5779390335083, "learning_rate": 3.5752688172043004e-05, "loss": 0.6843, "step": 11328 }, { "epoch": 1.7734815278647464, "grad_norm": 2.1752936840057373, "learning_rate": 3.572825024437927e-05, "loss": 0.8621, "step": 11329 }, { "epoch": 1.7736380713838447, "grad_norm": 4.906739711761475, "learning_rate": 3.570381231671554e-05, "loss": 0.4116, "step": 11330 }, { "epoch": 1.7737946149029429, "grad_norm": 3.4587199687957764, "learning_rate": 3.5679374389051806e-05, "loss": 1.0547, "step": 11331 }, { "epoch": 1.7739511584220413, "grad_norm": 2.0558602809906006, "learning_rate": 3.565493646138807e-05, "loss": 0.8674, "step": 11332 }, { "epoch": 1.7741077019411398, "grad_norm": 6.779541492462158, "learning_rate": 3.563049853372434e-05, "loss": 1.3265, "step": 11333 }, { "epoch": 1.774264245460238, "grad_norm": 4.761612892150879, "learning_rate": 3.56060606060606e-05, "loss": 0.8637, "step": 11334 }, { "epoch": 1.7744207889793362, "grad_norm": 1.2794733047485352, "learning_rate": 3.5581622678396865e-05, "loss": 0.3855, "step": 11335 }, { "epoch": 1.7745773324984344, "grad_norm": 1.7263725996017456, "learning_rate": 3.5557184750733136e-05, "loss": 0.5897, "step": 11336 }, { "epoch": 1.7747338760175329, "grad_norm": 2.5758883953094482, "learning_rate": 3.55327468230694e-05, "loss": 1.131, "step": 11337 }, { "epoch": 1.7748904195366313, "grad_norm": 
3.0026354789733887, "learning_rate": 3.5508308895405666e-05, "loss": 0.8907, "step": 11338 }, { "epoch": 1.7750469630557295, "grad_norm": 0.7817611694335938, "learning_rate": 3.548387096774193e-05, "loss": 0.3081, "step": 11339 }, { "epoch": 1.7752035065748277, "grad_norm": 1.6110485792160034, "learning_rate": 3.5459433040078196e-05, "loss": 0.2291, "step": 11340 }, { "epoch": 1.775360050093926, "grad_norm": 1.1875455379486084, "learning_rate": 3.543499511241446e-05, "loss": 0.2239, "step": 11341 }, { "epoch": 1.7755165936130244, "grad_norm": 0.6927761435508728, "learning_rate": 3.541055718475073e-05, "loss": 0.2251, "step": 11342 }, { "epoch": 1.7756731371321228, "grad_norm": 1.9501267671585083, "learning_rate": 3.5386119257087e-05, "loss": 0.3533, "step": 11343 }, { "epoch": 1.775829680651221, "grad_norm": 1.7915222644805908, "learning_rate": 3.536168132942326e-05, "loss": 0.4284, "step": 11344 }, { "epoch": 1.7759862241703193, "grad_norm": 1.3947383165359497, "learning_rate": 3.5337243401759526e-05, "loss": 0.3234, "step": 11345 }, { "epoch": 1.7761427676894177, "grad_norm": 3.555690288543701, "learning_rate": 3.531280547409579e-05, "loss": 0.3943, "step": 11346 }, { "epoch": 1.776299311208516, "grad_norm": 0.8692519068717957, "learning_rate": 3.5288367546432056e-05, "loss": 0.2929, "step": 11347 }, { "epoch": 1.7764558547276144, "grad_norm": 1.724339246749878, "learning_rate": 3.526392961876833e-05, "loss": 0.3083, "step": 11348 }, { "epoch": 1.7766123982467126, "grad_norm": 1.0251134634017944, "learning_rate": 3.523949169110459e-05, "loss": 0.3695, "step": 11349 }, { "epoch": 1.7767689417658108, "grad_norm": 1.2282941341400146, "learning_rate": 3.521505376344086e-05, "loss": 0.4185, "step": 11350 }, { "epoch": 1.7769254852849092, "grad_norm": 0.9312837719917297, "learning_rate": 3.519061583577712e-05, "loss": 0.2435, "step": 11351 }, { "epoch": 1.7770820288040077, "grad_norm": 1.5906509160995483, "learning_rate": 3.516617790811339e-05, "loss": 0.3492, "step": 
11352 }, { "epoch": 1.777238572323106, "grad_norm": 0.7517256140708923, "learning_rate": 3.514173998044965e-05, "loss": 0.2204, "step": 11353 }, { "epoch": 1.777395115842204, "grad_norm": 2.044954776763916, "learning_rate": 3.5117302052785916e-05, "loss": 0.3857, "step": 11354 }, { "epoch": 1.7775516593613023, "grad_norm": 5.702359676361084, "learning_rate": 3.509286412512219e-05, "loss": 1.0261, "step": 11355 }, { "epoch": 1.7777082028804008, "grad_norm": 1.4534990787506104, "learning_rate": 3.506842619745845e-05, "loss": 0.5519, "step": 11356 }, { "epoch": 1.7778647463994992, "grad_norm": 1.8530081510543823, "learning_rate": 3.504398826979472e-05, "loss": 0.474, "step": 11357 }, { "epoch": 1.7780212899185974, "grad_norm": 2.5802159309387207, "learning_rate": 3.501955034213098e-05, "loss": 0.3142, "step": 11358 }, { "epoch": 1.7781778334376956, "grad_norm": 0.7793805599212646, "learning_rate": 3.499511241446725e-05, "loss": 0.4216, "step": 11359 }, { "epoch": 1.7783343769567939, "grad_norm": 2.537649631500244, "learning_rate": 3.497067448680351e-05, "loss": 0.5939, "step": 11360 }, { "epoch": 1.7784909204758923, "grad_norm": 3.798841953277588, "learning_rate": 3.4946236559139784e-05, "loss": 0.4864, "step": 11361 }, { "epoch": 1.7786474639949907, "grad_norm": 2.482792377471924, "learning_rate": 3.492179863147605e-05, "loss": 0.6817, "step": 11362 }, { "epoch": 1.778804007514089, "grad_norm": 0.7896741032600403, "learning_rate": 3.489736070381231e-05, "loss": 0.185, "step": 11363 }, { "epoch": 1.7789605510331872, "grad_norm": 2.387078046798706, "learning_rate": 3.487292277614858e-05, "loss": 0.5607, "step": 11364 }, { "epoch": 1.7791170945522854, "grad_norm": 7.810622215270996, "learning_rate": 3.484848484848484e-05, "loss": 0.525, "step": 11365 }, { "epoch": 1.7792736380713838, "grad_norm": 5.911670207977295, "learning_rate": 3.482404692082111e-05, "loss": 0.6811, "step": 11366 }, { "epoch": 1.7794301815904823, "grad_norm": 1.889855146408081, "learning_rate": 
3.479960899315738e-05, "loss": 0.4714, "step": 11367 }, { "epoch": 1.7795867251095805, "grad_norm": 1.3557449579238892, "learning_rate": 3.4775171065493644e-05, "loss": 0.3659, "step": 11368 }, { "epoch": 1.7797432686286787, "grad_norm": 3.040102481842041, "learning_rate": 3.475073313782991e-05, "loss": 0.6288, "step": 11369 }, { "epoch": 1.779899812147777, "grad_norm": 7.0420756340026855, "learning_rate": 3.4726295210166174e-05, "loss": 0.5326, "step": 11370 }, { "epoch": 1.7800563556668754, "grad_norm": 2.125647783279419, "learning_rate": 3.470185728250244e-05, "loss": 0.6711, "step": 11371 }, { "epoch": 1.7802128991859738, "grad_norm": 9.777316093444824, "learning_rate": 3.46774193548387e-05, "loss": 0.509, "step": 11372 }, { "epoch": 1.780369442705072, "grad_norm": 2.167560577392578, "learning_rate": 3.4652981427174975e-05, "loss": 0.7898, "step": 11373 }, { "epoch": 1.7805259862241702, "grad_norm": 5.202997207641602, "learning_rate": 3.462854349951124e-05, "loss": 0.7889, "step": 11374 }, { "epoch": 1.7806825297432687, "grad_norm": 3.4150354862213135, "learning_rate": 3.4604105571847504e-05, "loss": 1.0625, "step": 11375 }, { "epoch": 1.780839073262367, "grad_norm": 2.184953451156616, "learning_rate": 3.457966764418377e-05, "loss": 0.8714, "step": 11376 }, { "epoch": 1.7809956167814653, "grad_norm": 2.1605887413024902, "learning_rate": 3.4555229716520034e-05, "loss": 1.0218, "step": 11377 }, { "epoch": 1.7811521603005636, "grad_norm": 3.93656063079834, "learning_rate": 3.45307917888563e-05, "loss": 1.2488, "step": 11378 }, { "epoch": 1.7813087038196618, "grad_norm": 2.7407288551330566, "learning_rate": 3.450635386119257e-05, "loss": 0.8522, "step": 11379 }, { "epoch": 1.7814652473387602, "grad_norm": 2.3924989700317383, "learning_rate": 3.4481915933528835e-05, "loss": 1.3899, "step": 11380 }, { "epoch": 1.7816217908578584, "grad_norm": 2.2750840187072754, "learning_rate": 3.44574780058651e-05, "loss": 1.1462, "step": 11381 }, { "epoch": 1.7817783343769569, 
"grad_norm": 1.9890302419662476, "learning_rate": 3.4433040078201365e-05, "loss": 1.0301, "step": 11382 }, { "epoch": 1.781934877896055, "grad_norm": 1.8051940202713013, "learning_rate": 3.440860215053763e-05, "loss": 0.8329, "step": 11383 }, { "epoch": 1.7820914214151533, "grad_norm": 2.2022345066070557, "learning_rate": 3.4384164222873894e-05, "loss": 0.2328, "step": 11384 }, { "epoch": 1.7822479649342517, "grad_norm": 2.2471179962158203, "learning_rate": 3.4359726295210166e-05, "loss": 0.4919, "step": 11385 }, { "epoch": 1.7824045084533502, "grad_norm": 3.4314959049224854, "learning_rate": 3.433528836754643e-05, "loss": 0.7256, "step": 11386 }, { "epoch": 1.7825610519724484, "grad_norm": 2.0382392406463623, "learning_rate": 3.4310850439882695e-05, "loss": 0.6489, "step": 11387 }, { "epoch": 1.7827175954915466, "grad_norm": 12.943007469177246, "learning_rate": 3.428641251221896e-05, "loss": 0.5647, "step": 11388 }, { "epoch": 1.7828741390106448, "grad_norm": 3.573213815689087, "learning_rate": 3.4261974584555225e-05, "loss": 0.234, "step": 11389 }, { "epoch": 1.7830306825297433, "grad_norm": 0.828106164932251, "learning_rate": 3.423753665689149e-05, "loss": 0.2099, "step": 11390 }, { "epoch": 1.7831872260488417, "grad_norm": 0.6829121112823486, "learning_rate": 3.421309872922776e-05, "loss": 0.2266, "step": 11391 }, { "epoch": 1.78334376956794, "grad_norm": 0.8972326517105103, "learning_rate": 3.4188660801564026e-05, "loss": 0.2118, "step": 11392 }, { "epoch": 1.7835003130870382, "grad_norm": 2.1261706352233887, "learning_rate": 3.416422287390029e-05, "loss": 0.2796, "step": 11393 }, { "epoch": 1.7836568566061364, "grad_norm": 0.843867838382721, "learning_rate": 3.4139784946236556e-05, "loss": 0.257, "step": 11394 }, { "epoch": 1.7838134001252348, "grad_norm": 0.617863655090332, "learning_rate": 3.411534701857282e-05, "loss": 0.3085, "step": 11395 }, { "epoch": 1.7839699436443333, "grad_norm": 1.1008343696594238, "learning_rate": 3.4090909090909085e-05, "loss": 
0.3067, "step": 11396 }, { "epoch": 1.7841264871634315, "grad_norm": 2.097799777984619, "learning_rate": 3.406647116324536e-05, "loss": 0.2741, "step": 11397 }, { "epoch": 1.7842830306825297, "grad_norm": 5.063704967498779, "learning_rate": 3.404203323558162e-05, "loss": 0.4028, "step": 11398 }, { "epoch": 1.784439574201628, "grad_norm": 0.8035471439361572, "learning_rate": 3.401759530791789e-05, "loss": 0.2896, "step": 11399 }, { "epoch": 1.7845961177207263, "grad_norm": 2.3350865840911865, "learning_rate": 3.399315738025415e-05, "loss": 0.4598, "step": 11400 }, { "epoch": 1.7847526612398248, "grad_norm": 1.9042445421218872, "learning_rate": 3.3968719452590416e-05, "loss": 0.2244, "step": 11401 }, { "epoch": 1.784909204758923, "grad_norm": 3.0151047706604004, "learning_rate": 3.394428152492668e-05, "loss": 0.4026, "step": 11402 }, { "epoch": 1.7850657482780212, "grad_norm": 1.309158205986023, "learning_rate": 3.391984359726295e-05, "loss": 0.4166, "step": 11403 }, { "epoch": 1.7852222917971194, "grad_norm": 0.8744258880615234, "learning_rate": 3.389540566959922e-05, "loss": 0.4382, "step": 11404 }, { "epoch": 1.7853788353162179, "grad_norm": 1.478337049484253, "learning_rate": 3.387096774193548e-05, "loss": 0.3675, "step": 11405 }, { "epoch": 1.7855353788353163, "grad_norm": 1.9362159967422485, "learning_rate": 3.384652981427175e-05, "loss": 0.663, "step": 11406 }, { "epoch": 1.7856919223544145, "grad_norm": 1.9572466611862183, "learning_rate": 3.382209188660801e-05, "loss": 0.7617, "step": 11407 }, { "epoch": 1.7858484658735128, "grad_norm": 0.7849676012992859, "learning_rate": 3.379765395894428e-05, "loss": 0.3307, "step": 11408 }, { "epoch": 1.7860050093926112, "grad_norm": 2.1287667751312256, "learning_rate": 3.377321603128055e-05, "loss": 0.5584, "step": 11409 }, { "epoch": 1.7861615529117094, "grad_norm": 1.399261713027954, "learning_rate": 3.374877810361681e-05, "loss": 0.3862, "step": 11410 }, { "epoch": 1.7863180964308079, "grad_norm": 2.173130750656128, 
"learning_rate": 3.372434017595308e-05, "loss": 0.5256, "step": 11411 }, { "epoch": 1.786474639949906, "grad_norm": 1.911555290222168, "learning_rate": 3.369990224828934e-05, "loss": 0.7287, "step": 11412 }, { "epoch": 1.7866311834690043, "grad_norm": 3.9470415115356445, "learning_rate": 3.367546432062561e-05, "loss": 0.5673, "step": 11413 }, { "epoch": 1.7867877269881027, "grad_norm": 2.7635679244995117, "learning_rate": 3.365102639296187e-05, "loss": 0.3453, "step": 11414 }, { "epoch": 1.786944270507201, "grad_norm": 2.662376880645752, "learning_rate": 3.3626588465298144e-05, "loss": 0.6953, "step": 11415 }, { "epoch": 1.7871008140262994, "grad_norm": 4.1886444091796875, "learning_rate": 3.360215053763441e-05, "loss": 0.7028, "step": 11416 }, { "epoch": 1.7872573575453976, "grad_norm": 3.9045183658599854, "learning_rate": 3.357771260997067e-05, "loss": 0.7296, "step": 11417 }, { "epoch": 1.7874139010644958, "grad_norm": 2.272017240524292, "learning_rate": 3.355327468230694e-05, "loss": 0.6113, "step": 11418 }, { "epoch": 1.7875704445835943, "grad_norm": 2.5397136211395264, "learning_rate": 3.35288367546432e-05, "loss": 0.3347, "step": 11419 }, { "epoch": 1.7877269881026927, "grad_norm": 4.905436992645264, "learning_rate": 3.350439882697947e-05, "loss": 0.5801, "step": 11420 }, { "epoch": 1.787883531621791, "grad_norm": 1.6117602586746216, "learning_rate": 3.347996089931573e-05, "loss": 0.8952, "step": 11421 }, { "epoch": 1.7880400751408891, "grad_norm": 2.91456937789917, "learning_rate": 3.3455522971652004e-05, "loss": 0.6686, "step": 11422 }, { "epoch": 1.7881966186599874, "grad_norm": 3.0732598304748535, "learning_rate": 3.343108504398827e-05, "loss": 0.6183, "step": 11423 }, { "epoch": 1.7883531621790858, "grad_norm": 1.9497085809707642, "learning_rate": 3.3406647116324534e-05, "loss": 0.7562, "step": 11424 }, { "epoch": 1.7885097056981842, "grad_norm": 5.999476909637451, "learning_rate": 3.33822091886608e-05, "loss": 0.7507, "step": 11425 }, { "epoch": 
1.7886662492172825, "grad_norm": 2.643611431121826, "learning_rate": 3.335777126099706e-05, "loss": 0.8865, "step": 11426 }, { "epoch": 1.7888227927363807, "grad_norm": 6.0978803634643555, "learning_rate": 3.333333333333333e-05, "loss": 1.0517, "step": 11427 }, { "epoch": 1.788979336255479, "grad_norm": 1.9144535064697266, "learning_rate": 3.33088954056696e-05, "loss": 0.9786, "step": 11428 }, { "epoch": 1.7891358797745773, "grad_norm": 4.194984436035156, "learning_rate": 3.3284457478005865e-05, "loss": 1.137, "step": 11429 }, { "epoch": 1.7892924232936758, "grad_norm": 2.2141382694244385, "learning_rate": 3.326001955034213e-05, "loss": 1.0596, "step": 11430 }, { "epoch": 1.789448966812774, "grad_norm": 1.324143886566162, "learning_rate": 3.3235581622678394e-05, "loss": 0.6956, "step": 11431 }, { "epoch": 1.7896055103318722, "grad_norm": 2.4526519775390625, "learning_rate": 3.321114369501466e-05, "loss": 0.6576, "step": 11432 }, { "epoch": 1.7897620538509704, "grad_norm": 1.4210792779922485, "learning_rate": 3.3186705767350924e-05, "loss": 0.6591, "step": 11433 }, { "epoch": 1.7899185973700689, "grad_norm": 1.1268730163574219, "learning_rate": 3.3162267839687195e-05, "loss": 0.3686, "step": 11434 }, { "epoch": 1.7900751408891673, "grad_norm": 2.319267988204956, "learning_rate": 3.313782991202346e-05, "loss": 0.7514, "step": 11435 }, { "epoch": 1.7902316844082655, "grad_norm": 2.7974939346313477, "learning_rate": 3.3113391984359725e-05, "loss": 0.7639, "step": 11436 }, { "epoch": 1.7903882279273637, "grad_norm": 4.421706676483154, "learning_rate": 3.308895405669599e-05, "loss": 1.1673, "step": 11437 }, { "epoch": 1.790544771446462, "grad_norm": 2.369821310043335, "learning_rate": 3.3064516129032255e-05, "loss": 0.8485, "step": 11438 }, { "epoch": 1.7907013149655604, "grad_norm": 2.404873847961426, "learning_rate": 3.304007820136852e-05, "loss": 0.3957, "step": 11439 }, { "epoch": 1.7908578584846588, "grad_norm": 0.9003664255142212, "learning_rate": 
3.301564027370479e-05, "loss": 0.2733, "step": 11440 }, { "epoch": 1.791014402003757, "grad_norm": 0.6666517853736877, "learning_rate": 3.2991202346041056e-05, "loss": 0.2546, "step": 11441 }, { "epoch": 1.7911709455228553, "grad_norm": 0.587860107421875, "learning_rate": 3.296676441837732e-05, "loss": 0.2187, "step": 11442 }, { "epoch": 1.7913274890419537, "grad_norm": 1.2249459028244019, "learning_rate": 3.2942326490713585e-05, "loss": 0.2001, "step": 11443 }, { "epoch": 1.791484032561052, "grad_norm": 1.0504732131958008, "learning_rate": 3.291788856304985e-05, "loss": 0.3076, "step": 11444 }, { "epoch": 1.7916405760801504, "grad_norm": 0.6395697593688965, "learning_rate": 3.2893450635386115e-05, "loss": 0.2633, "step": 11445 }, { "epoch": 1.7917971195992486, "grad_norm": 1.8298180103302002, "learning_rate": 3.2869012707722386e-05, "loss": 0.4211, "step": 11446 }, { "epoch": 1.7919536631183468, "grad_norm": 1.2210631370544434, "learning_rate": 3.284457478005865e-05, "loss": 0.3673, "step": 11447 }, { "epoch": 1.7921102066374452, "grad_norm": 0.7577955722808838, "learning_rate": 3.282013685239491e-05, "loss": 0.2212, "step": 11448 }, { "epoch": 1.7922667501565435, "grad_norm": 1.5126709938049316, "learning_rate": 3.279569892473118e-05, "loss": 0.294, "step": 11449 }, { "epoch": 1.792423293675642, "grad_norm": 1.66423499584198, "learning_rate": 3.2771260997067446e-05, "loss": 0.236, "step": 11450 }, { "epoch": 1.7925798371947401, "grad_norm": 1.5655282735824585, "learning_rate": 3.274682306940371e-05, "loss": 0.3446, "step": 11451 }, { "epoch": 1.7927363807138383, "grad_norm": 0.9023974537849426, "learning_rate": 3.272238514173998e-05, "loss": 0.3498, "step": 11452 }, { "epoch": 1.7928929242329368, "grad_norm": 1.6287553310394287, "learning_rate": 3.269794721407625e-05, "loss": 0.3794, "step": 11453 }, { "epoch": 1.7930494677520352, "grad_norm": 2.9960827827453613, "learning_rate": 3.2673509286412505e-05, "loss": 0.6226, "step": 11454 }, { "epoch": 
1.7932060112711334, "grad_norm": 8.331686019897461, "learning_rate": 3.2649071358748776e-05, "loss": 0.3432, "step": 11455 }, { "epoch": 1.7933625547902317, "grad_norm": 1.214149832725525, "learning_rate": 3.262463343108504e-05, "loss": 0.3093, "step": 11456 }, { "epoch": 1.7935190983093299, "grad_norm": 1.9629710912704468, "learning_rate": 3.2600195503421306e-05, "loss": 0.424, "step": 11457 }, { "epoch": 1.7936756418284283, "grad_norm": 4.693020820617676, "learning_rate": 3.257575757575758e-05, "loss": 0.8316, "step": 11458 }, { "epoch": 1.7938321853475268, "grad_norm": 3.069445848464966, "learning_rate": 3.2551319648093836e-05, "loss": 0.5424, "step": 11459 }, { "epoch": 1.793988728866625, "grad_norm": 1.917677879333496, "learning_rate": 3.25268817204301e-05, "loss": 0.3951, "step": 11460 }, { "epoch": 1.7941452723857232, "grad_norm": 1.1094872951507568, "learning_rate": 3.250244379276637e-05, "loss": 0.4418, "step": 11461 }, { "epoch": 1.7943018159048214, "grad_norm": 1.7542659044265747, "learning_rate": 3.247800586510264e-05, "loss": 0.6939, "step": 11462 }, { "epoch": 1.7944583594239198, "grad_norm": 4.421488285064697, "learning_rate": 3.24535679374389e-05, "loss": 0.5249, "step": 11463 }, { "epoch": 1.7946149029430183, "grad_norm": 3.417511224746704, "learning_rate": 3.242913000977517e-05, "loss": 0.6136, "step": 11464 }, { "epoch": 1.7947714464621165, "grad_norm": 4.0269622802734375, "learning_rate": 3.240469208211143e-05, "loss": 1.0622, "step": 11465 }, { "epoch": 1.7949279899812147, "grad_norm": 1.2010209560394287, "learning_rate": 3.2380254154447696e-05, "loss": 0.371, "step": 11466 }, { "epoch": 1.795084533500313, "grad_norm": 3.1197423934936523, "learning_rate": 3.235581622678397e-05, "loss": 0.6811, "step": 11467 }, { "epoch": 1.7952410770194114, "grad_norm": 2.041327714920044, "learning_rate": 3.233137829912023e-05, "loss": 0.4798, "step": 11468 }, { "epoch": 1.7953976205385098, "grad_norm": 2.050049066543579, "learning_rate": 3.23069403714565e-05, 
"loss": 0.4145, "step": 11469 }, { "epoch": 1.795554164057608, "grad_norm": 5.6066575050354, "learning_rate": 3.228250244379277e-05, "loss": 0.7301, "step": 11470 }, { "epoch": 1.7957107075767063, "grad_norm": 2.6833720207214355, "learning_rate": 3.225806451612903e-05, "loss": 0.9764, "step": 11471 }, { "epoch": 1.7958672510958045, "grad_norm": 1.4853190183639526, "learning_rate": 3.223362658846529e-05, "loss": 0.3526, "step": 11472 }, { "epoch": 1.796023794614903, "grad_norm": 1.8210937976837158, "learning_rate": 3.220918866080156e-05, "loss": 0.4752, "step": 11473 }, { "epoch": 1.7961803381340014, "grad_norm": 2.770622968673706, "learning_rate": 3.218475073313783e-05, "loss": 0.9654, "step": 11474 }, { "epoch": 1.7963368816530996, "grad_norm": 2.7523603439331055, "learning_rate": 3.216031280547409e-05, "loss": 0.4661, "step": 11475 }, { "epoch": 1.7964934251721978, "grad_norm": 4.842382907867432, "learning_rate": 3.213587487781036e-05, "loss": 0.7784, "step": 11476 }, { "epoch": 1.7966499686912962, "grad_norm": 2.4741690158843994, "learning_rate": 3.211143695014662e-05, "loss": 1.0925, "step": 11477 }, { "epoch": 1.7968065122103944, "grad_norm": 1.1925292015075684, "learning_rate": 3.208699902248289e-05, "loss": 0.5622, "step": 11478 }, { "epoch": 1.7969630557294929, "grad_norm": 3.3205652236938477, "learning_rate": 3.206256109481916e-05, "loss": 1.3511, "step": 11479 }, { "epoch": 1.797119599248591, "grad_norm": 3.456491231918335, "learning_rate": 3.2038123167155424e-05, "loss": 1.5569, "step": 11480 }, { "epoch": 1.7972761427676893, "grad_norm": 3.728654146194458, "learning_rate": 3.201368523949169e-05, "loss": 0.8711, "step": 11481 }, { "epoch": 1.7974326862867878, "grad_norm": 2.7066574096679688, "learning_rate": 3.198924731182795e-05, "loss": 1.4595, "step": 11482 }, { "epoch": 1.7975892298058862, "grad_norm": 2.8487234115600586, "learning_rate": 3.196480938416422e-05, "loss": 1.2391, "step": 11483 }, { "epoch": 1.7977457733249844, "grad_norm": 
1.6365246772766113, "learning_rate": 3.194037145650048e-05, "loss": 0.6185, "step": 11484 }, { "epoch": 1.7979023168440826, "grad_norm": 0.9667677879333496, "learning_rate": 3.191593352883675e-05, "loss": 0.3946, "step": 11485 }, { "epoch": 1.7980588603631809, "grad_norm": 1.4002686738967896, "learning_rate": 3.189149560117302e-05, "loss": 0.4003, "step": 11486 }, { "epoch": 1.7982154038822793, "grad_norm": 1.4334136247634888, "learning_rate": 3.1867057673509284e-05, "loss": 0.3993, "step": 11487 }, { "epoch": 1.7983719474013777, "grad_norm": 3.9255292415618896, "learning_rate": 3.184261974584555e-05, "loss": 0.837, "step": 11488 }, { "epoch": 1.798528490920476, "grad_norm": 2.0887675285339355, "learning_rate": 3.1818181818181814e-05, "loss": 0.2766, "step": 11489 }, { "epoch": 1.7986850344395742, "grad_norm": 1.7812479734420776, "learning_rate": 3.179374389051808e-05, "loss": 0.4492, "step": 11490 }, { "epoch": 1.7988415779586724, "grad_norm": 1.9391423463821411, "learning_rate": 3.176930596285434e-05, "loss": 0.2828, "step": 11491 }, { "epoch": 1.7989981214777708, "grad_norm": 1.2495836019515991, "learning_rate": 3.1744868035190615e-05, "loss": 0.3079, "step": 11492 }, { "epoch": 1.7991546649968693, "grad_norm": 1.1032434701919556, "learning_rate": 3.172043010752688e-05, "loss": 0.2513, "step": 11493 }, { "epoch": 1.7993112085159675, "grad_norm": 1.8104841709136963, "learning_rate": 3.1695992179863144e-05, "loss": 0.2143, "step": 11494 }, { "epoch": 1.7994677520350657, "grad_norm": 1.4198352098464966, "learning_rate": 3.167155425219941e-05, "loss": 0.3067, "step": 11495 }, { "epoch": 1.799624295554164, "grad_norm": 0.8170778155326843, "learning_rate": 3.1647116324535674e-05, "loss": 0.2628, "step": 11496 }, { "epoch": 1.7997808390732624, "grad_norm": 2.481227159500122, "learning_rate": 3.162267839687194e-05, "loss": 0.455, "step": 11497 }, { "epoch": 1.7999373825923608, "grad_norm": 1.1111358404159546, "learning_rate": 3.159824046920821e-05, "loss": 0.3932, 
"step": 11498 }, { "epoch": 1.800093926111459, "grad_norm": 2.602691888809204, "learning_rate": 3.1573802541544475e-05, "loss": 0.4981, "step": 11499 }, { "epoch": 1.8002504696305572, "grad_norm": 0.9818456172943115, "learning_rate": 3.154936461388074e-05, "loss": 0.5675, "step": 11500 }, { "epoch": 1.8004070131496555, "grad_norm": 1.2159143686294556, "learning_rate": 3.1524926686217005e-05, "loss": 0.3184, "step": 11501 }, { "epoch": 1.800563556668754, "grad_norm": 2.042997360229492, "learning_rate": 3.150048875855327e-05, "loss": 0.2769, "step": 11502 }, { "epoch": 1.8007201001878523, "grad_norm": 1.841683268547058, "learning_rate": 3.1476050830889534e-05, "loss": 0.4187, "step": 11503 }, { "epoch": 1.8008766437069506, "grad_norm": 1.4786103963851929, "learning_rate": 3.1451612903225806e-05, "loss": 0.3185, "step": 11504 }, { "epoch": 1.8010331872260488, "grad_norm": 1.3413180112838745, "learning_rate": 3.142717497556207e-05, "loss": 0.4065, "step": 11505 }, { "epoch": 1.801189730745147, "grad_norm": 1.192056655883789, "learning_rate": 3.1402737047898335e-05, "loss": 0.2588, "step": 11506 }, { "epoch": 1.8013462742642454, "grad_norm": 3.1147072315216064, "learning_rate": 3.13782991202346e-05, "loss": 0.3511, "step": 11507 }, { "epoch": 1.8015028177833439, "grad_norm": 0.9415939450263977, "learning_rate": 3.1353861192570865e-05, "loss": 0.3664, "step": 11508 }, { "epoch": 1.801659361302442, "grad_norm": 2.014275074005127, "learning_rate": 3.132942326490713e-05, "loss": 0.5367, "step": 11509 }, { "epoch": 1.8018159048215403, "grad_norm": 2.412937879562378, "learning_rate": 3.13049853372434e-05, "loss": 0.4566, "step": 11510 }, { "epoch": 1.8019724483406387, "grad_norm": 1.8484764099121094, "learning_rate": 3.1280547409579666e-05, "loss": 0.4879, "step": 11511 }, { "epoch": 1.802128991859737, "grad_norm": 2.9329864978790283, "learning_rate": 3.125610948191593e-05, "loss": 0.4734, "step": 11512 }, { "epoch": 1.8022855353788354, "grad_norm": 3.0174472332000732, 
"learning_rate": 3.1231671554252196e-05, "loss": 0.6371, "step": 11513 }, { "epoch": 1.8024420788979336, "grad_norm": 2.343379020690918, "learning_rate": 3.120723362658846e-05, "loss": 0.5017, "step": 11514 }, { "epoch": 1.8025986224170318, "grad_norm": 2.4087560176849365, "learning_rate": 3.1182795698924725e-05, "loss": 0.5473, "step": 11515 }, { "epoch": 1.8027551659361303, "grad_norm": 5.394991874694824, "learning_rate": 3.1158357771261e-05, "loss": 0.5428, "step": 11516 }, { "epoch": 1.8029117094552287, "grad_norm": 1.5382750034332275, "learning_rate": 3.113391984359726e-05, "loss": 0.5551, "step": 11517 }, { "epoch": 1.803068252974327, "grad_norm": 1.568321704864502, "learning_rate": 3.110948191593353e-05, "loss": 0.4123, "step": 11518 }, { "epoch": 1.8032247964934252, "grad_norm": 2.0196034908294678, "learning_rate": 3.108504398826979e-05, "loss": 0.6364, "step": 11519 }, { "epoch": 1.8033813400125234, "grad_norm": 2.843515634536743, "learning_rate": 3.1060606060606056e-05, "loss": 0.8347, "step": 11520 }, { "epoch": 1.8035378835316218, "grad_norm": 2.5024216175079346, "learning_rate": 3.103616813294232e-05, "loss": 0.7907, "step": 11521 }, { "epoch": 1.8036944270507203, "grad_norm": NaN, "learning_rate": 3.103616813294232e-05, "loss": 0.0, "step": 11522 }, { "epoch": 1.8038509705698185, "grad_norm": 2.9168970584869385, "learning_rate": 3.101173020527859e-05, "loss": 0.9792, "step": 11523 }, { "epoch": 1.8040075140889167, "grad_norm": 3.7329742908477783, "learning_rate": 3.098729227761486e-05, "loss": 0.7339, "step": 11524 }, { "epoch": 1.804164057608015, "grad_norm": 4.0727972984313965, "learning_rate": 3.096285434995112e-05, "loss": 1.2615, "step": 11525 }, { "epoch": 1.8043206011271133, "grad_norm": 2.7813782691955566, "learning_rate": 3.093841642228739e-05, "loss": 0.8195, "step": 11526 }, { "epoch": 1.8044771446462118, "grad_norm": 3.9311771392822266, "learning_rate": 3.091397849462365e-05, "loss": 1.0503, "step": 11527 }, { "epoch": 1.80463368816531, 
"grad_norm": 2.577922821044922, "learning_rate": 3.088954056695992e-05, "loss": 0.7532, "step": 11528 }, { "epoch": 1.8047902316844082, "grad_norm": 5.859790325164795, "learning_rate": 3.086510263929619e-05, "loss": 1.3083, "step": 11529 }, { "epoch": 1.8049467752035064, "grad_norm": 2.1421098709106445, "learning_rate": 3.084066471163245e-05, "loss": 0.8507, "step": 11530 }, { "epoch": 1.8051033187226049, "grad_norm": 3.9190313816070557, "learning_rate": 3.081622678396872e-05, "loss": 1.1791, "step": 11531 }, { "epoch": 1.8052598622417033, "grad_norm": 1.5476529598236084, "learning_rate": 3.079178885630498e-05, "loss": 0.9817, "step": 11532 }, { "epoch": 1.8054164057608015, "grad_norm": 1.8120455741882324, "learning_rate": 3.076735092864125e-05, "loss": 0.6504, "step": 11533 }, { "epoch": 1.8055729492798998, "grad_norm": 1.9655290842056274, "learning_rate": 3.074291300097751e-05, "loss": 0.5684, "step": 11534 }, { "epoch": 1.805729492798998, "grad_norm": 2.028470993041992, "learning_rate": 3.0718475073313784e-05, "loss": 0.7158, "step": 11535 }, { "epoch": 1.8058860363180964, "grad_norm": 1.6118348836898804, "learning_rate": 3.069403714565005e-05, "loss": 0.5727, "step": 11536 }, { "epoch": 1.8060425798371949, "grad_norm": 1.6711117029190063, "learning_rate": 3.066959921798631e-05, "loss": 0.5635, "step": 11537 }, { "epoch": 1.806199123356293, "grad_norm": 1.72394859790802, "learning_rate": 3.064516129032258e-05, "loss": 0.7681, "step": 11538 }, { "epoch": 1.8063556668753913, "grad_norm": 1.3141530752182007, "learning_rate": 3.062072336265884e-05, "loss": 0.3143, "step": 11539 }, { "epoch": 1.8065122103944895, "grad_norm": 2.4097068309783936, "learning_rate": 3.059628543499511e-05, "loss": 0.2761, "step": 11540 }, { "epoch": 1.806668753913588, "grad_norm": 1.3687645196914673, "learning_rate": 3.057184750733138e-05, "loss": 0.2951, "step": 11541 }, { "epoch": 1.8068252974326864, "grad_norm": 1.9082344770431519, "learning_rate": 3.0547409579667644e-05, "loss": 
0.3424, "step": 11542 }, { "epoch": 1.8069818409517846, "grad_norm": 1.1238840818405151, "learning_rate": 3.052297165200391e-05, "loss": 0.3282, "step": 11543 }, { "epoch": 1.8071383844708828, "grad_norm": 14.494555473327637, "learning_rate": 3.0498533724340174e-05, "loss": 0.3696, "step": 11544 }, { "epoch": 1.8072949279899813, "grad_norm": 0.5469344854354858, "learning_rate": 3.047409579667644e-05, "loss": 0.3021, "step": 11545 }, { "epoch": 1.8074514715090795, "grad_norm": 0.5608265995979309, "learning_rate": 3.0449657869012703e-05, "loss": 0.2567, "step": 11546 }, { "epoch": 1.807608015028178, "grad_norm": 0.9688588380813599, "learning_rate": 3.0425219941348968e-05, "loss": 0.3498, "step": 11547 }, { "epoch": 1.8077645585472761, "grad_norm": 1.1317929029464722, "learning_rate": 3.0400782013685236e-05, "loss": 0.2691, "step": 11548 }, { "epoch": 1.8079211020663744, "grad_norm": 0.9738819003105164, "learning_rate": 3.03763440860215e-05, "loss": 0.2949, "step": 11549 }, { "epoch": 1.8080776455854728, "grad_norm": 1.1929585933685303, "learning_rate": 3.0351906158357766e-05, "loss": 0.4013, "step": 11550 }, { "epoch": 1.8082341891045712, "grad_norm": 1.0364540815353394, "learning_rate": 3.0327468230694034e-05, "loss": 0.4509, "step": 11551 }, { "epoch": 1.8083907326236695, "grad_norm": 2.415992259979248, "learning_rate": 3.03030303030303e-05, "loss": 0.4985, "step": 11552 }, { "epoch": 1.8085472761427677, "grad_norm": 1.438122272491455, "learning_rate": 3.0278592375366564e-05, "loss": 0.443, "step": 11553 }, { "epoch": 1.8087038196618659, "grad_norm": 1.7964818477630615, "learning_rate": 3.0254154447702832e-05, "loss": 0.4412, "step": 11554 }, { "epoch": 1.8088603631809643, "grad_norm": 4.328434467315674, "learning_rate": 3.0229716520039097e-05, "loss": 0.4248, "step": 11555 }, { "epoch": 1.8090169067000628, "grad_norm": 2.232316017150879, "learning_rate": 3.020527859237536e-05, "loss": 0.4388, "step": 11556 }, { "epoch": 1.809173450219161, "grad_norm": 
1.9502800703048706, "learning_rate": 3.018084066471163e-05, "loss": 0.3977, "step": 11557 }, { "epoch": 1.8093299937382592, "grad_norm": 5.183807373046875, "learning_rate": 3.0156402737047895e-05, "loss": 0.7417, "step": 11558 }, { "epoch": 1.8094865372573574, "grad_norm": 4.440307140350342, "learning_rate": 3.013196480938416e-05, "loss": 0.6555, "step": 11559 }, { "epoch": 1.8096430807764559, "grad_norm": 1.0355379581451416, "learning_rate": 3.0107526881720428e-05, "loss": 0.4009, "step": 11560 }, { "epoch": 1.8097996242955543, "grad_norm": 2.554629325866699, "learning_rate": 3.0083088954056692e-05, "loss": 0.4825, "step": 11561 }, { "epoch": 1.8099561678146525, "grad_norm": 1.1384446620941162, "learning_rate": 3.0058651026392957e-05, "loss": 0.5349, "step": 11562 }, { "epoch": 1.8101127113337507, "grad_norm": 1.610360026359558, "learning_rate": 3.0034213098729225e-05, "loss": 0.3389, "step": 11563 }, { "epoch": 1.810269254852849, "grad_norm": 1.8687621355056763, "learning_rate": 3.000977517106549e-05, "loss": 0.5675, "step": 11564 }, { "epoch": 1.8104257983719474, "grad_norm": 4.0817108154296875, "learning_rate": 2.9985337243401755e-05, "loss": 0.3691, "step": 11565 }, { "epoch": 1.8105823418910458, "grad_norm": 5.202278137207031, "learning_rate": 2.9960899315738023e-05, "loss": 0.6155, "step": 11566 }, { "epoch": 1.810738885410144, "grad_norm": 2.133866548538208, "learning_rate": 2.9936461388074288e-05, "loss": 0.7993, "step": 11567 }, { "epoch": 1.8108954289292423, "grad_norm": 3.848477363586426, "learning_rate": 2.9912023460410553e-05, "loss": 1.0991, "step": 11568 }, { "epoch": 1.8110519724483405, "grad_norm": 1.6100995540618896, "learning_rate": 2.988758553274682e-05, "loss": 0.4259, "step": 11569 }, { "epoch": 1.811208515967439, "grad_norm": 3.559994697570801, "learning_rate": 2.9863147605083086e-05, "loss": 0.5782, "step": 11570 }, { "epoch": 1.8113650594865374, "grad_norm": 3.698235034942627, "learning_rate": 2.983870967741935e-05, "loss": 0.8529, "step": 
11571 }, { "epoch": 1.8115216030056356, "grad_norm": 3.099733591079712, "learning_rate": 2.981427174975562e-05, "loss": 0.5965, "step": 11572 }, { "epoch": 1.8116781465247338, "grad_norm": 1.290650725364685, "learning_rate": 2.9789833822091883e-05, "loss": 0.6342, "step": 11573 }, { "epoch": 1.811834690043832, "grad_norm": 2.6807796955108643, "learning_rate": 2.9765395894428148e-05, "loss": 1.0052, "step": 11574 }, { "epoch": 1.8119912335629305, "grad_norm": 1.7438832521438599, "learning_rate": 2.9740957966764416e-05, "loss": 0.6427, "step": 11575 }, { "epoch": 1.812147777082029, "grad_norm": 2.453213691711426, "learning_rate": 2.971652003910068e-05, "loss": 1.0218, "step": 11576 }, { "epoch": 1.8123043206011271, "grad_norm": 2.5815582275390625, "learning_rate": 2.9692082111436946e-05, "loss": 0.3113, "step": 11577 }, { "epoch": 1.8124608641202253, "grad_norm": 1.8507500886917114, "learning_rate": 2.9667644183773214e-05, "loss": 0.7022, "step": 11578 }, { "epoch": 1.8126174076393238, "grad_norm": 5.00587797164917, "learning_rate": 2.964320625610948e-05, "loss": 1.1861, "step": 11579 }, { "epoch": 1.812773951158422, "grad_norm": 4.860931873321533, "learning_rate": 2.9618768328445744e-05, "loss": 0.8452, "step": 11580 }, { "epoch": 1.8129304946775204, "grad_norm": 1.9036601781845093, "learning_rate": 2.9594330400782012e-05, "loss": 0.8327, "step": 11581 }, { "epoch": 1.8130870381966186, "grad_norm": 2.3255369663238525, "learning_rate": 2.9569892473118277e-05, "loss": 1.3153, "step": 11582 }, { "epoch": 1.8132435817157169, "grad_norm": 3.8739614486694336, "learning_rate": 2.954545454545454e-05, "loss": 0.8975, "step": 11583 }, { "epoch": 1.8134001252348153, "grad_norm": 2.4773049354553223, "learning_rate": 2.952101661779081e-05, "loss": 1.295, "step": 11584 }, { "epoch": 1.8135566687539137, "grad_norm": 1.24504816532135, "learning_rate": 2.9496578690127075e-05, "loss": 0.1969, "step": 11585 }, { "epoch": 1.813713212273012, "grad_norm": 3.3709805011749268, 
"learning_rate": 2.947214076246334e-05, "loss": 0.9211, "step": 11586 }, { "epoch": 1.8138697557921102, "grad_norm": 4.954073905944824, "learning_rate": 2.9447702834799608e-05, "loss": 0.5325, "step": 11587 }, { "epoch": 1.8140262993112084, "grad_norm": 2.0279388427734375, "learning_rate": 2.9423264907135872e-05, "loss": 0.796, "step": 11588 }, { "epoch": 1.8141828428303068, "grad_norm": 0.8926810622215271, "learning_rate": 2.9398826979472137e-05, "loss": 0.3563, "step": 11589 }, { "epoch": 1.8143393863494053, "grad_norm": 0.5909395217895508, "learning_rate": 2.9374389051808405e-05, "loss": 0.347, "step": 11590 }, { "epoch": 1.8144959298685035, "grad_norm": 1.289638876914978, "learning_rate": 2.934995112414467e-05, "loss": 0.3006, "step": 11591 }, { "epoch": 1.8146524733876017, "grad_norm": 0.5214605927467346, "learning_rate": 2.9325513196480935e-05, "loss": 0.2591, "step": 11592 }, { "epoch": 1.8148090169067, "grad_norm": 3.6117069721221924, "learning_rate": 2.9301075268817203e-05, "loss": 0.2681, "step": 11593 }, { "epoch": 1.8149655604257984, "grad_norm": 1.9626871347427368, "learning_rate": 2.9276637341153468e-05, "loss": 0.371, "step": 11594 }, { "epoch": 1.8151221039448968, "grad_norm": 0.8882358074188232, "learning_rate": 2.9252199413489733e-05, "loss": 0.3304, "step": 11595 }, { "epoch": 1.815278647463995, "grad_norm": 2.33246111869812, "learning_rate": 2.9227761485826e-05, "loss": 0.3598, "step": 11596 }, { "epoch": 1.8154351909830932, "grad_norm": 1.491754174232483, "learning_rate": 2.9203323558162266e-05, "loss": 0.3543, "step": 11597 }, { "epoch": 1.8155917345021915, "grad_norm": 1.187076449394226, "learning_rate": 2.917888563049853e-05, "loss": 0.3254, "step": 11598 }, { "epoch": 1.81574827802129, "grad_norm": 0.9200171232223511, "learning_rate": 2.91544477028348e-05, "loss": 0.3543, "step": 11599 }, { "epoch": 1.8159048215403883, "grad_norm": 1.1525431871414185, "learning_rate": 2.9130009775171064e-05, "loss": 0.3431, "step": 11600 }, { "epoch": 
1.8160613650594866, "grad_norm": 0.8428003787994385, "learning_rate": 2.910557184750733e-05, "loss": 0.2869, "step": 11601 }, { "epoch": 1.8162179085785848, "grad_norm": 2.701383590698242, "learning_rate": 2.9081133919843597e-05, "loss": 0.585, "step": 11602 }, { "epoch": 1.816374452097683, "grad_norm": 1.2327849864959717, "learning_rate": 2.905669599217986e-05, "loss": 0.3806, "step": 11603 }, { "epoch": 1.8165309956167814, "grad_norm": 1.1938436031341553, "learning_rate": 2.9032258064516126e-05, "loss": 0.5627, "step": 11604 }, { "epoch": 1.8166875391358799, "grad_norm": 1.1973328590393066, "learning_rate": 2.9007820136852394e-05, "loss": 0.4786, "step": 11605 }, { "epoch": 1.816844082654978, "grad_norm": 7.545814514160156, "learning_rate": 2.898338220918866e-05, "loss": 0.4375, "step": 11606 }, { "epoch": 1.8170006261740763, "grad_norm": 3.719249963760376, "learning_rate": 2.8958944281524924e-05, "loss": 0.607, "step": 11607 }, { "epoch": 1.8171571696931748, "grad_norm": 1.4751845598220825, "learning_rate": 2.8934506353861192e-05, "loss": 0.5607, "step": 11608 }, { "epoch": 1.817313713212273, "grad_norm": 9.847101211547852, "learning_rate": 2.8910068426197457e-05, "loss": 0.5926, "step": 11609 }, { "epoch": 1.8174702567313714, "grad_norm": 1.2304680347442627, "learning_rate": 2.8885630498533722e-05, "loss": 0.4732, "step": 11610 }, { "epoch": 1.8176268002504696, "grad_norm": 4.059071063995361, "learning_rate": 2.8861192570869987e-05, "loss": 0.338, "step": 11611 }, { "epoch": 1.8177833437695678, "grad_norm": 1.5331727266311646, "learning_rate": 2.8836754643206255e-05, "loss": 0.4844, "step": 11612 }, { "epoch": 1.8179398872886663, "grad_norm": 2.8758628368377686, "learning_rate": 2.881231671554252e-05, "loss": 0.5759, "step": 11613 }, { "epoch": 1.8180964308077645, "grad_norm": 4.602398872375488, "learning_rate": 2.8787878787878784e-05, "loss": 0.4847, "step": 11614 }, { "epoch": 1.818252974326863, "grad_norm": 5.723301410675049, "learning_rate": 
2.8763440860215053e-05, "loss": 1.2581, "step": 11615 }, { "epoch": 1.8184095178459612, "grad_norm": 2.905620813369751, "learning_rate": 2.8739002932551317e-05, "loss": 0.5796, "step": 11616 }, { "epoch": 1.8185660613650594, "grad_norm": 2.289865732192993, "learning_rate": 2.8714565004887582e-05, "loss": 0.6131, "step": 11617 }, { "epoch": 1.8187226048841578, "grad_norm": 1.782109022140503, "learning_rate": 2.869012707722385e-05, "loss": 0.523, "step": 11618 }, { "epoch": 1.8188791484032563, "grad_norm": 8.425079345703125, "learning_rate": 2.8665689149560115e-05, "loss": 0.9899, "step": 11619 }, { "epoch": 1.8190356919223545, "grad_norm": 2.678179979324341, "learning_rate": 2.864125122189638e-05, "loss": 0.7031, "step": 11620 }, { "epoch": 1.8191922354414527, "grad_norm": 9.00810432434082, "learning_rate": 2.8616813294232648e-05, "loss": 0.5054, "step": 11621 }, { "epoch": 1.819348778960551, "grad_norm": 2.7023744583129883, "learning_rate": 2.8592375366568913e-05, "loss": 0.8168, "step": 11622 }, { "epoch": 1.8195053224796494, "grad_norm": 2.590367078781128, "learning_rate": 2.8567937438905178e-05, "loss": 0.7713, "step": 11623 }, { "epoch": 1.8196618659987478, "grad_norm": 5.677480220794678, "learning_rate": 2.8543499511241446e-05, "loss": 0.5236, "step": 11624 }, { "epoch": 1.819818409517846, "grad_norm": 5.0781426429748535, "learning_rate": 2.851906158357771e-05, "loss": 0.6324, "step": 11625 }, { "epoch": 1.8199749530369442, "grad_norm": 3.239406108856201, "learning_rate": 2.8494623655913975e-05, "loss": 0.4184, "step": 11626 }, { "epoch": 1.8201314965560424, "grad_norm": 2.7294325828552246, "learning_rate": 2.8470185728250244e-05, "loss": 1.0267, "step": 11627 }, { "epoch": 1.820288040075141, "grad_norm": 3.9341413974761963, "learning_rate": 2.844574780058651e-05, "loss": 1.3683, "step": 11628 }, { "epoch": 1.8204445835942393, "grad_norm": 4.064838409423828, "learning_rate": 2.8421309872922773e-05, "loss": 1.1582, "step": 11629 }, { "epoch": 
1.8206011271133375, "grad_norm": 2.5269718170166016, "learning_rate": 2.839687194525904e-05, "loss": 0.4504, "step": 11630 }, { "epoch": 1.8207576706324358, "grad_norm": 2.9901621341705322, "learning_rate": 2.8372434017595306e-05, "loss": 1.0721, "step": 11631 }, { "epoch": 1.820914214151534, "grad_norm": 2.5948750972747803, "learning_rate": 2.834799608993157e-05, "loss": 0.8848, "step": 11632 }, { "epoch": 1.8210707576706324, "grad_norm": 7.479366779327393, "learning_rate": 2.832355816226784e-05, "loss": 0.9791, "step": 11633 }, { "epoch": 1.8212273011897309, "grad_norm": 6.228923797607422, "learning_rate": 2.8299120234604104e-05, "loss": 1.3042, "step": 11634 }, { "epoch": 1.821383844708829, "grad_norm": 2.04010272026062, "learning_rate": 2.827468230694037e-05, "loss": 0.7378, "step": 11635 }, { "epoch": 1.8215403882279273, "grad_norm": 3.85182523727417, "learning_rate": 2.8250244379276637e-05, "loss": 0.6142, "step": 11636 }, { "epoch": 1.8216969317470255, "grad_norm": 2.9816079139709473, "learning_rate": 2.8225806451612902e-05, "loss": 0.6967, "step": 11637 }, { "epoch": 1.821853475266124, "grad_norm": 2.144773483276367, "learning_rate": 2.8201368523949167e-05, "loss": 0.9379, "step": 11638 }, { "epoch": 1.8220100187852224, "grad_norm": 4.1807661056518555, "learning_rate": 2.8176930596285435e-05, "loss": 0.3605, "step": 11639 }, { "epoch": 1.8221665623043206, "grad_norm": 1.5326061248779297, "learning_rate": 2.81524926686217e-05, "loss": 0.345, "step": 11640 }, { "epoch": 1.8223231058234188, "grad_norm": 0.7716324925422668, "learning_rate": 2.8128054740957964e-05, "loss": 0.3745, "step": 11641 }, { "epoch": 1.8224796493425173, "grad_norm": 0.9222685098648071, "learning_rate": 2.8103616813294233e-05, "loss": 0.4254, "step": 11642 }, { "epoch": 1.8226361928616155, "grad_norm": 0.9720975160598755, "learning_rate": 2.8079178885630497e-05, "loss": 0.3535, "step": 11643 }, { "epoch": 1.822792736380714, "grad_norm": 1.215984582901001, "learning_rate": 
2.8054740957966762e-05, "loss": 0.4628, "step": 11644 }, { "epoch": 1.8229492798998121, "grad_norm": 1.166796326637268, "learning_rate": 2.803030303030303e-05, "loss": 0.4993, "step": 11645 }, { "epoch": 1.8231058234189104, "grad_norm": 1.0708175897598267, "learning_rate": 2.8005865102639295e-05, "loss": 0.4912, "step": 11646 }, { "epoch": 1.8232623669380088, "grad_norm": 1.6838656663894653, "learning_rate": 2.798142717497556e-05, "loss": 0.5915, "step": 11647 }, { "epoch": 1.823418910457107, "grad_norm": 1.128220558166504, "learning_rate": 2.7956989247311828e-05, "loss": 0.4936, "step": 11648 }, { "epoch": 1.8235754539762055, "grad_norm": 2.355700969696045, "learning_rate": 2.7932551319648093e-05, "loss": 0.7338, "step": 11649 }, { "epoch": 1.8237319974953037, "grad_norm": 2.227602481842041, "learning_rate": 2.7908113391984354e-05, "loss": 0.8051, "step": 11650 }, { "epoch": 1.823888541014402, "grad_norm": 1.5010913610458374, "learning_rate": 2.7883675464320626e-05, "loss": 0.9211, "step": 11651 }, { "epoch": 1.8240450845335003, "grad_norm": 2.1629629135131836, "learning_rate": 2.785923753665689e-05, "loss": 0.7823, "step": 11652 }, { "epoch": 1.8242016280525988, "grad_norm": 1.6575279235839844, "learning_rate": 2.7834799608993152e-05, "loss": 0.7279, "step": 11653 }, { "epoch": 1.824358171571697, "grad_norm": 1.6518474817276, "learning_rate": 2.7810361681329424e-05, "loss": 0.768, "step": 11654 }, { "epoch": 1.8245147150907952, "grad_norm": 2.1042938232421875, "learning_rate": 2.778592375366569e-05, "loss": 0.551, "step": 11655 }, { "epoch": 1.8246712586098934, "grad_norm": 5.784224987030029, "learning_rate": 2.776148582600195e-05, "loss": 0.6896, "step": 11656 }, { "epoch": 1.8248278021289919, "grad_norm": 1.9954174757003784, "learning_rate": 2.773704789833822e-05, "loss": 0.5671, "step": 11657 }, { "epoch": 1.8249843456480903, "grad_norm": 1.8126837015151978, "learning_rate": 2.7712609970674486e-05, "loss": 0.5581, "step": 11658 }, { "epoch": 
1.8251408891671885, "grad_norm": 7.1128106117248535, "learning_rate": 2.7688172043010748e-05, "loss": 0.4665, "step": 11659 }, { "epoch": 1.8252974326862867, "grad_norm": 3.6487762928009033, "learning_rate": 2.766373411534702e-05, "loss": 0.6816, "step": 11660 }, { "epoch": 1.825453976205385, "grad_norm": 5.3055338859558105, "learning_rate": 2.763929618768328e-05, "loss": 0.7622, "step": 11661 }, { "epoch": 1.8256105197244834, "grad_norm": 2.0376229286193848, "learning_rate": 2.7614858260019546e-05, "loss": 0.4507, "step": 11662 }, { "epoch": 1.8257670632435818, "grad_norm": 2.7106873989105225, "learning_rate": 2.7590420332355817e-05, "loss": 0.4463, "step": 11663 }, { "epoch": 1.82592360676268, "grad_norm": 2.749912977218628, "learning_rate": 2.756598240469208e-05, "loss": 0.3819, "step": 11664 }, { "epoch": 1.8260801502817783, "grad_norm": 4.844040393829346, "learning_rate": 2.7541544477028343e-05, "loss": 0.4986, "step": 11665 }, { "epoch": 1.8262366938008765, "grad_norm": 2.121793746948242, "learning_rate": 2.7517106549364615e-05, "loss": 0.2946, "step": 11666 }, { "epoch": 1.826393237319975, "grad_norm": 2.5618605613708496, "learning_rate": 2.7492668621700876e-05, "loss": 0.6871, "step": 11667 }, { "epoch": 1.8265497808390734, "grad_norm": 6.57215690612793, "learning_rate": 2.746823069403714e-05, "loss": 0.4507, "step": 11668 }, { "epoch": 1.8267063243581716, "grad_norm": 1.615665316581726, "learning_rate": 2.7443792766373413e-05, "loss": 0.4263, "step": 11669 }, { "epoch": 1.8268628678772698, "grad_norm": 2.805304765701294, "learning_rate": 2.7419354838709674e-05, "loss": 0.9011, "step": 11670 }, { "epoch": 1.827019411396368, "grad_norm": 2.946972370147705, "learning_rate": 2.739491691104594e-05, "loss": 0.4498, "step": 11671 }, { "epoch": 1.8271759549154665, "grad_norm": 2.347245693206787, "learning_rate": 2.737047898338221e-05, "loss": 0.9303, "step": 11672 }, { "epoch": 1.827332498434565, "grad_norm": 2.6300110816955566, "learning_rate": 
2.7346041055718472e-05, "loss": 1.0643, "step": 11673 }, { "epoch": 1.8274890419536631, "grad_norm": 5.476263046264648, "learning_rate": 2.7321603128054737e-05, "loss": 1.1481, "step": 11674 }, { "epoch": 1.8276455854727613, "grad_norm": 4.003941059112549, "learning_rate": 2.7297165200391e-05, "loss": 0.8726, "step": 11675 }, { "epoch": 1.8278021289918598, "grad_norm": 3.651909351348877, "learning_rate": 2.727272727272727e-05, "loss": 0.7593, "step": 11676 }, { "epoch": 1.827958672510958, "grad_norm": 5.829402923583984, "learning_rate": 2.7248289345063535e-05, "loss": 1.0815, "step": 11677 }, { "epoch": 1.8281152160300564, "grad_norm": 4.480926036834717, "learning_rate": 2.72238514173998e-05, "loss": 0.8428, "step": 11678 }, { "epoch": 1.8282717595491547, "grad_norm": 3.8357913494110107, "learning_rate": 2.7199413489736068e-05, "loss": 1.0317, "step": 11679 }, { "epoch": 1.8284283030682529, "grad_norm": 6.306792259216309, "learning_rate": 2.7174975562072332e-05, "loss": 1.6432, "step": 11680 }, { "epoch": 1.8285848465873513, "grad_norm": 3.932875156402588, "learning_rate": 2.7150537634408597e-05, "loss": 1.1734, "step": 11681 }, { "epoch": 1.8287413901064495, "grad_norm": 3.3402152061462402, "learning_rate": 2.7126099706744865e-05, "loss": 1.1751, "step": 11682 }, { "epoch": 1.828897933625548, "grad_norm": 4.163675785064697, "learning_rate": 2.710166177908113e-05, "loss": 1.3794, "step": 11683 }, { "epoch": 1.8290544771446462, "grad_norm": 2.3751349449157715, "learning_rate": 2.7077223851417395e-05, "loss": 0.7359, "step": 11684 }, { "epoch": 1.8292110206637444, "grad_norm": 1.7543164491653442, "learning_rate": 2.7052785923753663e-05, "loss": 0.4729, "step": 11685 }, { "epoch": 1.8293675641828429, "grad_norm": 3.514760732650757, "learning_rate": 2.7028347996089928e-05, "loss": 0.4018, "step": 11686 }, { "epoch": 1.8295241077019413, "grad_norm": 2.0676984786987305, "learning_rate": 2.7003910068426193e-05, "loss": 0.6138, "step": 11687 }, { "epoch": 
1.8296806512210395, "grad_norm": 2.1401822566986084, "learning_rate": 2.697947214076246e-05, "loss": 0.8691, "step": 11688 }, { "epoch": 1.8298371947401377, "grad_norm": 1.3781170845031738, "learning_rate": 2.6955034213098726e-05, "loss": 0.3156, "step": 11689 }, { "epoch": 1.829993738259236, "grad_norm": 3.473524808883667, "learning_rate": 2.693059628543499e-05, "loss": 0.3452, "step": 11690 }, { "epoch": 1.8301502817783344, "grad_norm": 2.099724531173706, "learning_rate": 2.690615835777126e-05, "loss": 0.2908, "step": 11691 }, { "epoch": 1.8303068252974328, "grad_norm": 1.1955111026763916, "learning_rate": 2.6881720430107523e-05, "loss": 0.3801, "step": 11692 }, { "epoch": 1.830463368816531, "grad_norm": 1.2508939504623413, "learning_rate": 2.6857282502443788e-05, "loss": 0.3253, "step": 11693 }, { "epoch": 1.8306199123356293, "grad_norm": 1.153827428817749, "learning_rate": 2.6832844574780056e-05, "loss": 0.3607, "step": 11694 }, { "epoch": 1.8307764558547275, "grad_norm": 0.8758578300476074, "learning_rate": 2.680840664711632e-05, "loss": 0.411, "step": 11695 }, { "epoch": 1.830932999373826, "grad_norm": 1.912964105606079, "learning_rate": 2.6783968719452586e-05, "loss": 0.3108, "step": 11696 }, { "epoch": 1.8310895428929244, "grad_norm": 1.8823193311691284, "learning_rate": 2.6759530791788854e-05, "loss": 0.4525, "step": 11697 }, { "epoch": 1.8312460864120226, "grad_norm": 1.5550634860992432, "learning_rate": 2.673509286412512e-05, "loss": 0.5261, "step": 11698 }, { "epoch": 1.8314026299311208, "grad_norm": 1.8203532695770264, "learning_rate": 2.6710654936461384e-05, "loss": 0.3801, "step": 11699 }, { "epoch": 1.831559173450219, "grad_norm": 3.2406249046325684, "learning_rate": 2.6686217008797652e-05, "loss": 0.4729, "step": 11700 }, { "epoch": 1.8317157169693175, "grad_norm": 0.9475135207176208, "learning_rate": 2.6661779081133917e-05, "loss": 0.4572, "step": 11701 }, { "epoch": 1.831872260488416, "grad_norm": 1.6032726764678955, "learning_rate": 
2.663734115347018e-05, "loss": 0.5409, "step": 11702 }, { "epoch": 1.8320288040075141, "grad_norm": 1.069624423980713, "learning_rate": 2.661290322580645e-05, "loss": 0.4271, "step": 11703 }, { "epoch": 1.8321853475266123, "grad_norm": 1.8359605073928833, "learning_rate": 2.6588465298142715e-05, "loss": 0.4135, "step": 11704 }, { "epoch": 1.8323418910457105, "grad_norm": 4.056179046630859, "learning_rate": 2.656402737047898e-05, "loss": 0.6246, "step": 11705 }, { "epoch": 1.832498434564809, "grad_norm": 1.7407777309417725, "learning_rate": 2.6539589442815248e-05, "loss": 0.4708, "step": 11706 }, { "epoch": 1.8326549780839074, "grad_norm": 1.6690481901168823, "learning_rate": 2.6515151515151512e-05, "loss": 0.5088, "step": 11707 }, { "epoch": 1.8328115216030056, "grad_norm": 1.289447546005249, "learning_rate": 2.6490713587487777e-05, "loss": 0.6305, "step": 11708 }, { "epoch": 1.8329680651221039, "grad_norm": 2.4667999744415283, "learning_rate": 2.6466275659824045e-05, "loss": 0.7835, "step": 11709 }, { "epoch": 1.8331246086412023, "grad_norm": 1.8310896158218384, "learning_rate": 2.644183773216031e-05, "loss": 0.6884, "step": 11710 }, { "epoch": 1.8332811521603005, "grad_norm": 1.9276304244995117, "learning_rate": 2.6417399804496575e-05, "loss": 0.7651, "step": 11711 }, { "epoch": 1.833437695679399, "grad_norm": 2.878755807876587, "learning_rate": 2.6392961876832843e-05, "loss": 0.5969, "step": 11712 }, { "epoch": 1.8335942391984972, "grad_norm": 1.5785597562789917, "learning_rate": 2.6368523949169108e-05, "loss": 0.6253, "step": 11713 }, { "epoch": 1.8337507827175954, "grad_norm": 1.4301279783248901, "learning_rate": 2.6344086021505373e-05, "loss": 0.4936, "step": 11714 }, { "epoch": 1.8339073262366938, "grad_norm": 3.3966879844665527, "learning_rate": 2.631964809384164e-05, "loss": 0.7848, "step": 11715 }, { "epoch": 1.8340638697557923, "grad_norm": 5.173041820526123, "learning_rate": 2.6295210166177906e-05, "loss": 0.7803, "step": 11716 }, { "epoch": 
1.8342204132748905, "grad_norm": 3.9286694526672363, "learning_rate": 2.627077223851417e-05, "loss": 0.8126, "step": 11717 }, { "epoch": 1.8343769567939887, "grad_norm": 6.795763969421387, "learning_rate": 2.624633431085044e-05, "loss": 0.6296, "step": 11718 }, { "epoch": 1.834533500313087, "grad_norm": 6.505904197692871, "learning_rate": 2.6221896383186704e-05, "loss": 0.9323, "step": 11719 }, { "epoch": 1.8346900438321854, "grad_norm": 2.0406224727630615, "learning_rate": 2.619745845552297e-05, "loss": 0.8992, "step": 11720 }, { "epoch": 1.8348465873512838, "grad_norm": 11.124521255493164, "learning_rate": 2.6173020527859237e-05, "loss": 0.9607, "step": 11721 }, { "epoch": 1.835003130870382, "grad_norm": 10.044174194335938, "learning_rate": 2.61485826001955e-05, "loss": 1.0505, "step": 11722 }, { "epoch": 1.8351596743894802, "grad_norm": 5.03192138671875, "learning_rate": 2.6124144672531766e-05, "loss": 0.7101, "step": 11723 }, { "epoch": 1.8353162179085785, "grad_norm": 3.8350234031677246, "learning_rate": 2.6099706744868034e-05, "loss": 0.6641, "step": 11724 }, { "epoch": 1.835472761427677, "grad_norm": 1.9279121160507202, "learning_rate": 2.60752688172043e-05, "loss": 0.5772, "step": 11725 }, { "epoch": 1.8356293049467753, "grad_norm": 2.837632656097412, "learning_rate": 2.6050830889540564e-05, "loss": 0.5835, "step": 11726 }, { "epoch": 1.8357858484658736, "grad_norm": 4.1351237297058105, "learning_rate": 2.6026392961876832e-05, "loss": 0.9303, "step": 11727 }, { "epoch": 1.8359423919849718, "grad_norm": 2.9830517768859863, "learning_rate": 2.6001955034213097e-05, "loss": 0.7467, "step": 11728 }, { "epoch": 1.83609893550407, "grad_norm": 2.7831618785858154, "learning_rate": 2.5977517106549362e-05, "loss": 0.8099, "step": 11729 }, { "epoch": 1.8362554790231684, "grad_norm": 2.7024545669555664, "learning_rate": 2.595307917888563e-05, "loss": 1.2066, "step": 11730 }, { "epoch": 1.8364120225422669, "grad_norm": 7.1745524406433105, "learning_rate": 
2.5928641251221895e-05, "loss": 1.3392, "step": 11731 }, { "epoch": 1.836568566061365, "grad_norm": 3.5159523487091064, "learning_rate": 2.590420332355816e-05, "loss": 1.2869, "step": 11732 }, { "epoch": 1.8367251095804633, "grad_norm": 1.527916669845581, "learning_rate": 2.5879765395894428e-05, "loss": 0.8537, "step": 11733 }, { "epoch": 1.8368816530995615, "grad_norm": 1.3133277893066406, "learning_rate": 2.5855327468230693e-05, "loss": 0.4337, "step": 11734 }, { "epoch": 1.83703819661866, "grad_norm": 1.1044042110443115, "learning_rate": 2.5830889540566957e-05, "loss": 0.5792, "step": 11735 }, { "epoch": 1.8371947401377584, "grad_norm": 1.173004388809204, "learning_rate": 2.5806451612903226e-05, "loss": 0.3835, "step": 11736 }, { "epoch": 1.8373512836568566, "grad_norm": 3.5974175930023193, "learning_rate": 2.578201368523949e-05, "loss": 0.7743, "step": 11737 }, { "epoch": 1.8375078271759548, "grad_norm": 1.2620445489883423, "learning_rate": 2.5757575757575755e-05, "loss": 0.6945, "step": 11738 }, { "epoch": 1.837664370695053, "grad_norm": 0.7731130719184875, "learning_rate": 2.573313782991202e-05, "loss": 0.4413, "step": 11739 }, { "epoch": 1.8378209142141515, "grad_norm": 1.389115810394287, "learning_rate": 2.5708699902248288e-05, "loss": 0.416, "step": 11740 }, { "epoch": 1.83797745773325, "grad_norm": 0.8706973195075989, "learning_rate": 2.5684261974584553e-05, "loss": 0.5073, "step": 11741 }, { "epoch": 1.8381340012523482, "grad_norm": 0.5937680602073669, "learning_rate": 2.5659824046920818e-05, "loss": 0.3761, "step": 11742 }, { "epoch": 1.8382905447714464, "grad_norm": 1.8303773403167725, "learning_rate": 2.5635386119257086e-05, "loss": 0.4664, "step": 11743 }, { "epoch": 1.8384470882905448, "grad_norm": 1.462870717048645, "learning_rate": 2.561094819159335e-05, "loss": 0.5187, "step": 11744 }, { "epoch": 1.838603631809643, "grad_norm": 1.8268141746520996, "learning_rate": 2.5586510263929615e-05, "loss": 0.5145, "step": 11745 }, { "epoch": 
1.8387601753287415, "grad_norm": 1.6612815856933594, "learning_rate": 2.5562072336265884e-05, "loss": 0.5543, "step": 11746 }, { "epoch": 1.8389167188478397, "grad_norm": 3.4995779991149902, "learning_rate": 2.553763440860215e-05, "loss": 0.4739, "step": 11747 }, { "epoch": 1.839073262366938, "grad_norm": 1.3062471151351929, "learning_rate": 2.5513196480938413e-05, "loss": 0.4802, "step": 11748 }, { "epoch": 1.8392298058860364, "grad_norm": 6.06448221206665, "learning_rate": 2.548875855327468e-05, "loss": 1.0193, "step": 11749 }, { "epoch": 1.8393863494051348, "grad_norm": 2.5236899852752686, "learning_rate": 2.5464320625610946e-05, "loss": 0.5685, "step": 11750 }, { "epoch": 1.839542892924233, "grad_norm": 1.4289917945861816, "learning_rate": 2.543988269794721e-05, "loss": 0.5858, "step": 11751 }, { "epoch": 1.8396994364433312, "grad_norm": 1.3175865411758423, "learning_rate": 2.541544477028348e-05, "loss": 0.6848, "step": 11752 }, { "epoch": 1.8398559799624294, "grad_norm": 3.198878288269043, "learning_rate": 2.5391006842619744e-05, "loss": 0.62, "step": 11753 }, { "epoch": 1.8400125234815279, "grad_norm": 2.0949974060058594, "learning_rate": 2.536656891495601e-05, "loss": 0.5892, "step": 11754 }, { "epoch": 1.8401690670006263, "grad_norm": 2.16085147857666, "learning_rate": 2.5342130987292277e-05, "loss": 0.8502, "step": 11755 }, { "epoch": 1.8403256105197245, "grad_norm": 3.7577946186065674, "learning_rate": 2.5317693059628542e-05, "loss": 0.7301, "step": 11756 }, { "epoch": 1.8404821540388228, "grad_norm": 1.981907606124878, "learning_rate": 2.5293255131964807e-05, "loss": 0.7686, "step": 11757 }, { "epoch": 1.840638697557921, "grad_norm": 4.2499308586120605, "learning_rate": 2.5268817204301075e-05, "loss": 0.663, "step": 11758 }, { "epoch": 1.8407952410770194, "grad_norm": 5.370046138763428, "learning_rate": 2.524437927663734e-05, "loss": 0.8082, "step": 11759 }, { "epoch": 1.8409517845961179, "grad_norm": 1.3637969493865967, "learning_rate": 
2.5219941348973604e-05, "loss": 0.7207, "step": 11760 }, { "epoch": 1.841108328115216, "grad_norm": 1.314660668373108, "learning_rate": 2.5195503421309873e-05, "loss": 0.6059, "step": 11761 }, { "epoch": 1.8412648716343143, "grad_norm": 6.1758809089660645, "learning_rate": 2.5171065493646137e-05, "loss": 0.684, "step": 11762 }, { "epoch": 1.8414214151534125, "grad_norm": 2.818993330001831, "learning_rate": 2.5146627565982402e-05, "loss": 0.7404, "step": 11763 }, { "epoch": 1.841577958672511, "grad_norm": 1.828629732131958, "learning_rate": 2.512218963831867e-05, "loss": 0.8012, "step": 11764 }, { "epoch": 1.8417345021916094, "grad_norm": 2.7739205360412598, "learning_rate": 2.5097751710654935e-05, "loss": 0.7304, "step": 11765 }, { "epoch": 1.8418910457107076, "grad_norm": 8.794857025146484, "learning_rate": 2.50733137829912e-05, "loss": 0.6273, "step": 11766 }, { "epoch": 1.8420475892298058, "grad_norm": 1.3181114196777344, "learning_rate": 2.5048875855327468e-05, "loss": 0.6637, "step": 11767 }, { "epoch": 1.842204132748904, "grad_norm": 1.4442148208618164, "learning_rate": 2.5024437927663733e-05, "loss": 0.5413, "step": 11768 }, { "epoch": 1.8423606762680025, "grad_norm": 4.040245532989502, "learning_rate": 2.4999999999999998e-05, "loss": 0.7349, "step": 11769 }, { "epoch": 1.842517219787101, "grad_norm": 3.4803292751312256, "learning_rate": 2.4975562072336266e-05, "loss": 0.8002, "step": 11770 }, { "epoch": 1.8426737633061991, "grad_norm": 27.806154251098633, "learning_rate": 2.495112414467253e-05, "loss": 0.4308, "step": 11771 }, { "epoch": 1.8428303068252974, "grad_norm": 6.975183963775635, "learning_rate": 2.4926686217008796e-05, "loss": 1.6088, "step": 11772 }, { "epoch": 1.8429868503443956, "grad_norm": 3.424659013748169, "learning_rate": 2.4902248289345064e-05, "loss": 0.9311, "step": 11773 }, { "epoch": 1.843143393863494, "grad_norm": 2.6333138942718506, "learning_rate": 2.487781036168133e-05, "loss": 1.0048, "step": 11774 }, { "epoch": 
1.8432999373825925, "grad_norm": 3.147855520248413, "learning_rate": 2.4853372434017593e-05, "loss": 0.8559, "step": 11775 }, { "epoch": 1.8434564809016907, "grad_norm": 2.6393229961395264, "learning_rate": 2.482893450635386e-05, "loss": 1.0206, "step": 11776 }, { "epoch": 1.843613024420789, "grad_norm": 2.7040412425994873, "learning_rate": 2.4804496578690126e-05, "loss": 0.4979, "step": 11777 }, { "epoch": 1.8437695679398873, "grad_norm": 3.9352643489837646, "learning_rate": 2.478005865102639e-05, "loss": 0.7279, "step": 11778 }, { "epoch": 1.8439261114589856, "grad_norm": 2.641021966934204, "learning_rate": 2.475562072336266e-05, "loss": 0.9773, "step": 11779 }, { "epoch": 1.844082654978084, "grad_norm": 2.157909631729126, "learning_rate": 2.4731182795698924e-05, "loss": 0.9536, "step": 11780 }, { "epoch": 1.8442391984971822, "grad_norm": 3.1116890907287598, "learning_rate": 2.470674486803519e-05, "loss": 1.4514, "step": 11781 }, { "epoch": 1.8443957420162804, "grad_norm": 3.8484981060028076, "learning_rate": 2.4682306940371457e-05, "loss": 0.6041, "step": 11782 }, { "epoch": 1.8445522855353789, "grad_norm": 2.849015951156616, "learning_rate": 2.4657869012707722e-05, "loss": 0.9883, "step": 11783 }, { "epoch": 1.8447088290544773, "grad_norm": 1.907538652420044, "learning_rate": 2.4633431085043983e-05, "loss": 0.4752, "step": 11784 }, { "epoch": 1.8448653725735755, "grad_norm": 4.029622554779053, "learning_rate": 2.4608993157380255e-05, "loss": 1.0383, "step": 11785 }, { "epoch": 1.8450219160926737, "grad_norm": 4.055169105529785, "learning_rate": 2.458455522971652e-05, "loss": 0.9394, "step": 11786 }, { "epoch": 1.845178459611772, "grad_norm": 1.425485372543335, "learning_rate": 2.456011730205278e-05, "loss": 0.3284, "step": 11787 }, { "epoch": 1.8453350031308704, "grad_norm": 3.0947279930114746, "learning_rate": 2.4535679374389053e-05, "loss": 0.9571, "step": 11788 }, { "epoch": 1.8454915466499688, "grad_norm": 2.302706718444824, "learning_rate": 
2.4511241446725318e-05, "loss": 0.8344, "step": 11789 }, { "epoch": 1.845648090169067, "grad_norm": 5.67050838470459, "learning_rate": 2.448680351906158e-05, "loss": 0.5112, "step": 11790 }, { "epoch": 1.8458046336881653, "grad_norm": 1.0470259189605713, "learning_rate": 2.446236559139785e-05, "loss": 0.4184, "step": 11791 }, { "epoch": 1.8459611772072635, "grad_norm": 1.2459172010421753, "learning_rate": 2.4437927663734115e-05, "loss": 0.5126, "step": 11792 }, { "epoch": 1.846117720726362, "grad_norm": 0.8190130591392517, "learning_rate": 2.4413489736070377e-05, "loss": 0.4512, "step": 11793 }, { "epoch": 1.8462742642454604, "grad_norm": 3.6219711303710938, "learning_rate": 2.438905180840665e-05, "loss": 0.916, "step": 11794 }, { "epoch": 1.8464308077645586, "grad_norm": 2.1488611698150635, "learning_rate": 2.4364613880742913e-05, "loss": 0.6591, "step": 11795 }, { "epoch": 1.8465873512836568, "grad_norm": 0.7660203576087952, "learning_rate": 2.4340175953079175e-05, "loss": 0.3781, "step": 11796 }, { "epoch": 1.846743894802755, "grad_norm": 1.8825334310531616, "learning_rate": 2.4315738025415446e-05, "loss": 0.4816, "step": 11797 }, { "epoch": 1.8469004383218535, "grad_norm": 1.4968349933624268, "learning_rate": 2.4291300097751708e-05, "loss": 0.5589, "step": 11798 }, { "epoch": 1.847056981840952, "grad_norm": 3.8094868659973145, "learning_rate": 2.4266862170087972e-05, "loss": 0.4607, "step": 11799 }, { "epoch": 1.8472135253600501, "grad_norm": 2.436556816101074, "learning_rate": 2.4242424242424244e-05, "loss": 0.5093, "step": 11800 }, { "epoch": 1.8473700688791483, "grad_norm": 3.266652822494507, "learning_rate": 2.4217986314760505e-05, "loss": 0.6517, "step": 11801 }, { "epoch": 1.8475266123982466, "grad_norm": 2.8735289573669434, "learning_rate": 2.419354838709677e-05, "loss": 0.6738, "step": 11802 }, { "epoch": 1.847683155917345, "grad_norm": 2.9087893962860107, "learning_rate": 2.4169110459433035e-05, "loss": 0.9188, "step": 11803 }, { "epoch": 
1.8478396994364434, "grad_norm": 1.770983338356018, "learning_rate": 2.4144672531769303e-05, "loss": 0.5283, "step": 11804 }, { "epoch": 1.8479962429555417, "grad_norm": 4.079898357391357, "learning_rate": 2.4120234604105568e-05, "loss": 0.6439, "step": 11805 }, { "epoch": 1.8481527864746399, "grad_norm": 2.5677294731140137, "learning_rate": 2.4095796676441833e-05, "loss": 0.9994, "step": 11806 }, { "epoch": 1.8483093299937383, "grad_norm": 1.662412166595459, "learning_rate": 2.40713587487781e-05, "loss": 0.7825, "step": 11807 }, { "epoch": 1.8484658735128365, "grad_norm": 1.9668818712234497, "learning_rate": 2.4046920821114366e-05, "loss": 0.6303, "step": 11808 }, { "epoch": 1.848622417031935, "grad_norm": 2.1464879512786865, "learning_rate": 2.402248289345063e-05, "loss": 0.709, "step": 11809 }, { "epoch": 1.8487789605510332, "grad_norm": 5.349493980407715, "learning_rate": 2.39980449657869e-05, "loss": 0.6679, "step": 11810 }, { "epoch": 1.8489355040701314, "grad_norm": 1.8723517656326294, "learning_rate": 2.3973607038123163e-05, "loss": 0.8859, "step": 11811 }, { "epoch": 1.8490920475892298, "grad_norm": 2.6535778045654297, "learning_rate": 2.3949169110459428e-05, "loss": 0.5719, "step": 11812 }, { "epoch": 1.849248591108328, "grad_norm": 1.8219172954559326, "learning_rate": 2.3924731182795696e-05, "loss": 0.6213, "step": 11813 }, { "epoch": 1.8494051346274265, "grad_norm": 2.3253509998321533, "learning_rate": 2.390029325513196e-05, "loss": 0.6391, "step": 11814 }, { "epoch": 1.8495616781465247, "grad_norm": 5.5953545570373535, "learning_rate": 2.3875855327468226e-05, "loss": 0.4743, "step": 11815 }, { "epoch": 1.849718221665623, "grad_norm": 4.139413356781006, "learning_rate": 2.3851417399804494e-05, "loss": 0.798, "step": 11816 }, { "epoch": 1.8498747651847214, "grad_norm": 10.96532154083252, "learning_rate": 2.382697947214076e-05, "loss": 0.4565, "step": 11817 }, { "epoch": 1.8500313087038198, "grad_norm": 2.8876092433929443, "learning_rate": 
2.3802541544477024e-05, "loss": 0.9685, "step": 11818 }, { "epoch": 1.850187852222918, "grad_norm": 3.1957509517669678, "learning_rate": 2.3778103616813292e-05, "loss": 1.0197, "step": 11819 }, { "epoch": 1.8503443957420163, "grad_norm": 3.1257941722869873, "learning_rate": 2.3753665689149557e-05, "loss": 0.8237, "step": 11820 }, { "epoch": 1.8505009392611145, "grad_norm": 3.2138302326202393, "learning_rate": 2.372922776148582e-05, "loss": 0.8114, "step": 11821 }, { "epoch": 1.850657482780213, "grad_norm": 7.504042625427246, "learning_rate": 2.370478983382209e-05, "loss": 0.7801, "step": 11822 }, { "epoch": 1.8508140262993114, "grad_norm": 5.465351104736328, "learning_rate": 2.3680351906158355e-05, "loss": 1.1196, "step": 11823 }, { "epoch": 1.8509705698184096, "grad_norm": 5.04585599899292, "learning_rate": 2.365591397849462e-05, "loss": 0.9248, "step": 11824 }, { "epoch": 1.8511271133375078, "grad_norm": 1.4739549160003662, "learning_rate": 2.3631476050830888e-05, "loss": 0.6686, "step": 11825 }, { "epoch": 1.851283656856606, "grad_norm": 3.8544936180114746, "learning_rate": 2.3607038123167152e-05, "loss": 0.923, "step": 11826 }, { "epoch": 1.8514402003757044, "grad_norm": 5.232646942138672, "learning_rate": 2.3582600195503417e-05, "loss": 0.8553, "step": 11827 }, { "epoch": 1.8515967438948029, "grad_norm": 3.4390921592712402, "learning_rate": 2.3558162267839685e-05, "loss": 1.4067, "step": 11828 }, { "epoch": 1.851753287413901, "grad_norm": 4.184832572937012, "learning_rate": 2.353372434017595e-05, "loss": 0.9317, "step": 11829 }, { "epoch": 1.8519098309329993, "grad_norm": 3.6399333477020264, "learning_rate": 2.3509286412512215e-05, "loss": 1.2859, "step": 11830 }, { "epoch": 1.8520663744520975, "grad_norm": 2.263460636138916, "learning_rate": 2.3484848484848483e-05, "loss": 0.8343, "step": 11831 }, { "epoch": 1.852222917971196, "grad_norm": 1.4903042316436768, "learning_rate": 2.3460410557184748e-05, "loss": 0.7115, "step": 11832 }, { "epoch": 
1.8523794614902944, "grad_norm": 4.350191593170166, "learning_rate": 2.3435972629521013e-05, "loss": 1.8051, "step": 11833 }, { "epoch": 1.8525360050093926, "grad_norm": 3.3762741088867188, "learning_rate": 2.341153470185728e-05, "loss": 0.6415, "step": 11834 }, { "epoch": 1.8526925485284909, "grad_norm": 2.8959567546844482, "learning_rate": 2.3387096774193546e-05, "loss": 0.9455, "step": 11835 }, { "epoch": 1.852849092047589, "grad_norm": 4.2399444580078125, "learning_rate": 2.336265884652981e-05, "loss": 1.3687, "step": 11836 }, { "epoch": 1.8530056355666875, "grad_norm": 3.185884952545166, "learning_rate": 2.333822091886608e-05, "loss": 0.6854, "step": 11837 }, { "epoch": 1.853162179085786, "grad_norm": 1.1546235084533691, "learning_rate": 2.3313782991202344e-05, "loss": 0.4202, "step": 11838 }, { "epoch": 1.8533187226048842, "grad_norm": 0.8933812975883484, "learning_rate": 2.328934506353861e-05, "loss": 0.594, "step": 11839 }, { "epoch": 1.8534752661239824, "grad_norm": 1.431520938873291, "learning_rate": 2.3264907135874877e-05, "loss": 0.5386, "step": 11840 }, { "epoch": 1.8536318096430808, "grad_norm": 1.0850852727890015, "learning_rate": 2.324046920821114e-05, "loss": 0.596, "step": 11841 }, { "epoch": 1.853788353162179, "grad_norm": 1.6052504777908325, "learning_rate": 2.3216031280547406e-05, "loss": 0.6123, "step": 11842 }, { "epoch": 1.8539448966812775, "grad_norm": 1.0671995878219604, "learning_rate": 2.3191593352883674e-05, "loss": 0.5618, "step": 11843 }, { "epoch": 1.8541014402003757, "grad_norm": 1.0191351175308228, "learning_rate": 2.316715542521994e-05, "loss": 0.5923, "step": 11844 }, { "epoch": 1.854257983719474, "grad_norm": 1.2922497987747192, "learning_rate": 2.3142717497556204e-05, "loss": 0.5884, "step": 11845 }, { "epoch": 1.8544145272385724, "grad_norm": 0.6985080242156982, "learning_rate": 2.3118279569892472e-05, "loss": 0.549, "step": 11846 }, { "epoch": 1.8545710707576706, "grad_norm": 1.6202582120895386, "learning_rate": 
2.3093841642228737e-05, "loss": 0.7027, "step": 11847 }, { "epoch": 1.854727614276769, "grad_norm": 6.858791828155518, "learning_rate": 2.3069403714565002e-05, "loss": 1.267, "step": 11848 }, { "epoch": 1.8548841577958672, "grad_norm": 2.2614457607269287, "learning_rate": 2.304496578690127e-05, "loss": 0.7434, "step": 11849 }, { "epoch": 1.8550407013149655, "grad_norm": 6.331788539886475, "learning_rate": 2.3020527859237535e-05, "loss": 0.8123, "step": 11850 }, { "epoch": 1.855197244834064, "grad_norm": 1.8438414335250854, "learning_rate": 2.29960899315738e-05, "loss": 0.754, "step": 11851 }, { "epoch": 1.8553537883531623, "grad_norm": 1.1426644325256348, "learning_rate": 2.2971652003910068e-05, "loss": 0.5913, "step": 11852 }, { "epoch": 1.8555103318722606, "grad_norm": 1.538586974143982, "learning_rate": 2.2947214076246333e-05, "loss": 0.5843, "step": 11853 }, { "epoch": 1.8556668753913588, "grad_norm": 2.8902087211608887, "learning_rate": 2.2922776148582597e-05, "loss": 0.8857, "step": 11854 }, { "epoch": 1.855823418910457, "grad_norm": 2.553431749343872, "learning_rate": 2.2898338220918866e-05, "loss": 0.8052, "step": 11855 }, { "epoch": 1.8559799624295554, "grad_norm": 3.586069107055664, "learning_rate": 2.287390029325513e-05, "loss": 0.7178, "step": 11856 }, { "epoch": 1.8561365059486539, "grad_norm": 1.4194121360778809, "learning_rate": 2.2849462365591395e-05, "loss": 0.7, "step": 11857 }, { "epoch": 1.856293049467752, "grad_norm": 1.709073781967163, "learning_rate": 2.2825024437927663e-05, "loss": 0.613, "step": 11858 }, { "epoch": 1.8564495929868503, "grad_norm": 1.8721200227737427, "learning_rate": 2.2800586510263928e-05, "loss": 0.6448, "step": 11859 }, { "epoch": 1.8566061365059485, "grad_norm": 3.26497745513916, "learning_rate": 2.2776148582600193e-05, "loss": 0.7323, "step": 11860 }, { "epoch": 1.856762680025047, "grad_norm": 3.8040733337402344, "learning_rate": 2.275171065493646e-05, "loss": 1.2313, "step": 11861 }, { "epoch": 1.8569192235441454, 
"grad_norm": 3.2131476402282715, "learning_rate": 2.2727272727272726e-05, "loss": 0.5404, "step": 11862 }, { "epoch": 1.8570757670632436, "grad_norm": 2.0257785320281982, "learning_rate": 2.270283479960899e-05, "loss": 0.5706, "step": 11863 }, { "epoch": 1.8572323105823418, "grad_norm": 1.4355109930038452, "learning_rate": 2.267839687194526e-05, "loss": 0.7594, "step": 11864 }, { "epoch": 1.85738885410144, "grad_norm": 2.189570426940918, "learning_rate": 2.2653958944281524e-05, "loss": 0.892, "step": 11865 }, { "epoch": 1.8575453976205385, "grad_norm": 1.5247050523757935, "learning_rate": 2.262952101661779e-05, "loss": 0.7958, "step": 11866 }, { "epoch": 1.857701941139637, "grad_norm": 2.8987576961517334, "learning_rate": 2.2605083088954053e-05, "loss": 0.8679, "step": 11867 }, { "epoch": 1.8578584846587352, "grad_norm": 2.054922580718994, "learning_rate": 2.258064516129032e-05, "loss": 0.8147, "step": 11868 }, { "epoch": 1.8580150281778334, "grad_norm": 2.206418752670288, "learning_rate": 2.2556207233626586e-05, "loss": 0.4729, "step": 11869 }, { "epoch": 1.8581715716969316, "grad_norm": 3.0962157249450684, "learning_rate": 2.253176930596285e-05, "loss": 1.0916, "step": 11870 }, { "epoch": 1.85832811521603, "grad_norm": 3.1131982803344727, "learning_rate": 2.250733137829912e-05, "loss": 1.1446, "step": 11871 }, { "epoch": 1.8584846587351285, "grad_norm": 3.34653639793396, "learning_rate": 2.2482893450635384e-05, "loss": 1.1253, "step": 11872 }, { "epoch": 1.8586412022542267, "grad_norm": 7.987216472625732, "learning_rate": 2.245845552297165e-05, "loss": 1.4304, "step": 11873 }, { "epoch": 1.858797745773325, "grad_norm": 3.8446297645568848, "learning_rate": 2.2434017595307917e-05, "loss": 0.5307, "step": 11874 }, { "epoch": 1.8589542892924233, "grad_norm": 2.1328821182250977, "learning_rate": 2.2409579667644182e-05, "loss": 0.765, "step": 11875 }, { "epoch": 1.8591108328115216, "grad_norm": 4.147828578948975, "learning_rate": 2.2385141739980447e-05, "loss": 1.1964, 
"step": 11876 }, { "epoch": 1.85926737633062, "grad_norm": 1.5424137115478516, "learning_rate": 2.2360703812316715e-05, "loss": 0.7498, "step": 11877 }, { "epoch": 1.8594239198497182, "grad_norm": 2.9707698822021484, "learning_rate": 2.233626588465298e-05, "loss": 0.8177, "step": 11878 }, { "epoch": 1.8595804633688164, "grad_norm": 8.598952293395996, "learning_rate": 2.2311827956989244e-05, "loss": 0.8357, "step": 11879 }, { "epoch": 1.8597370068879149, "grad_norm": 1.6900843381881714, "learning_rate": 2.2287390029325513e-05, "loss": 0.6847, "step": 11880 }, { "epoch": 1.859893550407013, "grad_norm": 5.230086803436279, "learning_rate": 2.2262952101661777e-05, "loss": 1.5568, "step": 11881 }, { "epoch": 1.8600500939261115, "grad_norm": 3.5463037490844727, "learning_rate": 2.2238514173998042e-05, "loss": 1.0774, "step": 11882 }, { "epoch": 1.8602066374452098, "grad_norm": 3.144085168838501, "learning_rate": 2.221407624633431e-05, "loss": 0.8156, "step": 11883 }, { "epoch": 1.860363180964308, "grad_norm": 2.6396665573120117, "learning_rate": 2.2189638318670575e-05, "loss": 0.6252, "step": 11884 }, { "epoch": 1.8605197244834064, "grad_norm": 0.6238581538200378, "learning_rate": 2.216520039100684e-05, "loss": 0.1875, "step": 11885 }, { "epoch": 1.8606762680025049, "grad_norm": 2.998420238494873, "learning_rate": 2.2140762463343108e-05, "loss": 0.52, "step": 11886 }, { "epoch": 1.860832811521603, "grad_norm": 3.8253471851348877, "learning_rate": 2.2116324535679373e-05, "loss": 1.4961, "step": 11887 }, { "epoch": 1.8609893550407013, "grad_norm": 4.558570861816406, "learning_rate": 2.2091886608015638e-05, "loss": 0.85, "step": 11888 }, { "epoch": 1.8611458985597995, "grad_norm": 1.2218017578125, "learning_rate": 2.2067448680351906e-05, "loss": 0.634, "step": 11889 }, { "epoch": 1.861302442078898, "grad_norm": 2.944087266921997, "learning_rate": 2.204301075268817e-05, "loss": 0.5668, "step": 11890 }, { "epoch": 1.8614589855979964, "grad_norm": 1.4352192878723145, 
"learning_rate": 2.2018572825024436e-05, "loss": 0.6317, "step": 11891 }, { "epoch": 1.8616155291170946, "grad_norm": 1.2094552516937256, "learning_rate": 2.1994134897360704e-05, "loss": 0.5786, "step": 11892 }, { "epoch": 1.8617720726361928, "grad_norm": 1.98757803440094, "learning_rate": 2.196969696969697e-05, "loss": 0.6373, "step": 11893 }, { "epoch": 1.861928616155291, "grad_norm": 1.0095068216323853, "learning_rate": 2.1945259042033233e-05, "loss": 0.6269, "step": 11894 }, { "epoch": 1.8620851596743895, "grad_norm": 1.0049716234207153, "learning_rate": 2.19208211143695e-05, "loss": 0.6092, "step": 11895 }, { "epoch": 1.862241703193488, "grad_norm": 1.7188302278518677, "learning_rate": 2.1896383186705766e-05, "loss": 0.5908, "step": 11896 }, { "epoch": 1.8623982467125861, "grad_norm": 2.2089591026306152, "learning_rate": 2.187194525904203e-05, "loss": 0.6305, "step": 11897 }, { "epoch": 1.8625547902316844, "grad_norm": 2.382549285888672, "learning_rate": 2.18475073313783e-05, "loss": 0.7027, "step": 11898 }, { "epoch": 1.8627113337507826, "grad_norm": 1.0006448030471802, "learning_rate": 2.1823069403714564e-05, "loss": 0.6653, "step": 11899 }, { "epoch": 1.862867877269881, "grad_norm": 1.5975639820098877, "learning_rate": 2.179863147605083e-05, "loss": 0.7094, "step": 11900 }, { "epoch": 1.8630244207889795, "grad_norm": 1.290367841720581, "learning_rate": 2.1774193548387097e-05, "loss": 0.7759, "step": 11901 }, { "epoch": 1.8631809643080777, "grad_norm": 2.142758846282959, "learning_rate": 2.1749755620723362e-05, "loss": 0.7417, "step": 11902 }, { "epoch": 1.8633375078271759, "grad_norm": 1.4143774509429932, "learning_rate": 2.1725317693059627e-05, "loss": 0.8493, "step": 11903 }, { "epoch": 1.863494051346274, "grad_norm": 1.213179588317871, "learning_rate": 2.1700879765395895e-05, "loss": 0.7674, "step": 11904 }, { "epoch": 1.8636505948653725, "grad_norm": 1.534143328666687, "learning_rate": 2.167644183773216e-05, "loss": 0.8457, "step": 11905 }, { "epoch": 
1.863807138384471, "grad_norm": 14.284627914428711, "learning_rate": 2.1652003910068425e-05, "loss": 0.7507, "step": 11906 }, { "epoch": 1.8639636819035692, "grad_norm": 1.4157757759094238, "learning_rate": 2.1627565982404693e-05, "loss": 0.7684, "step": 11907 }, { "epoch": 1.8641202254226674, "grad_norm": 1.829573392868042, "learning_rate": 2.1603128054740958e-05, "loss": 0.7432, "step": 11908 }, { "epoch": 1.8642767689417659, "grad_norm": 1.5792940855026245, "learning_rate": 2.1578690127077222e-05, "loss": 0.9471, "step": 11909 }, { "epoch": 1.864433312460864, "grad_norm": 3.3256168365478516, "learning_rate": 2.155425219941349e-05, "loss": 1.042, "step": 11910 }, { "epoch": 1.8645898559799625, "grad_norm": 2.1688032150268555, "learning_rate": 2.1529814271749755e-05, "loss": 0.8677, "step": 11911 }, { "epoch": 1.8647463994990607, "grad_norm": 2.214085578918457, "learning_rate": 2.150537634408602e-05, "loss": 1.0286, "step": 11912 }, { "epoch": 1.864902943018159, "grad_norm": 2.812800168991089, "learning_rate": 2.148093841642229e-05, "loss": 0.8419, "step": 11913 }, { "epoch": 1.8650594865372574, "grad_norm": 2.8499462604522705, "learning_rate": 2.1456500488758553e-05, "loss": 0.7129, "step": 11914 }, { "epoch": 1.8652160300563556, "grad_norm": 2.5196754932403564, "learning_rate": 2.1432062561094818e-05, "loss": 0.9942, "step": 11915 }, { "epoch": 1.865372573575454, "grad_norm": 1.3992620706558228, "learning_rate": 2.1407624633431086e-05, "loss": 0.7659, "step": 11916 }, { "epoch": 1.8655291170945523, "grad_norm": 3.1790847778320312, "learning_rate": 2.138318670576735e-05, "loss": 0.7056, "step": 11917 }, { "epoch": 1.8656856606136505, "grad_norm": 3.775634527206421, "learning_rate": 2.1358748778103612e-05, "loss": 1.2447, "step": 11918 }, { "epoch": 1.865842204132749, "grad_norm": 2.107165813446045, "learning_rate": 2.1334310850439884e-05, "loss": 0.5885, "step": 11919 }, { "epoch": 1.8659987476518474, "grad_norm": 2.489856243133545, "learning_rate": 
2.130987292277615e-05, "loss": 0.6303, "step": 11920 }, { "epoch": 1.8661552911709456, "grad_norm": 6.781438827514648, "learning_rate": 2.128543499511241e-05, "loss": 0.6447, "step": 11921 }, { "epoch": 1.8663118346900438, "grad_norm": 3.0962233543395996, "learning_rate": 2.126099706744868e-05, "loss": 0.6563, "step": 11922 }, { "epoch": 1.866468378209142, "grad_norm": 3.9209532737731934, "learning_rate": 2.1236559139784946e-05, "loss": 1.2628, "step": 11923 }, { "epoch": 1.8666249217282405, "grad_norm": 2.2564404010772705, "learning_rate": 2.1212121212121208e-05, "loss": 0.7253, "step": 11924 }, { "epoch": 1.866781465247339, "grad_norm": 5.064412593841553, "learning_rate": 2.118768328445748e-05, "loss": 1.0094, "step": 11925 }, { "epoch": 1.8669380087664371, "grad_norm": 4.73468542098999, "learning_rate": 2.1163245356793744e-05, "loss": 1.0255, "step": 11926 }, { "epoch": 1.8670945522855353, "grad_norm": 4.486331462860107, "learning_rate": 2.1138807429130006e-05, "loss": 1.1333, "step": 11927 }, { "epoch": 1.8672510958046336, "grad_norm": 4.3150811195373535, "learning_rate": 2.1114369501466277e-05, "loss": 0.555, "step": 11928 }, { "epoch": 1.867407639323732, "grad_norm": 4.391129970550537, "learning_rate": 2.1089931573802542e-05, "loss": 1.3198, "step": 11929 }, { "epoch": 1.8675641828428304, "grad_norm": 3.979997158050537, "learning_rate": 2.1065493646138803e-05, "loss": 1.0015, "step": 11930 }, { "epoch": 1.8677207263619287, "grad_norm": 2.4785280227661133, "learning_rate": 2.1041055718475068e-05, "loss": 1.9898, "step": 11931 }, { "epoch": 1.8678772698810269, "grad_norm": 2.6585922241210938, "learning_rate": 2.1016617790811336e-05, "loss": 1.0243, "step": 11932 }, { "epoch": 1.868033813400125, "grad_norm": null, "learning_rate": 2.1016617790811336e-05, "loss": 0.0, "step": 11933 }, { "epoch": 1.8681903569192235, "grad_norm": 3.6180975437164307, "learning_rate": 2.09921798631476e-05, "loss": 0.9492, "step": 11934 }, { "epoch": 1.868346900438322, "grad_norm": 
4.195194721221924, "learning_rate": 2.0967741935483866e-05, "loss": 1.0283, "step": 11935 }, { "epoch": 1.8685034439574202, "grad_norm": 1.3655951023101807, "learning_rate": 2.0943304007820134e-05, "loss": 0.5503, "step": 11936 }, { "epoch": 1.8686599874765184, "grad_norm": 2.214198112487793, "learning_rate": 2.09188660801564e-05, "loss": 0.5951, "step": 11937 }, { "epoch": 1.8688165309956166, "grad_norm": 2.850289821624756, "learning_rate": 2.0894428152492664e-05, "loss": 0.8592, "step": 11938 }, { "epoch": 1.868973074514715, "grad_norm": 0.8146681189537048, "learning_rate": 2.0869990224828932e-05, "loss": 0.8295, "step": 11939 }, { "epoch": 1.8691296180338135, "grad_norm": 1.004290223121643, "learning_rate": 2.0845552297165197e-05, "loss": 0.8397, "step": 11940 }, { "epoch": 1.8692861615529117, "grad_norm": 1.3244619369506836, "learning_rate": 2.082111436950146e-05, "loss": 0.9296, "step": 11941 }, { "epoch": 1.86944270507201, "grad_norm": 1.1102343797683716, "learning_rate": 2.079667644183773e-05, "loss": 0.8361, "step": 11942 }, { "epoch": 1.8695992485911084, "grad_norm": 1.2211482524871826, "learning_rate": 2.0772238514173995e-05, "loss": 1.0738, "step": 11943 }, { "epoch": 1.8697557921102066, "grad_norm": 0.993922233581543, "learning_rate": 2.074780058651026e-05, "loss": 0.876, "step": 11944 }, { "epoch": 1.869912335629305, "grad_norm": 1.7707405090332031, "learning_rate": 2.0723362658846528e-05, "loss": 0.8152, "step": 11945 }, { "epoch": 1.8700688791484033, "grad_norm": 0.997743546962738, "learning_rate": 2.0698924731182792e-05, "loss": 0.8948, "step": 11946 }, { "epoch": 1.8702254226675015, "grad_norm": 0.9178709983825684, "learning_rate": 2.0674486803519057e-05, "loss": 0.8765, "step": 11947 }, { "epoch": 1.8703819661866, "grad_norm": 2.155320167541504, "learning_rate": 2.0650048875855325e-05, "loss": 0.9537, "step": 11948 }, { "epoch": 1.8705385097056983, "grad_norm": 1.6522964239120483, "learning_rate": 2.062561094819159e-05, "loss": 0.9491, "step": 
11949 }, { "epoch": 1.8706950532247966, "grad_norm": 1.9385637044906616, "learning_rate": 2.0601173020527855e-05, "loss": 0.9958, "step": 11950 }, { "epoch": 1.8708515967438948, "grad_norm": 2.451505422592163, "learning_rate": 2.0576735092864123e-05, "loss": 0.9823, "step": 11951 }, { "epoch": 1.871008140262993, "grad_norm": 9.86997127532959, "learning_rate": 2.0552297165200388e-05, "loss": 0.9815, "step": 11952 }, { "epoch": 1.8711646837820914, "grad_norm": 1.331578016281128, "learning_rate": 2.0527859237536653e-05, "loss": 0.9844, "step": 11953 }, { "epoch": 1.8713212273011899, "grad_norm": 2.020040273666382, "learning_rate": 2.050342130987292e-05, "loss": 0.8801, "step": 11954 }, { "epoch": 1.871477770820288, "grad_norm": 2.5946261882781982, "learning_rate": 2.0478983382209186e-05, "loss": 1.0083, "step": 11955 }, { "epoch": 1.8716343143393863, "grad_norm": 1.8665767908096313, "learning_rate": 2.045454545454545e-05, "loss": 0.9543, "step": 11956 }, { "epoch": 1.8717908578584845, "grad_norm": 2.101576089859009, "learning_rate": 2.043010752688172e-05, "loss": 0.8922, "step": 11957 }, { "epoch": 1.871947401377583, "grad_norm": 1.9192637205123901, "learning_rate": 2.0405669599217984e-05, "loss": 0.9835, "step": 11958 }, { "epoch": 1.8721039448966814, "grad_norm": 6.741157054901123, "learning_rate": 2.038123167155425e-05, "loss": 0.9142, "step": 11959 }, { "epoch": 1.8722604884157796, "grad_norm": 2.8808178901672363, "learning_rate": 2.0356793743890517e-05, "loss": 0.9528, "step": 11960 }, { "epoch": 1.8724170319348779, "grad_norm": 7.245316505432129, "learning_rate": 2.033235581622678e-05, "loss": 1.0117, "step": 11961 }, { "epoch": 1.872573575453976, "grad_norm": 1.8030654191970825, "learning_rate": 2.0307917888563046e-05, "loss": 0.736, "step": 11962 }, { "epoch": 1.8727301189730745, "grad_norm": 19.674142837524414, "learning_rate": 2.0283479960899314e-05, "loss": 0.9961, "step": 11963 }, { "epoch": 1.872886662492173, "grad_norm": 4.334254741668701, 
"learning_rate": 2.025904203323558e-05, "loss": 0.7848, "step": 11964 }, { "epoch": 1.8730432060112712, "grad_norm": 1.3288064002990723, "learning_rate": 2.0234604105571844e-05, "loss": 0.7798, "step": 11965 }, { "epoch": 1.8731997495303694, "grad_norm": 3.3582398891448975, "learning_rate": 2.0210166177908112e-05, "loss": 1.0681, "step": 11966 }, { "epoch": 1.8733562930494676, "grad_norm": 4.449483394622803, "learning_rate": 2.0185728250244377e-05, "loss": 0.8853, "step": 11967 }, { "epoch": 1.873512836568566, "grad_norm": 2.989022731781006, "learning_rate": 2.0161290322580642e-05, "loss": 0.6398, "step": 11968 }, { "epoch": 1.8736693800876645, "grad_norm": 4.922720432281494, "learning_rate": 2.013685239491691e-05, "loss": 0.7581, "step": 11969 }, { "epoch": 1.8738259236067627, "grad_norm": 4.551708221435547, "learning_rate": 2.0112414467253175e-05, "loss": 0.9456, "step": 11970 }, { "epoch": 1.873982467125861, "grad_norm": 2.336358070373535, "learning_rate": 2.008797653958944e-05, "loss": 0.8504, "step": 11971 }, { "epoch": 1.8741390106449591, "grad_norm": 2.310737371444702, "learning_rate": 2.0063538611925708e-05, "loss": 0.7855, "step": 11972 }, { "epoch": 1.8742955541640576, "grad_norm": 10.708081245422363, "learning_rate": 2.0039100684261973e-05, "loss": 0.775, "step": 11973 }, { "epoch": 1.874452097683156, "grad_norm": 3.0271284580230713, "learning_rate": 2.0014662756598237e-05, "loss": 0.7938, "step": 11974 }, { "epoch": 1.8746086412022542, "grad_norm": 7.497602462768555, "learning_rate": 1.9990224828934506e-05, "loss": 1.235, "step": 11975 }, { "epoch": 1.8747651847213525, "grad_norm": 7.694507598876953, "learning_rate": 1.996578690127077e-05, "loss": 1.2037, "step": 11976 }, { "epoch": 1.874921728240451, "grad_norm": 5.209584712982178, "learning_rate": 1.9941348973607035e-05, "loss": 0.6108, "step": 11977 }, { "epoch": 1.875078271759549, "grad_norm": 5.772160053253174, "learning_rate": 1.9916911045943303e-05, "loss": 0.9119, "step": 11978 }, { "epoch": 
1.8752348152786475, "grad_norm": 2.1883044242858887, "learning_rate": 1.9892473118279568e-05, "loss": 1.2656, "step": 11979 }, { "epoch": 1.8753913587977458, "grad_norm": 3.705591917037964, "learning_rate": 1.9868035190615833e-05, "loss": 0.9636, "step": 11980 }, { "epoch": 1.875547902316844, "grad_norm": 2.439570188522339, "learning_rate": 1.98435972629521e-05, "loss": 1.0948, "step": 11981 }, { "epoch": 1.8757044458359424, "grad_norm": 2.082089424133301, "learning_rate": 1.9819159335288366e-05, "loss": 0.6857, "step": 11982 }, { "epoch": 1.8758609893550409, "grad_norm": 1.2534931898117065, "learning_rate": 1.979472140762463e-05, "loss": 0.3912, "step": 11983 }, { "epoch": 1.876017532874139, "grad_norm": 3.958746910095215, "learning_rate": 1.97702834799609e-05, "loss": 0.6163, "step": 11984 }, { "epoch": 1.8761740763932373, "grad_norm": 1.1258634328842163, "learning_rate": 1.9745845552297164e-05, "loss": 0.4008, "step": 11985 }, { "epoch": 1.8763306199123355, "grad_norm": 3.0614774227142334, "learning_rate": 1.972140762463343e-05, "loss": 0.7214, "step": 11986 }, { "epoch": 1.876487163431434, "grad_norm": 3.998211145401001, "learning_rate": 1.9696969696969697e-05, "loss": 1.0756, "step": 11987 }, { "epoch": 1.8766437069505324, "grad_norm": 2.4418106079101562, "learning_rate": 1.967253176930596e-05, "loss": 1.0684, "step": 11988 }, { "epoch": 1.8768002504696306, "grad_norm": 0.8472214937210083, "learning_rate": 1.9648093841642226e-05, "loss": 1.1258, "step": 11989 }, { "epoch": 1.8769567939887288, "grad_norm": 1.6514756679534912, "learning_rate": 1.9623655913978494e-05, "loss": 1.1072, "step": 11990 }, { "epoch": 1.877113337507827, "grad_norm": 1.2808918952941895, "learning_rate": 1.959921798631476e-05, "loss": 1.2211, "step": 11991 }, { "epoch": 1.8772698810269255, "grad_norm": 0.9649450182914734, "learning_rate": 1.9574780058651024e-05, "loss": 1.1116, "step": 11992 }, { "epoch": 1.877426424546024, "grad_norm": 1.2473208904266357, "learning_rate": 
1.9550342130987292e-05, "loss": 1.1229, "step": 11993 }, { "epoch": 1.8775829680651221, "grad_norm": 1.2597932815551758, "learning_rate": 1.9525904203323557e-05, "loss": 1.1491, "step": 11994 }, { "epoch": 1.8777395115842204, "grad_norm": 1.4384278059005737, "learning_rate": 1.9501466275659822e-05, "loss": 1.136, "step": 11995 }, { "epoch": 1.8778960551033186, "grad_norm": 1.308000087738037, "learning_rate": 1.9477028347996087e-05, "loss": 1.0778, "step": 11996 }, { "epoch": 1.878052598622417, "grad_norm": 1.234831690788269, "learning_rate": 1.9452590420332355e-05, "loss": 1.0682, "step": 11997 }, { "epoch": 1.8782091421415155, "grad_norm": 0.9568355083465576, "learning_rate": 1.942815249266862e-05, "loss": 1.0062, "step": 11998 }, { "epoch": 1.8783656856606137, "grad_norm": 3.727036952972412, "learning_rate": 1.9403714565004884e-05, "loss": 1.0344, "step": 11999 }, { "epoch": 1.878522229179712, "grad_norm": 1.185569167137146, "learning_rate": 1.9379276637341153e-05, "loss": 1.0171, "step": 12000 }, { "epoch": 1.878522229179712, "eval_loss": 0.9464879631996155, "eval_runtime": 205.315, "eval_samples_per_second": 60.312, "eval_steps_per_second": 3.77, "eval_wer": 0.8378987185753448, "step": 12000 }, { "epoch": 1.8786787726988101, "grad_norm": 4.092573165893555, "learning_rate": 1.9354838709677417e-05, "loss": 1.4622, "step": 12001 }, { "epoch": 1.8788353162179086, "grad_norm": 2.7687883377075195, "learning_rate": 1.9330400782013682e-05, "loss": 1.3294, "step": 12002 }, { "epoch": 1.878991859737007, "grad_norm": 1.519518494606018, "learning_rate": 1.930596285434995e-05, "loss": 1.0254, "step": 12003 }, { "epoch": 1.8791484032561052, "grad_norm": 1.2389482259750366, "learning_rate": 1.9281524926686215e-05, "loss": 0.9684, "step": 12004 }, { "epoch": 1.8793049467752034, "grad_norm": 11.871675491333008, "learning_rate": 1.925708699902248e-05, "loss": 1.044, "step": 12005 }, { "epoch": 1.8794614902943017, "grad_norm": 2.5590877532958984, "learning_rate": 
1.9232649071358748e-05, "loss": 0.875, "step": 12006 }, { "epoch": 1.8796180338134, "grad_norm": 2.1708011627197266, "learning_rate": 1.9208211143695013e-05, "loss": 0.9461, "step": 12007 }, { "epoch": 1.8797745773324985, "grad_norm": 5.7094502449035645, "learning_rate": 1.9183773216031278e-05, "loss": 0.9795, "step": 12008 }, { "epoch": 1.8799311208515967, "grad_norm": 2.582882881164551, "learning_rate": 1.9159335288367546e-05, "loss": 1.1206, "step": 12009 }, { "epoch": 1.880087664370695, "grad_norm": 2.906423330307007, "learning_rate": 1.913489736070381e-05, "loss": 1.0056, "step": 12010 }, { "epoch": 1.8802442078897934, "grad_norm": 4.6472320556640625, "learning_rate": 1.9110459433040076e-05, "loss": 0.9007, "step": 12011 }, { "epoch": 1.8804007514088916, "grad_norm": 3.4553442001342773, "learning_rate": 1.9086021505376344e-05, "loss": 0.851, "step": 12012 }, { "epoch": 1.88055729492799, "grad_norm": 3.4687905311584473, "learning_rate": 1.906158357771261e-05, "loss": 1.0446, "step": 12013 }, { "epoch": 1.8807138384470883, "grad_norm": 1.8787938356399536, "learning_rate": 1.9037145650048873e-05, "loss": 0.9464, "step": 12014 }, { "epoch": 1.8808703819661865, "grad_norm": 2.372514009475708, "learning_rate": 1.901270772238514e-05, "loss": 0.8582, "step": 12015 }, { "epoch": 1.881026925485285, "grad_norm": 3.6794726848602295, "learning_rate": 1.8988269794721406e-05, "loss": 0.7178, "step": 12016 }, { "epoch": 1.8811834690043834, "grad_norm": 1.5233488082885742, "learning_rate": 1.896383186705767e-05, "loss": 0.8946, "step": 12017 }, { "epoch": 1.8813400125234816, "grad_norm": 2.525583505630493, "learning_rate": 1.893939393939394e-05, "loss": 0.7768, "step": 12018 }, { "epoch": 1.8814965560425798, "grad_norm": 6.541940689086914, "learning_rate": 1.8914956011730204e-05, "loss": 1.2274, "step": 12019 }, { "epoch": 1.881653099561678, "grad_norm": 8.669783592224121, "learning_rate": 1.889051808406647e-05, "loss": 0.8303, "step": 12020 }, { "epoch": 1.8818096430807765, 
"grad_norm": 5.07046365737915, "learning_rate": 1.8866080156402737e-05, "loss": 1.1194, "step": 12021 }, { "epoch": 1.881966186599875, "grad_norm": 2.6451454162597656, "learning_rate": 1.8841642228739002e-05, "loss": 1.1634, "step": 12022 }, { "epoch": 1.8821227301189731, "grad_norm": 11.504127502441406, "learning_rate": 1.8817204301075267e-05, "loss": 1.2416, "step": 12023 }, { "epoch": 1.8822792736380713, "grad_norm": 4.630967617034912, "learning_rate": 1.8792766373411535e-05, "loss": 0.732, "step": 12024 }, { "epoch": 1.8824358171571696, "grad_norm": 5.2356648445129395, "learning_rate": 1.87683284457478e-05, "loss": 1.012, "step": 12025 }, { "epoch": 1.882592360676268, "grad_norm": 6.520271301269531, "learning_rate": 1.8743890518084065e-05, "loss": 0.7886, "step": 12026 }, { "epoch": 1.8827489041953664, "grad_norm": 8.99795913696289, "learning_rate": 1.871945259042033e-05, "loss": 1.1509, "step": 12027 }, { "epoch": 1.8829054477144647, "grad_norm": 2.0889909267425537, "learning_rate": 1.8695014662756598e-05, "loss": 0.8647, "step": 12028 }, { "epoch": 1.8830619912335629, "grad_norm": 9.168989181518555, "learning_rate": 1.8670576735092862e-05, "loss": 1.0876, "step": 12029 }, { "epoch": 1.883218534752661, "grad_norm": 3.345892906188965, "learning_rate": 1.8646138807429127e-05, "loss": 1.0499, "step": 12030 }, { "epoch": 1.8833750782717595, "grad_norm": 3.142423391342163, "learning_rate": 1.8621700879765395e-05, "loss": 1.5739, "step": 12031 }, { "epoch": 1.883531621790858, "grad_norm": 6.219061851501465, "learning_rate": 1.859726295210166e-05, "loss": 1.2463, "step": 12032 }, { "epoch": 1.8836881653099562, "grad_norm": 3.891333818435669, "learning_rate": 1.8572825024437925e-05, "loss": 0.8387, "step": 12033 }, { "epoch": 1.8838447088290544, "grad_norm": 2.0812160968780518, "learning_rate": 1.8548387096774193e-05, "loss": 0.5832, "step": 12034 }, { "epoch": 1.8840012523481526, "grad_norm": 2.1762547492980957, "learning_rate": 1.8523949169110458e-05, "loss": 
0.3952, "step": 12035 }, { "epoch": 1.884157795867251, "grad_norm": 1.8617538213729858, "learning_rate": 1.8499511241446723e-05, "loss": 0.6069, "step": 12036 }, { "epoch": 1.8843143393863495, "grad_norm": 2.6567537784576416, "learning_rate": 1.847507331378299e-05, "loss": 0.8268, "step": 12037 }, { "epoch": 1.8844708829054477, "grad_norm": 3.485652208328247, "learning_rate": 1.8450635386119256e-05, "loss": 0.7337, "step": 12038 }, { "epoch": 1.884627426424546, "grad_norm": 1.9477510452270508, "learning_rate": 1.842619745845552e-05, "loss": 1.2784, "step": 12039 }, { "epoch": 1.8847839699436444, "grad_norm": 0.9730574488639832, "learning_rate": 1.840175953079179e-05, "loss": 1.221, "step": 12040 }, { "epoch": 1.8849405134627426, "grad_norm": 1.2898110151290894, "learning_rate": 1.8377321603128054e-05, "loss": 1.1966, "step": 12041 }, { "epoch": 1.885097056981841, "grad_norm": 0.9540336728096008, "learning_rate": 1.8352883675464318e-05, "loss": 1.1641, "step": 12042 }, { "epoch": 1.8852536005009393, "grad_norm": 4.748780727386475, "learning_rate": 1.8328445747800586e-05, "loss": 1.1545, "step": 12043 }, { "epoch": 1.8854101440200375, "grad_norm": 1.0382719039916992, "learning_rate": 1.830400782013685e-05, "loss": 1.2024, "step": 12044 }, { "epoch": 1.885566687539136, "grad_norm": 3.70912766456604, "learning_rate": 1.8279569892473116e-05, "loss": 1.3272, "step": 12045 }, { "epoch": 1.8857232310582341, "grad_norm": 2.094374656677246, "learning_rate": 1.8255131964809384e-05, "loss": 1.1662, "step": 12046 }, { "epoch": 1.8858797745773326, "grad_norm": 1.1259913444519043, "learning_rate": 1.823069403714565e-05, "loss": 1.0841, "step": 12047 }, { "epoch": 1.8860363180964308, "grad_norm": 1.507389783859253, "learning_rate": 1.8206256109481914e-05, "loss": 1.1885, "step": 12048 }, { "epoch": 1.886192861615529, "grad_norm": 1.2091017961502075, "learning_rate": 1.8181818181818182e-05, "loss": 1.1419, "step": 12049 }, { "epoch": 1.8863494051346275, "grad_norm": 
2.392782688140869, "learning_rate": 1.8157380254154447e-05, "loss": 1.1869, "step": 12050 }, { "epoch": 1.886505948653726, "grad_norm": 3.752117156982422, "learning_rate": 1.813294232649071e-05, "loss": 1.1169, "step": 12051 }, { "epoch": 1.8866624921728241, "grad_norm": 3.1273210048675537, "learning_rate": 1.810850439882698e-05, "loss": 1.2533, "step": 12052 }, { "epoch": 1.8868190356919223, "grad_norm": 1.2055354118347168, "learning_rate": 1.8084066471163245e-05, "loss": 1.0157, "step": 12053 }, { "epoch": 1.8869755792110205, "grad_norm": 4.132384300231934, "learning_rate": 1.805962854349951e-05, "loss": 1.0156, "step": 12054 }, { "epoch": 1.887132122730119, "grad_norm": 1.6139036417007446, "learning_rate": 1.8035190615835778e-05, "loss": 1.3168, "step": 12055 }, { "epoch": 1.8872886662492174, "grad_norm": 1.806853175163269, "learning_rate": 1.801075268817204e-05, "loss": 1.38, "step": 12056 }, { "epoch": 1.8874452097683156, "grad_norm": 1.3997470140457153, "learning_rate": 1.7986314760508307e-05, "loss": 1.1021, "step": 12057 }, { "epoch": 1.8876017532874139, "grad_norm": 2.818284511566162, "learning_rate": 1.7961876832844575e-05, "loss": 1.0186, "step": 12058 }, { "epoch": 1.887758296806512, "grad_norm": 10.638510704040527, "learning_rate": 1.7937438905180837e-05, "loss": 1.4395, "step": 12059 }, { "epoch": 1.8879148403256105, "grad_norm": 4.576744079589844, "learning_rate": 1.7913000977517105e-05, "loss": 1.2109, "step": 12060 }, { "epoch": 1.888071383844709, "grad_norm": 9.343724250793457, "learning_rate": 1.7888563049853373e-05, "loss": 1.1902, "step": 12061 }, { "epoch": 1.8882279273638072, "grad_norm": 1.4243091344833374, "learning_rate": 1.7864125122189635e-05, "loss": 1.0696, "step": 12062 }, { "epoch": 1.8883844708829054, "grad_norm": 4.524175643920898, "learning_rate": 1.7839687194525903e-05, "loss": 1.3022, "step": 12063 }, { "epoch": 1.8885410144020036, "grad_norm": 2.529660224914551, "learning_rate": 1.781524926686217e-05, "loss": 1.1638, "step": 
12064 }, { "epoch": 1.888697557921102, "grad_norm": 2.6119816303253174, "learning_rate": 1.7790811339198432e-05, "loss": 1.2616, "step": 12065 }, { "epoch": 1.8888541014402005, "grad_norm": 4.102001667022705, "learning_rate": 1.77663734115347e-05, "loss": 0.9599, "step": 12066 }, { "epoch": 1.8890106449592987, "grad_norm": 6.444525241851807, "learning_rate": 1.7741935483870965e-05, "loss": 1.0821, "step": 12067 }, { "epoch": 1.889167188478397, "grad_norm": 3.062340021133423, "learning_rate": 1.771749755620723e-05, "loss": 1.2128, "step": 12068 }, { "epoch": 1.8893237319974951, "grad_norm": 2.605454444885254, "learning_rate": 1.76930596285435e-05, "loss": 1.2174, "step": 12069 }, { "epoch": 1.8894802755165936, "grad_norm": 4.152836799621582, "learning_rate": 1.7668621700879763e-05, "loss": 1.1536, "step": 12070 }, { "epoch": 1.889636819035692, "grad_norm": 5.219440460205078, "learning_rate": 1.7644183773216028e-05, "loss": 1.4407, "step": 12071 }, { "epoch": 1.8897933625547902, "grad_norm": 3.174717426300049, "learning_rate": 1.7619745845552296e-05, "loss": 1.3388, "step": 12072 }, { "epoch": 1.8899499060738885, "grad_norm": 3.0966529846191406, "learning_rate": 1.759530791788856e-05, "loss": 1.0997, "step": 12073 }, { "epoch": 1.890106449592987, "grad_norm": 7.947587013244629, "learning_rate": 1.7570869990224826e-05, "loss": 1.1042, "step": 12074 }, { "epoch": 1.8902629931120851, "grad_norm": 5.379951477050781, "learning_rate": 1.7546432062561094e-05, "loss": 1.5781, "step": 12075 }, { "epoch": 1.8904195366311836, "grad_norm": 3.88952898979187, "learning_rate": 1.752199413489736e-05, "loss": 1.295, "step": 12076 }, { "epoch": 1.8905760801502818, "grad_norm": 3.303056001663208, "learning_rate": 1.7497556207233624e-05, "loss": 1.4006, "step": 12077 }, { "epoch": 1.89073262366938, "grad_norm": 3.121760845184326, "learning_rate": 1.7473118279569892e-05, "loss": 1.1342, "step": 12078 }, { "epoch": 1.8908891671884784, "grad_norm": 6.825216293334961, "learning_rate": 
1.7448680351906157e-05, "loss": 1.2744, "step": 12079 }, { "epoch": 1.8910457107075767, "grad_norm": 3.57566237449646, "learning_rate": 1.742424242424242e-05, "loss": 1.141, "step": 12080 }, { "epoch": 1.891202254226675, "grad_norm": 6.941944122314453, "learning_rate": 1.739980449657869e-05, "loss": 1.4476, "step": 12081 }, { "epoch": 1.8913587977457733, "grad_norm": 3.8195106983184814, "learning_rate": 1.7375366568914954e-05, "loss": 1.4218, "step": 12082 }, { "epoch": 1.8915153412648715, "grad_norm": 2.770172119140625, "learning_rate": 1.735092864125122e-05, "loss": 0.7745, "step": 12083 }, { "epoch": 1.89167188478397, "grad_norm": 2.7398681640625, "learning_rate": 1.7326490713587487e-05, "loss": 0.8573, "step": 12084 }, { "epoch": 1.8918284283030684, "grad_norm": 5.767883777618408, "learning_rate": 1.7302052785923752e-05, "loss": 0.7147, "step": 12085 }, { "epoch": 1.8919849718221666, "grad_norm": 2.8740317821502686, "learning_rate": 1.7277614858260017e-05, "loss": 0.7094, "step": 12086 }, { "epoch": 1.8921415153412648, "grad_norm": 3.8529751300811768, "learning_rate": 1.7253176930596285e-05, "loss": 1.3801, "step": 12087 }, { "epoch": 1.892298058860363, "grad_norm": 1.6639562845230103, "learning_rate": 1.722873900293255e-05, "loss": 0.7619, "step": 12088 }, { "epoch": 1.8924546023794615, "grad_norm": 1.0102930068969727, "learning_rate": 1.7204301075268815e-05, "loss": 1.4944, "step": 12089 }, { "epoch": 1.89261114589856, "grad_norm": 1.1898506879806519, "learning_rate": 1.7179863147605083e-05, "loss": 1.4635, "step": 12090 }, { "epoch": 1.8927676894176582, "grad_norm": 1.3134690523147583, "learning_rate": 1.7155425219941348e-05, "loss": 1.491, "step": 12091 }, { "epoch": 1.8929242329367564, "grad_norm": 1.2267783880233765, "learning_rate": 1.7130987292277613e-05, "loss": 1.4765, "step": 12092 }, { "epoch": 1.8930807764558546, "grad_norm": 1.480941653251648, "learning_rate": 1.710654936461388e-05, "loss": 1.3881, "step": 12093 }, { "epoch": 1.893237319974953, 
"grad_norm": 1.0729501247406006, "learning_rate": 1.7082111436950146e-05, "loss": 1.3982, "step": 12094 }, { "epoch": 1.8933938634940515, "grad_norm": 1.248131275177002, "learning_rate": 1.705767350928641e-05, "loss": 1.5171, "step": 12095 }, { "epoch": 1.8935504070131497, "grad_norm": 2.5417959690093994, "learning_rate": 1.703323558162268e-05, "loss": 1.4509, "step": 12096 }, { "epoch": 1.893706950532248, "grad_norm": 1.5075997114181519, "learning_rate": 1.7008797653958943e-05, "loss": 1.3934, "step": 12097 }, { "epoch": 1.8938634940513461, "grad_norm": 5.500137805938721, "learning_rate": 1.6984359726295208e-05, "loss": 1.4268, "step": 12098 }, { "epoch": 1.8940200375704446, "grad_norm": 1.5557372570037842, "learning_rate": 1.6959921798631476e-05, "loss": 1.368, "step": 12099 }, { "epoch": 1.894176581089543, "grad_norm": 2.184826374053955, "learning_rate": 1.693548387096774e-05, "loss": 1.3948, "step": 12100 }, { "epoch": 1.8943331246086412, "grad_norm": 1.7609930038452148, "learning_rate": 1.6911045943304006e-05, "loss": 1.3585, "step": 12101 }, { "epoch": 1.8944896681277394, "grad_norm": 2.9163129329681396, "learning_rate": 1.6886608015640274e-05, "loss": 1.3926, "step": 12102 }, { "epoch": 1.8946462116468377, "grad_norm": 1.8992156982421875, "learning_rate": 1.686217008797654e-05, "loss": 1.2834, "step": 12103 }, { "epoch": 1.894802755165936, "grad_norm": 2.0363168716430664, "learning_rate": 1.6837732160312804e-05, "loss": 1.4879, "step": 12104 }, { "epoch": 1.8949592986850345, "grad_norm": 1.4370715618133545, "learning_rate": 1.6813294232649072e-05, "loss": 1.119, "step": 12105 }, { "epoch": 1.8951158422041328, "grad_norm": 1.8881416320800781, "learning_rate": 1.6788856304985337e-05, "loss": 1.2856, "step": 12106 }, { "epoch": 1.895272385723231, "grad_norm": 1.4255024194717407, "learning_rate": 1.67644183773216e-05, "loss": 1.3787, "step": 12107 }, { "epoch": 1.8954289292423294, "grad_norm": 2.249227285385132, "learning_rate": 1.6739980449657866e-05, "loss": 
1.2557, "step": 12108 }, { "epoch": 1.8955854727614276, "grad_norm": 4.423599720001221, "learning_rate": 1.6715542521994134e-05, "loss": 1.3487, "step": 12109 }, { "epoch": 1.895742016280526, "grad_norm": 4.681179523468018, "learning_rate": 1.66911045943304e-05, "loss": 1.192, "step": 12110 }, { "epoch": 1.8958985597996243, "grad_norm": 1.816692590713501, "learning_rate": 1.6666666666666664e-05, "loss": 1.1183, "step": 12111 }, { "epoch": 1.8960551033187225, "grad_norm": 4.783669948577881, "learning_rate": 1.6642228739002932e-05, "loss": 1.3788, "step": 12112 }, { "epoch": 1.896211646837821, "grad_norm": 3.048429250717163, "learning_rate": 1.6617790811339197e-05, "loss": 1.1074, "step": 12113 }, { "epoch": 1.8963681903569192, "grad_norm": 3.0569400787353516, "learning_rate": 1.6593352883675462e-05, "loss": 1.274, "step": 12114 }, { "epoch": 1.8965247338760176, "grad_norm": 2.585090398788452, "learning_rate": 1.656891495601173e-05, "loss": 1.0185, "step": 12115 }, { "epoch": 1.8966812773951158, "grad_norm": 2.0414528846740723, "learning_rate": 1.6544477028347995e-05, "loss": 1.3508, "step": 12116 }, { "epoch": 1.896837820914214, "grad_norm": 2.784245729446411, "learning_rate": 1.652003910068426e-05, "loss": 1.2198, "step": 12117 }, { "epoch": 1.8969943644333125, "grad_norm": 2.629258394241333, "learning_rate": 1.6495601173020528e-05, "loss": 1.103, "step": 12118 }, { "epoch": 1.897150907952411, "grad_norm": 6.373242378234863, "learning_rate": 1.6471163245356793e-05, "loss": 1.291, "step": 12119 }, { "epoch": 1.8973074514715091, "grad_norm": 3.1650185585021973, "learning_rate": 1.6446725317693057e-05, "loss": 1.0345, "step": 12120 }, { "epoch": 1.8974639949906074, "grad_norm": 2.357555389404297, "learning_rate": 1.6422287390029326e-05, "loss": 0.8684, "step": 12121 }, { "epoch": 1.8976205385097056, "grad_norm": 3.3148272037506104, "learning_rate": 1.639784946236559e-05, "loss": 1.0331, "step": 12122 }, { "epoch": 1.897777082028804, "grad_norm": 3.1584115028381348, 
"learning_rate": 1.6373411534701855e-05, "loss": 0.9614, "step": 12123 }, { "epoch": 1.8979336255479025, "grad_norm": 5.584422588348389, "learning_rate": 1.6348973607038123e-05, "loss": 1.0514, "step": 12124 }, { "epoch": 1.8980901690670007, "grad_norm": 3.8823444843292236, "learning_rate": 1.6324535679374388e-05, "loss": 1.5685, "step": 12125 }, { "epoch": 1.898246712586099, "grad_norm": 3.763765811920166, "learning_rate": 1.6300097751710653e-05, "loss": 1.2219, "step": 12126 }, { "epoch": 1.8984032561051971, "grad_norm": 2.749150037765503, "learning_rate": 1.6275659824046918e-05, "loss": 1.0447, "step": 12127 }, { "epoch": 1.8985597996242956, "grad_norm": 5.038095951080322, "learning_rate": 1.6251221896383186e-05, "loss": 1.004, "step": 12128 }, { "epoch": 1.898716343143394, "grad_norm": 8.337298393249512, "learning_rate": 1.622678396871945e-05, "loss": 1.027, "step": 12129 }, { "epoch": 1.8988728866624922, "grad_norm": 3.5762979984283447, "learning_rate": 1.6202346041055716e-05, "loss": 1.4483, "step": 12130 }, { "epoch": 1.8990294301815904, "grad_norm": 2.6334099769592285, "learning_rate": 1.6177908113391984e-05, "loss": 1.2446, "step": 12131 }, { "epoch": 1.8991859737006886, "grad_norm": 2.6013121604919434, "learning_rate": 1.615347018572825e-05, "loss": 1.2401, "step": 12132 }, { "epoch": 1.899342517219787, "grad_norm": 3.092857599258423, "learning_rate": 1.6129032258064513e-05, "loss": 1.249, "step": 12133 }, { "epoch": 1.8994990607388855, "grad_norm": 4.714329719543457, "learning_rate": 1.610459433040078e-05, "loss": 0.4669, "step": 12134 }, { "epoch": 1.8996556042579837, "grad_norm": 1.6379783153533936, "learning_rate": 1.6080156402737046e-05, "loss": 0.4258, "step": 12135 }, { "epoch": 1.899812147777082, "grad_norm": 3.6598150730133057, "learning_rate": 1.605571847507331e-05, "loss": 0.6978, "step": 12136 }, { "epoch": 1.8999686912961802, "grad_norm": 4.708147048950195, "learning_rate": 1.603128054740958e-05, "loss": 1.3925, "step": 12137 }, { "epoch": 
1.9001252348152786, "grad_norm": 1.7821239233016968, "learning_rate": 1.6006842619745844e-05, "loss": 0.5404, "step": 12138 }, { "epoch": 1.900281778334377, "grad_norm": 1.8768203258514404, "learning_rate": 1.598240469208211e-05, "loss": 1.1651, "step": 12139 }, { "epoch": 1.9004383218534753, "grad_norm": 0.9923833012580872, "learning_rate": 1.5957966764418374e-05, "loss": 1.237, "step": 12140 }, { "epoch": 1.9005948653725735, "grad_norm": 0.9963833689689636, "learning_rate": 1.5933528836754642e-05, "loss": 1.2096, "step": 12141 }, { "epoch": 1.900751408891672, "grad_norm": 1.4401146173477173, "learning_rate": 1.5909090909090907e-05, "loss": 1.2101, "step": 12142 }, { "epoch": 1.9009079524107702, "grad_norm": 0.8284012079238892, "learning_rate": 1.588465298142717e-05, "loss": 1.0725, "step": 12143 }, { "epoch": 1.9010644959298686, "grad_norm": 1.0937573909759521, "learning_rate": 1.586021505376344e-05, "loss": 1.2081, "step": 12144 }, { "epoch": 1.9012210394489668, "grad_norm": 0.7755000591278076, "learning_rate": 1.5835777126099705e-05, "loss": 1.0808, "step": 12145 }, { "epoch": 1.901377582968065, "grad_norm": 1.1034554243087769, "learning_rate": 1.581133919843597e-05, "loss": 1.183, "step": 12146 }, { "epoch": 1.9015341264871635, "grad_norm": 1.378918170928955, "learning_rate": 1.5786901270772238e-05, "loss": 1.0971, "step": 12147 }, { "epoch": 1.9016906700062617, "grad_norm": 1.6147481203079224, "learning_rate": 1.5762463343108502e-05, "loss": 1.2029, "step": 12148 }, { "epoch": 1.9018472135253601, "grad_norm": 1.2892570495605469, "learning_rate": 1.5738025415444767e-05, "loss": 1.002, "step": 12149 }, { "epoch": 1.9020037570444583, "grad_norm": 1.0253342390060425, "learning_rate": 1.5713587487781035e-05, "loss": 1.105, "step": 12150 }, { "epoch": 1.9021603005635566, "grad_norm": 1.6563441753387451, "learning_rate": 1.56891495601173e-05, "loss": 1.1433, "step": 12151 }, { "epoch": 1.902316844082655, "grad_norm": 1.512212872505188, "learning_rate": 
1.5664711632453565e-05, "loss": 0.9589, "step": 12152 }, { "epoch": 1.9024733876017534, "grad_norm": 0.9118117690086365, "learning_rate": 1.5640273704789833e-05, "loss": 1.0516, "step": 12153 }, { "epoch": 1.9026299311208517, "grad_norm": 2.5056610107421875, "learning_rate": 1.5615835777126098e-05, "loss": 1.2032, "step": 12154 }, { "epoch": 1.9027864746399499, "grad_norm": 3.2829456329345703, "learning_rate": 1.5591397849462363e-05, "loss": 1.0465, "step": 12155 }, { "epoch": 1.902943018159048, "grad_norm": 4.280078411102295, "learning_rate": 1.556695992179863e-05, "loss": 1.1008, "step": 12156 }, { "epoch": 1.9030995616781465, "grad_norm": 1.5060573816299438, "learning_rate": 1.5542521994134896e-05, "loss": 1.0278, "step": 12157 }, { "epoch": 1.903256105197245, "grad_norm": 2.12257981300354, "learning_rate": 1.551808406647116e-05, "loss": 1.1257, "step": 12158 }, { "epoch": 1.9034126487163432, "grad_norm": 2.310483694076538, "learning_rate": 1.549364613880743e-05, "loss": 1.2765, "step": 12159 }, { "epoch": 1.9035691922354414, "grad_norm": 2.682175636291504, "learning_rate": 1.5469208211143694e-05, "loss": 1.1797, "step": 12160 }, { "epoch": 1.9037257357545396, "grad_norm": 3.6471972465515137, "learning_rate": 1.544477028347996e-05, "loss": 1.0389, "step": 12161 }, { "epoch": 1.903882279273638, "grad_norm": 2.102436065673828, "learning_rate": 1.5420332355816226e-05, "loss": 1.0323, "step": 12162 }, { "epoch": 1.9040388227927365, "grad_norm": 4.041779041290283, "learning_rate": 1.539589442815249e-05, "loss": 1.1483, "step": 12163 }, { "epoch": 1.9041953663118347, "grad_norm": 3.722378730773926, "learning_rate": 1.5371456500488756e-05, "loss": 1.1631, "step": 12164 }, { "epoch": 1.904351909830933, "grad_norm": 4.074342727661133, "learning_rate": 1.5347018572825024e-05, "loss": 1.3785, "step": 12165 }, { "epoch": 1.9045084533500312, "grad_norm": 6.567094326019287, "learning_rate": 1.532258064516129e-05, "loss": 0.945, "step": 12166 }, { "epoch": 1.9046649968691296, 
"grad_norm": 7.025415897369385, "learning_rate": 1.5298142717497554e-05, "loss": 1.2596, "step": 12167 }, { "epoch": 1.904821540388228, "grad_norm": 3.313201904296875, "learning_rate": 1.5273704789833822e-05, "loss": 1.2277, "step": 12168 }, { "epoch": 1.9049780839073263, "grad_norm": 4.0791120529174805, "learning_rate": 1.5249266862170087e-05, "loss": 1.2767, "step": 12169 }, { "epoch": 1.9051346274264245, "grad_norm": 4.89728307723999, "learning_rate": 1.5224828934506352e-05, "loss": 1.1568, "step": 12170 }, { "epoch": 1.9052911709455227, "grad_norm": 3.8333678245544434, "learning_rate": 1.5200391006842618e-05, "loss": 1.1248, "step": 12171 }, { "epoch": 1.9054477144646211, "grad_norm": 4.54195499420166, "learning_rate": 1.5175953079178883e-05, "loss": 1.0531, "step": 12172 }, { "epoch": 1.9056042579837196, "grad_norm": 4.03914737701416, "learning_rate": 1.515151515151515e-05, "loss": 1.124, "step": 12173 }, { "epoch": 1.9057608015028178, "grad_norm": 7.725468635559082, "learning_rate": 1.5127077223851416e-05, "loss": 1.2547, "step": 12174 }, { "epoch": 1.905917345021916, "grad_norm": 4.000185966491699, "learning_rate": 1.510263929618768e-05, "loss": 1.0578, "step": 12175 }, { "epoch": 1.9060738885410144, "grad_norm": 3.3531341552734375, "learning_rate": 1.5078201368523947e-05, "loss": 1.0488, "step": 12176 }, { "epoch": 1.9062304320601127, "grad_norm": 1.9735767841339111, "learning_rate": 1.5053763440860214e-05, "loss": 1.0028, "step": 12177 }, { "epoch": 1.906386975579211, "grad_norm": 6.0913262367248535, "learning_rate": 1.5029325513196479e-05, "loss": 1.0639, "step": 12178 }, { "epoch": 1.9065435190983093, "grad_norm": 1.993403434753418, "learning_rate": 1.5004887585532745e-05, "loss": 0.9197, "step": 12179 }, { "epoch": 1.9067000626174075, "grad_norm": 3.3327815532684326, "learning_rate": 1.4980449657869012e-05, "loss": 1.3543, "step": 12180 }, { "epoch": 1.906856606136506, "grad_norm": 3.145503044128418, "learning_rate": 1.4956011730205276e-05, "loss": 
0.9099, "step": 12181 }, { "epoch": 1.9070131496556044, "grad_norm": 2.0428433418273926, "learning_rate": 1.4931573802541543e-05, "loss": 0.6648, "step": 12182 }, { "epoch": 1.9071696931747026, "grad_norm": 1.2480318546295166, "learning_rate": 1.490713587487781e-05, "loss": 0.533, "step": 12183 }, { "epoch": 1.9073262366938009, "grad_norm": 3.0121002197265625, "learning_rate": 1.4882697947214074e-05, "loss": 1.3884, "step": 12184 }, { "epoch": 1.907482780212899, "grad_norm": 1.4688293933868408, "learning_rate": 1.485826001955034e-05, "loss": 0.562, "step": 12185 }, { "epoch": 1.9076393237319975, "grad_norm": 1.736817479133606, "learning_rate": 1.4833822091886607e-05, "loss": 0.3936, "step": 12186 }, { "epoch": 1.907795867251096, "grad_norm": 2.580030918121338, "learning_rate": 1.4809384164222872e-05, "loss": 0.5264, "step": 12187 }, { "epoch": 1.9079524107701942, "grad_norm": 5.177095413208008, "learning_rate": 1.4784946236559138e-05, "loss": 1.3178, "step": 12188 }, { "epoch": 1.9081089542892924, "grad_norm": 0.8959721326828003, "learning_rate": 1.4760508308895405e-05, "loss": 1.284, "step": 12189 }, { "epoch": 1.9082654978083906, "grad_norm": 1.049804449081421, "learning_rate": 1.473607038123167e-05, "loss": 1.3241, "step": 12190 }, { "epoch": 1.908422041327489, "grad_norm": 4.327507495880127, "learning_rate": 1.4711632453567936e-05, "loss": 1.1876, "step": 12191 }, { "epoch": 1.9085785848465875, "grad_norm": 2.699472188949585, "learning_rate": 1.4687194525904203e-05, "loss": 1.2219, "step": 12192 }, { "epoch": 1.9087351283656857, "grad_norm": 1.9174275398254395, "learning_rate": 1.4662756598240468e-05, "loss": 1.2485, "step": 12193 }, { "epoch": 1.908891671884784, "grad_norm": 3.461306095123291, "learning_rate": 1.4638318670576734e-05, "loss": 1.3244, "step": 12194 }, { "epoch": 1.9090482154038821, "grad_norm": 2.7849996089935303, "learning_rate": 1.4613880742913e-05, "loss": 1.2386, "step": 12195 }, { "epoch": 1.9092047589229806, "grad_norm": 
3.3640100955963135, "learning_rate": 1.4589442815249265e-05, "loss": 1.2349, "step": 12196 }, { "epoch": 1.909361302442079, "grad_norm": 0.8825156688690186, "learning_rate": 1.4565004887585532e-05, "loss": 1.253, "step": 12197 }, { "epoch": 1.9095178459611772, "grad_norm": 2.1742002964019775, "learning_rate": 1.4540566959921798e-05, "loss": 1.1779, "step": 12198 }, { "epoch": 1.9096743894802755, "grad_norm": 1.2652525901794434, "learning_rate": 1.4516129032258063e-05, "loss": 1.1674, "step": 12199 }, { "epoch": 1.9098309329993737, "grad_norm": 1.1848137378692627, "learning_rate": 1.449169110459433e-05, "loss": 1.1022, "step": 12200 }, { "epoch": 1.9099874765184721, "grad_norm": 4.88668155670166, "learning_rate": 1.4467253176930596e-05, "loss": 0.9741, "step": 12201 }, { "epoch": 1.9101440200375706, "grad_norm": 1.7493770122528076, "learning_rate": 1.4442815249266861e-05, "loss": 1.3092, "step": 12202 }, { "epoch": 1.9103005635566688, "grad_norm": 2.5054683685302734, "learning_rate": 1.4418377321603127e-05, "loss": 1.1745, "step": 12203 }, { "epoch": 1.910457107075767, "grad_norm": 1.4955793619155884, "learning_rate": 1.4393939393939392e-05, "loss": 1.1197, "step": 12204 }, { "epoch": 1.9106136505948652, "grad_norm": 1.2048388719558716, "learning_rate": 1.4369501466275659e-05, "loss": 1.1417, "step": 12205 }, { "epoch": 1.9107701941139636, "grad_norm": 1.501969814300537, "learning_rate": 1.4345063538611925e-05, "loss": 1.2411, "step": 12206 }, { "epoch": 1.910926737633062, "grad_norm": 1.6169402599334717, "learning_rate": 1.432062561094819e-05, "loss": 1.1179, "step": 12207 }, { "epoch": 1.9110832811521603, "grad_norm": 2.1456000804901123, "learning_rate": 1.4296187683284456e-05, "loss": 1.1619, "step": 12208 }, { "epoch": 1.9112398246712585, "grad_norm": 3.0159125328063965, "learning_rate": 1.4271749755620723e-05, "loss": 1.2492, "step": 12209 }, { "epoch": 1.911396368190357, "grad_norm": 1.6835179328918457, "learning_rate": 1.4247311827956988e-05, "loss": 1.0944, 
"step": 12210 }, { "epoch": 1.9115529117094552, "grad_norm": 2.42319393157959, "learning_rate": 1.4222873900293254e-05, "loss": 0.9977, "step": 12211 }, { "epoch": 1.9117094552285536, "grad_norm": 1.6393407583236694, "learning_rate": 1.419843597262952e-05, "loss": 1.3491, "step": 12212 }, { "epoch": 1.9118659987476518, "grad_norm": 4.471738815307617, "learning_rate": 1.4173998044965786e-05, "loss": 1.274, "step": 12213 }, { "epoch": 1.91202254226675, "grad_norm": 2.172912836074829, "learning_rate": 1.4149560117302052e-05, "loss": 1.1159, "step": 12214 }, { "epoch": 1.9121790857858485, "grad_norm": 2.280036449432373, "learning_rate": 1.4125122189638319e-05, "loss": 1.0586, "step": 12215 }, { "epoch": 1.912335629304947, "grad_norm": 5.947153568267822, "learning_rate": 1.4100684261974583e-05, "loss": 0.9635, "step": 12216 }, { "epoch": 1.9124921728240452, "grad_norm": 5.538031101226807, "learning_rate": 1.407624633431085e-05, "loss": 1.4723, "step": 12217 }, { "epoch": 1.9126487163431434, "grad_norm": 3.303689479827881, "learning_rate": 1.4051808406647116e-05, "loss": 1.4025, "step": 12218 }, { "epoch": 1.9128052598622416, "grad_norm": 2.7196755409240723, "learning_rate": 1.4027370478983381e-05, "loss": 1.0212, "step": 12219 }, { "epoch": 1.91296180338134, "grad_norm": 14.392826080322266, "learning_rate": 1.4002932551319648e-05, "loss": 1.114, "step": 12220 }, { "epoch": 1.9131183469004385, "grad_norm": 5.36333703994751, "learning_rate": 1.3978494623655914e-05, "loss": 1.1304, "step": 12221 }, { "epoch": 1.9132748904195367, "grad_norm": 2.031944513320923, "learning_rate": 1.3954056695992177e-05, "loss": 0.9993, "step": 12222 }, { "epoch": 1.913431433938635, "grad_norm": 7.611963748931885, "learning_rate": 1.3929618768328445e-05, "loss": 1.1506, "step": 12223 }, { "epoch": 1.9135879774577331, "grad_norm": 2.747000217437744, "learning_rate": 1.3905180840664712e-05, "loss": 0.9725, "step": 12224 }, { "epoch": 1.9137445209768316, "grad_norm": 5.841665267944336, 
"learning_rate": 1.3880742913000975e-05, "loss": 0.7786, "step": 12225 }, { "epoch": 1.91390106449593, "grad_norm": 6.18753719329834, "learning_rate": 1.3856304985337243e-05, "loss": 1.0598, "step": 12226 }, { "epoch": 1.9140576080150282, "grad_norm": 3.2494606971740723, "learning_rate": 1.383186705767351e-05, "loss": 1.1976, "step": 12227 }, { "epoch": 1.9142141515341264, "grad_norm": 7.703588008880615, "learning_rate": 1.3807429130009773e-05, "loss": 1.413, "step": 12228 }, { "epoch": 1.9143706950532247, "grad_norm": 3.169772148132324, "learning_rate": 1.378299120234604e-05, "loss": 1.4166, "step": 12229 }, { "epoch": 1.914527238572323, "grad_norm": 3.6064870357513428, "learning_rate": 1.3758553274682307e-05, "loss": 1.3009, "step": 12230 }, { "epoch": 1.9146837820914215, "grad_norm": 2.5471582412719727, "learning_rate": 1.373411534701857e-05, "loss": 0.8335, "step": 12231 }, { "epoch": 1.9148403256105198, "grad_norm": 3.2321269512176514, "learning_rate": 1.3709677419354837e-05, "loss": 1.1152, "step": 12232 }, { "epoch": 1.914996869129618, "grad_norm": 3.2195959091186523, "learning_rate": 1.3685239491691105e-05, "loss": 0.8062, "step": 12233 }, { "epoch": 1.9151534126487162, "grad_norm": 2.1859567165374756, "learning_rate": 1.3660801564027368e-05, "loss": 0.541, "step": 12234 }, { "epoch": 1.9153099561678146, "grad_norm": 1.7205095291137695, "learning_rate": 1.3636363636363635e-05, "loss": 0.5953, "step": 12235 }, { "epoch": 1.915466499686913, "grad_norm": 4.676856517791748, "learning_rate": 1.36119257086999e-05, "loss": 0.5505, "step": 12236 }, { "epoch": 1.9156230432060113, "grad_norm": 2.9190919399261475, "learning_rate": 1.3587487781036166e-05, "loss": 0.9582, "step": 12237 }, { "epoch": 1.9157795867251095, "grad_norm": 2.5346970558166504, "learning_rate": 1.3563049853372433e-05, "loss": 1.1218, "step": 12238 }, { "epoch": 1.9159361302442077, "grad_norm": 0.8043964505195618, "learning_rate": 1.3538611925708697e-05, "loss": 1.4646, "step": 12239 }, { "epoch": 
1.9160926737633062, "grad_norm": 1.0437607765197754, "learning_rate": 1.3514173998044964e-05, "loss": 1.4516, "step": 12240 }, { "epoch": 1.9162492172824046, "grad_norm": 1.5638072490692139, "learning_rate": 1.348973607038123e-05, "loss": 1.4577, "step": 12241 }, { "epoch": 1.9164057608015028, "grad_norm": 1.5612990856170654, "learning_rate": 1.3465298142717495e-05, "loss": 1.4553, "step": 12242 }, { "epoch": 1.916562304320601, "grad_norm": 1.4811925888061523, "learning_rate": 1.3440860215053762e-05, "loss": 1.5817, "step": 12243 }, { "epoch": 1.9167188478396995, "grad_norm": 1.6641279458999634, "learning_rate": 1.3416422287390028e-05, "loss": 1.4004, "step": 12244 }, { "epoch": 1.9168753913587977, "grad_norm": 1.62802255153656, "learning_rate": 1.3391984359726293e-05, "loss": 1.3879, "step": 12245 }, { "epoch": 1.9170319348778961, "grad_norm": 1.1935174465179443, "learning_rate": 1.336754643206256e-05, "loss": 1.3205, "step": 12246 }, { "epoch": 1.9171884783969944, "grad_norm": 2.306370496749878, "learning_rate": 1.3343108504398826e-05, "loss": 1.2546, "step": 12247 }, { "epoch": 1.9173450219160926, "grad_norm": 3.6753339767456055, "learning_rate": 1.331867057673509e-05, "loss": 1.4271, "step": 12248 }, { "epoch": 1.917501565435191, "grad_norm": 2.2451188564300537, "learning_rate": 1.3294232649071357e-05, "loss": 1.318, "step": 12249 }, { "epoch": 1.9176581089542895, "grad_norm": 3.015744209289551, "learning_rate": 1.3269794721407624e-05, "loss": 1.3066, "step": 12250 }, { "epoch": 1.9178146524733877, "grad_norm": 0.8539578318595886, "learning_rate": 1.3245356793743889e-05, "loss": 1.2688, "step": 12251 }, { "epoch": 1.9179711959924859, "grad_norm": 6.493170261383057, "learning_rate": 1.3220918866080155e-05, "loss": 1.1816, "step": 12252 }, { "epoch": 1.918127739511584, "grad_norm": 1.088154673576355, "learning_rate": 1.3196480938416422e-05, "loss": 1.213, "step": 12253 }, { "epoch": 1.9182842830306825, "grad_norm": 2.4062836170196533, "learning_rate": 
1.3172043010752686e-05, "loss": 1.1441, "step": 12254 }, { "epoch": 1.918440826549781, "grad_norm": 1.412260890007019, "learning_rate": 1.3147605083088953e-05, "loss": 1.3002, "step": 12255 }, { "epoch": 1.9185973700688792, "grad_norm": 3.7139716148376465, "learning_rate": 1.312316715542522e-05, "loss": 1.1326, "step": 12256 }, { "epoch": 1.9187539135879774, "grad_norm": 4.428962707519531, "learning_rate": 1.3098729227761484e-05, "loss": 1.2002, "step": 12257 }, { "epoch": 1.9189104571070756, "grad_norm": 1.157462239265442, "learning_rate": 1.307429130009775e-05, "loss": 1.1597, "step": 12258 }, { "epoch": 1.919067000626174, "grad_norm": 3.1243197917938232, "learning_rate": 1.3049853372434017e-05, "loss": 1.1902, "step": 12259 }, { "epoch": 1.9192235441452725, "grad_norm": 1.9715791940689087, "learning_rate": 1.3025415444770282e-05, "loss": 1.0987, "step": 12260 }, { "epoch": 1.9193800876643707, "grad_norm": 8.637343406677246, "learning_rate": 1.3000977517106548e-05, "loss": 1.2952, "step": 12261 }, { "epoch": 1.919536631183469, "grad_norm": 6.371841907501221, "learning_rate": 1.2976539589442815e-05, "loss": 1.6552, "step": 12262 }, { "epoch": 1.9196931747025672, "grad_norm": 2.1483147144317627, "learning_rate": 1.295210166177908e-05, "loss": 1.0953, "step": 12263 }, { "epoch": 1.9198497182216656, "grad_norm": 1.9315729141235352, "learning_rate": 1.2927663734115346e-05, "loss": 1.0004, "step": 12264 }, { "epoch": 1.920006261740764, "grad_norm": 3.5723578929901123, "learning_rate": 1.2903225806451613e-05, "loss": 0.8629, "step": 12265 }, { "epoch": 1.9201628052598623, "grad_norm": 2.428359270095825, "learning_rate": 1.2878787878787878e-05, "loss": 1.2251, "step": 12266 }, { "epoch": 1.9203193487789605, "grad_norm": 3.252751350402832, "learning_rate": 1.2854349951124144e-05, "loss": 1.1408, "step": 12267 }, { "epoch": 1.9204758922980587, "grad_norm": 2.705955743789673, "learning_rate": 1.2829912023460409e-05, "loss": 1.0692, "step": 12268 }, { "epoch": 
1.9206324358171571, "grad_norm": 4.607133388519287, "learning_rate": 1.2805474095796675e-05, "loss": 1.0189, "step": 12269 }, { "epoch": 1.9207889793362556, "grad_norm": 2.5843207836151123, "learning_rate": 1.2781036168132942e-05, "loss": 1.3724, "step": 12270 }, { "epoch": 1.9209455228553538, "grad_norm": 4.102626323699951, "learning_rate": 1.2756598240469207e-05, "loss": 0.8495, "step": 12271 }, { "epoch": 1.921102066374452, "grad_norm": 3.620480537414551, "learning_rate": 1.2732160312805473e-05, "loss": 1.047, "step": 12272 }, { "epoch": 1.9212586098935505, "grad_norm": 7.478079795837402, "learning_rate": 1.270772238514174e-05, "loss": 0.7859, "step": 12273 }, { "epoch": 1.9214151534126487, "grad_norm": 25.899625778198242, "learning_rate": 1.2683284457478004e-05, "loss": 0.8097, "step": 12274 }, { "epoch": 1.9215716969317471, "grad_norm": 2.6998584270477295, "learning_rate": 1.2658846529814271e-05, "loss": 1.3919, "step": 12275 }, { "epoch": 1.9217282404508453, "grad_norm": 5.945512771606445, "learning_rate": 1.2634408602150537e-05, "loss": 1.2487, "step": 12276 }, { "epoch": 1.9218847839699436, "grad_norm": 6.3635029792785645, "learning_rate": 1.2609970674486802e-05, "loss": 0.9452, "step": 12277 }, { "epoch": 1.922041327489042, "grad_norm": 5.040246963500977, "learning_rate": 1.2585532746823069e-05, "loss": 1.1909, "step": 12278 }, { "epoch": 1.9221978710081402, "grad_norm": 4.729580402374268, "learning_rate": 1.2561094819159335e-05, "loss": 1.3885, "step": 12279 }, { "epoch": 1.9223544145272387, "grad_norm": 4.690464973449707, "learning_rate": 1.25366568914956e-05, "loss": 1.4546, "step": 12280 }, { "epoch": 1.9225109580463369, "grad_norm": 4.445624828338623, "learning_rate": 1.2512218963831867e-05, "loss": 1.3911, "step": 12281 }, { "epoch": 1.922667501565435, "grad_norm": 2.9584579467773438, "learning_rate": 1.2487781036168133e-05, "loss": 0.9071, "step": 12282 }, { "epoch": 1.9228240450845335, "grad_norm": 4.518764495849609, "learning_rate": 
1.2463343108504398e-05, "loss": 0.8461, "step": 12283 }, { "epoch": 1.922980588603632, "grad_norm": 3.6593761444091797, "learning_rate": 1.2438905180840664e-05, "loss": 0.6349, "step": 12284 }, { "epoch": 1.9231371321227302, "grad_norm": 3.229717254638672, "learning_rate": 1.241446725317693e-05, "loss": 0.6635, "step": 12285 }, { "epoch": 1.9232936756418284, "grad_norm": 1.8013453483581543, "learning_rate": 1.2390029325513196e-05, "loss": 0.4047, "step": 12286 }, { "epoch": 1.9234502191609266, "grad_norm": 3.6808831691741943, "learning_rate": 1.2365591397849462e-05, "loss": 0.5197, "step": 12287 }, { "epoch": 1.923606762680025, "grad_norm": 2.7701351642608643, "learning_rate": 1.2341153470185729e-05, "loss": 0.539, "step": 12288 }, { "epoch": 1.9237633061991235, "grad_norm": 1.3340402841567993, "learning_rate": 1.2316715542521992e-05, "loss": 1.382, "step": 12289 }, { "epoch": 1.9239198497182217, "grad_norm": 1.5330002307891846, "learning_rate": 1.229227761485826e-05, "loss": 1.4128, "step": 12290 }, { "epoch": 1.92407639323732, "grad_norm": 0.8415979743003845, "learning_rate": 1.2267839687194526e-05, "loss": 1.3799, "step": 12291 }, { "epoch": 1.9242329367564182, "grad_norm": 0.8485933542251587, "learning_rate": 1.224340175953079e-05, "loss": 1.4151, "step": 12292 }, { "epoch": 1.9243894802755166, "grad_norm": 1.0369019508361816, "learning_rate": 1.2218963831867058e-05, "loss": 1.4682, "step": 12293 }, { "epoch": 1.924546023794615, "grad_norm": 1.0517786741256714, "learning_rate": 1.2194525904203324e-05, "loss": 1.4581, "step": 12294 }, { "epoch": 1.9247025673137133, "grad_norm": 1.096622347831726, "learning_rate": 1.2170087976539587e-05, "loss": 1.3303, "step": 12295 }, { "epoch": 1.9248591108328115, "grad_norm": 3.3548057079315186, "learning_rate": 1.2145650048875854e-05, "loss": 1.2956, "step": 12296 }, { "epoch": 1.9250156543519097, "grad_norm": 1.0958143472671509, "learning_rate": 1.2121212121212122e-05, "loss": 1.3452, "step": 12297 }, { "epoch": 
1.9251721978710081, "grad_norm": 2.7665908336639404, "learning_rate": 1.2096774193548385e-05, "loss": 1.322, "step": 12298 }, { "epoch": 1.9253287413901066, "grad_norm": 1.590676188468933, "learning_rate": 1.2072336265884652e-05, "loss": 1.2135, "step": 12299 }, { "epoch": 1.9254852849092048, "grad_norm": 4.261856555938721, "learning_rate": 1.2047898338220916e-05, "loss": 1.506, "step": 12300 }, { "epoch": 1.925641828428303, "grad_norm": 5.3091230392456055, "learning_rate": 1.2023460410557183e-05, "loss": 1.3583, "step": 12301 }, { "epoch": 1.9257983719474012, "grad_norm": 2.1772117614746094, "learning_rate": 1.199902248289345e-05, "loss": 1.3899, "step": 12302 }, { "epoch": 1.9259549154664997, "grad_norm": 1.1379402875900269, "learning_rate": 1.1974584555229714e-05, "loss": 1.2902, "step": 12303 }, { "epoch": 1.926111458985598, "grad_norm": 1.190233826637268, "learning_rate": 1.195014662756598e-05, "loss": 1.4014, "step": 12304 }, { "epoch": 1.9262680025046963, "grad_norm": 1.8833681344985962, "learning_rate": 1.1925708699902247e-05, "loss": 1.2716, "step": 12305 }, { "epoch": 1.9264245460237945, "grad_norm": 2.2918057441711426, "learning_rate": 1.1901270772238512e-05, "loss": 1.2386, "step": 12306 }, { "epoch": 1.926581089542893, "grad_norm": 2.4016504287719727, "learning_rate": 1.1876832844574778e-05, "loss": 1.4535, "step": 12307 }, { "epoch": 1.9267376330619912, "grad_norm": 2.2692906856536865, "learning_rate": 1.1852394916911045e-05, "loss": 1.2666, "step": 12308 }, { "epoch": 1.9268941765810896, "grad_norm": 5.164487838745117, "learning_rate": 1.182795698924731e-05, "loss": 1.1711, "step": 12309 }, { "epoch": 1.9270507201001879, "grad_norm": 3.0277092456817627, "learning_rate": 1.1803519061583576e-05, "loss": 1.2333, "step": 12310 }, { "epoch": 1.927207263619286, "grad_norm": 2.941403865814209, "learning_rate": 1.1779081133919843e-05, "loss": 1.3658, "step": 12311 }, { "epoch": 1.9273638071383845, "grad_norm": 4.588296413421631, "learning_rate": 
1.1754643206256108e-05, "loss": 1.2713, "step": 12312 }, { "epoch": 1.9275203506574827, "grad_norm": 1.65639328956604, "learning_rate": 1.1730205278592374e-05, "loss": 1.054, "step": 12313 }, { "epoch": 1.9276768941765812, "grad_norm": 2.7353925704956055, "learning_rate": 1.170576735092864e-05, "loss": 1.3822, "step": 12314 }, { "epoch": 1.9278334376956794, "grad_norm": 4.178305149078369, "learning_rate": 1.1681329423264905e-05, "loss": 1.4245, "step": 12315 }, { "epoch": 1.9279899812147776, "grad_norm": 4.869446754455566, "learning_rate": 1.1656891495601172e-05, "loss": 1.1584, "step": 12316 }, { "epoch": 1.928146524733876, "grad_norm": 2.118698835372925, "learning_rate": 1.1632453567937438e-05, "loss": 1.1799, "step": 12317 }, { "epoch": 1.9283030682529745, "grad_norm": 7.572887897491455, "learning_rate": 1.1608015640273703e-05, "loss": 1.1696, "step": 12318 }, { "epoch": 1.9284596117720727, "grad_norm": 2.0993082523345947, "learning_rate": 1.158357771260997e-05, "loss": 1.0388, "step": 12319 }, { "epoch": 1.928616155291171, "grad_norm": 6.564274787902832, "learning_rate": 1.1559139784946236e-05, "loss": 1.1272, "step": 12320 }, { "epoch": 1.9287726988102691, "grad_norm": 4.388607978820801, "learning_rate": 1.1534701857282501e-05, "loss": 1.0672, "step": 12321 }, { "epoch": 1.9289292423293676, "grad_norm": 3.7181320190429688, "learning_rate": 1.1510263929618767e-05, "loss": 1.1001, "step": 12322 }, { "epoch": 1.929085785848466, "grad_norm": 2.3277645111083984, "learning_rate": 1.1485826001955034e-05, "loss": 1.2546, "step": 12323 }, { "epoch": 1.9292423293675642, "grad_norm": 5.6356282234191895, "learning_rate": 1.1461388074291299e-05, "loss": 1.56, "step": 12324 }, { "epoch": 1.9293988728866625, "grad_norm": 2.452418804168701, "learning_rate": 1.1436950146627565e-05, "loss": 1.4428, "step": 12325 }, { "epoch": 1.9295554164057607, "grad_norm": 8.939882278442383, "learning_rate": 1.1412512218963832e-05, "loss": 1.2936, "step": 12326 }, { "epoch": 
1.929711959924859, "grad_norm": 2.89434552192688, "learning_rate": 1.1388074291300096e-05, "loss": 1.0515, "step": 12327 }, { "epoch": 1.9298685034439576, "grad_norm": 5.483556270599365, "learning_rate": 1.1363636363636363e-05, "loss": 1.2122, "step": 12328 }, { "epoch": 1.9300250469630558, "grad_norm": 3.8645455837249756, "learning_rate": 1.133919843597263e-05, "loss": 0.9129, "step": 12329 }, { "epoch": 1.930181590482154, "grad_norm": 3.9111804962158203, "learning_rate": 1.1314760508308894e-05, "loss": 1.2189, "step": 12330 }, { "epoch": 1.9303381340012522, "grad_norm": 3.18015456199646, "learning_rate": 1.129032258064516e-05, "loss": 0.7437, "step": 12331 }, { "epoch": 1.9304946775203506, "grad_norm": 8.127317428588867, "learning_rate": 1.1265884652981426e-05, "loss": 1.3882, "step": 12332 }, { "epoch": 1.930651221039449, "grad_norm": 2.8855273723602295, "learning_rate": 1.1241446725317692e-05, "loss": 1.6402, "step": 12333 }, { "epoch": 1.9308077645585473, "grad_norm": 6.404877662658691, "learning_rate": 1.1217008797653959e-05, "loss": 0.8951, "step": 12334 }, { "epoch": 1.9309643080776455, "grad_norm": 12.837389945983887, "learning_rate": 1.1192570869990223e-05, "loss": 0.9152, "step": 12335 }, { "epoch": 1.9311208515967437, "grad_norm": 1.6619367599487305, "learning_rate": 1.116813294232649e-05, "loss": 0.7218, "step": 12336 }, { "epoch": 1.9312773951158422, "grad_norm": 1.492072343826294, "learning_rate": 1.1143695014662756e-05, "loss": 0.4666, "step": 12337 }, { "epoch": 1.9314339386349406, "grad_norm": 2.7796103954315186, "learning_rate": 1.1119257086999021e-05, "loss": 1.1153, "step": 12338 }, { "epoch": 1.9315904821540388, "grad_norm": 6.7155938148498535, "learning_rate": 1.1094819159335288e-05, "loss": 1.9017, "step": 12339 }, { "epoch": 1.931747025673137, "grad_norm": 0.7669269442558289, "learning_rate": 1.1070381231671554e-05, "loss": 1.356, "step": 12340 }, { "epoch": 1.9319035691922355, "grad_norm": 0.7935886383056641, "learning_rate": 
1.1045943304007819e-05, "loss": 1.3973, "step": 12341 }, { "epoch": 1.9320601127113337, "grad_norm": 1.2134813070297241, "learning_rate": 1.1021505376344085e-05, "loss": 1.2953, "step": 12342 }, { "epoch": 1.9322166562304322, "grad_norm": 0.8597235083580017, "learning_rate": 1.0997067448680352e-05, "loss": 1.3599, "step": 12343 }, { "epoch": 1.9323731997495304, "grad_norm": 0.8375195860862732, "learning_rate": 1.0972629521016617e-05, "loss": 1.2758, "step": 12344 }, { "epoch": 1.9325297432686286, "grad_norm": 1.4954638481140137, "learning_rate": 1.0948191593352883e-05, "loss": 1.3688, "step": 12345 }, { "epoch": 1.932686286787727, "grad_norm": 1.4607149362564087, "learning_rate": 1.092375366568915e-05, "loss": 1.3371, "step": 12346 }, { "epoch": 1.9328428303068252, "grad_norm": 1.151136040687561, "learning_rate": 1.0899315738025414e-05, "loss": 1.34, "step": 12347 }, { "epoch": 1.9329993738259237, "grad_norm": 1.224950909614563, "learning_rate": 1.0874877810361681e-05, "loss": 1.3688, "step": 12348 }, { "epoch": 1.933155917345022, "grad_norm": 6.187594890594482, "learning_rate": 1.0850439882697947e-05, "loss": 1.2989, "step": 12349 }, { "epoch": 1.9333124608641201, "grad_norm": 1.705400824546814, "learning_rate": 1.0826001955034212e-05, "loss": 1.6841, "step": 12350 }, { "epoch": 1.9334690043832186, "grad_norm": 1.4183433055877686, "learning_rate": 1.0801564027370479e-05, "loss": 1.2696, "step": 12351 }, { "epoch": 1.933625547902317, "grad_norm": 2.378031015396118, "learning_rate": 1.0777126099706745e-05, "loss": 1.5075, "step": 12352 }, { "epoch": 1.9337820914214152, "grad_norm": 1.5742651224136353, "learning_rate": 1.075268817204301e-05, "loss": 1.2745, "step": 12353 }, { "epoch": 1.9339386349405134, "grad_norm": 1.3239703178405762, "learning_rate": 1.0728250244379277e-05, "loss": 1.3535, "step": 12354 }, { "epoch": 1.9340951784596117, "grad_norm": 3.5635673999786377, "learning_rate": 1.0703812316715543e-05, "loss": 1.6419, "step": 12355 }, { "epoch": 
1.93425172197871, "grad_norm": 2.4734835624694824, "learning_rate": 1.0679374389051806e-05, "loss": 1.3285, "step": 12356 }, { "epoch": 1.9344082654978085, "grad_norm": 2.6944169998168945, "learning_rate": 1.0654936461388074e-05, "loss": 1.4164, "step": 12357 }, { "epoch": 1.9345648090169068, "grad_norm": 1.283929467201233, "learning_rate": 1.063049853372434e-05, "loss": 1.3136, "step": 12358 }, { "epoch": 1.934721352536005, "grad_norm": 1.476118564605713, "learning_rate": 1.0606060606060604e-05, "loss": 1.1611, "step": 12359 }, { "epoch": 1.9348778960551032, "grad_norm": 2.960695505142212, "learning_rate": 1.0581622678396872e-05, "loss": 1.3293, "step": 12360 }, { "epoch": 1.9350344395742016, "grad_norm": 4.04657506942749, "learning_rate": 1.0557184750733139e-05, "loss": 1.3852, "step": 12361 }, { "epoch": 1.9351909830933, "grad_norm": 1.8370991945266724, "learning_rate": 1.0532746823069402e-05, "loss": 1.2408, "step": 12362 }, { "epoch": 1.9353475266123983, "grad_norm": 3.660257577896118, "learning_rate": 1.0508308895405668e-05, "loss": 1.395, "step": 12363 }, { "epoch": 1.9355040701314965, "grad_norm": 4.005573749542236, "learning_rate": 1.0483870967741933e-05, "loss": 1.2782, "step": 12364 }, { "epoch": 1.9356606136505947, "grad_norm": 8.64341926574707, "learning_rate": 1.04594330400782e-05, "loss": 1.4618, "step": 12365 }, { "epoch": 1.9358171571696932, "grad_norm": 8.12671947479248, "learning_rate": 1.0434995112414466e-05, "loss": 1.9106, "step": 12366 }, { "epoch": 1.9359737006887916, "grad_norm": 2.215592384338379, "learning_rate": 1.041055718475073e-05, "loss": 1.3265, "step": 12367 }, { "epoch": 1.9361302442078898, "grad_norm": 4.294419288635254, "learning_rate": 1.0386119257086997e-05, "loss": 1.1993, "step": 12368 }, { "epoch": 1.936286787726988, "grad_norm": 2.263684034347534, "learning_rate": 1.0361681329423264e-05, "loss": 1.1445, "step": 12369 }, { "epoch": 1.9364433312460863, "grad_norm": 3.5883326530456543, "learning_rate": 1.0337243401759529e-05, 
"loss": 1.4243, "step": 12370 }, { "epoch": 1.9365998747651847, "grad_norm": 1.9435137510299683, "learning_rate": 1.0312805474095795e-05, "loss": 0.9561, "step": 12371 }, { "epoch": 1.9367564182842831, "grad_norm": 3.0882208347320557, "learning_rate": 1.0288367546432062e-05, "loss": 0.8797, "step": 12372 }, { "epoch": 1.9369129618033814, "grad_norm": 7.617794036865234, "learning_rate": 1.0263929618768326e-05, "loss": 1.2746, "step": 12373 }, { "epoch": 1.9370695053224796, "grad_norm": 4.225636959075928, "learning_rate": 1.0239491691104593e-05, "loss": 1.4779, "step": 12374 }, { "epoch": 1.937226048841578, "grad_norm": 15.488640785217285, "learning_rate": 1.021505376344086e-05, "loss": 1.4581, "step": 12375 }, { "epoch": 1.9373825923606762, "grad_norm": 5.777149200439453, "learning_rate": 1.0190615835777124e-05, "loss": 1.4015, "step": 12376 }, { "epoch": 1.9375391358797747, "grad_norm": 12.551581382751465, "learning_rate": 1.016617790811339e-05, "loss": 1.2572, "step": 12377 }, { "epoch": 1.9376956793988729, "grad_norm": 4.459105491638184, "learning_rate": 1.0141739980449657e-05, "loss": 1.0849, "step": 12378 }, { "epoch": 1.937852222917971, "grad_norm": 3.434291362762451, "learning_rate": 1.0117302052785922e-05, "loss": 1.3216, "step": 12379 }, { "epoch": 1.9380087664370695, "grad_norm": 2.0103023052215576, "learning_rate": 1.0092864125122188e-05, "loss": 0.7784, "step": 12380 }, { "epoch": 1.938165309956168, "grad_norm": 2.6893234252929688, "learning_rate": 1.0068426197458455e-05, "loss": 0.7125, "step": 12381 }, { "epoch": 1.9383218534752662, "grad_norm": 5.581327438354492, "learning_rate": 1.004398826979472e-05, "loss": 0.998, "step": 12382 }, { "epoch": 1.9384783969943644, "grad_norm": 3.5496435165405273, "learning_rate": 1.0019550342130986e-05, "loss": 1.2393, "step": 12383 }, { "epoch": 1.9386349405134626, "grad_norm": 2.4860739707946777, "learning_rate": 9.995112414467253e-06, "loss": 1.3163, "step": 12384 }, { "epoch": 1.938791484032561, "grad_norm": 
1.844963550567627, "learning_rate": 9.970674486803518e-06, "loss": 0.7082, "step": 12385 }, { "epoch": 1.9389480275516595, "grad_norm": 2.7974820137023926, "learning_rate": 9.946236559139784e-06, "loss": 0.6463, "step": 12386 }, { "epoch": 1.9391045710707577, "grad_norm": 1.4978660345077515, "learning_rate": 9.92179863147605e-06, "loss": 0.3869, "step": 12387 }, { "epoch": 1.939261114589856, "grad_norm": 2.3386178016662598, "learning_rate": 9.897360703812315e-06, "loss": 1.1001, "step": 12388 }, { "epoch": 1.9394176581089542, "grad_norm": 0.8785395622253418, "learning_rate": 9.872922776148582e-06, "loss": 1.5033, "step": 12389 }, { "epoch": 1.9395742016280526, "grad_norm": 0.8468477725982666, "learning_rate": 9.848484848484848e-06, "loss": 1.4967, "step": 12390 }, { "epoch": 1.939730745147151, "grad_norm": 0.7762795686721802, "learning_rate": 9.824046920821113e-06, "loss": 1.4881, "step": 12391 }, { "epoch": 1.9398872886662493, "grad_norm": 2.385578155517578, "learning_rate": 9.79960899315738e-06, "loss": 1.3553, "step": 12392 }, { "epoch": 1.9400438321853475, "grad_norm": 1.221888542175293, "learning_rate": 9.775171065493646e-06, "loss": 1.4372, "step": 12393 }, { "epoch": 1.9402003757044457, "grad_norm": 3.5840225219726562, "learning_rate": 9.750733137829911e-06, "loss": 1.4205, "step": 12394 }, { "epoch": 1.9403569192235441, "grad_norm": 0.9820275902748108, "learning_rate": 9.726295210166177e-06, "loss": 1.3966, "step": 12395 }, { "epoch": 1.9405134627426426, "grad_norm": 1.1478222608566284, "learning_rate": 9.701857282502442e-06, "loss": 1.4722, "step": 12396 }, { "epoch": 1.9406700062617408, "grad_norm": 2.0976521968841553, "learning_rate": 9.677419354838709e-06, "loss": 1.4673, "step": 12397 }, { "epoch": 1.940826549780839, "grad_norm": 1.5438917875289917, "learning_rate": 9.652981427174975e-06, "loss": 1.5253, "step": 12398 }, { "epoch": 1.9409830932999372, "grad_norm": 1.2291433811187744, "learning_rate": 9.62854349951124e-06, "loss": 1.4075, "step": 12399 
}, { "epoch": 1.9411396368190357, "grad_norm": 1.6943989992141724, "learning_rate": 9.604105571847507e-06, "loss": 1.3334, "step": 12400 }, { "epoch": 1.9412961803381341, "grad_norm": 1.9269627332687378, "learning_rate": 9.579667644183773e-06, "loss": 1.4683, "step": 12401 }, { "epoch": 1.9414527238572323, "grad_norm": 1.9164787530899048, "learning_rate": 9.555229716520038e-06, "loss": 1.5016, "step": 12402 }, { "epoch": 1.9416092673763305, "grad_norm": 1.3316177129745483, "learning_rate": 9.530791788856304e-06, "loss": 1.2375, "step": 12403 }, { "epoch": 1.9417658108954288, "grad_norm": 1.476271390914917, "learning_rate": 9.50635386119257e-06, "loss": 1.2165, "step": 12404 }, { "epoch": 1.9419223544145272, "grad_norm": 1.4863686561584473, "learning_rate": 9.481915933528836e-06, "loss": 1.2883, "step": 12405 }, { "epoch": 1.9420788979336256, "grad_norm": 1.1693488359451294, "learning_rate": 9.457478005865102e-06, "loss": 1.4016, "step": 12406 }, { "epoch": 1.9422354414527239, "grad_norm": 2.0392889976501465, "learning_rate": 9.433040078201369e-06, "loss": 1.2879, "step": 12407 }, { "epoch": 1.942391984971822, "grad_norm": 2.9882824420928955, "learning_rate": 9.408602150537633e-06, "loss": 1.3189, "step": 12408 }, { "epoch": 1.9425485284909205, "grad_norm": 1.9439655542373657, "learning_rate": 9.3841642228739e-06, "loss": 1.2713, "step": 12409 }, { "epoch": 1.9427050720100187, "grad_norm": 2.5149528980255127, "learning_rate": 9.359726295210165e-06, "loss": 1.4599, "step": 12410 }, { "epoch": 1.9428616155291172, "grad_norm": 1.612697720527649, "learning_rate": 9.335288367546431e-06, "loss": 1.2472, "step": 12411 }, { "epoch": 1.9430181590482154, "grad_norm": 4.500641345977783, "learning_rate": 9.310850439882698e-06, "loss": 1.208, "step": 12412 }, { "epoch": 1.9431747025673136, "grad_norm": 1.9317268133163452, "learning_rate": 9.286412512218962e-06, "loss": 1.5878, "step": 12413 }, { "epoch": 1.943331246086412, "grad_norm": 6.950214862823486, "learning_rate": 
9.261974584555229e-06, "loss": 1.4057, "step": 12414 }, { "epoch": 1.9434877896055105, "grad_norm": 4.08366584777832, "learning_rate": 9.237536656891495e-06, "loss": 1.3945, "step": 12415 }, { "epoch": 1.9436443331246087, "grad_norm": 1.9059251546859741, "learning_rate": 9.21309872922776e-06, "loss": 1.0823, "step": 12416 }, { "epoch": 1.943800876643707, "grad_norm": 4.59588098526001, "learning_rate": 9.188660801564027e-06, "loss": 1.4755, "step": 12417 }, { "epoch": 1.9439574201628051, "grad_norm": 3.578123092651367, "learning_rate": 9.164222873900293e-06, "loss": 1.1854, "step": 12418 }, { "epoch": 1.9441139636819036, "grad_norm": 1.8698164224624634, "learning_rate": 9.139784946236558e-06, "loss": 1.2789, "step": 12419 }, { "epoch": 1.944270507201002, "grad_norm": 2.4767820835113525, "learning_rate": 9.115347018572825e-06, "loss": 1.3508, "step": 12420 }, { "epoch": 1.9444270507201002, "grad_norm": 1.976932406425476, "learning_rate": 9.090909090909091e-06, "loss": 0.9728, "step": 12421 }, { "epoch": 1.9445835942391985, "grad_norm": 2.43166184425354, "learning_rate": 9.066471163245356e-06, "loss": 1.157, "step": 12422 }, { "epoch": 1.9447401377582967, "grad_norm": 1.8392789363861084, "learning_rate": 9.042033235581622e-06, "loss": 0.9677, "step": 12423 }, { "epoch": 1.9448966812773951, "grad_norm": 6.830517768859863, "learning_rate": 9.017595307917889e-06, "loss": 1.2045, "step": 12424 }, { "epoch": 1.9450532247964936, "grad_norm": 4.780092239379883, "learning_rate": 8.993157380254154e-06, "loss": 1.522, "step": 12425 }, { "epoch": 1.9452097683155918, "grad_norm": 6.710925102233887, "learning_rate": 8.968719452590418e-06, "loss": 1.4294, "step": 12426 }, { "epoch": 1.94536631183469, "grad_norm": 4.024580478668213, "learning_rate": 8.944281524926687e-06, "loss": 1.1853, "step": 12427 }, { "epoch": 1.9455228553537882, "grad_norm": 5.138179302215576, "learning_rate": 8.919843597262951e-06, "loss": 1.195, "step": 12428 }, { "epoch": 1.9456793988728867, "grad_norm": 
4.0508270263671875, "learning_rate": 8.895405669599216e-06, "loss": 1.5426, "step": 12429 }, { "epoch": 1.945835942391985, "grad_norm": 5.0509538650512695, "learning_rate": 8.870967741935483e-06, "loss": 1.3934, "step": 12430 }, { "epoch": 1.9459924859110833, "grad_norm": 7.239458084106445, "learning_rate": 8.84652981427175e-06, "loss": 1.361, "step": 12431 }, { "epoch": 1.9461490294301815, "grad_norm": 4.996304988861084, "learning_rate": 8.822091886608014e-06, "loss": 1.1353, "step": 12432 }, { "epoch": 1.9463055729492797, "grad_norm": 1.4053064584732056, "learning_rate": 8.79765395894428e-06, "loss": 1.052, "step": 12433 }, { "epoch": 1.9464621164683782, "grad_norm": 4.434309959411621, "learning_rate": 8.773216031280547e-06, "loss": 1.125, "step": 12434 }, { "epoch": 1.9466186599874766, "grad_norm": 2.092318058013916, "learning_rate": 8.748778103616812e-06, "loss": 0.8836, "step": 12435 }, { "epoch": 1.9467752035065748, "grad_norm": 2.4105422496795654, "learning_rate": 8.724340175953078e-06, "loss": 0.9855, "step": 12436 }, { "epoch": 1.946931747025673, "grad_norm": 3.4820504188537598, "learning_rate": 8.699902248289345e-06, "loss": 1.01, "step": 12437 }, { "epoch": 1.9470882905447713, "grad_norm": 2.5442054271698, "learning_rate": 8.67546432062561e-06, "loss": 0.9834, "step": 12438 }, { "epoch": 1.9472448340638697, "grad_norm": 0.9357713460922241, "learning_rate": 8.651026392961876e-06, "loss": 1.4554, "step": 12439 }, { "epoch": 1.9474013775829682, "grad_norm": 0.6884608268737793, "learning_rate": 8.626588465298143e-06, "loss": 1.4628, "step": 12440 }, { "epoch": 1.9475579211020664, "grad_norm": 1.8610217571258545, "learning_rate": 8.602150537634407e-06, "loss": 1.3942, "step": 12441 }, { "epoch": 1.9477144646211646, "grad_norm": 1.9256157875061035, "learning_rate": 8.577712609970674e-06, "loss": 1.4434, "step": 12442 }, { "epoch": 1.947871008140263, "grad_norm": 1.0926450490951538, "learning_rate": 8.55327468230694e-06, "loss": 1.4785, "step": 12443 }, { 
"epoch": 1.9480275516593613, "grad_norm": 1.835080623626709, "learning_rate": 8.528836754643205e-06, "loss": 1.4919, "step": 12444 }, { "epoch": 1.9481840951784597, "grad_norm": 0.9636733531951904, "learning_rate": 8.504398826979472e-06, "loss": 1.485, "step": 12445 }, { "epoch": 1.948340638697558, "grad_norm": 1.1740062236785889, "learning_rate": 8.479960899315738e-06, "loss": 1.5098, "step": 12446 }, { "epoch": 1.9484971822166561, "grad_norm": 1.6411094665527344, "learning_rate": 8.455522971652003e-06, "loss": 1.4772, "step": 12447 }, { "epoch": 1.9486537257357546, "grad_norm": 1.150183916091919, "learning_rate": 8.43108504398827e-06, "loss": 1.4486, "step": 12448 }, { "epoch": 1.948810269254853, "grad_norm": 2.3827710151672363, "learning_rate": 8.406647116324536e-06, "loss": 1.4765, "step": 12449 }, { "epoch": 1.9489668127739512, "grad_norm": 1.509945034980774, "learning_rate": 8.3822091886608e-06, "loss": 1.2571, "step": 12450 }, { "epoch": 1.9491233562930494, "grad_norm": 1.6995009183883667, "learning_rate": 8.357771260997067e-06, "loss": 1.3709, "step": 12451 }, { "epoch": 1.9492798998121477, "grad_norm": 1.6770323514938354, "learning_rate": 8.333333333333332e-06, "loss": 1.4892, "step": 12452 }, { "epoch": 1.949436443331246, "grad_norm": 1.3949050903320312, "learning_rate": 8.308895405669599e-06, "loss": 1.4031, "step": 12453 }, { "epoch": 1.9495929868503445, "grad_norm": 4.16380500793457, "learning_rate": 8.284457478005865e-06, "loss": 1.3407, "step": 12454 }, { "epoch": 1.9497495303694428, "grad_norm": 1.7018452882766724, "learning_rate": 8.26001955034213e-06, "loss": 1.6301, "step": 12455 }, { "epoch": 1.949906073888541, "grad_norm": 1.3921611309051514, "learning_rate": 8.235581622678396e-06, "loss": 1.4144, "step": 12456 }, { "epoch": 1.9500626174076392, "grad_norm": 2.7200441360473633, "learning_rate": 8.211143695014663e-06, "loss": 1.4583, "step": 12457 }, { "epoch": 1.9502191609267376, "grad_norm": 1.539747714996338, "learning_rate": 
8.186705767350928e-06, "loss": 1.4575, "step": 12458 }, { "epoch": 1.950375704445836, "grad_norm": 1.5239930152893066, "learning_rate": 8.162267839687194e-06, "loss": 1.266, "step": 12459 }, { "epoch": 1.9505322479649343, "grad_norm": 2.7010016441345215, "learning_rate": 8.137829912023459e-06, "loss": 1.2497, "step": 12460 }, { "epoch": 1.9506887914840325, "grad_norm": 3.007016658782959, "learning_rate": 8.113391984359725e-06, "loss": 1.537, "step": 12461 }, { "epoch": 1.9508453350031307, "grad_norm": 1.432244896888733, "learning_rate": 8.088954056695992e-06, "loss": 1.4236, "step": 12462 }, { "epoch": 1.9510018785222292, "grad_norm": 1.9789886474609375, "learning_rate": 8.064516129032257e-06, "loss": 1.5051, "step": 12463 }, { "epoch": 1.9511584220413276, "grad_norm": 1.625560998916626, "learning_rate": 8.040078201368523e-06, "loss": 1.3133, "step": 12464 }, { "epoch": 1.9513149655604258, "grad_norm": 5.823700428009033, "learning_rate": 8.01564027370479e-06, "loss": 1.3131, "step": 12465 }, { "epoch": 1.951471509079524, "grad_norm": 1.7935535907745361, "learning_rate": 7.991202346041054e-06, "loss": 1.2024, "step": 12466 }, { "epoch": 1.9516280525986223, "grad_norm": 2.3289988040924072, "learning_rate": 7.966764418377321e-06, "loss": 1.0846, "step": 12467 }, { "epoch": 1.9517845961177207, "grad_norm": 7.128284454345703, "learning_rate": 7.942326490713586e-06, "loss": 1.53, "step": 12468 }, { "epoch": 1.9519411396368191, "grad_norm": 1.9482355117797852, "learning_rate": 7.917888563049852e-06, "loss": 1.2947, "step": 12469 }, { "epoch": 1.9520976831559174, "grad_norm": 1.5053616762161255, "learning_rate": 7.893450635386119e-06, "loss": 1.2437, "step": 12470 }, { "epoch": 1.9522542266750156, "grad_norm": 8.75932502746582, "learning_rate": 7.869012707722384e-06, "loss": 1.5358, "step": 12471 }, { "epoch": 1.9524107701941138, "grad_norm": 2.771195411682129, "learning_rate": 7.84457478005865e-06, "loss": 0.8694, "step": 12472 }, { "epoch": 1.9525673137132122, 
"grad_norm": 5.4038262367248535, "learning_rate": 7.820136852394917e-06, "loss": 1.6965, "step": 12473 }, { "epoch": 1.9527238572323107, "grad_norm": 2.8601737022399902, "learning_rate": 7.795698924731181e-06, "loss": 1.3113, "step": 12474 }, { "epoch": 1.952880400751409, "grad_norm": 13.03695011138916, "learning_rate": 7.771260997067448e-06, "loss": 1.778, "step": 12475 }, { "epoch": 1.9530369442705071, "grad_norm": 4.585375785827637, "learning_rate": 7.746823069403714e-06, "loss": 0.9247, "step": 12476 }, { "epoch": 1.9531934877896056, "grad_norm": 2.3865814208984375, "learning_rate": 7.72238514173998e-06, "loss": 1.1538, "step": 12477 }, { "epoch": 1.9533500313087038, "grad_norm": 4.630260944366455, "learning_rate": 7.697947214076246e-06, "loss": 1.0104, "step": 12478 }, { "epoch": 1.9535065748278022, "grad_norm": 2.182964563369751, "learning_rate": 7.673509286412512e-06, "loss": 1.0723, "step": 12479 }, { "epoch": 1.9536631183469004, "grad_norm": 4.5911760330200195, "learning_rate": 7.649071358748777e-06, "loss": 0.6017, "step": 12480 }, { "epoch": 1.9538196618659986, "grad_norm": 3.150099754333496, "learning_rate": 7.6246334310850434e-06, "loss": 0.8234, "step": 12481 }, { "epoch": 1.953976205385097, "grad_norm": 1.7521438598632812, "learning_rate": 7.600195503421309e-06, "loss": 1.0587, "step": 12482 }, { "epoch": 1.9541327489041955, "grad_norm": 3.584641933441162, "learning_rate": 7.575757575757575e-06, "loss": 1.5662, "step": 12483 }, { "epoch": 1.9542892924232937, "grad_norm": 3.3541533946990967, "learning_rate": 7.55131964809384e-06, "loss": 1.109, "step": 12484 }, { "epoch": 1.954445835942392, "grad_norm": 7.451817512512207, "learning_rate": 7.526881720430107e-06, "loss": 1.2002, "step": 12485 }, { "epoch": 1.9546023794614902, "grad_norm": 6.928215503692627, "learning_rate": 7.5024437927663725e-06, "loss": 0.6299, "step": 12486 }, { "epoch": 1.9547589229805886, "grad_norm": 3.0479836463928223, "learning_rate": 7.478005865102638e-06, "loss": 1.0142, 
"step": 12487 }, { "epoch": 1.954915466499687, "grad_norm": 1.0150418281555176, "learning_rate": 7.453567937438905e-06, "loss": 0.4941, "step": 12488 }, { "epoch": 1.9550720100187853, "grad_norm": 1.5538381338119507, "learning_rate": 7.42913000977517e-06, "loss": 1.4971, "step": 12489 }, { "epoch": 1.9552285535378835, "grad_norm": 0.806178867816925, "learning_rate": 7.404692082111436e-06, "loss": 1.4253, "step": 12490 }, { "epoch": 1.9553850970569817, "grad_norm": 1.2289999723434448, "learning_rate": 7.3802541544477025e-06, "loss": 1.2646, "step": 12491 }, { "epoch": 1.9555416405760802, "grad_norm": 1.0183123350143433, "learning_rate": 7.355816226783968e-06, "loss": 1.4464, "step": 12492 }, { "epoch": 1.9556981840951786, "grad_norm": 0.9893926382064819, "learning_rate": 7.331378299120234e-06, "loss": 1.3698, "step": 12493 }, { "epoch": 1.9558547276142768, "grad_norm": 1.2529398202896118, "learning_rate": 7.3069403714565e-06, "loss": 1.507, "step": 12494 }, { "epoch": 1.956011271133375, "grad_norm": 0.9131374955177307, "learning_rate": 7.282502443792766e-06, "loss": 1.3113, "step": 12495 }, { "epoch": 1.9561678146524732, "grad_norm": 0.9663822650909424, "learning_rate": 7.2580645161290315e-06, "loss": 1.3692, "step": 12496 }, { "epoch": 1.9563243581715717, "grad_norm": 4.0027265548706055, "learning_rate": 7.233626588465298e-06, "loss": 1.4265, "step": 12497 }, { "epoch": 1.9564809016906701, "grad_norm": 1.6491836309432983, "learning_rate": 7.209188660801564e-06, "loss": 1.4167, "step": 12498 }, { "epoch": 1.9566374452097683, "grad_norm": 1.1959609985351562, "learning_rate": 7.184750733137829e-06, "loss": 1.5605, "step": 12499 }, { "epoch": 1.9567939887288666, "grad_norm": 2.8737237453460693, "learning_rate": 7.160312805474095e-06, "loss": 1.417, "step": 12500 }, { "epoch": 1.9569505322479648, "grad_norm": 2.0030691623687744, "learning_rate": 7.1358748778103615e-06, "loss": 1.3524, "step": 12501 }, { "epoch": 1.9571070757670632, "grad_norm": 1.234277606010437, 
"learning_rate": 7.111436950146627e-06, "loss": 1.3771, "step": 12502 }, { "epoch": 1.9572636192861617, "grad_norm": 3.153789520263672, "learning_rate": 7.086999022482893e-06, "loss": 1.4366, "step": 12503 }, { "epoch": 1.9574201628052599, "grad_norm": 2.164249897003174, "learning_rate": 7.062561094819159e-06, "loss": 1.5128, "step": 12504 }, { "epoch": 1.957576706324358, "grad_norm": 2.0796103477478027, "learning_rate": 7.038123167155425e-06, "loss": 1.3222, "step": 12505 }, { "epoch": 1.9577332498434565, "grad_norm": 10.709492683410645, "learning_rate": 7.0136852394916906e-06, "loss": 1.4278, "step": 12506 }, { "epoch": 1.9578897933625548, "grad_norm": 3.223146915435791, "learning_rate": 6.989247311827957e-06, "loss": 1.3899, "step": 12507 }, { "epoch": 1.9580463368816532, "grad_norm": 1.8081053495407104, "learning_rate": 6.964809384164223e-06, "loss": 1.3115, "step": 12508 }, { "epoch": 1.9582028804007514, "grad_norm": 2.1025900840759277, "learning_rate": 6.9403714565004875e-06, "loss": 1.1652, "step": 12509 }, { "epoch": 1.9583594239198496, "grad_norm": 2.087188720703125, "learning_rate": 6.915933528836755e-06, "loss": 1.33, "step": 12510 }, { "epoch": 1.958515967438948, "grad_norm": 1.6247758865356445, "learning_rate": 6.89149560117302e-06, "loss": 1.3163, "step": 12511 }, { "epoch": 1.9586725109580463, "grad_norm": 5.537015914916992, "learning_rate": 6.867057673509285e-06, "loss": 1.6139, "step": 12512 }, { "epoch": 1.9588290544771447, "grad_norm": 5.143603801727295, "learning_rate": 6.842619745845553e-06, "loss": 1.3413, "step": 12513 }, { "epoch": 1.958985597996243, "grad_norm": 6.505813121795654, "learning_rate": 6.8181818181818174e-06, "loss": 1.2207, "step": 12514 }, { "epoch": 1.9591421415153412, "grad_norm": 3.945194721221924, "learning_rate": 6.793743890518083e-06, "loss": 1.3264, "step": 12515 }, { "epoch": 1.9592986850344396, "grad_norm": 3.9010744094848633, "learning_rate": 6.769305962854349e-06, "loss": 1.1143, "step": 12516 }, { "epoch": 
1.959455228553538, "grad_norm": 3.1134190559387207, "learning_rate": 6.744868035190615e-06, "loss": 1.3981, "step": 12517 }, { "epoch": 1.9596117720726363, "grad_norm": 1.898162841796875, "learning_rate": 6.720430107526881e-06, "loss": 0.932, "step": 12518 }, { "epoch": 1.9597683155917345, "grad_norm": 4.892297744750977, "learning_rate": 6.6959921798631465e-06, "loss": 1.4597, "step": 12519 }, { "epoch": 1.9599248591108327, "grad_norm": 2.914050340652466, "learning_rate": 6.671554252199413e-06, "loss": 1.2172, "step": 12520 }, { "epoch": 1.9600814026299311, "grad_norm": 2.2120187282562256, "learning_rate": 6.647116324535679e-06, "loss": 1.0094, "step": 12521 }, { "epoch": 1.9602379461490296, "grad_norm": 3.932375907897949, "learning_rate": 6.622678396871944e-06, "loss": 1.2513, "step": 12522 }, { "epoch": 1.9603944896681278, "grad_norm": 2.4831714630126953, "learning_rate": 6.598240469208211e-06, "loss": 1.0083, "step": 12523 }, { "epoch": 1.960551033187226, "grad_norm": 1.6757017374038696, "learning_rate": 6.5738025415444764e-06, "loss": 1.1801, "step": 12524 }, { "epoch": 1.9607075767063242, "grad_norm": 4.2157745361328125, "learning_rate": 6.549364613880742e-06, "loss": 1.4077, "step": 12525 }, { "epoch": 1.9608641202254227, "grad_norm": 7.748180866241455, "learning_rate": 6.524926686217009e-06, "loss": 1.1976, "step": 12526 }, { "epoch": 1.961020663744521, "grad_norm": 2.130333423614502, "learning_rate": 6.500488758553274e-06, "loss": 0.8903, "step": 12527 }, { "epoch": 1.9611772072636193, "grad_norm": 11.016569137573242, "learning_rate": 6.47605083088954e-06, "loss": 1.4919, "step": 12528 }, { "epoch": 1.9613337507827175, "grad_norm": 3.6152400970458984, "learning_rate": 6.451612903225806e-06, "loss": 1.0693, "step": 12529 }, { "epoch": 1.9614902943018158, "grad_norm": 5.328700542449951, "learning_rate": 6.427174975562072e-06, "loss": 0.9952, "step": 12530 }, { "epoch": 1.9616468378209142, "grad_norm": 4.276764869689941, "learning_rate": 6.402737047898338e-06, 
"loss": 1.0022, "step": 12531 }, { "epoch": 1.9618033813400126, "grad_norm": 5.339593887329102, "learning_rate": 6.378299120234603e-06, "loss": 1.5189, "step": 12532 }, { "epoch": 1.9619599248591109, "grad_norm": 3.1604864597320557, "learning_rate": 6.35386119257087e-06, "loss": 0.8523, "step": 12533 }, { "epoch": 1.962116468378209, "grad_norm": 2.233135461807251, "learning_rate": 6.3294232649071355e-06, "loss": 1.2364, "step": 12534 }, { "epoch": 1.9622730118973073, "grad_norm": 9.808837890625, "learning_rate": 6.304985337243401e-06, "loss": 0.8228, "step": 12535 }, { "epoch": 1.9624295554164057, "grad_norm": 4.273067474365234, "learning_rate": 6.280547409579668e-06, "loss": 0.731, "step": 12536 }, { "epoch": 1.9625860989355042, "grad_norm": 4.046677112579346, "learning_rate": 6.256109481915933e-06, "loss": 0.725, "step": 12537 }, { "epoch": 1.9627426424546024, "grad_norm": 3.113222599029541, "learning_rate": 6.231671554252199e-06, "loss": 0.5296, "step": 12538 }, { "epoch": 1.9628991859737006, "grad_norm": 1.053043007850647, "learning_rate": 6.207233626588465e-06, "loss": 1.5546, "step": 12539 }, { "epoch": 1.963055729492799, "grad_norm": 1.1030488014221191, "learning_rate": 6.182795698924731e-06, "loss": 1.3777, "step": 12540 }, { "epoch": 1.9632122730118973, "grad_norm": 0.8204811215400696, "learning_rate": 6.158357771260996e-06, "loss": 1.3897, "step": 12541 }, { "epoch": 1.9633688165309957, "grad_norm": 0.8840574622154236, "learning_rate": 6.133919843597263e-06, "loss": 1.4116, "step": 12542 }, { "epoch": 1.963525360050094, "grad_norm": 1.045058012008667, "learning_rate": 6.109481915933529e-06, "loss": 1.4974, "step": 12543 }, { "epoch": 1.9636819035691921, "grad_norm": 2.036625623703003, "learning_rate": 6.085043988269794e-06, "loss": 1.5181, "step": 12544 }, { "epoch": 1.9638384470882906, "grad_norm": 1.2316803932189941, "learning_rate": 6.060606060606061e-06, "loss": 1.4962, "step": 12545 }, { "epoch": 1.9639949906073888, "grad_norm": 1.3155983686447144, 
"learning_rate": 6.036168132942326e-06, "loss": 1.4232, "step": 12546 }, { "epoch": 1.9641515341264872, "grad_norm": 1.7271487712860107, "learning_rate": 6.011730205278591e-06, "loss": 1.3122, "step": 12547 }, { "epoch": 1.9643080776455855, "grad_norm": 2.852642774581909, "learning_rate": 5.987292277614857e-06, "loss": 1.4496, "step": 12548 }, { "epoch": 1.9644646211646837, "grad_norm": 1.5159505605697632, "learning_rate": 5.9628543499511236e-06, "loss": 1.4374, "step": 12549 }, { "epoch": 1.9646211646837821, "grad_norm": 0.9547608494758606, "learning_rate": 5.938416422287389e-06, "loss": 1.4034, "step": 12550 }, { "epoch": 1.9647777082028806, "grad_norm": 2.335413694381714, "learning_rate": 5.913978494623655e-06, "loss": 1.428, "step": 12551 }, { "epoch": 1.9649342517219788, "grad_norm": 1.940859317779541, "learning_rate": 5.889540566959921e-06, "loss": 1.5437, "step": 12552 }, { "epoch": 1.965090795241077, "grad_norm": 1.7635548114776611, "learning_rate": 5.865102639296187e-06, "loss": 1.5615, "step": 12553 }, { "epoch": 1.9652473387601752, "grad_norm": 1.3363462686538696, "learning_rate": 5.840664711632453e-06, "loss": 1.3193, "step": 12554 }, { "epoch": 1.9654038822792737, "grad_norm": 1.3077514171600342, "learning_rate": 5.816226783968719e-06, "loss": 1.4132, "step": 12555 }, { "epoch": 1.965560425798372, "grad_norm": 1.0712963342666626, "learning_rate": 5.791788856304985e-06, "loss": 1.3992, "step": 12556 }, { "epoch": 1.9657169693174703, "grad_norm": 1.7005242109298706, "learning_rate": 5.7673509286412504e-06, "loss": 1.4594, "step": 12557 }, { "epoch": 1.9658735128365685, "grad_norm": 1.507432460784912, "learning_rate": 5.742913000977517e-06, "loss": 1.3866, "step": 12558 }, { "epoch": 1.9660300563556667, "grad_norm": 2.3320140838623047, "learning_rate": 5.718475073313783e-06, "loss": 1.5171, "step": 12559 }, { "epoch": 1.9661865998747652, "grad_norm": 1.6303120851516724, "learning_rate": 5.694037145650048e-06, "loss": 1.3281, "step": 12560 }, { "epoch": 
1.9663431433938636, "grad_norm": 9.600627899169922, "learning_rate": 5.669599217986315e-06, "loss": 1.3929, "step": 12561 }, { "epoch": 1.9664996869129618, "grad_norm": 2.142137050628662, "learning_rate": 5.64516129032258e-06, "loss": 1.2811, "step": 12562 }, { "epoch": 1.96665623043206, "grad_norm": 3.0277087688446045, "learning_rate": 5.620723362658846e-06, "loss": 1.3192, "step": 12563 }, { "epoch": 1.9668127739511583, "grad_norm": 7.777707576751709, "learning_rate": 5.596285434995112e-06, "loss": 1.5502, "step": 12564 }, { "epoch": 1.9669693174702567, "grad_norm": 2.6765975952148438, "learning_rate": 5.571847507331378e-06, "loss": 1.2499, "step": 12565 }, { "epoch": 1.9671258609893552, "grad_norm": 2.20479416847229, "learning_rate": 5.547409579667644e-06, "loss": 1.1884, "step": 12566 }, { "epoch": 1.9672824045084534, "grad_norm": 2.699751377105713, "learning_rate": 5.5229716520039095e-06, "loss": 1.1944, "step": 12567 }, { "epoch": 1.9674389480275516, "grad_norm": 10.28531265258789, "learning_rate": 5.498533724340176e-06, "loss": 1.5012, "step": 12568 }, { "epoch": 1.9675954915466498, "grad_norm": 4.221407890319824, "learning_rate": 5.474095796676442e-06, "loss": 1.1851, "step": 12569 }, { "epoch": 1.9677520350657483, "grad_norm": 4.658267498016357, "learning_rate": 5.449657869012707e-06, "loss": 1.0217, "step": 12570 }, { "epoch": 1.9679085785848467, "grad_norm": 7.977578163146973, "learning_rate": 5.425219941348974e-06, "loss": 1.545, "step": 12571 }, { "epoch": 1.968065122103945, "grad_norm": 2.949538469314575, "learning_rate": 5.400782013685239e-06, "loss": 1.2254, "step": 12572 }, { "epoch": 1.9682216656230431, "grad_norm": 5.280106067657471, "learning_rate": 5.376344086021505e-06, "loss": 1.1792, "step": 12573 }, { "epoch": 1.9683782091421416, "grad_norm": 6.7711710929870605, "learning_rate": 5.3519061583577715e-06, "loss": 1.3272, "step": 12574 }, { "epoch": 1.9685347526612398, "grad_norm": 5.601173400878906, "learning_rate": 5.327468230694037e-06, 
"loss": 1.4548, "step": 12575 }, { "epoch": 1.9686912961803382, "grad_norm": 2.446711540222168, "learning_rate": 5.303030303030302e-06, "loss": 0.905, "step": 12576 }, { "epoch": 1.9688478396994364, "grad_norm": 2.5086371898651123, "learning_rate": 5.278592375366569e-06, "loss": 1.2203, "step": 12577 }, { "epoch": 1.9690043832185347, "grad_norm": 2.3690879344940186, "learning_rate": 5.254154447702834e-06, "loss": 0.9831, "step": 12578 }, { "epoch": 1.969160926737633, "grad_norm": 4.901893615722656, "learning_rate": 5.2297165200391e-06, "loss": 1.2816, "step": 12579 }, { "epoch": 1.9693174702567313, "grad_norm": 3.7496819496154785, "learning_rate": 5.205278592375365e-06, "loss": 0.8846, "step": 12580 }, { "epoch": 1.9694740137758298, "grad_norm": 6.612450122833252, "learning_rate": 5.180840664711632e-06, "loss": 1.17, "step": 12581 }, { "epoch": 1.969630557294928, "grad_norm": 5.878314018249512, "learning_rate": 5.1564027370478976e-06, "loss": 1.9362, "step": 12582 }, { "epoch": 1.9697871008140262, "grad_norm": 4.377343654632568, "learning_rate": 5.131964809384163e-06, "loss": 1.3451, "step": 12583 }, { "epoch": 1.9699436443331246, "grad_norm": 2.8641626834869385, "learning_rate": 5.10752688172043e-06, "loss": 0.6299, "step": 12584 }, { "epoch": 1.970100187852223, "grad_norm": 2.5689475536346436, "learning_rate": 5.083088954056695e-06, "loss": 0.5064, "step": 12585 }, { "epoch": 1.9702567313713213, "grad_norm": 2.1100947856903076, "learning_rate": 5.058651026392961e-06, "loss": 0.6611, "step": 12586 }, { "epoch": 1.9704132748904195, "grad_norm": 2.1359899044036865, "learning_rate": 5.0342130987292275e-06, "loss": 0.8358, "step": 12587 }, { "epoch": 1.9705698184095177, "grad_norm": 2.32698392868042, "learning_rate": 5.009775171065493e-06, "loss": 0.864, "step": 12588 }, { "epoch": 1.9707263619286162, "grad_norm": 0.9872207641601562, "learning_rate": 4.985337243401759e-06, "loss": 1.5319, "step": 12589 }, { "epoch": 1.9708829054477146, "grad_norm": 1.0228644609451294, 
"learning_rate": 4.960899315738025e-06, "loss": 1.4884, "step": 12590 }, { "epoch": 1.9710394489668128, "grad_norm": 0.822214663028717, "learning_rate": 4.936461388074291e-06, "loss": 1.5075, "step": 12591 }, { "epoch": 1.971195992485911, "grad_norm": 1.3645509481430054, "learning_rate": 4.9120234604105566e-06, "loss": 1.6097, "step": 12592 }, { "epoch": 1.9713525360050093, "grad_norm": 1.2646301984786987, "learning_rate": 4.887585532746823e-06, "loss": 1.5494, "step": 12593 }, { "epoch": 1.9715090795241077, "grad_norm": 1.6212059259414673, "learning_rate": 4.863147605083089e-06, "loss": 1.4176, "step": 12594 }, { "epoch": 1.9716656230432061, "grad_norm": 2.566788673400879, "learning_rate": 4.838709677419354e-06, "loss": 1.444, "step": 12595 }, { "epoch": 1.9718221665623044, "grad_norm": 1.1608645915985107, "learning_rate": 4.81427174975562e-06, "loss": 1.3891, "step": 12596 }, { "epoch": 1.9719787100814026, "grad_norm": 2.177610397338867, "learning_rate": 4.7898338220918865e-06, "loss": 1.3828, "step": 12597 }, { "epoch": 1.9721352536005008, "grad_norm": 5.3571977615356445, "learning_rate": 4.765395894428152e-06, "loss": 1.3869, "step": 12598 }, { "epoch": 1.9722917971195992, "grad_norm": 1.231022834777832, "learning_rate": 4.740957966764418e-06, "loss": 1.5305, "step": 12599 }, { "epoch": 1.9724483406386977, "grad_norm": 1.084704875946045, "learning_rate": 4.716520039100684e-06, "loss": 1.4779, "step": 12600 }, { "epoch": 1.972604884157796, "grad_norm": 2.484323024749756, "learning_rate": 4.69208211143695e-06, "loss": 1.4689, "step": 12601 }, { "epoch": 1.972761427676894, "grad_norm": 6.887755870819092, "learning_rate": 4.667644183773216e-06, "loss": 1.3976, "step": 12602 }, { "epoch": 1.9729179711959923, "grad_norm": 2.1070051193237305, "learning_rate": 4.643206256109481e-06, "loss": 1.4639, "step": 12603 }, { "epoch": 1.9730745147150908, "grad_norm": 8.759196281433105, "learning_rate": 4.618768328445748e-06, "loss": 1.6144, "step": 12604 }, { "epoch": 
1.9732310582341892, "grad_norm": 2.5562074184417725, "learning_rate": 4.594330400782013e-06, "loss": 1.4443, "step": 12605 }, { "epoch": 1.9733876017532874, "grad_norm": 1.4550449848175049, "learning_rate": 4.569892473118279e-06, "loss": 1.4747, "step": 12606 }, { "epoch": 1.9735441452723856, "grad_norm": 3.1739792823791504, "learning_rate": 4.5454545454545455e-06, "loss": 1.3716, "step": 12607 }, { "epoch": 1.973700688791484, "grad_norm": 2.384730577468872, "learning_rate": 4.521016617790811e-06, "loss": 1.4844, "step": 12608 }, { "epoch": 1.9738572323105823, "grad_norm": 1.8984757661819458, "learning_rate": 4.496578690127077e-06, "loss": 1.5907, "step": 12609 }, { "epoch": 1.9740137758296807, "grad_norm": 2.8245761394500732, "learning_rate": 4.472140762463343e-06, "loss": 1.5429, "step": 12610 }, { "epoch": 1.974170319348779, "grad_norm": 3.4093105792999268, "learning_rate": 4.447702834799608e-06, "loss": 1.408, "step": 12611 }, { "epoch": 1.9743268628678772, "grad_norm": 2.0443670749664307, "learning_rate": 4.423264907135875e-06, "loss": 1.2528, "step": 12612 }, { "epoch": 1.9744834063869756, "grad_norm": 2.573211193084717, "learning_rate": 4.39882697947214e-06, "loss": 1.1372, "step": 12613 }, { "epoch": 1.974639949906074, "grad_norm": 3.4609265327453613, "learning_rate": 4.374389051808406e-06, "loss": 1.1394, "step": 12614 }, { "epoch": 1.9747964934251723, "grad_norm": 5.100785255432129, "learning_rate": 4.349951124144672e-06, "loss": 1.2423, "step": 12615 }, { "epoch": 1.9749530369442705, "grad_norm": 2.3350820541381836, "learning_rate": 4.325513196480938e-06, "loss": 1.1019, "step": 12616 }, { "epoch": 1.9751095804633687, "grad_norm": 6.419863224029541, "learning_rate": 4.301075268817204e-06, "loss": 1.3405, "step": 12617 }, { "epoch": 1.9752661239824671, "grad_norm": 2.2042758464813232, "learning_rate": 4.27663734115347e-06, "loss": 1.5057, "step": 12618 }, { "epoch": 1.9754226675015656, "grad_norm": 4.460781574249268, "learning_rate": 
4.252199413489736e-06, "loss": 1.1672, "step": 12619 }, { "epoch": 1.9755792110206638, "grad_norm": 3.557311773300171, "learning_rate": 4.2277614858260015e-06, "loss": 1.1246, "step": 12620 }, { "epoch": 1.975735754539762, "grad_norm": 1.4838236570358276, "learning_rate": 4.203323558162268e-06, "loss": 1.149, "step": 12621 }, { "epoch": 1.9758922980588602, "grad_norm": 6.371537685394287, "learning_rate": 4.178885630498534e-06, "loss": 1.1258, "step": 12622 }, { "epoch": 1.9760488415779587, "grad_norm": 4.541806221008301, "learning_rate": 4.154447702834799e-06, "loss": 1.1318, "step": 12623 }, { "epoch": 1.9762053850970571, "grad_norm": 4.556163787841797, "learning_rate": 4.130009775171065e-06, "loss": 1.4951, "step": 12624 }, { "epoch": 1.9763619286161553, "grad_norm": 6.643764495849609, "learning_rate": 4.105571847507331e-06, "loss": 1.6353, "step": 12625 }, { "epoch": 1.9765184721352536, "grad_norm": 3.9981839656829834, "learning_rate": 4.081133919843597e-06, "loss": 1.3794, "step": 12626 }, { "epoch": 1.9766750156543518, "grad_norm": 2.1788060665130615, "learning_rate": 4.056695992179863e-06, "loss": 1.2775, "step": 12627 }, { "epoch": 1.9768315591734502, "grad_norm": 3.0667903423309326, "learning_rate": 4.032258064516128e-06, "loss": 1.3801, "step": 12628 }, { "epoch": 1.9769881026925487, "grad_norm": 5.267396926879883, "learning_rate": 4.007820136852395e-06, "loss": 1.6699, "step": 12629 }, { "epoch": 1.9771446462116469, "grad_norm": 3.0507497787475586, "learning_rate": 3.9833822091886605e-06, "loss": 1.2728, "step": 12630 }, { "epoch": 1.977301189730745, "grad_norm": 2.740083694458008, "learning_rate": 3.958944281524926e-06, "loss": 0.9162, "step": 12631 }, { "epoch": 1.9774577332498433, "grad_norm": 3.808227300643921, "learning_rate": 3.934506353861192e-06, "loss": 1.2745, "step": 12632 }, { "epoch": 1.9776142767689417, "grad_norm": 2.6046314239501953, "learning_rate": 3.910068426197458e-06, "loss": 1.1256, "step": 12633 }, { "epoch": 1.9777708202880402, 
"grad_norm": 2.029060125350952, "learning_rate": 3.885630498533724e-06, "loss": 0.5949, "step": 12634 }, { "epoch": 1.9779273638071384, "grad_norm": 4.188864707946777, "learning_rate": 3.86119257086999e-06, "loss": 0.8376, "step": 12635 }, { "epoch": 1.9780839073262366, "grad_norm": 1.602839708328247, "learning_rate": 3.836754643206256e-06, "loss": 0.4762, "step": 12636 }, { "epoch": 1.9782404508453348, "grad_norm": 1.9798146486282349, "learning_rate": 3.8123167155425217e-06, "loss": 0.7491, "step": 12637 }, { "epoch": 1.9783969943644333, "grad_norm": 7.58799409866333, "learning_rate": 3.7878787878787874e-06, "loss": 0.9718, "step": 12638 }, { "epoch": 1.9785535378835317, "grad_norm": 0.6994940638542175, "learning_rate": 3.7634408602150534e-06, "loss": 1.5842, "step": 12639 }, { "epoch": 1.97871008140263, "grad_norm": 1.8685157299041748, "learning_rate": 3.739002932551319e-06, "loss": 1.6148, "step": 12640 }, { "epoch": 1.9788666249217282, "grad_norm": 0.7222037315368652, "learning_rate": 3.714565004887585e-06, "loss": 1.5231, "step": 12641 }, { "epoch": 1.9790231684408266, "grad_norm": 0.7957490682601929, "learning_rate": 3.6901270772238512e-06, "loss": 1.5259, "step": 12642 }, { "epoch": 1.9791797119599248, "grad_norm": 0.8446835875511169, "learning_rate": 3.665689149560117e-06, "loss": 1.5504, "step": 12643 }, { "epoch": 1.9793362554790233, "grad_norm": 1.2421315908432007, "learning_rate": 3.641251221896383e-06, "loss": 1.5892, "step": 12644 }, { "epoch": 1.9794927989981215, "grad_norm": 1.1285243034362793, "learning_rate": 3.616813294232649e-06, "loss": 1.4458, "step": 12645 }, { "epoch": 1.9796493425172197, "grad_norm": 0.9664294123649597, "learning_rate": 3.5923753665689147e-06, "loss": 1.4354, "step": 12646 }, { "epoch": 1.9798058860363181, "grad_norm": 1.5649850368499756, "learning_rate": 3.5679374389051807e-06, "loss": 1.4355, "step": 12647 }, { "epoch": 1.9799624295554166, "grad_norm": 1.3277194499969482, "learning_rate": 3.5434995112414464e-06, "loss": 
1.6038, "step": 12648 }, { "epoch": 1.9801189730745148, "grad_norm": 1.0830150842666626, "learning_rate": 3.5190615835777125e-06, "loss": 1.4237, "step": 12649 }, { "epoch": 1.980275516593613, "grad_norm": 1.434964895248413, "learning_rate": 3.4946236559139785e-06, "loss": 1.4098, "step": 12650 }, { "epoch": 1.9804320601127112, "grad_norm": 1.861877202987671, "learning_rate": 3.4701857282502437e-06, "loss": 1.5271, "step": 12651 }, { "epoch": 1.9805886036318097, "grad_norm": 2.560774326324463, "learning_rate": 3.44574780058651e-06, "loss": 1.3852, "step": 12652 }, { "epoch": 1.980745147150908, "grad_norm": 1.684672236442566, "learning_rate": 3.4213098729227763e-06, "loss": 1.3889, "step": 12653 }, { "epoch": 1.9809016906700063, "grad_norm": 1.0975278615951538, "learning_rate": 3.3968719452590415e-06, "loss": 1.3657, "step": 12654 }, { "epoch": 1.9810582341891045, "grad_norm": 1.292589783668518, "learning_rate": 3.3724340175953076e-06, "loss": 1.2904, "step": 12655 }, { "epoch": 1.9812147777082028, "grad_norm": 4.135151386260986, "learning_rate": 3.3479960899315733e-06, "loss": 1.4146, "step": 12656 }, { "epoch": 1.9813713212273012, "grad_norm": 2.1695103645324707, "learning_rate": 3.3235581622678393e-06, "loss": 1.4241, "step": 12657 }, { "epoch": 1.9815278647463996, "grad_norm": 5.103790283203125, "learning_rate": 3.2991202346041054e-06, "loss": 1.5548, "step": 12658 }, { "epoch": 1.9816844082654979, "grad_norm": 3.985460042953491, "learning_rate": 3.274682306940371e-06, "loss": 1.4314, "step": 12659 }, { "epoch": 1.981840951784596, "grad_norm": 1.8362746238708496, "learning_rate": 3.250244379276637e-06, "loss": 1.3154, "step": 12660 }, { "epoch": 1.9819974953036943, "grad_norm": 5.180610179901123, "learning_rate": 3.225806451612903e-06, "loss": 1.4386, "step": 12661 }, { "epoch": 1.9821540388227927, "grad_norm": 3.1565520763397217, "learning_rate": 3.201368523949169e-06, "loss": 1.3667, "step": 12662 }, { "epoch": 1.9823105823418912, "grad_norm": 
1.2697768211364746, "learning_rate": 3.176930596285435e-06, "loss": 1.1725, "step": 12663 }, { "epoch": 1.9824671258609894, "grad_norm": 12.818161964416504, "learning_rate": 3.1524926686217006e-06, "loss": 1.7743, "step": 12664 }, { "epoch": 1.9826236693800876, "grad_norm": 4.739766597747803, "learning_rate": 3.1280547409579666e-06, "loss": 1.4785, "step": 12665 }, { "epoch": 1.9827802128991858, "grad_norm": 2.6713364124298096, "learning_rate": 3.1036168132942327e-06, "loss": 1.4496, "step": 12666 }, { "epoch": 1.9829367564182843, "grad_norm": 2.46557879447937, "learning_rate": 3.079178885630498e-06, "loss": 1.255, "step": 12667 }, { "epoch": 1.9830932999373827, "grad_norm": 3.2623789310455322, "learning_rate": 3.0547409579667644e-06, "loss": 1.3316, "step": 12668 }, { "epoch": 1.983249843456481, "grad_norm": 3.391611337661743, "learning_rate": 3.0303030303030305e-06, "loss": 1.1054, "step": 12669 }, { "epoch": 1.9834063869755791, "grad_norm": 2.285482168197632, "learning_rate": 3.0058651026392957e-06, "loss": 1.035, "step": 12670 }, { "epoch": 1.9835629304946774, "grad_norm": 1.301674246788025, "learning_rate": 2.9814271749755618e-06, "loss": 0.9496, "step": 12671 }, { "epoch": 1.9837194740137758, "grad_norm": 21.29591178894043, "learning_rate": 2.9569892473118274e-06, "loss": 1.2513, "step": 12672 }, { "epoch": 1.9838760175328742, "grad_norm": 3.679635763168335, "learning_rate": 2.9325513196480935e-06, "loss": 0.8991, "step": 12673 }, { "epoch": 1.9840325610519725, "grad_norm": 3.6654906272888184, "learning_rate": 2.9081133919843596e-06, "loss": 1.2406, "step": 12674 }, { "epoch": 1.9841891045710707, "grad_norm": 3.2650372982025146, "learning_rate": 2.8836754643206252e-06, "loss": 1.1958, "step": 12675 }, { "epoch": 1.9843456480901691, "grad_norm": 4.416281223297119, "learning_rate": 2.8592375366568913e-06, "loss": 1.1306, "step": 12676 }, { "epoch": 1.9845021916092673, "grad_norm": 2.6011712551116943, "learning_rate": 2.8347996089931574e-06, "loss": 1.1018, 
"step": 12677 }, { "epoch": 1.9846587351283658, "grad_norm": 3.921834945678711, "learning_rate": 2.810361681329423e-06, "loss": 0.9713, "step": 12678 }, { "epoch": 1.984815278647464, "grad_norm": 5.239248275756836, "learning_rate": 2.785923753665689e-06, "loss": 1.1969, "step": 12679 }, { "epoch": 1.9849718221665622, "grad_norm": 7.275485038757324, "learning_rate": 2.7614858260019547e-06, "loss": 0.8116, "step": 12680 }, { "epoch": 1.9851283656856606, "grad_norm": 1.9333630800247192, "learning_rate": 2.737047898338221e-06, "loss": 1.3322, "step": 12681 }, { "epoch": 1.985284909204759, "grad_norm": 3.5819573402404785, "learning_rate": 2.712609970674487e-06, "loss": 1.0275, "step": 12682 }, { "epoch": 1.9854414527238573, "grad_norm": 3.910200834274292, "learning_rate": 2.6881720430107525e-06, "loss": 0.8067, "step": 12683 }, { "epoch": 1.9855979962429555, "grad_norm": 3.8609650135040283, "learning_rate": 2.6637341153470186e-06, "loss": 0.7034, "step": 12684 }, { "epoch": 1.9857545397620537, "grad_norm": 2.1950485706329346, "learning_rate": 2.6392961876832847e-06, "loss": 0.8698, "step": 12685 }, { "epoch": 1.9859110832811522, "grad_norm": 3.267005205154419, "learning_rate": 2.61485826001955e-06, "loss": 0.9502, "step": 12686 }, { "epoch": 1.9860676268002506, "grad_norm": 1.1924513578414917, "learning_rate": 2.590420332355816e-06, "loss": 0.6124, "step": 12687 }, { "epoch": 1.9862241703193488, "grad_norm": 1.8844116926193237, "learning_rate": 2.5659824046920816e-06, "loss": 0.7941, "step": 12688 }, { "epoch": 1.986380713838447, "grad_norm": 0.9816786050796509, "learning_rate": 2.5415444770283477e-06, "loss": 1.4848, "step": 12689 }, { "epoch": 1.9865372573575453, "grad_norm": 0.7692421078681946, "learning_rate": 2.5171065493646137e-06, "loss": 1.4116, "step": 12690 }, { "epoch": 1.9866938008766437, "grad_norm": 0.8593037724494934, "learning_rate": 2.4926686217008794e-06, "loss": 1.4803, "step": 12691 }, { "epoch": 1.9868503443957422, "grad_norm": 1.4960730075836182, 
"learning_rate": 2.4682306940371455e-06, "loss": 1.4621, "step": 12692 }, { "epoch": 1.9870068879148404, "grad_norm": 1.2484159469604492, "learning_rate": 2.4437927663734115e-06, "loss": 1.3931, "step": 12693 }, { "epoch": 1.9871634314339386, "grad_norm": 2.241774320602417, "learning_rate": 2.419354838709677e-06, "loss": 1.4726, "step": 12694 }, { "epoch": 1.9873199749530368, "grad_norm": 1.0046658515930176, "learning_rate": 2.3949169110459433e-06, "loss": 1.3704, "step": 12695 }, { "epoch": 1.9874765184721352, "grad_norm": 1.600348949432373, "learning_rate": 2.370478983382209e-06, "loss": 1.4287, "step": 12696 }, { "epoch": 1.9876330619912337, "grad_norm": 0.8872906565666199, "learning_rate": 2.346041055718475e-06, "loss": 1.3028, "step": 12697 }, { "epoch": 1.987789605510332, "grad_norm": 1.608568787574768, "learning_rate": 2.3216031280547406e-06, "loss": 1.4942, "step": 12698 }, { "epoch": 1.9879461490294301, "grad_norm": 2.1755223274230957, "learning_rate": 2.2971652003910067e-06, "loss": 1.3595, "step": 12699 }, { "epoch": 1.9881026925485283, "grad_norm": 1.844046711921692, "learning_rate": 2.2727272727272728e-06, "loss": 1.396, "step": 12700 }, { "epoch": 1.9882592360676268, "grad_norm": 1.6896350383758545, "learning_rate": 2.2482893450635384e-06, "loss": 1.4409, "step": 12701 }, { "epoch": 1.9884157795867252, "grad_norm": 2.31750226020813, "learning_rate": 2.223851417399804e-06, "loss": 1.3609, "step": 12702 }, { "epoch": 1.9885723231058234, "grad_norm": 2.511352062225342, "learning_rate": 2.19941348973607e-06, "loss": 1.3918, "step": 12703 }, { "epoch": 1.9887288666249217, "grad_norm": 1.6797364950180054, "learning_rate": 2.174975562072336e-06, "loss": 1.3564, "step": 12704 }, { "epoch": 1.9888854101440199, "grad_norm": 3.3981409072875977, "learning_rate": 2.150537634408602e-06, "loss": 1.4078, "step": 12705 }, { "epoch": 1.9890419536631183, "grad_norm": 1.422497272491455, "learning_rate": 2.126099706744868e-06, "loss": 1.2969, "step": 12706 }, { "epoch": 
1.9891984971822168, "grad_norm": 6.27196741104126, "learning_rate": 2.101661779081134e-06, "loss": 1.3743, "step": 12707 }, { "epoch": 1.989355040701315, "grad_norm": 2.9742422103881836, "learning_rate": 2.0772238514173996e-06, "loss": 1.1895, "step": 12708 }, { "epoch": 1.9895115842204132, "grad_norm": 2.017789363861084, "learning_rate": 2.0527859237536657e-06, "loss": 1.3513, "step": 12709 }, { "epoch": 1.9896681277395116, "grad_norm": 4.888723850250244, "learning_rate": 2.0283479960899314e-06, "loss": 1.5357, "step": 12710 }, { "epoch": 1.9898246712586098, "grad_norm": 2.907987594604492, "learning_rate": 2.0039100684261974e-06, "loss": 1.4441, "step": 12711 }, { "epoch": 1.9899812147777083, "grad_norm": 4.175799369812012, "learning_rate": 1.979472140762463e-06, "loss": 1.3473, "step": 12712 }, { "epoch": 1.9901377582968065, "grad_norm": 1.9999713897705078, "learning_rate": 1.955034213098729e-06, "loss": 1.4132, "step": 12713 }, { "epoch": 1.9902943018159047, "grad_norm": 2.5126242637634277, "learning_rate": 1.930596285434995e-06, "loss": 1.2115, "step": 12714 }, { "epoch": 1.9904508453350032, "grad_norm": 1.8449431657791138, "learning_rate": 1.9061583577712609e-06, "loss": 1.1961, "step": 12715 }, { "epoch": 1.9906073888541016, "grad_norm": 8.649978637695312, "learning_rate": 1.8817204301075267e-06, "loss": 1.2299, "step": 12716 }, { "epoch": 1.9907639323731998, "grad_norm": 12.748600006103516, "learning_rate": 1.8572825024437926e-06, "loss": 1.2318, "step": 12717 }, { "epoch": 1.990920475892298, "grad_norm": 9.06901741027832, "learning_rate": 1.8328445747800584e-06, "loss": 1.4941, "step": 12718 }, { "epoch": 1.9910770194113963, "grad_norm": 6.057063102722168, "learning_rate": 1.8084066471163245e-06, "loss": 1.0969, "step": 12719 }, { "epoch": 1.9912335629304947, "grad_norm": 3.5154621601104736, "learning_rate": 1.7839687194525904e-06, "loss": 1.2691, "step": 12720 }, { "epoch": 1.9913901064495931, "grad_norm": 3.144012451171875, "learning_rate": 
1.7595307917888562e-06, "loss": 1.0831, "step": 12721 }, { "epoch": 1.9915466499686914, "grad_norm": 7.510293483734131, "learning_rate": 1.7350928641251219e-06, "loss": 1.0199, "step": 12722 }, { "epoch": 1.9917031934877896, "grad_norm": 3.6680872440338135, "learning_rate": 1.7106549364613882e-06, "loss": 1.3326, "step": 12723 }, { "epoch": 1.9918597370068878, "grad_norm": 7.296219348907471, "learning_rate": 1.6862170087976538e-06, "loss": 1.1472, "step": 12724 }, { "epoch": 1.9920162805259862, "grad_norm": 3.799044132232666, "learning_rate": 1.6617790811339197e-06, "loss": 0.9459, "step": 12725 }, { "epoch": 1.9921728240450847, "grad_norm": 4.250785827636719, "learning_rate": 1.6373411534701855e-06, "loss": 1.1241, "step": 12726 }, { "epoch": 1.9923293675641829, "grad_norm": 5.237523078918457, "learning_rate": 1.6129032258064516e-06, "loss": 1.1708, "step": 12727 }, { "epoch": 1.992485911083281, "grad_norm": 2.55849552154541, "learning_rate": 1.5884652981427175e-06, "loss": 1.2067, "step": 12728 }, { "epoch": 1.9926424546023793, "grad_norm": 4.564910411834717, "learning_rate": 1.5640273704789833e-06, "loss": 1.8229, "step": 12729 }, { "epoch": 1.9927989981214778, "grad_norm": 3.0857863426208496, "learning_rate": 1.539589442815249e-06, "loss": 1.2861, "step": 12730 }, { "epoch": 1.9929555416405762, "grad_norm": 3.102818489074707, "learning_rate": 1.5151515151515152e-06, "loss": 1.1077, "step": 12731 }, { "epoch": 1.9931120851596744, "grad_norm": 1.7498348951339722, "learning_rate": 1.4907135874877809e-06, "loss": 0.6339, "step": 12732 }, { "epoch": 1.9932686286787726, "grad_norm": null, "learning_rate": 1.4907135874877809e-06, "loss": 0.0, "step": 12733 }, { "epoch": 1.9934251721978709, "grad_norm": 8.903223991394043, "learning_rate": 1.4662756598240468e-06, "loss": 0.9257, "step": 12734 }, { "epoch": 1.9935817157169693, "grad_norm": 9.738998413085938, "learning_rate": 1.4418377321603126e-06, "loss": 1.1337, "step": 12735 }, { "epoch": 1.9937382592360677, 
"grad_norm": 2.1444084644317627, "learning_rate": 1.4173998044965787e-06, "loss": 0.5903, "step": 12736 }, { "epoch": 1.993894802755166, "grad_norm": 3.0119729042053223, "learning_rate": 1.3929618768328445e-06, "loss": 1.2444, "step": 12737 }, { "epoch": 1.9940513462742642, "grad_norm": 4.827343940734863, "learning_rate": 1.3685239491691104e-06, "loss": 1.2683, "step": 12738 }, { "epoch": 1.9942078897933626, "grad_norm": 1.7269351482391357, "learning_rate": 1.3440860215053763e-06, "loss": 1.4892, "step": 12739 }, { "epoch": 1.9943644333124608, "grad_norm": 1.236871600151062, "learning_rate": 1.3196480938416423e-06, "loss": 1.4841, "step": 12740 }, { "epoch": 1.9945209768315593, "grad_norm": 1.0202114582061768, "learning_rate": 1.295210166177908e-06, "loss": 1.4703, "step": 12741 }, { "epoch": 1.9946775203506575, "grad_norm": 0.5978159308433533, "learning_rate": 1.2707722385141738e-06, "loss": 1.4379, "step": 12742 }, { "epoch": 1.9948340638697557, "grad_norm": 1.8672462701797485, "learning_rate": 1.2463343108504397e-06, "loss": 1.3344, "step": 12743 }, { "epoch": 1.9949906073888541, "grad_norm": 0.9997517466545105, "learning_rate": 1.2218963831867058e-06, "loss": 1.4433, "step": 12744 }, { "epoch": 1.9951471509079524, "grad_norm": 1.3582754135131836, "learning_rate": 1.1974584555229716e-06, "loss": 1.4282, "step": 12745 }, { "epoch": 1.9953036944270508, "grad_norm": 1.0937734842300415, "learning_rate": 1.1730205278592375e-06, "loss": 1.4486, "step": 12746 }, { "epoch": 1.995460237946149, "grad_norm": 0.7297368049621582, "learning_rate": 1.1485826001955033e-06, "loss": 1.3394, "step": 12747 }, { "epoch": 1.9956167814652472, "grad_norm": 1.304893136024475, "learning_rate": 1.1241446725317692e-06, "loss": 1.3208, "step": 12748 }, { "epoch": 1.9957733249843457, "grad_norm": 0.9732430577278137, "learning_rate": 1.099706744868035e-06, "loss": 1.3768, "step": 12749 }, { "epoch": 1.9959298685034441, "grad_norm": 2.0447745323181152, "learning_rate": 1.075268817204301e-06, 
"loss": 1.3748, "step": 12750 }, { "epoch": 1.9960864120225423, "grad_norm": 1.8304771184921265, "learning_rate": 1.050830889540567e-06, "loss": 1.4384, "step": 12751 }, { "epoch": 1.9962429555416406, "grad_norm": 1.1020729541778564, "learning_rate": 1.0263929618768329e-06, "loss": 1.3156, "step": 12752 }, { "epoch": 1.9963994990607388, "grad_norm": 2.1293833255767822, "learning_rate": 1.0019550342130987e-06, "loss": 1.2381, "step": 12753 }, { "epoch": 1.9965560425798372, "grad_norm": 2.4541094303131104, "learning_rate": 9.775171065493646e-07, "loss": 1.2771, "step": 12754 }, { "epoch": 1.9967125860989356, "grad_norm": 3.102612257003784, "learning_rate": 9.530791788856304e-07, "loss": 1.3604, "step": 12755 }, { "epoch": 1.9968691296180339, "grad_norm": 1.3522253036499023, "learning_rate": 9.286412512218963e-07, "loss": 1.2247, "step": 12756 }, { "epoch": 1.997025673137132, "grad_norm": 4.309826850891113, "learning_rate": 9.042033235581623e-07, "loss": 1.3129, "step": 12757 }, { "epoch": 1.9971822166562303, "grad_norm": 1.946575403213501, "learning_rate": 8.797653958944281e-07, "loss": 1.1901, "step": 12758 }, { "epoch": 1.9973387601753287, "grad_norm": 2.3310208320617676, "learning_rate": 8.553274682306941e-07, "loss": 1.0399, "step": 12759 }, { "epoch": 1.9974953036944272, "grad_norm": 1.9439860582351685, "learning_rate": 8.308895405669598e-07, "loss": 1.1825, "step": 12760 }, { "epoch": 1.9976518472135254, "grad_norm": 2.59775972366333, "learning_rate": 8.064516129032258e-07, "loss": 1.2622, "step": 12761 }, { "epoch": 1.9978083907326236, "grad_norm": 4.020702838897705, "learning_rate": 7.820136852394917e-07, "loss": 1.4661, "step": 12762 }, { "epoch": 1.9979649342517218, "grad_norm": 4.491816997528076, "learning_rate": 7.575757575757576e-07, "loss": 1.2114, "step": 12763 }, { "epoch": 1.9981214777708203, "grad_norm": 3.0681772232055664, "learning_rate": 7.331378299120234e-07, "loss": 1.169, "step": 12764 }, { "epoch": 1.9982780212899187, "grad_norm": 
11.430411338806152, "learning_rate": 7.086999022482893e-07, "loss": 1.5927, "step": 12765 }, { "epoch": 1.998434564809017, "grad_norm": 8.486896514892578, "learning_rate": 6.842619745845552e-07, "loss": 1.3744, "step": 12766 }, { "epoch": 1.9985911083281152, "grad_norm": 14.378063201904297, "learning_rate": 6.598240469208212e-07, "loss": 1.1301, "step": 12767 }, { "epoch": 1.9987476518472134, "grad_norm": 15.947986602783203, "learning_rate": 6.353861192570869e-07, "loss": 1.7693, "step": 12768 }, { "epoch": 1.9989041953663118, "grad_norm": 10.535764694213867, "learning_rate": 6.109481915933529e-07, "loss": 1.2029, "step": 12769 }, { "epoch": 1.9990607388854102, "grad_norm": 3.89440655708313, "learning_rate": 5.865102639296187e-07, "loss": 1.4126, "step": 12770 }, { "epoch": 1.9992172824045085, "grad_norm": 2.050438642501831, "learning_rate": 5.620723362658846e-07, "loss": 1.1434, "step": 12771 }, { "epoch": 1.9993738259236067, "grad_norm": 1.9969635009765625, "learning_rate": 5.376344086021505e-07, "loss": 1.0368, "step": 12772 }, { "epoch": 1.9995303694427051, "grad_norm": 4.676234722137451, "learning_rate": 5.131964809384164e-07, "loss": 0.965, "step": 12773 }, { "epoch": 1.9996869129618033, "grad_norm": 1.3593485355377197, "learning_rate": 4.887585532746823e-07, "loss": 0.5821, "step": 12774 }, { "epoch": 1.9998434564809018, "grad_norm": 2.8748373985290527, "learning_rate": 4.6432062561094814e-07, "loss": 1.5815, "step": 12775 }, { "epoch": 2.0, "grad_norm": 4.058191299438477, "learning_rate": 4.3988269794721406e-07, "loss": 1.2623, "step": 12776 }, { "epoch": 2.0, "step": 12776, "total_flos": 1.799905283771071e+19, "train_loss": 0.9086307350367672, "train_runtime": 8068.0189, "train_samples_per_second": 25.335, "train_steps_per_second": 1.584 } ], "logging_steps": 1.0, "max_steps": 12776, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": 
false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.799905283771071e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }