{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2016, "global_step": 20159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.960563520015874e-05, "grad_norm": 313.8003845214844, "learning_rate": 4.950495049504951e-08, "loss": 1.8631, "step": 1 }, { "epoch": 9.921127040031748e-05, "grad_norm": 60.26166915893555, "learning_rate": 9.900990099009901e-08, "loss": 0.8938, "step": 2 }, { "epoch": 0.00014881690560047622, "grad_norm": 65.89754486083984, "learning_rate": 1.4851485148514852e-07, "loss": 0.8014, "step": 3 }, { "epoch": 0.00019842254080063495, "grad_norm": 109.2684097290039, "learning_rate": 1.9801980198019803e-07, "loss": 1.1732, "step": 4 }, { "epoch": 0.0002480281760007937, "grad_norm": 239.30319213867188, "learning_rate": 2.4752475247524754e-07, "loss": 0.9795, "step": 5 }, { "epoch": 0.00029763381120095244, "grad_norm": 109.19886779785156, "learning_rate": 2.9702970297029703e-07, "loss": 1.1059, "step": 6 }, { "epoch": 0.00034723944640111117, "grad_norm": 334.4134216308594, "learning_rate": 3.4653465346534657e-07, "loss": 1.9264, "step": 7 }, { "epoch": 0.0003968450816012699, "grad_norm": 158.2940673828125, "learning_rate": 3.9603960396039606e-07, "loss": 0.9327, "step": 8 }, { "epoch": 0.00044645071680142863, "grad_norm": 78.48124694824219, "learning_rate": 4.4554455445544555e-07, "loss": 0.8212, "step": 9 }, { "epoch": 0.0004960563520015874, "grad_norm": 301.1368713378906, "learning_rate": 4.950495049504951e-07, "loss": 1.1863, "step": 10 }, { "epoch": 0.0005456619872017462, "grad_norm": 226.20018005371094, "learning_rate": 5.445544554455446e-07, "loss": 1.4193, "step": 11 }, { "epoch": 0.0005952676224019049, "grad_norm": 117.46919250488281, "learning_rate": 5.940594059405941e-07, "loss": 1.1051, "step": 12 }, { "epoch": 0.0006448732576020636, "grad_norm": 49.43025588989258, "learning_rate": 6.435643564356436e-07, "loss": 0.7315, "step": 13 }, { "epoch": 0.0006944788928022223, "grad_norm": 167.45950317382812, "learning_rate": 6.930693069306931e-07, "loss": 1.2401, "step": 14 }, { "epoch": 0.0007440845280023811, "grad_norm": 116.13873291015625, "learning_rate": 7.425742574257426e-07, "loss": 0.9552, "step": 15 }, { "epoch": 0.0007936901632025398, "grad_norm": 113.81809997558594, "learning_rate": 7.920792079207921e-07, "loss": 0.8449, "step": 16 }, { "epoch": 0.0008432957984026985, "grad_norm": 126.81112670898438, "learning_rate": 8.415841584158417e-07, "loss": 0.9571, "step": 17 }, { "epoch": 0.0008929014336028573, "grad_norm": 72.8443374633789, "learning_rate": 8.910891089108911e-07, "loss": 0.7989, "step": 18 }, { "epoch": 0.000942507068803016, "grad_norm": 137.98416137695312, "learning_rate": 9.405940594059406e-07, "loss": 0.9106, "step": 19 }, { "epoch": 0.0009921127040031748, "grad_norm": 133.04995727539062, "learning_rate": 9.900990099009902e-07, "loss": 0.8936, "step": 20 }, { "epoch": 0.0010417183392033335, "grad_norm": 35.246063232421875, "learning_rate": 1.0396039603960397e-06, "loss": 0.6929, "step": 21 }, { "epoch": 0.0010913239744034923, "grad_norm": 38.17347717285156, "learning_rate": 1.0891089108910893e-06, "loss": 0.6641, "step": 22 }, { "epoch": 0.001140929609603651, "grad_norm": 38.74116897583008, "learning_rate": 1.1386138613861388e-06, "loss": 0.7132, "step": 23 }, { "epoch": 0.0011905352448038098, "grad_norm": 63.76765060424805, "learning_rate": 1.1881188118811881e-06, "loss": 0.8797, "step": 24 }, { "epoch": 0.0012401408800039684, "grad_norm": 31.278779983520508, "learning_rate": 1.2376237623762377e-06, "loss": 0.7318, "step": 25 }, { "epoch": 0.0012897465152041272, "grad_norm": 22.666894912719727, "learning_rate": 1.2871287128712872e-06, "loss": 0.6718, "step": 26 }, { "epoch": 0.0013393521504042858, "grad_norm": 31.732378005981445, "learning_rate": 1.3366336633663367e-06, "loss": 0.7167, "step": 27 }, { "epoch": 0.0013889577856044447, "grad_norm": 37.39213943481445, "learning_rate": 1.3861386138613863e-06, "loss": 0.6185, "step": 28 }, { "epoch": 0.0014385634208046033, "grad_norm": 18.69105339050293, "learning_rate": 1.4356435643564356e-06, "loss": 0.6646, "step": 29 }, { "epoch": 0.0014881690560047621, "grad_norm": 15.718194007873535, "learning_rate": 1.4851485148514852e-06, "loss": 0.6275, "step": 30 }, { "epoch": 0.0015377746912049208, "grad_norm": 19.494260787963867, "learning_rate": 1.5346534653465347e-06, "loss": 0.6172, "step": 31 }, { "epoch": 0.0015873803264050796, "grad_norm": 40.715171813964844, "learning_rate": 1.5841584158415842e-06, "loss": 0.6217, "step": 32 }, { "epoch": 0.0016369859616052385, "grad_norm": 34.460723876953125, "learning_rate": 1.6336633663366338e-06, "loss": 0.6514, "step": 33 }, { "epoch": 0.001686591596805397, "grad_norm": 25.157167434692383, "learning_rate": 1.6831683168316833e-06, "loss": 0.5722, "step": 34 }, { "epoch": 0.001736197232005556, "grad_norm": 29.140235900878906, "learning_rate": 1.7326732673267326e-06, "loss": 0.5931, "step": 35 }, { "epoch": 0.0017858028672057145, "grad_norm": 36.135276794433594, "learning_rate": 1.7821782178217822e-06, "loss": 0.6764, "step": 36 }, { "epoch": 0.0018354085024058734, "grad_norm": 49.580787658691406, "learning_rate": 1.8316831683168317e-06, "loss": 0.7997, "step": 37 }, { "epoch": 0.001885014137606032, "grad_norm": 21.373167037963867, "learning_rate": 1.8811881188118813e-06, "loss": 0.5852, "step": 38 }, { "epoch": 0.0019346197728061908, "grad_norm": 27.892375946044922, "learning_rate": 1.930693069306931e-06, "loss": 0.6429, "step": 39 }, { "epoch": 0.0019842254080063497, "grad_norm": 38.952392578125, "learning_rate": 1.9801980198019803e-06, "loss": 0.4714, "step": 40 }, { "epoch": 0.002033831043206508, "grad_norm": 33.0507698059082, "learning_rate": 2.02970297029703e-06, "loss": 0.6219, "step": 41 }, { "epoch": 0.002083436678406667, "grad_norm": 26.93797492980957, "learning_rate": 2.0792079207920794e-06, "loss": 0.4456, "step": 42 }, { "epoch": 0.0021330423136068258, "grad_norm": 29.08717918395996, "learning_rate": 2.1287128712871288e-06, "loss": 0.5599, "step": 43 }, { "epoch": 0.0021826479488069846, "grad_norm": 38.72993850708008, "learning_rate": 2.1782178217821785e-06, "loss": 0.5704, "step": 44 }, { "epoch": 0.002232253584007143, "grad_norm": 45.96347427368164, "learning_rate": 2.227722772277228e-06, "loss": 0.5426, "step": 45 }, { "epoch": 0.002281859219207302, "grad_norm": 37.45112228393555, "learning_rate": 2.2772277227722776e-06, "loss": 0.5467, "step": 46 }, { "epoch": 0.0023314648544074607, "grad_norm": 25.796981811523438, "learning_rate": 2.326732673267327e-06, "loss": 0.4663, "step": 47 }, { "epoch": 0.0023810704896076195, "grad_norm": 38.84114456176758, "learning_rate": 2.3762376237623762e-06, "loss": 0.4936, "step": 48 }, { "epoch": 0.0024306761248077784, "grad_norm": 22.186264038085938, "learning_rate": 2.425742574257426e-06, "loss": 0.4631, "step": 49 }, { "epoch": 0.0024802817600079368, "grad_norm": 42.77132797241211, "learning_rate": 2.4752475247524753e-06, "loss": 0.503, "step": 50 }, { "epoch": 0.0025298873952080956, "grad_norm": 39.474491119384766, "learning_rate": 2.524752475247525e-06, "loss": 0.5422, "step": 51 }, { "epoch": 0.0025794930304082545, "grad_norm": 39.34268569946289, "learning_rate": 2.5742574257425744e-06, "loss": 0.5082, "step": 52 }, { "epoch": 0.0026290986656084133, "grad_norm": 23.07193946838379, "learning_rate": 2.623762376237624e-06, "loss": 0.4663, "step": 53 }, { "epoch": 0.0026787043008085717, "grad_norm": 41.084529876708984, "learning_rate": 2.6732673267326735e-06, "loss": 0.5701, "step": 54 }, { "epoch": 0.0027283099360087305, "grad_norm": 28.03870964050293, "learning_rate": 2.7227722772277232e-06, "loss": 0.4519, "step": 55 }, { "epoch": 0.0027779155712088894, "grad_norm": 40.510475158691406, "learning_rate": 2.7722772277227726e-06, "loss": 0.3547, "step": 56 }, { "epoch": 0.002827521206409048, "grad_norm": 28.692245483398438, "learning_rate": 2.821782178217822e-06, "loss": 0.5383, "step": 57 }, { "epoch": 0.0028771268416092066, "grad_norm": 40.887325286865234, "learning_rate": 2.8712871287128712e-06, "loss": 0.527, "step": 58 }, { "epoch": 0.0029267324768093655, "grad_norm": 19.15994644165039, "learning_rate": 2.920792079207921e-06, "loss": 0.4555, "step": 59 }, { "epoch": 0.0029763381120095243, "grad_norm": 18.261560440063477, "learning_rate": 2.9702970297029703e-06, "loss": 0.435, "step": 60 }, { "epoch": 0.003025943747209683, "grad_norm": 29.299951553344727, "learning_rate": 3.01980198019802e-06, "loss": 0.4893, "step": 61 }, { "epoch": 0.0030755493824098415, "grad_norm": 32.63141632080078, "learning_rate": 3.0693069306930694e-06, "loss": 0.3497, "step": 62 }, { "epoch": 0.0031251550176100004, "grad_norm": 44.92351150512695, "learning_rate": 3.118811881188119e-06, "loss": 0.5047, "step": 63 }, { "epoch": 0.0031747606528101592, "grad_norm": 45.499324798583984, "learning_rate": 3.1683168316831685e-06, "loss": 0.3875, "step": 64 }, { "epoch": 0.003224366288010318, "grad_norm": 42.84821701049805, "learning_rate": 3.2178217821782182e-06, "loss": 0.5132, "step": 65 }, { "epoch": 0.003273971923210477, "grad_norm": 30.28244972229004, "learning_rate": 3.2673267326732676e-06, "loss": 0.4313, "step": 66 }, { "epoch": 0.0033235775584106353, "grad_norm": 26.54020118713379, "learning_rate": 3.3168316831683173e-06, "loss": 0.4391, "step": 67 }, { "epoch": 0.003373183193610794, "grad_norm": 28.584741592407227, "learning_rate": 3.3663366336633666e-06, "loss": 0.4392, "step": 68 }, { "epoch": 0.003422788828810953, "grad_norm": 30.209651947021484, "learning_rate": 3.4158415841584164e-06, "loss": 0.4863, "step": 69 }, { "epoch": 0.003472394464011112, "grad_norm": 16.50473403930664, "learning_rate": 3.4653465346534653e-06, "loss": 0.3452, "step": 70 }, { "epoch": 0.0035220000992112702, "grad_norm": 53.87784957885742, "learning_rate": 3.514851485148515e-06, "loss": 0.5716, "step": 71 }, { "epoch": 0.003571605734411429, "grad_norm": 32.77654266357422, "learning_rate": 3.5643564356435644e-06, "loss": 0.4731, "step": 72 }, { "epoch": 0.003621211369611588, "grad_norm": 17.915454864501953, "learning_rate": 3.613861386138614e-06, "loss": 0.4273, "step": 73 }, { "epoch": 0.0036708170048117468, "grad_norm": 15.455549240112305, "learning_rate": 3.6633663366336635e-06, "loss": 0.4277, "step": 74 }, { "epoch": 0.003720422640011905, "grad_norm": 45.318729400634766, "learning_rate": 3.712871287128713e-06, "loss": 0.4785, "step": 75 }, { "epoch": 0.003770028275212064, "grad_norm": 19.131244659423828, "learning_rate": 3.7623762376237625e-06, "loss": 0.3231, "step": 76 }, { "epoch": 0.003819633910412223, "grad_norm": 27.6126766204834, "learning_rate": 3.8118811881188123e-06, "loss": 0.3868, "step": 77 }, { "epoch": 0.0038692395456123817, "grad_norm": 19.682743072509766, "learning_rate": 3.861386138613862e-06, "loss": 0.4048, "step": 78 }, { "epoch": 0.00391884518081254, "grad_norm": 20.545330047607422, "learning_rate": 3.910891089108911e-06, "loss": 0.4528, "step": 79 }, { "epoch": 0.003968450816012699, "grad_norm": 34.68581008911133, "learning_rate": 3.960396039603961e-06, "loss": 0.3608, "step": 80 }, { "epoch": 0.004018056451212858, "grad_norm": 22.909448623657227, "learning_rate": 4.0099009900990104e-06, "loss": 0.472, "step": 81 }, { "epoch": 0.004067662086413016, "grad_norm": 22.657512664794922, "learning_rate": 4.05940594059406e-06, "loss": 0.4199, "step": 82 }, { "epoch": 0.0041172677216131754, "grad_norm": 23.03064727783203, "learning_rate": 4.108910891089109e-06, "loss": 0.4285, "step": 83 }, { "epoch": 0.004166873356813334, "grad_norm": 20.690202713012695, "learning_rate": 4.158415841584159e-06, "loss": 0.4687, "step": 84 }, { "epoch": 0.004216478992013493, "grad_norm": 15.045668601989746, "learning_rate": 4.207920792079208e-06, "loss": 0.4297, "step": 85 }, { "epoch": 0.0042660846272136515, "grad_norm": 25.45388412475586, "learning_rate": 4.2574257425742575e-06, "loss": 0.42, "step": 86 }, { "epoch": 0.00431569026241381, "grad_norm": 25.7669734954834, "learning_rate": 4.306930693069307e-06, "loss": 0.4688, "step": 87 }, { "epoch": 0.004365295897613969, "grad_norm": 19.247892379760742, "learning_rate": 4.356435643564357e-06, "loss": 0.3891, "step": 88 }, { "epoch": 0.004414901532814128, "grad_norm": 36.323272705078125, "learning_rate": 4.405940594059406e-06, "loss": 0.3822, "step": 89 }, { "epoch": 0.004464507168014286, "grad_norm": 51.86178207397461, "learning_rate": 4.455445544554456e-06, "loss": 0.6362, "step": 90 }, { "epoch": 0.004514112803214445, "grad_norm": 26.09789276123047, "learning_rate": 4.5049504950495054e-06, "loss": 0.4054, "step": 91 }, { "epoch": 0.004563718438414604, "grad_norm": 25.533124923706055, "learning_rate": 4.554455445544555e-06, "loss": 0.5425, "step": 92 }, { "epoch": 0.004613324073614763, "grad_norm": 39.46369171142578, "learning_rate": 4.603960396039605e-06, "loss": 0.5326, "step": 93 }, { "epoch": 0.004662929708814921, "grad_norm": 14.54111385345459, "learning_rate": 4.653465346534654e-06, "loss": 0.4083, "step": 94 }, { "epoch": 0.00471253534401508, "grad_norm": 15.190861701965332, "learning_rate": 4.702970297029703e-06, "loss": 0.4495, "step": 95 }, { "epoch": 0.004762140979215239, "grad_norm": 21.587852478027344, "learning_rate": 4.7524752475247525e-06, "loss": 0.5892, "step": 96 }, { "epoch": 0.0048117466144153975, "grad_norm": 22.922243118286133, "learning_rate": 4.801980198019802e-06, "loss": 0.307, "step": 97 }, { "epoch": 0.004861352249615557, "grad_norm": 15.527511596679688, "learning_rate": 4.851485148514852e-06, "loss": 0.4145, "step": 98 }, { "epoch": 0.004910957884815715, "grad_norm": 18.455883026123047, "learning_rate": 4.900990099009901e-06, "loss": 0.5195, "step": 99 }, { "epoch": 0.0049605635200158735, "grad_norm": 11.696106910705566, "learning_rate": 4.950495049504951e-06, "loss": 0.4076, "step": 100 }, { "epoch": 0.005010169155216033, "grad_norm": 19.642595291137695, "learning_rate": 5e-06, "loss": 0.4612, "step": 101 }, { "epoch": 0.005059774790416191, "grad_norm": 29.922225952148438, "learning_rate": 5.04950495049505e-06, "loss": 0.4847, "step": 102 }, { "epoch": 0.00510938042561635, "grad_norm": 41.38890075683594, "learning_rate": 5.0990099009901e-06, "loss": 0.6425, "step": 103 }, { "epoch": 0.005158986060816509, "grad_norm": 17.741859436035156, "learning_rate": 5.148514851485149e-06, "loss": 0.487, "step": 104 }, { "epoch": 0.005208591696016667, "grad_norm": 14.18952465057373, "learning_rate": 5.1980198019801986e-06, "loss": 0.4204, "step": 105 }, { "epoch": 0.005258197331216827, "grad_norm": 14.204245567321777, "learning_rate": 5.247524752475248e-06, "loss": 0.5631, "step": 106 }, { "epoch": 0.005307802966416985, "grad_norm": 15.618069648742676, "learning_rate": 5.297029702970298e-06, "loss": 0.4203, "step": 107 }, { "epoch": 0.005357408601617143, "grad_norm": 16.275108337402344, "learning_rate": 5.346534653465347e-06, "loss": 0.4403, "step": 108 }, { "epoch": 0.005407014236817303, "grad_norm": 34.771141052246094, "learning_rate": 5.396039603960397e-06, "loss": 0.5103, "step": 109 }, { "epoch": 0.005456619872017461, "grad_norm": 15.471515655517578, "learning_rate": 5.4455445544554465e-06, "loss": 0.3497, "step": 110 }, { "epoch": 0.00550622550721762, "grad_norm": 28.97077751159668, "learning_rate": 5.495049504950496e-06, "loss": 0.5098, "step": 111 }, { "epoch": 0.005555831142417779, "grad_norm": 26.617956161499023, "learning_rate": 5.544554455445545e-06, "loss": 0.4649, "step": 112 }, { "epoch": 0.005605436777617937, "grad_norm": 19.01515769958496, "learning_rate": 5.594059405940595e-06, "loss": 0.4389, "step": 113 }, { "epoch": 0.005655042412818096, "grad_norm": 15.260920524597168, "learning_rate": 5.643564356435644e-06, "loss": 0.5277, "step": 114 }, { "epoch": 0.005704648048018255, "grad_norm": 37.223533630371094, "learning_rate": 5.693069306930693e-06, "loss": 0.5198, "step": 115 }, { "epoch": 0.005754253683218413, "grad_norm": 24.12113380432129, "learning_rate": 5.7425742574257425e-06, "loss": 0.484, "step": 116 }, { "epoch": 0.0058038593184185725, "grad_norm": 15.16081428527832, "learning_rate": 5.792079207920792e-06, "loss": 0.4265, "step": 117 }, { "epoch": 0.005853464953618731, "grad_norm": 27.445985794067383, "learning_rate": 5.841584158415842e-06, "loss": 0.655, "step": 118 }, { "epoch": 0.00590307058881889, "grad_norm": 18.24595832824707, "learning_rate": 5.891089108910891e-06, "loss": 0.3656, "step": 119 }, { "epoch": 0.005952676224019049, "grad_norm": 19.857099533081055, "learning_rate": 5.940594059405941e-06, "loss": 0.4638, "step": 120 }, { "epoch": 0.006002281859219207, "grad_norm": 16.44297981262207, "learning_rate": 5.99009900990099e-06, "loss": 0.452, "step": 121 }, { "epoch": 0.006051887494419366, "grad_norm": 20.350383758544922, "learning_rate": 6.03960396039604e-06, "loss": 0.4105, "step": 122 }, { "epoch": 0.006101493129619525, "grad_norm": 12.230113983154297, "learning_rate": 6.08910891089109e-06, "loss": 0.3818, "step": 123 }, { "epoch": 0.006151098764819683, "grad_norm": 20.425289154052734, "learning_rate": 6.138613861386139e-06, "loss": 0.4502, "step": 124 }, { "epoch": 0.006200704400019842, "grad_norm": 40.304447174072266, "learning_rate": 6.1881188118811885e-06, "loss": 0.5971, "step": 125 }, { "epoch": 0.006250310035220001, "grad_norm": 15.348257064819336, "learning_rate": 6.237623762376238e-06, "loss": 0.3838, "step": 126 }, { "epoch": 0.00629991567042016, "grad_norm": 18.601778030395508, "learning_rate": 6.287128712871288e-06, "loss": 0.4477, "step": 127 }, { "epoch": 0.0063495213056203184, "grad_norm": 18.880813598632812, "learning_rate": 6.336633663366337e-06, "loss": 0.4191, "step": 128 }, { "epoch": 0.006399126940820477, "grad_norm": 14.304143905639648, "learning_rate": 6.386138613861387e-06, "loss": 0.3963, "step": 129 }, { "epoch": 0.006448732576020636, "grad_norm": 30.758941650390625, "learning_rate": 6.4356435643564364e-06, "loss": 0.4746, "step": 130 }, { "epoch": 0.0064983382112207945, "grad_norm": 9.556411743164062, "learning_rate": 6.485148514851486e-06, "loss": 0.4987, "step": 131 }, { "epoch": 0.006547943846420954, "grad_norm": 20.321687698364258, "learning_rate": 6.534653465346535e-06, "loss": 0.3507, "step": 132 }, { "epoch": 0.006597549481621112, "grad_norm": 15.54163932800293, "learning_rate": 6.584158415841585e-06, "loss": 0.4812, "step": 133 }, { "epoch": 0.006647155116821271, "grad_norm": 18.752559661865234, "learning_rate": 6.633663366336635e-06, "loss": 0.5173, "step": 134 }, { "epoch": 0.00669676075202143, "grad_norm": 29.005102157592773, "learning_rate": 6.683168316831684e-06, "loss": 0.3695, "step": 135 }, { "epoch": 0.006746366387221588, "grad_norm": 11.062968254089355, "learning_rate": 6.732673267326733e-06, "loss": 0.316, "step": 136 }, { "epoch": 0.006795972022421747, "grad_norm": 31.756694793701172, "learning_rate": 6.782178217821783e-06, "loss": 0.6682, "step": 137 }, { "epoch": 0.006845577657621906, "grad_norm": 16.75066375732422, "learning_rate": 6.831683168316833e-06, "loss": 0.4343, "step": 138 }, { "epoch": 0.006895183292822064, "grad_norm": 25.214487075805664, "learning_rate": 6.881188118811881e-06, "loss": 0.4732, "step": 139 }, { "epoch": 0.006944788928022224, "grad_norm": 27.664775848388672, "learning_rate": 6.930693069306931e-06, "loss": 0.377, "step": 140 }, { "epoch": 0.006994394563222382, "grad_norm": 21.0042667388916, "learning_rate": 6.98019801980198e-06, "loss": 0.5442, "step": 141 }, { "epoch": 0.0070440001984225405, "grad_norm": 15.766759872436523, "learning_rate": 7.02970297029703e-06, "loss": 0.3951, "step": 142 }, { "epoch": 0.0070936058336227, "grad_norm": 28.293960571289062, "learning_rate": 7.079207920792079e-06, "loss": 0.4418, "step": 143 }, { "epoch": 0.007143211468822858, "grad_norm": 17.290281295776367, "learning_rate": 7.128712871287129e-06, "loss": 0.4819, "step": 144 }, { "epoch": 0.007192817104023017, "grad_norm": 23.701942443847656, "learning_rate": 7.1782178217821785e-06, "loss": 0.5134, "step": 145 }, { "epoch": 0.007242422739223176, "grad_norm": 24.832944869995117, "learning_rate": 7.227722772277228e-06, "loss": 0.3283, "step": 146 }, { "epoch": 0.007292028374423334, "grad_norm": 26.012773513793945, "learning_rate": 7.277227722772278e-06, "loss": 0.3745, "step": 147 }, { "epoch": 0.0073416340096234935, "grad_norm": 19.532501220703125, "learning_rate": 7.326732673267327e-06, "loss": 0.3156, "step": 148 }, { "epoch": 0.007391239644823652, "grad_norm": 22.538162231445312, "learning_rate": 7.376237623762377e-06, "loss": 0.5273, "step": 149 }, { "epoch": 0.00744084528002381, "grad_norm": 26.77420997619629, "learning_rate": 7.425742574257426e-06, "loss": 0.5183, "step": 150 }, { "epoch": 0.00749045091522397, "grad_norm": 20.887500762939453, "learning_rate": 7.475247524752476e-06, "loss": 0.4318, "step": 151 }, { "epoch": 0.007540056550424128, "grad_norm": 28.096464157104492, "learning_rate": 7.524752475247525e-06, "loss": 0.5421, "step": 152 }, { "epoch": 0.007589662185624287, "grad_norm": 32.71369934082031, "learning_rate": 7.574257425742575e-06, "loss": 0.5061, "step": 153 }, { "epoch": 0.007639267820824446, "grad_norm": 30.98775291442871, "learning_rate": 7.6237623762376246e-06, "loss": 0.4908, "step": 154 }, { "epoch": 0.007688873456024604, "grad_norm": 22.786497116088867, "learning_rate": 7.673267326732674e-06, "loss": 0.3436, "step": 155 }, { "epoch": 0.007738479091224763, "grad_norm": 13.01064682006836, "learning_rate": 7.722772277227724e-06, "loss": 0.4111, "step": 156 }, { "epoch": 0.007788084726424922, "grad_norm": 10.633048057556152, "learning_rate": 7.772277227722774e-06, "loss": 0.423, "step": 157 }, { "epoch": 0.00783769036162508, "grad_norm": 13.581253051757812, "learning_rate": 7.821782178217822e-06, "loss": 0.4238, "step": 158 }, { "epoch": 0.00788729599682524, "grad_norm": 31.394105911254883, "learning_rate": 7.871287128712872e-06, "loss": 0.661, "step": 159 }, { "epoch": 0.007936901632025399, "grad_norm": 20.037940979003906, "learning_rate": 7.920792079207921e-06, "loss": 0.4469, "step": 160 }, { "epoch": 0.007986507267225556, "grad_norm": 15.066186904907227, "learning_rate": 7.970297029702971e-06, "loss": 0.3726, "step": 161 }, { "epoch": 0.008036112902425716, "grad_norm": 12.482576370239258, "learning_rate": 8.019801980198021e-06, "loss": 0.4043, "step": 162 }, { "epoch": 0.008085718537625875, "grad_norm": 28.880918502807617, "learning_rate": 8.06930693069307e-06, "loss": 0.5457, "step": 163 }, { "epoch": 0.008135324172826032, "grad_norm": 51.45408248901367, "learning_rate": 8.11881188118812e-06, "loss": 0.5242, "step": 164 }, { "epoch": 0.008184929808026192, "grad_norm": 9.582411766052246, "learning_rate": 8.168316831683168e-06, "loss": 0.2767, "step": 165 }, { "epoch": 0.008234535443226351, "grad_norm": 17.51667022705078, "learning_rate": 8.217821782178218e-06, "loss": 0.4267, "step": 166 }, { "epoch": 0.008284141078426508, "grad_norm": 12.808982849121094, "learning_rate": 8.267326732673268e-06, "loss": 0.4231, "step": 167 }, { "epoch": 0.008333746713626668, "grad_norm": 20.59142303466797, "learning_rate": 8.316831683168318e-06, "loss": 0.4649, "step": 168 }, { "epoch": 0.008383352348826827, "grad_norm": 19.84050941467285, "learning_rate": 8.366336633663367e-06, "loss": 0.4692, "step": 169 }, { "epoch": 0.008432957984026986, "grad_norm": 38.8685302734375, "learning_rate": 8.415841584158416e-06, "loss": 0.7367, "step": 170 }, { "epoch": 0.008482563619227144, "grad_norm": 34.10273361206055, "learning_rate": 8.465346534653465e-06, "loss": 0.5782, "step": 171 }, { "epoch": 0.008532169254427303, "grad_norm": 23.882436752319336, "learning_rate": 8.514851485148515e-06, "loss": 0.442, "step": 172 }, { "epoch": 0.008581774889627462, "grad_norm": 20.873746871948242, "learning_rate": 8.564356435643565e-06, "loss": 0.5067, "step": 173 }, { "epoch": 0.00863138052482762, "grad_norm": 19.665512084960938, "learning_rate": 8.613861386138615e-06, "loss": 0.4387, "step": 174 }, { "epoch": 0.00868098616002778, "grad_norm": 20.98985481262207, "learning_rate": 8.663366336633664e-06, "loss": 0.4785, "step": 175 }, { "epoch": 0.008730591795227938, "grad_norm": 11.630488395690918, "learning_rate": 8.712871287128714e-06, "loss": 0.3436, "step": 176 }, { "epoch": 0.008780197430428096, "grad_norm": 25.02531623840332, "learning_rate": 8.762376237623764e-06, "loss": 0.3906, "step": 177 }, { "epoch": 0.008829803065628255, "grad_norm": 28.315044403076172, "learning_rate": 8.811881188118812e-06, "loss": 0.3649, "step": 178 }, { "epoch": 0.008879408700828414, "grad_norm": 19.961164474487305, "learning_rate": 8.861386138613862e-06, "loss": 0.4086, "step": 179 }, { "epoch": 0.008929014336028572, "grad_norm": 26.48885726928711, "learning_rate": 8.910891089108911e-06, "loss": 0.4841, "step": 180 }, { "epoch": 0.008978619971228731, "grad_norm": 29.412139892578125, "learning_rate": 8.960396039603961e-06, "loss": 0.372, "step": 181 }, { "epoch": 0.00902822560642889, "grad_norm": 38.260223388671875, "learning_rate": 9.009900990099011e-06, "loss": 0.5327, "step": 182 }, { "epoch": 0.00907783124162905, "grad_norm": 26.97695541381836, "learning_rate": 9.05940594059406e-06, "loss": 0.4818, "step": 183 }, { "epoch": 0.009127436876829207, "grad_norm": 14.744832038879395, "learning_rate": 9.10891089108911e-06, "loss": 0.3551, "step": 184 }, { "epoch": 0.009177042512029367, "grad_norm": 12.047884941101074, "learning_rate": 9.15841584158416e-06, "loss": 0.3841, "step": 185 }, { "epoch": 0.009226648147229526, "grad_norm": 13.832958221435547, "learning_rate": 9.20792079207921e-06, "loss": 0.3569, "step": 186 }, { "epoch": 0.009276253782429683, "grad_norm": 19.525348663330078, "learning_rate": 9.257425742574258e-06, "loss": 0.4326, "step": 187 }, { "epoch": 0.009325859417629843, "grad_norm": 20.14533233642578, "learning_rate": 9.306930693069308e-06, "loss": 0.3989, "step": 188 }, { "epoch": 0.009375465052830002, "grad_norm": 32.8289909362793, "learning_rate": 9.356435643564357e-06, "loss": 0.4353, "step": 189 }, { "epoch": 0.00942507068803016, "grad_norm": 16.450084686279297, "learning_rate": 9.405940594059405e-06, "loss": 0.3732, "step": 190 }, { "epoch": 0.009474676323230319, "grad_norm": 12.987335205078125, "learning_rate": 9.455445544554455e-06, "loss": 0.2987, "step": 191 }, { "epoch": 0.009524281958430478, "grad_norm": 21.759675979614258, "learning_rate": 9.504950495049505e-06, "loss": 0.4474, "step": 192 }, { "epoch": 0.009573887593630636, "grad_norm": 21.37806510925293, "learning_rate": 9.554455445544555e-06, "loss": 0.5247, "step": 193 }, { "epoch": 0.009623493228830795, "grad_norm": 12.002108573913574, "learning_rate": 9.603960396039604e-06, "loss": 0.4265, "step": 194 }, { "epoch": 0.009673098864030954, "grad_norm": 18.692930221557617, "learning_rate": 9.653465346534654e-06, "loss": 0.4331, "step": 195 }, { "epoch": 0.009722704499231113, "grad_norm": 21.90009117126465, "learning_rate": 9.702970297029704e-06, "loss": 0.4848, "step": 196 }, { "epoch": 0.009772310134431271, "grad_norm": 30.486127853393555, "learning_rate": 9.752475247524754e-06, "loss": 0.3766, "step": 197 }, { "epoch": 0.00982191576963143, "grad_norm": 13.009577751159668, "learning_rate": 9.801980198019802e-06, "loss": 0.4074, "step": 198 }, { "epoch": 0.00987152140483159, "grad_norm": 19.384672164916992, "learning_rate": 9.851485148514852e-06, "loss": 0.3785, "step": 199 }, { "epoch": 0.009921127040031747, "grad_norm": 13.753976821899414, "learning_rate": 9.900990099009901e-06, "loss": 0.3757, "step": 200 }, { "epoch": 0.009970732675231906, "grad_norm": 14.375900268554688, "learning_rate": 9.950495049504951e-06, "loss": 0.3471, "step": 201 }, { "epoch": 0.010020338310432066, "grad_norm": 15.409178733825684, "learning_rate": 1e-05, "loss": 0.349, "step": 202 }, { "epoch": 0.010069943945632223, "grad_norm": 13.698772430419922, "learning_rate": 9.99999993804887e-06, "loss": 0.418, "step": 203 }, { "epoch": 0.010119549580832382, "grad_norm": 26.598690032958984, "learning_rate": 9.999999752195479e-06, "loss": 0.4845, "step": 204 }, { "epoch": 0.010169155216032542, "grad_norm": 21.517614364624023, "learning_rate": 9.999999442439831e-06, "loss": 0.4987, "step": 205 }, { "epoch": 0.0102187608512327, "grad_norm": 15.28870964050293, "learning_rate": 9.999999008781937e-06, "loss": 0.5215, "step": 206 }, { "epoch": 0.010268366486432859, "grad_norm": 22.7069034576416, "learning_rate": 9.999998451221807e-06, "loss": 0.4262, "step": 207 }, { "epoch": 0.010317972121633018, "grad_norm": 21.875144958496094, "learning_rate": 9.99999776975945e-06, "loss": 0.409, "step": 208 }, { "epoch": 0.010367577756833177, "grad_norm": 19.923049926757812, "learning_rate": 9.99999696439489e-06, "loss": 0.4167, "step": 209 }, { "epoch": 0.010417183392033335, "grad_norm": 20.712142944335938, "learning_rate": 9.999996035128142e-06, "loss": 0.5555, "step": 210 }, { "epoch": 0.010466789027233494, "grad_norm": 27.07956886291504, "learning_rate": 9.999994981959229e-06, "loss": 0.4495, "step": 211 }, { "epoch": 0.010516394662433653, "grad_norm": 20.64076042175293, "learning_rate": 9.99999380488818e-06, "loss": 0.4962, "step": 212 }, { "epoch": 0.01056600029763381, "grad_norm": 10.619914054870605, "learning_rate": 9.999992503915022e-06, "loss": 0.4085, "step": 213 }, { "epoch": 0.01061560593283397, "grad_norm": 12.198948860168457, "learning_rate": 9.99999107903979e-06, "loss": 0.4484, "step": 214 }, { "epoch": 0.01066521156803413, "grad_norm": 10.464597702026367, "learning_rate": 9.999989530262516e-06, "loss": 0.3194, "step": 215 }, { "epoch": 0.010714817203234287, "grad_norm": 33.132850646972656, "learning_rate": 9.99998785758324e-06, "loss": 0.529, "step": 216 }, { "epoch": 0.010764422838434446, "grad_norm": 13.730843544006348, "learning_rate": 9.999986061002003e-06, "loss": 0.4076, "step": 217 }, { "epoch": 0.010814028473634605, "grad_norm": 8.591176986694336, "learning_rate": 9.999984140518848e-06, "loss": 0.3779, "step": 218 }, { "epoch": 0.010863634108834763, "grad_norm": 11.944315910339355, "learning_rate": 9.999982096133826e-06, "loss": 0.4357, "step": 219 }, { "epoch": 0.010913239744034922, "grad_norm": 9.193153381347656, "learning_rate": 9.999979927846985e-06, "loss": 0.4528, "step": 220 }, { "epoch": 0.010962845379235081, "grad_norm": 10.22331714630127, "learning_rate": 9.999977635658382e-06, "loss": 0.4295, "step": 221 }, { "epoch": 0.01101245101443524, "grad_norm": 24.830684661865234, "learning_rate": 9.99997521956807e-06, "loss": 0.5287, "step": 222 }, { "epoch": 0.011062056649635398, "grad_norm": 16.972572326660156, "learning_rate": 9.99997267957611e-06, "loss": 0.3988, "step": 223 }, { "epoch": 0.011111662284835557, "grad_norm": 12.143564224243164, "learning_rate": 9.999970015682565e-06, "loss": 0.3987, "step": 224 }, { "epoch": 0.011161267920035717, "grad_norm": 9.670759201049805, "learning_rate": 9.999967227887501e-06, "loss": 0.3776, "step": 225 }, { "epoch": 0.011210873555235874, "grad_norm": 12.976000785827637, "learning_rate": 9.99996431619099e-06, "loss": 0.4345, "step": 226 }, { "epoch": 0.011260479190436034, "grad_norm": 21.71278953552246, "learning_rate": 9.999961280593099e-06, "loss": 0.4531, "step": 227 }, { "epoch": 0.011310084825636193, "grad_norm": 14.495475769042969, "learning_rate": 9.999958121093906e-06, "loss": 0.2631, "step": 228 }, { "epoch": 0.01135969046083635, "grad_norm": 27.808895111083984, "learning_rate": 9.999954837693489e-06, "loss": 0.6504, "step": 229 }, { "epoch": 0.01140929609603651, "grad_norm": 14.353338241577148, "learning_rate": 9.99995143039193e-06, "loss": 0.4241, "step": 230 }, { "epoch": 0.011458901731236669, "grad_norm": 13.380431175231934, "learning_rate": 9.999947899189314e-06, "loss": 0.3958, "step": 231 }, { "epoch": 0.011508507366436826, "grad_norm": 10.100345611572266, "learning_rate": 9.999944244085725e-06, "loss": 0.3115, "step": 232 }, { "epoch": 0.011558113001636986, "grad_norm": 11.353609085083008, "learning_rate": 9.999940465081256e-06, "loss": 0.3946, "step": 233 }, { "epoch": 0.011607718636837145, "grad_norm": 19.18343162536621, "learning_rate": 9.999936562176002e-06, "loss": 0.305, "step": 234 }, { "epoch": 0.011657324272037304, "grad_norm": 14.72274398803711, "learning_rate": 9.999932535370057e-06, "loss": 0.3652, "step": 235 }, { "epoch": 0.011706929907237462, "grad_norm": 10.340514183044434, "learning_rate": 9.999928384663523e-06, "loss": 0.3808, "step": 236 }, { "epoch": 0.011756535542437621, "grad_norm": 8.93471622467041, "learning_rate": 9.999924110056502e-06, "loss": 0.3929, "step": 237 }, { "epoch": 0.01180614117763778, "grad_norm": 10.258252143859863, "learning_rate": 9.999919711549098e-06, "loss": 0.4, "step": 238 }, { "epoch": 0.011855746812837938, "grad_norm": 14.912345886230469, "learning_rate": 9.999915189141425e-06, "loss": 0.3725, "step": 239 }, { "epoch": 0.011905352448038097, "grad_norm": 26.22113800048828, "learning_rate": 9.99991054283359e-06, "loss": 0.5266, "step": 240 }, { "epoch": 0.011954958083238256, "grad_norm": 18.918439865112305, "learning_rate": 9.99990577262571e-06, "loss": 0.5013, "step": 241 }, { "epoch": 0.012004563718438414, "grad_norm": 8.21634578704834, "learning_rate": 9.999900878517903e-06, "loss": 0.3926, "step": 242 }, { "epoch": 0.012054169353638573, "grad_norm": 16.042329788208008, "learning_rate": 9.999895860510292e-06, "loss": 0.4894, "step": 243 }, { "epoch": 0.012103774988838733, "grad_norm": 9.68657398223877, "learning_rate": 9.999890718602998e-06, "loss": 0.412, "step": 244 }, { "epoch": 0.01215338062403889, "grad_norm": 11.713228225708008, "learning_rate": 9.999885452796152e-06, "loss": 0.3689, "step": 245 }, { "epoch": 0.01220298625923905, "grad_norm": 13.298933982849121, "learning_rate": 9.999880063089881e-06, "loss": 0.4475, "step": 246 }, { "epoch": 0.012252591894439209, "grad_norm": 12.091273307800293, "learning_rate": 9.999874549484321e-06, "loss": 0.4776, "step": 247 }, { "epoch": 0.012302197529639366, "grad_norm": 27.10563850402832, "learning_rate": 9.999868911979608e-06, "loss": 0.5402, "step": 248 }, { "epoch": 0.012351803164839525, "grad_norm": 17.88898277282715, "learning_rate": 9.999863150575884e-06, "loss": 0.3092, "step": 249 }, { "epoch": 0.012401408800039685, "grad_norm": 9.92951774597168, "learning_rate": 9.999857265273287e-06, "loss": 0.4842, "step": 250 }, { "epoch": 0.012451014435239844, "grad_norm": 24.964160919189453, "learning_rate": 9.999851256071965e-06, "loss": 0.4408, "step": 251 }, { "epoch": 0.012500620070440002, "grad_norm": 31.329633712768555, "learning_rate": 9.999845122972067e-06, "loss": 0.5231, "step": 252 }, { "epoch": 0.01255022570564016, "grad_norm": 11.178428649902344, "learning_rate": 9.999838865973748e-06, "loss": 0.383, "step": 253 }, { "epoch": 0.01259983134084032, "grad_norm": 10.125946998596191, "learning_rate": 9.999832485077157e-06, "loss": 0.3947, "step": 254 }, { "epoch": 0.012649436976040478, "grad_norm": 12.391233444213867, "learning_rate": 9.999825980282457e-06, "loss": 0.4103, "step": 255 }, { "epoch": 0.012699042611240637, "grad_norm": 13.901106834411621, "learning_rate": 9.999819351589808e-06, "loss": 0.3907, "step": 256 }, { "epoch": 0.012748648246440796, "grad_norm": 12.584179878234863, "learning_rate": 9.999812598999372e-06, "loss": 0.5139, "step": 257 }, { "epoch": 0.012798253881640954, "grad_norm": 20.1822509765625, "learning_rate": 9.99980572251132e-06, "loss": 0.3508, "step": 258 }, { "epoch": 0.012847859516841113, "grad_norm": 5.677731037139893, "learning_rate": 9.99979872212582e-06, "loss": 0.3848, "step": 259 }, { "epoch": 0.012897465152041272, "grad_norm": 19.721818923950195, "learning_rate": 9.999791597843045e-06, "loss": 0.4569, "step": 260 }, { "epoch": 0.01294707078724143, "grad_norm": 12.047348022460938, "learning_rate": 9.999784349663173e-06, "loss": 0.394, "step": 261 }, { "epoch": 0.012996676422441589, "grad_norm": 15.570510864257812, "learning_rate": 9.999776977586384e-06, "loss": 0.4098, "step": 262 }, { "epoch": 0.013046282057641748, "grad_norm": 24.429771423339844, "learning_rate": 9.999769481612858e-06, "loss": 0.4523, "step": 263 }, { "epoch": 0.013095887692841908, "grad_norm": 24.249452590942383, "learning_rate": 9.999761861742784e-06, "loss": 0.5119, "step": 264 }, { "epoch": 0.013145493328042065, "grad_norm": 49.18372344970703, "learning_rate": 9.999754117976351e-06, "loss": 0.64, "step": 265 }, { "epoch": 0.013195098963242224, "grad_norm": 10.594012260437012, "learning_rate": 9.999746250313746e-06, "loss": 0.3994, "step": 266 }, { "epoch": 0.013244704598442384, "grad_norm": 29.9571590423584, "learning_rate": 9.99973825875517e-06, "loss": 0.4647, "step": 267 }, { "epoch": 0.013294310233642541, "grad_norm": 12.103493690490723, "learning_rate": 9.999730143300816e-06, "loss": 0.4483, "step": 268 }, { "epoch": 0.0133439158688427, "grad_norm": 16.534400939941406, "learning_rate": 9.999721903950889e-06, "loss": 0.4434, "step": 269 }, { "epoch": 0.01339352150404286, "grad_norm": 13.86499309539795, "learning_rate": 9.999713540705592e-06, "loss": 0.3123, "step": 270 }, { "epoch": 0.013443127139243017, "grad_norm": 10.240569114685059, "learning_rate": 9.99970505356513e-06, "loss": 0.3491, "step": 271 }, { "epoch": 0.013492732774443177, "grad_norm": 15.340837478637695, "learning_rate": 9.999696442529716e-06, "loss": 0.4748, "step": 272 }, { "epoch": 0.013542338409643336, "grad_norm": 7.956059455871582, "learning_rate": 9.999687707599564e-06, "loss": 0.2754, "step": 273 }, { "epoch": 0.013591944044843493, "grad_norm": 16.40360450744629, "learning_rate": 9.999678848774887e-06, "loss": 0.4419, "step": 274 }, { "epoch": 0.013641549680043653, "grad_norm": 24.900226593017578, "learning_rate": 9.999669866055908e-06, "loss": 0.4121, "step": 275 }, { "epoch": 0.013691155315243812, "grad_norm": 13.43083667755127, "learning_rate": 9.999660759442848e-06, "loss": 0.3331, "step": 276 }, { "epoch": 0.013740760950443971, "grad_norm": 13.955222129821777, "learning_rate": 9.999651528935932e-06, "loss": 0.3843, "step": 277 }, { "epoch": 0.013790366585644129, "grad_norm": 8.012039184570312, "learning_rate": 9.999642174535389e-06, "loss": 0.2746, "step": 278 }, { "epoch": 0.013839972220844288, "grad_norm": 12.922771453857422, "learning_rate": 9.999632696241453e-06, "loss": 0.4212, "step": 279 }, { "epoch": 0.013889577856044447, "grad_norm": 18.763948440551758, "learning_rate": 9.999623094054357e-06, "loss": 0.3274, "step": 280 }, { "epoch": 0.013939183491244605, "grad_norm": 16.05240821838379, "learning_rate": 9.999613367974337e-06, "loss": 0.5311, "step": 281 }, { "epoch": 0.013988789126444764, "grad_norm": 11.137348175048828, "learning_rate": 9.999603518001637e-06, "loss": 0.3572, "step": 282 }, { "epoch": 0.014038394761644923, "grad_norm": 13.042766571044922, "learning_rate": 9.9995935441365e-06, "loss": 0.453, "step": 283 }, { "epoch": 0.014088000396845081, "grad_norm": 17.55318832397461, "learning_rate": 9.999583446379175e-06, "loss": 0.4704, "step": 284 }, { "epoch": 0.01413760603204524, "grad_norm": 10.601234436035156, "learning_rate": 9.99957322472991e-06, "loss": 0.3601, "step": 285 }, { "epoch": 0.0141872116672454, "grad_norm": 13.283910751342773, "learning_rate": 9.99956287918896e-06, "loss": 0.4359, "step": 286 }, { "epoch": 0.014236817302445557, "grad_norm": 14.877752304077148, "learning_rate": 9.99955240975658e-06, "loss": 0.4386, "step": 287 }, { "epoch": 0.014286422937645716, "grad_norm": 10.061055183410645, "learning_rate": 9.999541816433027e-06, "loss": 0.4016, "step": 288 }, { "epoch": 0.014336028572845876, "grad_norm": 17.01295280456543, "learning_rate": 9.99953109921857e-06, "loss": 0.4255, "step": 289 }, { "epoch": 0.014385634208046035, "grad_norm": 13.189228057861328, "learning_rate": 9.99952025811347e-06, "loss": 0.3914, "step": 290 }, { "epoch": 0.014435239843246192, "grad_norm": 19.832706451416016, "learning_rate": 9.999509293117992e-06, "loss": 0.4622, "step": 291 }, { "epoch": 0.014484845478446352, "grad_norm": 15.312405586242676, "learning_rate": 9.999498204232416e-06, "loss": 0.4329, "step": 292 }, { "epoch": 0.014534451113646511, "grad_norm": 7.846432209014893, "learning_rate": 9.999486991457013e-06, "loss": 0.3259, "step": 293 }, { "epoch": 0.014584056748846668, "grad_norm": 27.996883392333984, "learning_rate": 9.999475654792062e-06, "loss": 0.5251, "step": 294 }, { "epoch": 0.014633662384046828, "grad_norm": 14.8898344039917, "learning_rate": 9.99946419423784e-06, "loss": 0.4803, "step": 295 }, { "epoch": 0.014683268019246987, "grad_norm": 14.037585258483887, "learning_rate": 9.999452609794635e-06, "loss": 0.4257, "step": 296 }, { "epoch": 0.014732873654447145, "grad_norm": 20.218875885009766, "learning_rate": 9.999440901462733e-06, "loss": 0.5109, "step": 297 }, { "epoch": 0.014782479289647304, "grad_norm": 29.076303482055664, "learning_rate": 9.999429069242423e-06, "loss": 0.5259, "step": 298 }, { "epoch": 0.014832084924847463, "grad_norm": 15.889019012451172, "learning_rate": 9.999417113134e-06, "loss": 0.2991, "step": 299 }, { "epoch": 0.01488169056004762, "grad_norm": 15.483980178833008, "learning_rate": 9.999405033137762e-06, "loss": 0.3947, "step": 300 }, { "epoch": 0.01493129619524778, "grad_norm": 13.24488353729248, "learning_rate": 9.999392829254003e-06, "loss": 0.4471, "step": 301 }, { "epoch": 0.01498090183044794, "grad_norm": 9.260814666748047, "learning_rate": 9.999380501483027e-06, "loss": 0.3654, "step": 302 }, { "epoch": 0.015030507465648098, "grad_norm": 10.254755973815918, "learning_rate": 9.999368049825143e-06, "loss": 0.4815, "step": 303 }, { "epoch": 0.015080113100848256, "grad_norm": 21.380714416503906, "learning_rate": 9.999355474280657e-06, "loss": 0.4928, "step": 304 }, { "epoch": 0.015129718736048415, "grad_norm": 10.2161226272583, "learning_rate": 9.999342774849878e-06, "loss": 0.4122, "step": 305 }, { "epoch": 0.015179324371248575, "grad_norm": 24.05419921875, "learning_rate": 9.999329951533129e-06, "loss": 0.478, "step": 306 }, { "epoch": 0.015228930006448732, "grad_norm": 18.84333610534668, "learning_rate": 9.999317004330718e-06, "loss": 0.4268, "step": 307 }, { "epoch": 0.015278535641648891, "grad_norm": 11.39536190032959, "learning_rate": 9.99930393324297e-06, "loss": 0.3741, "step": 308 }, { "epoch": 0.01532814127684905, "grad_norm": 11.91507339477539, "learning_rate": 9.99929073827021e-06, "loss": 0.4303, "step": 309 }, { "epoch": 0.015377746912049208, "grad_norm": 8.837409019470215, "learning_rate": 9.999277419412766e-06, "loss": 0.3672, "step": 310 }, { "epoch": 0.015427352547249367, "grad_norm": 10.731083869934082, "learning_rate": 9.999263976670965e-06, "loss": 0.3476, "step": 311 }, { "epoch": 0.015476958182449527, "grad_norm": 11.694878578186035, "learning_rate": 9.99925041004514e-06, "loss": 0.3668, "step": 312 }, { "epoch": 0.015526563817649684, "grad_norm": 11.778631210327148, "learning_rate": 9.99923671953563e-06, "loss": 0.3873, "step": 313 }, { "epoch": 0.015576169452849844, "grad_norm": 23.1600341796875, "learning_rate": 9.999222905142773e-06, "loss": 0.4854, "step": 314 }, { "epoch": 0.015625775088050003, "grad_norm": 14.060665130615234, "learning_rate": 9.999208966866909e-06, "loss": 0.3314, "step": 315 }, { "epoch": 0.01567538072325016, "grad_norm": 24.538848876953125, "learning_rate": 9.999194904708387e-06, "loss": 0.5194, "step": 316 }, { "epoch": 0.01572498635845032, "grad_norm": 15.4091796875, "learning_rate": 9.999180718667553e-06, "loss": 0.4003, "step": 317 }, { "epoch": 0.01577459199365048, "grad_norm": 39.60602569580078, "learning_rate": 9.99916640874476e-06, "loss": 0.4088, "step": 318 }, { "epoch": 0.015824197628850636, "grad_norm": 11.807711601257324, "learning_rate": 9.999151974940362e-06, "loss": 0.4563, "step": 319 }, { "epoch": 0.015873803264050797, "grad_norm": 11.734817504882812, "learning_rate": 9.999137417254717e-06, "loss": 0.268, "step": 320 }, { "epoch": 0.015923408899250955, "grad_norm": 28.565542221069336, "learning_rate": 9.999122735688185e-06, "loss": 0.529, "step": 321 }, { "epoch": 0.015973014534451112, "grad_norm": 11.676663398742676, "learning_rate": 9.99910793024113e-06, "loss": 0.418, "step": 322 }, { "epoch": 0.016022620169651274, "grad_norm": 11.522047996520996, "learning_rate": 9.99909300091392e-06, "loss": 0.35, "step": 323 }, { "epoch": 0.01607222580485143, "grad_norm": 7.062811851501465, "learning_rate": 9.999077947706924e-06, "loss": 0.2519, "step": 324 }, { "epoch": 0.01612183144005159, "grad_norm": 12.198573112487793, "learning_rate": 9.999062770620514e-06, "loss": 0.4759, "step": 325 }, { "epoch": 0.01617143707525175, "grad_norm": 10.230265617370605, "learning_rate": 9.999047469655067e-06, "loss": 0.3457, "step": 326 }, { "epoch": 0.016221042710451907, "grad_norm": 16.87712860107422, "learning_rate": 9.999032044810963e-06, "loss": 0.5734, "step": 327 }, { "epoch": 0.016270648345652065, "grad_norm": 22.800830841064453, "learning_rate": 9.999016496088585e-06, "loss": 0.5246, "step": 328 }, { "epoch": 0.016320253980852226, "grad_norm": 10.091405868530273, "learning_rate": 9.999000823488317e-06, "loss": 0.3763, "step": 329 }, { "epoch": 0.016369859616052383, "grad_norm": 16.162033081054688, "learning_rate": 9.998985027010545e-06, "loss": 0.4234, "step": 330 }, { "epoch": 0.01641946525125254, "grad_norm": 14.301701545715332, "learning_rate": 9.998969106655663e-06, "loss": 0.3746, "step": 331 }, { "epoch": 0.016469070886452702, "grad_norm": 9.342902183532715, "learning_rate": 9.998953062424067e-06, "loss": 0.3708, "step": 332 }, { "epoch": 0.01651867652165286, "grad_norm": 22.454011917114258, "learning_rate": 9.998936894316151e-06, "loss": 0.5023, "step": 333 }, { "epoch": 0.016568282156853017, "grad_norm": 10.966526985168457, "learning_rate": 9.99892060233232e-06, "loss": 0.3806, "step": 334 }, { "epoch": 0.016617887792053178, "grad_norm": 19.031383514404297, "learning_rate": 9.998904186472973e-06, "loss": 0.2328, "step": 335 }, { "epoch": 0.016667493427253335, "grad_norm": 30.279537200927734, "learning_rate": 9.99888764673852e-06, "loss": 0.5035, "step": 336 }, { "epoch": 0.016717099062453496, "grad_norm": 20.32370376586914, "learning_rate": 9.99887098312937e-06, "loss": 0.5826, "step": 337 }, { "epoch": 0.016766704697653654, "grad_norm": 21.514076232910156, "learning_rate": 9.998854195645935e-06, "loss": 0.5285, "step": 338 }, { "epoch": 0.01681631033285381, "grad_norm": 17.54102897644043, "learning_rate": 9.998837284288632e-06, "loss": 0.4957, "step": 339 }, { "epoch": 0.016865915968053972, "grad_norm": 6.941586017608643, "learning_rate": 9.99882024905788e-06, "loss": 0.2989, "step": 340 }, { "epoch": 0.01691552160325413, "grad_norm": 13.356155395507812, "learning_rate": 9.9988030899541e-06, "loss": 0.4399, "step": 341 }, { "epoch": 0.016965127238454288, "grad_norm": 14.885394096374512, "learning_rate": 9.99878580697772e-06, "loss": 0.4851, "step": 342 }, { "epoch": 0.01701473287365445, "grad_norm": 12.217284202575684, "learning_rate": 9.998768400129166e-06, "loss": 0.424, "step": 343 }, { "epoch": 0.017064338508854606, "grad_norm": 11.978593826293945, "learning_rate": 9.998750869408869e-06, "loss": 0.4047, "step": 344 }, { "epoch": 0.017113944144054764, "grad_norm": 20.279705047607422, "learning_rate": 9.998733214817266e-06, "loss": 0.4108, "step": 345 }, { "epoch": 0.017163549779254925, "grad_norm": 9.960865020751953, "learning_rate": 9.998715436354792e-06, "loss": 0.4542, "step": 346 }, { "epoch": 0.017213155414455082, "grad_norm": 9.98718547821045, "learning_rate": 9.998697534021888e-06, "loss": 0.382, "step": 347 }, { "epoch": 0.01726276104965524, "grad_norm": 20.78769874572754, "learning_rate": 9.998679507818997e-06, "loss": 0.4208, "step": 348 }, { "epoch": 0.0173123666848554, "grad_norm": 22.83961296081543, "learning_rate": 9.998661357746567e-06, "loss": 0.3897, "step": 349 }, { "epoch": 0.01736197232005556, "grad_norm": 10.072334289550781, "learning_rate": 9.998643083805049e-06, "loss": 0.3844, "step": 350 }, { "epoch": 0.017411577955255716, "grad_norm": 16.09838104248047, "learning_rate": 9.998624685994893e-06, "loss": 0.4336, "step": 351 }, { "epoch": 0.017461183590455877, "grad_norm": 13.97887134552002, "learning_rate": 9.998606164316556e-06, "loss": 0.361, "step": 352 }, { "epoch": 0.017510789225656034, "grad_norm": 11.097314834594727, "learning_rate": 9.998587518770498e-06, "loss": 0.266, "step": 353 }, { "epoch": 0.017560394860856192, "grad_norm": 22.219524383544922, "learning_rate": 9.99856874935718e-06, "loss": 0.4594, "step": 354 }, { "epoch": 0.017610000496056353, "grad_norm": 12.547555923461914, "learning_rate": 9.998549856077067e-06, "loss": 0.3446, "step": 355 }, { "epoch": 0.01765960613125651, "grad_norm": 16.861169815063477, "learning_rate": 9.998530838930626e-06, "loss": 0.4569, "step": 356 }, { "epoch": 0.017709211766456668, "grad_norm": 15.513945579528809, "learning_rate": 9.998511697918332e-06, "loss": 0.4558, "step": 357 }, { "epoch": 0.01775881740165683, "grad_norm": 11.585395812988281, "learning_rate": 9.998492433040657e-06, "loss": 0.4801, "step": 358 }, { "epoch": 0.017808423036856987, "grad_norm": 8.36623477935791, "learning_rate": 9.998473044298079e-06, "loss": 0.372, "step": 359 }, { "epoch": 0.017858028672057144, "grad_norm": 11.680685043334961, "learning_rate": 9.998453531691074e-06, "loss": 0.3906, "step": 360 }, { "epoch": 0.017907634307257305, "grad_norm": 15.19757080078125, "learning_rate": 9.998433895220133e-06, "loss": 0.2945, "step": 361 }, { "epoch": 0.017957239942457463, "grad_norm": 9.353394508361816, "learning_rate": 9.998414134885739e-06, "loss": 0.3434, "step": 362 }, { "epoch": 0.018006845577657624, "grad_norm": 12.914165496826172, "learning_rate": 9.99839425068838e-06, "loss": 0.4, "step": 363 }, { "epoch": 0.01805645121285778, "grad_norm": 11.056907653808594, "learning_rate": 9.998374242628552e-06, "loss": 0.341, "step": 364 }, { "epoch": 0.01810605684805794, "grad_norm": 6.651793956756592, "learning_rate": 9.998354110706747e-06, "loss": 0.3404, "step": 365 }, { "epoch": 0.0181556624832581, "grad_norm": 20.384260177612305, "learning_rate": 9.998333854923468e-06, "loss": 0.5098, "step": 366 }, { "epoch": 0.018205268118458257, "grad_norm": 3.819305419921875, "learning_rate": 9.998313475279215e-06, "loss": 0.2349, "step": 367 }, { "epoch": 0.018254873753658415, "grad_norm": 13.505685806274414, "learning_rate": 9.998292971774492e-06, "loss": 0.3343, "step": 368 }, { "epoch": 0.018304479388858576, "grad_norm": 9.305999755859375, "learning_rate": 9.998272344409809e-06, "loss": 0.4606, "step": 369 }, { "epoch": 0.018354085024058733, "grad_norm": 20.636905670166016, "learning_rate": 9.998251593185677e-06, "loss": 0.6397, "step": 370 }, { "epoch": 0.01840369065925889, "grad_norm": 8.336040496826172, "learning_rate": 9.998230718102609e-06, "loss": 0.4612, "step": 371 }, { "epoch": 0.018453296294459052, "grad_norm": 9.643148422241211, "learning_rate": 9.998209719161123e-06, "loss": 0.3945, "step": 372 }, { "epoch": 0.01850290192965921, "grad_norm": 12.103140830993652, "learning_rate": 9.99818859636174e-06, "loss": 0.2417, "step": 373 }, { "epoch": 0.018552507564859367, "grad_norm": 11.691559791564941, "learning_rate": 9.99816734970498e-06, "loss": 0.4602, "step": 374 }, { "epoch": 0.018602113200059528, "grad_norm": 17.199560165405273, "learning_rate": 9.998145979191374e-06, "loss": 0.5749, "step": 375 }, { "epoch": 0.018651718835259685, "grad_norm": 12.08308219909668, "learning_rate": 9.99812448482145e-06, "loss": 0.3799, "step": 376 }, { "epoch": 0.018701324470459843, "grad_norm": 13.102935791015625, "learning_rate": 9.99810286659574e-06, "loss": 0.3331, "step": 377 }, { "epoch": 0.018750930105660004, "grad_norm": 19.76250648498535, "learning_rate": 9.99808112451478e-06, "loss": 0.522, "step": 378 }, { "epoch": 0.01880053574086016, "grad_norm": 10.212170600891113, "learning_rate": 9.998059258579109e-06, "loss": 0.3385, "step": 379 }, { "epoch": 0.01885014137606032, "grad_norm": 11.21021556854248, "learning_rate": 9.99803726878927e-06, "loss": 0.3487, "step": 380 }, { "epoch": 0.01889974701126048, "grad_norm": 14.638076782226562, "learning_rate": 9.998015155145803e-06, "loss": 0.4026, "step": 381 }, { "epoch": 0.018949352646460638, "grad_norm": 18.469663619995117, "learning_rate": 9.997992917649262e-06, "loss": 0.4033, "step": 382 }, { "epoch": 0.018998958281660795, "grad_norm": 12.176090240478516, "learning_rate": 9.997970556300195e-06, "loss": 0.3811, "step": 383 }, { "epoch": 0.019048563916860956, "grad_norm": 16.8322811126709, "learning_rate": 9.997948071099159e-06, "loss": 0.4291, "step": 384 }, { "epoch": 0.019098169552061114, "grad_norm": 10.483285903930664, "learning_rate": 9.997925462046708e-06, "loss": 0.4195, "step": 385 }, { "epoch": 0.01914777518726127, "grad_norm": 12.920680046081543, "learning_rate": 9.9979027291434e-06, "loss": 0.3883, "step": 386 }, { "epoch": 0.019197380822461432, "grad_norm": 9.881131172180176, "learning_rate": 9.997879872389806e-06, "loss": 0.5211, "step": 387 }, { "epoch": 0.01924698645766159, "grad_norm": 8.997169494628906, "learning_rate": 9.997856891786485e-06, "loss": 0.404, "step": 388 }, { "epoch": 0.019296592092861747, "grad_norm": 9.415815353393555, "learning_rate": 9.99783378733401e-06, "loss": 0.2656, "step": 389 }, { "epoch": 0.01934619772806191, "grad_norm": 5.7787184715271, "learning_rate": 9.997810559032955e-06, "loss": 0.3901, "step": 390 }, { "epoch": 0.019395803363262066, "grad_norm": 7.347204685211182, "learning_rate": 9.997787206883891e-06, "loss": 0.3479, "step": 391 }, { "epoch": 0.019445408998462227, "grad_norm": 8.023351669311523, "learning_rate": 9.9977637308874e-06, "loss": 0.3211, "step": 392 }, { "epoch": 0.019495014633662384, "grad_norm": 18.095064163208008, "learning_rate": 9.997740131044064e-06, "loss": 0.469, "step": 393 }, { "epoch": 0.019544620268862542, "grad_norm": 15.8823823928833, "learning_rate": 9.997716407354465e-06, "loss": 0.2877, "step": 394 }, { "epoch": 0.019594225904062703, "grad_norm": 13.68383502960205, "learning_rate": 9.997692559819195e-06, "loss": 0.4052, "step": 395 }, { "epoch": 0.01964383153926286, "grad_norm": 7.030422210693359, "learning_rate": 9.99766858843884e-06, "loss": 0.2103, "step": 396 }, { "epoch": 0.019693437174463018, "grad_norm": 17.060428619384766, "learning_rate": 9.997644493214e-06, "loss": 0.5115, "step": 397 }, { "epoch": 0.01974304280966318, "grad_norm": 9.549432754516602, "learning_rate": 9.997620274145265e-06, "loss": 0.4633, "step": 398 }, { "epoch": 0.019792648444863337, "grad_norm": 16.388534545898438, "learning_rate": 9.997595931233242e-06, "loss": 0.3902, "step": 399 }, { "epoch": 0.019842254080063494, "grad_norm": 13.72014045715332, "learning_rate": 9.997571464478531e-06, "loss": 0.482, "step": 400 }, { "epoch": 0.019891859715263655, "grad_norm": 15.441608428955078, "learning_rate": 9.997546873881736e-06, "loss": 0.4008, "step": 401 }, { "epoch": 0.019941465350463813, "grad_norm": 16.241518020629883, "learning_rate": 9.997522159443472e-06, "loss": 0.3303, "step": 402 }, { "epoch": 0.01999107098566397, "grad_norm": 20.061290740966797, "learning_rate": 9.997497321164345e-06, "loss": 0.4542, "step": 403 }, { "epoch": 0.02004067662086413, "grad_norm": 18.849882125854492, "learning_rate": 9.997472359044976e-06, "loss": 0.5384, "step": 404 }, { "epoch": 0.02009028225606429, "grad_norm": 22.635107040405273, "learning_rate": 9.99744727308598e-06, "loss": 0.4949, "step": 405 }, { "epoch": 0.020139887891264446, "grad_norm": 10.69657039642334, "learning_rate": 9.997422063287981e-06, "loss": 0.3982, "step": 406 }, { "epoch": 0.020189493526464607, "grad_norm": 12.131929397583008, "learning_rate": 9.997396729651603e-06, "loss": 0.4342, "step": 407 }, { "epoch": 0.020239099161664765, "grad_norm": 12.70007038116455, "learning_rate": 9.997371272177472e-06, "loss": 0.3354, "step": 408 }, { "epoch": 0.020288704796864922, "grad_norm": 16.43197250366211, "learning_rate": 9.997345690866223e-06, "loss": 0.3283, "step": 409 }, { "epoch": 0.020338310432065083, "grad_norm": 15.588682174682617, "learning_rate": 9.997319985718485e-06, "loss": 0.3548, "step": 410 }, { "epoch": 0.02038791606726524, "grad_norm": 14.370945930480957, "learning_rate": 9.997294156734897e-06, "loss": 0.3379, "step": 411 }, { "epoch": 0.0204375217024654, "grad_norm": 30.56341552734375, "learning_rate": 9.9972682039161e-06, "loss": 0.6608, "step": 412 }, { "epoch": 0.02048712733766556, "grad_norm": 20.911863327026367, "learning_rate": 9.997242127262736e-06, "loss": 0.3868, "step": 413 }, { "epoch": 0.020536732972865717, "grad_norm": 10.602009773254395, "learning_rate": 9.997215926775452e-06, "loss": 0.3863, "step": 414 }, { "epoch": 0.020586338608065875, "grad_norm": 11.653353691101074, "learning_rate": 9.997189602454897e-06, "loss": 0.4627, "step": 415 }, { "epoch": 0.020635944243266036, "grad_norm": 13.534695625305176, "learning_rate": 9.997163154301724e-06, "loss": 0.5237, "step": 416 }, { "epoch": 0.020685549878466193, "grad_norm": 11.388361930847168, "learning_rate": 9.997136582316586e-06, "loss": 0.3686, "step": 417 }, { "epoch": 0.020735155513666354, "grad_norm": 9.7966890335083, "learning_rate": 9.997109886500143e-06, "loss": 0.3584, "step": 418 }, { "epoch": 0.02078476114886651, "grad_norm": 9.970362663269043, "learning_rate": 9.997083066853058e-06, "loss": 0.3293, "step": 419 }, { "epoch": 0.02083436678406667, "grad_norm": 6.073129177093506, "learning_rate": 9.997056123375993e-06, "loss": 0.3041, "step": 420 }, { "epoch": 0.02088397241926683, "grad_norm": 7.28793478012085, "learning_rate": 9.997029056069617e-06, "loss": 0.3279, "step": 421 }, { "epoch": 0.020933578054466988, "grad_norm": 5.792873859405518, "learning_rate": 9.997001864934601e-06, "loss": 0.3566, "step": 422 }, { "epoch": 0.020983183689667145, "grad_norm": 9.46887493133545, "learning_rate": 9.996974549971618e-06, "loss": 0.3242, "step": 423 }, { "epoch": 0.021032789324867306, "grad_norm": 20.504098892211914, "learning_rate": 9.996947111181346e-06, "loss": 0.5574, "step": 424 }, { "epoch": 0.021082394960067464, "grad_norm": 12.42835521697998, "learning_rate": 9.996919548564464e-06, "loss": 0.3876, "step": 425 }, { "epoch": 0.02113200059526762, "grad_norm": 30.8446044921875, "learning_rate": 9.996891862121655e-06, "loss": 0.2693, "step": 426 }, { "epoch": 0.021181606230467782, "grad_norm": 7.2091193199157715, "learning_rate": 9.996864051853604e-06, "loss": 0.2766, "step": 427 }, { "epoch": 0.02123121186566794, "grad_norm": 14.651542663574219, "learning_rate": 9.996836117761003e-06, "loss": 0.423, "step": 428 }, { "epoch": 0.021280817500868097, "grad_norm": 19.123090744018555, "learning_rate": 9.996808059844542e-06, "loss": 0.4677, "step": 429 }, { "epoch": 0.02133042313606826, "grad_norm": 11.3178129196167, "learning_rate": 9.996779878104918e-06, "loss": 0.284, "step": 430 }, { "epoch": 0.021380028771268416, "grad_norm": 11.62717056274414, "learning_rate": 9.996751572542827e-06, "loss": 0.3749, "step": 431 }, { "epoch": 0.021429634406468574, "grad_norm": 16.725873947143555, "learning_rate": 9.996723143158973e-06, "loss": 0.3753, "step": 432 }, { "epoch": 0.021479240041668735, "grad_norm": 12.795910835266113, "learning_rate": 9.996694589954057e-06, "loss": 0.356, "step": 433 }, { "epoch": 0.021528845676868892, "grad_norm": 14.401754379272461, "learning_rate": 9.996665912928791e-06, "loss": 0.4633, "step": 434 }, { "epoch": 0.02157845131206905, "grad_norm": 8.760015487670898, "learning_rate": 9.996637112083883e-06, "loss": 0.3917, "step": 435 }, { "epoch": 0.02162805694726921, "grad_norm": 7.1756110191345215, "learning_rate": 9.996608187420046e-06, "loss": 0.3931, "step": 436 }, { "epoch": 0.021677662582469368, "grad_norm": 18.42681121826172, "learning_rate": 9.996579138938e-06, "loss": 0.4916, "step": 437 }, { "epoch": 0.021727268217669526, "grad_norm": 9.173389434814453, "learning_rate": 9.996549966638462e-06, "loss": 0.409, "step": 438 }, { "epoch": 0.021776873852869687, "grad_norm": 13.405043601989746, "learning_rate": 9.996520670522155e-06, "loss": 0.4244, "step": 439 }, { "epoch": 0.021826479488069844, "grad_norm": 12.008934020996094, "learning_rate": 9.996491250589805e-06, "loss": 0.4034, "step": 440 }, { "epoch": 0.021876085123270002, "grad_norm": 8.351729393005371, "learning_rate": 9.996461706842144e-06, "loss": 0.3922, "step": 441 }, { "epoch": 0.021925690758470163, "grad_norm": 7.7912187576293945, "learning_rate": 9.9964320392799e-06, "loss": 0.3438, "step": 442 }, { "epoch": 0.02197529639367032, "grad_norm": 8.16402816772461, "learning_rate": 9.99640224790381e-06, "loss": 0.3467, "step": 443 }, { "epoch": 0.02202490202887048, "grad_norm": 8.058507919311523, "learning_rate": 9.996372332714613e-06, "loss": 0.4152, "step": 444 }, { "epoch": 0.02207450766407064, "grad_norm": 10.228119850158691, "learning_rate": 9.996342293713048e-06, "loss": 0.2719, "step": 445 }, { "epoch": 0.022124113299270796, "grad_norm": 14.518315315246582, "learning_rate": 9.996312130899864e-06, "loss": 0.4587, "step": 446 }, { "epoch": 0.022173718934470957, "grad_norm": 8.395744323730469, "learning_rate": 9.996281844275802e-06, "loss": 0.4015, "step": 447 }, { "epoch": 0.022223324569671115, "grad_norm": 6.090627670288086, "learning_rate": 9.996251433841618e-06, "loss": 0.3019, "step": 448 }, { "epoch": 0.022272930204871273, "grad_norm": 6.84417200088501, "learning_rate": 9.996220899598061e-06, "loss": 0.335, "step": 449 }, { "epoch": 0.022322535840071434, "grad_norm": 5.0419816970825195, "learning_rate": 9.99619024154589e-06, "loss": 0.2801, "step": 450 }, { "epoch": 0.02237214147527159, "grad_norm": 12.646452903747559, "learning_rate": 9.996159459685869e-06, "loss": 0.4032, "step": 451 }, { "epoch": 0.02242174711047175, "grad_norm": 10.597658157348633, "learning_rate": 9.996128554018754e-06, "loss": 0.3155, "step": 452 }, { "epoch": 0.02247135274567191, "grad_norm": 24.71767807006836, "learning_rate": 9.996097524545311e-06, "loss": 0.5962, "step": 453 }, { "epoch": 0.022520958380872067, "grad_norm": 23.407312393188477, "learning_rate": 9.996066371266316e-06, "loss": 0.4994, "step": 454 }, { "epoch": 0.022570564016072225, "grad_norm": 12.091894149780273, "learning_rate": 9.996035094182533e-06, "loss": 0.3901, "step": 455 }, { "epoch": 0.022620169651272386, "grad_norm": 14.129914283752441, "learning_rate": 9.996003693294741e-06, "loss": 0.4445, "step": 456 }, { "epoch": 0.022669775286472543, "grad_norm": 8.040448188781738, "learning_rate": 9.995972168603718e-06, "loss": 0.3491, "step": 457 }, { "epoch": 0.0227193809216727, "grad_norm": 16.64242935180664, "learning_rate": 9.995940520110245e-06, "loss": 0.5055, "step": 458 }, { "epoch": 0.022768986556872862, "grad_norm": 11.63039493560791, "learning_rate": 9.995908747815107e-06, "loss": 0.3498, "step": 459 }, { "epoch": 0.02281859219207302, "grad_norm": 13.168502807617188, "learning_rate": 9.995876851719087e-06, "loss": 0.4564, "step": 460 }, { "epoch": 0.022868197827273177, "grad_norm": 11.697187423706055, "learning_rate": 9.99584483182298e-06, "loss": 0.4897, "step": 461 }, { "epoch": 0.022917803462473338, "grad_norm": 12.415094375610352, "learning_rate": 9.99581268812758e-06, "loss": 0.4399, "step": 462 }, { "epoch": 0.022967409097673495, "grad_norm": 7.801938533782959, "learning_rate": 9.99578042063368e-06, "loss": 0.4301, "step": 463 }, { "epoch": 0.023017014732873653, "grad_norm": 10.27669906616211, "learning_rate": 9.995748029342083e-06, "loss": 0.2982, "step": 464 }, { "epoch": 0.023066620368073814, "grad_norm": 11.97591781616211, "learning_rate": 9.995715514253588e-06, "loss": 0.5161, "step": 465 }, { "epoch": 0.02311622600327397, "grad_norm": 11.566670417785645, "learning_rate": 9.995682875369005e-06, "loss": 0.3994, "step": 466 }, { "epoch": 0.02316583163847413, "grad_norm": 11.445612907409668, "learning_rate": 9.99565011268914e-06, "loss": 0.4237, "step": 467 }, { "epoch": 0.02321543727367429, "grad_norm": 9.597240447998047, "learning_rate": 9.995617226214805e-06, "loss": 0.4652, "step": 468 }, { "epoch": 0.023265042908874448, "grad_norm": 8.391619682312012, "learning_rate": 9.995584215946814e-06, "loss": 0.3381, "step": 469 }, { "epoch": 0.02331464854407461, "grad_norm": 6.969283580780029, "learning_rate": 9.995551081885988e-06, "loss": 0.3061, "step": 470 }, { "epoch": 0.023364254179274766, "grad_norm": 10.47339153289795, "learning_rate": 9.995517824033146e-06, "loss": 0.4095, "step": 471 }, { "epoch": 0.023413859814474924, "grad_norm": 9.882850646972656, "learning_rate": 9.995484442389112e-06, "loss": 0.3728, "step": 472 }, { "epoch": 0.023463465449675085, "grad_norm": 10.22131633758545, "learning_rate": 9.995450936954713e-06, "loss": 0.4002, "step": 473 }, { "epoch": 0.023513071084875242, "grad_norm": 15.161450386047363, "learning_rate": 9.995417307730784e-06, "loss": 0.4613, "step": 474 }, { "epoch": 0.0235626767200754, "grad_norm": 13.38765811920166, "learning_rate": 9.995383554718151e-06, "loss": 0.4569, "step": 475 }, { "epoch": 0.02361228235527556, "grad_norm": 12.630879402160645, "learning_rate": 9.995349677917654e-06, "loss": 0.4542, "step": 476 }, { "epoch": 0.02366188799047572, "grad_norm": 22.586881637573242, "learning_rate": 9.995315677330133e-06, "loss": 0.4481, "step": 477 }, { "epoch": 0.023711493625675876, "grad_norm": 21.970510482788086, "learning_rate": 9.99528155295643e-06, "loss": 0.3407, "step": 478 }, { "epoch": 0.023761099260876037, "grad_norm": 9.6087007522583, "learning_rate": 9.99524730479739e-06, "loss": 0.3485, "step": 479 }, { "epoch": 0.023810704896076194, "grad_norm": 8.337587356567383, "learning_rate": 9.995212932853865e-06, "loss": 0.4112, "step": 480 }, { "epoch": 0.023860310531276352, "grad_norm": 13.301960945129395, "learning_rate": 9.995178437126701e-06, "loss": 0.5512, "step": 481 }, { "epoch": 0.023909916166476513, "grad_norm": 12.356836318969727, "learning_rate": 9.995143817616756e-06, "loss": 0.4798, "step": 482 }, { "epoch": 0.02395952180167667, "grad_norm": 10.314250946044922, "learning_rate": 9.995109074324888e-06, "loss": 0.419, "step": 483 }, { "epoch": 0.024009127436876828, "grad_norm": 5.845917224884033, "learning_rate": 9.995074207251958e-06, "loss": 0.3482, "step": 484 }, { "epoch": 0.02405873307207699, "grad_norm": 15.490209579467773, "learning_rate": 9.99503921639883e-06, "loss": 0.2488, "step": 485 }, { "epoch": 0.024108338707277147, "grad_norm": 11.527831077575684, "learning_rate": 9.99500410176637e-06, "loss": 0.4796, "step": 486 }, { "epoch": 0.024157944342477304, "grad_norm": 15.381315231323242, "learning_rate": 9.994968863355447e-06, "loss": 0.4354, "step": 487 }, { "epoch": 0.024207549977677465, "grad_norm": 7.3907999992370605, "learning_rate": 9.994933501166938e-06, "loss": 0.3117, "step": 488 }, { "epoch": 0.024257155612877623, "grad_norm": 14.757328033447266, "learning_rate": 9.994898015201718e-06, "loss": 0.4009, "step": 489 }, { "epoch": 0.02430676124807778, "grad_norm": 9.257136344909668, "learning_rate": 9.994862405460666e-06, "loss": 0.2705, "step": 490 }, { "epoch": 0.02435636688327794, "grad_norm": 6.856899738311768, "learning_rate": 9.994826671944663e-06, "loss": 0.3179, "step": 491 }, { "epoch": 0.0244059725184781, "grad_norm": 9.660896301269531, "learning_rate": 9.994790814654595e-06, "loss": 0.2558, "step": 492 }, { "epoch": 0.024455578153678256, "grad_norm": 8.273330688476562, "learning_rate": 9.994754833591353e-06, "loss": 0.382, "step": 493 }, { "epoch": 0.024505183788878417, "grad_norm": 12.071372032165527, "learning_rate": 9.994718728755825e-06, "loss": 0.4147, "step": 494 }, { "epoch": 0.024554789424078575, "grad_norm": 19.559982299804688, "learning_rate": 9.994682500148908e-06, "loss": 0.3451, "step": 495 }, { "epoch": 0.024604395059278732, "grad_norm": 15.500349998474121, "learning_rate": 9.994646147771501e-06, "loss": 0.3272, "step": 496 }, { "epoch": 0.024654000694478893, "grad_norm": 9.660767555236816, "learning_rate": 9.994609671624503e-06, "loss": 0.3727, "step": 497 }, { "epoch": 0.02470360632967905, "grad_norm": 14.885858535766602, "learning_rate": 9.994573071708817e-06, "loss": 0.4749, "step": 498 }, { "epoch": 0.024753211964879212, "grad_norm": 5.824785232543945, "learning_rate": 9.994536348025349e-06, "loss": 0.4287, "step": 499 }, { "epoch": 0.02480281760007937, "grad_norm": 22.13677215576172, "learning_rate": 9.994499500575014e-06, "loss": 0.4253, "step": 500 }, { "epoch": 0.024852423235279527, "grad_norm": 8.655866622924805, "learning_rate": 9.994462529358722e-06, "loss": 0.2935, "step": 501 }, { "epoch": 0.024902028870479688, "grad_norm": 25.668132781982422, "learning_rate": 9.994425434377388e-06, "loss": 0.3001, "step": 502 }, { "epoch": 0.024951634505679846, "grad_norm": 8.570138931274414, "learning_rate": 9.994388215631933e-06, "loss": 0.3219, "step": 503 }, { "epoch": 0.025001240140880003, "grad_norm": 23.716585159301758, "learning_rate": 9.994350873123278e-06, "loss": 0.4569, "step": 504 }, { "epoch": 0.025050845776080164, "grad_norm": 15.234465599060059, "learning_rate": 9.994313406852351e-06, "loss": 0.476, "step": 505 }, { "epoch": 0.02510045141128032, "grad_norm": 19.788597106933594, "learning_rate": 9.994275816820077e-06, "loss": 0.3009, "step": 506 }, { "epoch": 0.02515005704648048, "grad_norm": 5.049598693847656, "learning_rate": 9.99423810302739e-06, "loss": 0.3256, "step": 507 }, { "epoch": 0.02519966268168064, "grad_norm": 7.893095016479492, "learning_rate": 9.994200265475223e-06, "loss": 0.405, "step": 508 }, { "epoch": 0.025249268316880798, "grad_norm": 14.511290550231934, "learning_rate": 9.994162304164515e-06, "loss": 0.2924, "step": 509 }, { "epoch": 0.025298873952080955, "grad_norm": 8.660490989685059, "learning_rate": 9.994124219096206e-06, "loss": 0.3283, "step": 510 }, { "epoch": 0.025348479587281116, "grad_norm": 10.287842750549316, "learning_rate": 9.994086010271238e-06, "loss": 0.2979, "step": 511 }, { "epoch": 0.025398085222481274, "grad_norm": 10.15223217010498, "learning_rate": 9.994047677690561e-06, "loss": 0.4204, "step": 512 }, { "epoch": 0.02544769085768143, "grad_norm": 14.003740310668945, "learning_rate": 9.994009221355124e-06, "loss": 0.4071, "step": 513 }, { "epoch": 0.025497296492881592, "grad_norm": 17.724767684936523, "learning_rate": 9.993970641265879e-06, "loss": 0.3337, "step": 514 }, { "epoch": 0.02554690212808175, "grad_norm": 11.813190460205078, "learning_rate": 9.993931937423783e-06, "loss": 0.4299, "step": 515 }, { "epoch": 0.025596507763281907, "grad_norm": 15.980755805969238, "learning_rate": 9.993893109829794e-06, "loss": 0.3933, "step": 516 }, { "epoch": 0.02564611339848207, "grad_norm": 11.620376586914062, "learning_rate": 9.993854158484874e-06, "loss": 0.4709, "step": 517 }, { "epoch": 0.025695719033682226, "grad_norm": 6.757266998291016, "learning_rate": 9.99381508338999e-06, "loss": 0.3611, "step": 518 }, { "epoch": 0.025745324668882383, "grad_norm": 13.413163185119629, "learning_rate": 9.99377588454611e-06, "loss": 0.4477, "step": 519 }, { "epoch": 0.025794930304082545, "grad_norm": 7.555251598358154, "learning_rate": 9.993736561954203e-06, "loss": 0.343, "step": 520 }, { "epoch": 0.025844535939282702, "grad_norm": 8.143911361694336, "learning_rate": 9.993697115615244e-06, "loss": 0.3298, "step": 521 }, { "epoch": 0.02589414157448286, "grad_norm": 9.545751571655273, "learning_rate": 9.993657545530216e-06, "loss": 0.4101, "step": 522 }, { "epoch": 0.02594374720968302, "grad_norm": 10.69888973236084, "learning_rate": 9.99361785170009e-06, "loss": 0.397, "step": 523 }, { "epoch": 0.025993352844883178, "grad_norm": 7.008504390716553, "learning_rate": 9.993578034125858e-06, "loss": 0.2863, "step": 524 }, { "epoch": 0.02604295848008334, "grad_norm": 16.12946891784668, "learning_rate": 9.993538092808503e-06, "loss": 0.4143, "step": 525 }, { "epoch": 0.026092564115283497, "grad_norm": 15.156355857849121, "learning_rate": 9.993498027749016e-06, "loss": 0.3369, "step": 526 }, { "epoch": 0.026142169750483654, "grad_norm": 8.32003402709961, "learning_rate": 9.993457838948388e-06, "loss": 0.2633, "step": 527 }, { "epoch": 0.026191775385683815, "grad_norm": 20.194061279296875, "learning_rate": 9.993417526407615e-06, "loss": 0.3887, "step": 528 }, { "epoch": 0.026241381020883973, "grad_norm": 6.857709884643555, "learning_rate": 9.993377090127699e-06, "loss": 0.2977, "step": 529 }, { "epoch": 0.02629098665608413, "grad_norm": 25.10239601135254, "learning_rate": 9.993336530109639e-06, "loss": 0.5938, "step": 530 }, { "epoch": 0.02634059229128429, "grad_norm": 10.476096153259277, "learning_rate": 9.993295846354441e-06, "loss": 0.2552, "step": 531 }, { "epoch": 0.02639019792648445, "grad_norm": 9.82148551940918, "learning_rate": 9.993255038863112e-06, "loss": 0.3272, "step": 532 }, { "epoch": 0.026439803561684606, "grad_norm": 13.136141777038574, "learning_rate": 9.993214107636665e-06, "loss": 0.4659, "step": 533 }, { "epoch": 0.026489409196884767, "grad_norm": 15.461562156677246, "learning_rate": 9.993173052676116e-06, "loss": 0.5273, "step": 534 }, { "epoch": 0.026539014832084925, "grad_norm": 4.0987629890441895, "learning_rate": 9.993131873982479e-06, "loss": 0.3044, "step": 535 }, { "epoch": 0.026588620467285082, "grad_norm": 15.512333869934082, "learning_rate": 9.993090571556774e-06, "loss": 0.3871, "step": 536 }, { "epoch": 0.026638226102485243, "grad_norm": 10.041248321533203, "learning_rate": 9.993049145400027e-06, "loss": 0.4413, "step": 537 }, { "epoch": 0.0266878317376854, "grad_norm": 7.865330219268799, "learning_rate": 9.993007595513263e-06, "loss": 0.382, "step": 538 }, { "epoch": 0.02673743737288556, "grad_norm": 7.524237155914307, "learning_rate": 9.992965921897512e-06, "loss": 0.4162, "step": 539 }, { "epoch": 0.02678704300808572, "grad_norm": 11.188539505004883, "learning_rate": 9.992924124553807e-06, "loss": 0.3894, "step": 540 }, { "epoch": 0.026836648643285877, "grad_norm": 8.989903450012207, "learning_rate": 9.992882203483185e-06, "loss": 0.366, "step": 541 }, { "epoch": 0.026886254278486035, "grad_norm": 13.701549530029297, "learning_rate": 9.992840158686682e-06, "loss": 0.4408, "step": 542 }, { "epoch": 0.026935859913686196, "grad_norm": 14.498603820800781, "learning_rate": 9.992797990165344e-06, "loss": 0.4879, "step": 543 }, { "epoch": 0.026985465548886353, "grad_norm": 9.002799987792969, "learning_rate": 9.99275569792021e-06, "loss": 0.3861, "step": 544 }, { "epoch": 0.02703507118408651, "grad_norm": 12.233821868896484, "learning_rate": 9.992713281952334e-06, "loss": 0.4536, "step": 545 }, { "epoch": 0.02708467681928667, "grad_norm": 9.918171882629395, "learning_rate": 9.992670742262762e-06, "loss": 0.4028, "step": 546 }, { "epoch": 0.02713428245448683, "grad_norm": 11.94956111907959, "learning_rate": 9.992628078852552e-06, "loss": 0.4028, "step": 547 }, { "epoch": 0.027183888089686987, "grad_norm": 7.229408264160156, "learning_rate": 9.99258529172276e-06, "loss": 0.2877, "step": 548 }, { "epoch": 0.027233493724887148, "grad_norm": 7.184460639953613, "learning_rate": 9.992542380874446e-06, "loss": 0.2918, "step": 549 }, { "epoch": 0.027283099360087305, "grad_norm": 7.8028459548950195, "learning_rate": 9.992499346308674e-06, "loss": 0.3242, "step": 550 }, { "epoch": 0.027332704995287466, "grad_norm": 12.241957664489746, "learning_rate": 9.992456188026507e-06, "loss": 0.4026, "step": 551 }, { "epoch": 0.027382310630487624, "grad_norm": 11.228957176208496, "learning_rate": 9.99241290602902e-06, "loss": 0.3113, "step": 552 }, { "epoch": 0.02743191626568778, "grad_norm": 9.538668632507324, "learning_rate": 9.992369500317281e-06, "loss": 0.4339, "step": 553 }, { "epoch": 0.027481521900887942, "grad_norm": 9.698517799377441, "learning_rate": 9.992325970892369e-06, "loss": 0.3587, "step": 554 }, { "epoch": 0.0275311275360881, "grad_norm": 28.0184326171875, "learning_rate": 9.992282317755362e-06, "loss": 0.422, "step": 555 }, { "epoch": 0.027580733171288258, "grad_norm": 12.170258522033691, "learning_rate": 9.992238540907339e-06, "loss": 0.372, "step": 556 }, { "epoch": 0.02763033880648842, "grad_norm": 13.751233100891113, "learning_rate": 9.992194640349386e-06, "loss": 0.25, "step": 557 }, { "epoch": 0.027679944441688576, "grad_norm": 17.423847198486328, "learning_rate": 9.992150616082594e-06, "loss": 0.3673, "step": 558 }, { "epoch": 0.027729550076888734, "grad_norm": 12.02906608581543, "learning_rate": 9.99210646810805e-06, "loss": 0.5644, "step": 559 }, { "epoch": 0.027779155712088895, "grad_norm": 24.18631362915039, "learning_rate": 9.99206219642685e-06, "loss": 0.5378, "step": 560 }, { "epoch": 0.027828761347289052, "grad_norm": 14.912564277648926, "learning_rate": 9.992017801040092e-06, "loss": 0.4702, "step": 561 }, { "epoch": 0.02787836698248921, "grad_norm": 11.377904891967773, "learning_rate": 9.991973281948872e-06, "loss": 0.2788, "step": 562 }, { "epoch": 0.02792797261768937, "grad_norm": 9.183209419250488, "learning_rate": 9.991928639154297e-06, "loss": 0.2701, "step": 563 }, { "epoch": 0.027977578252889528, "grad_norm": 11.473193168640137, "learning_rate": 9.991883872657475e-06, "loss": 0.3195, "step": 564 }, { "epoch": 0.028027183888089686, "grad_norm": 13.664982795715332, "learning_rate": 9.99183898245951e-06, "loss": 0.3933, "step": 565 }, { "epoch": 0.028076789523289847, "grad_norm": 9.42532730102539, "learning_rate": 9.991793968561517e-06, "loss": 0.3624, "step": 566 }, { "epoch": 0.028126395158490004, "grad_norm": 13.053425788879395, "learning_rate": 9.991748830964611e-06, "loss": 0.4786, "step": 567 }, { "epoch": 0.028176000793690162, "grad_norm": 10.254408836364746, "learning_rate": 9.991703569669911e-06, "loss": 0.3766, "step": 568 }, { "epoch": 0.028225606428890323, "grad_norm": 21.700773239135742, "learning_rate": 9.991658184678539e-06, "loss": 0.6414, "step": 569 }, { "epoch": 0.02827521206409048, "grad_norm": 13.70169448852539, "learning_rate": 9.99161267599162e-06, "loss": 0.4507, "step": 570 }, { "epoch": 0.028324817699290638, "grad_norm": 8.175017356872559, "learning_rate": 9.99156704361028e-06, "loss": 0.3842, "step": 571 }, { "epoch": 0.0283744233344908, "grad_norm": 15.63079833984375, "learning_rate": 9.99152128753565e-06, "loss": 0.369, "step": 572 }, { "epoch": 0.028424028969690956, "grad_norm": 16.227134704589844, "learning_rate": 9.991475407768866e-06, "loss": 0.4549, "step": 573 }, { "epoch": 0.028473634604891114, "grad_norm": 12.430327415466309, "learning_rate": 9.991429404311064e-06, "loss": 0.4473, "step": 574 }, { "epoch": 0.028523240240091275, "grad_norm": 8.616314888000488, "learning_rate": 9.991383277163382e-06, "loss": 0.3815, "step": 575 }, { "epoch": 0.028572845875291433, "grad_norm": 6.919626712799072, "learning_rate": 9.991337026326965e-06, "loss": 0.2947, "step": 576 }, { "epoch": 0.02862245151049159, "grad_norm": 11.203679084777832, "learning_rate": 9.991290651802959e-06, "loss": 0.488, "step": 577 }, { "epoch": 0.02867205714569175, "grad_norm": 10.326623916625977, "learning_rate": 9.991244153592512e-06, "loss": 0.4126, "step": 578 }, { "epoch": 0.02872166278089191, "grad_norm": 16.552881240844727, "learning_rate": 9.991197531696778e-06, "loss": 0.4975, "step": 579 }, { "epoch": 0.02877126841609207, "grad_norm": 12.693645477294922, "learning_rate": 9.991150786116911e-06, "loss": 0.3301, "step": 580 }, { "epoch": 0.028820874051292227, "grad_norm": 8.72091007232666, "learning_rate": 9.99110391685407e-06, "loss": 0.3668, "step": 581 }, { "epoch": 0.028870479686492385, "grad_norm": 19.495603561401367, "learning_rate": 9.991056923909417e-06, "loss": 0.4083, "step": 582 }, { "epoch": 0.028920085321692546, "grad_norm": 7.545470237731934, "learning_rate": 9.991009807284113e-06, "loss": 0.4339, "step": 583 }, { "epoch": 0.028969690956892703, "grad_norm": 9.945979118347168, "learning_rate": 9.99096256697933e-06, "loss": 0.2666, "step": 584 }, { "epoch": 0.02901929659209286, "grad_norm": 9.958026885986328, "learning_rate": 9.990915202996239e-06, "loss": 0.314, "step": 585 }, { "epoch": 0.029068902227293022, "grad_norm": 14.253233909606934, "learning_rate": 9.990867715336008e-06, "loss": 0.4019, "step": 586 }, { "epoch": 0.02911850786249318, "grad_norm": 11.458784103393555, "learning_rate": 9.99082010399982e-06, "loss": 0.4052, "step": 587 }, { "epoch": 0.029168113497693337, "grad_norm": 8.80534839630127, "learning_rate": 9.99077236898885e-06, "loss": 0.2336, "step": 588 }, { "epoch": 0.029217719132893498, "grad_norm": 12.616724014282227, "learning_rate": 9.990724510304286e-06, "loss": 0.3889, "step": 589 }, { "epoch": 0.029267324768093655, "grad_norm": 14.401844024658203, "learning_rate": 9.99067652794731e-06, "loss": 0.3713, "step": 590 }, { "epoch": 0.029316930403293813, "grad_norm": 25.739646911621094, "learning_rate": 9.99062842191911e-06, "loss": 0.3912, "step": 591 }, { "epoch": 0.029366536038493974, "grad_norm": 12.887064933776855, "learning_rate": 9.990580192220882e-06, "loss": 0.3987, "step": 592 }, { "epoch": 0.02941614167369413, "grad_norm": 8.970191955566406, "learning_rate": 9.99053183885382e-06, "loss": 0.4727, "step": 593 }, { "epoch": 0.02946574730889429, "grad_norm": 8.133221626281738, "learning_rate": 9.99048336181912e-06, "loss": 0.2839, "step": 594 }, { "epoch": 0.02951535294409445, "grad_norm": 12.475080490112305, "learning_rate": 9.990434761117987e-06, "loss": 0.436, "step": 595 }, { "epoch": 0.029564958579294608, "grad_norm": 9.801752090454102, "learning_rate": 9.99038603675162e-06, "loss": 0.448, "step": 596 }, { "epoch": 0.029614564214494765, "grad_norm": 11.78125, "learning_rate": 9.990337188721232e-06, "loss": 0.4163, "step": 597 }, { "epoch": 0.029664169849694926, "grad_norm": 8.915753364562988, "learning_rate": 9.99028821702803e-06, "loss": 0.3707, "step": 598 }, { "epoch": 0.029713775484895084, "grad_norm": 10.155006408691406, "learning_rate": 9.990239121673228e-06, "loss": 0.4502, "step": 599 }, { "epoch": 0.02976338112009524, "grad_norm": 7.006326675415039, "learning_rate": 9.990189902658044e-06, "loss": 0.4086, "step": 600 }, { "epoch": 0.029812986755295402, "grad_norm": 14.345032691955566, "learning_rate": 9.990140559983699e-06, "loss": 0.4018, "step": 601 }, { "epoch": 0.02986259239049556, "grad_norm": 10.814800262451172, "learning_rate": 9.990091093651412e-06, "loss": 0.5184, "step": 602 }, { "epoch": 0.029912198025695717, "grad_norm": 10.562272071838379, "learning_rate": 9.99004150366241e-06, "loss": 0.3457, "step": 603 }, { "epoch": 0.02996180366089588, "grad_norm": 4.762219429016113, "learning_rate": 9.989991790017923e-06, "loss": 0.447, "step": 604 }, { "epoch": 0.030011409296096036, "grad_norm": 17.608930587768555, "learning_rate": 9.989941952719182e-06, "loss": 0.4334, "step": 605 }, { "epoch": 0.030061014931296197, "grad_norm": 10.67374324798584, "learning_rate": 9.989891991767421e-06, "loss": 0.5036, "step": 606 }, { "epoch": 0.030110620566496354, "grad_norm": 6.803190231323242, "learning_rate": 9.989841907163882e-06, "loss": 0.377, "step": 607 }, { "epoch": 0.030160226201696512, "grad_norm": 8.281670570373535, "learning_rate": 9.989791698909801e-06, "loss": 0.3752, "step": 608 }, { "epoch": 0.030209831836896673, "grad_norm": 9.480658531188965, "learning_rate": 9.989741367006424e-06, "loss": 0.3637, "step": 609 }, { "epoch": 0.03025943747209683, "grad_norm": 22.305246353149414, "learning_rate": 9.989690911455002e-06, "loss": 0.389, "step": 610 }, { "epoch": 0.030309043107296988, "grad_norm": 13.550832748413086, "learning_rate": 9.989640332256779e-06, "loss": 0.4672, "step": 611 }, { "epoch": 0.03035864874249715, "grad_norm": 10.012981414794922, "learning_rate": 9.989589629413012e-06, "loss": 0.4062, "step": 612 }, { "epoch": 0.030408254377697307, "grad_norm": 12.056358337402344, "learning_rate": 9.989538802924959e-06, "loss": 0.4189, "step": 613 }, { "epoch": 0.030457860012897464, "grad_norm": 11.966291427612305, "learning_rate": 9.989487852793875e-06, "loss": 0.3313, "step": 614 }, { "epoch": 0.030507465648097625, "grad_norm": 5.585148811340332, "learning_rate": 9.989436779021026e-06, "loss": 0.3856, "step": 615 }, { "epoch": 0.030557071283297783, "grad_norm": 8.104670524597168, "learning_rate": 9.989385581607677e-06, "loss": 0.4138, "step": 616 }, { "epoch": 0.03060667691849794, "grad_norm": 8.785672187805176, "learning_rate": 9.989334260555098e-06, "loss": 0.4232, "step": 617 }, { "epoch": 0.0306562825536981, "grad_norm": 14.883298873901367, "learning_rate": 9.989282815864556e-06, "loss": 0.45, "step": 618 }, { "epoch": 0.03070588818889826, "grad_norm": 17.81233787536621, "learning_rate": 9.98923124753733e-06, "loss": 0.4406, "step": 619 }, { "epoch": 0.030755493824098416, "grad_norm": 5.78502893447876, "learning_rate": 9.989179555574697e-06, "loss": 0.291, "step": 620 }, { "epoch": 0.030805099459298577, "grad_norm": 32.710601806640625, "learning_rate": 9.989127739977938e-06, "loss": 0.3846, "step": 621 }, { "epoch": 0.030854705094498735, "grad_norm": 5.819981098175049, "learning_rate": 9.989075800748335e-06, "loss": 0.2715, "step": 622 }, { "epoch": 0.030904310729698892, "grad_norm": 9.037997245788574, "learning_rate": 9.98902373788718e-06, "loss": 0.4485, "step": 623 }, { "epoch": 0.030953916364899053, "grad_norm": 15.992900848388672, "learning_rate": 9.988971551395758e-06, "loss": 0.3698, "step": 624 }, { "epoch": 0.03100352200009921, "grad_norm": 14.661538124084473, "learning_rate": 9.988919241275364e-06, "loss": 0.4292, "step": 625 }, { "epoch": 0.03105312763529937, "grad_norm": 13.496278762817383, "learning_rate": 9.988866807527296e-06, "loss": 0.4795, "step": 626 }, { "epoch": 0.03110273327049953, "grad_norm": 6.151333808898926, "learning_rate": 9.988814250152851e-06, "loss": 0.3479, "step": 627 }, { "epoch": 0.031152338905699687, "grad_norm": 8.719094276428223, "learning_rate": 9.988761569153331e-06, "loss": 0.3134, "step": 628 }, { "epoch": 0.031201944540899845, "grad_norm": 14.236063957214355, "learning_rate": 9.988708764530044e-06, "loss": 0.3899, "step": 629 }, { "epoch": 0.031251550176100006, "grad_norm": 10.769611358642578, "learning_rate": 9.988655836284297e-06, "loss": 0.3694, "step": 630 }, { "epoch": 0.03130115581130016, "grad_norm": 13.247830390930176, "learning_rate": 9.988602784417402e-06, "loss": 0.5334, "step": 631 }, { "epoch": 0.03135076144650032, "grad_norm": 22.75662612915039, "learning_rate": 9.988549608930673e-06, "loss": 0.5448, "step": 632 }, { "epoch": 0.03140036708170048, "grad_norm": 15.763792037963867, "learning_rate": 9.988496309825428e-06, "loss": 0.3919, "step": 633 }, { "epoch": 0.03144997271690064, "grad_norm": 6.7500786781311035, "learning_rate": 9.988442887102987e-06, "loss": 0.4338, "step": 634 }, { "epoch": 0.0314995783521008, "grad_norm": 11.341348648071289, "learning_rate": 9.988389340764677e-06, "loss": 0.3104, "step": 635 }, { "epoch": 0.03154918398730096, "grad_norm": 14.58669376373291, "learning_rate": 9.988335670811822e-06, "loss": 0.4566, "step": 636 }, { "epoch": 0.031598789622501115, "grad_norm": 24.457008361816406, "learning_rate": 9.98828187724575e-06, "loss": 0.4782, "step": 637 }, { "epoch": 0.03164839525770127, "grad_norm": 12.767083168029785, "learning_rate": 9.988227960067801e-06, "loss": 0.4885, "step": 638 }, { "epoch": 0.03169800089290143, "grad_norm": 8.285651206970215, "learning_rate": 9.988173919279303e-06, "loss": 0.308, "step": 639 }, { "epoch": 0.031747606528101595, "grad_norm": 9.4503755569458, "learning_rate": 9.9881197548816e-06, "loss": 0.3396, "step": 640 }, { "epoch": 0.03179721216330175, "grad_norm": 5.839423179626465, "learning_rate": 9.988065466876034e-06, "loss": 0.3157, "step": 641 }, { "epoch": 0.03184681779850191, "grad_norm": 6.48547887802124, "learning_rate": 9.988011055263948e-06, "loss": 0.3145, "step": 642 }, { "epoch": 0.03189642343370207, "grad_norm": 21.873926162719727, "learning_rate": 9.987956520046692e-06, "loss": 0.3689, "step": 643 }, { "epoch": 0.031946029068902225, "grad_norm": 20.023855209350586, "learning_rate": 9.98790186122562e-06, "loss": 0.5453, "step": 644 }, { "epoch": 0.03199563470410239, "grad_norm": 23.20545768737793, "learning_rate": 9.98784707880208e-06, "loss": 0.2131, "step": 645 }, { "epoch": 0.03204524033930255, "grad_norm": 12.203849792480469, "learning_rate": 9.987792172777436e-06, "loss": 0.4437, "step": 646 }, { "epoch": 0.032094845974502705, "grad_norm": 9.717498779296875, "learning_rate": 9.987737143153043e-06, "loss": 0.3956, "step": 647 }, { "epoch": 0.03214445160970286, "grad_norm": 7.769961357116699, "learning_rate": 9.987681989930267e-06, "loss": 0.3071, "step": 648 }, { "epoch": 0.03219405724490302, "grad_norm": 9.129886627197266, "learning_rate": 9.987626713110477e-06, "loss": 0.436, "step": 649 }, { "epoch": 0.03224366288010318, "grad_norm": 17.657424926757812, "learning_rate": 9.987571312695041e-06, "loss": 0.4254, "step": 650 }, { "epoch": 0.03229326851530334, "grad_norm": 12.359321594238281, "learning_rate": 9.987515788685332e-06, "loss": 0.2465, "step": 651 }, { "epoch": 0.0323428741505035, "grad_norm": 11.32691764831543, "learning_rate": 9.987460141082724e-06, "loss": 0.4084, "step": 652 }, { "epoch": 0.03239247978570366, "grad_norm": 9.725621223449707, "learning_rate": 9.987404369888598e-06, "loss": 0.3951, "step": 653 }, { "epoch": 0.032442085420903814, "grad_norm": 8.064552307128906, "learning_rate": 9.987348475104337e-06, "loss": 0.3383, "step": 654 }, { "epoch": 0.03249169105610397, "grad_norm": 10.473454475402832, "learning_rate": 9.987292456731324e-06, "loss": 0.4449, "step": 655 }, { "epoch": 0.03254129669130413, "grad_norm": 6.2257795333862305, "learning_rate": 9.987236314770947e-06, "loss": 0.3193, "step": 656 }, { "epoch": 0.032590902326504294, "grad_norm": 11.640898704528809, "learning_rate": 9.9871800492246e-06, "loss": 0.3789, "step": 657 }, { "epoch": 0.03264050796170445, "grad_norm": 8.124027252197266, "learning_rate": 9.987123660093675e-06, "loss": 0.4232, "step": 658 }, { "epoch": 0.03269011359690461, "grad_norm": 14.429254531860352, "learning_rate": 9.987067147379568e-06, "loss": 0.5265, "step": 659 }, { "epoch": 0.032739719232104766, "grad_norm": 9.09094524383545, "learning_rate": 9.987010511083683e-06, "loss": 0.445, "step": 660 }, { "epoch": 0.032789324867304924, "grad_norm": 5.780467510223389, "learning_rate": 9.986953751207421e-06, "loss": 0.2769, "step": 661 }, { "epoch": 0.03283893050250508, "grad_norm": 7.024355411529541, "learning_rate": 9.98689686775219e-06, "loss": 0.3374, "step": 662 }, { "epoch": 0.032888536137705246, "grad_norm": 18.660242080688477, "learning_rate": 9.986839860719398e-06, "loss": 0.4423, "step": 663 }, { "epoch": 0.032938141772905404, "grad_norm": 14.677596092224121, "learning_rate": 9.986782730110459e-06, "loss": 0.3707, "step": 664 }, { "epoch": 0.03298774740810556, "grad_norm": 12.15315055847168, "learning_rate": 9.986725475926787e-06, "loss": 0.278, "step": 665 }, { "epoch": 0.03303735304330572, "grad_norm": 13.9127836227417, "learning_rate": 9.986668098169804e-06, "loss": 0.1995, "step": 666 }, { "epoch": 0.033086958678505876, "grad_norm": 14.029186248779297, "learning_rate": 9.986610596840929e-06, "loss": 0.544, "step": 667 }, { "epoch": 0.033136564313706034, "grad_norm": 7.619348526000977, "learning_rate": 9.986552971941585e-06, "loss": 0.3159, "step": 668 }, { "epoch": 0.0331861699489062, "grad_norm": 13.237956047058105, "learning_rate": 9.986495223473205e-06, "loss": 0.362, "step": 669 }, { "epoch": 0.033235775584106356, "grad_norm": 8.084122657775879, "learning_rate": 9.986437351437218e-06, "loss": 0.3547, "step": 670 }, { "epoch": 0.03328538121930651, "grad_norm": 13.913122177124023, "learning_rate": 9.986379355835059e-06, "loss": 0.2715, "step": 671 }, { "epoch": 0.03333498685450667, "grad_norm": 9.839665412902832, "learning_rate": 9.986321236668162e-06, "loss": 0.3145, "step": 672 }, { "epoch": 0.03338459248970683, "grad_norm": 31.83673667907715, "learning_rate": 9.98626299393797e-06, "loss": 0.4257, "step": 673 }, { "epoch": 0.03343419812490699, "grad_norm": 14.245595932006836, "learning_rate": 9.986204627645924e-06, "loss": 0.506, "step": 674 }, { "epoch": 0.03348380376010715, "grad_norm": 13.760143280029297, "learning_rate": 9.986146137793473e-06, "loss": 0.3173, "step": 675 }, { "epoch": 0.03353340939530731, "grad_norm": 7.827123641967773, "learning_rate": 9.986087524382065e-06, "loss": 0.194, "step": 676 }, { "epoch": 0.033583015030507465, "grad_norm": 25.645912170410156, "learning_rate": 9.986028787413153e-06, "loss": 0.4765, "step": 677 }, { "epoch": 0.03363262066570762, "grad_norm": 7.539290428161621, "learning_rate": 9.985969926888192e-06, "loss": 0.3483, "step": 678 }, { "epoch": 0.03368222630090778, "grad_norm": 11.086501121520996, "learning_rate": 9.98591094280864e-06, "loss": 0.4666, "step": 679 }, { "epoch": 0.033731831936107945, "grad_norm": 10.109540939331055, "learning_rate": 9.985851835175961e-06, "loss": 0.4735, "step": 680 }, { "epoch": 0.0337814375713081, "grad_norm": 6.167145252227783, "learning_rate": 9.985792603991617e-06, "loss": 0.4215, "step": 681 }, { "epoch": 0.03383104320650826, "grad_norm": 11.659085273742676, "learning_rate": 9.985733249257078e-06, "loss": 0.374, "step": 682 }, { "epoch": 0.03388064884170842, "grad_norm": 10.432621002197266, "learning_rate": 9.985673770973815e-06, "loss": 0.3653, "step": 683 }, { "epoch": 0.033930254476908575, "grad_norm": 8.135366439819336, "learning_rate": 9.985614169143299e-06, "loss": 0.2948, "step": 684 }, { "epoch": 0.03397986011210873, "grad_norm": 12.405891418457031, "learning_rate": 9.985554443767008e-06, "loss": 0.3561, "step": 685 }, { "epoch": 0.0340294657473089, "grad_norm": 10.693416595458984, "learning_rate": 9.985494594846424e-06, "loss": 0.3132, "step": 686 }, { "epoch": 0.034079071382509055, "grad_norm": 11.974959373474121, "learning_rate": 9.985434622383027e-06, "loss": 0.3561, "step": 687 }, { "epoch": 0.03412867701770921, "grad_norm": 7.424398899078369, "learning_rate": 9.985374526378306e-06, "loss": 0.2756, "step": 688 }, { "epoch": 0.03417828265290937, "grad_norm": 11.407309532165527, "learning_rate": 9.985314306833749e-06, "loss": 0.4338, "step": 689 }, { "epoch": 0.03422788828810953, "grad_norm": 9.306188583374023, "learning_rate": 9.985253963750849e-06, "loss": 0.3545, "step": 690 }, { "epoch": 0.034277493923309685, "grad_norm": 7.414888858795166, "learning_rate": 9.9851934971311e-06, "loss": 0.3145, "step": 691 }, { "epoch": 0.03432709955850985, "grad_norm": 10.186988830566406, "learning_rate": 9.985132906976001e-06, "loss": 0.3572, "step": 692 }, { "epoch": 0.03437670519371001, "grad_norm": 10.94656753540039, "learning_rate": 9.985072193287052e-06, "loss": 0.3949, "step": 693 }, { "epoch": 0.034426310828910164, "grad_norm": 8.652854919433594, "learning_rate": 9.985011356065763e-06, "loss": 0.44, "step": 694 }, { "epoch": 0.03447591646411032, "grad_norm": 7.164357662200928, "learning_rate": 9.984950395313636e-06, "loss": 0.3074, "step": 695 }, { "epoch": 0.03452552209931048, "grad_norm": 10.965888977050781, "learning_rate": 9.984889311032182e-06, "loss": 0.3253, "step": 696 }, { "epoch": 0.03457512773451064, "grad_norm": 10.967388153076172, "learning_rate": 9.984828103222916e-06, "loss": 0.3933, "step": 697 }, { "epoch": 0.0346247333697108, "grad_norm": 8.309539794921875, "learning_rate": 9.984766771887355e-06, "loss": 0.4272, "step": 698 }, { "epoch": 0.03467433900491096, "grad_norm": 8.393423080444336, "learning_rate": 9.98470531702702e-06, "loss": 0.3586, "step": 699 }, { "epoch": 0.03472394464011112, "grad_norm": 16.906469345092773, "learning_rate": 9.984643738643432e-06, "loss": 0.364, "step": 700 }, { "epoch": 0.034773550275311274, "grad_norm": 10.435324668884277, "learning_rate": 9.984582036738116e-06, "loss": 0.3948, "step": 701 }, { "epoch": 0.03482315591051143, "grad_norm": 17.491365432739258, "learning_rate": 9.984520211312603e-06, "loss": 0.4544, "step": 702 }, { "epoch": 0.034872761545711596, "grad_norm": 10.221844673156738, "learning_rate": 9.984458262368426e-06, "loss": 0.3991, "step": 703 }, { "epoch": 0.034922367180911754, "grad_norm": 16.81808090209961, "learning_rate": 9.984396189907116e-06, "loss": 0.4997, "step": 704 }, { "epoch": 0.03497197281611191, "grad_norm": 9.7240571975708, "learning_rate": 9.984333993930216e-06, "loss": 0.3787, "step": 705 }, { "epoch": 0.03502157845131207, "grad_norm": 10.666502952575684, "learning_rate": 9.984271674439263e-06, "loss": 0.4004, "step": 706 }, { "epoch": 0.035071184086512226, "grad_norm": 11.658364295959473, "learning_rate": 9.984209231435805e-06, "loss": 0.3484, "step": 707 }, { "epoch": 0.035120789721712384, "grad_norm": 16.807538986206055, "learning_rate": 9.984146664921386e-06, "loss": 0.3382, "step": 708 }, { "epoch": 0.03517039535691255, "grad_norm": 9.736056327819824, "learning_rate": 9.984083974897558e-06, "loss": 0.4152, "step": 709 }, { "epoch": 0.035220000992112706, "grad_norm": 11.025428771972656, "learning_rate": 9.984021161365875e-06, "loss": 0.3875, "step": 710 }, { "epoch": 0.03526960662731286, "grad_norm": 30.570478439331055, "learning_rate": 9.983958224327894e-06, "loss": 0.5717, "step": 711 }, { "epoch": 0.03531921226251302, "grad_norm": 8.17177677154541, "learning_rate": 9.983895163785173e-06, "loss": 0.3072, "step": 712 }, { "epoch": 0.03536881789771318, "grad_norm": 10.750396728515625, "learning_rate": 9.983831979739275e-06, "loss": 0.3638, "step": 713 }, { "epoch": 0.035418423532913336, "grad_norm": 10.180347442626953, "learning_rate": 9.983768672191767e-06, "loss": 0.4607, "step": 714 }, { "epoch": 0.0354680291681135, "grad_norm": 8.97248363494873, "learning_rate": 9.983705241144216e-06, "loss": 0.3344, "step": 715 }, { "epoch": 0.03551763480331366, "grad_norm": 11.437776565551758, "learning_rate": 9.983641686598196e-06, "loss": 0.4895, "step": 716 }, { "epoch": 0.035567240438513816, "grad_norm": 5.658726692199707, "learning_rate": 9.983578008555279e-06, "loss": 0.31, "step": 717 }, { "epoch": 0.03561684607371397, "grad_norm": 12.987632751464844, "learning_rate": 9.983514207017046e-06, "loss": 0.4701, "step": 718 }, { "epoch": 0.03566645170891413, "grad_norm": 7.828454494476318, "learning_rate": 9.983450281985076e-06, "loss": 0.3616, "step": 719 }, { "epoch": 0.03571605734411429, "grad_norm": 12.931288719177246, "learning_rate": 9.983386233460954e-06, "loss": 0.3417, "step": 720 }, { "epoch": 0.03576566297931445, "grad_norm": 11.212167739868164, "learning_rate": 9.983322061446267e-06, "loss": 0.4292, "step": 721 }, { "epoch": 0.03581526861451461, "grad_norm": 9.38236141204834, "learning_rate": 9.983257765942604e-06, "loss": 0.4289, "step": 722 }, { "epoch": 0.03586487424971477, "grad_norm": 8.112957000732422, "learning_rate": 9.98319334695156e-06, "loss": 0.4232, "step": 723 }, { "epoch": 0.035914479884914925, "grad_norm": 12.391196250915527, "learning_rate": 9.983128804474733e-06, "loss": 0.3938, "step": 724 }, { "epoch": 0.03596408552011508, "grad_norm": 7.175553798675537, "learning_rate": 9.983064138513718e-06, "loss": 0.3213, "step": 725 }, { "epoch": 0.03601369115531525, "grad_norm": 10.325286865234375, "learning_rate": 9.982999349070119e-06, "loss": 0.3834, "step": 726 }, { "epoch": 0.036063296790515405, "grad_norm": 10.75754165649414, "learning_rate": 9.982934436145544e-06, "loss": 0.3467, "step": 727 }, { "epoch": 0.03611290242571556, "grad_norm": 14.839801788330078, "learning_rate": 9.982869399741598e-06, "loss": 0.384, "step": 728 }, { "epoch": 0.03616250806091572, "grad_norm": 17.744857788085938, "learning_rate": 9.982804239859894e-06, "loss": 0.5095, "step": 729 }, { "epoch": 0.03621211369611588, "grad_norm": 9.653899192810059, "learning_rate": 9.982738956502048e-06, "loss": 0.3221, "step": 730 }, { "epoch": 0.036261719331316035, "grad_norm": 10.55351448059082, "learning_rate": 9.982673549669678e-06, "loss": 0.3469, "step": 731 }, { "epoch": 0.0363113249665162, "grad_norm": 7.8516764640808105, "learning_rate": 9.9826080193644e-06, "loss": 0.4346, "step": 732 }, { "epoch": 0.03636093060171636, "grad_norm": 16.1259822845459, "learning_rate": 9.982542365587844e-06, "loss": 0.4967, "step": 733 }, { "epoch": 0.036410536236916514, "grad_norm": 18.109182357788086, "learning_rate": 9.982476588341634e-06, "loss": 0.4311, "step": 734 }, { "epoch": 0.03646014187211667, "grad_norm": 13.002891540527344, "learning_rate": 9.9824106876274e-06, "loss": 0.3847, "step": 735 }, { "epoch": 0.03650974750731683, "grad_norm": 17.067630767822266, "learning_rate": 9.982344663446775e-06, "loss": 0.51, "step": 736 }, { "epoch": 0.03655935314251699, "grad_norm": 7.512720584869385, "learning_rate": 9.982278515801395e-06, "loss": 0.3569, "step": 737 }, { "epoch": 0.03660895877771715, "grad_norm": 10.507176399230957, "learning_rate": 9.9822122446929e-06, "loss": 0.431, "step": 738 }, { "epoch": 0.03665856441291731, "grad_norm": 10.844327926635742, "learning_rate": 9.982145850122934e-06, "loss": 0.3566, "step": 739 }, { "epoch": 0.03670817004811747, "grad_norm": 4.787062168121338, "learning_rate": 9.98207933209314e-06, "loss": 0.3113, "step": 740 }, { "epoch": 0.036757775683317624, "grad_norm": 6.413367748260498, "learning_rate": 9.982012690605162e-06, "loss": 0.2968, "step": 741 }, { "epoch": 0.03680738131851778, "grad_norm": 11.96470832824707, "learning_rate": 9.981945925660661e-06, "loss": 0.4824, "step": 742 }, { "epoch": 0.03685698695371794, "grad_norm": 9.087897300720215, "learning_rate": 9.981879037261284e-06, "loss": 0.344, "step": 743 }, { "epoch": 0.036906592588918104, "grad_norm": 15.041422843933105, "learning_rate": 9.98181202540869e-06, "loss": 0.419, "step": 744 }, { "epoch": 0.03695619822411826, "grad_norm": 13.214609146118164, "learning_rate": 9.981744890104544e-06, "loss": 0.4136, "step": 745 }, { "epoch": 0.03700580385931842, "grad_norm": 22.712106704711914, "learning_rate": 9.981677631350503e-06, "loss": 0.3383, "step": 746 }, { "epoch": 0.037055409494518576, "grad_norm": 9.265185356140137, "learning_rate": 9.981610249148236e-06, "loss": 0.2796, "step": 747 }, { "epoch": 0.037105015129718734, "grad_norm": 4.871589183807373, "learning_rate": 9.981542743499418e-06, "loss": 0.3608, "step": 748 }, { "epoch": 0.03715462076491889, "grad_norm": 8.915974617004395, "learning_rate": 9.981475114405714e-06, "loss": 0.3383, "step": 749 }, { "epoch": 0.037204226400119056, "grad_norm": 14.845104217529297, "learning_rate": 9.981407361868804e-06, "loss": 0.5794, "step": 750 }, { "epoch": 0.03725383203531921, "grad_norm": 7.930148601531982, "learning_rate": 9.981339485890366e-06, "loss": 0.253, "step": 751 }, { "epoch": 0.03730343767051937, "grad_norm": 6.386418342590332, "learning_rate": 9.981271486472083e-06, "loss": 0.3342, "step": 752 }, { "epoch": 0.03735304330571953, "grad_norm": 14.80245590209961, "learning_rate": 9.981203363615639e-06, "loss": 0.5115, "step": 753 }, { "epoch": 0.037402648940919686, "grad_norm": 8.911993980407715, "learning_rate": 9.981135117322722e-06, "loss": 0.4833, "step": 754 }, { "epoch": 0.03745225457611985, "grad_norm": 9.668716430664062, "learning_rate": 9.981066747595023e-06, "loss": 0.5109, "step": 755 }, { "epoch": 0.03750186021132001, "grad_norm": 9.285423278808594, "learning_rate": 9.980998254434238e-06, "loss": 0.4406, "step": 756 }, { "epoch": 0.037551465846520166, "grad_norm": 14.531684875488281, "learning_rate": 9.980929637842063e-06, "loss": 0.4045, "step": 757 }, { "epoch": 0.03760107148172032, "grad_norm": 11.104723930358887, "learning_rate": 9.980860897820198e-06, "loss": 0.3991, "step": 758 }, { "epoch": 0.03765067711692048, "grad_norm": 11.11839771270752, "learning_rate": 9.980792034370346e-06, "loss": 0.4888, "step": 759 }, { "epoch": 0.03770028275212064, "grad_norm": 10.004018783569336, "learning_rate": 9.980723047494217e-06, "loss": 0.3905, "step": 760 }, { "epoch": 0.0377498883873208, "grad_norm": 16.092979431152344, "learning_rate": 9.980653937193517e-06, "loss": 0.4364, "step": 761 }, { "epoch": 0.03779949402252096, "grad_norm": 8.815252304077148, "learning_rate": 9.980584703469958e-06, "loss": 0.4645, "step": 762 }, { "epoch": 0.03784909965772112, "grad_norm": 10.838136672973633, "learning_rate": 9.980515346325256e-06, "loss": 0.4294, "step": 763 }, { "epoch": 0.037898705292921275, "grad_norm": 9.947502136230469, "learning_rate": 9.980445865761134e-06, "loss": 0.3713, "step": 764 }, { "epoch": 0.03794831092812143, "grad_norm": 6.65382719039917, "learning_rate": 9.980376261779308e-06, "loss": 0.3661, "step": 765 }, { "epoch": 0.03799791656332159, "grad_norm": 11.367217063903809, "learning_rate": 9.980306534381507e-06, "loss": 0.4185, "step": 766 }, { "epoch": 0.038047522198521755, "grad_norm": 16.218120574951172, "learning_rate": 9.980236683569458e-06, "loss": 0.5731, "step": 767 }, { "epoch": 0.03809712783372191, "grad_norm": 17.136690139770508, "learning_rate": 9.980166709344889e-06, "loss": 0.3946, "step": 768 }, { "epoch": 0.03814673346892207, "grad_norm": 7.995262145996094, "learning_rate": 9.980096611709536e-06, "loss": 0.3419, "step": 769 }, { "epoch": 0.03819633910412223, "grad_norm": 10.059579849243164, "learning_rate": 9.980026390665138e-06, "loss": 0.383, "step": 770 }, { "epoch": 0.038245944739322385, "grad_norm": 13.670949935913086, "learning_rate": 9.979956046213432e-06, "loss": 0.4871, "step": 771 }, { "epoch": 0.03829555037452254, "grad_norm": 16.943296432495117, "learning_rate": 9.979885578356163e-06, "loss": 0.5097, "step": 772 }, { "epoch": 0.03834515600972271, "grad_norm": 13.487613677978516, "learning_rate": 9.979814987095079e-06, "loss": 0.3563, "step": 773 }, { "epoch": 0.038394761644922865, "grad_norm": 14.33197021484375, "learning_rate": 9.979744272431924e-06, "loss": 0.3907, "step": 774 }, { "epoch": 0.03844436728012302, "grad_norm": 9.273298263549805, "learning_rate": 9.979673434368453e-06, "loss": 0.2864, "step": 775 }, { "epoch": 0.03849397291532318, "grad_norm": 9.139504432678223, "learning_rate": 9.979602472906423e-06, "loss": 0.3718, "step": 776 }, { "epoch": 0.03854357855052334, "grad_norm": 10.123263359069824, "learning_rate": 9.979531388047592e-06, "loss": 0.42, "step": 777 }, { "epoch": 0.038593184185723495, "grad_norm": 13.040021896362305, "learning_rate": 9.979460179793718e-06, "loss": 0.3961, "step": 778 }, { "epoch": 0.03864278982092366, "grad_norm": 9.134467124938965, "learning_rate": 9.979388848146572e-06, "loss": 0.2975, "step": 779 }, { "epoch": 0.03869239545612382, "grad_norm": 11.15507698059082, "learning_rate": 9.979317393107916e-06, "loss": 0.4774, "step": 780 }, { "epoch": 0.038742001091323974, "grad_norm": 11.727460861206055, "learning_rate": 9.979245814679522e-06, "loss": 0.3766, "step": 781 }, { "epoch": 0.03879160672652413, "grad_norm": 8.043569564819336, "learning_rate": 9.979174112863166e-06, "loss": 0.4002, "step": 782 }, { "epoch": 0.03884121236172429, "grad_norm": 8.259510040283203, "learning_rate": 9.979102287660622e-06, "loss": 0.3572, "step": 783 }, { "epoch": 0.038890817996924454, "grad_norm": 12.873223304748535, "learning_rate": 9.97903033907367e-06, "loss": 0.3178, "step": 784 }, { "epoch": 0.03894042363212461, "grad_norm": 10.001814842224121, "learning_rate": 9.978958267104095e-06, "loss": 0.4674, "step": 785 }, { "epoch": 0.03899002926732477, "grad_norm": 7.509446144104004, "learning_rate": 9.978886071753683e-06, "loss": 0.4285, "step": 786 }, { "epoch": 0.039039634902524926, "grad_norm": 9.105727195739746, "learning_rate": 9.978813753024222e-06, "loss": 0.3547, "step": 787 }, { "epoch": 0.039089240537725084, "grad_norm": 8.009591102600098, "learning_rate": 9.978741310917501e-06, "loss": 0.2395, "step": 788 }, { "epoch": 0.03913884617292524, "grad_norm": 9.340493202209473, "learning_rate": 9.97866874543532e-06, "loss": 0.4295, "step": 789 }, { "epoch": 0.039188451808125406, "grad_norm": 17.538755416870117, "learning_rate": 9.978596056579477e-06, "loss": 0.3852, "step": 790 }, { "epoch": 0.039238057443325564, "grad_norm": 14.352088928222656, "learning_rate": 9.97852324435177e-06, "loss": 0.3552, "step": 791 }, { "epoch": 0.03928766307852572, "grad_norm": 12.923154830932617, "learning_rate": 9.978450308754003e-06, "loss": 0.392, "step": 792 }, { "epoch": 0.03933726871372588, "grad_norm": 8.268378257751465, "learning_rate": 9.97837724978799e-06, "loss": 0.2995, "step": 793 }, { "epoch": 0.039386874348926036, "grad_norm": 7.315726280212402, "learning_rate": 9.978304067455534e-06, "loss": 0.4065, "step": 794 }, { "epoch": 0.039436479984126194, "grad_norm": 9.606846809387207, "learning_rate": 9.978230761758452e-06, "loss": 0.3196, "step": 795 }, { "epoch": 0.03948608561932636, "grad_norm": 20.629802703857422, "learning_rate": 9.97815733269856e-06, "loss": 0.4312, "step": 796 }, { "epoch": 0.039535691254526516, "grad_norm": 10.99075698852539, "learning_rate": 9.978083780277675e-06, "loss": 0.3732, "step": 797 }, { "epoch": 0.03958529688972667, "grad_norm": 18.044137954711914, "learning_rate": 9.978010104497624e-06, "loss": 0.4552, "step": 798 }, { "epoch": 0.03963490252492683, "grad_norm": 11.365370750427246, "learning_rate": 9.977936305360233e-06, "loss": 0.3996, "step": 799 }, { "epoch": 0.03968450816012699, "grad_norm": 10.236783981323242, "learning_rate": 9.977862382867324e-06, "loss": 0.4933, "step": 800 }, { "epoch": 0.039734113795327146, "grad_norm": 16.97047233581543, "learning_rate": 9.977788337020737e-06, "loss": 0.3656, "step": 801 }, { "epoch": 0.03978371943052731, "grad_norm": 8.568534851074219, "learning_rate": 9.977714167822302e-06, "loss": 0.4099, "step": 802 }, { "epoch": 0.03983332506572747, "grad_norm": 14.223505973815918, "learning_rate": 9.977639875273858e-06, "loss": 0.4199, "step": 803 }, { "epoch": 0.039882930700927625, "grad_norm": 10.063803672790527, "learning_rate": 9.977565459377247e-06, "loss": 0.2705, "step": 804 }, { "epoch": 0.03993253633612778, "grad_norm": 10.319005966186523, "learning_rate": 9.977490920134311e-06, "loss": 0.408, "step": 805 }, { "epoch": 0.03998214197132794, "grad_norm": 5.708731651306152, "learning_rate": 9.977416257546899e-06, "loss": 0.3562, "step": 806 }, { "epoch": 0.040031747606528105, "grad_norm": 10.229061126708984, "learning_rate": 9.977341471616861e-06, "loss": 0.3774, "step": 807 }, { "epoch": 0.04008135324172826, "grad_norm": 9.404991149902344, "learning_rate": 9.97726656234605e-06, "loss": 0.4017, "step": 808 }, { "epoch": 0.04013095887692842, "grad_norm": 18.385820388793945, "learning_rate": 9.977191529736323e-06, "loss": 0.4515, "step": 809 }, { "epoch": 0.04018056451212858, "grad_norm": 15.713980674743652, "learning_rate": 9.977116373789538e-06, "loss": 0.3777, "step": 810 }, { "epoch": 0.040230170147328735, "grad_norm": 7.230282783508301, "learning_rate": 9.977041094507555e-06, "loss": 0.3873, "step": 811 }, { "epoch": 0.04027977578252889, "grad_norm": 5.774361610412598, "learning_rate": 9.976965691892246e-06, "loss": 0.3021, "step": 812 }, { "epoch": 0.04032938141772906, "grad_norm": 7.287136554718018, "learning_rate": 9.976890165945474e-06, "loss": 0.3604, "step": 813 }, { "epoch": 0.040378987052929215, "grad_norm": 7.059898853302002, "learning_rate": 9.976814516669112e-06, "loss": 0.3179, "step": 814 }, { "epoch": 0.04042859268812937, "grad_norm": 10.441529273986816, "learning_rate": 9.976738744065037e-06, "loss": 0.3667, "step": 815 }, { "epoch": 0.04047819832332953, "grad_norm": 10.146286964416504, "learning_rate": 9.976662848135123e-06, "loss": 0.4025, "step": 816 }, { "epoch": 0.04052780395852969, "grad_norm": 10.318489074707031, "learning_rate": 9.976586828881253e-06, "loss": 0.2855, "step": 817 }, { "epoch": 0.040577409593729845, "grad_norm": 10.166915893554688, "learning_rate": 9.976510686305309e-06, "loss": 0.347, "step": 818 }, { "epoch": 0.04062701522893001, "grad_norm": 12.428616523742676, "learning_rate": 9.976434420409182e-06, "loss": 0.4208, "step": 819 }, { "epoch": 0.04067662086413017, "grad_norm": 12.408326148986816, "learning_rate": 9.976358031194756e-06, "loss": 0.5229, "step": 820 }, { "epoch": 0.040726226499330324, "grad_norm": 7.695849418640137, "learning_rate": 9.976281518663928e-06, "loss": 0.3146, "step": 821 }, { "epoch": 0.04077583213453048, "grad_norm": 9.711214065551758, "learning_rate": 9.976204882818594e-06, "loss": 0.3304, "step": 822 }, { "epoch": 0.04082543776973064, "grad_norm": 9.610574722290039, "learning_rate": 9.976128123660652e-06, "loss": 0.4667, "step": 823 }, { "epoch": 0.0408750434049308, "grad_norm": 9.060806274414062, "learning_rate": 9.976051241192002e-06, "loss": 0.472, "step": 824 }, { "epoch": 0.04092464904013096, "grad_norm": 6.100584506988525, "learning_rate": 9.975974235414552e-06, "loss": 0.3361, "step": 825 }, { "epoch": 0.04097425467533112, "grad_norm": 8.45361328125, "learning_rate": 9.975897106330212e-06, "loss": 0.3233, "step": 826 }, { "epoch": 0.04102386031053128, "grad_norm": 6.866368293762207, "learning_rate": 9.97581985394089e-06, "loss": 0.3073, "step": 827 }, { "epoch": 0.041073465945731434, "grad_norm": 7.085556983947754, "learning_rate": 9.975742478248498e-06, "loss": 0.2957, "step": 828 }, { "epoch": 0.04112307158093159, "grad_norm": 7.9802656173706055, "learning_rate": 9.97566497925496e-06, "loss": 0.4956, "step": 829 }, { "epoch": 0.04117267721613175, "grad_norm": 6.796988487243652, "learning_rate": 9.97558735696219e-06, "loss": 0.2984, "step": 830 }, { "epoch": 0.041222282851331914, "grad_norm": 6.839824676513672, "learning_rate": 9.97550961137212e-06, "loss": 0.3513, "step": 831 }, { "epoch": 0.04127188848653207, "grad_norm": 12.303881645202637, "learning_rate": 9.975431742486665e-06, "loss": 0.3941, "step": 832 }, { "epoch": 0.04132149412173223, "grad_norm": 7.915771961212158, "learning_rate": 9.975353750307764e-06, "loss": 0.2827, "step": 833 }, { "epoch": 0.041371099756932386, "grad_norm": 11.713117599487305, "learning_rate": 9.975275634837346e-06, "loss": 0.5237, "step": 834 }, { "epoch": 0.041420705392132544, "grad_norm": 8.172499656677246, "learning_rate": 9.975197396077347e-06, "loss": 0.2952, "step": 835 }, { "epoch": 0.04147031102733271, "grad_norm": 6.838170051574707, "learning_rate": 9.975119034029705e-06, "loss": 0.3368, "step": 836 }, { "epoch": 0.041519916662532866, "grad_norm": 9.389993667602539, "learning_rate": 9.975040548696366e-06, "loss": 0.3721, "step": 837 }, { "epoch": 0.04156952229773302, "grad_norm": 16.7191219329834, "learning_rate": 9.97496194007927e-06, "loss": 0.3225, "step": 838 }, { "epoch": 0.04161912793293318, "grad_norm": 15.675971031188965, "learning_rate": 9.974883208180366e-06, "loss": 0.5474, "step": 839 }, { "epoch": 0.04166873356813334, "grad_norm": 10.498997688293457, "learning_rate": 9.974804353001607e-06, "loss": 0.4679, "step": 840 }, { "epoch": 0.041718339203333496, "grad_norm": 7.605239391326904, "learning_rate": 9.974725374544943e-06, "loss": 0.3852, "step": 841 }, { "epoch": 0.04176794483853366, "grad_norm": 12.517326354980469, "learning_rate": 9.974646272812336e-06, "loss": 0.4671, "step": 842 }, { "epoch": 0.04181755047373382, "grad_norm": 7.184145927429199, "learning_rate": 9.974567047805744e-06, "loss": 0.3173, "step": 843 }, { "epoch": 0.041867156108933976, "grad_norm": 9.913939476013184, "learning_rate": 9.974487699527132e-06, "loss": 0.3861, "step": 844 }, { "epoch": 0.04191676174413413, "grad_norm": 6.368533134460449, "learning_rate": 9.974408227978461e-06, "loss": 0.2966, "step": 845 }, { "epoch": 0.04196636737933429, "grad_norm": 4.593013286590576, "learning_rate": 9.974328633161706e-06, "loss": 0.3366, "step": 846 }, { "epoch": 0.04201597301453445, "grad_norm": 20.889123916625977, "learning_rate": 9.974248915078837e-06, "loss": 0.3782, "step": 847 }, { "epoch": 0.04206557864973461, "grad_norm": 14.41612434387207, "learning_rate": 9.97416907373183e-06, "loss": 0.3804, "step": 848 }, { "epoch": 0.04211518428493477, "grad_norm": 10.318294525146484, "learning_rate": 9.974089109122665e-06, "loss": 0.3641, "step": 849 }, { "epoch": 0.04216478992013493, "grad_norm": 17.748903274536133, "learning_rate": 9.97400902125332e-06, "loss": 0.318, "step": 850 }, { "epoch": 0.042214395555335085, "grad_norm": 10.102054595947266, "learning_rate": 9.97392881012578e-06, "loss": 0.3109, "step": 851 }, { "epoch": 0.04226400119053524, "grad_norm": 18.89122772216797, "learning_rate": 9.973848475742035e-06, "loss": 0.3034, "step": 852 }, { "epoch": 0.0423136068257354, "grad_norm": 10.668566703796387, "learning_rate": 9.973768018104076e-06, "loss": 0.3536, "step": 853 }, { "epoch": 0.042363212460935565, "grad_norm": 8.204545021057129, "learning_rate": 9.973687437213894e-06, "loss": 0.3964, "step": 854 }, { "epoch": 0.04241281809613572, "grad_norm": 21.11988067626953, "learning_rate": 9.97360673307349e-06, "loss": 0.4307, "step": 855 }, { "epoch": 0.04246242373133588, "grad_norm": 23.034433364868164, "learning_rate": 9.97352590568486e-06, "loss": 0.3957, "step": 856 }, { "epoch": 0.04251202936653604, "grad_norm": 7.751640319824219, "learning_rate": 9.97344495505001e-06, "loss": 0.3032, "step": 857 }, { "epoch": 0.042561635001736195, "grad_norm": 14.211118698120117, "learning_rate": 9.97336388117094e-06, "loss": 0.3108, "step": 858 }, { "epoch": 0.04261124063693636, "grad_norm": 12.019758224487305, "learning_rate": 9.973282684049667e-06, "loss": 0.3801, "step": 859 }, { "epoch": 0.04266084627213652, "grad_norm": 15.870983123779297, "learning_rate": 9.973201363688198e-06, "loss": 0.4969, "step": 860 }, { "epoch": 0.042710451907336675, "grad_norm": 14.433696746826172, "learning_rate": 9.97311992008855e-06, "loss": 0.5174, "step": 861 }, { "epoch": 0.04276005754253683, "grad_norm": 6.830861568450928, "learning_rate": 9.973038353252739e-06, "loss": 0.4085, "step": 862 }, { "epoch": 0.04280966317773699, "grad_norm": 35.254669189453125, "learning_rate": 9.972956663182791e-06, "loss": 0.3827, "step": 863 }, { "epoch": 0.04285926881293715, "grad_norm": 8.004792213439941, "learning_rate": 9.972874849880727e-06, "loss": 0.2633, "step": 864 }, { "epoch": 0.04290887444813731, "grad_norm": 9.425360679626465, "learning_rate": 9.972792913348573e-06, "loss": 0.384, "step": 865 }, { "epoch": 0.04295848008333747, "grad_norm": 8.98505973815918, "learning_rate": 9.972710853588361e-06, "loss": 0.3473, "step": 866 }, { "epoch": 0.04300808571853763, "grad_norm": 6.759533882141113, "learning_rate": 9.972628670602127e-06, "loss": 0.2959, "step": 867 }, { "epoch": 0.043057691353737784, "grad_norm": 19.071495056152344, "learning_rate": 9.972546364391904e-06, "loss": 0.4095, "step": 868 }, { "epoch": 0.04310729698893794, "grad_norm": 26.17299461364746, "learning_rate": 9.972463934959731e-06, "loss": 0.4455, "step": 869 }, { "epoch": 0.0431569026241381, "grad_norm": 5.109988212585449, "learning_rate": 9.972381382307654e-06, "loss": 0.2584, "step": 870 }, { "epoch": 0.043206508259338264, "grad_norm": 6.845761775970459, "learning_rate": 9.972298706437718e-06, "loss": 0.4113, "step": 871 }, { "epoch": 0.04325611389453842, "grad_norm": 18.081514358520508, "learning_rate": 9.97221590735197e-06, "loss": 0.4165, "step": 872 }, { "epoch": 0.04330571952973858, "grad_norm": 10.1358060836792, "learning_rate": 9.972132985052463e-06, "loss": 0.3447, "step": 873 }, { "epoch": 0.043355325164938736, "grad_norm": 18.943822860717773, "learning_rate": 9.97204993954125e-06, "loss": 0.645, "step": 874 }, { "epoch": 0.043404930800138894, "grad_norm": 8.49262809753418, "learning_rate": 9.971966770820391e-06, "loss": 0.3543, "step": 875 }, { "epoch": 0.04345453643533905, "grad_norm": 10.358294486999512, "learning_rate": 9.971883478891947e-06, "loss": 0.4174, "step": 876 }, { "epoch": 0.043504142070539216, "grad_norm": 12.756848335266113, "learning_rate": 9.971800063757981e-06, "loss": 0.4008, "step": 877 }, { "epoch": 0.043553747705739373, "grad_norm": 11.836742401123047, "learning_rate": 9.971716525420561e-06, "loss": 0.3304, "step": 878 }, { "epoch": 0.04360335334093953, "grad_norm": 7.233951568603516, "learning_rate": 9.971632863881758e-06, "loss": 0.2866, "step": 879 }, { "epoch": 0.04365295897613969, "grad_norm": 12.482137680053711, "learning_rate": 9.971549079143642e-06, "loss": 0.45, "step": 880 }, { "epoch": 0.043702564611339846, "grad_norm": 6.0473103523254395, "learning_rate": 9.97146517120829e-06, "loss": 0.2809, "step": 881 }, { "epoch": 0.043752170246540004, "grad_norm": 7.447120189666748, "learning_rate": 9.971381140077783e-06, "loss": 0.381, "step": 882 }, { "epoch": 0.04380177588174017, "grad_norm": 9.191618919372559, "learning_rate": 9.971296985754202e-06, "loss": 0.414, "step": 883 }, { "epoch": 0.043851381516940326, "grad_norm": 9.211977005004883, "learning_rate": 9.971212708239634e-06, "loss": 0.3732, "step": 884 }, { "epoch": 0.04390098715214048, "grad_norm": 9.271588325500488, "learning_rate": 9.971128307536166e-06, "loss": 0.3983, "step": 885 }, { "epoch": 0.04395059278734064, "grad_norm": 16.46473503112793, "learning_rate": 9.971043783645888e-06, "loss": 0.4651, "step": 886 }, { "epoch": 0.0440001984225408, "grad_norm": 5.914063453674316, "learning_rate": 9.9709591365709e-06, "loss": 0.3292, "step": 887 }, { "epoch": 0.04404980405774096, "grad_norm": 13.254426956176758, "learning_rate": 9.970874366313293e-06, "loss": 0.45, "step": 888 }, { "epoch": 0.04409940969294112, "grad_norm": 14.359028816223145, "learning_rate": 9.970789472875172e-06, "loss": 0.3839, "step": 889 }, { "epoch": 0.04414901532814128, "grad_norm": 7.698498249053955, "learning_rate": 9.970704456258639e-06, "loss": 0.4028, "step": 890 }, { "epoch": 0.044198620963341435, "grad_norm": 15.365623474121094, "learning_rate": 9.970619316465801e-06, "loss": 0.3621, "step": 891 }, { "epoch": 0.04424822659854159, "grad_norm": 11.075539588928223, "learning_rate": 9.970534053498769e-06, "loss": 0.2966, "step": 892 }, { "epoch": 0.04429783223374175, "grad_norm": 5.039615631103516, "learning_rate": 9.970448667359653e-06, "loss": 0.3516, "step": 893 }, { "epoch": 0.044347437868941915, "grad_norm": 22.387348175048828, "learning_rate": 9.97036315805057e-06, "loss": 0.4328, "step": 894 }, { "epoch": 0.04439704350414207, "grad_norm": 10.24851131439209, "learning_rate": 9.970277525573641e-06, "loss": 0.4058, "step": 895 }, { "epoch": 0.04444664913934223, "grad_norm": 15.469100952148438, "learning_rate": 9.970191769930987e-06, "loss": 0.3744, "step": 896 }, { "epoch": 0.04449625477454239, "grad_norm": 8.395829200744629, "learning_rate": 9.970105891124732e-06, "loss": 0.2794, "step": 897 }, { "epoch": 0.044545860409742545, "grad_norm": 9.968338012695312, "learning_rate": 9.970019889157007e-06, "loss": 0.4044, "step": 898 }, { "epoch": 0.0445954660449427, "grad_norm": 11.98898983001709, "learning_rate": 9.969933764029937e-06, "loss": 0.3657, "step": 899 }, { "epoch": 0.04464507168014287, "grad_norm": 12.040323257446289, "learning_rate": 9.969847515745664e-06, "loss": 0.3873, "step": 900 }, { "epoch": 0.044694677315343025, "grad_norm": 11.188394546508789, "learning_rate": 9.969761144306319e-06, "loss": 0.3933, "step": 901 }, { "epoch": 0.04474428295054318, "grad_norm": 13.442262649536133, "learning_rate": 9.969674649714045e-06, "loss": 0.3541, "step": 902 }, { "epoch": 0.04479388858574334, "grad_norm": 10.739370346069336, "learning_rate": 9.969588031970987e-06, "loss": 0.3392, "step": 903 }, { "epoch": 0.0448434942209435, "grad_norm": 10.478513717651367, "learning_rate": 9.969501291079287e-06, "loss": 0.3066, "step": 904 }, { "epoch": 0.044893099856143655, "grad_norm": 13.057941436767578, "learning_rate": 9.9694144270411e-06, "loss": 0.2657, "step": 905 }, { "epoch": 0.04494270549134382, "grad_norm": 13.575414657592773, "learning_rate": 9.969327439858573e-06, "loss": 0.3217, "step": 906 }, { "epoch": 0.04499231112654398, "grad_norm": 13.742748260498047, "learning_rate": 9.969240329533864e-06, "loss": 0.3949, "step": 907 }, { "epoch": 0.045041916761744134, "grad_norm": 10.701850891113281, "learning_rate": 9.969153096069133e-06, "loss": 0.4129, "step": 908 }, { "epoch": 0.04509152239694429, "grad_norm": 6.343343257904053, "learning_rate": 9.969065739466541e-06, "loss": 0.4103, "step": 909 }, { "epoch": 0.04514112803214445, "grad_norm": 17.583208084106445, "learning_rate": 9.96897825972825e-06, "loss": 0.4983, "step": 910 }, { "epoch": 0.04519073366734461, "grad_norm": 11.160701751708984, "learning_rate": 9.968890656856434e-06, "loss": 0.2688, "step": 911 }, { "epoch": 0.04524033930254477, "grad_norm": 19.287784576416016, "learning_rate": 9.968802930853257e-06, "loss": 0.4874, "step": 912 }, { "epoch": 0.04528994493774493, "grad_norm": 13.130720138549805, "learning_rate": 9.968715081720896e-06, "loss": 0.37, "step": 913 }, { "epoch": 0.045339550572945087, "grad_norm": 6.876399040222168, "learning_rate": 9.968627109461528e-06, "loss": 0.3985, "step": 914 }, { "epoch": 0.045389156208145244, "grad_norm": 14.271747589111328, "learning_rate": 9.968539014077331e-06, "loss": 0.4773, "step": 915 }, { "epoch": 0.0454387618433454, "grad_norm": 8.946425437927246, "learning_rate": 9.968450795570493e-06, "loss": 0.3479, "step": 916 }, { "epoch": 0.045488367478545566, "grad_norm": 7.711902618408203, "learning_rate": 9.968362453943195e-06, "loss": 0.2597, "step": 917 }, { "epoch": 0.045537973113745724, "grad_norm": 4.968027114868164, "learning_rate": 9.968273989197626e-06, "loss": 0.3602, "step": 918 }, { "epoch": 0.04558757874894588, "grad_norm": 8.891999244689941, "learning_rate": 9.968185401335984e-06, "loss": 0.365, "step": 919 }, { "epoch": 0.04563718438414604, "grad_norm": 7.438228607177734, "learning_rate": 9.968096690360457e-06, "loss": 0.409, "step": 920 }, { "epoch": 0.045686790019346196, "grad_norm": 14.64187240600586, "learning_rate": 9.96800785627325e-06, "loss": 0.4601, "step": 921 }, { "epoch": 0.045736395654546354, "grad_norm": 22.670516967773438, "learning_rate": 9.967918899076558e-06, "loss": 0.3206, "step": 922 }, { "epoch": 0.04578600128974652, "grad_norm": 6.051904201507568, "learning_rate": 9.96782981877259e-06, "loss": 0.3319, "step": 923 }, { "epoch": 0.045835606924946676, "grad_norm": 8.614150047302246, "learning_rate": 9.96774061536355e-06, "loss": 0.4368, "step": 924 }, { "epoch": 0.04588521256014683, "grad_norm": 11.526495933532715, "learning_rate": 9.967651288851652e-06, "loss": 0.4295, "step": 925 }, { "epoch": 0.04593481819534699, "grad_norm": 21.41152000427246, "learning_rate": 9.967561839239108e-06, "loss": 0.6917, "step": 926 }, { "epoch": 0.04598442383054715, "grad_norm": 14.46728229522705, "learning_rate": 9.967472266528135e-06, "loss": 0.5434, "step": 927 }, { "epoch": 0.046034029465747306, "grad_norm": 9.02022647857666, "learning_rate": 9.967382570720952e-06, "loss": 0.3294, "step": 928 }, { "epoch": 0.04608363510094747, "grad_norm": 11.83472728729248, "learning_rate": 9.967292751819781e-06, "loss": 0.3364, "step": 929 }, { "epoch": 0.04613324073614763, "grad_norm": 6.9548563957214355, "learning_rate": 9.967202809826851e-06, "loss": 0.4111, "step": 930 }, { "epoch": 0.046182846371347785, "grad_norm": 9.060225486755371, "learning_rate": 9.967112744744386e-06, "loss": 0.4819, "step": 931 }, { "epoch": 0.04623245200654794, "grad_norm": 11.469285011291504, "learning_rate": 9.967022556574621e-06, "loss": 0.4327, "step": 932 }, { "epoch": 0.0462820576417481, "grad_norm": 15.914886474609375, "learning_rate": 9.96693224531979e-06, "loss": 0.5703, "step": 933 }, { "epoch": 0.04633166327694826, "grad_norm": 11.684412956237793, "learning_rate": 9.96684181098213e-06, "loss": 0.4873, "step": 934 }, { "epoch": 0.04638126891214842, "grad_norm": 11.170411109924316, "learning_rate": 9.966751253563887e-06, "loss": 0.3358, "step": 935 }, { "epoch": 0.04643087454734858, "grad_norm": 9.664787292480469, "learning_rate": 9.966660573067297e-06, "loss": 0.388, "step": 936 }, { "epoch": 0.04648048018254874, "grad_norm": 14.972965240478516, "learning_rate": 9.966569769494613e-06, "loss": 0.4538, "step": 937 }, { "epoch": 0.046530085817748895, "grad_norm": 8.208768844604492, "learning_rate": 9.966478842848084e-06, "loss": 0.3947, "step": 938 }, { "epoch": 0.04657969145294905, "grad_norm": 8.444974899291992, "learning_rate": 9.966387793129964e-06, "loss": 0.4253, "step": 939 }, { "epoch": 0.04662929708814922, "grad_norm": 7.544724941253662, "learning_rate": 9.966296620342506e-06, "loss": 0.3095, "step": 940 }, { "epoch": 0.046678902723349375, "grad_norm": 6.353827953338623, "learning_rate": 9.966205324487971e-06, "loss": 0.364, "step": 941 }, { "epoch": 0.04672850835854953, "grad_norm": 11.044520378112793, "learning_rate": 9.966113905568621e-06, "loss": 0.3802, "step": 942 }, { "epoch": 0.04677811399374969, "grad_norm": 16.281238555908203, "learning_rate": 9.966022363586725e-06, "loss": 0.34, "step": 943 }, { "epoch": 0.04682771962894985, "grad_norm": 8.28917407989502, "learning_rate": 9.965930698544546e-06, "loss": 0.3394, "step": 944 }, { "epoch": 0.046877325264150005, "grad_norm": 14.065149307250977, "learning_rate": 9.965838910444359e-06, "loss": 0.4327, "step": 945 }, { "epoch": 0.04692693089935017, "grad_norm": 10.463579177856445, "learning_rate": 9.965746999288437e-06, "loss": 0.3785, "step": 946 }, { "epoch": 0.04697653653455033, "grad_norm": 15.169205665588379, "learning_rate": 9.965654965079058e-06, "loss": 0.4461, "step": 947 }, { "epoch": 0.047026142169750484, "grad_norm": 11.309700012207031, "learning_rate": 9.965562807818503e-06, "loss": 0.4648, "step": 948 }, { "epoch": 0.04707574780495064, "grad_norm": 7.481065273284912, "learning_rate": 9.965470527509056e-06, "loss": 0.2968, "step": 949 }, { "epoch": 0.0471253534401508, "grad_norm": 5.5049614906311035, "learning_rate": 9.965378124153003e-06, "loss": 0.3241, "step": 950 }, { "epoch": 0.04717495907535096, "grad_norm": 16.437793731689453, "learning_rate": 9.965285597752631e-06, "loss": 0.3926, "step": 951 }, { "epoch": 0.04722456471055112, "grad_norm": 9.516818046569824, "learning_rate": 9.96519294831024e-06, "loss": 0.1094, "step": 952 }, { "epoch": 0.04727417034575128, "grad_norm": 7.448066711425781, "learning_rate": 9.96510017582812e-06, "loss": 0.2742, "step": 953 }, { "epoch": 0.04732377598095144, "grad_norm": 9.398979187011719, "learning_rate": 9.965007280308572e-06, "loss": 0.3362, "step": 954 }, { "epoch": 0.047373381616151594, "grad_norm": 10.473827362060547, "learning_rate": 9.964914261753897e-06, "loss": 0.4433, "step": 955 }, { "epoch": 0.04742298725135175, "grad_norm": 11.958359718322754, "learning_rate": 9.964821120166402e-06, "loss": 0.4047, "step": 956 }, { "epoch": 0.04747259288655191, "grad_norm": 9.004111289978027, "learning_rate": 9.964727855548393e-06, "loss": 0.4803, "step": 957 }, { "epoch": 0.047522198521752074, "grad_norm": 11.431136131286621, "learning_rate": 9.964634467902182e-06, "loss": 0.4526, "step": 958 }, { "epoch": 0.04757180415695223, "grad_norm": 7.109729766845703, "learning_rate": 9.964540957230082e-06, "loss": 0.3582, "step": 959 }, { "epoch": 0.04762140979215239, "grad_norm": 10.065058708190918, "learning_rate": 9.964447323534413e-06, "loss": 0.2617, "step": 960 }, { "epoch": 0.047671015427352546, "grad_norm": 9.583479881286621, "learning_rate": 9.964353566817493e-06, "loss": 0.3653, "step": 961 }, { "epoch": 0.047720621062552704, "grad_norm": 17.7353515625, "learning_rate": 9.964259687081646e-06, "loss": 0.4113, "step": 962 }, { "epoch": 0.04777022669775286, "grad_norm": 10.26405143737793, "learning_rate": 9.964165684329198e-06, "loss": 0.278, "step": 963 }, { "epoch": 0.047819832332953026, "grad_norm": 20.85086441040039, "learning_rate": 9.964071558562481e-06, "loss": 0.3918, "step": 964 }, { "epoch": 0.04786943796815318, "grad_norm": 7.6617865562438965, "learning_rate": 9.963977309783824e-06, "loss": 0.3124, "step": 965 }, { "epoch": 0.04791904360335334, "grad_norm": 8.405494689941406, "learning_rate": 9.963882937995562e-06, "loss": 0.3357, "step": 966 }, { "epoch": 0.0479686492385535, "grad_norm": 16.22730827331543, "learning_rate": 9.963788443200037e-06, "loss": 0.473, "step": 967 }, { "epoch": 0.048018254873753656, "grad_norm": 11.920428276062012, "learning_rate": 9.96369382539959e-06, "loss": 0.4355, "step": 968 }, { "epoch": 0.04806786050895382, "grad_norm": 21.199708938598633, "learning_rate": 9.963599084596563e-06, "loss": 0.4215, "step": 969 }, { "epoch": 0.04811746614415398, "grad_norm": 7.539324760437012, "learning_rate": 9.963504220793305e-06, "loss": 0.2137, "step": 970 }, { "epoch": 0.048167071779354136, "grad_norm": 10.567262649536133, "learning_rate": 9.963409233992169e-06, "loss": 0.3067, "step": 971 }, { "epoch": 0.04821667741455429, "grad_norm": 20.0361328125, "learning_rate": 9.963314124195506e-06, "loss": 0.3044, "step": 972 }, { "epoch": 0.04826628304975445, "grad_norm": 14.640673637390137, "learning_rate": 9.963218891405673e-06, "loss": 0.3619, "step": 973 }, { "epoch": 0.04831588868495461, "grad_norm": 7.69378137588501, "learning_rate": 9.963123535625032e-06, "loss": 0.4186, "step": 974 }, { "epoch": 0.04836549432015477, "grad_norm": 6.56113338470459, "learning_rate": 9.963028056855944e-06, "loss": 0.3353, "step": 975 }, { "epoch": 0.04841509995535493, "grad_norm": 6.671407222747803, "learning_rate": 9.962932455100777e-06, "loss": 0.2488, "step": 976 }, { "epoch": 0.04846470559055509, "grad_norm": 8.009381294250488, "learning_rate": 9.962836730361898e-06, "loss": 0.4273, "step": 977 }, { "epoch": 0.048514311225755245, "grad_norm": 10.377769470214844, "learning_rate": 9.96274088264168e-06, "loss": 0.4273, "step": 978 }, { "epoch": 0.0485639168609554, "grad_norm": 7.579904079437256, "learning_rate": 9.962644911942497e-06, "loss": 0.2987, "step": 979 }, { "epoch": 0.04861352249615556, "grad_norm": 11.511731147766113, "learning_rate": 9.96254881826673e-06, "loss": 0.478, "step": 980 }, { "epoch": 0.048663128131355725, "grad_norm": 13.680892944335938, "learning_rate": 9.962452601616757e-06, "loss": 0.4812, "step": 981 }, { "epoch": 0.04871273376655588, "grad_norm": 8.070690155029297, "learning_rate": 9.962356261994964e-06, "loss": 0.3881, "step": 982 }, { "epoch": 0.04876233940175604, "grad_norm": 7.401549339294434, "learning_rate": 9.962259799403738e-06, "loss": 0.3037, "step": 983 }, { "epoch": 0.0488119450369562, "grad_norm": 9.04157829284668, "learning_rate": 9.96216321384547e-06, "loss": 0.2723, "step": 984 }, { "epoch": 0.048861550672156355, "grad_norm": 6.120652675628662, "learning_rate": 9.962066505322554e-06, "loss": 0.2583, "step": 985 }, { "epoch": 0.04891115630735651, "grad_norm": 12.67920207977295, "learning_rate": 9.961969673837384e-06, "loss": 0.3849, "step": 986 }, { "epoch": 0.04896076194255668, "grad_norm": 11.467681884765625, "learning_rate": 9.96187271939236e-06, "loss": 0.4317, "step": 987 }, { "epoch": 0.049010367577756835, "grad_norm": 8.730962753295898, "learning_rate": 9.961775641989888e-06, "loss": 0.3568, "step": 988 }, { "epoch": 0.04905997321295699, "grad_norm": 11.357229232788086, "learning_rate": 9.96167844163237e-06, "loss": 0.4782, "step": 989 }, { "epoch": 0.04910957884815715, "grad_norm": 10.434094429016113, "learning_rate": 9.961581118322216e-06, "loss": 0.3578, "step": 990 }, { "epoch": 0.04915918448335731, "grad_norm": 9.701604843139648, "learning_rate": 9.961483672061835e-06, "loss": 0.3222, "step": 991 }, { "epoch": 0.049208790118557465, "grad_norm": 13.193061828613281, "learning_rate": 9.961386102853648e-06, "loss": 0.4459, "step": 992 }, { "epoch": 0.04925839575375763, "grad_norm": 10.686212539672852, "learning_rate": 9.961288410700067e-06, "loss": 0.2677, "step": 993 }, { "epoch": 0.04930800138895779, "grad_norm": 5.724881649017334, "learning_rate": 9.961190595603516e-06, "loss": 0.3616, "step": 994 }, { "epoch": 0.049357607024157944, "grad_norm": 7.6443281173706055, "learning_rate": 9.961092657566419e-06, "loss": 0.2661, "step": 995 }, { "epoch": 0.0494072126593581, "grad_norm": 5.7947306632995605, "learning_rate": 9.960994596591199e-06, "loss": 0.3257, "step": 996 }, { "epoch": 0.04945681829455826, "grad_norm": 8.263628959655762, "learning_rate": 9.96089641268029e-06, "loss": 0.3688, "step": 997 }, { "epoch": 0.049506423929758424, "grad_norm": 5.380758762359619, "learning_rate": 9.960798105836124e-06, "loss": 0.2766, "step": 998 }, { "epoch": 0.04955602956495858, "grad_norm": 9.332192420959473, "learning_rate": 9.960699676061136e-06, "loss": 0.3633, "step": 999 }, { "epoch": 0.04960563520015874, "grad_norm": 12.583104133605957, "learning_rate": 9.960601123357767e-06, "loss": 0.3854, "step": 1000 }, { "epoch": 0.049655240835358896, "grad_norm": 9.633286476135254, "learning_rate": 9.960502447728457e-06, "loss": 0.247, "step": 1001 }, { "epoch": 0.049704846470559054, "grad_norm": 6.781302452087402, "learning_rate": 9.960403649175655e-06, "loss": 0.3056, "step": 1002 }, { "epoch": 0.04975445210575921, "grad_norm": 12.422887802124023, "learning_rate": 9.960304727701803e-06, "loss": 0.3049, "step": 1003 }, { "epoch": 0.049804057740959376, "grad_norm": 18.50078773498535, "learning_rate": 9.96020568330936e-06, "loss": 0.3622, "step": 1004 }, { "epoch": 0.049853663376159534, "grad_norm": 13.853623390197754, "learning_rate": 9.960106516000775e-06, "loss": 0.4227, "step": 1005 }, { "epoch": 0.04990326901135969, "grad_norm": 18.47719383239746, "learning_rate": 9.960007225778506e-06, "loss": 0.3811, "step": 1006 }, { "epoch": 0.04995287464655985, "grad_norm": 12.372018814086914, "learning_rate": 9.959907812645016e-06, "loss": 0.3915, "step": 1007 }, { "epoch": 0.050002480281760006, "grad_norm": 13.337780952453613, "learning_rate": 9.959808276602765e-06, "loss": 0.3962, "step": 1008 }, { "epoch": 0.050052085916960164, "grad_norm": 9.25385570526123, "learning_rate": 9.959708617654222e-06, "loss": 0.4269, "step": 1009 }, { "epoch": 0.05010169155216033, "grad_norm": 8.813361167907715, "learning_rate": 9.959608835801855e-06, "loss": 0.4215, "step": 1010 }, { "epoch": 0.050151297187360486, "grad_norm": 8.913985252380371, "learning_rate": 9.959508931048138e-06, "loss": 0.3876, "step": 1011 }, { "epoch": 0.05020090282256064, "grad_norm": 9.281262397766113, "learning_rate": 9.959408903395548e-06, "loss": 0.2186, "step": 1012 }, { "epoch": 0.0502505084577608, "grad_norm": 16.981828689575195, "learning_rate": 9.959308752846561e-06, "loss": 0.4323, "step": 1013 }, { "epoch": 0.05030011409296096, "grad_norm": 7.355618953704834, "learning_rate": 9.95920847940366e-06, "loss": 0.2568, "step": 1014 }, { "epoch": 0.050349719728161116, "grad_norm": 7.803684234619141, "learning_rate": 9.959108083069327e-06, "loss": 0.328, "step": 1015 }, { "epoch": 0.05039932536336128, "grad_norm": 10.202353477478027, "learning_rate": 9.959007563846055e-06, "loss": 0.3177, "step": 1016 }, { "epoch": 0.05044893099856144, "grad_norm": 10.496834754943848, "learning_rate": 9.958906921736332e-06, "loss": 0.3179, "step": 1017 }, { "epoch": 0.050498536633761595, "grad_norm": 6.869359016418457, "learning_rate": 9.958806156742652e-06, "loss": 0.3603, "step": 1018 }, { "epoch": 0.05054814226896175, "grad_norm": 6.182240009307861, "learning_rate": 9.958705268867513e-06, "loss": 0.3893, "step": 1019 }, { "epoch": 0.05059774790416191, "grad_norm": 12.53410816192627, "learning_rate": 9.958604258113413e-06, "loss": 0.2623, "step": 1020 }, { "epoch": 0.050647353539362075, "grad_norm": 29.42408561706543, "learning_rate": 9.958503124482858e-06, "loss": 0.7065, "step": 1021 }, { "epoch": 0.05069695917456223, "grad_norm": 11.176048278808594, "learning_rate": 9.958401867978353e-06, "loss": 0.3584, "step": 1022 }, { "epoch": 0.05074656480976239, "grad_norm": 9.42575740814209, "learning_rate": 9.958300488602405e-06, "loss": 0.3956, "step": 1023 }, { "epoch": 0.05079617044496255, "grad_norm": 12.941475868225098, "learning_rate": 9.95819898635753e-06, "loss": 0.3811, "step": 1024 }, { "epoch": 0.050845776080162705, "grad_norm": 10.71534538269043, "learning_rate": 9.958097361246239e-06, "loss": 0.4448, "step": 1025 }, { "epoch": 0.05089538171536286, "grad_norm": 18.263965606689453, "learning_rate": 9.957995613271054e-06, "loss": 0.4463, "step": 1026 }, { "epoch": 0.05094498735056303, "grad_norm": 8.171704292297363, "learning_rate": 9.957893742434494e-06, "loss": 0.2581, "step": 1027 }, { "epoch": 0.050994592985763185, "grad_norm": 24.25627326965332, "learning_rate": 9.957791748739085e-06, "loss": 0.3873, "step": 1028 }, { "epoch": 0.05104419862096334, "grad_norm": 20.692031860351562, "learning_rate": 9.957689632187354e-06, "loss": 0.4194, "step": 1029 }, { "epoch": 0.0510938042561635, "grad_norm": 9.781604766845703, "learning_rate": 9.957587392781831e-06, "loss": 0.3761, "step": 1030 }, { "epoch": 0.05114340989136366, "grad_norm": 7.332653045654297, "learning_rate": 9.95748503052505e-06, "loss": 0.3216, "step": 1031 }, { "epoch": 0.051193015526563815, "grad_norm": 13.134318351745605, "learning_rate": 9.957382545419548e-06, "loss": 0.5212, "step": 1032 }, { "epoch": 0.05124262116176398, "grad_norm": 22.107563018798828, "learning_rate": 9.957279937467863e-06, "loss": 0.4082, "step": 1033 }, { "epoch": 0.05129222679696414, "grad_norm": 6.4087934494018555, "learning_rate": 9.95717720667254e-06, "loss": 0.2861, "step": 1034 }, { "epoch": 0.051341832432164294, "grad_norm": 13.859726905822754, "learning_rate": 9.957074353036121e-06, "loss": 0.4258, "step": 1035 }, { "epoch": 0.05139143806736445, "grad_norm": 7.110278129577637, "learning_rate": 9.95697137656116e-06, "loss": 0.344, "step": 1036 }, { "epoch": 0.05144104370256461, "grad_norm": 5.282220840454102, "learning_rate": 9.956868277250205e-06, "loss": 0.3647, "step": 1037 }, { "epoch": 0.05149064933776477, "grad_norm": 7.437203884124756, "learning_rate": 9.95676505510581e-06, "loss": 0.3854, "step": 1038 }, { "epoch": 0.05154025497296493, "grad_norm": 8.35339069366455, "learning_rate": 9.956661710130538e-06, "loss": 0.3591, "step": 1039 }, { "epoch": 0.05158986060816509, "grad_norm": 6.460176944732666, "learning_rate": 9.956558242326943e-06, "loss": 0.3035, "step": 1040 }, { "epoch": 0.05163946624336525, "grad_norm": 7.84578275680542, "learning_rate": 9.956454651697596e-06, "loss": 0.4017, "step": 1041 }, { "epoch": 0.051689071878565404, "grad_norm": 8.390822410583496, "learning_rate": 9.956350938245058e-06, "loss": 0.2344, "step": 1042 }, { "epoch": 0.05173867751376556, "grad_norm": 15.56033706665039, "learning_rate": 9.956247101971904e-06, "loss": 0.4651, "step": 1043 }, { "epoch": 0.05178828314896572, "grad_norm": 15.47616958618164, "learning_rate": 9.956143142880704e-06, "loss": 0.4735, "step": 1044 }, { "epoch": 0.051837888784165884, "grad_norm": 9.907881736755371, "learning_rate": 9.956039060974035e-06, "loss": 0.3497, "step": 1045 }, { "epoch": 0.05188749441936604, "grad_norm": 5.841098308563232, "learning_rate": 9.955934856254477e-06, "loss": 0.3114, "step": 1046 }, { "epoch": 0.0519371000545662, "grad_norm": 10.113273620605469, "learning_rate": 9.95583052872461e-06, "loss": 0.3476, "step": 1047 }, { "epoch": 0.051986705689766356, "grad_norm": 16.199642181396484, "learning_rate": 9.95572607838702e-06, "loss": 0.433, "step": 1048 }, { "epoch": 0.052036311324966514, "grad_norm": 5.156989097595215, "learning_rate": 9.9556215052443e-06, "loss": 0.2545, "step": 1049 }, { "epoch": 0.05208591696016668, "grad_norm": 5.919736862182617, "learning_rate": 9.955516809299034e-06, "loss": 0.3523, "step": 1050 }, { "epoch": 0.052135522595366836, "grad_norm": 10.492144584655762, "learning_rate": 9.95541199055382e-06, "loss": 0.3652, "step": 1051 }, { "epoch": 0.05218512823056699, "grad_norm": 7.273065090179443, "learning_rate": 9.955307049011256e-06, "loss": 0.3035, "step": 1052 }, { "epoch": 0.05223473386576715, "grad_norm": 13.180152893066406, "learning_rate": 9.955201984673943e-06, "loss": 0.4593, "step": 1053 }, { "epoch": 0.05228433950096731, "grad_norm": 10.589286804199219, "learning_rate": 9.955096797544481e-06, "loss": 0.3506, "step": 1054 }, { "epoch": 0.052333945136167466, "grad_norm": 5.835430145263672, "learning_rate": 9.954991487625481e-06, "loss": 0.3816, "step": 1055 }, { "epoch": 0.05238355077136763, "grad_norm": 6.765628337860107, "learning_rate": 9.95488605491955e-06, "loss": 0.3105, "step": 1056 }, { "epoch": 0.05243315640656779, "grad_norm": 6.6446685791015625, "learning_rate": 9.9547804994293e-06, "loss": 0.4103, "step": 1057 }, { "epoch": 0.052482762041767946, "grad_norm": 8.988814353942871, "learning_rate": 9.954674821157348e-06, "loss": 0.255, "step": 1058 }, { "epoch": 0.0525323676769681, "grad_norm": 8.450955390930176, "learning_rate": 9.954569020106316e-06, "loss": 0.302, "step": 1059 }, { "epoch": 0.05258197331216826, "grad_norm": 16.132705688476562, "learning_rate": 9.95446309627882e-06, "loss": 0.4561, "step": 1060 }, { "epoch": 0.05263157894736842, "grad_norm": 10.592117309570312, "learning_rate": 9.954357049677486e-06, "loss": 0.3669, "step": 1061 }, { "epoch": 0.05268118458256858, "grad_norm": 6.79741096496582, "learning_rate": 9.954250880304944e-06, "loss": 0.2794, "step": 1062 }, { "epoch": 0.05273079021776874, "grad_norm": 11.841132164001465, "learning_rate": 9.954144588163826e-06, "loss": 0.4198, "step": 1063 }, { "epoch": 0.0527803958529689, "grad_norm": 6.610678195953369, "learning_rate": 9.954038173256761e-06, "loss": 0.3121, "step": 1064 }, { "epoch": 0.052830001488169055, "grad_norm": 7.2392754554748535, "learning_rate": 9.953931635586391e-06, "loss": 0.298, "step": 1065 }, { "epoch": 0.05287960712336921, "grad_norm": 12.128585815429688, "learning_rate": 9.953824975155354e-06, "loss": 0.5131, "step": 1066 }, { "epoch": 0.05292921275856937, "grad_norm": 15.277811050415039, "learning_rate": 9.953718191966294e-06, "loss": 0.3849, "step": 1067 }, { "epoch": 0.052978818393769535, "grad_norm": 10.57158374786377, "learning_rate": 9.953611286021856e-06, "loss": 0.3823, "step": 1068 }, { "epoch": 0.05302842402896969, "grad_norm": 11.204721450805664, "learning_rate": 9.953504257324689e-06, "loss": 0.4485, "step": 1069 }, { "epoch": 0.05307802966416985, "grad_norm": 17.37415313720703, "learning_rate": 9.953397105877445e-06, "loss": 0.4377, "step": 1070 }, { "epoch": 0.05312763529937001, "grad_norm": 12.1571626663208, "learning_rate": 9.953289831682782e-06, "loss": 0.3024, "step": 1071 }, { "epoch": 0.053177240934570165, "grad_norm": 12.557717323303223, "learning_rate": 9.953182434743356e-06, "loss": 0.4791, "step": 1072 }, { "epoch": 0.05322684656977032, "grad_norm": 13.89306640625, "learning_rate": 9.953074915061828e-06, "loss": 0.4061, "step": 1073 }, { "epoch": 0.05327645220497049, "grad_norm": 7.453466892242432, "learning_rate": 9.952967272640864e-06, "loss": 0.2893, "step": 1074 }, { "epoch": 0.053326057840170644, "grad_norm": 12.683539390563965, "learning_rate": 9.952859507483128e-06, "loss": 0.3344, "step": 1075 }, { "epoch": 0.0533756634753708, "grad_norm": 13.724711418151855, "learning_rate": 9.952751619591296e-06, "loss": 0.3917, "step": 1076 }, { "epoch": 0.05342526911057096, "grad_norm": 8.206006050109863, "learning_rate": 9.952643608968037e-06, "loss": 0.4041, "step": 1077 }, { "epoch": 0.05347487474577112, "grad_norm": 12.886690139770508, "learning_rate": 9.95253547561603e-06, "loss": 0.424, "step": 1078 }, { "epoch": 0.05352448038097128, "grad_norm": 6.765810012817383, "learning_rate": 9.952427219537952e-06, "loss": 0.3185, "step": 1079 }, { "epoch": 0.05357408601617144, "grad_norm": 14.9210205078125, "learning_rate": 9.952318840736488e-06, "loss": 0.4591, "step": 1080 }, { "epoch": 0.0536236916513716, "grad_norm": 4.942264080047607, "learning_rate": 9.952210339214324e-06, "loss": 0.3234, "step": 1081 }, { "epoch": 0.053673297286571754, "grad_norm": 14.209763526916504, "learning_rate": 9.952101714974145e-06, "loss": 0.4459, "step": 1082 }, { "epoch": 0.05372290292177191, "grad_norm": 12.244166374206543, "learning_rate": 9.951992968018648e-06, "loss": 0.3872, "step": 1083 }, { "epoch": 0.05377250855697207, "grad_norm": 7.833450794219971, "learning_rate": 9.951884098350523e-06, "loss": 0.3283, "step": 1084 }, { "epoch": 0.053822114192172234, "grad_norm": 5.961415767669678, "learning_rate": 9.951775105972472e-06, "loss": 0.3236, "step": 1085 }, { "epoch": 0.05387171982737239, "grad_norm": 21.628650665283203, "learning_rate": 9.951665990887193e-06, "loss": 0.3786, "step": 1086 }, { "epoch": 0.05392132546257255, "grad_norm": 13.314116477966309, "learning_rate": 9.951556753097393e-06, "loss": 0.403, "step": 1087 }, { "epoch": 0.053970931097772706, "grad_norm": 8.785979270935059, "learning_rate": 9.951447392605776e-06, "loss": 0.3128, "step": 1088 }, { "epoch": 0.054020536732972864, "grad_norm": 10.670825958251953, "learning_rate": 9.95133790941505e-06, "loss": 0.4066, "step": 1089 }, { "epoch": 0.05407014236817302, "grad_norm": 13.023713111877441, "learning_rate": 9.951228303527933e-06, "loss": 0.4072, "step": 1090 }, { "epoch": 0.054119748003373186, "grad_norm": 10.534777641296387, "learning_rate": 9.951118574947138e-06, "loss": 0.3401, "step": 1091 }, { "epoch": 0.05416935363857334, "grad_norm": 7.276516437530518, "learning_rate": 9.951008723675387e-06, "loss": 0.3152, "step": 1092 }, { "epoch": 0.0542189592737735, "grad_norm": 9.55471420288086, "learning_rate": 9.950898749715398e-06, "loss": 0.3349, "step": 1093 }, { "epoch": 0.05426856490897366, "grad_norm": 8.749120712280273, "learning_rate": 9.9507886530699e-06, "loss": 0.3721, "step": 1094 }, { "epoch": 0.054318170544173816, "grad_norm": 9.626253128051758, "learning_rate": 9.95067843374162e-06, "loss": 0.4041, "step": 1095 }, { "epoch": 0.054367776179373974, "grad_norm": 8.141839027404785, "learning_rate": 9.950568091733289e-06, "loss": 0.3827, "step": 1096 }, { "epoch": 0.05441738181457414, "grad_norm": 5.114771842956543, "learning_rate": 9.95045762704764e-06, "loss": 0.3928, "step": 1097 }, { "epoch": 0.054466987449774296, "grad_norm": 9.46327018737793, "learning_rate": 9.95034703968741e-06, "loss": 0.3138, "step": 1098 }, { "epoch": 0.05451659308497445, "grad_norm": 18.956697463989258, "learning_rate": 9.950236329655345e-06, "loss": 0.3268, "step": 1099 }, { "epoch": 0.05456619872017461, "grad_norm": 13.090144157409668, "learning_rate": 9.950125496954183e-06, "loss": 0.3317, "step": 1100 }, { "epoch": 0.05461580435537477, "grad_norm": 6.673325538635254, "learning_rate": 9.950014541586673e-06, "loss": 0.3356, "step": 1101 }, { "epoch": 0.05466540999057493, "grad_norm": 13.457324028015137, "learning_rate": 9.949903463555561e-06, "loss": 0.4482, "step": 1102 }, { "epoch": 0.05471501562577509, "grad_norm": 7.730954170227051, "learning_rate": 9.949792262863603e-06, "loss": 0.2653, "step": 1103 }, { "epoch": 0.05476462126097525, "grad_norm": 8.046910285949707, "learning_rate": 9.949680939513555e-06, "loss": 0.3784, "step": 1104 }, { "epoch": 0.054814226896175405, "grad_norm": 7.196722507476807, "learning_rate": 9.949569493508174e-06, "loss": 0.3549, "step": 1105 }, { "epoch": 0.05486383253137556, "grad_norm": 11.751795768737793, "learning_rate": 9.949457924850219e-06, "loss": 0.416, "step": 1106 }, { "epoch": 0.05491343816657572, "grad_norm": 16.789812088012695, "learning_rate": 9.949346233542461e-06, "loss": 0.4008, "step": 1107 }, { "epoch": 0.054963043801775885, "grad_norm": 9.936403274536133, "learning_rate": 9.949234419587663e-06, "loss": 0.3412, "step": 1108 }, { "epoch": 0.05501264943697604, "grad_norm": 11.029191970825195, "learning_rate": 9.9491224829886e-06, "loss": 0.363, "step": 1109 }, { "epoch": 0.0550622550721762, "grad_norm": 5.062080383300781, "learning_rate": 9.94901042374804e-06, "loss": 0.3667, "step": 1110 }, { "epoch": 0.05511186070737636, "grad_norm": 5.96633768081665, "learning_rate": 9.948898241868764e-06, "loss": 0.3674, "step": 1111 }, { "epoch": 0.055161466342576515, "grad_norm": 8.726162910461426, "learning_rate": 9.94878593735355e-06, "loss": 0.3396, "step": 1112 }, { "epoch": 0.05521107197777667, "grad_norm": 7.876051902770996, "learning_rate": 9.948673510205184e-06, "loss": 0.2373, "step": 1113 }, { "epoch": 0.05526067761297684, "grad_norm": 7.55069637298584, "learning_rate": 9.94856096042645e-06, "loss": 0.233, "step": 1114 }, { "epoch": 0.055310283248176995, "grad_norm": 7.457310676574707, "learning_rate": 9.948448288020134e-06, "loss": 0.2823, "step": 1115 }, { "epoch": 0.05535988888337715, "grad_norm": 12.412620544433594, "learning_rate": 9.948335492989033e-06, "loss": 0.4048, "step": 1116 }, { "epoch": 0.05540949451857731, "grad_norm": 15.791685104370117, "learning_rate": 9.94822257533594e-06, "loss": 0.3521, "step": 1117 }, { "epoch": 0.05545910015377747, "grad_norm": 9.636754989624023, "learning_rate": 9.948109535063654e-06, "loss": 0.3256, "step": 1118 }, { "epoch": 0.055508705788977625, "grad_norm": 7.195380687713623, "learning_rate": 9.947996372174974e-06, "loss": 0.4186, "step": 1119 }, { "epoch": 0.05555831142417779, "grad_norm": 19.197830200195312, "learning_rate": 9.947883086672706e-06, "loss": 0.7344, "step": 1120 }, { "epoch": 0.05560791705937795, "grad_norm": 10.139707565307617, "learning_rate": 9.947769678559658e-06, "loss": 0.3442, "step": 1121 }, { "epoch": 0.055657522694578104, "grad_norm": 9.102619171142578, "learning_rate": 9.947656147838638e-06, "loss": 0.2715, "step": 1122 }, { "epoch": 0.05570712832977826, "grad_norm": 7.992745876312256, "learning_rate": 9.94754249451246e-06, "loss": 0.2663, "step": 1123 }, { "epoch": 0.05575673396497842, "grad_norm": 5.7220354080200195, "learning_rate": 9.947428718583942e-06, "loss": 0.3328, "step": 1124 }, { "epoch": 0.05580633960017858, "grad_norm": 12.540844917297363, "learning_rate": 9.947314820055904e-06, "loss": 0.3899, "step": 1125 }, { "epoch": 0.05585594523537874, "grad_norm": 14.624503135681152, "learning_rate": 9.947200798931164e-06, "loss": 0.3757, "step": 1126 }, { "epoch": 0.0559055508705789, "grad_norm": 6.447804927825928, "learning_rate": 9.947086655212551e-06, "loss": 0.3668, "step": 1127 }, { "epoch": 0.055955156505779056, "grad_norm": 7.2336344718933105, "learning_rate": 9.946972388902894e-06, "loss": 0.3088, "step": 1128 }, { "epoch": 0.056004762140979214, "grad_norm": 6.117588520050049, "learning_rate": 9.946858000005024e-06, "loss": 0.3594, "step": 1129 }, { "epoch": 0.05605436777617937, "grad_norm": 11.520637512207031, "learning_rate": 9.946743488521773e-06, "loss": 0.5236, "step": 1130 }, { "epoch": 0.056103973411379536, "grad_norm": 13.631630897521973, "learning_rate": 9.946628854455982e-06, "loss": 0.4704, "step": 1131 }, { "epoch": 0.056153579046579694, "grad_norm": 7.454135894775391, "learning_rate": 9.94651409781049e-06, "loss": 0.3721, "step": 1132 }, { "epoch": 0.05620318468177985, "grad_norm": 10.048567771911621, "learning_rate": 9.94639921858814e-06, "loss": 0.4625, "step": 1133 }, { "epoch": 0.05625279031698001, "grad_norm": 7.952001094818115, "learning_rate": 9.946284216791782e-06, "loss": 0.3425, "step": 1134 }, { "epoch": 0.056302395952180166, "grad_norm": 6.620849609375, "learning_rate": 9.946169092424263e-06, "loss": 0.2774, "step": 1135 }, { "epoch": 0.056352001587380324, "grad_norm": 9.80174446105957, "learning_rate": 9.946053845488436e-06, "loss": 0.344, "step": 1136 }, { "epoch": 0.05640160722258049, "grad_norm": 15.056455612182617, "learning_rate": 9.945938475987157e-06, "loss": 0.5338, "step": 1137 }, { "epoch": 0.056451212857780646, "grad_norm": 10.474882125854492, "learning_rate": 9.945822983923286e-06, "loss": 0.367, "step": 1138 }, { "epoch": 0.0565008184929808, "grad_norm": 9.387369155883789, "learning_rate": 9.945707369299685e-06, "loss": 0.3739, "step": 1139 }, { "epoch": 0.05655042412818096, "grad_norm": 12.067168235778809, "learning_rate": 9.945591632119215e-06, "loss": 0.3917, "step": 1140 }, { "epoch": 0.05660002976338112, "grad_norm": 10.2889986038208, "learning_rate": 9.94547577238475e-06, "loss": 0.3065, "step": 1141 }, { "epoch": 0.056649635398581276, "grad_norm": 9.892182350158691, "learning_rate": 9.945359790099158e-06, "loss": 0.3224, "step": 1142 }, { "epoch": 0.05669924103378144, "grad_norm": 12.283400535583496, "learning_rate": 9.945243685265313e-06, "loss": 0.3938, "step": 1143 }, { "epoch": 0.0567488466689816, "grad_norm": 10.27512264251709, "learning_rate": 9.945127457886092e-06, "loss": 0.2881, "step": 1144 }, { "epoch": 0.056798452304181755, "grad_norm": 9.573238372802734, "learning_rate": 9.945011107964377e-06, "loss": 0.3082, "step": 1145 }, { "epoch": 0.05684805793938191, "grad_norm": 9.173772811889648, "learning_rate": 9.944894635503049e-06, "loss": 0.3966, "step": 1146 }, { "epoch": 0.05689766357458207, "grad_norm": 6.632242679595947, "learning_rate": 9.944778040504995e-06, "loss": 0.344, "step": 1147 }, { "epoch": 0.05694726920978223, "grad_norm": 9.969261169433594, "learning_rate": 9.944661322973105e-06, "loss": 0.4152, "step": 1148 }, { "epoch": 0.05699687484498239, "grad_norm": 10.112640380859375, "learning_rate": 9.94454448291027e-06, "loss": 0.3849, "step": 1149 }, { "epoch": 0.05704648048018255, "grad_norm": 12.302751541137695, "learning_rate": 9.944427520319386e-06, "loss": 0.4448, "step": 1150 }, { "epoch": 0.05709608611538271, "grad_norm": 16.935148239135742, "learning_rate": 9.944310435203353e-06, "loss": 0.5744, "step": 1151 }, { "epoch": 0.057145691750582865, "grad_norm": 8.719389915466309, "learning_rate": 9.944193227565069e-06, "loss": 0.3637, "step": 1152 }, { "epoch": 0.05719529738578302, "grad_norm": 10.068212509155273, "learning_rate": 9.944075897407442e-06, "loss": 0.4661, "step": 1153 }, { "epoch": 0.05724490302098318, "grad_norm": 8.144474029541016, "learning_rate": 9.943958444733375e-06, "loss": 0.4385, "step": 1154 }, { "epoch": 0.057294508656183345, "grad_norm": 12.29166030883789, "learning_rate": 9.943840869545784e-06, "loss": 0.3863, "step": 1155 }, { "epoch": 0.0573441142913835, "grad_norm": 12.387356758117676, "learning_rate": 9.94372317184758e-06, "loss": 0.5489, "step": 1156 }, { "epoch": 0.05739371992658366, "grad_norm": 12.595489501953125, "learning_rate": 9.943605351641677e-06, "loss": 0.4587, "step": 1157 }, { "epoch": 0.05744332556178382, "grad_norm": 11.12134838104248, "learning_rate": 9.943487408931e-06, "loss": 0.3261, "step": 1158 }, { "epoch": 0.057492931196983975, "grad_norm": 7.688539505004883, "learning_rate": 9.943369343718468e-06, "loss": 0.3186, "step": 1159 }, { "epoch": 0.05754253683218414, "grad_norm": 9.2730073928833, "learning_rate": 9.943251156007005e-06, "loss": 0.3636, "step": 1160 }, { "epoch": 0.0575921424673843, "grad_norm": 10.843737602233887, "learning_rate": 9.943132845799546e-06, "loss": 0.3294, "step": 1161 }, { "epoch": 0.057641748102584454, "grad_norm": 52.36172103881836, "learning_rate": 9.943014413099016e-06, "loss": 0.3712, "step": 1162 }, { "epoch": 0.05769135373778461, "grad_norm": 13.299420356750488, "learning_rate": 9.942895857908355e-06, "loss": 0.5344, "step": 1163 }, { "epoch": 0.05774095937298477, "grad_norm": 10.963844299316406, "learning_rate": 9.942777180230498e-06, "loss": 0.2919, "step": 1164 }, { "epoch": 0.05779056500818493, "grad_norm": 6.023891448974609, "learning_rate": 9.942658380068387e-06, "loss": 0.2314, "step": 1165 }, { "epoch": 0.05784017064338509, "grad_norm": 10.773382186889648, "learning_rate": 9.942539457424964e-06, "loss": 0.4304, "step": 1166 }, { "epoch": 0.05788977627858525, "grad_norm": 11.27258014678955, "learning_rate": 9.94242041230318e-06, "loss": 0.4974, "step": 1167 }, { "epoch": 0.05793938191378541, "grad_norm": 7.339509010314941, "learning_rate": 9.942301244705981e-06, "loss": 0.2961, "step": 1168 }, { "epoch": 0.057988987548985564, "grad_norm": 10.39499568939209, "learning_rate": 9.94218195463632e-06, "loss": 0.3629, "step": 1169 }, { "epoch": 0.05803859318418572, "grad_norm": 6.454860687255859, "learning_rate": 9.942062542097158e-06, "loss": 0.3138, "step": 1170 }, { "epoch": 0.05808819881938588, "grad_norm": 6.728717803955078, "learning_rate": 9.941943007091448e-06, "loss": 0.2647, "step": 1171 }, { "epoch": 0.058137804454586044, "grad_norm": 8.349964141845703, "learning_rate": 9.941823349622156e-06, "loss": 0.4022, "step": 1172 }, { "epoch": 0.0581874100897862, "grad_norm": 8.70173454284668, "learning_rate": 9.941703569692246e-06, "loss": 0.3217, "step": 1173 }, { "epoch": 0.05823701572498636, "grad_norm": 8.226677894592285, "learning_rate": 9.941583667304685e-06, "loss": 0.3569, "step": 1174 }, { "epoch": 0.058286621360186516, "grad_norm": 9.798113822937012, "learning_rate": 9.941463642462447e-06, "loss": 0.2, "step": 1175 }, { "epoch": 0.058336226995386674, "grad_norm": 8.374095916748047, "learning_rate": 9.941343495168504e-06, "loss": 0.387, "step": 1176 }, { "epoch": 0.05838583263058683, "grad_norm": 6.6232733726501465, "learning_rate": 9.941223225425834e-06, "loss": 0.2744, "step": 1177 }, { "epoch": 0.058435438265786996, "grad_norm": 14.596923828125, "learning_rate": 9.941102833237417e-06, "loss": 0.4553, "step": 1178 }, { "epoch": 0.05848504390098715, "grad_norm": 11.408942222595215, "learning_rate": 9.940982318606237e-06, "loss": 0.3582, "step": 1179 }, { "epoch": 0.05853464953618731, "grad_norm": 9.931878089904785, "learning_rate": 9.94086168153528e-06, "loss": 0.3178, "step": 1180 }, { "epoch": 0.05858425517138747, "grad_norm": 9.805330276489258, "learning_rate": 9.940740922027534e-06, "loss": 0.3888, "step": 1181 }, { "epoch": 0.058633860806587626, "grad_norm": 9.548922538757324, "learning_rate": 9.940620040085996e-06, "loss": 0.4315, "step": 1182 }, { "epoch": 0.05868346644178779, "grad_norm": 6.034309387207031, "learning_rate": 9.940499035713657e-06, "loss": 0.3174, "step": 1183 }, { "epoch": 0.05873307207698795, "grad_norm": 5.928038597106934, "learning_rate": 9.940377908913516e-06, "loss": 0.3337, "step": 1184 }, { "epoch": 0.058782677712188106, "grad_norm": 6.21290922164917, "learning_rate": 9.940256659688577e-06, "loss": 0.3201, "step": 1185 }, { "epoch": 0.05883228334738826, "grad_norm": 6.569098949432373, "learning_rate": 9.940135288041843e-06, "loss": 0.351, "step": 1186 }, { "epoch": 0.05888188898258842, "grad_norm": 8.334142684936523, "learning_rate": 9.940013793976322e-06, "loss": 0.3055, "step": 1187 }, { "epoch": 0.05893149461778858, "grad_norm": 9.569978713989258, "learning_rate": 9.939892177495025e-06, "loss": 0.3656, "step": 1188 }, { "epoch": 0.05898110025298874, "grad_norm": 10.27012825012207, "learning_rate": 9.939770438600964e-06, "loss": 0.3171, "step": 1189 }, { "epoch": 0.0590307058881889, "grad_norm": 6.577728271484375, "learning_rate": 9.939648577297159e-06, "loss": 0.2556, "step": 1190 }, { "epoch": 0.05908031152338906, "grad_norm": 8.447844505310059, "learning_rate": 9.939526593586625e-06, "loss": 0.4366, "step": 1191 }, { "epoch": 0.059129917158589215, "grad_norm": 9.964942932128906, "learning_rate": 9.93940448747239e-06, "loss": 0.3549, "step": 1192 }, { "epoch": 0.05917952279378937, "grad_norm": 11.664607048034668, "learning_rate": 9.939282258957476e-06, "loss": 0.2442, "step": 1193 }, { "epoch": 0.05922912842898953, "grad_norm": 11.449808120727539, "learning_rate": 9.939159908044916e-06, "loss": 0.3713, "step": 1194 }, { "epoch": 0.059278734064189695, "grad_norm": 17.63922691345215, "learning_rate": 9.939037434737736e-06, "loss": 0.5001, "step": 1195 }, { "epoch": 0.05932833969938985, "grad_norm": 25.040077209472656, "learning_rate": 9.938914839038976e-06, "loss": 0.6917, "step": 1196 }, { "epoch": 0.05937794533459001, "grad_norm": 5.777078628540039, "learning_rate": 9.93879212095167e-06, "loss": 0.3073, "step": 1197 }, { "epoch": 0.05942755096979017, "grad_norm": 9.283334732055664, "learning_rate": 9.938669280478864e-06, "loss": 0.4177, "step": 1198 }, { "epoch": 0.059477156604990325, "grad_norm": 14.237353324890137, "learning_rate": 9.938546317623598e-06, "loss": 0.3959, "step": 1199 }, { "epoch": 0.05952676224019048, "grad_norm": 11.138237953186035, "learning_rate": 9.938423232388921e-06, "loss": 0.3349, "step": 1200 }, { "epoch": 0.05957636787539065, "grad_norm": 7.090245723724365, "learning_rate": 9.93830002477788e-06, "loss": 0.3938, "step": 1201 }, { "epoch": 0.059625973510590805, "grad_norm": 9.655427932739258, "learning_rate": 9.938176694793534e-06, "loss": 0.3757, "step": 1202 }, { "epoch": 0.05967557914579096, "grad_norm": 7.760598659515381, "learning_rate": 9.938053242438933e-06, "loss": 0.4249, "step": 1203 }, { "epoch": 0.05972518478099112, "grad_norm": 4.286716938018799, "learning_rate": 9.93792966771714e-06, "loss": 0.2847, "step": 1204 }, { "epoch": 0.05977479041619128, "grad_norm": 6.956202030181885, "learning_rate": 9.937805970631216e-06, "loss": 0.3366, "step": 1205 }, { "epoch": 0.059824396051391435, "grad_norm": 7.170729637145996, "learning_rate": 9.937682151184226e-06, "loss": 0.3142, "step": 1206 }, { "epoch": 0.0598740016865916, "grad_norm": 14.056304931640625, "learning_rate": 9.93755820937924e-06, "loss": 0.3722, "step": 1207 }, { "epoch": 0.05992360732179176, "grad_norm": 7.7219157218933105, "learning_rate": 9.937434145219327e-06, "loss": 0.375, "step": 1208 }, { "epoch": 0.059973212956991914, "grad_norm": 11.7953462600708, "learning_rate": 9.937309958707564e-06, "loss": 0.3952, "step": 1209 }, { "epoch": 0.06002281859219207, "grad_norm": 8.135913848876953, "learning_rate": 9.937185649847026e-06, "loss": 0.3875, "step": 1210 }, { "epoch": 0.06007242422739223, "grad_norm": 7.151678562164307, "learning_rate": 9.937061218640793e-06, "loss": 0.3585, "step": 1211 }, { "epoch": 0.060122029862592394, "grad_norm": 22.69881820678711, "learning_rate": 9.93693666509195e-06, "loss": 0.3989, "step": 1212 }, { "epoch": 0.06017163549779255, "grad_norm": 6.491105556488037, "learning_rate": 9.936811989203585e-06, "loss": 0.3153, "step": 1213 }, { "epoch": 0.06022124113299271, "grad_norm": 8.40210247039795, "learning_rate": 9.936687190978784e-06, "loss": 0.2789, "step": 1214 }, { "epoch": 0.060270846768192866, "grad_norm": 10.933921813964844, "learning_rate": 9.936562270420642e-06, "loss": 0.2946, "step": 1215 }, { "epoch": 0.060320452403393024, "grad_norm": 11.098299026489258, "learning_rate": 9.936437227532253e-06, "loss": 0.3815, "step": 1216 }, { "epoch": 0.06037005803859318, "grad_norm": 6.292201995849609, "learning_rate": 9.936312062316718e-06, "loss": 0.3329, "step": 1217 }, { "epoch": 0.060419663673793346, "grad_norm": 10.638615608215332, "learning_rate": 9.936186774777137e-06, "loss": 0.2799, "step": 1218 }, { "epoch": 0.060469269308993503, "grad_norm": 16.160097122192383, "learning_rate": 9.936061364916613e-06, "loss": 0.3508, "step": 1219 }, { "epoch": 0.06051887494419366, "grad_norm": 9.404645919799805, "learning_rate": 9.935935832738257e-06, "loss": 0.3045, "step": 1220 }, { "epoch": 0.06056848057939382, "grad_norm": 10.392375946044922, "learning_rate": 9.935810178245178e-06, "loss": 0.4056, "step": 1221 }, { "epoch": 0.060618086214593976, "grad_norm": 14.567391395568848, "learning_rate": 9.935684401440491e-06, "loss": 0.4601, "step": 1222 }, { "epoch": 0.060667691849794134, "grad_norm": 6.963433742523193, "learning_rate": 9.935558502327311e-06, "loss": 0.2815, "step": 1223 }, { "epoch": 0.0607172974849943, "grad_norm": 11.218400955200195, "learning_rate": 9.935432480908759e-06, "loss": 0.4471, "step": 1224 }, { "epoch": 0.060766903120194456, "grad_norm": 5.86857795715332, "learning_rate": 9.935306337187957e-06, "loss": 0.2687, "step": 1225 }, { "epoch": 0.06081650875539461, "grad_norm": 19.896142959594727, "learning_rate": 9.935180071168031e-06, "loss": 0.4107, "step": 1226 }, { "epoch": 0.06086611439059477, "grad_norm": 8.140525817871094, "learning_rate": 9.935053682852111e-06, "loss": 0.3832, "step": 1227 }, { "epoch": 0.06091572002579493, "grad_norm": 10.456302642822266, "learning_rate": 9.934927172243329e-06, "loss": 0.3257, "step": 1228 }, { "epoch": 0.060965325660995086, "grad_norm": 6.477744102478027, "learning_rate": 9.93480053934482e-06, "loss": 0.4307, "step": 1229 }, { "epoch": 0.06101493129619525, "grad_norm": 14.230854034423828, "learning_rate": 9.93467378415972e-06, "loss": 0.3473, "step": 1230 }, { "epoch": 0.06106453693139541, "grad_norm": 14.109139442443848, "learning_rate": 9.934546906691172e-06, "loss": 0.5028, "step": 1231 }, { "epoch": 0.061114142566595565, "grad_norm": 6.82271146774292, "learning_rate": 9.93441990694232e-06, "loss": 0.3777, "step": 1232 }, { "epoch": 0.06116374820179572, "grad_norm": 7.864867687225342, "learning_rate": 9.93429278491631e-06, "loss": 0.3924, "step": 1233 }, { "epoch": 0.06121335383699588, "grad_norm": 6.9054083824157715, "learning_rate": 9.934165540616291e-06, "loss": 0.255, "step": 1234 }, { "epoch": 0.061262959472196045, "grad_norm": 13.006478309631348, "learning_rate": 9.934038174045422e-06, "loss": 0.4204, "step": 1235 }, { "epoch": 0.0613125651073962, "grad_norm": 7.890387058258057, "learning_rate": 9.933910685206851e-06, "loss": 0.3756, "step": 1236 }, { "epoch": 0.06136217074259636, "grad_norm": 9.561013221740723, "learning_rate": 9.933783074103744e-06, "loss": 0.4005, "step": 1237 }, { "epoch": 0.06141177637779652, "grad_norm": 8.270393371582031, "learning_rate": 9.93365534073926e-06, "loss": 0.3421, "step": 1238 }, { "epoch": 0.061461382012996675, "grad_norm": 8.79798698425293, "learning_rate": 9.933527485116565e-06, "loss": 0.3117, "step": 1239 }, { "epoch": 0.06151098764819683, "grad_norm": 6.167314529418945, "learning_rate": 9.933399507238826e-06, "loss": 0.3589, "step": 1240 }, { "epoch": 0.061560593283397, "grad_norm": 9.113154411315918, "learning_rate": 9.933271407109218e-06, "loss": 0.4292, "step": 1241 }, { "epoch": 0.061610198918597155, "grad_norm": 7.777444362640381, "learning_rate": 9.93314318473091e-06, "loss": 0.3683, "step": 1242 }, { "epoch": 0.06165980455379731, "grad_norm": 7.445281505584717, "learning_rate": 9.933014840107085e-06, "loss": 0.3532, "step": 1243 }, { "epoch": 0.06170941018899747, "grad_norm": 22.68123435974121, "learning_rate": 9.93288637324092e-06, "loss": 0.4902, "step": 1244 }, { "epoch": 0.06175901582419763, "grad_norm": 8.663345336914062, "learning_rate": 9.932757784135597e-06, "loss": 0.37, "step": 1245 }, { "epoch": 0.061808621459397785, "grad_norm": 4.352477550506592, "learning_rate": 9.93262907279431e-06, "loss": 0.2434, "step": 1246 }, { "epoch": 0.06185822709459795, "grad_norm": 9.830451965332031, "learning_rate": 9.932500239220238e-06, "loss": 0.3055, "step": 1247 }, { "epoch": 0.06190783272979811, "grad_norm": 16.01142692565918, "learning_rate": 9.932371283416581e-06, "loss": 0.3056, "step": 1248 }, { "epoch": 0.061957438364998264, "grad_norm": 21.594018936157227, "learning_rate": 9.932242205386533e-06, "loss": 0.3429, "step": 1249 }, { "epoch": 0.06200704400019842, "grad_norm": 7.993307590484619, "learning_rate": 9.93211300513329e-06, "loss": 0.2869, "step": 1250 }, { "epoch": 0.06205664963539858, "grad_norm": 6.776331901550293, "learning_rate": 9.931983682660059e-06, "loss": 0.3026, "step": 1251 }, { "epoch": 0.06210625527059874, "grad_norm": 19.60647964477539, "learning_rate": 9.931854237970037e-06, "loss": 0.4374, "step": 1252 }, { "epoch": 0.0621558609057989, "grad_norm": 10.078857421875, "learning_rate": 9.93172467106644e-06, "loss": 0.3773, "step": 1253 }, { "epoch": 0.06220546654099906, "grad_norm": 10.238959312438965, "learning_rate": 9.931594981952474e-06, "loss": 0.3273, "step": 1254 }, { "epoch": 0.062255072176199217, "grad_norm": 6.507252216339111, "learning_rate": 9.931465170631352e-06, "loss": 0.3097, "step": 1255 }, { "epoch": 0.062304677811399374, "grad_norm": 17.54912567138672, "learning_rate": 9.931335237106294e-06, "loss": 0.4661, "step": 1256 }, { "epoch": 0.06235428344659953, "grad_norm": 16.334484100341797, "learning_rate": 9.931205181380516e-06, "loss": 0.3437, "step": 1257 }, { "epoch": 0.06240388908179969, "grad_norm": 6.964955806732178, "learning_rate": 9.931075003457245e-06, "loss": 0.2809, "step": 1258 }, { "epoch": 0.062453494716999854, "grad_norm": 3.6611719131469727, "learning_rate": 9.930944703339705e-06, "loss": 0.2537, "step": 1259 }, { "epoch": 0.06250310035220001, "grad_norm": 11.094873428344727, "learning_rate": 9.930814281031124e-06, "loss": 0.4112, "step": 1260 }, { "epoch": 0.06255270598740016, "grad_norm": 9.326472282409668, "learning_rate": 9.930683736534734e-06, "loss": 0.464, "step": 1261 }, { "epoch": 0.06260231162260033, "grad_norm": 9.640291213989258, "learning_rate": 9.930553069853773e-06, "loss": 0.3996, "step": 1262 }, { "epoch": 0.06265191725780049, "grad_norm": 9.344934463500977, "learning_rate": 9.930422280991473e-06, "loss": 0.437, "step": 1263 }, { "epoch": 0.06270152289300064, "grad_norm": 8.169788360595703, "learning_rate": 9.93029136995108e-06, "loss": 0.4346, "step": 1264 }, { "epoch": 0.0627511285282008, "grad_norm": 7.744695663452148, "learning_rate": 9.930160336735838e-06, "loss": 0.4009, "step": 1265 }, { "epoch": 0.06280073416340096, "grad_norm": 10.124882698059082, "learning_rate": 9.93002918134899e-06, "loss": 0.3298, "step": 1266 }, { "epoch": 0.06285033979860112, "grad_norm": 7.896705627441406, "learning_rate": 9.929897903793792e-06, "loss": 0.3429, "step": 1267 }, { "epoch": 0.06289994543380129, "grad_norm": 9.004559516906738, "learning_rate": 9.929766504073493e-06, "loss": 0.3775, "step": 1268 }, { "epoch": 0.06294955106900144, "grad_norm": 14.01289176940918, "learning_rate": 9.929634982191349e-06, "loss": 0.3679, "step": 1269 }, { "epoch": 0.0629991567042016, "grad_norm": 7.181146144866943, "learning_rate": 9.929503338150622e-06, "loss": 0.31, "step": 1270 }, { "epoch": 0.06304876233940175, "grad_norm": 9.855124473571777, "learning_rate": 9.92937157195457e-06, "loss": 0.3897, "step": 1271 }, { "epoch": 0.06309836797460192, "grad_norm": 8.506505012512207, "learning_rate": 9.929239683606463e-06, "loss": 0.4125, "step": 1272 }, { "epoch": 0.06314797360980208, "grad_norm": 9.302942276000977, "learning_rate": 9.929107673109565e-06, "loss": 0.4228, "step": 1273 }, { "epoch": 0.06319757924500223, "grad_norm": 5.870510578155518, "learning_rate": 9.928975540467149e-06, "loss": 0.3542, "step": 1274 }, { "epoch": 0.0632471848802024, "grad_norm": 8.074604988098145, "learning_rate": 9.92884328568249e-06, "loss": 0.4028, "step": 1275 }, { "epoch": 0.06329679051540255, "grad_norm": 9.979377746582031, "learning_rate": 9.928710908758867e-06, "loss": 0.3505, "step": 1276 }, { "epoch": 0.06334639615060271, "grad_norm": 10.082928657531738, "learning_rate": 9.928578409699557e-06, "loss": 0.4364, "step": 1277 }, { "epoch": 0.06339600178580286, "grad_norm": 4.862926483154297, "learning_rate": 9.928445788507842e-06, "loss": 0.2396, "step": 1278 }, { "epoch": 0.06344560742100303, "grad_norm": 17.976242065429688, "learning_rate": 9.928313045187014e-06, "loss": 0.4168, "step": 1279 }, { "epoch": 0.06349521305620319, "grad_norm": 15.304553985595703, "learning_rate": 9.928180179740357e-06, "loss": 0.3747, "step": 1280 }, { "epoch": 0.06354481869140334, "grad_norm": 11.333513259887695, "learning_rate": 9.928047192171168e-06, "loss": 0.37, "step": 1281 }, { "epoch": 0.0635944243266035, "grad_norm": 7.339984893798828, "learning_rate": 9.927914082482739e-06, "loss": 0.2991, "step": 1282 }, { "epoch": 0.06364402996180366, "grad_norm": 10.044337272644043, "learning_rate": 9.92778085067837e-06, "loss": 0.3899, "step": 1283 }, { "epoch": 0.06369363559700382, "grad_norm": 12.344429016113281, "learning_rate": 9.927647496761364e-06, "loss": 0.4104, "step": 1284 }, { "epoch": 0.06374324123220398, "grad_norm": 7.085752487182617, "learning_rate": 9.927514020735024e-06, "loss": 0.298, "step": 1285 }, { "epoch": 0.06379284686740413, "grad_norm": 8.35417652130127, "learning_rate": 9.927380422602657e-06, "loss": 0.4175, "step": 1286 }, { "epoch": 0.0638424525026043, "grad_norm": 6.737936019897461, "learning_rate": 9.927246702367571e-06, "loss": 0.3788, "step": 1287 }, { "epoch": 0.06389205813780445, "grad_norm": 7.547442436218262, "learning_rate": 9.927112860033088e-06, "loss": 0.3667, "step": 1288 }, { "epoch": 0.06394166377300461, "grad_norm": 8.295831680297852, "learning_rate": 9.926978895602518e-06, "loss": 0.2984, "step": 1289 }, { "epoch": 0.06399126940820478, "grad_norm": 7.333442687988281, "learning_rate": 9.92684480907918e-06, "loss": 0.3373, "step": 1290 }, { "epoch": 0.06404087504340493, "grad_norm": 14.652442932128906, "learning_rate": 9.9267106004664e-06, "loss": 0.3421, "step": 1291 }, { "epoch": 0.0640904806786051, "grad_norm": 9.973892211914062, "learning_rate": 9.926576269767503e-06, "loss": 0.2772, "step": 1292 }, { "epoch": 0.06414008631380524, "grad_norm": 15.759891510009766, "learning_rate": 9.92644181698582e-06, "loss": 0.4595, "step": 1293 }, { "epoch": 0.06418969194900541, "grad_norm": 8.77268123626709, "learning_rate": 9.926307242124677e-06, "loss": 0.1627, "step": 1294 }, { "epoch": 0.06423929758420556, "grad_norm": 8.741875648498535, "learning_rate": 9.926172545187413e-06, "loss": 0.3814, "step": 1295 }, { "epoch": 0.06428890321940572, "grad_norm": 8.398685455322266, "learning_rate": 9.926037726177364e-06, "loss": 0.3716, "step": 1296 }, { "epoch": 0.06433850885460589, "grad_norm": 6.197268962860107, "learning_rate": 9.925902785097874e-06, "loss": 0.3436, "step": 1297 }, { "epoch": 0.06438811448980604, "grad_norm": 7.312171459197998, "learning_rate": 9.925767721952284e-06, "loss": 0.2451, "step": 1298 }, { "epoch": 0.0644377201250062, "grad_norm": 7.367140293121338, "learning_rate": 9.925632536743941e-06, "loss": 0.3803, "step": 1299 }, { "epoch": 0.06448732576020635, "grad_norm": 5.937489032745361, "learning_rate": 9.925497229476196e-06, "loss": 0.3141, "step": 1300 }, { "epoch": 0.06453693139540652, "grad_norm": 6.733285427093506, "learning_rate": 9.925361800152403e-06, "loss": 0.3595, "step": 1301 }, { "epoch": 0.06458653703060668, "grad_norm": 8.228959083557129, "learning_rate": 9.925226248775915e-06, "loss": 0.4205, "step": 1302 }, { "epoch": 0.06463614266580683, "grad_norm": 6.472036361694336, "learning_rate": 9.925090575350091e-06, "loss": 0.303, "step": 1303 }, { "epoch": 0.064685748301007, "grad_norm": 10.773713111877441, "learning_rate": 9.924954779878298e-06, "loss": 0.2965, "step": 1304 }, { "epoch": 0.06473535393620715, "grad_norm": 9.627249717712402, "learning_rate": 9.924818862363896e-06, "loss": 0.3826, "step": 1305 }, { "epoch": 0.06478495957140731, "grad_norm": 11.377285957336426, "learning_rate": 9.924682822810255e-06, "loss": 0.3413, "step": 1306 }, { "epoch": 0.06483456520660746, "grad_norm": 13.910683631896973, "learning_rate": 9.924546661220746e-06, "loss": 0.406, "step": 1307 }, { "epoch": 0.06488417084180763, "grad_norm": 5.9702301025390625, "learning_rate": 9.924410377598743e-06, "loss": 0.3264, "step": 1308 }, { "epoch": 0.06493377647700779, "grad_norm": 5.335611343383789, "learning_rate": 9.924273971947624e-06, "loss": 0.3602, "step": 1309 }, { "epoch": 0.06498338211220794, "grad_norm": 15.272439956665039, "learning_rate": 9.924137444270768e-06, "loss": 0.5147, "step": 1310 }, { "epoch": 0.06503298774740811, "grad_norm": 24.92186737060547, "learning_rate": 9.92400079457156e-06, "loss": 0.4686, "step": 1311 }, { "epoch": 0.06508259338260826, "grad_norm": 7.907092094421387, "learning_rate": 9.923864022853382e-06, "loss": 0.2838, "step": 1312 }, { "epoch": 0.06513219901780842, "grad_norm": 7.985271453857422, "learning_rate": 9.923727129119629e-06, "loss": 0.3919, "step": 1313 }, { "epoch": 0.06518180465300859, "grad_norm": 7.326207160949707, "learning_rate": 9.923590113373688e-06, "loss": 0.3626, "step": 1314 }, { "epoch": 0.06523141028820874, "grad_norm": 6.334736347198486, "learning_rate": 9.923452975618958e-06, "loss": 0.3785, "step": 1315 }, { "epoch": 0.0652810159234089, "grad_norm": 10.257704734802246, "learning_rate": 9.923315715858838e-06, "loss": 0.3157, "step": 1316 }, { "epoch": 0.06533062155860905, "grad_norm": 8.394437789916992, "learning_rate": 9.923178334096727e-06, "loss": 0.3854, "step": 1317 }, { "epoch": 0.06538022719380922, "grad_norm": 10.594633102416992, "learning_rate": 9.923040830336028e-06, "loss": 0.4027, "step": 1318 }, { "epoch": 0.06542983282900938, "grad_norm": 13.82763385772705, "learning_rate": 9.92290320458015e-06, "loss": 0.3977, "step": 1319 }, { "epoch": 0.06547943846420953, "grad_norm": 8.829797744750977, "learning_rate": 9.922765456832506e-06, "loss": 0.4178, "step": 1320 }, { "epoch": 0.0655290440994097, "grad_norm": 7.782757759094238, "learning_rate": 9.922627587096506e-06, "loss": 0.2691, "step": 1321 }, { "epoch": 0.06557864973460985, "grad_norm": 6.792586326599121, "learning_rate": 9.922489595375566e-06, "loss": 0.3468, "step": 1322 }, { "epoch": 0.06562825536981001, "grad_norm": 9.03740119934082, "learning_rate": 9.92235148167311e-06, "loss": 0.3316, "step": 1323 }, { "epoch": 0.06567786100501016, "grad_norm": 6.4285759925842285, "learning_rate": 9.922213245992556e-06, "loss": 0.216, "step": 1324 }, { "epoch": 0.06572746664021033, "grad_norm": 11.175827980041504, "learning_rate": 9.922074888337332e-06, "loss": 0.4074, "step": 1325 }, { "epoch": 0.06577707227541049, "grad_norm": 12.671391487121582, "learning_rate": 9.921936408710865e-06, "loss": 0.5168, "step": 1326 }, { "epoch": 0.06582667791061064, "grad_norm": 12.832152366638184, "learning_rate": 9.921797807116588e-06, "loss": 0.41, "step": 1327 }, { "epoch": 0.06587628354581081, "grad_norm": 11.892497062683105, "learning_rate": 9.921659083557935e-06, "loss": 0.5399, "step": 1328 }, { "epoch": 0.06592588918101096, "grad_norm": 15.506855010986328, "learning_rate": 9.921520238038343e-06, "loss": 0.5497, "step": 1329 }, { "epoch": 0.06597549481621112, "grad_norm": 8.796740531921387, "learning_rate": 9.921381270561254e-06, "loss": 0.3933, "step": 1330 }, { "epoch": 0.06602510045141129, "grad_norm": 11.848779678344727, "learning_rate": 9.921242181130109e-06, "loss": 0.4355, "step": 1331 }, { "epoch": 0.06607470608661144, "grad_norm": 5.497931003570557, "learning_rate": 9.921102969748358e-06, "loss": 0.4349, "step": 1332 }, { "epoch": 0.0661243117218116, "grad_norm": 6.945119857788086, "learning_rate": 9.92096363641945e-06, "loss": 0.3469, "step": 1333 }, { "epoch": 0.06617391735701175, "grad_norm": 12.632014274597168, "learning_rate": 9.920824181146838e-06, "loss": 0.3289, "step": 1334 }, { "epoch": 0.06622352299221192, "grad_norm": 12.199407577514648, "learning_rate": 9.920684603933974e-06, "loss": 0.4706, "step": 1335 }, { "epoch": 0.06627312862741207, "grad_norm": 7.719212532043457, "learning_rate": 9.920544904784322e-06, "loss": 0.2452, "step": 1336 }, { "epoch": 0.06632273426261223, "grad_norm": 5.276161193847656, "learning_rate": 9.92040508370134e-06, "loss": 0.2032, "step": 1337 }, { "epoch": 0.0663723398978124, "grad_norm": 18.225967407226562, "learning_rate": 9.920265140688495e-06, "loss": 0.386, "step": 1338 }, { "epoch": 0.06642194553301255, "grad_norm": 12.545693397521973, "learning_rate": 9.920125075749254e-06, "loss": 0.4047, "step": 1339 }, { "epoch": 0.06647155116821271, "grad_norm": 7.031589031219482, "learning_rate": 9.919984888887088e-06, "loss": 0.2452, "step": 1340 }, { "epoch": 0.06652115680341286, "grad_norm": 18.834762573242188, "learning_rate": 9.91984458010547e-06, "loss": 0.3287, "step": 1341 }, { "epoch": 0.06657076243861303, "grad_norm": 9.868291854858398, "learning_rate": 9.919704149407877e-06, "loss": 0.4211, "step": 1342 }, { "epoch": 0.06662036807381319, "grad_norm": 6.404216289520264, "learning_rate": 9.919563596797793e-06, "loss": 0.2689, "step": 1343 }, { "epoch": 0.06666997370901334, "grad_norm": 8.095407485961914, "learning_rate": 9.919422922278696e-06, "loss": 0.3299, "step": 1344 }, { "epoch": 0.0667195793442135, "grad_norm": 11.915669441223145, "learning_rate": 9.919282125854076e-06, "loss": 0.3878, "step": 1345 }, { "epoch": 0.06676918497941366, "grad_norm": 10.64017105102539, "learning_rate": 9.919141207527416e-06, "loss": 0.3529, "step": 1346 }, { "epoch": 0.06681879061461382, "grad_norm": 8.49834156036377, "learning_rate": 9.919000167302213e-06, "loss": 0.4345, "step": 1347 }, { "epoch": 0.06686839624981399, "grad_norm": 15.478219985961914, "learning_rate": 9.918859005181962e-06, "loss": 0.5154, "step": 1348 }, { "epoch": 0.06691800188501414, "grad_norm": 12.360530853271484, "learning_rate": 9.918717721170159e-06, "loss": 0.4807, "step": 1349 }, { "epoch": 0.0669676075202143, "grad_norm": 7.602137088775635, "learning_rate": 9.918576315270306e-06, "loss": 0.4361, "step": 1350 }, { "epoch": 0.06701721315541445, "grad_norm": 19.059921264648438, "learning_rate": 9.918434787485904e-06, "loss": 0.5631, "step": 1351 }, { "epoch": 0.06706681879061462, "grad_norm": 10.450667381286621, "learning_rate": 9.918293137820467e-06, "loss": 0.3644, "step": 1352 }, { "epoch": 0.06711642442581477, "grad_norm": 12.681530952453613, "learning_rate": 9.9181513662775e-06, "loss": 0.3387, "step": 1353 }, { "epoch": 0.06716603006101493, "grad_norm": 11.846782684326172, "learning_rate": 9.918009472860517e-06, "loss": 0.2961, "step": 1354 }, { "epoch": 0.0672156356962151, "grad_norm": 10.816903114318848, "learning_rate": 9.917867457573036e-06, "loss": 0.4777, "step": 1355 }, { "epoch": 0.06726524133141525, "grad_norm": 8.092854499816895, "learning_rate": 9.917725320418574e-06, "loss": 0.3863, "step": 1356 }, { "epoch": 0.06731484696661541, "grad_norm": 6.04462194442749, "learning_rate": 9.917583061400653e-06, "loss": 0.324, "step": 1357 }, { "epoch": 0.06736445260181556, "grad_norm": 7.588403224945068, "learning_rate": 9.917440680522801e-06, "loss": 0.3463, "step": 1358 }, { "epoch": 0.06741405823701573, "grad_norm": 10.617532730102539, "learning_rate": 9.917298177788543e-06, "loss": 0.3519, "step": 1359 }, { "epoch": 0.06746366387221589, "grad_norm": 10.028850555419922, "learning_rate": 9.917155553201411e-06, "loss": 0.4291, "step": 1360 }, { "epoch": 0.06751326950741604, "grad_norm": 11.424459457397461, "learning_rate": 9.917012806764943e-06, "loss": 0.4464, "step": 1361 }, { "epoch": 0.0675628751426162, "grad_norm": 7.169731616973877, "learning_rate": 9.916869938482671e-06, "loss": 0.3183, "step": 1362 }, { "epoch": 0.06761248077781636, "grad_norm": 13.311320304870605, "learning_rate": 9.91672694835814e-06, "loss": 0.3671, "step": 1363 }, { "epoch": 0.06766208641301652, "grad_norm": 10.217534065246582, "learning_rate": 9.91658383639489e-06, "loss": 0.4093, "step": 1364 }, { "epoch": 0.06771169204821667, "grad_norm": 5.86232852935791, "learning_rate": 9.916440602596469e-06, "loss": 0.3268, "step": 1365 }, { "epoch": 0.06776129768341684, "grad_norm": 6.7693705558776855, "learning_rate": 9.916297246966425e-06, "loss": 0.2805, "step": 1366 }, { "epoch": 0.067810903318617, "grad_norm": 6.651901721954346, "learning_rate": 9.916153769508312e-06, "loss": 0.2912, "step": 1367 }, { "epoch": 0.06786050895381715, "grad_norm": 10.402472496032715, "learning_rate": 9.916010170225686e-06, "loss": 0.4124, "step": 1368 }, { "epoch": 0.06791011458901731, "grad_norm": 8.760671615600586, "learning_rate": 9.915866449122101e-06, "loss": 0.3064, "step": 1369 }, { "epoch": 0.06795972022421747, "grad_norm": 9.521327018737793, "learning_rate": 9.915722606201124e-06, "loss": 0.4235, "step": 1370 }, { "epoch": 0.06800932585941763, "grad_norm": 8.75162410736084, "learning_rate": 9.915578641466317e-06, "loss": 0.3079, "step": 1371 }, { "epoch": 0.0680589314946178, "grad_norm": 7.008697986602783, "learning_rate": 9.915434554921248e-06, "loss": 0.3731, "step": 1372 }, { "epoch": 0.06810853712981794, "grad_norm": 11.634955406188965, "learning_rate": 9.915290346569486e-06, "loss": 0.408, "step": 1373 }, { "epoch": 0.06815814276501811, "grad_norm": 4.375974655151367, "learning_rate": 9.915146016414606e-06, "loss": 0.2748, "step": 1374 }, { "epoch": 0.06820774840021826, "grad_norm": 8.661351203918457, "learning_rate": 9.915001564460185e-06, "loss": 0.45, "step": 1375 }, { "epoch": 0.06825735403541842, "grad_norm": 6.374176502227783, "learning_rate": 9.914856990709802e-06, "loss": 0.283, "step": 1376 }, { "epoch": 0.06830695967061859, "grad_norm": 13.575498580932617, "learning_rate": 9.91471229516704e-06, "loss": 0.4169, "step": 1377 }, { "epoch": 0.06835656530581874, "grad_norm": 7.531180381774902, "learning_rate": 9.914567477835482e-06, "loss": 0.3885, "step": 1378 }, { "epoch": 0.0684061709410189, "grad_norm": 15.134598731994629, "learning_rate": 9.914422538718719e-06, "loss": 0.4158, "step": 1379 }, { "epoch": 0.06845577657621905, "grad_norm": 14.755550384521484, "learning_rate": 9.914277477820345e-06, "loss": 0.354, "step": 1380 }, { "epoch": 0.06850538221141922, "grad_norm": 9.284915924072266, "learning_rate": 9.91413229514395e-06, "loss": 0.3694, "step": 1381 }, { "epoch": 0.06855498784661937, "grad_norm": 8.119775772094727, "learning_rate": 9.913986990693135e-06, "loss": 0.3266, "step": 1382 }, { "epoch": 0.06860459348181953, "grad_norm": 7.578646659851074, "learning_rate": 9.9138415644715e-06, "loss": 0.3225, "step": 1383 }, { "epoch": 0.0686541991170197, "grad_norm": 8.469328880310059, "learning_rate": 9.913696016482647e-06, "loss": 0.4337, "step": 1384 }, { "epoch": 0.06870380475221985, "grad_norm": 7.904855728149414, "learning_rate": 9.913550346730183e-06, "loss": 0.3714, "step": 1385 }, { "epoch": 0.06875341038742001, "grad_norm": 7.896658420562744, "learning_rate": 9.913404555217722e-06, "loss": 0.3948, "step": 1386 }, { "epoch": 0.06880301602262016, "grad_norm": 26.201770782470703, "learning_rate": 9.913258641948872e-06, "loss": 0.3175, "step": 1387 }, { "epoch": 0.06885262165782033, "grad_norm": 7.96417236328125, "learning_rate": 9.913112606927248e-06, "loss": 0.229, "step": 1388 }, { "epoch": 0.0689022272930205, "grad_norm": 6.9763689041137695, "learning_rate": 9.912966450156473e-06, "loss": 0.4468, "step": 1389 }, { "epoch": 0.06895183292822064, "grad_norm": 7.621404647827148, "learning_rate": 9.91282017164017e-06, "loss": 0.3514, "step": 1390 }, { "epoch": 0.06900143856342081, "grad_norm": 9.312821388244629, "learning_rate": 9.912673771381957e-06, "loss": 0.3341, "step": 1391 }, { "epoch": 0.06905104419862096, "grad_norm": 5.711660385131836, "learning_rate": 9.912527249385466e-06, "loss": 0.3078, "step": 1392 }, { "epoch": 0.06910064983382112, "grad_norm": 8.9847412109375, "learning_rate": 9.912380605654328e-06, "loss": 0.2832, "step": 1393 }, { "epoch": 0.06915025546902127, "grad_norm": 8.63967227935791, "learning_rate": 9.912233840192179e-06, "loss": 0.3435, "step": 1394 }, { "epoch": 0.06919986110422144, "grad_norm": 8.995624542236328, "learning_rate": 9.91208695300265e-06, "loss": 0.3432, "step": 1395 }, { "epoch": 0.0692494667394216, "grad_norm": 22.34879493713379, "learning_rate": 9.911939944089387e-06, "loss": 0.386, "step": 1396 }, { "epoch": 0.06929907237462175, "grad_norm": 13.72707748413086, "learning_rate": 9.911792813456026e-06, "loss": 0.2808, "step": 1397 }, { "epoch": 0.06934867800982192, "grad_norm": 12.809539794921875, "learning_rate": 9.911645561106222e-06, "loss": 0.4016, "step": 1398 }, { "epoch": 0.06939828364502207, "grad_norm": 6.212936878204346, "learning_rate": 9.911498187043617e-06, "loss": 0.3039, "step": 1399 }, { "epoch": 0.06944788928022223, "grad_norm": 18.401012420654297, "learning_rate": 9.911350691271865e-06, "loss": 0.3969, "step": 1400 }, { "epoch": 0.0694974949154224, "grad_norm": 10.159875869750977, "learning_rate": 9.911203073794622e-06, "loss": 0.3487, "step": 1401 }, { "epoch": 0.06954710055062255, "grad_norm": 10.51297664642334, "learning_rate": 9.911055334615545e-06, "loss": 0.3632, "step": 1402 }, { "epoch": 0.06959670618582271, "grad_norm": 8.49167537689209, "learning_rate": 9.910907473738295e-06, "loss": 0.3603, "step": 1403 }, { "epoch": 0.06964631182102286, "grad_norm": 13.546503067016602, "learning_rate": 9.910759491166537e-06, "loss": 0.2968, "step": 1404 }, { "epoch": 0.06969591745622303, "grad_norm": 8.162239074707031, "learning_rate": 9.910611386903937e-06, "loss": 0.3121, "step": 1405 }, { "epoch": 0.06974552309142319, "grad_norm": 7.588868618011475, "learning_rate": 9.910463160954164e-06, "loss": 0.3458, "step": 1406 }, { "epoch": 0.06979512872662334, "grad_norm": 4.8236403465271, "learning_rate": 9.910314813320896e-06, "loss": 0.2892, "step": 1407 }, { "epoch": 0.06984473436182351, "grad_norm": 9.909855842590332, "learning_rate": 9.910166344007804e-06, "loss": 0.3017, "step": 1408 }, { "epoch": 0.06989433999702366, "grad_norm": 6.6382246017456055, "learning_rate": 9.910017753018568e-06, "loss": 0.4042, "step": 1409 }, { "epoch": 0.06994394563222382, "grad_norm": 7.727550029754639, "learning_rate": 9.909869040356871e-06, "loss": 0.2862, "step": 1410 }, { "epoch": 0.06999355126742397, "grad_norm": 7.492676734924316, "learning_rate": 9.9097202060264e-06, "loss": 0.3567, "step": 1411 }, { "epoch": 0.07004315690262414, "grad_norm": 5.657822608947754, "learning_rate": 9.909571250030841e-06, "loss": 0.318, "step": 1412 }, { "epoch": 0.0700927625378243, "grad_norm": 6.645055294036865, "learning_rate": 9.909422172373884e-06, "loss": 0.3005, "step": 1413 }, { "epoch": 0.07014236817302445, "grad_norm": 9.04488468170166, "learning_rate": 9.909272973059226e-06, "loss": 0.4177, "step": 1414 }, { "epoch": 0.07019197380822462, "grad_norm": 6.708448886871338, "learning_rate": 9.909123652090563e-06, "loss": 0.269, "step": 1415 }, { "epoch": 0.07024157944342477, "grad_norm": 4.653655052185059, "learning_rate": 9.908974209471595e-06, "loss": 0.2578, "step": 1416 }, { "epoch": 0.07029118507862493, "grad_norm": 5.715394973754883, "learning_rate": 9.908824645206026e-06, "loss": 0.3038, "step": 1417 }, { "epoch": 0.0703407907138251, "grad_norm": 15.00020694732666, "learning_rate": 9.908674959297562e-06, "loss": 0.3437, "step": 1418 }, { "epoch": 0.07039039634902525, "grad_norm": 9.2960786819458, "learning_rate": 9.90852515174991e-06, "loss": 0.3002, "step": 1419 }, { "epoch": 0.07044000198422541, "grad_norm": 15.623435974121094, "learning_rate": 9.908375222566787e-06, "loss": 0.4146, "step": 1420 }, { "epoch": 0.07048960761942556, "grad_norm": 15.730914115905762, "learning_rate": 9.908225171751905e-06, "loss": 0.532, "step": 1421 }, { "epoch": 0.07053921325462573, "grad_norm": 17.809001922607422, "learning_rate": 9.908074999308981e-06, "loss": 0.5434, "step": 1422 }, { "epoch": 0.07058881888982589, "grad_norm": 13.539445877075195, "learning_rate": 9.90792470524174e-06, "loss": 0.5362, "step": 1423 }, { "epoch": 0.07063842452502604, "grad_norm": 9.528739929199219, "learning_rate": 9.907774289553904e-06, "loss": 0.2463, "step": 1424 }, { "epoch": 0.0706880301602262, "grad_norm": 10.163671493530273, "learning_rate": 9.907623752249203e-06, "loss": 0.364, "step": 1425 }, { "epoch": 0.07073763579542636, "grad_norm": 11.319984436035156, "learning_rate": 9.907473093331363e-06, "loss": 0.3712, "step": 1426 }, { "epoch": 0.07078724143062652, "grad_norm": 17.255756378173828, "learning_rate": 9.907322312804122e-06, "loss": 0.4895, "step": 1427 }, { "epoch": 0.07083684706582667, "grad_norm": 10.039335250854492, "learning_rate": 9.907171410671212e-06, "loss": 0.4381, "step": 1428 }, { "epoch": 0.07088645270102684, "grad_norm": 7.045080184936523, "learning_rate": 9.907020386936377e-06, "loss": 0.3333, "step": 1429 }, { "epoch": 0.070936058336227, "grad_norm": 5.95064640045166, "learning_rate": 9.906869241603356e-06, "loss": 0.3501, "step": 1430 }, { "epoch": 0.07098566397142715, "grad_norm": 6.79366397857666, "learning_rate": 9.906717974675896e-06, "loss": 0.4427, "step": 1431 }, { "epoch": 0.07103526960662732, "grad_norm": 7.164727687835693, "learning_rate": 9.906566586157744e-06, "loss": 0.3683, "step": 1432 }, { "epoch": 0.07108487524182747, "grad_norm": 9.374330520629883, "learning_rate": 9.906415076052654e-06, "loss": 0.4426, "step": 1433 }, { "epoch": 0.07113448087702763, "grad_norm": 5.546106338500977, "learning_rate": 9.906263444364378e-06, "loss": 0.3369, "step": 1434 }, { "epoch": 0.0711840865122278, "grad_norm": 10.342202186584473, "learning_rate": 9.906111691096675e-06, "loss": 0.4857, "step": 1435 }, { "epoch": 0.07123369214742795, "grad_norm": 9.203970909118652, "learning_rate": 9.905959816253304e-06, "loss": 0.2754, "step": 1436 }, { "epoch": 0.07128329778262811, "grad_norm": 11.111735343933105, "learning_rate": 9.90580781983803e-06, "loss": 0.4207, "step": 1437 }, { "epoch": 0.07133290341782826, "grad_norm": 7.8894758224487305, "learning_rate": 9.90565570185462e-06, "loss": 0.3687, "step": 1438 }, { "epoch": 0.07138250905302843, "grad_norm": 16.945571899414062, "learning_rate": 9.905503462306842e-06, "loss": 0.461, "step": 1439 }, { "epoch": 0.07143211468822858, "grad_norm": 12.212980270385742, "learning_rate": 9.90535110119847e-06, "loss": 0.4235, "step": 1440 }, { "epoch": 0.07148172032342874, "grad_norm": 8.681075096130371, "learning_rate": 9.905198618533278e-06, "loss": 0.2874, "step": 1441 }, { "epoch": 0.0715313259586289, "grad_norm": 6.498761177062988, "learning_rate": 9.905046014315045e-06, "loss": 0.259, "step": 1442 }, { "epoch": 0.07158093159382906, "grad_norm": 6.927768707275391, "learning_rate": 9.904893288547552e-06, "loss": 0.3682, "step": 1443 }, { "epoch": 0.07163053722902922, "grad_norm": 10.970015525817871, "learning_rate": 9.904740441234586e-06, "loss": 0.4685, "step": 1444 }, { "epoch": 0.07168014286422937, "grad_norm": 5.9584245681762695, "learning_rate": 9.904587472379934e-06, "loss": 0.284, "step": 1445 }, { "epoch": 0.07172974849942954, "grad_norm": 13.262645721435547, "learning_rate": 9.904434381987384e-06, "loss": 0.3488, "step": 1446 }, { "epoch": 0.0717793541346297, "grad_norm": 7.444366931915283, "learning_rate": 9.904281170060733e-06, "loss": 0.4107, "step": 1447 }, { "epoch": 0.07182895976982985, "grad_norm": 9.461404800415039, "learning_rate": 9.904127836603776e-06, "loss": 0.3403, "step": 1448 }, { "epoch": 0.07187856540503001, "grad_norm": 3.4252543449401855, "learning_rate": 9.903974381620312e-06, "loss": 0.1889, "step": 1449 }, { "epoch": 0.07192817104023017, "grad_norm": 8.342308044433594, "learning_rate": 9.903820805114145e-06, "loss": 0.4562, "step": 1450 }, { "epoch": 0.07197777667543033, "grad_norm": 13.038439750671387, "learning_rate": 9.90366710708908e-06, "loss": 0.3985, "step": 1451 }, { "epoch": 0.0720273823106305, "grad_norm": 5.914816379547119, "learning_rate": 9.903513287548927e-06, "loss": 0.3101, "step": 1452 }, { "epoch": 0.07207698794583065, "grad_norm": 13.787504196166992, "learning_rate": 9.903359346497496e-06, "loss": 0.5308, "step": 1453 }, { "epoch": 0.07212659358103081, "grad_norm": 14.838393211364746, "learning_rate": 9.903205283938602e-06, "loss": 0.3609, "step": 1454 }, { "epoch": 0.07217619921623096, "grad_norm": 9.99278450012207, "learning_rate": 9.903051099876061e-06, "loss": 0.384, "step": 1455 }, { "epoch": 0.07222580485143112, "grad_norm": 22.333633422851562, "learning_rate": 9.9028967943137e-06, "loss": 0.5665, "step": 1456 }, { "epoch": 0.07227541048663128, "grad_norm": 5.840421199798584, "learning_rate": 9.902742367255337e-06, "loss": 0.428, "step": 1457 }, { "epoch": 0.07232501612183144, "grad_norm": 9.742481231689453, "learning_rate": 9.9025878187048e-06, "loss": 0.3591, "step": 1458 }, { "epoch": 0.0723746217570316, "grad_norm": 17.712343215942383, "learning_rate": 9.90243314866592e-06, "loss": 0.3672, "step": 1459 }, { "epoch": 0.07242422739223175, "grad_norm": 13.339099884033203, "learning_rate": 9.902278357142528e-06, "loss": 0.4372, "step": 1460 }, { "epoch": 0.07247383302743192, "grad_norm": 11.875268936157227, "learning_rate": 9.90212344413846e-06, "loss": 0.3302, "step": 1461 }, { "epoch": 0.07252343866263207, "grad_norm": 6.725773811340332, "learning_rate": 9.901968409657556e-06, "loss": 0.2415, "step": 1462 }, { "epoch": 0.07257304429783223, "grad_norm": 9.347891807556152, "learning_rate": 9.901813253703659e-06, "loss": 0.2868, "step": 1463 }, { "epoch": 0.0726226499330324, "grad_norm": 10.418804168701172, "learning_rate": 9.90165797628061e-06, "loss": 0.4648, "step": 1464 }, { "epoch": 0.07267225556823255, "grad_norm": 7.55576753616333, "learning_rate": 9.90150257739226e-06, "loss": 0.322, "step": 1465 }, { "epoch": 0.07272186120343271, "grad_norm": 7.459690570831299, "learning_rate": 9.901347057042461e-06, "loss": 0.3787, "step": 1466 }, { "epoch": 0.07277146683863286, "grad_norm": 4.591613292694092, "learning_rate": 9.901191415235063e-06, "loss": 0.2575, "step": 1467 }, { "epoch": 0.07282107247383303, "grad_norm": 8.897802352905273, "learning_rate": 9.901035651973924e-06, "loss": 0.4014, "step": 1468 }, { "epoch": 0.07287067810903318, "grad_norm": 9.428009986877441, "learning_rate": 9.900879767262906e-06, "loss": 0.4104, "step": 1469 }, { "epoch": 0.07292028374423334, "grad_norm": 8.66041374206543, "learning_rate": 9.900723761105869e-06, "loss": 0.3575, "step": 1470 }, { "epoch": 0.07296988937943351, "grad_norm": 8.216608047485352, "learning_rate": 9.900567633506681e-06, "loss": 0.3418, "step": 1471 }, { "epoch": 0.07301949501463366, "grad_norm": 12.25301742553711, "learning_rate": 9.900411384469211e-06, "loss": 0.3998, "step": 1472 }, { "epoch": 0.07306910064983382, "grad_norm": 9.357500076293945, "learning_rate": 9.900255013997329e-06, "loss": 0.4146, "step": 1473 }, { "epoch": 0.07311870628503397, "grad_norm": 19.56878089904785, "learning_rate": 9.900098522094912e-06, "loss": 0.2804, "step": 1474 }, { "epoch": 0.07316831192023414, "grad_norm": 8.439906120300293, "learning_rate": 9.899941908765835e-06, "loss": 0.4493, "step": 1475 }, { "epoch": 0.0732179175554343, "grad_norm": 5.6773881912231445, "learning_rate": 9.899785174013983e-06, "loss": 0.2644, "step": 1476 }, { "epoch": 0.07326752319063445, "grad_norm": 9.765853881835938, "learning_rate": 9.899628317843238e-06, "loss": 0.234, "step": 1477 }, { "epoch": 0.07331712882583462, "grad_norm": 11.8174409866333, "learning_rate": 9.899471340257487e-06, "loss": 0.4651, "step": 1478 }, { "epoch": 0.07336673446103477, "grad_norm": 7.78737735748291, "learning_rate": 9.89931424126062e-06, "loss": 0.4425, "step": 1479 }, { "epoch": 0.07341634009623493, "grad_norm": 12.301872253417969, "learning_rate": 9.899157020856529e-06, "loss": 0.3587, "step": 1480 }, { "epoch": 0.0734659457314351, "grad_norm": 7.643492698669434, "learning_rate": 9.898999679049112e-06, "loss": 0.2957, "step": 1481 }, { "epoch": 0.07351555136663525, "grad_norm": 7.573518753051758, "learning_rate": 9.898842215842266e-06, "loss": 0.269, "step": 1482 }, { "epoch": 0.07356515700183541, "grad_norm": 6.994720458984375, "learning_rate": 9.898684631239895e-06, "loss": 0.2402, "step": 1483 }, { "epoch": 0.07361476263703556, "grad_norm": 10.315350532531738, "learning_rate": 9.8985269252459e-06, "loss": 0.4746, "step": 1484 }, { "epoch": 0.07366436827223573, "grad_norm": 10.123888969421387, "learning_rate": 9.898369097864196e-06, "loss": 0.345, "step": 1485 }, { "epoch": 0.07371397390743588, "grad_norm": 13.198616027832031, "learning_rate": 9.898211149098688e-06, "loss": 0.3979, "step": 1486 }, { "epoch": 0.07376357954263604, "grad_norm": 12.903793334960938, "learning_rate": 9.898053078953292e-06, "loss": 0.3947, "step": 1487 }, { "epoch": 0.07381318517783621, "grad_norm": 10.654472351074219, "learning_rate": 9.897894887431925e-06, "loss": 0.4025, "step": 1488 }, { "epoch": 0.07386279081303636, "grad_norm": 4.883676052093506, "learning_rate": 9.897736574538506e-06, "loss": 0.2536, "step": 1489 }, { "epoch": 0.07391239644823652, "grad_norm": 12.896039009094238, "learning_rate": 9.897578140276962e-06, "loss": 0.4313, "step": 1490 }, { "epoch": 0.07396200208343667, "grad_norm": 11.54503345489502, "learning_rate": 9.897419584651214e-06, "loss": 0.4159, "step": 1491 }, { "epoch": 0.07401160771863684, "grad_norm": 9.545669555664062, "learning_rate": 9.897260907665193e-06, "loss": 0.3741, "step": 1492 }, { "epoch": 0.074061213353837, "grad_norm": 7.21199369430542, "learning_rate": 9.897102109322833e-06, "loss": 0.4246, "step": 1493 }, { "epoch": 0.07411081898903715, "grad_norm": 17.615476608276367, "learning_rate": 9.896943189628067e-06, "loss": 0.3274, "step": 1494 }, { "epoch": 0.07416042462423732, "grad_norm": 16.042207717895508, "learning_rate": 9.896784148584835e-06, "loss": 0.4814, "step": 1495 }, { "epoch": 0.07421003025943747, "grad_norm": 11.091893196105957, "learning_rate": 9.896624986197074e-06, "loss": 0.3645, "step": 1496 }, { "epoch": 0.07425963589463763, "grad_norm": 5.51421594619751, "learning_rate": 9.896465702468733e-06, "loss": 0.4087, "step": 1497 }, { "epoch": 0.07430924152983778, "grad_norm": 18.56175994873047, "learning_rate": 9.896306297403758e-06, "loss": 0.5107, "step": 1498 }, { "epoch": 0.07435884716503795, "grad_norm": 14.247145652770996, "learning_rate": 9.896146771006095e-06, "loss": 0.3527, "step": 1499 }, { "epoch": 0.07440845280023811, "grad_norm": 10.00219440460205, "learning_rate": 9.895987123279702e-06, "loss": 0.3266, "step": 1500 }, { "epoch": 0.07445805843543826, "grad_norm": 7.0899248123168945, "learning_rate": 9.895827354228533e-06, "loss": 0.3729, "step": 1501 }, { "epoch": 0.07450766407063843, "grad_norm": 12.85866641998291, "learning_rate": 9.895667463856547e-06, "loss": 0.4353, "step": 1502 }, { "epoch": 0.07455726970583858, "grad_norm": 5.664809226989746, "learning_rate": 9.895507452167708e-06, "loss": 0.2896, "step": 1503 }, { "epoch": 0.07460687534103874, "grad_norm": 6.087647914886475, "learning_rate": 9.895347319165977e-06, "loss": 0.2529, "step": 1504 }, { "epoch": 0.0746564809762389, "grad_norm": 13.945515632629395, "learning_rate": 9.895187064855328e-06, "loss": 0.3959, "step": 1505 }, { "epoch": 0.07470608661143906, "grad_norm": 12.94007396697998, "learning_rate": 9.895026689239729e-06, "loss": 0.4108, "step": 1506 }, { "epoch": 0.07475569224663922, "grad_norm": 10.511324882507324, "learning_rate": 9.894866192323153e-06, "loss": 0.4185, "step": 1507 }, { "epoch": 0.07480529788183937, "grad_norm": 8.45113754272461, "learning_rate": 9.89470557410958e-06, "loss": 0.3935, "step": 1508 }, { "epoch": 0.07485490351703954, "grad_norm": 8.824756622314453, "learning_rate": 9.894544834602988e-06, "loss": 0.3363, "step": 1509 }, { "epoch": 0.0749045091522397, "grad_norm": 9.1610107421875, "learning_rate": 9.89438397380736e-06, "loss": 0.3878, "step": 1510 }, { "epoch": 0.07495411478743985, "grad_norm": 7.95134162902832, "learning_rate": 9.894222991726686e-06, "loss": 0.3073, "step": 1511 }, { "epoch": 0.07500372042264002, "grad_norm": 10.547239303588867, "learning_rate": 9.89406188836495e-06, "loss": 0.4055, "step": 1512 }, { "epoch": 0.07505332605784017, "grad_norm": 6.6956071853637695, "learning_rate": 9.893900663726146e-06, "loss": 0.3429, "step": 1513 }, { "epoch": 0.07510293169304033, "grad_norm": 9.327779769897461, "learning_rate": 9.89373931781427e-06, "loss": 0.3039, "step": 1514 }, { "epoch": 0.07515253732824048, "grad_norm": 6.053716659545898, "learning_rate": 9.893577850633322e-06, "loss": 0.3195, "step": 1515 }, { "epoch": 0.07520214296344065, "grad_norm": 6.196651935577393, "learning_rate": 9.8934162621873e-06, "loss": 0.2661, "step": 1516 }, { "epoch": 0.07525174859864081, "grad_norm": 10.1974515914917, "learning_rate": 9.89325455248021e-06, "loss": 0.4588, "step": 1517 }, { "epoch": 0.07530135423384096, "grad_norm": 9.299643516540527, "learning_rate": 9.893092721516058e-06, "loss": 0.3966, "step": 1518 }, { "epoch": 0.07535095986904113, "grad_norm": 8.846598625183105, "learning_rate": 9.892930769298857e-06, "loss": 0.3859, "step": 1519 }, { "epoch": 0.07540056550424128, "grad_norm": 6.323727607727051, "learning_rate": 9.892768695832615e-06, "loss": 0.3312, "step": 1520 }, { "epoch": 0.07545017113944144, "grad_norm": 9.659489631652832, "learning_rate": 9.892606501121353e-06, "loss": 0.4708, "step": 1521 }, { "epoch": 0.0754997767746416, "grad_norm": 18.449386596679688, "learning_rate": 9.892444185169089e-06, "loss": 0.4191, "step": 1522 }, { "epoch": 0.07554938240984176, "grad_norm": 11.131110191345215, "learning_rate": 9.892281747979845e-06, "loss": 0.3664, "step": 1523 }, { "epoch": 0.07559898804504192, "grad_norm": 9.305981636047363, "learning_rate": 9.892119189557646e-06, "loss": 0.3503, "step": 1524 }, { "epoch": 0.07564859368024207, "grad_norm": 9.210375785827637, "learning_rate": 9.89195650990652e-06, "loss": 0.2811, "step": 1525 }, { "epoch": 0.07569819931544224, "grad_norm": 13.787552833557129, "learning_rate": 9.891793709030499e-06, "loss": 0.5032, "step": 1526 }, { "epoch": 0.07574780495064239, "grad_norm": 12.093886375427246, "learning_rate": 9.891630786933617e-06, "loss": 0.4713, "step": 1527 }, { "epoch": 0.07579741058584255, "grad_norm": 7.652362823486328, "learning_rate": 9.891467743619914e-06, "loss": 0.2851, "step": 1528 }, { "epoch": 0.07584701622104272, "grad_norm": 10.848226547241211, "learning_rate": 9.891304579093425e-06, "loss": 0.3479, "step": 1529 }, { "epoch": 0.07589662185624287, "grad_norm": 15.513663291931152, "learning_rate": 9.891141293358196e-06, "loss": 0.3723, "step": 1530 }, { "epoch": 0.07594622749144303, "grad_norm": 7.385061264038086, "learning_rate": 9.890977886418274e-06, "loss": 0.2628, "step": 1531 }, { "epoch": 0.07599583312664318, "grad_norm": 8.660412788391113, "learning_rate": 9.890814358277707e-06, "loss": 0.3483, "step": 1532 }, { "epoch": 0.07604543876184335, "grad_norm": 6.258254051208496, "learning_rate": 9.890650708940547e-06, "loss": 0.3351, "step": 1533 }, { "epoch": 0.07609504439704351, "grad_norm": 14.879465103149414, "learning_rate": 9.890486938410853e-06, "loss": 0.3864, "step": 1534 }, { "epoch": 0.07614465003224366, "grad_norm": 6.918972492218018, "learning_rate": 9.890323046692678e-06, "loss": 0.2629, "step": 1535 }, { "epoch": 0.07619425566744382, "grad_norm": 12.939184188842773, "learning_rate": 9.890159033790086e-06, "loss": 0.3864, "step": 1536 }, { "epoch": 0.07624386130264398, "grad_norm": 7.818156719207764, "learning_rate": 9.889994899707143e-06, "loss": 0.2176, "step": 1537 }, { "epoch": 0.07629346693784414, "grad_norm": 10.645133972167969, "learning_rate": 9.889830644447913e-06, "loss": 0.3903, "step": 1538 }, { "epoch": 0.0763430725730443, "grad_norm": 9.126253128051758, "learning_rate": 9.889666268016468e-06, "loss": 0.3325, "step": 1539 }, { "epoch": 0.07639267820824445, "grad_norm": 10.491693496704102, "learning_rate": 9.889501770416881e-06, "loss": 0.4084, "step": 1540 }, { "epoch": 0.07644228384344462, "grad_norm": 9.27794361114502, "learning_rate": 9.889337151653226e-06, "loss": 0.4281, "step": 1541 }, { "epoch": 0.07649188947864477, "grad_norm": 6.730120658874512, "learning_rate": 9.889172411729589e-06, "loss": 0.3352, "step": 1542 }, { "epoch": 0.07654149511384493, "grad_norm": 7.0221757888793945, "learning_rate": 9.889007550650045e-06, "loss": 0.3502, "step": 1543 }, { "epoch": 0.07659110074904509, "grad_norm": 7.425034046173096, "learning_rate": 9.888842568418684e-06, "loss": 0.2635, "step": 1544 }, { "epoch": 0.07664070638424525, "grad_norm": 16.75295066833496, "learning_rate": 9.888677465039592e-06, "loss": 0.4992, "step": 1545 }, { "epoch": 0.07669031201944541, "grad_norm": 7.586045742034912, "learning_rate": 9.888512240516858e-06, "loss": 0.3367, "step": 1546 }, { "epoch": 0.07673991765464556, "grad_norm": 6.637373447418213, "learning_rate": 9.888346894854583e-06, "loss": 0.2358, "step": 1547 }, { "epoch": 0.07678952328984573, "grad_norm": 9.584066390991211, "learning_rate": 9.88818142805686e-06, "loss": 0.3754, "step": 1548 }, { "epoch": 0.07683912892504588, "grad_norm": 10.688159942626953, "learning_rate": 9.888015840127791e-06, "loss": 0.3589, "step": 1549 }, { "epoch": 0.07688873456024604, "grad_norm": 6.612847805023193, "learning_rate": 9.887850131071476e-06, "loss": 0.3545, "step": 1550 }, { "epoch": 0.07693834019544621, "grad_norm": 6.171592712402344, "learning_rate": 9.887684300892027e-06, "loss": 0.3184, "step": 1551 }, { "epoch": 0.07698794583064636, "grad_norm": 8.57545280456543, "learning_rate": 9.887518349593547e-06, "loss": 0.3255, "step": 1552 }, { "epoch": 0.07703755146584652, "grad_norm": 6.8267011642456055, "learning_rate": 9.887352277180153e-06, "loss": 0.3114, "step": 1553 }, { "epoch": 0.07708715710104667, "grad_norm": 7.4154582023620605, "learning_rate": 9.887186083655958e-06, "loss": 0.3321, "step": 1554 }, { "epoch": 0.07713676273624684, "grad_norm": 11.149864196777344, "learning_rate": 9.887019769025081e-06, "loss": 0.3473, "step": 1555 }, { "epoch": 0.07718636837144699, "grad_norm": 14.982035636901855, "learning_rate": 9.886853333291645e-06, "loss": 0.4675, "step": 1556 }, { "epoch": 0.07723597400664715, "grad_norm": 7.713184833526611, "learning_rate": 9.886686776459771e-06, "loss": 0.2354, "step": 1557 }, { "epoch": 0.07728557964184732, "grad_norm": 6.86961555480957, "learning_rate": 9.88652009853359e-06, "loss": 0.2344, "step": 1558 }, { "epoch": 0.07733518527704747, "grad_norm": 5.457010746002197, "learning_rate": 9.886353299517229e-06, "loss": 0.3124, "step": 1559 }, { "epoch": 0.07738479091224763, "grad_norm": 8.049211502075195, "learning_rate": 9.886186379414824e-06, "loss": 0.3934, "step": 1560 }, { "epoch": 0.07743439654744778, "grad_norm": 8.874573707580566, "learning_rate": 9.88601933823051e-06, "loss": 0.3664, "step": 1561 }, { "epoch": 0.07748400218264795, "grad_norm": 8.901989936828613, "learning_rate": 9.885852175968427e-06, "loss": 0.3306, "step": 1562 }, { "epoch": 0.07753360781784811, "grad_norm": 6.2260026931762695, "learning_rate": 9.885684892632716e-06, "loss": 0.3761, "step": 1563 }, { "epoch": 0.07758321345304826, "grad_norm": 8.313116073608398, "learning_rate": 9.885517488227523e-06, "loss": 0.3663, "step": 1564 }, { "epoch": 0.07763281908824843, "grad_norm": 10.09374713897705, "learning_rate": 9.885349962756998e-06, "loss": 0.4757, "step": 1565 }, { "epoch": 0.07768242472344858, "grad_norm": 5.703207492828369, "learning_rate": 9.885182316225291e-06, "loss": 0.2736, "step": 1566 }, { "epoch": 0.07773203035864874, "grad_norm": 6.386361598968506, "learning_rate": 9.885014548636556e-06, "loss": 0.2789, "step": 1567 }, { "epoch": 0.07778163599384891, "grad_norm": 9.486433029174805, "learning_rate": 9.884846659994948e-06, "loss": 0.3678, "step": 1568 }, { "epoch": 0.07783124162904906, "grad_norm": 7.908048629760742, "learning_rate": 9.884678650304634e-06, "loss": 0.4211, "step": 1569 }, { "epoch": 0.07788084726424922, "grad_norm": 6.88662052154541, "learning_rate": 9.884510519569772e-06, "loss": 0.2686, "step": 1570 }, { "epoch": 0.07793045289944937, "grad_norm": 6.2434797286987305, "learning_rate": 9.884342267794531e-06, "loss": 0.3427, "step": 1571 }, { "epoch": 0.07798005853464954, "grad_norm": 5.928280830383301, "learning_rate": 9.884173894983078e-06, "loss": 0.3216, "step": 1572 }, { "epoch": 0.07802966416984969, "grad_norm": 11.751882553100586, "learning_rate": 9.884005401139587e-06, "loss": 0.4012, "step": 1573 }, { "epoch": 0.07807926980504985, "grad_norm": 8.915448188781738, "learning_rate": 9.883836786268232e-06, "loss": 0.3715, "step": 1574 }, { "epoch": 0.07812887544025002, "grad_norm": 9.343057632446289, "learning_rate": 9.883668050373192e-06, "loss": 0.4407, "step": 1575 }, { "epoch": 0.07817848107545017, "grad_norm": 16.51947593688965, "learning_rate": 9.883499193458649e-06, "loss": 0.5203, "step": 1576 }, { "epoch": 0.07822808671065033, "grad_norm": 8.198932647705078, "learning_rate": 9.883330215528788e-06, "loss": 0.3453, "step": 1577 }, { "epoch": 0.07827769234585048, "grad_norm": 9.670140266418457, "learning_rate": 9.883161116587796e-06, "loss": 0.2995, "step": 1578 }, { "epoch": 0.07832729798105065, "grad_norm": 7.424777030944824, "learning_rate": 9.88299189663986e-06, "loss": 0.3325, "step": 1579 }, { "epoch": 0.07837690361625081, "grad_norm": 17.041748046875, "learning_rate": 9.882822555689178e-06, "loss": 0.4032, "step": 1580 }, { "epoch": 0.07842650925145096, "grad_norm": 6.103675365447998, "learning_rate": 9.882653093739943e-06, "loss": 0.2181, "step": 1581 }, { "epoch": 0.07847611488665113, "grad_norm": 14.001985549926758, "learning_rate": 9.882483510796355e-06, "loss": 0.4708, "step": 1582 }, { "epoch": 0.07852572052185128, "grad_norm": 6.731266498565674, "learning_rate": 9.882313806862619e-06, "loss": 0.3022, "step": 1583 }, { "epoch": 0.07857532615705144, "grad_norm": 10.085765838623047, "learning_rate": 9.882143981942938e-06, "loss": 0.3136, "step": 1584 }, { "epoch": 0.0786249317922516, "grad_norm": 17.175962448120117, "learning_rate": 9.88197403604152e-06, "loss": 0.5301, "step": 1585 }, { "epoch": 0.07867453742745176, "grad_norm": 5.55536413192749, "learning_rate": 9.881803969162578e-06, "loss": 0.3173, "step": 1586 }, { "epoch": 0.07872414306265192, "grad_norm": 5.865825653076172, "learning_rate": 9.881633781310325e-06, "loss": 0.3506, "step": 1587 }, { "epoch": 0.07877374869785207, "grad_norm": 8.638075828552246, "learning_rate": 9.881463472488977e-06, "loss": 0.3158, "step": 1588 }, { "epoch": 0.07882335433305224, "grad_norm": 10.886585235595703, "learning_rate": 9.881293042702758e-06, "loss": 0.3955, "step": 1589 }, { "epoch": 0.07887295996825239, "grad_norm": 6.513120174407959, "learning_rate": 9.881122491955888e-06, "loss": 0.4012, "step": 1590 }, { "epoch": 0.07892256560345255, "grad_norm": 5.715476989746094, "learning_rate": 9.880951820252596e-06, "loss": 0.331, "step": 1591 }, { "epoch": 0.07897217123865272, "grad_norm": 7.294380187988281, "learning_rate": 9.88078102759711e-06, "loss": 0.3843, "step": 1592 }, { "epoch": 0.07902177687385287, "grad_norm": 8.26030158996582, "learning_rate": 9.88061011399366e-06, "loss": 0.3235, "step": 1593 }, { "epoch": 0.07907138250905303, "grad_norm": 7.551604270935059, "learning_rate": 9.880439079446487e-06, "loss": 0.2693, "step": 1594 }, { "epoch": 0.07912098814425318, "grad_norm": 7.394933700561523, "learning_rate": 9.880267923959823e-06, "loss": 0.3749, "step": 1595 }, { "epoch": 0.07917059377945335, "grad_norm": 11.5780029296875, "learning_rate": 9.880096647537914e-06, "loss": 0.3926, "step": 1596 }, { "epoch": 0.07922019941465351, "grad_norm": 7.502588272094727, "learning_rate": 9.879925250185003e-06, "loss": 0.4386, "step": 1597 }, { "epoch": 0.07926980504985366, "grad_norm": 8.85771656036377, "learning_rate": 9.879753731905335e-06, "loss": 0.4391, "step": 1598 }, { "epoch": 0.07931941068505383, "grad_norm": 12.562503814697266, "learning_rate": 9.879582092703164e-06, "loss": 0.3219, "step": 1599 }, { "epoch": 0.07936901632025398, "grad_norm": 6.9978179931640625, "learning_rate": 9.87941033258274e-06, "loss": 0.233, "step": 1600 }, { "epoch": 0.07941862195545414, "grad_norm": 7.5410637855529785, "learning_rate": 9.879238451548322e-06, "loss": 0.2484, "step": 1601 }, { "epoch": 0.07946822759065429, "grad_norm": 7.248733043670654, "learning_rate": 9.879066449604167e-06, "loss": 0.3175, "step": 1602 }, { "epoch": 0.07951783322585446, "grad_norm": 9.929132461547852, "learning_rate": 9.878894326754538e-06, "loss": 0.329, "step": 1603 }, { "epoch": 0.07956743886105462, "grad_norm": 4.765555381774902, "learning_rate": 9.878722083003702e-06, "loss": 0.2861, "step": 1604 }, { "epoch": 0.07961704449625477, "grad_norm": 9.188660621643066, "learning_rate": 9.878549718355925e-06, "loss": 0.4624, "step": 1605 }, { "epoch": 0.07966665013145494, "grad_norm": 7.728784084320068, "learning_rate": 9.87837723281548e-06, "loss": 0.3883, "step": 1606 }, { "epoch": 0.07971625576665509, "grad_norm": 12.011131286621094, "learning_rate": 9.87820462638664e-06, "loss": 0.3601, "step": 1607 }, { "epoch": 0.07976586140185525, "grad_norm": 14.348953247070312, "learning_rate": 9.878031899073683e-06, "loss": 0.4484, "step": 1608 }, { "epoch": 0.07981546703705542, "grad_norm": 6.509224891662598, "learning_rate": 9.87785905088089e-06, "loss": 0.3881, "step": 1609 }, { "epoch": 0.07986507267225557, "grad_norm": 7.779508590698242, "learning_rate": 9.877686081812541e-06, "loss": 0.2863, "step": 1610 }, { "epoch": 0.07991467830745573, "grad_norm": 5.3476786613464355, "learning_rate": 9.877512991872924e-06, "loss": 0.2717, "step": 1611 }, { "epoch": 0.07996428394265588, "grad_norm": 6.682093143463135, "learning_rate": 9.87733978106633e-06, "loss": 0.3112, "step": 1612 }, { "epoch": 0.08001388957785605, "grad_norm": 6.583839416503906, "learning_rate": 9.877166449397052e-06, "loss": 0.3597, "step": 1613 }, { "epoch": 0.08006349521305621, "grad_norm": 5.786703109741211, "learning_rate": 9.87699299686938e-06, "loss": 0.4265, "step": 1614 }, { "epoch": 0.08011310084825636, "grad_norm": 7.8201212882995605, "learning_rate": 9.876819423487616e-06, "loss": 0.2648, "step": 1615 }, { "epoch": 0.08016270648345653, "grad_norm": 8.935152053833008, "learning_rate": 9.876645729256061e-06, "loss": 0.4404, "step": 1616 }, { "epoch": 0.08021231211865668, "grad_norm": 7.895463943481445, "learning_rate": 9.87647191417902e-06, "loss": 0.1966, "step": 1617 }, { "epoch": 0.08026191775385684, "grad_norm": 5.740810394287109, "learning_rate": 9.876297978260798e-06, "loss": 0.4047, "step": 1618 }, { "epoch": 0.08031152338905699, "grad_norm": 14.17640209197998, "learning_rate": 9.876123921505707e-06, "loss": 0.4793, "step": 1619 }, { "epoch": 0.08036112902425716, "grad_norm": 5.934042930603027, "learning_rate": 9.875949743918059e-06, "loss": 0.3208, "step": 1620 }, { "epoch": 0.08041073465945732, "grad_norm": 17.652633666992188, "learning_rate": 9.87577544550217e-06, "loss": 0.4997, "step": 1621 }, { "epoch": 0.08046034029465747, "grad_norm": 11.341680526733398, "learning_rate": 9.875601026262362e-06, "loss": 0.2861, "step": 1622 }, { "epoch": 0.08050994592985763, "grad_norm": 9.614791870117188, "learning_rate": 9.875426486202953e-06, "loss": 0.3422, "step": 1623 }, { "epoch": 0.08055955156505779, "grad_norm": 8.593183517456055, "learning_rate": 9.875251825328271e-06, "loss": 0.3895, "step": 1624 }, { "epoch": 0.08060915720025795, "grad_norm": 7.727685928344727, "learning_rate": 9.875077043642644e-06, "loss": 0.3548, "step": 1625 }, { "epoch": 0.08065876283545811, "grad_norm": 6.218430519104004, "learning_rate": 9.874902141150402e-06, "loss": 0.2602, "step": 1626 }, { "epoch": 0.08070836847065826, "grad_norm": 11.5658540725708, "learning_rate": 9.87472711785588e-06, "loss": 0.3779, "step": 1627 }, { "epoch": 0.08075797410585843, "grad_norm": 17.265674591064453, "learning_rate": 9.874551973763414e-06, "loss": 0.4796, "step": 1628 }, { "epoch": 0.08080757974105858, "grad_norm": 13.251201629638672, "learning_rate": 9.874376708877346e-06, "loss": 0.4151, "step": 1629 }, { "epoch": 0.08085718537625874, "grad_norm": 8.207274436950684, "learning_rate": 9.87420132320202e-06, "loss": 0.3443, "step": 1630 }, { "epoch": 0.0809067910114589, "grad_norm": 8.241366386413574, "learning_rate": 9.874025816741778e-06, "loss": 0.3556, "step": 1631 }, { "epoch": 0.08095639664665906, "grad_norm": 9.68425178527832, "learning_rate": 9.873850189500971e-06, "loss": 0.3835, "step": 1632 }, { "epoch": 0.08100600228185922, "grad_norm": 5.2658209800720215, "learning_rate": 9.873674441483954e-06, "loss": 0.3955, "step": 1633 }, { "epoch": 0.08105560791705937, "grad_norm": 6.598599433898926, "learning_rate": 9.873498572695077e-06, "loss": 0.3122, "step": 1634 }, { "epoch": 0.08110521355225954, "grad_norm": 10.244430541992188, "learning_rate": 9.873322583138702e-06, "loss": 0.3176, "step": 1635 }, { "epoch": 0.08115481918745969, "grad_norm": 8.693181037902832, "learning_rate": 9.873146472819191e-06, "loss": 0.3177, "step": 1636 }, { "epoch": 0.08120442482265985, "grad_norm": 5.011577129364014, "learning_rate": 9.872970241740904e-06, "loss": 0.3525, "step": 1637 }, { "epoch": 0.08125403045786002, "grad_norm": 7.820796489715576, "learning_rate": 9.87279388990821e-06, "loss": 0.3678, "step": 1638 }, { "epoch": 0.08130363609306017, "grad_norm": 9.983012199401855, "learning_rate": 9.872617417325478e-06, "loss": 0.4738, "step": 1639 }, { "epoch": 0.08135324172826033, "grad_norm": 18.674907684326172, "learning_rate": 9.872440823997085e-06, "loss": 0.5485, "step": 1640 }, { "epoch": 0.08140284736346048, "grad_norm": 12.595200538635254, "learning_rate": 9.872264109927402e-06, "loss": 0.4061, "step": 1641 }, { "epoch": 0.08145245299866065, "grad_norm": 6.314891815185547, "learning_rate": 9.872087275120814e-06, "loss": 0.3461, "step": 1642 }, { "epoch": 0.08150205863386081, "grad_norm": 7.488066673278809, "learning_rate": 9.871910319581696e-06, "loss": 0.4064, "step": 1643 }, { "epoch": 0.08155166426906096, "grad_norm": 10.52859115600586, "learning_rate": 9.871733243314437e-06, "loss": 0.3967, "step": 1644 }, { "epoch": 0.08160126990426113, "grad_norm": 13.152764320373535, "learning_rate": 9.871556046323426e-06, "loss": 0.454, "step": 1645 }, { "epoch": 0.08165087553946128, "grad_norm": 6.388102054595947, "learning_rate": 9.87137872861305e-06, "loss": 0.3336, "step": 1646 }, { "epoch": 0.08170048117466144, "grad_norm": 6.308408260345459, "learning_rate": 9.871201290187708e-06, "loss": 0.3605, "step": 1647 }, { "epoch": 0.0817500868098616, "grad_norm": 9.72773265838623, "learning_rate": 9.871023731051794e-06, "loss": 0.424, "step": 1648 }, { "epoch": 0.08179969244506176, "grad_norm": 9.96261215209961, "learning_rate": 9.870846051209707e-06, "loss": 0.3006, "step": 1649 }, { "epoch": 0.08184929808026192, "grad_norm": 6.244215488433838, "learning_rate": 9.870668250665854e-06, "loss": 0.3809, "step": 1650 }, { "epoch": 0.08189890371546207, "grad_norm": 9.471731185913086, "learning_rate": 9.870490329424636e-06, "loss": 0.3705, "step": 1651 }, { "epoch": 0.08194850935066224, "grad_norm": 6.297699451446533, "learning_rate": 9.870312287490467e-06, "loss": 0.3414, "step": 1652 }, { "epoch": 0.08199811498586239, "grad_norm": 6.173556327819824, "learning_rate": 9.870134124867754e-06, "loss": 0.2691, "step": 1653 }, { "epoch": 0.08204772062106255, "grad_norm": 5.341738700866699, "learning_rate": 9.869955841560917e-06, "loss": 0.2726, "step": 1654 }, { "epoch": 0.08209732625626272, "grad_norm": 7.998388767242432, "learning_rate": 9.869777437574368e-06, "loss": 0.3919, "step": 1655 }, { "epoch": 0.08214693189146287, "grad_norm": 8.902397155761719, "learning_rate": 9.869598912912533e-06, "loss": 0.3912, "step": 1656 }, { "epoch": 0.08219653752666303, "grad_norm": 12.910225868225098, "learning_rate": 9.869420267579835e-06, "loss": 0.3146, "step": 1657 }, { "epoch": 0.08224614316186318, "grad_norm": 8.661904335021973, "learning_rate": 9.8692415015807e-06, "loss": 0.4165, "step": 1658 }, { "epoch": 0.08229574879706335, "grad_norm": 6.892573833465576, "learning_rate": 9.869062614919556e-06, "loss": 0.3012, "step": 1659 }, { "epoch": 0.0823453544322635, "grad_norm": 3.847287178039551, "learning_rate": 9.86888360760084e-06, "loss": 0.2568, "step": 1660 }, { "epoch": 0.08239496006746366, "grad_norm": 6.333434104919434, "learning_rate": 9.868704479628986e-06, "loss": 0.4062, "step": 1661 }, { "epoch": 0.08244456570266383, "grad_norm": 11.923846244812012, "learning_rate": 9.86852523100843e-06, "loss": 0.3598, "step": 1662 }, { "epoch": 0.08249417133786398, "grad_norm": 8.758594512939453, "learning_rate": 9.868345861743619e-06, "loss": 0.3233, "step": 1663 }, { "epoch": 0.08254377697306414, "grad_norm": 10.4119234085083, "learning_rate": 9.868166371838994e-06, "loss": 0.4256, "step": 1664 }, { "epoch": 0.08259338260826429, "grad_norm": 9.276802062988281, "learning_rate": 9.867986761299004e-06, "loss": 0.4413, "step": 1665 }, { "epoch": 0.08264298824346446, "grad_norm": 6.780806064605713, "learning_rate": 9.867807030128101e-06, "loss": 0.3388, "step": 1666 }, { "epoch": 0.08269259387866462, "grad_norm": 6.589336395263672, "learning_rate": 9.867627178330738e-06, "loss": 0.306, "step": 1667 }, { "epoch": 0.08274219951386477, "grad_norm": 10.756865501403809, "learning_rate": 9.867447205911372e-06, "loss": 0.292, "step": 1668 }, { "epoch": 0.08279180514906494, "grad_norm": 9.781432151794434, "learning_rate": 9.867267112874461e-06, "loss": 0.2604, "step": 1669 }, { "epoch": 0.08284141078426509, "grad_norm": 5.377971649169922, "learning_rate": 9.867086899224469e-06, "loss": 0.3146, "step": 1670 }, { "epoch": 0.08289101641946525, "grad_norm": 10.005499839782715, "learning_rate": 9.866906564965863e-06, "loss": 0.3319, "step": 1671 }, { "epoch": 0.08294062205466542, "grad_norm": 5.4889631271362305, "learning_rate": 9.86672611010311e-06, "loss": 0.2565, "step": 1672 }, { "epoch": 0.08299022768986557, "grad_norm": 11.479827880859375, "learning_rate": 9.866545534640685e-06, "loss": 0.2859, "step": 1673 }, { "epoch": 0.08303983332506573, "grad_norm": 9.283011436462402, "learning_rate": 9.866364838583057e-06, "loss": 0.4108, "step": 1674 }, { "epoch": 0.08308943896026588, "grad_norm": 6.821549415588379, "learning_rate": 9.866184021934711e-06, "loss": 0.3081, "step": 1675 }, { "epoch": 0.08313904459546605, "grad_norm": 6.789343357086182, "learning_rate": 9.86600308470012e-06, "loss": 0.336, "step": 1676 }, { "epoch": 0.0831886502306662, "grad_norm": 6.925617218017578, "learning_rate": 9.865822026883772e-06, "loss": 0.3044, "step": 1677 }, { "epoch": 0.08323825586586636, "grad_norm": 11.073890686035156, "learning_rate": 9.865640848490154e-06, "loss": 0.4433, "step": 1678 }, { "epoch": 0.08328786150106653, "grad_norm": 9.839054107666016, "learning_rate": 9.865459549523755e-06, "loss": 0.3064, "step": 1679 }, { "epoch": 0.08333746713626668, "grad_norm": 17.94793128967285, "learning_rate": 9.865278129989066e-06, "loss": 0.6326, "step": 1680 }, { "epoch": 0.08338707277146684, "grad_norm": 6.905298709869385, "learning_rate": 9.865096589890586e-06, "loss": 0.3079, "step": 1681 }, { "epoch": 0.08343667840666699, "grad_norm": 18.17000389099121, "learning_rate": 9.864914929232812e-06, "loss": 0.4664, "step": 1682 }, { "epoch": 0.08348628404186716, "grad_norm": 7.320915699005127, "learning_rate": 9.864733148020243e-06, "loss": 0.2184, "step": 1683 }, { "epoch": 0.08353588967706732, "grad_norm": 7.681652069091797, "learning_rate": 9.864551246257387e-06, "loss": 0.3175, "step": 1684 }, { "epoch": 0.08358549531226747, "grad_norm": 11.897988319396973, "learning_rate": 9.864369223948752e-06, "loss": 0.3598, "step": 1685 }, { "epoch": 0.08363510094746764, "grad_norm": 10.861042022705078, "learning_rate": 9.864187081098847e-06, "loss": 0.3754, "step": 1686 }, { "epoch": 0.08368470658266779, "grad_norm": 9.829499244689941, "learning_rate": 9.864004817712185e-06, "loss": 0.3349, "step": 1687 }, { "epoch": 0.08373431221786795, "grad_norm": 14.780842781066895, "learning_rate": 9.863822433793283e-06, "loss": 0.4067, "step": 1688 }, { "epoch": 0.0837839178530681, "grad_norm": 8.93407154083252, "learning_rate": 9.863639929346662e-06, "loss": 0.3164, "step": 1689 }, { "epoch": 0.08383352348826827, "grad_norm": 19.313180923461914, "learning_rate": 9.863457304376843e-06, "loss": 0.3803, "step": 1690 }, { "epoch": 0.08388312912346843, "grad_norm": 7.7878217697143555, "learning_rate": 9.863274558888352e-06, "loss": 0.3538, "step": 1691 }, { "epoch": 0.08393273475866858, "grad_norm": 11.976914405822754, "learning_rate": 9.863091692885717e-06, "loss": 0.3837, "step": 1692 }, { "epoch": 0.08398234039386875, "grad_norm": 11.229386329650879, "learning_rate": 9.86290870637347e-06, "loss": 0.3479, "step": 1693 }, { "epoch": 0.0840319460290689, "grad_norm": 8.743642807006836, "learning_rate": 9.862725599356146e-06, "loss": 0.2561, "step": 1694 }, { "epoch": 0.08408155166426906, "grad_norm": 6.451022148132324, "learning_rate": 9.862542371838281e-06, "loss": 0.383, "step": 1695 }, { "epoch": 0.08413115729946923, "grad_norm": 10.751367568969727, "learning_rate": 9.86235902382442e-06, "loss": 0.3449, "step": 1696 }, { "epoch": 0.08418076293466938, "grad_norm": 17.241897583007812, "learning_rate": 9.862175555319098e-06, "loss": 0.4848, "step": 1697 }, { "epoch": 0.08423036856986954, "grad_norm": 10.598666191101074, "learning_rate": 9.861991966326869e-06, "loss": 0.3782, "step": 1698 }, { "epoch": 0.08427997420506969, "grad_norm": 12.535126686096191, "learning_rate": 9.86180825685228e-06, "loss": 0.3115, "step": 1699 }, { "epoch": 0.08432957984026986, "grad_norm": 11.797615051269531, "learning_rate": 9.861624426899884e-06, "loss": 0.2244, "step": 1700 }, { "epoch": 0.08437918547547002, "grad_norm": 6.3916497230529785, "learning_rate": 9.861440476474233e-06, "loss": 0.2911, "step": 1701 }, { "epoch": 0.08442879111067017, "grad_norm": 8.954326629638672, "learning_rate": 9.861256405579888e-06, "loss": 0.4245, "step": 1702 }, { "epoch": 0.08447839674587033, "grad_norm": 11.310367584228516, "learning_rate": 9.861072214221412e-06, "loss": 0.3825, "step": 1703 }, { "epoch": 0.08452800238107049, "grad_norm": 3.9800288677215576, "learning_rate": 9.860887902403368e-06, "loss": 0.2945, "step": 1704 }, { "epoch": 0.08457760801627065, "grad_norm": 16.064123153686523, "learning_rate": 9.860703470130322e-06, "loss": 0.3445, "step": 1705 }, { "epoch": 0.0846272136514708, "grad_norm": 5.937963962554932, "learning_rate": 9.860518917406846e-06, "loss": 0.3643, "step": 1706 }, { "epoch": 0.08467681928667097, "grad_norm": 9.199642181396484, "learning_rate": 9.860334244237511e-06, "loss": 0.3762, "step": 1707 }, { "epoch": 0.08472642492187113, "grad_norm": 17.32440948486328, "learning_rate": 9.860149450626894e-06, "loss": 0.5487, "step": 1708 }, { "epoch": 0.08477603055707128, "grad_norm": 13.696533203125, "learning_rate": 9.859964536579579e-06, "loss": 0.456, "step": 1709 }, { "epoch": 0.08482563619227144, "grad_norm": 6.028706073760986, "learning_rate": 9.859779502100143e-06, "loss": 0.328, "step": 1710 }, { "epoch": 0.0848752418274716, "grad_norm": 9.900463104248047, "learning_rate": 9.85959434719317e-06, "loss": 0.3751, "step": 1711 }, { "epoch": 0.08492484746267176, "grad_norm": 10.168771743774414, "learning_rate": 9.859409071863253e-06, "loss": 0.3896, "step": 1712 }, { "epoch": 0.08497445309787192, "grad_norm": 12.981367111206055, "learning_rate": 9.85922367611498e-06, "loss": 0.3471, "step": 1713 }, { "epoch": 0.08502405873307207, "grad_norm": 12.507396697998047, "learning_rate": 9.859038159952947e-06, "loss": 0.3329, "step": 1714 }, { "epoch": 0.08507366436827224, "grad_norm": 10.050649642944336, "learning_rate": 9.858852523381751e-06, "loss": 0.2872, "step": 1715 }, { "epoch": 0.08512327000347239, "grad_norm": 15.87730598449707, "learning_rate": 9.858666766405991e-06, "loss": 0.558, "step": 1716 }, { "epoch": 0.08517287563867255, "grad_norm": 10.1915283203125, "learning_rate": 9.858480889030271e-06, "loss": 0.2285, "step": 1717 }, { "epoch": 0.08522248127387272, "grad_norm": 5.569500923156738, "learning_rate": 9.858294891259197e-06, "loss": 0.3486, "step": 1718 }, { "epoch": 0.08527208690907287, "grad_norm": 22.10294532775879, "learning_rate": 9.858108773097378e-06, "loss": 0.4233, "step": 1719 }, { "epoch": 0.08532169254427303, "grad_norm": 13.273058891296387, "learning_rate": 9.857922534549424e-06, "loss": 0.3533, "step": 1720 }, { "epoch": 0.08537129817947318, "grad_norm": 5.54710578918457, "learning_rate": 9.857736175619955e-06, "loss": 0.2675, "step": 1721 }, { "epoch": 0.08542090381467335, "grad_norm": 5.899862766265869, "learning_rate": 9.857549696313584e-06, "loss": 0.3605, "step": 1722 }, { "epoch": 0.0854705094498735, "grad_norm": 9.712639808654785, "learning_rate": 9.857363096634935e-06, "loss": 0.285, "step": 1723 }, { "epoch": 0.08552011508507366, "grad_norm": 12.00320816040039, "learning_rate": 9.857176376588631e-06, "loss": 0.2811, "step": 1724 }, { "epoch": 0.08556972072027383, "grad_norm": 13.645427703857422, "learning_rate": 9.8569895361793e-06, "loss": 0.4035, "step": 1725 }, { "epoch": 0.08561932635547398, "grad_norm": 7.29953670501709, "learning_rate": 9.85680257541157e-06, "loss": 0.468, "step": 1726 }, { "epoch": 0.08566893199067414, "grad_norm": 6.48188591003418, "learning_rate": 9.856615494290075e-06, "loss": 0.374, "step": 1727 }, { "epoch": 0.0857185376258743, "grad_norm": 12.087566375732422, "learning_rate": 9.856428292819453e-06, "loss": 0.3911, "step": 1728 }, { "epoch": 0.08576814326107446, "grad_norm": 8.014568328857422, "learning_rate": 9.85624097100434e-06, "loss": 0.3395, "step": 1729 }, { "epoch": 0.08581774889627462, "grad_norm": 11.26977825164795, "learning_rate": 9.856053528849378e-06, "loss": 0.2768, "step": 1730 }, { "epoch": 0.08586735453147477, "grad_norm": 6.315792560577393, "learning_rate": 9.855865966359214e-06, "loss": 0.3286, "step": 1731 }, { "epoch": 0.08591696016667494, "grad_norm": 10.733760833740234, "learning_rate": 9.855678283538497e-06, "loss": 0.3703, "step": 1732 }, { "epoch": 0.08596656580187509, "grad_norm": 13.518539428710938, "learning_rate": 9.855490480391873e-06, "loss": 0.3641, "step": 1733 }, { "epoch": 0.08601617143707525, "grad_norm": 6.390489101409912, "learning_rate": 9.855302556924e-06, "loss": 0.3943, "step": 1734 }, { "epoch": 0.0860657770722754, "grad_norm": 8.339338302612305, "learning_rate": 9.855114513139532e-06, "loss": 0.3753, "step": 1735 }, { "epoch": 0.08611538270747557, "grad_norm": 7.326226711273193, "learning_rate": 9.85492634904313e-06, "loss": 0.3267, "step": 1736 }, { "epoch": 0.08616498834267573, "grad_norm": 12.215926170349121, "learning_rate": 9.854738064639457e-06, "loss": 0.3125, "step": 1737 }, { "epoch": 0.08621459397787588, "grad_norm": 6.573857307434082, "learning_rate": 9.85454965993318e-06, "loss": 0.217, "step": 1738 }, { "epoch": 0.08626419961307605, "grad_norm": 8.265450477600098, "learning_rate": 9.854361134928967e-06, "loss": 0.3121, "step": 1739 }, { "epoch": 0.0863138052482762, "grad_norm": 19.94937515258789, "learning_rate": 9.854172489631488e-06, "loss": 0.4702, "step": 1740 }, { "epoch": 0.08636341088347636, "grad_norm": 13.1885404586792, "learning_rate": 9.853983724045418e-06, "loss": 0.2296, "step": 1741 }, { "epoch": 0.08641301651867653, "grad_norm": 11.03985595703125, "learning_rate": 9.853794838175436e-06, "loss": 0.439, "step": 1742 }, { "epoch": 0.08646262215387668, "grad_norm": 7.394052028656006, "learning_rate": 9.853605832026222e-06, "loss": 0.3488, "step": 1743 }, { "epoch": 0.08651222778907684, "grad_norm": 7.297084331512451, "learning_rate": 9.853416705602462e-06, "loss": 0.2574, "step": 1744 }, { "epoch": 0.086561833424277, "grad_norm": 13.251880645751953, "learning_rate": 9.853227458908839e-06, "loss": 0.3505, "step": 1745 }, { "epoch": 0.08661143905947716, "grad_norm": 11.509428977966309, "learning_rate": 9.853038091950044e-06, "loss": 0.333, "step": 1746 }, { "epoch": 0.08666104469467732, "grad_norm": 21.53430938720703, "learning_rate": 9.852848604730771e-06, "loss": 0.3865, "step": 1747 }, { "epoch": 0.08671065032987747, "grad_norm": 7.109494686126709, "learning_rate": 9.852658997255716e-06, "loss": 0.264, "step": 1748 }, { "epoch": 0.08676025596507764, "grad_norm": 10.273148536682129, "learning_rate": 9.852469269529571e-06, "loss": 0.323, "step": 1749 }, { "epoch": 0.08680986160027779, "grad_norm": 14.866701126098633, "learning_rate": 9.852279421557047e-06, "loss": 0.3008, "step": 1750 }, { "epoch": 0.08685946723547795, "grad_norm": 10.633008003234863, "learning_rate": 9.852089453342842e-06, "loss": 0.5142, "step": 1751 }, { "epoch": 0.0869090728706781, "grad_norm": 9.39850902557373, "learning_rate": 9.851899364891667e-06, "loss": 0.4082, "step": 1752 }, { "epoch": 0.08695867850587827, "grad_norm": 16.56254005432129, "learning_rate": 9.85170915620823e-06, "loss": 0.42, "step": 1753 }, { "epoch": 0.08700828414107843, "grad_norm": 5.367066860198975, "learning_rate": 9.851518827297244e-06, "loss": 0.2741, "step": 1754 }, { "epoch": 0.08705788977627858, "grad_norm": 8.899810791015625, "learning_rate": 9.851328378163428e-06, "loss": 0.312, "step": 1755 }, { "epoch": 0.08710749541147875, "grad_norm": 9.65462875366211, "learning_rate": 9.8511378088115e-06, "loss": 0.3738, "step": 1756 }, { "epoch": 0.0871571010466789, "grad_norm": 6.663807392120361, "learning_rate": 9.850947119246184e-06, "loss": 0.2605, "step": 1757 }, { "epoch": 0.08720670668187906, "grad_norm": 11.782256126403809, "learning_rate": 9.850756309472202e-06, "loss": 0.3453, "step": 1758 }, { "epoch": 0.08725631231707923, "grad_norm": 16.54047203063965, "learning_rate": 9.850565379494285e-06, "loss": 0.4886, "step": 1759 }, { "epoch": 0.08730591795227938, "grad_norm": 7.988787651062012, "learning_rate": 9.850374329317166e-06, "loss": 0.3714, "step": 1760 }, { "epoch": 0.08735552358747954, "grad_norm": 7.717929840087891, "learning_rate": 9.850183158945574e-06, "loss": 0.3016, "step": 1761 }, { "epoch": 0.08740512922267969, "grad_norm": 9.508227348327637, "learning_rate": 9.849991868384252e-06, "loss": 0.4113, "step": 1762 }, { "epoch": 0.08745473485787986, "grad_norm": 5.009853363037109, "learning_rate": 9.849800457637936e-06, "loss": 0.2458, "step": 1763 }, { "epoch": 0.08750434049308001, "grad_norm": 10.087889671325684, "learning_rate": 9.849608926711371e-06, "loss": 0.4937, "step": 1764 }, { "epoch": 0.08755394612828017, "grad_norm": 14.739275932312012, "learning_rate": 9.849417275609302e-06, "loss": 0.4169, "step": 1765 }, { "epoch": 0.08760355176348034, "grad_norm": 4.75874662399292, "learning_rate": 9.849225504336482e-06, "loss": 0.2427, "step": 1766 }, { "epoch": 0.08765315739868049, "grad_norm": 7.77701473236084, "learning_rate": 9.849033612897659e-06, "loss": 0.3374, "step": 1767 }, { "epoch": 0.08770276303388065, "grad_norm": 6.833118915557861, "learning_rate": 9.84884160129759e-06, "loss": 0.3587, "step": 1768 }, { "epoch": 0.0877523686690808, "grad_norm": 20.675823211669922, "learning_rate": 9.848649469541032e-06, "loss": 0.4087, "step": 1769 }, { "epoch": 0.08780197430428097, "grad_norm": 8.325908660888672, "learning_rate": 9.848457217632747e-06, "loss": 0.2644, "step": 1770 }, { "epoch": 0.08785157993948113, "grad_norm": 14.383338928222656, "learning_rate": 9.848264845577499e-06, "loss": 0.3794, "step": 1771 }, { "epoch": 0.08790118557468128, "grad_norm": 14.238322257995605, "learning_rate": 9.848072353380055e-06, "loss": 0.3173, "step": 1772 }, { "epoch": 0.08795079120988145, "grad_norm": 12.932129859924316, "learning_rate": 9.847879741045185e-06, "loss": 0.4881, "step": 1773 }, { "epoch": 0.0880003968450816, "grad_norm": 8.794614791870117, "learning_rate": 9.847687008577662e-06, "loss": 0.231, "step": 1774 }, { "epoch": 0.08805000248028176, "grad_norm": 7.015131950378418, "learning_rate": 9.847494155982263e-06, "loss": 0.2783, "step": 1775 }, { "epoch": 0.08809960811548193, "grad_norm": 14.412837028503418, "learning_rate": 9.847301183263765e-06, "loss": 0.3884, "step": 1776 }, { "epoch": 0.08814921375068208, "grad_norm": 7.266180038452148, "learning_rate": 9.847108090426952e-06, "loss": 0.3726, "step": 1777 }, { "epoch": 0.08819881938588224, "grad_norm": 6.937601566314697, "learning_rate": 9.846914877476608e-06, "loss": 0.2933, "step": 1778 }, { "epoch": 0.08824842502108239, "grad_norm": 12.57112979888916, "learning_rate": 9.846721544417519e-06, "loss": 0.3354, "step": 1779 }, { "epoch": 0.08829803065628256, "grad_norm": 12.42977523803711, "learning_rate": 9.84652809125448e-06, "loss": 0.5146, "step": 1780 }, { "epoch": 0.0883476362914827, "grad_norm": 6.8399577140808105, "learning_rate": 9.846334517992281e-06, "loss": 0.3375, "step": 1781 }, { "epoch": 0.08839724192668287, "grad_norm": 9.846260070800781, "learning_rate": 9.846140824635722e-06, "loss": 0.4524, "step": 1782 }, { "epoch": 0.08844684756188304, "grad_norm": 8.066926956176758, "learning_rate": 9.845947011189599e-06, "loss": 0.4142, "step": 1783 }, { "epoch": 0.08849645319708319, "grad_norm": 8.893717765808105, "learning_rate": 9.845753077658718e-06, "loss": 0.3352, "step": 1784 }, { "epoch": 0.08854605883228335, "grad_norm": 10.158949851989746, "learning_rate": 9.845559024047884e-06, "loss": 0.3862, "step": 1785 }, { "epoch": 0.0885956644674835, "grad_norm": 7.891507148742676, "learning_rate": 9.845364850361906e-06, "loss": 0.3208, "step": 1786 }, { "epoch": 0.08864527010268367, "grad_norm": 14.461699485778809, "learning_rate": 9.845170556605595e-06, "loss": 0.3463, "step": 1787 }, { "epoch": 0.08869487573788383, "grad_norm": 6.489120006561279, "learning_rate": 9.844976142783764e-06, "loss": 0.3398, "step": 1788 }, { "epoch": 0.08874448137308398, "grad_norm": 12.194127082824707, "learning_rate": 9.844781608901234e-06, "loss": 0.3067, "step": 1789 }, { "epoch": 0.08879408700828414, "grad_norm": 9.105775833129883, "learning_rate": 9.844586954962825e-06, "loss": 0.2034, "step": 1790 }, { "epoch": 0.0888436926434843, "grad_norm": 16.012203216552734, "learning_rate": 9.84439218097336e-06, "loss": 0.4021, "step": 1791 }, { "epoch": 0.08889329827868446, "grad_norm": 6.901789665222168, "learning_rate": 9.844197286937664e-06, "loss": 0.2526, "step": 1792 }, { "epoch": 0.08894290391388461, "grad_norm": 8.048787117004395, "learning_rate": 9.84400227286057e-06, "loss": 0.3062, "step": 1793 }, { "epoch": 0.08899250954908478, "grad_norm": 13.611835479736328, "learning_rate": 9.843807138746905e-06, "loss": 0.5458, "step": 1794 }, { "epoch": 0.08904211518428494, "grad_norm": 6.514352321624756, "learning_rate": 9.84361188460151e-06, "loss": 0.3191, "step": 1795 }, { "epoch": 0.08909172081948509, "grad_norm": 9.604207992553711, "learning_rate": 9.843416510429222e-06, "loss": 0.4853, "step": 1796 }, { "epoch": 0.08914132645468525, "grad_norm": 16.079809188842773, "learning_rate": 9.84322101623488e-06, "loss": 0.3352, "step": 1797 }, { "epoch": 0.0891909320898854, "grad_norm": 8.83739948272705, "learning_rate": 9.843025402023333e-06, "loss": 0.3575, "step": 1798 }, { "epoch": 0.08924053772508557, "grad_norm": 5.984113693237305, "learning_rate": 9.842829667799425e-06, "loss": 0.317, "step": 1799 }, { "epoch": 0.08929014336028573, "grad_norm": 7.4415459632873535, "learning_rate": 9.842633813568007e-06, "loss": 0.4746, "step": 1800 }, { "epoch": 0.08933974899548588, "grad_norm": 4.952817440032959, "learning_rate": 9.842437839333931e-06, "loss": 0.3221, "step": 1801 }, { "epoch": 0.08938935463068605, "grad_norm": 8.868541717529297, "learning_rate": 9.842241745102057e-06, "loss": 0.477, "step": 1802 }, { "epoch": 0.0894389602658862, "grad_norm": 13.24862003326416, "learning_rate": 9.84204553087724e-06, "loss": 0.4039, "step": 1803 }, { "epoch": 0.08948856590108636, "grad_norm": 4.868973255157471, "learning_rate": 9.841849196664347e-06, "loss": 0.3295, "step": 1804 }, { "epoch": 0.08953817153628653, "grad_norm": 19.879777908325195, "learning_rate": 9.841652742468239e-06, "loss": 0.4018, "step": 1805 }, { "epoch": 0.08958777717148668, "grad_norm": 10.814399719238281, "learning_rate": 9.841456168293787e-06, "loss": 0.3981, "step": 1806 }, { "epoch": 0.08963738280668684, "grad_norm": 6.139529705047607, "learning_rate": 9.841259474145862e-06, "loss": 0.3651, "step": 1807 }, { "epoch": 0.089686988441887, "grad_norm": 7.954072952270508, "learning_rate": 9.841062660029335e-06, "loss": 0.2735, "step": 1808 }, { "epoch": 0.08973659407708716, "grad_norm": 12.996129035949707, "learning_rate": 9.840865725949086e-06, "loss": 0.4227, "step": 1809 }, { "epoch": 0.08978619971228731, "grad_norm": 8.275633811950684, "learning_rate": 9.840668671909995e-06, "loss": 0.3714, "step": 1810 }, { "epoch": 0.08983580534748747, "grad_norm": 10.79520320892334, "learning_rate": 9.840471497916943e-06, "loss": 0.3995, "step": 1811 }, { "epoch": 0.08988541098268764, "grad_norm": 12.121529579162598, "learning_rate": 9.84027420397482e-06, "loss": 0.4278, "step": 1812 }, { "epoch": 0.08993501661788779, "grad_norm": 9.465670585632324, "learning_rate": 9.840076790088511e-06, "loss": 0.4598, "step": 1813 }, { "epoch": 0.08998462225308795, "grad_norm": 7.201108932495117, "learning_rate": 9.839879256262911e-06, "loss": 0.223, "step": 1814 }, { "epoch": 0.0900342278882881, "grad_norm": 8.13745403289795, "learning_rate": 9.839681602502914e-06, "loss": 0.3506, "step": 1815 }, { "epoch": 0.09008383352348827, "grad_norm": 10.476263046264648, "learning_rate": 9.839483828813416e-06, "loss": 0.4071, "step": 1816 }, { "epoch": 0.09013343915868843, "grad_norm": 17.520965576171875, "learning_rate": 9.839285935199322e-06, "loss": 0.4407, "step": 1817 }, { "epoch": 0.09018304479388858, "grad_norm": 19.060792922973633, "learning_rate": 9.839087921665532e-06, "loss": 0.42, "step": 1818 }, { "epoch": 0.09023265042908875, "grad_norm": 12.80122184753418, "learning_rate": 9.838889788216955e-06, "loss": 0.322, "step": 1819 }, { "epoch": 0.0902822560642889, "grad_norm": 8.77220344543457, "learning_rate": 9.838691534858499e-06, "loss": 0.2741, "step": 1820 }, { "epoch": 0.09033186169948906, "grad_norm": 10.17717170715332, "learning_rate": 9.838493161595079e-06, "loss": 0.3491, "step": 1821 }, { "epoch": 0.09038146733468921, "grad_norm": 7.806191444396973, "learning_rate": 9.838294668431609e-06, "loss": 0.3437, "step": 1822 }, { "epoch": 0.09043107296988938, "grad_norm": 7.323751449584961, "learning_rate": 9.83809605537301e-06, "loss": 0.3735, "step": 1823 }, { "epoch": 0.09048067860508954, "grad_norm": 7.383821964263916, "learning_rate": 9.837897322424199e-06, "loss": 0.3158, "step": 1824 }, { "epoch": 0.0905302842402897, "grad_norm": 7.520629405975342, "learning_rate": 9.837698469590106e-06, "loss": 0.4186, "step": 1825 }, { "epoch": 0.09057988987548986, "grad_norm": 7.031495571136475, "learning_rate": 9.837499496875657e-06, "loss": 0.2425, "step": 1826 }, { "epoch": 0.09062949551069001, "grad_norm": 5.980292320251465, "learning_rate": 9.837300404285783e-06, "loss": 0.2842, "step": 1827 }, { "epoch": 0.09067910114589017, "grad_norm": 6.484702110290527, "learning_rate": 9.837101191825416e-06, "loss": 0.3549, "step": 1828 }, { "epoch": 0.09072870678109034, "grad_norm": 6.350093364715576, "learning_rate": 9.836901859499492e-06, "loss": 0.2634, "step": 1829 }, { "epoch": 0.09077831241629049, "grad_norm": 13.142672538757324, "learning_rate": 9.836702407312954e-06, "loss": 0.3902, "step": 1830 }, { "epoch": 0.09082791805149065, "grad_norm": 8.584156036376953, "learning_rate": 9.83650283527074e-06, "loss": 0.3124, "step": 1831 }, { "epoch": 0.0908775236866908, "grad_norm": 9.627276420593262, "learning_rate": 9.8363031433778e-06, "loss": 0.3258, "step": 1832 }, { "epoch": 0.09092712932189097, "grad_norm": 12.469727516174316, "learning_rate": 9.83610333163908e-06, "loss": 0.4362, "step": 1833 }, { "epoch": 0.09097673495709113, "grad_norm": 10.170856475830078, "learning_rate": 9.835903400059533e-06, "loss": 0.3853, "step": 1834 }, { "epoch": 0.09102634059229128, "grad_norm": 10.289214134216309, "learning_rate": 9.83570334864411e-06, "loss": 0.419, "step": 1835 }, { "epoch": 0.09107594622749145, "grad_norm": 7.1177849769592285, "learning_rate": 9.835503177397772e-06, "loss": 0.2948, "step": 1836 }, { "epoch": 0.0911255518626916, "grad_norm": 7.393987655639648, "learning_rate": 9.835302886325477e-06, "loss": 0.2244, "step": 1837 }, { "epoch": 0.09117515749789176, "grad_norm": 12.97370719909668, "learning_rate": 9.83510247543219e-06, "loss": 0.342, "step": 1838 }, { "epoch": 0.09122476313309191, "grad_norm": 9.420785903930664, "learning_rate": 9.834901944722877e-06, "loss": 0.4317, "step": 1839 }, { "epoch": 0.09127436876829208, "grad_norm": 13.726740837097168, "learning_rate": 9.834701294202505e-06, "loss": 0.3964, "step": 1840 }, { "epoch": 0.09132397440349224, "grad_norm": 5.722134113311768, "learning_rate": 9.83450052387605e-06, "loss": 0.259, "step": 1841 }, { "epoch": 0.09137358003869239, "grad_norm": 7.783267974853516, "learning_rate": 9.834299633748483e-06, "loss": 0.4036, "step": 1842 }, { "epoch": 0.09142318567389256, "grad_norm": 6.397197723388672, "learning_rate": 9.834098623824786e-06, "loss": 0.4267, "step": 1843 }, { "epoch": 0.09147279130909271, "grad_norm": 9.594106674194336, "learning_rate": 9.833897494109937e-06, "loss": 0.3666, "step": 1844 }, { "epoch": 0.09152239694429287, "grad_norm": 8.730587005615234, "learning_rate": 9.833696244608922e-06, "loss": 0.3463, "step": 1845 }, { "epoch": 0.09157200257949304, "grad_norm": 5.985142707824707, "learning_rate": 9.833494875326728e-06, "loss": 0.3003, "step": 1846 }, { "epoch": 0.09162160821469319, "grad_norm": 14.73845386505127, "learning_rate": 9.833293386268344e-06, "loss": 0.3626, "step": 1847 }, { "epoch": 0.09167121384989335, "grad_norm": 6.916848182678223, "learning_rate": 9.833091777438763e-06, "loss": 0.3202, "step": 1848 }, { "epoch": 0.0917208194850935, "grad_norm": 6.338430404663086, "learning_rate": 9.832890048842984e-06, "loss": 0.3504, "step": 1849 }, { "epoch": 0.09177042512029367, "grad_norm": 5.855422019958496, "learning_rate": 9.832688200486e-06, "loss": 0.2468, "step": 1850 }, { "epoch": 0.09182003075549382, "grad_norm": 9.931962013244629, "learning_rate": 9.83248623237282e-06, "loss": 0.3749, "step": 1851 }, { "epoch": 0.09186963639069398, "grad_norm": 11.44190502166748, "learning_rate": 9.832284144508442e-06, "loss": 0.4058, "step": 1852 }, { "epoch": 0.09191924202589415, "grad_norm": 12.448607444763184, "learning_rate": 9.83208193689788e-06, "loss": 0.3688, "step": 1853 }, { "epoch": 0.0919688476610943, "grad_norm": 5.8393378257751465, "learning_rate": 9.83187960954614e-06, "loss": 0.2831, "step": 1854 }, { "epoch": 0.09201845329629446, "grad_norm": 11.149821281433105, "learning_rate": 9.831677162458237e-06, "loss": 0.3528, "step": 1855 }, { "epoch": 0.09206805893149461, "grad_norm": 16.191484451293945, "learning_rate": 9.831474595639189e-06, "loss": 0.4405, "step": 1856 }, { "epoch": 0.09211766456669478, "grad_norm": 12.4427490234375, "learning_rate": 9.831271909094015e-06, "loss": 0.3912, "step": 1857 }, { "epoch": 0.09216727020189494, "grad_norm": 11.366442680358887, "learning_rate": 9.831069102827736e-06, "loss": 0.4468, "step": 1858 }, { "epoch": 0.09221687583709509, "grad_norm": 8.623149871826172, "learning_rate": 9.830866176845383e-06, "loss": 0.3157, "step": 1859 }, { "epoch": 0.09226648147229526, "grad_norm": 9.119871139526367, "learning_rate": 9.830663131151979e-06, "loss": 0.4711, "step": 1860 }, { "epoch": 0.0923160871074954, "grad_norm": 11.716060638427734, "learning_rate": 9.830459965752556e-06, "loss": 0.4553, "step": 1861 }, { "epoch": 0.09236569274269557, "grad_norm": 9.296329498291016, "learning_rate": 9.830256680652152e-06, "loss": 0.3422, "step": 1862 }, { "epoch": 0.09241529837789574, "grad_norm": 9.903167724609375, "learning_rate": 9.830053275855802e-06, "loss": 0.3566, "step": 1863 }, { "epoch": 0.09246490401309589, "grad_norm": 7.899327754974365, "learning_rate": 9.829849751368547e-06, "loss": 0.3197, "step": 1864 }, { "epoch": 0.09251450964829605, "grad_norm": 9.497446060180664, "learning_rate": 9.82964610719543e-06, "loss": 0.3958, "step": 1865 }, { "epoch": 0.0925641152834962, "grad_norm": 5.37947940826416, "learning_rate": 9.8294423433415e-06, "loss": 0.2929, "step": 1866 }, { "epoch": 0.09261372091869637, "grad_norm": 12.503776550292969, "learning_rate": 9.829238459811801e-06, "loss": 0.4319, "step": 1867 }, { "epoch": 0.09266332655389652, "grad_norm": 4.360990047454834, "learning_rate": 9.829034456611389e-06, "loss": 0.3117, "step": 1868 }, { "epoch": 0.09271293218909668, "grad_norm": 12.19900894165039, "learning_rate": 9.82883033374532e-06, "loss": 0.3509, "step": 1869 }, { "epoch": 0.09276253782429685, "grad_norm": 4.757053852081299, "learning_rate": 9.828626091218649e-06, "loss": 0.43, "step": 1870 }, { "epoch": 0.092812143459497, "grad_norm": 7.761868953704834, "learning_rate": 9.828421729036441e-06, "loss": 0.3049, "step": 1871 }, { "epoch": 0.09286174909469716, "grad_norm": 7.1294846534729, "learning_rate": 9.828217247203759e-06, "loss": 0.3225, "step": 1872 }, { "epoch": 0.09291135472989731, "grad_norm": 7.860109806060791, "learning_rate": 9.828012645725668e-06, "loss": 0.3504, "step": 1873 }, { "epoch": 0.09296096036509748, "grad_norm": 9.79242992401123, "learning_rate": 9.82780792460724e-06, "loss": 0.3196, "step": 1874 }, { "epoch": 0.09301056600029764, "grad_norm": 16.338851928710938, "learning_rate": 9.827603083853548e-06, "loss": 0.4019, "step": 1875 }, { "epoch": 0.09306017163549779, "grad_norm": 6.276151180267334, "learning_rate": 9.82739812346967e-06, "loss": 0.2556, "step": 1876 }, { "epoch": 0.09310977727069795, "grad_norm": 6.329183101654053, "learning_rate": 9.827193043460678e-06, "loss": 0.2525, "step": 1877 }, { "epoch": 0.0931593829058981, "grad_norm": 10.431655883789062, "learning_rate": 9.826987843831663e-06, "loss": 0.366, "step": 1878 }, { "epoch": 0.09320898854109827, "grad_norm": 6.420952320098877, "learning_rate": 9.826782524587704e-06, "loss": 0.3487, "step": 1879 }, { "epoch": 0.09325859417629843, "grad_norm": 13.505443572998047, "learning_rate": 9.82657708573389e-06, "loss": 0.4723, "step": 1880 }, { "epoch": 0.09330819981149858, "grad_norm": 6.541758060455322, "learning_rate": 9.826371527275315e-06, "loss": 0.2887, "step": 1881 }, { "epoch": 0.09335780544669875, "grad_norm": 7.136154651641846, "learning_rate": 9.82616584921707e-06, "loss": 0.4019, "step": 1882 }, { "epoch": 0.0934074110818989, "grad_norm": 8.471526145935059, "learning_rate": 9.82596005156425e-06, "loss": 0.2634, "step": 1883 }, { "epoch": 0.09345701671709906, "grad_norm": 6.513886451721191, "learning_rate": 9.825754134321957e-06, "loss": 0.3918, "step": 1884 }, { "epoch": 0.09350662235229922, "grad_norm": 8.017380714416504, "learning_rate": 9.825548097495294e-06, "loss": 0.3309, "step": 1885 }, { "epoch": 0.09355622798749938, "grad_norm": 8.011372566223145, "learning_rate": 9.825341941089367e-06, "loss": 0.3847, "step": 1886 }, { "epoch": 0.09360583362269954, "grad_norm": 7.127999305725098, "learning_rate": 9.825135665109284e-06, "loss": 0.2829, "step": 1887 }, { "epoch": 0.0936554392578997, "grad_norm": 9.757842063903809, "learning_rate": 9.824929269560156e-06, "loss": 0.3406, "step": 1888 }, { "epoch": 0.09370504489309986, "grad_norm": 11.352429389953613, "learning_rate": 9.824722754447097e-06, "loss": 0.3655, "step": 1889 }, { "epoch": 0.09375465052830001, "grad_norm": 13.877657890319824, "learning_rate": 9.824516119775229e-06, "loss": 0.3881, "step": 1890 }, { "epoch": 0.09380425616350017, "grad_norm": 9.898347854614258, "learning_rate": 9.824309365549667e-06, "loss": 0.482, "step": 1891 }, { "epoch": 0.09385386179870034, "grad_norm": 5.319416046142578, "learning_rate": 9.824102491775537e-06, "loss": 0.2888, "step": 1892 }, { "epoch": 0.09390346743390049, "grad_norm": 7.733501434326172, "learning_rate": 9.823895498457965e-06, "loss": 0.3132, "step": 1893 }, { "epoch": 0.09395307306910065, "grad_norm": 10.979900360107422, "learning_rate": 9.82368838560208e-06, "loss": 0.3686, "step": 1894 }, { "epoch": 0.0940026787043008, "grad_norm": 5.81588888168335, "learning_rate": 9.823481153213017e-06, "loss": 0.4061, "step": 1895 }, { "epoch": 0.09405228433950097, "grad_norm": 22.692657470703125, "learning_rate": 9.823273801295906e-06, "loss": 0.3797, "step": 1896 }, { "epoch": 0.09410188997470112, "grad_norm": 5.041460037231445, "learning_rate": 9.82306632985589e-06, "loss": 0.3026, "step": 1897 }, { "epoch": 0.09415149560990128, "grad_norm": 7.122931003570557, "learning_rate": 9.82285873889811e-06, "loss": 0.3758, "step": 1898 }, { "epoch": 0.09420110124510145, "grad_norm": 6.81614351272583, "learning_rate": 9.822651028427706e-06, "loss": 0.3776, "step": 1899 }, { "epoch": 0.0942507068803016, "grad_norm": 5.308084964752197, "learning_rate": 9.82244319844983e-06, "loss": 0.321, "step": 1900 }, { "epoch": 0.09430031251550176, "grad_norm": 8.645140647888184, "learning_rate": 9.822235248969629e-06, "loss": 0.3619, "step": 1901 }, { "epoch": 0.09434991815070191, "grad_norm": 10.702524185180664, "learning_rate": 9.822027179992257e-06, "loss": 0.3806, "step": 1902 }, { "epoch": 0.09439952378590208, "grad_norm": 5.81790828704834, "learning_rate": 9.82181899152287e-06, "loss": 0.3146, "step": 1903 }, { "epoch": 0.09444912942110224, "grad_norm": 6.13213586807251, "learning_rate": 9.821610683566629e-06, "loss": 0.3096, "step": 1904 }, { "epoch": 0.0944987350563024, "grad_norm": 7.482848167419434, "learning_rate": 9.821402256128693e-06, "loss": 0.2421, "step": 1905 }, { "epoch": 0.09454834069150256, "grad_norm": 4.685769557952881, "learning_rate": 9.82119370921423e-06, "loss": 0.3226, "step": 1906 }, { "epoch": 0.09459794632670271, "grad_norm": 9.71403694152832, "learning_rate": 9.820985042828404e-06, "loss": 0.3176, "step": 1907 }, { "epoch": 0.09464755196190287, "grad_norm": 7.672388076782227, "learning_rate": 9.820776256976388e-06, "loss": 0.3567, "step": 1908 }, { "epoch": 0.09469715759710304, "grad_norm": 12.757545471191406, "learning_rate": 9.820567351663357e-06, "loss": 0.3128, "step": 1909 }, { "epoch": 0.09474676323230319, "grad_norm": 14.867724418640137, "learning_rate": 9.820358326894484e-06, "loss": 0.4264, "step": 1910 }, { "epoch": 0.09479636886750335, "grad_norm": 8.612224578857422, "learning_rate": 9.820149182674952e-06, "loss": 0.1584, "step": 1911 }, { "epoch": 0.0948459745027035, "grad_norm": 22.832965850830078, "learning_rate": 9.819939919009944e-06, "loss": 0.3937, "step": 1912 }, { "epoch": 0.09489558013790367, "grad_norm": 7.468225002288818, "learning_rate": 9.819730535904643e-06, "loss": 0.2932, "step": 1913 }, { "epoch": 0.09494518577310382, "grad_norm": 9.81374740600586, "learning_rate": 9.81952103336424e-06, "loss": 0.4405, "step": 1914 }, { "epoch": 0.09499479140830398, "grad_norm": 6.982979774475098, "learning_rate": 9.819311411393927e-06, "loss": 0.337, "step": 1915 }, { "epoch": 0.09504439704350415, "grad_norm": 17.004899978637695, "learning_rate": 9.819101669998896e-06, "loss": 0.4541, "step": 1916 }, { "epoch": 0.0950940026787043, "grad_norm": 6.794373989105225, "learning_rate": 9.818891809184346e-06, "loss": 0.3322, "step": 1917 }, { "epoch": 0.09514360831390446, "grad_norm": 8.774012565612793, "learning_rate": 9.818681828955476e-06, "loss": 0.3594, "step": 1918 }, { "epoch": 0.09519321394910461, "grad_norm": 15.425930976867676, "learning_rate": 9.818471729317492e-06, "loss": 0.4036, "step": 1919 }, { "epoch": 0.09524281958430478, "grad_norm": 7.525584697723389, "learning_rate": 9.818261510275599e-06, "loss": 0.2643, "step": 1920 }, { "epoch": 0.09529242521950494, "grad_norm": 5.909482002258301, "learning_rate": 9.818051171835005e-06, "loss": 0.4438, "step": 1921 }, { "epoch": 0.09534203085470509, "grad_norm": 7.969305992126465, "learning_rate": 9.817840714000923e-06, "loss": 0.4576, "step": 1922 }, { "epoch": 0.09539163648990526, "grad_norm": 7.981988430023193, "learning_rate": 9.817630136778571e-06, "loss": 0.4006, "step": 1923 }, { "epoch": 0.09544124212510541, "grad_norm": 8.35676097869873, "learning_rate": 9.817419440173164e-06, "loss": 0.2289, "step": 1924 }, { "epoch": 0.09549084776030557, "grad_norm": 7.831982612609863, "learning_rate": 9.817208624189924e-06, "loss": 0.3825, "step": 1925 }, { "epoch": 0.09554045339550572, "grad_norm": 7.941090106964111, "learning_rate": 9.816997688834075e-06, "loss": 0.3026, "step": 1926 }, { "epoch": 0.09559005903070589, "grad_norm": 9.877608299255371, "learning_rate": 9.816786634110844e-06, "loss": 0.4139, "step": 1927 }, { "epoch": 0.09563966466590605, "grad_norm": 5.446537971496582, "learning_rate": 9.816575460025462e-06, "loss": 0.2921, "step": 1928 }, { "epoch": 0.0956892703011062, "grad_norm": 7.716127395629883, "learning_rate": 9.81636416658316e-06, "loss": 0.467, "step": 1929 }, { "epoch": 0.09573887593630637, "grad_norm": 4.674367427825928, "learning_rate": 9.816152753789178e-06, "loss": 0.1646, "step": 1930 }, { "epoch": 0.09578848157150652, "grad_norm": 11.330635070800781, "learning_rate": 9.81594122164875e-06, "loss": 0.3379, "step": 1931 }, { "epoch": 0.09583808720670668, "grad_norm": 5.781072616577148, "learning_rate": 9.81572957016712e-06, "loss": 0.3189, "step": 1932 }, { "epoch": 0.09588769284190685, "grad_norm": 5.710806846618652, "learning_rate": 9.815517799349535e-06, "loss": 0.3583, "step": 1933 }, { "epoch": 0.095937298477107, "grad_norm": 12.06517219543457, "learning_rate": 9.815305909201238e-06, "loss": 0.5992, "step": 1934 }, { "epoch": 0.09598690411230716, "grad_norm": 7.92380428314209, "learning_rate": 9.815093899727483e-06, "loss": 0.373, "step": 1935 }, { "epoch": 0.09603650974750731, "grad_norm": 13.251694679260254, "learning_rate": 9.814881770933525e-06, "loss": 0.3332, "step": 1936 }, { "epoch": 0.09608611538270748, "grad_norm": 9.353864669799805, "learning_rate": 9.814669522824617e-06, "loss": 0.3085, "step": 1937 }, { "epoch": 0.09613572101790764, "grad_norm": 6.206709384918213, "learning_rate": 9.81445715540602e-06, "loss": 0.2729, "step": 1938 }, { "epoch": 0.09618532665310779, "grad_norm": 9.469112396240234, "learning_rate": 9.814244668682998e-06, "loss": 0.3715, "step": 1939 }, { "epoch": 0.09623493228830796, "grad_norm": 4.985133171081543, "learning_rate": 9.814032062660814e-06, "loss": 0.2357, "step": 1940 }, { "epoch": 0.0962845379235081, "grad_norm": 8.891186714172363, "learning_rate": 9.813819337344738e-06, "loss": 0.3204, "step": 1941 }, { "epoch": 0.09633414355870827, "grad_norm": 5.592092514038086, "learning_rate": 9.813606492740043e-06, "loss": 0.3162, "step": 1942 }, { "epoch": 0.09638374919390842, "grad_norm": 5.660913944244385, "learning_rate": 9.813393528852001e-06, "loss": 0.3093, "step": 1943 }, { "epoch": 0.09643335482910859, "grad_norm": 6.605631351470947, "learning_rate": 9.81318044568589e-06, "loss": 0.2527, "step": 1944 }, { "epoch": 0.09648296046430875, "grad_norm": 9.378978729248047, "learning_rate": 9.812967243246989e-06, "loss": 0.3722, "step": 1945 }, { "epoch": 0.0965325660995089, "grad_norm": 5.575930595397949, "learning_rate": 9.812753921540584e-06, "loss": 0.3042, "step": 1946 }, { "epoch": 0.09658217173470907, "grad_norm": 6.573371410369873, "learning_rate": 9.81254048057196e-06, "loss": 0.2586, "step": 1947 }, { "epoch": 0.09663177736990922, "grad_norm": 12.790246963500977, "learning_rate": 9.812326920346404e-06, "loss": 0.4452, "step": 1948 }, { "epoch": 0.09668138300510938, "grad_norm": 9.097709655761719, "learning_rate": 9.812113240869212e-06, "loss": 0.4487, "step": 1949 }, { "epoch": 0.09673098864030955, "grad_norm": 5.254099369049072, "learning_rate": 9.811899442145676e-06, "loss": 0.3225, "step": 1950 }, { "epoch": 0.0967805942755097, "grad_norm": 14.281167030334473, "learning_rate": 9.811685524181097e-06, "loss": 0.4926, "step": 1951 }, { "epoch": 0.09683019991070986, "grad_norm": 8.29219913482666, "learning_rate": 9.811471486980773e-06, "loss": 0.242, "step": 1952 }, { "epoch": 0.09687980554591001, "grad_norm": 10.43627643585205, "learning_rate": 9.811257330550008e-06, "loss": 0.4553, "step": 1953 }, { "epoch": 0.09692941118111018, "grad_norm": 4.487534999847412, "learning_rate": 9.811043054894112e-06, "loss": 0.3958, "step": 1954 }, { "epoch": 0.09697901681631033, "grad_norm": 6.851210117340088, "learning_rate": 9.81082866001839e-06, "loss": 0.371, "step": 1955 }, { "epoch": 0.09702862245151049, "grad_norm": 10.276824951171875, "learning_rate": 9.81061414592816e-06, "loss": 0.41, "step": 1956 }, { "epoch": 0.09707822808671066, "grad_norm": 5.803585052490234, "learning_rate": 9.810399512628735e-06, "loss": 0.3744, "step": 1957 }, { "epoch": 0.0971278337219108, "grad_norm": 8.972223281860352, "learning_rate": 9.810184760125435e-06, "loss": 0.3465, "step": 1958 }, { "epoch": 0.09717743935711097, "grad_norm": 9.11353874206543, "learning_rate": 9.809969888423578e-06, "loss": 0.3761, "step": 1959 }, { "epoch": 0.09722704499231112, "grad_norm": 8.514960289001465, "learning_rate": 9.809754897528494e-06, "loss": 0.3683, "step": 1960 }, { "epoch": 0.09727665062751129, "grad_norm": 10.189689636230469, "learning_rate": 9.809539787445506e-06, "loss": 0.3978, "step": 1961 }, { "epoch": 0.09732625626271145, "grad_norm": 6.5697197914123535, "learning_rate": 9.809324558179947e-06, "loss": 0.4019, "step": 1962 }, { "epoch": 0.0973758618979116, "grad_norm": 7.86282205581665, "learning_rate": 9.80910920973715e-06, "loss": 0.173, "step": 1963 }, { "epoch": 0.09742546753311176, "grad_norm": 5.606771945953369, "learning_rate": 9.808893742122453e-06, "loss": 0.3146, "step": 1964 }, { "epoch": 0.09747507316831192, "grad_norm": 7.482272148132324, "learning_rate": 9.808678155341192e-06, "loss": 0.2997, "step": 1965 }, { "epoch": 0.09752467880351208, "grad_norm": 3.4086501598358154, "learning_rate": 9.808462449398713e-06, "loss": 0.2712, "step": 1966 }, { "epoch": 0.09757428443871224, "grad_norm": 8.489163398742676, "learning_rate": 9.808246624300356e-06, "loss": 0.3222, "step": 1967 }, { "epoch": 0.0976238900739124, "grad_norm": 10.507972717285156, "learning_rate": 9.808030680051474e-06, "loss": 0.4838, "step": 1968 }, { "epoch": 0.09767349570911256, "grad_norm": 7.108479976654053, "learning_rate": 9.807814616657419e-06, "loss": 0.281, "step": 1969 }, { "epoch": 0.09772310134431271, "grad_norm": 5.797888278961182, "learning_rate": 9.807598434123542e-06, "loss": 0.2646, "step": 1970 }, { "epoch": 0.09777270697951287, "grad_norm": 7.938786506652832, "learning_rate": 9.8073821324552e-06, "loss": 0.4137, "step": 1971 }, { "epoch": 0.09782231261471303, "grad_norm": 17.437780380249023, "learning_rate": 9.807165711657754e-06, "loss": 0.2667, "step": 1972 }, { "epoch": 0.09787191824991319, "grad_norm": 10.254009246826172, "learning_rate": 9.806949171736569e-06, "loss": 0.3641, "step": 1973 }, { "epoch": 0.09792152388511335, "grad_norm": 7.897180557250977, "learning_rate": 9.806732512697008e-06, "loss": 0.2937, "step": 1974 }, { "epoch": 0.0979711295203135, "grad_norm": 12.369852066040039, "learning_rate": 9.80651573454444e-06, "loss": 0.4406, "step": 1975 }, { "epoch": 0.09802073515551367, "grad_norm": 15.12887191772461, "learning_rate": 9.80629883728424e-06, "loss": 0.269, "step": 1976 }, { "epoch": 0.09807034079071382, "grad_norm": 5.26222562789917, "learning_rate": 9.806081820921778e-06, "loss": 0.3607, "step": 1977 }, { "epoch": 0.09811994642591398, "grad_norm": 7.0307111740112305, "learning_rate": 9.805864685462436e-06, "loss": 0.391, "step": 1978 }, { "epoch": 0.09816955206111415, "grad_norm": 17.146711349487305, "learning_rate": 9.805647430911593e-06, "loss": 0.4405, "step": 1979 }, { "epoch": 0.0982191576963143, "grad_norm": 8.461310386657715, "learning_rate": 9.805430057274635e-06, "loss": 0.2469, "step": 1980 }, { "epoch": 0.09826876333151446, "grad_norm": 11.26314640045166, "learning_rate": 9.805212564556943e-06, "loss": 0.2892, "step": 1981 }, { "epoch": 0.09831836896671461, "grad_norm": 13.155487060546875, "learning_rate": 9.804994952763912e-06, "loss": 0.306, "step": 1982 }, { "epoch": 0.09836797460191478, "grad_norm": 15.153905868530273, "learning_rate": 9.804777221900931e-06, "loss": 0.5406, "step": 1983 }, { "epoch": 0.09841758023711493, "grad_norm": 10.276985168457031, "learning_rate": 9.8045593719734e-06, "loss": 0.2652, "step": 1984 }, { "epoch": 0.0984671858723151, "grad_norm": 10.090267181396484, "learning_rate": 9.804341402986712e-06, "loss": 0.3239, "step": 1985 }, { "epoch": 0.09851679150751526, "grad_norm": 6.894720077514648, "learning_rate": 9.80412331494627e-06, "loss": 0.3289, "step": 1986 }, { "epoch": 0.09856639714271541, "grad_norm": 14.297849655151367, "learning_rate": 9.80390510785748e-06, "loss": 0.451, "step": 1987 }, { "epoch": 0.09861600277791557, "grad_norm": 7.776071071624756, "learning_rate": 9.803686781725749e-06, "loss": 0.3123, "step": 1988 }, { "epoch": 0.09866560841311572, "grad_norm": 5.7074151039123535, "learning_rate": 9.803468336556486e-06, "loss": 0.3488, "step": 1989 }, { "epoch": 0.09871521404831589, "grad_norm": 7.730112552642822, "learning_rate": 9.803249772355106e-06, "loss": 0.3128, "step": 1990 }, { "epoch": 0.09876481968351605, "grad_norm": 4.870964050292969, "learning_rate": 9.803031089127021e-06, "loss": 0.2662, "step": 1991 }, { "epoch": 0.0988144253187162, "grad_norm": 9.736956596374512, "learning_rate": 9.802812286877657e-06, "loss": 0.3903, "step": 1992 }, { "epoch": 0.09886403095391637, "grad_norm": 14.28625774383545, "learning_rate": 9.80259336561243e-06, "loss": 0.3838, "step": 1993 }, { "epoch": 0.09891363658911652, "grad_norm": 9.116620063781738, "learning_rate": 9.802374325336766e-06, "loss": 0.2424, "step": 1994 }, { "epoch": 0.09896324222431668, "grad_norm": 20.041223526000977, "learning_rate": 9.802155166056096e-06, "loss": 0.4568, "step": 1995 }, { "epoch": 0.09901284785951685, "grad_norm": 5.487825870513916, "learning_rate": 9.801935887775846e-06, "loss": 0.3375, "step": 1996 }, { "epoch": 0.099062453494717, "grad_norm": 10.101491928100586, "learning_rate": 9.801716490501454e-06, "loss": 0.3708, "step": 1997 }, { "epoch": 0.09911205912991716, "grad_norm": 6.336847305297852, "learning_rate": 9.801496974238356e-06, "loss": 0.376, "step": 1998 }, { "epoch": 0.09916166476511731, "grad_norm": 7.061018466949463, "learning_rate": 9.80127733899199e-06, "loss": 0.3354, "step": 1999 }, { "epoch": 0.09921127040031748, "grad_norm": 6.5655198097229, "learning_rate": 9.8010575847678e-06, "loss": 0.421, "step": 2000 }, { "epoch": 0.09926087603551763, "grad_norm": 10.39547061920166, "learning_rate": 9.800837711571231e-06, "loss": 0.3268, "step": 2001 }, { "epoch": 0.09931048167071779, "grad_norm": 7.401115417480469, "learning_rate": 9.800617719407732e-06, "loss": 0.2848, "step": 2002 }, { "epoch": 0.09936008730591796, "grad_norm": 6.731581687927246, "learning_rate": 9.800397608282754e-06, "loss": 0.3706, "step": 2003 }, { "epoch": 0.09940969294111811, "grad_norm": 10.9152250289917, "learning_rate": 9.800177378201754e-06, "loss": 0.379, "step": 2004 }, { "epoch": 0.09945929857631827, "grad_norm": 8.632756233215332, "learning_rate": 9.799957029170185e-06, "loss": 0.304, "step": 2005 }, { "epoch": 0.09950890421151842, "grad_norm": 12.711731910705566, "learning_rate": 9.799736561193511e-06, "loss": 0.3833, "step": 2006 }, { "epoch": 0.09955850984671859, "grad_norm": 23.691720962524414, "learning_rate": 9.799515974277192e-06, "loss": 0.463, "step": 2007 }, { "epoch": 0.09960811548191875, "grad_norm": 9.47325611114502, "learning_rate": 9.799295268426697e-06, "loss": 0.4059, "step": 2008 }, { "epoch": 0.0996577211171189, "grad_norm": 7.930121898651123, "learning_rate": 9.799074443647495e-06, "loss": 0.3422, "step": 2009 }, { "epoch": 0.09970732675231907, "grad_norm": 10.112260818481445, "learning_rate": 9.798853499945056e-06, "loss": 0.3997, "step": 2010 }, { "epoch": 0.09975693238751922, "grad_norm": 12.041114807128906, "learning_rate": 9.798632437324858e-06, "loss": 0.4696, "step": 2011 }, { "epoch": 0.09980653802271938, "grad_norm": 6.753830909729004, "learning_rate": 9.798411255792377e-06, "loss": 0.2238, "step": 2012 }, { "epoch": 0.09985614365791953, "grad_norm": 8.314200401306152, "learning_rate": 9.798189955353095e-06, "loss": 0.342, "step": 2013 }, { "epoch": 0.0999057492931197, "grad_norm": 7.858386039733887, "learning_rate": 9.797968536012494e-06, "loss": 0.3286, "step": 2014 }, { "epoch": 0.09995535492831986, "grad_norm": 15.504980087280273, "learning_rate": 9.797746997776064e-06, "loss": 0.4161, "step": 2015 }, { "epoch": 0.10000496056352001, "grad_norm": 8.770264625549316, "learning_rate": 9.797525340649291e-06, "loss": 0.2895, "step": 2016 }, { "epoch": 0.10000496056352001, "eval_loss": 0.3478594124317169, "eval_runtime": 35.5747, "eval_samples_per_second": 45.791, "eval_steps_per_second": 5.734, "step": 2016 }, { "epoch": 0.10005456619872018, "grad_norm": 9.505745887756348, "learning_rate": 9.797303564637672e-06, "loss": 0.3753, "step": 2017 }, { "epoch": 0.10010417183392033, "grad_norm": 7.719751834869385, "learning_rate": 9.7970816697467e-06, "loss": 0.41, "step": 2018 }, { "epoch": 0.10015377746912049, "grad_norm": 12.497514724731445, "learning_rate": 9.796859655981875e-06, "loss": 0.4283, "step": 2019 }, { "epoch": 0.10020338310432066, "grad_norm": 4.561680793762207, "learning_rate": 9.796637523348695e-06, "loss": 0.2916, "step": 2020 }, { "epoch": 0.1002529887395208, "grad_norm": 15.94431209564209, "learning_rate": 9.79641527185267e-06, "loss": 0.3974, "step": 2021 }, { "epoch": 0.10030259437472097, "grad_norm": 8.393918991088867, "learning_rate": 9.796192901499305e-06, "loss": 0.3931, "step": 2022 }, { "epoch": 0.10035220000992112, "grad_norm": 5.454020023345947, "learning_rate": 9.795970412294109e-06, "loss": 0.3282, "step": 2023 }, { "epoch": 0.10040180564512129, "grad_norm": 10.361604690551758, "learning_rate": 9.795747804242596e-06, "loss": 0.4286, "step": 2024 }, { "epoch": 0.10045141128032145, "grad_norm": 6.926659107208252, "learning_rate": 9.795525077350284e-06, "loss": 0.2755, "step": 2025 }, { "epoch": 0.1005010169155216, "grad_norm": 7.313343048095703, "learning_rate": 9.795302231622694e-06, "loss": 0.3452, "step": 2026 }, { "epoch": 0.10055062255072177, "grad_norm": 6.218172073364258, "learning_rate": 9.795079267065343e-06, "loss": 0.3769, "step": 2027 }, { "epoch": 0.10060022818592192, "grad_norm": 6.60775899887085, "learning_rate": 9.794856183683758e-06, "loss": 0.3545, "step": 2028 }, { "epoch": 0.10064983382112208, "grad_norm": 4.900721073150635, "learning_rate": 9.79463298148347e-06, "loss": 0.2855, "step": 2029 }, { "epoch": 0.10069943945632223, "grad_norm": 16.733476638793945, "learning_rate": 9.794409660470006e-06, "loss": 0.3774, "step": 2030 }, { "epoch": 0.1007490450915224, "grad_norm": 11.922089576721191, "learning_rate": 9.794186220648903e-06, "loss": 0.3039, "step": 2031 }, { "epoch": 0.10079865072672256, "grad_norm": 12.764277458190918, "learning_rate": 9.793962662025696e-06, "loss": 0.5181, "step": 2032 }, { "epoch": 0.10084825636192271, "grad_norm": 8.190794944763184, "learning_rate": 9.793738984605926e-06, "loss": 0.2592, "step": 2033 }, { "epoch": 0.10089786199712288, "grad_norm": 20.796876907348633, "learning_rate": 9.793515188395135e-06, "loss": 0.4517, "step": 2034 }, { "epoch": 0.10094746763232303, "grad_norm": 11.486146926879883, "learning_rate": 9.793291273398871e-06, "loss": 0.3777, "step": 2035 }, { "epoch": 0.10099707326752319, "grad_norm": 9.49141788482666, "learning_rate": 9.79306723962268e-06, "loss": 0.411, "step": 2036 }, { "epoch": 0.10104667890272336, "grad_norm": 9.56528091430664, "learning_rate": 9.792843087072114e-06, "loss": 0.3352, "step": 2037 }, { "epoch": 0.1010962845379235, "grad_norm": 7.723094940185547, "learning_rate": 9.792618815752728e-06, "loss": 0.3722, "step": 2038 }, { "epoch": 0.10114589017312367, "grad_norm": 43.45170211791992, "learning_rate": 9.79239442567008e-06, "loss": 0.3652, "step": 2039 }, { "epoch": 0.10119549580832382, "grad_norm": 14.752747535705566, "learning_rate": 9.792169916829732e-06, "loss": 0.379, "step": 2040 }, { "epoch": 0.10124510144352399, "grad_norm": 8.925764083862305, "learning_rate": 9.791945289237245e-06, "loss": 0.3539, "step": 2041 }, { "epoch": 0.10129470707872415, "grad_norm": 5.609681129455566, "learning_rate": 9.791720542898186e-06, "loss": 0.2891, "step": 2042 }, { "epoch": 0.1013443127139243, "grad_norm": 5.997899532318115, "learning_rate": 9.791495677818123e-06, "loss": 0.3434, "step": 2043 }, { "epoch": 0.10139391834912446, "grad_norm": 11.89236068725586, "learning_rate": 9.791270694002632e-06, "loss": 0.2944, "step": 2044 }, { "epoch": 0.10144352398432462, "grad_norm": 9.483509063720703, "learning_rate": 9.791045591457284e-06, "loss": 0.4378, "step": 2045 }, { "epoch": 0.10149312961952478, "grad_norm": 8.156742095947266, "learning_rate": 9.79082037018766e-06, "loss": 0.3085, "step": 2046 }, { "epoch": 0.10154273525472493, "grad_norm": 5.996416091918945, "learning_rate": 9.790595030199339e-06, "loss": 0.2634, "step": 2047 }, { "epoch": 0.1015923408899251, "grad_norm": 10.33320140838623, "learning_rate": 9.790369571497907e-06, "loss": 0.3252, "step": 2048 }, { "epoch": 0.10164194652512526, "grad_norm": 5.278276443481445, "learning_rate": 9.79014399408895e-06, "loss": 0.2529, "step": 2049 }, { "epoch": 0.10169155216032541, "grad_norm": 9.998637199401855, "learning_rate": 9.789918297978058e-06, "loss": 0.4132, "step": 2050 }, { "epoch": 0.10174115779552557, "grad_norm": 11.51066780090332, "learning_rate": 9.789692483170824e-06, "loss": 0.3335, "step": 2051 }, { "epoch": 0.10179076343072573, "grad_norm": 14.018813133239746, "learning_rate": 9.789466549672843e-06, "loss": 0.3511, "step": 2052 }, { "epoch": 0.10184036906592589, "grad_norm": 8.906064987182617, "learning_rate": 9.789240497489716e-06, "loss": 0.3051, "step": 2053 }, { "epoch": 0.10188997470112605, "grad_norm": 4.782965660095215, "learning_rate": 9.789014326627042e-06, "loss": 0.2769, "step": 2054 }, { "epoch": 0.1019395803363262, "grad_norm": 9.569931030273438, "learning_rate": 9.788788037090427e-06, "loss": 0.4091, "step": 2055 }, { "epoch": 0.10198918597152637, "grad_norm": 5.494135856628418, "learning_rate": 9.788561628885478e-06, "loss": 0.2724, "step": 2056 }, { "epoch": 0.10203879160672652, "grad_norm": 7.0037384033203125, "learning_rate": 9.788335102017805e-06, "loss": 0.3056, "step": 2057 }, { "epoch": 0.10208839724192668, "grad_norm": 7.452202320098877, "learning_rate": 9.788108456493024e-06, "loss": 0.2665, "step": 2058 }, { "epoch": 0.10213800287712684, "grad_norm": 8.815189361572266, "learning_rate": 9.787881692316748e-06, "loss": 0.4479, "step": 2059 }, { "epoch": 0.102187608512327, "grad_norm": 6.1300458908081055, "learning_rate": 9.787654809494597e-06, "loss": 0.2903, "step": 2060 }, { "epoch": 0.10223721414752716, "grad_norm": 3.8503427505493164, "learning_rate": 9.787427808032195e-06, "loss": 0.2392, "step": 2061 }, { "epoch": 0.10228681978272731, "grad_norm": 8.781084060668945, "learning_rate": 9.787200687935167e-06, "loss": 0.4095, "step": 2062 }, { "epoch": 0.10233642541792748, "grad_norm": 12.35096549987793, "learning_rate": 9.78697344920914e-06, "loss": 0.4874, "step": 2063 }, { "epoch": 0.10238603105312763, "grad_norm": 8.123503684997559, "learning_rate": 9.786746091859747e-06, "loss": 0.3058, "step": 2064 }, { "epoch": 0.1024356366883278, "grad_norm": 9.161474227905273, "learning_rate": 9.786518615892619e-06, "loss": 0.5031, "step": 2065 }, { "epoch": 0.10248524232352796, "grad_norm": 5.383370399475098, "learning_rate": 9.786291021313394e-06, "loss": 0.2824, "step": 2066 }, { "epoch": 0.10253484795872811, "grad_norm": 8.67806339263916, "learning_rate": 9.786063308127713e-06, "loss": 0.2731, "step": 2067 }, { "epoch": 0.10258445359392827, "grad_norm": 6.664607524871826, "learning_rate": 9.785835476341219e-06, "loss": 0.3251, "step": 2068 }, { "epoch": 0.10263405922912842, "grad_norm": 13.753837585449219, "learning_rate": 9.785607525959558e-06, "loss": 0.3418, "step": 2069 }, { "epoch": 0.10268366486432859, "grad_norm": 7.470885753631592, "learning_rate": 9.785379456988375e-06, "loss": 0.2505, "step": 2070 }, { "epoch": 0.10273327049952875, "grad_norm": 9.1493501663208, "learning_rate": 9.785151269433327e-06, "loss": 0.4699, "step": 2071 }, { "epoch": 0.1027828761347289, "grad_norm": 17.249980926513672, "learning_rate": 9.784922963300063e-06, "loss": 0.4914, "step": 2072 }, { "epoch": 0.10283248176992907, "grad_norm": 20.79112434387207, "learning_rate": 9.784694538594244e-06, "loss": 0.3993, "step": 2073 }, { "epoch": 0.10288208740512922, "grad_norm": 9.441667556762695, "learning_rate": 9.784465995321533e-06, "loss": 0.3333, "step": 2074 }, { "epoch": 0.10293169304032938, "grad_norm": 9.895691871643066, "learning_rate": 9.784237333487588e-06, "loss": 0.4453, "step": 2075 }, { "epoch": 0.10298129867552953, "grad_norm": 6.169625759124756, "learning_rate": 9.784008553098078e-06, "loss": 0.2795, "step": 2076 }, { "epoch": 0.1030309043107297, "grad_norm": 12.084507942199707, "learning_rate": 9.783779654158672e-06, "loss": 0.3964, "step": 2077 }, { "epoch": 0.10308050994592986, "grad_norm": 7.328539848327637, "learning_rate": 9.783550636675043e-06, "loss": 0.3785, "step": 2078 }, { "epoch": 0.10313011558113001, "grad_norm": 6.452024936676025, "learning_rate": 9.783321500652865e-06, "loss": 0.2297, "step": 2079 }, { "epoch": 0.10317972121633018, "grad_norm": 9.226775169372559, "learning_rate": 9.783092246097816e-06, "loss": 0.329, "step": 2080 }, { "epoch": 0.10322932685153033, "grad_norm": 11.101231575012207, "learning_rate": 9.782862873015578e-06, "loss": 0.3636, "step": 2081 }, { "epoch": 0.1032789324867305, "grad_norm": 11.09039306640625, "learning_rate": 9.782633381411835e-06, "loss": 0.4542, "step": 2082 }, { "epoch": 0.10332853812193066, "grad_norm": 7.474052429199219, "learning_rate": 9.782403771292272e-06, "loss": 0.323, "step": 2083 }, { "epoch": 0.10337814375713081, "grad_norm": 15.841594696044922, "learning_rate": 9.78217404266258e-06, "loss": 0.358, "step": 2084 }, { "epoch": 0.10342774939233097, "grad_norm": 12.771618843078613, "learning_rate": 9.781944195528453e-06, "loss": 0.3379, "step": 2085 }, { "epoch": 0.10347735502753112, "grad_norm": 8.823932647705078, "learning_rate": 9.781714229895587e-06, "loss": 0.4479, "step": 2086 }, { "epoch": 0.10352696066273129, "grad_norm": 11.878119468688965, "learning_rate": 9.781484145769677e-06, "loss": 0.3663, "step": 2087 }, { "epoch": 0.10357656629793144, "grad_norm": 9.100712776184082, "learning_rate": 9.781253943156429e-06, "loss": 0.3526, "step": 2088 }, { "epoch": 0.1036261719331316, "grad_norm": 9.424362182617188, "learning_rate": 9.781023622061543e-06, "loss": 0.2796, "step": 2089 }, { "epoch": 0.10367577756833177, "grad_norm": 5.655378341674805, "learning_rate": 9.780793182490732e-06, "loss": 0.3376, "step": 2090 }, { "epoch": 0.10372538320353192, "grad_norm": 10.99032211303711, "learning_rate": 9.7805626244497e-06, "loss": 0.4262, "step": 2091 }, { "epoch": 0.10377498883873208, "grad_norm": 9.473179817199707, "learning_rate": 9.780331947944167e-06, "loss": 0.2449, "step": 2092 }, { "epoch": 0.10382459447393223, "grad_norm": 7.4533233642578125, "learning_rate": 9.780101152979843e-06, "loss": 0.3002, "step": 2093 }, { "epoch": 0.1038742001091324, "grad_norm": 5.668026447296143, "learning_rate": 9.779870239562453e-06, "loss": 0.3356, "step": 2094 }, { "epoch": 0.10392380574433256, "grad_norm": 9.388623237609863, "learning_rate": 9.779639207697715e-06, "loss": 0.4252, "step": 2095 }, { "epoch": 0.10397341137953271, "grad_norm": 8.157100677490234, "learning_rate": 9.779408057391354e-06, "loss": 0.256, "step": 2096 }, { "epoch": 0.10402301701473288, "grad_norm": 11.949592590332031, "learning_rate": 9.779176788649101e-06, "loss": 0.3773, "step": 2097 }, { "epoch": 0.10407262264993303, "grad_norm": 7.897282123565674, "learning_rate": 9.778945401476685e-06, "loss": 0.4237, "step": 2098 }, { "epoch": 0.10412222828513319, "grad_norm": 7.3083672523498535, "learning_rate": 9.77871389587984e-06, "loss": 0.3205, "step": 2099 }, { "epoch": 0.10417183392033336, "grad_norm": 7.045561790466309, "learning_rate": 9.778482271864303e-06, "loss": 0.2942, "step": 2100 }, { "epoch": 0.10422143955553351, "grad_norm": 6.960677146911621, "learning_rate": 9.778250529435815e-06, "loss": 0.3145, "step": 2101 }, { "epoch": 0.10427104519073367, "grad_norm": 6.187163352966309, "learning_rate": 9.778018668600115e-06, "loss": 0.3148, "step": 2102 }, { "epoch": 0.10432065082593382, "grad_norm": 4.287555694580078, "learning_rate": 9.777786689362953e-06, "loss": 0.2201, "step": 2103 }, { "epoch": 0.10437025646113399, "grad_norm": 6.697414398193359, "learning_rate": 9.777554591730077e-06, "loss": 0.3212, "step": 2104 }, { "epoch": 0.10441986209633414, "grad_norm": 6.875143051147461, "learning_rate": 9.777322375707234e-06, "loss": 0.2639, "step": 2105 }, { "epoch": 0.1044694677315343, "grad_norm": 11.861167907714844, "learning_rate": 9.777090041300182e-06, "loss": 0.4369, "step": 2106 }, { "epoch": 0.10451907336673447, "grad_norm": 9.722434043884277, "learning_rate": 9.776857588514679e-06, "loss": 0.3459, "step": 2107 }, { "epoch": 0.10456867900193462, "grad_norm": 8.12880802154541, "learning_rate": 9.776625017356483e-06, "loss": 0.3409, "step": 2108 }, { "epoch": 0.10461828463713478, "grad_norm": 8.827991485595703, "learning_rate": 9.77639232783136e-06, "loss": 0.3853, "step": 2109 }, { "epoch": 0.10466789027233493, "grad_norm": 7.1481194496154785, "learning_rate": 9.776159519945074e-06, "loss": 0.3552, "step": 2110 }, { "epoch": 0.1047174959075351, "grad_norm": 5.600805759429932, "learning_rate": 9.775926593703392e-06, "loss": 0.363, "step": 2111 }, { "epoch": 0.10476710154273526, "grad_norm": 16.107702255249023, "learning_rate": 9.775693549112093e-06, "loss": 0.4915, "step": 2112 }, { "epoch": 0.10481670717793541, "grad_norm": 3.728973388671875, "learning_rate": 9.775460386176944e-06, "loss": 0.26, "step": 2113 }, { "epoch": 0.10486631281313558, "grad_norm": 9.551139831542969, "learning_rate": 9.77522710490373e-06, "loss": 0.348, "step": 2114 }, { "epoch": 0.10491591844833573, "grad_norm": 14.897445678710938, "learning_rate": 9.774993705298224e-06, "loss": 0.2816, "step": 2115 }, { "epoch": 0.10496552408353589, "grad_norm": 6.591182231903076, "learning_rate": 9.774760187366216e-06, "loss": 0.2683, "step": 2116 }, { "epoch": 0.10501512971873604, "grad_norm": 7.694820880889893, "learning_rate": 9.774526551113492e-06, "loss": 0.3215, "step": 2117 }, { "epoch": 0.1050647353539362, "grad_norm": 11.682832717895508, "learning_rate": 9.774292796545838e-06, "loss": 0.3678, "step": 2118 }, { "epoch": 0.10511434098913637, "grad_norm": 7.636603832244873, "learning_rate": 9.77405892366905e-06, "loss": 0.2661, "step": 2119 }, { "epoch": 0.10516394662433652, "grad_norm": 5.568404674530029, "learning_rate": 9.773824932488922e-06, "loss": 0.3352, "step": 2120 }, { "epoch": 0.10521355225953669, "grad_norm": 12.278729438781738, "learning_rate": 9.773590823011254e-06, "loss": 0.3732, "step": 2121 }, { "epoch": 0.10526315789473684, "grad_norm": 9.7464599609375, "learning_rate": 9.773356595241845e-06, "loss": 0.2278, "step": 2122 }, { "epoch": 0.105312763529937, "grad_norm": 9.199393272399902, "learning_rate": 9.7731222491865e-06, "loss": 0.3715, "step": 2123 }, { "epoch": 0.10536236916513717, "grad_norm": 7.660118579864502, "learning_rate": 9.772887784851027e-06, "loss": 0.3036, "step": 2124 }, { "epoch": 0.10541197480033732, "grad_norm": 14.645071029663086, "learning_rate": 9.772653202241236e-06, "loss": 0.437, "step": 2125 }, { "epoch": 0.10546158043553748, "grad_norm": 5.926042079925537, "learning_rate": 9.772418501362939e-06, "loss": 0.2994, "step": 2126 }, { "epoch": 0.10551118607073763, "grad_norm": 6.661621570587158, "learning_rate": 9.772183682221953e-06, "loss": 0.3268, "step": 2127 }, { "epoch": 0.1055607917059378, "grad_norm": 7.002601623535156, "learning_rate": 9.771948744824097e-06, "loss": 0.3383, "step": 2128 }, { "epoch": 0.10561039734113796, "grad_norm": 17.397878646850586, "learning_rate": 9.771713689175193e-06, "loss": 0.2777, "step": 2129 }, { "epoch": 0.10566000297633811, "grad_norm": 7.836291313171387, "learning_rate": 9.771478515281065e-06, "loss": 0.3548, "step": 2130 }, { "epoch": 0.10570960861153827, "grad_norm": 6.110612869262695, "learning_rate": 9.77124322314754e-06, "loss": 0.3554, "step": 2131 }, { "epoch": 0.10575921424673843, "grad_norm": 5.923634052276611, "learning_rate": 9.77100781278045e-06, "loss": 0.3128, "step": 2132 }, { "epoch": 0.10580881988193859, "grad_norm": 13.85751724243164, "learning_rate": 9.770772284185628e-06, "loss": 0.2439, "step": 2133 }, { "epoch": 0.10585842551713874, "grad_norm": 6.8285136222839355, "learning_rate": 9.770536637368912e-06, "loss": 0.2778, "step": 2134 }, { "epoch": 0.1059080311523389, "grad_norm": 11.341899871826172, "learning_rate": 9.77030087233614e-06, "loss": 0.4955, "step": 2135 }, { "epoch": 0.10595763678753907, "grad_norm": 7.831125736236572, "learning_rate": 9.770064989093154e-06, "loss": 0.1927, "step": 2136 }, { "epoch": 0.10600724242273922, "grad_norm": 7.235144138336182, "learning_rate": 9.7698289876458e-06, "loss": 0.3749, "step": 2137 }, { "epoch": 0.10605684805793938, "grad_norm": 6.822451114654541, "learning_rate": 9.769592867999923e-06, "loss": 0.2357, "step": 2138 }, { "epoch": 0.10610645369313954, "grad_norm": 14.580981254577637, "learning_rate": 9.769356630161382e-06, "loss": 0.4062, "step": 2139 }, { "epoch": 0.1061560593283397, "grad_norm": 11.19269847869873, "learning_rate": 9.769120274136024e-06, "loss": 0.4276, "step": 2140 }, { "epoch": 0.10620566496353986, "grad_norm": 12.273158073425293, "learning_rate": 9.768883799929708e-06, "loss": 0.4381, "step": 2141 }, { "epoch": 0.10625527059874001, "grad_norm": 6.563877105712891, "learning_rate": 9.768647207548297e-06, "loss": 0.3742, "step": 2142 }, { "epoch": 0.10630487623394018, "grad_norm": 7.455324649810791, "learning_rate": 9.768410496997648e-06, "loss": 0.3293, "step": 2143 }, { "epoch": 0.10635448186914033, "grad_norm": 5.446661472320557, "learning_rate": 9.768173668283632e-06, "loss": 0.2871, "step": 2144 }, { "epoch": 0.1064040875043405, "grad_norm": 10.584818840026855, "learning_rate": 9.767936721412114e-06, "loss": 0.1919, "step": 2145 }, { "epoch": 0.10645369313954064, "grad_norm": 8.694966316223145, "learning_rate": 9.767699656388967e-06, "loss": 0.3095, "step": 2146 }, { "epoch": 0.10650329877474081, "grad_norm": 8.878923416137695, "learning_rate": 9.767462473220066e-06, "loss": 0.351, "step": 2147 }, { "epoch": 0.10655290440994097, "grad_norm": 5.840853691101074, "learning_rate": 9.76722517191129e-06, "loss": 0.3392, "step": 2148 }, { "epoch": 0.10660251004514112, "grad_norm": 9.509084701538086, "learning_rate": 9.766987752468518e-06, "loss": 0.3923, "step": 2149 }, { "epoch": 0.10665211568034129, "grad_norm": 8.005722999572754, "learning_rate": 9.766750214897632e-06, "loss": 0.324, "step": 2150 }, { "epoch": 0.10670172131554144, "grad_norm": 8.973352432250977, "learning_rate": 9.766512559204518e-06, "loss": 0.3414, "step": 2151 }, { "epoch": 0.1067513269507416, "grad_norm": 11.176823616027832, "learning_rate": 9.766274785395068e-06, "loss": 0.4052, "step": 2152 }, { "epoch": 0.10680093258594177, "grad_norm": 7.489992141723633, "learning_rate": 9.766036893475172e-06, "loss": 0.3015, "step": 2153 }, { "epoch": 0.10685053822114192, "grad_norm": 7.76625919342041, "learning_rate": 9.765798883450727e-06, "loss": 0.4166, "step": 2154 }, { "epoch": 0.10690014385634208, "grad_norm": 9.721134185791016, "learning_rate": 9.76556075532763e-06, "loss": 0.4912, "step": 2155 }, { "epoch": 0.10694974949154223, "grad_norm": 3.3266842365264893, "learning_rate": 9.765322509111782e-06, "loss": 0.2076, "step": 2156 }, { "epoch": 0.1069993551267424, "grad_norm": 6.188510894775391, "learning_rate": 9.765084144809087e-06, "loss": 0.2923, "step": 2157 }, { "epoch": 0.10704896076194256, "grad_norm": 11.622623443603516, "learning_rate": 9.76484566242545e-06, "loss": 0.3771, "step": 2158 }, { "epoch": 0.10709856639714271, "grad_norm": 8.231328010559082, "learning_rate": 9.764607061966783e-06, "loss": 0.2375, "step": 2159 }, { "epoch": 0.10714817203234288, "grad_norm": 10.760143280029297, "learning_rate": 9.764368343438997e-06, "loss": 0.5083, "step": 2160 }, { "epoch": 0.10719777766754303, "grad_norm": 8.232865333557129, "learning_rate": 9.764129506848008e-06, "loss": 0.3325, "step": 2161 }, { "epoch": 0.1072473833027432, "grad_norm": 11.054889678955078, "learning_rate": 9.763890552199736e-06, "loss": 0.3778, "step": 2162 }, { "epoch": 0.10729698893794334, "grad_norm": 19.877050399780273, "learning_rate": 9.763651479500098e-06, "loss": 0.5092, "step": 2163 }, { "epoch": 0.10734659457314351, "grad_norm": 6.432341575622559, "learning_rate": 9.763412288755026e-06, "loss": 0.3446, "step": 2164 }, { "epoch": 0.10739620020834367, "grad_norm": 6.264476776123047, "learning_rate": 9.76317297997044e-06, "loss": 0.2749, "step": 2165 }, { "epoch": 0.10744580584354382, "grad_norm": 6.669432163238525, "learning_rate": 9.762933553152274e-06, "loss": 0.4128, "step": 2166 }, { "epoch": 0.10749541147874399, "grad_norm": 5.650030612945557, "learning_rate": 9.76269400830646e-06, "loss": 0.2804, "step": 2167 }, { "epoch": 0.10754501711394414, "grad_norm": 28.694475173950195, "learning_rate": 9.762454345438934e-06, "loss": 0.4891, "step": 2168 }, { "epoch": 0.1075946227491443, "grad_norm": 5.369659423828125, "learning_rate": 9.762214564555634e-06, "loss": 0.3175, "step": 2169 }, { "epoch": 0.10764422838434447, "grad_norm": 6.08712100982666, "learning_rate": 9.761974665662503e-06, "loss": 0.3362, "step": 2170 }, { "epoch": 0.10769383401954462, "grad_norm": 8.647067070007324, "learning_rate": 9.761734648765487e-06, "loss": 0.4035, "step": 2171 }, { "epoch": 0.10774343965474478, "grad_norm": 6.757367134094238, "learning_rate": 9.761494513870533e-06, "loss": 0.343, "step": 2172 }, { "epoch": 0.10779304528994493, "grad_norm": 12.324070930480957, "learning_rate": 9.761254260983589e-06, "loss": 0.523, "step": 2173 }, { "epoch": 0.1078426509251451, "grad_norm": 11.846338272094727, "learning_rate": 9.761013890110613e-06, "loss": 0.2898, "step": 2174 }, { "epoch": 0.10789225656034526, "grad_norm": 6.162031650543213, "learning_rate": 9.760773401257557e-06, "loss": 0.3089, "step": 2175 }, { "epoch": 0.10794186219554541, "grad_norm": 6.585159778594971, "learning_rate": 9.760532794430384e-06, "loss": 0.3108, "step": 2176 }, { "epoch": 0.10799146783074558, "grad_norm": 9.451800346374512, "learning_rate": 9.760292069635053e-06, "loss": 0.295, "step": 2177 }, { "epoch": 0.10804107346594573, "grad_norm": 3.938624858856201, "learning_rate": 9.760051226877534e-06, "loss": 0.1586, "step": 2178 }, { "epoch": 0.10809067910114589, "grad_norm": 5.457577228546143, "learning_rate": 9.75981026616379e-06, "loss": 0.3172, "step": 2179 }, { "epoch": 0.10814028473634604, "grad_norm": 6.004453182220459, "learning_rate": 9.759569187499795e-06, "loss": 0.3311, "step": 2180 }, { "epoch": 0.10818989037154621, "grad_norm": 16.03370475769043, "learning_rate": 9.759327990891522e-06, "loss": 0.3743, "step": 2181 }, { "epoch": 0.10823949600674637, "grad_norm": 7.926716327667236, "learning_rate": 9.75908667634495e-06, "loss": 0.3374, "step": 2182 }, { "epoch": 0.10828910164194652, "grad_norm": 18.746315002441406, "learning_rate": 9.758845243866058e-06, "loss": 0.5709, "step": 2183 }, { "epoch": 0.10833870727714669, "grad_norm": 11.938206672668457, "learning_rate": 9.758603693460827e-06, "loss": 0.4426, "step": 2184 }, { "epoch": 0.10838831291234684, "grad_norm": 5.7561445236206055, "learning_rate": 9.758362025135244e-06, "loss": 0.3122, "step": 2185 }, { "epoch": 0.108437918547547, "grad_norm": 6.653918266296387, "learning_rate": 9.758120238895297e-06, "loss": 0.3816, "step": 2186 }, { "epoch": 0.10848752418274717, "grad_norm": 27.00949478149414, "learning_rate": 9.75787833474698e-06, "loss": 0.4689, "step": 2187 }, { "epoch": 0.10853712981794732, "grad_norm": 85.3687515258789, "learning_rate": 9.757636312696283e-06, "loss": 0.4172, "step": 2188 }, { "epoch": 0.10858673545314748, "grad_norm": 10.618806838989258, "learning_rate": 9.757394172749208e-06, "loss": 0.3842, "step": 2189 }, { "epoch": 0.10863634108834763, "grad_norm": 4.834617614746094, "learning_rate": 9.757151914911753e-06, "loss": 0.3, "step": 2190 }, { "epoch": 0.1086859467235478, "grad_norm": 4.958165645599365, "learning_rate": 9.75690953918992e-06, "loss": 0.2512, "step": 2191 }, { "epoch": 0.10873555235874795, "grad_norm": 5.3827619552612305, "learning_rate": 9.75666704558972e-06, "loss": 0.3169, "step": 2192 }, { "epoch": 0.10878515799394811, "grad_norm": 8.169175148010254, "learning_rate": 9.756424434117156e-06, "loss": 0.4085, "step": 2193 }, { "epoch": 0.10883476362914828, "grad_norm": 5.962181091308594, "learning_rate": 9.756181704778244e-06, "loss": 0.2869, "step": 2194 }, { "epoch": 0.10888436926434843, "grad_norm": 12.218286514282227, "learning_rate": 9.755938857578999e-06, "loss": 0.3365, "step": 2195 }, { "epoch": 0.10893397489954859, "grad_norm": 6.907649040222168, "learning_rate": 9.755695892525437e-06, "loss": 0.3923, "step": 2196 }, { "epoch": 0.10898358053474874, "grad_norm": 11.358503341674805, "learning_rate": 9.755452809623578e-06, "loss": 0.2775, "step": 2197 }, { "epoch": 0.1090331861699489, "grad_norm": 8.399391174316406, "learning_rate": 9.75520960887945e-06, "loss": 0.4298, "step": 2198 }, { "epoch": 0.10908279180514907, "grad_norm": 13.582337379455566, "learning_rate": 9.754966290299075e-06, "loss": 0.4154, "step": 2199 }, { "epoch": 0.10913239744034922, "grad_norm": 10.09403133392334, "learning_rate": 9.754722853888485e-06, "loss": 0.4049, "step": 2200 }, { "epoch": 0.10918200307554939, "grad_norm": 9.07738971710205, "learning_rate": 9.754479299653711e-06, "loss": 0.3395, "step": 2201 }, { "epoch": 0.10923160871074954, "grad_norm": 10.335923194885254, "learning_rate": 9.754235627600791e-06, "loss": 0.3746, "step": 2202 }, { "epoch": 0.1092812143459497, "grad_norm": 8.579602241516113, "learning_rate": 9.753991837735762e-06, "loss": 0.332, "step": 2203 }, { "epoch": 0.10933081998114987, "grad_norm": 18.68873405456543, "learning_rate": 9.753747930064663e-06, "loss": 0.5191, "step": 2204 }, { "epoch": 0.10938042561635002, "grad_norm": 10.573698043823242, "learning_rate": 9.75350390459354e-06, "loss": 0.3363, "step": 2205 }, { "epoch": 0.10943003125155018, "grad_norm": 7.115175247192383, "learning_rate": 9.753259761328439e-06, "loss": 0.3171, "step": 2206 }, { "epoch": 0.10947963688675033, "grad_norm": 16.08066177368164, "learning_rate": 9.753015500275413e-06, "loss": 0.2811, "step": 2207 }, { "epoch": 0.1095292425219505, "grad_norm": 10.271651268005371, "learning_rate": 9.752771121440513e-06, "loss": 0.3694, "step": 2208 }, { "epoch": 0.10957884815715065, "grad_norm": 13.423624038696289, "learning_rate": 9.752526624829794e-06, "loss": 0.3781, "step": 2209 }, { "epoch": 0.10962845379235081, "grad_norm": 4.5903239250183105, "learning_rate": 9.752282010449316e-06, "loss": 0.3278, "step": 2210 }, { "epoch": 0.10967805942755098, "grad_norm": 7.2523088455200195, "learning_rate": 9.752037278305138e-06, "loss": 0.3011, "step": 2211 }, { "epoch": 0.10972766506275113, "grad_norm": 12.96951961517334, "learning_rate": 9.751792428403329e-06, "loss": 0.3651, "step": 2212 }, { "epoch": 0.10977727069795129, "grad_norm": 5.046563625335693, "learning_rate": 9.751547460749953e-06, "loss": 0.3134, "step": 2213 }, { "epoch": 0.10982687633315144, "grad_norm": 10.431803703308105, "learning_rate": 9.751302375351082e-06, "loss": 0.3889, "step": 2214 }, { "epoch": 0.1098764819683516, "grad_norm": 7.651235103607178, "learning_rate": 9.751057172212789e-06, "loss": 0.2732, "step": 2215 }, { "epoch": 0.10992608760355177, "grad_norm": 4.910721778869629, "learning_rate": 9.75081185134115e-06, "loss": 0.2918, "step": 2216 }, { "epoch": 0.10997569323875192, "grad_norm": 17.847806930541992, "learning_rate": 9.750566412742243e-06, "loss": 0.445, "step": 2217 }, { "epoch": 0.11002529887395208, "grad_norm": 6.284252166748047, "learning_rate": 9.750320856422154e-06, "loss": 0.2987, "step": 2218 }, { "epoch": 0.11007490450915224, "grad_norm": 8.634982109069824, "learning_rate": 9.750075182386964e-06, "loss": 0.3532, "step": 2219 }, { "epoch": 0.1101245101443524, "grad_norm": 12.71630573272705, "learning_rate": 9.749829390642763e-06, "loss": 0.3762, "step": 2220 }, { "epoch": 0.11017411577955255, "grad_norm": 8.237895965576172, "learning_rate": 9.749583481195639e-06, "loss": 0.3659, "step": 2221 }, { "epoch": 0.11022372141475272, "grad_norm": 17.926950454711914, "learning_rate": 9.749337454051689e-06, "loss": 0.4071, "step": 2222 }, { "epoch": 0.11027332704995288, "grad_norm": 10.738197326660156, "learning_rate": 9.749091309217009e-06, "loss": 0.3343, "step": 2223 }, { "epoch": 0.11032293268515303, "grad_norm": 5.25842809677124, "learning_rate": 9.748845046697698e-06, "loss": 0.3483, "step": 2224 }, { "epoch": 0.1103725383203532, "grad_norm": 7.712797164916992, "learning_rate": 9.748598666499858e-06, "loss": 0.3222, "step": 2225 }, { "epoch": 0.11042214395555335, "grad_norm": 5.2801666259765625, "learning_rate": 9.748352168629596e-06, "loss": 0.2777, "step": 2226 }, { "epoch": 0.11047174959075351, "grad_norm": 19.703712463378906, "learning_rate": 9.748105553093019e-06, "loss": 0.4646, "step": 2227 }, { "epoch": 0.11052135522595367, "grad_norm": 11.915000915527344, "learning_rate": 9.747858819896238e-06, "loss": 0.411, "step": 2228 }, { "epoch": 0.11057096086115382, "grad_norm": 17.656381607055664, "learning_rate": 9.747611969045368e-06, "loss": 0.6456, "step": 2229 }, { "epoch": 0.11062056649635399, "grad_norm": 6.515635013580322, "learning_rate": 9.747365000546528e-06, "loss": 0.3309, "step": 2230 }, { "epoch": 0.11067017213155414, "grad_norm": 5.260529518127441, "learning_rate": 9.747117914405833e-06, "loss": 0.3334, "step": 2231 }, { "epoch": 0.1107197777667543, "grad_norm": 6.315196514129639, "learning_rate": 9.746870710629409e-06, "loss": 0.2419, "step": 2232 }, { "epoch": 0.11076938340195447, "grad_norm": 12.825075149536133, "learning_rate": 9.746623389223381e-06, "loss": 0.2922, "step": 2233 }, { "epoch": 0.11081898903715462, "grad_norm": 5.13232421875, "learning_rate": 9.74637595019388e-06, "loss": 0.2929, "step": 2234 }, { "epoch": 0.11086859467235478, "grad_norm": 12.718811988830566, "learning_rate": 9.746128393547036e-06, "loss": 0.5364, "step": 2235 }, { "epoch": 0.11091820030755493, "grad_norm": 8.53619384765625, "learning_rate": 9.745880719288982e-06, "loss": 0.3164, "step": 2236 }, { "epoch": 0.1109678059427551, "grad_norm": 8.805099487304688, "learning_rate": 9.745632927425859e-06, "loss": 0.396, "step": 2237 }, { "epoch": 0.11101741157795525, "grad_norm": 13.991053581237793, "learning_rate": 9.745385017963804e-06, "loss": 0.3983, "step": 2238 }, { "epoch": 0.11106701721315541, "grad_norm": 11.795762062072754, "learning_rate": 9.745136990908962e-06, "loss": 0.436, "step": 2239 }, { "epoch": 0.11111662284835558, "grad_norm": 14.908984184265137, "learning_rate": 9.744888846267478e-06, "loss": 0.376, "step": 2240 }, { "epoch": 0.11116622848355573, "grad_norm": 8.178116798400879, "learning_rate": 9.744640584045503e-06, "loss": 0.4178, "step": 2241 }, { "epoch": 0.1112158341187559, "grad_norm": 12.956886291503906, "learning_rate": 9.744392204249187e-06, "loss": 0.4023, "step": 2242 }, { "epoch": 0.11126543975395604, "grad_norm": 10.89775562286377, "learning_rate": 9.744143706884688e-06, "loss": 0.2979, "step": 2243 }, { "epoch": 0.11131504538915621, "grad_norm": 6.914849281311035, "learning_rate": 9.74389509195816e-06, "loss": 0.3418, "step": 2244 }, { "epoch": 0.11136465102435637, "grad_norm": 7.2142252922058105, "learning_rate": 9.743646359475767e-06, "loss": 0.3415, "step": 2245 }, { "epoch": 0.11141425665955652, "grad_norm": 6.34801721572876, "learning_rate": 9.743397509443672e-06, "loss": 0.2786, "step": 2246 }, { "epoch": 0.11146386229475669, "grad_norm": 8.643049240112305, "learning_rate": 9.74314854186804e-06, "loss": 0.2709, "step": 2247 }, { "epoch": 0.11151346792995684, "grad_norm": 12.794844627380371, "learning_rate": 9.742899456755041e-06, "loss": 0.3456, "step": 2248 }, { "epoch": 0.111563073565157, "grad_norm": 5.564753532409668, "learning_rate": 9.742650254110849e-06, "loss": 0.2133, "step": 2249 }, { "epoch": 0.11161267920035715, "grad_norm": 24.116830825805664, "learning_rate": 9.742400933941638e-06, "loss": 0.3075, "step": 2250 }, { "epoch": 0.11166228483555732, "grad_norm": 12.395811080932617, "learning_rate": 9.742151496253587e-06, "loss": 0.4945, "step": 2251 }, { "epoch": 0.11171189047075748, "grad_norm": 12.35537338256836, "learning_rate": 9.741901941052876e-06, "loss": 0.4099, "step": 2252 }, { "epoch": 0.11176149610595763, "grad_norm": 8.875144004821777, "learning_rate": 9.74165226834569e-06, "loss": 0.3604, "step": 2253 }, { "epoch": 0.1118111017411578, "grad_norm": 7.580193519592285, "learning_rate": 9.741402478138218e-06, "loss": 0.2532, "step": 2254 }, { "epoch": 0.11186070737635795, "grad_norm": 43.566680908203125, "learning_rate": 9.741152570436645e-06, "loss": 0.3637, "step": 2255 }, { "epoch": 0.11191031301155811, "grad_norm": 8.673523902893066, "learning_rate": 9.740902545247169e-06, "loss": 0.3603, "step": 2256 }, { "epoch": 0.11195991864675828, "grad_norm": 6.950307369232178, "learning_rate": 9.740652402575981e-06, "loss": 0.3611, "step": 2257 }, { "epoch": 0.11200952428195843, "grad_norm": 11.326517105102539, "learning_rate": 9.740402142429284e-06, "loss": 0.4278, "step": 2258 }, { "epoch": 0.11205912991715859, "grad_norm": 6.751259803771973, "learning_rate": 9.740151764813276e-06, "loss": 0.2609, "step": 2259 }, { "epoch": 0.11210873555235874, "grad_norm": 7.852846622467041, "learning_rate": 9.739901269734165e-06, "loss": 0.2919, "step": 2260 }, { "epoch": 0.11215834118755891, "grad_norm": 9.730732917785645, "learning_rate": 9.739650657198153e-06, "loss": 0.2704, "step": 2261 }, { "epoch": 0.11220794682275907, "grad_norm": 8.042623519897461, "learning_rate": 9.739399927211457e-06, "loss": 0.1941, "step": 2262 }, { "epoch": 0.11225755245795922, "grad_norm": 11.459397315979004, "learning_rate": 9.739149079780285e-06, "loss": 0.467, "step": 2263 }, { "epoch": 0.11230715809315939, "grad_norm": 13.327730178833008, "learning_rate": 9.738898114910857e-06, "loss": 0.3891, "step": 2264 }, { "epoch": 0.11235676372835954, "grad_norm": 11.092421531677246, "learning_rate": 9.73864703260939e-06, "loss": 0.3865, "step": 2265 }, { "epoch": 0.1124063693635597, "grad_norm": 13.79794692993164, "learning_rate": 9.738395832882104e-06, "loss": 0.3295, "step": 2266 }, { "epoch": 0.11245597499875985, "grad_norm": 6.878377914428711, "learning_rate": 9.738144515735229e-06, "loss": 0.2253, "step": 2267 }, { "epoch": 0.11250558063396002, "grad_norm": 9.616692543029785, "learning_rate": 9.737893081174985e-06, "loss": 0.2606, "step": 2268 }, { "epoch": 0.11255518626916018, "grad_norm": 5.416689395904541, "learning_rate": 9.73764152920761e-06, "loss": 0.323, "step": 2269 }, { "epoch": 0.11260479190436033, "grad_norm": 14.241959571838379, "learning_rate": 9.737389859839336e-06, "loss": 0.4031, "step": 2270 }, { "epoch": 0.1126543975395605, "grad_norm": 10.40247631072998, "learning_rate": 9.737138073076396e-06, "loss": 0.4048, "step": 2271 }, { "epoch": 0.11270400317476065, "grad_norm": 12.991552352905273, "learning_rate": 9.736886168925033e-06, "loss": 0.3414, "step": 2272 }, { "epoch": 0.11275360880996081, "grad_norm": 15.00207233428955, "learning_rate": 9.736634147391489e-06, "loss": 0.5797, "step": 2273 }, { "epoch": 0.11280321444516098, "grad_norm": 24.565078735351562, "learning_rate": 9.736382008482006e-06, "loss": 0.4776, "step": 2274 }, { "epoch": 0.11285282008036113, "grad_norm": 12.01699161529541, "learning_rate": 9.736129752202836e-06, "loss": 0.3389, "step": 2275 }, { "epoch": 0.11290242571556129, "grad_norm": 6.642333984375, "learning_rate": 9.735877378560227e-06, "loss": 0.2551, "step": 2276 }, { "epoch": 0.11295203135076144, "grad_norm": 8.149763107299805, "learning_rate": 9.735624887560436e-06, "loss": 0.3994, "step": 2277 }, { "epoch": 0.1130016369859616, "grad_norm": 13.613983154296875, "learning_rate": 9.735372279209717e-06, "loss": 0.3605, "step": 2278 }, { "epoch": 0.11305124262116176, "grad_norm": 8.507684707641602, "learning_rate": 9.73511955351433e-06, "loss": 0.3334, "step": 2279 }, { "epoch": 0.11310084825636192, "grad_norm": 6.203250885009766, "learning_rate": 9.734866710480539e-06, "loss": 0.3152, "step": 2280 }, { "epoch": 0.11315045389156209, "grad_norm": 6.558668613433838, "learning_rate": 9.73461375011461e-06, "loss": 0.2918, "step": 2281 }, { "epoch": 0.11320005952676224, "grad_norm": 4.009509086608887, "learning_rate": 9.734360672422811e-06, "loss": 0.3033, "step": 2282 }, { "epoch": 0.1132496651619624, "grad_norm": 6.9792094230651855, "learning_rate": 9.734107477411411e-06, "loss": 0.3277, "step": 2283 }, { "epoch": 0.11329927079716255, "grad_norm": 6.244110107421875, "learning_rate": 9.733854165086687e-06, "loss": 0.4134, "step": 2284 }, { "epoch": 0.11334887643236272, "grad_norm": 14.468792915344238, "learning_rate": 9.733600735454916e-06, "loss": 0.5239, "step": 2285 }, { "epoch": 0.11339848206756288, "grad_norm": 9.354007720947266, "learning_rate": 9.733347188522377e-06, "loss": 0.2633, "step": 2286 }, { "epoch": 0.11344808770276303, "grad_norm": 10.339884757995605, "learning_rate": 9.733093524295353e-06, "loss": 0.4278, "step": 2287 }, { "epoch": 0.1134976933379632, "grad_norm": 6.688757419586182, "learning_rate": 9.732839742780133e-06, "loss": 0.3131, "step": 2288 }, { "epoch": 0.11354729897316335, "grad_norm": 8.004117965698242, "learning_rate": 9.732585843983e-06, "loss": 0.3096, "step": 2289 }, { "epoch": 0.11359690460836351, "grad_norm": 11.967208862304688, "learning_rate": 9.732331827910252e-06, "loss": 0.3631, "step": 2290 }, { "epoch": 0.11364651024356368, "grad_norm": 23.405000686645508, "learning_rate": 9.732077694568179e-06, "loss": 0.5313, "step": 2291 }, { "epoch": 0.11369611587876383, "grad_norm": 8.40040111541748, "learning_rate": 9.73182344396308e-06, "loss": 0.3443, "step": 2292 }, { "epoch": 0.11374572151396399, "grad_norm": 7.5579752922058105, "learning_rate": 9.731569076101255e-06, "loss": 0.3661, "step": 2293 }, { "epoch": 0.11379532714916414, "grad_norm": 11.897196769714355, "learning_rate": 9.73131459098901e-06, "loss": 0.3649, "step": 2294 }, { "epoch": 0.1138449327843643, "grad_norm": 5.296340465545654, "learning_rate": 9.731059988632648e-06, "loss": 0.3476, "step": 2295 }, { "epoch": 0.11389453841956446, "grad_norm": 6.114412307739258, "learning_rate": 9.73080526903848e-06, "loss": 0.3392, "step": 2296 }, { "epoch": 0.11394414405476462, "grad_norm": 7.076535701751709, "learning_rate": 9.730550432212818e-06, "loss": 0.3192, "step": 2297 }, { "epoch": 0.11399374968996479, "grad_norm": 14.987025260925293, "learning_rate": 9.730295478161974e-06, "loss": 0.4185, "step": 2298 }, { "epoch": 0.11404335532516494, "grad_norm": 12.136588096618652, "learning_rate": 9.730040406892269e-06, "loss": 0.4354, "step": 2299 }, { "epoch": 0.1140929609603651, "grad_norm": 6.878016948699951, "learning_rate": 9.729785218410026e-06, "loss": 0.2637, "step": 2300 }, { "epoch": 0.11414256659556525, "grad_norm": 6.107232093811035, "learning_rate": 9.729529912721562e-06, "loss": 0.2657, "step": 2301 }, { "epoch": 0.11419217223076542, "grad_norm": 4.251338005065918, "learning_rate": 9.72927448983321e-06, "loss": 0.3283, "step": 2302 }, { "epoch": 0.11424177786596558, "grad_norm": 8.699231147766113, "learning_rate": 9.729018949751295e-06, "loss": 0.3783, "step": 2303 }, { "epoch": 0.11429138350116573, "grad_norm": 12.071152687072754, "learning_rate": 9.728763292482153e-06, "loss": 0.4381, "step": 2304 }, { "epoch": 0.1143409891363659, "grad_norm": 19.63909912109375, "learning_rate": 9.728507518032116e-06, "loss": 0.4119, "step": 2305 }, { "epoch": 0.11439059477156605, "grad_norm": 4.767077922821045, "learning_rate": 9.728251626407526e-06, "loss": 0.191, "step": 2306 }, { "epoch": 0.11444020040676621, "grad_norm": 32.42797088623047, "learning_rate": 9.727995617614718e-06, "loss": 0.4372, "step": 2307 }, { "epoch": 0.11448980604196636, "grad_norm": 12.293757438659668, "learning_rate": 9.727739491660044e-06, "loss": 0.5695, "step": 2308 }, { "epoch": 0.11453941167716652, "grad_norm": 5.9633049964904785, "learning_rate": 9.727483248549844e-06, "loss": 0.323, "step": 2309 }, { "epoch": 0.11458901731236669, "grad_norm": 9.07762622833252, "learning_rate": 9.727226888290472e-06, "loss": 0.4888, "step": 2310 }, { "epoch": 0.11463862294756684, "grad_norm": 7.63347864151001, "learning_rate": 9.72697041088828e-06, "loss": 0.322, "step": 2311 }, { "epoch": 0.114688228582767, "grad_norm": 7.3694634437561035, "learning_rate": 9.726713816349621e-06, "loss": 0.3439, "step": 2312 }, { "epoch": 0.11473783421796716, "grad_norm": 5.23622989654541, "learning_rate": 9.726457104680856e-06, "loss": 0.332, "step": 2313 }, { "epoch": 0.11478743985316732, "grad_norm": 8.382673263549805, "learning_rate": 9.726200275888347e-06, "loss": 0.2958, "step": 2314 }, { "epoch": 0.11483704548836748, "grad_norm": 7.917362213134766, "learning_rate": 9.725943329978456e-06, "loss": 0.3895, "step": 2315 }, { "epoch": 0.11488665112356763, "grad_norm": 4.718419551849365, "learning_rate": 9.725686266957552e-06, "loss": 0.246, "step": 2316 }, { "epoch": 0.1149362567587678, "grad_norm": 6.761650085449219, "learning_rate": 9.725429086832005e-06, "loss": 0.3225, "step": 2317 }, { "epoch": 0.11498586239396795, "grad_norm": 6.9173264503479, "learning_rate": 9.725171789608188e-06, "loss": 0.2629, "step": 2318 }, { "epoch": 0.11503546802916811, "grad_norm": 4.381231784820557, "learning_rate": 9.724914375292476e-06, "loss": 0.2644, "step": 2319 }, { "epoch": 0.11508507366436828, "grad_norm": 13.76740837097168, "learning_rate": 9.72465684389125e-06, "loss": 0.4165, "step": 2320 }, { "epoch": 0.11513467929956843, "grad_norm": 4.938742160797119, "learning_rate": 9.724399195410887e-06, "loss": 0.3114, "step": 2321 }, { "epoch": 0.1151842849347686, "grad_norm": 14.524334907531738, "learning_rate": 9.724141429857777e-06, "loss": 0.4671, "step": 2322 }, { "epoch": 0.11523389056996874, "grad_norm": 7.580707550048828, "learning_rate": 9.723883547238306e-06, "loss": 0.3517, "step": 2323 }, { "epoch": 0.11528349620516891, "grad_norm": 7.403430938720703, "learning_rate": 9.723625547558863e-06, "loss": 0.3767, "step": 2324 }, { "epoch": 0.11533310184036906, "grad_norm": 5.536437511444092, "learning_rate": 9.723367430825842e-06, "loss": 0.2876, "step": 2325 }, { "epoch": 0.11538270747556922, "grad_norm": 6.79376745223999, "learning_rate": 9.723109197045638e-06, "loss": 0.2944, "step": 2326 }, { "epoch": 0.11543231311076939, "grad_norm": 12.74703598022461, "learning_rate": 9.722850846224654e-06, "loss": 0.5315, "step": 2327 }, { "epoch": 0.11548191874596954, "grad_norm": 15.757560729980469, "learning_rate": 9.72259237836929e-06, "loss": 0.4279, "step": 2328 }, { "epoch": 0.1155315243811697, "grad_norm": 8.032344818115234, "learning_rate": 9.722333793485949e-06, "loss": 0.2917, "step": 2329 }, { "epoch": 0.11558113001636985, "grad_norm": 16.552366256713867, "learning_rate": 9.722075091581041e-06, "loss": 0.3493, "step": 2330 }, { "epoch": 0.11563073565157002, "grad_norm": 7.654190540313721, "learning_rate": 9.721816272660976e-06, "loss": 0.2935, "step": 2331 }, { "epoch": 0.11568034128677018, "grad_norm": 10.611516952514648, "learning_rate": 9.721557336732167e-06, "loss": 0.4398, "step": 2332 }, { "epoch": 0.11572994692197033, "grad_norm": 10.01327896118164, "learning_rate": 9.721298283801033e-06, "loss": 0.1903, "step": 2333 }, { "epoch": 0.1157795525571705, "grad_norm": 6.269228458404541, "learning_rate": 9.721039113873992e-06, "loss": 0.2835, "step": 2334 }, { "epoch": 0.11582915819237065, "grad_norm": 16.9364070892334, "learning_rate": 9.720779826957464e-06, "loss": 0.3596, "step": 2335 }, { "epoch": 0.11587876382757081, "grad_norm": 5.730931282043457, "learning_rate": 9.720520423057879e-06, "loss": 0.3249, "step": 2336 }, { "epoch": 0.11592836946277098, "grad_norm": 6.033621788024902, "learning_rate": 9.720260902181661e-06, "loss": 0.3645, "step": 2337 }, { "epoch": 0.11597797509797113, "grad_norm": 6.2490949630737305, "learning_rate": 9.720001264335243e-06, "loss": 0.3934, "step": 2338 }, { "epoch": 0.11602758073317129, "grad_norm": 7.027055263519287, "learning_rate": 9.71974150952506e-06, "loss": 0.2391, "step": 2339 }, { "epoch": 0.11607718636837144, "grad_norm": 10.514912605285645, "learning_rate": 9.719481637757546e-06, "loss": 0.3197, "step": 2340 }, { "epoch": 0.11612679200357161, "grad_norm": 13.168127059936523, "learning_rate": 9.71922164903914e-06, "loss": 0.3935, "step": 2341 }, { "epoch": 0.11617639763877176, "grad_norm": 5.267793655395508, "learning_rate": 9.71896154337629e-06, "loss": 0.2715, "step": 2342 }, { "epoch": 0.11622600327397192, "grad_norm": 8.181732177734375, "learning_rate": 9.71870132077544e-06, "loss": 0.3134, "step": 2343 }, { "epoch": 0.11627560890917209, "grad_norm": 9.7907133102417, "learning_rate": 9.718440981243032e-06, "loss": 0.3495, "step": 2344 }, { "epoch": 0.11632521454437224, "grad_norm": 6.306664943695068, "learning_rate": 9.718180524785524e-06, "loss": 0.2991, "step": 2345 }, { "epoch": 0.1163748201795724, "grad_norm": 7.647217273712158, "learning_rate": 9.717919951409369e-06, "loss": 0.3169, "step": 2346 }, { "epoch": 0.11642442581477255, "grad_norm": 6.109262943267822, "learning_rate": 9.717659261121023e-06, "loss": 0.3199, "step": 2347 }, { "epoch": 0.11647403144997272, "grad_norm": 6.460163593292236, "learning_rate": 9.717398453926948e-06, "loss": 0.2928, "step": 2348 }, { "epoch": 0.11652363708517288, "grad_norm": 6.374751567840576, "learning_rate": 9.717137529833603e-06, "loss": 0.3264, "step": 2349 }, { "epoch": 0.11657324272037303, "grad_norm": 6.943753719329834, "learning_rate": 9.716876488847457e-06, "loss": 0.2966, "step": 2350 }, { "epoch": 0.1166228483555732, "grad_norm": 10.442416191101074, "learning_rate": 9.716615330974977e-06, "loss": 0.3874, "step": 2351 }, { "epoch": 0.11667245399077335, "grad_norm": 6.409839153289795, "learning_rate": 9.716354056222638e-06, "loss": 0.3194, "step": 2352 }, { "epoch": 0.11672205962597351, "grad_norm": 18.10710334777832, "learning_rate": 9.71609266459691e-06, "loss": 0.4258, "step": 2353 }, { "epoch": 0.11677166526117366, "grad_norm": 8.401642799377441, "learning_rate": 9.715831156104273e-06, "loss": 0.4614, "step": 2354 }, { "epoch": 0.11682127089637383, "grad_norm": 8.506279945373535, "learning_rate": 9.715569530751208e-06, "loss": 0.379, "step": 2355 }, { "epoch": 0.11687087653157399, "grad_norm": 9.406780242919922, "learning_rate": 9.715307788544195e-06, "loss": 0.2077, "step": 2356 }, { "epoch": 0.11692048216677414, "grad_norm": 7.003358364105225, "learning_rate": 9.715045929489723e-06, "loss": 0.2769, "step": 2357 }, { "epoch": 0.1169700878019743, "grad_norm": 7.160214900970459, "learning_rate": 9.71478395359428e-06, "loss": 0.329, "step": 2358 }, { "epoch": 0.11701969343717446, "grad_norm": 8.048697471618652, "learning_rate": 9.714521860864357e-06, "loss": 0.3936, "step": 2359 }, { "epoch": 0.11706929907237462, "grad_norm": 13.48479175567627, "learning_rate": 9.714259651306451e-06, "loss": 0.5517, "step": 2360 }, { "epoch": 0.11711890470757479, "grad_norm": 11.12836742401123, "learning_rate": 9.713997324927059e-06, "loss": 0.4738, "step": 2361 }, { "epoch": 0.11716851034277494, "grad_norm": 12.612269401550293, "learning_rate": 9.71373488173268e-06, "loss": 0.3521, "step": 2362 }, { "epoch": 0.1172181159779751, "grad_norm": 9.676106452941895, "learning_rate": 9.713472321729818e-06, "loss": 0.4109, "step": 2363 }, { "epoch": 0.11726772161317525, "grad_norm": 7.54909086227417, "learning_rate": 9.713209644924979e-06, "loss": 0.3276, "step": 2364 }, { "epoch": 0.11731732724837542, "grad_norm": 12.098886489868164, "learning_rate": 9.712946851324674e-06, "loss": 0.248, "step": 2365 }, { "epoch": 0.11736693288357558, "grad_norm": 7.386826515197754, "learning_rate": 9.712683940935414e-06, "loss": 0.3701, "step": 2366 }, { "epoch": 0.11741653851877573, "grad_norm": 15.875611305236816, "learning_rate": 9.712420913763714e-06, "loss": 0.4588, "step": 2367 }, { "epoch": 0.1174661441539759, "grad_norm": 8.18861198425293, "learning_rate": 9.712157769816091e-06, "loss": 0.392, "step": 2368 }, { "epoch": 0.11751574978917605, "grad_norm": 11.603535652160645, "learning_rate": 9.71189450909907e-06, "loss": 0.3589, "step": 2369 }, { "epoch": 0.11756535542437621, "grad_norm": 7.299293041229248, "learning_rate": 9.711631131619167e-06, "loss": 0.3013, "step": 2370 }, { "epoch": 0.11761496105957636, "grad_norm": 15.704314231872559, "learning_rate": 9.711367637382917e-06, "loss": 0.4766, "step": 2371 }, { "epoch": 0.11766456669477653, "grad_norm": 12.445527076721191, "learning_rate": 9.711104026396845e-06, "loss": 0.3924, "step": 2372 }, { "epoch": 0.11771417232997669, "grad_norm": 9.419421195983887, "learning_rate": 9.710840298667485e-06, "loss": 0.4412, "step": 2373 }, { "epoch": 0.11776377796517684, "grad_norm": 5.067162990570068, "learning_rate": 9.71057645420137e-06, "loss": 0.31, "step": 2374 }, { "epoch": 0.117813383600377, "grad_norm": 6.782060623168945, "learning_rate": 9.71031249300504e-06, "loss": 0.3241, "step": 2375 }, { "epoch": 0.11786298923557716, "grad_norm": 7.902908802032471, "learning_rate": 9.710048415085036e-06, "loss": 0.4476, "step": 2376 }, { "epoch": 0.11791259487077732, "grad_norm": 8.632847785949707, "learning_rate": 9.709784220447901e-06, "loss": 0.3567, "step": 2377 }, { "epoch": 0.11796220050597749, "grad_norm": 5.3717875480651855, "learning_rate": 9.709519909100182e-06, "loss": 0.2537, "step": 2378 }, { "epoch": 0.11801180614117764, "grad_norm": 8.283390998840332, "learning_rate": 9.709255481048433e-06, "loss": 0.3761, "step": 2379 }, { "epoch": 0.1180614117763778, "grad_norm": 7.986202716827393, "learning_rate": 9.708990936299199e-06, "loss": 0.3885, "step": 2380 }, { "epoch": 0.11811101741157795, "grad_norm": 18.183788299560547, "learning_rate": 9.70872627485904e-06, "loss": 0.5239, "step": 2381 }, { "epoch": 0.11816062304677812, "grad_norm": 5.412232398986816, "learning_rate": 9.708461496734515e-06, "loss": 0.143, "step": 2382 }, { "epoch": 0.11821022868197827, "grad_norm": 7.2118821144104, "learning_rate": 9.708196601932184e-06, "loss": 0.2737, "step": 2383 }, { "epoch": 0.11825983431717843, "grad_norm": 12.51240348815918, "learning_rate": 9.707931590458611e-06, "loss": 0.4007, "step": 2384 }, { "epoch": 0.1183094399523786, "grad_norm": 5.454401016235352, "learning_rate": 9.707666462320365e-06, "loss": 0.3562, "step": 2385 }, { "epoch": 0.11835904558757875, "grad_norm": 7.86375093460083, "learning_rate": 9.707401217524013e-06, "loss": 0.296, "step": 2386 }, { "epoch": 0.11840865122277891, "grad_norm": 7.986824989318848, "learning_rate": 9.70713585607613e-06, "loss": 0.4007, "step": 2387 }, { "epoch": 0.11845825685797906, "grad_norm": 7.0280609130859375, "learning_rate": 9.70687037798329e-06, "loss": 0.3413, "step": 2388 }, { "epoch": 0.11850786249317923, "grad_norm": 10.554536819458008, "learning_rate": 9.706604783252075e-06, "loss": 0.3499, "step": 2389 }, { "epoch": 0.11855746812837939, "grad_norm": 13.337430953979492, "learning_rate": 9.706339071889064e-06, "loss": 0.3817, "step": 2390 }, { "epoch": 0.11860707376357954, "grad_norm": 5.194786071777344, "learning_rate": 9.706073243900841e-06, "loss": 0.2973, "step": 2391 }, { "epoch": 0.1186566793987797, "grad_norm": 11.58328628540039, "learning_rate": 9.705807299293995e-06, "loss": 0.4166, "step": 2392 }, { "epoch": 0.11870628503397986, "grad_norm": 9.874541282653809, "learning_rate": 9.705541238075113e-06, "loss": 0.2764, "step": 2393 }, { "epoch": 0.11875589066918002, "grad_norm": 8.502825736999512, "learning_rate": 9.705275060250794e-06, "loss": 0.4029, "step": 2394 }, { "epoch": 0.11880549630438018, "grad_norm": 8.343789100646973, "learning_rate": 9.705008765827629e-06, "loss": 0.358, "step": 2395 }, { "epoch": 0.11885510193958033, "grad_norm": 6.912995338439941, "learning_rate": 9.704742354812219e-06, "loss": 0.3187, "step": 2396 }, { "epoch": 0.1189047075747805, "grad_norm": 5.345616340637207, "learning_rate": 9.704475827211163e-06, "loss": 0.3176, "step": 2397 }, { "epoch": 0.11895431320998065, "grad_norm": 7.013849258422852, "learning_rate": 9.704209183031071e-06, "loss": 0.331, "step": 2398 }, { "epoch": 0.11900391884518081, "grad_norm": 9.17798137664795, "learning_rate": 9.703942422278547e-06, "loss": 0.4141, "step": 2399 }, { "epoch": 0.11905352448038097, "grad_norm": 10.700651168823242, "learning_rate": 9.7036755449602e-06, "loss": 0.3072, "step": 2400 }, { "epoch": 0.11910313011558113, "grad_norm": 7.527407169342041, "learning_rate": 9.703408551082647e-06, "loss": 0.3384, "step": 2401 }, { "epoch": 0.1191527357507813, "grad_norm": 6.179930210113525, "learning_rate": 9.703141440652503e-06, "loss": 0.3169, "step": 2402 }, { "epoch": 0.11920234138598144, "grad_norm": 6.908524990081787, "learning_rate": 9.702874213676386e-06, "loss": 0.2724, "step": 2403 }, { "epoch": 0.11925194702118161, "grad_norm": 7.016523361206055, "learning_rate": 9.702606870160918e-06, "loss": 0.4163, "step": 2404 }, { "epoch": 0.11930155265638176, "grad_norm": 8.422061920166016, "learning_rate": 9.702339410112725e-06, "loss": 0.3289, "step": 2405 }, { "epoch": 0.11935115829158192, "grad_norm": 5.211742877960205, "learning_rate": 9.702071833538434e-06, "loss": 0.3589, "step": 2406 }, { "epoch": 0.11940076392678209, "grad_norm": 8.314519882202148, "learning_rate": 9.701804140444676e-06, "loss": 0.2608, "step": 2407 }, { "epoch": 0.11945036956198224, "grad_norm": 10.918852806091309, "learning_rate": 9.701536330838085e-06, "loss": 0.4329, "step": 2408 }, { "epoch": 0.1194999751971824, "grad_norm": 5.4944939613342285, "learning_rate": 9.701268404725297e-06, "loss": 0.2572, "step": 2409 }, { "epoch": 0.11954958083238255, "grad_norm": 10.761777877807617, "learning_rate": 9.70100036211295e-06, "loss": 0.3112, "step": 2410 }, { "epoch": 0.11959918646758272, "grad_norm": 6.775676250457764, "learning_rate": 9.70073220300769e-06, "loss": 0.2496, "step": 2411 }, { "epoch": 0.11964879210278287, "grad_norm": 8.24327564239502, "learning_rate": 9.700463927416159e-06, "loss": 0.2218, "step": 2412 }, { "epoch": 0.11969839773798303, "grad_norm": 11.606974601745605, "learning_rate": 9.700195535345004e-06, "loss": 0.3484, "step": 2413 }, { "epoch": 0.1197480033731832, "grad_norm": 14.801819801330566, "learning_rate": 9.699927026800879e-06, "loss": 0.4321, "step": 2414 }, { "epoch": 0.11979760900838335, "grad_norm": 11.03624439239502, "learning_rate": 9.699658401790436e-06, "loss": 0.4153, "step": 2415 }, { "epoch": 0.11984721464358351, "grad_norm": 22.590120315551758, "learning_rate": 9.69938966032033e-06, "loss": 0.4318, "step": 2416 }, { "epoch": 0.11989682027878366, "grad_norm": 9.678862571716309, "learning_rate": 9.699120802397224e-06, "loss": 0.415, "step": 2417 }, { "epoch": 0.11994642591398383, "grad_norm": 9.14079475402832, "learning_rate": 9.69885182802778e-06, "loss": 0.4136, "step": 2418 }, { "epoch": 0.11999603154918399, "grad_norm": 11.008824348449707, "learning_rate": 9.698582737218661e-06, "loss": 0.3784, "step": 2419 }, { "epoch": 0.12004563718438414, "grad_norm": 7.285724639892578, "learning_rate": 9.698313529976537e-06, "loss": 0.3273, "step": 2420 }, { "epoch": 0.12009524281958431, "grad_norm": 5.685687065124512, "learning_rate": 9.698044206308078e-06, "loss": 0.3075, "step": 2421 }, { "epoch": 0.12014484845478446, "grad_norm": 12.381129264831543, "learning_rate": 9.697774766219957e-06, "loss": 0.3421, "step": 2422 }, { "epoch": 0.12019445408998462, "grad_norm": 6.834768772125244, "learning_rate": 9.697505209718854e-06, "loss": 0.1966, "step": 2423 }, { "epoch": 0.12024405972518479, "grad_norm": 10.0400972366333, "learning_rate": 9.697235536811447e-06, "loss": 0.3718, "step": 2424 }, { "epoch": 0.12029366536038494, "grad_norm": 23.72802734375, "learning_rate": 9.696965747504417e-06, "loss": 0.4414, "step": 2425 }, { "epoch": 0.1203432709955851, "grad_norm": 5.583362579345703, "learning_rate": 9.696695841804453e-06, "loss": 0.3727, "step": 2426 }, { "epoch": 0.12039287663078525, "grad_norm": 6.547316074371338, "learning_rate": 9.69642581971824e-06, "loss": 0.3467, "step": 2427 }, { "epoch": 0.12044248226598542, "grad_norm": 7.288998126983643, "learning_rate": 9.696155681252473e-06, "loss": 0.2847, "step": 2428 }, { "epoch": 0.12049208790118557, "grad_norm": 7.304760932922363, "learning_rate": 9.695885426413842e-06, "loss": 0.3048, "step": 2429 }, { "epoch": 0.12054169353638573, "grad_norm": 9.221165657043457, "learning_rate": 9.695615055209047e-06, "loss": 0.4314, "step": 2430 }, { "epoch": 0.1205912991715859, "grad_norm": 20.934574127197266, "learning_rate": 9.695344567644785e-06, "loss": 0.3097, "step": 2431 }, { "epoch": 0.12064090480678605, "grad_norm": 18.89699935913086, "learning_rate": 9.69507396372776e-06, "loss": 0.4827, "step": 2432 }, { "epoch": 0.12069051044198621, "grad_norm": 5.4060258865356445, "learning_rate": 9.694803243464683e-06, "loss": 0.354, "step": 2433 }, { "epoch": 0.12074011607718636, "grad_norm": 4.657438278198242, "learning_rate": 9.694532406862255e-06, "loss": 0.2923, "step": 2434 }, { "epoch": 0.12078972171238653, "grad_norm": 17.240379333496094, "learning_rate": 9.694261453927189e-06, "loss": 0.4351, "step": 2435 }, { "epoch": 0.12083932734758669, "grad_norm": 7.809376239776611, "learning_rate": 9.693990384666203e-06, "loss": 0.2996, "step": 2436 }, { "epoch": 0.12088893298278684, "grad_norm": 5.509652614593506, "learning_rate": 9.693719199086011e-06, "loss": 0.3771, "step": 2437 }, { "epoch": 0.12093853861798701, "grad_norm": 7.868000030517578, "learning_rate": 9.693447897193335e-06, "loss": 0.2872, "step": 2438 }, { "epoch": 0.12098814425318716, "grad_norm": 4.514980316162109, "learning_rate": 9.693176478994897e-06, "loss": 0.2841, "step": 2439 }, { "epoch": 0.12103774988838732, "grad_norm": 4.087986469268799, "learning_rate": 9.692904944497423e-06, "loss": 0.272, "step": 2440 }, { "epoch": 0.12108735552358747, "grad_norm": 7.693967819213867, "learning_rate": 9.69263329370764e-06, "loss": 0.3974, "step": 2441 }, { "epoch": 0.12113696115878764, "grad_norm": 8.871305465698242, "learning_rate": 9.692361526632283e-06, "loss": 0.4344, "step": 2442 }, { "epoch": 0.1211865667939878, "grad_norm": 7.970797538757324, "learning_rate": 9.692089643278082e-06, "loss": 0.3469, "step": 2443 }, { "epoch": 0.12123617242918795, "grad_norm": 6.545554161071777, "learning_rate": 9.691817643651779e-06, "loss": 0.3713, "step": 2444 }, { "epoch": 0.12128577806438812, "grad_norm": 5.949271202087402, "learning_rate": 9.691545527760114e-06, "loss": 0.3371, "step": 2445 }, { "epoch": 0.12133538369958827, "grad_norm": 6.132912635803223, "learning_rate": 9.691273295609827e-06, "loss": 0.3525, "step": 2446 }, { "epoch": 0.12138498933478843, "grad_norm": 5.129061698913574, "learning_rate": 9.691000947207666e-06, "loss": 0.2823, "step": 2447 }, { "epoch": 0.1214345949699886, "grad_norm": 6.25486946105957, "learning_rate": 9.69072848256038e-06, "loss": 0.2803, "step": 2448 }, { "epoch": 0.12148420060518875, "grad_norm": 7.943402290344238, "learning_rate": 9.69045590167472e-06, "loss": 0.4521, "step": 2449 }, { "epoch": 0.12153380624038891, "grad_norm": 9.7322416305542, "learning_rate": 9.69018320455744e-06, "loss": 0.279, "step": 2450 }, { "epoch": 0.12158341187558906, "grad_norm": 12.378670692443848, "learning_rate": 9.6899103912153e-06, "loss": 0.3722, "step": 2451 }, { "epoch": 0.12163301751078923, "grad_norm": 5.052497386932373, "learning_rate": 9.689637461655058e-06, "loss": 0.2746, "step": 2452 }, { "epoch": 0.12168262314598939, "grad_norm": 3.6880791187286377, "learning_rate": 9.68936441588348e-06, "loss": 0.1894, "step": 2453 }, { "epoch": 0.12173222878118954, "grad_norm": 9.076184272766113, "learning_rate": 9.689091253907329e-06, "loss": 0.3225, "step": 2454 }, { "epoch": 0.1217818344163897, "grad_norm": 12.45032024383545, "learning_rate": 9.688817975733375e-06, "loss": 0.3014, "step": 2455 }, { "epoch": 0.12183144005158986, "grad_norm": 4.717978477478027, "learning_rate": 9.688544581368391e-06, "loss": 0.2926, "step": 2456 }, { "epoch": 0.12188104568679002, "grad_norm": 10.298452377319336, "learning_rate": 9.688271070819153e-06, "loss": 0.3601, "step": 2457 }, { "epoch": 0.12193065132199017, "grad_norm": 3.687117099761963, "learning_rate": 9.687997444092435e-06, "loss": 0.1936, "step": 2458 }, { "epoch": 0.12198025695719034, "grad_norm": 4.572269916534424, "learning_rate": 9.68772370119502e-06, "loss": 0.2546, "step": 2459 }, { "epoch": 0.1220298625923905, "grad_norm": 16.641082763671875, "learning_rate": 9.687449842133694e-06, "loss": 0.4583, "step": 2460 }, { "epoch": 0.12207946822759065, "grad_norm": 9.207003593444824, "learning_rate": 9.687175866915237e-06, "loss": 0.3917, "step": 2461 }, { "epoch": 0.12212907386279082, "grad_norm": 25.358091354370117, "learning_rate": 9.686901775546444e-06, "loss": 0.4368, "step": 2462 }, { "epoch": 0.12217867949799097, "grad_norm": 18.90797233581543, "learning_rate": 9.686627568034103e-06, "loss": 0.4327, "step": 2463 }, { "epoch": 0.12222828513319113, "grad_norm": 10.068788528442383, "learning_rate": 9.686353244385013e-06, "loss": 0.3111, "step": 2464 }, { "epoch": 0.1222778907683913, "grad_norm": 5.270859718322754, "learning_rate": 9.68607880460597e-06, "loss": 0.3246, "step": 2465 }, { "epoch": 0.12232749640359145, "grad_norm": 7.3048858642578125, "learning_rate": 9.685804248703772e-06, "loss": 0.327, "step": 2466 }, { "epoch": 0.12237710203879161, "grad_norm": 5.019975185394287, "learning_rate": 9.685529576685226e-06, "loss": 0.3095, "step": 2467 }, { "epoch": 0.12242670767399176, "grad_norm": 6.248720645904541, "learning_rate": 9.68525478855714e-06, "loss": 0.2877, "step": 2468 }, { "epoch": 0.12247631330919193, "grad_norm": 12.92682933807373, "learning_rate": 9.684979884326318e-06, "loss": 0.3899, "step": 2469 }, { "epoch": 0.12252591894439209, "grad_norm": 6.047433853149414, "learning_rate": 9.684704863999576e-06, "loss": 0.3337, "step": 2470 }, { "epoch": 0.12257552457959224, "grad_norm": 11.711431503295898, "learning_rate": 9.684429727583728e-06, "loss": 0.3505, "step": 2471 }, { "epoch": 0.1226251302147924, "grad_norm": 12.593937873840332, "learning_rate": 9.684154475085593e-06, "loss": 0.3931, "step": 2472 }, { "epoch": 0.12267473584999256, "grad_norm": 10.72181510925293, "learning_rate": 9.68387910651199e-06, "loss": 0.4113, "step": 2473 }, { "epoch": 0.12272434148519272, "grad_norm": 14.90906047821045, "learning_rate": 9.683603621869746e-06, "loss": 0.3892, "step": 2474 }, { "epoch": 0.12277394712039287, "grad_norm": 7.277804851531982, "learning_rate": 9.683328021165686e-06, "loss": 0.4326, "step": 2475 }, { "epoch": 0.12282355275559304, "grad_norm": 9.733091354370117, "learning_rate": 9.683052304406637e-06, "loss": 0.2852, "step": 2476 }, { "epoch": 0.1228731583907932, "grad_norm": 7.4674224853515625, "learning_rate": 9.682776471599433e-06, "loss": 0.3565, "step": 2477 }, { "epoch": 0.12292276402599335, "grad_norm": 5.227487087249756, "learning_rate": 9.682500522750913e-06, "loss": 0.2988, "step": 2478 }, { "epoch": 0.12297236966119351, "grad_norm": 7.060752868652344, "learning_rate": 9.68222445786791e-06, "loss": 0.209, "step": 2479 }, { "epoch": 0.12302197529639367, "grad_norm": 15.245250701904297, "learning_rate": 9.681948276957267e-06, "loss": 0.3939, "step": 2480 }, { "epoch": 0.12307158093159383, "grad_norm": 5.133892059326172, "learning_rate": 9.681671980025826e-06, "loss": 0.249, "step": 2481 }, { "epoch": 0.123121186566794, "grad_norm": 9.529043197631836, "learning_rate": 9.681395567080438e-06, "loss": 0.3636, "step": 2482 }, { "epoch": 0.12317079220199414, "grad_norm": 7.740728378295898, "learning_rate": 9.681119038127951e-06, "loss": 0.2755, "step": 2483 }, { "epoch": 0.12322039783719431, "grad_norm": 15.611144065856934, "learning_rate": 9.680842393175213e-06, "loss": 0.3822, "step": 2484 }, { "epoch": 0.12327000347239446, "grad_norm": 5.2435431480407715, "learning_rate": 9.680565632229086e-06, "loss": 0.3246, "step": 2485 }, { "epoch": 0.12331960910759462, "grad_norm": 6.946830749511719, "learning_rate": 9.680288755296425e-06, "loss": 0.2891, "step": 2486 }, { "epoch": 0.12336921474279477, "grad_norm": 5.080166339874268, "learning_rate": 9.680011762384092e-06, "loss": 0.2738, "step": 2487 }, { "epoch": 0.12341882037799494, "grad_norm": 4.7754387855529785, "learning_rate": 9.67973465349895e-06, "loss": 0.3061, "step": 2488 }, { "epoch": 0.1234684260131951, "grad_norm": 7.710872173309326, "learning_rate": 9.679457428647867e-06, "loss": 0.2949, "step": 2489 }, { "epoch": 0.12351803164839525, "grad_norm": 21.543596267700195, "learning_rate": 9.67918008783771e-06, "loss": 0.4132, "step": 2490 }, { "epoch": 0.12356763728359542, "grad_norm": 6.360115051269531, "learning_rate": 9.678902631075355e-06, "loss": 0.2952, "step": 2491 }, { "epoch": 0.12361724291879557, "grad_norm": 5.175297737121582, "learning_rate": 9.678625058367678e-06, "loss": 0.2564, "step": 2492 }, { "epoch": 0.12366684855399573, "grad_norm": 9.5166597366333, "learning_rate": 9.678347369721554e-06, "loss": 0.297, "step": 2493 }, { "epoch": 0.1237164541891959, "grad_norm": 14.420470237731934, "learning_rate": 9.678069565143865e-06, "loss": 0.2162, "step": 2494 }, { "epoch": 0.12376605982439605, "grad_norm": 7.715248107910156, "learning_rate": 9.677791644641498e-06, "loss": 0.2701, "step": 2495 }, { "epoch": 0.12381566545959621, "grad_norm": 11.06255054473877, "learning_rate": 9.677513608221338e-06, "loss": 0.3052, "step": 2496 }, { "epoch": 0.12386527109479636, "grad_norm": 13.190618515014648, "learning_rate": 9.677235455890275e-06, "loss": 0.4981, "step": 2497 }, { "epoch": 0.12391487672999653, "grad_norm": 8.635363578796387, "learning_rate": 9.6769571876552e-06, "loss": 0.2969, "step": 2498 }, { "epoch": 0.1239644823651967, "grad_norm": 5.006185531616211, "learning_rate": 9.676678803523013e-06, "loss": 0.1718, "step": 2499 }, { "epoch": 0.12401408800039684, "grad_norm": 9.337536811828613, "learning_rate": 9.676400303500607e-06, "loss": 0.3822, "step": 2500 }, { "epoch": 0.12406369363559701, "grad_norm": 7.5692315101623535, "learning_rate": 9.676121687594886e-06, "loss": 0.3199, "step": 2501 }, { "epoch": 0.12411329927079716, "grad_norm": 12.501665115356445, "learning_rate": 9.675842955812755e-06, "loss": 0.413, "step": 2502 }, { "epoch": 0.12416290490599732, "grad_norm": 6.633852958679199, "learning_rate": 9.675564108161123e-06, "loss": 0.356, "step": 2503 }, { "epoch": 0.12421251054119747, "grad_norm": 10.387250900268555, "learning_rate": 9.675285144646892e-06, "loss": 0.3841, "step": 2504 }, { "epoch": 0.12426211617639764, "grad_norm": 14.506689071655273, "learning_rate": 9.675006065276985e-06, "loss": 0.3793, "step": 2505 }, { "epoch": 0.1243117218115978, "grad_norm": 8.770960807800293, "learning_rate": 9.674726870058312e-06, "loss": 0.3507, "step": 2506 }, { "epoch": 0.12436132744679795, "grad_norm": 6.713231086730957, "learning_rate": 9.674447558997791e-06, "loss": 0.2801, "step": 2507 }, { "epoch": 0.12441093308199812, "grad_norm": 8.033679962158203, "learning_rate": 9.674168132102346e-06, "loss": 0.3469, "step": 2508 }, { "epoch": 0.12446053871719827, "grad_norm": 13.966948509216309, "learning_rate": 9.673888589378902e-06, "loss": 0.49, "step": 2509 }, { "epoch": 0.12451014435239843, "grad_norm": 9.12122631072998, "learning_rate": 9.673608930834383e-06, "loss": 0.3741, "step": 2510 }, { "epoch": 0.1245597499875986, "grad_norm": 9.270841598510742, "learning_rate": 9.673329156475719e-06, "loss": 0.365, "step": 2511 }, { "epoch": 0.12460935562279875, "grad_norm": 11.122451782226562, "learning_rate": 9.673049266309847e-06, "loss": 0.3697, "step": 2512 }, { "epoch": 0.12465896125799891, "grad_norm": 12.341662406921387, "learning_rate": 9.672769260343698e-06, "loss": 0.4115, "step": 2513 }, { "epoch": 0.12470856689319906, "grad_norm": 5.553604602813721, "learning_rate": 9.672489138584215e-06, "loss": 0.3915, "step": 2514 }, { "epoch": 0.12475817252839923, "grad_norm": 20.546951293945312, "learning_rate": 9.672208901038335e-06, "loss": 0.5395, "step": 2515 }, { "epoch": 0.12480777816359938, "grad_norm": 6.234288215637207, "learning_rate": 9.671928547713008e-06, "loss": 0.2904, "step": 2516 }, { "epoch": 0.12485738379879954, "grad_norm": 7.514054775238037, "learning_rate": 9.671648078615176e-06, "loss": 0.3162, "step": 2517 }, { "epoch": 0.12490698943399971, "grad_norm": 6.661904335021973, "learning_rate": 9.671367493751793e-06, "loss": 0.3617, "step": 2518 }, { "epoch": 0.12495659506919986, "grad_norm": 9.339412689208984, "learning_rate": 9.671086793129811e-06, "loss": 0.3991, "step": 2519 }, { "epoch": 0.12500620070440002, "grad_norm": 8.442239761352539, "learning_rate": 9.670805976756183e-06, "loss": 0.2951, "step": 2520 }, { "epoch": 0.12505580633960017, "grad_norm": 7.529243469238281, "learning_rate": 9.670525044637872e-06, "loss": 0.3217, "step": 2521 }, { "epoch": 0.12510541197480032, "grad_norm": 8.800198554992676, "learning_rate": 9.670243996781837e-06, "loss": 0.4207, "step": 2522 }, { "epoch": 0.1251550176100005, "grad_norm": 12.036102294921875, "learning_rate": 9.669962833195043e-06, "loss": 0.3358, "step": 2523 }, { "epoch": 0.12520462324520065, "grad_norm": 6.969433307647705, "learning_rate": 9.669681553884458e-06, "loss": 0.3218, "step": 2524 }, { "epoch": 0.1252542288804008, "grad_norm": 7.515370845794678, "learning_rate": 9.669400158857053e-06, "loss": 0.3244, "step": 2525 }, { "epoch": 0.12530383451560098, "grad_norm": 9.109379768371582, "learning_rate": 9.669118648119797e-06, "loss": 0.3578, "step": 2526 }, { "epoch": 0.12535344015080113, "grad_norm": 8.27729320526123, "learning_rate": 9.668837021679669e-06, "loss": 0.3106, "step": 2527 }, { "epoch": 0.12540304578600128, "grad_norm": 4.201387405395508, "learning_rate": 9.66855527954365e-06, "loss": 0.2075, "step": 2528 }, { "epoch": 0.12545265142120146, "grad_norm": 10.037164688110352, "learning_rate": 9.668273421718717e-06, "loss": 0.2995, "step": 2529 }, { "epoch": 0.1255022570564016, "grad_norm": 9.677303314208984, "learning_rate": 9.667991448211858e-06, "loss": 0.3879, "step": 2530 }, { "epoch": 0.12555186269160176, "grad_norm": 6.927585601806641, "learning_rate": 9.66770935903006e-06, "loss": 0.3286, "step": 2531 }, { "epoch": 0.1256014683268019, "grad_norm": 11.605757713317871, "learning_rate": 9.667427154180312e-06, "loss": 0.3735, "step": 2532 }, { "epoch": 0.1256510739620021, "grad_norm": 7.0952911376953125, "learning_rate": 9.667144833669608e-06, "loss": 0.2278, "step": 2533 }, { "epoch": 0.12570067959720224, "grad_norm": 9.724806785583496, "learning_rate": 9.666862397504944e-06, "loss": 0.3683, "step": 2534 }, { "epoch": 0.1257502852324024, "grad_norm": 5.510579586029053, "learning_rate": 9.666579845693318e-06, "loss": 0.3149, "step": 2535 }, { "epoch": 0.12579989086760257, "grad_norm": 5.857567310333252, "learning_rate": 9.666297178241732e-06, "loss": 0.3194, "step": 2536 }, { "epoch": 0.12584949650280272, "grad_norm": 12.43244743347168, "learning_rate": 9.666014395157192e-06, "loss": 0.5139, "step": 2537 }, { "epoch": 0.12589910213800287, "grad_norm": 11.095528602600098, "learning_rate": 9.665731496446706e-06, "loss": 0.3943, "step": 2538 }, { "epoch": 0.12594870777320302, "grad_norm": 7.626346588134766, "learning_rate": 9.665448482117281e-06, "loss": 0.3998, "step": 2539 }, { "epoch": 0.1259983134084032, "grad_norm": 7.2959818840026855, "learning_rate": 9.665165352175933e-06, "loss": 0.3271, "step": 2540 }, { "epoch": 0.12604791904360335, "grad_norm": 9.65866470336914, "learning_rate": 9.664882106629676e-06, "loss": 0.323, "step": 2541 }, { "epoch": 0.1260975246788035, "grad_norm": 9.726171493530273, "learning_rate": 9.664598745485533e-06, "loss": 0.3019, "step": 2542 }, { "epoch": 0.12614713031400368, "grad_norm": 11.737858772277832, "learning_rate": 9.664315268750521e-06, "loss": 0.4198, "step": 2543 }, { "epoch": 0.12619673594920383, "grad_norm": 5.297663688659668, "learning_rate": 9.664031676431666e-06, "loss": 0.2637, "step": 2544 }, { "epoch": 0.12624634158440398, "grad_norm": 14.707118034362793, "learning_rate": 9.663747968535999e-06, "loss": 0.4514, "step": 2545 }, { "epoch": 0.12629594721960416, "grad_norm": 13.07162857055664, "learning_rate": 9.663464145070547e-06, "loss": 0.3805, "step": 2546 }, { "epoch": 0.1263455528548043, "grad_norm": 4.375665187835693, "learning_rate": 9.663180206042341e-06, "loss": 0.284, "step": 2547 }, { "epoch": 0.12639515849000446, "grad_norm": 11.147712707519531, "learning_rate": 9.662896151458423e-06, "loss": 0.4494, "step": 2548 }, { "epoch": 0.1264447641252046, "grad_norm": 15.574592590332031, "learning_rate": 9.66261198132583e-06, "loss": 0.384, "step": 2549 }, { "epoch": 0.1264943697604048, "grad_norm": 4.942800521850586, "learning_rate": 9.6623276956516e-06, "loss": 0.3012, "step": 2550 }, { "epoch": 0.12654397539560494, "grad_norm": 7.071644306182861, "learning_rate": 9.662043294442782e-06, "loss": 0.2772, "step": 2551 }, { "epoch": 0.1265935810308051, "grad_norm": 5.563115119934082, "learning_rate": 9.661758777706422e-06, "loss": 0.3016, "step": 2552 }, { "epoch": 0.12664318666600527, "grad_norm": 9.499642372131348, "learning_rate": 9.661474145449571e-06, "loss": 0.2927, "step": 2553 }, { "epoch": 0.12669279230120542, "grad_norm": 6.241979122161865, "learning_rate": 9.661189397679282e-06, "loss": 0.3685, "step": 2554 }, { "epoch": 0.12674239793640557, "grad_norm": 5.352355480194092, "learning_rate": 9.660904534402612e-06, "loss": 0.289, "step": 2555 }, { "epoch": 0.12679200357160572, "grad_norm": 5.599234580993652, "learning_rate": 9.660619555626617e-06, "loss": 0.2796, "step": 2556 }, { "epoch": 0.1268416092068059, "grad_norm": 9.35952377319336, "learning_rate": 9.660334461358364e-06, "loss": 0.3425, "step": 2557 }, { "epoch": 0.12689121484200605, "grad_norm": 7.370266914367676, "learning_rate": 9.660049251604914e-06, "loss": 0.2916, "step": 2558 }, { "epoch": 0.1269408204772062, "grad_norm": 7.044974327087402, "learning_rate": 9.659763926373335e-06, "loss": 0.3317, "step": 2559 }, { "epoch": 0.12699042611240638, "grad_norm": 6.374874114990234, "learning_rate": 9.659478485670699e-06, "loss": 0.3133, "step": 2560 }, { "epoch": 0.12704003174760653, "grad_norm": 5.96092414855957, "learning_rate": 9.659192929504077e-06, "loss": 0.2945, "step": 2561 }, { "epoch": 0.12708963738280668, "grad_norm": 6.644962310791016, "learning_rate": 9.658907257880547e-06, "loss": 0.3063, "step": 2562 }, { "epoch": 0.12713924301800686, "grad_norm": 10.66186237335205, "learning_rate": 9.65862147080719e-06, "loss": 0.3673, "step": 2563 }, { "epoch": 0.127188848653207, "grad_norm": 16.00255584716797, "learning_rate": 9.658335568291083e-06, "loss": 0.4996, "step": 2564 }, { "epoch": 0.12723845428840716, "grad_norm": 6.592918872833252, "learning_rate": 9.658049550339314e-06, "loss": 0.4226, "step": 2565 }, { "epoch": 0.1272880599236073, "grad_norm": 4.7215576171875, "learning_rate": 9.657763416958971e-06, "loss": 0.2321, "step": 2566 }, { "epoch": 0.1273376655588075, "grad_norm": 11.190516471862793, "learning_rate": 9.657477168157144e-06, "loss": 0.3738, "step": 2567 }, { "epoch": 0.12738727119400764, "grad_norm": 6.673969268798828, "learning_rate": 9.657190803940924e-06, "loss": 0.3648, "step": 2568 }, { "epoch": 0.1274368768292078, "grad_norm": 8.217679977416992, "learning_rate": 9.65690432431741e-06, "loss": 0.3982, "step": 2569 }, { "epoch": 0.12748648246440797, "grad_norm": 7.212956428527832, "learning_rate": 9.6566177292937e-06, "loss": 0.3368, "step": 2570 }, { "epoch": 0.12753608809960812, "grad_norm": 6.173274040222168, "learning_rate": 9.656331018876896e-06, "loss": 0.3199, "step": 2571 }, { "epoch": 0.12758569373480827, "grad_norm": 7.669810771942139, "learning_rate": 9.656044193074104e-06, "loss": 0.3072, "step": 2572 }, { "epoch": 0.12763529937000842, "grad_norm": 8.89181900024414, "learning_rate": 9.65575725189243e-06, "loss": 0.3936, "step": 2573 }, { "epoch": 0.1276849050052086, "grad_norm": 5.202755451202393, "learning_rate": 9.655470195338987e-06, "loss": 0.343, "step": 2574 }, { "epoch": 0.12773451064040875, "grad_norm": 15.471092224121094, "learning_rate": 9.655183023420885e-06, "loss": 0.4369, "step": 2575 }, { "epoch": 0.1277841162756089, "grad_norm": 9.563400268554688, "learning_rate": 9.654895736145243e-06, "loss": 0.4486, "step": 2576 }, { "epoch": 0.12783372191080908, "grad_norm": 6.3487019538879395, "learning_rate": 9.654608333519178e-06, "loss": 0.2659, "step": 2577 }, { "epoch": 0.12788332754600923, "grad_norm": 17.384136199951172, "learning_rate": 9.654320815549813e-06, "loss": 0.3334, "step": 2578 }, { "epoch": 0.12793293318120938, "grad_norm": 5.5724778175354, "learning_rate": 9.654033182244273e-06, "loss": 0.2887, "step": 2579 }, { "epoch": 0.12798253881640956, "grad_norm": 8.023836135864258, "learning_rate": 9.653745433609685e-06, "loss": 0.3251, "step": 2580 }, { "epoch": 0.1280321444516097, "grad_norm": 13.632904052734375, "learning_rate": 9.653457569653182e-06, "loss": 0.4203, "step": 2581 }, { "epoch": 0.12808175008680986, "grad_norm": 12.304381370544434, "learning_rate": 9.653169590381893e-06, "loss": 0.4148, "step": 2582 }, { "epoch": 0.12813135572201, "grad_norm": 9.267997741699219, "learning_rate": 9.652881495802957e-06, "loss": 0.3162, "step": 2583 }, { "epoch": 0.1281809613572102, "grad_norm": 5.754968643188477, "learning_rate": 9.652593285923514e-06, "loss": 0.2564, "step": 2584 }, { "epoch": 0.12823056699241034, "grad_norm": 8.393622398376465, "learning_rate": 9.652304960750705e-06, "loss": 0.2895, "step": 2585 }, { "epoch": 0.1282801726276105, "grad_norm": 6.875696659088135, "learning_rate": 9.652016520291672e-06, "loss": 0.4073, "step": 2586 }, { "epoch": 0.12832977826281067, "grad_norm": 11.927863121032715, "learning_rate": 9.651727964553568e-06, "loss": 0.4057, "step": 2587 }, { "epoch": 0.12837938389801082, "grad_norm": 7.422607421875, "learning_rate": 9.65143929354354e-06, "loss": 0.4003, "step": 2588 }, { "epoch": 0.12842898953321097, "grad_norm": 9.225765228271484, "learning_rate": 9.651150507268743e-06, "loss": 0.4229, "step": 2589 }, { "epoch": 0.12847859516841112, "grad_norm": 8.54140853881836, "learning_rate": 9.650861605736331e-06, "loss": 0.3057, "step": 2590 }, { "epoch": 0.1285282008036113, "grad_norm": 10.7621431350708, "learning_rate": 9.650572588953466e-06, "loss": 0.3223, "step": 2591 }, { "epoch": 0.12857780643881145, "grad_norm": 6.388766765594482, "learning_rate": 9.650283456927307e-06, "loss": 0.3428, "step": 2592 }, { "epoch": 0.1286274120740116, "grad_norm": 6.131351470947266, "learning_rate": 9.649994209665021e-06, "loss": 0.2853, "step": 2593 }, { "epoch": 0.12867701770921178, "grad_norm": 11.816426277160645, "learning_rate": 9.649704847173776e-06, "loss": 0.4695, "step": 2594 }, { "epoch": 0.12872662334441193, "grad_norm": 8.72481632232666, "learning_rate": 9.649415369460742e-06, "loss": 0.2815, "step": 2595 }, { "epoch": 0.12877622897961208, "grad_norm": 6.414183139801025, "learning_rate": 9.649125776533091e-06, "loss": 0.2743, "step": 2596 }, { "epoch": 0.12882583461481223, "grad_norm": 5.599297046661377, "learning_rate": 9.648836068398e-06, "loss": 0.3163, "step": 2597 }, { "epoch": 0.1288754402500124, "grad_norm": 8.940401077270508, "learning_rate": 9.648546245062647e-06, "loss": 0.3744, "step": 2598 }, { "epoch": 0.12892504588521256, "grad_norm": 9.108989715576172, "learning_rate": 9.648256306534219e-06, "loss": 0.3656, "step": 2599 }, { "epoch": 0.1289746515204127, "grad_norm": 5.974538326263428, "learning_rate": 9.647966252819894e-06, "loss": 0.28, "step": 2600 }, { "epoch": 0.1290242571556129, "grad_norm": 8.575970649719238, "learning_rate": 9.647676083926862e-06, "loss": 0.4577, "step": 2601 }, { "epoch": 0.12907386279081304, "grad_norm": 5.662580490112305, "learning_rate": 9.647385799862317e-06, "loss": 0.317, "step": 2602 }, { "epoch": 0.1291234684260132, "grad_norm": 6.578912258148193, "learning_rate": 9.647095400633449e-06, "loss": 0.3474, "step": 2603 }, { "epoch": 0.12917307406121337, "grad_norm": 18.591588973999023, "learning_rate": 9.646804886247454e-06, "loss": 0.4499, "step": 2604 }, { "epoch": 0.12922267969641352, "grad_norm": 5.869370460510254, "learning_rate": 9.646514256711532e-06, "loss": 0.2697, "step": 2605 }, { "epoch": 0.12927228533161367, "grad_norm": 8.05028247833252, "learning_rate": 9.646223512032886e-06, "loss": 0.3264, "step": 2606 }, { "epoch": 0.12932189096681382, "grad_norm": 7.499444484710693, "learning_rate": 9.645932652218718e-06, "loss": 0.3417, "step": 2607 }, { "epoch": 0.129371496602014, "grad_norm": 12.103188514709473, "learning_rate": 9.645641677276239e-06, "loss": 0.4794, "step": 2608 }, { "epoch": 0.12942110223721415, "grad_norm": 5.798313617706299, "learning_rate": 9.645350587212656e-06, "loss": 0.3136, "step": 2609 }, { "epoch": 0.1294707078724143, "grad_norm": 8.534245491027832, "learning_rate": 9.645059382035185e-06, "loss": 0.401, "step": 2610 }, { "epoch": 0.12952031350761448, "grad_norm": 10.440908432006836, "learning_rate": 9.644768061751042e-06, "loss": 0.3161, "step": 2611 }, { "epoch": 0.12956991914281463, "grad_norm": 7.8620805740356445, "learning_rate": 9.644476626367446e-06, "loss": 0.3001, "step": 2612 }, { "epoch": 0.12961952477801478, "grad_norm": 6.015709400177002, "learning_rate": 9.644185075891615e-06, "loss": 0.2933, "step": 2613 }, { "epoch": 0.12966913041321493, "grad_norm": 5.654194355010986, "learning_rate": 9.64389341033078e-06, "loss": 0.3404, "step": 2614 }, { "epoch": 0.1297187360484151, "grad_norm": 7.64992618560791, "learning_rate": 9.643601629692165e-06, "loss": 0.392, "step": 2615 }, { "epoch": 0.12976834168361526, "grad_norm": 11.992166519165039, "learning_rate": 9.643309733983e-06, "loss": 0.4993, "step": 2616 }, { "epoch": 0.1298179473188154, "grad_norm": 12.879552841186523, "learning_rate": 9.643017723210519e-06, "loss": 0.5283, "step": 2617 }, { "epoch": 0.12986755295401559, "grad_norm": 15.67817497253418, "learning_rate": 9.642725597381961e-06, "loss": 0.4827, "step": 2618 }, { "epoch": 0.12991715858921574, "grad_norm": 11.822070121765137, "learning_rate": 9.642433356504561e-06, "loss": 0.3363, "step": 2619 }, { "epoch": 0.1299667642244159, "grad_norm": 18.285009384155273, "learning_rate": 9.642141000585563e-06, "loss": 0.3918, "step": 2620 }, { "epoch": 0.13001636985961607, "grad_norm": 14.95343017578125, "learning_rate": 9.64184852963221e-06, "loss": 0.4162, "step": 2621 }, { "epoch": 0.13006597549481622, "grad_norm": 9.47995376586914, "learning_rate": 9.641555943651752e-06, "loss": 0.2732, "step": 2622 }, { "epoch": 0.13011558113001637, "grad_norm": 9.330103874206543, "learning_rate": 9.641263242651437e-06, "loss": 0.3923, "step": 2623 }, { "epoch": 0.13016518676521652, "grad_norm": 17.805437088012695, "learning_rate": 9.640970426638521e-06, "loss": 0.4757, "step": 2624 }, { "epoch": 0.1302147924004167, "grad_norm": 6.599569797515869, "learning_rate": 9.640677495620258e-06, "loss": 0.3242, "step": 2625 }, { "epoch": 0.13026439803561685, "grad_norm": 5.173746109008789, "learning_rate": 9.640384449603907e-06, "loss": 0.306, "step": 2626 }, { "epoch": 0.130314003670817, "grad_norm": 9.291160583496094, "learning_rate": 9.640091288596729e-06, "loss": 0.31, "step": 2627 }, { "epoch": 0.13036360930601718, "grad_norm": 6.851490497589111, "learning_rate": 9.639798012605992e-06, "loss": 0.3761, "step": 2628 }, { "epoch": 0.13041321494121733, "grad_norm": 6.968094348907471, "learning_rate": 9.63950462163896e-06, "loss": 0.3826, "step": 2629 }, { "epoch": 0.13046282057641748, "grad_norm": 6.456867694854736, "learning_rate": 9.639211115702907e-06, "loss": 0.3721, "step": 2630 }, { "epoch": 0.13051242621161763, "grad_norm": 8.749516487121582, "learning_rate": 9.638917494805101e-06, "loss": 0.2887, "step": 2631 }, { "epoch": 0.1305620318468178, "grad_norm": 8.625744819641113, "learning_rate": 9.638623758952823e-06, "loss": 0.314, "step": 2632 }, { "epoch": 0.13061163748201796, "grad_norm": 8.092828750610352, "learning_rate": 9.63832990815335e-06, "loss": 0.2993, "step": 2633 }, { "epoch": 0.1306612431172181, "grad_norm": 9.486128807067871, "learning_rate": 9.638035942413962e-06, "loss": 0.424, "step": 2634 }, { "epoch": 0.13071084875241828, "grad_norm": 7.729866981506348, "learning_rate": 9.637741861741947e-06, "loss": 0.3243, "step": 2635 }, { "epoch": 0.13076045438761844, "grad_norm": 24.97355079650879, "learning_rate": 9.63744766614459e-06, "loss": 0.469, "step": 2636 }, { "epoch": 0.1308100600228186, "grad_norm": 6.435118198394775, "learning_rate": 9.637153355629183e-06, "loss": 0.2539, "step": 2637 }, { "epoch": 0.13085966565801876, "grad_norm": 7.0200018882751465, "learning_rate": 9.636858930203017e-06, "loss": 0.311, "step": 2638 }, { "epoch": 0.13090927129321892, "grad_norm": 9.185080528259277, "learning_rate": 9.63656438987339e-06, "loss": 0.4428, "step": 2639 }, { "epoch": 0.13095887692841907, "grad_norm": 4.523911476135254, "learning_rate": 9.6362697346476e-06, "loss": 0.2422, "step": 2640 }, { "epoch": 0.13100848256361922, "grad_norm": 5.897689342498779, "learning_rate": 9.635974964532949e-06, "loss": 0.3565, "step": 2641 }, { "epoch": 0.1310580881988194, "grad_norm": 13.2169771194458, "learning_rate": 9.63568007953674e-06, "loss": 0.4456, "step": 2642 }, { "epoch": 0.13110769383401955, "grad_norm": 15.620888710021973, "learning_rate": 9.635385079666284e-06, "loss": 0.3622, "step": 2643 }, { "epoch": 0.1311572994692197, "grad_norm": 11.493285179138184, "learning_rate": 9.635089964928888e-06, "loss": 0.4523, "step": 2644 }, { "epoch": 0.13120690510441987, "grad_norm": 7.781362533569336, "learning_rate": 9.634794735331865e-06, "loss": 0.3089, "step": 2645 }, { "epoch": 0.13125651073962002, "grad_norm": 5.236591815948486, "learning_rate": 9.634499390882532e-06, "loss": 0.1968, "step": 2646 }, { "epoch": 0.13130611637482018, "grad_norm": 9.28908920288086, "learning_rate": 9.63420393158821e-06, "loss": 0.3613, "step": 2647 }, { "epoch": 0.13135572201002033, "grad_norm": 5.216414928436279, "learning_rate": 9.633908357456214e-06, "loss": 0.2661, "step": 2648 }, { "epoch": 0.1314053276452205, "grad_norm": 11.088641166687012, "learning_rate": 9.633612668493875e-06, "loss": 0.3915, "step": 2649 }, { "epoch": 0.13145493328042065, "grad_norm": 10.5094575881958, "learning_rate": 9.63331686470852e-06, "loss": 0.3998, "step": 2650 }, { "epoch": 0.1315045389156208, "grad_norm": 16.0618896484375, "learning_rate": 9.633020946107474e-06, "loss": 0.4133, "step": 2651 }, { "epoch": 0.13155414455082098, "grad_norm": 9.457983016967773, "learning_rate": 9.632724912698074e-06, "loss": 0.3992, "step": 2652 }, { "epoch": 0.13160375018602113, "grad_norm": 21.37450408935547, "learning_rate": 9.632428764487658e-06, "loss": 0.3801, "step": 2653 }, { "epoch": 0.13165335582122129, "grad_norm": 6.645049095153809, "learning_rate": 9.632132501483559e-06, "loss": 0.4141, "step": 2654 }, { "epoch": 0.13170296145642144, "grad_norm": 11.5509614944458, "learning_rate": 9.63183612369312e-06, "loss": 0.3475, "step": 2655 }, { "epoch": 0.13175256709162161, "grad_norm": 10.379989624023438, "learning_rate": 9.63153963112369e-06, "loss": 0.3158, "step": 2656 }, { "epoch": 0.13180217272682176, "grad_norm": 7.727962017059326, "learning_rate": 9.631243023782611e-06, "loss": 0.3972, "step": 2657 }, { "epoch": 0.13185177836202192, "grad_norm": 10.746049880981445, "learning_rate": 9.630946301677236e-06, "loss": 0.3839, "step": 2658 }, { "epoch": 0.1319013839972221, "grad_norm": 6.580548286437988, "learning_rate": 9.630649464814916e-06, "loss": 0.1652, "step": 2659 }, { "epoch": 0.13195098963242224, "grad_norm": 6.732256889343262, "learning_rate": 9.630352513203008e-06, "loss": 0.2836, "step": 2660 }, { "epoch": 0.1320005952676224, "grad_norm": 13.01729679107666, "learning_rate": 9.63005544684887e-06, "loss": 0.4131, "step": 2661 }, { "epoch": 0.13205020090282257, "grad_norm": 8.863692283630371, "learning_rate": 9.629758265759862e-06, "loss": 0.2904, "step": 2662 }, { "epoch": 0.13209980653802272, "grad_norm": 5.697234630584717, "learning_rate": 9.62946096994335e-06, "loss": 0.2889, "step": 2663 }, { "epoch": 0.13214941217322287, "grad_norm": 6.698058605194092, "learning_rate": 9.629163559406704e-06, "loss": 0.3222, "step": 2664 }, { "epoch": 0.13219901780842302, "grad_norm": 6.293748378753662, "learning_rate": 9.628866034157289e-06, "loss": 0.303, "step": 2665 }, { "epoch": 0.1322486234436232, "grad_norm": 9.282719612121582, "learning_rate": 9.628568394202481e-06, "loss": 0.3019, "step": 2666 }, { "epoch": 0.13229822907882335, "grad_norm": 12.019295692443848, "learning_rate": 9.628270639549654e-06, "loss": 0.2902, "step": 2667 }, { "epoch": 0.1323478347140235, "grad_norm": 15.280282020568848, "learning_rate": 9.627972770206187e-06, "loss": 0.396, "step": 2668 }, { "epoch": 0.13239744034922368, "grad_norm": 7.502130508422852, "learning_rate": 9.627674786179459e-06, "loss": 0.3782, "step": 2669 }, { "epoch": 0.13244704598442383, "grad_norm": 11.557888984680176, "learning_rate": 9.62737668747686e-06, "loss": 0.3831, "step": 2670 }, { "epoch": 0.13249665161962398, "grad_norm": 7.109849452972412, "learning_rate": 9.627078474105772e-06, "loss": 0.253, "step": 2671 }, { "epoch": 0.13254625725482413, "grad_norm": 10.500927925109863, "learning_rate": 9.626780146073587e-06, "loss": 0.3338, "step": 2672 }, { "epoch": 0.1325958628900243, "grad_norm": 11.876670837402344, "learning_rate": 9.626481703387696e-06, "loss": 0.4254, "step": 2673 }, { "epoch": 0.13264546852522446, "grad_norm": 13.731385231018066, "learning_rate": 9.626183146055495e-06, "loss": 0.368, "step": 2674 }, { "epoch": 0.13269507416042461, "grad_norm": 5.879188537597656, "learning_rate": 9.625884474084383e-06, "loss": 0.2568, "step": 2675 }, { "epoch": 0.1327446797956248, "grad_norm": 12.115762710571289, "learning_rate": 9.625585687481763e-06, "loss": 0.4249, "step": 2676 }, { "epoch": 0.13279428543082494, "grad_norm": 5.718569755554199, "learning_rate": 9.625286786255038e-06, "loss": 0.2888, "step": 2677 }, { "epoch": 0.1328438910660251, "grad_norm": 8.261771202087402, "learning_rate": 9.624987770411612e-06, "loss": 0.362, "step": 2678 }, { "epoch": 0.13289349670122527, "grad_norm": 6.352380752563477, "learning_rate": 9.624688639958898e-06, "loss": 0.3228, "step": 2679 }, { "epoch": 0.13294310233642542, "grad_norm": 10.631830215454102, "learning_rate": 9.624389394904306e-06, "loss": 0.3626, "step": 2680 }, { "epoch": 0.13299270797162557, "grad_norm": 7.784488201141357, "learning_rate": 9.624090035255254e-06, "loss": 0.3385, "step": 2681 }, { "epoch": 0.13304231360682572, "grad_norm": 5.235793113708496, "learning_rate": 9.623790561019159e-06, "loss": 0.2914, "step": 2682 }, { "epoch": 0.1330919192420259, "grad_norm": 10.075662612915039, "learning_rate": 9.623490972203442e-06, "loss": 0.4015, "step": 2683 }, { "epoch": 0.13314152487722605, "grad_norm": 4.879518508911133, "learning_rate": 9.623191268815528e-06, "loss": 0.2612, "step": 2684 }, { "epoch": 0.1331911305124262, "grad_norm": 9.617380142211914, "learning_rate": 9.622891450862844e-06, "loss": 0.3306, "step": 2685 }, { "epoch": 0.13324073614762638, "grad_norm": 9.543309211730957, "learning_rate": 9.622591518352817e-06, "loss": 0.397, "step": 2686 }, { "epoch": 0.13329034178282653, "grad_norm": 13.683753967285156, "learning_rate": 9.622291471292882e-06, "loss": 0.3447, "step": 2687 }, { "epoch": 0.13333994741802668, "grad_norm": 17.46101188659668, "learning_rate": 9.621991309690473e-06, "loss": 0.3682, "step": 2688 }, { "epoch": 0.13338955305322683, "grad_norm": 8.936522483825684, "learning_rate": 9.621691033553028e-06, "loss": 0.3238, "step": 2689 }, { "epoch": 0.133439158688427, "grad_norm": 5.39033317565918, "learning_rate": 9.62139064288799e-06, "loss": 0.3022, "step": 2690 }, { "epoch": 0.13348876432362716, "grad_norm": 13.094985008239746, "learning_rate": 9.621090137702801e-06, "loss": 0.3434, "step": 2691 }, { "epoch": 0.1335383699588273, "grad_norm": 8.737351417541504, "learning_rate": 9.620789518004907e-06, "loss": 0.4345, "step": 2692 }, { "epoch": 0.1335879755940275, "grad_norm": 30.492019653320312, "learning_rate": 9.620488783801761e-06, "loss": 0.6758, "step": 2693 }, { "epoch": 0.13363758122922764, "grad_norm": 7.944163799285889, "learning_rate": 9.62018793510081e-06, "loss": 0.3566, "step": 2694 }, { "epoch": 0.1336871868644278, "grad_norm": 14.353229522705078, "learning_rate": 9.619886971909513e-06, "loss": 0.4496, "step": 2695 }, { "epoch": 0.13373679249962797, "grad_norm": 12.398311614990234, "learning_rate": 9.619585894235328e-06, "loss": 0.3737, "step": 2696 }, { "epoch": 0.13378639813482812, "grad_norm": 7.229384899139404, "learning_rate": 9.619284702085714e-06, "loss": 0.2237, "step": 2697 }, { "epoch": 0.13383600377002827, "grad_norm": 9.782696723937988, "learning_rate": 9.618983395468135e-06, "loss": 0.3923, "step": 2698 }, { "epoch": 0.13388560940522842, "grad_norm": 7.246676445007324, "learning_rate": 9.61868197439006e-06, "loss": 0.3097, "step": 2699 }, { "epoch": 0.1339352150404286, "grad_norm": 5.650084495544434, "learning_rate": 9.618380438858954e-06, "loss": 0.2876, "step": 2700 }, { "epoch": 0.13398482067562875, "grad_norm": 7.284073352813721, "learning_rate": 9.618078788882292e-06, "loss": 0.2705, "step": 2701 }, { "epoch": 0.1340344263108289, "grad_norm": 11.660938262939453, "learning_rate": 9.617777024467551e-06, "loss": 0.4054, "step": 2702 }, { "epoch": 0.13408403194602908, "grad_norm": 9.412128448486328, "learning_rate": 9.617475145622205e-06, "loss": 0.3289, "step": 2703 }, { "epoch": 0.13413363758122923, "grad_norm": 11.98977279663086, "learning_rate": 9.617173152353735e-06, "loss": 0.2439, "step": 2704 }, { "epoch": 0.13418324321642938, "grad_norm": 10.793004989624023, "learning_rate": 9.616871044669626e-06, "loss": 0.438, "step": 2705 }, { "epoch": 0.13423284885162953, "grad_norm": 9.32612133026123, "learning_rate": 9.616568822577366e-06, "loss": 0.3653, "step": 2706 }, { "epoch": 0.1342824544868297, "grad_norm": 5.812561511993408, "learning_rate": 9.616266486084441e-06, "loss": 0.2665, "step": 2707 }, { "epoch": 0.13433206012202986, "grad_norm": 12.313000679016113, "learning_rate": 9.615964035198345e-06, "loss": 0.4196, "step": 2708 }, { "epoch": 0.13438166575723, "grad_norm": 10.286516189575195, "learning_rate": 9.615661469926571e-06, "loss": 0.3339, "step": 2709 }, { "epoch": 0.1344312713924302, "grad_norm": 5.391015529632568, "learning_rate": 9.615358790276618e-06, "loss": 0.3284, "step": 2710 }, { "epoch": 0.13448087702763034, "grad_norm": 8.349987983703613, "learning_rate": 9.615055996255989e-06, "loss": 0.3443, "step": 2711 }, { "epoch": 0.1345304826628305, "grad_norm": 11.664007186889648, "learning_rate": 9.614753087872182e-06, "loss": 0.5004, "step": 2712 }, { "epoch": 0.13458008829803067, "grad_norm": 6.774430274963379, "learning_rate": 9.614450065132706e-06, "loss": 0.3365, "step": 2713 }, { "epoch": 0.13462969393323082, "grad_norm": 6.390953063964844, "learning_rate": 9.614146928045071e-06, "loss": 0.285, "step": 2714 }, { "epoch": 0.13467929956843097, "grad_norm": 7.006358623504639, "learning_rate": 9.613843676616788e-06, "loss": 0.36, "step": 2715 }, { "epoch": 0.13472890520363112, "grad_norm": 13.540396690368652, "learning_rate": 9.613540310855372e-06, "loss": 0.3653, "step": 2716 }, { "epoch": 0.1347785108388313, "grad_norm": 9.681002616882324, "learning_rate": 9.613236830768337e-06, "loss": 0.4108, "step": 2717 }, { "epoch": 0.13482811647403145, "grad_norm": 10.238933563232422, "learning_rate": 9.612933236363212e-06, "loss": 0.4123, "step": 2718 }, { "epoch": 0.1348777221092316, "grad_norm": 12.918455123901367, "learning_rate": 9.61262952764751e-06, "loss": 0.3341, "step": 2719 }, { "epoch": 0.13492732774443178, "grad_norm": 8.376398086547852, "learning_rate": 9.612325704628765e-06, "loss": 0.4479, "step": 2720 }, { "epoch": 0.13497693337963193, "grad_norm": 12.603553771972656, "learning_rate": 9.6120217673145e-06, "loss": 0.3326, "step": 2721 }, { "epoch": 0.13502653901483208, "grad_norm": 6.813079357147217, "learning_rate": 9.61171771571225e-06, "loss": 0.2808, "step": 2722 }, { "epoch": 0.13507614465003223, "grad_norm": 15.660665512084961, "learning_rate": 9.61141354982955e-06, "loss": 0.4379, "step": 2723 }, { "epoch": 0.1351257502852324, "grad_norm": 4.711135387420654, "learning_rate": 9.611109269673935e-06, "loss": 0.2858, "step": 2724 }, { "epoch": 0.13517535592043256, "grad_norm": 5.87208890914917, "learning_rate": 9.610804875252948e-06, "loss": 0.2758, "step": 2725 }, { "epoch": 0.1352249615556327, "grad_norm": 14.8265380859375, "learning_rate": 9.610500366574129e-06, "loss": 0.5382, "step": 2726 }, { "epoch": 0.1352745671908329, "grad_norm": 6.670057773590088, "learning_rate": 9.610195743645027e-06, "loss": 0.2734, "step": 2727 }, { "epoch": 0.13532417282603304, "grad_norm": 10.206396102905273, "learning_rate": 9.609891006473188e-06, "loss": 0.3999, "step": 2728 }, { "epoch": 0.1353737784612332, "grad_norm": 7.509227275848389, "learning_rate": 9.609586155066164e-06, "loss": 0.3795, "step": 2729 }, { "epoch": 0.13542338409643334, "grad_norm": 9.805160522460938, "learning_rate": 9.609281189431512e-06, "loss": 0.3698, "step": 2730 }, { "epoch": 0.13547298973163352, "grad_norm": 6.615857124328613, "learning_rate": 9.608976109576785e-06, "loss": 0.1999, "step": 2731 }, { "epoch": 0.13552259536683367, "grad_norm": 5.578145503997803, "learning_rate": 9.608670915509547e-06, "loss": 0.3383, "step": 2732 }, { "epoch": 0.13557220100203382, "grad_norm": 13.05483341217041, "learning_rate": 9.608365607237356e-06, "loss": 0.4858, "step": 2733 }, { "epoch": 0.135621806637234, "grad_norm": 14.217299461364746, "learning_rate": 9.608060184767782e-06, "loss": 0.3643, "step": 2734 }, { "epoch": 0.13567141227243415, "grad_norm": 7.229690074920654, "learning_rate": 9.607754648108393e-06, "loss": 0.4619, "step": 2735 }, { "epoch": 0.1357210179076343, "grad_norm": 6.123700141906738, "learning_rate": 9.60744899726676e-06, "loss": 0.3877, "step": 2736 }, { "epoch": 0.13577062354283448, "grad_norm": 9.442890167236328, "learning_rate": 9.607143232250455e-06, "loss": 0.3651, "step": 2737 }, { "epoch": 0.13582022917803463, "grad_norm": 7.926819324493408, "learning_rate": 9.606837353067058e-06, "loss": 0.3603, "step": 2738 }, { "epoch": 0.13586983481323478, "grad_norm": 4.903346538543701, "learning_rate": 9.606531359724147e-06, "loss": 0.3345, "step": 2739 }, { "epoch": 0.13591944044843493, "grad_norm": 14.827775955200195, "learning_rate": 9.606225252229305e-06, "loss": 0.3719, "step": 2740 }, { "epoch": 0.1359690460836351, "grad_norm": 12.309520721435547, "learning_rate": 9.605919030590118e-06, "loss": 0.4291, "step": 2741 }, { "epoch": 0.13601865171883526, "grad_norm": 10.381913185119629, "learning_rate": 9.605612694814175e-06, "loss": 0.4302, "step": 2742 }, { "epoch": 0.1360682573540354, "grad_norm": 8.597452163696289, "learning_rate": 9.605306244909064e-06, "loss": 0.3427, "step": 2743 }, { "epoch": 0.1361178629892356, "grad_norm": 10.329436302185059, "learning_rate": 9.604999680882382e-06, "loss": 0.4176, "step": 2744 }, { "epoch": 0.13616746862443574, "grad_norm": 7.763155460357666, "learning_rate": 9.604693002741726e-06, "loss": 0.301, "step": 2745 }, { "epoch": 0.1362170742596359, "grad_norm": 10.196576118469238, "learning_rate": 9.604386210494691e-06, "loss": 0.4179, "step": 2746 }, { "epoch": 0.13626667989483604, "grad_norm": 13.923554420471191, "learning_rate": 9.604079304148885e-06, "loss": 0.4467, "step": 2747 }, { "epoch": 0.13631628553003622, "grad_norm": 13.522721290588379, "learning_rate": 9.603772283711911e-06, "loss": 0.5105, "step": 2748 }, { "epoch": 0.13636589116523637, "grad_norm": 8.215544700622559, "learning_rate": 9.603465149191377e-06, "loss": 0.389, "step": 2749 }, { "epoch": 0.13641549680043652, "grad_norm": 5.925583839416504, "learning_rate": 9.603157900594897e-06, "loss": 0.2846, "step": 2750 }, { "epoch": 0.1364651024356367, "grad_norm": 9.49794864654541, "learning_rate": 9.60285053793008e-06, "loss": 0.3884, "step": 2751 }, { "epoch": 0.13651470807083685, "grad_norm": 4.590199947357178, "learning_rate": 9.602543061204543e-06, "loss": 0.368, "step": 2752 }, { "epoch": 0.136564313706037, "grad_norm": 12.901947021484375, "learning_rate": 9.602235470425909e-06, "loss": 0.4052, "step": 2753 }, { "epoch": 0.13661391934123718, "grad_norm": 8.219683647155762, "learning_rate": 9.601927765601798e-06, "loss": 0.385, "step": 2754 }, { "epoch": 0.13666352497643733, "grad_norm": 6.2130913734436035, "learning_rate": 9.601619946739835e-06, "loss": 0.2864, "step": 2755 }, { "epoch": 0.13671313061163748, "grad_norm": 10.658233642578125, "learning_rate": 9.601312013847647e-06, "loss": 0.3163, "step": 2756 }, { "epoch": 0.13676273624683763, "grad_norm": 9.657953262329102, "learning_rate": 9.601003966932866e-06, "loss": 0.3317, "step": 2757 }, { "epoch": 0.1368123418820378, "grad_norm": 7.593544960021973, "learning_rate": 9.600695806003128e-06, "loss": 0.3132, "step": 2758 }, { "epoch": 0.13686194751723796, "grad_norm": 7.522039890289307, "learning_rate": 9.600387531066065e-06, "loss": 0.3578, "step": 2759 }, { "epoch": 0.1369115531524381, "grad_norm": 6.209968566894531, "learning_rate": 9.600079142129317e-06, "loss": 0.2405, "step": 2760 }, { "epoch": 0.1369611587876383, "grad_norm": 7.248753070831299, "learning_rate": 9.599770639200529e-06, "loss": 0.3456, "step": 2761 }, { "epoch": 0.13701076442283844, "grad_norm": 5.687702655792236, "learning_rate": 9.599462022287342e-06, "loss": 0.3901, "step": 2762 }, { "epoch": 0.1370603700580386, "grad_norm": 8.579425811767578, "learning_rate": 9.599153291397407e-06, "loss": 0.3472, "step": 2763 }, { "epoch": 0.13710997569323874, "grad_norm": 6.724272727966309, "learning_rate": 9.598844446538374e-06, "loss": 0.2671, "step": 2764 }, { "epoch": 0.13715958132843892, "grad_norm": 13.88985538482666, "learning_rate": 9.598535487717892e-06, "loss": 0.3841, "step": 2765 }, { "epoch": 0.13720918696363907, "grad_norm": 8.702108383178711, "learning_rate": 9.598226414943623e-06, "loss": 0.3814, "step": 2766 }, { "epoch": 0.13725879259883922, "grad_norm": 6.203908920288086, "learning_rate": 9.597917228223224e-06, "loss": 0.3675, "step": 2767 }, { "epoch": 0.1373083982340394, "grad_norm": 14.2216796875, "learning_rate": 9.597607927564354e-06, "loss": 0.4351, "step": 2768 }, { "epoch": 0.13735800386923955, "grad_norm": 16.713363647460938, "learning_rate": 9.597298512974683e-06, "loss": 0.4271, "step": 2769 }, { "epoch": 0.1374076095044397, "grad_norm": 9.92328929901123, "learning_rate": 9.596988984461871e-06, "loss": 0.3353, "step": 2770 }, { "epoch": 0.13745721513963988, "grad_norm": 4.787294864654541, "learning_rate": 9.596679342033595e-06, "loss": 0.293, "step": 2771 }, { "epoch": 0.13750682077484003, "grad_norm": 5.246037006378174, "learning_rate": 9.596369585697525e-06, "loss": 0.2852, "step": 2772 }, { "epoch": 0.13755642641004018, "grad_norm": 15.693968772888184, "learning_rate": 9.596059715461338e-06, "loss": 0.5018, "step": 2773 }, { "epoch": 0.13760603204524033, "grad_norm": 9.260848045349121, "learning_rate": 9.595749731332713e-06, "loss": 0.3788, "step": 2774 }, { "epoch": 0.1376556376804405, "grad_norm": 5.411118984222412, "learning_rate": 9.59543963331933e-06, "loss": 0.3327, "step": 2775 }, { "epoch": 0.13770524331564066, "grad_norm": 7.259090423583984, "learning_rate": 9.595129421428873e-06, "loss": 0.305, "step": 2776 }, { "epoch": 0.1377548489508408, "grad_norm": 6.0531325340271, "learning_rate": 9.594819095669031e-06, "loss": 0.2882, "step": 2777 }, { "epoch": 0.137804454586041, "grad_norm": 7.607854843139648, "learning_rate": 9.594508656047495e-06, "loss": 0.4419, "step": 2778 }, { "epoch": 0.13785406022124114, "grad_norm": 8.794962882995605, "learning_rate": 9.594198102571953e-06, "loss": 0.3724, "step": 2779 }, { "epoch": 0.1379036658564413, "grad_norm": 6.7055158615112305, "learning_rate": 9.593887435250107e-06, "loss": 0.3391, "step": 2780 }, { "epoch": 0.13795327149164144, "grad_norm": 5.886970043182373, "learning_rate": 9.593576654089648e-06, "loss": 0.3449, "step": 2781 }, { "epoch": 0.13800287712684162, "grad_norm": 4.469974994659424, "learning_rate": 9.593265759098287e-06, "loss": 0.2593, "step": 2782 }, { "epoch": 0.13805248276204177, "grad_norm": 4.764194011688232, "learning_rate": 9.59295475028372e-06, "loss": 0.3507, "step": 2783 }, { "epoch": 0.13810208839724192, "grad_norm": 9.16661262512207, "learning_rate": 9.592643627653656e-06, "loss": 0.3731, "step": 2784 }, { "epoch": 0.1381516940324421, "grad_norm": 4.732944011688232, "learning_rate": 9.592332391215806e-06, "loss": 0.3692, "step": 2785 }, { "epoch": 0.13820129966764225, "grad_norm": 9.378660202026367, "learning_rate": 9.592021040977883e-06, "loss": 0.4441, "step": 2786 }, { "epoch": 0.1382509053028424, "grad_norm": 9.843173027038574, "learning_rate": 9.591709576947601e-06, "loss": 0.327, "step": 2787 }, { "epoch": 0.13830051093804255, "grad_norm": 12.350006103515625, "learning_rate": 9.59139799913268e-06, "loss": 0.5844, "step": 2788 }, { "epoch": 0.13835011657324273, "grad_norm": 5.669825077056885, "learning_rate": 9.591086307540836e-06, "loss": 0.3671, "step": 2789 }, { "epoch": 0.13839972220844288, "grad_norm": 9.30919361114502, "learning_rate": 9.590774502179799e-06, "loss": 0.1854, "step": 2790 }, { "epoch": 0.13844932784364303, "grad_norm": 9.502874374389648, "learning_rate": 9.590462583057293e-06, "loss": 0.413, "step": 2791 }, { "epoch": 0.1384989334788432, "grad_norm": 5.705443859100342, "learning_rate": 9.590150550181047e-06, "loss": 0.3083, "step": 2792 }, { "epoch": 0.13854853911404336, "grad_norm": 5.787900924682617, "learning_rate": 9.589838403558795e-06, "loss": 0.3407, "step": 2793 }, { "epoch": 0.1385981447492435, "grad_norm": 9.3602933883667, "learning_rate": 9.589526143198272e-06, "loss": 0.5187, "step": 2794 }, { "epoch": 0.13864775038444369, "grad_norm": 7.299269199371338, "learning_rate": 9.589213769107212e-06, "loss": 0.3689, "step": 2795 }, { "epoch": 0.13869735601964384, "grad_norm": 4.333940029144287, "learning_rate": 9.588901281293362e-06, "loss": 0.3482, "step": 2796 }, { "epoch": 0.138746961654844, "grad_norm": 7.139735698699951, "learning_rate": 9.588588679764461e-06, "loss": 0.2887, "step": 2797 }, { "epoch": 0.13879656729004414, "grad_norm": 8.645315170288086, "learning_rate": 9.588275964528254e-06, "loss": 0.3961, "step": 2798 }, { "epoch": 0.13884617292524432, "grad_norm": 7.579341888427734, "learning_rate": 9.587963135592498e-06, "loss": 0.3279, "step": 2799 }, { "epoch": 0.13889577856044447, "grad_norm": 7.4428229331970215, "learning_rate": 9.587650192964939e-06, "loss": 0.2007, "step": 2800 }, { "epoch": 0.13894538419564462, "grad_norm": 11.998217582702637, "learning_rate": 9.587337136653331e-06, "loss": 0.3401, "step": 2801 }, { "epoch": 0.1389949898308448, "grad_norm": 6.465380668640137, "learning_rate": 9.587023966665436e-06, "loss": 0.3787, "step": 2802 }, { "epoch": 0.13904459546604495, "grad_norm": 5.210338115692139, "learning_rate": 9.58671068300901e-06, "loss": 0.2677, "step": 2803 }, { "epoch": 0.1390942011012451, "grad_norm": 8.77302074432373, "learning_rate": 9.58639728569182e-06, "loss": 0.3289, "step": 2804 }, { "epoch": 0.13914380673644525, "grad_norm": 10.957964897155762, "learning_rate": 9.58608377472163e-06, "loss": 0.4529, "step": 2805 }, { "epoch": 0.13919341237164543, "grad_norm": 8.900890350341797, "learning_rate": 9.585770150106209e-06, "loss": 0.3059, "step": 2806 }, { "epoch": 0.13924301800684558, "grad_norm": 5.972315788269043, "learning_rate": 9.585456411853331e-06, "loss": 0.3823, "step": 2807 }, { "epoch": 0.13929262364204573, "grad_norm": 19.986934661865234, "learning_rate": 9.585142559970768e-06, "loss": 0.3972, "step": 2808 }, { "epoch": 0.1393422292772459, "grad_norm": 15.934805870056152, "learning_rate": 9.584828594466298e-06, "loss": 0.41, "step": 2809 }, { "epoch": 0.13939183491244606, "grad_norm": 4.751220226287842, "learning_rate": 9.5845145153477e-06, "loss": 0.2189, "step": 2810 }, { "epoch": 0.1394414405476462, "grad_norm": 9.574443817138672, "learning_rate": 9.584200322622761e-06, "loss": 0.3201, "step": 2811 }, { "epoch": 0.13949104618284638, "grad_norm": 9.369197845458984, "learning_rate": 9.583886016299263e-06, "loss": 0.3226, "step": 2812 }, { "epoch": 0.13954065181804653, "grad_norm": 7.092344284057617, "learning_rate": 9.583571596384997e-06, "loss": 0.36, "step": 2813 }, { "epoch": 0.13959025745324669, "grad_norm": 6.836540222167969, "learning_rate": 9.583257062887754e-06, "loss": 0.2692, "step": 2814 }, { "epoch": 0.13963986308844684, "grad_norm": 9.894054412841797, "learning_rate": 9.582942415815326e-06, "loss": 0.25, "step": 2815 }, { "epoch": 0.13968946872364701, "grad_norm": 6.028022289276123, "learning_rate": 9.582627655175513e-06, "loss": 0.3034, "step": 2816 }, { "epoch": 0.13973907435884717, "grad_norm": 7.7518229484558105, "learning_rate": 9.582312780976113e-06, "loss": 0.3516, "step": 2817 }, { "epoch": 0.13978867999404732, "grad_norm": 5.382441520690918, "learning_rate": 9.58199779322493e-06, "loss": 0.3523, "step": 2818 }, { "epoch": 0.1398382856292475, "grad_norm": 4.668188571929932, "learning_rate": 9.581682691929769e-06, "loss": 0.2969, "step": 2819 }, { "epoch": 0.13988789126444764, "grad_norm": 5.631656646728516, "learning_rate": 9.581367477098437e-06, "loss": 0.315, "step": 2820 }, { "epoch": 0.1399374968996478, "grad_norm": 6.1035380363464355, "learning_rate": 9.58105214873875e-06, "loss": 0.3468, "step": 2821 }, { "epoch": 0.13998710253484795, "grad_norm": 8.88121223449707, "learning_rate": 9.580736706858515e-06, "loss": 0.2899, "step": 2822 }, { "epoch": 0.14003670817004812, "grad_norm": 10.247925758361816, "learning_rate": 9.580421151465554e-06, "loss": 0.3006, "step": 2823 }, { "epoch": 0.14008631380524827, "grad_norm": 20.52873992919922, "learning_rate": 9.580105482567682e-06, "loss": 0.3843, "step": 2824 }, { "epoch": 0.14013591944044843, "grad_norm": 5.836763381958008, "learning_rate": 9.579789700172727e-06, "loss": 0.2199, "step": 2825 }, { "epoch": 0.1401855250756486, "grad_norm": 6.511462211608887, "learning_rate": 9.579473804288512e-06, "loss": 0.4505, "step": 2826 }, { "epoch": 0.14023513071084875, "grad_norm": 7.800593376159668, "learning_rate": 9.579157794922864e-06, "loss": 0.3624, "step": 2827 }, { "epoch": 0.1402847363460489, "grad_norm": 3.784937858581543, "learning_rate": 9.578841672083616e-06, "loss": 0.2608, "step": 2828 }, { "epoch": 0.14033434198124908, "grad_norm": 6.924763202667236, "learning_rate": 9.578525435778599e-06, "loss": 0.433, "step": 2829 }, { "epoch": 0.14038394761644923, "grad_norm": 5.5378594398498535, "learning_rate": 9.578209086015649e-06, "loss": 0.2229, "step": 2830 }, { "epoch": 0.14043355325164938, "grad_norm": 15.243186950683594, "learning_rate": 9.577892622802608e-06, "loss": 0.3319, "step": 2831 }, { "epoch": 0.14048315888684954, "grad_norm": 11.101627349853516, "learning_rate": 9.577576046147316e-06, "loss": 0.4274, "step": 2832 }, { "epoch": 0.1405327645220497, "grad_norm": 11.238321304321289, "learning_rate": 9.577259356057621e-06, "loss": 0.4418, "step": 2833 }, { "epoch": 0.14058237015724986, "grad_norm": 6.684149265289307, "learning_rate": 9.576942552541368e-06, "loss": 0.2729, "step": 2834 }, { "epoch": 0.14063197579245001, "grad_norm": 7.837451457977295, "learning_rate": 9.576625635606408e-06, "loss": 0.2771, "step": 2835 }, { "epoch": 0.1406815814276502, "grad_norm": 10.400588989257812, "learning_rate": 9.576308605260593e-06, "loss": 0.4431, "step": 2836 }, { "epoch": 0.14073118706285034, "grad_norm": 11.425849914550781, "learning_rate": 9.575991461511783e-06, "loss": 0.4087, "step": 2837 }, { "epoch": 0.1407807926980505, "grad_norm": 8.441357612609863, "learning_rate": 9.575674204367833e-06, "loss": 0.3122, "step": 2838 }, { "epoch": 0.14083039833325064, "grad_norm": 12.076153755187988, "learning_rate": 9.575356833836607e-06, "loss": 0.4067, "step": 2839 }, { "epoch": 0.14088000396845082, "grad_norm": 14.884502410888672, "learning_rate": 9.575039349925969e-06, "loss": 0.3623, "step": 2840 }, { "epoch": 0.14092960960365097, "grad_norm": 6.5526251792907715, "learning_rate": 9.574721752643788e-06, "loss": 0.2982, "step": 2841 }, { "epoch": 0.14097921523885112, "grad_norm": 5.262254238128662, "learning_rate": 9.574404041997932e-06, "loss": 0.3278, "step": 2842 }, { "epoch": 0.1410288208740513, "grad_norm": 11.683188438415527, "learning_rate": 9.574086217996272e-06, "loss": 0.4174, "step": 2843 }, { "epoch": 0.14107842650925145, "grad_norm": 17.366193771362305, "learning_rate": 9.573768280646688e-06, "loss": 0.3719, "step": 2844 }, { "epoch": 0.1411280321444516, "grad_norm": 6.150028228759766, "learning_rate": 9.573450229957057e-06, "loss": 0.2423, "step": 2845 }, { "epoch": 0.14117763777965178, "grad_norm": 5.446971893310547, "learning_rate": 9.57313206593526e-06, "loss": 0.2346, "step": 2846 }, { "epoch": 0.14122724341485193, "grad_norm": 5.257535457611084, "learning_rate": 9.572813788589183e-06, "loss": 0.3159, "step": 2847 }, { "epoch": 0.14127684905005208, "grad_norm": 7.199207782745361, "learning_rate": 9.57249539792671e-06, "loss": 0.2654, "step": 2848 }, { "epoch": 0.14132645468525223, "grad_norm": 5.482876777648926, "learning_rate": 9.572176893955736e-06, "loss": 0.2966, "step": 2849 }, { "epoch": 0.1413760603204524, "grad_norm": 5.476739883422852, "learning_rate": 9.571858276684146e-06, "loss": 0.3734, "step": 2850 }, { "epoch": 0.14142566595565256, "grad_norm": 7.241606712341309, "learning_rate": 9.571539546119844e-06, "loss": 0.3667, "step": 2851 }, { "epoch": 0.1414752715908527, "grad_norm": 4.293368816375732, "learning_rate": 9.571220702270723e-06, "loss": 0.2773, "step": 2852 }, { "epoch": 0.1415248772260529, "grad_norm": 5.907167434692383, "learning_rate": 9.570901745144683e-06, "loss": 0.2798, "step": 2853 }, { "epoch": 0.14157448286125304, "grad_norm": 6.096986293792725, "learning_rate": 9.570582674749631e-06, "loss": 0.3713, "step": 2854 }, { "epoch": 0.1416240884964532, "grad_norm": 17.707780838012695, "learning_rate": 9.570263491093475e-06, "loss": 0.3841, "step": 2855 }, { "epoch": 0.14167369413165334, "grad_norm": 9.306772232055664, "learning_rate": 9.56994419418412e-06, "loss": 0.343, "step": 2856 }, { "epoch": 0.14172329976685352, "grad_norm": 19.573503494262695, "learning_rate": 9.569624784029481e-06, "loss": 0.4247, "step": 2857 }, { "epoch": 0.14177290540205367, "grad_norm": 13.614273071289062, "learning_rate": 9.569305260637474e-06, "loss": 0.4811, "step": 2858 }, { "epoch": 0.14182251103725382, "grad_norm": 8.248705863952637, "learning_rate": 9.568985624016015e-06, "loss": 0.4204, "step": 2859 }, { "epoch": 0.141872116672454, "grad_norm": 6.370992183685303, "learning_rate": 9.568665874173024e-06, "loss": 0.3365, "step": 2860 }, { "epoch": 0.14192172230765415, "grad_norm": 6.8597002029418945, "learning_rate": 9.568346011116427e-06, "loss": 0.3213, "step": 2861 }, { "epoch": 0.1419713279428543, "grad_norm": 10.76968002319336, "learning_rate": 9.56802603485415e-06, "loss": 0.3219, "step": 2862 }, { "epoch": 0.14202093357805445, "grad_norm": 23.05219841003418, "learning_rate": 9.567705945394122e-06, "loss": 0.4599, "step": 2863 }, { "epoch": 0.14207053921325463, "grad_norm": 6.115686893463135, "learning_rate": 9.567385742744274e-06, "loss": 0.3792, "step": 2864 }, { "epoch": 0.14212014484845478, "grad_norm": 10.179176330566406, "learning_rate": 9.56706542691254e-06, "loss": 0.3936, "step": 2865 }, { "epoch": 0.14216975048365493, "grad_norm": 7.104694366455078, "learning_rate": 9.566744997906859e-06, "loss": 0.2916, "step": 2866 }, { "epoch": 0.1422193561188551, "grad_norm": 17.57457160949707, "learning_rate": 9.566424455735172e-06, "loss": 0.5439, "step": 2867 }, { "epoch": 0.14226896175405526, "grad_norm": 14.585073471069336, "learning_rate": 9.566103800405421e-06, "loss": 0.3882, "step": 2868 }, { "epoch": 0.1423185673892554, "grad_norm": 7.552137851715088, "learning_rate": 9.565783031925554e-06, "loss": 0.3734, "step": 2869 }, { "epoch": 0.1423681730244556, "grad_norm": 22.954675674438477, "learning_rate": 9.565462150303514e-06, "loss": 0.4144, "step": 2870 }, { "epoch": 0.14241777865965574, "grad_norm": 4.037818431854248, "learning_rate": 9.565141155547262e-06, "loss": 0.2267, "step": 2871 }, { "epoch": 0.1424673842948559, "grad_norm": 8.370355606079102, "learning_rate": 9.564820047664745e-06, "loss": 0.3878, "step": 2872 }, { "epoch": 0.14251698993005604, "grad_norm": 11.998072624206543, "learning_rate": 9.564498826663922e-06, "loss": 0.4359, "step": 2873 }, { "epoch": 0.14256659556525622, "grad_norm": 15.01105785369873, "learning_rate": 9.564177492552752e-06, "loss": 0.2897, "step": 2874 }, { "epoch": 0.14261620120045637, "grad_norm": 8.486572265625, "learning_rate": 9.5638560453392e-06, "loss": 0.4383, "step": 2875 }, { "epoch": 0.14266580683565652, "grad_norm": 8.568399429321289, "learning_rate": 9.563534485031232e-06, "loss": 0.3258, "step": 2876 }, { "epoch": 0.1427154124708567, "grad_norm": 15.952680587768555, "learning_rate": 9.563212811636814e-06, "loss": 0.5365, "step": 2877 }, { "epoch": 0.14276501810605685, "grad_norm": 6.897945404052734, "learning_rate": 9.562891025163919e-06, "loss": 0.3452, "step": 2878 }, { "epoch": 0.142814623741257, "grad_norm": 14.399515151977539, "learning_rate": 9.56256912562052e-06, "loss": 0.4785, "step": 2879 }, { "epoch": 0.14286422937645715, "grad_norm": 9.684900283813477, "learning_rate": 9.562247113014593e-06, "loss": 0.4636, "step": 2880 }, { "epoch": 0.14291383501165733, "grad_norm": 10.712484359741211, "learning_rate": 9.56192498735412e-06, "loss": 0.4584, "step": 2881 }, { "epoch": 0.14296344064685748, "grad_norm": 10.155364990234375, "learning_rate": 9.561602748647083e-06, "loss": 0.3817, "step": 2882 }, { "epoch": 0.14301304628205763, "grad_norm": 8.726295471191406, "learning_rate": 9.561280396901465e-06, "loss": 0.4349, "step": 2883 }, { "epoch": 0.1430626519172578, "grad_norm": 7.560958385467529, "learning_rate": 9.560957932125257e-06, "loss": 0.261, "step": 2884 }, { "epoch": 0.14311225755245796, "grad_norm": 6.38466215133667, "learning_rate": 9.560635354326447e-06, "loss": 0.2447, "step": 2885 }, { "epoch": 0.1431618631876581, "grad_norm": 12.344974517822266, "learning_rate": 9.56031266351303e-06, "loss": 0.4081, "step": 2886 }, { "epoch": 0.1432114688228583, "grad_norm": 5.518012046813965, "learning_rate": 9.559989859693003e-06, "loss": 0.3427, "step": 2887 }, { "epoch": 0.14326107445805844, "grad_norm": 4.930747985839844, "learning_rate": 9.559666942874364e-06, "loss": 0.3485, "step": 2888 }, { "epoch": 0.1433106800932586, "grad_norm": 4.749153137207031, "learning_rate": 9.559343913065116e-06, "loss": 0.3119, "step": 2889 }, { "epoch": 0.14336028572845874, "grad_norm": 5.75899076461792, "learning_rate": 9.559020770273264e-06, "loss": 0.2532, "step": 2890 }, { "epoch": 0.14340989136365892, "grad_norm": 6.539164066314697, "learning_rate": 9.558697514506813e-06, "loss": 0.2895, "step": 2891 }, { "epoch": 0.14345949699885907, "grad_norm": 5.608231544494629, "learning_rate": 9.558374145773778e-06, "loss": 0.3171, "step": 2892 }, { "epoch": 0.14350910263405922, "grad_norm": 6.1063232421875, "learning_rate": 9.558050664082168e-06, "loss": 0.3099, "step": 2893 }, { "epoch": 0.1435587082692594, "grad_norm": 5.489142894744873, "learning_rate": 9.557727069440002e-06, "loss": 0.3271, "step": 2894 }, { "epoch": 0.14360831390445955, "grad_norm": 4.984603404998779, "learning_rate": 9.557403361855297e-06, "loss": 0.3078, "step": 2895 }, { "epoch": 0.1436579195396597, "grad_norm": 15.958627700805664, "learning_rate": 9.557079541336074e-06, "loss": 0.3055, "step": 2896 }, { "epoch": 0.14370752517485985, "grad_norm": 8.30695915222168, "learning_rate": 9.55675560789036e-06, "loss": 0.2788, "step": 2897 }, { "epoch": 0.14375713081006003, "grad_norm": 14.692317008972168, "learning_rate": 9.556431561526181e-06, "loss": 0.3206, "step": 2898 }, { "epoch": 0.14380673644526018, "grad_norm": 6.539543628692627, "learning_rate": 9.556107402251565e-06, "loss": 0.2825, "step": 2899 }, { "epoch": 0.14385634208046033, "grad_norm": 14.386412620544434, "learning_rate": 9.555783130074548e-06, "loss": 0.4751, "step": 2900 }, { "epoch": 0.1439059477156605, "grad_norm": 7.521795272827148, "learning_rate": 9.555458745003164e-06, "loss": 0.3004, "step": 2901 }, { "epoch": 0.14395555335086066, "grad_norm": 8.623774528503418, "learning_rate": 9.55513424704545e-06, "loss": 0.4098, "step": 2902 }, { "epoch": 0.1440051589860608, "grad_norm": 6.390080451965332, "learning_rate": 9.554809636209452e-06, "loss": 0.3379, "step": 2903 }, { "epoch": 0.144054764621261, "grad_norm": 8.675582885742188, "learning_rate": 9.55448491250321e-06, "loss": 0.3732, "step": 2904 }, { "epoch": 0.14410437025646114, "grad_norm": 7.0575432777404785, "learning_rate": 9.55416007593477e-06, "loss": 0.3319, "step": 2905 }, { "epoch": 0.1441539758916613, "grad_norm": 7.635843753814697, "learning_rate": 9.553835126512184e-06, "loss": 0.2324, "step": 2906 }, { "epoch": 0.14420358152686144, "grad_norm": 6.796002388000488, "learning_rate": 9.553510064243504e-06, "loss": 0.3342, "step": 2907 }, { "epoch": 0.14425318716206162, "grad_norm": 9.436991691589355, "learning_rate": 9.553184889136785e-06, "loss": 0.375, "step": 2908 }, { "epoch": 0.14430279279726177, "grad_norm": 7.76375150680542, "learning_rate": 9.552859601200084e-06, "loss": 0.3507, "step": 2909 }, { "epoch": 0.14435239843246192, "grad_norm": 7.743363857269287, "learning_rate": 9.552534200441463e-06, "loss": 0.3577, "step": 2910 }, { "epoch": 0.1444020040676621, "grad_norm": 14.722246170043945, "learning_rate": 9.552208686868985e-06, "loss": 0.454, "step": 2911 }, { "epoch": 0.14445160970286225, "grad_norm": 8.355340003967285, "learning_rate": 9.551883060490717e-06, "loss": 0.244, "step": 2912 }, { "epoch": 0.1445012153380624, "grad_norm": 7.237880229949951, "learning_rate": 9.551557321314728e-06, "loss": 0.3031, "step": 2913 }, { "epoch": 0.14455082097326255, "grad_norm": 9.980372428894043, "learning_rate": 9.55123146934909e-06, "loss": 0.3292, "step": 2914 }, { "epoch": 0.14460042660846273, "grad_norm": 4.946305274963379, "learning_rate": 9.550905504601874e-06, "loss": 0.2953, "step": 2915 }, { "epoch": 0.14465003224366288, "grad_norm": 11.037138938903809, "learning_rate": 9.550579427081167e-06, "loss": 0.3748, "step": 2916 }, { "epoch": 0.14469963787886303, "grad_norm": 5.153481483459473, "learning_rate": 9.550253236795037e-06, "loss": 0.3311, "step": 2917 }, { "epoch": 0.1447492435140632, "grad_norm": 9.652649879455566, "learning_rate": 9.549926933751578e-06, "loss": 0.3928, "step": 2918 }, { "epoch": 0.14479884914926336, "grad_norm": 12.088217735290527, "learning_rate": 9.54960051795887e-06, "loss": 0.4042, "step": 2919 }, { "epoch": 0.1448484547844635, "grad_norm": 11.709822654724121, "learning_rate": 9.549273989425003e-06, "loss": 0.3602, "step": 2920 }, { "epoch": 0.14489806041966366, "grad_norm": 6.109491348266602, "learning_rate": 9.548947348158068e-06, "loss": 0.233, "step": 2921 }, { "epoch": 0.14494766605486384, "grad_norm": 7.041043758392334, "learning_rate": 9.54862059416616e-06, "loss": 0.2687, "step": 2922 }, { "epoch": 0.144997271690064, "grad_norm": 7.1388349533081055, "learning_rate": 9.548293727457378e-06, "loss": 0.3037, "step": 2923 }, { "epoch": 0.14504687732526414, "grad_norm": 8.28686237335205, "learning_rate": 9.547966748039818e-06, "loss": 0.3849, "step": 2924 }, { "epoch": 0.14509648296046432, "grad_norm": 6.895704746246338, "learning_rate": 9.547639655921586e-06, "loss": 0.3639, "step": 2925 }, { "epoch": 0.14514608859566447, "grad_norm": 9.593576431274414, "learning_rate": 9.547312451110786e-06, "loss": 0.3694, "step": 2926 }, { "epoch": 0.14519569423086462, "grad_norm": 7.026237487792969, "learning_rate": 9.546985133615527e-06, "loss": 0.3736, "step": 2927 }, { "epoch": 0.1452452998660648, "grad_norm": 15.100151062011719, "learning_rate": 9.546657703443917e-06, "loss": 0.361, "step": 2928 }, { "epoch": 0.14529490550126495, "grad_norm": 13.467123031616211, "learning_rate": 9.546330160604076e-06, "loss": 0.3607, "step": 2929 }, { "epoch": 0.1453445111364651, "grad_norm": 5.927829742431641, "learning_rate": 9.546002505104115e-06, "loss": 0.3547, "step": 2930 }, { "epoch": 0.14539411677166525, "grad_norm": 5.789943695068359, "learning_rate": 9.545674736952155e-06, "loss": 0.2913, "step": 2931 }, { "epoch": 0.14544372240686543, "grad_norm": 4.373473644256592, "learning_rate": 9.54534685615632e-06, "loss": 0.1913, "step": 2932 }, { "epoch": 0.14549332804206558, "grad_norm": 8.278157234191895, "learning_rate": 9.545018862724733e-06, "loss": 0.3776, "step": 2933 }, { "epoch": 0.14554293367726573, "grad_norm": 9.77973747253418, "learning_rate": 9.544690756665524e-06, "loss": 0.3705, "step": 2934 }, { "epoch": 0.1455925393124659, "grad_norm": 13.065530776977539, "learning_rate": 9.544362537986821e-06, "loss": 0.4967, "step": 2935 }, { "epoch": 0.14564214494766606, "grad_norm": 12.539377212524414, "learning_rate": 9.54403420669676e-06, "loss": 0.4029, "step": 2936 }, { "epoch": 0.1456917505828662, "grad_norm": 6.77907133102417, "learning_rate": 9.543705762803474e-06, "loss": 0.288, "step": 2937 }, { "epoch": 0.14574135621806636, "grad_norm": 4.526096343994141, "learning_rate": 9.543377206315106e-06, "loss": 0.3377, "step": 2938 }, { "epoch": 0.14579096185326654, "grad_norm": 9.4704008102417, "learning_rate": 9.543048537239794e-06, "loss": 0.3237, "step": 2939 }, { "epoch": 0.1458405674884667, "grad_norm": 9.409615516662598, "learning_rate": 9.542719755585684e-06, "loss": 0.3958, "step": 2940 }, { "epoch": 0.14589017312366684, "grad_norm": 5.497317314147949, "learning_rate": 9.542390861360924e-06, "loss": 0.3085, "step": 2941 }, { "epoch": 0.14593977875886702, "grad_norm": 11.87500286102295, "learning_rate": 9.542061854573664e-06, "loss": 0.4612, "step": 2942 }, { "epoch": 0.14598938439406717, "grad_norm": 6.153878211975098, "learning_rate": 9.541732735232057e-06, "loss": 0.3438, "step": 2943 }, { "epoch": 0.14603899002926732, "grad_norm": 7.764197826385498, "learning_rate": 9.541403503344258e-06, "loss": 0.3589, "step": 2944 }, { "epoch": 0.1460885956644675, "grad_norm": 61.31501007080078, "learning_rate": 9.541074158918426e-06, "loss": 0.3722, "step": 2945 }, { "epoch": 0.14613820129966765, "grad_norm": 7.012516975402832, "learning_rate": 9.540744701962721e-06, "loss": 0.3965, "step": 2946 }, { "epoch": 0.1461878069348678, "grad_norm": 10.765146255493164, "learning_rate": 9.54041513248531e-06, "loss": 0.3638, "step": 2947 }, { "epoch": 0.14623741257006795, "grad_norm": 8.163467407226562, "learning_rate": 9.540085450494357e-06, "loss": 0.3107, "step": 2948 }, { "epoch": 0.14628701820526813, "grad_norm": 6.6346049308776855, "learning_rate": 9.539755655998034e-06, "loss": 0.3901, "step": 2949 }, { "epoch": 0.14633662384046828, "grad_norm": 8.2503023147583, "learning_rate": 9.539425749004512e-06, "loss": 0.4025, "step": 2950 }, { "epoch": 0.14638622947566843, "grad_norm": 6.374919891357422, "learning_rate": 9.539095729521965e-06, "loss": 0.2324, "step": 2951 }, { "epoch": 0.1464358351108686, "grad_norm": 6.15988302230835, "learning_rate": 9.538765597558573e-06, "loss": 0.3145, "step": 2952 }, { "epoch": 0.14648544074606876, "grad_norm": 7.452398300170898, "learning_rate": 9.538435353122519e-06, "loss": 0.2834, "step": 2953 }, { "epoch": 0.1465350463812689, "grad_norm": 6.08220100402832, "learning_rate": 9.53810499622198e-06, "loss": 0.3164, "step": 2954 }, { "epoch": 0.14658465201646906, "grad_norm": 21.359899520874023, "learning_rate": 9.537774526865148e-06, "loss": 0.4056, "step": 2955 }, { "epoch": 0.14663425765166924, "grad_norm": 5.223051071166992, "learning_rate": 9.537443945060211e-06, "loss": 0.2887, "step": 2956 }, { "epoch": 0.1466838632868694, "grad_norm": 8.420639991760254, "learning_rate": 9.537113250815363e-06, "loss": 0.2861, "step": 2957 }, { "epoch": 0.14673346892206954, "grad_norm": 9.875797271728516, "learning_rate": 9.536782444138792e-06, "loss": 0.3803, "step": 2958 }, { "epoch": 0.14678307455726972, "grad_norm": 7.845086574554443, "learning_rate": 9.536451525038702e-06, "loss": 0.3134, "step": 2959 }, { "epoch": 0.14683268019246987, "grad_norm": 7.588436126708984, "learning_rate": 9.53612049352329e-06, "loss": 0.3789, "step": 2960 }, { "epoch": 0.14688228582767002, "grad_norm": 5.656131267547607, "learning_rate": 9.535789349600761e-06, "loss": 0.3046, "step": 2961 }, { "epoch": 0.1469318914628702, "grad_norm": 6.8376688957214355, "learning_rate": 9.535458093279321e-06, "loss": 0.4179, "step": 2962 }, { "epoch": 0.14698149709807035, "grad_norm": 14.028919219970703, "learning_rate": 9.535126724567178e-06, "loss": 0.3447, "step": 2963 }, { "epoch": 0.1470311027332705, "grad_norm": 11.12044620513916, "learning_rate": 9.534795243472541e-06, "loss": 0.413, "step": 2964 }, { "epoch": 0.14708070836847065, "grad_norm": 9.024169921875, "learning_rate": 9.53446365000363e-06, "loss": 0.382, "step": 2965 }, { "epoch": 0.14713031400367083, "grad_norm": 13.03848934173584, "learning_rate": 9.534131944168655e-06, "loss": 0.3816, "step": 2966 }, { "epoch": 0.14717991963887098, "grad_norm": 5.0269927978515625, "learning_rate": 9.53380012597584e-06, "loss": 0.2421, "step": 2967 }, { "epoch": 0.14722952527407113, "grad_norm": 9.660884857177734, "learning_rate": 9.533468195433408e-06, "loss": 0.3034, "step": 2968 }, { "epoch": 0.1472791309092713, "grad_norm": 7.478847503662109, "learning_rate": 9.533136152549584e-06, "loss": 0.327, "step": 2969 }, { "epoch": 0.14732873654447146, "grad_norm": 8.143957138061523, "learning_rate": 9.532803997332596e-06, "loss": 0.3875, "step": 2970 }, { "epoch": 0.1473783421796716, "grad_norm": 7.633665084838867, "learning_rate": 9.532471729790673e-06, "loss": 0.3936, "step": 2971 }, { "epoch": 0.14742794781487176, "grad_norm": 7.145435810089111, "learning_rate": 9.53213934993205e-06, "loss": 0.3358, "step": 2972 }, { "epoch": 0.14747755345007194, "grad_norm": 15.093914031982422, "learning_rate": 9.531806857764965e-06, "loss": 0.3174, "step": 2973 }, { "epoch": 0.14752715908527209, "grad_norm": 24.733642578125, "learning_rate": 9.531474253297655e-06, "loss": 0.3856, "step": 2974 }, { "epoch": 0.14757676472047224, "grad_norm": 6.405581951141357, "learning_rate": 9.531141536538365e-06, "loss": 0.2335, "step": 2975 }, { "epoch": 0.14762637035567241, "grad_norm": 8.565552711486816, "learning_rate": 9.530808707495335e-06, "loss": 0.3733, "step": 2976 }, { "epoch": 0.14767597599087257, "grad_norm": 5.794182300567627, "learning_rate": 9.530475766176818e-06, "loss": 0.2232, "step": 2977 }, { "epoch": 0.14772558162607272, "grad_norm": 5.789178848266602, "learning_rate": 9.530142712591062e-06, "loss": 0.3152, "step": 2978 }, { "epoch": 0.14777518726127287, "grad_norm": 7.143840312957764, "learning_rate": 9.529809546746321e-06, "loss": 0.3022, "step": 2979 }, { "epoch": 0.14782479289647305, "grad_norm": 12.20642375946045, "learning_rate": 9.52947626865085e-06, "loss": 0.3885, "step": 2980 }, { "epoch": 0.1478743985316732, "grad_norm": 12.47549819946289, "learning_rate": 9.529142878312907e-06, "loss": 0.474, "step": 2981 }, { "epoch": 0.14792400416687335, "grad_norm": 17.83022689819336, "learning_rate": 9.528809375740755e-06, "loss": 0.3391, "step": 2982 }, { "epoch": 0.14797360980207352, "grad_norm": 13.045662879943848, "learning_rate": 9.52847576094266e-06, "loss": 0.2451, "step": 2983 }, { "epoch": 0.14802321543727368, "grad_norm": 10.765446662902832, "learning_rate": 9.528142033926884e-06, "loss": 0.4897, "step": 2984 }, { "epoch": 0.14807282107247383, "grad_norm": 7.874626159667969, "learning_rate": 9.527808194701703e-06, "loss": 0.3342, "step": 2985 }, { "epoch": 0.148122426707674, "grad_norm": 10.069647789001465, "learning_rate": 9.527474243275384e-06, "loss": 0.315, "step": 2986 }, { "epoch": 0.14817203234287415, "grad_norm": 6.44605827331543, "learning_rate": 9.527140179656207e-06, "loss": 0.2161, "step": 2987 }, { "epoch": 0.1482216379780743, "grad_norm": 6.717790603637695, "learning_rate": 9.52680600385245e-06, "loss": 0.32, "step": 2988 }, { "epoch": 0.14827124361327446, "grad_norm": 5.227035045623779, "learning_rate": 9.526471715872389e-06, "loss": 0.2824, "step": 2989 }, { "epoch": 0.14832084924847463, "grad_norm": 7.774450302124023, "learning_rate": 9.526137315724312e-06, "loss": 0.3125, "step": 2990 }, { "epoch": 0.14837045488367478, "grad_norm": 11.069734573364258, "learning_rate": 9.525802803416509e-06, "loss": 0.3237, "step": 2991 }, { "epoch": 0.14842006051887494, "grad_norm": 5.544340133666992, "learning_rate": 9.525468178957262e-06, "loss": 0.2663, "step": 2992 }, { "epoch": 0.14846966615407511, "grad_norm": 10.863571166992188, "learning_rate": 9.525133442354866e-06, "loss": 0.3222, "step": 2993 }, { "epoch": 0.14851927178927526, "grad_norm": 6.288308620452881, "learning_rate": 9.524798593617617e-06, "loss": 0.3605, "step": 2994 }, { "epoch": 0.14856887742447542, "grad_norm": 6.571115016937256, "learning_rate": 9.524463632753812e-06, "loss": 0.2721, "step": 2995 }, { "epoch": 0.14861848305967557, "grad_norm": 13.89298152923584, "learning_rate": 9.524128559771753e-06, "loss": 0.3524, "step": 2996 }, { "epoch": 0.14866808869487574, "grad_norm": 11.221294403076172, "learning_rate": 9.52379337467974e-06, "loss": 0.3426, "step": 2997 }, { "epoch": 0.1487176943300759, "grad_norm": 9.949872016906738, "learning_rate": 9.52345807748608e-06, "loss": 0.2829, "step": 2998 }, { "epoch": 0.14876729996527605, "grad_norm": 10.279791831970215, "learning_rate": 9.523122668199085e-06, "loss": 0.3254, "step": 2999 }, { "epoch": 0.14881690560047622, "grad_norm": 14.147788047790527, "learning_rate": 9.522787146827064e-06, "loss": 0.5204, "step": 3000 }, { "epoch": 0.14886651123567637, "grad_norm": 11.813923835754395, "learning_rate": 9.52245151337833e-06, "loss": 0.409, "step": 3001 }, { "epoch": 0.14891611687087652, "grad_norm": 5.359797477722168, "learning_rate": 9.522115767861204e-06, "loss": 0.2675, "step": 3002 }, { "epoch": 0.1489657225060767, "grad_norm": 5.029655456542969, "learning_rate": 9.521779910284001e-06, "loss": 0.2756, "step": 3003 }, { "epoch": 0.14901532814127685, "grad_norm": 6.023921012878418, "learning_rate": 9.52144394065505e-06, "loss": 0.387, "step": 3004 }, { "epoch": 0.149064933776477, "grad_norm": 4.086258888244629, "learning_rate": 9.521107858982668e-06, "loss": 0.2616, "step": 3005 }, { "epoch": 0.14911453941167715, "grad_norm": 7.437710285186768, "learning_rate": 9.520771665275192e-06, "loss": 0.3636, "step": 3006 }, { "epoch": 0.14916414504687733, "grad_norm": 5.57368803024292, "learning_rate": 9.520435359540948e-06, "loss": 0.4095, "step": 3007 }, { "epoch": 0.14921375068207748, "grad_norm": 6.480587959289551, "learning_rate": 9.52009894178827e-06, "loss": 0.3507, "step": 3008 }, { "epoch": 0.14926335631727763, "grad_norm": 7.307194709777832, "learning_rate": 9.519762412025497e-06, "loss": 0.2538, "step": 3009 }, { "epoch": 0.1493129619524778, "grad_norm": 13.953824996948242, "learning_rate": 9.519425770260967e-06, "loss": 0.4115, "step": 3010 }, { "epoch": 0.14936256758767796, "grad_norm": 6.215646743774414, "learning_rate": 9.51908901650302e-06, "loss": 0.3481, "step": 3011 }, { "epoch": 0.14941217322287811, "grad_norm": 6.974526405334473, "learning_rate": 9.518752150760006e-06, "loss": 0.2775, "step": 3012 }, { "epoch": 0.14946177885807826, "grad_norm": 4.24546480178833, "learning_rate": 9.518415173040267e-06, "loss": 0.2191, "step": 3013 }, { "epoch": 0.14951138449327844, "grad_norm": 7.05185604095459, "learning_rate": 9.518078083352158e-06, "loss": 0.2922, "step": 3014 }, { "epoch": 0.1495609901284786, "grad_norm": 8.106447219848633, "learning_rate": 9.51774088170403e-06, "loss": 0.3415, "step": 3015 }, { "epoch": 0.14961059576367874, "grad_norm": 4.826859474182129, "learning_rate": 9.517403568104238e-06, "loss": 0.2066, "step": 3016 }, { "epoch": 0.14966020139887892, "grad_norm": 9.213775634765625, "learning_rate": 9.517066142561143e-06, "loss": 0.3238, "step": 3017 }, { "epoch": 0.14970980703407907, "grad_norm": 6.183804512023926, "learning_rate": 9.516728605083105e-06, "loss": 0.2872, "step": 3018 }, { "epoch": 0.14975941266927922, "grad_norm": 8.877299308776855, "learning_rate": 9.51639095567849e-06, "loss": 0.3355, "step": 3019 }, { "epoch": 0.1498090183044794, "grad_norm": 8.68667221069336, "learning_rate": 9.516053194355664e-06, "loss": 0.3386, "step": 3020 }, { "epoch": 0.14985862393967955, "grad_norm": 16.6275577545166, "learning_rate": 9.515715321122998e-06, "loss": 0.5176, "step": 3021 }, { "epoch": 0.1499082295748797, "grad_norm": 46.07261276245117, "learning_rate": 9.515377335988862e-06, "loss": 0.3849, "step": 3022 }, { "epoch": 0.14995783521007985, "grad_norm": 13.720123291015625, "learning_rate": 9.515039238961635e-06, "loss": 0.1762, "step": 3023 }, { "epoch": 0.15000744084528003, "grad_norm": 3.6063990592956543, "learning_rate": 9.514701030049691e-06, "loss": 0.1678, "step": 3024 }, { "epoch": 0.15005704648048018, "grad_norm": 10.192358016967773, "learning_rate": 9.514362709261413e-06, "loss": 0.4478, "step": 3025 }, { "epoch": 0.15010665211568033, "grad_norm": 11.812969207763672, "learning_rate": 9.514024276605187e-06, "loss": 0.3097, "step": 3026 }, { "epoch": 0.1501562577508805, "grad_norm": 13.593914031982422, "learning_rate": 9.513685732089398e-06, "loss": 0.468, "step": 3027 }, { "epoch": 0.15020586338608066, "grad_norm": 15.077197074890137, "learning_rate": 9.513347075722433e-06, "loss": 0.3964, "step": 3028 }, { "epoch": 0.1502554690212808, "grad_norm": 9.389357566833496, "learning_rate": 9.513008307512686e-06, "loss": 0.3537, "step": 3029 }, { "epoch": 0.15030507465648096, "grad_norm": 6.9527788162231445, "learning_rate": 9.512669427468551e-06, "loss": 0.2886, "step": 3030 }, { "epoch": 0.15035468029168114, "grad_norm": 8.481401443481445, "learning_rate": 9.512330435598429e-06, "loss": 0.4468, "step": 3031 }, { "epoch": 0.1504042859268813, "grad_norm": 13.951269149780273, "learning_rate": 9.511991331910714e-06, "loss": 0.3542, "step": 3032 }, { "epoch": 0.15045389156208144, "grad_norm": 21.043508529663086, "learning_rate": 9.511652116413816e-06, "loss": 0.3264, "step": 3033 }, { "epoch": 0.15050349719728162, "grad_norm": 19.205989837646484, "learning_rate": 9.511312789116135e-06, "loss": 0.4404, "step": 3034 }, { "epoch": 0.15055310283248177, "grad_norm": 8.025096893310547, "learning_rate": 9.510973350026085e-06, "loss": 0.3739, "step": 3035 }, { "epoch": 0.15060270846768192, "grad_norm": 15.019975662231445, "learning_rate": 9.510633799152074e-06, "loss": 0.3778, "step": 3036 }, { "epoch": 0.1506523141028821, "grad_norm": 9.505389213562012, "learning_rate": 9.510294136502515e-06, "loss": 0.3285, "step": 3037 }, { "epoch": 0.15070191973808225, "grad_norm": 5.79796838760376, "learning_rate": 9.509954362085832e-06, "loss": 0.3941, "step": 3038 }, { "epoch": 0.1507515253732824, "grad_norm": 8.000617027282715, "learning_rate": 9.509614475910436e-06, "loss": 0.4343, "step": 3039 }, { "epoch": 0.15080113100848255, "grad_norm": 10.736135482788086, "learning_rate": 9.509274477984754e-06, "loss": 0.3541, "step": 3040 }, { "epoch": 0.15085073664368273, "grad_norm": 7.417187690734863, "learning_rate": 9.508934368317211e-06, "loss": 0.3007, "step": 3041 }, { "epoch": 0.15090034227888288, "grad_norm": 4.323909759521484, "learning_rate": 9.508594146916234e-06, "loss": 0.304, "step": 3042 }, { "epoch": 0.15094994791408303, "grad_norm": 6.544260501861572, "learning_rate": 9.508253813790255e-06, "loss": 0.3058, "step": 3043 }, { "epoch": 0.1509995535492832, "grad_norm": 5.299976348876953, "learning_rate": 9.507913368947707e-06, "loss": 0.315, "step": 3044 }, { "epoch": 0.15104915918448336, "grad_norm": 6.970980167388916, "learning_rate": 9.507572812397027e-06, "loss": 0.3161, "step": 3045 }, { "epoch": 0.1510987648196835, "grad_norm": 7.578407287597656, "learning_rate": 9.507232144146654e-06, "loss": 0.2779, "step": 3046 }, { "epoch": 0.15114837045488366, "grad_norm": 7.492265224456787, "learning_rate": 9.506891364205029e-06, "loss": 0.287, "step": 3047 }, { "epoch": 0.15119797609008384, "grad_norm": 8.143016815185547, "learning_rate": 9.506550472580598e-06, "loss": 0.3323, "step": 3048 }, { "epoch": 0.151247581725284, "grad_norm": 8.061009407043457, "learning_rate": 9.506209469281806e-06, "loss": 0.3448, "step": 3049 }, { "epoch": 0.15129718736048414, "grad_norm": 6.533756256103516, "learning_rate": 9.505868354317106e-06, "loss": 0.2741, "step": 3050 }, { "epoch": 0.15134679299568432, "grad_norm": 10.512141227722168, "learning_rate": 9.505527127694951e-06, "loss": 0.418, "step": 3051 }, { "epoch": 0.15139639863088447, "grad_norm": 6.793345928192139, "learning_rate": 9.505185789423794e-06, "loss": 0.3375, "step": 3052 }, { "epoch": 0.15144600426608462, "grad_norm": 8.978283882141113, "learning_rate": 9.504844339512096e-06, "loss": 0.3085, "step": 3053 }, { "epoch": 0.15149560990128477, "grad_norm": 7.582152843475342, "learning_rate": 9.50450277796832e-06, "loss": 0.4329, "step": 3054 }, { "epoch": 0.15154521553648495, "grad_norm": 6.497351169586182, "learning_rate": 9.504161104800923e-06, "loss": 0.1766, "step": 3055 }, { "epoch": 0.1515948211716851, "grad_norm": 9.645428657531738, "learning_rate": 9.503819320018379e-06, "loss": 0.3097, "step": 3056 }, { "epoch": 0.15164442680688525, "grad_norm": 8.701994895935059, "learning_rate": 9.503477423629155e-06, "loss": 0.3518, "step": 3057 }, { "epoch": 0.15169403244208543, "grad_norm": 4.836479663848877, "learning_rate": 9.503135415641721e-06, "loss": 0.2693, "step": 3058 }, { "epoch": 0.15174363807728558, "grad_norm": 6.800168037414551, "learning_rate": 9.502793296064557e-06, "loss": 0.2591, "step": 3059 }, { "epoch": 0.15179324371248573, "grad_norm": 14.161138534545898, "learning_rate": 9.502451064906138e-06, "loss": 0.4724, "step": 3060 }, { "epoch": 0.1518428493476859, "grad_norm": 13.188551902770996, "learning_rate": 9.502108722174943e-06, "loss": 0.3749, "step": 3061 }, { "epoch": 0.15189245498288606, "grad_norm": 7.182842254638672, "learning_rate": 9.50176626787946e-06, "loss": 0.2969, "step": 3062 }, { "epoch": 0.1519420606180862, "grad_norm": 6.72225284576416, "learning_rate": 9.50142370202817e-06, "loss": 0.3178, "step": 3063 }, { "epoch": 0.15199166625328636, "grad_norm": 5.8919901847839355, "learning_rate": 9.501081024629566e-06, "loss": 0.2712, "step": 3064 }, { "epoch": 0.15204127188848654, "grad_norm": 7.202943325042725, "learning_rate": 9.500738235692138e-06, "loss": 0.3519, "step": 3065 }, { "epoch": 0.1520908775236867, "grad_norm": 6.469943046569824, "learning_rate": 9.500395335224381e-06, "loss": 0.3276, "step": 3066 }, { "epoch": 0.15214048315888684, "grad_norm": 9.102254867553711, "learning_rate": 9.500052323234792e-06, "loss": 0.3312, "step": 3067 }, { "epoch": 0.15219008879408702, "grad_norm": 9.983161926269531, "learning_rate": 9.49970919973187e-06, "loss": 0.4234, "step": 3068 }, { "epoch": 0.15223969442928717, "grad_norm": 10.03344440460205, "learning_rate": 9.499365964724118e-06, "loss": 0.3152, "step": 3069 }, { "epoch": 0.15228930006448732, "grad_norm": 11.75068187713623, "learning_rate": 9.499022618220043e-06, "loss": 0.2945, "step": 3070 }, { "epoch": 0.15233890569968747, "grad_norm": 9.912504196166992, "learning_rate": 9.498679160228153e-06, "loss": 0.331, "step": 3071 }, { "epoch": 0.15238851133488765, "grad_norm": 7.795519828796387, "learning_rate": 9.498335590756957e-06, "loss": 0.2934, "step": 3072 }, { "epoch": 0.1524381169700878, "grad_norm": 8.43103313446045, "learning_rate": 9.49799190981497e-06, "loss": 0.2803, "step": 3073 }, { "epoch": 0.15248772260528795, "grad_norm": 8.508658409118652, "learning_rate": 9.49764811741071e-06, "loss": 0.261, "step": 3074 }, { "epoch": 0.15253732824048813, "grad_norm": 18.585437774658203, "learning_rate": 9.497304213552695e-06, "loss": 0.4099, "step": 3075 }, { "epoch": 0.15258693387568828, "grad_norm": 6.959853649139404, "learning_rate": 9.496960198249447e-06, "loss": 0.3523, "step": 3076 }, { "epoch": 0.15263653951088843, "grad_norm": 17.1887264251709, "learning_rate": 9.496616071509491e-06, "loss": 0.4244, "step": 3077 }, { "epoch": 0.1526861451460886, "grad_norm": 6.838824272155762, "learning_rate": 9.496271833341356e-06, "loss": 0.3619, "step": 3078 }, { "epoch": 0.15273575078128876, "grad_norm": 7.0357537269592285, "learning_rate": 9.49592748375357e-06, "loss": 0.384, "step": 3079 }, { "epoch": 0.1527853564164889, "grad_norm": 8.719807624816895, "learning_rate": 9.495583022754667e-06, "loss": 0.2849, "step": 3080 }, { "epoch": 0.15283496205168906, "grad_norm": 6.791317462921143, "learning_rate": 9.495238450353182e-06, "loss": 0.3364, "step": 3081 }, { "epoch": 0.15288456768688924, "grad_norm": 6.334449768066406, "learning_rate": 9.494893766557657e-06, "loss": 0.4352, "step": 3082 }, { "epoch": 0.1529341733220894, "grad_norm": 5.801387310028076, "learning_rate": 9.49454897137663e-06, "loss": 0.3995, "step": 3083 }, { "epoch": 0.15298377895728954, "grad_norm": 5.3354058265686035, "learning_rate": 9.494204064818646e-06, "loss": 0.2902, "step": 3084 }, { "epoch": 0.15303338459248972, "grad_norm": 5.857560157775879, "learning_rate": 9.493859046892252e-06, "loss": 0.2898, "step": 3085 }, { "epoch": 0.15308299022768987, "grad_norm": 7.119302749633789, "learning_rate": 9.493513917606001e-06, "loss": 0.3622, "step": 3086 }, { "epoch": 0.15313259586289002, "grad_norm": 7.120561122894287, "learning_rate": 9.49316867696844e-06, "loss": 0.2965, "step": 3087 }, { "epoch": 0.15318220149809017, "grad_norm": 9.419934272766113, "learning_rate": 9.492823324988127e-06, "loss": 0.2919, "step": 3088 }, { "epoch": 0.15323180713329035, "grad_norm": 7.902732849121094, "learning_rate": 9.49247786167362e-06, "loss": 0.3586, "step": 3089 }, { "epoch": 0.1532814127684905, "grad_norm": 11.58711051940918, "learning_rate": 9.49213228703348e-06, "loss": 0.4251, "step": 3090 }, { "epoch": 0.15333101840369065, "grad_norm": 6.477022647857666, "learning_rate": 9.49178660107627e-06, "loss": 0.3014, "step": 3091 }, { "epoch": 0.15338062403889083, "grad_norm": 14.917794227600098, "learning_rate": 9.491440803810555e-06, "loss": 0.414, "step": 3092 }, { "epoch": 0.15343022967409098, "grad_norm": 8.154749870300293, "learning_rate": 9.491094895244905e-06, "loss": 0.4007, "step": 3093 }, { "epoch": 0.15347983530929113, "grad_norm": 11.313209533691406, "learning_rate": 9.490748875387892e-06, "loss": 0.2992, "step": 3094 }, { "epoch": 0.1535294409444913, "grad_norm": 6.084963798522949, "learning_rate": 9.490402744248091e-06, "loss": 0.3197, "step": 3095 }, { "epoch": 0.15357904657969146, "grad_norm": 13.461780548095703, "learning_rate": 9.490056501834079e-06, "loss": 0.3005, "step": 3096 }, { "epoch": 0.1536286522148916, "grad_norm": 15.0126314163208, "learning_rate": 9.489710148154437e-06, "loss": 0.3109, "step": 3097 }, { "epoch": 0.15367825785009176, "grad_norm": 6.685807228088379, "learning_rate": 9.489363683217744e-06, "loss": 0.3373, "step": 3098 }, { "epoch": 0.15372786348529194, "grad_norm": 7.127919673919678, "learning_rate": 9.48901710703259e-06, "loss": 0.3415, "step": 3099 }, { "epoch": 0.1537774691204921, "grad_norm": 4.33756160736084, "learning_rate": 9.48867041960756e-06, "loss": 0.2799, "step": 3100 }, { "epoch": 0.15382707475569224, "grad_norm": 8.864341735839844, "learning_rate": 9.488323620951248e-06, "loss": 0.3013, "step": 3101 }, { "epoch": 0.15387668039089242, "grad_norm": 11.313314437866211, "learning_rate": 9.487976711072246e-06, "loss": 0.2377, "step": 3102 }, { "epoch": 0.15392628602609257, "grad_norm": 5.10629940032959, "learning_rate": 9.48762968997915e-06, "loss": 0.3026, "step": 3103 }, { "epoch": 0.15397589166129272, "grad_norm": 10.816805839538574, "learning_rate": 9.487282557680562e-06, "loss": 0.3467, "step": 3104 }, { "epoch": 0.15402549729649287, "grad_norm": 6.665344715118408, "learning_rate": 9.48693531418508e-06, "loss": 0.3094, "step": 3105 }, { "epoch": 0.15407510293169305, "grad_norm": 14.091194152832031, "learning_rate": 9.486587959501314e-06, "loss": 0.2453, "step": 3106 }, { "epoch": 0.1541247085668932, "grad_norm": 6.072402477264404, "learning_rate": 9.486240493637868e-06, "loss": 0.311, "step": 3107 }, { "epoch": 0.15417431420209335, "grad_norm": 18.71208953857422, "learning_rate": 9.485892916603353e-06, "loss": 0.3673, "step": 3108 }, { "epoch": 0.15422391983729353, "grad_norm": 15.287580490112305, "learning_rate": 9.48554522840638e-06, "loss": 0.52, "step": 3109 }, { "epoch": 0.15427352547249368, "grad_norm": 7.468416213989258, "learning_rate": 9.48519742905557e-06, "loss": 0.3641, "step": 3110 }, { "epoch": 0.15432313110769383, "grad_norm": 6.39995813369751, "learning_rate": 9.484849518559539e-06, "loss": 0.2554, "step": 3111 }, { "epoch": 0.15437273674289398, "grad_norm": 9.876321792602539, "learning_rate": 9.484501496926906e-06, "loss": 0.3324, "step": 3112 }, { "epoch": 0.15442234237809416, "grad_norm": 7.694117069244385, "learning_rate": 9.4841533641663e-06, "loss": 0.2738, "step": 3113 }, { "epoch": 0.1544719480132943, "grad_norm": 4.565333843231201, "learning_rate": 9.483805120286343e-06, "loss": 0.2694, "step": 3114 }, { "epoch": 0.15452155364849446, "grad_norm": 5.482649803161621, "learning_rate": 9.483456765295668e-06, "loss": 0.3456, "step": 3115 }, { "epoch": 0.15457115928369464, "grad_norm": 23.898374557495117, "learning_rate": 9.483108299202907e-06, "loss": 0.5124, "step": 3116 }, { "epoch": 0.1546207649188948, "grad_norm": 7.886423110961914, "learning_rate": 9.482759722016693e-06, "loss": 0.2565, "step": 3117 }, { "epoch": 0.15467037055409494, "grad_norm": 7.319941520690918, "learning_rate": 9.482411033745667e-06, "loss": 0.3724, "step": 3118 }, { "epoch": 0.15471997618929512, "grad_norm": 9.734521865844727, "learning_rate": 9.482062234398467e-06, "loss": 0.4254, "step": 3119 }, { "epoch": 0.15476958182449527, "grad_norm": 12.171855926513672, "learning_rate": 9.481713323983736e-06, "loss": 0.4753, "step": 3120 }, { "epoch": 0.15481918745969542, "grad_norm": 7.332026481628418, "learning_rate": 9.481364302510123e-06, "loss": 0.4192, "step": 3121 }, { "epoch": 0.15486879309489557, "grad_norm": 5.759345054626465, "learning_rate": 9.481015169986274e-06, "loss": 0.2787, "step": 3122 }, { "epoch": 0.15491839873009575, "grad_norm": 5.0655741691589355, "learning_rate": 9.480665926420844e-06, "loss": 0.3653, "step": 3123 }, { "epoch": 0.1549680043652959, "grad_norm": 5.75142765045166, "learning_rate": 9.480316571822485e-06, "loss": 0.3233, "step": 3124 }, { "epoch": 0.15501761000049605, "grad_norm": 10.721899032592773, "learning_rate": 9.479967106199855e-06, "loss": 0.4015, "step": 3125 }, { "epoch": 0.15506721563569623, "grad_norm": 9.920660972595215, "learning_rate": 9.479617529561612e-06, "loss": 0.3922, "step": 3126 }, { "epoch": 0.15511682127089638, "grad_norm": 6.408607006072998, "learning_rate": 9.479267841916422e-06, "loss": 0.2524, "step": 3127 }, { "epoch": 0.15516642690609653, "grad_norm": 8.205399513244629, "learning_rate": 9.478918043272947e-06, "loss": 0.3859, "step": 3128 }, { "epoch": 0.15521603254129668, "grad_norm": 9.685617446899414, "learning_rate": 9.478568133639858e-06, "loss": 0.3551, "step": 3129 }, { "epoch": 0.15526563817649686, "grad_norm": 6.901444911956787, "learning_rate": 9.478218113025826e-06, "loss": 0.2272, "step": 3130 }, { "epoch": 0.155315243811697, "grad_norm": 8.95615291595459, "learning_rate": 9.477867981439521e-06, "loss": 0.3114, "step": 3131 }, { "epoch": 0.15536484944689716, "grad_norm": 7.095144271850586, "learning_rate": 9.477517738889624e-06, "loss": 0.335, "step": 3132 }, { "epoch": 0.15541445508209734, "grad_norm": 8.892661094665527, "learning_rate": 9.47716738538481e-06, "loss": 0.2759, "step": 3133 }, { "epoch": 0.1554640607172975, "grad_norm": 5.593336582183838, "learning_rate": 9.476816920933764e-06, "loss": 0.3779, "step": 3134 }, { "epoch": 0.15551366635249764, "grad_norm": 7.797176837921143, "learning_rate": 9.47646634554517e-06, "loss": 0.3601, "step": 3135 }, { "epoch": 0.15556327198769782, "grad_norm": 5.878205299377441, "learning_rate": 9.476115659227713e-06, "loss": 0.3016, "step": 3136 }, { "epoch": 0.15561287762289797, "grad_norm": 7.209716320037842, "learning_rate": 9.475764861990087e-06, "loss": 0.2663, "step": 3137 }, { "epoch": 0.15566248325809812, "grad_norm": 12.181374549865723, "learning_rate": 9.475413953840984e-06, "loss": 0.4936, "step": 3138 }, { "epoch": 0.15571208889329827, "grad_norm": 6.660126209259033, "learning_rate": 9.475062934789097e-06, "loss": 0.3543, "step": 3139 }, { "epoch": 0.15576169452849845, "grad_norm": 7.379275798797607, "learning_rate": 9.474711804843127e-06, "loss": 0.3645, "step": 3140 }, { "epoch": 0.1558113001636986, "grad_norm": 14.322665214538574, "learning_rate": 9.474360564011775e-06, "loss": 0.4035, "step": 3141 }, { "epoch": 0.15586090579889875, "grad_norm": 8.71790599822998, "learning_rate": 9.474009212303744e-06, "loss": 0.3401, "step": 3142 }, { "epoch": 0.15591051143409893, "grad_norm": 5.037220001220703, "learning_rate": 9.47365774972774e-06, "loss": 0.2212, "step": 3143 }, { "epoch": 0.15596011706929908, "grad_norm": 5.698306560516357, "learning_rate": 9.473306176292472e-06, "loss": 0.3424, "step": 3144 }, { "epoch": 0.15600972270449923, "grad_norm": 7.753818035125732, "learning_rate": 9.472954492006656e-06, "loss": 0.4449, "step": 3145 }, { "epoch": 0.15605932833969938, "grad_norm": 7.2794189453125, "learning_rate": 9.472602696879004e-06, "loss": 0.3674, "step": 3146 }, { "epoch": 0.15610893397489956, "grad_norm": 7.66330099105835, "learning_rate": 9.472250790918233e-06, "loss": 0.2878, "step": 3147 }, { "epoch": 0.1561585396100997, "grad_norm": 12.447885513305664, "learning_rate": 9.471898774133065e-06, "loss": 0.2801, "step": 3148 }, { "epoch": 0.15620814524529986, "grad_norm": 8.460334777832031, "learning_rate": 9.471546646532222e-06, "loss": 0.3963, "step": 3149 }, { "epoch": 0.15625775088050003, "grad_norm": 4.684885025024414, "learning_rate": 9.471194408124432e-06, "loss": 0.3196, "step": 3150 }, { "epoch": 0.15630735651570019, "grad_norm": 5.207546234130859, "learning_rate": 9.47084205891842e-06, "loss": 0.2951, "step": 3151 }, { "epoch": 0.15635696215090034, "grad_norm": 4.745906352996826, "learning_rate": 9.47048959892292e-06, "loss": 0.371, "step": 3152 }, { "epoch": 0.15640656778610051, "grad_norm": 5.648268222808838, "learning_rate": 9.470137028146665e-06, "loss": 0.3431, "step": 3153 }, { "epoch": 0.15645617342130066, "grad_norm": 9.894728660583496, "learning_rate": 9.469784346598392e-06, "loss": 0.3794, "step": 3154 }, { "epoch": 0.15650577905650082, "grad_norm": 5.8351945877075195, "learning_rate": 9.469431554286844e-06, "loss": 0.3068, "step": 3155 }, { "epoch": 0.15655538469170097, "grad_norm": 10.333155632019043, "learning_rate": 9.469078651220758e-06, "loss": 0.408, "step": 3156 }, { "epoch": 0.15660499032690114, "grad_norm": 7.319229602813721, "learning_rate": 9.46872563740888e-06, "loss": 0.312, "step": 3157 }, { "epoch": 0.1566545959621013, "grad_norm": 6.335809707641602, "learning_rate": 9.468372512859963e-06, "loss": 0.408, "step": 3158 }, { "epoch": 0.15670420159730145, "grad_norm": 12.34383487701416, "learning_rate": 9.468019277582751e-06, "loss": 0.4542, "step": 3159 }, { "epoch": 0.15675380723250162, "grad_norm": 18.050546646118164, "learning_rate": 9.467665931586e-06, "loss": 0.4247, "step": 3160 }, { "epoch": 0.15680341286770177, "grad_norm": 21.650766372680664, "learning_rate": 9.467312474878469e-06, "loss": 0.4488, "step": 3161 }, { "epoch": 0.15685301850290193, "grad_norm": 4.976654052734375, "learning_rate": 9.466958907468913e-06, "loss": 0.3593, "step": 3162 }, { "epoch": 0.15690262413810208, "grad_norm": 6.277866840362549, "learning_rate": 9.466605229366094e-06, "loss": 0.2968, "step": 3163 }, { "epoch": 0.15695222977330225, "grad_norm": 8.043359756469727, "learning_rate": 9.466251440578777e-06, "loss": 0.3157, "step": 3164 }, { "epoch": 0.1570018354085024, "grad_norm": 5.338812828063965, "learning_rate": 9.46589754111573e-06, "loss": 0.3504, "step": 3165 }, { "epoch": 0.15705144104370256, "grad_norm": 5.532532691955566, "learning_rate": 9.465543530985719e-06, "loss": 0.214, "step": 3166 }, { "epoch": 0.15710104667890273, "grad_norm": 9.171671867370605, "learning_rate": 9.465189410197521e-06, "loss": 0.4001, "step": 3167 }, { "epoch": 0.15715065231410288, "grad_norm": 11.324100494384766, "learning_rate": 9.46483517875991e-06, "loss": 0.4264, "step": 3168 }, { "epoch": 0.15720025794930303, "grad_norm": 5.516009330749512, "learning_rate": 9.464480836681663e-06, "loss": 0.2614, "step": 3169 }, { "epoch": 0.1572498635845032, "grad_norm": 8.404312133789062, "learning_rate": 9.464126383971564e-06, "loss": 0.338, "step": 3170 }, { "epoch": 0.15729946921970336, "grad_norm": 9.158870697021484, "learning_rate": 9.46377182063839e-06, "loss": 0.3512, "step": 3171 }, { "epoch": 0.15734907485490351, "grad_norm": 8.973617553710938, "learning_rate": 9.463417146690932e-06, "loss": 0.2134, "step": 3172 }, { "epoch": 0.15739868049010367, "grad_norm": 11.736458778381348, "learning_rate": 9.46306236213798e-06, "loss": 0.2653, "step": 3173 }, { "epoch": 0.15744828612530384, "grad_norm": 7.084090709686279, "learning_rate": 9.462707466988323e-06, "loss": 0.3125, "step": 3174 }, { "epoch": 0.157497891760504, "grad_norm": 6.233851432800293, "learning_rate": 9.462352461250756e-06, "loss": 0.3284, "step": 3175 }, { "epoch": 0.15754749739570414, "grad_norm": 13.343886375427246, "learning_rate": 9.461997344934075e-06, "loss": 0.4829, "step": 3176 }, { "epoch": 0.15759710303090432, "grad_norm": 6.768619537353516, "learning_rate": 9.461642118047083e-06, "loss": 0.3432, "step": 3177 }, { "epoch": 0.15764670866610447, "grad_norm": 12.103811264038086, "learning_rate": 9.461286780598582e-06, "loss": 0.462, "step": 3178 }, { "epoch": 0.15769631430130462, "grad_norm": 21.175769805908203, "learning_rate": 9.460931332597374e-06, "loss": 0.4327, "step": 3179 }, { "epoch": 0.15774591993650477, "grad_norm": 9.472521781921387, "learning_rate": 9.460575774052272e-06, "loss": 0.2516, "step": 3180 }, { "epoch": 0.15779552557170495, "grad_norm": 11.861058235168457, "learning_rate": 9.460220104972083e-06, "loss": 0.5122, "step": 3181 }, { "epoch": 0.1578451312069051, "grad_norm": 5.644253730773926, "learning_rate": 9.459864325365623e-06, "loss": 0.2883, "step": 3182 }, { "epoch": 0.15789473684210525, "grad_norm": 5.103278636932373, "learning_rate": 9.459508435241708e-06, "loss": 0.2241, "step": 3183 }, { "epoch": 0.15794434247730543, "grad_norm": 6.205841064453125, "learning_rate": 9.459152434609156e-06, "loss": 0.3937, "step": 3184 }, { "epoch": 0.15799394811250558, "grad_norm": 8.674827575683594, "learning_rate": 9.45879632347679e-06, "loss": 0.3661, "step": 3185 }, { "epoch": 0.15804355374770573, "grad_norm": 13.607484817504883, "learning_rate": 9.458440101853434e-06, "loss": 0.2292, "step": 3186 }, { "epoch": 0.15809315938290588, "grad_norm": 10.761003494262695, "learning_rate": 9.458083769747915e-06, "loss": 0.3169, "step": 3187 }, { "epoch": 0.15814276501810606, "grad_norm": 11.239890098571777, "learning_rate": 9.457727327169064e-06, "loss": 0.4685, "step": 3188 }, { "epoch": 0.1581923706533062, "grad_norm": 16.700780868530273, "learning_rate": 9.457370774125714e-06, "loss": 0.4415, "step": 3189 }, { "epoch": 0.15824197628850636, "grad_norm": 7.291068077087402, "learning_rate": 9.457014110626698e-06, "loss": 0.3099, "step": 3190 }, { "epoch": 0.15829158192370654, "grad_norm": 11.416441917419434, "learning_rate": 9.45665733668086e-06, "loss": 0.3059, "step": 3191 }, { "epoch": 0.1583411875589067, "grad_norm": 6.899285316467285, "learning_rate": 9.456300452297033e-06, "loss": 0.391, "step": 3192 }, { "epoch": 0.15839079319410684, "grad_norm": 6.969752311706543, "learning_rate": 9.455943457484067e-06, "loss": 0.3438, "step": 3193 }, { "epoch": 0.15844039882930702, "grad_norm": 6.123564720153809, "learning_rate": 9.455586352250806e-06, "loss": 0.2819, "step": 3194 }, { "epoch": 0.15849000446450717, "grad_norm": 8.635136604309082, "learning_rate": 9.4552291366061e-06, "loss": 0.3445, "step": 3195 }, { "epoch": 0.15853961009970732, "grad_norm": 5.158025741577148, "learning_rate": 9.454871810558801e-06, "loss": 0.3503, "step": 3196 }, { "epoch": 0.15858921573490747, "grad_norm": 6.992101192474365, "learning_rate": 9.454514374117762e-06, "loss": 0.3753, "step": 3197 }, { "epoch": 0.15863882137010765, "grad_norm": 5.911597728729248, "learning_rate": 9.454156827291843e-06, "loss": 0.35, "step": 3198 }, { "epoch": 0.1586884270053078, "grad_norm": 3.7176289558410645, "learning_rate": 9.453799170089904e-06, "loss": 0.2502, "step": 3199 }, { "epoch": 0.15873803264050795, "grad_norm": 8.194098472595215, "learning_rate": 9.453441402520804e-06, "loss": 0.3885, "step": 3200 }, { "epoch": 0.15878763827570813, "grad_norm": 7.1098713874816895, "learning_rate": 9.453083524593414e-06, "loss": 0.311, "step": 3201 }, { "epoch": 0.15883724391090828, "grad_norm": 9.787528991699219, "learning_rate": 9.452725536316598e-06, "loss": 0.3476, "step": 3202 }, { "epoch": 0.15888684954610843, "grad_norm": 5.040643215179443, "learning_rate": 9.452367437699231e-06, "loss": 0.287, "step": 3203 }, { "epoch": 0.15893645518130858, "grad_norm": 5.459049224853516, "learning_rate": 9.452009228750184e-06, "loss": 0.3267, "step": 3204 }, { "epoch": 0.15898606081650876, "grad_norm": 6.417264938354492, "learning_rate": 9.451650909478335e-06, "loss": 0.3602, "step": 3205 }, { "epoch": 0.1590356664517089, "grad_norm": 9.530943870544434, "learning_rate": 9.451292479892563e-06, "loss": 0.339, "step": 3206 }, { "epoch": 0.15908527208690906, "grad_norm": 14.492083549499512, "learning_rate": 9.450933940001747e-06, "loss": 0.5043, "step": 3207 }, { "epoch": 0.15913487772210924, "grad_norm": 8.475744247436523, "learning_rate": 9.450575289814778e-06, "loss": 0.4078, "step": 3208 }, { "epoch": 0.1591844833573094, "grad_norm": 14.149904251098633, "learning_rate": 9.450216529340538e-06, "loss": 0.418, "step": 3209 }, { "epoch": 0.15923408899250954, "grad_norm": 6.017909526824951, "learning_rate": 9.44985765858792e-06, "loss": 0.2345, "step": 3210 }, { "epoch": 0.15928369462770972, "grad_norm": 6.766818046569824, "learning_rate": 9.449498677565816e-06, "loss": 0.294, "step": 3211 }, { "epoch": 0.15933330026290987, "grad_norm": 11.235970497131348, "learning_rate": 9.449139586283122e-06, "loss": 0.3998, "step": 3212 }, { "epoch": 0.15938290589811002, "grad_norm": 19.050092697143555, "learning_rate": 9.448780384748738e-06, "loss": 0.3054, "step": 3213 }, { "epoch": 0.15943251153331017, "grad_norm": 7.4975409507751465, "learning_rate": 9.448421072971563e-06, "loss": 0.2586, "step": 3214 }, { "epoch": 0.15948211716851035, "grad_norm": 7.192587852478027, "learning_rate": 9.4480616509605e-06, "loss": 0.3251, "step": 3215 }, { "epoch": 0.1595317228037105, "grad_norm": 14.213371276855469, "learning_rate": 9.44770211872446e-06, "loss": 0.4842, "step": 3216 }, { "epoch": 0.15958132843891065, "grad_norm": 11.44278335571289, "learning_rate": 9.447342476272347e-06, "loss": 0.4357, "step": 3217 }, { "epoch": 0.15963093407411083, "grad_norm": 10.28407096862793, "learning_rate": 9.446982723613076e-06, "loss": 0.4251, "step": 3218 }, { "epoch": 0.15968053970931098, "grad_norm": 14.914729118347168, "learning_rate": 9.446622860755563e-06, "loss": 0.3999, "step": 3219 }, { "epoch": 0.15973014534451113, "grad_norm": 8.37828254699707, "learning_rate": 9.446262887708724e-06, "loss": 0.3657, "step": 3220 }, { "epoch": 0.15977975097971128, "grad_norm": 5.674683094024658, "learning_rate": 9.445902804481479e-06, "loss": 0.3773, "step": 3221 }, { "epoch": 0.15982935661491146, "grad_norm": 6.001343727111816, "learning_rate": 9.44554261108275e-06, "loss": 0.3065, "step": 3222 }, { "epoch": 0.1598789622501116, "grad_norm": 4.433187484741211, "learning_rate": 9.445182307521466e-06, "loss": 0.2174, "step": 3223 }, { "epoch": 0.15992856788531176, "grad_norm": 9.31721305847168, "learning_rate": 9.444821893806553e-06, "loss": 0.3744, "step": 3224 }, { "epoch": 0.15997817352051194, "grad_norm": 6.227138042449951, "learning_rate": 9.444461369946943e-06, "loss": 0.351, "step": 3225 }, { "epoch": 0.1600277791557121, "grad_norm": 9.330697059631348, "learning_rate": 9.44410073595157e-06, "loss": 0.3842, "step": 3226 }, { "epoch": 0.16007738479091224, "grad_norm": 8.37795639038086, "learning_rate": 9.443739991829368e-06, "loss": 0.3359, "step": 3227 }, { "epoch": 0.16012699042611242, "grad_norm": 11.281765937805176, "learning_rate": 9.443379137589282e-06, "loss": 0.4504, "step": 3228 }, { "epoch": 0.16017659606131257, "grad_norm": 6.5587239265441895, "learning_rate": 9.443018173240249e-06, "loss": 0.2942, "step": 3229 }, { "epoch": 0.16022620169651272, "grad_norm": 4.323283672332764, "learning_rate": 9.442657098791217e-06, "loss": 0.2329, "step": 3230 }, { "epoch": 0.16027580733171287, "grad_norm": 9.888378143310547, "learning_rate": 9.442295914251131e-06, "loss": 0.3565, "step": 3231 }, { "epoch": 0.16032541296691305, "grad_norm": 4.115589618682861, "learning_rate": 9.441934619628944e-06, "loss": 0.1804, "step": 3232 }, { "epoch": 0.1603750186021132, "grad_norm": 7.1000518798828125, "learning_rate": 9.441573214933608e-06, "loss": 0.3506, "step": 3233 }, { "epoch": 0.16042462423731335, "grad_norm": 5.900187969207764, "learning_rate": 9.44121170017408e-06, "loss": 0.2607, "step": 3234 }, { "epoch": 0.16047422987251353, "grad_norm": 9.502942085266113, "learning_rate": 9.440850075359315e-06, "loss": 0.3149, "step": 3235 }, { "epoch": 0.16052383550771368, "grad_norm": 9.80914306640625, "learning_rate": 9.440488340498277e-06, "loss": 0.2989, "step": 3236 }, { "epoch": 0.16057344114291383, "grad_norm": 4.093424320220947, "learning_rate": 9.440126495599929e-06, "loss": 0.2241, "step": 3237 }, { "epoch": 0.16062304677811398, "grad_norm": 10.093339920043945, "learning_rate": 9.439764540673238e-06, "loss": 0.3427, "step": 3238 }, { "epoch": 0.16067265241331416, "grad_norm": 37.053131103515625, "learning_rate": 9.439402475727173e-06, "loss": 0.3865, "step": 3239 }, { "epoch": 0.1607222580485143, "grad_norm": 10.222583770751953, "learning_rate": 9.439040300770708e-06, "loss": 0.3956, "step": 3240 }, { "epoch": 0.16077186368371446, "grad_norm": 10.677020072937012, "learning_rate": 9.438678015812814e-06, "loss": 0.3141, "step": 3241 }, { "epoch": 0.16082146931891464, "grad_norm": 7.751485824584961, "learning_rate": 9.438315620862472e-06, "loss": 0.368, "step": 3242 }, { "epoch": 0.1608710749541148, "grad_norm": 5.175616264343262, "learning_rate": 9.437953115928663e-06, "loss": 0.3471, "step": 3243 }, { "epoch": 0.16092068058931494, "grad_norm": 9.600318908691406, "learning_rate": 9.437590501020366e-06, "loss": 0.292, "step": 3244 }, { "epoch": 0.1609702862245151, "grad_norm": 5.29502010345459, "learning_rate": 9.43722777614657e-06, "loss": 0.2581, "step": 3245 }, { "epoch": 0.16101989185971527, "grad_norm": 5.874805927276611, "learning_rate": 9.436864941316262e-06, "loss": 0.3963, "step": 3246 }, { "epoch": 0.16106949749491542, "grad_norm": 8.909183502197266, "learning_rate": 9.436501996538435e-06, "loss": 0.3311, "step": 3247 }, { "epoch": 0.16111910313011557, "grad_norm": 9.382125854492188, "learning_rate": 9.43613894182208e-06, "loss": 0.4488, "step": 3248 }, { "epoch": 0.16116870876531575, "grad_norm": 11.422027587890625, "learning_rate": 9.435775777176196e-06, "loss": 0.4258, "step": 3249 }, { "epoch": 0.1612183144005159, "grad_norm": 5.606239318847656, "learning_rate": 9.435412502609782e-06, "loss": 0.3585, "step": 3250 }, { "epoch": 0.16126792003571605, "grad_norm": 7.558977127075195, "learning_rate": 9.435049118131839e-06, "loss": 0.3944, "step": 3251 }, { "epoch": 0.16131752567091623, "grad_norm": 12.395248413085938, "learning_rate": 9.434685623751373e-06, "loss": 0.4049, "step": 3252 }, { "epoch": 0.16136713130611638, "grad_norm": 10.974417686462402, "learning_rate": 9.434322019477391e-06, "loss": 0.2742, "step": 3253 }, { "epoch": 0.16141673694131653, "grad_norm": 7.650134563446045, "learning_rate": 9.433958305318904e-06, "loss": 0.2751, "step": 3254 }, { "epoch": 0.16146634257651668, "grad_norm": 8.274886131286621, "learning_rate": 9.433594481284924e-06, "loss": 0.428, "step": 3255 }, { "epoch": 0.16151594821171686, "grad_norm": 8.108936309814453, "learning_rate": 9.433230547384468e-06, "loss": 0.397, "step": 3256 }, { "epoch": 0.161565553846917, "grad_norm": 5.287353992462158, "learning_rate": 9.432866503626554e-06, "loss": 0.3254, "step": 3257 }, { "epoch": 0.16161515948211716, "grad_norm": 5.8007941246032715, "learning_rate": 9.432502350020201e-06, "loss": 0.3375, "step": 3258 }, { "epoch": 0.16166476511731734, "grad_norm": 10.331681251525879, "learning_rate": 9.432138086574436e-06, "loss": 0.4647, "step": 3259 }, { "epoch": 0.1617143707525175, "grad_norm": 6.239912986755371, "learning_rate": 9.431773713298284e-06, "loss": 0.3371, "step": 3260 }, { "epoch": 0.16176397638771764, "grad_norm": 6.404751300811768, "learning_rate": 9.431409230200777e-06, "loss": 0.4061, "step": 3261 }, { "epoch": 0.1618135820229178, "grad_norm": 8.125102996826172, "learning_rate": 9.43104463729094e-06, "loss": 0.346, "step": 3262 }, { "epoch": 0.16186318765811797, "grad_norm": 5.738210678100586, "learning_rate": 9.430679934577816e-06, "loss": 0.2982, "step": 3263 }, { "epoch": 0.16191279329331812, "grad_norm": 10.189786911010742, "learning_rate": 9.430315122070439e-06, "loss": 0.3905, "step": 3264 }, { "epoch": 0.16196239892851827, "grad_norm": 7.787026882171631, "learning_rate": 9.429950199777848e-06, "loss": 0.3633, "step": 3265 }, { "epoch": 0.16201200456371845, "grad_norm": 10.467720031738281, "learning_rate": 9.429585167709087e-06, "loss": 0.4731, "step": 3266 }, { "epoch": 0.1620616101989186, "grad_norm": 6.612691402435303, "learning_rate": 9.429220025873203e-06, "loss": 0.3849, "step": 3267 }, { "epoch": 0.16211121583411875, "grad_norm": 4.0860371589660645, "learning_rate": 9.428854774279244e-06, "loss": 0.3657, "step": 3268 }, { "epoch": 0.16216082146931893, "grad_norm": 3.4489152431488037, "learning_rate": 9.428489412936257e-06, "loss": 0.2958, "step": 3269 }, { "epoch": 0.16221042710451908, "grad_norm": 4.765350818634033, "learning_rate": 9.428123941853302e-06, "loss": 0.3797, "step": 3270 }, { "epoch": 0.16226003273971923, "grad_norm": 4.785475254058838, "learning_rate": 9.427758361039432e-06, "loss": 0.3026, "step": 3271 }, { "epoch": 0.16230963837491938, "grad_norm": 5.540241241455078, "learning_rate": 9.427392670503706e-06, "loss": 0.389, "step": 3272 }, { "epoch": 0.16235924401011956, "grad_norm": 9.197431564331055, "learning_rate": 9.427026870255186e-06, "loss": 0.422, "step": 3273 }, { "epoch": 0.1624088496453197, "grad_norm": 15.885157585144043, "learning_rate": 9.42666096030294e-06, "loss": 0.3716, "step": 3274 }, { "epoch": 0.16245845528051986, "grad_norm": 7.169836521148682, "learning_rate": 9.426294940656033e-06, "loss": 0.3622, "step": 3275 }, { "epoch": 0.16250806091572004, "grad_norm": 7.198306083679199, "learning_rate": 9.425928811323533e-06, "loss": 0.3293, "step": 3276 }, { "epoch": 0.1625576665509202, "grad_norm": 9.992755889892578, "learning_rate": 9.425562572314517e-06, "loss": 0.3418, "step": 3277 }, { "epoch": 0.16260727218612034, "grad_norm": 10.664825439453125, "learning_rate": 9.425196223638058e-06, "loss": 0.3647, "step": 3278 }, { "epoch": 0.1626568778213205, "grad_norm": 9.778603553771973, "learning_rate": 9.424829765303234e-06, "loss": 0.3527, "step": 3279 }, { "epoch": 0.16270648345652067, "grad_norm": 7.05722188949585, "learning_rate": 9.424463197319129e-06, "loss": 0.2956, "step": 3280 }, { "epoch": 0.16275608909172082, "grad_norm": 4.729372501373291, "learning_rate": 9.424096519694823e-06, "loss": 0.2557, "step": 3281 }, { "epoch": 0.16280569472692097, "grad_norm": 9.197173118591309, "learning_rate": 9.423729732439403e-06, "loss": 0.3886, "step": 3282 }, { "epoch": 0.16285530036212115, "grad_norm": 9.840089797973633, "learning_rate": 9.42336283556196e-06, "loss": 0.3239, "step": 3283 }, { "epoch": 0.1629049059973213, "grad_norm": 8.591729164123535, "learning_rate": 9.422995829071586e-06, "loss": 0.2918, "step": 3284 }, { "epoch": 0.16295451163252145, "grad_norm": 12.00283432006836, "learning_rate": 9.422628712977373e-06, "loss": 0.4571, "step": 3285 }, { "epoch": 0.16300411726772163, "grad_norm": 6.3530755043029785, "learning_rate": 9.422261487288422e-06, "loss": 0.3524, "step": 3286 }, { "epoch": 0.16305372290292178, "grad_norm": 13.770118713378906, "learning_rate": 9.421894152013829e-06, "loss": 0.3115, "step": 3287 }, { "epoch": 0.16310332853812193, "grad_norm": 11.639487266540527, "learning_rate": 9.421526707162699e-06, "loss": 0.3426, "step": 3288 }, { "epoch": 0.16315293417332208, "grad_norm": 11.354881286621094, "learning_rate": 9.421159152744137e-06, "loss": 0.3459, "step": 3289 }, { "epoch": 0.16320253980852226, "grad_norm": 8.390839576721191, "learning_rate": 9.420791488767252e-06, "loss": 0.4341, "step": 3290 }, { "epoch": 0.1632521454437224, "grad_norm": 12.225075721740723, "learning_rate": 9.420423715241153e-06, "loss": 0.3849, "step": 3291 }, { "epoch": 0.16330175107892256, "grad_norm": 4.654949188232422, "learning_rate": 9.420055832174958e-06, "loss": 0.2214, "step": 3292 }, { "epoch": 0.16335135671412274, "grad_norm": 6.99893856048584, "learning_rate": 9.419687839577777e-06, "loss": 0.3123, "step": 3293 }, { "epoch": 0.1634009623493229, "grad_norm": 8.31661319732666, "learning_rate": 9.419319737458732e-06, "loss": 0.285, "step": 3294 }, { "epoch": 0.16345056798452304, "grad_norm": 7.383347034454346, "learning_rate": 9.418951525826947e-06, "loss": 0.3638, "step": 3295 }, { "epoch": 0.1635001736197232, "grad_norm": 10.256978034973145, "learning_rate": 9.418583204691543e-06, "loss": 0.462, "step": 3296 }, { "epoch": 0.16354977925492337, "grad_norm": 4.8046746253967285, "learning_rate": 9.418214774061647e-06, "loss": 0.3066, "step": 3297 }, { "epoch": 0.16359938489012352, "grad_norm": 6.641630172729492, "learning_rate": 9.417846233946393e-06, "loss": 0.3412, "step": 3298 }, { "epoch": 0.16364899052532367, "grad_norm": 18.259706497192383, "learning_rate": 9.41747758435491e-06, "loss": 0.239, "step": 3299 }, { "epoch": 0.16369859616052385, "grad_norm": 5.467233180999756, "learning_rate": 9.417108825296332e-06, "loss": 0.3709, "step": 3300 }, { "epoch": 0.163748201795724, "grad_norm": 4.939638137817383, "learning_rate": 9.416739956779802e-06, "loss": 0.3025, "step": 3301 }, { "epoch": 0.16379780743092415, "grad_norm": 7.5643510818481445, "learning_rate": 9.416370978814458e-06, "loss": 0.2784, "step": 3302 }, { "epoch": 0.16384741306612433, "grad_norm": 20.354053497314453, "learning_rate": 9.416001891409443e-06, "loss": 0.5838, "step": 3303 }, { "epoch": 0.16389701870132448, "grad_norm": 29.563579559326172, "learning_rate": 9.415632694573902e-06, "loss": 0.3643, "step": 3304 }, { "epoch": 0.16394662433652463, "grad_norm": 12.558989524841309, "learning_rate": 9.415263388316986e-06, "loss": 0.3635, "step": 3305 }, { "epoch": 0.16399622997172478, "grad_norm": 8.784982681274414, "learning_rate": 9.414893972647846e-06, "loss": 0.4537, "step": 3306 }, { "epoch": 0.16404583560692496, "grad_norm": 11.648796081542969, "learning_rate": 9.414524447575636e-06, "loss": 0.3732, "step": 3307 }, { "epoch": 0.1640954412421251, "grad_norm": 18.293315887451172, "learning_rate": 9.414154813109514e-06, "loss": 0.429, "step": 3308 }, { "epoch": 0.16414504687732526, "grad_norm": 6.889711856842041, "learning_rate": 9.41378506925864e-06, "loss": 0.2787, "step": 3309 }, { "epoch": 0.16419465251252544, "grad_norm": 11.645055770874023, "learning_rate": 9.413415216032175e-06, "loss": 0.4203, "step": 3310 }, { "epoch": 0.16424425814772559, "grad_norm": 6.189864635467529, "learning_rate": 9.413045253439284e-06, "loss": 0.2514, "step": 3311 }, { "epoch": 0.16429386378292574, "grad_norm": 7.864741802215576, "learning_rate": 9.412675181489135e-06, "loss": 0.3834, "step": 3312 }, { "epoch": 0.1643434694181259, "grad_norm": 5.440008163452148, "learning_rate": 9.4123050001909e-06, "loss": 0.251, "step": 3313 }, { "epoch": 0.16439307505332607, "grad_norm": 5.038330078125, "learning_rate": 9.411934709553749e-06, "loss": 0.398, "step": 3314 }, { "epoch": 0.16444268068852622, "grad_norm": 8.879467010498047, "learning_rate": 9.411564309586862e-06, "loss": 0.3274, "step": 3315 }, { "epoch": 0.16449228632372637, "grad_norm": 5.987996578216553, "learning_rate": 9.411193800299417e-06, "loss": 0.3005, "step": 3316 }, { "epoch": 0.16454189195892654, "grad_norm": 12.930344581604004, "learning_rate": 9.410823181700593e-06, "loss": 0.4695, "step": 3317 }, { "epoch": 0.1645914975941267, "grad_norm": 7.1876654624938965, "learning_rate": 9.410452453799575e-06, "loss": 0.3039, "step": 3318 }, { "epoch": 0.16464110322932685, "grad_norm": 8.254679679870605, "learning_rate": 9.410081616605548e-06, "loss": 0.3333, "step": 3319 }, { "epoch": 0.164690708864527, "grad_norm": 8.650383949279785, "learning_rate": 9.409710670127707e-06, "loss": 0.3429, "step": 3320 }, { "epoch": 0.16474031449972718, "grad_norm": 5.980535984039307, "learning_rate": 9.40933961437524e-06, "loss": 0.3642, "step": 3321 }, { "epoch": 0.16478992013492733, "grad_norm": 7.04221248626709, "learning_rate": 9.408968449357341e-06, "loss": 0.2502, "step": 3322 }, { "epoch": 0.16483952577012748, "grad_norm": 10.120478630065918, "learning_rate": 9.408597175083211e-06, "loss": 0.3558, "step": 3323 }, { "epoch": 0.16488913140532765, "grad_norm": 4.235088348388672, "learning_rate": 9.40822579156205e-06, "loss": 0.2533, "step": 3324 }, { "epoch": 0.1649387370405278, "grad_norm": 6.786940574645996, "learning_rate": 9.407854298803059e-06, "loss": 0.4056, "step": 3325 }, { "epoch": 0.16498834267572796, "grad_norm": 12.211187362670898, "learning_rate": 9.407482696815444e-06, "loss": 0.4462, "step": 3326 }, { "epoch": 0.16503794831092813, "grad_norm": 5.712188720703125, "learning_rate": 9.407110985608414e-06, "loss": 0.3279, "step": 3327 }, { "epoch": 0.16508755394612828, "grad_norm": 5.844563961029053, "learning_rate": 9.40673916519118e-06, "loss": 0.2956, "step": 3328 }, { "epoch": 0.16513715958132844, "grad_norm": 4.949684143066406, "learning_rate": 9.406367235572956e-06, "loss": 0.284, "step": 3329 }, { "epoch": 0.16518676521652859, "grad_norm": 4.831003665924072, "learning_rate": 9.40599519676296e-06, "loss": 0.3173, "step": 3330 }, { "epoch": 0.16523637085172876, "grad_norm": 5.444947719573975, "learning_rate": 9.405623048770409e-06, "loss": 0.3458, "step": 3331 }, { "epoch": 0.16528597648692891, "grad_norm": 6.48923921585083, "learning_rate": 9.405250791604525e-06, "loss": 0.3347, "step": 3332 }, { "epoch": 0.16533558212212907, "grad_norm": 18.401609420776367, "learning_rate": 9.404878425274535e-06, "loss": 0.3582, "step": 3333 }, { "epoch": 0.16538518775732924, "grad_norm": 6.338003158569336, "learning_rate": 9.404505949789665e-06, "loss": 0.2556, "step": 3334 }, { "epoch": 0.1654347933925294, "grad_norm": 4.15631103515625, "learning_rate": 9.404133365159145e-06, "loss": 0.2933, "step": 3335 }, { "epoch": 0.16548439902772955, "grad_norm": 19.83640480041504, "learning_rate": 9.403760671392207e-06, "loss": 0.487, "step": 3336 }, { "epoch": 0.1655340046629297, "grad_norm": 14.296631813049316, "learning_rate": 9.40338786849809e-06, "loss": 0.4493, "step": 3337 }, { "epoch": 0.16558361029812987, "grad_norm": 12.395111083984375, "learning_rate": 9.403014956486028e-06, "loss": 0.4408, "step": 3338 }, { "epoch": 0.16563321593333002, "grad_norm": 5.958890914916992, "learning_rate": 9.402641935365264e-06, "loss": 0.2445, "step": 3339 }, { "epoch": 0.16568282156853018, "grad_norm": 8.10214614868164, "learning_rate": 9.402268805145043e-06, "loss": 0.356, "step": 3340 }, { "epoch": 0.16573242720373035, "grad_norm": 5.015878677368164, "learning_rate": 9.401895565834607e-06, "loss": 0.2934, "step": 3341 }, { "epoch": 0.1657820328389305, "grad_norm": 6.011500358581543, "learning_rate": 9.401522217443208e-06, "loss": 0.3262, "step": 3342 }, { "epoch": 0.16583163847413065, "grad_norm": 7.059354305267334, "learning_rate": 9.401148759980098e-06, "loss": 0.3044, "step": 3343 }, { "epoch": 0.16588124410933083, "grad_norm": 8.941642761230469, "learning_rate": 9.40077519345453e-06, "loss": 0.289, "step": 3344 }, { "epoch": 0.16593084974453098, "grad_norm": 7.659727096557617, "learning_rate": 9.400401517875763e-06, "loss": 0.2775, "step": 3345 }, { "epoch": 0.16598045537973113, "grad_norm": 13.049546241760254, "learning_rate": 9.400027733253057e-06, "loss": 0.3979, "step": 3346 }, { "epoch": 0.16603006101493128, "grad_norm": 8.059309005737305, "learning_rate": 9.399653839595673e-06, "loss": 0.276, "step": 3347 }, { "epoch": 0.16607966665013146, "grad_norm": 12.208089828491211, "learning_rate": 9.399279836912875e-06, "loss": 0.3921, "step": 3348 }, { "epoch": 0.16612927228533161, "grad_norm": 6.027822971343994, "learning_rate": 9.398905725213934e-06, "loss": 0.3231, "step": 3349 }, { "epoch": 0.16617887792053176, "grad_norm": 4.864265441894531, "learning_rate": 9.398531504508119e-06, "loss": 0.2772, "step": 3350 }, { "epoch": 0.16622848355573194, "grad_norm": 4.287332057952881, "learning_rate": 9.398157174804703e-06, "loss": 0.2564, "step": 3351 }, { "epoch": 0.1662780891909321, "grad_norm": 8.650463104248047, "learning_rate": 9.397782736112964e-06, "loss": 0.3248, "step": 3352 }, { "epoch": 0.16632769482613224, "grad_norm": 4.970389366149902, "learning_rate": 9.397408188442177e-06, "loss": 0.2317, "step": 3353 }, { "epoch": 0.1663773004613324, "grad_norm": 12.130841255187988, "learning_rate": 9.397033531801628e-06, "loss": 0.4729, "step": 3354 }, { "epoch": 0.16642690609653257, "grad_norm": 10.584931373596191, "learning_rate": 9.3966587662006e-06, "loss": 0.312, "step": 3355 }, { "epoch": 0.16647651173173272, "grad_norm": 6.825296401977539, "learning_rate": 9.396283891648377e-06, "loss": 0.3306, "step": 3356 }, { "epoch": 0.16652611736693287, "grad_norm": 16.398271560668945, "learning_rate": 9.39590890815425e-06, "loss": 0.294, "step": 3357 }, { "epoch": 0.16657572300213305, "grad_norm": 9.651103019714355, "learning_rate": 9.395533815727513e-06, "loss": 0.3393, "step": 3358 }, { "epoch": 0.1666253286373332, "grad_norm": 10.944780349731445, "learning_rate": 9.39515861437746e-06, "loss": 0.2394, "step": 3359 }, { "epoch": 0.16667493427253335, "grad_norm": 5.979079246520996, "learning_rate": 9.394783304113389e-06, "loss": 0.3069, "step": 3360 }, { "epoch": 0.16672453990773353, "grad_norm": 6.922043323516846, "learning_rate": 9.394407884944599e-06, "loss": 0.3746, "step": 3361 }, { "epoch": 0.16677414554293368, "grad_norm": 13.033217430114746, "learning_rate": 9.394032356880393e-06, "loss": 0.3825, "step": 3362 }, { "epoch": 0.16682375117813383, "grad_norm": 7.399617671966553, "learning_rate": 9.393656719930078e-06, "loss": 0.3435, "step": 3363 }, { "epoch": 0.16687335681333398, "grad_norm": 7.57236909866333, "learning_rate": 9.393280974102962e-06, "loss": 0.3446, "step": 3364 }, { "epoch": 0.16692296244853416, "grad_norm": 6.339715480804443, "learning_rate": 9.392905119408355e-06, "loss": 0.303, "step": 3365 }, { "epoch": 0.1669725680837343, "grad_norm": 4.2052154541015625, "learning_rate": 9.392529155855575e-06, "loss": 0.3438, "step": 3366 }, { "epoch": 0.16702217371893446, "grad_norm": 18.727136611938477, "learning_rate": 9.392153083453932e-06, "loss": 0.5542, "step": 3367 }, { "epoch": 0.16707177935413464, "grad_norm": 8.279699325561523, "learning_rate": 9.391776902212753e-06, "loss": 0.3368, "step": 3368 }, { "epoch": 0.1671213849893348, "grad_norm": 4.365902423858643, "learning_rate": 9.391400612141353e-06, "loss": 0.3294, "step": 3369 }, { "epoch": 0.16717099062453494, "grad_norm": 8.74957275390625, "learning_rate": 9.39102421324906e-06, "loss": 0.3323, "step": 3370 }, { "epoch": 0.1672205962597351, "grad_norm": 9.629015922546387, "learning_rate": 9.390647705545199e-06, "loss": 0.3586, "step": 3371 }, { "epoch": 0.16727020189493527, "grad_norm": 12.894376754760742, "learning_rate": 9.390271089039105e-06, "loss": 0.4891, "step": 3372 }, { "epoch": 0.16731980753013542, "grad_norm": 8.809718132019043, "learning_rate": 9.389894363740105e-06, "loss": 0.3674, "step": 3373 }, { "epoch": 0.16736941316533557, "grad_norm": 7.086987495422363, "learning_rate": 9.389517529657537e-06, "loss": 0.3665, "step": 3374 }, { "epoch": 0.16741901880053575, "grad_norm": 4.4856038093566895, "learning_rate": 9.38914058680074e-06, "loss": 0.3298, "step": 3375 }, { "epoch": 0.1674686244357359, "grad_norm": 4.929076671600342, "learning_rate": 9.388763535179053e-06, "loss": 0.3315, "step": 3376 }, { "epoch": 0.16751823007093605, "grad_norm": 11.392423629760742, "learning_rate": 9.388386374801821e-06, "loss": 0.2703, "step": 3377 }, { "epoch": 0.1675678357061362, "grad_norm": 5.224465847015381, "learning_rate": 9.388009105678389e-06, "loss": 0.2545, "step": 3378 }, { "epoch": 0.16761744134133638, "grad_norm": 6.01020622253418, "learning_rate": 9.387631727818108e-06, "loss": 0.325, "step": 3379 }, { "epoch": 0.16766704697653653, "grad_norm": 12.029732704162598, "learning_rate": 9.387254241230326e-06, "loss": 0.4164, "step": 3380 }, { "epoch": 0.16771665261173668, "grad_norm": 5.709690570831299, "learning_rate": 9.3868766459244e-06, "loss": 0.3579, "step": 3381 }, { "epoch": 0.16776625824693686, "grad_norm": 6.2432966232299805, "learning_rate": 9.386498941909687e-06, "loss": 0.2843, "step": 3382 }, { "epoch": 0.167815863882137, "grad_norm": 4.840115070343018, "learning_rate": 9.386121129195546e-06, "loss": 0.3458, "step": 3383 }, { "epoch": 0.16786546951733716, "grad_norm": 5.810461044311523, "learning_rate": 9.385743207791337e-06, "loss": 0.3226, "step": 3384 }, { "epoch": 0.16791507515253734, "grad_norm": 5.761040210723877, "learning_rate": 9.38536517770643e-06, "loss": 0.2803, "step": 3385 }, { "epoch": 0.1679646807877375, "grad_norm": 9.60328483581543, "learning_rate": 9.38498703895019e-06, "loss": 0.2827, "step": 3386 }, { "epoch": 0.16801428642293764, "grad_norm": 6.701169490814209, "learning_rate": 9.384608791531988e-06, "loss": 0.4435, "step": 3387 }, { "epoch": 0.1680638920581378, "grad_norm": 6.794419288635254, "learning_rate": 9.384230435461194e-06, "loss": 0.2991, "step": 3388 }, { "epoch": 0.16811349769333797, "grad_norm": 4.962478160858154, "learning_rate": 9.383851970747189e-06, "loss": 0.2591, "step": 3389 }, { "epoch": 0.16816310332853812, "grad_norm": 5.652781009674072, "learning_rate": 9.38347339739935e-06, "loss": 0.3055, "step": 3390 }, { "epoch": 0.16821270896373827, "grad_norm": 4.795217990875244, "learning_rate": 9.383094715427055e-06, "loss": 0.3121, "step": 3391 }, { "epoch": 0.16826231459893845, "grad_norm": 9.304120063781738, "learning_rate": 9.382715924839691e-06, "loss": 0.3488, "step": 3392 }, { "epoch": 0.1683119202341386, "grad_norm": 7.64414119720459, "learning_rate": 9.382337025646644e-06, "loss": 0.3125, "step": 3393 }, { "epoch": 0.16836152586933875, "grad_norm": 9.18398380279541, "learning_rate": 9.381958017857304e-06, "loss": 0.3238, "step": 3394 }, { "epoch": 0.1684111315045389, "grad_norm": 8.412978172302246, "learning_rate": 9.38157890148106e-06, "loss": 0.3628, "step": 3395 }, { "epoch": 0.16846073713973908, "grad_norm": 4.567814826965332, "learning_rate": 9.381199676527312e-06, "loss": 0.3238, "step": 3396 }, { "epoch": 0.16851034277493923, "grad_norm": 8.85196304321289, "learning_rate": 9.380820343005453e-06, "loss": 0.209, "step": 3397 }, { "epoch": 0.16855994841013938, "grad_norm": 10.522849082946777, "learning_rate": 9.380440900924883e-06, "loss": 0.3481, "step": 3398 }, { "epoch": 0.16860955404533956, "grad_norm": 15.355013847351074, "learning_rate": 9.380061350295007e-06, "loss": 0.4093, "step": 3399 }, { "epoch": 0.1686591596805397, "grad_norm": 14.208699226379395, "learning_rate": 9.379681691125228e-06, "loss": 0.5137, "step": 3400 }, { "epoch": 0.16870876531573986, "grad_norm": 7.755918502807617, "learning_rate": 9.379301923424958e-06, "loss": 0.3213, "step": 3401 }, { "epoch": 0.16875837095094004, "grad_norm": 7.952286243438721, "learning_rate": 9.378922047203602e-06, "loss": 0.3542, "step": 3402 }, { "epoch": 0.1688079765861402, "grad_norm": 5.774015426635742, "learning_rate": 9.37854206247058e-06, "loss": 0.2574, "step": 3403 }, { "epoch": 0.16885758222134034, "grad_norm": 18.754552841186523, "learning_rate": 9.378161969235305e-06, "loss": 0.4204, "step": 3404 }, { "epoch": 0.1689071878565405, "grad_norm": 7.097278594970703, "learning_rate": 9.377781767507194e-06, "loss": 0.3393, "step": 3405 }, { "epoch": 0.16895679349174067, "grad_norm": 25.42083740234375, "learning_rate": 9.377401457295672e-06, "loss": 0.3647, "step": 3406 }, { "epoch": 0.16900639912694082, "grad_norm": 7.870178699493408, "learning_rate": 9.37702103861016e-06, "loss": 0.3816, "step": 3407 }, { "epoch": 0.16905600476214097, "grad_norm": 6.851776123046875, "learning_rate": 9.376640511460088e-06, "loss": 0.3622, "step": 3408 }, { "epoch": 0.16910561039734115, "grad_norm": 6.960081577301025, "learning_rate": 9.376259875854885e-06, "loss": 0.2595, "step": 3409 }, { "epoch": 0.1691552160325413, "grad_norm": 6.251972198486328, "learning_rate": 9.375879131803982e-06, "loss": 0.2808, "step": 3410 }, { "epoch": 0.16920482166774145, "grad_norm": 7.005060195922852, "learning_rate": 9.375498279316814e-06, "loss": 0.3399, "step": 3411 }, { "epoch": 0.1692544273029416, "grad_norm": 12.816554069519043, "learning_rate": 9.37511731840282e-06, "loss": 0.4713, "step": 3412 }, { "epoch": 0.16930403293814178, "grad_norm": 9.58520221710205, "learning_rate": 9.37473624907144e-06, "loss": 0.2837, "step": 3413 }, { "epoch": 0.16935363857334193, "grad_norm": 9.516267776489258, "learning_rate": 9.374355071332115e-06, "loss": 0.2717, "step": 3414 }, { "epoch": 0.16940324420854208, "grad_norm": 10.373558044433594, "learning_rate": 9.373973785194295e-06, "loss": 0.34, "step": 3415 }, { "epoch": 0.16945284984374226, "grad_norm": 9.114309310913086, "learning_rate": 9.373592390667426e-06, "loss": 0.4306, "step": 3416 }, { "epoch": 0.1695024554789424, "grad_norm": 9.88085651397705, "learning_rate": 9.373210887760957e-06, "loss": 0.3291, "step": 3417 }, { "epoch": 0.16955206111414256, "grad_norm": 9.477645874023438, "learning_rate": 9.372829276484345e-06, "loss": 0.4256, "step": 3418 }, { "epoch": 0.16960166674934274, "grad_norm": 4.452052116394043, "learning_rate": 9.372447556847046e-06, "loss": 0.3106, "step": 3419 }, { "epoch": 0.1696512723845429, "grad_norm": 7.3696208000183105, "learning_rate": 9.37206572885852e-06, "loss": 0.4076, "step": 3420 }, { "epoch": 0.16970087801974304, "grad_norm": 7.556446552276611, "learning_rate": 9.371683792528226e-06, "loss": 0.362, "step": 3421 }, { "epoch": 0.1697504836549432, "grad_norm": 12.84568977355957, "learning_rate": 9.37130174786563e-06, "loss": 0.3492, "step": 3422 }, { "epoch": 0.16980008929014337, "grad_norm": 10.130121231079102, "learning_rate": 9.3709195948802e-06, "loss": 0.4, "step": 3423 }, { "epoch": 0.16984969492534352, "grad_norm": 10.446534156799316, "learning_rate": 9.370537333581404e-06, "loss": 0.3653, "step": 3424 }, { "epoch": 0.16989930056054367, "grad_norm": 16.369247436523438, "learning_rate": 9.370154963978717e-06, "loss": 0.4029, "step": 3425 }, { "epoch": 0.16994890619574385, "grad_norm": 5.661682605743408, "learning_rate": 9.369772486081612e-06, "loss": 0.2912, "step": 3426 }, { "epoch": 0.169998511830944, "grad_norm": 7.1092448234558105, "learning_rate": 9.369389899899569e-06, "loss": 0.2429, "step": 3427 }, { "epoch": 0.17004811746614415, "grad_norm": 5.940703392028809, "learning_rate": 9.369007205442068e-06, "loss": 0.3691, "step": 3428 }, { "epoch": 0.1700977231013443, "grad_norm": 6.900142192840576, "learning_rate": 9.368624402718592e-06, "loss": 0.3138, "step": 3429 }, { "epoch": 0.17014732873654448, "grad_norm": 6.359867572784424, "learning_rate": 9.368241491738626e-06, "loss": 0.3257, "step": 3430 }, { "epoch": 0.17019693437174463, "grad_norm": 8.62467098236084, "learning_rate": 9.367858472511662e-06, "loss": 0.375, "step": 3431 }, { "epoch": 0.17024654000694478, "grad_norm": 8.740450859069824, "learning_rate": 9.367475345047189e-06, "loss": 0.279, "step": 3432 }, { "epoch": 0.17029614564214496, "grad_norm": 8.649428367614746, "learning_rate": 9.367092109354698e-06, "loss": 0.3813, "step": 3433 }, { "epoch": 0.1703457512773451, "grad_norm": 8.144044876098633, "learning_rate": 9.366708765443693e-06, "loss": 0.2966, "step": 3434 }, { "epoch": 0.17039535691254526, "grad_norm": 7.973156452178955, "learning_rate": 9.366325313323668e-06, "loss": 0.3698, "step": 3435 }, { "epoch": 0.17044496254774544, "grad_norm": 5.892399787902832, "learning_rate": 9.365941753004126e-06, "loss": 0.3399, "step": 3436 }, { "epoch": 0.1704945681829456, "grad_norm": 14.448698997497559, "learning_rate": 9.365558084494573e-06, "loss": 0.3988, "step": 3437 }, { "epoch": 0.17054417381814574, "grad_norm": 8.313777923583984, "learning_rate": 9.365174307804516e-06, "loss": 0.2706, "step": 3438 }, { "epoch": 0.1705937794533459, "grad_norm": 7.890561580657959, "learning_rate": 9.364790422943464e-06, "loss": 0.2939, "step": 3439 }, { "epoch": 0.17064338508854607, "grad_norm": 3.5513737201690674, "learning_rate": 9.364406429920933e-06, "loss": 0.1548, "step": 3440 }, { "epoch": 0.17069299072374622, "grad_norm": 9.286589622497559, "learning_rate": 9.364022328746435e-06, "loss": 0.3851, "step": 3441 }, { "epoch": 0.17074259635894637, "grad_norm": 5.2744140625, "learning_rate": 9.36363811942949e-06, "loss": 0.3423, "step": 3442 }, { "epoch": 0.17079220199414655, "grad_norm": 7.23007869720459, "learning_rate": 9.36325380197962e-06, "loss": 0.3764, "step": 3443 }, { "epoch": 0.1708418076293467, "grad_norm": 6.563748359680176, "learning_rate": 9.362869376406344e-06, "loss": 0.2171, "step": 3444 }, { "epoch": 0.17089141326454685, "grad_norm": 4.804980754852295, "learning_rate": 9.362484842719193e-06, "loss": 0.2697, "step": 3445 }, { "epoch": 0.170941018899747, "grad_norm": 6.372636795043945, "learning_rate": 9.362100200927695e-06, "loss": 0.3467, "step": 3446 }, { "epoch": 0.17099062453494718, "grad_norm": 5.102002143859863, "learning_rate": 9.36171545104138e-06, "loss": 0.2169, "step": 3447 }, { "epoch": 0.17104023017014733, "grad_norm": 5.427037715911865, "learning_rate": 9.361330593069783e-06, "loss": 0.2702, "step": 3448 }, { "epoch": 0.17108983580534748, "grad_norm": 8.227630615234375, "learning_rate": 9.360945627022441e-06, "loss": 0.4095, "step": 3449 }, { "epoch": 0.17113944144054766, "grad_norm": 5.379608631134033, "learning_rate": 9.360560552908893e-06, "loss": 0.2068, "step": 3450 }, { "epoch": 0.1711890470757478, "grad_norm": 9.673354148864746, "learning_rate": 9.360175370738683e-06, "loss": 0.4414, "step": 3451 }, { "epoch": 0.17123865271094796, "grad_norm": 14.328415870666504, "learning_rate": 9.359790080521357e-06, "loss": 0.5436, "step": 3452 }, { "epoch": 0.1712882583461481, "grad_norm": 3.4365763664245605, "learning_rate": 9.359404682266458e-06, "loss": 0.2168, "step": 3453 }, { "epoch": 0.1713378639813483, "grad_norm": 6.932625770568848, "learning_rate": 9.359019175983538e-06, "loss": 0.373, "step": 3454 }, { "epoch": 0.17138746961654844, "grad_norm": 7.461650848388672, "learning_rate": 9.358633561682153e-06, "loss": 0.2476, "step": 3455 }, { "epoch": 0.1714370752517486, "grad_norm": 13.833237648010254, "learning_rate": 9.358247839371856e-06, "loss": 0.4428, "step": 3456 }, { "epoch": 0.17148668088694877, "grad_norm": 7.2630615234375, "learning_rate": 9.357862009062206e-06, "loss": 0.3541, "step": 3457 }, { "epoch": 0.17153628652214892, "grad_norm": 11.078597068786621, "learning_rate": 9.357476070762765e-06, "loss": 0.4348, "step": 3458 }, { "epoch": 0.17158589215734907, "grad_norm": 4.777954578399658, "learning_rate": 9.357090024483096e-06, "loss": 0.2532, "step": 3459 }, { "epoch": 0.17163549779254925, "grad_norm": 4.957030773162842, "learning_rate": 9.356703870232764e-06, "loss": 0.3366, "step": 3460 }, { "epoch": 0.1716851034277494, "grad_norm": 11.747660636901855, "learning_rate": 9.35631760802134e-06, "loss": 0.2884, "step": 3461 }, { "epoch": 0.17173470906294955, "grad_norm": 7.114878177642822, "learning_rate": 9.355931237858394e-06, "loss": 0.3373, "step": 3462 }, { "epoch": 0.1717843146981497, "grad_norm": 8.091163635253906, "learning_rate": 9.355544759753503e-06, "loss": 0.421, "step": 3463 }, { "epoch": 0.17183392033334988, "grad_norm": 5.878291130065918, "learning_rate": 9.355158173716242e-06, "loss": 0.3464, "step": 3464 }, { "epoch": 0.17188352596855003, "grad_norm": 7.4121551513671875, "learning_rate": 9.354771479756191e-06, "loss": 0.3695, "step": 3465 }, { "epoch": 0.17193313160375018, "grad_norm": 6.292684555053711, "learning_rate": 9.354384677882934e-06, "loss": 0.3831, "step": 3466 }, { "epoch": 0.17198273723895036, "grad_norm": 7.789008617401123, "learning_rate": 9.353997768106054e-06, "loss": 0.3474, "step": 3467 }, { "epoch": 0.1720323428741505, "grad_norm": 9.674654006958008, "learning_rate": 9.35361075043514e-06, "loss": 0.4095, "step": 3468 }, { "epoch": 0.17208194850935066, "grad_norm": 5.2903828620910645, "learning_rate": 9.353223624879783e-06, "loss": 0.3642, "step": 3469 }, { "epoch": 0.1721315541445508, "grad_norm": 5.468954563140869, "learning_rate": 9.352836391449576e-06, "loss": 0.3003, "step": 3470 }, { "epoch": 0.172181159779751, "grad_norm": 4.932878017425537, "learning_rate": 9.352449050154113e-06, "loss": 0.3413, "step": 3471 }, { "epoch": 0.17223076541495114, "grad_norm": 9.118770599365234, "learning_rate": 9.352061601002993e-06, "loss": 0.3522, "step": 3472 }, { "epoch": 0.1722803710501513, "grad_norm": 6.982419013977051, "learning_rate": 9.35167404400582e-06, "loss": 0.2837, "step": 3473 }, { "epoch": 0.17232997668535147, "grad_norm": 4.81496000289917, "learning_rate": 9.351286379172193e-06, "loss": 0.3952, "step": 3474 }, { "epoch": 0.17237958232055162, "grad_norm": 4.9778947830200195, "learning_rate": 9.350898606511723e-06, "loss": 0.3453, "step": 3475 }, { "epoch": 0.17242918795575177, "grad_norm": 4.999386787414551, "learning_rate": 9.350510726034017e-06, "loss": 0.3663, "step": 3476 }, { "epoch": 0.17247879359095195, "grad_norm": 4.971452713012695, "learning_rate": 9.350122737748688e-06, "loss": 0.3581, "step": 3477 }, { "epoch": 0.1725283992261521, "grad_norm": 8.061105728149414, "learning_rate": 9.349734641665348e-06, "loss": 0.3012, "step": 3478 }, { "epoch": 0.17257800486135225, "grad_norm": 9.074999809265137, "learning_rate": 9.349346437793617e-06, "loss": 0.4211, "step": 3479 }, { "epoch": 0.1726276104965524, "grad_norm": 6.879262924194336, "learning_rate": 9.348958126143113e-06, "loss": 0.2636, "step": 3480 }, { "epoch": 0.17267721613175258, "grad_norm": 6.755545616149902, "learning_rate": 9.34856970672346e-06, "loss": 0.3526, "step": 3481 }, { "epoch": 0.17272682176695273, "grad_norm": 4.364633083343506, "learning_rate": 9.348181179544284e-06, "loss": 0.2596, "step": 3482 }, { "epoch": 0.17277642740215288, "grad_norm": 8.038683891296387, "learning_rate": 9.347792544615209e-06, "loss": 0.4539, "step": 3483 }, { "epoch": 0.17282603303735306, "grad_norm": 6.5810441970825195, "learning_rate": 9.347403801945871e-06, "loss": 0.4199, "step": 3484 }, { "epoch": 0.1728756386725532, "grad_norm": 6.375881671905518, "learning_rate": 9.347014951545897e-06, "loss": 0.3307, "step": 3485 }, { "epoch": 0.17292524430775336, "grad_norm": 5.950263977050781, "learning_rate": 9.34662599342493e-06, "loss": 0.2988, "step": 3486 }, { "epoch": 0.1729748499429535, "grad_norm": 6.7534661293029785, "learning_rate": 9.3462369275926e-06, "loss": 0.3659, "step": 3487 }, { "epoch": 0.17302445557815369, "grad_norm": 6.7624711990356445, "learning_rate": 9.345847754058557e-06, "loss": 0.1883, "step": 3488 }, { "epoch": 0.17307406121335384, "grad_norm": 8.448397636413574, "learning_rate": 9.345458472832438e-06, "loss": 0.3704, "step": 3489 }, { "epoch": 0.173123666848554, "grad_norm": 8.825891494750977, "learning_rate": 9.345069083923894e-06, "loss": 0.3208, "step": 3490 }, { "epoch": 0.17317327248375416, "grad_norm": 10.230424880981445, "learning_rate": 9.344679587342571e-06, "loss": 0.3748, "step": 3491 }, { "epoch": 0.17322287811895432, "grad_norm": 9.59598445892334, "learning_rate": 9.344289983098126e-06, "loss": 0.248, "step": 3492 }, { "epoch": 0.17327248375415447, "grad_norm": 9.067891120910645, "learning_rate": 9.343900271200206e-06, "loss": 0.2382, "step": 3493 }, { "epoch": 0.17332208938935464, "grad_norm": 5.089390277862549, "learning_rate": 9.343510451658474e-06, "loss": 0.2356, "step": 3494 }, { "epoch": 0.1733716950245548, "grad_norm": 4.161797523498535, "learning_rate": 9.34312052448259e-06, "loss": 0.2992, "step": 3495 }, { "epoch": 0.17342130065975495, "grad_norm": 6.854724407196045, "learning_rate": 9.342730489682212e-06, "loss": 0.3786, "step": 3496 }, { "epoch": 0.1734709062949551, "grad_norm": 5.759871482849121, "learning_rate": 9.34234034726701e-06, "loss": 0.3366, "step": 3497 }, { "epoch": 0.17352051193015527, "grad_norm": 6.889047145843506, "learning_rate": 9.341950097246648e-06, "loss": 0.2806, "step": 3498 }, { "epoch": 0.17357011756535543, "grad_norm": 4.418380260467529, "learning_rate": 9.3415597396308e-06, "loss": 0.3131, "step": 3499 }, { "epoch": 0.17361972320055558, "grad_norm": 10.636030197143555, "learning_rate": 9.341169274429135e-06, "loss": 0.3769, "step": 3500 }, { "epoch": 0.17366932883575575, "grad_norm": 4.478562831878662, "learning_rate": 9.340778701651335e-06, "loss": 0.2811, "step": 3501 }, { "epoch": 0.1737189344709559, "grad_norm": 6.55566930770874, "learning_rate": 9.340388021307075e-06, "loss": 0.3055, "step": 3502 }, { "epoch": 0.17376854010615606, "grad_norm": 5.244553089141846, "learning_rate": 9.339997233406035e-06, "loss": 0.3256, "step": 3503 }, { "epoch": 0.1738181457413562, "grad_norm": 10.02685260772705, "learning_rate": 9.3396063379579e-06, "loss": 0.2878, "step": 3504 }, { "epoch": 0.17386775137655638, "grad_norm": 6.535678386688232, "learning_rate": 9.339215334972358e-06, "loss": 0.2713, "step": 3505 }, { "epoch": 0.17391735701175653, "grad_norm": 7.7972493171691895, "learning_rate": 9.338824224459098e-06, "loss": 0.1835, "step": 3506 }, { "epoch": 0.17396696264695669, "grad_norm": 16.25882911682129, "learning_rate": 9.33843300642781e-06, "loss": 0.2332, "step": 3507 }, { "epoch": 0.17401656828215686, "grad_norm": 5.846924781799316, "learning_rate": 9.33804168088819e-06, "loss": 0.3127, "step": 3508 }, { "epoch": 0.17406617391735701, "grad_norm": 7.2081522941589355, "learning_rate": 9.337650247849935e-06, "loss": 0.4572, "step": 3509 }, { "epoch": 0.17411577955255716, "grad_norm": 4.908964157104492, "learning_rate": 9.337258707322743e-06, "loss": 0.2413, "step": 3510 }, { "epoch": 0.17416538518775732, "grad_norm": 6.372095584869385, "learning_rate": 9.336867059316321e-06, "loss": 0.3553, "step": 3511 }, { "epoch": 0.1742149908229575, "grad_norm": 5.701564311981201, "learning_rate": 9.336475303840369e-06, "loss": 0.24, "step": 3512 }, { "epoch": 0.17426459645815764, "grad_norm": 5.165797710418701, "learning_rate": 9.3360834409046e-06, "loss": 0.3135, "step": 3513 }, { "epoch": 0.1743142020933578, "grad_norm": 5.923939228057861, "learning_rate": 9.33569147051872e-06, "loss": 0.3321, "step": 3514 }, { "epoch": 0.17436380772855797, "grad_norm": 7.6728386878967285, "learning_rate": 9.335299392692444e-06, "loss": 0.3282, "step": 3515 }, { "epoch": 0.17441341336375812, "grad_norm": 9.994720458984375, "learning_rate": 9.334907207435489e-06, "loss": 0.2313, "step": 3516 }, { "epoch": 0.17446301899895827, "grad_norm": 12.987817764282227, "learning_rate": 9.334514914757572e-06, "loss": 0.3173, "step": 3517 }, { "epoch": 0.17451262463415845, "grad_norm": 11.141470909118652, "learning_rate": 9.334122514668413e-06, "loss": 0.3044, "step": 3518 }, { "epoch": 0.1745622302693586, "grad_norm": 11.300474166870117, "learning_rate": 9.33373000717774e-06, "loss": 0.4695, "step": 3519 }, { "epoch": 0.17461183590455875, "grad_norm": 7.888092994689941, "learning_rate": 9.333337392295277e-06, "loss": 0.2884, "step": 3520 }, { "epoch": 0.1746614415397589, "grad_norm": 3.6498827934265137, "learning_rate": 9.332944670030753e-06, "loss": 0.2578, "step": 3521 }, { "epoch": 0.17471104717495908, "grad_norm": 12.634873390197754, "learning_rate": 9.332551840393898e-06, "loss": 0.4171, "step": 3522 }, { "epoch": 0.17476065281015923, "grad_norm": 8.829276084899902, "learning_rate": 9.33215890339445e-06, "loss": 0.4223, "step": 3523 }, { "epoch": 0.17481025844535938, "grad_norm": 7.131317138671875, "learning_rate": 9.331765859042145e-06, "loss": 0.3791, "step": 3524 }, { "epoch": 0.17485986408055956, "grad_norm": 5.517908573150635, "learning_rate": 9.331372707346723e-06, "loss": 0.3104, "step": 3525 }, { "epoch": 0.1749094697157597, "grad_norm": 5.313640594482422, "learning_rate": 9.330979448317925e-06, "loss": 0.209, "step": 3526 }, { "epoch": 0.17495907535095986, "grad_norm": 9.616602897644043, "learning_rate": 9.330586081965498e-06, "loss": 0.3537, "step": 3527 }, { "epoch": 0.17500868098616001, "grad_norm": 6.0321149826049805, "learning_rate": 9.330192608299188e-06, "loss": 0.2604, "step": 3528 }, { "epoch": 0.1750582866213602, "grad_norm": 6.29712438583374, "learning_rate": 9.329799027328748e-06, "loss": 0.3166, "step": 3529 }, { "epoch": 0.17510789225656034, "grad_norm": 5.0875678062438965, "learning_rate": 9.329405339063929e-06, "loss": 0.3527, "step": 3530 }, { "epoch": 0.1751574978917605, "grad_norm": 6.67483377456665, "learning_rate": 9.329011543514486e-06, "loss": 0.2898, "step": 3531 }, { "epoch": 0.17520710352696067, "grad_norm": 6.485953330993652, "learning_rate": 9.328617640690179e-06, "loss": 0.3505, "step": 3532 }, { "epoch": 0.17525670916216082, "grad_norm": 6.921205997467041, "learning_rate": 9.32822363060077e-06, "loss": 0.3253, "step": 3533 }, { "epoch": 0.17530631479736097, "grad_norm": 5.981271743774414, "learning_rate": 9.32782951325602e-06, "loss": 0.288, "step": 3534 }, { "epoch": 0.17535592043256115, "grad_norm": 8.271559715270996, "learning_rate": 9.327435288665698e-06, "loss": 0.3671, "step": 3535 }, { "epoch": 0.1754055260677613, "grad_norm": 5.326413631439209, "learning_rate": 9.327040956839573e-06, "loss": 0.3003, "step": 3536 }, { "epoch": 0.17545513170296145, "grad_norm": 5.609182834625244, "learning_rate": 9.326646517787414e-06, "loss": 0.3625, "step": 3537 }, { "epoch": 0.1755047373381616, "grad_norm": 5.955725193023682, "learning_rate": 9.326251971518998e-06, "loss": 0.3094, "step": 3538 }, { "epoch": 0.17555434297336178, "grad_norm": 6.90479040145874, "learning_rate": 9.325857318044101e-06, "loss": 0.3254, "step": 3539 }, { "epoch": 0.17560394860856193, "grad_norm": 6.408212184906006, "learning_rate": 9.325462557372503e-06, "loss": 0.2951, "step": 3540 }, { "epoch": 0.17565355424376208, "grad_norm": 9.650724411010742, "learning_rate": 9.325067689513985e-06, "loss": 0.5087, "step": 3541 }, { "epoch": 0.17570315987896226, "grad_norm": 13.922996520996094, "learning_rate": 9.324672714478335e-06, "loss": 0.3491, "step": 3542 }, { "epoch": 0.1757527655141624, "grad_norm": 7.720235347747803, "learning_rate": 9.324277632275338e-06, "loss": 0.405, "step": 3543 }, { "epoch": 0.17580237114936256, "grad_norm": 5.986389636993408, "learning_rate": 9.323882442914785e-06, "loss": 0.2794, "step": 3544 }, { "epoch": 0.1758519767845627, "grad_norm": 5.4353156089782715, "learning_rate": 9.32348714640647e-06, "loss": 0.3765, "step": 3545 }, { "epoch": 0.1759015824197629, "grad_norm": 13.675477027893066, "learning_rate": 9.323091742760187e-06, "loss": 0.3899, "step": 3546 }, { "epoch": 0.17595118805496304, "grad_norm": 6.909374713897705, "learning_rate": 9.322696231985735e-06, "loss": 0.34, "step": 3547 }, { "epoch": 0.1760007936901632, "grad_norm": 5.917598247528076, "learning_rate": 9.322300614092916e-06, "loss": 0.303, "step": 3548 }, { "epoch": 0.17605039932536337, "grad_norm": 6.229620933532715, "learning_rate": 9.32190488909153e-06, "loss": 0.3816, "step": 3549 }, { "epoch": 0.17610000496056352, "grad_norm": 8.626042366027832, "learning_rate": 9.321509056991389e-06, "loss": 0.3057, "step": 3550 }, { "epoch": 0.17614961059576367, "grad_norm": 6.770140171051025, "learning_rate": 9.321113117802297e-06, "loss": 0.2868, "step": 3551 }, { "epoch": 0.17619921623096385, "grad_norm": 6.955382823944092, "learning_rate": 9.320717071534067e-06, "loss": 0.3559, "step": 3552 }, { "epoch": 0.176248821866164, "grad_norm": 7.042568683624268, "learning_rate": 9.320320918196514e-06, "loss": 0.3803, "step": 3553 }, { "epoch": 0.17629842750136415, "grad_norm": 6.474143981933594, "learning_rate": 9.319924657799455e-06, "loss": 0.3648, "step": 3554 }, { "epoch": 0.1763480331365643, "grad_norm": 7.334106922149658, "learning_rate": 9.319528290352709e-06, "loss": 0.3652, "step": 3555 }, { "epoch": 0.17639763877176448, "grad_norm": 5.886374473571777, "learning_rate": 9.319131815866099e-06, "loss": 0.2972, "step": 3556 }, { "epoch": 0.17644724440696463, "grad_norm": 9.888398170471191, "learning_rate": 9.318735234349446e-06, "loss": 0.3537, "step": 3557 }, { "epoch": 0.17649685004216478, "grad_norm": 16.49654197692871, "learning_rate": 9.318338545812582e-06, "loss": 0.5993, "step": 3558 }, { "epoch": 0.17654645567736496, "grad_norm": 7.170640468597412, "learning_rate": 9.317941750265335e-06, "loss": 0.4031, "step": 3559 }, { "epoch": 0.1765960613125651, "grad_norm": 5.697184085845947, "learning_rate": 9.317544847717538e-06, "loss": 0.3952, "step": 3560 }, { "epoch": 0.17664566694776526, "grad_norm": 9.864437103271484, "learning_rate": 9.317147838179027e-06, "loss": 0.2819, "step": 3561 }, { "epoch": 0.1766952725829654, "grad_norm": 7.297430992126465, "learning_rate": 9.31675072165964e-06, "loss": 0.3601, "step": 3562 }, { "epoch": 0.1767448782181656, "grad_norm": 5.540244102478027, "learning_rate": 9.316353498169217e-06, "loss": 0.3578, "step": 3563 }, { "epoch": 0.17679448385336574, "grad_norm": 8.391559600830078, "learning_rate": 9.3159561677176e-06, "loss": 0.4614, "step": 3564 }, { "epoch": 0.1768440894885659, "grad_norm": 5.506028652191162, "learning_rate": 9.31555873031464e-06, "loss": 0.3227, "step": 3565 }, { "epoch": 0.17689369512376607, "grad_norm": 4.737863540649414, "learning_rate": 9.315161185970182e-06, "loss": 0.253, "step": 3566 }, { "epoch": 0.17694330075896622, "grad_norm": 5.235500335693359, "learning_rate": 9.314763534694076e-06, "loss": 0.325, "step": 3567 }, { "epoch": 0.17699290639416637, "grad_norm": 6.9475812911987305, "learning_rate": 9.31436577649618e-06, "loss": 0.4077, "step": 3568 }, { "epoch": 0.17704251202936652, "grad_norm": 12.690330505371094, "learning_rate": 9.313967911386347e-06, "loss": 0.4472, "step": 3569 }, { "epoch": 0.1770921176645667, "grad_norm": 7.229403972625732, "learning_rate": 9.313569939374438e-06, "loss": 0.3352, "step": 3570 }, { "epoch": 0.17714172329976685, "grad_norm": 8.036088943481445, "learning_rate": 9.313171860470315e-06, "loss": 0.3387, "step": 3571 }, { "epoch": 0.177191328934967, "grad_norm": 9.910188674926758, "learning_rate": 9.312773674683841e-06, "loss": 0.3731, "step": 3572 }, { "epoch": 0.17724093457016718, "grad_norm": 7.37747049331665, "learning_rate": 9.312375382024887e-06, "loss": 0.379, "step": 3573 }, { "epoch": 0.17729054020536733, "grad_norm": 4.052466869354248, "learning_rate": 9.311976982503319e-06, "loss": 0.3214, "step": 3574 }, { "epoch": 0.17734014584056748, "grad_norm": 3.165348529815674, "learning_rate": 9.311578476129011e-06, "loss": 0.286, "step": 3575 }, { "epoch": 0.17738975147576766, "grad_norm": 6.814894676208496, "learning_rate": 9.311179862911836e-06, "loss": 0.3495, "step": 3576 }, { "epoch": 0.1774393571109678, "grad_norm": 5.557689189910889, "learning_rate": 9.310781142861675e-06, "loss": 0.3017, "step": 3577 }, { "epoch": 0.17748896274616796, "grad_norm": 6.890210151672363, "learning_rate": 9.310382315988408e-06, "loss": 0.359, "step": 3578 }, { "epoch": 0.1775385683813681, "grad_norm": 4.857250213623047, "learning_rate": 9.30998338230192e-06, "loss": 0.2696, "step": 3579 }, { "epoch": 0.1775881740165683, "grad_norm": 5.009138107299805, "learning_rate": 9.30958434181209e-06, "loss": 0.2054, "step": 3580 }, { "epoch": 0.17763777965176844, "grad_norm": 9.555342674255371, "learning_rate": 9.309185194528812e-06, "loss": 0.5387, "step": 3581 }, { "epoch": 0.1776873852869686, "grad_norm": 4.916088104248047, "learning_rate": 9.308785940461976e-06, "loss": 0.3439, "step": 3582 }, { "epoch": 0.17773699092216877, "grad_norm": 7.923920154571533, "learning_rate": 9.308386579621475e-06, "loss": 0.3263, "step": 3583 }, { "epoch": 0.17778659655736892, "grad_norm": 16.87380599975586, "learning_rate": 9.307987112017204e-06, "loss": 0.3336, "step": 3584 }, { "epoch": 0.17783620219256907, "grad_norm": 6.500134468078613, "learning_rate": 9.307587537659066e-06, "loss": 0.3929, "step": 3585 }, { "epoch": 0.17788580782776922, "grad_norm": 7.729284763336182, "learning_rate": 9.307187856556961e-06, "loss": 0.235, "step": 3586 }, { "epoch": 0.1779354134629694, "grad_norm": 5.9831929206848145, "learning_rate": 9.30678806872079e-06, "loss": 0.1798, "step": 3587 }, { "epoch": 0.17798501909816955, "grad_norm": 10.668107032775879, "learning_rate": 9.306388174160464e-06, "loss": 0.3216, "step": 3588 }, { "epoch": 0.1780346247333697, "grad_norm": 12.185824394226074, "learning_rate": 9.30598817288589e-06, "loss": 0.3907, "step": 3589 }, { "epoch": 0.17808423036856988, "grad_norm": 4.154964923858643, "learning_rate": 9.305588064906983e-06, "loss": 0.2389, "step": 3590 }, { "epoch": 0.17813383600377003, "grad_norm": 7.561675548553467, "learning_rate": 9.305187850233654e-06, "loss": 0.3647, "step": 3591 }, { "epoch": 0.17818344163897018, "grad_norm": 6.916323184967041, "learning_rate": 9.304787528875824e-06, "loss": 0.2204, "step": 3592 }, { "epoch": 0.17823304727417036, "grad_norm": 18.333526611328125, "learning_rate": 9.304387100843412e-06, "loss": 0.6081, "step": 3593 }, { "epoch": 0.1782826529093705, "grad_norm": 11.357025146484375, "learning_rate": 9.303986566146338e-06, "loss": 0.2321, "step": 3594 }, { "epoch": 0.17833225854457066, "grad_norm": 12.93372631072998, "learning_rate": 9.303585924794533e-06, "loss": 0.4815, "step": 3595 }, { "epoch": 0.1783818641797708, "grad_norm": 13.667856216430664, "learning_rate": 9.303185176797923e-06, "loss": 0.4019, "step": 3596 }, { "epoch": 0.178431469814971, "grad_norm": 23.205472946166992, "learning_rate": 9.302784322166433e-06, "loss": 0.5315, "step": 3597 }, { "epoch": 0.17848107545017114, "grad_norm": 6.364995002746582, "learning_rate": 9.302383360910006e-06, "loss": 0.2867, "step": 3598 }, { "epoch": 0.1785306810853713, "grad_norm": 5.619176864624023, "learning_rate": 9.301982293038569e-06, "loss": 0.2721, "step": 3599 }, { "epoch": 0.17858028672057147, "grad_norm": 9.336888313293457, "learning_rate": 9.301581118562069e-06, "loss": 0.3941, "step": 3600 }, { "epoch": 0.17862989235577162, "grad_norm": 10.249963760375977, "learning_rate": 9.30117983749044e-06, "loss": 0.3766, "step": 3601 }, { "epoch": 0.17867949799097177, "grad_norm": 11.81485652923584, "learning_rate": 9.30077844983363e-06, "loss": 0.5172, "step": 3602 }, { "epoch": 0.17872910362617192, "grad_norm": 5.445967674255371, "learning_rate": 9.300376955601584e-06, "loss": 0.2944, "step": 3603 }, { "epoch": 0.1787787092613721, "grad_norm": 11.249866485595703, "learning_rate": 9.299975354804252e-06, "loss": 0.3155, "step": 3604 }, { "epoch": 0.17882831489657225, "grad_norm": 6.404260635375977, "learning_rate": 9.299573647451586e-06, "loss": 0.413, "step": 3605 }, { "epoch": 0.1788779205317724, "grad_norm": 5.833316802978516, "learning_rate": 9.299171833553539e-06, "loss": 0.352, "step": 3606 }, { "epoch": 0.17892752616697258, "grad_norm": 5.6470842361450195, "learning_rate": 9.298769913120072e-06, "loss": 0.3911, "step": 3607 }, { "epoch": 0.17897713180217273, "grad_norm": 3.7026634216308594, "learning_rate": 9.29836788616114e-06, "loss": 0.3265, "step": 3608 }, { "epoch": 0.17902673743737288, "grad_norm": 5.2589569091796875, "learning_rate": 9.297965752686707e-06, "loss": 0.333, "step": 3609 }, { "epoch": 0.17907634307257306, "grad_norm": 12.806267738342285, "learning_rate": 9.29756351270674e-06, "loss": 0.4204, "step": 3610 }, { "epoch": 0.1791259487077732, "grad_norm": 8.893047332763672, "learning_rate": 9.297161166231203e-06, "loss": 0.4287, "step": 3611 }, { "epoch": 0.17917555434297336, "grad_norm": 5.132418155670166, "learning_rate": 9.29675871327007e-06, "loss": 0.3797, "step": 3612 }, { "epoch": 0.1792251599781735, "grad_norm": 10.41483211517334, "learning_rate": 9.296356153833312e-06, "loss": 0.3591, "step": 3613 }, { "epoch": 0.1792747656133737, "grad_norm": 4.922377586364746, "learning_rate": 9.295953487930907e-06, "loss": 0.2721, "step": 3614 }, { "epoch": 0.17932437124857384, "grad_norm": 6.078029155731201, "learning_rate": 9.295550715572829e-06, "loss": 0.2477, "step": 3615 }, { "epoch": 0.179373976883774, "grad_norm": 8.93543529510498, "learning_rate": 9.29514783676906e-06, "loss": 0.3997, "step": 3616 }, { "epoch": 0.17942358251897417, "grad_norm": 6.490139007568359, "learning_rate": 9.294744851529587e-06, "loss": 0.3516, "step": 3617 }, { "epoch": 0.17947318815417432, "grad_norm": 15.52550983428955, "learning_rate": 9.294341759864394e-06, "loss": 0.4839, "step": 3618 }, { "epoch": 0.17952279378937447, "grad_norm": 7.505877494812012, "learning_rate": 9.293938561783469e-06, "loss": 0.2276, "step": 3619 }, { "epoch": 0.17957239942457462, "grad_norm": 5.6484479904174805, "learning_rate": 9.293535257296804e-06, "loss": 0.2828, "step": 3620 }, { "epoch": 0.1796220050597748, "grad_norm": 8.63835620880127, "learning_rate": 9.293131846414394e-06, "loss": 0.3563, "step": 3621 }, { "epoch": 0.17967161069497495, "grad_norm": 6.798695087432861, "learning_rate": 9.292728329146232e-06, "loss": 0.3148, "step": 3622 }, { "epoch": 0.1797212163301751, "grad_norm": 9.06438159942627, "learning_rate": 9.292324705502324e-06, "loss": 0.2794, "step": 3623 }, { "epoch": 0.17977082196537528, "grad_norm": 12.4310941696167, "learning_rate": 9.291920975492666e-06, "loss": 0.4499, "step": 3624 }, { "epoch": 0.17982042760057543, "grad_norm": 8.849579811096191, "learning_rate": 9.291517139127264e-06, "loss": 0.2397, "step": 3625 }, { "epoch": 0.17987003323577558, "grad_norm": 4.971452713012695, "learning_rate": 9.291113196416128e-06, "loss": 0.3592, "step": 3626 }, { "epoch": 0.17991963887097576, "grad_norm": 6.0465617179870605, "learning_rate": 9.290709147369266e-06, "loss": 0.2845, "step": 3627 }, { "epoch": 0.1799692445061759, "grad_norm": 12.830348014831543, "learning_rate": 9.290304991996687e-06, "loss": 0.2959, "step": 3628 }, { "epoch": 0.18001885014137606, "grad_norm": 4.748063087463379, "learning_rate": 9.289900730308415e-06, "loss": 0.2996, "step": 3629 }, { "epoch": 0.1800684557765762, "grad_norm": 7.308681488037109, "learning_rate": 9.28949636231446e-06, "loss": 0.3875, "step": 3630 }, { "epoch": 0.1801180614117764, "grad_norm": 6.619848728179932, "learning_rate": 9.289091888024842e-06, "loss": 0.3593, "step": 3631 }, { "epoch": 0.18016766704697654, "grad_norm": 9.984259605407715, "learning_rate": 9.28868730744959e-06, "loss": 0.3725, "step": 3632 }, { "epoch": 0.1802172726821767, "grad_norm": 6.252998352050781, "learning_rate": 9.288282620598726e-06, "loss": 0.3362, "step": 3633 }, { "epoch": 0.18026687831737687, "grad_norm": 6.700893878936768, "learning_rate": 9.287877827482277e-06, "loss": 0.3769, "step": 3634 }, { "epoch": 0.18031648395257702, "grad_norm": 5.095185279846191, "learning_rate": 9.287472928110277e-06, "loss": 0.3255, "step": 3635 }, { "epoch": 0.18036608958777717, "grad_norm": 8.647311210632324, "learning_rate": 9.287067922492759e-06, "loss": 0.3391, "step": 3636 }, { "epoch": 0.18041569522297732, "grad_norm": 6.67681884765625, "learning_rate": 9.28666281063976e-06, "loss": 0.3537, "step": 3637 }, { "epoch": 0.1804653008581775, "grad_norm": 8.421165466308594, "learning_rate": 9.286257592561314e-06, "loss": 0.3265, "step": 3638 }, { "epoch": 0.18051490649337765, "grad_norm": 4.701231956481934, "learning_rate": 9.285852268267468e-06, "loss": 0.3452, "step": 3639 }, { "epoch": 0.1805645121285778, "grad_norm": 6.8073811531066895, "learning_rate": 9.285446837768264e-06, "loss": 0.2848, "step": 3640 }, { "epoch": 0.18061411776377798, "grad_norm": 5.883018493652344, "learning_rate": 9.28504130107375e-06, "loss": 0.2731, "step": 3641 }, { "epoch": 0.18066372339897813, "grad_norm": 5.936650276184082, "learning_rate": 9.284635658193972e-06, "loss": 0.3735, "step": 3642 }, { "epoch": 0.18071332903417828, "grad_norm": 16.90523910522461, "learning_rate": 9.284229909138986e-06, "loss": 0.3287, "step": 3643 }, { "epoch": 0.18076293466937843, "grad_norm": 8.490625381469727, "learning_rate": 9.283824053918844e-06, "loss": 0.2859, "step": 3644 }, { "epoch": 0.1808125403045786, "grad_norm": 13.931760787963867, "learning_rate": 9.283418092543604e-06, "loss": 0.4896, "step": 3645 }, { "epoch": 0.18086214593977876, "grad_norm": 8.058212280273438, "learning_rate": 9.283012025023325e-06, "loss": 0.3285, "step": 3646 }, { "epoch": 0.1809117515749789, "grad_norm": 8.22346305847168, "learning_rate": 9.282605851368072e-06, "loss": 0.3714, "step": 3647 }, { "epoch": 0.18096135721017909, "grad_norm": 4.54291296005249, "learning_rate": 9.282199571587909e-06, "loss": 0.3536, "step": 3648 }, { "epoch": 0.18101096284537924, "grad_norm": 10.625056266784668, "learning_rate": 9.281793185692904e-06, "loss": 0.3473, "step": 3649 }, { "epoch": 0.1810605684805794, "grad_norm": 4.80381965637207, "learning_rate": 9.281386693693127e-06, "loss": 0.2863, "step": 3650 }, { "epoch": 0.18111017411577957, "grad_norm": 12.351819038391113, "learning_rate": 9.280980095598649e-06, "loss": 0.4285, "step": 3651 }, { "epoch": 0.18115977975097972, "grad_norm": 4.363864421844482, "learning_rate": 9.28057339141955e-06, "loss": 0.2294, "step": 3652 }, { "epoch": 0.18120938538617987, "grad_norm": 6.104877948760986, "learning_rate": 9.280166581165904e-06, "loss": 0.4134, "step": 3653 }, { "epoch": 0.18125899102138002, "grad_norm": 9.74032974243164, "learning_rate": 9.279759664847795e-06, "loss": 0.3711, "step": 3654 }, { "epoch": 0.1813085966565802, "grad_norm": 13.37436580657959, "learning_rate": 9.279352642475305e-06, "loss": 0.3584, "step": 3655 }, { "epoch": 0.18135820229178035, "grad_norm": 6.633988380432129, "learning_rate": 9.278945514058522e-06, "loss": 0.3907, "step": 3656 }, { "epoch": 0.1814078079269805, "grad_norm": 10.739034652709961, "learning_rate": 9.278538279607533e-06, "loss": 0.3628, "step": 3657 }, { "epoch": 0.18145741356218067, "grad_norm": 8.23672866821289, "learning_rate": 9.278130939132431e-06, "loss": 0.43, "step": 3658 }, { "epoch": 0.18150701919738083, "grad_norm": 6.218230247497559, "learning_rate": 9.277723492643307e-06, "loss": 0.2852, "step": 3659 }, { "epoch": 0.18155662483258098, "grad_norm": 5.508529186248779, "learning_rate": 9.277315940150262e-06, "loss": 0.2871, "step": 3660 }, { "epoch": 0.18160623046778113, "grad_norm": 11.597356796264648, "learning_rate": 9.276908281663391e-06, "loss": 0.3856, "step": 3661 }, { "epoch": 0.1816558361029813, "grad_norm": 6.746452808380127, "learning_rate": 9.276500517192798e-06, "loss": 0.3252, "step": 3662 }, { "epoch": 0.18170544173818146, "grad_norm": 10.949308395385742, "learning_rate": 9.27609264674859e-06, "loss": 0.3847, "step": 3663 }, { "epoch": 0.1817550473733816, "grad_norm": 27.41460609436035, "learning_rate": 9.275684670340871e-06, "loss": 0.3311, "step": 3664 }, { "epoch": 0.18180465300858178, "grad_norm": 7.502086639404297, "learning_rate": 9.275276587979752e-06, "loss": 0.4357, "step": 3665 }, { "epoch": 0.18185425864378194, "grad_norm": 3.8446505069732666, "learning_rate": 9.274868399675345e-06, "loss": 0.2779, "step": 3666 }, { "epoch": 0.18190386427898209, "grad_norm": 4.811502456665039, "learning_rate": 9.274460105437765e-06, "loss": 0.2095, "step": 3667 }, { "epoch": 0.18195346991418226, "grad_norm": 10.377081871032715, "learning_rate": 9.27405170527713e-06, "loss": 0.3074, "step": 3668 }, { "epoch": 0.18200307554938241, "grad_norm": 13.123211860656738, "learning_rate": 9.273643199203562e-06, "loss": 0.3668, "step": 3669 }, { "epoch": 0.18205268118458257, "grad_norm": 8.465155601501465, "learning_rate": 9.27323458722718e-06, "loss": 0.2922, "step": 3670 }, { "epoch": 0.18210228681978272, "grad_norm": 6.650754451751709, "learning_rate": 9.272825869358114e-06, "loss": 0.3108, "step": 3671 }, { "epoch": 0.1821518924549829, "grad_norm": 9.023988723754883, "learning_rate": 9.272417045606488e-06, "loss": 0.3327, "step": 3672 }, { "epoch": 0.18220149809018304, "grad_norm": 7.959477424621582, "learning_rate": 9.272008115982436e-06, "loss": 0.3715, "step": 3673 }, { "epoch": 0.1822511037253832, "grad_norm": 4.86047887802124, "learning_rate": 9.27159908049609e-06, "loss": 0.2799, "step": 3674 }, { "epoch": 0.18230070936058337, "grad_norm": 4.593847751617432, "learning_rate": 9.271189939157587e-06, "loss": 0.2919, "step": 3675 }, { "epoch": 0.18235031499578352, "grad_norm": 6.342029571533203, "learning_rate": 9.270780691977066e-06, "loss": 0.3183, "step": 3676 }, { "epoch": 0.18239992063098368, "grad_norm": 6.235935211181641, "learning_rate": 9.270371338964665e-06, "loss": 0.2936, "step": 3677 }, { "epoch": 0.18244952626618383, "grad_norm": 6.777374267578125, "learning_rate": 9.269961880130534e-06, "loss": 0.3815, "step": 3678 }, { "epoch": 0.182499131901384, "grad_norm": 7.4232940673828125, "learning_rate": 9.269552315484813e-06, "loss": 0.2679, "step": 3679 }, { "epoch": 0.18254873753658415, "grad_norm": 9.58059310913086, "learning_rate": 9.269142645037657e-06, "loss": 0.2453, "step": 3680 }, { "epoch": 0.1825983431717843, "grad_norm": 5.365817070007324, "learning_rate": 9.268732868799213e-06, "loss": 0.2691, "step": 3681 }, { "epoch": 0.18264794880698448, "grad_norm": 7.770260334014893, "learning_rate": 9.268322986779639e-06, "loss": 0.3233, "step": 3682 }, { "epoch": 0.18269755444218463, "grad_norm": 5.875345706939697, "learning_rate": 9.26791299898909e-06, "loss": 0.3194, "step": 3683 }, { "epoch": 0.18274716007738478, "grad_norm": 7.209329605102539, "learning_rate": 9.267502905437727e-06, "loss": 0.4017, "step": 3684 }, { "epoch": 0.18279676571258496, "grad_norm": 13.899514198303223, "learning_rate": 9.26709270613571e-06, "loss": 0.3741, "step": 3685 }, { "epoch": 0.1828463713477851, "grad_norm": 7.575051307678223, "learning_rate": 9.266682401093206e-06, "loss": 0.4143, "step": 3686 }, { "epoch": 0.18289597698298526, "grad_norm": 4.346584320068359, "learning_rate": 9.266271990320382e-06, "loss": 0.2408, "step": 3687 }, { "epoch": 0.18294558261818542, "grad_norm": 11.2249755859375, "learning_rate": 9.265861473827409e-06, "loss": 0.4501, "step": 3688 }, { "epoch": 0.1829951882533856, "grad_norm": 8.871237754821777, "learning_rate": 9.265450851624458e-06, "loss": 0.3674, "step": 3689 }, { "epoch": 0.18304479388858574, "grad_norm": 5.67971134185791, "learning_rate": 9.265040123721706e-06, "loss": 0.2872, "step": 3690 }, { "epoch": 0.1830943995237859, "grad_norm": 6.848139762878418, "learning_rate": 9.264629290129332e-06, "loss": 0.3151, "step": 3691 }, { "epoch": 0.18314400515898607, "grad_norm": 7.023892402648926, "learning_rate": 9.264218350857514e-06, "loss": 0.3331, "step": 3692 }, { "epoch": 0.18319361079418622, "grad_norm": 6.092163562774658, "learning_rate": 9.263807305916435e-06, "loss": 0.3175, "step": 3693 }, { "epoch": 0.18324321642938637, "grad_norm": 6.847169399261475, "learning_rate": 9.263396155316284e-06, "loss": 0.3767, "step": 3694 }, { "epoch": 0.18329282206458652, "grad_norm": 7.374477386474609, "learning_rate": 9.262984899067247e-06, "loss": 0.269, "step": 3695 }, { "epoch": 0.1833424276997867, "grad_norm": 5.8704352378845215, "learning_rate": 9.262573537179516e-06, "loss": 0.3255, "step": 3696 }, { "epoch": 0.18339203333498685, "grad_norm": 4.05799674987793, "learning_rate": 9.262162069663285e-06, "loss": 0.2667, "step": 3697 }, { "epoch": 0.183441638970187, "grad_norm": 6.24201774597168, "learning_rate": 9.261750496528748e-06, "loss": 0.3162, "step": 3698 }, { "epoch": 0.18349124460538718, "grad_norm": 5.173689365386963, "learning_rate": 9.261338817786107e-06, "loss": 0.3412, "step": 3699 }, { "epoch": 0.18354085024058733, "grad_norm": 7.267763614654541, "learning_rate": 9.260927033445564e-06, "loss": 0.248, "step": 3700 }, { "epoch": 0.18359045587578748, "grad_norm": 9.036163330078125, "learning_rate": 9.260515143517319e-06, "loss": 0.3009, "step": 3701 }, { "epoch": 0.18364006151098763, "grad_norm": 10.713967323303223, "learning_rate": 9.260103148011584e-06, "loss": 0.305, "step": 3702 }, { "epoch": 0.1836896671461878, "grad_norm": 6.354364395141602, "learning_rate": 9.259691046938564e-06, "loss": 0.2544, "step": 3703 }, { "epoch": 0.18373927278138796, "grad_norm": 13.191632270812988, "learning_rate": 9.259278840308474e-06, "loss": 0.4925, "step": 3704 }, { "epoch": 0.18378887841658811, "grad_norm": 5.274548530578613, "learning_rate": 9.258866528131527e-06, "loss": 0.2453, "step": 3705 }, { "epoch": 0.1838384840517883, "grad_norm": 9.864952087402344, "learning_rate": 9.258454110417943e-06, "loss": 0.4503, "step": 3706 }, { "epoch": 0.18388808968698844, "grad_norm": 7.0949387550354, "learning_rate": 9.258041587177937e-06, "loss": 0.3462, "step": 3707 }, { "epoch": 0.1839376953221886, "grad_norm": 4.771572113037109, "learning_rate": 9.257628958421736e-06, "loss": 0.2677, "step": 3708 }, { "epoch": 0.18398730095738877, "grad_norm": 7.245157718658447, "learning_rate": 9.257216224159564e-06, "loss": 0.3944, "step": 3709 }, { "epoch": 0.18403690659258892, "grad_norm": 9.479260444641113, "learning_rate": 9.256803384401647e-06, "loss": 0.3203, "step": 3710 }, { "epoch": 0.18408651222778907, "grad_norm": 13.076632499694824, "learning_rate": 9.256390439158215e-06, "loss": 0.2967, "step": 3711 }, { "epoch": 0.18413611786298922, "grad_norm": 6.75695276260376, "learning_rate": 9.255977388439506e-06, "loss": 0.3364, "step": 3712 }, { "epoch": 0.1841857234981894, "grad_norm": 6.7277512550354, "learning_rate": 9.255564232255751e-06, "loss": 0.2951, "step": 3713 }, { "epoch": 0.18423532913338955, "grad_norm": 9.453882217407227, "learning_rate": 9.255150970617188e-06, "loss": 0.375, "step": 3714 }, { "epoch": 0.1842849347685897, "grad_norm": 3.6994967460632324, "learning_rate": 9.254737603534061e-06, "loss": 0.258, "step": 3715 }, { "epoch": 0.18433454040378988, "grad_norm": 7.431496620178223, "learning_rate": 9.25432413101661e-06, "loss": 0.2787, "step": 3716 }, { "epoch": 0.18438414603899003, "grad_norm": 9.423493385314941, "learning_rate": 9.253910553075083e-06, "loss": 0.424, "step": 3717 }, { "epoch": 0.18443375167419018, "grad_norm": 8.961546897888184, "learning_rate": 9.253496869719728e-06, "loss": 0.3727, "step": 3718 }, { "epoch": 0.18448335730939033, "grad_norm": 12.339235305786133, "learning_rate": 9.253083080960798e-06, "loss": 0.3868, "step": 3719 }, { "epoch": 0.1845329629445905, "grad_norm": 7.020535945892334, "learning_rate": 9.252669186808543e-06, "loss": 0.3072, "step": 3720 }, { "epoch": 0.18458256857979066, "grad_norm": 5.783322334289551, "learning_rate": 9.252255187273223e-06, "loss": 0.3403, "step": 3721 }, { "epoch": 0.1846321742149908, "grad_norm": 3.7432825565338135, "learning_rate": 9.251841082365096e-06, "loss": 0.3392, "step": 3722 }, { "epoch": 0.184681779850191, "grad_norm": 5.398527145385742, "learning_rate": 9.251426872094425e-06, "loss": 0.2624, "step": 3723 }, { "epoch": 0.18473138548539114, "grad_norm": 11.189199447631836, "learning_rate": 9.25101255647147e-06, "loss": 0.3277, "step": 3724 }, { "epoch": 0.1847809911205913, "grad_norm": 8.333590507507324, "learning_rate": 9.250598135506503e-06, "loss": 0.3919, "step": 3725 }, { "epoch": 0.18483059675579147, "grad_norm": 5.789470672607422, "learning_rate": 9.250183609209792e-06, "loss": 0.2827, "step": 3726 }, { "epoch": 0.18488020239099162, "grad_norm": 5.375936508178711, "learning_rate": 9.249768977591607e-06, "loss": 0.3331, "step": 3727 }, { "epoch": 0.18492980802619177, "grad_norm": 7.39975118637085, "learning_rate": 9.249354240662226e-06, "loss": 0.3213, "step": 3728 }, { "epoch": 0.18497941366139192, "grad_norm": 9.743929862976074, "learning_rate": 9.248939398431924e-06, "loss": 0.2395, "step": 3729 }, { "epoch": 0.1850290192965921, "grad_norm": 13.242753028869629, "learning_rate": 9.248524450910981e-06, "loss": 0.3771, "step": 3730 }, { "epoch": 0.18507862493179225, "grad_norm": 6.669060230255127, "learning_rate": 9.248109398109681e-06, "loss": 0.3788, "step": 3731 }, { "epoch": 0.1851282305669924, "grad_norm": 12.256590843200684, "learning_rate": 9.247694240038307e-06, "loss": 0.4548, "step": 3732 }, { "epoch": 0.18517783620219258, "grad_norm": 14.265579223632812, "learning_rate": 9.247278976707151e-06, "loss": 0.3883, "step": 3733 }, { "epoch": 0.18522744183739273, "grad_norm": 7.442595958709717, "learning_rate": 9.246863608126498e-06, "loss": 0.2742, "step": 3734 }, { "epoch": 0.18527704747259288, "grad_norm": 7.488824844360352, "learning_rate": 9.246448134306647e-06, "loss": 0.3618, "step": 3735 }, { "epoch": 0.18532665310779303, "grad_norm": 9.933454513549805, "learning_rate": 9.246032555257886e-06, "loss": 0.2776, "step": 3736 }, { "epoch": 0.1853762587429932, "grad_norm": 20.317121505737305, "learning_rate": 9.24561687099052e-06, "loss": 0.5402, "step": 3737 }, { "epoch": 0.18542586437819336, "grad_norm": 6.637823104858398, "learning_rate": 9.245201081514848e-06, "loss": 0.3363, "step": 3738 }, { "epoch": 0.1854754700133935, "grad_norm": 10.446732521057129, "learning_rate": 9.244785186841173e-06, "loss": 0.4009, "step": 3739 }, { "epoch": 0.1855250756485937, "grad_norm": 4.971446514129639, "learning_rate": 9.2443691869798e-06, "loss": 0.2991, "step": 3740 }, { "epoch": 0.18557468128379384, "grad_norm": 5.849692344665527, "learning_rate": 9.243953081941039e-06, "loss": 0.3228, "step": 3741 }, { "epoch": 0.185624286918994, "grad_norm": 6.053155899047852, "learning_rate": 9.2435368717352e-06, "loss": 0.3484, "step": 3742 }, { "epoch": 0.18567389255419417, "grad_norm": 10.21238899230957, "learning_rate": 9.243120556372598e-06, "loss": 0.4032, "step": 3743 }, { "epoch": 0.18572349818939432, "grad_norm": 7.271864414215088, "learning_rate": 9.242704135863549e-06, "loss": 0.2584, "step": 3744 }, { "epoch": 0.18577310382459447, "grad_norm": 5.126954078674316, "learning_rate": 9.242287610218374e-06, "loss": 0.3064, "step": 3745 }, { "epoch": 0.18582270945979462, "grad_norm": 6.188652038574219, "learning_rate": 9.241870979447392e-06, "loss": 0.3036, "step": 3746 }, { "epoch": 0.1858723150949948, "grad_norm": 13.295690536499023, "learning_rate": 9.24145424356093e-06, "loss": 0.415, "step": 3747 }, { "epoch": 0.18592192073019495, "grad_norm": 9.315980911254883, "learning_rate": 9.241037402569311e-06, "loss": 0.3295, "step": 3748 }, { "epoch": 0.1859715263653951, "grad_norm": 5.959822654724121, "learning_rate": 9.240620456482868e-06, "loss": 0.3007, "step": 3749 }, { "epoch": 0.18602113200059528, "grad_norm": 5.4442524909973145, "learning_rate": 9.24020340531193e-06, "loss": 0.2831, "step": 3750 }, { "epoch": 0.18607073763579543, "grad_norm": 12.41697883605957, "learning_rate": 9.239786249066834e-06, "loss": 0.3694, "step": 3751 }, { "epoch": 0.18612034327099558, "grad_norm": 11.018672943115234, "learning_rate": 9.239368987757918e-06, "loss": 0.3571, "step": 3752 }, { "epoch": 0.18616994890619573, "grad_norm": 9.526529312133789, "learning_rate": 9.23895162139552e-06, "loss": 0.2961, "step": 3753 }, { "epoch": 0.1862195545413959, "grad_norm": 13.911309242248535, "learning_rate": 9.238534149989983e-06, "loss": 0.27, "step": 3754 }, { "epoch": 0.18626916017659606, "grad_norm": 7.839229583740234, "learning_rate": 9.238116573551654e-06, "loss": 0.2651, "step": 3755 }, { "epoch": 0.1863187658117962, "grad_norm": 5.1164960861206055, "learning_rate": 9.237698892090878e-06, "loss": 0.3195, "step": 3756 }, { "epoch": 0.1863683714469964, "grad_norm": 5.891760349273682, "learning_rate": 9.237281105618007e-06, "loss": 0.3082, "step": 3757 }, { "epoch": 0.18641797708219654, "grad_norm": 7.284499645233154, "learning_rate": 9.236863214143395e-06, "loss": 0.3477, "step": 3758 }, { "epoch": 0.1864675827173967, "grad_norm": 12.417335510253906, "learning_rate": 9.236445217677393e-06, "loss": 0.4463, "step": 3759 }, { "epoch": 0.18651718835259687, "grad_norm": 18.939680099487305, "learning_rate": 9.236027116230364e-06, "loss": 0.4191, "step": 3760 }, { "epoch": 0.18656679398779702, "grad_norm": 13.820703506469727, "learning_rate": 9.235608909812665e-06, "loss": 0.3203, "step": 3761 }, { "epoch": 0.18661639962299717, "grad_norm": 5.200255393981934, "learning_rate": 9.235190598434664e-06, "loss": 0.281, "step": 3762 }, { "epoch": 0.18666600525819732, "grad_norm": 10.21777629852295, "learning_rate": 9.234772182106722e-06, "loss": 0.2922, "step": 3763 }, { "epoch": 0.1867156108933975, "grad_norm": 8.719725608825684, "learning_rate": 9.23435366083921e-06, "loss": 0.447, "step": 3764 }, { "epoch": 0.18676521652859765, "grad_norm": 10.732746124267578, "learning_rate": 9.233935034642501e-06, "loss": 0.4634, "step": 3765 }, { "epoch": 0.1868148221637978, "grad_norm": 17.875932693481445, "learning_rate": 9.233516303526966e-06, "loss": 0.4495, "step": 3766 }, { "epoch": 0.18686442779899798, "grad_norm": 7.293544292449951, "learning_rate": 9.23309746750298e-06, "loss": 0.2511, "step": 3767 }, { "epoch": 0.18691403343419813, "grad_norm": 9.975712776184082, "learning_rate": 9.232678526580926e-06, "loss": 0.3329, "step": 3768 }, { "epoch": 0.18696363906939828, "grad_norm": 19.972209930419922, "learning_rate": 9.232259480771182e-06, "loss": 0.319, "step": 3769 }, { "epoch": 0.18701324470459843, "grad_norm": 4.53566837310791, "learning_rate": 9.231840330084135e-06, "loss": 0.3666, "step": 3770 }, { "epoch": 0.1870628503397986, "grad_norm": 8.1580171585083, "learning_rate": 9.231421074530168e-06, "loss": 0.3109, "step": 3771 }, { "epoch": 0.18711245597499876, "grad_norm": 11.623466491699219, "learning_rate": 9.231001714119674e-06, "loss": 0.2805, "step": 3772 }, { "epoch": 0.1871620616101989, "grad_norm": 7.410106658935547, "learning_rate": 9.230582248863045e-06, "loss": 0.4394, "step": 3773 }, { "epoch": 0.1872116672453991, "grad_norm": 7.843281269073486, "learning_rate": 9.230162678770672e-06, "loss": 0.3394, "step": 3774 }, { "epoch": 0.18726127288059924, "grad_norm": 5.126232147216797, "learning_rate": 9.229743003852956e-06, "loss": 0.2944, "step": 3775 }, { "epoch": 0.1873108785157994, "grad_norm": 4.5613203048706055, "learning_rate": 9.229323224120294e-06, "loss": 0.283, "step": 3776 }, { "epoch": 0.18736048415099954, "grad_norm": 8.639835357666016, "learning_rate": 9.228903339583088e-06, "loss": 0.3042, "step": 3777 }, { "epoch": 0.18741008978619972, "grad_norm": 7.179988384246826, "learning_rate": 9.228483350251747e-06, "loss": 0.3793, "step": 3778 }, { "epoch": 0.18745969542139987, "grad_norm": 5.204151630401611, "learning_rate": 9.228063256136674e-06, "loss": 0.3164, "step": 3779 }, { "epoch": 0.18750930105660002, "grad_norm": 9.510513305664062, "learning_rate": 9.227643057248284e-06, "loss": 0.2644, "step": 3780 }, { "epoch": 0.1875589066918002, "grad_norm": 9.26729679107666, "learning_rate": 9.227222753596982e-06, "loss": 0.3755, "step": 3781 }, { "epoch": 0.18760851232700035, "grad_norm": 6.226562976837158, "learning_rate": 9.22680234519319e-06, "loss": 0.3305, "step": 3782 }, { "epoch": 0.1876581179622005, "grad_norm": 11.337358474731445, "learning_rate": 9.226381832047327e-06, "loss": 0.3631, "step": 3783 }, { "epoch": 0.18770772359740068, "grad_norm": 17.12700653076172, "learning_rate": 9.225961214169807e-06, "loss": 0.5887, "step": 3784 }, { "epoch": 0.18775732923260083, "grad_norm": 6.5298967361450195, "learning_rate": 9.225540491571057e-06, "loss": 0.2878, "step": 3785 }, { "epoch": 0.18780693486780098, "grad_norm": 6.381612777709961, "learning_rate": 9.225119664261502e-06, "loss": 0.3122, "step": 3786 }, { "epoch": 0.18785654050300113, "grad_norm": 10.52608871459961, "learning_rate": 9.224698732251571e-06, "loss": 0.3146, "step": 3787 }, { "epoch": 0.1879061461382013, "grad_norm": 6.690004825592041, "learning_rate": 9.224277695551694e-06, "loss": 0.2769, "step": 3788 }, { "epoch": 0.18795575177340146, "grad_norm": 7.7869791984558105, "learning_rate": 9.223856554172306e-06, "loss": 0.2746, "step": 3789 }, { "epoch": 0.1880053574086016, "grad_norm": 6.229508876800537, "learning_rate": 9.22343530812384e-06, "loss": 0.3337, "step": 3790 }, { "epoch": 0.1880549630438018, "grad_norm": 7.284118175506592, "learning_rate": 9.223013957416738e-06, "loss": 0.3146, "step": 3791 }, { "epoch": 0.18810456867900194, "grad_norm": 15.672369956970215, "learning_rate": 9.22259250206144e-06, "loss": 0.3932, "step": 3792 }, { "epoch": 0.1881541743142021, "grad_norm": 5.855510234832764, "learning_rate": 9.22217094206839e-06, "loss": 0.2694, "step": 3793 }, { "epoch": 0.18820377994940224, "grad_norm": 4.92496919631958, "learning_rate": 9.221749277448031e-06, "loss": 0.2094, "step": 3794 }, { "epoch": 0.18825338558460242, "grad_norm": 9.192893981933594, "learning_rate": 9.221327508210818e-06, "loss": 0.34, "step": 3795 }, { "epoch": 0.18830299121980257, "grad_norm": 6.183964729309082, "learning_rate": 9.220905634367201e-06, "loss": 0.2186, "step": 3796 }, { "epoch": 0.18835259685500272, "grad_norm": 7.062398910522461, "learning_rate": 9.22048365592763e-06, "loss": 0.2879, "step": 3797 }, { "epoch": 0.1884022024902029, "grad_norm": 14.441728591918945, "learning_rate": 9.220061572902567e-06, "loss": 0.3982, "step": 3798 }, { "epoch": 0.18845180812540305, "grad_norm": 16.507719039916992, "learning_rate": 9.219639385302468e-06, "loss": 0.2734, "step": 3799 }, { "epoch": 0.1885014137606032, "grad_norm": 6.11433219909668, "learning_rate": 9.219217093137798e-06, "loss": 0.2924, "step": 3800 }, { "epoch": 0.18855101939580338, "grad_norm": 9.85827922821045, "learning_rate": 9.218794696419018e-06, "loss": 0.3401, "step": 3801 }, { "epoch": 0.18860062503100353, "grad_norm": 8.924749374389648, "learning_rate": 9.218372195156597e-06, "loss": 0.4823, "step": 3802 }, { "epoch": 0.18865023066620368, "grad_norm": 10.75954818725586, "learning_rate": 9.217949589361008e-06, "loss": 0.3928, "step": 3803 }, { "epoch": 0.18869983630140383, "grad_norm": 8.083357810974121, "learning_rate": 9.217526879042718e-06, "loss": 0.2775, "step": 3804 }, { "epoch": 0.188749441936604, "grad_norm": 10.596597671508789, "learning_rate": 9.217104064212204e-06, "loss": 0.2973, "step": 3805 }, { "epoch": 0.18879904757180416, "grad_norm": 10.54415225982666, "learning_rate": 9.216681144879941e-06, "loss": 0.2323, "step": 3806 }, { "epoch": 0.1888486532070043, "grad_norm": 4.633784294128418, "learning_rate": 9.216258121056416e-06, "loss": 0.3027, "step": 3807 }, { "epoch": 0.1888982588422045, "grad_norm": 15.130617141723633, "learning_rate": 9.215834992752107e-06, "loss": 0.4241, "step": 3808 }, { "epoch": 0.18894786447740464, "grad_norm": 7.6302266120910645, "learning_rate": 9.215411759977498e-06, "loss": 0.3114, "step": 3809 }, { "epoch": 0.1889974701126048, "grad_norm": 5.592073440551758, "learning_rate": 9.21498842274308e-06, "loss": 0.3236, "step": 3810 }, { "epoch": 0.18904707574780494, "grad_norm": 5.81284761428833, "learning_rate": 9.214564981059341e-06, "loss": 0.3383, "step": 3811 }, { "epoch": 0.18909668138300512, "grad_norm": 5.52640438079834, "learning_rate": 9.214141434936776e-06, "loss": 0.2618, "step": 3812 }, { "epoch": 0.18914628701820527, "grad_norm": 7.577035427093506, "learning_rate": 9.21371778438588e-06, "loss": 0.3731, "step": 3813 }, { "epoch": 0.18919589265340542, "grad_norm": 8.464638710021973, "learning_rate": 9.213294029417152e-06, "loss": 0.4143, "step": 3814 }, { "epoch": 0.1892454982886056, "grad_norm": 10.142895698547363, "learning_rate": 9.212870170041092e-06, "loss": 0.3271, "step": 3815 }, { "epoch": 0.18929510392380575, "grad_norm": 7.683192253112793, "learning_rate": 9.212446206268202e-06, "loss": 0.406, "step": 3816 }, { "epoch": 0.1893447095590059, "grad_norm": 5.544844627380371, "learning_rate": 9.21202213810899e-06, "loss": 0.3424, "step": 3817 }, { "epoch": 0.18939431519420608, "grad_norm": 5.554598331451416, "learning_rate": 9.211597965573967e-06, "loss": 0.2819, "step": 3818 }, { "epoch": 0.18944392082940623, "grad_norm": 8.785751342773438, "learning_rate": 9.211173688673636e-06, "loss": 0.339, "step": 3819 }, { "epoch": 0.18949352646460638, "grad_norm": 9.103226661682129, "learning_rate": 9.21074930741852e-06, "loss": 0.3277, "step": 3820 }, { "epoch": 0.18954313209980653, "grad_norm": 3.5827417373657227, "learning_rate": 9.21032482181913e-06, "loss": 0.3151, "step": 3821 }, { "epoch": 0.1895927377350067, "grad_norm": 6.390666961669922, "learning_rate": 9.209900231885987e-06, "loss": 0.3473, "step": 3822 }, { "epoch": 0.18964234337020686, "grad_norm": 8.067184448242188, "learning_rate": 9.209475537629613e-06, "loss": 0.3494, "step": 3823 }, { "epoch": 0.189691949005407, "grad_norm": 5.599658012390137, "learning_rate": 9.209050739060529e-06, "loss": 0.3244, "step": 3824 }, { "epoch": 0.18974155464060719, "grad_norm": 8.131135940551758, "learning_rate": 9.208625836189264e-06, "loss": 0.3879, "step": 3825 }, { "epoch": 0.18979116027580734, "grad_norm": 6.777852535247803, "learning_rate": 9.208200829026348e-06, "loss": 0.3992, "step": 3826 }, { "epoch": 0.1898407659110075, "grad_norm": 8.70584487915039, "learning_rate": 9.207775717582312e-06, "loss": 0.3886, "step": 3827 }, { "epoch": 0.18989037154620764, "grad_norm": 4.2235283851623535, "learning_rate": 9.20735050186769e-06, "loss": 0.259, "step": 3828 }, { "epoch": 0.18993997718140782, "grad_norm": 5.675368785858154, "learning_rate": 9.206925181893019e-06, "loss": 0.3081, "step": 3829 }, { "epoch": 0.18998958281660797, "grad_norm": 7.92857027053833, "learning_rate": 9.206499757668838e-06, "loss": 0.298, "step": 3830 }, { "epoch": 0.19003918845180812, "grad_norm": 9.479079246520996, "learning_rate": 9.206074229205691e-06, "loss": 0.3571, "step": 3831 }, { "epoch": 0.1900887940870083, "grad_norm": 14.181781768798828, "learning_rate": 9.205648596514124e-06, "loss": 0.4661, "step": 3832 }, { "epoch": 0.19013839972220845, "grad_norm": 7.188015460968018, "learning_rate": 9.20522285960468e-06, "loss": 0.4353, "step": 3833 }, { "epoch": 0.1901880053574086, "grad_norm": 12.890825271606445, "learning_rate": 9.204797018487909e-06, "loss": 0.3665, "step": 3834 }, { "epoch": 0.19023761099260875, "grad_norm": 5.1503496170043945, "learning_rate": 9.20437107317437e-06, "loss": 0.2705, "step": 3835 }, { "epoch": 0.19028721662780892, "grad_norm": 7.172544002532959, "learning_rate": 9.203945023674613e-06, "loss": 0.271, "step": 3836 }, { "epoch": 0.19033682226300908, "grad_norm": 3.790304660797119, "learning_rate": 9.203518869999195e-06, "loss": 0.2721, "step": 3837 }, { "epoch": 0.19038642789820923, "grad_norm": 7.758367538452148, "learning_rate": 9.203092612158681e-06, "loss": 0.3478, "step": 3838 }, { "epoch": 0.1904360335334094, "grad_norm": 4.047357559204102, "learning_rate": 9.202666250163628e-06, "loss": 0.2381, "step": 3839 }, { "epoch": 0.19048563916860956, "grad_norm": 6.346692085266113, "learning_rate": 9.202239784024607e-06, "loss": 0.2873, "step": 3840 }, { "epoch": 0.1905352448038097, "grad_norm": 3.9945878982543945, "learning_rate": 9.20181321375218e-06, "loss": 0.3098, "step": 3841 }, { "epoch": 0.19058485043900988, "grad_norm": 3.957824230194092, "learning_rate": 9.201386539356923e-06, "loss": 0.294, "step": 3842 }, { "epoch": 0.19063445607421003, "grad_norm": 7.116961479187012, "learning_rate": 9.200959760849407e-06, "loss": 0.3158, "step": 3843 }, { "epoch": 0.19068406170941019, "grad_norm": 7.665694713592529, "learning_rate": 9.200532878240208e-06, "loss": 0.3365, "step": 3844 }, { "epoch": 0.19073366734461034, "grad_norm": 8.361236572265625, "learning_rate": 9.200105891539903e-06, "loss": 0.404, "step": 3845 }, { "epoch": 0.19078327297981051, "grad_norm": 8.645485877990723, "learning_rate": 9.199678800759072e-06, "loss": 0.294, "step": 3846 }, { "epoch": 0.19083287861501066, "grad_norm": 9.826221466064453, "learning_rate": 9.199251605908306e-06, "loss": 0.2968, "step": 3847 }, { "epoch": 0.19088248425021082, "grad_norm": 9.482677459716797, "learning_rate": 9.198824306998182e-06, "loss": 0.235, "step": 3848 }, { "epoch": 0.190932089885411, "grad_norm": 7.62348747253418, "learning_rate": 9.198396904039294e-06, "loss": 0.2995, "step": 3849 }, { "epoch": 0.19098169552061114, "grad_norm": 29.35057830810547, "learning_rate": 9.19796939704223e-06, "loss": 0.39, "step": 3850 }, { "epoch": 0.1910313011558113, "grad_norm": 5.721874237060547, "learning_rate": 9.197541786017586e-06, "loss": 0.2806, "step": 3851 }, { "epoch": 0.19108090679101145, "grad_norm": 4.359705924987793, "learning_rate": 9.197114070975959e-06, "loss": 0.2278, "step": 3852 }, { "epoch": 0.19113051242621162, "grad_norm": 8.07025146484375, "learning_rate": 9.196686251927946e-06, "loss": 0.2143, "step": 3853 }, { "epoch": 0.19118011806141177, "grad_norm": 9.631511688232422, "learning_rate": 9.196258328884149e-06, "loss": 0.3329, "step": 3854 }, { "epoch": 0.19122972369661193, "grad_norm": 9.375107765197754, "learning_rate": 9.195830301855171e-06, "loss": 0.2117, "step": 3855 }, { "epoch": 0.1912793293318121, "grad_norm": 7.157764911651611, "learning_rate": 9.195402170851621e-06, "loss": 0.353, "step": 3856 }, { "epoch": 0.19132893496701225, "grad_norm": 6.61203670501709, "learning_rate": 9.19497393588411e-06, "loss": 0.2949, "step": 3857 }, { "epoch": 0.1913785406022124, "grad_norm": 8.932178497314453, "learning_rate": 9.194545596963245e-06, "loss": 0.4302, "step": 3858 }, { "epoch": 0.19142814623741258, "grad_norm": 7.296398639678955, "learning_rate": 9.194117154099644e-06, "loss": 0.3019, "step": 3859 }, { "epoch": 0.19147775187261273, "grad_norm": 9.015249252319336, "learning_rate": 9.193688607303921e-06, "loss": 0.3928, "step": 3860 }, { "epoch": 0.19152735750781288, "grad_norm": 14.310980796813965, "learning_rate": 9.193259956586698e-06, "loss": 0.3349, "step": 3861 }, { "epoch": 0.19157696314301303, "grad_norm": 10.488424301147461, "learning_rate": 9.192831201958597e-06, "loss": 0.3454, "step": 3862 }, { "epoch": 0.1916265687782132, "grad_norm": 8.199101448059082, "learning_rate": 9.192402343430241e-06, "loss": 0.3226, "step": 3863 }, { "epoch": 0.19167617441341336, "grad_norm": 6.801671504974365, "learning_rate": 9.19197338101226e-06, "loss": 0.2466, "step": 3864 }, { "epoch": 0.19172578004861351, "grad_norm": 5.858008861541748, "learning_rate": 9.19154431471528e-06, "loss": 0.3393, "step": 3865 }, { "epoch": 0.1917753856838137, "grad_norm": 5.8767523765563965, "learning_rate": 9.191115144549939e-06, "loss": 0.3205, "step": 3866 }, { "epoch": 0.19182499131901384, "grad_norm": 5.750826358795166, "learning_rate": 9.190685870526867e-06, "loss": 0.2719, "step": 3867 }, { "epoch": 0.191874596954214, "grad_norm": 5.96956205368042, "learning_rate": 9.190256492656704e-06, "loss": 0.2112, "step": 3868 }, { "epoch": 0.19192420258941414, "grad_norm": 6.396494388580322, "learning_rate": 9.189827010950088e-06, "loss": 0.1897, "step": 3869 }, { "epoch": 0.19197380822461432, "grad_norm": 15.449461936950684, "learning_rate": 9.189397425417665e-06, "loss": 0.414, "step": 3870 }, { "epoch": 0.19202341385981447, "grad_norm": 5.010310173034668, "learning_rate": 9.188967736070079e-06, "loss": 0.2498, "step": 3871 }, { "epoch": 0.19207301949501462, "grad_norm": 6.446873188018799, "learning_rate": 9.188537942917976e-06, "loss": 0.2857, "step": 3872 }, { "epoch": 0.1921226251302148, "grad_norm": 9.925950050354004, "learning_rate": 9.188108045972011e-06, "loss": 0.2983, "step": 3873 }, { "epoch": 0.19217223076541495, "grad_norm": 16.341060638427734, "learning_rate": 9.187678045242832e-06, "loss": 0.428, "step": 3874 }, { "epoch": 0.1922218364006151, "grad_norm": 13.522686004638672, "learning_rate": 9.187247940741098e-06, "loss": 0.4676, "step": 3875 }, { "epoch": 0.19227144203581528, "grad_norm": 9.901750564575195, "learning_rate": 9.186817732477465e-06, "loss": 0.349, "step": 3876 }, { "epoch": 0.19232104767101543, "grad_norm": 7.515870094299316, "learning_rate": 9.186387420462593e-06, "loss": 0.3036, "step": 3877 }, { "epoch": 0.19237065330621558, "grad_norm": 5.703118801116943, "learning_rate": 9.185957004707149e-06, "loss": 0.2822, "step": 3878 }, { "epoch": 0.19242025894141573, "grad_norm": 12.12293815612793, "learning_rate": 9.185526485221797e-06, "loss": 0.3701, "step": 3879 }, { "epoch": 0.1924698645766159, "grad_norm": 6.530767440795898, "learning_rate": 9.185095862017207e-06, "loss": 0.3165, "step": 3880 }, { "epoch": 0.19251947021181606, "grad_norm": 21.983827590942383, "learning_rate": 9.184665135104045e-06, "loss": 0.4883, "step": 3881 }, { "epoch": 0.1925690758470162, "grad_norm": 8.318282127380371, "learning_rate": 9.18423430449299e-06, "loss": 0.3238, "step": 3882 }, { "epoch": 0.1926186814822164, "grad_norm": 8.62678050994873, "learning_rate": 9.183803370194714e-06, "loss": 0.3603, "step": 3883 }, { "epoch": 0.19266828711741654, "grad_norm": 11.790099143981934, "learning_rate": 9.1833723322199e-06, "loss": 0.35, "step": 3884 }, { "epoch": 0.1927178927526167, "grad_norm": 14.58010196685791, "learning_rate": 9.182941190579227e-06, "loss": 0.4627, "step": 3885 }, { "epoch": 0.19276749838781684, "grad_norm": 10.967442512512207, "learning_rate": 9.18250994528338e-06, "loss": 0.4353, "step": 3886 }, { "epoch": 0.19281710402301702, "grad_norm": 5.77970552444458, "learning_rate": 9.182078596343043e-06, "loss": 0.3131, "step": 3887 }, { "epoch": 0.19286670965821717, "grad_norm": 5.313802719116211, "learning_rate": 9.181647143768908e-06, "loss": 0.3178, "step": 3888 }, { "epoch": 0.19291631529341732, "grad_norm": 25.15638542175293, "learning_rate": 9.181215587571665e-06, "loss": 0.4559, "step": 3889 }, { "epoch": 0.1929659209286175, "grad_norm": 13.204371452331543, "learning_rate": 9.180783927762006e-06, "loss": 0.4307, "step": 3890 }, { "epoch": 0.19301552656381765, "grad_norm": 16.475582122802734, "learning_rate": 9.180352164350633e-06, "loss": 0.353, "step": 3891 }, { "epoch": 0.1930651321990178, "grad_norm": 10.29577350616455, "learning_rate": 9.179920297348241e-06, "loss": 0.5964, "step": 3892 }, { "epoch": 0.19311473783421798, "grad_norm": 5.3266143798828125, "learning_rate": 9.179488326765534e-06, "loss": 0.276, "step": 3893 }, { "epoch": 0.19316434346941813, "grad_norm": 18.837299346923828, "learning_rate": 9.179056252613215e-06, "loss": 0.4533, "step": 3894 }, { "epoch": 0.19321394910461828, "grad_norm": 9.370462417602539, "learning_rate": 9.178624074901991e-06, "loss": 0.4774, "step": 3895 }, { "epoch": 0.19326355473981843, "grad_norm": 7.8402791023254395, "learning_rate": 9.178191793642575e-06, "loss": 0.3842, "step": 3896 }, { "epoch": 0.1933131603750186, "grad_norm": 9.062370300292969, "learning_rate": 9.177759408845675e-06, "loss": 0.3929, "step": 3897 }, { "epoch": 0.19336276601021876, "grad_norm": 7.734874725341797, "learning_rate": 9.177326920522006e-06, "loss": 0.4489, "step": 3898 }, { "epoch": 0.1934123716454189, "grad_norm": 9.747312545776367, "learning_rate": 9.176894328682286e-06, "loss": 0.3074, "step": 3899 }, { "epoch": 0.1934619772806191, "grad_norm": 7.591876983642578, "learning_rate": 9.176461633337237e-06, "loss": 0.2989, "step": 3900 }, { "epoch": 0.19351158291581924, "grad_norm": 7.8857316970825195, "learning_rate": 9.176028834497578e-06, "loss": 0.4153, "step": 3901 }, { "epoch": 0.1935611885510194, "grad_norm": 9.964024543762207, "learning_rate": 9.175595932174035e-06, "loss": 0.3129, "step": 3902 }, { "epoch": 0.19361079418621954, "grad_norm": 11.569724082946777, "learning_rate": 9.175162926377338e-06, "loss": 0.4847, "step": 3903 }, { "epoch": 0.19366039982141972, "grad_norm": 14.020084381103516, "learning_rate": 9.174729817118213e-06, "loss": 0.4692, "step": 3904 }, { "epoch": 0.19371000545661987, "grad_norm": 4.89148473739624, "learning_rate": 9.174296604407395e-06, "loss": 0.3225, "step": 3905 }, { "epoch": 0.19375961109182002, "grad_norm": 11.896220207214355, "learning_rate": 9.173863288255621e-06, "loss": 0.471, "step": 3906 }, { "epoch": 0.1938092167270202, "grad_norm": 7.577060222625732, "learning_rate": 9.173429868673625e-06, "loss": 0.3468, "step": 3907 }, { "epoch": 0.19385882236222035, "grad_norm": 7.6421074867248535, "learning_rate": 9.17299634567215e-06, "loss": 0.2876, "step": 3908 }, { "epoch": 0.1939084279974205, "grad_norm": 4.835819244384766, "learning_rate": 9.172562719261937e-06, "loss": 0.2992, "step": 3909 }, { "epoch": 0.19395803363262065, "grad_norm": 5.653550624847412, "learning_rate": 9.172128989453733e-06, "loss": 0.2251, "step": 3910 }, { "epoch": 0.19400763926782083, "grad_norm": 11.968775749206543, "learning_rate": 9.171695156258285e-06, "loss": 0.411, "step": 3911 }, { "epoch": 0.19405724490302098, "grad_norm": 8.33229923248291, "learning_rate": 9.171261219686343e-06, "loss": 0.4181, "step": 3912 }, { "epoch": 0.19410685053822113, "grad_norm": 17.099584579467773, "learning_rate": 9.170827179748663e-06, "loss": 0.4067, "step": 3913 }, { "epoch": 0.1941564561734213, "grad_norm": 5.293796062469482, "learning_rate": 9.170393036455995e-06, "loss": 0.3048, "step": 3914 }, { "epoch": 0.19420606180862146, "grad_norm": 6.82053804397583, "learning_rate": 9.169958789819106e-06, "loss": 0.3847, "step": 3915 }, { "epoch": 0.1942556674438216, "grad_norm": 9.830835342407227, "learning_rate": 9.16952443984875e-06, "loss": 0.336, "step": 3916 }, { "epoch": 0.1943052730790218, "grad_norm": 6.287410736083984, "learning_rate": 9.16908998655569e-06, "loss": 0.3748, "step": 3917 }, { "epoch": 0.19435487871422194, "grad_norm": 8.248516082763672, "learning_rate": 9.168655429950696e-06, "loss": 0.3704, "step": 3918 }, { "epoch": 0.1944044843494221, "grad_norm": 8.862945556640625, "learning_rate": 9.168220770044536e-06, "loss": 0.4027, "step": 3919 }, { "epoch": 0.19445408998462224, "grad_norm": 7.800444602966309, "learning_rate": 9.167786006847979e-06, "loss": 0.3298, "step": 3920 }, { "epoch": 0.19450369561982242, "grad_norm": 9.081657409667969, "learning_rate": 9.1673511403718e-06, "loss": 0.3073, "step": 3921 }, { "epoch": 0.19455330125502257, "grad_norm": 11.444412231445312, "learning_rate": 9.166916170626773e-06, "loss": 0.3322, "step": 3922 }, { "epoch": 0.19460290689022272, "grad_norm": 7.548627853393555, "learning_rate": 9.16648109762368e-06, "loss": 0.3035, "step": 3923 }, { "epoch": 0.1946525125254229, "grad_norm": 13.379825592041016, "learning_rate": 9.166045921373302e-06, "loss": 0.424, "step": 3924 }, { "epoch": 0.19470211816062305, "grad_norm": 7.4003496170043945, "learning_rate": 9.165610641886417e-06, "loss": 0.3282, "step": 3925 }, { "epoch": 0.1947517237958232, "grad_norm": 5.969890117645264, "learning_rate": 9.16517525917382e-06, "loss": 0.4135, "step": 3926 }, { "epoch": 0.19480132943102335, "grad_norm": 5.712440490722656, "learning_rate": 9.164739773246293e-06, "loss": 0.3246, "step": 3927 }, { "epoch": 0.19485093506622353, "grad_norm": 8.504457473754883, "learning_rate": 9.164304184114634e-06, "loss": 0.3525, "step": 3928 }, { "epoch": 0.19490054070142368, "grad_norm": 7.107117652893066, "learning_rate": 9.163868491789631e-06, "loss": 0.2503, "step": 3929 }, { "epoch": 0.19495014633662383, "grad_norm": 10.750757217407227, "learning_rate": 9.163432696282084e-06, "loss": 0.4267, "step": 3930 }, { "epoch": 0.194999751971824, "grad_norm": 9.30384349822998, "learning_rate": 9.16299679760279e-06, "loss": 0.377, "step": 3931 }, { "epoch": 0.19504935760702416, "grad_norm": 9.029687881469727, "learning_rate": 9.162560795762554e-06, "loss": 0.3942, "step": 3932 }, { "epoch": 0.1950989632422243, "grad_norm": 10.938501358032227, "learning_rate": 9.16212469077218e-06, "loss": 0.4547, "step": 3933 }, { "epoch": 0.1951485688774245, "grad_norm": 7.095552921295166, "learning_rate": 9.16168848264247e-06, "loss": 0.3519, "step": 3934 }, { "epoch": 0.19519817451262464, "grad_norm": 8.969658851623535, "learning_rate": 9.161252171384239e-06, "loss": 0.411, "step": 3935 }, { "epoch": 0.1952477801478248, "grad_norm": 5.20880126953125, "learning_rate": 9.160815757008297e-06, "loss": 0.2831, "step": 3936 }, { "epoch": 0.19529738578302494, "grad_norm": 16.622961044311523, "learning_rate": 9.160379239525458e-06, "loss": 0.3329, "step": 3937 }, { "epoch": 0.19534699141822512, "grad_norm": 7.607392311096191, "learning_rate": 9.15994261894654e-06, "loss": 0.3257, "step": 3938 }, { "epoch": 0.19539659705342527, "grad_norm": 6.788718223571777, "learning_rate": 9.15950589528236e-06, "loss": 0.3361, "step": 3939 }, { "epoch": 0.19544620268862542, "grad_norm": 4.5548415184021, "learning_rate": 9.159069068543744e-06, "loss": 0.334, "step": 3940 }, { "epoch": 0.1954958083238256, "grad_norm": 10.720378875732422, "learning_rate": 9.158632138741515e-06, "loss": 0.2926, "step": 3941 }, { "epoch": 0.19554541395902575, "grad_norm": 8.84926986694336, "learning_rate": 9.1581951058865e-06, "loss": 0.4212, "step": 3942 }, { "epoch": 0.1955950195942259, "grad_norm": 7.122525691986084, "learning_rate": 9.157757969989529e-06, "loss": 0.2097, "step": 3943 }, { "epoch": 0.19564462522942605, "grad_norm": 8.63664722442627, "learning_rate": 9.157320731061437e-06, "loss": 0.3701, "step": 3944 }, { "epoch": 0.19569423086462623, "grad_norm": 6.206829071044922, "learning_rate": 9.156883389113054e-06, "loss": 0.31, "step": 3945 }, { "epoch": 0.19574383649982638, "grad_norm": 14.388981819152832, "learning_rate": 9.156445944155221e-06, "loss": 0.4147, "step": 3946 }, { "epoch": 0.19579344213502653, "grad_norm": 9.686305046081543, "learning_rate": 9.156008396198779e-06, "loss": 0.3693, "step": 3947 }, { "epoch": 0.1958430477702267, "grad_norm": 7.609938144683838, "learning_rate": 9.155570745254567e-06, "loss": 0.3391, "step": 3948 }, { "epoch": 0.19589265340542686, "grad_norm": 5.812369346618652, "learning_rate": 9.155132991333433e-06, "loss": 0.3592, "step": 3949 }, { "epoch": 0.195942259040627, "grad_norm": 6.455488204956055, "learning_rate": 9.154695134446223e-06, "loss": 0.2993, "step": 3950 }, { "epoch": 0.1959918646758272, "grad_norm": 11.291476249694824, "learning_rate": 9.15425717460379e-06, "loss": 0.4743, "step": 3951 }, { "epoch": 0.19604147031102734, "grad_norm": 4.670400142669678, "learning_rate": 9.153819111816984e-06, "loss": 0.3372, "step": 3952 }, { "epoch": 0.1960910759462275, "grad_norm": 7.019837379455566, "learning_rate": 9.153380946096662e-06, "loss": 0.3085, "step": 3953 }, { "epoch": 0.19614068158142764, "grad_norm": 4.920792102813721, "learning_rate": 9.15294267745368e-06, "loss": 0.3184, "step": 3954 }, { "epoch": 0.19619028721662782, "grad_norm": 4.861361503601074, "learning_rate": 9.1525043058989e-06, "loss": 0.2668, "step": 3955 }, { "epoch": 0.19623989285182797, "grad_norm": 6.364124774932861, "learning_rate": 9.152065831443187e-06, "loss": 0.3129, "step": 3956 }, { "epoch": 0.19628949848702812, "grad_norm": 11.700894355773926, "learning_rate": 9.151627254097402e-06, "loss": 0.2613, "step": 3957 }, { "epoch": 0.1963391041222283, "grad_norm": 6.145894527435303, "learning_rate": 9.151188573872417e-06, "loss": 0.302, "step": 3958 }, { "epoch": 0.19638870975742845, "grad_norm": 9.801996231079102, "learning_rate": 9.150749790779102e-06, "loss": 0.3885, "step": 3959 }, { "epoch": 0.1964383153926286, "grad_norm": 8.45374870300293, "learning_rate": 9.150310904828328e-06, "loss": 0.3733, "step": 3960 }, { "epoch": 0.19648792102782875, "grad_norm": 6.564323425292969, "learning_rate": 9.149871916030973e-06, "loss": 0.2427, "step": 3961 }, { "epoch": 0.19653752666302893, "grad_norm": 15.898738861083984, "learning_rate": 9.149432824397915e-06, "loss": 0.4363, "step": 3962 }, { "epoch": 0.19658713229822908, "grad_norm": 5.857618808746338, "learning_rate": 9.148993629940033e-06, "loss": 0.2745, "step": 3963 }, { "epoch": 0.19663673793342923, "grad_norm": 5.362997055053711, "learning_rate": 9.148554332668214e-06, "loss": 0.3644, "step": 3964 }, { "epoch": 0.1966863435686294, "grad_norm": 7.305186748504639, "learning_rate": 9.148114932593342e-06, "loss": 0.3825, "step": 3965 }, { "epoch": 0.19673594920382956, "grad_norm": 4.84718656539917, "learning_rate": 9.147675429726306e-06, "loss": 0.3574, "step": 3966 }, { "epoch": 0.1967855548390297, "grad_norm": 6.719901084899902, "learning_rate": 9.147235824077995e-06, "loss": 0.2329, "step": 3967 }, { "epoch": 0.19683516047422986, "grad_norm": 13.920559883117676, "learning_rate": 9.146796115659304e-06, "loss": 0.3178, "step": 3968 }, { "epoch": 0.19688476610943004, "grad_norm": 9.444648742675781, "learning_rate": 9.146356304481132e-06, "loss": 0.3132, "step": 3969 }, { "epoch": 0.1969343717446302, "grad_norm": 13.762804985046387, "learning_rate": 9.145916390554373e-06, "loss": 0.3922, "step": 3970 }, { "epoch": 0.19698397737983034, "grad_norm": 6.48599100112915, "learning_rate": 9.145476373889931e-06, "loss": 0.3231, "step": 3971 }, { "epoch": 0.19703358301503052, "grad_norm": 13.359349250793457, "learning_rate": 9.14503625449871e-06, "loss": 0.4088, "step": 3972 }, { "epoch": 0.19708318865023067, "grad_norm": 14.9124116897583, "learning_rate": 9.144596032391615e-06, "loss": 0.4879, "step": 3973 }, { "epoch": 0.19713279428543082, "grad_norm": 10.285002708435059, "learning_rate": 9.144155707579557e-06, "loss": 0.3885, "step": 3974 }, { "epoch": 0.197182399920631, "grad_norm": 7.617891311645508, "learning_rate": 9.143715280073443e-06, "loss": 0.308, "step": 3975 }, { "epoch": 0.19723200555583115, "grad_norm": 10.732914924621582, "learning_rate": 9.14327474988419e-06, "loss": 0.4269, "step": 3976 }, { "epoch": 0.1972816111910313, "grad_norm": 9.099231719970703, "learning_rate": 9.142834117022717e-06, "loss": 0.3066, "step": 3977 }, { "epoch": 0.19733121682623145, "grad_norm": 5.062342166900635, "learning_rate": 9.14239338149994e-06, "loss": 0.2707, "step": 3978 }, { "epoch": 0.19738082246143163, "grad_norm": 6.475127696990967, "learning_rate": 9.14195254332678e-06, "loss": 0.344, "step": 3979 }, { "epoch": 0.19743042809663178, "grad_norm": 10.65186595916748, "learning_rate": 9.141511602514163e-06, "loss": 0.3528, "step": 3980 }, { "epoch": 0.19748003373183193, "grad_norm": 6.5602498054504395, "learning_rate": 9.141070559073015e-06, "loss": 0.247, "step": 3981 }, { "epoch": 0.1975296393670321, "grad_norm": 14.390069007873535, "learning_rate": 9.140629413014266e-06, "loss": 0.2945, "step": 3982 }, { "epoch": 0.19757924500223226, "grad_norm": 7.545741558074951, "learning_rate": 9.140188164348847e-06, "loss": 0.41, "step": 3983 }, { "epoch": 0.1976288506374324, "grad_norm": 11.141643524169922, "learning_rate": 9.139746813087691e-06, "loss": 0.3377, "step": 3984 }, { "epoch": 0.19767845627263256, "grad_norm": 5.916397571563721, "learning_rate": 9.139305359241738e-06, "loss": 0.3038, "step": 3985 }, { "epoch": 0.19772806190783274, "grad_norm": 3.674957752227783, "learning_rate": 9.138863802821924e-06, "loss": 0.2566, "step": 3986 }, { "epoch": 0.1977776675430329, "grad_norm": 12.543074607849121, "learning_rate": 9.138422143839195e-06, "loss": 0.48, "step": 3987 }, { "epoch": 0.19782727317823304, "grad_norm": 6.325972080230713, "learning_rate": 9.137980382304492e-06, "loss": 0.3559, "step": 3988 }, { "epoch": 0.19787687881343322, "grad_norm": 7.721231460571289, "learning_rate": 9.137538518228763e-06, "loss": 0.2917, "step": 3989 }, { "epoch": 0.19792648444863337, "grad_norm": 4.629160404205322, "learning_rate": 9.13709655162296e-06, "loss": 0.2897, "step": 3990 }, { "epoch": 0.19797609008383352, "grad_norm": 11.877222061157227, "learning_rate": 9.136654482498032e-06, "loss": 0.5123, "step": 3991 }, { "epoch": 0.1980256957190337, "grad_norm": 7.377359867095947, "learning_rate": 9.136212310864934e-06, "loss": 0.3177, "step": 3992 }, { "epoch": 0.19807530135423385, "grad_norm": 13.464530944824219, "learning_rate": 9.135770036734624e-06, "loss": 0.4204, "step": 3993 }, { "epoch": 0.198124906989434, "grad_norm": 4.713728904724121, "learning_rate": 9.13532766011806e-06, "loss": 0.3255, "step": 3994 }, { "epoch": 0.19817451262463415, "grad_norm": 6.877205848693848, "learning_rate": 9.134885181026208e-06, "loss": 0.3414, "step": 3995 }, { "epoch": 0.19822411825983433, "grad_norm": 8.69957160949707, "learning_rate": 9.134442599470031e-06, "loss": 0.3724, "step": 3996 }, { "epoch": 0.19827372389503448, "grad_norm": 5.82328462600708, "learning_rate": 9.133999915460497e-06, "loss": 0.3256, "step": 3997 }, { "epoch": 0.19832332953023463, "grad_norm": 10.542398452758789, "learning_rate": 9.133557129008572e-06, "loss": 0.386, "step": 3998 }, { "epoch": 0.1983729351654348, "grad_norm": 5.791320323944092, "learning_rate": 9.133114240125234e-06, "loss": 0.2937, "step": 3999 }, { "epoch": 0.19842254080063496, "grad_norm": 5.053089618682861, "learning_rate": 9.132671248821455e-06, "loss": 0.3248, "step": 4000 }, { "epoch": 0.1984721464358351, "grad_norm": 5.724758625030518, "learning_rate": 9.132228155108213e-06, "loss": 0.4077, "step": 4001 }, { "epoch": 0.19852175207103526, "grad_norm": 5.370489597320557, "learning_rate": 9.131784958996489e-06, "loss": 0.2991, "step": 4002 }, { "epoch": 0.19857135770623544, "grad_norm": 4.306190490722656, "learning_rate": 9.131341660497263e-06, "loss": 0.352, "step": 4003 }, { "epoch": 0.19862096334143559, "grad_norm": 8.008062362670898, "learning_rate": 9.130898259621523e-06, "loss": 0.3383, "step": 4004 }, { "epoch": 0.19867056897663574, "grad_norm": 6.948530673980713, "learning_rate": 9.130454756380255e-06, "loss": 0.3941, "step": 4005 }, { "epoch": 0.19872017461183591, "grad_norm": 5.146605014801025, "learning_rate": 9.13001115078445e-06, "loss": 0.3422, "step": 4006 }, { "epoch": 0.19876978024703607, "grad_norm": 7.433956146240234, "learning_rate": 9.1295674428451e-06, "loss": 0.3875, "step": 4007 }, { "epoch": 0.19881938588223622, "grad_norm": 5.9738922119140625, "learning_rate": 9.1291236325732e-06, "loss": 0.3624, "step": 4008 }, { "epoch": 0.1988689915174364, "grad_norm": 6.165672779083252, "learning_rate": 9.12867971997975e-06, "loss": 0.3245, "step": 4009 }, { "epoch": 0.19891859715263654, "grad_norm": 8.186585426330566, "learning_rate": 9.12823570507575e-06, "loss": 0.2632, "step": 4010 }, { "epoch": 0.1989682027878367, "grad_norm": 6.308106899261475, "learning_rate": 9.127791587872198e-06, "loss": 0.3157, "step": 4011 }, { "epoch": 0.19901780842303685, "grad_norm": 8.733061790466309, "learning_rate": 9.127347368380106e-06, "loss": 0.3671, "step": 4012 }, { "epoch": 0.19906741405823702, "grad_norm": 11.7293701171875, "learning_rate": 9.12690304661048e-06, "loss": 0.4005, "step": 4013 }, { "epoch": 0.19911701969343717, "grad_norm": 8.029512405395508, "learning_rate": 9.126458622574327e-06, "loss": 0.3105, "step": 4014 }, { "epoch": 0.19916662532863733, "grad_norm": 13.447128295898438, "learning_rate": 9.126014096282664e-06, "loss": 0.2943, "step": 4015 }, { "epoch": 0.1992162309638375, "grad_norm": 17.62788963317871, "learning_rate": 9.125569467746506e-06, "loss": 0.3664, "step": 4016 }, { "epoch": 0.19926583659903765, "grad_norm": 5.402416229248047, "learning_rate": 9.125124736976871e-06, "loss": 0.3344, "step": 4017 }, { "epoch": 0.1993154422342378, "grad_norm": 8.165898323059082, "learning_rate": 9.12467990398478e-06, "loss": 0.294, "step": 4018 }, { "epoch": 0.19936504786943796, "grad_norm": 4.897776126861572, "learning_rate": 9.124234968781251e-06, "loss": 0.2196, "step": 4019 }, { "epoch": 0.19941465350463813, "grad_norm": 6.0555243492126465, "learning_rate": 9.123789931377316e-06, "loss": 0.2209, "step": 4020 }, { "epoch": 0.19946425913983828, "grad_norm": 6.086609840393066, "learning_rate": 9.123344791784002e-06, "loss": 0.1941, "step": 4021 }, { "epoch": 0.19951386477503844, "grad_norm": 5.5823163986206055, "learning_rate": 9.122899550012338e-06, "loss": 0.3352, "step": 4022 }, { "epoch": 0.1995634704102386, "grad_norm": 9.43817138671875, "learning_rate": 9.122454206073359e-06, "loss": 0.3856, "step": 4023 }, { "epoch": 0.19961307604543876, "grad_norm": 18.349031448364258, "learning_rate": 9.122008759978098e-06, "loss": 0.4648, "step": 4024 }, { "epoch": 0.19966268168063891, "grad_norm": 4.752074241638184, "learning_rate": 9.121563211737596e-06, "loss": 0.2616, "step": 4025 }, { "epoch": 0.19971228731583907, "grad_norm": 6.961416244506836, "learning_rate": 9.121117561362895e-06, "loss": 0.2368, "step": 4026 }, { "epoch": 0.19976189295103924, "grad_norm": 12.92971134185791, "learning_rate": 9.120671808865034e-06, "loss": 0.3938, "step": 4027 }, { "epoch": 0.1998114985862394, "grad_norm": 6.536196708679199, "learning_rate": 9.120225954255065e-06, "loss": 0.226, "step": 4028 }, { "epoch": 0.19986110422143955, "grad_norm": 7.997209072113037, "learning_rate": 9.119779997544029e-06, "loss": 0.3973, "step": 4029 }, { "epoch": 0.19991070985663972, "grad_norm": 9.754100799560547, "learning_rate": 9.119333938742984e-06, "loss": 0.3292, "step": 4030 }, { "epoch": 0.19996031549183987, "grad_norm": 8.653054237365723, "learning_rate": 9.11888777786298e-06, "loss": 0.2791, "step": 4031 }, { "epoch": 0.20000992112704002, "grad_norm": 7.659861087799072, "learning_rate": 9.118441514915074e-06, "loss": 0.2891, "step": 4032 }, { "epoch": 0.20000992112704002, "eval_loss": 0.33201321959495544, "eval_runtime": 35.5622, "eval_samples_per_second": 45.807, "eval_steps_per_second": 5.736, "step": 4032 }, { "epoch": 0.2000595267622402, "grad_norm": 10.530786514282227, "learning_rate": 9.117995149910323e-06, "loss": 0.3596, "step": 4033 }, { "epoch": 0.20010913239744035, "grad_norm": 7.060723781585693, "learning_rate": 9.11754868285979e-06, "loss": 0.3446, "step": 4034 }, { "epoch": 0.2001587380326405, "grad_norm": 7.975677967071533, "learning_rate": 9.117102113774538e-06, "loss": 0.3304, "step": 4035 }, { "epoch": 0.20020834366784065, "grad_norm": 6.897091388702393, "learning_rate": 9.116655442665634e-06, "loss": 0.3449, "step": 4036 }, { "epoch": 0.20025794930304083, "grad_norm": 5.201015472412109, "learning_rate": 9.116208669544145e-06, "loss": 0.343, "step": 4037 }, { "epoch": 0.20030755493824098, "grad_norm": 8.655289649963379, "learning_rate": 9.115761794421142e-06, "loss": 0.3329, "step": 4038 }, { "epoch": 0.20035716057344113, "grad_norm": 7.071066856384277, "learning_rate": 9.115314817307704e-06, "loss": 0.3519, "step": 4039 }, { "epoch": 0.2004067662086413, "grad_norm": 6.117389678955078, "learning_rate": 9.1148677382149e-06, "loss": 0.2926, "step": 4040 }, { "epoch": 0.20045637184384146, "grad_norm": 5.811916828155518, "learning_rate": 9.114420557153814e-06, "loss": 0.3122, "step": 4041 }, { "epoch": 0.2005059774790416, "grad_norm": 6.417400360107422, "learning_rate": 9.113973274135524e-06, "loss": 0.3246, "step": 4042 }, { "epoch": 0.20055558311424176, "grad_norm": 6.636836051940918, "learning_rate": 9.113525889171115e-06, "loss": 0.2814, "step": 4043 }, { "epoch": 0.20060518874944194, "grad_norm": 9.193307876586914, "learning_rate": 9.113078402271675e-06, "loss": 0.3526, "step": 4044 }, { "epoch": 0.2006547943846421, "grad_norm": 6.477517127990723, "learning_rate": 9.112630813448291e-06, "loss": 0.3434, "step": 4045 }, { "epoch": 0.20070440001984224, "grad_norm": 7.9452338218688965, "learning_rate": 9.112183122712054e-06, "loss": 0.3082, "step": 4046 }, { "epoch": 0.20075400565504242, "grad_norm": 7.021263122558594, "learning_rate": 9.11173533007406e-06, "loss": 0.2566, "step": 4047 }, { "epoch": 0.20080361129024257, "grad_norm": 6.646376132965088, "learning_rate": 9.111287435545407e-06, "loss": 0.3502, "step": 4048 }, { "epoch": 0.20085321692544272, "grad_norm": 5.349921703338623, "learning_rate": 9.110839439137189e-06, "loss": 0.344, "step": 4049 }, { "epoch": 0.2009028225606429, "grad_norm": 7.718123912811279, "learning_rate": 9.11039134086051e-06, "loss": 0.3999, "step": 4050 }, { "epoch": 0.20095242819584305, "grad_norm": 5.833086013793945, "learning_rate": 9.109943140726476e-06, "loss": 0.3949, "step": 4051 }, { "epoch": 0.2010020338310432, "grad_norm": 10.344588279724121, "learning_rate": 9.109494838746188e-06, "loss": 0.3852, "step": 4052 }, { "epoch": 0.20105163946624335, "grad_norm": 8.303544998168945, "learning_rate": 9.109046434930764e-06, "loss": 0.3133, "step": 4053 }, { "epoch": 0.20110124510144353, "grad_norm": 15.040295600891113, "learning_rate": 9.108597929291308e-06, "loss": 0.3755, "step": 4054 }, { "epoch": 0.20115085073664368, "grad_norm": 9.167144775390625, "learning_rate": 9.108149321838938e-06, "loss": 0.2878, "step": 4055 }, { "epoch": 0.20120045637184383, "grad_norm": 8.104304313659668, "learning_rate": 9.107700612584767e-06, "loss": 0.2542, "step": 4056 }, { "epoch": 0.201250062007044, "grad_norm": 14.87349796295166, "learning_rate": 9.107251801539918e-06, "loss": 0.3249, "step": 4057 }, { "epoch": 0.20129966764224416, "grad_norm": 11.129029273986816, "learning_rate": 9.106802888715513e-06, "loss": 0.3434, "step": 4058 }, { "epoch": 0.2013492732774443, "grad_norm": 7.16485071182251, "learning_rate": 9.106353874122673e-06, "loss": 0.3484, "step": 4059 }, { "epoch": 0.20139887891264446, "grad_norm": 8.032432556152344, "learning_rate": 9.105904757772527e-06, "loss": 0.3281, "step": 4060 }, { "epoch": 0.20144848454784464, "grad_norm": 16.19017791748047, "learning_rate": 9.105455539676203e-06, "loss": 0.4131, "step": 4061 }, { "epoch": 0.2014980901830448, "grad_norm": 11.512158393859863, "learning_rate": 9.105006219844835e-06, "loss": 0.4935, "step": 4062 }, { "epoch": 0.20154769581824494, "grad_norm": 13.0516939163208, "learning_rate": 9.104556798289554e-06, "loss": 0.4476, "step": 4063 }, { "epoch": 0.20159730145344512, "grad_norm": 5.143037796020508, "learning_rate": 9.1041072750215e-06, "loss": 0.2597, "step": 4064 }, { "epoch": 0.20164690708864527, "grad_norm": 6.37547492980957, "learning_rate": 9.103657650051813e-06, "loss": 0.2774, "step": 4065 }, { "epoch": 0.20169651272384542, "grad_norm": 5.47123908996582, "learning_rate": 9.103207923391631e-06, "loss": 0.2952, "step": 4066 }, { "epoch": 0.2017461183590456, "grad_norm": 7.205540180206299, "learning_rate": 9.102758095052101e-06, "loss": 0.3476, "step": 4067 }, { "epoch": 0.20179572399424575, "grad_norm": 16.746063232421875, "learning_rate": 9.102308165044371e-06, "loss": 0.3384, "step": 4068 }, { "epoch": 0.2018453296294459, "grad_norm": 10.456448554992676, "learning_rate": 9.101858133379586e-06, "loss": 0.3996, "step": 4069 }, { "epoch": 0.20189493526464605, "grad_norm": 11.056951522827148, "learning_rate": 9.101408000068904e-06, "loss": 0.3769, "step": 4070 }, { "epoch": 0.20194454089984623, "grad_norm": 6.479068279266357, "learning_rate": 9.100957765123474e-06, "loss": 0.2943, "step": 4071 }, { "epoch": 0.20199414653504638, "grad_norm": 13.574660301208496, "learning_rate": 9.100507428554457e-06, "loss": 0.4178, "step": 4072 }, { "epoch": 0.20204375217024653, "grad_norm": 8.511590957641602, "learning_rate": 9.10005699037301e-06, "loss": 0.2987, "step": 4073 }, { "epoch": 0.2020933578054467, "grad_norm": 13.450634956359863, "learning_rate": 9.099606450590298e-06, "loss": 0.4491, "step": 4074 }, { "epoch": 0.20214296344064686, "grad_norm": 6.757253170013428, "learning_rate": 9.099155809217481e-06, "loss": 0.387, "step": 4075 }, { "epoch": 0.202192569075847, "grad_norm": 4.165247440338135, "learning_rate": 9.098705066265732e-06, "loss": 0.3153, "step": 4076 }, { "epoch": 0.20224217471104716, "grad_norm": 9.14373779296875, "learning_rate": 9.098254221746214e-06, "loss": 0.4102, "step": 4077 }, { "epoch": 0.20229178034624734, "grad_norm": 5.269263744354248, "learning_rate": 9.097803275670104e-06, "loss": 0.303, "step": 4078 }, { "epoch": 0.2023413859814475, "grad_norm": 8.68563461303711, "learning_rate": 9.097352228048577e-06, "loss": 0.4103, "step": 4079 }, { "epoch": 0.20239099161664764, "grad_norm": 6.970372200012207, "learning_rate": 9.096901078892807e-06, "loss": 0.3199, "step": 4080 }, { "epoch": 0.20244059725184782, "grad_norm": 8.300920486450195, "learning_rate": 9.096449828213973e-06, "loss": 0.3466, "step": 4081 }, { "epoch": 0.20249020288704797, "grad_norm": 9.92300796508789, "learning_rate": 9.095998476023262e-06, "loss": 0.3415, "step": 4082 }, { "epoch": 0.20253980852224812, "grad_norm": 6.86759090423584, "learning_rate": 9.095547022331856e-06, "loss": 0.2392, "step": 4083 }, { "epoch": 0.2025894141574483, "grad_norm": 9.174874305725098, "learning_rate": 9.09509546715094e-06, "loss": 0.3573, "step": 4084 }, { "epoch": 0.20263901979264845, "grad_norm": 5.397306442260742, "learning_rate": 9.094643810491707e-06, "loss": 0.2847, "step": 4085 }, { "epoch": 0.2026886254278486, "grad_norm": 5.933460235595703, "learning_rate": 9.094192052365349e-06, "loss": 0.2675, "step": 4086 }, { "epoch": 0.20273823106304875, "grad_norm": 5.873361110687256, "learning_rate": 9.093740192783059e-06, "loss": 0.2351, "step": 4087 }, { "epoch": 0.20278783669824893, "grad_norm": 5.356662750244141, "learning_rate": 9.093288231756036e-06, "loss": 0.3198, "step": 4088 }, { "epoch": 0.20283744233344908, "grad_norm": 9.999002456665039, "learning_rate": 9.092836169295478e-06, "loss": 0.3916, "step": 4089 }, { "epoch": 0.20288704796864923, "grad_norm": 7.169549465179443, "learning_rate": 9.092384005412589e-06, "loss": 0.3024, "step": 4090 }, { "epoch": 0.2029366536038494, "grad_norm": 5.881540298461914, "learning_rate": 9.091931740118573e-06, "loss": 0.3049, "step": 4091 }, { "epoch": 0.20298625923904956, "grad_norm": 5.780019760131836, "learning_rate": 9.091479373424636e-06, "loss": 0.2394, "step": 4092 }, { "epoch": 0.2030358648742497, "grad_norm": 8.20425796508789, "learning_rate": 9.091026905341992e-06, "loss": 0.298, "step": 4093 }, { "epoch": 0.20308547050944986, "grad_norm": 3.7749545574188232, "learning_rate": 9.09057433588185e-06, "loss": 0.2176, "step": 4094 }, { "epoch": 0.20313507614465004, "grad_norm": 6.831127166748047, "learning_rate": 9.090121665055425e-06, "loss": 0.3266, "step": 4095 }, { "epoch": 0.2031846817798502, "grad_norm": 10.404523849487305, "learning_rate": 9.089668892873935e-06, "loss": 0.3612, "step": 4096 }, { "epoch": 0.20323428741505034, "grad_norm": 8.260355949401855, "learning_rate": 9.089216019348599e-06, "loss": 0.3474, "step": 4097 }, { "epoch": 0.20328389305025052, "grad_norm": 11.330195426940918, "learning_rate": 9.088763044490642e-06, "loss": 0.296, "step": 4098 }, { "epoch": 0.20333349868545067, "grad_norm": 12.58725643157959, "learning_rate": 9.088309968311286e-06, "loss": 0.417, "step": 4099 }, { "epoch": 0.20338310432065082, "grad_norm": 8.359930038452148, "learning_rate": 9.08785679082176e-06, "loss": 0.3983, "step": 4100 }, { "epoch": 0.20343270995585097, "grad_norm": 8.226407051086426, "learning_rate": 9.087403512033294e-06, "loss": 0.3806, "step": 4101 }, { "epoch": 0.20348231559105115, "grad_norm": 9.692227363586426, "learning_rate": 9.08695013195712e-06, "loss": 0.3475, "step": 4102 }, { "epoch": 0.2035319212262513, "grad_norm": 9.229395866394043, "learning_rate": 9.086496650604473e-06, "loss": 0.3373, "step": 4103 }, { "epoch": 0.20358152686145145, "grad_norm": 4.682605266571045, "learning_rate": 9.08604306798659e-06, "loss": 0.3814, "step": 4104 }, { "epoch": 0.20363113249665163, "grad_norm": 7.647718906402588, "learning_rate": 9.08558938411471e-06, "loss": 0.2732, "step": 4105 }, { "epoch": 0.20368073813185178, "grad_norm": 6.968380451202393, "learning_rate": 9.085135599000078e-06, "loss": 0.3333, "step": 4106 }, { "epoch": 0.20373034376705193, "grad_norm": 8.933701515197754, "learning_rate": 9.08468171265394e-06, "loss": 0.4088, "step": 4107 }, { "epoch": 0.2037799494022521, "grad_norm": 13.68459701538086, "learning_rate": 9.08422772508754e-06, "loss": 0.3061, "step": 4108 }, { "epoch": 0.20382955503745226, "grad_norm": 6.136422634124756, "learning_rate": 9.08377363631213e-06, "loss": 0.3553, "step": 4109 }, { "epoch": 0.2038791606726524, "grad_norm": 4.955543518066406, "learning_rate": 9.08331944633896e-06, "loss": 0.2374, "step": 4110 }, { "epoch": 0.20392876630785256, "grad_norm": 4.8610310554504395, "learning_rate": 9.08286515517929e-06, "loss": 0.3556, "step": 4111 }, { "epoch": 0.20397837194305274, "grad_norm": 6.181722640991211, "learning_rate": 9.082410762844373e-06, "loss": 0.3404, "step": 4112 }, { "epoch": 0.2040279775782529, "grad_norm": 6.602219104766846, "learning_rate": 9.08195626934547e-06, "loss": 0.3261, "step": 4113 }, { "epoch": 0.20407758321345304, "grad_norm": 8.592971801757812, "learning_rate": 9.081501674693844e-06, "loss": 0.3531, "step": 4114 }, { "epoch": 0.20412718884865322, "grad_norm": 19.925209045410156, "learning_rate": 9.081046978900764e-06, "loss": 0.3188, "step": 4115 }, { "epoch": 0.20417679448385337, "grad_norm": 8.042935371398926, "learning_rate": 9.080592181977489e-06, "loss": 0.3859, "step": 4116 }, { "epoch": 0.20422640011905352, "grad_norm": 9.074334144592285, "learning_rate": 9.080137283935296e-06, "loss": 0.3207, "step": 4117 }, { "epoch": 0.20427600575425367, "grad_norm": 7.922863960266113, "learning_rate": 9.079682284785455e-06, "loss": 0.3206, "step": 4118 }, { "epoch": 0.20432561138945385, "grad_norm": 9.60081672668457, "learning_rate": 9.079227184539241e-06, "loss": 0.3911, "step": 4119 }, { "epoch": 0.204375217024654, "grad_norm": 7.8793768882751465, "learning_rate": 9.078771983207933e-06, "loss": 0.237, "step": 4120 }, { "epoch": 0.20442482265985415, "grad_norm": 5.309297561645508, "learning_rate": 9.07831668080281e-06, "loss": 0.3286, "step": 4121 }, { "epoch": 0.20447442829505433, "grad_norm": 4.838541030883789, "learning_rate": 9.077861277335157e-06, "loss": 0.3769, "step": 4122 }, { "epoch": 0.20452403393025448, "grad_norm": 4.383686542510986, "learning_rate": 9.077405772816253e-06, "loss": 0.3017, "step": 4123 }, { "epoch": 0.20457363956545463, "grad_norm": 8.79593276977539, "learning_rate": 9.076950167257392e-06, "loss": 0.3643, "step": 4124 }, { "epoch": 0.2046232452006548, "grad_norm": 11.541577339172363, "learning_rate": 9.076494460669862e-06, "loss": 0.4743, "step": 4125 }, { "epoch": 0.20467285083585496, "grad_norm": 5.0831193923950195, "learning_rate": 9.076038653064953e-06, "loss": 0.2942, "step": 4126 }, { "epoch": 0.2047224564710551, "grad_norm": 4.89513635635376, "learning_rate": 9.075582744453963e-06, "loss": 0.3659, "step": 4127 }, { "epoch": 0.20477206210625526, "grad_norm": 12.507326126098633, "learning_rate": 9.07512673484819e-06, "loss": 0.3152, "step": 4128 }, { "epoch": 0.20482166774145544, "grad_norm": 5.30604887008667, "learning_rate": 9.074670624258934e-06, "loss": 0.2645, "step": 4129 }, { "epoch": 0.2048712733766556, "grad_norm": 9.16377067565918, "learning_rate": 9.074214412697495e-06, "loss": 0.4461, "step": 4130 }, { "epoch": 0.20492087901185574, "grad_norm": 6.938054084777832, "learning_rate": 9.073758100175181e-06, "loss": 0.2443, "step": 4131 }, { "epoch": 0.20497048464705592, "grad_norm": 10.454914093017578, "learning_rate": 9.073301686703298e-06, "loss": 0.2095, "step": 4132 }, { "epoch": 0.20502009028225607, "grad_norm": 8.022944450378418, "learning_rate": 9.072845172293156e-06, "loss": 0.3582, "step": 4133 }, { "epoch": 0.20506969591745622, "grad_norm": 7.476511001586914, "learning_rate": 9.072388556956069e-06, "loss": 0.2131, "step": 4134 }, { "epoch": 0.20511930155265637, "grad_norm": 9.925106048583984, "learning_rate": 9.071931840703351e-06, "loss": 0.3731, "step": 4135 }, { "epoch": 0.20516890718785655, "grad_norm": 15.313767433166504, "learning_rate": 9.071475023546321e-06, "loss": 0.5025, "step": 4136 }, { "epoch": 0.2052185128230567, "grad_norm": 10.460860252380371, "learning_rate": 9.0710181054963e-06, "loss": 0.2316, "step": 4137 }, { "epoch": 0.20526811845825685, "grad_norm": 10.92088794708252, "learning_rate": 9.070561086564605e-06, "loss": 0.4164, "step": 4138 }, { "epoch": 0.20531772409345703, "grad_norm": 8.063156127929688, "learning_rate": 9.070103966762567e-06, "loss": 0.33, "step": 4139 }, { "epoch": 0.20536732972865718, "grad_norm": 7.165029525756836, "learning_rate": 9.06964674610151e-06, "loss": 0.3261, "step": 4140 }, { "epoch": 0.20541693536385733, "grad_norm": 18.208845138549805, "learning_rate": 9.069189424592767e-06, "loss": 0.346, "step": 4141 }, { "epoch": 0.2054665409990575, "grad_norm": 7.466860771179199, "learning_rate": 9.068732002247669e-06, "loss": 0.2482, "step": 4142 }, { "epoch": 0.20551614663425766, "grad_norm": 8.376472473144531, "learning_rate": 9.06827447907755e-06, "loss": 0.2946, "step": 4143 }, { "epoch": 0.2055657522694578, "grad_norm": 12.459406852722168, "learning_rate": 9.067816855093752e-06, "loss": 0.4361, "step": 4144 }, { "epoch": 0.20561535790465796, "grad_norm": 4.473345756530762, "learning_rate": 9.067359130307611e-06, "loss": 0.2769, "step": 4145 }, { "epoch": 0.20566496353985814, "grad_norm": 11.624700546264648, "learning_rate": 9.066901304730471e-06, "loss": 0.4122, "step": 4146 }, { "epoch": 0.2057145691750583, "grad_norm": 15.871678352355957, "learning_rate": 9.066443378373678e-06, "loss": 0.5179, "step": 4147 }, { "epoch": 0.20576417481025844, "grad_norm": 5.687254428863525, "learning_rate": 9.065985351248576e-06, "loss": 0.2664, "step": 4148 }, { "epoch": 0.20581378044545862, "grad_norm": 20.435779571533203, "learning_rate": 9.065527223366519e-06, "loss": 0.185, "step": 4149 }, { "epoch": 0.20586338608065877, "grad_norm": 6.127744674682617, "learning_rate": 9.065068994738859e-06, "loss": 0.388, "step": 4150 }, { "epoch": 0.20591299171585892, "grad_norm": 3.8522956371307373, "learning_rate": 9.064610665376949e-06, "loss": 0.3411, "step": 4151 }, { "epoch": 0.20596259735105907, "grad_norm": 9.666646957397461, "learning_rate": 9.06415223529215e-06, "loss": 0.3934, "step": 4152 }, { "epoch": 0.20601220298625925, "grad_norm": 10.470085144042969, "learning_rate": 9.06369370449582e-06, "loss": 0.3677, "step": 4153 }, { "epoch": 0.2060618086214594, "grad_norm": 8.307764053344727, "learning_rate": 9.063235072999321e-06, "loss": 0.3259, "step": 4154 }, { "epoch": 0.20611141425665955, "grad_norm": 9.140525817871094, "learning_rate": 9.06277634081402e-06, "loss": 0.3932, "step": 4155 }, { "epoch": 0.20616101989185973, "grad_norm": 7.3000993728637695, "learning_rate": 9.062317507951282e-06, "loss": 0.3441, "step": 4156 }, { "epoch": 0.20621062552705988, "grad_norm": 5.997860431671143, "learning_rate": 9.061858574422478e-06, "loss": 0.2582, "step": 4157 }, { "epoch": 0.20626023116226003, "grad_norm": 6.297177791595459, "learning_rate": 9.061399540238983e-06, "loss": 0.3745, "step": 4158 }, { "epoch": 0.20630983679746018, "grad_norm": 3.8052713871002197, "learning_rate": 9.060940405412169e-06, "loss": 0.3129, "step": 4159 }, { "epoch": 0.20635944243266036, "grad_norm": 6.396929740905762, "learning_rate": 9.060481169953416e-06, "loss": 0.3275, "step": 4160 }, { "epoch": 0.2064090480678605, "grad_norm": 5.157673358917236, "learning_rate": 9.060021833874101e-06, "loss": 0.3217, "step": 4161 }, { "epoch": 0.20645865370306066, "grad_norm": 16.789506912231445, "learning_rate": 9.05956239718561e-06, "loss": 0.3315, "step": 4162 }, { "epoch": 0.20650825933826084, "grad_norm": 14.455942153930664, "learning_rate": 9.059102859899327e-06, "loss": 0.3868, "step": 4163 }, { "epoch": 0.206557864973461, "grad_norm": 5.092593193054199, "learning_rate": 9.058643222026637e-06, "loss": 0.3012, "step": 4164 }, { "epoch": 0.20660747060866114, "grad_norm": 10.60020637512207, "learning_rate": 9.058183483578932e-06, "loss": 0.4283, "step": 4165 }, { "epoch": 0.20665707624386132, "grad_norm": 6.945636749267578, "learning_rate": 9.057723644567606e-06, "loss": 0.3816, "step": 4166 }, { "epoch": 0.20670668187906147, "grad_norm": 5.764324188232422, "learning_rate": 9.057263705004053e-06, "loss": 0.3201, "step": 4167 }, { "epoch": 0.20675628751426162, "grad_norm": 17.998994827270508, "learning_rate": 9.05680366489967e-06, "loss": 0.4399, "step": 4168 }, { "epoch": 0.20680589314946177, "grad_norm": 7.142068862915039, "learning_rate": 9.056343524265856e-06, "loss": 0.3786, "step": 4169 }, { "epoch": 0.20685549878466195, "grad_norm": 8.178522109985352, "learning_rate": 9.055883283114014e-06, "loss": 0.325, "step": 4170 }, { "epoch": 0.2069051044198621, "grad_norm": 6.665626525878906, "learning_rate": 9.055422941455552e-06, "loss": 0.3345, "step": 4171 }, { "epoch": 0.20695471005506225, "grad_norm": 4.707834243774414, "learning_rate": 9.054962499301873e-06, "loss": 0.3043, "step": 4172 }, { "epoch": 0.20700431569026242, "grad_norm": 5.051018714904785, "learning_rate": 9.054501956664388e-06, "loss": 0.3264, "step": 4173 }, { "epoch": 0.20705392132546258, "grad_norm": 8.480809211730957, "learning_rate": 9.054041313554513e-06, "loss": 0.2819, "step": 4174 }, { "epoch": 0.20710352696066273, "grad_norm": 7.015946865081787, "learning_rate": 9.05358056998366e-06, "loss": 0.3633, "step": 4175 }, { "epoch": 0.20715313259586288, "grad_norm": 8.387144088745117, "learning_rate": 9.053119725963247e-06, "loss": 0.2882, "step": 4176 }, { "epoch": 0.20720273823106305, "grad_norm": 4.8686137199401855, "learning_rate": 9.05265878150469e-06, "loss": 0.3507, "step": 4177 }, { "epoch": 0.2072523438662632, "grad_norm": 5.050049304962158, "learning_rate": 9.052197736619419e-06, "loss": 0.3274, "step": 4178 }, { "epoch": 0.20730194950146336, "grad_norm": 12.95349407196045, "learning_rate": 9.051736591318854e-06, "loss": 0.3409, "step": 4179 }, { "epoch": 0.20735155513666353, "grad_norm": 6.158841609954834, "learning_rate": 9.051275345614426e-06, "loss": 0.2898, "step": 4180 }, { "epoch": 0.20740116077186369, "grad_norm": 4.600276470184326, "learning_rate": 9.050813999517559e-06, "loss": 0.3397, "step": 4181 }, { "epoch": 0.20745076640706384, "grad_norm": 7.063106060028076, "learning_rate": 9.05035255303969e-06, "loss": 0.3954, "step": 4182 }, { "epoch": 0.20750037204226401, "grad_norm": 10.206747055053711, "learning_rate": 9.049891006192251e-06, "loss": 0.4537, "step": 4183 }, { "epoch": 0.20754997767746416, "grad_norm": 3.5708303451538086, "learning_rate": 9.049429358986683e-06, "loss": 0.2275, "step": 4184 }, { "epoch": 0.20759958331266432, "grad_norm": 4.145933628082275, "learning_rate": 9.048967611434423e-06, "loss": 0.2885, "step": 4185 }, { "epoch": 0.20764918894786447, "grad_norm": 5.902062892913818, "learning_rate": 9.048505763546914e-06, "loss": 0.3602, "step": 4186 }, { "epoch": 0.20769879458306464, "grad_norm": 8.553346633911133, "learning_rate": 9.0480438153356e-06, "loss": 0.3169, "step": 4187 }, { "epoch": 0.2077484002182648, "grad_norm": 7.719511985778809, "learning_rate": 9.047581766811932e-06, "loss": 0.2906, "step": 4188 }, { "epoch": 0.20779800585346495, "grad_norm": 4.437017917633057, "learning_rate": 9.047119617987355e-06, "loss": 0.366, "step": 4189 }, { "epoch": 0.20784761148866512, "grad_norm": 5.231690406799316, "learning_rate": 9.046657368873321e-06, "loss": 0.2029, "step": 4190 }, { "epoch": 0.20789721712386527, "grad_norm": 8.23279857635498, "learning_rate": 9.04619501948129e-06, "loss": 0.3293, "step": 4191 }, { "epoch": 0.20794682275906543, "grad_norm": 22.960594177246094, "learning_rate": 9.045732569822714e-06, "loss": 0.3592, "step": 4192 }, { "epoch": 0.20799642839426558, "grad_norm": 6.988253593444824, "learning_rate": 9.045270019909056e-06, "loss": 0.3823, "step": 4193 }, { "epoch": 0.20804603402946575, "grad_norm": 6.367435932159424, "learning_rate": 9.044807369751778e-06, "loss": 0.3296, "step": 4194 }, { "epoch": 0.2080956396646659, "grad_norm": 6.545803546905518, "learning_rate": 9.044344619362342e-06, "loss": 0.3468, "step": 4195 }, { "epoch": 0.20814524529986606, "grad_norm": 9.356366157531738, "learning_rate": 9.043881768752218e-06, "loss": 0.3889, "step": 4196 }, { "epoch": 0.20819485093506623, "grad_norm": 5.499203205108643, "learning_rate": 9.043418817932874e-06, "loss": 0.2762, "step": 4197 }, { "epoch": 0.20824445657026638, "grad_norm": 5.834770679473877, "learning_rate": 9.042955766915784e-06, "loss": 0.3659, "step": 4198 }, { "epoch": 0.20829406220546653, "grad_norm": 8.843045234680176, "learning_rate": 9.042492615712418e-06, "loss": 0.3531, "step": 4199 }, { "epoch": 0.2083436678406667, "grad_norm": 9.76370620727539, "learning_rate": 9.04202936433426e-06, "loss": 0.3847, "step": 4200 }, { "epoch": 0.20839327347586686, "grad_norm": 6.0687432289123535, "learning_rate": 9.041566012792785e-06, "loss": 0.338, "step": 4201 }, { "epoch": 0.20844287911106701, "grad_norm": 7.388519763946533, "learning_rate": 9.041102561099472e-06, "loss": 0.2731, "step": 4202 }, { "epoch": 0.20849248474626716, "grad_norm": 5.036025047302246, "learning_rate": 9.040639009265815e-06, "loss": 0.3125, "step": 4203 }, { "epoch": 0.20854209038146734, "grad_norm": 5.759617805480957, "learning_rate": 9.040175357303293e-06, "loss": 0.2936, "step": 4204 }, { "epoch": 0.2085916960166675, "grad_norm": 4.532472133636475, "learning_rate": 9.039711605223399e-06, "loss": 0.302, "step": 4205 }, { "epoch": 0.20864130165186764, "grad_norm": 7.776944637298584, "learning_rate": 9.039247753037623e-06, "loss": 0.4001, "step": 4206 }, { "epoch": 0.20869090728706782, "grad_norm": 6.081018447875977, "learning_rate": 9.03878380075746e-06, "loss": 0.2387, "step": 4207 }, { "epoch": 0.20874051292226797, "grad_norm": 9.187873840332031, "learning_rate": 9.038319748394409e-06, "loss": 0.373, "step": 4208 }, { "epoch": 0.20879011855746812, "grad_norm": 7.283161640167236, "learning_rate": 9.037855595959968e-06, "loss": 0.3436, "step": 4209 }, { "epoch": 0.20883972419266827, "grad_norm": 9.714877128601074, "learning_rate": 9.037391343465637e-06, "loss": 0.4225, "step": 4210 }, { "epoch": 0.20888932982786845, "grad_norm": 4.901986598968506, "learning_rate": 9.036926990922926e-06, "loss": 0.3601, "step": 4211 }, { "epoch": 0.2089389354630686, "grad_norm": 7.981143474578857, "learning_rate": 9.036462538343333e-06, "loss": 0.3404, "step": 4212 }, { "epoch": 0.20898854109826875, "grad_norm": 9.80532169342041, "learning_rate": 9.035997985738377e-06, "loss": 0.3655, "step": 4213 }, { "epoch": 0.20903814673346893, "grad_norm": 11.385917663574219, "learning_rate": 9.035533333119561e-06, "loss": 0.3367, "step": 4214 }, { "epoch": 0.20908775236866908, "grad_norm": 6.755775451660156, "learning_rate": 9.035068580498408e-06, "loss": 0.2713, "step": 4215 }, { "epoch": 0.20913735800386923, "grad_norm": 12.044169425964355, "learning_rate": 9.034603727886426e-06, "loss": 0.4134, "step": 4216 }, { "epoch": 0.2091869636390694, "grad_norm": 4.856890678405762, "learning_rate": 9.034138775295141e-06, "loss": 0.2907, "step": 4217 }, { "epoch": 0.20923656927426956, "grad_norm": 7.017263889312744, "learning_rate": 9.033673722736072e-06, "loss": 0.3568, "step": 4218 }, { "epoch": 0.2092861749094697, "grad_norm": 9.447507858276367, "learning_rate": 9.033208570220744e-06, "loss": 0.3128, "step": 4219 }, { "epoch": 0.20933578054466986, "grad_norm": 10.57365894317627, "learning_rate": 9.032743317760683e-06, "loss": 0.3258, "step": 4220 }, { "epoch": 0.20938538617987004, "grad_norm": 7.532380104064941, "learning_rate": 9.032277965367418e-06, "loss": 0.3794, "step": 4221 }, { "epoch": 0.2094349918150702, "grad_norm": 7.331364154815674, "learning_rate": 9.031812513052481e-06, "loss": 0.3251, "step": 4222 }, { "epoch": 0.20948459745027034, "grad_norm": 7.954070091247559, "learning_rate": 9.031346960827406e-06, "loss": 0.2603, "step": 4223 }, { "epoch": 0.20953420308547052, "grad_norm": 7.31088924407959, "learning_rate": 9.030881308703729e-06, "loss": 0.2827, "step": 4224 }, { "epoch": 0.20958380872067067, "grad_norm": 8.049463272094727, "learning_rate": 9.030415556692992e-06, "loss": 0.3612, "step": 4225 }, { "epoch": 0.20963341435587082, "grad_norm": 7.243463039398193, "learning_rate": 9.029949704806732e-06, "loss": 0.3355, "step": 4226 }, { "epoch": 0.20968301999107097, "grad_norm": 5.331629753112793, "learning_rate": 9.029483753056497e-06, "loss": 0.4296, "step": 4227 }, { "epoch": 0.20973262562627115, "grad_norm": 11.560796737670898, "learning_rate": 9.029017701453831e-06, "loss": 0.2496, "step": 4228 }, { "epoch": 0.2097822312614713, "grad_norm": 8.809276580810547, "learning_rate": 9.028551550010283e-06, "loss": 0.3916, "step": 4229 }, { "epoch": 0.20983183689667145, "grad_norm": 8.901692390441895, "learning_rate": 9.028085298737407e-06, "loss": 0.2361, "step": 4230 }, { "epoch": 0.20988144253187163, "grad_norm": 7.324542999267578, "learning_rate": 9.027618947646755e-06, "loss": 0.3052, "step": 4231 }, { "epoch": 0.20993104816707178, "grad_norm": 8.83244800567627, "learning_rate": 9.027152496749884e-06, "loss": 0.3221, "step": 4232 }, { "epoch": 0.20998065380227193, "grad_norm": 8.8121919631958, "learning_rate": 9.026685946058351e-06, "loss": 0.3428, "step": 4233 }, { "epoch": 0.21003025943747208, "grad_norm": 7.835159778594971, "learning_rate": 9.02621929558372e-06, "loss": 0.3846, "step": 4234 }, { "epoch": 0.21007986507267226, "grad_norm": 29.816295623779297, "learning_rate": 9.025752545337551e-06, "loss": 0.4901, "step": 4235 }, { "epoch": 0.2101294707078724, "grad_norm": 8.440359115600586, "learning_rate": 9.025285695331415e-06, "loss": 0.3616, "step": 4236 }, { "epoch": 0.21017907634307256, "grad_norm": 9.702835083007812, "learning_rate": 9.02481874557688e-06, "loss": 0.3947, "step": 4237 }, { "epoch": 0.21022868197827274, "grad_norm": 5.818923473358154, "learning_rate": 9.024351696085514e-06, "loss": 0.3173, "step": 4238 }, { "epoch": 0.2102782876134729, "grad_norm": 6.662415504455566, "learning_rate": 9.023884546868894e-06, "loss": 0.3276, "step": 4239 }, { "epoch": 0.21032789324867304, "grad_norm": 20.675926208496094, "learning_rate": 9.023417297938593e-06, "loss": 0.5173, "step": 4240 }, { "epoch": 0.21037749888387322, "grad_norm": 11.096102714538574, "learning_rate": 9.022949949306193e-06, "loss": 0.3538, "step": 4241 }, { "epoch": 0.21042710451907337, "grad_norm": 7.048398017883301, "learning_rate": 9.022482500983272e-06, "loss": 0.3114, "step": 4242 }, { "epoch": 0.21047671015427352, "grad_norm": 8.47322940826416, "learning_rate": 9.022014952981415e-06, "loss": 0.3454, "step": 4243 }, { "epoch": 0.21052631578947367, "grad_norm": 4.38675594329834, "learning_rate": 9.021547305312208e-06, "loss": 0.371, "step": 4244 }, { "epoch": 0.21057592142467385, "grad_norm": 9.585238456726074, "learning_rate": 9.021079557987242e-06, "loss": 0.4522, "step": 4245 }, { "epoch": 0.210625527059874, "grad_norm": 4.242496490478516, "learning_rate": 9.020611711018104e-06, "loss": 0.3674, "step": 4246 }, { "epoch": 0.21067513269507415, "grad_norm": 7.761110782623291, "learning_rate": 9.020143764416389e-06, "loss": 0.2991, "step": 4247 }, { "epoch": 0.21072473833027433, "grad_norm": 8.837409973144531, "learning_rate": 9.019675718193692e-06, "loss": 0.3607, "step": 4248 }, { "epoch": 0.21077434396547448, "grad_norm": 6.373970031738281, "learning_rate": 9.019207572361614e-06, "loss": 0.3228, "step": 4249 }, { "epoch": 0.21082394960067463, "grad_norm": 9.882707595825195, "learning_rate": 9.018739326931753e-06, "loss": 0.3765, "step": 4250 }, { "epoch": 0.21087355523587478, "grad_norm": 7.439492225646973, "learning_rate": 9.018270981915715e-06, "loss": 0.2614, "step": 4251 }, { "epoch": 0.21092316087107496, "grad_norm": 6.167928695678711, "learning_rate": 9.017802537325104e-06, "loss": 0.3298, "step": 4252 }, { "epoch": 0.2109727665062751, "grad_norm": 5.064970016479492, "learning_rate": 9.017333993171528e-06, "loss": 0.2806, "step": 4253 }, { "epoch": 0.21102237214147526, "grad_norm": 5.635116100311279, "learning_rate": 9.0168653494666e-06, "loss": 0.3266, "step": 4254 }, { "epoch": 0.21107197777667544, "grad_norm": 5.281791687011719, "learning_rate": 9.016396606221929e-06, "loss": 0.3275, "step": 4255 }, { "epoch": 0.2111215834118756, "grad_norm": 13.659224510192871, "learning_rate": 9.015927763449135e-06, "loss": 0.346, "step": 4256 }, { "epoch": 0.21117118904707574, "grad_norm": 22.42197036743164, "learning_rate": 9.015458821159833e-06, "loss": 0.254, "step": 4257 }, { "epoch": 0.21122079468227592, "grad_norm": 4.849052906036377, "learning_rate": 9.014989779365646e-06, "loss": 0.2734, "step": 4258 }, { "epoch": 0.21127040031747607, "grad_norm": 7.972314834594727, "learning_rate": 9.014520638078198e-06, "loss": 0.3098, "step": 4259 }, { "epoch": 0.21132000595267622, "grad_norm": 7.8527421951293945, "learning_rate": 9.01405139730911e-06, "loss": 0.3416, "step": 4260 }, { "epoch": 0.21136961158787637, "grad_norm": 5.013093948364258, "learning_rate": 9.013582057070014e-06, "loss": 0.2703, "step": 4261 }, { "epoch": 0.21141921722307655, "grad_norm": 6.859163761138916, "learning_rate": 9.013112617372538e-06, "loss": 0.3608, "step": 4262 }, { "epoch": 0.2114688228582767, "grad_norm": 4.013072967529297, "learning_rate": 9.012643078228316e-06, "loss": 0.3132, "step": 4263 }, { "epoch": 0.21151842849347685, "grad_norm": 7.94273042678833, "learning_rate": 9.012173439648985e-06, "loss": 0.2876, "step": 4264 }, { "epoch": 0.21156803412867703, "grad_norm": 5.534153938293457, "learning_rate": 9.01170370164618e-06, "loss": 0.2917, "step": 4265 }, { "epoch": 0.21161763976387718, "grad_norm": 6.3078179359436035, "learning_rate": 9.011233864231543e-06, "loss": 0.3935, "step": 4266 }, { "epoch": 0.21166724539907733, "grad_norm": 10.82219409942627, "learning_rate": 9.010763927416715e-06, "loss": 0.3395, "step": 4267 }, { "epoch": 0.21171685103427748, "grad_norm": 9.776633262634277, "learning_rate": 9.010293891213344e-06, "loss": 0.4365, "step": 4268 }, { "epoch": 0.21176645666947766, "grad_norm": 4.972217082977295, "learning_rate": 9.009823755633077e-06, "loss": 0.2062, "step": 4269 }, { "epoch": 0.2118160623046778, "grad_norm": 6.693033218383789, "learning_rate": 9.009353520687563e-06, "loss": 0.3238, "step": 4270 }, { "epoch": 0.21186566793987796, "grad_norm": 7.80200719833374, "learning_rate": 9.008883186388454e-06, "loss": 0.2183, "step": 4271 }, { "epoch": 0.21191527357507814, "grad_norm": 5.623772144317627, "learning_rate": 9.008412752747407e-06, "loss": 0.3524, "step": 4272 }, { "epoch": 0.2119648792102783, "grad_norm": 7.569887161254883, "learning_rate": 9.007942219776078e-06, "loss": 0.3502, "step": 4273 }, { "epoch": 0.21201448484547844, "grad_norm": 13.34516429901123, "learning_rate": 9.00747158748613e-06, "loss": 0.3868, "step": 4274 }, { "epoch": 0.21206409048067862, "grad_norm": 6.510983943939209, "learning_rate": 9.00700085588922e-06, "loss": 0.196, "step": 4275 }, { "epoch": 0.21211369611587877, "grad_norm": 14.511094093322754, "learning_rate": 9.00653002499702e-06, "loss": 0.466, "step": 4276 }, { "epoch": 0.21216330175107892, "grad_norm": 7.479142665863037, "learning_rate": 9.006059094821191e-06, "loss": 0.2817, "step": 4277 }, { "epoch": 0.21221290738627907, "grad_norm": 7.946821689605713, "learning_rate": 9.005588065373408e-06, "loss": 0.3234, "step": 4278 }, { "epoch": 0.21226251302147925, "grad_norm": 10.689533233642578, "learning_rate": 9.005116936665337e-06, "loss": 0.3749, "step": 4279 }, { "epoch": 0.2123121186566794, "grad_norm": 38.992313385009766, "learning_rate": 9.004645708708659e-06, "loss": 0.3888, "step": 4280 }, { "epoch": 0.21236172429187955, "grad_norm": 9.675858497619629, "learning_rate": 9.004174381515049e-06, "loss": 0.4241, "step": 4281 }, { "epoch": 0.21241132992707973, "grad_norm": 10.272503852844238, "learning_rate": 9.003702955096185e-06, "loss": 0.4131, "step": 4282 }, { "epoch": 0.21246093556227988, "grad_norm": 7.29622220993042, "learning_rate": 9.003231429463753e-06, "loss": 0.2376, "step": 4283 }, { "epoch": 0.21251054119748003, "grad_norm": 17.377782821655273, "learning_rate": 9.002759804629433e-06, "loss": 0.465, "step": 4284 }, { "epoch": 0.21256014683268018, "grad_norm": 6.905088424682617, "learning_rate": 9.002288080604915e-06, "loss": 0.3835, "step": 4285 }, { "epoch": 0.21260975246788036, "grad_norm": 6.798495292663574, "learning_rate": 9.001816257401889e-06, "loss": 0.387, "step": 4286 }, { "epoch": 0.2126593581030805, "grad_norm": 7.695557594299316, "learning_rate": 9.001344335032046e-06, "loss": 0.2593, "step": 4287 }, { "epoch": 0.21270896373828066, "grad_norm": 20.17243766784668, "learning_rate": 9.00087231350708e-06, "loss": 0.4832, "step": 4288 }, { "epoch": 0.21275856937348084, "grad_norm": 4.269257068634033, "learning_rate": 9.000400192838686e-06, "loss": 0.2516, "step": 4289 }, { "epoch": 0.212808175008681, "grad_norm": 10.142280578613281, "learning_rate": 8.999927973038569e-06, "loss": 0.2538, "step": 4290 }, { "epoch": 0.21285778064388114, "grad_norm": 9.352755546569824, "learning_rate": 8.999455654118425e-06, "loss": 0.3052, "step": 4291 }, { "epoch": 0.2129073862790813, "grad_norm": 5.15044641494751, "learning_rate": 8.998983236089963e-06, "loss": 0.3651, "step": 4292 }, { "epoch": 0.21295699191428147, "grad_norm": 11.387003898620605, "learning_rate": 8.998510718964885e-06, "loss": 0.4064, "step": 4293 }, { "epoch": 0.21300659754948162, "grad_norm": 5.808084487915039, "learning_rate": 8.998038102754905e-06, "loss": 0.3176, "step": 4294 }, { "epoch": 0.21305620318468177, "grad_norm": 6.981192588806152, "learning_rate": 8.997565387471731e-06, "loss": 0.2975, "step": 4295 }, { "epoch": 0.21310580881988195, "grad_norm": 6.968095302581787, "learning_rate": 8.997092573127079e-06, "loss": 0.3887, "step": 4296 }, { "epoch": 0.2131554144550821, "grad_norm": 8.555678367614746, "learning_rate": 8.996619659732663e-06, "loss": 0.3401, "step": 4297 }, { "epoch": 0.21320502009028225, "grad_norm": 11.315071105957031, "learning_rate": 8.996146647300206e-06, "loss": 0.3533, "step": 4298 }, { "epoch": 0.21325462572548243, "grad_norm": 5.771987438201904, "learning_rate": 8.995673535841427e-06, "loss": 0.3394, "step": 4299 }, { "epoch": 0.21330423136068258, "grad_norm": 4.522816181182861, "learning_rate": 8.99520032536805e-06, "loss": 0.2474, "step": 4300 }, { "epoch": 0.21335383699588273, "grad_norm": 8.624652862548828, "learning_rate": 8.994727015891803e-06, "loss": 0.4201, "step": 4301 }, { "epoch": 0.21340344263108288, "grad_norm": 7.47108793258667, "learning_rate": 8.994253607424412e-06, "loss": 0.2799, "step": 4302 }, { "epoch": 0.21345304826628306, "grad_norm": 3.4712061882019043, "learning_rate": 8.993780099977612e-06, "loss": 0.2312, "step": 4303 }, { "epoch": 0.2135026539014832, "grad_norm": 4.488366603851318, "learning_rate": 8.993306493563133e-06, "loss": 0.3025, "step": 4304 }, { "epoch": 0.21355225953668336, "grad_norm": 11.717986106872559, "learning_rate": 8.992832788192711e-06, "loss": 0.4024, "step": 4305 }, { "epoch": 0.21360186517188354, "grad_norm": 6.424246311187744, "learning_rate": 8.992358983878088e-06, "loss": 0.2731, "step": 4306 }, { "epoch": 0.2136514708070837, "grad_norm": 5.031726837158203, "learning_rate": 8.991885080631004e-06, "loss": 0.3675, "step": 4307 }, { "epoch": 0.21370107644228384, "grad_norm": 4.354146957397461, "learning_rate": 8.991411078463201e-06, "loss": 0.2991, "step": 4308 }, { "epoch": 0.213750682077484, "grad_norm": 17.59326171875, "learning_rate": 8.990936977386426e-06, "loss": 0.3474, "step": 4309 }, { "epoch": 0.21380028771268417, "grad_norm": 5.4289937019348145, "learning_rate": 8.990462777412427e-06, "loss": 0.3004, "step": 4310 }, { "epoch": 0.21384989334788432, "grad_norm": 7.413381099700928, "learning_rate": 8.989988478552957e-06, "loss": 0.4033, "step": 4311 }, { "epoch": 0.21389949898308447, "grad_norm": 3.1587259769439697, "learning_rate": 8.989514080819766e-06, "loss": 0.2222, "step": 4312 }, { "epoch": 0.21394910461828465, "grad_norm": 9.840120315551758, "learning_rate": 8.989039584224612e-06, "loss": 0.3975, "step": 4313 }, { "epoch": 0.2139987102534848, "grad_norm": 11.624733924865723, "learning_rate": 8.988564988779254e-06, "loss": 0.342, "step": 4314 }, { "epoch": 0.21404831588868495, "grad_norm": 9.239142417907715, "learning_rate": 8.98809029449545e-06, "loss": 0.4027, "step": 4315 }, { "epoch": 0.21409792152388513, "grad_norm": 11.006860733032227, "learning_rate": 8.987615501384964e-06, "loss": 0.448, "step": 4316 }, { "epoch": 0.21414752715908528, "grad_norm": 6.879716873168945, "learning_rate": 8.987140609459562e-06, "loss": 0.3369, "step": 4317 }, { "epoch": 0.21419713279428543, "grad_norm": 14.66848087310791, "learning_rate": 8.986665618731014e-06, "loss": 0.4913, "step": 4318 }, { "epoch": 0.21424673842948558, "grad_norm": 12.354828834533691, "learning_rate": 8.986190529211088e-06, "loss": 0.2895, "step": 4319 }, { "epoch": 0.21429634406468576, "grad_norm": 5.63767671585083, "learning_rate": 8.985715340911556e-06, "loss": 0.3077, "step": 4320 }, { "epoch": 0.2143459496998859, "grad_norm": 10.34127426147461, "learning_rate": 8.985240053844195e-06, "loss": 0.3404, "step": 4321 }, { "epoch": 0.21439555533508606, "grad_norm": 9.092305183410645, "learning_rate": 8.984764668020785e-06, "loss": 0.2871, "step": 4322 }, { "epoch": 0.21444516097028624, "grad_norm": 3.0785117149353027, "learning_rate": 8.984289183453102e-06, "loss": 0.3298, "step": 4323 }, { "epoch": 0.2144947666054864, "grad_norm": 11.589842796325684, "learning_rate": 8.98381360015293e-06, "loss": 0.2821, "step": 4324 }, { "epoch": 0.21454437224068654, "grad_norm": 8.123591423034668, "learning_rate": 8.983337918132058e-06, "loss": 0.2379, "step": 4325 }, { "epoch": 0.2145939778758867, "grad_norm": 17.14373779296875, "learning_rate": 8.982862137402269e-06, "loss": 0.5339, "step": 4326 }, { "epoch": 0.21464358351108687, "grad_norm": 8.632067680358887, "learning_rate": 8.982386257975354e-06, "loss": 0.3701, "step": 4327 }, { "epoch": 0.21469318914628702, "grad_norm": 8.124970436096191, "learning_rate": 8.981910279863106e-06, "loss": 0.3852, "step": 4328 }, { "epoch": 0.21474279478148717, "grad_norm": 8.514897346496582, "learning_rate": 8.981434203077321e-06, "loss": 0.3097, "step": 4329 }, { "epoch": 0.21479240041668735, "grad_norm": 7.594711780548096, "learning_rate": 8.980958027629796e-06, "loss": 0.2547, "step": 4330 }, { "epoch": 0.2148420060518875, "grad_norm": 9.436511993408203, "learning_rate": 8.980481753532329e-06, "loss": 0.3023, "step": 4331 }, { "epoch": 0.21489161168708765, "grad_norm": 12.648492813110352, "learning_rate": 8.980005380796723e-06, "loss": 0.3628, "step": 4332 }, { "epoch": 0.21494121732228783, "grad_norm": 3.9866156578063965, "learning_rate": 8.979528909434785e-06, "loss": 0.253, "step": 4333 }, { "epoch": 0.21499082295748798, "grad_norm": 6.736687660217285, "learning_rate": 8.97905233945832e-06, "loss": 0.2112, "step": 4334 }, { "epoch": 0.21504042859268813, "grad_norm": 6.368419647216797, "learning_rate": 8.978575670879138e-06, "loss": 0.3589, "step": 4335 }, { "epoch": 0.21509003422788828, "grad_norm": 19.468780517578125, "learning_rate": 8.97809890370905e-06, "loss": 0.4887, "step": 4336 }, { "epoch": 0.21513963986308846, "grad_norm": 5.16759729385376, "learning_rate": 8.977622037959872e-06, "loss": 0.3329, "step": 4337 }, { "epoch": 0.2151892454982886, "grad_norm": 8.169331550598145, "learning_rate": 8.977145073643422e-06, "loss": 0.3402, "step": 4338 }, { "epoch": 0.21523885113348876, "grad_norm": 8.9488525390625, "learning_rate": 8.976668010771517e-06, "loss": 0.2863, "step": 4339 }, { "epoch": 0.21528845676868893, "grad_norm": 8.762202262878418, "learning_rate": 8.97619084935598e-06, "loss": 0.4012, "step": 4340 }, { "epoch": 0.21533806240388909, "grad_norm": 6.637516975402832, "learning_rate": 8.975713589408634e-06, "loss": 0.2454, "step": 4341 }, { "epoch": 0.21538766803908924, "grad_norm": 8.244840621948242, "learning_rate": 8.975236230941309e-06, "loss": 0.3982, "step": 4342 }, { "epoch": 0.2154372736742894, "grad_norm": 4.393795013427734, "learning_rate": 8.974758773965831e-06, "loss": 0.2272, "step": 4343 }, { "epoch": 0.21548687930948957, "grad_norm": 13.982834815979004, "learning_rate": 8.974281218494034e-06, "loss": 0.2966, "step": 4344 }, { "epoch": 0.21553648494468972, "grad_norm": 5.5566277503967285, "learning_rate": 8.97380356453775e-06, "loss": 0.3606, "step": 4345 }, { "epoch": 0.21558609057988987, "grad_norm": 8.152959823608398, "learning_rate": 8.973325812108814e-06, "loss": 0.4268, "step": 4346 }, { "epoch": 0.21563569621509004, "grad_norm": 7.34717321395874, "learning_rate": 8.972847961219069e-06, "loss": 0.2832, "step": 4347 }, { "epoch": 0.2156853018502902, "grad_norm": 49.34778594970703, "learning_rate": 8.972370011880354e-06, "loss": 0.3329, "step": 4348 }, { "epoch": 0.21573490748549035, "grad_norm": 6.295292854309082, "learning_rate": 8.971891964104513e-06, "loss": 0.2693, "step": 4349 }, { "epoch": 0.21578451312069052, "grad_norm": 12.192811965942383, "learning_rate": 8.971413817903392e-06, "loss": 0.4152, "step": 4350 }, { "epoch": 0.21583411875589067, "grad_norm": 5.421932697296143, "learning_rate": 8.970935573288841e-06, "loss": 0.2164, "step": 4351 }, { "epoch": 0.21588372439109083, "grad_norm": 5.967931270599365, "learning_rate": 8.97045723027271e-06, "loss": 0.2278, "step": 4352 }, { "epoch": 0.21593333002629098, "grad_norm": 7.49965763092041, "learning_rate": 8.969978788866853e-06, "loss": 0.2553, "step": 4353 }, { "epoch": 0.21598293566149115, "grad_norm": 10.7647123336792, "learning_rate": 8.969500249083126e-06, "loss": 0.4289, "step": 4354 }, { "epoch": 0.2160325412966913, "grad_norm": 7.359901428222656, "learning_rate": 8.969021610933387e-06, "loss": 0.3116, "step": 4355 }, { "epoch": 0.21608214693189146, "grad_norm": 6.823284149169922, "learning_rate": 8.968542874429495e-06, "loss": 0.3563, "step": 4356 }, { "epoch": 0.21613175256709163, "grad_norm": 12.628748893737793, "learning_rate": 8.968064039583318e-06, "loss": 0.4963, "step": 4357 }, { "epoch": 0.21618135820229178, "grad_norm": 8.689424514770508, "learning_rate": 8.967585106406718e-06, "loss": 0.2677, "step": 4358 }, { "epoch": 0.21623096383749194, "grad_norm": 7.122835636138916, "learning_rate": 8.967106074911564e-06, "loss": 0.2956, "step": 4359 }, { "epoch": 0.21628056947269209, "grad_norm": 7.530995845794678, "learning_rate": 8.96662694510973e-06, "loss": 0.2655, "step": 4360 }, { "epoch": 0.21633017510789226, "grad_norm": 16.06205940246582, "learning_rate": 8.966147717013082e-06, "loss": 0.3851, "step": 4361 }, { "epoch": 0.21637978074309241, "grad_norm": 9.982095718383789, "learning_rate": 8.965668390633501e-06, "loss": 0.3816, "step": 4362 }, { "epoch": 0.21642938637829257, "grad_norm": 7.434168815612793, "learning_rate": 8.965188965982866e-06, "loss": 0.2994, "step": 4363 }, { "epoch": 0.21647899201349274, "grad_norm": 4.740592956542969, "learning_rate": 8.96470944307305e-06, "loss": 0.2828, "step": 4364 }, { "epoch": 0.2165285976486929, "grad_norm": 6.057357311248779, "learning_rate": 8.964229821915944e-06, "loss": 0.2888, "step": 4365 }, { "epoch": 0.21657820328389304, "grad_norm": 5.243924140930176, "learning_rate": 8.963750102523428e-06, "loss": 0.2422, "step": 4366 }, { "epoch": 0.2166278089190932, "grad_norm": 11.783563613891602, "learning_rate": 8.963270284907393e-06, "loss": 0.2418, "step": 4367 }, { "epoch": 0.21667741455429337, "grad_norm": 9.594483375549316, "learning_rate": 8.962790369079725e-06, "loss": 0.3371, "step": 4368 }, { "epoch": 0.21672702018949352, "grad_norm": 10.65664005279541, "learning_rate": 8.962310355052321e-06, "loss": 0.3076, "step": 4369 }, { "epoch": 0.21677662582469368, "grad_norm": 8.048819541931152, "learning_rate": 8.961830242837075e-06, "loss": 0.4045, "step": 4370 }, { "epoch": 0.21682623145989385, "grad_norm": 5.188067436218262, "learning_rate": 8.961350032445882e-06, "loss": 0.1595, "step": 4371 }, { "epoch": 0.216875837095094, "grad_norm": 10.51353645324707, "learning_rate": 8.960869723890645e-06, "loss": 0.4067, "step": 4372 }, { "epoch": 0.21692544273029415, "grad_norm": 13.125699043273926, "learning_rate": 8.960389317183262e-06, "loss": 0.3737, "step": 4373 }, { "epoch": 0.21697504836549433, "grad_norm": 8.729803085327148, "learning_rate": 8.95990881233564e-06, "loss": 0.3096, "step": 4374 }, { "epoch": 0.21702465400069448, "grad_norm": 10.107522010803223, "learning_rate": 8.959428209359688e-06, "loss": 0.3664, "step": 4375 }, { "epoch": 0.21707425963589463, "grad_norm": 6.4606404304504395, "learning_rate": 8.958947508267314e-06, "loss": 0.21, "step": 4376 }, { "epoch": 0.21712386527109478, "grad_norm": 6.413834571838379, "learning_rate": 8.958466709070431e-06, "loss": 0.2978, "step": 4377 }, { "epoch": 0.21717347090629496, "grad_norm": 14.803675651550293, "learning_rate": 8.95798581178095e-06, "loss": 0.3898, "step": 4378 }, { "epoch": 0.2172230765414951, "grad_norm": 50.43474578857422, "learning_rate": 8.957504816410791e-06, "loss": 0.5076, "step": 4379 }, { "epoch": 0.21727268217669526, "grad_norm": 10.608820915222168, "learning_rate": 8.957023722971874e-06, "loss": 0.4123, "step": 4380 }, { "epoch": 0.21732228781189544, "grad_norm": 9.70102596282959, "learning_rate": 8.956542531476118e-06, "loss": 0.3804, "step": 4381 }, { "epoch": 0.2173718934470956, "grad_norm": 4.892563819885254, "learning_rate": 8.956061241935448e-06, "loss": 0.2851, "step": 4382 }, { "epoch": 0.21742149908229574, "grad_norm": 6.976910591125488, "learning_rate": 8.955579854361792e-06, "loss": 0.236, "step": 4383 }, { "epoch": 0.2174711047174959, "grad_norm": 10.870915412902832, "learning_rate": 8.955098368767076e-06, "loss": 0.3734, "step": 4384 }, { "epoch": 0.21752071035269607, "grad_norm": 4.8938517570495605, "learning_rate": 8.954616785163234e-06, "loss": 0.3295, "step": 4385 }, { "epoch": 0.21757031598789622, "grad_norm": 12.093793869018555, "learning_rate": 8.954135103562199e-06, "loss": 0.4673, "step": 4386 }, { "epoch": 0.21761992162309637, "grad_norm": 6.302145004272461, "learning_rate": 8.953653323975908e-06, "loss": 0.3934, "step": 4387 }, { "epoch": 0.21766952725829655, "grad_norm": 5.2452006340026855, "learning_rate": 8.9531714464163e-06, "loss": 0.2346, "step": 4388 }, { "epoch": 0.2177191328934967, "grad_norm": 5.198238372802734, "learning_rate": 8.952689470895314e-06, "loss": 0.2516, "step": 4389 }, { "epoch": 0.21776873852869685, "grad_norm": 7.780845642089844, "learning_rate": 8.952207397424894e-06, "loss": 0.3911, "step": 4390 }, { "epoch": 0.21781834416389703, "grad_norm": 5.642891883850098, "learning_rate": 8.951725226016989e-06, "loss": 0.3702, "step": 4391 }, { "epoch": 0.21786794979909718, "grad_norm": 19.829702377319336, "learning_rate": 8.951242956683546e-06, "loss": 0.4322, "step": 4392 }, { "epoch": 0.21791755543429733, "grad_norm": 8.017038345336914, "learning_rate": 8.950760589436514e-06, "loss": 0.3647, "step": 4393 }, { "epoch": 0.21796716106949748, "grad_norm": 8.385381698608398, "learning_rate": 8.950278124287846e-06, "loss": 0.3553, "step": 4394 }, { "epoch": 0.21801676670469766, "grad_norm": 5.952361583709717, "learning_rate": 8.949795561249501e-06, "loss": 0.3202, "step": 4395 }, { "epoch": 0.2180663723398978, "grad_norm": 8.45097541809082, "learning_rate": 8.949312900333436e-06, "loss": 0.1736, "step": 4396 }, { "epoch": 0.21811597797509796, "grad_norm": 13.041887283325195, "learning_rate": 8.948830141551608e-06, "loss": 0.2946, "step": 4397 }, { "epoch": 0.21816558361029814, "grad_norm": 10.992547035217285, "learning_rate": 8.948347284915985e-06, "loss": 0.3845, "step": 4398 }, { "epoch": 0.2182151892454983, "grad_norm": 5.717249393463135, "learning_rate": 8.94786433043853e-06, "loss": 0.3219, "step": 4399 }, { "epoch": 0.21826479488069844, "grad_norm": 3.9597601890563965, "learning_rate": 8.947381278131211e-06, "loss": 0.1989, "step": 4400 }, { "epoch": 0.2183144005158986, "grad_norm": 4.3661603927612305, "learning_rate": 8.946898128006e-06, "loss": 0.3525, "step": 4401 }, { "epoch": 0.21836400615109877, "grad_norm": 6.6478657722473145, "learning_rate": 8.946414880074864e-06, "loss": 0.3186, "step": 4402 }, { "epoch": 0.21841361178629892, "grad_norm": 5.623530864715576, "learning_rate": 8.945931534349784e-06, "loss": 0.3592, "step": 4403 }, { "epoch": 0.21846321742149907, "grad_norm": 7.565740585327148, "learning_rate": 8.945448090842737e-06, "loss": 0.3781, "step": 4404 }, { "epoch": 0.21851282305669925, "grad_norm": 4.240115165710449, "learning_rate": 8.9449645495657e-06, "loss": 0.2717, "step": 4405 }, { "epoch": 0.2185624286918994, "grad_norm": 7.595535755157471, "learning_rate": 8.944480910530658e-06, "loss": 0.2573, "step": 4406 }, { "epoch": 0.21861203432709955, "grad_norm": 8.230480194091797, "learning_rate": 8.943997173749593e-06, "loss": 0.2573, "step": 4407 }, { "epoch": 0.21866163996229973, "grad_norm": 13.614282608032227, "learning_rate": 8.943513339234495e-06, "loss": 0.3117, "step": 4408 }, { "epoch": 0.21871124559749988, "grad_norm": 6.81828498840332, "learning_rate": 8.943029406997354e-06, "loss": 0.3182, "step": 4409 }, { "epoch": 0.21876085123270003, "grad_norm": 7.631438255310059, "learning_rate": 8.94254537705016e-06, "loss": 0.3281, "step": 4410 }, { "epoch": 0.21881045686790018, "grad_norm": 10.345833778381348, "learning_rate": 8.942061249404908e-06, "loss": 0.3568, "step": 4411 }, { "epoch": 0.21886006250310036, "grad_norm": 5.73184871673584, "learning_rate": 8.941577024073593e-06, "loss": 0.2861, "step": 4412 }, { "epoch": 0.2189096681383005, "grad_norm": 9.6812105178833, "learning_rate": 8.94109270106822e-06, "loss": 0.3212, "step": 4413 }, { "epoch": 0.21895927377350066, "grad_norm": 6.352288722991943, "learning_rate": 8.940608280400785e-06, "loss": 0.2652, "step": 4414 }, { "epoch": 0.21900887940870084, "grad_norm": 9.817475318908691, "learning_rate": 8.940123762083294e-06, "loss": 0.3372, "step": 4415 }, { "epoch": 0.219058485043901, "grad_norm": 7.900227069854736, "learning_rate": 8.939639146127754e-06, "loss": 0.3061, "step": 4416 }, { "epoch": 0.21910809067910114, "grad_norm": 5.259790420532227, "learning_rate": 8.939154432546173e-06, "loss": 0.2991, "step": 4417 }, { "epoch": 0.2191576963143013, "grad_norm": 6.445072174072266, "learning_rate": 8.938669621350566e-06, "loss": 0.3283, "step": 4418 }, { "epoch": 0.21920730194950147, "grad_norm": 8.090020179748535, "learning_rate": 8.938184712552941e-06, "loss": 0.1809, "step": 4419 }, { "epoch": 0.21925690758470162, "grad_norm": 9.252503395080566, "learning_rate": 8.93769970616532e-06, "loss": 0.4066, "step": 4420 }, { "epoch": 0.21930651321990177, "grad_norm": 10.90070915222168, "learning_rate": 8.937214602199717e-06, "loss": 0.385, "step": 4421 }, { "epoch": 0.21935611885510195, "grad_norm": 8.951013565063477, "learning_rate": 8.936729400668158e-06, "loss": 0.3232, "step": 4422 }, { "epoch": 0.2194057244903021, "grad_norm": 5.1052985191345215, "learning_rate": 8.93624410158266e-06, "loss": 0.3328, "step": 4423 }, { "epoch": 0.21945533012550225, "grad_norm": 13.123587608337402, "learning_rate": 8.935758704955256e-06, "loss": 0.4034, "step": 4424 }, { "epoch": 0.2195049357607024, "grad_norm": 7.022109508514404, "learning_rate": 8.93527321079797e-06, "loss": 0.3896, "step": 4425 }, { "epoch": 0.21955454139590258, "grad_norm": 4.677964687347412, "learning_rate": 8.934787619122833e-06, "loss": 0.2923, "step": 4426 }, { "epoch": 0.21960414703110273, "grad_norm": 10.68932056427002, "learning_rate": 8.934301929941877e-06, "loss": 0.3389, "step": 4427 }, { "epoch": 0.21965375266630288, "grad_norm": 4.614721298217773, "learning_rate": 8.933816143267144e-06, "loss": 0.2583, "step": 4428 }, { "epoch": 0.21970335830150306, "grad_norm": 5.085387706756592, "learning_rate": 8.933330259110663e-06, "loss": 0.2749, "step": 4429 }, { "epoch": 0.2197529639367032, "grad_norm": 16.87224006652832, "learning_rate": 8.932844277484482e-06, "loss": 0.3723, "step": 4430 }, { "epoch": 0.21980256957190336, "grad_norm": 6.613677501678467, "learning_rate": 8.932358198400639e-06, "loss": 0.3513, "step": 4431 }, { "epoch": 0.21985217520710354, "grad_norm": 7.0493364334106445, "learning_rate": 8.931872021871184e-06, "loss": 0.3814, "step": 4432 }, { "epoch": 0.2199017808423037, "grad_norm": 8.599268913269043, "learning_rate": 8.931385747908159e-06, "loss": 0.3806, "step": 4433 }, { "epoch": 0.21995138647750384, "grad_norm": 4.753684043884277, "learning_rate": 8.930899376523616e-06, "loss": 0.1334, "step": 4434 }, { "epoch": 0.220000992112704, "grad_norm": 9.661500930786133, "learning_rate": 8.930412907729612e-06, "loss": 0.3628, "step": 4435 }, { "epoch": 0.22005059774790417, "grad_norm": 5.155233383178711, "learning_rate": 8.929926341538194e-06, "loss": 0.3535, "step": 4436 }, { "epoch": 0.22010020338310432, "grad_norm": 4.600677490234375, "learning_rate": 8.929439677961427e-06, "loss": 0.2457, "step": 4437 }, { "epoch": 0.22014980901830447, "grad_norm": 6.681911945343018, "learning_rate": 8.928952917011366e-06, "loss": 0.3601, "step": 4438 }, { "epoch": 0.22019941465350465, "grad_norm": 7.717925071716309, "learning_rate": 8.928466058700074e-06, "loss": 0.3738, "step": 4439 }, { "epoch": 0.2202490202887048, "grad_norm": 8.402155876159668, "learning_rate": 8.927979103039616e-06, "loss": 0.285, "step": 4440 }, { "epoch": 0.22029862592390495, "grad_norm": 5.1715240478515625, "learning_rate": 8.92749205004206e-06, "loss": 0.2717, "step": 4441 }, { "epoch": 0.2203482315591051, "grad_norm": 9.836599349975586, "learning_rate": 8.927004899719473e-06, "loss": 0.4066, "step": 4442 }, { "epoch": 0.22039783719430528, "grad_norm": 3.4439313411712646, "learning_rate": 8.926517652083928e-06, "loss": 0.2607, "step": 4443 }, { "epoch": 0.22044744282950543, "grad_norm": 7.011397838592529, "learning_rate": 8.9260303071475e-06, "loss": 0.2324, "step": 4444 }, { "epoch": 0.22049704846470558, "grad_norm": 9.327055931091309, "learning_rate": 8.925542864922267e-06, "loss": 0.3361, "step": 4445 }, { "epoch": 0.22054665409990576, "grad_norm": 9.932021141052246, "learning_rate": 8.925055325420304e-06, "loss": 0.4052, "step": 4446 }, { "epoch": 0.2205962597351059, "grad_norm": 9.728282928466797, "learning_rate": 8.924567688653694e-06, "loss": 0.4449, "step": 4447 }, { "epoch": 0.22064586537030606, "grad_norm": 5.959722995758057, "learning_rate": 8.924079954634523e-06, "loss": 0.2333, "step": 4448 }, { "epoch": 0.22069547100550624, "grad_norm": 5.163544178009033, "learning_rate": 8.923592123374876e-06, "loss": 0.3013, "step": 4449 }, { "epoch": 0.2207450766407064, "grad_norm": 4.3792195320129395, "learning_rate": 8.923104194886838e-06, "loss": 0.2016, "step": 4450 }, { "epoch": 0.22079468227590654, "grad_norm": 6.487570762634277, "learning_rate": 8.922616169182507e-06, "loss": 0.3222, "step": 4451 }, { "epoch": 0.2208442879111067, "grad_norm": 4.841893672943115, "learning_rate": 8.922128046273973e-06, "loss": 0.2496, "step": 4452 }, { "epoch": 0.22089389354630687, "grad_norm": 8.191276550292969, "learning_rate": 8.921639826173329e-06, "loss": 0.4558, "step": 4453 }, { "epoch": 0.22094349918150702, "grad_norm": 7.56160831451416, "learning_rate": 8.921151508892678e-06, "loss": 0.1973, "step": 4454 }, { "epoch": 0.22099310481670717, "grad_norm": 11.733478546142578, "learning_rate": 8.920663094444119e-06, "loss": 0.3115, "step": 4455 }, { "epoch": 0.22104271045190735, "grad_norm": 7.820400714874268, "learning_rate": 8.920174582839754e-06, "loss": 0.2464, "step": 4456 }, { "epoch": 0.2210923160871075, "grad_norm": 15.303228378295898, "learning_rate": 8.91968597409169e-06, "loss": 0.5708, "step": 4457 }, { "epoch": 0.22114192172230765, "grad_norm": 6.8523688316345215, "learning_rate": 8.919197268212035e-06, "loss": 0.3081, "step": 4458 }, { "epoch": 0.2211915273575078, "grad_norm": 6.101567268371582, "learning_rate": 8.9187084652129e-06, "loss": 0.1569, "step": 4459 }, { "epoch": 0.22124113299270798, "grad_norm": 7.612792015075684, "learning_rate": 8.918219565106393e-06, "loss": 0.3627, "step": 4460 }, { "epoch": 0.22129073862790813, "grad_norm": 8.298295021057129, "learning_rate": 8.917730567904637e-06, "loss": 0.3601, "step": 4461 }, { "epoch": 0.22134034426310828, "grad_norm": 6.693580150604248, "learning_rate": 8.917241473619742e-06, "loss": 0.3708, "step": 4462 }, { "epoch": 0.22138994989830846, "grad_norm": 5.296604633331299, "learning_rate": 8.916752282263832e-06, "loss": 0.3471, "step": 4463 }, { "epoch": 0.2214395555335086, "grad_norm": 9.107948303222656, "learning_rate": 8.916262993849032e-06, "loss": 0.4518, "step": 4464 }, { "epoch": 0.22148916116870876, "grad_norm": 12.581083297729492, "learning_rate": 8.915773608387459e-06, "loss": 0.3291, "step": 4465 }, { "epoch": 0.22153876680390894, "grad_norm": 8.955026626586914, "learning_rate": 8.915284125891246e-06, "loss": 0.3482, "step": 4466 }, { "epoch": 0.2215883724391091, "grad_norm": 8.496355056762695, "learning_rate": 8.914794546372523e-06, "loss": 0.4876, "step": 4467 }, { "epoch": 0.22163797807430924, "grad_norm": 5.201877117156982, "learning_rate": 8.914304869843419e-06, "loss": 0.2778, "step": 4468 }, { "epoch": 0.2216875837095094, "grad_norm": 8.492386817932129, "learning_rate": 8.91381509631607e-06, "loss": 0.334, "step": 4469 }, { "epoch": 0.22173718934470957, "grad_norm": 5.481325626373291, "learning_rate": 8.913325225802612e-06, "loss": 0.2858, "step": 4470 }, { "epoch": 0.22178679497990972, "grad_norm": 4.928059101104736, "learning_rate": 8.912835258315184e-06, "loss": 0.3122, "step": 4471 }, { "epoch": 0.22183640061510987, "grad_norm": 5.428162574768066, "learning_rate": 8.91234519386593e-06, "loss": 0.2431, "step": 4472 }, { "epoch": 0.22188600625031005, "grad_norm": 6.280510902404785, "learning_rate": 8.911855032466992e-06, "loss": 0.3589, "step": 4473 }, { "epoch": 0.2219356118855102, "grad_norm": 5.866282939910889, "learning_rate": 8.911364774130516e-06, "loss": 0.3549, "step": 4474 }, { "epoch": 0.22198521752071035, "grad_norm": 7.132577896118164, "learning_rate": 8.910874418868652e-06, "loss": 0.3794, "step": 4475 }, { "epoch": 0.2220348231559105, "grad_norm": 11.376815795898438, "learning_rate": 8.910383966693551e-06, "loss": 0.3209, "step": 4476 }, { "epoch": 0.22208442879111068, "grad_norm": 5.672459602355957, "learning_rate": 8.909893417617367e-06, "loss": 0.3314, "step": 4477 }, { "epoch": 0.22213403442631083, "grad_norm": 8.576131820678711, "learning_rate": 8.909402771652255e-06, "loss": 0.3236, "step": 4478 }, { "epoch": 0.22218364006151098, "grad_norm": 6.90125036239624, "learning_rate": 8.908912028810372e-06, "loss": 0.2971, "step": 4479 }, { "epoch": 0.22223324569671116, "grad_norm": 5.140538692474365, "learning_rate": 8.908421189103883e-06, "loss": 0.2957, "step": 4480 }, { "epoch": 0.2222828513319113, "grad_norm": 12.748921394348145, "learning_rate": 8.907930252544949e-06, "loss": 0.3842, "step": 4481 }, { "epoch": 0.22233245696711146, "grad_norm": 6.431394100189209, "learning_rate": 8.907439219145735e-06, "loss": 0.2851, "step": 4482 }, { "epoch": 0.22238206260231164, "grad_norm": 10.221746444702148, "learning_rate": 8.90694808891841e-06, "loss": 0.4976, "step": 4483 }, { "epoch": 0.2224316682375118, "grad_norm": 8.743896484375, "learning_rate": 8.906456861875143e-06, "loss": 0.3872, "step": 4484 }, { "epoch": 0.22248127387271194, "grad_norm": 8.508426666259766, "learning_rate": 8.905965538028108e-06, "loss": 0.3093, "step": 4485 }, { "epoch": 0.2225308795079121, "grad_norm": 7.680749893188477, "learning_rate": 8.905474117389482e-06, "loss": 0.2877, "step": 4486 }, { "epoch": 0.22258048514311227, "grad_norm": 8.473488807678223, "learning_rate": 8.904982599971439e-06, "loss": 0.3927, "step": 4487 }, { "epoch": 0.22263009077831242, "grad_norm": 10.441822052001953, "learning_rate": 8.90449098578616e-06, "loss": 0.3829, "step": 4488 }, { "epoch": 0.22267969641351257, "grad_norm": 7.949940204620361, "learning_rate": 8.903999274845829e-06, "loss": 0.3262, "step": 4489 }, { "epoch": 0.22272930204871275, "grad_norm": 10.455456733703613, "learning_rate": 8.90350746716263e-06, "loss": 0.4013, "step": 4490 }, { "epoch": 0.2227789076839129, "grad_norm": 6.382672309875488, "learning_rate": 8.903015562748751e-06, "loss": 0.2899, "step": 4491 }, { "epoch": 0.22282851331911305, "grad_norm": 7.428103923797607, "learning_rate": 8.902523561616381e-06, "loss": 0.3837, "step": 4492 }, { "epoch": 0.2228781189543132, "grad_norm": 6.620781898498535, "learning_rate": 8.902031463777712e-06, "loss": 0.2872, "step": 4493 }, { "epoch": 0.22292772458951338, "grad_norm": 5.070403099060059, "learning_rate": 8.901539269244936e-06, "loss": 0.2544, "step": 4494 }, { "epoch": 0.22297733022471353, "grad_norm": 10.05212688446045, "learning_rate": 8.901046978030254e-06, "loss": 0.3297, "step": 4495 }, { "epoch": 0.22302693585991368, "grad_norm": 14.02652645111084, "learning_rate": 8.900554590145862e-06, "loss": 0.3934, "step": 4496 }, { "epoch": 0.22307654149511386, "grad_norm": 6.309854507446289, "learning_rate": 8.900062105603966e-06, "loss": 0.2442, "step": 4497 }, { "epoch": 0.223126147130314, "grad_norm": 9.626514434814453, "learning_rate": 8.899569524416762e-06, "loss": 0.3982, "step": 4498 }, { "epoch": 0.22317575276551416, "grad_norm": 41.08066940307617, "learning_rate": 8.899076846596465e-06, "loss": 0.3486, "step": 4499 }, { "epoch": 0.2232253584007143, "grad_norm": 13.104851722717285, "learning_rate": 8.898584072155279e-06, "loss": 0.4669, "step": 4500 }, { "epoch": 0.22327496403591449, "grad_norm": 10.011497497558594, "learning_rate": 8.898091201105414e-06, "loss": 0.4193, "step": 4501 }, { "epoch": 0.22332456967111464, "grad_norm": 6.790970802307129, "learning_rate": 8.897598233459088e-06, "loss": 0.3327, "step": 4502 }, { "epoch": 0.2233741753063148, "grad_norm": 5.931440353393555, "learning_rate": 8.897105169228515e-06, "loss": 0.2512, "step": 4503 }, { "epoch": 0.22342378094151497, "grad_norm": 9.249348640441895, "learning_rate": 8.896612008425911e-06, "loss": 0.3033, "step": 4504 }, { "epoch": 0.22347338657671512, "grad_norm": 4.344629287719727, "learning_rate": 8.8961187510635e-06, "loss": 0.2766, "step": 4505 }, { "epoch": 0.22352299221191527, "grad_norm": 7.196081161499023, "learning_rate": 8.895625397153505e-06, "loss": 0.3066, "step": 4506 }, { "epoch": 0.22357259784711545, "grad_norm": 4.504549026489258, "learning_rate": 8.89513194670815e-06, "loss": 0.1775, "step": 4507 }, { "epoch": 0.2236222034823156, "grad_norm": 24.197738647460938, "learning_rate": 8.894638399739663e-06, "loss": 0.4152, "step": 4508 }, { "epoch": 0.22367180911751575, "grad_norm": 12.808403968811035, "learning_rate": 8.894144756260274e-06, "loss": 0.3711, "step": 4509 }, { "epoch": 0.2237214147527159, "grad_norm": 13.752742767333984, "learning_rate": 8.893651016282216e-06, "loss": 0.4708, "step": 4510 }, { "epoch": 0.22377102038791608, "grad_norm": 13.962891578674316, "learning_rate": 8.893157179817726e-06, "loss": 0.4404, "step": 4511 }, { "epoch": 0.22382062602311623, "grad_norm": 9.926630973815918, "learning_rate": 8.892663246879041e-06, "loss": 0.4013, "step": 4512 }, { "epoch": 0.22387023165831638, "grad_norm": 10.885147094726562, "learning_rate": 8.892169217478397e-06, "loss": 0.3612, "step": 4513 }, { "epoch": 0.22391983729351655, "grad_norm": 7.92582368850708, "learning_rate": 8.891675091628041e-06, "loss": 0.2459, "step": 4514 }, { "epoch": 0.2239694429287167, "grad_norm": 5.352229595184326, "learning_rate": 8.891180869340216e-06, "loss": 0.3347, "step": 4515 }, { "epoch": 0.22401904856391686, "grad_norm": 5.316109657287598, "learning_rate": 8.890686550627169e-06, "loss": 0.259, "step": 4516 }, { "epoch": 0.224068654199117, "grad_norm": 7.959018707275391, "learning_rate": 8.890192135501148e-06, "loss": 0.3695, "step": 4517 }, { "epoch": 0.22411825983431719, "grad_norm": 6.789284706115723, "learning_rate": 8.889697623974407e-06, "loss": 0.2905, "step": 4518 }, { "epoch": 0.22416786546951734, "grad_norm": 5.614918231964111, "learning_rate": 8.889203016059199e-06, "loss": 0.3273, "step": 4519 }, { "epoch": 0.2242174711047175, "grad_norm": 4.922409534454346, "learning_rate": 8.888708311767781e-06, "loss": 0.2679, "step": 4520 }, { "epoch": 0.22426707673991766, "grad_norm": 6.810568332672119, "learning_rate": 8.888213511112413e-06, "loss": 0.3423, "step": 4521 }, { "epoch": 0.22431668237511782, "grad_norm": 5.319775581359863, "learning_rate": 8.887718614105354e-06, "loss": 0.3207, "step": 4522 }, { "epoch": 0.22436628801031797, "grad_norm": 6.6080803871154785, "learning_rate": 8.88722362075887e-06, "loss": 0.3168, "step": 4523 }, { "epoch": 0.22441589364551814, "grad_norm": 16.23967170715332, "learning_rate": 8.886728531085225e-06, "loss": 0.4507, "step": 4524 }, { "epoch": 0.2244654992807183, "grad_norm": 5.357030391693115, "learning_rate": 8.886233345096691e-06, "loss": 0.3191, "step": 4525 }, { "epoch": 0.22451510491591845, "grad_norm": 26.37118148803711, "learning_rate": 8.885738062805536e-06, "loss": 0.3616, "step": 4526 }, { "epoch": 0.2245647105511186, "grad_norm": 7.327362060546875, "learning_rate": 8.885242684224034e-06, "loss": 0.2514, "step": 4527 }, { "epoch": 0.22461431618631877, "grad_norm": 7.27288818359375, "learning_rate": 8.884747209364459e-06, "loss": 0.3297, "step": 4528 }, { "epoch": 0.22466392182151892, "grad_norm": 10.438055992126465, "learning_rate": 8.884251638239092e-06, "loss": 0.3288, "step": 4529 }, { "epoch": 0.22471352745671908, "grad_norm": 21.181974411010742, "learning_rate": 8.883755970860212e-06, "loss": 0.3918, "step": 4530 }, { "epoch": 0.22476313309191925, "grad_norm": 4.11492919921875, "learning_rate": 8.883260207240105e-06, "loss": 0.3124, "step": 4531 }, { "epoch": 0.2248127387271194, "grad_norm": 6.396302700042725, "learning_rate": 8.88276434739105e-06, "loss": 0.3756, "step": 4532 }, { "epoch": 0.22486234436231956, "grad_norm": 19.325740814208984, "learning_rate": 8.882268391325341e-06, "loss": 0.3617, "step": 4533 }, { "epoch": 0.2249119499975197, "grad_norm": 5.7566819190979, "learning_rate": 8.881772339055263e-06, "loss": 0.2615, "step": 4534 }, { "epoch": 0.22496155563271988, "grad_norm": 7.202446460723877, "learning_rate": 8.881276190593111e-06, "loss": 0.3047, "step": 4535 }, { "epoch": 0.22501116126792003, "grad_norm": 7.196591377258301, "learning_rate": 8.88077994595118e-06, "loss": 0.2617, "step": 4536 }, { "epoch": 0.22506076690312019, "grad_norm": 4.9482526779174805, "learning_rate": 8.880283605141766e-06, "loss": 0.2645, "step": 4537 }, { "epoch": 0.22511037253832036, "grad_norm": 14.742371559143066, "learning_rate": 8.879787168177169e-06, "loss": 0.454, "step": 4538 }, { "epoch": 0.22515997817352051, "grad_norm": 8.11248779296875, "learning_rate": 8.87929063506969e-06, "loss": 0.4537, "step": 4539 }, { "epoch": 0.22520958380872066, "grad_norm": 6.993363380432129, "learning_rate": 8.878794005831636e-06, "loss": 0.381, "step": 4540 }, { "epoch": 0.22525918944392084, "grad_norm": 13.202330589294434, "learning_rate": 8.878297280475312e-06, "loss": 0.4278, "step": 4541 }, { "epoch": 0.225308795079121, "grad_norm": 7.928531169891357, "learning_rate": 8.877800459013026e-06, "loss": 0.2735, "step": 4542 }, { "epoch": 0.22535840071432114, "grad_norm": 11.694995880126953, "learning_rate": 8.87730354145709e-06, "loss": 0.3927, "step": 4543 }, { "epoch": 0.2254080063495213, "grad_norm": 7.118415832519531, "learning_rate": 8.876806527819819e-06, "loss": 0.3467, "step": 4544 }, { "epoch": 0.22545761198472147, "grad_norm": 3.8370227813720703, "learning_rate": 8.87630941811353e-06, "loss": 0.222, "step": 4545 }, { "epoch": 0.22550721761992162, "grad_norm": 11.829076766967773, "learning_rate": 8.875812212350538e-06, "loss": 0.4159, "step": 4546 }, { "epoch": 0.22555682325512177, "grad_norm": 5.436630725860596, "learning_rate": 8.875314910543167e-06, "loss": 0.2858, "step": 4547 }, { "epoch": 0.22560642889032195, "grad_norm": 25.24088478088379, "learning_rate": 8.874817512703739e-06, "loss": 0.4577, "step": 4548 }, { "epoch": 0.2256560345255221, "grad_norm": 9.355764389038086, "learning_rate": 8.874320018844581e-06, "loss": 0.4339, "step": 4549 }, { "epoch": 0.22570564016072225, "grad_norm": 11.965444564819336, "learning_rate": 8.87382242897802e-06, "loss": 0.3711, "step": 4550 }, { "epoch": 0.2257552457959224, "grad_norm": 6.695784091949463, "learning_rate": 8.873324743116386e-06, "loss": 0.3788, "step": 4551 }, { "epoch": 0.22580485143112258, "grad_norm": 15.388497352600098, "learning_rate": 8.872826961272015e-06, "loss": 0.4681, "step": 4552 }, { "epoch": 0.22585445706632273, "grad_norm": 9.243722915649414, "learning_rate": 8.872329083457239e-06, "loss": 0.4179, "step": 4553 }, { "epoch": 0.22590406270152288, "grad_norm": 5.205132484436035, "learning_rate": 8.871831109684397e-06, "loss": 0.2953, "step": 4554 }, { "epoch": 0.22595366833672306, "grad_norm": 5.817073345184326, "learning_rate": 8.871333039965828e-06, "loss": 0.332, "step": 4555 }, { "epoch": 0.2260032739719232, "grad_norm": 6.955972194671631, "learning_rate": 8.870834874313876e-06, "loss": 0.371, "step": 4556 }, { "epoch": 0.22605287960712336, "grad_norm": 4.277887344360352, "learning_rate": 8.870336612740883e-06, "loss": 0.3422, "step": 4557 }, { "epoch": 0.22610248524232351, "grad_norm": 5.239279270172119, "learning_rate": 8.8698382552592e-06, "loss": 0.2016, "step": 4558 }, { "epoch": 0.2261520908775237, "grad_norm": 6.218982696533203, "learning_rate": 8.869339801881173e-06, "loss": 0.3278, "step": 4559 }, { "epoch": 0.22620169651272384, "grad_norm": 10.673357963562012, "learning_rate": 8.868841252619156e-06, "loss": 0.4367, "step": 4560 }, { "epoch": 0.226251302147924, "grad_norm": 9.410380363464355, "learning_rate": 8.868342607485504e-06, "loss": 0.3698, "step": 4561 }, { "epoch": 0.22630090778312417, "grad_norm": 17.14410972595215, "learning_rate": 8.86784386649257e-06, "loss": 0.4028, "step": 4562 }, { "epoch": 0.22635051341832432, "grad_norm": 6.31898832321167, "learning_rate": 8.867345029652716e-06, "loss": 0.3081, "step": 4563 }, { "epoch": 0.22640011905352447, "grad_norm": 9.432043075561523, "learning_rate": 8.866846096978303e-06, "loss": 0.4089, "step": 4564 }, { "epoch": 0.22644972468872465, "grad_norm": 5.342355251312256, "learning_rate": 8.866347068481696e-06, "loss": 0.286, "step": 4565 }, { "epoch": 0.2264993303239248, "grad_norm": 21.73241424560547, "learning_rate": 8.865847944175257e-06, "loss": 0.3867, "step": 4566 }, { "epoch": 0.22654893595912495, "grad_norm": 5.830616474151611, "learning_rate": 8.865348724071359e-06, "loss": 0.258, "step": 4567 }, { "epoch": 0.2265985415943251, "grad_norm": 6.664274215698242, "learning_rate": 8.864849408182373e-06, "loss": 0.3022, "step": 4568 }, { "epoch": 0.22664814722952528, "grad_norm": 5.119040012359619, "learning_rate": 8.864349996520667e-06, "loss": 0.2319, "step": 4569 }, { "epoch": 0.22669775286472543, "grad_norm": 7.624216079711914, "learning_rate": 8.863850489098621e-06, "loss": 0.3427, "step": 4570 }, { "epoch": 0.22674735849992558, "grad_norm": 6.301235198974609, "learning_rate": 8.863350885928613e-06, "loss": 0.2221, "step": 4571 }, { "epoch": 0.22679696413512576, "grad_norm": 9.45453929901123, "learning_rate": 8.86285118702302e-06, "loss": 0.3217, "step": 4572 }, { "epoch": 0.2268465697703259, "grad_norm": 6.176670074462891, "learning_rate": 8.862351392394231e-06, "loss": 0.346, "step": 4573 }, { "epoch": 0.22689617540552606, "grad_norm": 12.443695068359375, "learning_rate": 8.861851502054625e-06, "loss": 0.4347, "step": 4574 }, { "epoch": 0.2269457810407262, "grad_norm": 8.622538566589355, "learning_rate": 8.861351516016592e-06, "loss": 0.3059, "step": 4575 }, { "epoch": 0.2269953866759264, "grad_norm": 14.075139045715332, "learning_rate": 8.860851434292521e-06, "loss": 0.3978, "step": 4576 }, { "epoch": 0.22704499231112654, "grad_norm": 5.6127238273620605, "learning_rate": 8.860351256894807e-06, "loss": 0.3262, "step": 4577 }, { "epoch": 0.2270945979463267, "grad_norm": 6.8146281242370605, "learning_rate": 8.859850983835842e-06, "loss": 0.3597, "step": 4578 }, { "epoch": 0.22714420358152687, "grad_norm": 11.7225980758667, "learning_rate": 8.859350615128023e-06, "loss": 0.4407, "step": 4579 }, { "epoch": 0.22719380921672702, "grad_norm": 6.216101169586182, "learning_rate": 8.858850150783751e-06, "loss": 0.2919, "step": 4580 }, { "epoch": 0.22724341485192717, "grad_norm": 11.163463592529297, "learning_rate": 8.858349590815425e-06, "loss": 0.3555, "step": 4581 }, { "epoch": 0.22729302048712735, "grad_norm": 5.814659595489502, "learning_rate": 8.857848935235452e-06, "loss": 0.3501, "step": 4582 }, { "epoch": 0.2273426261223275, "grad_norm": 3.3097641468048096, "learning_rate": 8.857348184056236e-06, "loss": 0.2745, "step": 4583 }, { "epoch": 0.22739223175752765, "grad_norm": 8.572089195251465, "learning_rate": 8.85684733729019e-06, "loss": 0.3225, "step": 4584 }, { "epoch": 0.2274418373927278, "grad_norm": 13.273133277893066, "learning_rate": 8.85634639494972e-06, "loss": 0.3175, "step": 4585 }, { "epoch": 0.22749144302792798, "grad_norm": 6.405428409576416, "learning_rate": 8.855845357047242e-06, "loss": 0.2802, "step": 4586 }, { "epoch": 0.22754104866312813, "grad_norm": 11.247232437133789, "learning_rate": 8.855344223595173e-06, "loss": 0.3932, "step": 4587 }, { "epoch": 0.22759065429832828, "grad_norm": 8.497319221496582, "learning_rate": 8.85484299460593e-06, "loss": 0.3482, "step": 4588 }, { "epoch": 0.22764025993352846, "grad_norm": 9.549513816833496, "learning_rate": 8.854341670091933e-06, "loss": 0.3597, "step": 4589 }, { "epoch": 0.2276898655687286, "grad_norm": 6.866343975067139, "learning_rate": 8.853840250065606e-06, "loss": 0.3942, "step": 4590 }, { "epoch": 0.22773947120392876, "grad_norm": 14.216639518737793, "learning_rate": 8.853338734539373e-06, "loss": 0.3828, "step": 4591 }, { "epoch": 0.2277890768391289, "grad_norm": 5.892381191253662, "learning_rate": 8.852837123525666e-06, "loss": 0.2908, "step": 4592 }, { "epoch": 0.2278386824743291, "grad_norm": 9.092718124389648, "learning_rate": 8.852335417036911e-06, "loss": 0.3629, "step": 4593 }, { "epoch": 0.22788828810952924, "grad_norm": 8.076126098632812, "learning_rate": 8.851833615085542e-06, "loss": 0.3764, "step": 4594 }, { "epoch": 0.2279378937447294, "grad_norm": 10.137521743774414, "learning_rate": 8.851331717683994e-06, "loss": 0.4094, "step": 4595 }, { "epoch": 0.22798749937992957, "grad_norm": 4.656782627105713, "learning_rate": 8.850829724844703e-06, "loss": 0.2462, "step": 4596 }, { "epoch": 0.22803710501512972, "grad_norm": 5.981223106384277, "learning_rate": 8.85032763658011e-06, "loss": 0.2351, "step": 4597 }, { "epoch": 0.22808671065032987, "grad_norm": 5.099477767944336, "learning_rate": 8.849825452902657e-06, "loss": 0.2726, "step": 4598 }, { "epoch": 0.22813631628553005, "grad_norm": 6.347727298736572, "learning_rate": 8.849323173824786e-06, "loss": 0.2647, "step": 4599 }, { "epoch": 0.2281859219207302, "grad_norm": 11.231966018676758, "learning_rate": 8.848820799358945e-06, "loss": 0.4167, "step": 4600 }, { "epoch": 0.22823552755593035, "grad_norm": 11.260115623474121, "learning_rate": 8.848318329517587e-06, "loss": 0.3669, "step": 4601 }, { "epoch": 0.2282851331911305, "grad_norm": 6.487644672393799, "learning_rate": 8.847815764313158e-06, "loss": 0.321, "step": 4602 }, { "epoch": 0.22833473882633068, "grad_norm": 5.932831287384033, "learning_rate": 8.847313103758114e-06, "loss": 0.3332, "step": 4603 }, { "epoch": 0.22838434446153083, "grad_norm": 4.632785320281982, "learning_rate": 8.846810347864912e-06, "loss": 0.3137, "step": 4604 }, { "epoch": 0.22843395009673098, "grad_norm": 8.501399040222168, "learning_rate": 8.846307496646008e-06, "loss": 0.3018, "step": 4605 }, { "epoch": 0.22848355573193116, "grad_norm": 4.364784240722656, "learning_rate": 8.845804550113864e-06, "loss": 0.357, "step": 4606 }, { "epoch": 0.2285331613671313, "grad_norm": 5.952094554901123, "learning_rate": 8.845301508280945e-06, "loss": 0.2922, "step": 4607 }, { "epoch": 0.22858276700233146, "grad_norm": 14.209589958190918, "learning_rate": 8.844798371159714e-06, "loss": 0.3426, "step": 4608 }, { "epoch": 0.2286323726375316, "grad_norm": 8.833710670471191, "learning_rate": 8.84429513876264e-06, "loss": 0.4296, "step": 4609 }, { "epoch": 0.2286819782727318, "grad_norm": 5.530732154846191, "learning_rate": 8.843791811102194e-06, "loss": 0.2654, "step": 4610 }, { "epoch": 0.22873158390793194, "grad_norm": 8.05799674987793, "learning_rate": 8.843288388190849e-06, "loss": 0.3849, "step": 4611 }, { "epoch": 0.2287811895431321, "grad_norm": 6.293107509613037, "learning_rate": 8.842784870041078e-06, "loss": 0.2991, "step": 4612 }, { "epoch": 0.22883079517833227, "grad_norm": 6.537694454193115, "learning_rate": 8.842281256665362e-06, "loss": 0.3284, "step": 4613 }, { "epoch": 0.22888040081353242, "grad_norm": 10.878643035888672, "learning_rate": 8.841777548076176e-06, "loss": 0.4122, "step": 4614 }, { "epoch": 0.22893000644873257, "grad_norm": 7.325645923614502, "learning_rate": 8.841273744286005e-06, "loss": 0.3224, "step": 4615 }, { "epoch": 0.22897961208393272, "grad_norm": 14.93862247467041, "learning_rate": 8.840769845307333e-06, "loss": 0.3421, "step": 4616 }, { "epoch": 0.2290292177191329, "grad_norm": 8.845842361450195, "learning_rate": 8.840265851152648e-06, "loss": 0.303, "step": 4617 }, { "epoch": 0.22907882335433305, "grad_norm": 9.312238693237305, "learning_rate": 8.839761761834436e-06, "loss": 0.3259, "step": 4618 }, { "epoch": 0.2291284289895332, "grad_norm": 5.620080947875977, "learning_rate": 8.839257577365193e-06, "loss": 0.2853, "step": 4619 }, { "epoch": 0.22917803462473338, "grad_norm": 12.600873947143555, "learning_rate": 8.838753297757408e-06, "loss": 0.3565, "step": 4620 }, { "epoch": 0.22922764025993353, "grad_norm": 7.775954723358154, "learning_rate": 8.838248923023582e-06, "loss": 0.3371, "step": 4621 }, { "epoch": 0.22927724589513368, "grad_norm": 26.190385818481445, "learning_rate": 8.837744453176212e-06, "loss": 0.4047, "step": 4622 }, { "epoch": 0.22932685153033386, "grad_norm": 4.276395797729492, "learning_rate": 8.837239888227796e-06, "loss": 0.3033, "step": 4623 }, { "epoch": 0.229376457165534, "grad_norm": 8.10233211517334, "learning_rate": 8.83673522819084e-06, "loss": 0.3391, "step": 4624 }, { "epoch": 0.22942606280073416, "grad_norm": 5.873762130737305, "learning_rate": 8.836230473077851e-06, "loss": 0.3243, "step": 4625 }, { "epoch": 0.2294756684359343, "grad_norm": 5.620453357696533, "learning_rate": 8.835725622901335e-06, "loss": 0.301, "step": 4626 }, { "epoch": 0.2295252740711345, "grad_norm": 7.660037994384766, "learning_rate": 8.835220677673803e-06, "loss": 0.3048, "step": 4627 }, { "epoch": 0.22957487970633464, "grad_norm": 8.658072471618652, "learning_rate": 8.834715637407768e-06, "loss": 0.3396, "step": 4628 }, { "epoch": 0.2296244853415348, "grad_norm": 10.328086853027344, "learning_rate": 8.834210502115746e-06, "loss": 0.354, "step": 4629 }, { "epoch": 0.22967409097673497, "grad_norm": 6.177628993988037, "learning_rate": 8.83370527181025e-06, "loss": 0.3477, "step": 4630 }, { "epoch": 0.22972369661193512, "grad_norm": 8.606382369995117, "learning_rate": 8.833199946503805e-06, "loss": 0.3421, "step": 4631 }, { "epoch": 0.22977330224713527, "grad_norm": 20.89480209350586, "learning_rate": 8.832694526208932e-06, "loss": 0.4294, "step": 4632 }, { "epoch": 0.22982290788233542, "grad_norm": 8.2441987991333, "learning_rate": 8.832189010938155e-06, "loss": 0.3941, "step": 4633 }, { "epoch": 0.2298725135175356, "grad_norm": 7.584169864654541, "learning_rate": 8.831683400704e-06, "loss": 0.3152, "step": 4634 }, { "epoch": 0.22992211915273575, "grad_norm": 6.577970027923584, "learning_rate": 8.831177695518997e-06, "loss": 0.3239, "step": 4635 }, { "epoch": 0.2299717247879359, "grad_norm": 8.773552894592285, "learning_rate": 8.830671895395678e-06, "loss": 0.3425, "step": 4636 }, { "epoch": 0.23002133042313608, "grad_norm": 4.315682888031006, "learning_rate": 8.830166000346576e-06, "loss": 0.3087, "step": 4637 }, { "epoch": 0.23007093605833623, "grad_norm": 4.526347637176514, "learning_rate": 8.82966001038423e-06, "loss": 0.3755, "step": 4638 }, { "epoch": 0.23012054169353638, "grad_norm": 7.624767780303955, "learning_rate": 8.829153925521175e-06, "loss": 0.3793, "step": 4639 }, { "epoch": 0.23017014732873656, "grad_norm": 7.284114837646484, "learning_rate": 8.828647745769954e-06, "loss": 0.2881, "step": 4640 }, { "epoch": 0.2302197529639367, "grad_norm": 6.391641616821289, "learning_rate": 8.828141471143108e-06, "loss": 0.3085, "step": 4641 }, { "epoch": 0.23026935859913686, "grad_norm": 5.580058574676514, "learning_rate": 8.827635101653187e-06, "loss": 0.2291, "step": 4642 }, { "epoch": 0.230318964234337, "grad_norm": 4.555609226226807, "learning_rate": 8.827128637312737e-06, "loss": 0.2181, "step": 4643 }, { "epoch": 0.2303685698695372, "grad_norm": 6.584194183349609, "learning_rate": 8.826622078134307e-06, "loss": 0.326, "step": 4644 }, { "epoch": 0.23041817550473734, "grad_norm": 20.574010848999023, "learning_rate": 8.82611542413045e-06, "loss": 0.4681, "step": 4645 }, { "epoch": 0.2304677811399375, "grad_norm": 12.491229057312012, "learning_rate": 8.825608675313723e-06, "loss": 0.3665, "step": 4646 }, { "epoch": 0.23051738677513767, "grad_norm": 8.4984769821167, "learning_rate": 8.825101831696685e-06, "loss": 0.3963, "step": 4647 }, { "epoch": 0.23056699241033782, "grad_norm": 18.233383178710938, "learning_rate": 8.82459489329189e-06, "loss": 0.4229, "step": 4648 }, { "epoch": 0.23061659804553797, "grad_norm": 7.56990385055542, "learning_rate": 8.824087860111905e-06, "loss": 0.3229, "step": 4649 }, { "epoch": 0.23066620368073812, "grad_norm": 12.09207820892334, "learning_rate": 8.823580732169293e-06, "loss": 0.293, "step": 4650 }, { "epoch": 0.2307158093159383, "grad_norm": 6.810799598693848, "learning_rate": 8.823073509476621e-06, "loss": 0.2637, "step": 4651 }, { "epoch": 0.23076541495113845, "grad_norm": 7.200340747833252, "learning_rate": 8.822566192046457e-06, "loss": 0.3298, "step": 4652 }, { "epoch": 0.2308150205863386, "grad_norm": 8.37469482421875, "learning_rate": 8.822058779891375e-06, "loss": 0.3938, "step": 4653 }, { "epoch": 0.23086462622153878, "grad_norm": 6.826453685760498, "learning_rate": 8.821551273023945e-06, "loss": 0.4368, "step": 4654 }, { "epoch": 0.23091423185673893, "grad_norm": 4.861080169677734, "learning_rate": 8.821043671456748e-06, "loss": 0.1931, "step": 4655 }, { "epoch": 0.23096383749193908, "grad_norm": 8.243729591369629, "learning_rate": 8.82053597520236e-06, "loss": 0.3867, "step": 4656 }, { "epoch": 0.23101344312713926, "grad_norm": 6.488671779632568, "learning_rate": 8.820028184273363e-06, "loss": 0.3644, "step": 4657 }, { "epoch": 0.2310630487623394, "grad_norm": 5.044696807861328, "learning_rate": 8.819520298682337e-06, "loss": 0.2454, "step": 4658 }, { "epoch": 0.23111265439753956, "grad_norm": 7.688903331756592, "learning_rate": 8.819012318441873e-06, "loss": 0.441, "step": 4659 }, { "epoch": 0.2311622600327397, "grad_norm": 4.688778877258301, "learning_rate": 8.818504243564556e-06, "loss": 0.2205, "step": 4660 }, { "epoch": 0.2312118656679399, "grad_norm": 11.18616771697998, "learning_rate": 8.817996074062975e-06, "loss": 0.3324, "step": 4661 }, { "epoch": 0.23126147130314004, "grad_norm": 7.655897617340088, "learning_rate": 8.817487809949726e-06, "loss": 0.369, "step": 4662 }, { "epoch": 0.2313110769383402, "grad_norm": 6.727259159088135, "learning_rate": 8.8169794512374e-06, "loss": 0.3793, "step": 4663 }, { "epoch": 0.23136068257354037, "grad_norm": 5.590033054351807, "learning_rate": 8.816470997938598e-06, "loss": 0.2973, "step": 4664 }, { "epoch": 0.23141028820874052, "grad_norm": 7.031032085418701, "learning_rate": 8.81596245006592e-06, "loss": 0.367, "step": 4665 }, { "epoch": 0.23145989384394067, "grad_norm": 5.064475059509277, "learning_rate": 8.815453807631963e-06, "loss": 0.2693, "step": 4666 }, { "epoch": 0.23150949947914082, "grad_norm": 4.680160999298096, "learning_rate": 8.814945070649337e-06, "loss": 0.2914, "step": 4667 }, { "epoch": 0.231559105114341, "grad_norm": 7.65644645690918, "learning_rate": 8.814436239130645e-06, "loss": 0.2481, "step": 4668 }, { "epoch": 0.23160871074954115, "grad_norm": 8.732515335083008, "learning_rate": 8.813927313088498e-06, "loss": 0.3914, "step": 4669 }, { "epoch": 0.2316583163847413, "grad_norm": 8.19379711151123, "learning_rate": 8.813418292535507e-06, "loss": 0.3633, "step": 4670 }, { "epoch": 0.23170792201994148, "grad_norm": 14.388723373413086, "learning_rate": 8.812909177484287e-06, "loss": 0.4676, "step": 4671 }, { "epoch": 0.23175752765514163, "grad_norm": 8.456799507141113, "learning_rate": 8.812399967947452e-06, "loss": 0.3177, "step": 4672 }, { "epoch": 0.23180713329034178, "grad_norm": 7.525990962982178, "learning_rate": 8.811890663937621e-06, "loss": 0.2191, "step": 4673 }, { "epoch": 0.23185673892554196, "grad_norm": 9.602320671081543, "learning_rate": 8.811381265467413e-06, "loss": 0.3349, "step": 4674 }, { "epoch": 0.2319063445607421, "grad_norm": 6.123682022094727, "learning_rate": 8.810871772549457e-06, "loss": 0.2729, "step": 4675 }, { "epoch": 0.23195595019594226, "grad_norm": 4.852537631988525, "learning_rate": 8.810362185196373e-06, "loss": 0.3443, "step": 4676 }, { "epoch": 0.2320055558311424, "grad_norm": 6.855079174041748, "learning_rate": 8.80985250342079e-06, "loss": 0.3179, "step": 4677 }, { "epoch": 0.23205516146634259, "grad_norm": 12.040047645568848, "learning_rate": 8.80934272723534e-06, "loss": 0.3532, "step": 4678 }, { "epoch": 0.23210476710154274, "grad_norm": 14.154614448547363, "learning_rate": 8.808832856652652e-06, "loss": 0.4274, "step": 4679 }, { "epoch": 0.2321543727367429, "grad_norm": 6.6236891746521, "learning_rate": 8.808322891685363e-06, "loss": 0.3621, "step": 4680 }, { "epoch": 0.23220397837194307, "grad_norm": 18.629568099975586, "learning_rate": 8.807812832346111e-06, "loss": 0.3718, "step": 4681 }, { "epoch": 0.23225358400714322, "grad_norm": 5.8259406089782715, "learning_rate": 8.807302678647534e-06, "loss": 0.3626, "step": 4682 }, { "epoch": 0.23230318964234337, "grad_norm": 7.94471549987793, "learning_rate": 8.806792430602275e-06, "loss": 0.4149, "step": 4683 }, { "epoch": 0.23235279527754352, "grad_norm": 6.503755569458008, "learning_rate": 8.806282088222977e-06, "loss": 0.2664, "step": 4684 }, { "epoch": 0.2324024009127437, "grad_norm": 7.953148365020752, "learning_rate": 8.805771651522288e-06, "loss": 0.4062, "step": 4685 }, { "epoch": 0.23245200654794385, "grad_norm": 6.0344109535217285, "learning_rate": 8.805261120512856e-06, "loss": 0.35, "step": 4686 }, { "epoch": 0.232501612183144, "grad_norm": 9.743476867675781, "learning_rate": 8.80475049520733e-06, "loss": 0.3852, "step": 4687 }, { "epoch": 0.23255121781834417, "grad_norm": 14.886805534362793, "learning_rate": 8.804239775618369e-06, "loss": 0.4207, "step": 4688 }, { "epoch": 0.23260082345354433, "grad_norm": 4.368387222290039, "learning_rate": 8.803728961758623e-06, "loss": 0.255, "step": 4689 }, { "epoch": 0.23265042908874448, "grad_norm": 5.359022617340088, "learning_rate": 8.803218053640753e-06, "loss": 0.2463, "step": 4690 }, { "epoch": 0.23270003472394463, "grad_norm": 7.8523335456848145, "learning_rate": 8.80270705127742e-06, "loss": 0.4088, "step": 4691 }, { "epoch": 0.2327496403591448, "grad_norm": 6.168440818786621, "learning_rate": 8.802195954681285e-06, "loss": 0.2963, "step": 4692 }, { "epoch": 0.23279924599434496, "grad_norm": 5.582457542419434, "learning_rate": 8.801684763865015e-06, "loss": 0.3413, "step": 4693 }, { "epoch": 0.2328488516295451, "grad_norm": 4.015377521514893, "learning_rate": 8.801173478841278e-06, "loss": 0.2147, "step": 4694 }, { "epoch": 0.23289845726474528, "grad_norm": 4.047308921813965, "learning_rate": 8.800662099622741e-06, "loss": 0.207, "step": 4695 }, { "epoch": 0.23294806289994544, "grad_norm": 5.986549377441406, "learning_rate": 8.800150626222079e-06, "loss": 0.285, "step": 4696 }, { "epoch": 0.23299766853514559, "grad_norm": 6.994384765625, "learning_rate": 8.799639058651966e-06, "loss": 0.2776, "step": 4697 }, { "epoch": 0.23304727417034576, "grad_norm": 5.7368669509887695, "learning_rate": 8.799127396925076e-06, "loss": 0.2768, "step": 4698 }, { "epoch": 0.23309687980554591, "grad_norm": 9.458151817321777, "learning_rate": 8.798615641054093e-06, "loss": 0.2703, "step": 4699 }, { "epoch": 0.23314648544074607, "grad_norm": 7.990480422973633, "learning_rate": 8.798103791051696e-06, "loss": 0.3264, "step": 4700 }, { "epoch": 0.23319609107594622, "grad_norm": 7.335395336151123, "learning_rate": 8.797591846930567e-06, "loss": 0.2661, "step": 4701 }, { "epoch": 0.2332456967111464, "grad_norm": 9.883576393127441, "learning_rate": 8.797079808703395e-06, "loss": 0.3086, "step": 4702 }, { "epoch": 0.23329530234634654, "grad_norm": 7.533585071563721, "learning_rate": 8.796567676382867e-06, "loss": 0.368, "step": 4703 }, { "epoch": 0.2333449079815467, "grad_norm": 7.743425369262695, "learning_rate": 8.796055449981678e-06, "loss": 0.2643, "step": 4704 }, { "epoch": 0.23339451361674687, "grad_norm": 11.037745475769043, "learning_rate": 8.795543129512515e-06, "loss": 0.3485, "step": 4705 }, { "epoch": 0.23344411925194702, "grad_norm": 6.961883544921875, "learning_rate": 8.795030714988077e-06, "loss": 0.1705, "step": 4706 }, { "epoch": 0.23349372488714717, "grad_norm": 9.208016395568848, "learning_rate": 8.794518206421061e-06, "loss": 0.3375, "step": 4707 }, { "epoch": 0.23354333052234733, "grad_norm": 6.274508476257324, "learning_rate": 8.794005603824167e-06, "loss": 0.3096, "step": 4708 }, { "epoch": 0.2335929361575475, "grad_norm": 5.615492343902588, "learning_rate": 8.793492907210098e-06, "loss": 0.2966, "step": 4709 }, { "epoch": 0.23364254179274765, "grad_norm": 12.169706344604492, "learning_rate": 8.79298011659156e-06, "loss": 0.4727, "step": 4710 }, { "epoch": 0.2336921474279478, "grad_norm": 8.060370445251465, "learning_rate": 8.792467231981258e-06, "loss": 0.3802, "step": 4711 }, { "epoch": 0.23374175306314798, "grad_norm": 5.379899501800537, "learning_rate": 8.791954253391902e-06, "loss": 0.319, "step": 4712 }, { "epoch": 0.23379135869834813, "grad_norm": 5.074967861175537, "learning_rate": 8.791441180836204e-06, "loss": 0.2841, "step": 4713 }, { "epoch": 0.23384096433354828, "grad_norm": 14.15971565246582, "learning_rate": 8.79092801432688e-06, "loss": 0.3855, "step": 4714 }, { "epoch": 0.23389056996874846, "grad_norm": 4.978229999542236, "learning_rate": 8.790414753876645e-06, "loss": 0.2368, "step": 4715 }, { "epoch": 0.2339401756039486, "grad_norm": 6.066773891448975, "learning_rate": 8.789901399498217e-06, "loss": 0.3013, "step": 4716 }, { "epoch": 0.23398978123914876, "grad_norm": 6.129713535308838, "learning_rate": 8.789387951204317e-06, "loss": 0.2768, "step": 4717 }, { "epoch": 0.23403938687434891, "grad_norm": 8.40538501739502, "learning_rate": 8.788874409007672e-06, "loss": 0.3648, "step": 4718 }, { "epoch": 0.2340889925095491, "grad_norm": 5.8853631019592285, "learning_rate": 8.788360772921003e-06, "loss": 0.2724, "step": 4719 }, { "epoch": 0.23413859814474924, "grad_norm": 11.629155158996582, "learning_rate": 8.787847042957041e-06, "loss": 0.4245, "step": 4720 }, { "epoch": 0.2341882037799494, "grad_norm": 6.8453168869018555, "learning_rate": 8.787333219128516e-06, "loss": 0.3325, "step": 4721 }, { "epoch": 0.23423780941514957, "grad_norm": 10.069562911987305, "learning_rate": 8.786819301448163e-06, "loss": 0.2684, "step": 4722 }, { "epoch": 0.23428741505034972, "grad_norm": 5.8846845626831055, "learning_rate": 8.78630528992871e-06, "loss": 0.3075, "step": 4723 }, { "epoch": 0.23433702068554987, "grad_norm": 6.200575828552246, "learning_rate": 8.785791184582901e-06, "loss": 0.3401, "step": 4724 }, { "epoch": 0.23438662632075002, "grad_norm": 8.653850555419922, "learning_rate": 8.785276985423477e-06, "loss": 0.3423, "step": 4725 }, { "epoch": 0.2344362319559502, "grad_norm": 9.771991729736328, "learning_rate": 8.784762692463174e-06, "loss": 0.4893, "step": 4726 }, { "epoch": 0.23448583759115035, "grad_norm": 4.762378215789795, "learning_rate": 8.78424830571474e-06, "loss": 0.2761, "step": 4727 }, { "epoch": 0.2345354432263505, "grad_norm": 6.073694229125977, "learning_rate": 8.783733825190921e-06, "loss": 0.3306, "step": 4728 }, { "epoch": 0.23458504886155068, "grad_norm": 8.626784324645996, "learning_rate": 8.783219250904467e-06, "loss": 0.4283, "step": 4729 }, { "epoch": 0.23463465449675083, "grad_norm": 8.032404899597168, "learning_rate": 8.782704582868127e-06, "loss": 0.2515, "step": 4730 }, { "epoch": 0.23468426013195098, "grad_norm": 7.829229354858398, "learning_rate": 8.782189821094657e-06, "loss": 0.3796, "step": 4731 }, { "epoch": 0.23473386576715116, "grad_norm": 12.956279754638672, "learning_rate": 8.781674965596814e-06, "loss": 0.3267, "step": 4732 }, { "epoch": 0.2347834714023513, "grad_norm": 10.9618501663208, "learning_rate": 8.781160016387354e-06, "loss": 0.2243, "step": 4733 }, { "epoch": 0.23483307703755146, "grad_norm": 4.408595561981201, "learning_rate": 8.780644973479036e-06, "loss": 0.3024, "step": 4734 }, { "epoch": 0.2348826826727516, "grad_norm": 7.075747013092041, "learning_rate": 8.780129836884627e-06, "loss": 0.3854, "step": 4735 }, { "epoch": 0.2349322883079518, "grad_norm": 7.399988174438477, "learning_rate": 8.779614606616892e-06, "loss": 0.3592, "step": 4736 }, { "epoch": 0.23498189394315194, "grad_norm": 4.839559555053711, "learning_rate": 8.779099282688597e-06, "loss": 0.295, "step": 4737 }, { "epoch": 0.2350314995783521, "grad_norm": 6.669712066650391, "learning_rate": 8.778583865112511e-06, "loss": 0.2904, "step": 4738 }, { "epoch": 0.23508110521355227, "grad_norm": 7.988617420196533, "learning_rate": 8.778068353901408e-06, "loss": 0.3647, "step": 4739 }, { "epoch": 0.23513071084875242, "grad_norm": 12.177517890930176, "learning_rate": 8.777552749068062e-06, "loss": 0.3869, "step": 4740 }, { "epoch": 0.23518031648395257, "grad_norm": 13.006524085998535, "learning_rate": 8.77703705062525e-06, "loss": 0.3129, "step": 4741 }, { "epoch": 0.23522992211915272, "grad_norm": 5.671232223510742, "learning_rate": 8.776521258585752e-06, "loss": 0.2677, "step": 4742 }, { "epoch": 0.2352795277543529, "grad_norm": 9.954730033874512, "learning_rate": 8.77600537296235e-06, "loss": 0.3508, "step": 4743 }, { "epoch": 0.23532913338955305, "grad_norm": 5.502383232116699, "learning_rate": 8.775489393767824e-06, "loss": 0.2947, "step": 4744 }, { "epoch": 0.2353787390247532, "grad_norm": 19.89909553527832, "learning_rate": 8.774973321014966e-06, "loss": 0.3761, "step": 4745 }, { "epoch": 0.23542834465995338, "grad_norm": 9.726212501525879, "learning_rate": 8.774457154716561e-06, "loss": 0.4079, "step": 4746 }, { "epoch": 0.23547795029515353, "grad_norm": 9.441930770874023, "learning_rate": 8.773940894885398e-06, "loss": 0.3786, "step": 4747 }, { "epoch": 0.23552755593035368, "grad_norm": 6.1198225021362305, "learning_rate": 8.773424541534274e-06, "loss": 0.378, "step": 4748 }, { "epoch": 0.23557716156555383, "grad_norm": 7.229689121246338, "learning_rate": 8.772908094675983e-06, "loss": 0.394, "step": 4749 }, { "epoch": 0.235626767200754, "grad_norm": 5.746704578399658, "learning_rate": 8.772391554323322e-06, "loss": 0.3604, "step": 4750 }, { "epoch": 0.23567637283595416, "grad_norm": 8.392986297607422, "learning_rate": 8.771874920489092e-06, "loss": 0.2849, "step": 4751 }, { "epoch": 0.2357259784711543, "grad_norm": 6.083909034729004, "learning_rate": 8.771358193186096e-06, "loss": 0.3416, "step": 4752 }, { "epoch": 0.2357755841063545, "grad_norm": 7.260320663452148, "learning_rate": 8.770841372427137e-06, "loss": 0.3622, "step": 4753 }, { "epoch": 0.23582518974155464, "grad_norm": 5.567183971405029, "learning_rate": 8.770324458225023e-06, "loss": 0.3669, "step": 4754 }, { "epoch": 0.2358747953767548, "grad_norm": 6.8060832023620605, "learning_rate": 8.769807450592564e-06, "loss": 0.3849, "step": 4755 }, { "epoch": 0.23592440101195497, "grad_norm": 6.7957539558410645, "learning_rate": 8.769290349542573e-06, "loss": 0.3676, "step": 4756 }, { "epoch": 0.23597400664715512, "grad_norm": 29.121501922607422, "learning_rate": 8.76877315508786e-06, "loss": 0.2562, "step": 4757 }, { "epoch": 0.23602361228235527, "grad_norm": 5.316099643707275, "learning_rate": 8.768255867241243e-06, "loss": 0.2574, "step": 4758 }, { "epoch": 0.23607321791755542, "grad_norm": 5.06720495223999, "learning_rate": 8.767738486015543e-06, "loss": 0.269, "step": 4759 }, { "epoch": 0.2361228235527556, "grad_norm": 5.528696060180664, "learning_rate": 8.767221011423576e-06, "loss": 0.2514, "step": 4760 }, { "epoch": 0.23617242918795575, "grad_norm": 6.671326637268066, "learning_rate": 8.76670344347817e-06, "loss": 0.2141, "step": 4761 }, { "epoch": 0.2362220348231559, "grad_norm": 5.847105503082275, "learning_rate": 8.76618578219215e-06, "loss": 0.2666, "step": 4762 }, { "epoch": 0.23627164045835608, "grad_norm": 5.9985270500183105, "learning_rate": 8.765668027578341e-06, "loss": 0.3068, "step": 4763 }, { "epoch": 0.23632124609355623, "grad_norm": 34.83697509765625, "learning_rate": 8.765150179649574e-06, "loss": 0.3821, "step": 4764 }, { "epoch": 0.23637085172875638, "grad_norm": 12.978912353515625, "learning_rate": 8.764632238418685e-06, "loss": 0.3377, "step": 4765 }, { "epoch": 0.23642045736395653, "grad_norm": 8.901049613952637, "learning_rate": 8.764114203898506e-06, "loss": 0.321, "step": 4766 }, { "epoch": 0.2364700629991567, "grad_norm": 7.095558166503906, "learning_rate": 8.763596076101873e-06, "loss": 0.2856, "step": 4767 }, { "epoch": 0.23651966863435686, "grad_norm": 5.831636428833008, "learning_rate": 8.763077855041626e-06, "loss": 0.2478, "step": 4768 }, { "epoch": 0.236569274269557, "grad_norm": 5.467679023742676, "learning_rate": 8.76255954073061e-06, "loss": 0.3072, "step": 4769 }, { "epoch": 0.2366188799047572, "grad_norm": 5.759185791015625, "learning_rate": 8.762041133181664e-06, "loss": 0.2905, "step": 4770 }, { "epoch": 0.23666848553995734, "grad_norm": 16.85489845275879, "learning_rate": 8.761522632407639e-06, "loss": 0.4486, "step": 4771 }, { "epoch": 0.2367180911751575, "grad_norm": 11.676318168640137, "learning_rate": 8.76100403842138e-06, "loss": 0.4288, "step": 4772 }, { "epoch": 0.23676769681035767, "grad_norm": 12.351155281066895, "learning_rate": 8.760485351235741e-06, "loss": 0.3087, "step": 4773 }, { "epoch": 0.23681730244555782, "grad_norm": 14.711747169494629, "learning_rate": 8.759966570863573e-06, "loss": 0.2015, "step": 4774 }, { "epoch": 0.23686690808075797, "grad_norm": 21.187498092651367, "learning_rate": 8.759447697317734e-06, "loss": 0.6814, "step": 4775 }, { "epoch": 0.23691651371595812, "grad_norm": 7.748876571655273, "learning_rate": 8.758928730611077e-06, "loss": 0.3303, "step": 4776 }, { "epoch": 0.2369661193511583, "grad_norm": 10.030688285827637, "learning_rate": 8.758409670756469e-06, "loss": 0.3376, "step": 4777 }, { "epoch": 0.23701572498635845, "grad_norm": 9.89483642578125, "learning_rate": 8.757890517766769e-06, "loss": 0.47, "step": 4778 }, { "epoch": 0.2370653306215586, "grad_norm": 10.015938758850098, "learning_rate": 8.757371271654843e-06, "loss": 0.4479, "step": 4779 }, { "epoch": 0.23711493625675878, "grad_norm": 7.327580451965332, "learning_rate": 8.756851932433555e-06, "loss": 0.1799, "step": 4780 }, { "epoch": 0.23716454189195893, "grad_norm": 5.659873008728027, "learning_rate": 8.756332500115776e-06, "loss": 0.2949, "step": 4781 }, { "epoch": 0.23721414752715908, "grad_norm": 4.483700275421143, "learning_rate": 8.75581297471438e-06, "loss": 0.276, "step": 4782 }, { "epoch": 0.23726375316235923, "grad_norm": 7.554708003997803, "learning_rate": 8.75529335624224e-06, "loss": 0.2559, "step": 4783 }, { "epoch": 0.2373133587975594, "grad_norm": 7.312623977661133, "learning_rate": 8.75477364471223e-06, "loss": 0.3331, "step": 4784 }, { "epoch": 0.23736296443275956, "grad_norm": 5.363890171051025, "learning_rate": 8.754253840137232e-06, "loss": 0.3042, "step": 4785 }, { "epoch": 0.2374125700679597, "grad_norm": 13.030547142028809, "learning_rate": 8.753733942530128e-06, "loss": 0.4003, "step": 4786 }, { "epoch": 0.2374621757031599, "grad_norm": 6.288585186004639, "learning_rate": 8.753213951903795e-06, "loss": 0.3965, "step": 4787 }, { "epoch": 0.23751178133836004, "grad_norm": 7.041198253631592, "learning_rate": 8.752693868271124e-06, "loss": 0.3194, "step": 4788 }, { "epoch": 0.2375613869735602, "grad_norm": 5.890784740447998, "learning_rate": 8.752173691645e-06, "loss": 0.394, "step": 4789 }, { "epoch": 0.23761099260876037, "grad_norm": 9.718550682067871, "learning_rate": 8.751653422038317e-06, "loss": 0.3689, "step": 4790 }, { "epoch": 0.23766059824396052, "grad_norm": 8.640804290771484, "learning_rate": 8.751133059463966e-06, "loss": 0.3994, "step": 4791 }, { "epoch": 0.23771020387916067, "grad_norm": 5.380307197570801, "learning_rate": 8.750612603934839e-06, "loss": 0.3129, "step": 4792 }, { "epoch": 0.23775980951436082, "grad_norm": 5.36586856842041, "learning_rate": 8.750092055463835e-06, "loss": 0.2683, "step": 4793 }, { "epoch": 0.237809415149561, "grad_norm": 4.652355670928955, "learning_rate": 8.749571414063853e-06, "loss": 0.256, "step": 4794 }, { "epoch": 0.23785902078476115, "grad_norm": 4.5697503089904785, "learning_rate": 8.749050679747796e-06, "loss": 0.3434, "step": 4795 }, { "epoch": 0.2379086264199613, "grad_norm": 5.152519226074219, "learning_rate": 8.748529852528567e-06, "loss": 0.2948, "step": 4796 }, { "epoch": 0.23795823205516148, "grad_norm": 10.370455741882324, "learning_rate": 8.748008932419073e-06, "loss": 0.4253, "step": 4797 }, { "epoch": 0.23800783769036163, "grad_norm": 22.783714294433594, "learning_rate": 8.747487919432223e-06, "loss": 0.4538, "step": 4798 }, { "epoch": 0.23805744332556178, "grad_norm": 10.072460174560547, "learning_rate": 8.746966813580926e-06, "loss": 0.4269, "step": 4799 }, { "epoch": 0.23810704896076193, "grad_norm": 9.098079681396484, "learning_rate": 8.746445614878096e-06, "loss": 0.3374, "step": 4800 }, { "epoch": 0.2381566545959621, "grad_norm": 11.692249298095703, "learning_rate": 8.74592432333665e-06, "loss": 0.4964, "step": 4801 }, { "epoch": 0.23820626023116226, "grad_norm": 5.904872417449951, "learning_rate": 8.745402938969503e-06, "loss": 0.3662, "step": 4802 }, { "epoch": 0.2382558658663624, "grad_norm": 11.375890731811523, "learning_rate": 8.744881461789579e-06, "loss": 0.2849, "step": 4803 }, { "epoch": 0.2383054715015626, "grad_norm": 5.2152323722839355, "learning_rate": 8.744359891809799e-06, "loss": 0.271, "step": 4804 }, { "epoch": 0.23835507713676274, "grad_norm": 15.4353666305542, "learning_rate": 8.743838229043084e-06, "loss": 0.2836, "step": 4805 }, { "epoch": 0.2384046827719629, "grad_norm": 9.469985008239746, "learning_rate": 8.743316473502368e-06, "loss": 0.4003, "step": 4806 }, { "epoch": 0.23845428840716307, "grad_norm": 8.305567741394043, "learning_rate": 8.742794625200574e-06, "loss": 0.4039, "step": 4807 }, { "epoch": 0.23850389404236322, "grad_norm": 9.308073043823242, "learning_rate": 8.742272684150637e-06, "loss": 0.3718, "step": 4808 }, { "epoch": 0.23855349967756337, "grad_norm": 14.793691635131836, "learning_rate": 8.74175065036549e-06, "loss": 0.3796, "step": 4809 }, { "epoch": 0.23860310531276352, "grad_norm": 6.856769561767578, "learning_rate": 8.741228523858069e-06, "loss": 0.3296, "step": 4810 }, { "epoch": 0.2386527109479637, "grad_norm": 11.46412181854248, "learning_rate": 8.740706304641312e-06, "loss": 0.4689, "step": 4811 }, { "epoch": 0.23870231658316385, "grad_norm": 6.454957008361816, "learning_rate": 8.740183992728162e-06, "loss": 0.2287, "step": 4812 }, { "epoch": 0.238751922218364, "grad_norm": 5.64779806137085, "learning_rate": 8.73966158813156e-06, "loss": 0.355, "step": 4813 }, { "epoch": 0.23880152785356418, "grad_norm": 9.20823860168457, "learning_rate": 8.739139090864452e-06, "loss": 0.3924, "step": 4814 }, { "epoch": 0.23885113348876433, "grad_norm": 4.345889091491699, "learning_rate": 8.738616500939788e-06, "loss": 0.3341, "step": 4815 }, { "epoch": 0.23890073912396448, "grad_norm": 4.286038398742676, "learning_rate": 8.738093818370514e-06, "loss": 0.3252, "step": 4816 }, { "epoch": 0.23895034475916463, "grad_norm": 8.19092845916748, "learning_rate": 8.737571043169585e-06, "loss": 0.3566, "step": 4817 }, { "epoch": 0.2389999503943648, "grad_norm": 7.681640148162842, "learning_rate": 8.737048175349953e-06, "loss": 0.2696, "step": 4818 }, { "epoch": 0.23904955602956496, "grad_norm": 5.2992730140686035, "learning_rate": 8.73652521492458e-06, "loss": 0.3291, "step": 4819 }, { "epoch": 0.2390991616647651, "grad_norm": 7.2621564865112305, "learning_rate": 8.73600216190642e-06, "loss": 0.3662, "step": 4820 }, { "epoch": 0.2391487672999653, "grad_norm": 5.315970420837402, "learning_rate": 8.735479016308437e-06, "loss": 0.2544, "step": 4821 }, { "epoch": 0.23919837293516544, "grad_norm": 9.33275032043457, "learning_rate": 8.734955778143594e-06, "loss": 0.3157, "step": 4822 }, { "epoch": 0.2392479785703656, "grad_norm": 5.688447952270508, "learning_rate": 8.734432447424856e-06, "loss": 0.2831, "step": 4823 }, { "epoch": 0.23929758420556574, "grad_norm": 5.930923938751221, "learning_rate": 8.733909024165195e-06, "loss": 0.3203, "step": 4824 }, { "epoch": 0.23934718984076592, "grad_norm": 5.691405773162842, "learning_rate": 8.733385508377578e-06, "loss": 0.3059, "step": 4825 }, { "epoch": 0.23939679547596607, "grad_norm": 5.741551876068115, "learning_rate": 8.732861900074981e-06, "loss": 0.3053, "step": 4826 }, { "epoch": 0.23944640111116622, "grad_norm": 7.737889289855957, "learning_rate": 8.732338199270376e-06, "loss": 0.3714, "step": 4827 }, { "epoch": 0.2394960067463664, "grad_norm": 7.706058502197266, "learning_rate": 8.731814405976742e-06, "loss": 0.3116, "step": 4828 }, { "epoch": 0.23954561238156655, "grad_norm": 4.423486232757568, "learning_rate": 8.731290520207058e-06, "loss": 0.2511, "step": 4829 }, { "epoch": 0.2395952180167667, "grad_norm": 16.15536117553711, "learning_rate": 8.730766541974309e-06, "loss": 0.3531, "step": 4830 }, { "epoch": 0.23964482365196688, "grad_norm": 13.843965530395508, "learning_rate": 8.730242471291476e-06, "loss": 0.4533, "step": 4831 }, { "epoch": 0.23969442928716703, "grad_norm": 13.24403190612793, "learning_rate": 8.729718308171548e-06, "loss": 0.6612, "step": 4832 }, { "epoch": 0.23974403492236718, "grad_norm": 6.824463844299316, "learning_rate": 8.729194052627514e-06, "loss": 0.2574, "step": 4833 }, { "epoch": 0.23979364055756733, "grad_norm": 8.6362943649292, "learning_rate": 8.728669704672364e-06, "loss": 0.2749, "step": 4834 }, { "epoch": 0.2398432461927675, "grad_norm": 6.38847017288208, "learning_rate": 8.728145264319093e-06, "loss": 0.3334, "step": 4835 }, { "epoch": 0.23989285182796766, "grad_norm": 6.238589286804199, "learning_rate": 8.727620731580692e-06, "loss": 0.3209, "step": 4836 }, { "epoch": 0.2399424574631678, "grad_norm": 9.375105857849121, "learning_rate": 8.727096106470168e-06, "loss": 0.4029, "step": 4837 }, { "epoch": 0.23999206309836799, "grad_norm": 5.055041790008545, "learning_rate": 8.726571389000514e-06, "loss": 0.2597, "step": 4838 }, { "epoch": 0.24004166873356814, "grad_norm": 8.205262184143066, "learning_rate": 8.726046579184734e-06, "loss": 0.477, "step": 4839 }, { "epoch": 0.2400912743687683, "grad_norm": 9.941280364990234, "learning_rate": 8.725521677035837e-06, "loss": 0.3083, "step": 4840 }, { "epoch": 0.24014088000396844, "grad_norm": 14.23498249053955, "learning_rate": 8.724996682566826e-06, "loss": 0.4734, "step": 4841 }, { "epoch": 0.24019048563916862, "grad_norm": 8.662179946899414, "learning_rate": 8.724471595790713e-06, "loss": 0.3217, "step": 4842 }, { "epoch": 0.24024009127436877, "grad_norm": 14.480212211608887, "learning_rate": 8.723946416720509e-06, "loss": 0.3363, "step": 4843 }, { "epoch": 0.24028969690956892, "grad_norm": 6.500362873077393, "learning_rate": 8.723421145369227e-06, "loss": 0.327, "step": 4844 }, { "epoch": 0.2403393025447691, "grad_norm": 5.308797836303711, "learning_rate": 8.722895781749887e-06, "loss": 0.2741, "step": 4845 }, { "epoch": 0.24038890817996925, "grad_norm": 7.066781520843506, "learning_rate": 8.722370325875504e-06, "loss": 0.4501, "step": 4846 }, { "epoch": 0.2404385138151694, "grad_norm": 5.6832122802734375, "learning_rate": 8.7218447777591e-06, "loss": 0.2587, "step": 4847 }, { "epoch": 0.24048811945036958, "grad_norm": 8.595324516296387, "learning_rate": 8.7213191374137e-06, "loss": 0.3063, "step": 4848 }, { "epoch": 0.24053772508556973, "grad_norm": 4.751644134521484, "learning_rate": 8.720793404852328e-06, "loss": 0.2688, "step": 4849 }, { "epoch": 0.24058733072076988, "grad_norm": 5.581527233123779, "learning_rate": 8.720267580088012e-06, "loss": 0.3322, "step": 4850 }, { "epoch": 0.24063693635597003, "grad_norm": 4.5230278968811035, "learning_rate": 8.719741663133783e-06, "loss": 0.2719, "step": 4851 }, { "epoch": 0.2406865419911702, "grad_norm": 9.105645179748535, "learning_rate": 8.719215654002672e-06, "loss": 0.3369, "step": 4852 }, { "epoch": 0.24073614762637036, "grad_norm": 5.386175632476807, "learning_rate": 8.718689552707716e-06, "loss": 0.2773, "step": 4853 }, { "epoch": 0.2407857532615705, "grad_norm": 4.059303283691406, "learning_rate": 8.718163359261951e-06, "loss": 0.2546, "step": 4854 }, { "epoch": 0.24083535889677068, "grad_norm": 6.9268317222595215, "learning_rate": 8.717637073678416e-06, "loss": 0.4001, "step": 4855 }, { "epoch": 0.24088496453197084, "grad_norm": 5.996947288513184, "learning_rate": 8.71711069597015e-06, "loss": 0.2365, "step": 4856 }, { "epoch": 0.24093457016717099, "grad_norm": 4.276791572570801, "learning_rate": 8.716584226150203e-06, "loss": 0.2741, "step": 4857 }, { "epoch": 0.24098417580237114, "grad_norm": 9.744200706481934, "learning_rate": 8.716057664231616e-06, "loss": 0.3814, "step": 4858 }, { "epoch": 0.24103378143757132, "grad_norm": 3.9128119945526123, "learning_rate": 8.715531010227438e-06, "loss": 0.2754, "step": 4859 }, { "epoch": 0.24108338707277147, "grad_norm": 6.53959846496582, "learning_rate": 8.715004264150723e-06, "loss": 0.2607, "step": 4860 }, { "epoch": 0.24113299270797162, "grad_norm": 6.1652727127075195, "learning_rate": 8.714477426014521e-06, "loss": 0.406, "step": 4861 }, { "epoch": 0.2411825983431718, "grad_norm": 6.860401630401611, "learning_rate": 8.713950495831888e-06, "loss": 0.3702, "step": 4862 }, { "epoch": 0.24123220397837195, "grad_norm": 14.109601020812988, "learning_rate": 8.71342347361588e-06, "loss": 0.5158, "step": 4863 }, { "epoch": 0.2412818096135721, "grad_norm": 9.815552711486816, "learning_rate": 8.71289635937956e-06, "loss": 0.3755, "step": 4864 }, { "epoch": 0.24133141524877227, "grad_norm": 6.327269077301025, "learning_rate": 8.712369153135991e-06, "loss": 0.3965, "step": 4865 }, { "epoch": 0.24138102088397242, "grad_norm": 7.617829322814941, "learning_rate": 8.711841854898233e-06, "loss": 0.4463, "step": 4866 }, { "epoch": 0.24143062651917258, "grad_norm": 4.661076068878174, "learning_rate": 8.711314464679355e-06, "loss": 0.2359, "step": 4867 }, { "epoch": 0.24148023215437273, "grad_norm": 4.328101634979248, "learning_rate": 8.710786982492424e-06, "loss": 0.2667, "step": 4868 }, { "epoch": 0.2415298377895729, "grad_norm": 7.558294773101807, "learning_rate": 8.710259408350516e-06, "loss": 0.3331, "step": 4869 }, { "epoch": 0.24157944342477305, "grad_norm": 4.144596099853516, "learning_rate": 8.7097317422667e-06, "loss": 0.3719, "step": 4870 }, { "epoch": 0.2416290490599732, "grad_norm": 6.059451103210449, "learning_rate": 8.709203984254054e-06, "loss": 0.255, "step": 4871 }, { "epoch": 0.24167865469517338, "grad_norm": 8.075830459594727, "learning_rate": 8.708676134325656e-06, "loss": 0.329, "step": 4872 }, { "epoch": 0.24172826033037353, "grad_norm": 18.41478157043457, "learning_rate": 8.708148192494585e-06, "loss": 0.6097, "step": 4873 }, { "epoch": 0.24177786596557369, "grad_norm": 6.304973125457764, "learning_rate": 8.707620158773925e-06, "loss": 0.3515, "step": 4874 }, { "epoch": 0.24182747160077384, "grad_norm": 5.6059064865112305, "learning_rate": 8.707092033176759e-06, "loss": 0.2799, "step": 4875 }, { "epoch": 0.24187707723597401, "grad_norm": 4.876119613647461, "learning_rate": 8.706563815716175e-06, "loss": 0.2957, "step": 4876 }, { "epoch": 0.24192668287117416, "grad_norm": 6.070199489593506, "learning_rate": 8.706035506405263e-06, "loss": 0.2226, "step": 4877 }, { "epoch": 0.24197628850637432, "grad_norm": 11.435314178466797, "learning_rate": 8.705507105257116e-06, "loss": 0.4553, "step": 4878 }, { "epoch": 0.2420258941415745, "grad_norm": 7.2267165184021, "learning_rate": 8.704978612284827e-06, "loss": 0.3896, "step": 4879 }, { "epoch": 0.24207549977677464, "grad_norm": 5.476332664489746, "learning_rate": 8.704450027501492e-06, "loss": 0.2711, "step": 4880 }, { "epoch": 0.2421251054119748, "grad_norm": 10.58021068572998, "learning_rate": 8.70392135092021e-06, "loss": 0.428, "step": 4881 }, { "epoch": 0.24217471104717495, "grad_norm": 5.613966464996338, "learning_rate": 8.70339258255408e-06, "loss": 0.3767, "step": 4882 }, { "epoch": 0.24222431668237512, "grad_norm": 6.2068352699279785, "learning_rate": 8.702863722416206e-06, "loss": 0.3221, "step": 4883 }, { "epoch": 0.24227392231757527, "grad_norm": 11.754383087158203, "learning_rate": 8.702334770519696e-06, "loss": 0.3266, "step": 4884 }, { "epoch": 0.24232352795277542, "grad_norm": 8.764625549316406, "learning_rate": 8.701805726877655e-06, "loss": 0.2954, "step": 4885 }, { "epoch": 0.2423731335879756, "grad_norm": 7.455016613006592, "learning_rate": 8.701276591503194e-06, "loss": 0.2301, "step": 4886 }, { "epoch": 0.24242273922317575, "grad_norm": 7.394196033477783, "learning_rate": 8.700747364409423e-06, "loss": 0.2718, "step": 4887 }, { "epoch": 0.2424723448583759, "grad_norm": 7.432955741882324, "learning_rate": 8.700218045609459e-06, "loss": 0.3888, "step": 4888 }, { "epoch": 0.24252195049357608, "grad_norm": 7.884835243225098, "learning_rate": 8.699688635116418e-06, "loss": 0.3192, "step": 4889 }, { "epoch": 0.24257155612877623, "grad_norm": 5.201120853424072, "learning_rate": 8.69915913294342e-06, "loss": 0.3004, "step": 4890 }, { "epoch": 0.24262116176397638, "grad_norm": 6.206422805786133, "learning_rate": 8.698629539103585e-06, "loss": 0.3455, "step": 4891 }, { "epoch": 0.24267076739917653, "grad_norm": 7.1545209884643555, "learning_rate": 8.698099853610037e-06, "loss": 0.3198, "step": 4892 }, { "epoch": 0.2427203730343767, "grad_norm": 11.26246452331543, "learning_rate": 8.697570076475902e-06, "loss": 0.4899, "step": 4893 }, { "epoch": 0.24276997866957686, "grad_norm": 10.102889060974121, "learning_rate": 8.697040207714308e-06, "loss": 0.4635, "step": 4894 }, { "epoch": 0.24281958430477701, "grad_norm": 4.74357795715332, "learning_rate": 8.696510247338384e-06, "loss": 0.2561, "step": 4895 }, { "epoch": 0.2428691899399772, "grad_norm": 6.60405969619751, "learning_rate": 8.695980195361266e-06, "loss": 0.3077, "step": 4896 }, { "epoch": 0.24291879557517734, "grad_norm": 5.731534004211426, "learning_rate": 8.695450051796085e-06, "loss": 0.3308, "step": 4897 }, { "epoch": 0.2429684012103775, "grad_norm": 4.025793075561523, "learning_rate": 8.694919816655982e-06, "loss": 0.3247, "step": 4898 }, { "epoch": 0.24301800684557764, "grad_norm": 7.757547378540039, "learning_rate": 8.694389489954092e-06, "loss": 0.3247, "step": 4899 }, { "epoch": 0.24306761248077782, "grad_norm": 8.833586692810059, "learning_rate": 8.693859071703561e-06, "loss": 0.3626, "step": 4900 }, { "epoch": 0.24311721811597797, "grad_norm": 4.977424144744873, "learning_rate": 8.69332856191753e-06, "loss": 0.1862, "step": 4901 }, { "epoch": 0.24316682375117812, "grad_norm": 8.980056762695312, "learning_rate": 8.692797960609148e-06, "loss": 0.3507, "step": 4902 }, { "epoch": 0.2432164293863783, "grad_norm": 10.060263633728027, "learning_rate": 8.692267267791561e-06, "loss": 0.4212, "step": 4903 }, { "epoch": 0.24326603502157845, "grad_norm": 6.891430854797363, "learning_rate": 8.691736483477921e-06, "loss": 0.3303, "step": 4904 }, { "epoch": 0.2433156406567786, "grad_norm": 7.886997699737549, "learning_rate": 8.691205607681383e-06, "loss": 0.2871, "step": 4905 }, { "epoch": 0.24336524629197878, "grad_norm": 4.575383186340332, "learning_rate": 8.6906746404151e-06, "loss": 0.2551, "step": 4906 }, { "epoch": 0.24341485192717893, "grad_norm": 6.744558811187744, "learning_rate": 8.690143581692227e-06, "loss": 0.2555, "step": 4907 }, { "epoch": 0.24346445756237908, "grad_norm": 10.370920181274414, "learning_rate": 8.68961243152593e-06, "loss": 0.3835, "step": 4908 }, { "epoch": 0.24351406319757923, "grad_norm": 10.969734191894531, "learning_rate": 8.689081189929365e-06, "loss": 0.3637, "step": 4909 }, { "epoch": 0.2435636688327794, "grad_norm": 7.889966011047363, "learning_rate": 8.688549856915703e-06, "loss": 0.3542, "step": 4910 }, { "epoch": 0.24361327446797956, "grad_norm": 6.098058223724365, "learning_rate": 8.688018432498104e-06, "loss": 0.3845, "step": 4911 }, { "epoch": 0.2436628801031797, "grad_norm": 8.574195861816406, "learning_rate": 8.68748691668974e-06, "loss": 0.3107, "step": 4912 }, { "epoch": 0.2437124857383799, "grad_norm": 7.057313919067383, "learning_rate": 8.686955309503784e-06, "loss": 0.2292, "step": 4913 }, { "epoch": 0.24376209137358004, "grad_norm": 9.496316909790039, "learning_rate": 8.686423610953406e-06, "loss": 0.385, "step": 4914 }, { "epoch": 0.2438116970087802, "grad_norm": 4.492580890655518, "learning_rate": 8.685891821051783e-06, "loss": 0.2349, "step": 4915 }, { "epoch": 0.24386130264398034, "grad_norm": 4.11447286605835, "learning_rate": 8.685359939812094e-06, "loss": 0.3163, "step": 4916 }, { "epoch": 0.24391090827918052, "grad_norm": 6.649402141571045, "learning_rate": 8.68482796724752e-06, "loss": 0.2926, "step": 4917 }, { "epoch": 0.24396051391438067, "grad_norm": 6.831770420074463, "learning_rate": 8.684295903371242e-06, "loss": 0.2626, "step": 4918 }, { "epoch": 0.24401011954958082, "grad_norm": 9.798798561096191, "learning_rate": 8.683763748196444e-06, "loss": 0.3664, "step": 4919 }, { "epoch": 0.244059725184781, "grad_norm": 4.755085468292236, "learning_rate": 8.683231501736314e-06, "loss": 0.2743, "step": 4920 }, { "epoch": 0.24410933081998115, "grad_norm": 6.661286354064941, "learning_rate": 8.682699164004042e-06, "loss": 0.3325, "step": 4921 }, { "epoch": 0.2441589364551813, "grad_norm": 5.652699947357178, "learning_rate": 8.682166735012818e-06, "loss": 0.2784, "step": 4922 }, { "epoch": 0.24420854209038148, "grad_norm": 8.679305076599121, "learning_rate": 8.681634214775835e-06, "loss": 0.443, "step": 4923 }, { "epoch": 0.24425814772558163, "grad_norm": 6.508243560791016, "learning_rate": 8.681101603306295e-06, "loss": 0.3932, "step": 4924 }, { "epoch": 0.24430775336078178, "grad_norm": 7.367760181427002, "learning_rate": 8.680568900617389e-06, "loss": 0.377, "step": 4925 }, { "epoch": 0.24435735899598193, "grad_norm": 7.396305084228516, "learning_rate": 8.680036106722321e-06, "loss": 0.4125, "step": 4926 }, { "epoch": 0.2444069646311821, "grad_norm": 5.691708087921143, "learning_rate": 8.679503221634294e-06, "loss": 0.2775, "step": 4927 }, { "epoch": 0.24445657026638226, "grad_norm": 5.76488733291626, "learning_rate": 8.678970245366514e-06, "loss": 0.3584, "step": 4928 }, { "epoch": 0.2445061759015824, "grad_norm": 7.45761251449585, "learning_rate": 8.678437177932185e-06, "loss": 0.2724, "step": 4929 }, { "epoch": 0.2445557815367826, "grad_norm": 7.005194187164307, "learning_rate": 8.677904019344521e-06, "loss": 0.3688, "step": 4930 }, { "epoch": 0.24460538717198274, "grad_norm": 6.0391716957092285, "learning_rate": 8.67737076961673e-06, "loss": 0.3475, "step": 4931 }, { "epoch": 0.2446549928071829, "grad_norm": 13.710672378540039, "learning_rate": 8.676837428762028e-06, "loss": 0.3362, "step": 4932 }, { "epoch": 0.24470459844238304, "grad_norm": 11.577655792236328, "learning_rate": 8.67630399679363e-06, "loss": 0.3348, "step": 4933 }, { "epoch": 0.24475420407758322, "grad_norm": 5.8351969718933105, "learning_rate": 8.675770473724759e-06, "loss": 0.222, "step": 4934 }, { "epoch": 0.24480380971278337, "grad_norm": 10.096539497375488, "learning_rate": 8.675236859568631e-06, "loss": 0.4994, "step": 4935 }, { "epoch": 0.24485341534798352, "grad_norm": 4.891762733459473, "learning_rate": 8.674703154338473e-06, "loss": 0.2395, "step": 4936 }, { "epoch": 0.2449030209831837, "grad_norm": 5.413550853729248, "learning_rate": 8.674169358047507e-06, "loss": 0.3148, "step": 4937 }, { "epoch": 0.24495262661838385, "grad_norm": 5.928805351257324, "learning_rate": 8.673635470708964e-06, "loss": 0.3376, "step": 4938 }, { "epoch": 0.245002232253584, "grad_norm": 5.55855655670166, "learning_rate": 8.67310149233607e-06, "loss": 0.3968, "step": 4939 }, { "epoch": 0.24505183788878418, "grad_norm": 7.338902473449707, "learning_rate": 8.67256742294206e-06, "loss": 0.3697, "step": 4940 }, { "epoch": 0.24510144352398433, "grad_norm": 11.739971160888672, "learning_rate": 8.672033262540169e-06, "loss": 0.3461, "step": 4941 }, { "epoch": 0.24515104915918448, "grad_norm": 13.08261489868164, "learning_rate": 8.671499011143632e-06, "loss": 0.352, "step": 4942 }, { "epoch": 0.24520065479438463, "grad_norm": 7.062291622161865, "learning_rate": 8.670964668765689e-06, "loss": 0.3301, "step": 4943 }, { "epoch": 0.2452502604295848, "grad_norm": 5.951625347137451, "learning_rate": 8.67043023541958e-06, "loss": 0.2566, "step": 4944 }, { "epoch": 0.24529986606478496, "grad_norm": 8.063338279724121, "learning_rate": 8.66989571111855e-06, "loss": 0.3956, "step": 4945 }, { "epoch": 0.2453494716999851, "grad_norm": 6.577303886413574, "learning_rate": 8.669361095875844e-06, "loss": 0.3104, "step": 4946 }, { "epoch": 0.2453990773351853, "grad_norm": 12.797369003295898, "learning_rate": 8.668826389704713e-06, "loss": 0.401, "step": 4947 }, { "epoch": 0.24544868297038544, "grad_norm": 3.7240726947784424, "learning_rate": 8.6682915926184e-06, "loss": 0.347, "step": 4948 }, { "epoch": 0.2454982886055856, "grad_norm": 6.282966136932373, "learning_rate": 8.667756704630164e-06, "loss": 0.3911, "step": 4949 }, { "epoch": 0.24554789424078574, "grad_norm": 14.772244453430176, "learning_rate": 8.667221725753258e-06, "loss": 0.2575, "step": 4950 }, { "epoch": 0.24559749987598592, "grad_norm": 6.933682441711426, "learning_rate": 8.666686656000939e-06, "loss": 0.2764, "step": 4951 }, { "epoch": 0.24564710551118607, "grad_norm": 7.409311294555664, "learning_rate": 8.666151495386465e-06, "loss": 0.3151, "step": 4952 }, { "epoch": 0.24569671114638622, "grad_norm": 6.994854927062988, "learning_rate": 8.6656162439231e-06, "loss": 0.3346, "step": 4953 }, { "epoch": 0.2457463167815864, "grad_norm": 6.013587951660156, "learning_rate": 8.665080901624105e-06, "loss": 0.3658, "step": 4954 }, { "epoch": 0.24579592241678655, "grad_norm": 6.885890483856201, "learning_rate": 8.664545468502749e-06, "loss": 0.4092, "step": 4955 }, { "epoch": 0.2458455280519867, "grad_norm": 11.093948364257812, "learning_rate": 8.664009944572296e-06, "loss": 0.369, "step": 4956 }, { "epoch": 0.24589513368718685, "grad_norm": 4.860503673553467, "learning_rate": 8.663474329846022e-06, "loss": 0.3221, "step": 4957 }, { "epoch": 0.24594473932238703, "grad_norm": 7.0385613441467285, "learning_rate": 8.662938624337195e-06, "loss": 0.2938, "step": 4958 }, { "epoch": 0.24599434495758718, "grad_norm": 8.322954177856445, "learning_rate": 8.662402828059093e-06, "loss": 0.275, "step": 4959 }, { "epoch": 0.24604395059278733, "grad_norm": 8.395994186401367, "learning_rate": 8.661866941024993e-06, "loss": 0.3516, "step": 4960 }, { "epoch": 0.2460935562279875, "grad_norm": 16.195695877075195, "learning_rate": 8.661330963248172e-06, "loss": 0.2986, "step": 4961 }, { "epoch": 0.24614316186318766, "grad_norm": 8.947867393493652, "learning_rate": 8.660794894741912e-06, "loss": 0.3751, "step": 4962 }, { "epoch": 0.2461927674983878, "grad_norm": 7.23123025894165, "learning_rate": 8.660258735519502e-06, "loss": 0.3461, "step": 4963 }, { "epoch": 0.246242373133588, "grad_norm": 23.045860290527344, "learning_rate": 8.659722485594224e-06, "loss": 0.3731, "step": 4964 }, { "epoch": 0.24629197876878814, "grad_norm": 8.332520484924316, "learning_rate": 8.659186144979367e-06, "loss": 0.2966, "step": 4965 }, { "epoch": 0.2463415844039883, "grad_norm": 8.636780738830566, "learning_rate": 8.65864971368822e-06, "loss": 0.3759, "step": 4966 }, { "epoch": 0.24639119003918844, "grad_norm": 7.239997863769531, "learning_rate": 8.658113191734081e-06, "loss": 0.3707, "step": 4967 }, { "epoch": 0.24644079567438862, "grad_norm": 7.1891303062438965, "learning_rate": 8.657576579130242e-06, "loss": 0.3134, "step": 4968 }, { "epoch": 0.24649040130958877, "grad_norm": 6.120919227600098, "learning_rate": 8.657039875890001e-06, "loss": 0.2926, "step": 4969 }, { "epoch": 0.24654000694478892, "grad_norm": 13.046150207519531, "learning_rate": 8.656503082026656e-06, "loss": 0.4552, "step": 4970 }, { "epoch": 0.2465896125799891, "grad_norm": 5.098702907562256, "learning_rate": 8.655966197553511e-06, "loss": 0.2759, "step": 4971 }, { "epoch": 0.24663921821518925, "grad_norm": 6.085293292999268, "learning_rate": 8.65542922248387e-06, "loss": 0.336, "step": 4972 }, { "epoch": 0.2466888238503894, "grad_norm": 6.024162769317627, "learning_rate": 8.65489215683104e-06, "loss": 0.3505, "step": 4973 }, { "epoch": 0.24673842948558955, "grad_norm": 6.0855793952941895, "learning_rate": 8.654355000608329e-06, "loss": 0.3339, "step": 4974 }, { "epoch": 0.24678803512078973, "grad_norm": 14.351767539978027, "learning_rate": 8.653817753829048e-06, "loss": 0.4792, "step": 4975 }, { "epoch": 0.24683764075598988, "grad_norm": 6.915399551391602, "learning_rate": 8.65328041650651e-06, "loss": 0.4493, "step": 4976 }, { "epoch": 0.24688724639119003, "grad_norm": 13.51931381225586, "learning_rate": 8.65274298865403e-06, "loss": 0.4648, "step": 4977 }, { "epoch": 0.2469368520263902, "grad_norm": 5.508246898651123, "learning_rate": 8.652205470284927e-06, "loss": 0.2086, "step": 4978 }, { "epoch": 0.24698645766159036, "grad_norm": 6.525710105895996, "learning_rate": 8.651667861412521e-06, "loss": 0.3273, "step": 4979 }, { "epoch": 0.2470360632967905, "grad_norm": 3.5476315021514893, "learning_rate": 8.651130162050133e-06, "loss": 0.2739, "step": 4980 }, { "epoch": 0.2470856689319907, "grad_norm": 7.1289262771606445, "learning_rate": 8.650592372211088e-06, "loss": 0.4529, "step": 4981 }, { "epoch": 0.24713527456719084, "grad_norm": 5.461895942687988, "learning_rate": 8.650054491908714e-06, "loss": 0.2954, "step": 4982 }, { "epoch": 0.247184880202391, "grad_norm": 6.1187944412231445, "learning_rate": 8.649516521156338e-06, "loss": 0.2982, "step": 4983 }, { "epoch": 0.24723448583759114, "grad_norm": 5.318274021148682, "learning_rate": 8.64897845996729e-06, "loss": 0.3391, "step": 4984 }, { "epoch": 0.24728409147279132, "grad_norm": 6.189678192138672, "learning_rate": 8.648440308354907e-06, "loss": 0.2886, "step": 4985 }, { "epoch": 0.24733369710799147, "grad_norm": 5.020787239074707, "learning_rate": 8.64790206633252e-06, "loss": 0.292, "step": 4986 }, { "epoch": 0.24738330274319162, "grad_norm": 7.174196243286133, "learning_rate": 8.647363733913473e-06, "loss": 0.2611, "step": 4987 }, { "epoch": 0.2474329083783918, "grad_norm": 5.273921966552734, "learning_rate": 8.646825311111102e-06, "loss": 0.2715, "step": 4988 }, { "epoch": 0.24748251401359195, "grad_norm": 5.187678813934326, "learning_rate": 8.64628679793875e-06, "loss": 0.2294, "step": 4989 }, { "epoch": 0.2475321196487921, "grad_norm": 9.062447547912598, "learning_rate": 8.64574819440976e-06, "loss": 0.3454, "step": 4990 }, { "epoch": 0.24758172528399225, "grad_norm": 6.59219217300415, "learning_rate": 8.645209500537484e-06, "loss": 0.3321, "step": 4991 }, { "epoch": 0.24763133091919243, "grad_norm": 5.206234455108643, "learning_rate": 8.644670716335265e-06, "loss": 0.2664, "step": 4992 }, { "epoch": 0.24768093655439258, "grad_norm": 4.931230068206787, "learning_rate": 8.644131841816456e-06, "loss": 0.3237, "step": 4993 }, { "epoch": 0.24773054218959273, "grad_norm": 10.811156272888184, "learning_rate": 8.643592876994415e-06, "loss": 0.4325, "step": 4994 }, { "epoch": 0.2477801478247929, "grad_norm": 7.290472030639648, "learning_rate": 8.643053821882493e-06, "loss": 0.3368, "step": 4995 }, { "epoch": 0.24782975345999306, "grad_norm": 5.408751964569092, "learning_rate": 8.642514676494048e-06, "loss": 0.2962, "step": 4996 }, { "epoch": 0.2478793590951932, "grad_norm": 8.965791702270508, "learning_rate": 8.641975440842442e-06, "loss": 0.3031, "step": 4997 }, { "epoch": 0.2479289647303934, "grad_norm": 4.418378829956055, "learning_rate": 8.641436114941038e-06, "loss": 0.2327, "step": 4998 }, { "epoch": 0.24797857036559354, "grad_norm": 5.463059902191162, "learning_rate": 8.6408966988032e-06, "loss": 0.4096, "step": 4999 }, { "epoch": 0.2480281760007937, "grad_norm": 6.047604084014893, "learning_rate": 8.640357192442294e-06, "loss": 0.3108, "step": 5000 }, { "epoch": 0.24807778163599384, "grad_norm": 5.783846378326416, "learning_rate": 8.63981759587169e-06, "loss": 0.2534, "step": 5001 }, { "epoch": 0.24812738727119402, "grad_norm": 7.382470607757568, "learning_rate": 8.63927790910476e-06, "loss": 0.2866, "step": 5002 }, { "epoch": 0.24817699290639417, "grad_norm": 8.237980842590332, "learning_rate": 8.638738132154878e-06, "loss": 0.3065, "step": 5003 }, { "epoch": 0.24822659854159432, "grad_norm": 7.004522800445557, "learning_rate": 8.638198265035418e-06, "loss": 0.2285, "step": 5004 }, { "epoch": 0.2482762041767945, "grad_norm": 7.5514302253723145, "learning_rate": 8.63765830775976e-06, "loss": 0.3468, "step": 5005 }, { "epoch": 0.24832580981199465, "grad_norm": 7.351796627044678, "learning_rate": 8.637118260341282e-06, "loss": 0.2778, "step": 5006 }, { "epoch": 0.2483754154471948, "grad_norm": 5.889345169067383, "learning_rate": 8.63657812279337e-06, "loss": 0.2139, "step": 5007 }, { "epoch": 0.24842502108239495, "grad_norm": 13.26884937286377, "learning_rate": 8.636037895129407e-06, "loss": 0.3309, "step": 5008 }, { "epoch": 0.24847462671759513, "grad_norm": 5.2835373878479, "learning_rate": 8.635497577362779e-06, "loss": 0.2337, "step": 5009 }, { "epoch": 0.24852423235279528, "grad_norm": 5.8312883377075195, "learning_rate": 8.634957169506877e-06, "loss": 0.357, "step": 5010 }, { "epoch": 0.24857383798799543, "grad_norm": 7.241880893707275, "learning_rate": 8.634416671575092e-06, "loss": 0.3159, "step": 5011 }, { "epoch": 0.2486234436231956, "grad_norm": 14.474934577941895, "learning_rate": 8.633876083580818e-06, "loss": 0.4086, "step": 5012 }, { "epoch": 0.24867304925839576, "grad_norm": 7.473515033721924, "learning_rate": 8.633335405537452e-06, "loss": 0.3351, "step": 5013 }, { "epoch": 0.2487226548935959, "grad_norm": 15.311811447143555, "learning_rate": 8.63279463745839e-06, "loss": 0.2436, "step": 5014 }, { "epoch": 0.24877226052879606, "grad_norm": 5.408823013305664, "learning_rate": 8.632253779357034e-06, "loss": 0.2806, "step": 5015 }, { "epoch": 0.24882186616399624, "grad_norm": 6.5476508140563965, "learning_rate": 8.631712831246786e-06, "loss": 0.3159, "step": 5016 }, { "epoch": 0.2488714717991964, "grad_norm": 3.908872127532959, "learning_rate": 8.63117179314105e-06, "loss": 0.2095, "step": 5017 }, { "epoch": 0.24892107743439654, "grad_norm": 8.175509452819824, "learning_rate": 8.630630665053237e-06, "loss": 0.2858, "step": 5018 }, { "epoch": 0.24897068306959672, "grad_norm": 7.598481178283691, "learning_rate": 8.630089446996753e-06, "loss": 0.3797, "step": 5019 }, { "epoch": 0.24902028870479687, "grad_norm": 4.854218006134033, "learning_rate": 8.629548138985009e-06, "loss": 0.2645, "step": 5020 }, { "epoch": 0.24906989433999702, "grad_norm": 12.397501945495605, "learning_rate": 8.629006741031422e-06, "loss": 0.4022, "step": 5021 }, { "epoch": 0.2491194999751972, "grad_norm": 5.976962089538574, "learning_rate": 8.628465253149404e-06, "loss": 0.2965, "step": 5022 }, { "epoch": 0.24916910561039735, "grad_norm": 9.169936180114746, "learning_rate": 8.627923675352378e-06, "loss": 0.3684, "step": 5023 }, { "epoch": 0.2492187112455975, "grad_norm": 5.893977642059326, "learning_rate": 8.627382007653761e-06, "loss": 0.3045, "step": 5024 }, { "epoch": 0.24926831688079765, "grad_norm": 4.790883541107178, "learning_rate": 8.626840250066979e-06, "loss": 0.1728, "step": 5025 }, { "epoch": 0.24931792251599783, "grad_norm": 8.705724716186523, "learning_rate": 8.626298402605453e-06, "loss": 0.331, "step": 5026 }, { "epoch": 0.24936752815119798, "grad_norm": 3.726943016052246, "learning_rate": 8.625756465282614e-06, "loss": 0.3222, "step": 5027 }, { "epoch": 0.24941713378639813, "grad_norm": 7.162644863128662, "learning_rate": 8.625214438111886e-06, "loss": 0.3208, "step": 5028 }, { "epoch": 0.2494667394215983, "grad_norm": 13.8372802734375, "learning_rate": 8.624672321106708e-06, "loss": 0.5463, "step": 5029 }, { "epoch": 0.24951634505679846, "grad_norm": 6.062767505645752, "learning_rate": 8.62413011428051e-06, "loss": 0.2842, "step": 5030 }, { "epoch": 0.2495659506919986, "grad_norm": 6.0809550285339355, "learning_rate": 8.623587817646726e-06, "loss": 0.2228, "step": 5031 }, { "epoch": 0.24961555632719876, "grad_norm": 18.401540756225586, "learning_rate": 8.6230454312188e-06, "loss": 0.4387, "step": 5032 }, { "epoch": 0.24966516196239893, "grad_norm": 5.574349403381348, "learning_rate": 8.622502955010166e-06, "loss": 0.3062, "step": 5033 }, { "epoch": 0.24971476759759909, "grad_norm": 6.852510929107666, "learning_rate": 8.621960389034273e-06, "loss": 0.2189, "step": 5034 }, { "epoch": 0.24976437323279924, "grad_norm": 10.8261079788208, "learning_rate": 8.621417733304561e-06, "loss": 0.329, "step": 5035 }, { "epoch": 0.24981397886799941, "grad_norm": 7.997315883636475, "learning_rate": 8.62087498783448e-06, "loss": 0.309, "step": 5036 }, { "epoch": 0.24986358450319957, "grad_norm": 5.040968894958496, "learning_rate": 8.620332152637479e-06, "loss": 0.2385, "step": 5037 }, { "epoch": 0.24991319013839972, "grad_norm": 6.576035499572754, "learning_rate": 8.61978922772701e-06, "loss": 0.3196, "step": 5038 }, { "epoch": 0.2499627957735999, "grad_norm": 7.077080249786377, "learning_rate": 8.619246213116527e-06, "loss": 0.3713, "step": 5039 }, { "epoch": 0.25001240140880004, "grad_norm": 5.0799360275268555, "learning_rate": 8.618703108819484e-06, "loss": 0.3431, "step": 5040 }, { "epoch": 0.2500620070440002, "grad_norm": 6.131013870239258, "learning_rate": 8.618159914849342e-06, "loss": 0.3343, "step": 5041 }, { "epoch": 0.25011161267920035, "grad_norm": 6.80207633972168, "learning_rate": 8.617616631219561e-06, "loss": 0.444, "step": 5042 }, { "epoch": 0.2501612183144005, "grad_norm": 7.026452541351318, "learning_rate": 8.617073257943603e-06, "loss": 0.3047, "step": 5043 }, { "epoch": 0.25021082394960065, "grad_norm": 10.258092880249023, "learning_rate": 8.616529795034934e-06, "loss": 0.3296, "step": 5044 }, { "epoch": 0.2502604295848008, "grad_norm": 6.9978156089782715, "learning_rate": 8.61598624250702e-06, "loss": 0.3749, "step": 5045 }, { "epoch": 0.250310035220001, "grad_norm": 10.226799964904785, "learning_rate": 8.615442600373332e-06, "loss": 0.475, "step": 5046 }, { "epoch": 0.2503596408552011, "grad_norm": 6.059260368347168, "learning_rate": 8.614898868647342e-06, "loss": 0.2997, "step": 5047 }, { "epoch": 0.2504092464904013, "grad_norm": 5.648770809173584, "learning_rate": 8.61435504734252e-06, "loss": 0.1936, "step": 5048 }, { "epoch": 0.2504588521256015, "grad_norm": 17.166168212890625, "learning_rate": 8.613811136472346e-06, "loss": 0.5613, "step": 5049 }, { "epoch": 0.2505084577608016, "grad_norm": 7.723521709442139, "learning_rate": 8.613267136050299e-06, "loss": 0.3284, "step": 5050 }, { "epoch": 0.2505580633960018, "grad_norm": 9.47550106048584, "learning_rate": 8.612723046089855e-06, "loss": 0.3099, "step": 5051 }, { "epoch": 0.25060766903120196, "grad_norm": 4.928577899932861, "learning_rate": 8.6121788666045e-06, "loss": 0.381, "step": 5052 }, { "epoch": 0.2506572746664021, "grad_norm": 6.273313045501709, "learning_rate": 8.611634597607721e-06, "loss": 0.2603, "step": 5053 }, { "epoch": 0.25070688030160226, "grad_norm": 5.594416618347168, "learning_rate": 8.611090239113002e-06, "loss": 0.3802, "step": 5054 }, { "epoch": 0.25075648593680244, "grad_norm": 11.145174980163574, "learning_rate": 8.610545791133833e-06, "loss": 0.3219, "step": 5055 }, { "epoch": 0.25080609157200257, "grad_norm": 9.65555191040039, "learning_rate": 8.610001253683708e-06, "loss": 0.4158, "step": 5056 }, { "epoch": 0.25085569720720274, "grad_norm": 6.385344982147217, "learning_rate": 8.609456626776116e-06, "loss": 0.3654, "step": 5057 }, { "epoch": 0.2509053028424029, "grad_norm": 5.583750247955322, "learning_rate": 8.608911910424558e-06, "loss": 0.2482, "step": 5058 }, { "epoch": 0.25095490847760304, "grad_norm": 4.511964797973633, "learning_rate": 8.60836710464253e-06, "loss": 0.3338, "step": 5059 }, { "epoch": 0.2510045141128032, "grad_norm": 6.714266777038574, "learning_rate": 8.607822209443532e-06, "loss": 0.2613, "step": 5060 }, { "epoch": 0.25105411974800335, "grad_norm": 4.798628330230713, "learning_rate": 8.607277224841069e-06, "loss": 0.3489, "step": 5061 }, { "epoch": 0.2511037253832035, "grad_norm": 10.121649742126465, "learning_rate": 8.606732150848644e-06, "loss": 0.3805, "step": 5062 }, { "epoch": 0.2511533310184037, "grad_norm": 8.906756401062012, "learning_rate": 8.606186987479766e-06, "loss": 0.4316, "step": 5063 }, { "epoch": 0.2512029366536038, "grad_norm": 7.702183246612549, "learning_rate": 8.60564173474794e-06, "loss": 0.3558, "step": 5064 }, { "epoch": 0.251252542288804, "grad_norm": 7.275509357452393, "learning_rate": 8.605096392666682e-06, "loss": 0.3263, "step": 5065 }, { "epoch": 0.2513021479240042, "grad_norm": 5.6201701164245605, "learning_rate": 8.604550961249506e-06, "loss": 0.3491, "step": 5066 }, { "epoch": 0.2513517535592043, "grad_norm": 6.222919940948486, "learning_rate": 8.604005440509928e-06, "loss": 0.3215, "step": 5067 }, { "epoch": 0.2514013591944045, "grad_norm": 9.117447853088379, "learning_rate": 8.603459830461462e-06, "loss": 0.3237, "step": 5068 }, { "epoch": 0.25145096482960466, "grad_norm": 6.9365339279174805, "learning_rate": 8.602914131117632e-06, "loss": 0.2725, "step": 5069 }, { "epoch": 0.2515005704648048, "grad_norm": 6.797154426574707, "learning_rate": 8.60236834249196e-06, "loss": 0.3563, "step": 5070 }, { "epoch": 0.25155017610000496, "grad_norm": 9.977957725524902, "learning_rate": 8.601822464597973e-06, "loss": 0.3674, "step": 5071 }, { "epoch": 0.25159978173520514, "grad_norm": 10.605372428894043, "learning_rate": 8.601276497449193e-06, "loss": 0.3633, "step": 5072 }, { "epoch": 0.25164938737040526, "grad_norm": 7.31951379776001, "learning_rate": 8.600730441059154e-06, "loss": 0.2572, "step": 5073 }, { "epoch": 0.25169899300560544, "grad_norm": 6.51950216293335, "learning_rate": 8.600184295441386e-06, "loss": 0.3123, "step": 5074 }, { "epoch": 0.2517485986408056, "grad_norm": 5.524555683135986, "learning_rate": 8.599638060609423e-06, "loss": 0.2722, "step": 5075 }, { "epoch": 0.25179820427600574, "grad_norm": 12.207430839538574, "learning_rate": 8.599091736576799e-06, "loss": 0.5036, "step": 5076 }, { "epoch": 0.2518478099112059, "grad_norm": 7.4112019538879395, "learning_rate": 8.598545323357054e-06, "loss": 0.3329, "step": 5077 }, { "epoch": 0.25189741554640604, "grad_norm": 6.986148357391357, "learning_rate": 8.597998820963729e-06, "loss": 0.321, "step": 5078 }, { "epoch": 0.2519470211816062, "grad_norm": 7.2760748863220215, "learning_rate": 8.597452229410365e-06, "loss": 0.2865, "step": 5079 }, { "epoch": 0.2519966268168064, "grad_norm": 9.710038185119629, "learning_rate": 8.596905548710507e-06, "loss": 0.329, "step": 5080 }, { "epoch": 0.2520462324520065, "grad_norm": 13.305597305297852, "learning_rate": 8.596358778877705e-06, "loss": 0.3928, "step": 5081 }, { "epoch": 0.2520958380872067, "grad_norm": 8.490680694580078, "learning_rate": 8.595811919925503e-06, "loss": 0.4092, "step": 5082 }, { "epoch": 0.2521454437224069, "grad_norm": 4.630774974822998, "learning_rate": 8.595264971867456e-06, "loss": 0.3327, "step": 5083 }, { "epoch": 0.252195049357607, "grad_norm": 14.149465560913086, "learning_rate": 8.594717934717116e-06, "loss": 0.3859, "step": 5084 }, { "epoch": 0.2522446549928072, "grad_norm": 7.739006042480469, "learning_rate": 8.59417080848804e-06, "loss": 0.3439, "step": 5085 }, { "epoch": 0.25229426062800736, "grad_norm": 7.709648132324219, "learning_rate": 8.593623593193786e-06, "loss": 0.2709, "step": 5086 }, { "epoch": 0.2523438662632075, "grad_norm": 14.057538986206055, "learning_rate": 8.593076288847913e-06, "loss": 0.5415, "step": 5087 }, { "epoch": 0.25239347189840766, "grad_norm": 8.463693618774414, "learning_rate": 8.592528895463985e-06, "loss": 0.313, "step": 5088 }, { "epoch": 0.25244307753360784, "grad_norm": 4.483257293701172, "learning_rate": 8.591981413055566e-06, "loss": 0.2619, "step": 5089 }, { "epoch": 0.25249268316880796, "grad_norm": 3.940303325653076, "learning_rate": 8.591433841636223e-06, "loss": 0.2818, "step": 5090 }, { "epoch": 0.25254228880400814, "grad_norm": 13.953078269958496, "learning_rate": 8.590886181219524e-06, "loss": 0.3373, "step": 5091 }, { "epoch": 0.2525918944392083, "grad_norm": 5.017396450042725, "learning_rate": 8.59033843181904e-06, "loss": 0.3258, "step": 5092 }, { "epoch": 0.25264150007440844, "grad_norm": 4.074874401092529, "learning_rate": 8.589790593448347e-06, "loss": 0.2818, "step": 5093 }, { "epoch": 0.2526911057096086, "grad_norm": 6.922732830047607, "learning_rate": 8.58924266612102e-06, "loss": 0.415, "step": 5094 }, { "epoch": 0.25274071134480874, "grad_norm": 7.3894362449646, "learning_rate": 8.588694649850635e-06, "loss": 0.3462, "step": 5095 }, { "epoch": 0.2527903169800089, "grad_norm": 4.616450309753418, "learning_rate": 8.588146544650773e-06, "loss": 0.3314, "step": 5096 }, { "epoch": 0.2528399226152091, "grad_norm": 9.168503761291504, "learning_rate": 8.587598350535017e-06, "loss": 0.4372, "step": 5097 }, { "epoch": 0.2528895282504092, "grad_norm": 4.421501636505127, "learning_rate": 8.587050067516952e-06, "loss": 0.3234, "step": 5098 }, { "epoch": 0.2529391338856094, "grad_norm": 5.7149434089660645, "learning_rate": 8.586501695610162e-06, "loss": 0.2684, "step": 5099 }, { "epoch": 0.2529887395208096, "grad_norm": 6.0397257804870605, "learning_rate": 8.585953234828238e-06, "loss": 0.3582, "step": 5100 }, { "epoch": 0.2530383451560097, "grad_norm": 6.620445728302002, "learning_rate": 8.58540468518477e-06, "loss": 0.2927, "step": 5101 }, { "epoch": 0.2530879507912099, "grad_norm": 11.323982238769531, "learning_rate": 8.584856046693355e-06, "loss": 0.2802, "step": 5102 }, { "epoch": 0.25313755642641006, "grad_norm": 4.022934436798096, "learning_rate": 8.584307319367584e-06, "loss": 0.28, "step": 5103 }, { "epoch": 0.2531871620616102, "grad_norm": 9.785512924194336, "learning_rate": 8.583758503221055e-06, "loss": 0.2711, "step": 5104 }, { "epoch": 0.25323676769681036, "grad_norm": 6.003190517425537, "learning_rate": 8.58320959826737e-06, "loss": 0.2471, "step": 5105 }, { "epoch": 0.25328637333201054, "grad_norm": 6.180747985839844, "learning_rate": 8.58266060452013e-06, "loss": 0.3674, "step": 5106 }, { "epoch": 0.25333597896721066, "grad_norm": 5.014632701873779, "learning_rate": 8.582111521992941e-06, "loss": 0.2596, "step": 5107 }, { "epoch": 0.25338558460241084, "grad_norm": 11.575963020324707, "learning_rate": 8.581562350699406e-06, "loss": 0.2656, "step": 5108 }, { "epoch": 0.253435190237611, "grad_norm": 4.034341335296631, "learning_rate": 8.581013090653137e-06, "loss": 0.2383, "step": 5109 }, { "epoch": 0.25348479587281114, "grad_norm": 5.3014349937438965, "learning_rate": 8.580463741867745e-06, "loss": 0.2613, "step": 5110 }, { "epoch": 0.2535344015080113, "grad_norm": 12.551118850708008, "learning_rate": 8.57991430435684e-06, "loss": 0.3939, "step": 5111 }, { "epoch": 0.25358400714321144, "grad_norm": 10.965414047241211, "learning_rate": 8.57936477813404e-06, "loss": 0.3997, "step": 5112 }, { "epoch": 0.2536336127784116, "grad_norm": 8.320232391357422, "learning_rate": 8.578815163212962e-06, "loss": 0.4274, "step": 5113 }, { "epoch": 0.2536832184136118, "grad_norm": 24.91767120361328, "learning_rate": 8.578265459607224e-06, "loss": 0.5096, "step": 5114 }, { "epoch": 0.2537328240488119, "grad_norm": 4.459475040435791, "learning_rate": 8.57771566733045e-06, "loss": 0.223, "step": 5115 }, { "epoch": 0.2537824296840121, "grad_norm": 18.163984298706055, "learning_rate": 8.577165786396265e-06, "loss": 0.3728, "step": 5116 }, { "epoch": 0.2538320353192123, "grad_norm": 6.61244535446167, "learning_rate": 8.57661581681829e-06, "loss": 0.3844, "step": 5117 }, { "epoch": 0.2538816409544124, "grad_norm": 6.811540603637695, "learning_rate": 8.57606575861016e-06, "loss": 0.3464, "step": 5118 }, { "epoch": 0.2539312465896126, "grad_norm": 6.252614498138428, "learning_rate": 8.575515611785503e-06, "loss": 0.3298, "step": 5119 }, { "epoch": 0.25398085222481276, "grad_norm": 6.477056503295898, "learning_rate": 8.574965376357951e-06, "loss": 0.3165, "step": 5120 }, { "epoch": 0.2540304578600129, "grad_norm": 9.538025856018066, "learning_rate": 8.574415052341139e-06, "loss": 0.3619, "step": 5121 }, { "epoch": 0.25408006349521306, "grad_norm": 5.8588175773620605, "learning_rate": 8.573864639748705e-06, "loss": 0.2972, "step": 5122 }, { "epoch": 0.25412966913041324, "grad_norm": 3.8317503929138184, "learning_rate": 8.57331413859429e-06, "loss": 0.2807, "step": 5123 }, { "epoch": 0.25417927476561336, "grad_norm": 5.434549808502197, "learning_rate": 8.572763548891533e-06, "loss": 0.3212, "step": 5124 }, { "epoch": 0.25422888040081354, "grad_norm": 5.26184606552124, "learning_rate": 8.572212870654082e-06, "loss": 0.2686, "step": 5125 }, { "epoch": 0.2542784860360137, "grad_norm": 4.214367866516113, "learning_rate": 8.571662103895578e-06, "loss": 0.4351, "step": 5126 }, { "epoch": 0.25432809167121384, "grad_norm": 9.56705379486084, "learning_rate": 8.571111248629671e-06, "loss": 0.4315, "step": 5127 }, { "epoch": 0.254377697306414, "grad_norm": 15.5123872756958, "learning_rate": 8.570560304870012e-06, "loss": 0.5794, "step": 5128 }, { "epoch": 0.25442730294161414, "grad_norm": 5.228140354156494, "learning_rate": 8.570009272630253e-06, "loss": 0.2501, "step": 5129 }, { "epoch": 0.2544769085768143, "grad_norm": 21.80599021911621, "learning_rate": 8.569458151924051e-06, "loss": 0.3669, "step": 5130 }, { "epoch": 0.2545265142120145, "grad_norm": 6.022062301635742, "learning_rate": 8.56890694276506e-06, "loss": 0.2998, "step": 5131 }, { "epoch": 0.2545761198472146, "grad_norm": 4.455216407775879, "learning_rate": 8.568355645166942e-06, "loss": 0.2724, "step": 5132 }, { "epoch": 0.2546257254824148, "grad_norm": 7.372659206390381, "learning_rate": 8.567804259143356e-06, "loss": 0.4082, "step": 5133 }, { "epoch": 0.254675331117615, "grad_norm": 5.610144138336182, "learning_rate": 8.567252784707967e-06, "loss": 0.2889, "step": 5134 }, { "epoch": 0.2547249367528151, "grad_norm": 6.767913341522217, "learning_rate": 8.566701221874441e-06, "loss": 0.4104, "step": 5135 }, { "epoch": 0.2547745423880153, "grad_norm": 6.223945617675781, "learning_rate": 8.566149570656444e-06, "loss": 0.343, "step": 5136 }, { "epoch": 0.25482414802321546, "grad_norm": 4.918407917022705, "learning_rate": 8.565597831067647e-06, "loss": 0.316, "step": 5137 }, { "epoch": 0.2548737536584156, "grad_norm": 11.199241638183594, "learning_rate": 8.565046003121724e-06, "loss": 0.3999, "step": 5138 }, { "epoch": 0.25492335929361576, "grad_norm": 6.434494495391846, "learning_rate": 8.564494086832349e-06, "loss": 0.344, "step": 5139 }, { "epoch": 0.25497296492881594, "grad_norm": 5.110023021697998, "learning_rate": 8.563942082213195e-06, "loss": 0.338, "step": 5140 }, { "epoch": 0.25502257056401606, "grad_norm": 4.939047813415527, "learning_rate": 8.563389989277948e-06, "loss": 0.2456, "step": 5141 }, { "epoch": 0.25507217619921624, "grad_norm": 4.4225568771362305, "learning_rate": 8.562837808040284e-06, "loss": 0.2515, "step": 5142 }, { "epoch": 0.2551217818344164, "grad_norm": 4.157312393188477, "learning_rate": 8.562285538513886e-06, "loss": 0.2814, "step": 5143 }, { "epoch": 0.25517138746961654, "grad_norm": 12.210389137268066, "learning_rate": 8.561733180712441e-06, "loss": 0.2056, "step": 5144 }, { "epoch": 0.2552209931048167, "grad_norm": 6.433752536773682, "learning_rate": 8.561180734649638e-06, "loss": 0.3789, "step": 5145 }, { "epoch": 0.25527059874001684, "grad_norm": 14.443253517150879, "learning_rate": 8.560628200339164e-06, "loss": 0.5806, "step": 5146 }, { "epoch": 0.255320204375217, "grad_norm": 7.404294013977051, "learning_rate": 8.560075577794715e-06, "loss": 0.2908, "step": 5147 }, { "epoch": 0.2553698100104172, "grad_norm": 6.367834568023682, "learning_rate": 8.55952286702998e-06, "loss": 0.3096, "step": 5148 }, { "epoch": 0.2554194156456173, "grad_norm": 6.507887840270996, "learning_rate": 8.55897006805866e-06, "loss": 0.2556, "step": 5149 }, { "epoch": 0.2554690212808175, "grad_norm": 8.585387229919434, "learning_rate": 8.558417180894451e-06, "loss": 0.3977, "step": 5150 }, { "epoch": 0.2555186269160177, "grad_norm": 5.827871322631836, "learning_rate": 8.557864205551057e-06, "loss": 0.2016, "step": 5151 }, { "epoch": 0.2555682325512178, "grad_norm": 5.571110725402832, "learning_rate": 8.557311142042176e-06, "loss": 0.2676, "step": 5152 }, { "epoch": 0.255617838186418, "grad_norm": 6.369826316833496, "learning_rate": 8.556757990381517e-06, "loss": 0.3288, "step": 5153 }, { "epoch": 0.25566744382161816, "grad_norm": 5.306295871734619, "learning_rate": 8.556204750582783e-06, "loss": 0.2835, "step": 5154 }, { "epoch": 0.2557170494568183, "grad_norm": 6.32535982131958, "learning_rate": 8.555651422659689e-06, "loss": 0.3358, "step": 5155 }, { "epoch": 0.25576665509201846, "grad_norm": 12.063516616821289, "learning_rate": 8.555098006625946e-06, "loss": 0.3874, "step": 5156 }, { "epoch": 0.25581626072721864, "grad_norm": 17.530752182006836, "learning_rate": 8.554544502495265e-06, "loss": 0.4104, "step": 5157 }, { "epoch": 0.25586586636241876, "grad_norm": 6.59840202331543, "learning_rate": 8.553990910281362e-06, "loss": 0.3618, "step": 5158 }, { "epoch": 0.25591547199761894, "grad_norm": 9.475499153137207, "learning_rate": 8.553437229997958e-06, "loss": 0.3623, "step": 5159 }, { "epoch": 0.2559650776328191, "grad_norm": 9.933563232421875, "learning_rate": 8.552883461658771e-06, "loss": 0.3332, "step": 5160 }, { "epoch": 0.25601468326801924, "grad_norm": 13.396222114562988, "learning_rate": 8.552329605277527e-06, "loss": 0.5728, "step": 5161 }, { "epoch": 0.2560642889032194, "grad_norm": 7.923305988311768, "learning_rate": 8.551775660867946e-06, "loss": 0.322, "step": 5162 }, { "epoch": 0.25611389453841954, "grad_norm": 9.516253471374512, "learning_rate": 8.551221628443759e-06, "loss": 0.4098, "step": 5163 }, { "epoch": 0.2561635001736197, "grad_norm": 7.8612799644470215, "learning_rate": 8.550667508018692e-06, "loss": 0.2945, "step": 5164 }, { "epoch": 0.2562131058088199, "grad_norm": 9.623018264770508, "learning_rate": 8.550113299606479e-06, "loss": 0.315, "step": 5165 }, { "epoch": 0.25626271144402, "grad_norm": 6.8283867835998535, "learning_rate": 8.549559003220853e-06, "loss": 0.3412, "step": 5166 }, { "epoch": 0.2563123170792202, "grad_norm": 8.585325241088867, "learning_rate": 8.549004618875548e-06, "loss": 0.2982, "step": 5167 }, { "epoch": 0.2563619227144204, "grad_norm": 5.874581336975098, "learning_rate": 8.548450146584304e-06, "loss": 0.4165, "step": 5168 }, { "epoch": 0.2564115283496205, "grad_norm": 5.678098678588867, "learning_rate": 8.54789558636086e-06, "loss": 0.3417, "step": 5169 }, { "epoch": 0.2564611339848207, "grad_norm": 6.8210859298706055, "learning_rate": 8.54734093821896e-06, "loss": 0.2719, "step": 5170 }, { "epoch": 0.25651073962002086, "grad_norm": 6.0683674812316895, "learning_rate": 8.546786202172344e-06, "loss": 0.2499, "step": 5171 }, { "epoch": 0.256560345255221, "grad_norm": 7.984922885894775, "learning_rate": 8.546231378234764e-06, "loss": 0.2517, "step": 5172 }, { "epoch": 0.25660995089042116, "grad_norm": 7.006345272064209, "learning_rate": 8.545676466419966e-06, "loss": 0.3347, "step": 5173 }, { "epoch": 0.25665955652562134, "grad_norm": 13.634810447692871, "learning_rate": 8.5451214667417e-06, "loss": 0.4156, "step": 5174 }, { "epoch": 0.25670916216082146, "grad_norm": 5.2445292472839355, "learning_rate": 8.544566379213721e-06, "loss": 0.2685, "step": 5175 }, { "epoch": 0.25675876779602164, "grad_norm": 6.7685441970825195, "learning_rate": 8.544011203849783e-06, "loss": 0.3111, "step": 5176 }, { "epoch": 0.25680837343122176, "grad_norm": 10.316171646118164, "learning_rate": 8.543455940663645e-06, "loss": 0.2871, "step": 5177 }, { "epoch": 0.25685797906642194, "grad_norm": 7.097105979919434, "learning_rate": 8.542900589669067e-06, "loss": 0.4104, "step": 5178 }, { "epoch": 0.2569075847016221, "grad_norm": 16.174766540527344, "learning_rate": 8.542345150879806e-06, "loss": 0.5044, "step": 5179 }, { "epoch": 0.25695719033682224, "grad_norm": 8.980493545532227, "learning_rate": 8.541789624309633e-06, "loss": 0.2567, "step": 5180 }, { "epoch": 0.2570067959720224, "grad_norm": 5.142066955566406, "learning_rate": 8.541234009972312e-06, "loss": 0.2892, "step": 5181 }, { "epoch": 0.2570564016072226, "grad_norm": 7.560493469238281, "learning_rate": 8.540678307881608e-06, "loss": 0.2673, "step": 5182 }, { "epoch": 0.2571060072424227, "grad_norm": 8.77737808227539, "learning_rate": 8.540122518051297e-06, "loss": 0.3665, "step": 5183 }, { "epoch": 0.2571556128776229, "grad_norm": 14.541144371032715, "learning_rate": 8.539566640495145e-06, "loss": 0.347, "step": 5184 }, { "epoch": 0.2572052185128231, "grad_norm": 10.817015647888184, "learning_rate": 8.539010675226932e-06, "loss": 0.3676, "step": 5185 }, { "epoch": 0.2572548241480232, "grad_norm": 6.795153617858887, "learning_rate": 8.538454622260433e-06, "loss": 0.3195, "step": 5186 }, { "epoch": 0.2573044297832234, "grad_norm": 10.186166763305664, "learning_rate": 8.537898481609428e-06, "loss": 0.3029, "step": 5187 }, { "epoch": 0.25735403541842355, "grad_norm": 8.482223510742188, "learning_rate": 8.5373422532877e-06, "loss": 0.331, "step": 5188 }, { "epoch": 0.2574036410536237, "grad_norm": 4.933534622192383, "learning_rate": 8.53678593730903e-06, "loss": 0.274, "step": 5189 }, { "epoch": 0.25745324668882386, "grad_norm": 4.2056145668029785, "learning_rate": 8.536229533687202e-06, "loss": 0.2519, "step": 5190 }, { "epoch": 0.25750285232402403, "grad_norm": 9.331643104553223, "learning_rate": 8.53567304243601e-06, "loss": 0.4264, "step": 5191 }, { "epoch": 0.25755245795922416, "grad_norm": 5.5673089027404785, "learning_rate": 8.535116463569239e-06, "loss": 0.4019, "step": 5192 }, { "epoch": 0.25760206359442434, "grad_norm": 10.432602882385254, "learning_rate": 8.534559797100683e-06, "loss": 0.3612, "step": 5193 }, { "epoch": 0.25765166922962446, "grad_norm": 7.176919937133789, "learning_rate": 8.534003043044137e-06, "loss": 0.2611, "step": 5194 }, { "epoch": 0.25770127486482464, "grad_norm": 7.469781875610352, "learning_rate": 8.533446201413396e-06, "loss": 0.4426, "step": 5195 }, { "epoch": 0.2577508805000248, "grad_norm": 8.082540512084961, "learning_rate": 8.53288927222226e-06, "loss": 0.3167, "step": 5196 }, { "epoch": 0.25780048613522494, "grad_norm": 14.303866386413574, "learning_rate": 8.53233225548453e-06, "loss": 0.4423, "step": 5197 }, { "epoch": 0.2578500917704251, "grad_norm": 4.892303466796875, "learning_rate": 8.531775151214011e-06, "loss": 0.3164, "step": 5198 }, { "epoch": 0.2578996974056253, "grad_norm": 9.373422622680664, "learning_rate": 8.531217959424503e-06, "loss": 0.3993, "step": 5199 }, { "epoch": 0.2579493030408254, "grad_norm": 4.593648910522461, "learning_rate": 8.530660680129816e-06, "loss": 0.3012, "step": 5200 }, { "epoch": 0.2579989086760256, "grad_norm": 6.97067403793335, "learning_rate": 8.530103313343764e-06, "loss": 0.2965, "step": 5201 }, { "epoch": 0.2580485143112258, "grad_norm": 5.978394508361816, "learning_rate": 8.529545859080153e-06, "loss": 0.3705, "step": 5202 }, { "epoch": 0.2580981199464259, "grad_norm": 6.300312519073486, "learning_rate": 8.528988317352798e-06, "loss": 0.286, "step": 5203 }, { "epoch": 0.2581477255816261, "grad_norm": 12.363053321838379, "learning_rate": 8.528430688175518e-06, "loss": 0.3458, "step": 5204 }, { "epoch": 0.25819733121682625, "grad_norm": 6.151861190795898, "learning_rate": 8.527872971562127e-06, "loss": 0.2181, "step": 5205 }, { "epoch": 0.2582469368520264, "grad_norm": 4.302932262420654, "learning_rate": 8.52731516752645e-06, "loss": 0.2676, "step": 5206 }, { "epoch": 0.25829654248722655, "grad_norm": 7.424460411071777, "learning_rate": 8.526757276082307e-06, "loss": 0.3114, "step": 5207 }, { "epoch": 0.25834614812242673, "grad_norm": 5.079699516296387, "learning_rate": 8.526199297243525e-06, "loss": 0.3874, "step": 5208 }, { "epoch": 0.25839575375762686, "grad_norm": 7.865157127380371, "learning_rate": 8.525641231023927e-06, "loss": 0.3945, "step": 5209 }, { "epoch": 0.25844535939282703, "grad_norm": 10.497719764709473, "learning_rate": 8.525083077437345e-06, "loss": 0.533, "step": 5210 }, { "epoch": 0.25849496502802716, "grad_norm": 7.208527088165283, "learning_rate": 8.524524836497613e-06, "loss": 0.3039, "step": 5211 }, { "epoch": 0.25854457066322734, "grad_norm": 8.047035217285156, "learning_rate": 8.523966508218556e-06, "loss": 0.3917, "step": 5212 }, { "epoch": 0.2585941762984275, "grad_norm": 5.805288314819336, "learning_rate": 8.523408092614017e-06, "loss": 0.2915, "step": 5213 }, { "epoch": 0.25864378193362764, "grad_norm": 9.78768539428711, "learning_rate": 8.522849589697832e-06, "loss": 0.3546, "step": 5214 }, { "epoch": 0.2586933875688278, "grad_norm": 13.180912971496582, "learning_rate": 8.52229099948384e-06, "loss": 0.3465, "step": 5215 }, { "epoch": 0.258742993204028, "grad_norm": 5.256656169891357, "learning_rate": 8.521732321985884e-06, "loss": 0.256, "step": 5216 }, { "epoch": 0.2587925988392281, "grad_norm": 6.7384514808654785, "learning_rate": 8.521173557217809e-06, "loss": 0.3569, "step": 5217 }, { "epoch": 0.2588422044744283, "grad_norm": 8.311195373535156, "learning_rate": 8.52061470519346e-06, "loss": 0.3507, "step": 5218 }, { "epoch": 0.2588918101096285, "grad_norm": 5.43680477142334, "learning_rate": 8.520055765926685e-06, "loss": 0.2553, "step": 5219 }, { "epoch": 0.2589414157448286, "grad_norm": 12.709001541137695, "learning_rate": 8.519496739431336e-06, "loss": 0.486, "step": 5220 }, { "epoch": 0.2589910213800288, "grad_norm": 7.455424785614014, "learning_rate": 8.518937625721267e-06, "loss": 0.4233, "step": 5221 }, { "epoch": 0.25904062701522895, "grad_norm": 9.294195175170898, "learning_rate": 8.518378424810331e-06, "loss": 0.3107, "step": 5222 }, { "epoch": 0.2590902326504291, "grad_norm": 9.7074613571167, "learning_rate": 8.517819136712387e-06, "loss": 0.4369, "step": 5223 }, { "epoch": 0.25913983828562925, "grad_norm": 7.736593246459961, "learning_rate": 8.517259761441293e-06, "loss": 0.2894, "step": 5224 }, { "epoch": 0.25918944392082943, "grad_norm": 5.0799336433410645, "learning_rate": 8.51670029901091e-06, "loss": 0.3165, "step": 5225 }, { "epoch": 0.25923904955602955, "grad_norm": 14.667070388793945, "learning_rate": 8.516140749435105e-06, "loss": 0.3033, "step": 5226 }, { "epoch": 0.25928865519122973, "grad_norm": 11.609130859375, "learning_rate": 8.51558111272774e-06, "loss": 0.4776, "step": 5227 }, { "epoch": 0.25933826082642986, "grad_norm": 6.568151950836182, "learning_rate": 8.515021388902684e-06, "loss": 0.3442, "step": 5228 }, { "epoch": 0.25938786646163003, "grad_norm": 6.538636684417725, "learning_rate": 8.514461577973809e-06, "loss": 0.3349, "step": 5229 }, { "epoch": 0.2594374720968302, "grad_norm": 4.991848468780518, "learning_rate": 8.513901679954986e-06, "loss": 0.2718, "step": 5230 }, { "epoch": 0.25948707773203034, "grad_norm": 7.84515380859375, "learning_rate": 8.513341694860091e-06, "loss": 0.4893, "step": 5231 }, { "epoch": 0.2595366833672305, "grad_norm": 12.81201171875, "learning_rate": 8.512781622702999e-06, "loss": 0.4813, "step": 5232 }, { "epoch": 0.2595862890024307, "grad_norm": 9.894709587097168, "learning_rate": 8.51222146349759e-06, "loss": 0.3122, "step": 5233 }, { "epoch": 0.2596358946376308, "grad_norm": 10.070866584777832, "learning_rate": 8.511661217257742e-06, "loss": 0.3546, "step": 5234 }, { "epoch": 0.259685500272831, "grad_norm": 6.000296115875244, "learning_rate": 8.511100883997344e-06, "loss": 0.3399, "step": 5235 }, { "epoch": 0.25973510590803117, "grad_norm": 7.69377326965332, "learning_rate": 8.510540463730275e-06, "loss": 0.3884, "step": 5236 }, { "epoch": 0.2597847115432313, "grad_norm": 12.270028114318848, "learning_rate": 8.509979956470426e-06, "loss": 0.3017, "step": 5237 }, { "epoch": 0.2598343171784315, "grad_norm": 4.922876834869385, "learning_rate": 8.509419362231687e-06, "loss": 0.2811, "step": 5238 }, { "epoch": 0.25988392281363165, "grad_norm": 7.2322492599487305, "learning_rate": 8.508858681027946e-06, "loss": 0.4159, "step": 5239 }, { "epoch": 0.2599335284488318, "grad_norm": 10.329569816589355, "learning_rate": 8.508297912873102e-06, "loss": 0.4711, "step": 5240 }, { "epoch": 0.25998313408403195, "grad_norm": 7.480157852172852, "learning_rate": 8.50773705778105e-06, "loss": 0.2776, "step": 5241 }, { "epoch": 0.26003273971923213, "grad_norm": 4.652698516845703, "learning_rate": 8.507176115765685e-06, "loss": 0.3099, "step": 5242 }, { "epoch": 0.26008234535443225, "grad_norm": 11.98753547668457, "learning_rate": 8.50661508684091e-06, "loss": 0.4055, "step": 5243 }, { "epoch": 0.26013195098963243, "grad_norm": 7.300627708435059, "learning_rate": 8.506053971020627e-06, "loss": 0.429, "step": 5244 }, { "epoch": 0.26018155662483256, "grad_norm": 3.8829798698425293, "learning_rate": 8.50549276831874e-06, "loss": 0.2689, "step": 5245 }, { "epoch": 0.26023116226003273, "grad_norm": 9.233057975769043, "learning_rate": 8.504931478749156e-06, "loss": 0.355, "step": 5246 }, { "epoch": 0.2602807678952329, "grad_norm": 5.330105304718018, "learning_rate": 8.504370102325785e-06, "loss": 0.3289, "step": 5247 }, { "epoch": 0.26033037353043303, "grad_norm": 6.18773889541626, "learning_rate": 8.503808639062538e-06, "loss": 0.302, "step": 5248 }, { "epoch": 0.2603799791656332, "grad_norm": 7.385091781616211, "learning_rate": 8.503247088973329e-06, "loss": 0.3687, "step": 5249 }, { "epoch": 0.2604295848008334, "grad_norm": 6.484624862670898, "learning_rate": 8.50268545207207e-06, "loss": 0.3574, "step": 5250 }, { "epoch": 0.2604791904360335, "grad_norm": 5.526583194732666, "learning_rate": 8.502123728372683e-06, "loss": 0.2764, "step": 5251 }, { "epoch": 0.2605287960712337, "grad_norm": 9.974333763122559, "learning_rate": 8.501561917889086e-06, "loss": 0.354, "step": 5252 }, { "epoch": 0.26057840170643387, "grad_norm": 10.126853942871094, "learning_rate": 8.5010000206352e-06, "loss": 0.3597, "step": 5253 }, { "epoch": 0.260628007341634, "grad_norm": 5.64898681640625, "learning_rate": 8.500438036624949e-06, "loss": 0.2816, "step": 5254 }, { "epoch": 0.26067761297683417, "grad_norm": 4.564761161804199, "learning_rate": 8.499875965872262e-06, "loss": 0.3018, "step": 5255 }, { "epoch": 0.26072721861203435, "grad_norm": 7.467818260192871, "learning_rate": 8.499313808391063e-06, "loss": 0.3554, "step": 5256 }, { "epoch": 0.2607768242472345, "grad_norm": 6.499337673187256, "learning_rate": 8.498751564195287e-06, "loss": 0.3446, "step": 5257 }, { "epoch": 0.26082642988243465, "grad_norm": 13.375696182250977, "learning_rate": 8.498189233298863e-06, "loss": 0.3735, "step": 5258 }, { "epoch": 0.26087603551763483, "grad_norm": 7.1051177978515625, "learning_rate": 8.49762681571573e-06, "loss": 0.3517, "step": 5259 }, { "epoch": 0.26092564115283495, "grad_norm": 5.5095109939575195, "learning_rate": 8.497064311459819e-06, "loss": 0.2303, "step": 5260 }, { "epoch": 0.26097524678803513, "grad_norm": 10.354667663574219, "learning_rate": 8.496501720545074e-06, "loss": 0.3687, "step": 5261 }, { "epoch": 0.26102485242323525, "grad_norm": 4.253104209899902, "learning_rate": 8.495939042985434e-06, "loss": 0.2174, "step": 5262 }, { "epoch": 0.26107445805843543, "grad_norm": 10.284395217895508, "learning_rate": 8.495376278794844e-06, "loss": 0.303, "step": 5263 }, { "epoch": 0.2611240636936356, "grad_norm": 5.031721115112305, "learning_rate": 8.49481342798725e-06, "loss": 0.3123, "step": 5264 }, { "epoch": 0.26117366932883573, "grad_norm": 5.888519763946533, "learning_rate": 8.494250490576596e-06, "loss": 0.208, "step": 5265 }, { "epoch": 0.2612232749640359, "grad_norm": 7.641378402709961, "learning_rate": 8.493687466576836e-06, "loss": 0.3206, "step": 5266 }, { "epoch": 0.2612728805992361, "grad_norm": 10.465010643005371, "learning_rate": 8.493124356001918e-06, "loss": 0.2552, "step": 5267 }, { "epoch": 0.2613224862344362, "grad_norm": 9.509516716003418, "learning_rate": 8.4925611588658e-06, "loss": 0.3592, "step": 5268 }, { "epoch": 0.2613720918696364, "grad_norm": 10.721659660339355, "learning_rate": 8.491997875182437e-06, "loss": 0.3339, "step": 5269 }, { "epoch": 0.26142169750483657, "grad_norm": 7.153807640075684, "learning_rate": 8.491434504965786e-06, "loss": 0.3285, "step": 5270 }, { "epoch": 0.2614713031400367, "grad_norm": 10.244074821472168, "learning_rate": 8.49087104822981e-06, "loss": 0.3198, "step": 5271 }, { "epoch": 0.26152090877523687, "grad_norm": 5.397165298461914, "learning_rate": 8.490307504988468e-06, "loss": 0.3048, "step": 5272 }, { "epoch": 0.26157051441043705, "grad_norm": 5.951436996459961, "learning_rate": 8.48974387525573e-06, "loss": 0.2946, "step": 5273 }, { "epoch": 0.2616201200456372, "grad_norm": 9.922401428222656, "learning_rate": 8.489180159045557e-06, "loss": 0.2962, "step": 5274 }, { "epoch": 0.26166972568083735, "grad_norm": 6.836930751800537, "learning_rate": 8.488616356371924e-06, "loss": 0.244, "step": 5275 }, { "epoch": 0.26171933131603753, "grad_norm": 6.480302810668945, "learning_rate": 8.488052467248798e-06, "loss": 0.3596, "step": 5276 }, { "epoch": 0.26176893695123765, "grad_norm": 7.614717960357666, "learning_rate": 8.487488491690154e-06, "loss": 0.2327, "step": 5277 }, { "epoch": 0.26181854258643783, "grad_norm": 16.53972816467285, "learning_rate": 8.486924429709969e-06, "loss": 0.4501, "step": 5278 }, { "epoch": 0.26186814822163795, "grad_norm": 6.161458492279053, "learning_rate": 8.48636028132222e-06, "loss": 0.2818, "step": 5279 }, { "epoch": 0.26191775385683813, "grad_norm": 5.988899230957031, "learning_rate": 8.485796046540884e-06, "loss": 0.3423, "step": 5280 }, { "epoch": 0.2619673594920383, "grad_norm": 4.41724967956543, "learning_rate": 8.485231725379945e-06, "loss": 0.2369, "step": 5281 }, { "epoch": 0.26201696512723843, "grad_norm": 8.023351669311523, "learning_rate": 8.48466731785339e-06, "loss": 0.3333, "step": 5282 }, { "epoch": 0.2620665707624386, "grad_norm": 7.131095886230469, "learning_rate": 8.4841028239752e-06, "loss": 0.2859, "step": 5283 }, { "epoch": 0.2621161763976388, "grad_norm": 10.167893409729004, "learning_rate": 8.483538243759367e-06, "loss": 0.305, "step": 5284 }, { "epoch": 0.2621657820328389, "grad_norm": 5.534658908843994, "learning_rate": 8.48297357721988e-06, "loss": 0.2709, "step": 5285 }, { "epoch": 0.2622153876680391, "grad_norm": 6.1133341789245605, "learning_rate": 8.482408824370734e-06, "loss": 0.3431, "step": 5286 }, { "epoch": 0.26226499330323927, "grad_norm": 13.138932228088379, "learning_rate": 8.481843985225919e-06, "loss": 0.3724, "step": 5287 }, { "epoch": 0.2623145989384394, "grad_norm": 4.499127388000488, "learning_rate": 8.481279059799437e-06, "loss": 0.1911, "step": 5288 }, { "epoch": 0.26236420457363957, "grad_norm": 9.156511306762695, "learning_rate": 8.480714048105285e-06, "loss": 0.3148, "step": 5289 }, { "epoch": 0.26241381020883975, "grad_norm": 6.552516460418701, "learning_rate": 8.480148950157464e-06, "loss": 0.3394, "step": 5290 }, { "epoch": 0.26246341584403987, "grad_norm": 10.689783096313477, "learning_rate": 8.47958376596998e-06, "loss": 0.4368, "step": 5291 }, { "epoch": 0.26251302147924005, "grad_norm": 6.552921295166016, "learning_rate": 8.479018495556833e-06, "loss": 0.2648, "step": 5292 }, { "epoch": 0.26256262711444023, "grad_norm": 5.648386478424072, "learning_rate": 8.478453138932035e-06, "loss": 0.3107, "step": 5293 }, { "epoch": 0.26261223274964035, "grad_norm": 8.445478439331055, "learning_rate": 8.477887696109594e-06, "loss": 0.3598, "step": 5294 }, { "epoch": 0.26266183838484053, "grad_norm": 9.703696250915527, "learning_rate": 8.477322167103525e-06, "loss": 0.2819, "step": 5295 }, { "epoch": 0.26271144402004065, "grad_norm": 7.800277233123779, "learning_rate": 8.476756551927839e-06, "loss": 0.2563, "step": 5296 }, { "epoch": 0.26276104965524083, "grad_norm": 8.646904945373535, "learning_rate": 8.47619085059655e-06, "loss": 0.3354, "step": 5297 }, { "epoch": 0.262810655290441, "grad_norm": 8.707880973815918, "learning_rate": 8.475625063123683e-06, "loss": 0.2812, "step": 5298 }, { "epoch": 0.26286026092564113, "grad_norm": 5.983588218688965, "learning_rate": 8.475059189523253e-06, "loss": 0.3588, "step": 5299 }, { "epoch": 0.2629098665608413, "grad_norm": 11.236210823059082, "learning_rate": 8.474493229809286e-06, "loss": 0.4148, "step": 5300 }, { "epoch": 0.2629594721960415, "grad_norm": 11.313372611999512, "learning_rate": 8.473927183995803e-06, "loss": 0.354, "step": 5301 }, { "epoch": 0.2630090778312416, "grad_norm": 6.819655418395996, "learning_rate": 8.473361052096833e-06, "loss": 0.3471, "step": 5302 }, { "epoch": 0.2630586834664418, "grad_norm": 6.728565216064453, "learning_rate": 8.472794834126406e-06, "loss": 0.3636, "step": 5303 }, { "epoch": 0.26310828910164197, "grad_norm": 6.499357223510742, "learning_rate": 8.472228530098551e-06, "loss": 0.4336, "step": 5304 }, { "epoch": 0.2631578947368421, "grad_norm": 4.717827320098877, "learning_rate": 8.471662140027304e-06, "loss": 0.2662, "step": 5305 }, { "epoch": 0.26320750037204227, "grad_norm": 15.557036399841309, "learning_rate": 8.471095663926698e-06, "loss": 0.4597, "step": 5306 }, { "epoch": 0.26325710600724245, "grad_norm": 8.115334510803223, "learning_rate": 8.470529101810772e-06, "loss": 0.3505, "step": 5307 }, { "epoch": 0.26330671164244257, "grad_norm": 6.923091888427734, "learning_rate": 8.469962453693564e-06, "loss": 0.3275, "step": 5308 }, { "epoch": 0.26335631727764275, "grad_norm": 8.611011505126953, "learning_rate": 8.469395719589117e-06, "loss": 0.3713, "step": 5309 }, { "epoch": 0.26340592291284287, "grad_norm": 8.040492057800293, "learning_rate": 8.468828899511474e-06, "loss": 0.3513, "step": 5310 }, { "epoch": 0.26345552854804305, "grad_norm": 10.50399398803711, "learning_rate": 8.468261993474683e-06, "loss": 0.3426, "step": 5311 }, { "epoch": 0.26350513418324323, "grad_norm": 10.831597328186035, "learning_rate": 8.467695001492789e-06, "loss": 0.3096, "step": 5312 }, { "epoch": 0.26355473981844335, "grad_norm": 9.907333374023438, "learning_rate": 8.467127923579846e-06, "loss": 0.3869, "step": 5313 }, { "epoch": 0.26360434545364353, "grad_norm": 5.256123065948486, "learning_rate": 8.466560759749904e-06, "loss": 0.1924, "step": 5314 }, { "epoch": 0.2636539510888437, "grad_norm": 7.261187553405762, "learning_rate": 8.465993510017019e-06, "loss": 0.2349, "step": 5315 }, { "epoch": 0.26370355672404383, "grad_norm": 11.301277160644531, "learning_rate": 8.465426174395247e-06, "loss": 0.3203, "step": 5316 }, { "epoch": 0.263753162359244, "grad_norm": 5.953991889953613, "learning_rate": 8.464858752898646e-06, "loss": 0.3542, "step": 5317 }, { "epoch": 0.2638027679944442, "grad_norm": 5.223270416259766, "learning_rate": 8.464291245541279e-06, "loss": 0.3453, "step": 5318 }, { "epoch": 0.2638523736296443, "grad_norm": 6.686816215515137, "learning_rate": 8.463723652337206e-06, "loss": 0.2937, "step": 5319 }, { "epoch": 0.2639019792648445, "grad_norm": 4.001700401306152, "learning_rate": 8.463155973300495e-06, "loss": 0.2472, "step": 5320 }, { "epoch": 0.26395158490004467, "grad_norm": 8.26931381225586, "learning_rate": 8.462588208445213e-06, "loss": 0.3705, "step": 5321 }, { "epoch": 0.2640011905352448, "grad_norm": 9.206981658935547, "learning_rate": 8.462020357785427e-06, "loss": 0.3677, "step": 5322 }, { "epoch": 0.26405079617044497, "grad_norm": 6.684130668640137, "learning_rate": 8.461452421335214e-06, "loss": 0.3812, "step": 5323 }, { "epoch": 0.26410040180564515, "grad_norm": 9.635438919067383, "learning_rate": 8.460884399108642e-06, "loss": 0.3669, "step": 5324 }, { "epoch": 0.26415000744084527, "grad_norm": 7.569336414337158, "learning_rate": 8.460316291119787e-06, "loss": 0.2814, "step": 5325 }, { "epoch": 0.26419961307604545, "grad_norm": 9.949418067932129, "learning_rate": 8.459748097382732e-06, "loss": 0.3608, "step": 5326 }, { "epoch": 0.26424921871124557, "grad_norm": 6.640712261199951, "learning_rate": 8.459179817911554e-06, "loss": 0.3014, "step": 5327 }, { "epoch": 0.26429882434644575, "grad_norm": 5.082208156585693, "learning_rate": 8.458611452720334e-06, "loss": 0.2688, "step": 5328 }, { "epoch": 0.2643484299816459, "grad_norm": 6.086938381195068, "learning_rate": 8.458043001823159e-06, "loss": 0.2797, "step": 5329 }, { "epoch": 0.26439803561684605, "grad_norm": 9.139687538146973, "learning_rate": 8.457474465234114e-06, "loss": 0.3163, "step": 5330 }, { "epoch": 0.26444764125204623, "grad_norm": 5.288522720336914, "learning_rate": 8.456905842967286e-06, "loss": 0.2651, "step": 5331 }, { "epoch": 0.2644972468872464, "grad_norm": 11.716052055358887, "learning_rate": 8.45633713503677e-06, "loss": 0.5095, "step": 5332 }, { "epoch": 0.26454685252244653, "grad_norm": 11.642616271972656, "learning_rate": 8.455768341456656e-06, "loss": 0.2984, "step": 5333 }, { "epoch": 0.2645964581576467, "grad_norm": 14.012213706970215, "learning_rate": 8.455199462241037e-06, "loss": 0.5297, "step": 5334 }, { "epoch": 0.2646460637928469, "grad_norm": 19.120553970336914, "learning_rate": 8.454630497404012e-06, "loss": 0.4126, "step": 5335 }, { "epoch": 0.264695669428047, "grad_norm": 7.463601589202881, "learning_rate": 8.454061446959685e-06, "loss": 0.3268, "step": 5336 }, { "epoch": 0.2647452750632472, "grad_norm": 9.411334037780762, "learning_rate": 8.453492310922149e-06, "loss": 0.3886, "step": 5337 }, { "epoch": 0.26479488069844737, "grad_norm": 5.983002662658691, "learning_rate": 8.452923089305514e-06, "loss": 0.2772, "step": 5338 }, { "epoch": 0.2648444863336475, "grad_norm": 7.940988063812256, "learning_rate": 8.45235378212388e-06, "loss": 0.3784, "step": 5339 }, { "epoch": 0.26489409196884767, "grad_norm": 4.49648380279541, "learning_rate": 8.45178438939136e-06, "loss": 0.301, "step": 5340 }, { "epoch": 0.26494369760404785, "grad_norm": 7.2222466468811035, "learning_rate": 8.451214911122062e-06, "loss": 0.3547, "step": 5341 }, { "epoch": 0.26499330323924797, "grad_norm": 7.187011241912842, "learning_rate": 8.450645347330093e-06, "loss": 0.2975, "step": 5342 }, { "epoch": 0.26504290887444815, "grad_norm": 8.31513500213623, "learning_rate": 8.450075698029576e-06, "loss": 0.2465, "step": 5343 }, { "epoch": 0.26509251450964827, "grad_norm": 18.897693634033203, "learning_rate": 8.44950596323462e-06, "loss": 0.5048, "step": 5344 }, { "epoch": 0.26514212014484845, "grad_norm": 5.646877765655518, "learning_rate": 8.448936142959348e-06, "loss": 0.3736, "step": 5345 }, { "epoch": 0.2651917257800486, "grad_norm": 5.838080406188965, "learning_rate": 8.448366237217876e-06, "loss": 0.2712, "step": 5346 }, { "epoch": 0.26524133141524875, "grad_norm": 3.9875054359436035, "learning_rate": 8.44779624602433e-06, "loss": 0.3519, "step": 5347 }, { "epoch": 0.2652909370504489, "grad_norm": 4.125168323516846, "learning_rate": 8.447226169392832e-06, "loss": 0.183, "step": 5348 }, { "epoch": 0.2653405426856491, "grad_norm": 5.1254072189331055, "learning_rate": 8.446656007337512e-06, "loss": 0.2673, "step": 5349 }, { "epoch": 0.26539014832084923, "grad_norm": 8.40460205078125, "learning_rate": 8.446085759872497e-06, "loss": 0.2531, "step": 5350 }, { "epoch": 0.2654397539560494, "grad_norm": 6.405237674713135, "learning_rate": 8.445515427011916e-06, "loss": 0.2541, "step": 5351 }, { "epoch": 0.2654893595912496, "grad_norm": 7.866547584533691, "learning_rate": 8.444945008769907e-06, "loss": 0.3524, "step": 5352 }, { "epoch": 0.2655389652264497, "grad_norm": 7.379385471343994, "learning_rate": 8.444374505160601e-06, "loss": 0.2551, "step": 5353 }, { "epoch": 0.2655885708616499, "grad_norm": 4.282839775085449, "learning_rate": 8.443803916198136e-06, "loss": 0.267, "step": 5354 }, { "epoch": 0.26563817649685006, "grad_norm": 4.747246265411377, "learning_rate": 8.443233241896653e-06, "loss": 0.2646, "step": 5355 }, { "epoch": 0.2656877821320502, "grad_norm": 8.490826606750488, "learning_rate": 8.442662482270293e-06, "loss": 0.3648, "step": 5356 }, { "epoch": 0.26573738776725037, "grad_norm": 9.639345169067383, "learning_rate": 8.442091637333199e-06, "loss": 0.381, "step": 5357 }, { "epoch": 0.26578699340245054, "grad_norm": 5.068434715270996, "learning_rate": 8.441520707099516e-06, "loss": 0.3209, "step": 5358 }, { "epoch": 0.26583659903765067, "grad_norm": 12.920294761657715, "learning_rate": 8.440949691583394e-06, "loss": 0.2864, "step": 5359 }, { "epoch": 0.26588620467285085, "grad_norm": 8.9058198928833, "learning_rate": 8.440378590798983e-06, "loss": 0.4059, "step": 5360 }, { "epoch": 0.26593581030805097, "grad_norm": 6.172276496887207, "learning_rate": 8.439807404760434e-06, "loss": 0.287, "step": 5361 }, { "epoch": 0.26598541594325115, "grad_norm": 8.321894645690918, "learning_rate": 8.4392361334819e-06, "loss": 0.3328, "step": 5362 }, { "epoch": 0.2660350215784513, "grad_norm": 4.409558296203613, "learning_rate": 8.438664776977542e-06, "loss": 0.2508, "step": 5363 }, { "epoch": 0.26608462721365145, "grad_norm": 7.674745559692383, "learning_rate": 8.438093335261513e-06, "loss": 0.3776, "step": 5364 }, { "epoch": 0.2661342328488516, "grad_norm": 11.81762981414795, "learning_rate": 8.437521808347974e-06, "loss": 0.4461, "step": 5365 }, { "epoch": 0.2661838384840518, "grad_norm": 16.085739135742188, "learning_rate": 8.436950196251092e-06, "loss": 0.3183, "step": 5366 }, { "epoch": 0.2662334441192519, "grad_norm": 4.750621318817139, "learning_rate": 8.43637849898503e-06, "loss": 0.2515, "step": 5367 }, { "epoch": 0.2662830497544521, "grad_norm": 4.519717693328857, "learning_rate": 8.435806716563955e-06, "loss": 0.188, "step": 5368 }, { "epoch": 0.2663326553896523, "grad_norm": 7.895843505859375, "learning_rate": 8.435234849002033e-06, "loss": 0.2582, "step": 5369 }, { "epoch": 0.2663822610248524, "grad_norm": 6.815189361572266, "learning_rate": 8.43466289631344e-06, "loss": 0.2605, "step": 5370 }, { "epoch": 0.2664318666600526, "grad_norm": 5.613749027252197, "learning_rate": 8.434090858512345e-06, "loss": 0.2266, "step": 5371 }, { "epoch": 0.26648147229525276, "grad_norm": 10.955257415771484, "learning_rate": 8.433518735612925e-06, "loss": 0.39, "step": 5372 }, { "epoch": 0.2665310779304529, "grad_norm": 16.26923179626465, "learning_rate": 8.432946527629358e-06, "loss": 0.5126, "step": 5373 }, { "epoch": 0.26658068356565306, "grad_norm": 27.259626388549805, "learning_rate": 8.432374234575823e-06, "loss": 0.5465, "step": 5374 }, { "epoch": 0.26663028920085324, "grad_norm": 11.856463432312012, "learning_rate": 8.431801856466501e-06, "loss": 0.246, "step": 5375 }, { "epoch": 0.26667989483605337, "grad_norm": 9.038220405578613, "learning_rate": 8.431229393315579e-06, "loss": 0.4705, "step": 5376 }, { "epoch": 0.26672950047125354, "grad_norm": 6.6756768226623535, "learning_rate": 8.43065684513724e-06, "loss": 0.2966, "step": 5377 }, { "epoch": 0.26677910610645367, "grad_norm": 5.720130920410156, "learning_rate": 8.43008421194567e-06, "loss": 0.2869, "step": 5378 }, { "epoch": 0.26682871174165385, "grad_norm": 3.722399950027466, "learning_rate": 8.429511493755064e-06, "loss": 0.2127, "step": 5379 }, { "epoch": 0.266878317376854, "grad_norm": 4.935715198516846, "learning_rate": 8.428938690579611e-06, "loss": 0.2416, "step": 5380 }, { "epoch": 0.26692792301205415, "grad_norm": 11.293269157409668, "learning_rate": 8.428365802433506e-06, "loss": 0.3981, "step": 5381 }, { "epoch": 0.2669775286472543, "grad_norm": 8.6625394821167, "learning_rate": 8.427792829330947e-06, "loss": 0.413, "step": 5382 }, { "epoch": 0.2670271342824545, "grad_norm": 11.232183456420898, "learning_rate": 8.42721977128613e-06, "loss": 0.285, "step": 5383 }, { "epoch": 0.2670767399176546, "grad_norm": 13.092721939086914, "learning_rate": 8.426646628313256e-06, "loss": 0.372, "step": 5384 }, { "epoch": 0.2671263455528548, "grad_norm": 4.842816352844238, "learning_rate": 8.42607340042653e-06, "loss": 0.3409, "step": 5385 }, { "epoch": 0.267175951188055, "grad_norm": 11.245955467224121, "learning_rate": 8.425500087640155e-06, "loss": 0.4408, "step": 5386 }, { "epoch": 0.2672255568232551, "grad_norm": 8.041816711425781, "learning_rate": 8.424926689968338e-06, "loss": 0.4387, "step": 5387 }, { "epoch": 0.2672751624584553, "grad_norm": 4.155465126037598, "learning_rate": 8.424353207425289e-06, "loss": 0.2292, "step": 5388 }, { "epoch": 0.26732476809365546, "grad_norm": 4.899085998535156, "learning_rate": 8.423779640025217e-06, "loss": 0.2702, "step": 5389 }, { "epoch": 0.2673743737288556, "grad_norm": 5.487790107727051, "learning_rate": 8.423205987782338e-06, "loss": 0.2598, "step": 5390 }, { "epoch": 0.26742397936405576, "grad_norm": 6.095058441162109, "learning_rate": 8.422632250710865e-06, "loss": 0.3807, "step": 5391 }, { "epoch": 0.26747358499925594, "grad_norm": 7.153007984161377, "learning_rate": 8.422058428825017e-06, "loss": 0.3281, "step": 5392 }, { "epoch": 0.26752319063445607, "grad_norm": 6.553511619567871, "learning_rate": 8.421484522139013e-06, "loss": 0.3122, "step": 5393 }, { "epoch": 0.26757279626965624, "grad_norm": 7.593138217926025, "learning_rate": 8.420910530667077e-06, "loss": 0.2673, "step": 5394 }, { "epoch": 0.26762240190485637, "grad_norm": 5.58172607421875, "learning_rate": 8.420336454423428e-06, "loss": 0.2891, "step": 5395 }, { "epoch": 0.26767200754005654, "grad_norm": 6.854176998138428, "learning_rate": 8.419762293422296e-06, "loss": 0.3091, "step": 5396 }, { "epoch": 0.2677216131752567, "grad_norm": 7.20849609375, "learning_rate": 8.419188047677907e-06, "loss": 0.2313, "step": 5397 }, { "epoch": 0.26777121881045685, "grad_norm": 5.379668712615967, "learning_rate": 8.41861371720449e-06, "loss": 0.3175, "step": 5398 }, { "epoch": 0.267820824445657, "grad_norm": 5.564431667327881, "learning_rate": 8.418039302016282e-06, "loss": 0.2753, "step": 5399 }, { "epoch": 0.2678704300808572, "grad_norm": 5.410451889038086, "learning_rate": 8.417464802127513e-06, "loss": 0.3023, "step": 5400 }, { "epoch": 0.2679200357160573, "grad_norm": 4.626411437988281, "learning_rate": 8.416890217552418e-06, "loss": 0.2281, "step": 5401 }, { "epoch": 0.2679696413512575, "grad_norm": 3.5154027938842773, "learning_rate": 8.416315548305239e-06, "loss": 0.1779, "step": 5402 }, { "epoch": 0.2680192469864577, "grad_norm": 3.883284091949463, "learning_rate": 8.415740794400216e-06, "loss": 0.2748, "step": 5403 }, { "epoch": 0.2680688526216578, "grad_norm": 6.584335803985596, "learning_rate": 8.415165955851591e-06, "loss": 0.3424, "step": 5404 }, { "epoch": 0.268118458256858, "grad_norm": 5.782784461975098, "learning_rate": 8.414591032673608e-06, "loss": 0.2053, "step": 5405 }, { "epoch": 0.26816806389205816, "grad_norm": 15.550911903381348, "learning_rate": 8.414016024880515e-06, "loss": 0.3703, "step": 5406 }, { "epoch": 0.2682176695272583, "grad_norm": 4.5768632888793945, "learning_rate": 8.41344093248656e-06, "loss": 0.2438, "step": 5407 }, { "epoch": 0.26826727516245846, "grad_norm": 8.73996353149414, "learning_rate": 8.412865755505995e-06, "loss": 0.2673, "step": 5408 }, { "epoch": 0.26831688079765864, "grad_norm": 8.137763023376465, "learning_rate": 8.412290493953074e-06, "loss": 0.3976, "step": 5409 }, { "epoch": 0.26836648643285876, "grad_norm": 6.57310152053833, "learning_rate": 8.41171514784205e-06, "loss": 0.3079, "step": 5410 }, { "epoch": 0.26841609206805894, "grad_norm": 7.447854995727539, "learning_rate": 8.41113971718718e-06, "loss": 0.4044, "step": 5411 }, { "epoch": 0.26846569770325907, "grad_norm": 4.737069606781006, "learning_rate": 8.410564202002727e-06, "loss": 0.2722, "step": 5412 }, { "epoch": 0.26851530333845924, "grad_norm": 13.984667778015137, "learning_rate": 8.40998860230295e-06, "loss": 0.4066, "step": 5413 }, { "epoch": 0.2685649089736594, "grad_norm": 6.967346668243408, "learning_rate": 8.40941291810211e-06, "loss": 0.3809, "step": 5414 }, { "epoch": 0.26861451460885954, "grad_norm": 7.1916093826293945, "learning_rate": 8.40883714941448e-06, "loss": 0.3171, "step": 5415 }, { "epoch": 0.2686641202440597, "grad_norm": 4.376883029937744, "learning_rate": 8.40826129625432e-06, "loss": 0.2245, "step": 5416 }, { "epoch": 0.2687137258792599, "grad_norm": 10.532292366027832, "learning_rate": 8.407685358635906e-06, "loss": 0.3557, "step": 5417 }, { "epoch": 0.26876333151446, "grad_norm": 5.47633171081543, "learning_rate": 8.407109336573506e-06, "loss": 0.3477, "step": 5418 }, { "epoch": 0.2688129371496602, "grad_norm": 3.9169061183929443, "learning_rate": 8.406533230081394e-06, "loss": 0.197, "step": 5419 }, { "epoch": 0.2688625427848604, "grad_norm": 6.9862141609191895, "learning_rate": 8.40595703917385e-06, "loss": 0.2919, "step": 5420 }, { "epoch": 0.2689121484200605, "grad_norm": 5.705668926239014, "learning_rate": 8.40538076386515e-06, "loss": 0.2566, "step": 5421 }, { "epoch": 0.2689617540552607, "grad_norm": 8.423681259155273, "learning_rate": 8.404804404169572e-06, "loss": 0.4677, "step": 5422 }, { "epoch": 0.26901135969046086, "grad_norm": 5.2985005378723145, "learning_rate": 8.404227960101402e-06, "loss": 0.3238, "step": 5423 }, { "epoch": 0.269060965325661, "grad_norm": 13.636810302734375, "learning_rate": 8.403651431674924e-06, "loss": 0.3899, "step": 5424 }, { "epoch": 0.26911057096086116, "grad_norm": 7.819157600402832, "learning_rate": 8.403074818904423e-06, "loss": 0.3736, "step": 5425 }, { "epoch": 0.26916017659606134, "grad_norm": 5.36924934387207, "learning_rate": 8.402498121804187e-06, "loss": 0.3042, "step": 5426 }, { "epoch": 0.26920978223126146, "grad_norm": 7.32952356338501, "learning_rate": 8.401921340388512e-06, "loss": 0.3998, "step": 5427 }, { "epoch": 0.26925938786646164, "grad_norm": 8.081432342529297, "learning_rate": 8.401344474671686e-06, "loss": 0.2918, "step": 5428 }, { "epoch": 0.26930899350166176, "grad_norm": 7.259293079376221, "learning_rate": 8.400767524668004e-06, "loss": 0.3238, "step": 5429 }, { "epoch": 0.26935859913686194, "grad_norm": 4.450658321380615, "learning_rate": 8.400190490391763e-06, "loss": 0.3156, "step": 5430 }, { "epoch": 0.2694082047720621, "grad_norm": 4.178694248199463, "learning_rate": 8.399613371857268e-06, "loss": 0.2341, "step": 5431 }, { "epoch": 0.26945781040726224, "grad_norm": 5.484811782836914, "learning_rate": 8.399036169078814e-06, "loss": 0.3032, "step": 5432 }, { "epoch": 0.2695074160424624, "grad_norm": 9.178486824035645, "learning_rate": 8.398458882070705e-06, "loss": 0.3388, "step": 5433 }, { "epoch": 0.2695570216776626, "grad_norm": 5.142063140869141, "learning_rate": 8.397881510847247e-06, "loss": 0.2583, "step": 5434 }, { "epoch": 0.2696066273128627, "grad_norm": 10.847723960876465, "learning_rate": 8.39730405542275e-06, "loss": 0.217, "step": 5435 }, { "epoch": 0.2696562329480629, "grad_norm": 5.032606601715088, "learning_rate": 8.396726515811521e-06, "loss": 0.203, "step": 5436 }, { "epoch": 0.2697058385832631, "grad_norm": 10.338068008422852, "learning_rate": 8.396148892027873e-06, "loss": 0.3626, "step": 5437 }, { "epoch": 0.2697554442184632, "grad_norm": 9.935627937316895, "learning_rate": 8.39557118408612e-06, "loss": 0.274, "step": 5438 }, { "epoch": 0.2698050498536634, "grad_norm": 5.0335540771484375, "learning_rate": 8.394993392000576e-06, "loss": 0.2501, "step": 5439 }, { "epoch": 0.26985465548886356, "grad_norm": 7.167852401733398, "learning_rate": 8.39441551578556e-06, "loss": 0.3355, "step": 5440 }, { "epoch": 0.2699042611240637, "grad_norm": 9.99490737915039, "learning_rate": 8.393837555455391e-06, "loss": 0.3439, "step": 5441 }, { "epoch": 0.26995386675926386, "grad_norm": 10.335182189941406, "learning_rate": 8.393259511024394e-06, "loss": 0.4125, "step": 5442 }, { "epoch": 0.270003472394464, "grad_norm": 8.09412956237793, "learning_rate": 8.39268138250689e-06, "loss": 0.3394, "step": 5443 }, { "epoch": 0.27005307802966416, "grad_norm": 5.937371253967285, "learning_rate": 8.392103169917208e-06, "loss": 0.3215, "step": 5444 }, { "epoch": 0.27010268366486434, "grad_norm": 9.32388687133789, "learning_rate": 8.391524873269674e-06, "loss": 0.3534, "step": 5445 }, { "epoch": 0.27015228930006446, "grad_norm": 9.944478988647461, "learning_rate": 8.39094649257862e-06, "loss": 0.2576, "step": 5446 }, { "epoch": 0.27020189493526464, "grad_norm": 21.218637466430664, "learning_rate": 8.390368027858378e-06, "loss": 0.2526, "step": 5447 }, { "epoch": 0.2702515005704648, "grad_norm": 4.684743404388428, "learning_rate": 8.389789479123282e-06, "loss": 0.2029, "step": 5448 }, { "epoch": 0.27030110620566494, "grad_norm": 6.7101335525512695, "learning_rate": 8.389210846387669e-06, "loss": 0.3574, "step": 5449 }, { "epoch": 0.2703507118408651, "grad_norm": 4.88425874710083, "learning_rate": 8.388632129665881e-06, "loss": 0.3196, "step": 5450 }, { "epoch": 0.2704003174760653, "grad_norm": 7.687252998352051, "learning_rate": 8.388053328972254e-06, "loss": 0.2736, "step": 5451 }, { "epoch": 0.2704499231112654, "grad_norm": 4.708369731903076, "learning_rate": 8.387474444321132e-06, "loss": 0.2353, "step": 5452 }, { "epoch": 0.2704995287464656, "grad_norm": 9.547255516052246, "learning_rate": 8.386895475726862e-06, "loss": 0.3903, "step": 5453 }, { "epoch": 0.2705491343816658, "grad_norm": 6.6938371658325195, "learning_rate": 8.38631642320379e-06, "loss": 0.3247, "step": 5454 }, { "epoch": 0.2705987400168659, "grad_norm": 6.042801856994629, "learning_rate": 8.385737286766265e-06, "loss": 0.3174, "step": 5455 }, { "epoch": 0.2706483456520661, "grad_norm": 5.552185535430908, "learning_rate": 8.385158066428638e-06, "loss": 0.3063, "step": 5456 }, { "epoch": 0.27069795128726626, "grad_norm": 10.15284252166748, "learning_rate": 8.384578762205263e-06, "loss": 0.3029, "step": 5457 }, { "epoch": 0.2707475569224664, "grad_norm": 7.745758533477783, "learning_rate": 8.383999374110496e-06, "loss": 0.2107, "step": 5458 }, { "epoch": 0.27079716255766656, "grad_norm": 4.064722061157227, "learning_rate": 8.383419902158694e-06, "loss": 0.2496, "step": 5459 }, { "epoch": 0.2708467681928667, "grad_norm": 6.3697967529296875, "learning_rate": 8.382840346364217e-06, "loss": 0.2852, "step": 5460 }, { "epoch": 0.27089637382806686, "grad_norm": 7.663001537322998, "learning_rate": 8.382260706741423e-06, "loss": 0.3588, "step": 5461 }, { "epoch": 0.27094597946326704, "grad_norm": 18.489788055419922, "learning_rate": 8.38168098330468e-06, "loss": 0.5377, "step": 5462 }, { "epoch": 0.27099558509846716, "grad_norm": 5.412477493286133, "learning_rate": 8.381101176068354e-06, "loss": 0.2697, "step": 5463 }, { "epoch": 0.27104519073366734, "grad_norm": 3.8934943675994873, "learning_rate": 8.38052128504681e-06, "loss": 0.2515, "step": 5464 }, { "epoch": 0.2710947963688675, "grad_norm": 7.22828483581543, "learning_rate": 8.379941310254421e-06, "loss": 0.3422, "step": 5465 }, { "epoch": 0.27114440200406764, "grad_norm": 3.982423782348633, "learning_rate": 8.379361251705555e-06, "loss": 0.2124, "step": 5466 }, { "epoch": 0.2711940076392678, "grad_norm": 7.104859352111816, "learning_rate": 8.37878110941459e-06, "loss": 0.2992, "step": 5467 }, { "epoch": 0.271243613274468, "grad_norm": 7.39495325088501, "learning_rate": 8.378200883395901e-06, "loss": 0.2405, "step": 5468 }, { "epoch": 0.2712932189096681, "grad_norm": 7.582124710083008, "learning_rate": 8.377620573663865e-06, "loss": 0.2834, "step": 5469 }, { "epoch": 0.2713428245448683, "grad_norm": 12.04345417022705, "learning_rate": 8.377040180232862e-06, "loss": 0.5046, "step": 5470 }, { "epoch": 0.2713924301800685, "grad_norm": 10.675061225891113, "learning_rate": 8.376459703117279e-06, "loss": 0.3619, "step": 5471 }, { "epoch": 0.2714420358152686, "grad_norm": 5.215445518493652, "learning_rate": 8.375879142331496e-06, "loss": 0.355, "step": 5472 }, { "epoch": 0.2714916414504688, "grad_norm": 7.863058090209961, "learning_rate": 8.3752984978899e-06, "loss": 0.2546, "step": 5473 }, { "epoch": 0.27154124708566896, "grad_norm": 9.001819610595703, "learning_rate": 8.374717769806881e-06, "loss": 0.4439, "step": 5474 }, { "epoch": 0.2715908527208691, "grad_norm": 7.364742755889893, "learning_rate": 8.37413695809683e-06, "loss": 0.2406, "step": 5475 }, { "epoch": 0.27164045835606926, "grad_norm": 7.738584041595459, "learning_rate": 8.373556062774137e-06, "loss": 0.2422, "step": 5476 }, { "epoch": 0.2716900639912694, "grad_norm": 10.192800521850586, "learning_rate": 8.372975083853198e-06, "loss": 0.3978, "step": 5477 }, { "epoch": 0.27173966962646956, "grad_norm": 4.590334415435791, "learning_rate": 8.372394021348415e-06, "loss": 0.2874, "step": 5478 }, { "epoch": 0.27178927526166974, "grad_norm": 5.148804187774658, "learning_rate": 8.371812875274179e-06, "loss": 0.2233, "step": 5479 }, { "epoch": 0.27183888089686986, "grad_norm": 5.462326526641846, "learning_rate": 8.371231645644894e-06, "loss": 0.2616, "step": 5480 }, { "epoch": 0.27188848653207004, "grad_norm": 9.084911346435547, "learning_rate": 8.370650332474968e-06, "loss": 0.2928, "step": 5481 }, { "epoch": 0.2719380921672702, "grad_norm": 3.3942067623138428, "learning_rate": 8.370068935778799e-06, "loss": 0.1723, "step": 5482 }, { "epoch": 0.27198769780247034, "grad_norm": 5.763572692871094, "learning_rate": 8.369487455570797e-06, "loss": 0.217, "step": 5483 }, { "epoch": 0.2720373034376705, "grad_norm": 12.225540161132812, "learning_rate": 8.368905891865374e-06, "loss": 0.3472, "step": 5484 }, { "epoch": 0.2720869090728707, "grad_norm": 7.9709906578063965, "learning_rate": 8.368324244676937e-06, "loss": 0.4422, "step": 5485 }, { "epoch": 0.2721365147080708, "grad_norm": 11.239433288574219, "learning_rate": 8.367742514019902e-06, "loss": 0.3771, "step": 5486 }, { "epoch": 0.272186120343271, "grad_norm": 5.736754894256592, "learning_rate": 8.367160699908685e-06, "loss": 0.3486, "step": 5487 }, { "epoch": 0.2722357259784712, "grad_norm": 6.203617095947266, "learning_rate": 8.366578802357702e-06, "loss": 0.2599, "step": 5488 }, { "epoch": 0.2722853316136713, "grad_norm": 5.949020862579346, "learning_rate": 8.365996821381373e-06, "loss": 0.2723, "step": 5489 }, { "epoch": 0.2723349372488715, "grad_norm": 9.195355415344238, "learning_rate": 8.365414756994122e-06, "loss": 0.3607, "step": 5490 }, { "epoch": 0.27238454288407166, "grad_norm": 5.9033589363098145, "learning_rate": 8.364832609210367e-06, "loss": 0.2565, "step": 5491 }, { "epoch": 0.2724341485192718, "grad_norm": 8.444336891174316, "learning_rate": 8.364250378044542e-06, "loss": 0.3033, "step": 5492 }, { "epoch": 0.27248375415447196, "grad_norm": 4.553657531738281, "learning_rate": 8.363668063511069e-06, "loss": 0.2554, "step": 5493 }, { "epoch": 0.2725333597896721, "grad_norm": 9.461258888244629, "learning_rate": 8.363085665624379e-06, "loss": 0.3376, "step": 5494 }, { "epoch": 0.27258296542487226, "grad_norm": 7.606205463409424, "learning_rate": 8.362503184398906e-06, "loss": 0.2825, "step": 5495 }, { "epoch": 0.27263257106007244, "grad_norm": 12.461313247680664, "learning_rate": 8.361920619849083e-06, "loss": 0.3513, "step": 5496 }, { "epoch": 0.27268217669527256, "grad_norm": 13.080353736877441, "learning_rate": 8.361337971989347e-06, "loss": 0.3669, "step": 5497 }, { "epoch": 0.27273178233047274, "grad_norm": 12.59928035736084, "learning_rate": 8.360755240834135e-06, "loss": 0.2324, "step": 5498 }, { "epoch": 0.2727813879656729, "grad_norm": 5.281373500823975, "learning_rate": 8.360172426397888e-06, "loss": 0.2622, "step": 5499 }, { "epoch": 0.27283099360087304, "grad_norm": 5.0850701332092285, "learning_rate": 8.359589528695049e-06, "loss": 0.3131, "step": 5500 }, { "epoch": 0.2728805992360732, "grad_norm": 11.145225524902344, "learning_rate": 8.35900654774006e-06, "loss": 0.4971, "step": 5501 }, { "epoch": 0.2729302048712734, "grad_norm": 10.561161041259766, "learning_rate": 8.358423483547371e-06, "loss": 0.3243, "step": 5502 }, { "epoch": 0.2729798105064735, "grad_norm": 15.14734935760498, "learning_rate": 8.357840336131426e-06, "loss": 0.3631, "step": 5503 }, { "epoch": 0.2730294161416737, "grad_norm": 5.384989261627197, "learning_rate": 8.357257105506683e-06, "loss": 0.3692, "step": 5504 }, { "epoch": 0.2730790217768739, "grad_norm": 11.054250717163086, "learning_rate": 8.356673791687588e-06, "loss": 0.3757, "step": 5505 }, { "epoch": 0.273128627412074, "grad_norm": 7.164314270019531, "learning_rate": 8.356090394688595e-06, "loss": 0.2609, "step": 5506 }, { "epoch": 0.2731782330472742, "grad_norm": 5.852171897888184, "learning_rate": 8.355506914524168e-06, "loss": 0.3171, "step": 5507 }, { "epoch": 0.27322783868247436, "grad_norm": 6.170834541320801, "learning_rate": 8.35492335120876e-06, "loss": 0.2959, "step": 5508 }, { "epoch": 0.2732774443176745, "grad_norm": 5.868838787078857, "learning_rate": 8.354339704756836e-06, "loss": 0.3125, "step": 5509 }, { "epoch": 0.27332704995287466, "grad_norm": 6.025845527648926, "learning_rate": 8.353755975182855e-06, "loss": 0.2874, "step": 5510 }, { "epoch": 0.2733766555880748, "grad_norm": 7.413133144378662, "learning_rate": 8.353172162501284e-06, "loss": 0.3097, "step": 5511 }, { "epoch": 0.27342626122327496, "grad_norm": 6.382351398468018, "learning_rate": 8.352588266726592e-06, "loss": 0.2796, "step": 5512 }, { "epoch": 0.27347586685847514, "grad_norm": 6.6308112144470215, "learning_rate": 8.352004287873242e-06, "loss": 0.3712, "step": 5513 }, { "epoch": 0.27352547249367526, "grad_norm": 6.397802829742432, "learning_rate": 8.351420225955713e-06, "loss": 0.4041, "step": 5514 }, { "epoch": 0.27357507812887544, "grad_norm": 4.383605003356934, "learning_rate": 8.350836080988474e-06, "loss": 0.2138, "step": 5515 }, { "epoch": 0.2736246837640756, "grad_norm": 8.503790855407715, "learning_rate": 8.350251852986e-06, "loss": 0.3664, "step": 5516 }, { "epoch": 0.27367428939927574, "grad_norm": 12.051919937133789, "learning_rate": 8.34966754196277e-06, "loss": 0.445, "step": 5517 }, { "epoch": 0.2737238950344759, "grad_norm": 4.27903938293457, "learning_rate": 8.349083147933264e-06, "loss": 0.2579, "step": 5518 }, { "epoch": 0.2737735006696761, "grad_norm": 6.366534233093262, "learning_rate": 8.34849867091196e-06, "loss": 0.335, "step": 5519 }, { "epoch": 0.2738231063048762, "grad_norm": 6.456354141235352, "learning_rate": 8.347914110913347e-06, "loss": 0.2818, "step": 5520 }, { "epoch": 0.2738727119400764, "grad_norm": 7.133735656738281, "learning_rate": 8.347329467951908e-06, "loss": 0.3101, "step": 5521 }, { "epoch": 0.2739223175752766, "grad_norm": 5.006049633026123, "learning_rate": 8.34674474204213e-06, "loss": 0.298, "step": 5522 }, { "epoch": 0.2739719232104767, "grad_norm": 12.324883460998535, "learning_rate": 8.346159933198501e-06, "loss": 0.3177, "step": 5523 }, { "epoch": 0.2740215288456769, "grad_norm": 7.085936069488525, "learning_rate": 8.345575041435519e-06, "loss": 0.3667, "step": 5524 }, { "epoch": 0.27407113448087705, "grad_norm": 4.833345413208008, "learning_rate": 8.344990066767673e-06, "loss": 0.2938, "step": 5525 }, { "epoch": 0.2741207401160772, "grad_norm": 14.486024856567383, "learning_rate": 8.34440500920946e-06, "loss": 0.4594, "step": 5526 }, { "epoch": 0.27417034575127736, "grad_norm": 6.233124256134033, "learning_rate": 8.343819868775375e-06, "loss": 0.3078, "step": 5527 }, { "epoch": 0.2742199513864775, "grad_norm": 6.308267593383789, "learning_rate": 8.343234645479926e-06, "loss": 0.2248, "step": 5528 }, { "epoch": 0.27426955702167766, "grad_norm": 10.26835823059082, "learning_rate": 8.342649339337607e-06, "loss": 0.3001, "step": 5529 }, { "epoch": 0.27431916265687784, "grad_norm": 6.634329319000244, "learning_rate": 8.342063950362927e-06, "loss": 0.3293, "step": 5530 }, { "epoch": 0.27436876829207796, "grad_norm": 7.767709732055664, "learning_rate": 8.34147847857039e-06, "loss": 0.3876, "step": 5531 }, { "epoch": 0.27441837392727814, "grad_norm": 6.439932823181152, "learning_rate": 8.340892923974505e-06, "loss": 0.2981, "step": 5532 }, { "epoch": 0.2744679795624783, "grad_norm": 7.93820858001709, "learning_rate": 8.340307286589779e-06, "loss": 0.3735, "step": 5533 }, { "epoch": 0.27451758519767844, "grad_norm": 7.784219264984131, "learning_rate": 8.33972156643073e-06, "loss": 0.3947, "step": 5534 }, { "epoch": 0.2745671908328786, "grad_norm": 18.633180618286133, "learning_rate": 8.33913576351187e-06, "loss": 0.3203, "step": 5535 }, { "epoch": 0.2746167964680788, "grad_norm": 11.627189636230469, "learning_rate": 8.338549877847717e-06, "loss": 0.2638, "step": 5536 }, { "epoch": 0.2746664021032789, "grad_norm": 6.946606636047363, "learning_rate": 8.337963909452785e-06, "loss": 0.2799, "step": 5537 }, { "epoch": 0.2747160077384791, "grad_norm": 13.780843734741211, "learning_rate": 8.337377858341599e-06, "loss": 0.3249, "step": 5538 }, { "epoch": 0.2747656133736793, "grad_norm": 4.689237117767334, "learning_rate": 8.33679172452868e-06, "loss": 0.2523, "step": 5539 }, { "epoch": 0.2748152190088794, "grad_norm": 7.10341739654541, "learning_rate": 8.336205508028553e-06, "loss": 0.309, "step": 5540 }, { "epoch": 0.2748648246440796, "grad_norm": 9.035713195800781, "learning_rate": 8.335619208855742e-06, "loss": 0.3758, "step": 5541 }, { "epoch": 0.27491443027927975, "grad_norm": 6.9710845947265625, "learning_rate": 8.33503282702478e-06, "loss": 0.3798, "step": 5542 }, { "epoch": 0.2749640359144799, "grad_norm": 5.684024810791016, "learning_rate": 8.334446362550195e-06, "loss": 0.321, "step": 5543 }, { "epoch": 0.27501364154968005, "grad_norm": 7.436519622802734, "learning_rate": 8.33385981544652e-06, "loss": 0.3996, "step": 5544 }, { "epoch": 0.2750632471848802, "grad_norm": 4.729306221008301, "learning_rate": 8.333273185728294e-06, "loss": 0.3082, "step": 5545 }, { "epoch": 0.27511285282008036, "grad_norm": 6.0047926902771, "learning_rate": 8.332686473410049e-06, "loss": 0.2439, "step": 5546 }, { "epoch": 0.27516245845528053, "grad_norm": 7.30253791809082, "learning_rate": 8.332099678506325e-06, "loss": 0.3604, "step": 5547 }, { "epoch": 0.27521206409048066, "grad_norm": 6.249824047088623, "learning_rate": 8.331512801031665e-06, "loss": 0.3426, "step": 5548 }, { "epoch": 0.27526166972568084, "grad_norm": 9.128933906555176, "learning_rate": 8.33092584100061e-06, "loss": 0.3836, "step": 5549 }, { "epoch": 0.275311275360881, "grad_norm": 5.1629180908203125, "learning_rate": 8.330338798427704e-06, "loss": 0.3633, "step": 5550 }, { "epoch": 0.27536088099608114, "grad_norm": 7.699304580688477, "learning_rate": 8.329751673327498e-06, "loss": 0.2927, "step": 5551 }, { "epoch": 0.2754104866312813, "grad_norm": 6.240982532501221, "learning_rate": 8.329164465714542e-06, "loss": 0.3083, "step": 5552 }, { "epoch": 0.2754600922664815, "grad_norm": 5.880269527435303, "learning_rate": 8.32857717560338e-06, "loss": 0.275, "step": 5553 }, { "epoch": 0.2755096979016816, "grad_norm": 6.324913501739502, "learning_rate": 8.327989803008571e-06, "loss": 0.3233, "step": 5554 }, { "epoch": 0.2755593035368818, "grad_norm": 8.983816146850586, "learning_rate": 8.327402347944672e-06, "loss": 0.3902, "step": 5555 }, { "epoch": 0.275608909172082, "grad_norm": 5.813235282897949, "learning_rate": 8.326814810426234e-06, "loss": 0.3009, "step": 5556 }, { "epoch": 0.2756585148072821, "grad_norm": 6.192025661468506, "learning_rate": 8.326227190467822e-06, "loss": 0.4079, "step": 5557 }, { "epoch": 0.2757081204424823, "grad_norm": 4.699579238891602, "learning_rate": 8.325639488083995e-06, "loss": 0.2991, "step": 5558 }, { "epoch": 0.27575772607768245, "grad_norm": 7.118378162384033, "learning_rate": 8.325051703289317e-06, "loss": 0.2581, "step": 5559 }, { "epoch": 0.2758073317128826, "grad_norm": 5.147189617156982, "learning_rate": 8.324463836098355e-06, "loss": 0.3271, "step": 5560 }, { "epoch": 0.27585693734808275, "grad_norm": 5.8087615966796875, "learning_rate": 8.323875886525674e-06, "loss": 0.3309, "step": 5561 }, { "epoch": 0.2759065429832829, "grad_norm": 9.88439655303955, "learning_rate": 8.323287854585847e-06, "loss": 0.3884, "step": 5562 }, { "epoch": 0.27595614861848305, "grad_norm": 41.09955978393555, "learning_rate": 8.322699740293441e-06, "loss": 0.3598, "step": 5563 }, { "epoch": 0.27600575425368323, "grad_norm": 6.234735488891602, "learning_rate": 8.322111543663033e-06, "loss": 0.2672, "step": 5564 }, { "epoch": 0.27605535988888336, "grad_norm": 5.422840595245361, "learning_rate": 8.3215232647092e-06, "loss": 0.3315, "step": 5565 }, { "epoch": 0.27610496552408353, "grad_norm": 6.190616130828857, "learning_rate": 8.320934903446517e-06, "loss": 0.3547, "step": 5566 }, { "epoch": 0.2761545711592837, "grad_norm": 6.3651275634765625, "learning_rate": 8.320346459889564e-06, "loss": 0.3993, "step": 5567 }, { "epoch": 0.27620417679448384, "grad_norm": 11.566912651062012, "learning_rate": 8.319757934052925e-06, "loss": 0.4009, "step": 5568 }, { "epoch": 0.276253782429684, "grad_norm": 17.844860076904297, "learning_rate": 8.319169325951184e-06, "loss": 0.525, "step": 5569 }, { "epoch": 0.2763033880648842, "grad_norm": 8.716506004333496, "learning_rate": 8.318580635598923e-06, "loss": 0.2362, "step": 5570 }, { "epoch": 0.2763529937000843, "grad_norm": 10.0066499710083, "learning_rate": 8.317991863010732e-06, "loss": 0.4633, "step": 5571 }, { "epoch": 0.2764025993352845, "grad_norm": 6.443975448608398, "learning_rate": 8.317403008201204e-06, "loss": 0.2786, "step": 5572 }, { "epoch": 0.27645220497048467, "grad_norm": 9.911837577819824, "learning_rate": 8.316814071184928e-06, "loss": 0.3151, "step": 5573 }, { "epoch": 0.2765018106056848, "grad_norm": 7.1284894943237305, "learning_rate": 8.316225051976498e-06, "loss": 0.314, "step": 5574 }, { "epoch": 0.276551416240885, "grad_norm": 11.71515941619873, "learning_rate": 8.315635950590512e-06, "loss": 0.302, "step": 5575 }, { "epoch": 0.2766010218760851, "grad_norm": 4.017297267913818, "learning_rate": 8.31504676704157e-06, "loss": 0.1938, "step": 5576 }, { "epoch": 0.2766506275112853, "grad_norm": 5.693626403808594, "learning_rate": 8.314457501344267e-06, "loss": 0.2361, "step": 5577 }, { "epoch": 0.27670023314648545, "grad_norm": 6.571849346160889, "learning_rate": 8.313868153513206e-06, "loss": 0.2317, "step": 5578 }, { "epoch": 0.2767498387816856, "grad_norm": 4.399932384490967, "learning_rate": 8.313278723562996e-06, "loss": 0.2735, "step": 5579 }, { "epoch": 0.27679944441688575, "grad_norm": 7.186256408691406, "learning_rate": 8.312689211508241e-06, "loss": 0.2637, "step": 5580 }, { "epoch": 0.27684905005208593, "grad_norm": 14.102897644042969, "learning_rate": 8.312099617363545e-06, "loss": 0.4983, "step": 5581 }, { "epoch": 0.27689865568728605, "grad_norm": 9.504610061645508, "learning_rate": 8.311509941143525e-06, "loss": 0.3238, "step": 5582 }, { "epoch": 0.27694826132248623, "grad_norm": 8.96536636352539, "learning_rate": 8.310920182862792e-06, "loss": 0.3881, "step": 5583 }, { "epoch": 0.2769978669576864, "grad_norm": 5.216058254241943, "learning_rate": 8.310330342535956e-06, "loss": 0.3821, "step": 5584 }, { "epoch": 0.27704747259288653, "grad_norm": 6.529356956481934, "learning_rate": 8.30974042017764e-06, "loss": 0.3636, "step": 5585 }, { "epoch": 0.2770970782280867, "grad_norm": 8.612602233886719, "learning_rate": 8.309150415802456e-06, "loss": 0.4021, "step": 5586 }, { "epoch": 0.2771466838632869, "grad_norm": 5.214857578277588, "learning_rate": 8.308560329425028e-06, "loss": 0.3507, "step": 5587 }, { "epoch": 0.277196289498487, "grad_norm": 5.295434951782227, "learning_rate": 8.307970161059981e-06, "loss": 0.2082, "step": 5588 }, { "epoch": 0.2772458951336872, "grad_norm": 7.7544941902160645, "learning_rate": 8.307379910721936e-06, "loss": 0.3409, "step": 5589 }, { "epoch": 0.27729550076888737, "grad_norm": 10.194450378417969, "learning_rate": 8.30678957842552e-06, "loss": 0.3824, "step": 5590 }, { "epoch": 0.2773451064040875, "grad_norm": 7.165929317474365, "learning_rate": 8.306199164185362e-06, "loss": 0.2181, "step": 5591 }, { "epoch": 0.27739471203928767, "grad_norm": 8.811724662780762, "learning_rate": 8.305608668016094e-06, "loss": 0.3722, "step": 5592 }, { "epoch": 0.2774443176744878, "grad_norm": 4.85629940032959, "learning_rate": 8.305018089932348e-06, "loss": 0.1065, "step": 5593 }, { "epoch": 0.277493923309688, "grad_norm": 5.771171569824219, "learning_rate": 8.304427429948759e-06, "loss": 0.2085, "step": 5594 }, { "epoch": 0.27754352894488815, "grad_norm": 8.551044464111328, "learning_rate": 8.303836688079963e-06, "loss": 0.3704, "step": 5595 }, { "epoch": 0.2775931345800883, "grad_norm": 7.899099349975586, "learning_rate": 8.3032458643406e-06, "loss": 0.3407, "step": 5596 }, { "epoch": 0.27764274021528845, "grad_norm": 3.6467058658599854, "learning_rate": 8.302654958745309e-06, "loss": 0.1803, "step": 5597 }, { "epoch": 0.27769234585048863, "grad_norm": 8.139066696166992, "learning_rate": 8.302063971308735e-06, "loss": 0.1854, "step": 5598 }, { "epoch": 0.27774195148568875, "grad_norm": 12.895140647888184, "learning_rate": 8.301472902045521e-06, "loss": 0.3798, "step": 5599 }, { "epoch": 0.27779155712088893, "grad_norm": 7.458400726318359, "learning_rate": 8.300881750970317e-06, "loss": 0.3933, "step": 5600 }, { "epoch": 0.2778411627560891, "grad_norm": 5.257174968719482, "learning_rate": 8.30029051809777e-06, "loss": 0.2992, "step": 5601 }, { "epoch": 0.27789076839128923, "grad_norm": 7.404329776763916, "learning_rate": 8.29969920344253e-06, "loss": 0.2695, "step": 5602 }, { "epoch": 0.2779403740264894, "grad_norm": 9.801267623901367, "learning_rate": 8.29910780701925e-06, "loss": 0.3341, "step": 5603 }, { "epoch": 0.2779899796616896, "grad_norm": 11.410320281982422, "learning_rate": 8.298516328842589e-06, "loss": 0.3531, "step": 5604 }, { "epoch": 0.2780395852968897, "grad_norm": 6.405877590179443, "learning_rate": 8.297924768927199e-06, "loss": 0.2931, "step": 5605 }, { "epoch": 0.2780891909320899, "grad_norm": 11.744199752807617, "learning_rate": 8.297333127287744e-06, "loss": 0.3757, "step": 5606 }, { "epoch": 0.27813879656729007, "grad_norm": 10.961505889892578, "learning_rate": 8.29674140393888e-06, "loss": 0.3406, "step": 5607 }, { "epoch": 0.2781884022024902, "grad_norm": 7.201754093170166, "learning_rate": 8.296149598895273e-06, "loss": 0.3878, "step": 5608 }, { "epoch": 0.27823800783769037, "grad_norm": 10.102473258972168, "learning_rate": 8.295557712171588e-06, "loss": 0.3119, "step": 5609 }, { "epoch": 0.2782876134728905, "grad_norm": 12.374725341796875, "learning_rate": 8.294965743782494e-06, "loss": 0.4318, "step": 5610 }, { "epoch": 0.27833721910809067, "grad_norm": 4.725217342376709, "learning_rate": 8.294373693742656e-06, "loss": 0.2178, "step": 5611 }, { "epoch": 0.27838682474329085, "grad_norm": 6.607895374298096, "learning_rate": 8.293781562066749e-06, "loss": 0.2893, "step": 5612 }, { "epoch": 0.278436430378491, "grad_norm": 8.424433708190918, "learning_rate": 8.293189348769445e-06, "loss": 0.379, "step": 5613 }, { "epoch": 0.27848603601369115, "grad_norm": 10.980463027954102, "learning_rate": 8.292597053865418e-06, "loss": 0.3875, "step": 5614 }, { "epoch": 0.27853564164889133, "grad_norm": 8.959635734558105, "learning_rate": 8.292004677369348e-06, "loss": 0.4484, "step": 5615 }, { "epoch": 0.27858524728409145, "grad_norm": 4.755715847015381, "learning_rate": 8.291412219295913e-06, "loss": 0.3454, "step": 5616 }, { "epoch": 0.27863485291929163, "grad_norm": 4.1615753173828125, "learning_rate": 8.290819679659793e-06, "loss": 0.2496, "step": 5617 }, { "epoch": 0.2786844585544918, "grad_norm": 6.628416061401367, "learning_rate": 8.290227058475675e-06, "loss": 0.3226, "step": 5618 }, { "epoch": 0.27873406418969193, "grad_norm": 10.190326690673828, "learning_rate": 8.28963435575824e-06, "loss": 0.3306, "step": 5619 }, { "epoch": 0.2787836698248921, "grad_norm": 8.463415145874023, "learning_rate": 8.289041571522178e-06, "loss": 0.2535, "step": 5620 }, { "epoch": 0.2788332754600923, "grad_norm": 5.3559675216674805, "learning_rate": 8.28844870578218e-06, "loss": 0.2476, "step": 5621 }, { "epoch": 0.2788828810952924, "grad_norm": 9.430163383483887, "learning_rate": 8.287855758552932e-06, "loss": 0.3835, "step": 5622 }, { "epoch": 0.2789324867304926, "grad_norm": 6.126960754394531, "learning_rate": 8.287262729849134e-06, "loss": 0.3333, "step": 5623 }, { "epoch": 0.27898209236569277, "grad_norm": 7.753378391265869, "learning_rate": 8.286669619685477e-06, "loss": 0.2183, "step": 5624 }, { "epoch": 0.2790316980008929, "grad_norm": 11.990863800048828, "learning_rate": 8.28607642807666e-06, "loss": 0.3881, "step": 5625 }, { "epoch": 0.27908130363609307, "grad_norm": 6.618161201477051, "learning_rate": 8.285483155037384e-06, "loss": 0.4078, "step": 5626 }, { "epoch": 0.2791309092712932, "grad_norm": 10.019136428833008, "learning_rate": 8.284889800582349e-06, "loss": 0.4131, "step": 5627 }, { "epoch": 0.27918051490649337, "grad_norm": 4.39310359954834, "learning_rate": 8.284296364726258e-06, "loss": 0.2775, "step": 5628 }, { "epoch": 0.27923012054169355, "grad_norm": 14.357715606689453, "learning_rate": 8.283702847483817e-06, "loss": 0.5952, "step": 5629 }, { "epoch": 0.2792797261768937, "grad_norm": 6.581421375274658, "learning_rate": 8.283109248869735e-06, "loss": 0.262, "step": 5630 }, { "epoch": 0.27932933181209385, "grad_norm": 8.782176971435547, "learning_rate": 8.282515568898722e-06, "loss": 0.3527, "step": 5631 }, { "epoch": 0.27937893744729403, "grad_norm": 7.4072771072387695, "learning_rate": 8.281921807585486e-06, "loss": 0.3468, "step": 5632 }, { "epoch": 0.27942854308249415, "grad_norm": 6.025349140167236, "learning_rate": 8.281327964944743e-06, "loss": 0.3656, "step": 5633 }, { "epoch": 0.27947814871769433, "grad_norm": 4.922119140625, "learning_rate": 8.280734040991209e-06, "loss": 0.3088, "step": 5634 }, { "epoch": 0.2795277543528945, "grad_norm": 5.304591655731201, "learning_rate": 8.280140035739603e-06, "loss": 0.2867, "step": 5635 }, { "epoch": 0.27957735998809463, "grad_norm": 6.180287837982178, "learning_rate": 8.279545949204642e-06, "loss": 0.2226, "step": 5636 }, { "epoch": 0.2796269656232948, "grad_norm": 8.53077220916748, "learning_rate": 8.27895178140105e-06, "loss": 0.304, "step": 5637 }, { "epoch": 0.279676571258495, "grad_norm": 6.990444183349609, "learning_rate": 8.278357532343549e-06, "loss": 0.3834, "step": 5638 }, { "epoch": 0.2797261768936951, "grad_norm": 25.331087112426758, "learning_rate": 8.277763202046865e-06, "loss": 0.3965, "step": 5639 }, { "epoch": 0.2797757825288953, "grad_norm": 4.877172946929932, "learning_rate": 8.277168790525726e-06, "loss": 0.295, "step": 5640 }, { "epoch": 0.27982538816409547, "grad_norm": 9.121813774108887, "learning_rate": 8.276574297794862e-06, "loss": 0.2445, "step": 5641 }, { "epoch": 0.2798749937992956, "grad_norm": 6.891788959503174, "learning_rate": 8.275979723869006e-06, "loss": 0.289, "step": 5642 }, { "epoch": 0.27992459943449577, "grad_norm": 10.74486255645752, "learning_rate": 8.275385068762888e-06, "loss": 0.4166, "step": 5643 }, { "epoch": 0.2799742050696959, "grad_norm": 7.269767761230469, "learning_rate": 8.274790332491249e-06, "loss": 0.3295, "step": 5644 }, { "epoch": 0.28002381070489607, "grad_norm": 7.970848083496094, "learning_rate": 8.274195515068822e-06, "loss": 0.3125, "step": 5645 }, { "epoch": 0.28007341634009625, "grad_norm": 7.203873634338379, "learning_rate": 8.27360061651035e-06, "loss": 0.3723, "step": 5646 }, { "epoch": 0.28012302197529637, "grad_norm": 5.110128402709961, "learning_rate": 8.273005636830575e-06, "loss": 0.2244, "step": 5647 }, { "epoch": 0.28017262761049655, "grad_norm": 5.424227714538574, "learning_rate": 8.272410576044239e-06, "loss": 0.2626, "step": 5648 }, { "epoch": 0.28022223324569673, "grad_norm": 4.308065891265869, "learning_rate": 8.271815434166089e-06, "loss": 0.331, "step": 5649 }, { "epoch": 0.28027183888089685, "grad_norm": 8.124497413635254, "learning_rate": 8.271220211210872e-06, "loss": 0.2432, "step": 5650 }, { "epoch": 0.28032144451609703, "grad_norm": 5.083340644836426, "learning_rate": 8.270624907193338e-06, "loss": 0.3079, "step": 5651 }, { "epoch": 0.2803710501512972, "grad_norm": 11.433088302612305, "learning_rate": 8.27002952212824e-06, "loss": 0.3625, "step": 5652 }, { "epoch": 0.28042065578649733, "grad_norm": 6.71289587020874, "learning_rate": 8.26943405603033e-06, "loss": 0.299, "step": 5653 }, { "epoch": 0.2804702614216975, "grad_norm": 8.840856552124023, "learning_rate": 8.268838508914368e-06, "loss": 0.4338, "step": 5654 }, { "epoch": 0.2805198670568977, "grad_norm": 6.504896640777588, "learning_rate": 8.268242880795106e-06, "loss": 0.2583, "step": 5655 }, { "epoch": 0.2805694726920978, "grad_norm": 5.595861434936523, "learning_rate": 8.267647171687307e-06, "loss": 0.172, "step": 5656 }, { "epoch": 0.280619078327298, "grad_norm": 9.118438720703125, "learning_rate": 8.267051381605736e-06, "loss": 0.3079, "step": 5657 }, { "epoch": 0.28066868396249817, "grad_norm": 7.540924549102783, "learning_rate": 8.266455510565152e-06, "loss": 0.304, "step": 5658 }, { "epoch": 0.2807182895976983, "grad_norm": 11.668027877807617, "learning_rate": 8.265859558580322e-06, "loss": 0.3413, "step": 5659 }, { "epoch": 0.28076789523289847, "grad_norm": 6.380486488342285, "learning_rate": 8.265263525666017e-06, "loss": 0.3172, "step": 5660 }, { "epoch": 0.2808175008680986, "grad_norm": 5.215870380401611, "learning_rate": 8.264667411837004e-06, "loss": 0.3153, "step": 5661 }, { "epoch": 0.28086710650329877, "grad_norm": 5.089531421661377, "learning_rate": 8.264071217108055e-06, "loss": 0.276, "step": 5662 }, { "epoch": 0.28091671213849895, "grad_norm": 8.71729850769043, "learning_rate": 8.263474941493945e-06, "loss": 0.2841, "step": 5663 }, { "epoch": 0.28096631777369907, "grad_norm": 3.818605899810791, "learning_rate": 8.262878585009451e-06, "loss": 0.2867, "step": 5664 }, { "epoch": 0.28101592340889925, "grad_norm": 10.769503593444824, "learning_rate": 8.262282147669347e-06, "loss": 0.3418, "step": 5665 }, { "epoch": 0.2810655290440994, "grad_norm": 5.142663955688477, "learning_rate": 8.261685629488418e-06, "loss": 0.3488, "step": 5666 }, { "epoch": 0.28111513467929955, "grad_norm": 9.626883506774902, "learning_rate": 8.261089030481444e-06, "loss": 0.3214, "step": 5667 }, { "epoch": 0.28116474031449973, "grad_norm": 6.784379959106445, "learning_rate": 8.260492350663207e-06, "loss": 0.3294, "step": 5668 }, { "epoch": 0.2812143459496999, "grad_norm": 4.559875011444092, "learning_rate": 8.259895590048494e-06, "loss": 0.2914, "step": 5669 }, { "epoch": 0.28126395158490003, "grad_norm": 8.04754638671875, "learning_rate": 8.259298748652097e-06, "loss": 0.3718, "step": 5670 }, { "epoch": 0.2813135572201002, "grad_norm": 6.835618495941162, "learning_rate": 8.258701826488799e-06, "loss": 0.274, "step": 5671 }, { "epoch": 0.2813631628553004, "grad_norm": 8.525469779968262, "learning_rate": 8.258104823573398e-06, "loss": 0.443, "step": 5672 }, { "epoch": 0.2814127684905005, "grad_norm": 9.69397258758545, "learning_rate": 8.257507739920685e-06, "loss": 0.3152, "step": 5673 }, { "epoch": 0.2814623741257007, "grad_norm": 10.471972465515137, "learning_rate": 8.256910575545458e-06, "loss": 0.3679, "step": 5674 }, { "epoch": 0.28151197976090087, "grad_norm": 7.952491283416748, "learning_rate": 8.256313330462511e-06, "loss": 0.3151, "step": 5675 }, { "epoch": 0.281561585396101, "grad_norm": 6.891663551330566, "learning_rate": 8.255716004686649e-06, "loss": 0.2815, "step": 5676 }, { "epoch": 0.28161119103130117, "grad_norm": 8.885831832885742, "learning_rate": 8.255118598232671e-06, "loss": 0.4118, "step": 5677 }, { "epoch": 0.2816607966665013, "grad_norm": 4.767065048217773, "learning_rate": 8.25452111111538e-06, "loss": 0.3085, "step": 5678 }, { "epoch": 0.28171040230170147, "grad_norm": 17.188827514648438, "learning_rate": 8.253923543349586e-06, "loss": 0.4547, "step": 5679 }, { "epoch": 0.28176000793690165, "grad_norm": 7.893543720245361, "learning_rate": 8.253325894950094e-06, "loss": 0.2818, "step": 5680 }, { "epoch": 0.28180961357210177, "grad_norm": 4.814849853515625, "learning_rate": 8.252728165931714e-06, "loss": 0.3072, "step": 5681 }, { "epoch": 0.28185921920730195, "grad_norm": 5.854729175567627, "learning_rate": 8.25213035630926e-06, "loss": 0.3903, "step": 5682 }, { "epoch": 0.2819088248425021, "grad_norm": 11.167263984680176, "learning_rate": 8.251532466097543e-06, "loss": 0.3313, "step": 5683 }, { "epoch": 0.28195843047770225, "grad_norm": 12.263229370117188, "learning_rate": 8.250934495311382e-06, "loss": 0.3416, "step": 5684 }, { "epoch": 0.2820080361129024, "grad_norm": 18.922666549682617, "learning_rate": 8.250336443965592e-06, "loss": 0.452, "step": 5685 }, { "epoch": 0.2820576417481026, "grad_norm": 7.635475158691406, "learning_rate": 8.249738312074995e-06, "loss": 0.3368, "step": 5686 }, { "epoch": 0.28210724738330273, "grad_norm": 10.084216117858887, "learning_rate": 8.249140099654415e-06, "loss": 0.2927, "step": 5687 }, { "epoch": 0.2821568530185029, "grad_norm": 8.95396614074707, "learning_rate": 8.248541806718671e-06, "loss": 0.2538, "step": 5688 }, { "epoch": 0.2822064586537031, "grad_norm": 5.275089263916016, "learning_rate": 8.247943433282593e-06, "loss": 0.3174, "step": 5689 }, { "epoch": 0.2822560642889032, "grad_norm": 11.8997163772583, "learning_rate": 8.247344979361007e-06, "loss": 0.4604, "step": 5690 }, { "epoch": 0.2823056699241034, "grad_norm": 4.825535774230957, "learning_rate": 8.246746444968744e-06, "loss": 0.1841, "step": 5691 }, { "epoch": 0.28235527555930356, "grad_norm": 8.496810913085938, "learning_rate": 8.246147830120634e-06, "loss": 0.344, "step": 5692 }, { "epoch": 0.2824048811945037, "grad_norm": 6.29927921295166, "learning_rate": 8.245549134831514e-06, "loss": 0.2716, "step": 5693 }, { "epoch": 0.28245448682970387, "grad_norm": 7.4348297119140625, "learning_rate": 8.244950359116218e-06, "loss": 0.4209, "step": 5694 }, { "epoch": 0.282504092464904, "grad_norm": 5.051985740661621, "learning_rate": 8.244351502989583e-06, "loss": 0.2337, "step": 5695 }, { "epoch": 0.28255369810010417, "grad_norm": 7.394221782684326, "learning_rate": 8.243752566466452e-06, "loss": 0.2502, "step": 5696 }, { "epoch": 0.28260330373530435, "grad_norm": 8.607372283935547, "learning_rate": 8.243153549561665e-06, "loss": 0.3538, "step": 5697 }, { "epoch": 0.28265290937050447, "grad_norm": 4.233231544494629, "learning_rate": 8.242554452290065e-06, "loss": 0.3523, "step": 5698 }, { "epoch": 0.28270251500570465, "grad_norm": 2.6843338012695312, "learning_rate": 8.241955274666498e-06, "loss": 0.2186, "step": 5699 }, { "epoch": 0.2827521206409048, "grad_norm": 8.013465881347656, "learning_rate": 8.241356016705814e-06, "loss": 0.3847, "step": 5700 }, { "epoch": 0.28280172627610495, "grad_norm": 4.5135297775268555, "learning_rate": 8.240756678422862e-06, "loss": 0.2739, "step": 5701 }, { "epoch": 0.2828513319113051, "grad_norm": 5.590653896331787, "learning_rate": 8.240157259832493e-06, "loss": 0.3189, "step": 5702 }, { "epoch": 0.2829009375465053, "grad_norm": 4.8139448165893555, "learning_rate": 8.239557760949562e-06, "loss": 0.2452, "step": 5703 }, { "epoch": 0.2829505431817054, "grad_norm": 5.564820289611816, "learning_rate": 8.238958181788922e-06, "loss": 0.3052, "step": 5704 }, { "epoch": 0.2830001488169056, "grad_norm": 17.216703414916992, "learning_rate": 8.238358522365438e-06, "loss": 0.4245, "step": 5705 }, { "epoch": 0.2830497544521058, "grad_norm": 17.166717529296875, "learning_rate": 8.23775878269396e-06, "loss": 0.5893, "step": 5706 }, { "epoch": 0.2830993600873059, "grad_norm": 9.643789291381836, "learning_rate": 8.237158962789357e-06, "loss": 0.268, "step": 5707 }, { "epoch": 0.2831489657225061, "grad_norm": 8.506582260131836, "learning_rate": 8.23655906266649e-06, "loss": 0.2368, "step": 5708 }, { "epoch": 0.2831985713577062, "grad_norm": 13.871350288391113, "learning_rate": 8.235959082340225e-06, "loss": 0.4273, "step": 5709 }, { "epoch": 0.2832481769929064, "grad_norm": 6.915696144104004, "learning_rate": 8.23535902182543e-06, "loss": 0.3315, "step": 5710 }, { "epoch": 0.28329778262810656, "grad_norm": 8.421612739562988, "learning_rate": 8.234758881136977e-06, "loss": 0.3319, "step": 5711 }, { "epoch": 0.2833473882633067, "grad_norm": 7.985786437988281, "learning_rate": 8.234158660289735e-06, "loss": 0.3535, "step": 5712 }, { "epoch": 0.28339699389850687, "grad_norm": 10.284786224365234, "learning_rate": 8.233558359298576e-06, "loss": 0.3774, "step": 5713 }, { "epoch": 0.28344659953370704, "grad_norm": 10.040491104125977, "learning_rate": 8.23295797817838e-06, "loss": 0.255, "step": 5714 }, { "epoch": 0.28349620516890717, "grad_norm": 5.595912933349609, "learning_rate": 8.232357516944025e-06, "loss": 0.2789, "step": 5715 }, { "epoch": 0.28354581080410735, "grad_norm": 4.7356648445129395, "learning_rate": 8.231756975610385e-06, "loss": 0.3187, "step": 5716 }, { "epoch": 0.2835954164393075, "grad_norm": 7.236598968505859, "learning_rate": 8.231156354192348e-06, "loss": 0.3086, "step": 5717 }, { "epoch": 0.28364502207450765, "grad_norm": 6.412227630615234, "learning_rate": 8.230555652704792e-06, "loss": 0.4174, "step": 5718 }, { "epoch": 0.2836946277097078, "grad_norm": 4.029956340789795, "learning_rate": 8.229954871162607e-06, "loss": 0.3224, "step": 5719 }, { "epoch": 0.283744233344908, "grad_norm": 4.24634313583374, "learning_rate": 8.229354009580678e-06, "loss": 0.3151, "step": 5720 }, { "epoch": 0.2837938389801081, "grad_norm": 7.540820598602295, "learning_rate": 8.228753067973899e-06, "loss": 0.3404, "step": 5721 }, { "epoch": 0.2838434446153083, "grad_norm": 11.626363754272461, "learning_rate": 8.228152046357157e-06, "loss": 0.4595, "step": 5722 }, { "epoch": 0.2838930502505085, "grad_norm": 9.66860580444336, "learning_rate": 8.227550944745346e-06, "loss": 0.2973, "step": 5723 }, { "epoch": 0.2839426558857086, "grad_norm": 4.579481601715088, "learning_rate": 8.226949763153363e-06, "loss": 0.2604, "step": 5724 }, { "epoch": 0.2839922615209088, "grad_norm": 4.728333473205566, "learning_rate": 8.226348501596107e-06, "loss": 0.2785, "step": 5725 }, { "epoch": 0.2840418671561089, "grad_norm": 5.801522254943848, "learning_rate": 8.225747160088473e-06, "loss": 0.3255, "step": 5726 }, { "epoch": 0.2840914727913091, "grad_norm": 9.71080493927002, "learning_rate": 8.225145738645366e-06, "loss": 0.402, "step": 5727 }, { "epoch": 0.28414107842650926, "grad_norm": 7.7201056480407715, "learning_rate": 8.22454423728169e-06, "loss": 0.2548, "step": 5728 }, { "epoch": 0.2841906840617094, "grad_norm": 7.3372907638549805, "learning_rate": 8.22394265601235e-06, "loss": 0.3763, "step": 5729 }, { "epoch": 0.28424028969690956, "grad_norm": 6.236943244934082, "learning_rate": 8.223340994852249e-06, "loss": 0.2688, "step": 5730 }, { "epoch": 0.28428989533210974, "grad_norm": 5.251100540161133, "learning_rate": 8.222739253816303e-06, "loss": 0.2624, "step": 5731 }, { "epoch": 0.28433950096730987, "grad_norm": 13.10353946685791, "learning_rate": 8.222137432919419e-06, "loss": 0.4449, "step": 5732 }, { "epoch": 0.28438910660251004, "grad_norm": 3.7990658283233643, "learning_rate": 8.221535532176513e-06, "loss": 0.2379, "step": 5733 }, { "epoch": 0.2844387122377102, "grad_norm": 8.616120338439941, "learning_rate": 8.220933551602497e-06, "loss": 0.3212, "step": 5734 }, { "epoch": 0.28448831787291035, "grad_norm": 8.97266674041748, "learning_rate": 8.220331491212293e-06, "loss": 0.3367, "step": 5735 }, { "epoch": 0.2845379235081105, "grad_norm": 4.682146072387695, "learning_rate": 8.219729351020817e-06, "loss": 0.2735, "step": 5736 }, { "epoch": 0.2845875291433107, "grad_norm": 8.168801307678223, "learning_rate": 8.219127131042993e-06, "loss": 0.3328, "step": 5737 }, { "epoch": 0.2846371347785108, "grad_norm": 12.906115531921387, "learning_rate": 8.21852483129374e-06, "loss": 0.4505, "step": 5738 }, { "epoch": 0.284686740413711, "grad_norm": 8.674236297607422, "learning_rate": 8.217922451787987e-06, "loss": 0.385, "step": 5739 }, { "epoch": 0.2847363460489112, "grad_norm": 11.961420059204102, "learning_rate": 8.217319992540658e-06, "loss": 0.413, "step": 5740 }, { "epoch": 0.2847859516841113, "grad_norm": 7.679323196411133, "learning_rate": 8.216717453566687e-06, "loss": 0.3494, "step": 5741 }, { "epoch": 0.2848355573193115, "grad_norm": 5.993337631225586, "learning_rate": 8.216114834881002e-06, "loss": 0.3029, "step": 5742 }, { "epoch": 0.2848851629545116, "grad_norm": 7.678836345672607, "learning_rate": 8.215512136498535e-06, "loss": 0.3407, "step": 5743 }, { "epoch": 0.2849347685897118, "grad_norm": 5.798316955566406, "learning_rate": 8.214909358434224e-06, "loss": 0.2657, "step": 5744 }, { "epoch": 0.28498437422491196, "grad_norm": 6.025611877441406, "learning_rate": 8.214306500703005e-06, "loss": 0.2596, "step": 5745 }, { "epoch": 0.2850339798601121, "grad_norm": 8.105040550231934, "learning_rate": 8.213703563319816e-06, "loss": 0.3624, "step": 5746 }, { "epoch": 0.28508358549531226, "grad_norm": 7.609563827514648, "learning_rate": 8.2131005462996e-06, "loss": 0.2416, "step": 5747 }, { "epoch": 0.28513319113051244, "grad_norm": 7.6688008308410645, "learning_rate": 8.212497449657298e-06, "loss": 0.4134, "step": 5748 }, { "epoch": 0.28518279676571257, "grad_norm": 7.223295211791992, "learning_rate": 8.211894273407856e-06, "loss": 0.2794, "step": 5749 }, { "epoch": 0.28523240240091274, "grad_norm": 5.475490570068359, "learning_rate": 8.21129101756622e-06, "loss": 0.3387, "step": 5750 }, { "epoch": 0.2852820080361129, "grad_norm": 9.238226890563965, "learning_rate": 8.210687682147342e-06, "loss": 0.3248, "step": 5751 }, { "epoch": 0.28533161367131304, "grad_norm": 5.332939624786377, "learning_rate": 8.210084267166171e-06, "loss": 0.2657, "step": 5752 }, { "epoch": 0.2853812193065132, "grad_norm": 6.693543910980225, "learning_rate": 8.20948077263766e-06, "loss": 0.3366, "step": 5753 }, { "epoch": 0.2854308249417134, "grad_norm": 4.58903694152832, "learning_rate": 8.208877198576762e-06, "loss": 0.1839, "step": 5754 }, { "epoch": 0.2854804305769135, "grad_norm": 8.050583839416504, "learning_rate": 8.208273544998437e-06, "loss": 0.3333, "step": 5755 }, { "epoch": 0.2855300362121137, "grad_norm": 5.764772891998291, "learning_rate": 8.20766981191764e-06, "loss": 0.3383, "step": 5756 }, { "epoch": 0.2855796418473139, "grad_norm": 7.520389080047607, "learning_rate": 8.207065999349336e-06, "loss": 0.3395, "step": 5757 }, { "epoch": 0.285629247482514, "grad_norm": 9.197294235229492, "learning_rate": 8.206462107308486e-06, "loss": 0.3951, "step": 5758 }, { "epoch": 0.2856788531177142, "grad_norm": 5.273519039154053, "learning_rate": 8.205858135810055e-06, "loss": 0.1559, "step": 5759 }, { "epoch": 0.2857284587529143, "grad_norm": 6.711642742156982, "learning_rate": 8.205254084869008e-06, "loss": 0.2962, "step": 5760 }, { "epoch": 0.2857780643881145, "grad_norm": 5.7385101318359375, "learning_rate": 8.204649954500315e-06, "loss": 0.2311, "step": 5761 }, { "epoch": 0.28582767002331466, "grad_norm": 5.8635053634643555, "learning_rate": 8.204045744718948e-06, "loss": 0.3092, "step": 5762 }, { "epoch": 0.2858772756585148, "grad_norm": 9.330025672912598, "learning_rate": 8.203441455539874e-06, "loss": 0.4172, "step": 5763 }, { "epoch": 0.28592688129371496, "grad_norm": 5.090535640716553, "learning_rate": 8.202837086978075e-06, "loss": 0.355, "step": 5764 }, { "epoch": 0.28597648692891514, "grad_norm": 3.8540151119232178, "learning_rate": 8.202232639048523e-06, "loss": 0.2525, "step": 5765 }, { "epoch": 0.28602609256411526, "grad_norm": 11.209542274475098, "learning_rate": 8.201628111766198e-06, "loss": 0.4165, "step": 5766 }, { "epoch": 0.28607569819931544, "grad_norm": 12.8452787399292, "learning_rate": 8.20102350514608e-06, "loss": 0.292, "step": 5767 }, { "epoch": 0.2861253038345156, "grad_norm": 9.19039535522461, "learning_rate": 8.200418819203152e-06, "loss": 0.2431, "step": 5768 }, { "epoch": 0.28617490946971574, "grad_norm": 6.173596382141113, "learning_rate": 8.199814053952398e-06, "loss": 0.3376, "step": 5769 }, { "epoch": 0.2862245151049159, "grad_norm": 7.964208126068115, "learning_rate": 8.199209209408805e-06, "loss": 0.3187, "step": 5770 }, { "epoch": 0.2862741207401161, "grad_norm": 4.211689472198486, "learning_rate": 8.198604285587359e-06, "loss": 0.3124, "step": 5771 }, { "epoch": 0.2863237263753162, "grad_norm": 5.5688652992248535, "learning_rate": 8.197999282503053e-06, "loss": 0.2939, "step": 5772 }, { "epoch": 0.2863733320105164, "grad_norm": 9.69790267944336, "learning_rate": 8.197394200170878e-06, "loss": 0.3512, "step": 5773 }, { "epoch": 0.2864229376457166, "grad_norm": 8.249692916870117, "learning_rate": 8.196789038605828e-06, "loss": 0.3401, "step": 5774 }, { "epoch": 0.2864725432809167, "grad_norm": 10.674128532409668, "learning_rate": 8.196183797822901e-06, "loss": 0.3572, "step": 5775 }, { "epoch": 0.2865221489161169, "grad_norm": 6.810438632965088, "learning_rate": 8.195578477837091e-06, "loss": 0.3207, "step": 5776 }, { "epoch": 0.286571754551317, "grad_norm": 5.435784816741943, "learning_rate": 8.194973078663402e-06, "loss": 0.231, "step": 5777 }, { "epoch": 0.2866213601865172, "grad_norm": 42.988224029541016, "learning_rate": 8.194367600316836e-06, "loss": 0.3764, "step": 5778 }, { "epoch": 0.28667096582171736, "grad_norm": 5.525411605834961, "learning_rate": 8.193762042812395e-06, "loss": 0.3179, "step": 5779 }, { "epoch": 0.2867205714569175, "grad_norm": 7.058532238006592, "learning_rate": 8.193156406165086e-06, "loss": 0.3127, "step": 5780 }, { "epoch": 0.28677017709211766, "grad_norm": 5.988506317138672, "learning_rate": 8.192550690389916e-06, "loss": 0.2751, "step": 5781 }, { "epoch": 0.28681978272731784, "grad_norm": 8.726764678955078, "learning_rate": 8.191944895501898e-06, "loss": 0.3562, "step": 5782 }, { "epoch": 0.28686938836251796, "grad_norm": 3.8263068199157715, "learning_rate": 8.19133902151604e-06, "loss": 0.1343, "step": 5783 }, { "epoch": 0.28691899399771814, "grad_norm": 6.890205383300781, "learning_rate": 8.190733068447358e-06, "loss": 0.2835, "step": 5784 }, { "epoch": 0.2869685996329183, "grad_norm": 6.6461710929870605, "learning_rate": 8.190127036310866e-06, "loss": 0.362, "step": 5785 }, { "epoch": 0.28701820526811844, "grad_norm": 8.771382331848145, "learning_rate": 8.189520925121584e-06, "loss": 0.2791, "step": 5786 }, { "epoch": 0.2870678109033186, "grad_norm": 4.73598051071167, "learning_rate": 8.18891473489453e-06, "loss": 0.2587, "step": 5787 }, { "epoch": 0.2871174165385188, "grad_norm": 5.565756320953369, "learning_rate": 8.188308465644727e-06, "loss": 0.3114, "step": 5788 }, { "epoch": 0.2871670221737189, "grad_norm": 8.036689758300781, "learning_rate": 8.1877021173872e-06, "loss": 0.355, "step": 5789 }, { "epoch": 0.2872166278089191, "grad_norm": 6.932114601135254, "learning_rate": 8.18709569013697e-06, "loss": 0.3783, "step": 5790 }, { "epoch": 0.2872662334441193, "grad_norm": 4.721575736999512, "learning_rate": 8.186489183909066e-06, "loss": 0.2619, "step": 5791 }, { "epoch": 0.2873158390793194, "grad_norm": 5.8286824226379395, "learning_rate": 8.185882598718522e-06, "loss": 0.3911, "step": 5792 }, { "epoch": 0.2873654447145196, "grad_norm": 48.94615173339844, "learning_rate": 8.185275934580363e-06, "loss": 0.4015, "step": 5793 }, { "epoch": 0.2874150503497197, "grad_norm": 11.335421562194824, "learning_rate": 8.184669191509626e-06, "loss": 0.3458, "step": 5794 }, { "epoch": 0.2874646559849199, "grad_norm": 6.4402241706848145, "learning_rate": 8.184062369521347e-06, "loss": 0.3069, "step": 5795 }, { "epoch": 0.28751426162012006, "grad_norm": 10.667764663696289, "learning_rate": 8.18345546863056e-06, "loss": 0.3972, "step": 5796 }, { "epoch": 0.2875638672553202, "grad_norm": 34.5444221496582, "learning_rate": 8.182848488852311e-06, "loss": 0.2774, "step": 5797 }, { "epoch": 0.28761347289052036, "grad_norm": 17.329015731811523, "learning_rate": 8.182241430201634e-06, "loss": 0.4182, "step": 5798 }, { "epoch": 0.28766307852572054, "grad_norm": 10.627346992492676, "learning_rate": 8.181634292693574e-06, "loss": 0.397, "step": 5799 }, { "epoch": 0.28771268416092066, "grad_norm": 5.178332805633545, "learning_rate": 8.181027076343178e-06, "loss": 0.3316, "step": 5800 }, { "epoch": 0.28776228979612084, "grad_norm": 7.726327419281006, "learning_rate": 8.180419781165491e-06, "loss": 0.4269, "step": 5801 }, { "epoch": 0.287811895431321, "grad_norm": 5.232367038726807, "learning_rate": 8.179812407175565e-06, "loss": 0.3336, "step": 5802 }, { "epoch": 0.28786150106652114, "grad_norm": 8.318181991577148, "learning_rate": 8.17920495438845e-06, "loss": 0.3925, "step": 5803 }, { "epoch": 0.2879111067017213, "grad_norm": 8.07323932647705, "learning_rate": 8.178597422819194e-06, "loss": 0.3294, "step": 5804 }, { "epoch": 0.2879607123369215, "grad_norm": 8.690610885620117, "learning_rate": 8.177989812482858e-06, "loss": 0.2679, "step": 5805 }, { "epoch": 0.2880103179721216, "grad_norm": 5.043615818023682, "learning_rate": 8.177382123394499e-06, "loss": 0.3196, "step": 5806 }, { "epoch": 0.2880599236073218, "grad_norm": 7.796558380126953, "learning_rate": 8.176774355569171e-06, "loss": 0.4082, "step": 5807 }, { "epoch": 0.288109529242522, "grad_norm": 5.55460786819458, "learning_rate": 8.176166509021939e-06, "loss": 0.3023, "step": 5808 }, { "epoch": 0.2881591348777221, "grad_norm": 5.139672756195068, "learning_rate": 8.175558583767864e-06, "loss": 0.3344, "step": 5809 }, { "epoch": 0.2882087405129223, "grad_norm": 8.041411399841309, "learning_rate": 8.17495057982201e-06, "loss": 0.3521, "step": 5810 }, { "epoch": 0.2882583461481224, "grad_norm": 4.396289348602295, "learning_rate": 8.174342497199445e-06, "loss": 0.2285, "step": 5811 }, { "epoch": 0.2883079517833226, "grad_norm": 4.605868339538574, "learning_rate": 8.173734335915236e-06, "loss": 0.2612, "step": 5812 }, { "epoch": 0.28835755741852276, "grad_norm": 5.562824249267578, "learning_rate": 8.173126095984456e-06, "loss": 0.3028, "step": 5813 }, { "epoch": 0.2884071630537229, "grad_norm": 7.329362392425537, "learning_rate": 8.172517777422175e-06, "loss": 0.3139, "step": 5814 }, { "epoch": 0.28845676868892306, "grad_norm": 9.778827667236328, "learning_rate": 8.171909380243468e-06, "loss": 0.2693, "step": 5815 }, { "epoch": 0.28850637432412324, "grad_norm": 6.81259822845459, "learning_rate": 8.171300904463413e-06, "loss": 0.3244, "step": 5816 }, { "epoch": 0.28855597995932336, "grad_norm": 7.782382011413574, "learning_rate": 8.170692350097087e-06, "loss": 0.3755, "step": 5817 }, { "epoch": 0.28860558559452354, "grad_norm": 7.268486976623535, "learning_rate": 8.17008371715957e-06, "loss": 0.2528, "step": 5818 }, { "epoch": 0.2886551912297237, "grad_norm": 5.995654582977295, "learning_rate": 8.169475005665944e-06, "loss": 0.3462, "step": 5819 }, { "epoch": 0.28870479686492384, "grad_norm": 5.219066143035889, "learning_rate": 8.168866215631294e-06, "loss": 0.2644, "step": 5820 }, { "epoch": 0.288754402500124, "grad_norm": 6.558803558349609, "learning_rate": 8.168257347070707e-06, "loss": 0.2376, "step": 5821 }, { "epoch": 0.2888040081353242, "grad_norm": 5.074898719787598, "learning_rate": 8.167648399999267e-06, "loss": 0.2183, "step": 5822 }, { "epoch": 0.2888536137705243, "grad_norm": 13.4492826461792, "learning_rate": 8.167039374432068e-06, "loss": 0.4315, "step": 5823 }, { "epoch": 0.2889032194057245, "grad_norm": 5.55445671081543, "learning_rate": 8.166430270384202e-06, "loss": 0.3295, "step": 5824 }, { "epoch": 0.2889528250409247, "grad_norm": 7.874107360839844, "learning_rate": 8.16582108787076e-06, "loss": 0.3041, "step": 5825 }, { "epoch": 0.2890024306761248, "grad_norm": 9.761262893676758, "learning_rate": 8.16521182690684e-06, "loss": 0.4451, "step": 5826 }, { "epoch": 0.289052036311325, "grad_norm": 8.968145370483398, "learning_rate": 8.16460248750754e-06, "loss": 0.3267, "step": 5827 }, { "epoch": 0.2891016419465251, "grad_norm": 11.73597526550293, "learning_rate": 8.163993069687956e-06, "loss": 0.5063, "step": 5828 }, { "epoch": 0.2891512475817253, "grad_norm": 7.512941360473633, "learning_rate": 8.163383573463197e-06, "loss": 0.3759, "step": 5829 }, { "epoch": 0.28920085321692546, "grad_norm": 9.991281509399414, "learning_rate": 8.162773998848358e-06, "loss": 0.503, "step": 5830 }, { "epoch": 0.2892504588521256, "grad_norm": 7.8529438972473145, "learning_rate": 8.162164345858551e-06, "loss": 0.3395, "step": 5831 }, { "epoch": 0.28930006448732576, "grad_norm": 6.185366630554199, "learning_rate": 8.16155461450888e-06, "loss": 0.3974, "step": 5832 }, { "epoch": 0.28934967012252594, "grad_norm": 10.99787712097168, "learning_rate": 8.160944804814457e-06, "loss": 0.409, "step": 5833 }, { "epoch": 0.28939927575772606, "grad_norm": 6.323594570159912, "learning_rate": 8.16033491679039e-06, "loss": 0.3155, "step": 5834 }, { "epoch": 0.28944888139292624, "grad_norm": 7.993556499481201, "learning_rate": 8.159724950451796e-06, "loss": 0.3093, "step": 5835 }, { "epoch": 0.2894984870281264, "grad_norm": 7.8300018310546875, "learning_rate": 8.159114905813786e-06, "loss": 0.3462, "step": 5836 }, { "epoch": 0.28954809266332654, "grad_norm": 8.219179153442383, "learning_rate": 8.158504782891482e-06, "loss": 0.2868, "step": 5837 }, { "epoch": 0.2895976982985267, "grad_norm": 6.258101463317871, "learning_rate": 8.157894581699999e-06, "loss": 0.2954, "step": 5838 }, { "epoch": 0.2896473039337269, "grad_norm": 5.723827838897705, "learning_rate": 8.15728430225446e-06, "loss": 0.301, "step": 5839 }, { "epoch": 0.289696909568927, "grad_norm": 6.397636413574219, "learning_rate": 8.156673944569988e-06, "loss": 0.4013, "step": 5840 }, { "epoch": 0.2897465152041272, "grad_norm": 10.182180404663086, "learning_rate": 8.15606350866171e-06, "loss": 0.4332, "step": 5841 }, { "epoch": 0.2897961208393273, "grad_norm": 15.078742027282715, "learning_rate": 8.155452994544747e-06, "loss": 0.4106, "step": 5842 }, { "epoch": 0.2898457264745275, "grad_norm": 4.535860538482666, "learning_rate": 8.154842402234234e-06, "loss": 0.3164, "step": 5843 }, { "epoch": 0.2898953321097277, "grad_norm": 6.061865329742432, "learning_rate": 8.154231731745297e-06, "loss": 0.3371, "step": 5844 }, { "epoch": 0.2899449377449278, "grad_norm": 4.237775802612305, "learning_rate": 8.153620983093072e-06, "loss": 0.2759, "step": 5845 }, { "epoch": 0.289994543380128, "grad_norm": 4.243404865264893, "learning_rate": 8.153010156292694e-06, "loss": 0.2773, "step": 5846 }, { "epoch": 0.29004414901532816, "grad_norm": 7.753279685974121, "learning_rate": 8.152399251359296e-06, "loss": 0.3345, "step": 5847 }, { "epoch": 0.2900937546505283, "grad_norm": 13.913440704345703, "learning_rate": 8.151788268308019e-06, "loss": 0.3763, "step": 5848 }, { "epoch": 0.29014336028572846, "grad_norm": 5.333032608032227, "learning_rate": 8.151177207154002e-06, "loss": 0.3514, "step": 5849 }, { "epoch": 0.29019296592092864, "grad_norm": 5.002535820007324, "learning_rate": 8.150566067912391e-06, "loss": 0.2933, "step": 5850 }, { "epoch": 0.29024257155612876, "grad_norm": 7.487760066986084, "learning_rate": 8.149954850598327e-06, "loss": 0.306, "step": 5851 }, { "epoch": 0.29029217719132894, "grad_norm": 5.733560085296631, "learning_rate": 8.149343555226955e-06, "loss": 0.2449, "step": 5852 }, { "epoch": 0.2903417828265291, "grad_norm": 3.571974515914917, "learning_rate": 8.148732181813426e-06, "loss": 0.2689, "step": 5853 }, { "epoch": 0.29039138846172924, "grad_norm": 15.597646713256836, "learning_rate": 8.14812073037289e-06, "loss": 0.3625, "step": 5854 }, { "epoch": 0.2904409940969294, "grad_norm": 5.5637054443359375, "learning_rate": 8.147509200920498e-06, "loss": 0.2697, "step": 5855 }, { "epoch": 0.2904905997321296, "grad_norm": 6.259580135345459, "learning_rate": 8.146897593471405e-06, "loss": 0.3628, "step": 5856 }, { "epoch": 0.2905402053673297, "grad_norm": 7.561470031738281, "learning_rate": 8.146285908040764e-06, "loss": 0.2866, "step": 5857 }, { "epoch": 0.2905898110025299, "grad_norm": 6.464754104614258, "learning_rate": 8.145674144643737e-06, "loss": 0.4206, "step": 5858 }, { "epoch": 0.29063941663773, "grad_norm": 7.409738063812256, "learning_rate": 8.145062303295478e-06, "loss": 0.2579, "step": 5859 }, { "epoch": 0.2906890222729302, "grad_norm": 8.834580421447754, "learning_rate": 8.144450384011158e-06, "loss": 0.4038, "step": 5860 }, { "epoch": 0.2907386279081304, "grad_norm": 6.984935760498047, "learning_rate": 8.14383838680593e-06, "loss": 0.3573, "step": 5861 }, { "epoch": 0.2907882335433305, "grad_norm": 13.533495903015137, "learning_rate": 8.143226311694968e-06, "loss": 0.4034, "step": 5862 }, { "epoch": 0.2908378391785307, "grad_norm": 7.53830623626709, "learning_rate": 8.142614158693436e-06, "loss": 0.3579, "step": 5863 }, { "epoch": 0.29088744481373086, "grad_norm": 4.43626594543457, "learning_rate": 8.142001927816503e-06, "loss": 0.2348, "step": 5864 }, { "epoch": 0.290937050448931, "grad_norm": 6.489910125732422, "learning_rate": 8.14138961907934e-06, "loss": 0.3111, "step": 5865 }, { "epoch": 0.29098665608413116, "grad_norm": 9.508406639099121, "learning_rate": 8.140777232497122e-06, "loss": 0.4135, "step": 5866 }, { "epoch": 0.29103626171933134, "grad_norm": 6.503819942474365, "learning_rate": 8.140164768085023e-06, "loss": 0.3181, "step": 5867 }, { "epoch": 0.29108586735453146, "grad_norm": 6.155819416046143, "learning_rate": 8.139552225858223e-06, "loss": 0.2482, "step": 5868 }, { "epoch": 0.29113547298973164, "grad_norm": 8.472455024719238, "learning_rate": 8.138939605831895e-06, "loss": 0.3757, "step": 5869 }, { "epoch": 0.2911850786249318, "grad_norm": 3.61244535446167, "learning_rate": 8.138326908021226e-06, "loss": 0.2171, "step": 5870 }, { "epoch": 0.29123468426013194, "grad_norm": 6.800203800201416, "learning_rate": 8.137714132441397e-06, "loss": 0.2319, "step": 5871 }, { "epoch": 0.2912842898953321, "grad_norm": 9.048907279968262, "learning_rate": 8.137101279107591e-06, "loss": 0.3852, "step": 5872 }, { "epoch": 0.2913338955305323, "grad_norm": 7.47894287109375, "learning_rate": 8.136488348034997e-06, "loss": 0.3256, "step": 5873 }, { "epoch": 0.2913835011657324, "grad_norm": 7.644927024841309, "learning_rate": 8.135875339238802e-06, "loss": 0.2205, "step": 5874 }, { "epoch": 0.2914331068009326, "grad_norm": 8.41865062713623, "learning_rate": 8.135262252734199e-06, "loss": 0.284, "step": 5875 }, { "epoch": 0.2914827124361327, "grad_norm": 6.738636016845703, "learning_rate": 8.134649088536377e-06, "loss": 0.3695, "step": 5876 }, { "epoch": 0.2915323180713329, "grad_norm": 13.75317668914795, "learning_rate": 8.134035846660535e-06, "loss": 0.3562, "step": 5877 }, { "epoch": 0.2915819237065331, "grad_norm": 6.778503894805908, "learning_rate": 8.133422527121865e-06, "loss": 0.2554, "step": 5878 }, { "epoch": 0.2916315293417332, "grad_norm": 5.299436092376709, "learning_rate": 8.13280912993557e-06, "loss": 0.2671, "step": 5879 }, { "epoch": 0.2916811349769334, "grad_norm": 11.565719604492188, "learning_rate": 8.132195655116846e-06, "loss": 0.3199, "step": 5880 }, { "epoch": 0.29173074061213355, "grad_norm": 6.522604465484619, "learning_rate": 8.131582102680896e-06, "loss": 0.3956, "step": 5881 }, { "epoch": 0.2917803462473337, "grad_norm": 9.839271545410156, "learning_rate": 8.130968472642926e-06, "loss": 0.2796, "step": 5882 }, { "epoch": 0.29182995188253386, "grad_norm": 7.607645034790039, "learning_rate": 8.13035476501814e-06, "loss": 0.3326, "step": 5883 }, { "epoch": 0.29187955751773403, "grad_norm": 7.284149646759033, "learning_rate": 8.129740979821749e-06, "loss": 0.2633, "step": 5884 }, { "epoch": 0.29192916315293416, "grad_norm": 5.35750150680542, "learning_rate": 8.129127117068958e-06, "loss": 0.3303, "step": 5885 }, { "epoch": 0.29197876878813434, "grad_norm": 14.812350273132324, "learning_rate": 8.128513176774984e-06, "loss": 0.4357, "step": 5886 }, { "epoch": 0.2920283744233345, "grad_norm": 7.988562107086182, "learning_rate": 8.127899158955038e-06, "loss": 0.3022, "step": 5887 }, { "epoch": 0.29207798005853464, "grad_norm": 6.463335990905762, "learning_rate": 8.127285063624334e-06, "loss": 0.3175, "step": 5888 }, { "epoch": 0.2921275856937348, "grad_norm": 11.735941886901855, "learning_rate": 8.126670890798094e-06, "loss": 0.3897, "step": 5889 }, { "epoch": 0.292177191328935, "grad_norm": 6.175605297088623, "learning_rate": 8.126056640491533e-06, "loss": 0.2832, "step": 5890 }, { "epoch": 0.2922267969641351, "grad_norm": 4.5105438232421875, "learning_rate": 8.125442312719875e-06, "loss": 0.3677, "step": 5891 }, { "epoch": 0.2922764025993353, "grad_norm": 8.82105827331543, "learning_rate": 8.124827907498343e-06, "loss": 0.2807, "step": 5892 }, { "epoch": 0.2923260082345354, "grad_norm": 4.991082668304443, "learning_rate": 8.124213424842161e-06, "loss": 0.2597, "step": 5893 }, { "epoch": 0.2923756138697356, "grad_norm": 7.856477737426758, "learning_rate": 8.123598864766559e-06, "loss": 0.2927, "step": 5894 }, { "epoch": 0.2924252195049358, "grad_norm": 7.0322442054748535, "learning_rate": 8.122984227286763e-06, "loss": 0.3653, "step": 5895 }, { "epoch": 0.2924748251401359, "grad_norm": 4.836246490478516, "learning_rate": 8.122369512418005e-06, "loss": 0.3528, "step": 5896 }, { "epoch": 0.2925244307753361, "grad_norm": 3.9976885318756104, "learning_rate": 8.121754720175518e-06, "loss": 0.3398, "step": 5897 }, { "epoch": 0.29257403641053625, "grad_norm": 10.13711166381836, "learning_rate": 8.121139850574537e-06, "loss": 0.2999, "step": 5898 }, { "epoch": 0.2926236420457364, "grad_norm": 4.823659896850586, "learning_rate": 8.1205249036303e-06, "loss": 0.2128, "step": 5899 }, { "epoch": 0.29267324768093655, "grad_norm": 3.429877281188965, "learning_rate": 8.119909879358043e-06, "loss": 0.2639, "step": 5900 }, { "epoch": 0.29272285331613673, "grad_norm": 4.564887523651123, "learning_rate": 8.119294777773009e-06, "loss": 0.3684, "step": 5901 }, { "epoch": 0.29277245895133686, "grad_norm": 5.1973652839660645, "learning_rate": 8.118679598890437e-06, "loss": 0.2278, "step": 5902 }, { "epoch": 0.29282206458653703, "grad_norm": 5.997891902923584, "learning_rate": 8.118064342725576e-06, "loss": 0.2944, "step": 5903 }, { "epoch": 0.2928716702217372, "grad_norm": 6.50007438659668, "learning_rate": 8.117449009293668e-06, "loss": 0.2944, "step": 5904 }, { "epoch": 0.29292127585693734, "grad_norm": 12.914739608764648, "learning_rate": 8.116833598609966e-06, "loss": 0.3794, "step": 5905 }, { "epoch": 0.2929708814921375, "grad_norm": 4.238699913024902, "learning_rate": 8.116218110689715e-06, "loss": 0.2095, "step": 5906 }, { "epoch": 0.2930204871273377, "grad_norm": 5.724145412445068, "learning_rate": 8.115602545548171e-06, "loss": 0.2953, "step": 5907 }, { "epoch": 0.2930700927625378, "grad_norm": 11.12928581237793, "learning_rate": 8.114986903200587e-06, "loss": 0.2376, "step": 5908 }, { "epoch": 0.293119698397738, "grad_norm": 9.105833053588867, "learning_rate": 8.114371183662217e-06, "loss": 0.2941, "step": 5909 }, { "epoch": 0.2931693040329381, "grad_norm": 5.255617618560791, "learning_rate": 8.113755386948322e-06, "loss": 0.38, "step": 5910 }, { "epoch": 0.2932189096681383, "grad_norm": 12.346186637878418, "learning_rate": 8.11313951307416e-06, "loss": 0.3325, "step": 5911 }, { "epoch": 0.2932685153033385, "grad_norm": 7.2397918701171875, "learning_rate": 8.11252356205499e-06, "loss": 0.3508, "step": 5912 }, { "epoch": 0.2933181209385386, "grad_norm": 14.38294792175293, "learning_rate": 8.11190753390608e-06, "loss": 0.3585, "step": 5913 }, { "epoch": 0.2933677265737388, "grad_norm": 9.7249755859375, "learning_rate": 8.111291428642695e-06, "loss": 0.44, "step": 5914 }, { "epoch": 0.29341733220893895, "grad_norm": 22.150985717773438, "learning_rate": 8.1106752462801e-06, "loss": 0.4279, "step": 5915 }, { "epoch": 0.2934669378441391, "grad_norm": 6.476067543029785, "learning_rate": 8.110058986833564e-06, "loss": 0.2355, "step": 5916 }, { "epoch": 0.29351654347933925, "grad_norm": 7.685212135314941, "learning_rate": 8.109442650318362e-06, "loss": 0.4161, "step": 5917 }, { "epoch": 0.29356614911453943, "grad_norm": 4.686404228210449, "learning_rate": 8.108826236749763e-06, "loss": 0.3343, "step": 5918 }, { "epoch": 0.29361575474973955, "grad_norm": 5.122316837310791, "learning_rate": 8.108209746143044e-06, "loss": 0.2768, "step": 5919 }, { "epoch": 0.29366536038493973, "grad_norm": 7.984151840209961, "learning_rate": 8.10759317851348e-06, "loss": 0.3551, "step": 5920 }, { "epoch": 0.2937149660201399, "grad_norm": 9.755817413330078, "learning_rate": 8.106976533876352e-06, "loss": 0.2883, "step": 5921 }, { "epoch": 0.29376457165534003, "grad_norm": 29.38170623779297, "learning_rate": 8.106359812246941e-06, "loss": 0.2622, "step": 5922 }, { "epoch": 0.2938141772905402, "grad_norm": 5.719972133636475, "learning_rate": 8.105743013640527e-06, "loss": 0.263, "step": 5923 }, { "epoch": 0.2938637829257404, "grad_norm": 6.414551258087158, "learning_rate": 8.105126138072399e-06, "loss": 0.2584, "step": 5924 }, { "epoch": 0.2939133885609405, "grad_norm": 5.483943939208984, "learning_rate": 8.104509185557839e-06, "loss": 0.2858, "step": 5925 }, { "epoch": 0.2939629941961407, "grad_norm": 14.830251693725586, "learning_rate": 8.103892156112137e-06, "loss": 0.3869, "step": 5926 }, { "epoch": 0.2940125998313408, "grad_norm": 6.624782562255859, "learning_rate": 8.103275049750584e-06, "loss": 0.3294, "step": 5927 }, { "epoch": 0.294062205466541, "grad_norm": 8.216828346252441, "learning_rate": 8.102657866488471e-06, "loss": 0.3648, "step": 5928 }, { "epoch": 0.29411181110174117, "grad_norm": 6.753572463989258, "learning_rate": 8.102040606341092e-06, "loss": 0.2891, "step": 5929 }, { "epoch": 0.2941614167369413, "grad_norm": 6.6499247550964355, "learning_rate": 8.101423269323744e-06, "loss": 0.3868, "step": 5930 }, { "epoch": 0.2942110223721415, "grad_norm": 11.409613609313965, "learning_rate": 8.100805855451725e-06, "loss": 0.4241, "step": 5931 }, { "epoch": 0.29426062800734165, "grad_norm": 9.088883399963379, "learning_rate": 8.100188364740334e-06, "loss": 0.3467, "step": 5932 }, { "epoch": 0.2943102336425418, "grad_norm": 6.255326747894287, "learning_rate": 8.099570797204873e-06, "loss": 0.3939, "step": 5933 }, { "epoch": 0.29435983927774195, "grad_norm": 4.803329944610596, "learning_rate": 8.098953152860644e-06, "loss": 0.3195, "step": 5934 }, { "epoch": 0.29440944491294213, "grad_norm": 12.553382873535156, "learning_rate": 8.098335431722957e-06, "loss": 0.523, "step": 5935 }, { "epoch": 0.29445905054814225, "grad_norm": 16.01536750793457, "learning_rate": 8.097717633807114e-06, "loss": 0.3089, "step": 5936 }, { "epoch": 0.29450865618334243, "grad_norm": 18.09463119506836, "learning_rate": 8.097099759128428e-06, "loss": 0.476, "step": 5937 }, { "epoch": 0.2945582618185426, "grad_norm": 5.138153076171875, "learning_rate": 8.096481807702208e-06, "loss": 0.2716, "step": 5938 }, { "epoch": 0.29460786745374273, "grad_norm": 5.5588459968566895, "learning_rate": 8.09586377954377e-06, "loss": 0.3478, "step": 5939 }, { "epoch": 0.2946574730889429, "grad_norm": 8.50092887878418, "learning_rate": 8.095245674668425e-06, "loss": 0.3664, "step": 5940 }, { "epoch": 0.2947070787241431, "grad_norm": 6.382041931152344, "learning_rate": 8.094627493091493e-06, "loss": 0.3619, "step": 5941 }, { "epoch": 0.2947566843593432, "grad_norm": 6.780635356903076, "learning_rate": 8.094009234828293e-06, "loss": 0.2822, "step": 5942 }, { "epoch": 0.2948062899945434, "grad_norm": 10.736260414123535, "learning_rate": 8.093390899894143e-06, "loss": 0.2972, "step": 5943 }, { "epoch": 0.2948558956297435, "grad_norm": 3.767774820327759, "learning_rate": 8.092772488304369e-06, "loss": 0.2226, "step": 5944 }, { "epoch": 0.2949055012649437, "grad_norm": 8.244120597839355, "learning_rate": 8.092154000074291e-06, "loss": 0.3136, "step": 5945 }, { "epoch": 0.29495510690014387, "grad_norm": 5.370375633239746, "learning_rate": 8.09153543521924e-06, "loss": 0.3058, "step": 5946 }, { "epoch": 0.295004712535344, "grad_norm": 6.7926411628723145, "learning_rate": 8.090916793754542e-06, "loss": 0.3715, "step": 5947 }, { "epoch": 0.29505431817054417, "grad_norm": 10.336189270019531, "learning_rate": 8.090298075695529e-06, "loss": 0.3981, "step": 5948 }, { "epoch": 0.29510392380574435, "grad_norm": 8.137916564941406, "learning_rate": 8.08967928105753e-06, "loss": 0.4001, "step": 5949 }, { "epoch": 0.2951535294409445, "grad_norm": 7.991514205932617, "learning_rate": 8.08906040985588e-06, "loss": 0.3819, "step": 5950 }, { "epoch": 0.29520313507614465, "grad_norm": 7.46534538269043, "learning_rate": 8.088441462105918e-06, "loss": 0.2685, "step": 5951 }, { "epoch": 0.29525274071134483, "grad_norm": 6.731695652008057, "learning_rate": 8.087822437822978e-06, "loss": 0.3266, "step": 5952 }, { "epoch": 0.29530234634654495, "grad_norm": 7.093535900115967, "learning_rate": 8.087203337022403e-06, "loss": 0.3223, "step": 5953 }, { "epoch": 0.29535195198174513, "grad_norm": 7.378767967224121, "learning_rate": 8.08658415971953e-06, "loss": 0.3616, "step": 5954 }, { "epoch": 0.2954015576169453, "grad_norm": 6.297858715057373, "learning_rate": 8.085964905929708e-06, "loss": 0.3137, "step": 5955 }, { "epoch": 0.29545116325214543, "grad_norm": 5.617952823638916, "learning_rate": 8.085345575668277e-06, "loss": 0.3193, "step": 5956 }, { "epoch": 0.2955007688873456, "grad_norm": 11.025209426879883, "learning_rate": 8.08472616895059e-06, "loss": 0.3283, "step": 5957 }, { "epoch": 0.29555037452254573, "grad_norm": 4.528433322906494, "learning_rate": 8.08410668579199e-06, "loss": 0.2806, "step": 5958 }, { "epoch": 0.2955999801577459, "grad_norm": 9.42824935913086, "learning_rate": 8.083487126207834e-06, "loss": 0.403, "step": 5959 }, { "epoch": 0.2956495857929461, "grad_norm": 8.467203140258789, "learning_rate": 8.08286749021347e-06, "loss": 0.329, "step": 5960 }, { "epoch": 0.2956991914281462, "grad_norm": 7.673368453979492, "learning_rate": 8.082247777824257e-06, "loss": 0.2972, "step": 5961 }, { "epoch": 0.2957487970633464, "grad_norm": 4.122410774230957, "learning_rate": 8.081627989055549e-06, "loss": 0.2516, "step": 5962 }, { "epoch": 0.29579840269854657, "grad_norm": 7.585806369781494, "learning_rate": 8.081008123922705e-06, "loss": 0.3779, "step": 5963 }, { "epoch": 0.2958480083337467, "grad_norm": 7.02218770980835, "learning_rate": 8.080388182441087e-06, "loss": 0.2977, "step": 5964 }, { "epoch": 0.29589761396894687, "grad_norm": 3.933532238006592, "learning_rate": 8.079768164626057e-06, "loss": 0.1935, "step": 5965 }, { "epoch": 0.29594721960414705, "grad_norm": 4.815734386444092, "learning_rate": 8.079148070492975e-06, "loss": 0.3197, "step": 5966 }, { "epoch": 0.29599682523934717, "grad_norm": 7.727734565734863, "learning_rate": 8.078527900057214e-06, "loss": 0.325, "step": 5967 }, { "epoch": 0.29604643087454735, "grad_norm": 7.216281414031982, "learning_rate": 8.07790765333414e-06, "loss": 0.3943, "step": 5968 }, { "epoch": 0.29609603650974753, "grad_norm": 5.150207996368408, "learning_rate": 8.077287330339122e-06, "loss": 0.3505, "step": 5969 }, { "epoch": 0.29614564214494765, "grad_norm": 6.6794514656066895, "learning_rate": 8.076666931087529e-06, "loss": 0.2021, "step": 5970 }, { "epoch": 0.29619524778014783, "grad_norm": 5.016422748565674, "learning_rate": 8.076046455594739e-06, "loss": 0.3174, "step": 5971 }, { "epoch": 0.296244853415348, "grad_norm": 18.08977699279785, "learning_rate": 8.075425903876128e-06, "loss": 0.4841, "step": 5972 }, { "epoch": 0.29629445905054813, "grad_norm": 6.687600612640381, "learning_rate": 8.07480527594707e-06, "loss": 0.284, "step": 5973 }, { "epoch": 0.2963440646857483, "grad_norm": 5.098779201507568, "learning_rate": 8.074184571822946e-06, "loss": 0.3271, "step": 5974 }, { "epoch": 0.29639367032094843, "grad_norm": 7.308037281036377, "learning_rate": 8.07356379151914e-06, "loss": 0.2936, "step": 5975 }, { "epoch": 0.2964432759561486, "grad_norm": 5.051682949066162, "learning_rate": 8.072942935051031e-06, "loss": 0.1643, "step": 5976 }, { "epoch": 0.2964928815913488, "grad_norm": 5.016665935516357, "learning_rate": 8.072322002434008e-06, "loss": 0.2697, "step": 5977 }, { "epoch": 0.2965424872265489, "grad_norm": 5.029568672180176, "learning_rate": 8.071700993683454e-06, "loss": 0.3173, "step": 5978 }, { "epoch": 0.2965920928617491, "grad_norm": 5.567310333251953, "learning_rate": 8.07107990881476e-06, "loss": 0.2983, "step": 5979 }, { "epoch": 0.29664169849694927, "grad_norm": 3.8882839679718018, "learning_rate": 8.070458747843317e-06, "loss": 0.2052, "step": 5980 }, { "epoch": 0.2966913041321494, "grad_norm": 5.011893272399902, "learning_rate": 8.06983751078452e-06, "loss": 0.3212, "step": 5981 }, { "epoch": 0.29674090976734957, "grad_norm": 7.070488452911377, "learning_rate": 8.069216197653757e-06, "loss": 0.3061, "step": 5982 }, { "epoch": 0.29679051540254975, "grad_norm": 8.014636039733887, "learning_rate": 8.06859480846643e-06, "loss": 0.4557, "step": 5983 }, { "epoch": 0.29684012103774987, "grad_norm": 7.384474754333496, "learning_rate": 8.067973343237934e-06, "loss": 0.3553, "step": 5984 }, { "epoch": 0.29688972667295005, "grad_norm": 4.851027488708496, "learning_rate": 8.067351801983672e-06, "loss": 0.2585, "step": 5985 }, { "epoch": 0.29693933230815023, "grad_norm": 9.864754676818848, "learning_rate": 8.066730184719047e-06, "loss": 0.3509, "step": 5986 }, { "epoch": 0.29698893794335035, "grad_norm": 7.078875541687012, "learning_rate": 8.066108491459457e-06, "loss": 0.3127, "step": 5987 }, { "epoch": 0.29703854357855053, "grad_norm": 7.768344402313232, "learning_rate": 8.065486722220314e-06, "loss": 0.2765, "step": 5988 }, { "epoch": 0.2970881492137507, "grad_norm": 11.274609565734863, "learning_rate": 8.064864877017022e-06, "loss": 0.3257, "step": 5989 }, { "epoch": 0.29713775484895083, "grad_norm": 7.711249828338623, "learning_rate": 8.064242955864993e-06, "loss": 0.2912, "step": 5990 }, { "epoch": 0.297187360484151, "grad_norm": 5.8700947761535645, "learning_rate": 8.063620958779637e-06, "loss": 0.2942, "step": 5991 }, { "epoch": 0.29723696611935113, "grad_norm": 7.2818217277526855, "learning_rate": 8.062998885776368e-06, "loss": 0.3931, "step": 5992 }, { "epoch": 0.2972865717545513, "grad_norm": 4.338043212890625, "learning_rate": 8.0623767368706e-06, "loss": 0.2477, "step": 5993 }, { "epoch": 0.2973361773897515, "grad_norm": 14.094085693359375, "learning_rate": 8.061754512077753e-06, "loss": 0.3241, "step": 5994 }, { "epoch": 0.2973857830249516, "grad_norm": 12.80112361907959, "learning_rate": 8.061132211413243e-06, "loss": 0.407, "step": 5995 }, { "epoch": 0.2974353886601518, "grad_norm": 5.675551891326904, "learning_rate": 8.060509834892493e-06, "loss": 0.3153, "step": 5996 }, { "epoch": 0.29748499429535197, "grad_norm": 10.186182975769043, "learning_rate": 8.059887382530924e-06, "loss": 0.4293, "step": 5997 }, { "epoch": 0.2975345999305521, "grad_norm": 7.791384220123291, "learning_rate": 8.059264854343962e-06, "loss": 0.3289, "step": 5998 }, { "epoch": 0.29758420556575227, "grad_norm": 5.560298919677734, "learning_rate": 8.058642250347033e-06, "loss": 0.4049, "step": 5999 }, { "epoch": 0.29763381120095245, "grad_norm": 6.367534160614014, "learning_rate": 8.058019570555566e-06, "loss": 0.255, "step": 6000 }, { "epoch": 0.29768341683615257, "grad_norm": 5.68864107131958, "learning_rate": 8.057396814984989e-06, "loss": 0.3302, "step": 6001 }, { "epoch": 0.29773302247135275, "grad_norm": 5.100740909576416, "learning_rate": 8.056773983650737e-06, "loss": 0.384, "step": 6002 }, { "epoch": 0.2977826281065529, "grad_norm": 6.2049126625061035, "learning_rate": 8.056151076568243e-06, "loss": 0.3114, "step": 6003 }, { "epoch": 0.29783223374175305, "grad_norm": 7.165894508361816, "learning_rate": 8.055528093752942e-06, "loss": 0.3108, "step": 6004 }, { "epoch": 0.29788183937695323, "grad_norm": 6.0141119956970215, "learning_rate": 8.054905035220275e-06, "loss": 0.3666, "step": 6005 }, { "epoch": 0.2979314450121534, "grad_norm": 6.255248546600342, "learning_rate": 8.054281900985678e-06, "loss": 0.2637, "step": 6006 }, { "epoch": 0.29798105064735353, "grad_norm": 8.556069374084473, "learning_rate": 8.053658691064594e-06, "loss": 0.355, "step": 6007 }, { "epoch": 0.2980306562825537, "grad_norm": 5.820807933807373, "learning_rate": 8.053035405472464e-06, "loss": 0.2683, "step": 6008 }, { "epoch": 0.29808026191775383, "grad_norm": 5.785347938537598, "learning_rate": 8.05241204422474e-06, "loss": 0.3542, "step": 6009 }, { "epoch": 0.298129867552954, "grad_norm": 5.831637382507324, "learning_rate": 8.05178860733686e-06, "loss": 0.1538, "step": 6010 }, { "epoch": 0.2981794731881542, "grad_norm": 3.9680914878845215, "learning_rate": 8.05116509482428e-06, "loss": 0.1797, "step": 6011 }, { "epoch": 0.2982290788233543, "grad_norm": 4.619146347045898, "learning_rate": 8.050541506702452e-06, "loss": 0.3197, "step": 6012 }, { "epoch": 0.2982786844585545, "grad_norm": 5.243293285369873, "learning_rate": 8.049917842986822e-06, "loss": 0.3229, "step": 6013 }, { "epoch": 0.29832829009375467, "grad_norm": 5.012423038482666, "learning_rate": 8.049294103692848e-06, "loss": 0.3278, "step": 6014 }, { "epoch": 0.2983778957289548, "grad_norm": 4.265224933624268, "learning_rate": 8.048670288835988e-06, "loss": 0.298, "step": 6015 }, { "epoch": 0.29842750136415497, "grad_norm": 3.9087631702423096, "learning_rate": 8.048046398431699e-06, "loss": 0.1753, "step": 6016 }, { "epoch": 0.29847710699935515, "grad_norm": 6.279041290283203, "learning_rate": 8.04742243249544e-06, "loss": 0.2909, "step": 6017 }, { "epoch": 0.29852671263455527, "grad_norm": 7.559542179107666, "learning_rate": 8.046798391042676e-06, "loss": 0.3852, "step": 6018 }, { "epoch": 0.29857631826975545, "grad_norm": 12.287028312683105, "learning_rate": 8.04617427408887e-06, "loss": 0.3388, "step": 6019 }, { "epoch": 0.2986259239049556, "grad_norm": 4.536038398742676, "learning_rate": 8.045550081649486e-06, "loss": 0.2497, "step": 6020 }, { "epoch": 0.29867552954015575, "grad_norm": 6.094814300537109, "learning_rate": 8.044925813739995e-06, "loss": 0.281, "step": 6021 }, { "epoch": 0.2987251351753559, "grad_norm": 8.85406494140625, "learning_rate": 8.044301470375866e-06, "loss": 0.3185, "step": 6022 }, { "epoch": 0.2987747408105561, "grad_norm": 10.65716552734375, "learning_rate": 8.043677051572568e-06, "loss": 0.3359, "step": 6023 }, { "epoch": 0.29882434644575623, "grad_norm": 6.678856372833252, "learning_rate": 8.043052557345575e-06, "loss": 0.4067, "step": 6024 }, { "epoch": 0.2988739520809564, "grad_norm": 6.97715425491333, "learning_rate": 8.042427987710364e-06, "loss": 0.2542, "step": 6025 }, { "epoch": 0.29892355771615653, "grad_norm": 4.588672161102295, "learning_rate": 8.04180334268241e-06, "loss": 0.2881, "step": 6026 }, { "epoch": 0.2989731633513567, "grad_norm": 8.10418701171875, "learning_rate": 8.041178622277195e-06, "loss": 0.3558, "step": 6027 }, { "epoch": 0.2990227689865569, "grad_norm": 7.988320350646973, "learning_rate": 8.040553826510196e-06, "loss": 0.4582, "step": 6028 }, { "epoch": 0.299072374621757, "grad_norm": 9.235389709472656, "learning_rate": 8.0399289553969e-06, "loss": 0.2952, "step": 6029 }, { "epoch": 0.2991219802569572, "grad_norm": 8.294669151306152, "learning_rate": 8.039304008952788e-06, "loss": 0.3913, "step": 6030 }, { "epoch": 0.29917158589215737, "grad_norm": 7.35670280456543, "learning_rate": 8.038678987193349e-06, "loss": 0.2894, "step": 6031 }, { "epoch": 0.2992211915273575, "grad_norm": 9.296625137329102, "learning_rate": 8.03805389013407e-06, "loss": 0.2627, "step": 6032 }, { "epoch": 0.29927079716255767, "grad_norm": 7.438705921173096, "learning_rate": 8.037428717790441e-06, "loss": 0.3315, "step": 6033 }, { "epoch": 0.29932040279775785, "grad_norm": 5.575569152832031, "learning_rate": 8.036803470177954e-06, "loss": 0.2865, "step": 6034 }, { "epoch": 0.29937000843295797, "grad_norm": 5.852168560028076, "learning_rate": 8.036178147312102e-06, "loss": 0.2516, "step": 6035 }, { "epoch": 0.29941961406815815, "grad_norm": 5.695618152618408, "learning_rate": 8.035552749208384e-06, "loss": 0.3002, "step": 6036 }, { "epoch": 0.2994692197033583, "grad_norm": 9.761893272399902, "learning_rate": 8.034927275882296e-06, "loss": 0.3842, "step": 6037 }, { "epoch": 0.29951882533855845, "grad_norm": 7.887144088745117, "learning_rate": 8.034301727349336e-06, "loss": 0.3101, "step": 6038 }, { "epoch": 0.2995684309737586, "grad_norm": 8.669061660766602, "learning_rate": 8.033676103625008e-06, "loss": 0.285, "step": 6039 }, { "epoch": 0.2996180366089588, "grad_norm": 9.53614330291748, "learning_rate": 8.033050404724813e-06, "loss": 0.3787, "step": 6040 }, { "epoch": 0.2996676422441589, "grad_norm": 6.097080230712891, "learning_rate": 8.032424630664258e-06, "loss": 0.3407, "step": 6041 }, { "epoch": 0.2997172478793591, "grad_norm": 10.379948616027832, "learning_rate": 8.031798781458848e-06, "loss": 0.4174, "step": 6042 }, { "epoch": 0.29976685351455923, "grad_norm": 5.594551086425781, "learning_rate": 8.031172857124092e-06, "loss": 0.3704, "step": 6043 }, { "epoch": 0.2998164591497594, "grad_norm": 6.240160942077637, "learning_rate": 8.030546857675502e-06, "loss": 0.2925, "step": 6044 }, { "epoch": 0.2998660647849596, "grad_norm": 7.696261882781982, "learning_rate": 8.029920783128591e-06, "loss": 0.2836, "step": 6045 }, { "epoch": 0.2999156704201597, "grad_norm": 8.430956840515137, "learning_rate": 8.02929463349887e-06, "loss": 0.3003, "step": 6046 }, { "epoch": 0.2999652760553599, "grad_norm": 6.374470233917236, "learning_rate": 8.02866840880186e-06, "loss": 0.3002, "step": 6047 }, { "epoch": 0.30001488169056006, "grad_norm": 6.410780906677246, "learning_rate": 8.028042109053076e-06, "loss": 0.396, "step": 6048 }, { "epoch": 0.30001488169056006, "eval_loss": 0.3245352804660797, "eval_runtime": 35.5808, "eval_samples_per_second": 45.783, "eval_steps_per_second": 5.733, "step": 6048 }, { "epoch": 0.3000644873257602, "grad_norm": 5.9941935539245605, "learning_rate": 8.027415734268039e-06, "loss": 0.2306, "step": 6049 }, { "epoch": 0.30011409296096037, "grad_norm": 5.018630027770996, "learning_rate": 8.02678928446227e-06, "loss": 0.3172, "step": 6050 }, { "epoch": 0.30016369859616054, "grad_norm": 7.954620361328125, "learning_rate": 8.026162759651294e-06, "loss": 0.2883, "step": 6051 }, { "epoch": 0.30021330423136067, "grad_norm": 11.724872589111328, "learning_rate": 8.025536159850638e-06, "loss": 0.4074, "step": 6052 }, { "epoch": 0.30026290986656085, "grad_norm": 11.013463973999023, "learning_rate": 8.024909485075824e-06, "loss": 0.3363, "step": 6053 }, { "epoch": 0.300312515501761, "grad_norm": 12.755468368530273, "learning_rate": 8.024282735342386e-06, "loss": 0.274, "step": 6054 }, { "epoch": 0.30036212113696115, "grad_norm": 8.977307319641113, "learning_rate": 8.023655910665853e-06, "loss": 0.4012, "step": 6055 }, { "epoch": 0.3004117267721613, "grad_norm": 7.829868316650391, "learning_rate": 8.023029011061761e-06, "loss": 0.3934, "step": 6056 }, { "epoch": 0.3004613324073615, "grad_norm": 7.038140773773193, "learning_rate": 8.02240203654564e-06, "loss": 0.3583, "step": 6057 }, { "epoch": 0.3005109380425616, "grad_norm": 6.87877082824707, "learning_rate": 8.02177498713303e-06, "loss": 0.3389, "step": 6058 }, { "epoch": 0.3005605436777618, "grad_norm": 11.022873878479004, "learning_rate": 8.021147862839472e-06, "loss": 0.3669, "step": 6059 }, { "epoch": 0.3006101493129619, "grad_norm": 5.517581939697266, "learning_rate": 8.0205206636805e-06, "loss": 0.2589, "step": 6060 }, { "epoch": 0.3006597549481621, "grad_norm": 8.171650886535645, "learning_rate": 8.01989338967166e-06, "loss": 0.3607, "step": 6061 }, { "epoch": 0.3007093605833623, "grad_norm": 3.15089750289917, "learning_rate": 8.019266040828496e-06, "loss": 0.2345, "step": 6062 }, { "epoch": 0.3007589662185624, "grad_norm": 7.146161079406738, "learning_rate": 8.018638617166552e-06, "loss": 0.2764, "step": 6063 }, { "epoch": 0.3008085718537626, "grad_norm": 6.268551349639893, "learning_rate": 8.018011118701381e-06, "loss": 0.2271, "step": 6064 }, { "epoch": 0.30085817748896276, "grad_norm": 8.47658920288086, "learning_rate": 8.017383545448527e-06, "loss": 0.2778, "step": 6065 }, { "epoch": 0.3009077831241629, "grad_norm": 12.296753883361816, "learning_rate": 8.016755897423545e-06, "loss": 0.3817, "step": 6066 }, { "epoch": 0.30095738875936306, "grad_norm": 7.289828300476074, "learning_rate": 8.016128174641986e-06, "loss": 0.2122, "step": 6067 }, { "epoch": 0.30100699439456324, "grad_norm": 10.725020408630371, "learning_rate": 8.015500377119407e-06, "loss": 0.3465, "step": 6068 }, { "epoch": 0.30105660002976337, "grad_norm": 5.959719657897949, "learning_rate": 8.014872504871366e-06, "loss": 0.3036, "step": 6069 }, { "epoch": 0.30110620566496354, "grad_norm": 3.494642496109009, "learning_rate": 8.01424455791342e-06, "loss": 0.2278, "step": 6070 }, { "epoch": 0.3011558113001637, "grad_norm": 7.857613563537598, "learning_rate": 8.013616536261129e-06, "loss": 0.2747, "step": 6071 }, { "epoch": 0.30120541693536385, "grad_norm": 5.218995571136475, "learning_rate": 8.012988439930058e-06, "loss": 0.285, "step": 6072 }, { "epoch": 0.301255022570564, "grad_norm": 15.06037712097168, "learning_rate": 8.01236026893577e-06, "loss": 0.3776, "step": 6073 }, { "epoch": 0.3013046282057642, "grad_norm": 3.830716609954834, "learning_rate": 8.011732023293832e-06, "loss": 0.3202, "step": 6074 }, { "epoch": 0.3013542338409643, "grad_norm": 6.80632209777832, "learning_rate": 8.011103703019813e-06, "loss": 0.3315, "step": 6075 }, { "epoch": 0.3014038394761645, "grad_norm": 5.142208576202393, "learning_rate": 8.010475308129282e-06, "loss": 0.3262, "step": 6076 }, { "epoch": 0.3014534451113646, "grad_norm": 5.3121137619018555, "learning_rate": 8.009846838637809e-06, "loss": 0.2578, "step": 6077 }, { "epoch": 0.3015030507465648, "grad_norm": 8.230082511901855, "learning_rate": 8.009218294560971e-06, "loss": 0.3913, "step": 6078 }, { "epoch": 0.301552656381765, "grad_norm": 6.6432271003723145, "learning_rate": 8.008589675914345e-06, "loss": 0.367, "step": 6079 }, { "epoch": 0.3016022620169651, "grad_norm": 10.925169944763184, "learning_rate": 8.007960982713503e-06, "loss": 0.4121, "step": 6080 }, { "epoch": 0.3016518676521653, "grad_norm": 7.6471428871154785, "learning_rate": 8.00733221497403e-06, "loss": 0.3169, "step": 6081 }, { "epoch": 0.30170147328736546, "grad_norm": 9.431321144104004, "learning_rate": 8.006703372711499e-06, "loss": 0.3157, "step": 6082 }, { "epoch": 0.3017510789225656, "grad_norm": 14.085268020629883, "learning_rate": 8.006074455941503e-06, "loss": 0.3689, "step": 6083 }, { "epoch": 0.30180068455776576, "grad_norm": 10.962059020996094, "learning_rate": 8.005445464679621e-06, "loss": 0.3124, "step": 6084 }, { "epoch": 0.30185029019296594, "grad_norm": 7.024527549743652, "learning_rate": 8.004816398941441e-06, "loss": 0.2669, "step": 6085 }, { "epoch": 0.30189989582816606, "grad_norm": 31.574567794799805, "learning_rate": 8.004187258742551e-06, "loss": 0.424, "step": 6086 }, { "epoch": 0.30194950146336624, "grad_norm": 5.997720718383789, "learning_rate": 8.003558044098544e-06, "loss": 0.3091, "step": 6087 }, { "epoch": 0.3019991070985664, "grad_norm": 8.116313934326172, "learning_rate": 8.002928755025006e-06, "loss": 0.4131, "step": 6088 }, { "epoch": 0.30204871273376654, "grad_norm": 11.408917427062988, "learning_rate": 8.002299391537536e-06, "loss": 0.3414, "step": 6089 }, { "epoch": 0.3020983183689667, "grad_norm": 8.644296646118164, "learning_rate": 8.001669953651732e-06, "loss": 0.3637, "step": 6090 }, { "epoch": 0.30214792400416685, "grad_norm": 8.344582557678223, "learning_rate": 8.001040441383185e-06, "loss": 0.3663, "step": 6091 }, { "epoch": 0.302197529639367, "grad_norm": 7.684525489807129, "learning_rate": 8.000410854747501e-06, "loss": 0.3725, "step": 6092 }, { "epoch": 0.3022471352745672, "grad_norm": 12.336397171020508, "learning_rate": 7.999781193760278e-06, "loss": 0.3003, "step": 6093 }, { "epoch": 0.3022967409097673, "grad_norm": 3.2834537029266357, "learning_rate": 7.99915145843712e-06, "loss": 0.2853, "step": 6094 }, { "epoch": 0.3023463465449675, "grad_norm": 6.093806266784668, "learning_rate": 7.998521648793631e-06, "loss": 0.3609, "step": 6095 }, { "epoch": 0.3023959521801677, "grad_norm": 9.711617469787598, "learning_rate": 7.99789176484542e-06, "loss": 0.2402, "step": 6096 }, { "epoch": 0.3024455578153678, "grad_norm": 6.049691200256348, "learning_rate": 7.997261806608097e-06, "loss": 0.2825, "step": 6097 }, { "epoch": 0.302495163450568, "grad_norm": 11.842458724975586, "learning_rate": 7.996631774097268e-06, "loss": 0.2889, "step": 6098 }, { "epoch": 0.30254476908576816, "grad_norm": 5.548084259033203, "learning_rate": 7.99600166732855e-06, "loss": 0.3354, "step": 6099 }, { "epoch": 0.3025943747209683, "grad_norm": 7.07309103012085, "learning_rate": 7.995371486317553e-06, "loss": 0.324, "step": 6100 }, { "epoch": 0.30264398035616846, "grad_norm": 4.70692777633667, "learning_rate": 7.994741231079897e-06, "loss": 0.2138, "step": 6101 }, { "epoch": 0.30269358599136864, "grad_norm": 18.737651824951172, "learning_rate": 7.9941109016312e-06, "loss": 0.3262, "step": 6102 }, { "epoch": 0.30274319162656876, "grad_norm": 7.748540878295898, "learning_rate": 7.993480497987078e-06, "loss": 0.3734, "step": 6103 }, { "epoch": 0.30279279726176894, "grad_norm": 5.790707111358643, "learning_rate": 7.992850020163156e-06, "loss": 0.3419, "step": 6104 }, { "epoch": 0.3028424028969691, "grad_norm": 9.949363708496094, "learning_rate": 7.992219468175057e-06, "loss": 0.4476, "step": 6105 }, { "epoch": 0.30289200853216924, "grad_norm": 9.621177673339844, "learning_rate": 7.991588842038408e-06, "loss": 0.3719, "step": 6106 }, { "epoch": 0.3029416141673694, "grad_norm": 6.612340450286865, "learning_rate": 7.990958141768831e-06, "loss": 0.3049, "step": 6107 }, { "epoch": 0.30299121980256954, "grad_norm": 6.050815105438232, "learning_rate": 7.99032736738196e-06, "loss": 0.2983, "step": 6108 }, { "epoch": 0.3030408254377697, "grad_norm": 4.1773810386657715, "learning_rate": 7.989696518893423e-06, "loss": 0.1758, "step": 6109 }, { "epoch": 0.3030904310729699, "grad_norm": 4.740350723266602, "learning_rate": 7.989065596318858e-06, "loss": 0.2863, "step": 6110 }, { "epoch": 0.30314003670817, "grad_norm": 8.328258514404297, "learning_rate": 7.98843459967389e-06, "loss": 0.3073, "step": 6111 }, { "epoch": 0.3031896423433702, "grad_norm": 5.088786602020264, "learning_rate": 7.987803528974163e-06, "loss": 0.2694, "step": 6112 }, { "epoch": 0.3032392479785704, "grad_norm": 5.854559421539307, "learning_rate": 7.987172384235313e-06, "loss": 0.3214, "step": 6113 }, { "epoch": 0.3032888536137705, "grad_norm": 3.006333589553833, "learning_rate": 7.98654116547298e-06, "loss": 0.16, "step": 6114 }, { "epoch": 0.3033384592489707, "grad_norm": 10.007630348205566, "learning_rate": 7.985909872702806e-06, "loss": 0.4054, "step": 6115 }, { "epoch": 0.30338806488417086, "grad_norm": 6.195342540740967, "learning_rate": 7.985278505940434e-06, "loss": 0.3334, "step": 6116 }, { "epoch": 0.303437670519371, "grad_norm": 11.65875244140625, "learning_rate": 7.984647065201511e-06, "loss": 0.3937, "step": 6117 }, { "epoch": 0.30348727615457116, "grad_norm": 9.686326026916504, "learning_rate": 7.984015550501684e-06, "loss": 0.4207, "step": 6118 }, { "epoch": 0.30353688178977134, "grad_norm": 7.638311862945557, "learning_rate": 7.9833839618566e-06, "loss": 0.3814, "step": 6119 }, { "epoch": 0.30358648742497146, "grad_norm": 16.866811752319336, "learning_rate": 7.982752299281915e-06, "loss": 0.3325, "step": 6120 }, { "epoch": 0.30363609306017164, "grad_norm": 19.435447692871094, "learning_rate": 7.982120562793277e-06, "loss": 0.3435, "step": 6121 }, { "epoch": 0.3036856986953718, "grad_norm": 6.951542377471924, "learning_rate": 7.981488752406342e-06, "loss": 0.266, "step": 6122 }, { "epoch": 0.30373530433057194, "grad_norm": 7.074606895446777, "learning_rate": 7.980856868136768e-06, "loss": 0.3392, "step": 6123 }, { "epoch": 0.3037849099657721, "grad_norm": 3.6872055530548096, "learning_rate": 7.980224910000212e-06, "loss": 0.2845, "step": 6124 }, { "epoch": 0.30383451560097224, "grad_norm": 7.650872230529785, "learning_rate": 7.979592878012334e-06, "loss": 0.359, "step": 6125 }, { "epoch": 0.3038841212361724, "grad_norm": 5.85947322845459, "learning_rate": 7.978960772188797e-06, "loss": 0.21, "step": 6126 }, { "epoch": 0.3039337268713726, "grad_norm": 12.495823860168457, "learning_rate": 7.978328592545265e-06, "loss": 0.5197, "step": 6127 }, { "epoch": 0.3039833325065727, "grad_norm": 6.802512168884277, "learning_rate": 7.977696339097403e-06, "loss": 0.2978, "step": 6128 }, { "epoch": 0.3040329381417729, "grad_norm": 8.359135627746582, "learning_rate": 7.977064011860879e-06, "loss": 0.3785, "step": 6129 }, { "epoch": 0.3040825437769731, "grad_norm": 5.705048084259033, "learning_rate": 7.976431610851361e-06, "loss": 0.2931, "step": 6130 }, { "epoch": 0.3041321494121732, "grad_norm": 5.686145782470703, "learning_rate": 7.975799136084523e-06, "loss": 0.2397, "step": 6131 }, { "epoch": 0.3041817550473734, "grad_norm": 5.678277015686035, "learning_rate": 7.975166587576035e-06, "loss": 0.31, "step": 6132 }, { "epoch": 0.30423136068257356, "grad_norm": 4.825985908508301, "learning_rate": 7.974533965341573e-06, "loss": 0.2883, "step": 6133 }, { "epoch": 0.3042809663177737, "grad_norm": 7.608597755432129, "learning_rate": 7.973901269396816e-06, "loss": 0.3627, "step": 6134 }, { "epoch": 0.30433057195297386, "grad_norm": 11.36336898803711, "learning_rate": 7.973268499757438e-06, "loss": 0.3789, "step": 6135 }, { "epoch": 0.30438017758817404, "grad_norm": 8.575992584228516, "learning_rate": 7.972635656439122e-06, "loss": 0.3212, "step": 6136 }, { "epoch": 0.30442978322337416, "grad_norm": 8.686038970947266, "learning_rate": 7.972002739457552e-06, "loss": 0.3514, "step": 6137 }, { "epoch": 0.30447938885857434, "grad_norm": 7.192084312438965, "learning_rate": 7.971369748828406e-06, "loss": 0.2974, "step": 6138 }, { "epoch": 0.3045289944937745, "grad_norm": 5.290398597717285, "learning_rate": 7.970736684567374e-06, "loss": 0.259, "step": 6139 }, { "epoch": 0.30457860012897464, "grad_norm": 5.644108772277832, "learning_rate": 7.970103546690146e-06, "loss": 0.3081, "step": 6140 }, { "epoch": 0.3046282057641748, "grad_norm": 5.475761890411377, "learning_rate": 7.969470335212408e-06, "loss": 0.2303, "step": 6141 }, { "epoch": 0.30467781139937494, "grad_norm": 4.759490013122559, "learning_rate": 7.96883705014985e-06, "loss": 0.2648, "step": 6142 }, { "epoch": 0.3047274170345751, "grad_norm": 8.92180061340332, "learning_rate": 7.968203691518168e-06, "loss": 0.3144, "step": 6143 }, { "epoch": 0.3047770226697753, "grad_norm": 4.593949317932129, "learning_rate": 7.967570259333058e-06, "loss": 0.2623, "step": 6144 }, { "epoch": 0.3048266283049754, "grad_norm": 7.072360992431641, "learning_rate": 7.96693675361021e-06, "loss": 0.3446, "step": 6145 }, { "epoch": 0.3048762339401756, "grad_norm": 10.124698638916016, "learning_rate": 7.96630317436533e-06, "loss": 0.2992, "step": 6146 }, { "epoch": 0.3049258395753758, "grad_norm": 8.518061637878418, "learning_rate": 7.965669521614116e-06, "loss": 0.3227, "step": 6147 }, { "epoch": 0.3049754452105759, "grad_norm": 11.98958683013916, "learning_rate": 7.965035795372269e-06, "loss": 0.3593, "step": 6148 }, { "epoch": 0.3050250508457761, "grad_norm": 6.780615329742432, "learning_rate": 7.964401995655493e-06, "loss": 0.291, "step": 6149 }, { "epoch": 0.30507465648097626, "grad_norm": 11.42901611328125, "learning_rate": 7.963768122479494e-06, "loss": 0.3226, "step": 6150 }, { "epoch": 0.3051242621161764, "grad_norm": 8.947308540344238, "learning_rate": 7.963134175859984e-06, "loss": 0.2629, "step": 6151 }, { "epoch": 0.30517386775137656, "grad_norm": 5.269867420196533, "learning_rate": 7.962500155812665e-06, "loss": 0.2687, "step": 6152 }, { "epoch": 0.30522347338657674, "grad_norm": 14.630555152893066, "learning_rate": 7.961866062353252e-06, "loss": 0.3433, "step": 6153 }, { "epoch": 0.30527307902177686, "grad_norm": 8.212530136108398, "learning_rate": 7.96123189549746e-06, "loss": 0.2432, "step": 6154 }, { "epoch": 0.30532268465697704, "grad_norm": 7.377742767333984, "learning_rate": 7.960597655261001e-06, "loss": 0.3453, "step": 6155 }, { "epoch": 0.3053722902921772, "grad_norm": 7.691253185272217, "learning_rate": 7.959963341659593e-06, "loss": 0.3286, "step": 6156 }, { "epoch": 0.30542189592737734, "grad_norm": 5.638608932495117, "learning_rate": 7.959328954708956e-06, "loss": 0.3165, "step": 6157 }, { "epoch": 0.3054715015625775, "grad_norm": 8.514432907104492, "learning_rate": 7.958694494424808e-06, "loss": 0.3237, "step": 6158 }, { "epoch": 0.30552110719777764, "grad_norm": 4.297780990600586, "learning_rate": 7.958059960822871e-06, "loss": 0.2595, "step": 6159 }, { "epoch": 0.3055707128329778, "grad_norm": 4.078060626983643, "learning_rate": 7.957425353918871e-06, "loss": 0.2882, "step": 6160 }, { "epoch": 0.305620318468178, "grad_norm": 6.811436176300049, "learning_rate": 7.956790673728533e-06, "loss": 0.2653, "step": 6161 }, { "epoch": 0.3056699241033781, "grad_norm": 4.270350933074951, "learning_rate": 7.956155920267584e-06, "loss": 0.2574, "step": 6162 }, { "epoch": 0.3057195297385783, "grad_norm": 5.703972339630127, "learning_rate": 7.955521093551754e-06, "loss": 0.2755, "step": 6163 }, { "epoch": 0.3057691353737785, "grad_norm": 5.049365520477295, "learning_rate": 7.954886193596775e-06, "loss": 0.2938, "step": 6164 }, { "epoch": 0.3058187410089786, "grad_norm": 8.388873100280762, "learning_rate": 7.954251220418382e-06, "loss": 0.3745, "step": 6165 }, { "epoch": 0.3058683466441788, "grad_norm": 10.96402645111084, "learning_rate": 7.953616174032304e-06, "loss": 0.2664, "step": 6166 }, { "epoch": 0.30591795227937896, "grad_norm": 3.7100861072540283, "learning_rate": 7.95298105445428e-06, "loss": 0.1882, "step": 6167 }, { "epoch": 0.3059675579145791, "grad_norm": 4.334278106689453, "learning_rate": 7.952345861700053e-06, "loss": 0.2813, "step": 6168 }, { "epoch": 0.30601716354977926, "grad_norm": 9.98488712310791, "learning_rate": 7.951710595785358e-06, "loss": 0.3695, "step": 6169 }, { "epoch": 0.30606676918497944, "grad_norm": 17.760671615600586, "learning_rate": 7.95107525672594e-06, "loss": 0.309, "step": 6170 }, { "epoch": 0.30611637482017956, "grad_norm": 9.79922103881836, "learning_rate": 7.950439844537543e-06, "loss": 0.317, "step": 6171 }, { "epoch": 0.30616598045537974, "grad_norm": 13.948829650878906, "learning_rate": 7.949804359235911e-06, "loss": 0.4717, "step": 6172 }, { "epoch": 0.3062155860905799, "grad_norm": 7.646422386169434, "learning_rate": 7.949168800836793e-06, "loss": 0.3234, "step": 6173 }, { "epoch": 0.30626519172578004, "grad_norm": 8.386237144470215, "learning_rate": 7.948533169355937e-06, "loss": 0.2745, "step": 6174 }, { "epoch": 0.3063147973609802, "grad_norm": 7.183841228485107, "learning_rate": 7.947897464809095e-06, "loss": 0.2121, "step": 6175 }, { "epoch": 0.30636440299618034, "grad_norm": 15.762542724609375, "learning_rate": 7.947261687212022e-06, "loss": 0.4632, "step": 6176 }, { "epoch": 0.3064140086313805, "grad_norm": 6.070952415466309, "learning_rate": 7.946625836580472e-06, "loss": 0.3381, "step": 6177 }, { "epoch": 0.3064636142665807, "grad_norm": 6.020915985107422, "learning_rate": 7.9459899129302e-06, "loss": 0.3738, "step": 6178 }, { "epoch": 0.3065132199017808, "grad_norm": 10.913460731506348, "learning_rate": 7.945353916276964e-06, "loss": 0.3941, "step": 6179 }, { "epoch": 0.306562825536981, "grad_norm": 5.918532848358154, "learning_rate": 7.944717846636526e-06, "loss": 0.295, "step": 6180 }, { "epoch": 0.3066124311721812, "grad_norm": 7.783341407775879, "learning_rate": 7.944081704024648e-06, "loss": 0.2761, "step": 6181 }, { "epoch": 0.3066620368073813, "grad_norm": 10.465291023254395, "learning_rate": 7.943445488457094e-06, "loss": 0.2503, "step": 6182 }, { "epoch": 0.3067116424425815, "grad_norm": 6.443753719329834, "learning_rate": 7.942809199949631e-06, "loss": 0.3271, "step": 6183 }, { "epoch": 0.30676124807778166, "grad_norm": 10.3128080368042, "learning_rate": 7.942172838518022e-06, "loss": 0.3479, "step": 6184 }, { "epoch": 0.3068108537129818, "grad_norm": 11.305307388305664, "learning_rate": 7.94153640417804e-06, "loss": 0.5412, "step": 6185 }, { "epoch": 0.30686045934818196, "grad_norm": 7.0937395095825195, "learning_rate": 7.940899896945456e-06, "loss": 0.2773, "step": 6186 }, { "epoch": 0.30691006498338214, "grad_norm": 4.7389020919799805, "learning_rate": 7.940263316836041e-06, "loss": 0.33, "step": 6187 }, { "epoch": 0.30695967061858226, "grad_norm": 4.129010200500488, "learning_rate": 7.939626663865571e-06, "loss": 0.2347, "step": 6188 }, { "epoch": 0.30700927625378244, "grad_norm": 11.686692237854004, "learning_rate": 7.938989938049824e-06, "loss": 0.3487, "step": 6189 }, { "epoch": 0.3070588818889826, "grad_norm": 4.6464667320251465, "learning_rate": 7.938353139404576e-06, "loss": 0.2963, "step": 6190 }, { "epoch": 0.30710848752418274, "grad_norm": 7.2545976638793945, "learning_rate": 7.937716267945609e-06, "loss": 0.3728, "step": 6191 }, { "epoch": 0.3071580931593829, "grad_norm": 5.676454544067383, "learning_rate": 7.937079323688704e-06, "loss": 0.3331, "step": 6192 }, { "epoch": 0.30720769879458304, "grad_norm": 6.807911396026611, "learning_rate": 7.936442306649643e-06, "loss": 0.3412, "step": 6193 }, { "epoch": 0.3072573044297832, "grad_norm": 7.15321683883667, "learning_rate": 7.935805216844214e-06, "loss": 0.3153, "step": 6194 }, { "epoch": 0.3073069100649834, "grad_norm": 5.0783843994140625, "learning_rate": 7.935168054288204e-06, "loss": 0.2233, "step": 6195 }, { "epoch": 0.3073565157001835, "grad_norm": 9.635205268859863, "learning_rate": 7.9345308189974e-06, "loss": 0.4141, "step": 6196 }, { "epoch": 0.3074061213353837, "grad_norm": 16.160627365112305, "learning_rate": 7.933893510987599e-06, "loss": 0.4874, "step": 6197 }, { "epoch": 0.3074557269705839, "grad_norm": 10.61918830871582, "learning_rate": 7.933256130274588e-06, "loss": 0.4118, "step": 6198 }, { "epoch": 0.307505332605784, "grad_norm": 10.096668243408203, "learning_rate": 7.932618676874162e-06, "loss": 0.3882, "step": 6199 }, { "epoch": 0.3075549382409842, "grad_norm": 11.364688873291016, "learning_rate": 7.93198115080212e-06, "loss": 0.3909, "step": 6200 }, { "epoch": 0.30760454387618436, "grad_norm": 6.76970100402832, "learning_rate": 7.931343552074258e-06, "loss": 0.3376, "step": 6201 }, { "epoch": 0.3076541495113845, "grad_norm": 7.472651481628418, "learning_rate": 7.930705880706377e-06, "loss": 0.3541, "step": 6202 }, { "epoch": 0.30770375514658466, "grad_norm": 4.3530449867248535, "learning_rate": 7.930068136714277e-06, "loss": 0.2312, "step": 6203 }, { "epoch": 0.30775336078178483, "grad_norm": 25.95637321472168, "learning_rate": 7.929430320113764e-06, "loss": 0.499, "step": 6204 }, { "epoch": 0.30780296641698496, "grad_norm": 8.976306915283203, "learning_rate": 7.928792430920644e-06, "loss": 0.3176, "step": 6205 }, { "epoch": 0.30785257205218514, "grad_norm": 4.782773971557617, "learning_rate": 7.928154469150722e-06, "loss": 0.1789, "step": 6206 }, { "epoch": 0.3079021776873853, "grad_norm": 6.982358455657959, "learning_rate": 7.927516434819807e-06, "loss": 0.389, "step": 6207 }, { "epoch": 0.30795178332258544, "grad_norm": 10.537243843078613, "learning_rate": 7.926878327943712e-06, "loss": 0.3757, "step": 6208 }, { "epoch": 0.3080013889577856, "grad_norm": 7.106256484985352, "learning_rate": 7.926240148538247e-06, "loss": 0.3104, "step": 6209 }, { "epoch": 0.30805099459298574, "grad_norm": 4.756839752197266, "learning_rate": 7.925601896619229e-06, "loss": 0.3397, "step": 6210 }, { "epoch": 0.3081006002281859, "grad_norm": 6.316346645355225, "learning_rate": 7.92496357220247e-06, "loss": 0.2752, "step": 6211 }, { "epoch": 0.3081502058633861, "grad_norm": 6.751911163330078, "learning_rate": 7.92432517530379e-06, "loss": 0.228, "step": 6212 }, { "epoch": 0.3081998114985862, "grad_norm": 9.22756290435791, "learning_rate": 7.923686705939014e-06, "loss": 0.3663, "step": 6213 }, { "epoch": 0.3082494171337864, "grad_norm": 10.556167602539062, "learning_rate": 7.923048164123955e-06, "loss": 0.4676, "step": 6214 }, { "epoch": 0.3082990227689866, "grad_norm": 8.897321701049805, "learning_rate": 7.922409549874442e-06, "loss": 0.3203, "step": 6215 }, { "epoch": 0.3083486284041867, "grad_norm": 4.715305328369141, "learning_rate": 7.921770863206297e-06, "loss": 0.2743, "step": 6216 }, { "epoch": 0.3083982340393869, "grad_norm": 7.377171039581299, "learning_rate": 7.921132104135349e-06, "loss": 0.2714, "step": 6217 }, { "epoch": 0.30844783967458705, "grad_norm": 7.8962907791137695, "learning_rate": 7.920493272677425e-06, "loss": 0.2768, "step": 6218 }, { "epoch": 0.3084974453097872, "grad_norm": 6.251745223999023, "learning_rate": 7.919854368848358e-06, "loss": 0.2894, "step": 6219 }, { "epoch": 0.30854705094498736, "grad_norm": 6.703230857849121, "learning_rate": 7.91921539266398e-06, "loss": 0.3758, "step": 6220 }, { "epoch": 0.30859665658018753, "grad_norm": 8.25305461883545, "learning_rate": 7.918576344140123e-06, "loss": 0.3208, "step": 6221 }, { "epoch": 0.30864626221538766, "grad_norm": 6.785236835479736, "learning_rate": 7.917937223292622e-06, "loss": 0.3519, "step": 6222 }, { "epoch": 0.30869586785058784, "grad_norm": 7.797677516937256, "learning_rate": 7.91729803013732e-06, "loss": 0.3418, "step": 6223 }, { "epoch": 0.30874547348578796, "grad_norm": 5.554804801940918, "learning_rate": 7.91665876469005e-06, "loss": 0.2757, "step": 6224 }, { "epoch": 0.30879507912098814, "grad_norm": 6.426848411560059, "learning_rate": 7.91601942696666e-06, "loss": 0.2371, "step": 6225 }, { "epoch": 0.3088446847561883, "grad_norm": 6.445448875427246, "learning_rate": 7.915380016982986e-06, "loss": 0.2428, "step": 6226 }, { "epoch": 0.30889429039138844, "grad_norm": 6.056314468383789, "learning_rate": 7.914740534754878e-06, "loss": 0.2645, "step": 6227 }, { "epoch": 0.3089438960265886, "grad_norm": 6.0481085777282715, "learning_rate": 7.914100980298182e-06, "loss": 0.3093, "step": 6228 }, { "epoch": 0.3089935016617888, "grad_norm": 3.8277523517608643, "learning_rate": 7.913461353628744e-06, "loss": 0.2237, "step": 6229 }, { "epoch": 0.3090431072969889, "grad_norm": 5.247878074645996, "learning_rate": 7.912821654762417e-06, "loss": 0.2902, "step": 6230 }, { "epoch": 0.3090927129321891, "grad_norm": 6.898838520050049, "learning_rate": 7.91218188371505e-06, "loss": 0.2935, "step": 6231 }, { "epoch": 0.3091423185673893, "grad_norm": 4.964415073394775, "learning_rate": 7.9115420405025e-06, "loss": 0.3468, "step": 6232 }, { "epoch": 0.3091919242025894, "grad_norm": 6.25162935256958, "learning_rate": 7.91090212514062e-06, "loss": 0.3693, "step": 6233 }, { "epoch": 0.3092415298377896, "grad_norm": 4.480035781860352, "learning_rate": 7.91026213764527e-06, "loss": 0.2775, "step": 6234 }, { "epoch": 0.30929113547298975, "grad_norm": 6.42820930480957, "learning_rate": 7.909622078032307e-06, "loss": 0.3395, "step": 6235 }, { "epoch": 0.3093407411081899, "grad_norm": 7.495540618896484, "learning_rate": 7.908981946317594e-06, "loss": 0.3479, "step": 6236 }, { "epoch": 0.30939034674339005, "grad_norm": 4.09984827041626, "learning_rate": 7.908341742516992e-06, "loss": 0.2358, "step": 6237 }, { "epoch": 0.30943995237859023, "grad_norm": 8.789385795593262, "learning_rate": 7.907701466646365e-06, "loss": 0.2884, "step": 6238 }, { "epoch": 0.30948955801379036, "grad_norm": 10.38425064086914, "learning_rate": 7.90706111872158e-06, "loss": 0.4203, "step": 6239 }, { "epoch": 0.30953916364899053, "grad_norm": 9.523193359375, "learning_rate": 7.906420698758508e-06, "loss": 0.4134, "step": 6240 }, { "epoch": 0.30958876928419066, "grad_norm": 11.42223072052002, "learning_rate": 7.905780206773013e-06, "loss": 0.3669, "step": 6241 }, { "epoch": 0.30963837491939084, "grad_norm": 5.919314384460449, "learning_rate": 7.905139642780972e-06, "loss": 0.3013, "step": 6242 }, { "epoch": 0.309687980554591, "grad_norm": 9.85573959350586, "learning_rate": 7.904499006798257e-06, "loss": 0.2607, "step": 6243 }, { "epoch": 0.30973758618979114, "grad_norm": 5.457704067230225, "learning_rate": 7.903858298840741e-06, "loss": 0.4037, "step": 6244 }, { "epoch": 0.3097871918249913, "grad_norm": 5.808368682861328, "learning_rate": 7.903217518924304e-06, "loss": 0.2698, "step": 6245 }, { "epoch": 0.3098367974601915, "grad_norm": 10.24229621887207, "learning_rate": 7.902576667064825e-06, "loss": 0.3707, "step": 6246 }, { "epoch": 0.3098864030953916, "grad_norm": 11.765593528747559, "learning_rate": 7.901935743278181e-06, "loss": 0.3452, "step": 6247 }, { "epoch": 0.3099360087305918, "grad_norm": 7.631901741027832, "learning_rate": 7.901294747580256e-06, "loss": 0.3384, "step": 6248 }, { "epoch": 0.309985614365792, "grad_norm": 6.9193902015686035, "learning_rate": 7.900653679986937e-06, "loss": 0.3035, "step": 6249 }, { "epoch": 0.3100352200009921, "grad_norm": 6.419456958770752, "learning_rate": 7.900012540514106e-06, "loss": 0.2957, "step": 6250 }, { "epoch": 0.3100848256361923, "grad_norm": 6.829810619354248, "learning_rate": 7.899371329177654e-06, "loss": 0.2863, "step": 6251 }, { "epoch": 0.31013443127139245, "grad_norm": 14.68520450592041, "learning_rate": 7.898730045993469e-06, "loss": 0.4823, "step": 6252 }, { "epoch": 0.3101840369065926, "grad_norm": 22.032054901123047, "learning_rate": 7.89808869097744e-06, "loss": 0.362, "step": 6253 }, { "epoch": 0.31023364254179275, "grad_norm": 5.031023025512695, "learning_rate": 7.897447264145463e-06, "loss": 0.2936, "step": 6254 }, { "epoch": 0.31028324817699293, "grad_norm": 8.685579299926758, "learning_rate": 7.896805765513435e-06, "loss": 0.381, "step": 6255 }, { "epoch": 0.31033285381219305, "grad_norm": 8.291555404663086, "learning_rate": 7.896164195097247e-06, "loss": 0.4145, "step": 6256 }, { "epoch": 0.31038245944739323, "grad_norm": 3.8572983741760254, "learning_rate": 7.8955225529128e-06, "loss": 0.2279, "step": 6257 }, { "epoch": 0.31043206508259336, "grad_norm": 7.776270389556885, "learning_rate": 7.894880838975995e-06, "loss": 0.3924, "step": 6258 }, { "epoch": 0.31048167071779353, "grad_norm": 4.96118688583374, "learning_rate": 7.894239053302733e-06, "loss": 0.31, "step": 6259 }, { "epoch": 0.3105312763529937, "grad_norm": 7.309938430786133, "learning_rate": 7.893597195908919e-06, "loss": 0.3364, "step": 6260 }, { "epoch": 0.31058088198819384, "grad_norm": 11.211051940917969, "learning_rate": 7.892955266810456e-06, "loss": 0.2702, "step": 6261 }, { "epoch": 0.310630487623394, "grad_norm": 7.759780406951904, "learning_rate": 7.892313266023255e-06, "loss": 0.3038, "step": 6262 }, { "epoch": 0.3106800932585942, "grad_norm": 4.813044548034668, "learning_rate": 7.891671193563222e-06, "loss": 0.3152, "step": 6263 }, { "epoch": 0.3107296988937943, "grad_norm": 5.1391682624816895, "learning_rate": 7.891029049446269e-06, "loss": 0.2305, "step": 6264 }, { "epoch": 0.3107793045289945, "grad_norm": 6.091246604919434, "learning_rate": 7.890386833688307e-06, "loss": 0.2891, "step": 6265 }, { "epoch": 0.31082891016419467, "grad_norm": 4.850683212280273, "learning_rate": 7.889744546305252e-06, "loss": 0.2857, "step": 6266 }, { "epoch": 0.3108785157993948, "grad_norm": 7.835826873779297, "learning_rate": 7.889102187313021e-06, "loss": 0.3223, "step": 6267 }, { "epoch": 0.310928121434595, "grad_norm": 7.1018548011779785, "learning_rate": 7.888459756727531e-06, "loss": 0.3192, "step": 6268 }, { "epoch": 0.31097772706979515, "grad_norm": 5.740875720977783, "learning_rate": 7.887817254564701e-06, "loss": 0.2617, "step": 6269 }, { "epoch": 0.3110273327049953, "grad_norm": 8.681203842163086, "learning_rate": 7.887174680840453e-06, "loss": 0.3156, "step": 6270 }, { "epoch": 0.31107693834019545, "grad_norm": 5.627064228057861, "learning_rate": 7.88653203557071e-06, "loss": 0.4174, "step": 6271 }, { "epoch": 0.31112654397539563, "grad_norm": 8.840415000915527, "learning_rate": 7.8858893187714e-06, "loss": 0.2837, "step": 6272 }, { "epoch": 0.31117614961059575, "grad_norm": 6.544886112213135, "learning_rate": 7.885246530458445e-06, "loss": 0.2217, "step": 6273 }, { "epoch": 0.31122575524579593, "grad_norm": 17.677719116210938, "learning_rate": 7.884603670647776e-06, "loss": 0.4032, "step": 6274 }, { "epoch": 0.31127536088099605, "grad_norm": 4.176745891571045, "learning_rate": 7.883960739355324e-06, "loss": 0.2789, "step": 6275 }, { "epoch": 0.31132496651619623, "grad_norm": 12.838163375854492, "learning_rate": 7.88331773659702e-06, "loss": 0.3598, "step": 6276 }, { "epoch": 0.3113745721513964, "grad_norm": 11.360301971435547, "learning_rate": 7.882674662388798e-06, "loss": 0.2158, "step": 6277 }, { "epoch": 0.31142417778659653, "grad_norm": 7.397754669189453, "learning_rate": 7.882031516746593e-06, "loss": 0.3112, "step": 6278 }, { "epoch": 0.3114737834217967, "grad_norm": 8.41458511352539, "learning_rate": 7.881388299686346e-06, "loss": 0.3707, "step": 6279 }, { "epoch": 0.3115233890569969, "grad_norm": 6.1465582847595215, "learning_rate": 7.880745011223992e-06, "loss": 0.3064, "step": 6280 }, { "epoch": 0.311572994692197, "grad_norm": 5.098341464996338, "learning_rate": 7.88010165137547e-06, "loss": 0.2698, "step": 6281 }, { "epoch": 0.3116226003273972, "grad_norm": 6.092422008514404, "learning_rate": 7.879458220156733e-06, "loss": 0.2918, "step": 6282 }, { "epoch": 0.31167220596259737, "grad_norm": 5.159546375274658, "learning_rate": 7.878814717583714e-06, "loss": 0.2076, "step": 6283 }, { "epoch": 0.3117218115977975, "grad_norm": 23.82529067993164, "learning_rate": 7.878171143672366e-06, "loss": 0.3972, "step": 6284 }, { "epoch": 0.31177141723299767, "grad_norm": 8.091723442077637, "learning_rate": 7.877527498438633e-06, "loss": 0.3451, "step": 6285 }, { "epoch": 0.31182102286819785, "grad_norm": 5.761369705200195, "learning_rate": 7.87688378189847e-06, "loss": 0.2741, "step": 6286 }, { "epoch": 0.311870628503398, "grad_norm": 7.775960922241211, "learning_rate": 7.876239994067824e-06, "loss": 0.3398, "step": 6287 }, { "epoch": 0.31192023413859815, "grad_norm": 12.871475219726562, "learning_rate": 7.875596134962649e-06, "loss": 0.3775, "step": 6288 }, { "epoch": 0.31196983977379833, "grad_norm": 8.211723327636719, "learning_rate": 7.874952204598903e-06, "loss": 0.3047, "step": 6289 }, { "epoch": 0.31201944540899845, "grad_norm": 7.134515285491943, "learning_rate": 7.87430820299254e-06, "loss": 0.2448, "step": 6290 }, { "epoch": 0.31206905104419863, "grad_norm": 19.369932174682617, "learning_rate": 7.873664130159518e-06, "loss": 0.5226, "step": 6291 }, { "epoch": 0.31211865667939875, "grad_norm": 5.593133449554443, "learning_rate": 7.8730199861158e-06, "loss": 0.2607, "step": 6292 }, { "epoch": 0.31216826231459893, "grad_norm": 13.946444511413574, "learning_rate": 7.872375770877348e-06, "loss": 0.2952, "step": 6293 }, { "epoch": 0.3122178679497991, "grad_norm": 8.661599159240723, "learning_rate": 7.871731484460124e-06, "loss": 0.2677, "step": 6294 }, { "epoch": 0.31226747358499923, "grad_norm": 9.16037368774414, "learning_rate": 7.871087126880095e-06, "loss": 0.3521, "step": 6295 }, { "epoch": 0.3123170792201994, "grad_norm": 7.4602274894714355, "learning_rate": 7.870442698153229e-06, "loss": 0.2566, "step": 6296 }, { "epoch": 0.3123666848553996, "grad_norm": 5.44557523727417, "learning_rate": 7.869798198295495e-06, "loss": 0.3728, "step": 6297 }, { "epoch": 0.3124162904905997, "grad_norm": 11.23982048034668, "learning_rate": 7.869153627322862e-06, "loss": 0.2491, "step": 6298 }, { "epoch": 0.3124658961257999, "grad_norm": 5.436179161071777, "learning_rate": 7.868508985251305e-06, "loss": 0.3309, "step": 6299 }, { "epoch": 0.31251550176100007, "grad_norm": 4.179923057556152, "learning_rate": 7.867864272096796e-06, "loss": 0.2679, "step": 6300 }, { "epoch": 0.3125651073962002, "grad_norm": 10.480948448181152, "learning_rate": 7.867219487875316e-06, "loss": 0.357, "step": 6301 }, { "epoch": 0.31261471303140037, "grad_norm": 5.204622268676758, "learning_rate": 7.866574632602839e-06, "loss": 0.3355, "step": 6302 }, { "epoch": 0.31266431866660055, "grad_norm": 15.866844177246094, "learning_rate": 7.865929706295345e-06, "loss": 0.4013, "step": 6303 }, { "epoch": 0.31271392430180067, "grad_norm": 8.43070125579834, "learning_rate": 7.865284708968818e-06, "loss": 0.365, "step": 6304 }, { "epoch": 0.31276352993700085, "grad_norm": 13.494219779968262, "learning_rate": 7.864639640639237e-06, "loss": 0.3303, "step": 6305 }, { "epoch": 0.31281313557220103, "grad_norm": 8.173759460449219, "learning_rate": 7.863994501322594e-06, "loss": 0.3214, "step": 6306 }, { "epoch": 0.31286274120740115, "grad_norm": 7.597445964813232, "learning_rate": 7.86334929103487e-06, "loss": 0.3658, "step": 6307 }, { "epoch": 0.31291234684260133, "grad_norm": 13.091497421264648, "learning_rate": 7.862704009792054e-06, "loss": 0.3792, "step": 6308 }, { "epoch": 0.31296195247780145, "grad_norm": 7.068939208984375, "learning_rate": 7.862058657610138e-06, "loss": 0.2655, "step": 6309 }, { "epoch": 0.31301155811300163, "grad_norm": 11.238765716552734, "learning_rate": 7.861413234505115e-06, "loss": 0.4014, "step": 6310 }, { "epoch": 0.3130611637482018, "grad_norm": 3.9431378841400146, "learning_rate": 7.860767740492978e-06, "loss": 0.3094, "step": 6311 }, { "epoch": 0.31311076938340193, "grad_norm": 4.57913875579834, "learning_rate": 7.86012217558972e-06, "loss": 0.2423, "step": 6312 }, { "epoch": 0.3131603750186021, "grad_norm": 7.287594318389893, "learning_rate": 7.859476539811344e-06, "loss": 0.2548, "step": 6313 }, { "epoch": 0.3132099806538023, "grad_norm": 11.072185516357422, "learning_rate": 7.858830833173842e-06, "loss": 0.3441, "step": 6314 }, { "epoch": 0.3132595862890024, "grad_norm": 19.788604736328125, "learning_rate": 7.858185055693222e-06, "loss": 0.3642, "step": 6315 }, { "epoch": 0.3133091919242026, "grad_norm": 9.349621772766113, "learning_rate": 7.857539207385482e-06, "loss": 0.2739, "step": 6316 }, { "epoch": 0.31335879755940277, "grad_norm": 7.00372314453125, "learning_rate": 7.856893288266629e-06, "loss": 0.2963, "step": 6317 }, { "epoch": 0.3134084031946029, "grad_norm": 8.521873474121094, "learning_rate": 7.856247298352666e-06, "loss": 0.2976, "step": 6318 }, { "epoch": 0.31345800882980307, "grad_norm": 15.709465980529785, "learning_rate": 7.855601237659604e-06, "loss": 0.3625, "step": 6319 }, { "epoch": 0.31350761446500325, "grad_norm": 7.847692012786865, "learning_rate": 7.854955106203451e-06, "loss": 0.2564, "step": 6320 }, { "epoch": 0.31355722010020337, "grad_norm": 13.908472061157227, "learning_rate": 7.85430890400022e-06, "loss": 0.4074, "step": 6321 }, { "epoch": 0.31360682573540355, "grad_norm": 6.871955394744873, "learning_rate": 7.853662631065922e-06, "loss": 0.2877, "step": 6322 }, { "epoch": 0.3136564313706037, "grad_norm": 9.039634704589844, "learning_rate": 7.853016287416575e-06, "loss": 0.4436, "step": 6323 }, { "epoch": 0.31370603700580385, "grad_norm": 6.27807092666626, "learning_rate": 7.852369873068194e-06, "loss": 0.293, "step": 6324 }, { "epoch": 0.31375564264100403, "grad_norm": 8.399821281433105, "learning_rate": 7.851723388036794e-06, "loss": 0.3369, "step": 6325 }, { "epoch": 0.31380524827620415, "grad_norm": 6.3848395347595215, "learning_rate": 7.8510768323384e-06, "loss": 0.2997, "step": 6326 }, { "epoch": 0.31385485391140433, "grad_norm": 4.778131008148193, "learning_rate": 7.850430205989035e-06, "loss": 0.3147, "step": 6327 }, { "epoch": 0.3139044595466045, "grad_norm": 4.77881383895874, "learning_rate": 7.849783509004716e-06, "loss": 0.2771, "step": 6328 }, { "epoch": 0.31395406518180463, "grad_norm": 6.266303539276123, "learning_rate": 7.849136741401475e-06, "loss": 0.2964, "step": 6329 }, { "epoch": 0.3140036708170048, "grad_norm": 7.494488716125488, "learning_rate": 7.848489903195337e-06, "loss": 0.3816, "step": 6330 }, { "epoch": 0.314053276452205, "grad_norm": 5.929967880249023, "learning_rate": 7.847842994402331e-06, "loss": 0.3135, "step": 6331 }, { "epoch": 0.3141028820874051, "grad_norm": 7.152928829193115, "learning_rate": 7.847196015038485e-06, "loss": 0.1487, "step": 6332 }, { "epoch": 0.3141524877226053, "grad_norm": 4.707422256469727, "learning_rate": 7.846548965119836e-06, "loss": 0.302, "step": 6333 }, { "epoch": 0.31420209335780547, "grad_norm": 8.542401313781738, "learning_rate": 7.845901844662415e-06, "loss": 0.4256, "step": 6334 }, { "epoch": 0.3142516989930056, "grad_norm": 6.861444473266602, "learning_rate": 7.84525465368226e-06, "loss": 0.3307, "step": 6335 }, { "epoch": 0.31430130462820577, "grad_norm": 12.488029479980469, "learning_rate": 7.844607392195406e-06, "loss": 0.4328, "step": 6336 }, { "epoch": 0.31435091026340595, "grad_norm": 7.295916557312012, "learning_rate": 7.843960060217895e-06, "loss": 0.3807, "step": 6337 }, { "epoch": 0.31440051589860607, "grad_norm": 6.072237014770508, "learning_rate": 7.843312657765767e-06, "loss": 0.2753, "step": 6338 }, { "epoch": 0.31445012153380625, "grad_norm": 7.261124134063721, "learning_rate": 7.842665184855066e-06, "loss": 0.3616, "step": 6339 }, { "epoch": 0.3144997271690064, "grad_norm": 15.448396682739258, "learning_rate": 7.842017641501834e-06, "loss": 0.615, "step": 6340 }, { "epoch": 0.31454933280420655, "grad_norm": 8.021350860595703, "learning_rate": 7.841370027722122e-06, "loss": 0.2555, "step": 6341 }, { "epoch": 0.31459893843940673, "grad_norm": 6.933772087097168, "learning_rate": 7.840722343531972e-06, "loss": 0.3249, "step": 6342 }, { "epoch": 0.31464854407460685, "grad_norm": 15.454512596130371, "learning_rate": 7.84007458894744e-06, "loss": 0.3993, "step": 6343 }, { "epoch": 0.31469814970980703, "grad_norm": 9.45832347869873, "learning_rate": 7.839426763984575e-06, "loss": 0.3744, "step": 6344 }, { "epoch": 0.3147477553450072, "grad_norm": 7.5923967361450195, "learning_rate": 7.83877886865943e-06, "loss": 0.282, "step": 6345 }, { "epoch": 0.31479736098020733, "grad_norm": 5.760879993438721, "learning_rate": 7.83813090298806e-06, "loss": 0.2954, "step": 6346 }, { "epoch": 0.3148469666154075, "grad_norm": 8.019405364990234, "learning_rate": 7.837482866986521e-06, "loss": 0.3227, "step": 6347 }, { "epoch": 0.3148965722506077, "grad_norm": 7.213224411010742, "learning_rate": 7.836834760670875e-06, "loss": 0.2316, "step": 6348 }, { "epoch": 0.3149461778858078, "grad_norm": 6.775620460510254, "learning_rate": 7.836186584057179e-06, "loss": 0.335, "step": 6349 }, { "epoch": 0.314995783521008, "grad_norm": 4.218844413757324, "learning_rate": 7.835538337161497e-06, "loss": 0.2198, "step": 6350 }, { "epoch": 0.31504538915620817, "grad_norm": 8.506552696228027, "learning_rate": 7.834890019999892e-06, "loss": 0.3389, "step": 6351 }, { "epoch": 0.3150949947914083, "grad_norm": 6.667000770568848, "learning_rate": 7.834241632588431e-06, "loss": 0.2204, "step": 6352 }, { "epoch": 0.31514460042660847, "grad_norm": 4.314852714538574, "learning_rate": 7.833593174943181e-06, "loss": 0.2316, "step": 6353 }, { "epoch": 0.31519420606180865, "grad_norm": 6.257678031921387, "learning_rate": 7.83294464708021e-06, "loss": 0.3557, "step": 6354 }, { "epoch": 0.31524381169700877, "grad_norm": 7.583454608917236, "learning_rate": 7.832296049015586e-06, "loss": 0.3454, "step": 6355 }, { "epoch": 0.31529341733220895, "grad_norm": 4.000058174133301, "learning_rate": 7.83164738076539e-06, "loss": 0.2626, "step": 6356 }, { "epoch": 0.31534302296740907, "grad_norm": 8.186145782470703, "learning_rate": 7.830998642345687e-06, "loss": 0.3447, "step": 6357 }, { "epoch": 0.31539262860260925, "grad_norm": 104.25868225097656, "learning_rate": 7.830349833772558e-06, "loss": 0.4355, "step": 6358 }, { "epoch": 0.3154422342378094, "grad_norm": 10.058536529541016, "learning_rate": 7.82970095506208e-06, "loss": 0.4018, "step": 6359 }, { "epoch": 0.31549183987300955, "grad_norm": 18.29463005065918, "learning_rate": 7.829052006230334e-06, "loss": 0.4102, "step": 6360 }, { "epoch": 0.31554144550820973, "grad_norm": 14.205148696899414, "learning_rate": 7.828402987293397e-06, "loss": 0.3307, "step": 6361 }, { "epoch": 0.3155910511434099, "grad_norm": 6.165006637573242, "learning_rate": 7.827753898267354e-06, "loss": 0.3636, "step": 6362 }, { "epoch": 0.31564065677861003, "grad_norm": 7.516364097595215, "learning_rate": 7.827104739168291e-06, "loss": 0.305, "step": 6363 }, { "epoch": 0.3156902624138102, "grad_norm": 9.974604606628418, "learning_rate": 7.826455510012296e-06, "loss": 0.2963, "step": 6364 }, { "epoch": 0.3157398680490104, "grad_norm": 10.147313117980957, "learning_rate": 7.825806210815454e-06, "loss": 0.4125, "step": 6365 }, { "epoch": 0.3157894736842105, "grad_norm": 6.428226947784424, "learning_rate": 7.825156841593854e-06, "loss": 0.3127, "step": 6366 }, { "epoch": 0.3158390793194107, "grad_norm": 4.773719310760498, "learning_rate": 7.824507402363591e-06, "loss": 0.3559, "step": 6367 }, { "epoch": 0.31588868495461087, "grad_norm": 6.173552989959717, "learning_rate": 7.823857893140756e-06, "loss": 0.222, "step": 6368 }, { "epoch": 0.315938290589811, "grad_norm": 5.951358318328857, "learning_rate": 7.823208313941446e-06, "loss": 0.2625, "step": 6369 }, { "epoch": 0.31598789622501117, "grad_norm": 7.7724432945251465, "learning_rate": 7.822558664781756e-06, "loss": 0.3405, "step": 6370 }, { "epoch": 0.31603750186021135, "grad_norm": 4.409429550170898, "learning_rate": 7.821908945677787e-06, "loss": 0.2664, "step": 6371 }, { "epoch": 0.31608710749541147, "grad_norm": 9.251694679260254, "learning_rate": 7.821259156645637e-06, "loss": 0.3277, "step": 6372 }, { "epoch": 0.31613671313061165, "grad_norm": 7.840397357940674, "learning_rate": 7.820609297701409e-06, "loss": 0.3508, "step": 6373 }, { "epoch": 0.31618631876581177, "grad_norm": 9.628864288330078, "learning_rate": 7.819959368861206e-06, "loss": 0.3177, "step": 6374 }, { "epoch": 0.31623592440101195, "grad_norm": 7.429715633392334, "learning_rate": 7.819309370141134e-06, "loss": 0.3546, "step": 6375 }, { "epoch": 0.3162855300362121, "grad_norm": 9.608504295349121, "learning_rate": 7.818659301557302e-06, "loss": 0.2863, "step": 6376 }, { "epoch": 0.31633513567141225, "grad_norm": 9.845430374145508, "learning_rate": 7.818009163125816e-06, "loss": 0.3252, "step": 6377 }, { "epoch": 0.3163847413066124, "grad_norm": 5.673381805419922, "learning_rate": 7.81735895486279e-06, "loss": 0.3047, "step": 6378 }, { "epoch": 0.3164343469418126, "grad_norm": 6.277688503265381, "learning_rate": 7.816708676784332e-06, "loss": 0.3977, "step": 6379 }, { "epoch": 0.31648395257701273, "grad_norm": 8.79881477355957, "learning_rate": 7.816058328906562e-06, "loss": 0.2847, "step": 6380 }, { "epoch": 0.3165335582122129, "grad_norm": 5.127819538116455, "learning_rate": 7.815407911245592e-06, "loss": 0.2903, "step": 6381 }, { "epoch": 0.3165831638474131, "grad_norm": 5.459056854248047, "learning_rate": 7.814757423817539e-06, "loss": 0.361, "step": 6382 }, { "epoch": 0.3166327694826132, "grad_norm": 11.786589622497559, "learning_rate": 7.814106866638524e-06, "loss": 0.443, "step": 6383 }, { "epoch": 0.3166823751178134, "grad_norm": 7.396628379821777, "learning_rate": 7.813456239724668e-06, "loss": 0.4023, "step": 6384 }, { "epoch": 0.31673198075301356, "grad_norm": 12.18025016784668, "learning_rate": 7.812805543092094e-06, "loss": 0.378, "step": 6385 }, { "epoch": 0.3167815863882137, "grad_norm": 6.918927192687988, "learning_rate": 7.812154776756926e-06, "loss": 0.3771, "step": 6386 }, { "epoch": 0.31683119202341387, "grad_norm": 8.104296684265137, "learning_rate": 7.811503940735292e-06, "loss": 0.3023, "step": 6387 }, { "epoch": 0.31688079765861404, "grad_norm": 7.723027229309082, "learning_rate": 7.810853035043318e-06, "loss": 0.2702, "step": 6388 }, { "epoch": 0.31693040329381417, "grad_norm": 4.023336887359619, "learning_rate": 7.810202059697133e-06, "loss": 0.2713, "step": 6389 }, { "epoch": 0.31698000892901435, "grad_norm": 6.978924751281738, "learning_rate": 7.80955101471287e-06, "loss": 0.3144, "step": 6390 }, { "epoch": 0.31702961456421447, "grad_norm": 5.385634899139404, "learning_rate": 7.808899900106664e-06, "loss": 0.3549, "step": 6391 }, { "epoch": 0.31707922019941465, "grad_norm": 12.602327346801758, "learning_rate": 7.808248715894645e-06, "loss": 0.439, "step": 6392 }, { "epoch": 0.3171288258346148, "grad_norm": 3.810076951980591, "learning_rate": 7.807597462092952e-06, "loss": 0.2262, "step": 6393 }, { "epoch": 0.31717843146981495, "grad_norm": 6.013265132904053, "learning_rate": 7.806946138717727e-06, "loss": 0.3377, "step": 6394 }, { "epoch": 0.3172280371050151, "grad_norm": 7.903205394744873, "learning_rate": 7.806294745785106e-06, "loss": 0.3153, "step": 6395 }, { "epoch": 0.3172776427402153, "grad_norm": 6.054721355438232, "learning_rate": 7.805643283311231e-06, "loss": 0.305, "step": 6396 }, { "epoch": 0.3173272483754154, "grad_norm": 4.017788887023926, "learning_rate": 7.804991751312247e-06, "loss": 0.2965, "step": 6397 }, { "epoch": 0.3173768540106156, "grad_norm": 5.722713947296143, "learning_rate": 7.804340149804299e-06, "loss": 0.3007, "step": 6398 }, { "epoch": 0.3174264596458158, "grad_norm": 6.487730503082275, "learning_rate": 7.803688478803532e-06, "loss": 0.3065, "step": 6399 }, { "epoch": 0.3174760652810159, "grad_norm": 6.449167251586914, "learning_rate": 7.803036738326097e-06, "loss": 0.3307, "step": 6400 }, { "epoch": 0.3175256709162161, "grad_norm": 9.888346672058105, "learning_rate": 7.802384928388144e-06, "loss": 0.2725, "step": 6401 }, { "epoch": 0.31757527655141626, "grad_norm": 6.862818241119385, "learning_rate": 7.801733049005825e-06, "loss": 0.3039, "step": 6402 }, { "epoch": 0.3176248821866164, "grad_norm": 9.04959774017334, "learning_rate": 7.801081100195294e-06, "loss": 0.4701, "step": 6403 }, { "epoch": 0.31767448782181656, "grad_norm": 5.5462775230407715, "learning_rate": 7.800429081972705e-06, "loss": 0.3195, "step": 6404 }, { "epoch": 0.31772409345701674, "grad_norm": 7.3814377784729, "learning_rate": 7.799776994354217e-06, "loss": 0.359, "step": 6405 }, { "epoch": 0.31777369909221687, "grad_norm": 5.582953929901123, "learning_rate": 7.799124837355989e-06, "loss": 0.3009, "step": 6406 }, { "epoch": 0.31782330472741704, "grad_norm": 14.137614250183105, "learning_rate": 7.79847261099418e-06, "loss": 0.4533, "step": 6407 }, { "epoch": 0.31787291036261717, "grad_norm": 5.788240909576416, "learning_rate": 7.797820315284957e-06, "loss": 0.2639, "step": 6408 }, { "epoch": 0.31792251599781735, "grad_norm": 7.97137975692749, "learning_rate": 7.797167950244478e-06, "loss": 0.3465, "step": 6409 }, { "epoch": 0.3179721216330175, "grad_norm": 8.172945976257324, "learning_rate": 7.796515515888911e-06, "loss": 0.2844, "step": 6410 }, { "epoch": 0.31802172726821765, "grad_norm": 6.864522933959961, "learning_rate": 7.795863012234428e-06, "loss": 0.3507, "step": 6411 }, { "epoch": 0.3180713329034178, "grad_norm": 7.9424028396606445, "learning_rate": 7.795210439297192e-06, "loss": 0.2457, "step": 6412 }, { "epoch": 0.318120938538618, "grad_norm": 8.421050071716309, "learning_rate": 7.794557797093378e-06, "loss": 0.3891, "step": 6413 }, { "epoch": 0.3181705441738181, "grad_norm": 11.924736976623535, "learning_rate": 7.79390508563916e-06, "loss": 0.3882, "step": 6414 }, { "epoch": 0.3182201498090183, "grad_norm": 6.277160167694092, "learning_rate": 7.793252304950707e-06, "loss": 0.4266, "step": 6415 }, { "epoch": 0.3182697554442185, "grad_norm": 7.118072509765625, "learning_rate": 7.792599455044198e-06, "loss": 0.2195, "step": 6416 }, { "epoch": 0.3183193610794186, "grad_norm": 8.68127155303955, "learning_rate": 7.791946535935815e-06, "loss": 0.3443, "step": 6417 }, { "epoch": 0.3183689667146188, "grad_norm": 5.524832725524902, "learning_rate": 7.791293547641729e-06, "loss": 0.2332, "step": 6418 }, { "epoch": 0.31841857234981896, "grad_norm": 17.217885971069336, "learning_rate": 7.79064049017813e-06, "loss": 0.396, "step": 6419 }, { "epoch": 0.3184681779850191, "grad_norm": 5.20712947845459, "learning_rate": 7.789987363561196e-06, "loss": 0.2092, "step": 6420 }, { "epoch": 0.31851778362021926, "grad_norm": 16.876014709472656, "learning_rate": 7.789334167807112e-06, "loss": 0.386, "step": 6421 }, { "epoch": 0.31856738925541944, "grad_norm": 6.950552940368652, "learning_rate": 7.788680902932067e-06, "loss": 0.2507, "step": 6422 }, { "epoch": 0.31861699489061956, "grad_norm": 8.431093215942383, "learning_rate": 7.788027568952247e-06, "loss": 0.2747, "step": 6423 }, { "epoch": 0.31866660052581974, "grad_norm": 5.4920525550842285, "learning_rate": 7.787374165883844e-06, "loss": 0.2382, "step": 6424 }, { "epoch": 0.31871620616101987, "grad_norm": 6.114250183105469, "learning_rate": 7.786720693743048e-06, "loss": 0.3399, "step": 6425 }, { "epoch": 0.31876581179622004, "grad_norm": 9.624238967895508, "learning_rate": 7.786067152546051e-06, "loss": 0.4176, "step": 6426 }, { "epoch": 0.3188154174314202, "grad_norm": 28.232271194458008, "learning_rate": 7.78541354230905e-06, "loss": 0.4021, "step": 6427 }, { "epoch": 0.31886502306662035, "grad_norm": 6.948848247528076, "learning_rate": 7.784759863048243e-06, "loss": 0.355, "step": 6428 }, { "epoch": 0.3189146287018205, "grad_norm": 3.972184896469116, "learning_rate": 7.784106114779826e-06, "loss": 0.24, "step": 6429 }, { "epoch": 0.3189642343370207, "grad_norm": 6.2776079177856445, "learning_rate": 7.78345229752e-06, "loss": 0.3174, "step": 6430 }, { "epoch": 0.3190138399722208, "grad_norm": 10.512795448303223, "learning_rate": 7.782798411284968e-06, "loss": 0.4418, "step": 6431 }, { "epoch": 0.319063445607421, "grad_norm": 5.657670974731445, "learning_rate": 7.782144456090931e-06, "loss": 0.2577, "step": 6432 }, { "epoch": 0.3191130512426212, "grad_norm": 8.824331283569336, "learning_rate": 7.781490431954097e-06, "loss": 0.3546, "step": 6433 }, { "epoch": 0.3191626568778213, "grad_norm": 8.585709571838379, "learning_rate": 7.78083633889067e-06, "loss": 0.3454, "step": 6434 }, { "epoch": 0.3192122625130215, "grad_norm": 5.974283218383789, "learning_rate": 7.780182176916864e-06, "loss": 0.2159, "step": 6435 }, { "epoch": 0.31926186814822166, "grad_norm": 12.666109085083008, "learning_rate": 7.779527946048884e-06, "loss": 0.5175, "step": 6436 }, { "epoch": 0.3193114737834218, "grad_norm": 8.219266891479492, "learning_rate": 7.778873646302943e-06, "loss": 0.5096, "step": 6437 }, { "epoch": 0.31936107941862196, "grad_norm": 6.974595546722412, "learning_rate": 7.778219277695259e-06, "loss": 0.2568, "step": 6438 }, { "epoch": 0.31941068505382214, "grad_norm": 8.815500259399414, "learning_rate": 7.777564840242042e-06, "loss": 0.3189, "step": 6439 }, { "epoch": 0.31946029068902226, "grad_norm": 5.818253993988037, "learning_rate": 7.776910333959511e-06, "loss": 0.3629, "step": 6440 }, { "epoch": 0.31950989632422244, "grad_norm": 5.590034008026123, "learning_rate": 7.776255758863889e-06, "loss": 0.3439, "step": 6441 }, { "epoch": 0.31955950195942256, "grad_norm": 5.489120006561279, "learning_rate": 7.775601114971392e-06, "loss": 0.3702, "step": 6442 }, { "epoch": 0.31960910759462274, "grad_norm": 10.744552612304688, "learning_rate": 7.774946402298244e-06, "loss": 0.382, "step": 6443 }, { "epoch": 0.3196587132298229, "grad_norm": 4.469836235046387, "learning_rate": 7.77429162086067e-06, "loss": 0.178, "step": 6444 }, { "epoch": 0.31970831886502304, "grad_norm": 4.740589141845703, "learning_rate": 7.773636770674894e-06, "loss": 0.2306, "step": 6445 }, { "epoch": 0.3197579245002232, "grad_norm": 9.316146850585938, "learning_rate": 7.772981851757143e-06, "loss": 0.5659, "step": 6446 }, { "epoch": 0.3198075301354234, "grad_norm": 5.198523998260498, "learning_rate": 7.772326864123646e-06, "loss": 0.3953, "step": 6447 }, { "epoch": 0.3198571357706235, "grad_norm": 6.390565395355225, "learning_rate": 7.771671807790637e-06, "loss": 0.2723, "step": 6448 }, { "epoch": 0.3199067414058237, "grad_norm": 3.9165894985198975, "learning_rate": 7.771016682774349e-06, "loss": 0.2791, "step": 6449 }, { "epoch": 0.3199563470410239, "grad_norm": 5.498579502105713, "learning_rate": 7.770361489091011e-06, "loss": 0.2945, "step": 6450 }, { "epoch": 0.320005952676224, "grad_norm": 4.617426872253418, "learning_rate": 7.769706226756864e-06, "loss": 0.3128, "step": 6451 }, { "epoch": 0.3200555583114242, "grad_norm": 6.956923007965088, "learning_rate": 7.769050895788141e-06, "loss": 0.3603, "step": 6452 }, { "epoch": 0.32010516394662436, "grad_norm": 8.055607795715332, "learning_rate": 7.768395496201087e-06, "loss": 0.298, "step": 6453 }, { "epoch": 0.3201547695818245, "grad_norm": 7.854771614074707, "learning_rate": 7.767740028011942e-06, "loss": 0.3479, "step": 6454 }, { "epoch": 0.32020437521702466, "grad_norm": 4.905089378356934, "learning_rate": 7.767084491236943e-06, "loss": 0.2502, "step": 6455 }, { "epoch": 0.32025398085222484, "grad_norm": 6.219354152679443, "learning_rate": 7.766428885892341e-06, "loss": 0.3412, "step": 6456 }, { "epoch": 0.32030358648742496, "grad_norm": 11.693902015686035, "learning_rate": 7.765773211994378e-06, "loss": 0.4719, "step": 6457 }, { "epoch": 0.32035319212262514, "grad_norm": 6.8735432624816895, "learning_rate": 7.765117469559305e-06, "loss": 0.3287, "step": 6458 }, { "epoch": 0.32040279775782526, "grad_norm": 7.018953323364258, "learning_rate": 7.76446165860337e-06, "loss": 0.3741, "step": 6459 }, { "epoch": 0.32045240339302544, "grad_norm": 6.758845806121826, "learning_rate": 7.763805779142825e-06, "loss": 0.3931, "step": 6460 }, { "epoch": 0.3205020090282256, "grad_norm": 11.414790153503418, "learning_rate": 7.763149831193923e-06, "loss": 0.3146, "step": 6461 }, { "epoch": 0.32055161466342574, "grad_norm": 3.9755709171295166, "learning_rate": 7.762493814772918e-06, "loss": 0.3207, "step": 6462 }, { "epoch": 0.3206012202986259, "grad_norm": 6.546868801116943, "learning_rate": 7.761837729896065e-06, "loss": 0.3576, "step": 6463 }, { "epoch": 0.3206508259338261, "grad_norm": 11.825101852416992, "learning_rate": 7.761181576579626e-06, "loss": 0.4182, "step": 6464 }, { "epoch": 0.3207004315690262, "grad_norm": 11.018345832824707, "learning_rate": 7.760525354839857e-06, "loss": 0.33, "step": 6465 }, { "epoch": 0.3207500372042264, "grad_norm": 7.666572570800781, "learning_rate": 7.759869064693022e-06, "loss": 0.3344, "step": 6466 }, { "epoch": 0.3207996428394266, "grad_norm": 5.031750202178955, "learning_rate": 7.759212706155383e-06, "loss": 0.2839, "step": 6467 }, { "epoch": 0.3208492484746267, "grad_norm": 10.593536376953125, "learning_rate": 7.758556279243205e-06, "loss": 0.4066, "step": 6468 }, { "epoch": 0.3208988541098269, "grad_norm": 6.743133068084717, "learning_rate": 7.757899783972753e-06, "loss": 0.2862, "step": 6469 }, { "epoch": 0.32094845974502706, "grad_norm": 7.226381778717041, "learning_rate": 7.757243220360299e-06, "loss": 0.2828, "step": 6470 }, { "epoch": 0.3209980653802272, "grad_norm": 7.989894866943359, "learning_rate": 7.756586588422111e-06, "loss": 0.3533, "step": 6471 }, { "epoch": 0.32104767101542736, "grad_norm": 7.172388076782227, "learning_rate": 7.755929888174458e-06, "loss": 0.2718, "step": 6472 }, { "epoch": 0.32109727665062754, "grad_norm": 10.958028793334961, "learning_rate": 7.755273119633617e-06, "loss": 0.3739, "step": 6473 }, { "epoch": 0.32114688228582766, "grad_norm": 8.296669006347656, "learning_rate": 7.75461628281586e-06, "loss": 0.3744, "step": 6474 }, { "epoch": 0.32119648792102784, "grad_norm": 5.742181777954102, "learning_rate": 7.75395937773747e-06, "loss": 0.306, "step": 6475 }, { "epoch": 0.32124609355622796, "grad_norm": 11.745733261108398, "learning_rate": 7.753302404414718e-06, "loss": 0.4395, "step": 6476 }, { "epoch": 0.32129569919142814, "grad_norm": 7.539831638336182, "learning_rate": 7.752645362863886e-06, "loss": 0.4194, "step": 6477 }, { "epoch": 0.3213453048266283, "grad_norm": 7.292501926422119, "learning_rate": 7.75198825310126e-06, "loss": 0.2764, "step": 6478 }, { "epoch": 0.32139491046182844, "grad_norm": 5.483368396759033, "learning_rate": 7.751331075143116e-06, "loss": 0.3699, "step": 6479 }, { "epoch": 0.3214445160970286, "grad_norm": 5.555126667022705, "learning_rate": 7.750673829005746e-06, "loss": 0.2133, "step": 6480 }, { "epoch": 0.3214941217322288, "grad_norm": 7.890456199645996, "learning_rate": 7.750016514705433e-06, "loss": 0.4055, "step": 6481 }, { "epoch": 0.3215437273674289, "grad_norm": 14.265227317810059, "learning_rate": 7.749359132258467e-06, "loss": 0.3525, "step": 6482 }, { "epoch": 0.3215933330026291, "grad_norm": 4.732740879058838, "learning_rate": 7.748701681681139e-06, "loss": 0.3201, "step": 6483 }, { "epoch": 0.3216429386378293, "grad_norm": 6.974274158477783, "learning_rate": 7.74804416298974e-06, "loss": 0.2721, "step": 6484 }, { "epoch": 0.3216925442730294, "grad_norm": 4.534354209899902, "learning_rate": 7.747386576200562e-06, "loss": 0.2908, "step": 6485 }, { "epoch": 0.3217421499082296, "grad_norm": 4.990870475769043, "learning_rate": 7.746728921329903e-06, "loss": 0.2321, "step": 6486 }, { "epoch": 0.32179175554342976, "grad_norm": 7.644561290740967, "learning_rate": 7.746071198394059e-06, "loss": 0.3159, "step": 6487 }, { "epoch": 0.3218413611786299, "grad_norm": 6.74783992767334, "learning_rate": 7.745413407409328e-06, "loss": 0.3285, "step": 6488 }, { "epoch": 0.32189096681383006, "grad_norm": 3.4719929695129395, "learning_rate": 7.744755548392011e-06, "loss": 0.225, "step": 6489 }, { "epoch": 0.3219405724490302, "grad_norm": 5.871480464935303, "learning_rate": 7.74409762135841e-06, "loss": 0.3833, "step": 6490 }, { "epoch": 0.32199017808423036, "grad_norm": 10.64559268951416, "learning_rate": 7.74343962632483e-06, "loss": 0.2845, "step": 6491 }, { "epoch": 0.32203978371943054, "grad_norm": 11.261591911315918, "learning_rate": 7.742781563307574e-06, "loss": 0.4816, "step": 6492 }, { "epoch": 0.32208938935463066, "grad_norm": 5.631901741027832, "learning_rate": 7.74212343232295e-06, "loss": 0.2748, "step": 6493 }, { "epoch": 0.32213899498983084, "grad_norm": 6.607101917266846, "learning_rate": 7.741465233387267e-06, "loss": 0.3417, "step": 6494 }, { "epoch": 0.322188600625031, "grad_norm": 9.443604469299316, "learning_rate": 7.740806966516835e-06, "loss": 0.3227, "step": 6495 }, { "epoch": 0.32223820626023114, "grad_norm": 6.26198673248291, "learning_rate": 7.740148631727967e-06, "loss": 0.2486, "step": 6496 }, { "epoch": 0.3222878118954313, "grad_norm": 9.113554000854492, "learning_rate": 7.739490229036976e-06, "loss": 0.3224, "step": 6497 }, { "epoch": 0.3223374175306315, "grad_norm": 10.966947555541992, "learning_rate": 7.738831758460178e-06, "loss": 0.4091, "step": 6498 }, { "epoch": 0.3223870231658316, "grad_norm": 7.622558116912842, "learning_rate": 7.73817322001389e-06, "loss": 0.2773, "step": 6499 }, { "epoch": 0.3224366288010318, "grad_norm": 6.029237270355225, "learning_rate": 7.737514613714432e-06, "loss": 0.2252, "step": 6500 }, { "epoch": 0.322486234436232, "grad_norm": 4.428651809692383, "learning_rate": 7.736855939578123e-06, "loss": 0.322, "step": 6501 }, { "epoch": 0.3225358400714321, "grad_norm": 4.62679386138916, "learning_rate": 7.736197197621286e-06, "loss": 0.3199, "step": 6502 }, { "epoch": 0.3225854457066323, "grad_norm": 17.151220321655273, "learning_rate": 7.735538387860244e-06, "loss": 0.3612, "step": 6503 }, { "epoch": 0.32263505134183246, "grad_norm": 7.6789069175720215, "learning_rate": 7.734879510311323e-06, "loss": 0.2765, "step": 6504 }, { "epoch": 0.3226846569770326, "grad_norm": 8.49851131439209, "learning_rate": 7.734220564990853e-06, "loss": 0.3457, "step": 6505 }, { "epoch": 0.32273426261223276, "grad_norm": 5.868967056274414, "learning_rate": 7.733561551915159e-06, "loss": 0.3559, "step": 6506 }, { "epoch": 0.3227838682474329, "grad_norm": 8.148934364318848, "learning_rate": 7.732902471100571e-06, "loss": 0.163, "step": 6507 }, { "epoch": 0.32283347388263306, "grad_norm": 11.442878723144531, "learning_rate": 7.732243322563426e-06, "loss": 0.3624, "step": 6508 }, { "epoch": 0.32288307951783324, "grad_norm": 7.3943939208984375, "learning_rate": 7.731584106320055e-06, "loss": 0.3831, "step": 6509 }, { "epoch": 0.32293268515303336, "grad_norm": 3.969759941101074, "learning_rate": 7.730924822386795e-06, "loss": 0.2561, "step": 6510 }, { "epoch": 0.32298229078823354, "grad_norm": 14.36358642578125, "learning_rate": 7.730265470779982e-06, "loss": 0.3736, "step": 6511 }, { "epoch": 0.3230318964234337, "grad_norm": 5.947001934051514, "learning_rate": 7.729606051515957e-06, "loss": 0.3839, "step": 6512 }, { "epoch": 0.32308150205863384, "grad_norm": 5.613556861877441, "learning_rate": 7.728946564611057e-06, "loss": 0.2507, "step": 6513 }, { "epoch": 0.323131107693834, "grad_norm": 5.190431594848633, "learning_rate": 7.728287010081628e-06, "loss": 0.2829, "step": 6514 }, { "epoch": 0.3231807133290342, "grad_norm": 5.310678482055664, "learning_rate": 7.727627387944013e-06, "loss": 0.3289, "step": 6515 }, { "epoch": 0.3232303189642343, "grad_norm": 3.808957576751709, "learning_rate": 7.726967698214556e-06, "loss": 0.2108, "step": 6516 }, { "epoch": 0.3232799245994345, "grad_norm": 10.942614555358887, "learning_rate": 7.726307940909605e-06, "loss": 0.3267, "step": 6517 }, { "epoch": 0.3233295302346347, "grad_norm": 7.480978488922119, "learning_rate": 7.725648116045512e-06, "loss": 0.3135, "step": 6518 }, { "epoch": 0.3233791358698348, "grad_norm": 9.536062240600586, "learning_rate": 7.724988223638628e-06, "loss": 0.4691, "step": 6519 }, { "epoch": 0.323428741505035, "grad_norm": 9.753767967224121, "learning_rate": 7.724328263705299e-06, "loss": 0.3235, "step": 6520 }, { "epoch": 0.32347834714023516, "grad_norm": 9.416946411132812, "learning_rate": 7.723668236261885e-06, "loss": 0.3973, "step": 6521 }, { "epoch": 0.3235279527754353, "grad_norm": 7.827301025390625, "learning_rate": 7.72300814132474e-06, "loss": 0.3913, "step": 6522 }, { "epoch": 0.32357755841063546, "grad_norm": 11.335097312927246, "learning_rate": 7.722347978910222e-06, "loss": 0.2994, "step": 6523 }, { "epoch": 0.3236271640458356, "grad_norm": 6.703947067260742, "learning_rate": 7.721687749034688e-06, "loss": 0.2551, "step": 6524 }, { "epoch": 0.32367676968103576, "grad_norm": 6.977732181549072, "learning_rate": 7.721027451714502e-06, "loss": 0.3018, "step": 6525 }, { "epoch": 0.32372637531623594, "grad_norm": 4.1528472900390625, "learning_rate": 7.720367086966026e-06, "loss": 0.309, "step": 6526 }, { "epoch": 0.32377598095143606, "grad_norm": 5.762758255004883, "learning_rate": 7.719706654805621e-06, "loss": 0.3465, "step": 6527 }, { "epoch": 0.32382558658663624, "grad_norm": 5.845366954803467, "learning_rate": 7.719046155249654e-06, "loss": 0.27, "step": 6528 }, { "epoch": 0.3238751922218364, "grad_norm": 6.521230220794678, "learning_rate": 7.718385588314495e-06, "loss": 0.2746, "step": 6529 }, { "epoch": 0.32392479785703654, "grad_norm": 10.021734237670898, "learning_rate": 7.717724954016512e-06, "loss": 0.4466, "step": 6530 }, { "epoch": 0.3239744034922367, "grad_norm": 5.009587287902832, "learning_rate": 7.717064252372073e-06, "loss": 0.2663, "step": 6531 }, { "epoch": 0.3240240091274369, "grad_norm": 5.387628555297852, "learning_rate": 7.716403483397555e-06, "loss": 0.3094, "step": 6532 }, { "epoch": 0.324073614762637, "grad_norm": 4.620908737182617, "learning_rate": 7.715742647109329e-06, "loss": 0.2492, "step": 6533 }, { "epoch": 0.3241232203978372, "grad_norm": 7.1676812171936035, "learning_rate": 7.715081743523771e-06, "loss": 0.2488, "step": 6534 }, { "epoch": 0.3241728260330374, "grad_norm": 5.600887775421143, "learning_rate": 7.714420772657261e-06, "loss": 0.324, "step": 6535 }, { "epoch": 0.3242224316682375, "grad_norm": 9.232091903686523, "learning_rate": 7.713759734526176e-06, "loss": 0.4198, "step": 6536 }, { "epoch": 0.3242720373034377, "grad_norm": 4.203449249267578, "learning_rate": 7.713098629146895e-06, "loss": 0.2782, "step": 6537 }, { "epoch": 0.32432164293863786, "grad_norm": 11.941879272460938, "learning_rate": 7.712437456535804e-06, "loss": 0.2861, "step": 6538 }, { "epoch": 0.324371248573838, "grad_norm": 8.138650894165039, "learning_rate": 7.711776216709287e-06, "loss": 0.3721, "step": 6539 }, { "epoch": 0.32442085420903816, "grad_norm": 5.429128646850586, "learning_rate": 7.711114909683727e-06, "loss": 0.2298, "step": 6540 }, { "epoch": 0.3244704598442383, "grad_norm": 5.6667022705078125, "learning_rate": 7.710453535475515e-06, "loss": 0.2551, "step": 6541 }, { "epoch": 0.32452006547943846, "grad_norm": 6.992079734802246, "learning_rate": 7.709792094101037e-06, "loss": 0.3312, "step": 6542 }, { "epoch": 0.32456967111463864, "grad_norm": 11.27860164642334, "learning_rate": 7.709130585576684e-06, "loss": 0.433, "step": 6543 }, { "epoch": 0.32461927674983876, "grad_norm": 5.886403560638428, "learning_rate": 7.708469009918853e-06, "loss": 0.3119, "step": 6544 }, { "epoch": 0.32466888238503894, "grad_norm": 10.164913177490234, "learning_rate": 7.707807367143932e-06, "loss": 0.3258, "step": 6545 }, { "epoch": 0.3247184880202391, "grad_norm": 15.134105682373047, "learning_rate": 7.707145657268322e-06, "loss": 0.5103, "step": 6546 }, { "epoch": 0.32476809365543924, "grad_norm": 3.06375789642334, "learning_rate": 7.706483880308415e-06, "loss": 0.211, "step": 6547 }, { "epoch": 0.3248176992906394, "grad_norm": 7.798203945159912, "learning_rate": 7.705822036280615e-06, "loss": 0.2643, "step": 6548 }, { "epoch": 0.3248673049258396, "grad_norm": 10.834623336791992, "learning_rate": 7.705160125201322e-06, "loss": 0.4501, "step": 6549 }, { "epoch": 0.3249169105610397, "grad_norm": 8.866413116455078, "learning_rate": 7.704498147086936e-06, "loss": 0.3921, "step": 6550 }, { "epoch": 0.3249665161962399, "grad_norm": 9.50882625579834, "learning_rate": 7.703836101953862e-06, "loss": 0.3545, "step": 6551 }, { "epoch": 0.3250161218314401, "grad_norm": 10.702054977416992, "learning_rate": 7.703173989818506e-06, "loss": 0.3601, "step": 6552 }, { "epoch": 0.3250657274666402, "grad_norm": 5.113358497619629, "learning_rate": 7.702511810697278e-06, "loss": 0.2881, "step": 6553 }, { "epoch": 0.3251153331018404, "grad_norm": 8.019805908203125, "learning_rate": 7.701849564606583e-06, "loss": 0.3451, "step": 6554 }, { "epoch": 0.32516493873704055, "grad_norm": 9.973411560058594, "learning_rate": 7.701187251562834e-06, "loss": 0.3834, "step": 6555 }, { "epoch": 0.3252145443722407, "grad_norm": 10.179302215576172, "learning_rate": 7.700524871582441e-06, "loss": 0.4137, "step": 6556 }, { "epoch": 0.32526415000744086, "grad_norm": 8.944625854492188, "learning_rate": 7.699862424681823e-06, "loss": 0.3842, "step": 6557 }, { "epoch": 0.325313755642641, "grad_norm": 9.070514678955078, "learning_rate": 7.699199910877391e-06, "loss": 0.3116, "step": 6558 }, { "epoch": 0.32536336127784116, "grad_norm": 6.834606647491455, "learning_rate": 7.698537330185568e-06, "loss": 0.3338, "step": 6559 }, { "epoch": 0.32541296691304133, "grad_norm": 26.053462982177734, "learning_rate": 7.697874682622766e-06, "loss": 0.3773, "step": 6560 }, { "epoch": 0.32546257254824146, "grad_norm": 4.288338661193848, "learning_rate": 7.69721196820541e-06, "loss": 0.2363, "step": 6561 }, { "epoch": 0.32551217818344164, "grad_norm": 9.68822193145752, "learning_rate": 7.69654918694992e-06, "loss": 0.2586, "step": 6562 }, { "epoch": 0.3255617838186418, "grad_norm": 4.005614757537842, "learning_rate": 7.695886338872722e-06, "loss": 0.334, "step": 6563 }, { "epoch": 0.32561138945384194, "grad_norm": 5.477311611175537, "learning_rate": 7.695223423990241e-06, "loss": 0.3109, "step": 6564 }, { "epoch": 0.3256609950890421, "grad_norm": 7.577752113342285, "learning_rate": 7.694560442318904e-06, "loss": 0.3468, "step": 6565 }, { "epoch": 0.3257106007242423, "grad_norm": 7.846498489379883, "learning_rate": 7.693897393875143e-06, "loss": 0.3901, "step": 6566 }, { "epoch": 0.3257602063594424, "grad_norm": 6.034412384033203, "learning_rate": 7.693234278675384e-06, "loss": 0.3436, "step": 6567 }, { "epoch": 0.3258098119946426, "grad_norm": 14.402568817138672, "learning_rate": 7.69257109673606e-06, "loss": 0.377, "step": 6568 }, { "epoch": 0.3258594176298428, "grad_norm": 5.880625247955322, "learning_rate": 7.691907848073608e-06, "loss": 0.263, "step": 6569 }, { "epoch": 0.3259090232650429, "grad_norm": 6.973926067352295, "learning_rate": 7.691244532704462e-06, "loss": 0.3316, "step": 6570 }, { "epoch": 0.3259586289002431, "grad_norm": 21.177167892456055, "learning_rate": 7.690581150645057e-06, "loss": 0.5184, "step": 6571 }, { "epoch": 0.32600823453544325, "grad_norm": 7.012520790100098, "learning_rate": 7.689917701911835e-06, "loss": 0.3484, "step": 6572 }, { "epoch": 0.3260578401706434, "grad_norm": 6.035035133361816, "learning_rate": 7.689254186521238e-06, "loss": 0.363, "step": 6573 }, { "epoch": 0.32610744580584355, "grad_norm": 11.275256156921387, "learning_rate": 7.688590604489703e-06, "loss": 0.2736, "step": 6574 }, { "epoch": 0.3261570514410437, "grad_norm": 5.997214317321777, "learning_rate": 7.687926955833678e-06, "loss": 0.3202, "step": 6575 }, { "epoch": 0.32620665707624386, "grad_norm": 9.111343383789062, "learning_rate": 7.687263240569607e-06, "loss": 0.3164, "step": 6576 }, { "epoch": 0.32625626271144403, "grad_norm": 6.2020697593688965, "learning_rate": 7.686599458713938e-06, "loss": 0.2872, "step": 6577 }, { "epoch": 0.32630586834664416, "grad_norm": 5.850819110870361, "learning_rate": 7.685935610283116e-06, "loss": 0.2251, "step": 6578 }, { "epoch": 0.32635547398184434, "grad_norm": 5.208436489105225, "learning_rate": 7.685271695293596e-06, "loss": 0.2959, "step": 6579 }, { "epoch": 0.3264050796170445, "grad_norm": 7.5158257484436035, "learning_rate": 7.68460771376183e-06, "loss": 0.4065, "step": 6580 }, { "epoch": 0.32645468525224464, "grad_norm": 9.166715621948242, "learning_rate": 7.68394366570427e-06, "loss": 0.2299, "step": 6581 }, { "epoch": 0.3265042908874448, "grad_norm": 5.530697345733643, "learning_rate": 7.683279551137372e-06, "loss": 0.3061, "step": 6582 }, { "epoch": 0.326553896522645, "grad_norm": 5.458729267120361, "learning_rate": 7.682615370077593e-06, "loss": 0.2932, "step": 6583 }, { "epoch": 0.3266035021578451, "grad_norm": 6.227097034454346, "learning_rate": 7.681951122541391e-06, "loss": 0.4516, "step": 6584 }, { "epoch": 0.3266531077930453, "grad_norm": 6.057391166687012, "learning_rate": 7.681286808545228e-06, "loss": 0.2405, "step": 6585 }, { "epoch": 0.3267027134282455, "grad_norm": 12.114432334899902, "learning_rate": 7.680622428105563e-06, "loss": 0.3173, "step": 6586 }, { "epoch": 0.3267523190634456, "grad_norm": 7.5370869636535645, "learning_rate": 7.679957981238863e-06, "loss": 0.3797, "step": 6587 }, { "epoch": 0.3268019246986458, "grad_norm": 8.696494102478027, "learning_rate": 7.679293467961592e-06, "loss": 0.2965, "step": 6588 }, { "epoch": 0.32685153033384595, "grad_norm": 6.924187183380127, "learning_rate": 7.678628888290217e-06, "loss": 0.2294, "step": 6589 }, { "epoch": 0.3269011359690461, "grad_norm": 7.012506008148193, "learning_rate": 7.677964242241208e-06, "loss": 0.2501, "step": 6590 }, { "epoch": 0.32695074160424625, "grad_norm": 10.324117660522461, "learning_rate": 7.67729952983103e-06, "loss": 0.3326, "step": 6591 }, { "epoch": 0.3270003472394464, "grad_norm": 8.201972007751465, "learning_rate": 7.67663475107616e-06, "loss": 0.3585, "step": 6592 }, { "epoch": 0.32704995287464655, "grad_norm": 6.297916889190674, "learning_rate": 7.675969905993072e-06, "loss": 0.3682, "step": 6593 }, { "epoch": 0.32709955850984673, "grad_norm": 9.1939115524292, "learning_rate": 7.675304994598238e-06, "loss": 0.2994, "step": 6594 }, { "epoch": 0.32714916414504686, "grad_norm": 7.72332239151001, "learning_rate": 7.674640016908137e-06, "loss": 0.3052, "step": 6595 }, { "epoch": 0.32719876978024703, "grad_norm": 5.907863140106201, "learning_rate": 7.673974972939243e-06, "loss": 0.3135, "step": 6596 }, { "epoch": 0.3272483754154472, "grad_norm": 7.116635799407959, "learning_rate": 7.673309862708044e-06, "loss": 0.2408, "step": 6597 }, { "epoch": 0.32729798105064734, "grad_norm": 13.356797218322754, "learning_rate": 7.672644686231015e-06, "loss": 0.5369, "step": 6598 }, { "epoch": 0.3273475866858475, "grad_norm": 7.772167682647705, "learning_rate": 7.671979443524642e-06, "loss": 0.2613, "step": 6599 }, { "epoch": 0.3273971923210477, "grad_norm": 7.257083892822266, "learning_rate": 7.67131413460541e-06, "loss": 0.3701, "step": 6600 }, { "epoch": 0.3274467979562478, "grad_norm": 15.454821586608887, "learning_rate": 7.670648759489806e-06, "loss": 0.311, "step": 6601 }, { "epoch": 0.327496403591448, "grad_norm": 5.787495136260986, "learning_rate": 7.669983318194316e-06, "loss": 0.3176, "step": 6602 }, { "epoch": 0.32754600922664817, "grad_norm": 6.6770806312561035, "learning_rate": 7.669317810735433e-06, "loss": 0.329, "step": 6603 }, { "epoch": 0.3275956148618483, "grad_norm": 4.62900447845459, "learning_rate": 7.668652237129647e-06, "loss": 0.2323, "step": 6604 }, { "epoch": 0.3276452204970485, "grad_norm": 9.342658996582031, "learning_rate": 7.66798659739345e-06, "loss": 0.278, "step": 6605 }, { "epoch": 0.32769482613224865, "grad_norm": 11.792365074157715, "learning_rate": 7.66732089154334e-06, "loss": 0.4845, "step": 6606 }, { "epoch": 0.3277444317674488, "grad_norm": 12.946662902832031, "learning_rate": 7.66665511959581e-06, "loss": 0.3989, "step": 6607 }, { "epoch": 0.32779403740264895, "grad_norm": 7.160332679748535, "learning_rate": 7.66598928156736e-06, "loss": 0.2971, "step": 6608 }, { "epoch": 0.3278436430378491, "grad_norm": 24.16335678100586, "learning_rate": 7.665323377474492e-06, "loss": 0.4171, "step": 6609 }, { "epoch": 0.32789324867304925, "grad_norm": 5.583578586578369, "learning_rate": 7.664657407333705e-06, "loss": 0.2274, "step": 6610 }, { "epoch": 0.32794285430824943, "grad_norm": 4.568729877471924, "learning_rate": 7.6639913711615e-06, "loss": 0.2713, "step": 6611 }, { "epoch": 0.32799245994344955, "grad_norm": 3.603343963623047, "learning_rate": 7.663325268974384e-06, "loss": 0.2906, "step": 6612 }, { "epoch": 0.32804206557864973, "grad_norm": 7.652223110198975, "learning_rate": 7.662659100788864e-06, "loss": 0.3242, "step": 6613 }, { "epoch": 0.3280916712138499, "grad_norm": 5.842055320739746, "learning_rate": 7.661992866621447e-06, "loss": 0.2986, "step": 6614 }, { "epoch": 0.32814127684905003, "grad_norm": 10.246533393859863, "learning_rate": 7.66132656648864e-06, "loss": 0.3203, "step": 6615 }, { "epoch": 0.3281908824842502, "grad_norm": 6.006476879119873, "learning_rate": 7.66066020040696e-06, "loss": 0.3343, "step": 6616 }, { "epoch": 0.3282404881194504, "grad_norm": 10.360273361206055, "learning_rate": 7.659993768392916e-06, "loss": 0.3271, "step": 6617 }, { "epoch": 0.3282900937546505, "grad_norm": 4.805943489074707, "learning_rate": 7.659327270463024e-06, "loss": 0.2789, "step": 6618 }, { "epoch": 0.3283396993898507, "grad_norm": 4.51533842086792, "learning_rate": 7.658660706633796e-06, "loss": 0.2103, "step": 6619 }, { "epoch": 0.32838930502505087, "grad_norm": 12.862939834594727, "learning_rate": 7.657994076921758e-06, "loss": 0.4537, "step": 6620 }, { "epoch": 0.328438910660251, "grad_norm": 73.11248016357422, "learning_rate": 7.65732738134342e-06, "loss": 0.3452, "step": 6621 }, { "epoch": 0.32848851629545117, "grad_norm": 9.502182006835938, "learning_rate": 7.656660619915311e-06, "loss": 0.3439, "step": 6622 }, { "epoch": 0.3285381219306513, "grad_norm": 4.464500427246094, "learning_rate": 7.655993792653947e-06, "loss": 0.271, "step": 6623 }, { "epoch": 0.3285877275658515, "grad_norm": 6.231845855712891, "learning_rate": 7.655326899575857e-06, "loss": 0.2229, "step": 6624 }, { "epoch": 0.32863733320105165, "grad_norm": 4.254913330078125, "learning_rate": 7.654659940697564e-06, "loss": 0.3262, "step": 6625 }, { "epoch": 0.3286869388362518, "grad_norm": 10.794933319091797, "learning_rate": 7.653992916035598e-06, "loss": 0.2719, "step": 6626 }, { "epoch": 0.32873654447145195, "grad_norm": 9.104338645935059, "learning_rate": 7.653325825606488e-06, "loss": 0.2711, "step": 6627 }, { "epoch": 0.32878615010665213, "grad_norm": 6.455014705657959, "learning_rate": 7.652658669426763e-06, "loss": 0.248, "step": 6628 }, { "epoch": 0.32883575574185225, "grad_norm": 5.320782661437988, "learning_rate": 7.651991447512954e-06, "loss": 0.3509, "step": 6629 }, { "epoch": 0.32888536137705243, "grad_norm": 4.066151142120361, "learning_rate": 7.6513241598816e-06, "loss": 0.2937, "step": 6630 }, { "epoch": 0.3289349670122526, "grad_norm": 13.196557998657227, "learning_rate": 7.650656806549232e-06, "loss": 0.4538, "step": 6631 }, { "epoch": 0.32898457264745273, "grad_norm": 8.372529983520508, "learning_rate": 7.649989387532391e-06, "loss": 0.4404, "step": 6632 }, { "epoch": 0.3290341782826529, "grad_norm": 7.580558776855469, "learning_rate": 7.649321902847615e-06, "loss": 0.307, "step": 6633 }, { "epoch": 0.3290837839178531, "grad_norm": 4.781277179718018, "learning_rate": 7.648654352511442e-06, "loss": 0.3677, "step": 6634 }, { "epoch": 0.3291333895530532, "grad_norm": 7.356675148010254, "learning_rate": 7.647986736540415e-06, "loss": 0.3642, "step": 6635 }, { "epoch": 0.3291829951882534, "grad_norm": 5.595554351806641, "learning_rate": 7.647319054951082e-06, "loss": 0.2373, "step": 6636 }, { "epoch": 0.32923260082345357, "grad_norm": 16.313152313232422, "learning_rate": 7.646651307759984e-06, "loss": 0.4884, "step": 6637 }, { "epoch": 0.3292822064586537, "grad_norm": 5.320671558380127, "learning_rate": 7.645983494983668e-06, "loss": 0.272, "step": 6638 }, { "epoch": 0.32933181209385387, "grad_norm": 6.05307674407959, "learning_rate": 7.645315616638687e-06, "loss": 0.2779, "step": 6639 }, { "epoch": 0.329381417729054, "grad_norm": 7.534523010253906, "learning_rate": 7.644647672741586e-06, "loss": 0.3228, "step": 6640 }, { "epoch": 0.32943102336425417, "grad_norm": 6.4770917892456055, "learning_rate": 7.64397966330892e-06, "loss": 0.3332, "step": 6641 }, { "epoch": 0.32948062899945435, "grad_norm": 12.873193740844727, "learning_rate": 7.643311588357242e-06, "loss": 0.3794, "step": 6642 }, { "epoch": 0.3295302346346545, "grad_norm": 4.301114559173584, "learning_rate": 7.642643447903106e-06, "loss": 0.2687, "step": 6643 }, { "epoch": 0.32957984026985465, "grad_norm": 8.613245010375977, "learning_rate": 7.641975241963072e-06, "loss": 0.3098, "step": 6644 }, { "epoch": 0.32962944590505483, "grad_norm": 7.047953128814697, "learning_rate": 7.641306970553694e-06, "loss": 0.3667, "step": 6645 }, { "epoch": 0.32967905154025495, "grad_norm": 6.564382553100586, "learning_rate": 7.640638633691538e-06, "loss": 0.349, "step": 6646 }, { "epoch": 0.32972865717545513, "grad_norm": 9.706694602966309, "learning_rate": 7.63997023139316e-06, "loss": 0.3263, "step": 6647 }, { "epoch": 0.3297782628106553, "grad_norm": 6.831263542175293, "learning_rate": 7.639301763675125e-06, "loss": 0.2339, "step": 6648 }, { "epoch": 0.32982786844585543, "grad_norm": 3.5686283111572266, "learning_rate": 7.638633230553998e-06, "loss": 0.2395, "step": 6649 }, { "epoch": 0.3298774740810556, "grad_norm": 6.179670333862305, "learning_rate": 7.637964632046348e-06, "loss": 0.2624, "step": 6650 }, { "epoch": 0.3299270797162558, "grad_norm": 14.825297355651855, "learning_rate": 7.63729596816874e-06, "loss": 0.4921, "step": 6651 }, { "epoch": 0.3299766853514559, "grad_norm": 11.783934593200684, "learning_rate": 7.636627238937745e-06, "loss": 0.4766, "step": 6652 }, { "epoch": 0.3300262909866561, "grad_norm": 5.151864528656006, "learning_rate": 7.635958444369933e-06, "loss": 0.3287, "step": 6653 }, { "epoch": 0.33007589662185627, "grad_norm": 4.414694309234619, "learning_rate": 7.635289584481881e-06, "loss": 0.2841, "step": 6654 }, { "epoch": 0.3301255022570564, "grad_norm": 4.544384002685547, "learning_rate": 7.634620659290159e-06, "loss": 0.278, "step": 6655 }, { "epoch": 0.33017510789225657, "grad_norm": 8.657266616821289, "learning_rate": 7.633951668811346e-06, "loss": 0.3806, "step": 6656 }, { "epoch": 0.3302247135274567, "grad_norm": 6.091037750244141, "learning_rate": 7.633282613062021e-06, "loss": 0.2528, "step": 6657 }, { "epoch": 0.33027431916265687, "grad_norm": 9.876154899597168, "learning_rate": 7.632613492058758e-06, "loss": 0.2196, "step": 6658 }, { "epoch": 0.33032392479785705, "grad_norm": 4.833776473999023, "learning_rate": 7.631944305818145e-06, "loss": 0.2879, "step": 6659 }, { "epoch": 0.33037353043305717, "grad_norm": 9.518840789794922, "learning_rate": 7.63127505435676e-06, "loss": 0.3025, "step": 6660 }, { "epoch": 0.33042313606825735, "grad_norm": 16.105619430541992, "learning_rate": 7.630605737691188e-06, "loss": 0.3178, "step": 6661 }, { "epoch": 0.33047274170345753, "grad_norm": 8.803515434265137, "learning_rate": 7.629936355838017e-06, "loss": 0.2582, "step": 6662 }, { "epoch": 0.33052234733865765, "grad_norm": 8.11681079864502, "learning_rate": 7.629266908813835e-06, "loss": 0.3582, "step": 6663 }, { "epoch": 0.33057195297385783, "grad_norm": 7.624452590942383, "learning_rate": 7.628597396635227e-06, "loss": 0.3055, "step": 6664 }, { "epoch": 0.330621558609058, "grad_norm": 3.1846020221710205, "learning_rate": 7.627927819318788e-06, "loss": 0.2043, "step": 6665 }, { "epoch": 0.33067116424425813, "grad_norm": 9.78004264831543, "learning_rate": 7.627258176881107e-06, "loss": 0.3504, "step": 6666 }, { "epoch": 0.3307207698794583, "grad_norm": 8.977459907531738, "learning_rate": 7.626588469338782e-06, "loss": 0.424, "step": 6667 }, { "epoch": 0.3307703755146585, "grad_norm": 9.907536506652832, "learning_rate": 7.625918696708407e-06, "loss": 0.363, "step": 6668 }, { "epoch": 0.3308199811498586, "grad_norm": 5.835493564605713, "learning_rate": 7.625248859006578e-06, "loss": 0.3332, "step": 6669 }, { "epoch": 0.3308695867850588, "grad_norm": 14.306864738464355, "learning_rate": 7.624578956249893e-06, "loss": 0.3701, "step": 6670 }, { "epoch": 0.33091919242025897, "grad_norm": 4.109790802001953, "learning_rate": 7.6239089884549575e-06, "loss": 0.267, "step": 6671 }, { "epoch": 0.3309687980554591, "grad_norm": 6.597084045410156, "learning_rate": 7.623238955638368e-06, "loss": 0.3431, "step": 6672 }, { "epoch": 0.33101840369065927, "grad_norm": 14.123985290527344, "learning_rate": 7.622568857816731e-06, "loss": 0.4306, "step": 6673 }, { "epoch": 0.3310680093258594, "grad_norm": 7.165088176727295, "learning_rate": 7.6218986950066524e-06, "loss": 0.3681, "step": 6674 }, { "epoch": 0.33111761496105957, "grad_norm": 7.669041156768799, "learning_rate": 7.621228467224737e-06, "loss": 0.3455, "step": 6675 }, { "epoch": 0.33116722059625975, "grad_norm": 14.675498008728027, "learning_rate": 7.620558174487594e-06, "loss": 0.5511, "step": 6676 }, { "epoch": 0.33121682623145987, "grad_norm": 5.6609206199646, "learning_rate": 7.6198878168118366e-06, "loss": 0.3364, "step": 6677 }, { "epoch": 0.33126643186666005, "grad_norm": 4.706306457519531, "learning_rate": 7.6192173942140715e-06, "loss": 0.2691, "step": 6678 }, { "epoch": 0.3313160375018602, "grad_norm": 7.555307388305664, "learning_rate": 7.618546906710915e-06, "loss": 0.3906, "step": 6679 }, { "epoch": 0.33136564313706035, "grad_norm": 10.835783958435059, "learning_rate": 7.617876354318982e-06, "loss": 0.3414, "step": 6680 }, { "epoch": 0.33141524877226053, "grad_norm": 10.62038803100586, "learning_rate": 7.617205737054889e-06, "loss": 0.3465, "step": 6681 }, { "epoch": 0.3314648544074607, "grad_norm": 5.007689952850342, "learning_rate": 7.616535054935253e-06, "loss": 0.2647, "step": 6682 }, { "epoch": 0.33151446004266083, "grad_norm": 8.641242027282715, "learning_rate": 7.6158643079766955e-06, "loss": 0.4263, "step": 6683 }, { "epoch": 0.331564065677861, "grad_norm": 5.433018207550049, "learning_rate": 7.615193496195839e-06, "loss": 0.3092, "step": 6684 }, { "epoch": 0.3316136713130612, "grad_norm": 7.276157379150391, "learning_rate": 7.614522619609301e-06, "loss": 0.3104, "step": 6685 }, { "epoch": 0.3316632769482613, "grad_norm": 6.8833770751953125, "learning_rate": 7.613851678233713e-06, "loss": 0.3457, "step": 6686 }, { "epoch": 0.3317128825834615, "grad_norm": 6.19954776763916, "learning_rate": 7.613180672085696e-06, "loss": 0.3101, "step": 6687 }, { "epoch": 0.33176248821866167, "grad_norm": 4.999745845794678, "learning_rate": 7.612509601181881e-06, "loss": 0.3112, "step": 6688 }, { "epoch": 0.3318120938538618, "grad_norm": 6.228193283081055, "learning_rate": 7.611838465538894e-06, "loss": 0.2958, "step": 6689 }, { "epoch": 0.33186169948906197, "grad_norm": 5.741692543029785, "learning_rate": 7.611167265173372e-06, "loss": 0.2551, "step": 6690 }, { "epoch": 0.3319113051242621, "grad_norm": 8.719383239746094, "learning_rate": 7.610496000101941e-06, "loss": 0.3606, "step": 6691 }, { "epoch": 0.33196091075946227, "grad_norm": 9.910268783569336, "learning_rate": 7.609824670341239e-06, "loss": 0.3092, "step": 6692 }, { "epoch": 0.33201051639466245, "grad_norm": 13.988916397094727, "learning_rate": 7.609153275907902e-06, "loss": 0.4047, "step": 6693 }, { "epoch": 0.33206012202986257, "grad_norm": 3.937014579772949, "learning_rate": 7.608481816818567e-06, "loss": 0.1896, "step": 6694 }, { "epoch": 0.33210972766506275, "grad_norm": 5.614792346954346, "learning_rate": 7.607810293089872e-06, "loss": 0.2814, "step": 6695 }, { "epoch": 0.3321593333002629, "grad_norm": 4.207074165344238, "learning_rate": 7.607138704738457e-06, "loss": 0.2703, "step": 6696 }, { "epoch": 0.33220893893546305, "grad_norm": 6.418427467346191, "learning_rate": 7.606467051780967e-06, "loss": 0.3263, "step": 6697 }, { "epoch": 0.33225854457066323, "grad_norm": 7.36176872253418, "learning_rate": 7.605795334234045e-06, "loss": 0.3372, "step": 6698 }, { "epoch": 0.3323081502058634, "grad_norm": 17.45909881591797, "learning_rate": 7.605123552114335e-06, "loss": 0.3707, "step": 6699 }, { "epoch": 0.33235775584106353, "grad_norm": 4.011193752288818, "learning_rate": 7.604451705438485e-06, "loss": 0.3411, "step": 6700 }, { "epoch": 0.3324073614762637, "grad_norm": 10.484621047973633, "learning_rate": 7.6037797942231425e-06, "loss": 0.3516, "step": 6701 }, { "epoch": 0.3324569671114639, "grad_norm": 6.476011276245117, "learning_rate": 7.6031078184849605e-06, "loss": 0.1955, "step": 6702 }, { "epoch": 0.332506572746664, "grad_norm": 17.72796058654785, "learning_rate": 7.602435778240589e-06, "loss": 0.4543, "step": 6703 }, { "epoch": 0.3325561783818642, "grad_norm": 5.828348636627197, "learning_rate": 7.601763673506681e-06, "loss": 0.2884, "step": 6704 }, { "epoch": 0.33260578401706437, "grad_norm": 4.331352710723877, "learning_rate": 7.601091504299893e-06, "loss": 0.201, "step": 6705 }, { "epoch": 0.3326553896522645, "grad_norm": 6.076851844787598, "learning_rate": 7.600419270636881e-06, "loss": 0.2766, "step": 6706 }, { "epoch": 0.33270499528746467, "grad_norm": 6.251058101654053, "learning_rate": 7.599746972534301e-06, "loss": 0.3602, "step": 6707 }, { "epoch": 0.3327546009226648, "grad_norm": 14.008665084838867, "learning_rate": 7.5990746100088174e-06, "loss": 0.2717, "step": 6708 }, { "epoch": 0.33280420655786497, "grad_norm": 8.425930976867676, "learning_rate": 7.598402183077089e-06, "loss": 0.3647, "step": 6709 }, { "epoch": 0.33285381219306515, "grad_norm": 7.522956848144531, "learning_rate": 7.597729691755777e-06, "loss": 0.3408, "step": 6710 }, { "epoch": 0.33290341782826527, "grad_norm": 14.998309135437012, "learning_rate": 7.597057136061552e-06, "loss": 0.3511, "step": 6711 }, { "epoch": 0.33295302346346545, "grad_norm": 9.600279808044434, "learning_rate": 7.596384516011074e-06, "loss": 0.2565, "step": 6712 }, { "epoch": 0.3330026290986656, "grad_norm": 9.567112922668457, "learning_rate": 7.5957118316210134e-06, "loss": 0.3483, "step": 6713 }, { "epoch": 0.33305223473386575, "grad_norm": 9.805038452148438, "learning_rate": 7.595039082908039e-06, "loss": 0.3736, "step": 6714 }, { "epoch": 0.3331018403690659, "grad_norm": 10.069108009338379, "learning_rate": 7.594366269888822e-06, "loss": 0.4287, "step": 6715 }, { "epoch": 0.3331514460042661, "grad_norm": 3.944645404815674, "learning_rate": 7.593693392580037e-06, "loss": 0.3168, "step": 6716 }, { "epoch": 0.33320105163946623, "grad_norm": 7.527709007263184, "learning_rate": 7.5930204509983565e-06, "loss": 0.3089, "step": 6717 }, { "epoch": 0.3332506572746664, "grad_norm": 5.554685592651367, "learning_rate": 7.592347445160454e-06, "loss": 0.3201, "step": 6718 }, { "epoch": 0.3333002629098666, "grad_norm": 5.614992618560791, "learning_rate": 7.5916743750830105e-06, "loss": 0.2688, "step": 6719 }, { "epoch": 0.3333498685450667, "grad_norm": 9.527338027954102, "learning_rate": 7.591001240782703e-06, "loss": 0.2771, "step": 6720 }, { "epoch": 0.3333994741802669, "grad_norm": 8.224875450134277, "learning_rate": 7.590328042276214e-06, "loss": 0.2976, "step": 6721 }, { "epoch": 0.33344907981546706, "grad_norm": 6.910152435302734, "learning_rate": 7.589654779580224e-06, "loss": 0.3321, "step": 6722 }, { "epoch": 0.3334986854506672, "grad_norm": 5.564257621765137, "learning_rate": 7.588981452711417e-06, "loss": 0.3195, "step": 6723 }, { "epoch": 0.33354829108586737, "grad_norm": 14.333439826965332, "learning_rate": 7.588308061686478e-06, "loss": 0.4065, "step": 6724 }, { "epoch": 0.3335978967210675, "grad_norm": 5.800236225128174, "learning_rate": 7.587634606522094e-06, "loss": 0.3247, "step": 6725 }, { "epoch": 0.33364750235626767, "grad_norm": 6.48653507232666, "learning_rate": 7.5869610872349554e-06, "loss": 0.276, "step": 6726 }, { "epoch": 0.33369710799146785, "grad_norm": 12.444733619689941, "learning_rate": 7.58628750384175e-06, "loss": 0.2757, "step": 6727 }, { "epoch": 0.33374671362666797, "grad_norm": 7.7176384925842285, "learning_rate": 7.585613856359171e-06, "loss": 0.2282, "step": 6728 }, { "epoch": 0.33379631926186815, "grad_norm": 9.81818962097168, "learning_rate": 7.58494014480391e-06, "loss": 0.4023, "step": 6729 }, { "epoch": 0.3338459248970683, "grad_norm": 11.321444511413574, "learning_rate": 7.584266369192663e-06, "loss": 0.3617, "step": 6730 }, { "epoch": 0.33389553053226845, "grad_norm": 6.204928874969482, "learning_rate": 7.583592529542127e-06, "loss": 0.304, "step": 6731 }, { "epoch": 0.3339451361674686, "grad_norm": 12.745118141174316, "learning_rate": 7.582918625869e-06, "loss": 0.3313, "step": 6732 }, { "epoch": 0.3339947418026688, "grad_norm": 9.972746849060059, "learning_rate": 7.58224465818998e-06, "loss": 0.3157, "step": 6733 }, { "epoch": 0.3340443474378689, "grad_norm": 8.448522567749023, "learning_rate": 7.581570626521769e-06, "loss": 0.4248, "step": 6734 }, { "epoch": 0.3340939530730691, "grad_norm": 5.871203899383545, "learning_rate": 7.580896530881072e-06, "loss": 0.2189, "step": 6735 }, { "epoch": 0.3341435587082693, "grad_norm": 13.840660095214844, "learning_rate": 7.580222371284589e-06, "loss": 0.4463, "step": 6736 }, { "epoch": 0.3341931643434694, "grad_norm": 8.55363941192627, "learning_rate": 7.57954814774903e-06, "loss": 0.3765, "step": 6737 }, { "epoch": 0.3342427699786696, "grad_norm": 7.106348991394043, "learning_rate": 7.5788738602911015e-06, "loss": 0.2757, "step": 6738 }, { "epoch": 0.33429237561386976, "grad_norm": 4.152445316314697, "learning_rate": 7.5781995089275104e-06, "loss": 0.2758, "step": 6739 }, { "epoch": 0.3343419812490699, "grad_norm": 6.261514186859131, "learning_rate": 7.577525093674971e-06, "loss": 0.326, "step": 6740 }, { "epoch": 0.33439158688427006, "grad_norm": 5.262006759643555, "learning_rate": 7.576850614550193e-06, "loss": 0.2429, "step": 6741 }, { "epoch": 0.3344411925194702, "grad_norm": 5.384947299957275, "learning_rate": 7.576176071569892e-06, "loss": 0.1961, "step": 6742 }, { "epoch": 0.33449079815467037, "grad_norm": 6.296934604644775, "learning_rate": 7.575501464750781e-06, "loss": 0.2878, "step": 6743 }, { "epoch": 0.33454040378987054, "grad_norm": 12.702404022216797, "learning_rate": 7.57482679410958e-06, "loss": 0.363, "step": 6744 }, { "epoch": 0.33459000942507067, "grad_norm": 9.350852012634277, "learning_rate": 7.574152059663006e-06, "loss": 0.2904, "step": 6745 }, { "epoch": 0.33463961506027085, "grad_norm": 6.642719268798828, "learning_rate": 7.57347726142778e-06, "loss": 0.2741, "step": 6746 }, { "epoch": 0.334689220695471, "grad_norm": 8.905227661132812, "learning_rate": 7.572802399420621e-06, "loss": 0.3998, "step": 6747 }, { "epoch": 0.33473882633067115, "grad_norm": 9.085999488830566, "learning_rate": 7.572127473658257e-06, "loss": 0.3494, "step": 6748 }, { "epoch": 0.3347884319658713, "grad_norm": 5.39510440826416, "learning_rate": 7.571452484157411e-06, "loss": 0.2903, "step": 6749 }, { "epoch": 0.3348380376010715, "grad_norm": 7.810582160949707, "learning_rate": 7.5707774309348085e-06, "loss": 0.3441, "step": 6750 }, { "epoch": 0.3348876432362716, "grad_norm": 4.337714195251465, "learning_rate": 7.570102314007177e-06, "loss": 0.2196, "step": 6751 }, { "epoch": 0.3349372488714718, "grad_norm": 11.693799018859863, "learning_rate": 7.5694271333912496e-06, "loss": 0.3425, "step": 6752 }, { "epoch": 0.334986854506672, "grad_norm": 5.036044120788574, "learning_rate": 7.5687518891037545e-06, "loss": 0.3122, "step": 6753 }, { "epoch": 0.3350364601418721, "grad_norm": 9.176881790161133, "learning_rate": 7.568076581161425e-06, "loss": 0.3919, "step": 6754 }, { "epoch": 0.3350860657770723, "grad_norm": 3.798582077026367, "learning_rate": 7.567401209580998e-06, "loss": 0.2213, "step": 6755 }, { "epoch": 0.3351356714122724, "grad_norm": 6.313309192657471, "learning_rate": 7.566725774379207e-06, "loss": 0.2708, "step": 6756 }, { "epoch": 0.3351852770474726, "grad_norm": 5.196157932281494, "learning_rate": 7.5660502755727895e-06, "loss": 0.344, "step": 6757 }, { "epoch": 0.33523488268267276, "grad_norm": 6.578492641448975, "learning_rate": 7.5653747131784864e-06, "loss": 0.3592, "step": 6758 }, { "epoch": 0.3352844883178729, "grad_norm": 5.063149929046631, "learning_rate": 7.564699087213037e-06, "loss": 0.2151, "step": 6759 }, { "epoch": 0.33533409395307306, "grad_norm": 6.909139633178711, "learning_rate": 7.564023397693185e-06, "loss": 0.2261, "step": 6760 }, { "epoch": 0.33538369958827324, "grad_norm": 10.348678588867188, "learning_rate": 7.563347644635672e-06, "loss": 0.3461, "step": 6761 }, { "epoch": 0.33543330522347337, "grad_norm": 4.414678573608398, "learning_rate": 7.562671828057245e-06, "loss": 0.2583, "step": 6762 }, { "epoch": 0.33548291085867354, "grad_norm": 8.627908706665039, "learning_rate": 7.561995947974652e-06, "loss": 0.2678, "step": 6763 }, { "epoch": 0.3355325164938737, "grad_norm": 7.582820415496826, "learning_rate": 7.561320004404639e-06, "loss": 0.3448, "step": 6764 }, { "epoch": 0.33558212212907385, "grad_norm": 7.471231460571289, "learning_rate": 7.5606439973639585e-06, "loss": 0.3234, "step": 6765 }, { "epoch": 0.335631727764274, "grad_norm": 8.626811981201172, "learning_rate": 7.55996792686936e-06, "loss": 0.3057, "step": 6766 }, { "epoch": 0.3356813333994742, "grad_norm": 6.619399547576904, "learning_rate": 7.5592917929376e-06, "loss": 0.2347, "step": 6767 }, { "epoch": 0.3357309390346743, "grad_norm": 8.561199188232422, "learning_rate": 7.558615595585431e-06, "loss": 0.3923, "step": 6768 }, { "epoch": 0.3357805446698745, "grad_norm": 6.049768447875977, "learning_rate": 7.55793933482961e-06, "loss": 0.2986, "step": 6769 }, { "epoch": 0.3358301503050747, "grad_norm": 8.826526641845703, "learning_rate": 7.557263010686897e-06, "loss": 0.4192, "step": 6770 }, { "epoch": 0.3358797559402748, "grad_norm": 6.7046027183532715, "learning_rate": 7.556586623174047e-06, "loss": 0.2699, "step": 6771 }, { "epoch": 0.335929361575475, "grad_norm": 5.7317914962768555, "learning_rate": 7.555910172307825e-06, "loss": 0.261, "step": 6772 }, { "epoch": 0.3359789672106751, "grad_norm": 4.717203617095947, "learning_rate": 7.555233658104995e-06, "loss": 0.3419, "step": 6773 }, { "epoch": 0.3360285728458753, "grad_norm": 17.43836212158203, "learning_rate": 7.554557080582316e-06, "loss": 0.3514, "step": 6774 }, { "epoch": 0.33607817848107546, "grad_norm": 7.052708625793457, "learning_rate": 7.553880439756559e-06, "loss": 0.2238, "step": 6775 }, { "epoch": 0.3361277841162756, "grad_norm": 6.904913902282715, "learning_rate": 7.5532037356444875e-06, "loss": 0.4112, "step": 6776 }, { "epoch": 0.33617738975147576, "grad_norm": 7.525966644287109, "learning_rate": 7.552526968262875e-06, "loss": 0.3219, "step": 6777 }, { "epoch": 0.33622699538667594, "grad_norm": 4.972947120666504, "learning_rate": 7.551850137628487e-06, "loss": 0.3215, "step": 6778 }, { "epoch": 0.33627660102187606, "grad_norm": 12.128397941589355, "learning_rate": 7.551173243758101e-06, "loss": 0.2884, "step": 6779 }, { "epoch": 0.33632620665707624, "grad_norm": 8.969162940979004, "learning_rate": 7.550496286668485e-06, "loss": 0.3928, "step": 6780 }, { "epoch": 0.3363758122922764, "grad_norm": 4.319933891296387, "learning_rate": 7.549819266376419e-06, "loss": 0.3089, "step": 6781 }, { "epoch": 0.33642541792747654, "grad_norm": 8.399842262268066, "learning_rate": 7.5491421828986784e-06, "loss": 0.2893, "step": 6782 }, { "epoch": 0.3364750235626767, "grad_norm": 10.0790433883667, "learning_rate": 7.548465036252042e-06, "loss": 0.4668, "step": 6783 }, { "epoch": 0.3365246291978769, "grad_norm": 7.633548259735107, "learning_rate": 7.547787826453288e-06, "loss": 0.3279, "step": 6784 }, { "epoch": 0.336574234833077, "grad_norm": 8.602336883544922, "learning_rate": 7.547110553519199e-06, "loss": 0.2909, "step": 6785 }, { "epoch": 0.3366238404682772, "grad_norm": 5.499806880950928, "learning_rate": 7.546433217466559e-06, "loss": 0.2699, "step": 6786 }, { "epoch": 0.3366734461034774, "grad_norm": 9.647639274597168, "learning_rate": 7.545755818312151e-06, "loss": 0.4614, "step": 6787 }, { "epoch": 0.3367230517386775, "grad_norm": 4.969811916351318, "learning_rate": 7.545078356072763e-06, "loss": 0.1723, "step": 6788 }, { "epoch": 0.3367726573738777, "grad_norm": 16.719392776489258, "learning_rate": 7.5444008307651824e-06, "loss": 0.355, "step": 6789 }, { "epoch": 0.3368222630090778, "grad_norm": 9.739411354064941, "learning_rate": 7.543723242406198e-06, "loss": 0.2681, "step": 6790 }, { "epoch": 0.336871868644278, "grad_norm": 18.475582122802734, "learning_rate": 7.543045591012601e-06, "loss": 0.3808, "step": 6791 }, { "epoch": 0.33692147427947816, "grad_norm": 8.209003448486328, "learning_rate": 7.542367876601184e-06, "loss": 0.2233, "step": 6792 }, { "epoch": 0.3369710799146783, "grad_norm": 5.488264083862305, "learning_rate": 7.541690099188741e-06, "loss": 0.2614, "step": 6793 }, { "epoch": 0.33702068554987846, "grad_norm": 7.977147102355957, "learning_rate": 7.541012258792066e-06, "loss": 0.2136, "step": 6794 }, { "epoch": 0.33707029118507864, "grad_norm": 5.8829779624938965, "learning_rate": 7.54033435542796e-06, "loss": 0.2536, "step": 6795 }, { "epoch": 0.33711989682027876, "grad_norm": 11.906428337097168, "learning_rate": 7.539656389113219e-06, "loss": 0.3879, "step": 6796 }, { "epoch": 0.33716950245547894, "grad_norm": 5.590389251708984, "learning_rate": 7.538978359864644e-06, "loss": 0.2617, "step": 6797 }, { "epoch": 0.3372191080906791, "grad_norm": 4.835536956787109, "learning_rate": 7.538300267699036e-06, "loss": 0.2306, "step": 6798 }, { "epoch": 0.33726871372587924, "grad_norm": 6.619049072265625, "learning_rate": 7.5376221126332005e-06, "loss": 0.2967, "step": 6799 }, { "epoch": 0.3373183193610794, "grad_norm": 7.236058712005615, "learning_rate": 7.536943894683939e-06, "loss": 0.3071, "step": 6800 }, { "epoch": 0.3373679249962796, "grad_norm": 11.628028869628906, "learning_rate": 7.536265613868061e-06, "loss": 0.433, "step": 6801 }, { "epoch": 0.3374175306314797, "grad_norm": 15.61121654510498, "learning_rate": 7.535587270202376e-06, "loss": 0.5146, "step": 6802 }, { "epoch": 0.3374671362666799, "grad_norm": 5.991674900054932, "learning_rate": 7.53490886370369e-06, "loss": 0.3111, "step": 6803 }, { "epoch": 0.3375167419018801, "grad_norm": 7.569841384887695, "learning_rate": 7.534230394388813e-06, "loss": 0.278, "step": 6804 }, { "epoch": 0.3375663475370802, "grad_norm": 8.246238708496094, "learning_rate": 7.533551862274564e-06, "loss": 0.3974, "step": 6805 }, { "epoch": 0.3376159531722804, "grad_norm": 16.352306365966797, "learning_rate": 7.532873267377753e-06, "loss": 0.3421, "step": 6806 }, { "epoch": 0.3376655588074805, "grad_norm": 5.721248626708984, "learning_rate": 7.5321946097151955e-06, "loss": 0.2582, "step": 6807 }, { "epoch": 0.3377151644426807, "grad_norm": 6.00046968460083, "learning_rate": 7.5315158893037114e-06, "loss": 0.1621, "step": 6808 }, { "epoch": 0.33776477007788086, "grad_norm": 5.936038970947266, "learning_rate": 7.5308371061601185e-06, "loss": 0.4018, "step": 6809 }, { "epoch": 0.337814375713081, "grad_norm": 8.217931747436523, "learning_rate": 7.530158260301237e-06, "loss": 0.4218, "step": 6810 }, { "epoch": 0.33786398134828116, "grad_norm": 6.26723051071167, "learning_rate": 7.529479351743888e-06, "loss": 0.2922, "step": 6811 }, { "epoch": 0.33791358698348134, "grad_norm": 8.381196022033691, "learning_rate": 7.528800380504897e-06, "loss": 0.2322, "step": 6812 }, { "epoch": 0.33796319261868146, "grad_norm": 6.102658271789551, "learning_rate": 7.528121346601089e-06, "loss": 0.2374, "step": 6813 }, { "epoch": 0.33801279825388164, "grad_norm": 5.336745738983154, "learning_rate": 7.527442250049291e-06, "loss": 0.2588, "step": 6814 }, { "epoch": 0.3380624038890818, "grad_norm": 10.598607063293457, "learning_rate": 7.52676309086633e-06, "loss": 0.3154, "step": 6815 }, { "epoch": 0.33811200952428194, "grad_norm": 5.184597969055176, "learning_rate": 7.526083869069039e-06, "loss": 0.335, "step": 6816 }, { "epoch": 0.3381616151594821, "grad_norm": 6.287495136260986, "learning_rate": 7.525404584674244e-06, "loss": 0.2845, "step": 6817 }, { "epoch": 0.3382112207946823, "grad_norm": 13.400396347045898, "learning_rate": 7.524725237698782e-06, "loss": 0.3304, "step": 6818 }, { "epoch": 0.3382608264298824, "grad_norm": 3.9853830337524414, "learning_rate": 7.524045828159487e-06, "loss": 0.1909, "step": 6819 }, { "epoch": 0.3383104320650826, "grad_norm": 5.323287010192871, "learning_rate": 7.523366356073194e-06, "loss": 0.2252, "step": 6820 }, { "epoch": 0.3383600377002828, "grad_norm": 9.294404029846191, "learning_rate": 7.522686821456742e-06, "loss": 0.4577, "step": 6821 }, { "epoch": 0.3384096433354829, "grad_norm": 10.274779319763184, "learning_rate": 7.522007224326968e-06, "loss": 0.3054, "step": 6822 }, { "epoch": 0.3384592489706831, "grad_norm": 12.877670288085938, "learning_rate": 7.5213275647007156e-06, "loss": 0.269, "step": 6823 }, { "epoch": 0.3385088546058832, "grad_norm": 10.897645950317383, "learning_rate": 7.520647842594826e-06, "loss": 0.234, "step": 6824 }, { "epoch": 0.3385584602410834, "grad_norm": 6.876753807067871, "learning_rate": 7.519968058026141e-06, "loss": 0.3114, "step": 6825 }, { "epoch": 0.33860806587628356, "grad_norm": 5.482268333435059, "learning_rate": 7.5192882110115085e-06, "loss": 0.2164, "step": 6826 }, { "epoch": 0.3386576715114837, "grad_norm": 4.043100357055664, "learning_rate": 7.518608301567774e-06, "loss": 0.2755, "step": 6827 }, { "epoch": 0.33870727714668386, "grad_norm": 10.072563171386719, "learning_rate": 7.517928329711787e-06, "loss": 0.3533, "step": 6828 }, { "epoch": 0.33875688278188404, "grad_norm": 7.919368743896484, "learning_rate": 7.517248295460397e-06, "loss": 0.3139, "step": 6829 }, { "epoch": 0.33880648841708416, "grad_norm": 6.281004905700684, "learning_rate": 7.516568198830455e-06, "loss": 0.2598, "step": 6830 }, { "epoch": 0.33885609405228434, "grad_norm": 6.1587724685668945, "learning_rate": 7.515888039838815e-06, "loss": 0.256, "step": 6831 }, { "epoch": 0.3389056996874845, "grad_norm": 4.5650715827941895, "learning_rate": 7.515207818502331e-06, "loss": 0.2616, "step": 6832 }, { "epoch": 0.33895530532268464, "grad_norm": 11.170530319213867, "learning_rate": 7.514527534837861e-06, "loss": 0.2671, "step": 6833 }, { "epoch": 0.3390049109578848, "grad_norm": 5.936452865600586, "learning_rate": 7.51384718886226e-06, "loss": 0.38, "step": 6834 }, { "epoch": 0.339054516593085, "grad_norm": 9.998977661132812, "learning_rate": 7.513166780592389e-06, "loss": 0.3567, "step": 6835 }, { "epoch": 0.3391041222282851, "grad_norm": 12.514314651489258, "learning_rate": 7.512486310045108e-06, "loss": 0.3588, "step": 6836 }, { "epoch": 0.3391537278634853, "grad_norm": 6.937585830688477, "learning_rate": 7.51180577723728e-06, "loss": 0.3396, "step": 6837 }, { "epoch": 0.3392033334986855, "grad_norm": 6.06933069229126, "learning_rate": 7.511125182185768e-06, "loss": 0.3323, "step": 6838 }, { "epoch": 0.3392529391338856, "grad_norm": 8.129271507263184, "learning_rate": 7.5104445249074385e-06, "loss": 0.3114, "step": 6839 }, { "epoch": 0.3393025447690858, "grad_norm": 8.92104721069336, "learning_rate": 7.509763805419159e-06, "loss": 0.3083, "step": 6840 }, { "epoch": 0.3393521504042859, "grad_norm": 10.137476921081543, "learning_rate": 7.509083023737795e-06, "loss": 0.4637, "step": 6841 }, { "epoch": 0.3394017560394861, "grad_norm": 6.653792381286621, "learning_rate": 7.50840217988022e-06, "loss": 0.4155, "step": 6842 }, { "epoch": 0.33945136167468626, "grad_norm": 5.498740196228027, "learning_rate": 7.507721273863304e-06, "loss": 0.2743, "step": 6843 }, { "epoch": 0.3395009673098864, "grad_norm": 9.89418888092041, "learning_rate": 7.507040305703921e-06, "loss": 0.416, "step": 6844 }, { "epoch": 0.33955057294508656, "grad_norm": 6.519068241119385, "learning_rate": 7.5063592754189455e-06, "loss": 0.3403, "step": 6845 }, { "epoch": 0.33960017858028674, "grad_norm": 9.888566970825195, "learning_rate": 7.505678183025254e-06, "loss": 0.3635, "step": 6846 }, { "epoch": 0.33964978421548686, "grad_norm": 5.6001176834106445, "learning_rate": 7.504997028539721e-06, "loss": 0.307, "step": 6847 }, { "epoch": 0.33969938985068704, "grad_norm": 10.134345054626465, "learning_rate": 7.50431581197923e-06, "loss": 0.339, "step": 6848 }, { "epoch": 0.3397489954858872, "grad_norm": 5.974014759063721, "learning_rate": 7.50363453336066e-06, "loss": 0.2742, "step": 6849 }, { "epoch": 0.33979860112108734, "grad_norm": 3.875861883163452, "learning_rate": 7.502953192700895e-06, "loss": 0.3038, "step": 6850 }, { "epoch": 0.3398482067562875, "grad_norm": 4.202265739440918, "learning_rate": 7.502271790016815e-06, "loss": 0.3049, "step": 6851 }, { "epoch": 0.3398978123914877, "grad_norm": 8.302652359008789, "learning_rate": 7.50159032532531e-06, "loss": 0.3703, "step": 6852 }, { "epoch": 0.3399474180266878, "grad_norm": 4.512240409851074, "learning_rate": 7.500908798643266e-06, "loss": 0.2727, "step": 6853 }, { "epoch": 0.339997023661888, "grad_norm": 6.626575946807861, "learning_rate": 7.50022720998757e-06, "loss": 0.2741, "step": 6854 }, { "epoch": 0.3400466292970882, "grad_norm": 8.300806999206543, "learning_rate": 7.4995455593751125e-06, "loss": 0.3441, "step": 6855 }, { "epoch": 0.3400962349322883, "grad_norm": 9.378729820251465, "learning_rate": 7.498863846822784e-06, "loss": 0.4307, "step": 6856 }, { "epoch": 0.3401458405674885, "grad_norm": 12.444568634033203, "learning_rate": 7.4981820723474815e-06, "loss": 0.3961, "step": 6857 }, { "epoch": 0.3401954462026886, "grad_norm": 17.28133773803711, "learning_rate": 7.497500235966095e-06, "loss": 0.5085, "step": 6858 }, { "epoch": 0.3402450518378888, "grad_norm": 11.92574405670166, "learning_rate": 7.496818337695524e-06, "loss": 0.2899, "step": 6859 }, { "epoch": 0.34029465747308896, "grad_norm": 5.178873538970947, "learning_rate": 7.496136377552666e-06, "loss": 0.2863, "step": 6860 }, { "epoch": 0.3403442631082891, "grad_norm": 61.260738372802734, "learning_rate": 7.495454355554419e-06, "loss": 0.3991, "step": 6861 }, { "epoch": 0.34039386874348926, "grad_norm": 10.457250595092773, "learning_rate": 7.494772271717683e-06, "loss": 0.4363, "step": 6862 }, { "epoch": 0.34044347437868944, "grad_norm": 10.669915199279785, "learning_rate": 7.494090126059363e-06, "loss": 0.3772, "step": 6863 }, { "epoch": 0.34049308001388956, "grad_norm": 5.912572383880615, "learning_rate": 7.4934079185963606e-06, "loss": 0.2951, "step": 6864 }, { "epoch": 0.34054268564908974, "grad_norm": 8.47039794921875, "learning_rate": 7.492725649345582e-06, "loss": 0.3784, "step": 6865 }, { "epoch": 0.3405922912842899, "grad_norm": 9.315797805786133, "learning_rate": 7.492043318323934e-06, "loss": 0.2937, "step": 6866 }, { "epoch": 0.34064189691949004, "grad_norm": 11.98012924194336, "learning_rate": 7.491360925548326e-06, "loss": 0.4616, "step": 6867 }, { "epoch": 0.3406915025546902, "grad_norm": 11.974418640136719, "learning_rate": 7.490678471035668e-06, "loss": 0.3393, "step": 6868 }, { "epoch": 0.3407411081898904, "grad_norm": 6.712147235870361, "learning_rate": 7.489995954802869e-06, "loss": 0.3426, "step": 6869 }, { "epoch": 0.3407907138250905, "grad_norm": 8.187787055969238, "learning_rate": 7.489313376866846e-06, "loss": 0.3439, "step": 6870 }, { "epoch": 0.3408403194602907, "grad_norm": 6.492562294006348, "learning_rate": 7.48863073724451e-06, "loss": 0.2722, "step": 6871 }, { "epoch": 0.3408899250954909, "grad_norm": 9.524808883666992, "learning_rate": 7.487948035952779e-06, "loss": 0.3074, "step": 6872 }, { "epoch": 0.340939530730691, "grad_norm": 6.392839431762695, "learning_rate": 7.48726527300857e-06, "loss": 0.2493, "step": 6873 }, { "epoch": 0.3409891363658912, "grad_norm": 4.618629455566406, "learning_rate": 7.486582448428803e-06, "loss": 0.3061, "step": 6874 }, { "epoch": 0.3410387420010913, "grad_norm": 8.172751426696777, "learning_rate": 7.485899562230398e-06, "loss": 0.4032, "step": 6875 }, { "epoch": 0.3410883476362915, "grad_norm": 5.330697536468506, "learning_rate": 7.485216614430276e-06, "loss": 0.2689, "step": 6876 }, { "epoch": 0.34113795327149166, "grad_norm": 8.618717193603516, "learning_rate": 7.4845336050453645e-06, "loss": 0.405, "step": 6877 }, { "epoch": 0.3411875589066918, "grad_norm": 9.2623929977417, "learning_rate": 7.483850534092585e-06, "loss": 0.4297, "step": 6878 }, { "epoch": 0.34123716454189196, "grad_norm": 9.240473747253418, "learning_rate": 7.483167401588865e-06, "loss": 0.3067, "step": 6879 }, { "epoch": 0.34128677017709214, "grad_norm": 3.9360318183898926, "learning_rate": 7.482484207551135e-06, "loss": 0.1407, "step": 6880 }, { "epoch": 0.34133637581229226, "grad_norm": 11.431703567504883, "learning_rate": 7.481800951996322e-06, "loss": 0.311, "step": 6881 }, { "epoch": 0.34138598144749244, "grad_norm": 7.706937313079834, "learning_rate": 7.481117634941361e-06, "loss": 0.2422, "step": 6882 }, { "epoch": 0.3414355870826926, "grad_norm": 7.013637065887451, "learning_rate": 7.480434256403179e-06, "loss": 0.3143, "step": 6883 }, { "epoch": 0.34148519271789274, "grad_norm": 11.269091606140137, "learning_rate": 7.4797508163987164e-06, "loss": 0.3732, "step": 6884 }, { "epoch": 0.3415347983530929, "grad_norm": 5.299112319946289, "learning_rate": 7.4790673149449075e-06, "loss": 0.2635, "step": 6885 }, { "epoch": 0.3415844039882931, "grad_norm": 7.188982963562012, "learning_rate": 7.478383752058689e-06, "loss": 0.2814, "step": 6886 }, { "epoch": 0.3416340096234932, "grad_norm": 8.527850151062012, "learning_rate": 7.477700127756999e-06, "loss": 0.3285, "step": 6887 }, { "epoch": 0.3416836152586934, "grad_norm": 7.047873497009277, "learning_rate": 7.477016442056779e-06, "loss": 0.2691, "step": 6888 }, { "epoch": 0.3417332208938935, "grad_norm": 7.979347229003906, "learning_rate": 7.4763326949749715e-06, "loss": 0.3273, "step": 6889 }, { "epoch": 0.3417828265290937, "grad_norm": 5.581180572509766, "learning_rate": 7.47564888652852e-06, "loss": 0.223, "step": 6890 }, { "epoch": 0.3418324321642939, "grad_norm": 6.718724250793457, "learning_rate": 7.474965016734369e-06, "loss": 0.2164, "step": 6891 }, { "epoch": 0.341882037799494, "grad_norm": 15.431909561157227, "learning_rate": 7.474281085609465e-06, "loss": 0.567, "step": 6892 }, { "epoch": 0.3419316434346942, "grad_norm": 5.507948398590088, "learning_rate": 7.473597093170757e-06, "loss": 0.3409, "step": 6893 }, { "epoch": 0.34198124906989436, "grad_norm": 7.004646301269531, "learning_rate": 7.472913039435194e-06, "loss": 0.3433, "step": 6894 }, { "epoch": 0.3420308547050945, "grad_norm": 7.662240982055664, "learning_rate": 7.4722289244197265e-06, "loss": 0.3182, "step": 6895 }, { "epoch": 0.34208046034029466, "grad_norm": 6.721523761749268, "learning_rate": 7.4715447481413085e-06, "loss": 0.2406, "step": 6896 }, { "epoch": 0.34213006597549483, "grad_norm": 6.8508124351501465, "learning_rate": 7.470860510616894e-06, "loss": 0.2981, "step": 6897 }, { "epoch": 0.34217967161069496, "grad_norm": 10.076385498046875, "learning_rate": 7.470176211863438e-06, "loss": 0.3201, "step": 6898 }, { "epoch": 0.34222927724589514, "grad_norm": 5.231705665588379, "learning_rate": 7.469491851897898e-06, "loss": 0.3446, "step": 6899 }, { "epoch": 0.3422788828810953, "grad_norm": 9.449018478393555, "learning_rate": 7.468807430737234e-06, "loss": 0.3299, "step": 6900 }, { "epoch": 0.34232848851629544, "grad_norm": 5.8868303298950195, "learning_rate": 7.468122948398403e-06, "loss": 0.2646, "step": 6901 }, { "epoch": 0.3423780941514956, "grad_norm": 12.262717247009277, "learning_rate": 7.467438404898371e-06, "loss": 0.332, "step": 6902 }, { "epoch": 0.3424276997866958, "grad_norm": 5.1248955726623535, "learning_rate": 7.466753800254096e-06, "loss": 0.2911, "step": 6903 }, { "epoch": 0.3424773054218959, "grad_norm": 13.667881965637207, "learning_rate": 7.46606913448255e-06, "loss": 0.4324, "step": 6904 }, { "epoch": 0.3425269110570961, "grad_norm": 8.709392547607422, "learning_rate": 7.465384407600693e-06, "loss": 0.3076, "step": 6905 }, { "epoch": 0.3425765166922962, "grad_norm": 8.402368545532227, "learning_rate": 7.464699619625495e-06, "loss": 0.3125, "step": 6906 }, { "epoch": 0.3426261223274964, "grad_norm": 7.827890396118164, "learning_rate": 7.4640147705739275e-06, "loss": 0.4059, "step": 6907 }, { "epoch": 0.3426757279626966, "grad_norm": 9.56425666809082, "learning_rate": 7.463329860462958e-06, "loss": 0.2834, "step": 6908 }, { "epoch": 0.3427253335978967, "grad_norm": 4.875293254852295, "learning_rate": 7.462644889309559e-06, "loss": 0.2607, "step": 6909 }, { "epoch": 0.3427749392330969, "grad_norm": 10.414897918701172, "learning_rate": 7.461959857130708e-06, "loss": 0.2599, "step": 6910 }, { "epoch": 0.34282454486829705, "grad_norm": 17.13286781311035, "learning_rate": 7.461274763943377e-06, "loss": 0.4328, "step": 6911 }, { "epoch": 0.3428741505034972, "grad_norm": 11.739418029785156, "learning_rate": 7.460589609764544e-06, "loss": 0.4297, "step": 6912 }, { "epoch": 0.34292375613869736, "grad_norm": 11.635924339294434, "learning_rate": 7.459904394611186e-06, "loss": 0.3416, "step": 6913 }, { "epoch": 0.34297336177389753, "grad_norm": 6.074042320251465, "learning_rate": 7.459219118500287e-06, "loss": 0.3982, "step": 6914 }, { "epoch": 0.34302296740909766, "grad_norm": 7.32991361618042, "learning_rate": 7.458533781448824e-06, "loss": 0.4494, "step": 6915 }, { "epoch": 0.34307257304429783, "grad_norm": 4.738449573516846, "learning_rate": 7.457848383473783e-06, "loss": 0.3224, "step": 6916 }, { "epoch": 0.343122178679498, "grad_norm": 9.019834518432617, "learning_rate": 7.457162924592147e-06, "loss": 0.3712, "step": 6917 }, { "epoch": 0.34317178431469814, "grad_norm": 3.774400234222412, "learning_rate": 7.456477404820901e-06, "loss": 0.2763, "step": 6918 }, { "epoch": 0.3432213899498983, "grad_norm": 17.06139373779297, "learning_rate": 7.455791824177035e-06, "loss": 0.3272, "step": 6919 }, { "epoch": 0.3432709955850985, "grad_norm": 5.326053142547607, "learning_rate": 7.455106182677536e-06, "loss": 0.3061, "step": 6920 }, { "epoch": 0.3433206012202986, "grad_norm": 4.548459529876709, "learning_rate": 7.454420480339396e-06, "loss": 0.2908, "step": 6921 }, { "epoch": 0.3433702068554988, "grad_norm": 6.55258846282959, "learning_rate": 7.453734717179606e-06, "loss": 0.2604, "step": 6922 }, { "epoch": 0.3434198124906989, "grad_norm": 8.23892593383789, "learning_rate": 7.453048893215159e-06, "loss": 0.3686, "step": 6923 }, { "epoch": 0.3434694181258991, "grad_norm": 6.940099716186523, "learning_rate": 7.452363008463053e-06, "loss": 0.3221, "step": 6924 }, { "epoch": 0.3435190237610993, "grad_norm": 6.291128635406494, "learning_rate": 7.451677062940279e-06, "loss": 0.3299, "step": 6925 }, { "epoch": 0.3435686293962994, "grad_norm": 3.929720163345337, "learning_rate": 7.450991056663841e-06, "loss": 0.2972, "step": 6926 }, { "epoch": 0.3436182350314996, "grad_norm": 5.661260604858398, "learning_rate": 7.4503049896507354e-06, "loss": 0.2993, "step": 6927 }, { "epoch": 0.34366784066669975, "grad_norm": 11.41534423828125, "learning_rate": 7.449618861917962e-06, "loss": 0.337, "step": 6928 }, { "epoch": 0.3437174463018999, "grad_norm": 5.46663761138916, "learning_rate": 7.448932673482528e-06, "loss": 0.2888, "step": 6929 }, { "epoch": 0.34376705193710005, "grad_norm": 22.19755744934082, "learning_rate": 7.4482464243614315e-06, "loss": 0.4023, "step": 6930 }, { "epoch": 0.34381665757230023, "grad_norm": 6.585799217224121, "learning_rate": 7.447560114571683e-06, "loss": 0.3627, "step": 6931 }, { "epoch": 0.34386626320750036, "grad_norm": 5.63456392288208, "learning_rate": 7.4468737441302875e-06, "loss": 0.3301, "step": 6932 }, { "epoch": 0.34391586884270053, "grad_norm": 11.443201065063477, "learning_rate": 7.446187313054254e-06, "loss": 0.3214, "step": 6933 }, { "epoch": 0.3439654744779007, "grad_norm": 5.574601650238037, "learning_rate": 7.445500821360592e-06, "loss": 0.2884, "step": 6934 }, { "epoch": 0.34401508011310084, "grad_norm": 9.770489692687988, "learning_rate": 7.444814269066314e-06, "loss": 0.369, "step": 6935 }, { "epoch": 0.344064685748301, "grad_norm": 6.389120101928711, "learning_rate": 7.444127656188431e-06, "loss": 0.3175, "step": 6936 }, { "epoch": 0.3441142913835012, "grad_norm": 8.98788070678711, "learning_rate": 7.4434409827439604e-06, "loss": 0.4326, "step": 6937 }, { "epoch": 0.3441638970187013, "grad_norm": 9.351208686828613, "learning_rate": 7.442754248749916e-06, "loss": 0.3361, "step": 6938 }, { "epoch": 0.3442135026539015, "grad_norm": 5.679034233093262, "learning_rate": 7.442067454223316e-06, "loss": 0.2581, "step": 6939 }, { "epoch": 0.3442631082891016, "grad_norm": 7.753663539886475, "learning_rate": 7.441380599181181e-06, "loss": 0.3084, "step": 6940 }, { "epoch": 0.3443127139243018, "grad_norm": 6.563231945037842, "learning_rate": 7.440693683640531e-06, "loss": 0.2789, "step": 6941 }, { "epoch": 0.344362319559502, "grad_norm": 8.461106300354004, "learning_rate": 7.4400067076183855e-06, "loss": 0.3254, "step": 6942 }, { "epoch": 0.3444119251947021, "grad_norm": 6.442532062530518, "learning_rate": 7.439319671131772e-06, "loss": 0.31, "step": 6943 }, { "epoch": 0.3444615308299023, "grad_norm": 7.818819046020508, "learning_rate": 7.438632574197713e-06, "loss": 0.3427, "step": 6944 }, { "epoch": 0.34451113646510245, "grad_norm": 4.680306434631348, "learning_rate": 7.437945416833235e-06, "loss": 0.2703, "step": 6945 }, { "epoch": 0.3445607421003026, "grad_norm": 7.114738464355469, "learning_rate": 7.437258199055367e-06, "loss": 0.2484, "step": 6946 }, { "epoch": 0.34461034773550275, "grad_norm": 3.843578577041626, "learning_rate": 7.436570920881138e-06, "loss": 0.2199, "step": 6947 }, { "epoch": 0.34465995337070293, "grad_norm": 4.94020938873291, "learning_rate": 7.435883582327581e-06, "loss": 0.2792, "step": 6948 }, { "epoch": 0.34470955900590305, "grad_norm": 4.668866157531738, "learning_rate": 7.4351961834117256e-06, "loss": 0.2593, "step": 6949 }, { "epoch": 0.34475916464110323, "grad_norm": 6.40756368637085, "learning_rate": 7.434508724150607e-06, "loss": 0.2448, "step": 6950 }, { "epoch": 0.3448087702763034, "grad_norm": 8.228119850158691, "learning_rate": 7.433821204561262e-06, "loss": 0.3656, "step": 6951 }, { "epoch": 0.34485837591150353, "grad_norm": 4.571948528289795, "learning_rate": 7.433133624660726e-06, "loss": 0.2682, "step": 6952 }, { "epoch": 0.3449079815467037, "grad_norm": 3.599677324295044, "learning_rate": 7.432445984466039e-06, "loss": 0.2773, "step": 6953 }, { "epoch": 0.3449575871819039, "grad_norm": 9.65471076965332, "learning_rate": 7.431758283994239e-06, "loss": 0.4286, "step": 6954 }, { "epoch": 0.345007192817104, "grad_norm": 6.828722953796387, "learning_rate": 7.431070523262369e-06, "loss": 0.4194, "step": 6955 }, { "epoch": 0.3450567984523042, "grad_norm": 7.554081439971924, "learning_rate": 7.430382702287472e-06, "loss": 0.3232, "step": 6956 }, { "epoch": 0.3451064040875043, "grad_norm": 8.09808349609375, "learning_rate": 7.429694821086593e-06, "loss": 0.2926, "step": 6957 }, { "epoch": 0.3451560097227045, "grad_norm": 4.959543704986572, "learning_rate": 7.429006879676776e-06, "loss": 0.3083, "step": 6958 }, { "epoch": 0.34520561535790467, "grad_norm": 6.85957670211792, "learning_rate": 7.428318878075071e-06, "loss": 0.3013, "step": 6959 }, { "epoch": 0.3452552209931048, "grad_norm": 7.933683395385742, "learning_rate": 7.427630816298523e-06, "loss": 0.4435, "step": 6960 }, { "epoch": 0.345304826628305, "grad_norm": 5.763876914978027, "learning_rate": 7.426942694364189e-06, "loss": 0.2694, "step": 6961 }, { "epoch": 0.34535443226350515, "grad_norm": 13.320265769958496, "learning_rate": 7.426254512289115e-06, "loss": 0.4258, "step": 6962 }, { "epoch": 0.3454040378987053, "grad_norm": 4.006875991821289, "learning_rate": 7.425566270090357e-06, "loss": 0.2335, "step": 6963 }, { "epoch": 0.34545364353390545, "grad_norm": 6.716362953186035, "learning_rate": 7.424877967784969e-06, "loss": 0.3223, "step": 6964 }, { "epoch": 0.34550324916910563, "grad_norm": 6.180950164794922, "learning_rate": 7.42418960539001e-06, "loss": 0.2394, "step": 6965 }, { "epoch": 0.34555285480430575, "grad_norm": 11.385859489440918, "learning_rate": 7.4235011829225345e-06, "loss": 0.2843, "step": 6966 }, { "epoch": 0.34560246043950593, "grad_norm": 6.053246974945068, "learning_rate": 7.422812700399603e-06, "loss": 0.337, "step": 6967 }, { "epoch": 0.3456520660747061, "grad_norm": 5.941585540771484, "learning_rate": 7.422124157838279e-06, "loss": 0.3294, "step": 6968 }, { "epoch": 0.34570167170990623, "grad_norm": 9.7907133102417, "learning_rate": 7.42143555525562e-06, "loss": 0.4015, "step": 6969 }, { "epoch": 0.3457512773451064, "grad_norm": 7.745720863342285, "learning_rate": 7.420746892668693e-06, "loss": 0.3184, "step": 6970 }, { "epoch": 0.3458008829803066, "grad_norm": 17.027851104736328, "learning_rate": 7.420058170094565e-06, "loss": 0.4449, "step": 6971 }, { "epoch": 0.3458504886155067, "grad_norm": 6.742854595184326, "learning_rate": 7.419369387550299e-06, "loss": 0.2774, "step": 6972 }, { "epoch": 0.3459000942507069, "grad_norm": 14.219571113586426, "learning_rate": 7.4186805450529665e-06, "loss": 0.4416, "step": 6973 }, { "epoch": 0.345949699885907, "grad_norm": 4.722236633300781, "learning_rate": 7.417991642619635e-06, "loss": 0.2569, "step": 6974 }, { "epoch": 0.3459993055211072, "grad_norm": 9.01919937133789, "learning_rate": 7.4173026802673775e-06, "loss": 0.444, "step": 6975 }, { "epoch": 0.34604891115630737, "grad_norm": 10.53837776184082, "learning_rate": 7.416613658013265e-06, "loss": 0.3689, "step": 6976 }, { "epoch": 0.3460985167915075, "grad_norm": 6.5788421630859375, "learning_rate": 7.415924575874374e-06, "loss": 0.3176, "step": 6977 }, { "epoch": 0.34614812242670767, "grad_norm": 37.70882797241211, "learning_rate": 7.41523543386778e-06, "loss": 0.3774, "step": 6978 }, { "epoch": 0.34619772806190785, "grad_norm": 5.4278244972229, "learning_rate": 7.414546232010557e-06, "loss": 0.2922, "step": 6979 }, { "epoch": 0.346247333697108, "grad_norm": 6.472194671630859, "learning_rate": 7.413856970319789e-06, "loss": 0.3381, "step": 6980 }, { "epoch": 0.34629693933230815, "grad_norm": 5.20059871673584, "learning_rate": 7.41316764881255e-06, "loss": 0.2959, "step": 6981 }, { "epoch": 0.34634654496750833, "grad_norm": 8.406448364257812, "learning_rate": 7.412478267505928e-06, "loss": 0.2193, "step": 6982 }, { "epoch": 0.34639615060270845, "grad_norm": 7.2521867752075195, "learning_rate": 7.411788826417001e-06, "loss": 0.4009, "step": 6983 }, { "epoch": 0.34644575623790863, "grad_norm": 5.416424751281738, "learning_rate": 7.411099325562857e-06, "loss": 0.3291, "step": 6984 }, { "epoch": 0.3464953618731088, "grad_norm": 9.519176483154297, "learning_rate": 7.4104097649605825e-06, "loss": 0.2358, "step": 6985 }, { "epoch": 0.34654496750830893, "grad_norm": 5.942306995391846, "learning_rate": 7.4097201446272616e-06, "loss": 0.2476, "step": 6986 }, { "epoch": 0.3465945731435091, "grad_norm": 6.059419631958008, "learning_rate": 7.409030464579986e-06, "loss": 0.2899, "step": 6987 }, { "epoch": 0.3466441787787093, "grad_norm": 6.546401023864746, "learning_rate": 7.408340724835845e-06, "loss": 0.3188, "step": 6988 }, { "epoch": 0.3466937844139094, "grad_norm": 4.312063694000244, "learning_rate": 7.407650925411933e-06, "loss": 0.2598, "step": 6989 }, { "epoch": 0.3467433900491096, "grad_norm": 6.954490661621094, "learning_rate": 7.406961066325342e-06, "loss": 0.2942, "step": 6990 }, { "epoch": 0.3467929956843097, "grad_norm": 12.651052474975586, "learning_rate": 7.4062711475931655e-06, "loss": 0.4614, "step": 6991 }, { "epoch": 0.3468426013195099, "grad_norm": 4.270594120025635, "learning_rate": 7.405581169232502e-06, "loss": 0.265, "step": 6992 }, { "epoch": 0.34689220695471007, "grad_norm": 5.8493332862854, "learning_rate": 7.404891131260448e-06, "loss": 0.3398, "step": 6993 }, { "epoch": 0.3469418125899102, "grad_norm": 5.64643669128418, "learning_rate": 7.404201033694107e-06, "loss": 0.3141, "step": 6994 }, { "epoch": 0.34699141822511037, "grad_norm": 9.657113075256348, "learning_rate": 7.403510876550574e-06, "loss": 0.2857, "step": 6995 }, { "epoch": 0.34704102386031055, "grad_norm": 6.65513801574707, "learning_rate": 7.402820659846955e-06, "loss": 0.4191, "step": 6996 }, { "epoch": 0.34709062949551067, "grad_norm": 6.70595121383667, "learning_rate": 7.4021303836003545e-06, "loss": 0.3449, "step": 6997 }, { "epoch": 0.34714023513071085, "grad_norm": 7.046650409698486, "learning_rate": 7.401440047827875e-06, "loss": 0.2987, "step": 6998 }, { "epoch": 0.34718984076591103, "grad_norm": 8.669910430908203, "learning_rate": 7.4007496525466245e-06, "loss": 0.3928, "step": 6999 }, { "epoch": 0.34723944640111115, "grad_norm": 25.866153717041016, "learning_rate": 7.400059197773713e-06, "loss": 0.3626, "step": 7000 }, { "epoch": 0.34728905203631133, "grad_norm": 8.039042472839355, "learning_rate": 7.399368683526248e-06, "loss": 0.343, "step": 7001 }, { "epoch": 0.3473386576715115, "grad_norm": 6.777929306030273, "learning_rate": 7.3986781098213425e-06, "loss": 0.2556, "step": 7002 }, { "epoch": 0.34738826330671163, "grad_norm": 5.220606803894043, "learning_rate": 7.397987476676108e-06, "loss": 0.2351, "step": 7003 }, { "epoch": 0.3474378689419118, "grad_norm": 8.96528434753418, "learning_rate": 7.397296784107658e-06, "loss": 0.3835, "step": 7004 }, { "epoch": 0.34748747457711193, "grad_norm": 3.536937713623047, "learning_rate": 7.3966060321331116e-06, "loss": 0.2605, "step": 7005 }, { "epoch": 0.3475370802123121, "grad_norm": 4.552877426147461, "learning_rate": 7.3959152207695815e-06, "loss": 0.2906, "step": 7006 }, { "epoch": 0.3475866858475123, "grad_norm": 7.615642070770264, "learning_rate": 7.395224350034189e-06, "loss": 0.3681, "step": 7007 }, { "epoch": 0.3476362914827124, "grad_norm": 4.911966323852539, "learning_rate": 7.394533419944053e-06, "loss": 0.2949, "step": 7008 }, { "epoch": 0.3476858971179126, "grad_norm": 5.291638374328613, "learning_rate": 7.393842430516297e-06, "loss": 0.2994, "step": 7009 }, { "epoch": 0.34773550275311277, "grad_norm": 12.185384750366211, "learning_rate": 7.393151381768042e-06, "loss": 0.4868, "step": 7010 }, { "epoch": 0.3477851083883129, "grad_norm": 15.486794471740723, "learning_rate": 7.392460273716413e-06, "loss": 0.3227, "step": 7011 }, { "epoch": 0.34783471402351307, "grad_norm": 5.874928951263428, "learning_rate": 7.391769106378538e-06, "loss": 0.3191, "step": 7012 }, { "epoch": 0.34788431965871325, "grad_norm": 11.611351013183594, "learning_rate": 7.391077879771541e-06, "loss": 0.3745, "step": 7013 }, { "epoch": 0.34793392529391337, "grad_norm": 16.660991668701172, "learning_rate": 7.390386593912552e-06, "loss": 0.46, "step": 7014 }, { "epoch": 0.34798353092911355, "grad_norm": 4.763960838317871, "learning_rate": 7.389695248818703e-06, "loss": 0.2604, "step": 7015 }, { "epoch": 0.3480331365643137, "grad_norm": 14.191248893737793, "learning_rate": 7.389003844507124e-06, "loss": 0.403, "step": 7016 }, { "epoch": 0.34808274219951385, "grad_norm": 7.037658214569092, "learning_rate": 7.38831238099495e-06, "loss": 0.3118, "step": 7017 }, { "epoch": 0.34813234783471403, "grad_norm": 8.11669635772705, "learning_rate": 7.387620858299314e-06, "loss": 0.2916, "step": 7018 }, { "epoch": 0.3481819534699142, "grad_norm": 6.216121673583984, "learning_rate": 7.386929276437354e-06, "loss": 0.264, "step": 7019 }, { "epoch": 0.34823155910511433, "grad_norm": 3.873971462249756, "learning_rate": 7.386237635426206e-06, "loss": 0.2112, "step": 7020 }, { "epoch": 0.3482811647403145, "grad_norm": 3.694993257522583, "learning_rate": 7.385545935283009e-06, "loss": 0.2312, "step": 7021 }, { "epoch": 0.34833077037551463, "grad_norm": 5.739084720611572, "learning_rate": 7.384854176024907e-06, "loss": 0.38, "step": 7022 }, { "epoch": 0.3483803760107148, "grad_norm": 7.740498065948486, "learning_rate": 7.384162357669039e-06, "loss": 0.331, "step": 7023 }, { "epoch": 0.348429981645915, "grad_norm": 8.112424850463867, "learning_rate": 7.383470480232549e-06, "loss": 0.3703, "step": 7024 }, { "epoch": 0.3484795872811151, "grad_norm": 6.86171817779541, "learning_rate": 7.382778543732582e-06, "loss": 0.3773, "step": 7025 }, { "epoch": 0.3485291929163153, "grad_norm": 8.67487621307373, "learning_rate": 7.3820865481862845e-06, "loss": 0.2917, "step": 7026 }, { "epoch": 0.34857879855151547, "grad_norm": 13.596163749694824, "learning_rate": 7.381394493610806e-06, "loss": 0.3202, "step": 7027 }, { "epoch": 0.3486284041867156, "grad_norm": 6.551291465759277, "learning_rate": 7.3807023800232935e-06, "loss": 0.3004, "step": 7028 }, { "epoch": 0.34867800982191577, "grad_norm": 7.62593936920166, "learning_rate": 7.380010207440901e-06, "loss": 0.3703, "step": 7029 }, { "epoch": 0.34872761545711595, "grad_norm": 6.150789737701416, "learning_rate": 7.379317975880777e-06, "loss": 0.2013, "step": 7030 }, { "epoch": 0.34877722109231607, "grad_norm": 6.951422214508057, "learning_rate": 7.378625685360078e-06, "loss": 0.3392, "step": 7031 }, { "epoch": 0.34882682672751625, "grad_norm": 18.571048736572266, "learning_rate": 7.377933335895959e-06, "loss": 0.4547, "step": 7032 }, { "epoch": 0.3488764323627164, "grad_norm": 8.11876106262207, "learning_rate": 7.3772409275055755e-06, "loss": 0.3102, "step": 7033 }, { "epoch": 0.34892603799791655, "grad_norm": 31.1877384185791, "learning_rate": 7.376548460206087e-06, "loss": 0.4278, "step": 7034 }, { "epoch": 0.3489756436331167, "grad_norm": 4.69012975692749, "learning_rate": 7.375855934014651e-06, "loss": 0.1814, "step": 7035 }, { "epoch": 0.3490252492683169, "grad_norm": 6.129932880401611, "learning_rate": 7.375163348948433e-06, "loss": 0.3283, "step": 7036 }, { "epoch": 0.34907485490351703, "grad_norm": 5.616217136383057, "learning_rate": 7.374470705024591e-06, "loss": 0.325, "step": 7037 }, { "epoch": 0.3491244605387172, "grad_norm": 8.349471092224121, "learning_rate": 7.373778002260291e-06, "loss": 0.2483, "step": 7038 }, { "epoch": 0.34917406617391733, "grad_norm": 9.61002254486084, "learning_rate": 7.3730852406727e-06, "loss": 0.3127, "step": 7039 }, { "epoch": 0.3492236718091175, "grad_norm": 3.968142509460449, "learning_rate": 7.372392420278981e-06, "loss": 0.2231, "step": 7040 }, { "epoch": 0.3492732774443177, "grad_norm": 11.291664123535156, "learning_rate": 7.371699541096304e-06, "loss": 0.29, "step": 7041 }, { "epoch": 0.3493228830795178, "grad_norm": 5.327857971191406, "learning_rate": 7.371006603141841e-06, "loss": 0.3394, "step": 7042 }, { "epoch": 0.349372488714718, "grad_norm": 9.700220108032227, "learning_rate": 7.370313606432763e-06, "loss": 0.3335, "step": 7043 }, { "epoch": 0.34942209434991817, "grad_norm": 5.779268264770508, "learning_rate": 7.369620550986241e-06, "loss": 0.2968, "step": 7044 }, { "epoch": 0.3494716999851183, "grad_norm": 5.208291530609131, "learning_rate": 7.368927436819448e-06, "loss": 0.2, "step": 7045 }, { "epoch": 0.34952130562031847, "grad_norm": 7.477053165435791, "learning_rate": 7.368234263949563e-06, "loss": 0.3775, "step": 7046 }, { "epoch": 0.34957091125551865, "grad_norm": 4.735623359680176, "learning_rate": 7.367541032393763e-06, "loss": 0.2304, "step": 7047 }, { "epoch": 0.34962051689071877, "grad_norm": 14.907615661621094, "learning_rate": 7.366847742169223e-06, "loss": 0.3917, "step": 7048 }, { "epoch": 0.34967012252591895, "grad_norm": 7.801573276519775, "learning_rate": 7.366154393293126e-06, "loss": 0.236, "step": 7049 }, { "epoch": 0.3497197281611191, "grad_norm": 12.433605194091797, "learning_rate": 7.365460985782653e-06, "loss": 0.4645, "step": 7050 }, { "epoch": 0.34976933379631925, "grad_norm": 5.96927547454834, "learning_rate": 7.364767519654986e-06, "loss": 0.2492, "step": 7051 }, { "epoch": 0.3498189394315194, "grad_norm": 9.531065940856934, "learning_rate": 7.364073994927312e-06, "loss": 0.3316, "step": 7052 }, { "epoch": 0.3498685450667196, "grad_norm": 4.09649658203125, "learning_rate": 7.363380411616814e-06, "loss": 0.3294, "step": 7053 }, { "epoch": 0.34991815070191973, "grad_norm": 7.249378204345703, "learning_rate": 7.362686769740679e-06, "loss": 0.3793, "step": 7054 }, { "epoch": 0.3499677563371199, "grad_norm": 7.683093547821045, "learning_rate": 7.361993069316098e-06, "loss": 0.3, "step": 7055 }, { "epoch": 0.35001736197232003, "grad_norm": 4.299739360809326, "learning_rate": 7.361299310360261e-06, "loss": 0.3251, "step": 7056 }, { "epoch": 0.3500669676075202, "grad_norm": 5.033491134643555, "learning_rate": 7.360605492890359e-06, "loss": 0.3577, "step": 7057 }, { "epoch": 0.3501165732427204, "grad_norm": 5.986566543579102, "learning_rate": 7.3599116169235855e-06, "loss": 0.2559, "step": 7058 }, { "epoch": 0.3501661788779205, "grad_norm": 5.447710037231445, "learning_rate": 7.359217682477133e-06, "loss": 0.2653, "step": 7059 }, { "epoch": 0.3502157845131207, "grad_norm": 4.313941955566406, "learning_rate": 7.3585236895682e-06, "loss": 0.287, "step": 7060 }, { "epoch": 0.35026539014832087, "grad_norm": 7.637284278869629, "learning_rate": 7.357829638213983e-06, "loss": 0.3496, "step": 7061 }, { "epoch": 0.350314995783521, "grad_norm": 6.338592052459717, "learning_rate": 7.3571355284316806e-06, "loss": 0.3026, "step": 7062 }, { "epoch": 0.35036460141872117, "grad_norm": 4.291900157928467, "learning_rate": 7.3564413602384945e-06, "loss": 0.3057, "step": 7063 }, { "epoch": 0.35041420705392134, "grad_norm": 6.453249931335449, "learning_rate": 7.355747133651625e-06, "loss": 0.391, "step": 7064 }, { "epoch": 0.35046381268912147, "grad_norm": 10.172966957092285, "learning_rate": 7.355052848688275e-06, "loss": 0.2968, "step": 7065 }, { "epoch": 0.35051341832432165, "grad_norm": 5.759427547454834, "learning_rate": 7.354358505365653e-06, "loss": 0.3779, "step": 7066 }, { "epoch": 0.3505630239595218, "grad_norm": 6.266010284423828, "learning_rate": 7.353664103700959e-06, "loss": 0.2989, "step": 7067 }, { "epoch": 0.35061262959472195, "grad_norm": 4.85037899017334, "learning_rate": 7.352969643711405e-06, "loss": 0.2604, "step": 7068 }, { "epoch": 0.3506622352299221, "grad_norm": 10.832046508789062, "learning_rate": 7.3522751254142e-06, "loss": 0.3918, "step": 7069 }, { "epoch": 0.3507118408651223, "grad_norm": 8.516046524047852, "learning_rate": 7.351580548826551e-06, "loss": 0.4171, "step": 7070 }, { "epoch": 0.3507614465003224, "grad_norm": 8.894085884094238, "learning_rate": 7.350885913965674e-06, "loss": 0.3185, "step": 7071 }, { "epoch": 0.3508110521355226, "grad_norm": 5.729081630706787, "learning_rate": 7.35019122084878e-06, "loss": 0.2894, "step": 7072 }, { "epoch": 0.35086065777072273, "grad_norm": 13.089929580688477, "learning_rate": 7.3494964694930845e-06, "loss": 0.2862, "step": 7073 }, { "epoch": 0.3509102634059229, "grad_norm": 3.9938485622406006, "learning_rate": 7.348801659915805e-06, "loss": 0.1532, "step": 7074 }, { "epoch": 0.3509598690411231, "grad_norm": 10.758288383483887, "learning_rate": 7.348106792134156e-06, "loss": 0.3836, "step": 7075 }, { "epoch": 0.3510094746763232, "grad_norm": 10.991242408752441, "learning_rate": 7.34741186616536e-06, "loss": 0.4476, "step": 7076 }, { "epoch": 0.3510590803115234, "grad_norm": 10.598747253417969, "learning_rate": 7.346716882026635e-06, "loss": 0.332, "step": 7077 }, { "epoch": 0.35110868594672356, "grad_norm": 4.725310802459717, "learning_rate": 7.346021839735205e-06, "loss": 0.334, "step": 7078 }, { "epoch": 0.3511582915819237, "grad_norm": 6.741266250610352, "learning_rate": 7.3453267393082915e-06, "loss": 0.2049, "step": 7079 }, { "epoch": 0.35120789721712387, "grad_norm": 9.22669792175293, "learning_rate": 7.344631580763122e-06, "loss": 0.2925, "step": 7080 }, { "epoch": 0.35125750285232404, "grad_norm": 9.651012420654297, "learning_rate": 7.34393636411692e-06, "loss": 0.2287, "step": 7081 }, { "epoch": 0.35130710848752417, "grad_norm": 10.56411075592041, "learning_rate": 7.343241089386914e-06, "loss": 0.3142, "step": 7082 }, { "epoch": 0.35135671412272435, "grad_norm": 6.44924783706665, "learning_rate": 7.342545756590335e-06, "loss": 0.3348, "step": 7083 }, { "epoch": 0.3514063197579245, "grad_norm": 7.3845672607421875, "learning_rate": 7.341850365744413e-06, "loss": 0.2354, "step": 7084 }, { "epoch": 0.35145592539312465, "grad_norm": 9.089544296264648, "learning_rate": 7.341154916866379e-06, "loss": 0.2085, "step": 7085 }, { "epoch": 0.3515055310283248, "grad_norm": 3.7743310928344727, "learning_rate": 7.340459409973467e-06, "loss": 0.276, "step": 7086 }, { "epoch": 0.351555136663525, "grad_norm": 8.85534954071045, "learning_rate": 7.339763845082912e-06, "loss": 0.4316, "step": 7087 }, { "epoch": 0.3516047422987251, "grad_norm": 9.481888771057129, "learning_rate": 7.339068222211951e-06, "loss": 0.3517, "step": 7088 }, { "epoch": 0.3516543479339253, "grad_norm": 4.769990921020508, "learning_rate": 7.338372541377821e-06, "loss": 0.3322, "step": 7089 }, { "epoch": 0.3517039535691254, "grad_norm": 3.830305814743042, "learning_rate": 7.337676802597762e-06, "loss": 0.2065, "step": 7090 }, { "epoch": 0.3517535592043256, "grad_norm": 8.678956985473633, "learning_rate": 7.336981005889014e-06, "loss": 0.3669, "step": 7091 }, { "epoch": 0.3518031648395258, "grad_norm": 6.747347354888916, "learning_rate": 7.336285151268821e-06, "loss": 0.303, "step": 7092 }, { "epoch": 0.3518527704747259, "grad_norm": 4.1370463371276855, "learning_rate": 7.335589238754423e-06, "loss": 0.296, "step": 7093 }, { "epoch": 0.3519023761099261, "grad_norm": 5.90669584274292, "learning_rate": 7.334893268363068e-06, "loss": 0.364, "step": 7094 }, { "epoch": 0.35195198174512626, "grad_norm": 3.730111837387085, "learning_rate": 7.3341972401120025e-06, "loss": 0.2412, "step": 7095 }, { "epoch": 0.3520015873803264, "grad_norm": 9.569466590881348, "learning_rate": 7.333501154018472e-06, "loss": 0.3332, "step": 7096 }, { "epoch": 0.35205119301552656, "grad_norm": 5.400397777557373, "learning_rate": 7.3328050100997285e-06, "loss": 0.2681, "step": 7097 }, { "epoch": 0.35210079865072674, "grad_norm": 7.209134101867676, "learning_rate": 7.332108808373021e-06, "loss": 0.3104, "step": 7098 }, { "epoch": 0.35215040428592687, "grad_norm": 6.140774726867676, "learning_rate": 7.331412548855601e-06, "loss": 0.3173, "step": 7099 }, { "epoch": 0.35220000992112704, "grad_norm": 5.310923099517822, "learning_rate": 7.330716231564725e-06, "loss": 0.2413, "step": 7100 }, { "epoch": 0.3522496155563272, "grad_norm": 4.703817844390869, "learning_rate": 7.330019856517647e-06, "loss": 0.3106, "step": 7101 }, { "epoch": 0.35229922119152735, "grad_norm": 7.468245983123779, "learning_rate": 7.329323423731622e-06, "loss": 0.3643, "step": 7102 }, { "epoch": 0.3523488268267275, "grad_norm": 5.406594276428223, "learning_rate": 7.328626933223909e-06, "loss": 0.2454, "step": 7103 }, { "epoch": 0.3523984324619277, "grad_norm": 5.526923179626465, "learning_rate": 7.327930385011768e-06, "loss": 0.2664, "step": 7104 }, { "epoch": 0.3524480380971278, "grad_norm": 8.635635375976562, "learning_rate": 7.327233779112457e-06, "loss": 0.3552, "step": 7105 }, { "epoch": 0.352497643732328, "grad_norm": 7.844919204711914, "learning_rate": 7.326537115543243e-06, "loss": 0.3791, "step": 7106 }, { "epoch": 0.3525472493675281, "grad_norm": 12.744024276733398, "learning_rate": 7.3258403943213855e-06, "loss": 0.4181, "step": 7107 }, { "epoch": 0.3525968550027283, "grad_norm": 5.724393367767334, "learning_rate": 7.325143615464151e-06, "loss": 0.3295, "step": 7108 }, { "epoch": 0.3526464606379285, "grad_norm": 5.420614719390869, "learning_rate": 7.324446778988805e-06, "loss": 0.3174, "step": 7109 }, { "epoch": 0.3526960662731286, "grad_norm": 10.188358306884766, "learning_rate": 7.323749884912619e-06, "loss": 0.3024, "step": 7110 }, { "epoch": 0.3527456719083288, "grad_norm": 4.023218154907227, "learning_rate": 7.323052933252857e-06, "loss": 0.2651, "step": 7111 }, { "epoch": 0.35279527754352896, "grad_norm": 8.91612434387207, "learning_rate": 7.322355924026793e-06, "loss": 0.3456, "step": 7112 }, { "epoch": 0.3528448831787291, "grad_norm": 6.556329250335693, "learning_rate": 7.3216588572517e-06, "loss": 0.2887, "step": 7113 }, { "epoch": 0.35289448881392926, "grad_norm": 8.48592472076416, "learning_rate": 7.320961732944849e-06, "loss": 0.285, "step": 7114 }, { "epoch": 0.35294409444912944, "grad_norm": 6.7961578369140625, "learning_rate": 7.320264551123517e-06, "loss": 0.3646, "step": 7115 }, { "epoch": 0.35299370008432956, "grad_norm": 9.28494644165039, "learning_rate": 7.319567311804979e-06, "loss": 0.342, "step": 7116 }, { "epoch": 0.35304330571952974, "grad_norm": 11.835628509521484, "learning_rate": 7.318870015006515e-06, "loss": 0.4369, "step": 7117 }, { "epoch": 0.3530929113547299, "grad_norm": 10.203873634338379, "learning_rate": 7.3181726607454026e-06, "loss": 0.3485, "step": 7118 }, { "epoch": 0.35314251698993004, "grad_norm": 7.112367153167725, "learning_rate": 7.317475249038923e-06, "loss": 0.274, "step": 7119 }, { "epoch": 0.3531921226251302, "grad_norm": 6.544838905334473, "learning_rate": 7.316777779904358e-06, "loss": 0.2398, "step": 7120 }, { "epoch": 0.3532417282603304, "grad_norm": 6.032543659210205, "learning_rate": 7.3160802533589924e-06, "loss": 0.3738, "step": 7121 }, { "epoch": 0.3532913338955305, "grad_norm": 7.426928520202637, "learning_rate": 7.3153826694201106e-06, "loss": 0.2246, "step": 7122 }, { "epoch": 0.3533409395307307, "grad_norm": 8.802803993225098, "learning_rate": 7.314685028104999e-06, "loss": 0.4337, "step": 7123 }, { "epoch": 0.3533905451659308, "grad_norm": 6.011046409606934, "learning_rate": 7.313987329430944e-06, "loss": 0.3115, "step": 7124 }, { "epoch": 0.353440150801131, "grad_norm": 23.377933502197266, "learning_rate": 7.313289573415237e-06, "loss": 0.613, "step": 7125 }, { "epoch": 0.3534897564363312, "grad_norm": 14.927282333374023, "learning_rate": 7.312591760075167e-06, "loss": 0.276, "step": 7126 }, { "epoch": 0.3535393620715313, "grad_norm": 8.950824737548828, "learning_rate": 7.311893889428029e-06, "loss": 0.2831, "step": 7127 }, { "epoch": 0.3535889677067315, "grad_norm": 4.472133636474609, "learning_rate": 7.3111959614911136e-06, "loss": 0.2314, "step": 7128 }, { "epoch": 0.35363857334193166, "grad_norm": 18.454086303710938, "learning_rate": 7.310497976281716e-06, "loss": 0.4966, "step": 7129 }, { "epoch": 0.3536881789771318, "grad_norm": 8.14253044128418, "learning_rate": 7.309799933817136e-06, "loss": 0.279, "step": 7130 }, { "epoch": 0.35373778461233196, "grad_norm": 10.055306434631348, "learning_rate": 7.309101834114667e-06, "loss": 0.3507, "step": 7131 }, { "epoch": 0.35378739024753214, "grad_norm": 11.632349014282227, "learning_rate": 7.308403677191609e-06, "loss": 0.3149, "step": 7132 }, { "epoch": 0.35383699588273226, "grad_norm": 5.048231601715088, "learning_rate": 7.307705463065264e-06, "loss": 0.3265, "step": 7133 }, { "epoch": 0.35388660151793244, "grad_norm": 8.273520469665527, "learning_rate": 7.307007191752936e-06, "loss": 0.2744, "step": 7134 }, { "epoch": 0.3539362071531326, "grad_norm": 18.572418212890625, "learning_rate": 7.306308863271925e-06, "loss": 0.6102, "step": 7135 }, { "epoch": 0.35398581278833274, "grad_norm": 6.371850490570068, "learning_rate": 7.305610477639535e-06, "loss": 0.2679, "step": 7136 }, { "epoch": 0.3540354184235329, "grad_norm": 7.463992595672607, "learning_rate": 7.304912034873077e-06, "loss": 0.3647, "step": 7137 }, { "epoch": 0.35408502405873304, "grad_norm": 6.775331497192383, "learning_rate": 7.304213534989856e-06, "loss": 0.2432, "step": 7138 }, { "epoch": 0.3541346296939332, "grad_norm": 8.040539741516113, "learning_rate": 7.30351497800718e-06, "loss": 0.3668, "step": 7139 }, { "epoch": 0.3541842353291334, "grad_norm": 4.18646764755249, "learning_rate": 7.302816363942362e-06, "loss": 0.3454, "step": 7140 }, { "epoch": 0.3542338409643335, "grad_norm": 4.724647521972656, "learning_rate": 7.302117692812712e-06, "loss": 0.3328, "step": 7141 }, { "epoch": 0.3542834465995337, "grad_norm": 5.240962505340576, "learning_rate": 7.301418964635545e-06, "loss": 0.2104, "step": 7142 }, { "epoch": 0.3543330522347339, "grad_norm": 8.785015106201172, "learning_rate": 7.300720179428174e-06, "loss": 0.2143, "step": 7143 }, { "epoch": 0.354382657869934, "grad_norm": 5.658421039581299, "learning_rate": 7.300021337207918e-06, "loss": 0.2958, "step": 7144 }, { "epoch": 0.3544322635051342, "grad_norm": 3.1139729022979736, "learning_rate": 7.299322437992091e-06, "loss": 0.2514, "step": 7145 }, { "epoch": 0.35448186914033436, "grad_norm": 9.52027702331543, "learning_rate": 7.298623481798015e-06, "loss": 0.2271, "step": 7146 }, { "epoch": 0.3545314747755345, "grad_norm": 8.129454612731934, "learning_rate": 7.297924468643009e-06, "loss": 0.4613, "step": 7147 }, { "epoch": 0.35458108041073466, "grad_norm": 4.504768371582031, "learning_rate": 7.297225398544395e-06, "loss": 0.2588, "step": 7148 }, { "epoch": 0.35463068604593484, "grad_norm": 5.424777507781982, "learning_rate": 7.296526271519496e-06, "loss": 0.2999, "step": 7149 }, { "epoch": 0.35468029168113496, "grad_norm": 5.395016193389893, "learning_rate": 7.295827087585639e-06, "loss": 0.3196, "step": 7150 }, { "epoch": 0.35472989731633514, "grad_norm": 9.96019458770752, "learning_rate": 7.295127846760146e-06, "loss": 0.3152, "step": 7151 }, { "epoch": 0.3547795029515353, "grad_norm": 10.172720909118652, "learning_rate": 7.294428549060349e-06, "loss": 0.4328, "step": 7152 }, { "epoch": 0.35482910858673544, "grad_norm": 2.916229724884033, "learning_rate": 7.293729194503571e-06, "loss": 0.2184, "step": 7153 }, { "epoch": 0.3548787142219356, "grad_norm": 11.950214385986328, "learning_rate": 7.293029783107149e-06, "loss": 0.3151, "step": 7154 }, { "epoch": 0.35492831985713574, "grad_norm": 9.27575397491455, "learning_rate": 7.2923303148884115e-06, "loss": 0.3717, "step": 7155 }, { "epoch": 0.3549779254923359, "grad_norm": 6.687348365783691, "learning_rate": 7.291630789864692e-06, "loss": 0.3567, "step": 7156 }, { "epoch": 0.3550275311275361, "grad_norm": 5.101902008056641, "learning_rate": 7.290931208053325e-06, "loss": 0.3024, "step": 7157 }, { "epoch": 0.3550771367627362, "grad_norm": 4.3573479652404785, "learning_rate": 7.290231569471644e-06, "loss": 0.1787, "step": 7158 }, { "epoch": 0.3551267423979364, "grad_norm": 11.003138542175293, "learning_rate": 7.28953187413699e-06, "loss": 0.4714, "step": 7159 }, { "epoch": 0.3551763480331366, "grad_norm": 9.101558685302734, "learning_rate": 7.288832122066703e-06, "loss": 0.287, "step": 7160 }, { "epoch": 0.3552259536683367, "grad_norm": 4.8227763175964355, "learning_rate": 7.288132313278118e-06, "loss": 0.3216, "step": 7161 }, { "epoch": 0.3552755593035369, "grad_norm": 4.424846172332764, "learning_rate": 7.28743244778858e-06, "loss": 0.2223, "step": 7162 }, { "epoch": 0.35532516493873706, "grad_norm": 7.922739505767822, "learning_rate": 7.28673252561543e-06, "loss": 0.3302, "step": 7163 }, { "epoch": 0.3553747705739372, "grad_norm": 6.745125770568848, "learning_rate": 7.286032546776016e-06, "loss": 0.3239, "step": 7164 }, { "epoch": 0.35542437620913736, "grad_norm": 5.122762680053711, "learning_rate": 7.28533251128768e-06, "loss": 0.3678, "step": 7165 }, { "epoch": 0.35547398184433754, "grad_norm": 7.368564605712891, "learning_rate": 7.28463241916777e-06, "loss": 0.3554, "step": 7166 }, { "epoch": 0.35552358747953766, "grad_norm": 7.702402591705322, "learning_rate": 7.283932270433637e-06, "loss": 0.3199, "step": 7167 }, { "epoch": 0.35557319311473784, "grad_norm": 6.093287944793701, "learning_rate": 7.283232065102628e-06, "loss": 0.192, "step": 7168 }, { "epoch": 0.355622798749938, "grad_norm": 4.471253871917725, "learning_rate": 7.282531803192096e-06, "loss": 0.3134, "step": 7169 }, { "epoch": 0.35567240438513814, "grad_norm": 21.091798782348633, "learning_rate": 7.281831484719392e-06, "loss": 0.4339, "step": 7170 }, { "epoch": 0.3557220100203383, "grad_norm": 4.724364280700684, "learning_rate": 7.281131109701873e-06, "loss": 0.1832, "step": 7171 }, { "epoch": 0.35577161565553844, "grad_norm": 4.521313190460205, "learning_rate": 7.280430678156894e-06, "loss": 0.306, "step": 7172 }, { "epoch": 0.3558212212907386, "grad_norm": 8.4774808883667, "learning_rate": 7.2797301901018095e-06, "loss": 0.3997, "step": 7173 }, { "epoch": 0.3558708269259388, "grad_norm": 8.956460952758789, "learning_rate": 7.279029645553981e-06, "loss": 0.4074, "step": 7174 }, { "epoch": 0.3559204325611389, "grad_norm": 13.66927719116211, "learning_rate": 7.278329044530766e-06, "loss": 0.4366, "step": 7175 }, { "epoch": 0.3559700381963391, "grad_norm": 4.9906110763549805, "learning_rate": 7.277628387049527e-06, "loss": 0.327, "step": 7176 }, { "epoch": 0.3560196438315393, "grad_norm": 5.806548118591309, "learning_rate": 7.276927673127627e-06, "loss": 0.2484, "step": 7177 }, { "epoch": 0.3560692494667394, "grad_norm": 24.977445602416992, "learning_rate": 7.276226902782428e-06, "loss": 0.3914, "step": 7178 }, { "epoch": 0.3561188551019396, "grad_norm": 10.460713386535645, "learning_rate": 7.275526076031297e-06, "loss": 0.3087, "step": 7179 }, { "epoch": 0.35616846073713976, "grad_norm": 5.739375591278076, "learning_rate": 7.274825192891601e-06, "loss": 0.2069, "step": 7180 }, { "epoch": 0.3562180663723399, "grad_norm": 7.030651092529297, "learning_rate": 7.274124253380708e-06, "loss": 0.2145, "step": 7181 }, { "epoch": 0.35626767200754006, "grad_norm": 8.117037773132324, "learning_rate": 7.273423257515987e-06, "loss": 0.3621, "step": 7182 }, { "epoch": 0.35631727764274024, "grad_norm": 5.3062424659729, "learning_rate": 7.272722205314808e-06, "loss": 0.2799, "step": 7183 }, { "epoch": 0.35636688327794036, "grad_norm": 9.950963973999023, "learning_rate": 7.272021096794547e-06, "loss": 0.2961, "step": 7184 }, { "epoch": 0.35641648891314054, "grad_norm": 7.2635884284973145, "learning_rate": 7.271319931972575e-06, "loss": 0.2248, "step": 7185 }, { "epoch": 0.3564660945483407, "grad_norm": 5.410210132598877, "learning_rate": 7.270618710866266e-06, "loss": 0.2656, "step": 7186 }, { "epoch": 0.35651570018354084, "grad_norm": 8.4531831741333, "learning_rate": 7.269917433492999e-06, "loss": 0.296, "step": 7187 }, { "epoch": 0.356565305818741, "grad_norm": 9.120265007019043, "learning_rate": 7.2692160998701526e-06, "loss": 0.3327, "step": 7188 }, { "epoch": 0.35661491145394114, "grad_norm": 10.414855003356934, "learning_rate": 7.268514710015104e-06, "loss": 0.387, "step": 7189 }, { "epoch": 0.3566645170891413, "grad_norm": 4.2656145095825195, "learning_rate": 7.2678132639452325e-06, "loss": 0.2761, "step": 7190 }, { "epoch": 0.3567141227243415, "grad_norm": 9.771448135375977, "learning_rate": 7.267111761677925e-06, "loss": 0.3465, "step": 7191 }, { "epoch": 0.3567637283595416, "grad_norm": 7.730362415313721, "learning_rate": 7.266410203230562e-06, "loss": 0.2687, "step": 7192 }, { "epoch": 0.3568133339947418, "grad_norm": 5.596668720245361, "learning_rate": 7.265708588620528e-06, "loss": 0.3069, "step": 7193 }, { "epoch": 0.356862939629942, "grad_norm": 16.277360916137695, "learning_rate": 7.2650069178652114e-06, "loss": 0.3836, "step": 7194 }, { "epoch": 0.3569125452651421, "grad_norm": 7.376255989074707, "learning_rate": 7.264305190982e-06, "loss": 0.3063, "step": 7195 }, { "epoch": 0.3569621509003423, "grad_norm": 13.233317375183105, "learning_rate": 7.263603407988279e-06, "loss": 0.5199, "step": 7196 }, { "epoch": 0.35701175653554246, "grad_norm": 6.657689094543457, "learning_rate": 7.262901568901442e-06, "loss": 0.3422, "step": 7197 }, { "epoch": 0.3570613621707426, "grad_norm": 11.4208345413208, "learning_rate": 7.262199673738883e-06, "loss": 0.3672, "step": 7198 }, { "epoch": 0.35711096780594276, "grad_norm": 8.558052062988281, "learning_rate": 7.261497722517989e-06, "loss": 0.3597, "step": 7199 }, { "epoch": 0.35716057344114294, "grad_norm": 4.126383304595947, "learning_rate": 7.260795715256161e-06, "loss": 0.2696, "step": 7200 }, { "epoch": 0.35721017907634306, "grad_norm": 6.004017353057861, "learning_rate": 7.260093651970793e-06, "loss": 0.2952, "step": 7201 }, { "epoch": 0.35725978471154324, "grad_norm": 4.724573135375977, "learning_rate": 7.259391532679281e-06, "loss": 0.1565, "step": 7202 }, { "epoch": 0.3573093903467434, "grad_norm": 5.399770736694336, "learning_rate": 7.258689357399024e-06, "loss": 0.3061, "step": 7203 }, { "epoch": 0.35735899598194354, "grad_norm": 5.704208850860596, "learning_rate": 7.2579871261474234e-06, "loss": 0.3092, "step": 7204 }, { "epoch": 0.3574086016171437, "grad_norm": 11.206584930419922, "learning_rate": 7.257284838941881e-06, "loss": 0.3674, "step": 7205 }, { "epoch": 0.35745820725234384, "grad_norm": 6.193020343780518, "learning_rate": 7.2565824957998e-06, "loss": 0.2952, "step": 7206 }, { "epoch": 0.357507812887544, "grad_norm": 6.806900501251221, "learning_rate": 7.255880096738582e-06, "loss": 0.3033, "step": 7207 }, { "epoch": 0.3575574185227442, "grad_norm": 8.40142822265625, "learning_rate": 7.255177641775635e-06, "loss": 0.3762, "step": 7208 }, { "epoch": 0.3576070241579443, "grad_norm": 5.5778608322143555, "learning_rate": 7.2544751309283675e-06, "loss": 0.3624, "step": 7209 }, { "epoch": 0.3576566297931445, "grad_norm": 5.287676811218262, "learning_rate": 7.253772564214186e-06, "loss": 0.2457, "step": 7210 }, { "epoch": 0.3577062354283447, "grad_norm": 4.621486186981201, "learning_rate": 7.2530699416505e-06, "loss": 0.2624, "step": 7211 }, { "epoch": 0.3577558410635448, "grad_norm": 12.234294891357422, "learning_rate": 7.252367263254721e-06, "loss": 0.3866, "step": 7212 }, { "epoch": 0.357805446698745, "grad_norm": 5.437961101531982, "learning_rate": 7.251664529044264e-06, "loss": 0.3398, "step": 7213 }, { "epoch": 0.35785505233394516, "grad_norm": 9.087340354919434, "learning_rate": 7.250961739036542e-06, "loss": 0.317, "step": 7214 }, { "epoch": 0.3579046579691453, "grad_norm": 8.130763053894043, "learning_rate": 7.250258893248968e-06, "loss": 0.2504, "step": 7215 }, { "epoch": 0.35795426360434546, "grad_norm": 13.013701438903809, "learning_rate": 7.249555991698961e-06, "loss": 0.2282, "step": 7216 }, { "epoch": 0.35800386923954564, "grad_norm": 25.60496711730957, "learning_rate": 7.24885303440394e-06, "loss": 0.3988, "step": 7217 }, { "epoch": 0.35805347487474576, "grad_norm": 9.43073558807373, "learning_rate": 7.2481500213813226e-06, "loss": 0.2555, "step": 7218 }, { "epoch": 0.35810308050994594, "grad_norm": 14.975708961486816, "learning_rate": 7.247446952648531e-06, "loss": 0.4164, "step": 7219 }, { "epoch": 0.3581526861451461, "grad_norm": 8.657349586486816, "learning_rate": 7.246743828222987e-06, "loss": 0.2237, "step": 7220 }, { "epoch": 0.35820229178034624, "grad_norm": 8.715046882629395, "learning_rate": 7.246040648122116e-06, "loss": 0.3092, "step": 7221 }, { "epoch": 0.3582518974155464, "grad_norm": 4.911709785461426, "learning_rate": 7.24533741236334e-06, "loss": 0.2434, "step": 7222 }, { "epoch": 0.35830150305074654, "grad_norm": 7.482361793518066, "learning_rate": 7.244634120964088e-06, "loss": 0.3017, "step": 7223 }, { "epoch": 0.3583511086859467, "grad_norm": 4.906964302062988, "learning_rate": 7.243930773941787e-06, "loss": 0.2492, "step": 7224 }, { "epoch": 0.3584007143211469, "grad_norm": 7.818419933319092, "learning_rate": 7.243227371313868e-06, "loss": 0.3479, "step": 7225 }, { "epoch": 0.358450319956347, "grad_norm": 38.61444854736328, "learning_rate": 7.242523913097757e-06, "loss": 0.3602, "step": 7226 }, { "epoch": 0.3584999255915472, "grad_norm": 8.83707332611084, "learning_rate": 7.2418203993108906e-06, "loss": 0.3278, "step": 7227 }, { "epoch": 0.3585495312267474, "grad_norm": 10.228717803955078, "learning_rate": 7.241116829970701e-06, "loss": 0.4592, "step": 7228 }, { "epoch": 0.3585991368619475, "grad_norm": 4.3358659744262695, "learning_rate": 7.240413205094623e-06, "loss": 0.2696, "step": 7229 }, { "epoch": 0.3586487424971477, "grad_norm": 3.8663580417633057, "learning_rate": 7.239709524700091e-06, "loss": 0.1776, "step": 7230 }, { "epoch": 0.35869834813234786, "grad_norm": 3.752074718475342, "learning_rate": 7.239005788804544e-06, "loss": 0.2383, "step": 7231 }, { "epoch": 0.358747953767548, "grad_norm": 14.2809419631958, "learning_rate": 7.2383019974254225e-06, "loss": 0.3211, "step": 7232 }, { "epoch": 0.35879755940274816, "grad_norm": 13.515849113464355, "learning_rate": 7.237598150580163e-06, "loss": 0.205, "step": 7233 }, { "epoch": 0.35884716503794833, "grad_norm": 9.907849311828613, "learning_rate": 7.2368942482862095e-06, "loss": 0.3414, "step": 7234 }, { "epoch": 0.35889677067314846, "grad_norm": 6.377071857452393, "learning_rate": 7.236190290561005e-06, "loss": 0.3473, "step": 7235 }, { "epoch": 0.35894637630834864, "grad_norm": 7.705077171325684, "learning_rate": 7.235486277421994e-06, "loss": 0.2926, "step": 7236 }, { "epoch": 0.3589959819435488, "grad_norm": 5.458397388458252, "learning_rate": 7.23478220888662e-06, "loss": 0.2442, "step": 7237 }, { "epoch": 0.35904558757874894, "grad_norm": 6.251132011413574, "learning_rate": 7.234078084972335e-06, "loss": 0.2642, "step": 7238 }, { "epoch": 0.3590951932139491, "grad_norm": 8.379168510437012, "learning_rate": 7.233373905696581e-06, "loss": 0.3892, "step": 7239 }, { "epoch": 0.35914479884914924, "grad_norm": 10.000578880310059, "learning_rate": 7.232669671076813e-06, "loss": 0.268, "step": 7240 }, { "epoch": 0.3591944044843494, "grad_norm": 11.505931854248047, "learning_rate": 7.231965381130481e-06, "loss": 0.353, "step": 7241 }, { "epoch": 0.3592440101195496, "grad_norm": 4.062272548675537, "learning_rate": 7.231261035875038e-06, "loss": 0.1956, "step": 7242 }, { "epoch": 0.3592936157547497, "grad_norm": 7.573880195617676, "learning_rate": 7.230556635327936e-06, "loss": 0.3016, "step": 7243 }, { "epoch": 0.3593432213899499, "grad_norm": 15.89703369140625, "learning_rate": 7.229852179506631e-06, "loss": 0.2993, "step": 7244 }, { "epoch": 0.3593928270251501, "grad_norm": 5.26546049118042, "learning_rate": 7.229147668428582e-06, "loss": 0.2945, "step": 7245 }, { "epoch": 0.3594424326603502, "grad_norm": 7.365880012512207, "learning_rate": 7.228443102111244e-06, "loss": 0.2487, "step": 7246 }, { "epoch": 0.3594920382955504, "grad_norm": 10.255578994750977, "learning_rate": 7.227738480572078e-06, "loss": 0.3653, "step": 7247 }, { "epoch": 0.35954164393075055, "grad_norm": 11.804086685180664, "learning_rate": 7.227033803828546e-06, "loss": 0.4644, "step": 7248 }, { "epoch": 0.3595912495659507, "grad_norm": 14.160920143127441, "learning_rate": 7.226329071898107e-06, "loss": 0.3422, "step": 7249 }, { "epoch": 0.35964085520115086, "grad_norm": 3.6400954723358154, "learning_rate": 7.225624284798227e-06, "loss": 0.2225, "step": 7250 }, { "epoch": 0.35969046083635103, "grad_norm": 10.781380653381348, "learning_rate": 7.224919442546372e-06, "loss": 0.3463, "step": 7251 }, { "epoch": 0.35974006647155116, "grad_norm": 10.208992004394531, "learning_rate": 7.224214545160006e-06, "loss": 0.3465, "step": 7252 }, { "epoch": 0.35978967210675133, "grad_norm": 8.064449310302734, "learning_rate": 7.223509592656597e-06, "loss": 0.3604, "step": 7253 }, { "epoch": 0.3598392777419515, "grad_norm": 7.935639381408691, "learning_rate": 7.222804585053614e-06, "loss": 0.2507, "step": 7254 }, { "epoch": 0.35988888337715164, "grad_norm": 4.7329277992248535, "learning_rate": 7.22209952236853e-06, "loss": 0.2575, "step": 7255 }, { "epoch": 0.3599384890123518, "grad_norm": 9.407201766967773, "learning_rate": 7.221394404618813e-06, "loss": 0.2588, "step": 7256 }, { "epoch": 0.35998809464755194, "grad_norm": 11.4075345993042, "learning_rate": 7.220689231821938e-06, "loss": 0.4037, "step": 7257 }, { "epoch": 0.3600377002827521, "grad_norm": 16.306781768798828, "learning_rate": 7.21998400399538e-06, "loss": 0.2305, "step": 7258 }, { "epoch": 0.3600873059179523, "grad_norm": 8.382131576538086, "learning_rate": 7.219278721156613e-06, "loss": 0.3884, "step": 7259 }, { "epoch": 0.3601369115531524, "grad_norm": 4.9166951179504395, "learning_rate": 7.2185733833231155e-06, "loss": 0.2397, "step": 7260 }, { "epoch": 0.3601865171883526, "grad_norm": 16.061864852905273, "learning_rate": 7.217867990512366e-06, "loss": 0.4758, "step": 7261 }, { "epoch": 0.3602361228235528, "grad_norm": 10.278905868530273, "learning_rate": 7.217162542741847e-06, "loss": 0.4023, "step": 7262 }, { "epoch": 0.3602857284587529, "grad_norm": 6.752485752105713, "learning_rate": 7.216457040029035e-06, "loss": 0.2857, "step": 7263 }, { "epoch": 0.3603353340939531, "grad_norm": 5.616085052490234, "learning_rate": 7.215751482391414e-06, "loss": 0.2351, "step": 7264 }, { "epoch": 0.36038493972915325, "grad_norm": 7.663873195648193, "learning_rate": 7.2150458698464695e-06, "loss": 0.3551, "step": 7265 }, { "epoch": 0.3604345453643534, "grad_norm": 8.067924499511719, "learning_rate": 7.214340202411687e-06, "loss": 0.2982, "step": 7266 }, { "epoch": 0.36048415099955355, "grad_norm": 8.178836822509766, "learning_rate": 7.213634480104553e-06, "loss": 0.3686, "step": 7267 }, { "epoch": 0.36053375663475373, "grad_norm": 14.921281814575195, "learning_rate": 7.212928702942555e-06, "loss": 0.2961, "step": 7268 }, { "epoch": 0.36058336226995386, "grad_norm": 11.81937026977539, "learning_rate": 7.212222870943182e-06, "loss": 0.338, "step": 7269 }, { "epoch": 0.36063296790515403, "grad_norm": 6.289700031280518, "learning_rate": 7.211516984123926e-06, "loss": 0.2117, "step": 7270 }, { "epoch": 0.36068257354035416, "grad_norm": 5.869888782501221, "learning_rate": 7.210811042502279e-06, "loss": 0.2656, "step": 7271 }, { "epoch": 0.36073217917555433, "grad_norm": 6.294882297515869, "learning_rate": 7.210105046095735e-06, "loss": 0.3457, "step": 7272 }, { "epoch": 0.3607817848107545, "grad_norm": 4.2202534675598145, "learning_rate": 7.2093989949217865e-06, "loss": 0.2263, "step": 7273 }, { "epoch": 0.36083139044595464, "grad_norm": 8.86694049835205, "learning_rate": 7.2086928889979324e-06, "loss": 0.4193, "step": 7274 }, { "epoch": 0.3608809960811548, "grad_norm": 7.488732814788818, "learning_rate": 7.207986728341671e-06, "loss": 0.2936, "step": 7275 }, { "epoch": 0.360930601716355, "grad_norm": 4.54926872253418, "learning_rate": 7.207280512970497e-06, "loss": 0.3186, "step": 7276 }, { "epoch": 0.3609802073515551, "grad_norm": 9.269347190856934, "learning_rate": 7.206574242901914e-06, "loss": 0.4215, "step": 7277 }, { "epoch": 0.3610298129867553, "grad_norm": 10.524182319641113, "learning_rate": 7.205867918153424e-06, "loss": 0.3486, "step": 7278 }, { "epoch": 0.3610794186219555, "grad_norm": 8.370560646057129, "learning_rate": 7.205161538742529e-06, "loss": 0.3859, "step": 7279 }, { "epoch": 0.3611290242571556, "grad_norm": 8.907596588134766, "learning_rate": 7.204455104686734e-06, "loss": 0.3816, "step": 7280 }, { "epoch": 0.3611786298923558, "grad_norm": 7.32633113861084, "learning_rate": 7.203748616003543e-06, "loss": 0.243, "step": 7281 }, { "epoch": 0.36122823552755595, "grad_norm": 8.004287719726562, "learning_rate": 7.2030420727104655e-06, "loss": 0.3265, "step": 7282 }, { "epoch": 0.3612778411627561, "grad_norm": 4.728321075439453, "learning_rate": 7.202335474825009e-06, "loss": 0.2758, "step": 7283 }, { "epoch": 0.36132744679795625, "grad_norm": 6.9071245193481445, "learning_rate": 7.201628822364681e-06, "loss": 0.1817, "step": 7284 }, { "epoch": 0.36137705243315643, "grad_norm": 9.906856536865234, "learning_rate": 7.200922115346998e-06, "loss": 0.3796, "step": 7285 }, { "epoch": 0.36142665806835655, "grad_norm": 5.533314228057861, "learning_rate": 7.200215353789468e-06, "loss": 0.2169, "step": 7286 }, { "epoch": 0.36147626370355673, "grad_norm": 5.351282119750977, "learning_rate": 7.1995085377096055e-06, "loss": 0.3294, "step": 7287 }, { "epoch": 0.36152586933875686, "grad_norm": 4.34619665145874, "learning_rate": 7.198801667124927e-06, "loss": 0.2582, "step": 7288 }, { "epoch": 0.36157547497395703, "grad_norm": 15.01434326171875, "learning_rate": 7.19809474205295e-06, "loss": 0.2891, "step": 7289 }, { "epoch": 0.3616250806091572, "grad_norm": 4.5059404373168945, "learning_rate": 7.19738776251119e-06, "loss": 0.2479, "step": 7290 }, { "epoch": 0.36167468624435734, "grad_norm": 7.41041374206543, "learning_rate": 7.196680728517166e-06, "loss": 0.2369, "step": 7291 }, { "epoch": 0.3617242918795575, "grad_norm": 4.083291530609131, "learning_rate": 7.195973640088402e-06, "loss": 0.3066, "step": 7292 }, { "epoch": 0.3617738975147577, "grad_norm": 8.212087631225586, "learning_rate": 7.195266497242417e-06, "loss": 0.2991, "step": 7293 }, { "epoch": 0.3618235031499578, "grad_norm": 7.685489654541016, "learning_rate": 7.194559299996734e-06, "loss": 0.2435, "step": 7294 }, { "epoch": 0.361873108785158, "grad_norm": 8.640483856201172, "learning_rate": 7.193852048368879e-06, "loss": 0.2581, "step": 7295 }, { "epoch": 0.36192271442035817, "grad_norm": 6.900772571563721, "learning_rate": 7.1931447423763805e-06, "loss": 0.2994, "step": 7296 }, { "epoch": 0.3619723200555583, "grad_norm": 6.482480525970459, "learning_rate": 7.192437382036761e-06, "loss": 0.2923, "step": 7297 }, { "epoch": 0.3620219256907585, "grad_norm": 4.488142490386963, "learning_rate": 7.191729967367551e-06, "loss": 0.2254, "step": 7298 }, { "epoch": 0.36207153132595865, "grad_norm": 8.253763198852539, "learning_rate": 7.191022498386283e-06, "loss": 0.2032, "step": 7299 }, { "epoch": 0.3621211369611588, "grad_norm": 5.551783084869385, "learning_rate": 7.190314975110484e-06, "loss": 0.2382, "step": 7300 }, { "epoch": 0.36217074259635895, "grad_norm": 7.909235000610352, "learning_rate": 7.189607397557691e-06, "loss": 0.3217, "step": 7301 }, { "epoch": 0.36222034823155913, "grad_norm": 5.787333965301514, "learning_rate": 7.188899765745436e-06, "loss": 0.3815, "step": 7302 }, { "epoch": 0.36226995386675925, "grad_norm": 4.6480631828308105, "learning_rate": 7.188192079691254e-06, "loss": 0.268, "step": 7303 }, { "epoch": 0.36231955950195943, "grad_norm": 10.729755401611328, "learning_rate": 7.187484339412682e-06, "loss": 0.3497, "step": 7304 }, { "epoch": 0.36236916513715955, "grad_norm": 8.693085670471191, "learning_rate": 7.186776544927259e-06, "loss": 0.3737, "step": 7305 }, { "epoch": 0.36241877077235973, "grad_norm": 10.473305702209473, "learning_rate": 7.186068696252525e-06, "loss": 0.3463, "step": 7306 }, { "epoch": 0.3624683764075599, "grad_norm": 7.380245685577393, "learning_rate": 7.185360793406019e-06, "loss": 0.3836, "step": 7307 }, { "epoch": 0.36251798204276003, "grad_norm": 8.399714469909668, "learning_rate": 7.184652836405284e-06, "loss": 0.3841, "step": 7308 }, { "epoch": 0.3625675876779602, "grad_norm": 8.179054260253906, "learning_rate": 7.183944825267863e-06, "loss": 0.3788, "step": 7309 }, { "epoch": 0.3626171933131604, "grad_norm": 5.69163703918457, "learning_rate": 7.183236760011303e-06, "loss": 0.1745, "step": 7310 }, { "epoch": 0.3626667989483605, "grad_norm": 3.477541923522949, "learning_rate": 7.182528640653146e-06, "loss": 0.3313, "step": 7311 }, { "epoch": 0.3627164045835607, "grad_norm": 6.2493977546691895, "learning_rate": 7.181820467210944e-06, "loss": 0.262, "step": 7312 }, { "epoch": 0.36276601021876087, "grad_norm": 7.860310077667236, "learning_rate": 7.1811122397022434e-06, "loss": 0.4517, "step": 7313 }, { "epoch": 0.362815615853961, "grad_norm": 10.421162605285645, "learning_rate": 7.180403958144595e-06, "loss": 0.5873, "step": 7314 }, { "epoch": 0.36286522148916117, "grad_norm": 7.962784767150879, "learning_rate": 7.179695622555549e-06, "loss": 0.3092, "step": 7315 }, { "epoch": 0.36291482712436135, "grad_norm": 7.417401313781738, "learning_rate": 7.178987232952661e-06, "loss": 0.3636, "step": 7316 }, { "epoch": 0.3629644327595615, "grad_norm": 6.279482841491699, "learning_rate": 7.178278789353483e-06, "loss": 0.2748, "step": 7317 }, { "epoch": 0.36301403839476165, "grad_norm": 7.955942153930664, "learning_rate": 7.177570291775571e-06, "loss": 0.2436, "step": 7318 }, { "epoch": 0.36306364402996183, "grad_norm": 8.394667625427246, "learning_rate": 7.176861740236483e-06, "loss": 0.3066, "step": 7319 }, { "epoch": 0.36311324966516195, "grad_norm": 5.2086615562438965, "learning_rate": 7.176153134753775e-06, "loss": 0.3233, "step": 7320 }, { "epoch": 0.36316285530036213, "grad_norm": 16.42348861694336, "learning_rate": 7.1754444753450105e-06, "loss": 0.4818, "step": 7321 }, { "epoch": 0.36321246093556225, "grad_norm": 7.152669906616211, "learning_rate": 7.174735762027745e-06, "loss": 0.4225, "step": 7322 }, { "epoch": 0.36326206657076243, "grad_norm": 4.680116653442383, "learning_rate": 7.174026994819545e-06, "loss": 0.2424, "step": 7323 }, { "epoch": 0.3633116722059626, "grad_norm": 3.8968687057495117, "learning_rate": 7.173318173737972e-06, "loss": 0.2506, "step": 7324 }, { "epoch": 0.36336127784116273, "grad_norm": 5.4231672286987305, "learning_rate": 7.172609298800592e-06, "loss": 0.1925, "step": 7325 }, { "epoch": 0.3634108834763629, "grad_norm": 14.397757530212402, "learning_rate": 7.1719003700249716e-06, "loss": 0.3211, "step": 7326 }, { "epoch": 0.3634604891115631, "grad_norm": 6.7498650550842285, "learning_rate": 7.171191387428675e-06, "loss": 0.3323, "step": 7327 }, { "epoch": 0.3635100947467632, "grad_norm": 6.300227165222168, "learning_rate": 7.170482351029276e-06, "loss": 0.2656, "step": 7328 }, { "epoch": 0.3635597003819634, "grad_norm": 6.782670021057129, "learning_rate": 7.1697732608443414e-06, "loss": 0.3197, "step": 7329 }, { "epoch": 0.36360930601716357, "grad_norm": 5.488073348999023, "learning_rate": 7.169064116891446e-06, "loss": 0.269, "step": 7330 }, { "epoch": 0.3636589116523637, "grad_norm": 6.963284969329834, "learning_rate": 7.1683549191881585e-06, "loss": 0.3358, "step": 7331 }, { "epoch": 0.36370851728756387, "grad_norm": 10.184584617614746, "learning_rate": 7.1676456677520555e-06, "loss": 0.4475, "step": 7332 }, { "epoch": 0.36375812292276405, "grad_norm": 9.589035034179688, "learning_rate": 7.1669363626007136e-06, "loss": 0.3201, "step": 7333 }, { "epoch": 0.36380772855796417, "grad_norm": 6.983319282531738, "learning_rate": 7.166227003751707e-06, "loss": 0.3077, "step": 7334 }, { "epoch": 0.36385733419316435, "grad_norm": 5.08266019821167, "learning_rate": 7.165517591222615e-06, "loss": 0.2939, "step": 7335 }, { "epoch": 0.36390693982836453, "grad_norm": 5.515010356903076, "learning_rate": 7.164808125031019e-06, "loss": 0.2692, "step": 7336 }, { "epoch": 0.36395654546356465, "grad_norm": 7.266780853271484, "learning_rate": 7.164098605194498e-06, "loss": 0.3553, "step": 7337 }, { "epoch": 0.36400615109876483, "grad_norm": 3.4742307662963867, "learning_rate": 7.163389031730634e-06, "loss": 0.2053, "step": 7338 }, { "epoch": 0.36405575673396495, "grad_norm": 14.364250183105469, "learning_rate": 7.1626794046570116e-06, "loss": 0.4699, "step": 7339 }, { "epoch": 0.36410536236916513, "grad_norm": 4.490586757659912, "learning_rate": 7.1619697239912155e-06, "loss": 0.3382, "step": 7340 }, { "epoch": 0.3641549680043653, "grad_norm": 7.728790760040283, "learning_rate": 7.161259989750832e-06, "loss": 0.2544, "step": 7341 }, { "epoch": 0.36420457363956543, "grad_norm": 10.593283653259277, "learning_rate": 7.160550201953447e-06, "loss": 0.3961, "step": 7342 }, { "epoch": 0.3642541792747656, "grad_norm": 11.422147750854492, "learning_rate": 7.159840360616652e-06, "loss": 0.3075, "step": 7343 }, { "epoch": 0.3643037849099658, "grad_norm": 10.23298168182373, "learning_rate": 7.159130465758035e-06, "loss": 0.3649, "step": 7344 }, { "epoch": 0.3643533905451659, "grad_norm": 8.717533111572266, "learning_rate": 7.158420517395188e-06, "loss": 0.1752, "step": 7345 }, { "epoch": 0.3644029961803661, "grad_norm": 6.348936557769775, "learning_rate": 7.157710515545706e-06, "loss": 0.2927, "step": 7346 }, { "epoch": 0.36445260181556627, "grad_norm": 6.759324073791504, "learning_rate": 7.15700046022718e-06, "loss": 0.2779, "step": 7347 }, { "epoch": 0.3645022074507664, "grad_norm": 17.80255699157715, "learning_rate": 7.156290351457207e-06, "loss": 0.3851, "step": 7348 }, { "epoch": 0.36455181308596657, "grad_norm": 7.935839653015137, "learning_rate": 7.155580189253384e-06, "loss": 0.3912, "step": 7349 }, { "epoch": 0.36460141872116675, "grad_norm": 6.0448102951049805, "learning_rate": 7.154869973633308e-06, "loss": 0.1762, "step": 7350 }, { "epoch": 0.36465102435636687, "grad_norm": 7.945312023162842, "learning_rate": 7.15415970461458e-06, "loss": 0.3744, "step": 7351 }, { "epoch": 0.36470062999156705, "grad_norm": 6.220426082611084, "learning_rate": 7.1534493822148e-06, "loss": 0.2945, "step": 7352 }, { "epoch": 0.3647502356267672, "grad_norm": 6.77437162399292, "learning_rate": 7.1527390064515714e-06, "loss": 0.2814, "step": 7353 }, { "epoch": 0.36479984126196735, "grad_norm": 9.35064697265625, "learning_rate": 7.1520285773424945e-06, "loss": 0.2853, "step": 7354 }, { "epoch": 0.36484944689716753, "grad_norm": 4.871371746063232, "learning_rate": 7.151318094905176e-06, "loss": 0.2093, "step": 7355 }, { "epoch": 0.36489905253236765, "grad_norm": 7.1323561668396, "learning_rate": 7.1506075591572235e-06, "loss": 0.31, "step": 7356 }, { "epoch": 0.36494865816756783, "grad_norm": 4.631711483001709, "learning_rate": 7.149896970116242e-06, "loss": 0.3585, "step": 7357 }, { "epoch": 0.364998263802768, "grad_norm": 7.459251403808594, "learning_rate": 7.149186327799841e-06, "loss": 0.3486, "step": 7358 }, { "epoch": 0.36504786943796813, "grad_norm": 5.9479169845581055, "learning_rate": 7.148475632225631e-06, "loss": 0.2056, "step": 7359 }, { "epoch": 0.3650974750731683, "grad_norm": 8.928091049194336, "learning_rate": 7.147764883411224e-06, "loss": 0.3531, "step": 7360 }, { "epoch": 0.3651470807083685, "grad_norm": 5.4355669021606445, "learning_rate": 7.14705408137423e-06, "loss": 0.2441, "step": 7361 }, { "epoch": 0.3651966863435686, "grad_norm": 6.832871913909912, "learning_rate": 7.146343226132264e-06, "loss": 0.2948, "step": 7362 }, { "epoch": 0.3652462919787688, "grad_norm": 12.12093734741211, "learning_rate": 7.145632317702944e-06, "loss": 0.3884, "step": 7363 }, { "epoch": 0.36529589761396897, "grad_norm": 8.169892311096191, "learning_rate": 7.1449213561038835e-06, "loss": 0.2728, "step": 7364 }, { "epoch": 0.3653455032491691, "grad_norm": 7.346402168273926, "learning_rate": 7.1442103413527006e-06, "loss": 0.3129, "step": 7365 }, { "epoch": 0.36539510888436927, "grad_norm": 7.259943008422852, "learning_rate": 7.143499273467016e-06, "loss": 0.3115, "step": 7366 }, { "epoch": 0.36544471451956945, "grad_norm": 7.976997375488281, "learning_rate": 7.142788152464452e-06, "loss": 0.3035, "step": 7367 }, { "epoch": 0.36549432015476957, "grad_norm": 10.911532402038574, "learning_rate": 7.1420769783626266e-06, "loss": 0.2725, "step": 7368 }, { "epoch": 0.36554392578996975, "grad_norm": 5.785207748413086, "learning_rate": 7.1413657511791635e-06, "loss": 0.2976, "step": 7369 }, { "epoch": 0.3655935314251699, "grad_norm": 7.3787946701049805, "learning_rate": 7.140654470931691e-06, "loss": 0.2827, "step": 7370 }, { "epoch": 0.36564313706037005, "grad_norm": 12.756766319274902, "learning_rate": 7.13994313763783e-06, "loss": 0.4771, "step": 7371 }, { "epoch": 0.3656927426955702, "grad_norm": 10.160102844238281, "learning_rate": 7.139231751315211e-06, "loss": 0.4049, "step": 7372 }, { "epoch": 0.36574234833077035, "grad_norm": 6.593501567840576, "learning_rate": 7.138520311981461e-06, "loss": 0.3065, "step": 7373 }, { "epoch": 0.36579195396597053, "grad_norm": 7.350008010864258, "learning_rate": 7.137808819654213e-06, "loss": 0.2803, "step": 7374 }, { "epoch": 0.3658415596011707, "grad_norm": 9.738807678222656, "learning_rate": 7.1370972743510925e-06, "loss": 0.3475, "step": 7375 }, { "epoch": 0.36589116523637083, "grad_norm": 11.080111503601074, "learning_rate": 7.136385676089736e-06, "loss": 0.4939, "step": 7376 }, { "epoch": 0.365940770871571, "grad_norm": 4.294566631317139, "learning_rate": 7.135674024887774e-06, "loss": 0.2523, "step": 7377 }, { "epoch": 0.3659903765067712, "grad_norm": 6.60207986831665, "learning_rate": 7.134962320762846e-06, "loss": 0.3716, "step": 7378 }, { "epoch": 0.3660399821419713, "grad_norm": 10.140738487243652, "learning_rate": 7.134250563732585e-06, "loss": 0.4168, "step": 7379 }, { "epoch": 0.3660895877771715, "grad_norm": 7.8497419357299805, "learning_rate": 7.13353875381463e-06, "loss": 0.3725, "step": 7380 }, { "epoch": 0.36613919341237167, "grad_norm": 2.941743850708008, "learning_rate": 7.13282689102662e-06, "loss": 0.247, "step": 7381 }, { "epoch": 0.3661887990475718, "grad_norm": 5.45089864730835, "learning_rate": 7.132114975386193e-06, "loss": 0.3787, "step": 7382 }, { "epoch": 0.36623840468277197, "grad_norm": 4.458986759185791, "learning_rate": 7.131403006910994e-06, "loss": 0.2556, "step": 7383 }, { "epoch": 0.36628801031797215, "grad_norm": 5.827243328094482, "learning_rate": 7.130690985618664e-06, "loss": 0.285, "step": 7384 }, { "epoch": 0.36633761595317227, "grad_norm": 14.270936965942383, "learning_rate": 7.129978911526848e-06, "loss": 0.4468, "step": 7385 }, { "epoch": 0.36638722158837245, "grad_norm": 7.527426242828369, "learning_rate": 7.129266784653191e-06, "loss": 0.4008, "step": 7386 }, { "epoch": 0.3664368272235726, "grad_norm": 7.772493839263916, "learning_rate": 7.12855460501534e-06, "loss": 0.258, "step": 7387 }, { "epoch": 0.36648643285877275, "grad_norm": 8.926755905151367, "learning_rate": 7.127842372630942e-06, "loss": 0.2756, "step": 7388 }, { "epoch": 0.3665360384939729, "grad_norm": 5.911827087402344, "learning_rate": 7.127130087517648e-06, "loss": 0.3091, "step": 7389 }, { "epoch": 0.36658564412917305, "grad_norm": 8.979087829589844, "learning_rate": 7.126417749693108e-06, "loss": 0.3266, "step": 7390 }, { "epoch": 0.36663524976437323, "grad_norm": 4.2129669189453125, "learning_rate": 7.125705359174976e-06, "loss": 0.2554, "step": 7391 }, { "epoch": 0.3666848553995734, "grad_norm": 7.773726463317871, "learning_rate": 7.124992915980902e-06, "loss": 0.2396, "step": 7392 }, { "epoch": 0.36673446103477353, "grad_norm": 5.074946880340576, "learning_rate": 7.124280420128542e-06, "loss": 0.3445, "step": 7393 }, { "epoch": 0.3667840666699737, "grad_norm": 17.203004837036133, "learning_rate": 7.123567871635554e-06, "loss": 0.3922, "step": 7394 }, { "epoch": 0.3668336723051739, "grad_norm": 8.320574760437012, "learning_rate": 7.122855270519592e-06, "loss": 0.4088, "step": 7395 }, { "epoch": 0.366883277940374, "grad_norm": 8.999616622924805, "learning_rate": 7.122142616798315e-06, "loss": 0.4828, "step": 7396 }, { "epoch": 0.3669328835755742, "grad_norm": 8.010563850402832, "learning_rate": 7.121429910489387e-06, "loss": 0.294, "step": 7397 }, { "epoch": 0.36698248921077437, "grad_norm": 4.487525939941406, "learning_rate": 7.120717151610465e-06, "loss": 0.3153, "step": 7398 }, { "epoch": 0.3670320948459745, "grad_norm": 8.919793128967285, "learning_rate": 7.1200043401792115e-06, "loss": 0.4114, "step": 7399 }, { "epoch": 0.36708170048117467, "grad_norm": 4.65932035446167, "learning_rate": 7.119291476213294e-06, "loss": 0.2834, "step": 7400 }, { "epoch": 0.36713130611637484, "grad_norm": 6.757955074310303, "learning_rate": 7.1185785597303725e-06, "loss": 0.2726, "step": 7401 }, { "epoch": 0.36718091175157497, "grad_norm": 5.49879264831543, "learning_rate": 7.117865590748116e-06, "loss": 0.2891, "step": 7402 }, { "epoch": 0.36723051738677515, "grad_norm": 10.012253761291504, "learning_rate": 7.1171525692841935e-06, "loss": 0.3024, "step": 7403 }, { "epoch": 0.36728012302197527, "grad_norm": 9.321568489074707, "learning_rate": 7.116439495356273e-06, "loss": 0.4255, "step": 7404 }, { "epoch": 0.36732972865717545, "grad_norm": 6.935399055480957, "learning_rate": 7.115726368982023e-06, "loss": 0.4054, "step": 7405 }, { "epoch": 0.3673793342923756, "grad_norm": 9.898658752441406, "learning_rate": 7.115013190179117e-06, "loss": 0.482, "step": 7406 }, { "epoch": 0.36742893992757575, "grad_norm": 6.129305362701416, "learning_rate": 7.114299958965229e-06, "loss": 0.3417, "step": 7407 }, { "epoch": 0.3674785455627759, "grad_norm": 6.577560901641846, "learning_rate": 7.11358667535803e-06, "loss": 0.3909, "step": 7408 }, { "epoch": 0.3675281511979761, "grad_norm": 4.379382133483887, "learning_rate": 7.112873339375197e-06, "loss": 0.3198, "step": 7409 }, { "epoch": 0.36757775683317623, "grad_norm": 7.534758567810059, "learning_rate": 7.112159951034406e-06, "loss": 0.3703, "step": 7410 }, { "epoch": 0.3676273624683764, "grad_norm": 5.675413608551025, "learning_rate": 7.111446510353338e-06, "loss": 0.3278, "step": 7411 }, { "epoch": 0.3676769681035766, "grad_norm": 6.967897891998291, "learning_rate": 7.110733017349669e-06, "loss": 0.2509, "step": 7412 }, { "epoch": 0.3677265737387767, "grad_norm": 6.550582408905029, "learning_rate": 7.110019472041082e-06, "loss": 0.316, "step": 7413 }, { "epoch": 0.3677761793739769, "grad_norm": 6.8035688400268555, "learning_rate": 7.109305874445258e-06, "loss": 0.2254, "step": 7414 }, { "epoch": 0.36782578500917706, "grad_norm": 14.635570526123047, "learning_rate": 7.10859222457988e-06, "loss": 0.444, "step": 7415 }, { "epoch": 0.3678753906443772, "grad_norm": 5.455930709838867, "learning_rate": 7.1078785224626324e-06, "loss": 0.2866, "step": 7416 }, { "epoch": 0.36792499627957737, "grad_norm": 5.176828861236572, "learning_rate": 7.107164768111202e-06, "loss": 0.3468, "step": 7417 }, { "epoch": 0.36797460191477754, "grad_norm": 8.033722877502441, "learning_rate": 7.106450961543275e-06, "loss": 0.3746, "step": 7418 }, { "epoch": 0.36802420754997767, "grad_norm": 5.957535743713379, "learning_rate": 7.105737102776541e-06, "loss": 0.2721, "step": 7419 }, { "epoch": 0.36807381318517784, "grad_norm": 6.970209121704102, "learning_rate": 7.1050231918286884e-06, "loss": 0.2177, "step": 7420 }, { "epoch": 0.36812341882037797, "grad_norm": 6.132044792175293, "learning_rate": 7.10430922871741e-06, "loss": 0.2067, "step": 7421 }, { "epoch": 0.36817302445557815, "grad_norm": 7.467514514923096, "learning_rate": 7.103595213460396e-06, "loss": 0.3082, "step": 7422 }, { "epoch": 0.3682226300907783, "grad_norm": 5.460533618927002, "learning_rate": 7.102881146075341e-06, "loss": 0.2831, "step": 7423 }, { "epoch": 0.36827223572597845, "grad_norm": 9.703965187072754, "learning_rate": 7.1021670265799406e-06, "loss": 0.3407, "step": 7424 }, { "epoch": 0.3683218413611786, "grad_norm": 11.4193115234375, "learning_rate": 7.101452854991891e-06, "loss": 0.3823, "step": 7425 }, { "epoch": 0.3683714469963788, "grad_norm": 6.8084187507629395, "learning_rate": 7.100738631328887e-06, "loss": 0.3413, "step": 7426 }, { "epoch": 0.3684210526315789, "grad_norm": 6.929849147796631, "learning_rate": 7.10002435560863e-06, "loss": 0.2499, "step": 7427 }, { "epoch": 0.3684706582667791, "grad_norm": 7.984933853149414, "learning_rate": 7.09931002784882e-06, "loss": 0.445, "step": 7428 }, { "epoch": 0.3685202639019793, "grad_norm": 5.276371002197266, "learning_rate": 7.098595648067158e-06, "loss": 0.3508, "step": 7429 }, { "epoch": 0.3685698695371794, "grad_norm": 7.874441146850586, "learning_rate": 7.097881216281346e-06, "loss": 0.3162, "step": 7430 }, { "epoch": 0.3686194751723796, "grad_norm": 5.98217248916626, "learning_rate": 7.097166732509088e-06, "loss": 0.3375, "step": 7431 }, { "epoch": 0.36866908080757976, "grad_norm": 4.4891180992126465, "learning_rate": 7.096452196768091e-06, "loss": 0.3442, "step": 7432 }, { "epoch": 0.3687186864427799, "grad_norm": 7.8443708419799805, "learning_rate": 7.095737609076061e-06, "loss": 0.3275, "step": 7433 }, { "epoch": 0.36876829207798006, "grad_norm": 6.689291954040527, "learning_rate": 7.095022969450704e-06, "loss": 0.3327, "step": 7434 }, { "epoch": 0.36881789771318024, "grad_norm": 4.463997840881348, "learning_rate": 7.09430827790973e-06, "loss": 0.2553, "step": 7435 }, { "epoch": 0.36886750334838037, "grad_norm": 6.128584861755371, "learning_rate": 7.093593534470849e-06, "loss": 0.3221, "step": 7436 }, { "epoch": 0.36891710898358054, "grad_norm": 6.792574405670166, "learning_rate": 7.092878739151775e-06, "loss": 0.3091, "step": 7437 }, { "epoch": 0.36896671461878067, "grad_norm": 6.993630409240723, "learning_rate": 7.092163891970217e-06, "loss": 0.3482, "step": 7438 }, { "epoch": 0.36901632025398085, "grad_norm": 7.335768222808838, "learning_rate": 7.091448992943892e-06, "loss": 0.3193, "step": 7439 }, { "epoch": 0.369065925889181, "grad_norm": 6.987377643585205, "learning_rate": 7.090734042090515e-06, "loss": 0.2856, "step": 7440 }, { "epoch": 0.36911553152438115, "grad_norm": 9.561454772949219, "learning_rate": 7.090019039427804e-06, "loss": 0.2292, "step": 7441 }, { "epoch": 0.3691651371595813, "grad_norm": 4.106080532073975, "learning_rate": 7.089303984973475e-06, "loss": 0.3081, "step": 7442 }, { "epoch": 0.3692147427947815, "grad_norm": 10.467852592468262, "learning_rate": 7.0885888787452466e-06, "loss": 0.417, "step": 7443 }, { "epoch": 0.3692643484299816, "grad_norm": 4.017070293426514, "learning_rate": 7.087873720760843e-06, "loss": 0.301, "step": 7444 }, { "epoch": 0.3693139540651818, "grad_norm": 3.699733257293701, "learning_rate": 7.087158511037984e-06, "loss": 0.2728, "step": 7445 }, { "epoch": 0.369363559700382, "grad_norm": 10.118364334106445, "learning_rate": 7.086443249594391e-06, "loss": 0.3853, "step": 7446 }, { "epoch": 0.3694131653355821, "grad_norm": 7.013960838317871, "learning_rate": 7.085727936447792e-06, "loss": 0.3988, "step": 7447 }, { "epoch": 0.3694627709707823, "grad_norm": 4.356960296630859, "learning_rate": 7.085012571615912e-06, "loss": 0.3058, "step": 7448 }, { "epoch": 0.36951237660598246, "grad_norm": 10.328298568725586, "learning_rate": 7.084297155116476e-06, "loss": 0.4032, "step": 7449 }, { "epoch": 0.3695619822411826, "grad_norm": 8.643827438354492, "learning_rate": 7.0835816869672135e-06, "loss": 0.414, "step": 7450 }, { "epoch": 0.36961158787638276, "grad_norm": 4.615387916564941, "learning_rate": 7.082866167185855e-06, "loss": 0.2976, "step": 7451 }, { "epoch": 0.36966119351158294, "grad_norm": 4.405721664428711, "learning_rate": 7.082150595790131e-06, "loss": 0.3233, "step": 7452 }, { "epoch": 0.36971079914678306, "grad_norm": 11.005640029907227, "learning_rate": 7.081434972797773e-06, "loss": 0.3372, "step": 7453 }, { "epoch": 0.36976040478198324, "grad_norm": 6.940913200378418, "learning_rate": 7.0807192982265145e-06, "loss": 0.3803, "step": 7454 }, { "epoch": 0.36981001041718337, "grad_norm": 8.985177040100098, "learning_rate": 7.080003572094092e-06, "loss": 0.3357, "step": 7455 }, { "epoch": 0.36985961605238354, "grad_norm": 6.963493824005127, "learning_rate": 7.079287794418238e-06, "loss": 0.3364, "step": 7456 }, { "epoch": 0.3699092216875837, "grad_norm": 14.425150871276855, "learning_rate": 7.078571965216693e-06, "loss": 0.3431, "step": 7457 }, { "epoch": 0.36995882732278385, "grad_norm": 5.420324325561523, "learning_rate": 7.077856084507194e-06, "loss": 0.2878, "step": 7458 }, { "epoch": 0.370008432957984, "grad_norm": 10.23221492767334, "learning_rate": 7.077140152307483e-06, "loss": 0.2995, "step": 7459 }, { "epoch": 0.3700580385931842, "grad_norm": 6.801268100738525, "learning_rate": 7.076424168635296e-06, "loss": 0.3181, "step": 7460 }, { "epoch": 0.3701076442283843, "grad_norm": 5.384871482849121, "learning_rate": 7.075708133508382e-06, "loss": 0.2456, "step": 7461 }, { "epoch": 0.3701572498635845, "grad_norm": 5.511752605438232, "learning_rate": 7.07499204694448e-06, "loss": 0.1853, "step": 7462 }, { "epoch": 0.3702068554987847, "grad_norm": 12.200311660766602, "learning_rate": 7.074275908961336e-06, "loss": 0.4674, "step": 7463 }, { "epoch": 0.3702564611339848, "grad_norm": 4.018428802490234, "learning_rate": 7.073559719576698e-06, "loss": 0.2415, "step": 7464 }, { "epoch": 0.370306066769185, "grad_norm": 4.9647135734558105, "learning_rate": 7.072843478808313e-06, "loss": 0.229, "step": 7465 }, { "epoch": 0.37035567240438516, "grad_norm": 4.115612030029297, "learning_rate": 7.072127186673927e-06, "loss": 0.2982, "step": 7466 }, { "epoch": 0.3704052780395853, "grad_norm": 7.2600603103637695, "learning_rate": 7.071410843191292e-06, "loss": 0.2852, "step": 7467 }, { "epoch": 0.37045488367478546, "grad_norm": 5.667437553405762, "learning_rate": 7.070694448378161e-06, "loss": 0.3385, "step": 7468 }, { "epoch": 0.37050448930998564, "grad_norm": 4.8858747482299805, "learning_rate": 7.069978002252283e-06, "loss": 0.2757, "step": 7469 }, { "epoch": 0.37055409494518576, "grad_norm": 8.629996299743652, "learning_rate": 7.069261504831415e-06, "loss": 0.3045, "step": 7470 }, { "epoch": 0.37060370058038594, "grad_norm": 7.51499080657959, "learning_rate": 7.0685449561333105e-06, "loss": 0.3771, "step": 7471 }, { "epoch": 0.37065330621558606, "grad_norm": 6.682140827178955, "learning_rate": 7.067828356175727e-06, "loss": 0.3082, "step": 7472 }, { "epoch": 0.37070291185078624, "grad_norm": 5.7669596672058105, "learning_rate": 7.0671117049764195e-06, "loss": 0.3636, "step": 7473 }, { "epoch": 0.3707525174859864, "grad_norm": 8.095320701599121, "learning_rate": 7.06639500255315e-06, "loss": 0.3543, "step": 7474 }, { "epoch": 0.37080212312118654, "grad_norm": 4.087314128875732, "learning_rate": 7.065678248923678e-06, "loss": 0.3035, "step": 7475 }, { "epoch": 0.3708517287563867, "grad_norm": 8.12526798248291, "learning_rate": 7.064961444105764e-06, "loss": 0.2659, "step": 7476 }, { "epoch": 0.3709013343915869, "grad_norm": 5.884867191314697, "learning_rate": 7.064244588117171e-06, "loss": 0.346, "step": 7477 }, { "epoch": 0.370950940026787, "grad_norm": 4.461544036865234, "learning_rate": 7.063527680975666e-06, "loss": 0.3026, "step": 7478 }, { "epoch": 0.3710005456619872, "grad_norm": 6.68672513961792, "learning_rate": 7.06281072269901e-06, "loss": 0.3182, "step": 7479 }, { "epoch": 0.3710501512971874, "grad_norm": 5.4304962158203125, "learning_rate": 7.062093713304969e-06, "loss": 0.3615, "step": 7480 }, { "epoch": 0.3710997569323875, "grad_norm": 5.644629955291748, "learning_rate": 7.061376652811315e-06, "loss": 0.2366, "step": 7481 }, { "epoch": 0.3711493625675877, "grad_norm": 12.006982803344727, "learning_rate": 7.060659541235816e-06, "loss": 0.3274, "step": 7482 }, { "epoch": 0.37119896820278786, "grad_norm": 5.045993804931641, "learning_rate": 7.059942378596239e-06, "loss": 0.3196, "step": 7483 }, { "epoch": 0.371248573837988, "grad_norm": 6.702109336853027, "learning_rate": 7.059225164910359e-06, "loss": 0.2786, "step": 7484 }, { "epoch": 0.37129817947318816, "grad_norm": 7.683013439178467, "learning_rate": 7.058507900195949e-06, "loss": 0.2706, "step": 7485 }, { "epoch": 0.37134778510838834, "grad_norm": 5.441490173339844, "learning_rate": 7.057790584470782e-06, "loss": 0.2483, "step": 7486 }, { "epoch": 0.37139739074358846, "grad_norm": 6.363167762756348, "learning_rate": 7.057073217752632e-06, "loss": 0.3121, "step": 7487 }, { "epoch": 0.37144699637878864, "grad_norm": 16.498929977416992, "learning_rate": 7.056355800059278e-06, "loss": 0.4584, "step": 7488 }, { "epoch": 0.37149660201398876, "grad_norm": 6.383925914764404, "learning_rate": 7.055638331408496e-06, "loss": 0.2818, "step": 7489 }, { "epoch": 0.37154620764918894, "grad_norm": 7.1875529289245605, "learning_rate": 7.054920811818068e-06, "loss": 0.2238, "step": 7490 }, { "epoch": 0.3715958132843891, "grad_norm": 4.397893905639648, "learning_rate": 7.0542032413057725e-06, "loss": 0.2353, "step": 7491 }, { "epoch": 0.37164541891958924, "grad_norm": 4.014697074890137, "learning_rate": 7.0534856198893904e-06, "loss": 0.1961, "step": 7492 }, { "epoch": 0.3716950245547894, "grad_norm": 7.523996829986572, "learning_rate": 7.052767947586706e-06, "loss": 0.2744, "step": 7493 }, { "epoch": 0.3717446301899896, "grad_norm": 3.0590038299560547, "learning_rate": 7.052050224415503e-06, "loss": 0.1824, "step": 7494 }, { "epoch": 0.3717942358251897, "grad_norm": 9.827926635742188, "learning_rate": 7.051332450393569e-06, "loss": 0.348, "step": 7495 }, { "epoch": 0.3718438414603899, "grad_norm": 4.516639232635498, "learning_rate": 7.0506146255386874e-06, "loss": 0.32, "step": 7496 }, { "epoch": 0.3718934470955901, "grad_norm": 10.950226783752441, "learning_rate": 7.0498967498686475e-06, "loss": 0.3468, "step": 7497 }, { "epoch": 0.3719430527307902, "grad_norm": 6.713932037353516, "learning_rate": 7.049178823401241e-06, "loss": 0.3303, "step": 7498 }, { "epoch": 0.3719926583659904, "grad_norm": 4.103440284729004, "learning_rate": 7.048460846154255e-06, "loss": 0.2214, "step": 7499 }, { "epoch": 0.37204226400119056, "grad_norm": 9.96302318572998, "learning_rate": 7.047742818145482e-06, "loss": 0.399, "step": 7500 }, { "epoch": 0.3720918696363907, "grad_norm": 13.646435737609863, "learning_rate": 7.047024739392716e-06, "loss": 0.3969, "step": 7501 }, { "epoch": 0.37214147527159086, "grad_norm": 4.279820442199707, "learning_rate": 7.0463066099137515e-06, "loss": 0.2224, "step": 7502 }, { "epoch": 0.37219108090679104, "grad_norm": 4.912289619445801, "learning_rate": 7.0455884297263835e-06, "loss": 0.1802, "step": 7503 }, { "epoch": 0.37224068654199116, "grad_norm": 6.390551567077637, "learning_rate": 7.044870198848408e-06, "loss": 0.3357, "step": 7504 }, { "epoch": 0.37229029217719134, "grad_norm": 8.345308303833008, "learning_rate": 7.044151917297625e-06, "loss": 0.3506, "step": 7505 }, { "epoch": 0.37233989781239146, "grad_norm": 7.476396083831787, "learning_rate": 7.043433585091832e-06, "loss": 0.3563, "step": 7506 }, { "epoch": 0.37238950344759164, "grad_norm": 5.270285129547119, "learning_rate": 7.042715202248831e-06, "loss": 0.3118, "step": 7507 }, { "epoch": 0.3724391090827918, "grad_norm": 13.256916999816895, "learning_rate": 7.0419967687864225e-06, "loss": 0.4208, "step": 7508 }, { "epoch": 0.37248871471799194, "grad_norm": 3.562833547592163, "learning_rate": 7.041278284722412e-06, "loss": 0.2823, "step": 7509 }, { "epoch": 0.3725383203531921, "grad_norm": 6.4809160232543945, "learning_rate": 7.040559750074601e-06, "loss": 0.3352, "step": 7510 }, { "epoch": 0.3725879259883923, "grad_norm": 8.505663871765137, "learning_rate": 7.039841164860795e-06, "loss": 0.2363, "step": 7511 }, { "epoch": 0.3726375316235924, "grad_norm": 11.440680503845215, "learning_rate": 7.0391225290988055e-06, "loss": 0.4087, "step": 7512 }, { "epoch": 0.3726871372587926, "grad_norm": 9.562750816345215, "learning_rate": 7.038403842806435e-06, "loss": 0.4522, "step": 7513 }, { "epoch": 0.3727367428939928, "grad_norm": 6.212771892547607, "learning_rate": 7.037685106001495e-06, "loss": 0.2363, "step": 7514 }, { "epoch": 0.3727863485291929, "grad_norm": 5.476102352142334, "learning_rate": 7.036966318701799e-06, "loss": 0.3122, "step": 7515 }, { "epoch": 0.3728359541643931, "grad_norm": 5.288868427276611, "learning_rate": 7.036247480925154e-06, "loss": 0.317, "step": 7516 }, { "epoch": 0.37288555979959326, "grad_norm": 5.170575141906738, "learning_rate": 7.035528592689375e-06, "loss": 0.2939, "step": 7517 }, { "epoch": 0.3729351654347934, "grad_norm": 5.771095275878906, "learning_rate": 7.0348096540122765e-06, "loss": 0.2259, "step": 7518 }, { "epoch": 0.37298477106999356, "grad_norm": 8.392203330993652, "learning_rate": 7.034090664911677e-06, "loss": 0.3677, "step": 7519 }, { "epoch": 0.37303437670519374, "grad_norm": 3.9146223068237305, "learning_rate": 7.033371625405388e-06, "loss": 0.3339, "step": 7520 }, { "epoch": 0.37308398234039386, "grad_norm": 8.321584701538086, "learning_rate": 7.032652535511231e-06, "loss": 0.3127, "step": 7521 }, { "epoch": 0.37313358797559404, "grad_norm": 5.519598007202148, "learning_rate": 7.031933395247026e-06, "loss": 0.2516, "step": 7522 }, { "epoch": 0.37318319361079416, "grad_norm": 3.11555814743042, "learning_rate": 7.03121420463059e-06, "loss": 0.2464, "step": 7523 }, { "epoch": 0.37323279924599434, "grad_norm": 7.2509446144104, "learning_rate": 7.0304949636797485e-06, "loss": 0.3165, "step": 7524 }, { "epoch": 0.3732824048811945, "grad_norm": 9.30707836151123, "learning_rate": 7.0297756724123245e-06, "loss": 0.3734, "step": 7525 }, { "epoch": 0.37333201051639464, "grad_norm": 18.398462295532227, "learning_rate": 7.029056330846139e-06, "loss": 0.3992, "step": 7526 }, { "epoch": 0.3733816161515948, "grad_norm": 9.5296049118042, "learning_rate": 7.028336938999021e-06, "loss": 0.3417, "step": 7527 }, { "epoch": 0.373431221786795, "grad_norm": 6.89223575592041, "learning_rate": 7.0276174968887944e-06, "loss": 0.2979, "step": 7528 }, { "epoch": 0.3734808274219951, "grad_norm": 11.079362869262695, "learning_rate": 7.0268980045332915e-06, "loss": 0.3067, "step": 7529 }, { "epoch": 0.3735304330571953, "grad_norm": 8.962875366210938, "learning_rate": 7.026178461950338e-06, "loss": 0.3213, "step": 7530 }, { "epoch": 0.3735800386923955, "grad_norm": 12.989012718200684, "learning_rate": 7.025458869157766e-06, "loss": 0.405, "step": 7531 }, { "epoch": 0.3736296443275956, "grad_norm": 6.729211807250977, "learning_rate": 7.024739226173407e-06, "loss": 0.3291, "step": 7532 }, { "epoch": 0.3736792499627958, "grad_norm": 5.756789684295654, "learning_rate": 7.024019533015094e-06, "loss": 0.2724, "step": 7533 }, { "epoch": 0.37372885559799596, "grad_norm": 4.74581241607666, "learning_rate": 7.023299789700661e-06, "loss": 0.2551, "step": 7534 }, { "epoch": 0.3737784612331961, "grad_norm": 6.877578258514404, "learning_rate": 7.022579996247944e-06, "loss": 0.3204, "step": 7535 }, { "epoch": 0.37382806686839626, "grad_norm": 7.273519992828369, "learning_rate": 7.021860152674781e-06, "loss": 0.3296, "step": 7536 }, { "epoch": 0.3738776725035964, "grad_norm": 7.20319128036499, "learning_rate": 7.021140258999007e-06, "loss": 0.4035, "step": 7537 }, { "epoch": 0.37392727813879656, "grad_norm": 7.369048118591309, "learning_rate": 7.020420315238464e-06, "loss": 0.3722, "step": 7538 }, { "epoch": 0.37397688377399674, "grad_norm": 8.885302543640137, "learning_rate": 7.019700321410992e-06, "loss": 0.3711, "step": 7539 }, { "epoch": 0.37402648940919686, "grad_norm": 7.60203742980957, "learning_rate": 7.018980277534433e-06, "loss": 0.3624, "step": 7540 }, { "epoch": 0.37407609504439704, "grad_norm": 7.501079559326172, "learning_rate": 7.018260183626628e-06, "loss": 0.2726, "step": 7541 }, { "epoch": 0.3741257006795972, "grad_norm": 7.974462509155273, "learning_rate": 7.017540039705423e-06, "loss": 0.2487, "step": 7542 }, { "epoch": 0.37417530631479734, "grad_norm": 7.220675945281982, "learning_rate": 7.016819845788662e-06, "loss": 0.375, "step": 7543 }, { "epoch": 0.3742249119499975, "grad_norm": 13.227251052856445, "learning_rate": 7.016099601894195e-06, "loss": 0.4511, "step": 7544 }, { "epoch": 0.3742745175851977, "grad_norm": 3.803907871246338, "learning_rate": 7.015379308039867e-06, "loss": 0.1739, "step": 7545 }, { "epoch": 0.3743241232203978, "grad_norm": 7.341335773468018, "learning_rate": 7.0146589642435285e-06, "loss": 0.3405, "step": 7546 }, { "epoch": 0.374373728855598, "grad_norm": 5.2092814445495605, "learning_rate": 7.013938570523029e-06, "loss": 0.3351, "step": 7547 }, { "epoch": 0.3744233344907982, "grad_norm": 9.438909530639648, "learning_rate": 7.013218126896221e-06, "loss": 0.4041, "step": 7548 }, { "epoch": 0.3744729401259983, "grad_norm": 5.624510765075684, "learning_rate": 7.012497633380958e-06, "loss": 0.2435, "step": 7549 }, { "epoch": 0.3745225457611985, "grad_norm": 5.530627250671387, "learning_rate": 7.011777089995092e-06, "loss": 0.3126, "step": 7550 }, { "epoch": 0.37457215139639866, "grad_norm": 6.8922576904296875, "learning_rate": 7.0110564967564795e-06, "loss": 0.4489, "step": 7551 }, { "epoch": 0.3746217570315988, "grad_norm": 7.192589282989502, "learning_rate": 7.010335853682978e-06, "loss": 0.3391, "step": 7552 }, { "epoch": 0.37467136266679896, "grad_norm": 7.824216842651367, "learning_rate": 7.009615160792446e-06, "loss": 0.3377, "step": 7553 }, { "epoch": 0.3747209683019991, "grad_norm": 4.7256999015808105, "learning_rate": 7.008894418102738e-06, "loss": 0.2533, "step": 7554 }, { "epoch": 0.37477057393719926, "grad_norm": 7.127453327178955, "learning_rate": 7.00817362563172e-06, "loss": 0.2481, "step": 7555 }, { "epoch": 0.37482017957239944, "grad_norm": 7.39059591293335, "learning_rate": 7.0074527833972514e-06, "loss": 0.335, "step": 7556 }, { "epoch": 0.37486978520759956, "grad_norm": 6.709129333496094, "learning_rate": 7.006731891417193e-06, "loss": 0.3709, "step": 7557 }, { "epoch": 0.37491939084279974, "grad_norm": 7.3259758949279785, "learning_rate": 7.006010949709412e-06, "loss": 0.3181, "step": 7558 }, { "epoch": 0.3749689964779999, "grad_norm": 5.695085048675537, "learning_rate": 7.005289958291772e-06, "loss": 0.2757, "step": 7559 }, { "epoch": 0.37501860211320004, "grad_norm": 6.6742448806762695, "learning_rate": 7.00456891718214e-06, "loss": 0.2921, "step": 7560 }, { "epoch": 0.3750682077484002, "grad_norm": 6.775691032409668, "learning_rate": 7.0038478263983836e-06, "loss": 0.2493, "step": 7561 }, { "epoch": 0.3751178133836004, "grad_norm": 6.747783660888672, "learning_rate": 7.00312668595837e-06, "loss": 0.372, "step": 7562 }, { "epoch": 0.3751674190188005, "grad_norm": 13.735750198364258, "learning_rate": 7.002405495879973e-06, "loss": 0.5424, "step": 7563 }, { "epoch": 0.3752170246540007, "grad_norm": 8.279044151306152, "learning_rate": 7.001684256181061e-06, "loss": 0.2938, "step": 7564 }, { "epoch": 0.3752666302892009, "grad_norm": 4.391968250274658, "learning_rate": 7.0009629668795074e-06, "loss": 0.3051, "step": 7565 }, { "epoch": 0.375316235924401, "grad_norm": 5.050683498382568, "learning_rate": 7.0002416279931875e-06, "loss": 0.2848, "step": 7566 }, { "epoch": 0.3753658415596012, "grad_norm": 6.001181602478027, "learning_rate": 6.999520239539974e-06, "loss": 0.3199, "step": 7567 }, { "epoch": 0.37541544719480135, "grad_norm": 4.314004898071289, "learning_rate": 6.998798801537745e-06, "loss": 0.2239, "step": 7568 }, { "epoch": 0.3754650528300015, "grad_norm": 6.192883014678955, "learning_rate": 6.998077314004378e-06, "loss": 0.3046, "step": 7569 }, { "epoch": 0.37551465846520166, "grad_norm": 6.998244285583496, "learning_rate": 6.99735577695775e-06, "loss": 0.3573, "step": 7570 }, { "epoch": 0.3755642641004018, "grad_norm": 19.937015533447266, "learning_rate": 6.996634190415743e-06, "loss": 0.352, "step": 7571 }, { "epoch": 0.37561386973560196, "grad_norm": 30.149126052856445, "learning_rate": 6.9959125543962384e-06, "loss": 0.3196, "step": 7572 }, { "epoch": 0.37566347537080214, "grad_norm": 14.257759094238281, "learning_rate": 6.995190868917118e-06, "loss": 0.3358, "step": 7573 }, { "epoch": 0.37571308100600226, "grad_norm": 11.250457763671875, "learning_rate": 6.994469133996264e-06, "loss": 0.4281, "step": 7574 }, { "epoch": 0.37576268664120244, "grad_norm": 12.55660629272461, "learning_rate": 6.9937473496515625e-06, "loss": 0.4057, "step": 7575 }, { "epoch": 0.3758122922764026, "grad_norm": 4.795119285583496, "learning_rate": 6.993025515900902e-06, "loss": 0.2807, "step": 7576 }, { "epoch": 0.37586189791160274, "grad_norm": 12.6504487991333, "learning_rate": 6.992303632762165e-06, "loss": 0.3335, "step": 7577 }, { "epoch": 0.3759115035468029, "grad_norm": 7.610298156738281, "learning_rate": 6.991581700253245e-06, "loss": 0.2611, "step": 7578 }, { "epoch": 0.3759611091820031, "grad_norm": 5.498465061187744, "learning_rate": 6.990859718392028e-06, "loss": 0.1931, "step": 7579 }, { "epoch": 0.3760107148172032, "grad_norm": 7.010843276977539, "learning_rate": 6.990137687196408e-06, "loss": 0.3403, "step": 7580 }, { "epoch": 0.3760603204524034, "grad_norm": 10.620691299438477, "learning_rate": 6.989415606684274e-06, "loss": 0.2726, "step": 7581 }, { "epoch": 0.3761099260876036, "grad_norm": 7.910451889038086, "learning_rate": 6.988693476873522e-06, "loss": 0.3256, "step": 7582 }, { "epoch": 0.3761595317228037, "grad_norm": 7.475656509399414, "learning_rate": 6.987971297782048e-06, "loss": 0.3205, "step": 7583 }, { "epoch": 0.3762091373580039, "grad_norm": 4.801816463470459, "learning_rate": 6.987249069427744e-06, "loss": 0.2606, "step": 7584 }, { "epoch": 0.37625874299320405, "grad_norm": 4.747095108032227, "learning_rate": 6.9865267918285094e-06, "loss": 0.2772, "step": 7585 }, { "epoch": 0.3763083486284042, "grad_norm": 6.368175983428955, "learning_rate": 6.985804465002243e-06, "loss": 0.3192, "step": 7586 }, { "epoch": 0.37635795426360436, "grad_norm": 6.722482204437256, "learning_rate": 6.985082088966843e-06, "loss": 0.2867, "step": 7587 }, { "epoch": 0.3764075598988045, "grad_norm": 8.78406047821045, "learning_rate": 6.98435966374021e-06, "loss": 0.359, "step": 7588 }, { "epoch": 0.37645716553400466, "grad_norm": 5.967626571655273, "learning_rate": 6.9836371893402485e-06, "loss": 0.2973, "step": 7589 }, { "epoch": 0.37650677116920483, "grad_norm": 12.432586669921875, "learning_rate": 6.98291466578486e-06, "loss": 0.2182, "step": 7590 }, { "epoch": 0.37655637680440496, "grad_norm": 11.097782135009766, "learning_rate": 6.982192093091949e-06, "loss": 0.3095, "step": 7591 }, { "epoch": 0.37660598243960514, "grad_norm": 11.79262638092041, "learning_rate": 6.981469471279421e-06, "loss": 0.3076, "step": 7592 }, { "epoch": 0.3766555880748053, "grad_norm": 5.858252048492432, "learning_rate": 6.9807468003651824e-06, "loss": 0.2696, "step": 7593 }, { "epoch": 0.37670519371000544, "grad_norm": 10.342324256896973, "learning_rate": 6.980024080367142e-06, "loss": 0.3944, "step": 7594 }, { "epoch": 0.3767547993452056, "grad_norm": 7.6793131828308105, "learning_rate": 6.979301311303211e-06, "loss": 0.3243, "step": 7595 }, { "epoch": 0.3768044049804058, "grad_norm": 8.752201080322266, "learning_rate": 6.978578493191297e-06, "loss": 0.302, "step": 7596 }, { "epoch": 0.3768540106156059, "grad_norm": 20.337326049804688, "learning_rate": 6.977855626049312e-06, "loss": 0.5589, "step": 7597 }, { "epoch": 0.3769036162508061, "grad_norm": 8.382206916809082, "learning_rate": 6.977132709895171e-06, "loss": 0.3053, "step": 7598 }, { "epoch": 0.3769532218860063, "grad_norm": 9.094972610473633, "learning_rate": 6.976409744746788e-06, "loss": 0.3532, "step": 7599 }, { "epoch": 0.3770028275212064, "grad_norm": 6.2367167472839355, "learning_rate": 6.975686730622078e-06, "loss": 0.3046, "step": 7600 }, { "epoch": 0.3770524331564066, "grad_norm": 5.5109357833862305, "learning_rate": 6.974963667538954e-06, "loss": 0.2591, "step": 7601 }, { "epoch": 0.37710203879160675, "grad_norm": 4.571735382080078, "learning_rate": 6.974240555515339e-06, "loss": 0.2532, "step": 7602 }, { "epoch": 0.3771516444268069, "grad_norm": 9.487640380859375, "learning_rate": 6.9735173945691515e-06, "loss": 0.3085, "step": 7603 }, { "epoch": 0.37720125006200705, "grad_norm": 15.168983459472656, "learning_rate": 6.972794184718308e-06, "loss": 0.3776, "step": 7604 }, { "epoch": 0.3772508556972072, "grad_norm": 6.221713542938232, "learning_rate": 6.972070925980732e-06, "loss": 0.2146, "step": 7605 }, { "epoch": 0.37730046133240736, "grad_norm": 4.987672805786133, "learning_rate": 6.9713476183743464e-06, "loss": 0.2926, "step": 7606 }, { "epoch": 0.37735006696760753, "grad_norm": 15.432028770446777, "learning_rate": 6.970624261917077e-06, "loss": 0.364, "step": 7607 }, { "epoch": 0.37739967260280766, "grad_norm": 6.801698207855225, "learning_rate": 6.9699008566268456e-06, "loss": 0.2971, "step": 7608 }, { "epoch": 0.37744927823800783, "grad_norm": 12.043304443359375, "learning_rate": 6.96917740252158e-06, "loss": 0.3297, "step": 7609 }, { "epoch": 0.377498883873208, "grad_norm": 5.500181674957275, "learning_rate": 6.968453899619207e-06, "loss": 0.267, "step": 7610 }, { "epoch": 0.37754848950840814, "grad_norm": 6.666325092315674, "learning_rate": 6.967730347937658e-06, "loss": 0.2876, "step": 7611 }, { "epoch": 0.3775980951436083, "grad_norm": 11.290613174438477, "learning_rate": 6.967006747494857e-06, "loss": 0.3713, "step": 7612 }, { "epoch": 0.3776477007788085, "grad_norm": 6.586376667022705, "learning_rate": 6.9662830983087435e-06, "loss": 0.3604, "step": 7613 }, { "epoch": 0.3776973064140086, "grad_norm": 4.383332252502441, "learning_rate": 6.965559400397244e-06, "loss": 0.1944, "step": 7614 }, { "epoch": 0.3777469120492088, "grad_norm": 6.143556118011475, "learning_rate": 6.964835653778292e-06, "loss": 0.2829, "step": 7615 }, { "epoch": 0.377796517684409, "grad_norm": 5.395646095275879, "learning_rate": 6.964111858469824e-06, "loss": 0.1861, "step": 7616 }, { "epoch": 0.3778461233196091, "grad_norm": 10.456192970275879, "learning_rate": 6.963388014489777e-06, "loss": 0.3527, "step": 7617 }, { "epoch": 0.3778957289548093, "grad_norm": 14.989723205566406, "learning_rate": 6.962664121856085e-06, "loss": 0.3922, "step": 7618 }, { "epoch": 0.37794533459000945, "grad_norm": 5.7199859619140625, "learning_rate": 6.9619401805866904e-06, "loss": 0.26, "step": 7619 }, { "epoch": 0.3779949402252096, "grad_norm": 12.660694122314453, "learning_rate": 6.961216190699529e-06, "loss": 0.2967, "step": 7620 }, { "epoch": 0.37804454586040975, "grad_norm": 13.935731887817383, "learning_rate": 6.9604921522125434e-06, "loss": 0.3345, "step": 7621 }, { "epoch": 0.3780941514956099, "grad_norm": 9.238943099975586, "learning_rate": 6.959768065143676e-06, "loss": 0.3423, "step": 7622 }, { "epoch": 0.37814375713081005, "grad_norm": 4.1597747802734375, "learning_rate": 6.95904392951087e-06, "loss": 0.2609, "step": 7623 }, { "epoch": 0.37819336276601023, "grad_norm": 4.887661933898926, "learning_rate": 6.958319745332068e-06, "loss": 0.2407, "step": 7624 }, { "epoch": 0.37824296840121036, "grad_norm": 5.445485591888428, "learning_rate": 6.957595512625217e-06, "loss": 0.3428, "step": 7625 }, { "epoch": 0.37829257403641053, "grad_norm": 14.15648078918457, "learning_rate": 6.9568712314082646e-06, "loss": 0.3471, "step": 7626 }, { "epoch": 0.3783421796716107, "grad_norm": 8.725546836853027, "learning_rate": 6.956146901699158e-06, "loss": 0.236, "step": 7627 }, { "epoch": 0.37839178530681084, "grad_norm": 9.81650447845459, "learning_rate": 6.955422523515846e-06, "loss": 0.2442, "step": 7628 }, { "epoch": 0.378441390942011, "grad_norm": 7.673025608062744, "learning_rate": 6.954698096876278e-06, "loss": 0.3112, "step": 7629 }, { "epoch": 0.3784909965772112, "grad_norm": 5.92024040222168, "learning_rate": 6.953973621798409e-06, "loss": 0.3046, "step": 7630 }, { "epoch": 0.3785406022124113, "grad_norm": 7.147467613220215, "learning_rate": 6.953249098300189e-06, "loss": 0.3353, "step": 7631 }, { "epoch": 0.3785902078476115, "grad_norm": 14.307558059692383, "learning_rate": 6.952524526399571e-06, "loss": 0.322, "step": 7632 }, { "epoch": 0.37863981348281167, "grad_norm": 23.9403018951416, "learning_rate": 6.951799906114513e-06, "loss": 0.2524, "step": 7633 }, { "epoch": 0.3786894191180118, "grad_norm": 5.060797691345215, "learning_rate": 6.951075237462973e-06, "loss": 0.3052, "step": 7634 }, { "epoch": 0.378739024753212, "grad_norm": 5.383914470672607, "learning_rate": 6.950350520462903e-06, "loss": 0.2907, "step": 7635 }, { "epoch": 0.37878863038841215, "grad_norm": 9.984024047851562, "learning_rate": 6.949625755132266e-06, "loss": 0.3157, "step": 7636 }, { "epoch": 0.3788382360236123, "grad_norm": 7.324678421020508, "learning_rate": 6.948900941489022e-06, "loss": 0.3474, "step": 7637 }, { "epoch": 0.37888784165881245, "grad_norm": 41.50089645385742, "learning_rate": 6.948176079551129e-06, "loss": 0.3242, "step": 7638 }, { "epoch": 0.3789374472940126, "grad_norm": 4.476964950561523, "learning_rate": 6.947451169336552e-06, "loss": 0.3539, "step": 7639 }, { "epoch": 0.37898705292921275, "grad_norm": 5.956202507019043, "learning_rate": 6.946726210863255e-06, "loss": 0.3209, "step": 7640 }, { "epoch": 0.37903665856441293, "grad_norm": 5.292600631713867, "learning_rate": 6.946001204149202e-06, "loss": 0.3106, "step": 7641 }, { "epoch": 0.37908626419961305, "grad_norm": 6.910115718841553, "learning_rate": 6.945276149212358e-06, "loss": 0.3322, "step": 7642 }, { "epoch": 0.37913586983481323, "grad_norm": 6.104470729827881, "learning_rate": 6.944551046070691e-06, "loss": 0.2647, "step": 7643 }, { "epoch": 0.3791854754700134, "grad_norm": 6.699631690979004, "learning_rate": 6.94382589474217e-06, "loss": 0.3879, "step": 7644 }, { "epoch": 0.37923508110521353, "grad_norm": 12.930503845214844, "learning_rate": 6.943100695244763e-06, "loss": 0.3475, "step": 7645 }, { "epoch": 0.3792846867404137, "grad_norm": 10.107372283935547, "learning_rate": 6.9423754475964435e-06, "loss": 0.3321, "step": 7646 }, { "epoch": 0.3793342923756139, "grad_norm": 11.283998489379883, "learning_rate": 6.941650151815181e-06, "loss": 0.2617, "step": 7647 }, { "epoch": 0.379383898010814, "grad_norm": 3.7812368869781494, "learning_rate": 6.940924807918949e-06, "loss": 0.214, "step": 7648 }, { "epoch": 0.3794335036460142, "grad_norm": 6.2857866287231445, "learning_rate": 6.9401994159257225e-06, "loss": 0.3238, "step": 7649 }, { "epoch": 0.37948310928121437, "grad_norm": 7.090737819671631, "learning_rate": 6.939473975853477e-06, "loss": 0.2675, "step": 7650 }, { "epoch": 0.3795327149164145, "grad_norm": 5.423182964324951, "learning_rate": 6.9387484877201885e-06, "loss": 0.3058, "step": 7651 }, { "epoch": 0.37958232055161467, "grad_norm": 4.7629618644714355, "learning_rate": 6.938022951543837e-06, "loss": 0.3326, "step": 7652 }, { "epoch": 0.37963192618681485, "grad_norm": 11.473617553710938, "learning_rate": 6.937297367342399e-06, "loss": 0.3189, "step": 7653 }, { "epoch": 0.379681531822015, "grad_norm": 6.272560119628906, "learning_rate": 6.936571735133854e-06, "loss": 0.2597, "step": 7654 }, { "epoch": 0.37973113745721515, "grad_norm": 5.545653343200684, "learning_rate": 6.935846054936188e-06, "loss": 0.1902, "step": 7655 }, { "epoch": 0.3797807430924153, "grad_norm": 5.9856157302856445, "learning_rate": 6.935120326767381e-06, "loss": 0.2508, "step": 7656 }, { "epoch": 0.37983034872761545, "grad_norm": 8.19546127319336, "learning_rate": 6.934394550645417e-06, "loss": 0.3612, "step": 7657 }, { "epoch": 0.37987995436281563, "grad_norm": 8.31806755065918, "learning_rate": 6.933668726588279e-06, "loss": 0.3618, "step": 7658 }, { "epoch": 0.37992955999801575, "grad_norm": 5.657055377960205, "learning_rate": 6.932942854613957e-06, "loss": 0.276, "step": 7659 }, { "epoch": 0.37997916563321593, "grad_norm": 12.097312927246094, "learning_rate": 6.9322169347404365e-06, "loss": 0.4429, "step": 7660 }, { "epoch": 0.3800287712684161, "grad_norm": 11.698113441467285, "learning_rate": 6.931490966985707e-06, "loss": 0.3823, "step": 7661 }, { "epoch": 0.38007837690361623, "grad_norm": 8.323229789733887, "learning_rate": 6.9307649513677565e-06, "loss": 0.2618, "step": 7662 }, { "epoch": 0.3801279825388164, "grad_norm": 5.330113410949707, "learning_rate": 6.930038887904578e-06, "loss": 0.3089, "step": 7663 }, { "epoch": 0.3801775881740166, "grad_norm": 5.092170715332031, "learning_rate": 6.929312776614162e-06, "loss": 0.1826, "step": 7664 }, { "epoch": 0.3802271938092167, "grad_norm": 26.593093872070312, "learning_rate": 6.928586617514503e-06, "loss": 0.4966, "step": 7665 }, { "epoch": 0.3802767994444169, "grad_norm": 11.804747581481934, "learning_rate": 6.9278604106235945e-06, "loss": 0.4445, "step": 7666 }, { "epoch": 0.38032640507961707, "grad_norm": 6.297605514526367, "learning_rate": 6.927134155959434e-06, "loss": 0.3218, "step": 7667 }, { "epoch": 0.3803760107148172, "grad_norm": 11.246105194091797, "learning_rate": 6.926407853540017e-06, "loss": 0.3633, "step": 7668 }, { "epoch": 0.38042561635001737, "grad_norm": 6.842497825622559, "learning_rate": 6.925681503383342e-06, "loss": 0.3766, "step": 7669 }, { "epoch": 0.3804752219852175, "grad_norm": 5.840522766113281, "learning_rate": 6.924955105507408e-06, "loss": 0.3009, "step": 7670 }, { "epoch": 0.38052482762041767, "grad_norm": 7.366410732269287, "learning_rate": 6.924228659930216e-06, "loss": 0.3252, "step": 7671 }, { "epoch": 0.38057443325561785, "grad_norm": 17.72989845275879, "learning_rate": 6.923502166669768e-06, "loss": 0.4861, "step": 7672 }, { "epoch": 0.380624038890818, "grad_norm": 5.546792507171631, "learning_rate": 6.922775625744065e-06, "loss": 0.2443, "step": 7673 }, { "epoch": 0.38067364452601815, "grad_norm": 6.999748229980469, "learning_rate": 6.922049037171113e-06, "loss": 0.3074, "step": 7674 }, { "epoch": 0.38072325016121833, "grad_norm": 6.279211044311523, "learning_rate": 6.921322400968916e-06, "loss": 0.2365, "step": 7675 }, { "epoch": 0.38077285579641845, "grad_norm": 7.313372611999512, "learning_rate": 6.920595717155481e-06, "loss": 0.3448, "step": 7676 }, { "epoch": 0.38082246143161863, "grad_norm": 4.43379020690918, "learning_rate": 6.919868985748815e-06, "loss": 0.2304, "step": 7677 }, { "epoch": 0.3808720670668188, "grad_norm": 10.09178638458252, "learning_rate": 6.919142206766929e-06, "loss": 0.3626, "step": 7678 }, { "epoch": 0.38092167270201893, "grad_norm": 5.501733779907227, "learning_rate": 6.918415380227829e-06, "loss": 0.279, "step": 7679 }, { "epoch": 0.3809712783372191, "grad_norm": 7.85560941696167, "learning_rate": 6.917688506149528e-06, "loss": 0.3619, "step": 7680 }, { "epoch": 0.3810208839724193, "grad_norm": 8.552980422973633, "learning_rate": 6.916961584550039e-06, "loss": 0.3684, "step": 7681 }, { "epoch": 0.3810704896076194, "grad_norm": 9.92198371887207, "learning_rate": 6.916234615447375e-06, "loss": 0.3689, "step": 7682 }, { "epoch": 0.3811200952428196, "grad_norm": 4.686771392822266, "learning_rate": 6.91550759885955e-06, "loss": 0.303, "step": 7683 }, { "epoch": 0.38116970087801977, "grad_norm": 5.275847911834717, "learning_rate": 6.9147805348045815e-06, "loss": 0.3236, "step": 7684 }, { "epoch": 0.3812193065132199, "grad_norm": 8.261143684387207, "learning_rate": 6.914053423300483e-06, "loss": 0.3754, "step": 7685 }, { "epoch": 0.38126891214842007, "grad_norm": 4.997560024261475, "learning_rate": 6.913326264365275e-06, "loss": 0.2586, "step": 7686 }, { "epoch": 0.3813185177836202, "grad_norm": 7.4385271072387695, "learning_rate": 6.912599058016977e-06, "loss": 0.3055, "step": 7687 }, { "epoch": 0.38136812341882037, "grad_norm": 7.21614408493042, "learning_rate": 6.911871804273611e-06, "loss": 0.3865, "step": 7688 }, { "epoch": 0.38141772905402055, "grad_norm": 6.620090484619141, "learning_rate": 6.911144503153195e-06, "loss": 0.2619, "step": 7689 }, { "epoch": 0.38146733468922067, "grad_norm": 6.686376571655273, "learning_rate": 6.9104171546737555e-06, "loss": 0.3631, "step": 7690 }, { "epoch": 0.38151694032442085, "grad_norm": 5.232331275939941, "learning_rate": 6.909689758853314e-06, "loss": 0.2276, "step": 7691 }, { "epoch": 0.38156654595962103, "grad_norm": 7.866064548492432, "learning_rate": 6.908962315709895e-06, "loss": 0.2836, "step": 7692 }, { "epoch": 0.38161615159482115, "grad_norm": 6.236809253692627, "learning_rate": 6.908234825261527e-06, "loss": 0.3772, "step": 7693 }, { "epoch": 0.38166575723002133, "grad_norm": 12.549954414367676, "learning_rate": 6.907507287526238e-06, "loss": 0.4176, "step": 7694 }, { "epoch": 0.3817153628652215, "grad_norm": 4.713645935058594, "learning_rate": 6.906779702522055e-06, "loss": 0.1912, "step": 7695 }, { "epoch": 0.38176496850042163, "grad_norm": 3.9541971683502197, "learning_rate": 6.9060520702670075e-06, "loss": 0.2583, "step": 7696 }, { "epoch": 0.3818145741356218, "grad_norm": 14.398659706115723, "learning_rate": 6.9053243907791275e-06, "loss": 0.4081, "step": 7697 }, { "epoch": 0.381864179770822, "grad_norm": 8.03490161895752, "learning_rate": 6.904596664076449e-06, "loss": 0.3177, "step": 7698 }, { "epoch": 0.3819137854060221, "grad_norm": 7.078262805938721, "learning_rate": 6.903868890177002e-06, "loss": 0.4208, "step": 7699 }, { "epoch": 0.3819633910412223, "grad_norm": 8.966767311096191, "learning_rate": 6.903141069098822e-06, "loss": 0.4199, "step": 7700 }, { "epoch": 0.38201299667642247, "grad_norm": 4.684469699859619, "learning_rate": 6.9024132008599474e-06, "loss": 0.3388, "step": 7701 }, { "epoch": 0.3820626023116226, "grad_norm": 8.366095542907715, "learning_rate": 6.901685285478412e-06, "loss": 0.3954, "step": 7702 }, { "epoch": 0.38211220794682277, "grad_norm": 6.196014881134033, "learning_rate": 6.900957322972256e-06, "loss": 0.2013, "step": 7703 }, { "epoch": 0.3821618135820229, "grad_norm": 10.830774307250977, "learning_rate": 6.900229313359517e-06, "loss": 0.3292, "step": 7704 }, { "epoch": 0.38221141921722307, "grad_norm": 5.009799003601074, "learning_rate": 6.899501256658235e-06, "loss": 0.2502, "step": 7705 }, { "epoch": 0.38226102485242325, "grad_norm": 5.904954433441162, "learning_rate": 6.898773152886455e-06, "loss": 0.2374, "step": 7706 }, { "epoch": 0.38231063048762337, "grad_norm": 10.102376937866211, "learning_rate": 6.898045002062215e-06, "loss": 0.2926, "step": 7707 }, { "epoch": 0.38236023612282355, "grad_norm": 7.036323070526123, "learning_rate": 6.897316804203562e-06, "loss": 0.3538, "step": 7708 }, { "epoch": 0.3824098417580237, "grad_norm": 5.287701606750488, "learning_rate": 6.896588559328541e-06, "loss": 0.2385, "step": 7709 }, { "epoch": 0.38245944739322385, "grad_norm": 13.30910873413086, "learning_rate": 6.895860267455197e-06, "loss": 0.4453, "step": 7710 }, { "epoch": 0.38250905302842403, "grad_norm": 13.058077812194824, "learning_rate": 6.895131928601578e-06, "loss": 0.2711, "step": 7711 }, { "epoch": 0.3825586586636242, "grad_norm": 10.293916702270508, "learning_rate": 6.894403542785732e-06, "loss": 0.3863, "step": 7712 }, { "epoch": 0.38260826429882433, "grad_norm": 4.752939224243164, "learning_rate": 6.89367511002571e-06, "loss": 0.2819, "step": 7713 }, { "epoch": 0.3826578699340245, "grad_norm": 9.789870262145996, "learning_rate": 6.892946630339562e-06, "loss": 0.339, "step": 7714 }, { "epoch": 0.3827074755692247, "grad_norm": 3.6488890647888184, "learning_rate": 6.892218103745341e-06, "loss": 0.2511, "step": 7715 }, { "epoch": 0.3827570812044248, "grad_norm": 4.551019668579102, "learning_rate": 6.891489530261099e-06, "loss": 0.1488, "step": 7716 }, { "epoch": 0.382806686839625, "grad_norm": 22.22185516357422, "learning_rate": 6.89076090990489e-06, "loss": 0.3335, "step": 7717 }, { "epoch": 0.38285629247482517, "grad_norm": 8.958740234375, "learning_rate": 6.890032242694772e-06, "loss": 0.2881, "step": 7718 }, { "epoch": 0.3829058981100253, "grad_norm": 7.110162734985352, "learning_rate": 6.889303528648799e-06, "loss": 0.3309, "step": 7719 }, { "epoch": 0.38295550374522547, "grad_norm": 5.395248889923096, "learning_rate": 6.888574767785029e-06, "loss": 0.2906, "step": 7720 }, { "epoch": 0.3830051093804256, "grad_norm": 8.162710189819336, "learning_rate": 6.887845960121524e-06, "loss": 0.2419, "step": 7721 }, { "epoch": 0.38305471501562577, "grad_norm": 5.353063106536865, "learning_rate": 6.887117105676341e-06, "loss": 0.3499, "step": 7722 }, { "epoch": 0.38310432065082595, "grad_norm": 9.009053230285645, "learning_rate": 6.8863882044675425e-06, "loss": 0.3303, "step": 7723 }, { "epoch": 0.38315392628602607, "grad_norm": 6.892003059387207, "learning_rate": 6.885659256513191e-06, "loss": 0.3586, "step": 7724 }, { "epoch": 0.38320353192122625, "grad_norm": 17.579448699951172, "learning_rate": 6.8849302618313505e-06, "loss": 0.4349, "step": 7725 }, { "epoch": 0.3832531375564264, "grad_norm": 12.170915603637695, "learning_rate": 6.884201220440084e-06, "loss": 0.3796, "step": 7726 }, { "epoch": 0.38330274319162655, "grad_norm": 7.630892276763916, "learning_rate": 6.88347213235746e-06, "loss": 0.3222, "step": 7727 }, { "epoch": 0.3833523488268267, "grad_norm": 5.442229747772217, "learning_rate": 6.882742997601546e-06, "loss": 0.313, "step": 7728 }, { "epoch": 0.3834019544620269, "grad_norm": 9.646807670593262, "learning_rate": 6.882013816190407e-06, "loss": 0.256, "step": 7729 }, { "epoch": 0.38345156009722703, "grad_norm": 8.283317565917969, "learning_rate": 6.881284588142114e-06, "loss": 0.2765, "step": 7730 }, { "epoch": 0.3835011657324272, "grad_norm": 11.434330940246582, "learning_rate": 6.88055531347474e-06, "loss": 0.4106, "step": 7731 }, { "epoch": 0.3835507713676274, "grad_norm": 5.899059772491455, "learning_rate": 6.879825992206353e-06, "loss": 0.3339, "step": 7732 }, { "epoch": 0.3836003770028275, "grad_norm": 6.571139335632324, "learning_rate": 6.879096624355028e-06, "loss": 0.2238, "step": 7733 }, { "epoch": 0.3836499826380277, "grad_norm": 7.081805229187012, "learning_rate": 6.878367209938839e-06, "loss": 0.3391, "step": 7734 }, { "epoch": 0.38369958827322787, "grad_norm": 6.269684314727783, "learning_rate": 6.877637748975861e-06, "loss": 0.2378, "step": 7735 }, { "epoch": 0.383749193908428, "grad_norm": 12.369800567626953, "learning_rate": 6.876908241484169e-06, "loss": 0.4128, "step": 7736 }, { "epoch": 0.38379879954362817, "grad_norm": 6.3968329429626465, "learning_rate": 6.876178687481843e-06, "loss": 0.3368, "step": 7737 }, { "epoch": 0.3838484051788283, "grad_norm": 7.329506874084473, "learning_rate": 6.875449086986961e-06, "loss": 0.2768, "step": 7738 }, { "epoch": 0.38389801081402847, "grad_norm": 5.123520851135254, "learning_rate": 6.8747194400175995e-06, "loss": 0.3297, "step": 7739 }, { "epoch": 0.38394761644922865, "grad_norm": 5.649885177612305, "learning_rate": 6.873989746591844e-06, "loss": 0.3069, "step": 7740 }, { "epoch": 0.38399722208442877, "grad_norm": 5.300861358642578, "learning_rate": 6.873260006727776e-06, "loss": 0.2911, "step": 7741 }, { "epoch": 0.38404682771962895, "grad_norm": 4.2316083908081055, "learning_rate": 6.872530220443477e-06, "loss": 0.2795, "step": 7742 }, { "epoch": 0.3840964333548291, "grad_norm": 11.107213973999023, "learning_rate": 6.8718003877570304e-06, "loss": 0.3548, "step": 7743 }, { "epoch": 0.38414603899002925, "grad_norm": 8.334728240966797, "learning_rate": 6.871070508686525e-06, "loss": 0.3204, "step": 7744 }, { "epoch": 0.3841956446252294, "grad_norm": 5.41296911239624, "learning_rate": 6.8703405832500454e-06, "loss": 0.2907, "step": 7745 }, { "epoch": 0.3842452502604296, "grad_norm": 7.481489658355713, "learning_rate": 6.86961061146568e-06, "loss": 0.3289, "step": 7746 }, { "epoch": 0.38429485589562973, "grad_norm": 5.724315643310547, "learning_rate": 6.868880593351519e-06, "loss": 0.2996, "step": 7747 }, { "epoch": 0.3843444615308299, "grad_norm": 5.933610916137695, "learning_rate": 6.868150528925651e-06, "loss": 0.301, "step": 7748 }, { "epoch": 0.3843940671660301, "grad_norm": 12.034856796264648, "learning_rate": 6.867420418206168e-06, "loss": 0.5059, "step": 7749 }, { "epoch": 0.3844436728012302, "grad_norm": 3.648986339569092, "learning_rate": 6.866690261211162e-06, "loss": 0.2897, "step": 7750 }, { "epoch": 0.3844932784364304, "grad_norm": 10.032470703125, "learning_rate": 6.865960057958726e-06, "loss": 0.3325, "step": 7751 }, { "epoch": 0.38454288407163056, "grad_norm": 5.219058036804199, "learning_rate": 6.865229808466958e-06, "loss": 0.3024, "step": 7752 }, { "epoch": 0.3845924897068307, "grad_norm": 5.556284427642822, "learning_rate": 6.864499512753949e-06, "loss": 0.2678, "step": 7753 }, { "epoch": 0.38464209534203087, "grad_norm": 6.725539207458496, "learning_rate": 6.8637691708377995e-06, "loss": 0.3841, "step": 7754 }, { "epoch": 0.384691700977231, "grad_norm": 11.172492980957031, "learning_rate": 6.863038782736608e-06, "loss": 0.3608, "step": 7755 }, { "epoch": 0.38474130661243117, "grad_norm": 5.291830062866211, "learning_rate": 6.862308348468472e-06, "loss": 0.3149, "step": 7756 }, { "epoch": 0.38479091224763134, "grad_norm": 4.6139326095581055, "learning_rate": 6.861577868051492e-06, "loss": 0.2176, "step": 7757 }, { "epoch": 0.38484051788283147, "grad_norm": 5.887146472930908, "learning_rate": 6.860847341503771e-06, "loss": 0.3309, "step": 7758 }, { "epoch": 0.38489012351803165, "grad_norm": 5.714312553405762, "learning_rate": 6.8601167688434115e-06, "loss": 0.3641, "step": 7759 }, { "epoch": 0.3849397291532318, "grad_norm": 6.282626152038574, "learning_rate": 6.859386150088517e-06, "loss": 0.3311, "step": 7760 }, { "epoch": 0.38498933478843195, "grad_norm": 7.009016990661621, "learning_rate": 6.8586554852571925e-06, "loss": 0.3562, "step": 7761 }, { "epoch": 0.3850389404236321, "grad_norm": 4.764088153839111, "learning_rate": 6.857924774367544e-06, "loss": 0.3035, "step": 7762 }, { "epoch": 0.3850885460588323, "grad_norm": 6.092294216156006, "learning_rate": 6.857194017437678e-06, "loss": 0.3512, "step": 7763 }, { "epoch": 0.3851381516940324, "grad_norm": 9.420722007751465, "learning_rate": 6.8564632144857066e-06, "loss": 0.3598, "step": 7764 }, { "epoch": 0.3851877573292326, "grad_norm": 7.286437511444092, "learning_rate": 6.855732365529736e-06, "loss": 0.3081, "step": 7765 }, { "epoch": 0.3852373629644328, "grad_norm": 4.192716121673584, "learning_rate": 6.855001470587877e-06, "loss": 0.1846, "step": 7766 }, { "epoch": 0.3852869685996329, "grad_norm": 4.800869941711426, "learning_rate": 6.854270529678243e-06, "loss": 0.2882, "step": 7767 }, { "epoch": 0.3853365742348331, "grad_norm": 4.346072673797607, "learning_rate": 6.853539542818946e-06, "loss": 0.2556, "step": 7768 }, { "epoch": 0.38538617987003326, "grad_norm": 4.607879638671875, "learning_rate": 6.8528085100281024e-06, "loss": 0.3033, "step": 7769 }, { "epoch": 0.3854357855052334, "grad_norm": 9.577198028564453, "learning_rate": 6.852077431323825e-06, "loss": 0.3601, "step": 7770 }, { "epoch": 0.38548539114043356, "grad_norm": 6.192426681518555, "learning_rate": 6.85134630672423e-06, "loss": 0.3174, "step": 7771 }, { "epoch": 0.3855349967756337, "grad_norm": 10.109339714050293, "learning_rate": 6.850615136247439e-06, "loss": 0.2944, "step": 7772 }, { "epoch": 0.38558460241083387, "grad_norm": 6.87660026550293, "learning_rate": 6.849883919911564e-06, "loss": 0.2878, "step": 7773 }, { "epoch": 0.38563420804603404, "grad_norm": 4.055756092071533, "learning_rate": 6.849152657734731e-06, "loss": 0.2769, "step": 7774 }, { "epoch": 0.38568381368123417, "grad_norm": 5.171581268310547, "learning_rate": 6.848421349735058e-06, "loss": 0.2438, "step": 7775 }, { "epoch": 0.38573341931643434, "grad_norm": 11.288330078125, "learning_rate": 6.847689995930669e-06, "loss": 0.4137, "step": 7776 }, { "epoch": 0.3857830249516345, "grad_norm": 9.109583854675293, "learning_rate": 6.846958596339683e-06, "loss": 0.3052, "step": 7777 }, { "epoch": 0.38583263058683465, "grad_norm": 7.073576927185059, "learning_rate": 6.84622715098023e-06, "loss": 0.3905, "step": 7778 }, { "epoch": 0.3858822362220348, "grad_norm": 4.968140602111816, "learning_rate": 6.8454956598704335e-06, "loss": 0.3224, "step": 7779 }, { "epoch": 0.385931841857235, "grad_norm": 6.21671724319458, "learning_rate": 6.844764123028419e-06, "loss": 0.3154, "step": 7780 }, { "epoch": 0.3859814474924351, "grad_norm": 16.323535919189453, "learning_rate": 6.844032540472314e-06, "loss": 0.3129, "step": 7781 }, { "epoch": 0.3860310531276353, "grad_norm": 12.44984245300293, "learning_rate": 6.84330091222025e-06, "loss": 0.4139, "step": 7782 }, { "epoch": 0.3860806587628355, "grad_norm": 11.82811164855957, "learning_rate": 6.842569238290355e-06, "loss": 0.3907, "step": 7783 }, { "epoch": 0.3861302643980356, "grad_norm": 11.379559516906738, "learning_rate": 6.84183751870076e-06, "loss": 0.3146, "step": 7784 }, { "epoch": 0.3861798700332358, "grad_norm": 5.333043098449707, "learning_rate": 6.841105753469599e-06, "loss": 0.2335, "step": 7785 }, { "epoch": 0.38622947566843596, "grad_norm": 4.967715263366699, "learning_rate": 6.840373942615005e-06, "loss": 0.2243, "step": 7786 }, { "epoch": 0.3862790813036361, "grad_norm": 10.022710800170898, "learning_rate": 6.839642086155111e-06, "loss": 0.3624, "step": 7787 }, { "epoch": 0.38632868693883626, "grad_norm": 5.864272594451904, "learning_rate": 6.8389101841080555e-06, "loss": 0.3247, "step": 7788 }, { "epoch": 0.3863782925740364, "grad_norm": 7.746621131896973, "learning_rate": 6.838178236491974e-06, "loss": 0.3694, "step": 7789 }, { "epoch": 0.38642789820923656, "grad_norm": 5.441708087921143, "learning_rate": 6.837446243325002e-06, "loss": 0.225, "step": 7790 }, { "epoch": 0.38647750384443674, "grad_norm": 10.099761009216309, "learning_rate": 6.836714204625282e-06, "loss": 0.3201, "step": 7791 }, { "epoch": 0.38652710947963687, "grad_norm": 7.709654331207275, "learning_rate": 6.835982120410955e-06, "loss": 0.3326, "step": 7792 }, { "epoch": 0.38657671511483704, "grad_norm": 5.046462059020996, "learning_rate": 6.835249990700158e-06, "loss": 0.3537, "step": 7793 }, { "epoch": 0.3866263207500372, "grad_norm": 8.220141410827637, "learning_rate": 6.834517815511037e-06, "loss": 0.3256, "step": 7794 }, { "epoch": 0.38667592638523735, "grad_norm": 4.878993034362793, "learning_rate": 6.833785594861734e-06, "loss": 0.3065, "step": 7795 }, { "epoch": 0.3867255320204375, "grad_norm": 12.213685035705566, "learning_rate": 6.833053328770396e-06, "loss": 0.3138, "step": 7796 }, { "epoch": 0.3867751376556377, "grad_norm": 4.233381271362305, "learning_rate": 6.832321017255165e-06, "loss": 0.2688, "step": 7797 }, { "epoch": 0.3868247432908378, "grad_norm": 7.946172714233398, "learning_rate": 6.831588660334192e-06, "loss": 0.3615, "step": 7798 }, { "epoch": 0.386874348926038, "grad_norm": 6.702083110809326, "learning_rate": 6.830856258025624e-06, "loss": 0.3722, "step": 7799 }, { "epoch": 0.3869239545612382, "grad_norm": 6.760904312133789, "learning_rate": 6.830123810347609e-06, "loss": 0.2871, "step": 7800 }, { "epoch": 0.3869735601964383, "grad_norm": 7.109576225280762, "learning_rate": 6.829391317318298e-06, "loss": 0.3613, "step": 7801 }, { "epoch": 0.3870231658316385, "grad_norm": 9.915914535522461, "learning_rate": 6.8286587789558425e-06, "loss": 0.3329, "step": 7802 }, { "epoch": 0.3870727714668386, "grad_norm": 8.067110061645508, "learning_rate": 6.827926195278396e-06, "loss": 0.2693, "step": 7803 }, { "epoch": 0.3871223771020388, "grad_norm": 10.620550155639648, "learning_rate": 6.827193566304111e-06, "loss": 0.3011, "step": 7804 }, { "epoch": 0.38717198273723896, "grad_norm": 7.712658882141113, "learning_rate": 6.826460892051142e-06, "loss": 0.2905, "step": 7805 }, { "epoch": 0.3872215883724391, "grad_norm": 4.8022613525390625, "learning_rate": 6.825728172537648e-06, "loss": 0.2718, "step": 7806 }, { "epoch": 0.38727119400763926, "grad_norm": 4.956416130065918, "learning_rate": 6.8249954077817824e-06, "loss": 0.3647, "step": 7807 }, { "epoch": 0.38732079964283944, "grad_norm": 4.236588954925537, "learning_rate": 6.8242625978017054e-06, "loss": 0.2722, "step": 7808 }, { "epoch": 0.38737040527803956, "grad_norm": 9.012616157531738, "learning_rate": 6.823529742615577e-06, "loss": 0.4515, "step": 7809 }, { "epoch": 0.38742001091323974, "grad_norm": 4.584346771240234, "learning_rate": 6.822796842241555e-06, "loss": 0.3474, "step": 7810 }, { "epoch": 0.3874696165484399, "grad_norm": 9.72087574005127, "learning_rate": 6.822063896697803e-06, "loss": 0.4227, "step": 7811 }, { "epoch": 0.38751922218364004, "grad_norm": 5.9721550941467285, "learning_rate": 6.821330906002485e-06, "loss": 0.3244, "step": 7812 }, { "epoch": 0.3875688278188402, "grad_norm": 8.460460662841797, "learning_rate": 6.820597870173762e-06, "loss": 0.3637, "step": 7813 }, { "epoch": 0.3876184334540404, "grad_norm": 11.383602142333984, "learning_rate": 6.8198647892298e-06, "loss": 0.4435, "step": 7814 }, { "epoch": 0.3876680390892405, "grad_norm": 11.416595458984375, "learning_rate": 6.819131663188766e-06, "loss": 0.3848, "step": 7815 }, { "epoch": 0.3877176447244407, "grad_norm": 4.6000494956970215, "learning_rate": 6.818398492068826e-06, "loss": 0.2394, "step": 7816 }, { "epoch": 0.3877672503596409, "grad_norm": 5.061651229858398, "learning_rate": 6.817665275888149e-06, "loss": 0.3073, "step": 7817 }, { "epoch": 0.387816855994841, "grad_norm": 10.661062240600586, "learning_rate": 6.816932014664905e-06, "loss": 0.2789, "step": 7818 }, { "epoch": 0.3878664616300412, "grad_norm": 14.415078163146973, "learning_rate": 6.816198708417263e-06, "loss": 0.4143, "step": 7819 }, { "epoch": 0.3879160672652413, "grad_norm": 7.086989402770996, "learning_rate": 6.815465357163396e-06, "loss": 0.3627, "step": 7820 }, { "epoch": 0.3879656729004415, "grad_norm": 7.550416469573975, "learning_rate": 6.8147319609214745e-06, "loss": 0.2681, "step": 7821 }, { "epoch": 0.38801527853564166, "grad_norm": 6.752048015594482, "learning_rate": 6.813998519709674e-06, "loss": 0.2612, "step": 7822 }, { "epoch": 0.3880648841708418, "grad_norm": 3.97847580909729, "learning_rate": 6.813265033546173e-06, "loss": 0.2931, "step": 7823 }, { "epoch": 0.38811448980604196, "grad_norm": 8.884696960449219, "learning_rate": 6.812531502449142e-06, "loss": 0.2956, "step": 7824 }, { "epoch": 0.38816409544124214, "grad_norm": 6.243718147277832, "learning_rate": 6.811797926436761e-06, "loss": 0.2848, "step": 7825 }, { "epoch": 0.38821370107644226, "grad_norm": 7.82668399810791, "learning_rate": 6.811064305527207e-06, "loss": 0.3706, "step": 7826 }, { "epoch": 0.38826330671164244, "grad_norm": 14.333024978637695, "learning_rate": 6.8103306397386616e-06, "loss": 0.3025, "step": 7827 }, { "epoch": 0.3883129123468426, "grad_norm": 5.27127742767334, "learning_rate": 6.809596929089303e-06, "loss": 0.2464, "step": 7828 }, { "epoch": 0.38836251798204274, "grad_norm": 5.4435038566589355, "learning_rate": 6.8088631735973135e-06, "loss": 0.2365, "step": 7829 }, { "epoch": 0.3884121236172429, "grad_norm": 8.887669563293457, "learning_rate": 6.8081293732808785e-06, "loss": 0.3327, "step": 7830 }, { "epoch": 0.3884617292524431, "grad_norm": 10.85962963104248, "learning_rate": 6.807395528158177e-06, "loss": 0.3061, "step": 7831 }, { "epoch": 0.3885113348876432, "grad_norm": 15.577095031738281, "learning_rate": 6.806661638247398e-06, "loss": 0.3167, "step": 7832 }, { "epoch": 0.3885609405228434, "grad_norm": 10.943571090698242, "learning_rate": 6.8059277035667274e-06, "loss": 0.3265, "step": 7833 }, { "epoch": 0.3886105461580436, "grad_norm": 6.826914310455322, "learning_rate": 6.805193724134349e-06, "loss": 0.3065, "step": 7834 }, { "epoch": 0.3886601517932437, "grad_norm": 9.764092445373535, "learning_rate": 6.804459699968455e-06, "loss": 0.4152, "step": 7835 }, { "epoch": 0.3887097574284439, "grad_norm": 6.93990421295166, "learning_rate": 6.803725631087234e-06, "loss": 0.3435, "step": 7836 }, { "epoch": 0.388759363063644, "grad_norm": 9.542945861816406, "learning_rate": 6.802991517508876e-06, "loss": 0.4153, "step": 7837 }, { "epoch": 0.3888089686988442, "grad_norm": 6.429408550262451, "learning_rate": 6.8022573592515706e-06, "loss": 0.2949, "step": 7838 }, { "epoch": 0.38885857433404436, "grad_norm": 12.578437805175781, "learning_rate": 6.801523156333514e-06, "loss": 0.319, "step": 7839 }, { "epoch": 0.3889081799692445, "grad_norm": 6.875696182250977, "learning_rate": 6.8007889087729e-06, "loss": 0.2988, "step": 7840 }, { "epoch": 0.38895778560444466, "grad_norm": 4.671596527099609, "learning_rate": 6.800054616587919e-06, "loss": 0.2649, "step": 7841 }, { "epoch": 0.38900739123964484, "grad_norm": 6.006364822387695, "learning_rate": 6.799320279796772e-06, "loss": 0.2812, "step": 7842 }, { "epoch": 0.38905699687484496, "grad_norm": 4.2409186363220215, "learning_rate": 6.798585898417656e-06, "loss": 0.2387, "step": 7843 }, { "epoch": 0.38910660251004514, "grad_norm": 7.994353294372559, "learning_rate": 6.797851472468766e-06, "loss": 0.3456, "step": 7844 }, { "epoch": 0.3891562081452453, "grad_norm": 7.509178161621094, "learning_rate": 6.797117001968303e-06, "loss": 0.3566, "step": 7845 }, { "epoch": 0.38920581378044544, "grad_norm": 7.76427698135376, "learning_rate": 6.79638248693447e-06, "loss": 0.395, "step": 7846 }, { "epoch": 0.3892554194156456, "grad_norm": 5.262029647827148, "learning_rate": 6.795647927385465e-06, "loss": 0.2573, "step": 7847 }, { "epoch": 0.3893050250508458, "grad_norm": 6.264524936676025, "learning_rate": 6.794913323339492e-06, "loss": 0.3856, "step": 7848 }, { "epoch": 0.3893546306860459, "grad_norm": 6.890411376953125, "learning_rate": 6.794178674814753e-06, "loss": 0.2639, "step": 7849 }, { "epoch": 0.3894042363212461, "grad_norm": 5.978209495544434, "learning_rate": 6.793443981829458e-06, "loss": 0.3253, "step": 7850 }, { "epoch": 0.3894538419564463, "grad_norm": 7.432955265045166, "learning_rate": 6.792709244401808e-06, "loss": 0.2552, "step": 7851 }, { "epoch": 0.3895034475916464, "grad_norm": 6.7440924644470215, "learning_rate": 6.791974462550013e-06, "loss": 0.4555, "step": 7852 }, { "epoch": 0.3895530532268466, "grad_norm": 6.98511266708374, "learning_rate": 6.79123963629228e-06, "loss": 0.2599, "step": 7853 }, { "epoch": 0.3896026588620467, "grad_norm": 10.792484283447266, "learning_rate": 6.79050476564682e-06, "loss": 0.3451, "step": 7854 }, { "epoch": 0.3896522644972469, "grad_norm": 6.465098857879639, "learning_rate": 6.789769850631839e-06, "loss": 0.3166, "step": 7855 }, { "epoch": 0.38970187013244706, "grad_norm": 10.091902732849121, "learning_rate": 6.789034891265553e-06, "loss": 0.3872, "step": 7856 }, { "epoch": 0.3897514757676472, "grad_norm": 7.182839393615723, "learning_rate": 6.788299887566173e-06, "loss": 0.4121, "step": 7857 }, { "epoch": 0.38980108140284736, "grad_norm": 4.3191633224487305, "learning_rate": 6.787564839551912e-06, "loss": 0.3263, "step": 7858 }, { "epoch": 0.38985068703804754, "grad_norm": 9.798484802246094, "learning_rate": 6.786829747240987e-06, "loss": 0.2265, "step": 7859 }, { "epoch": 0.38990029267324766, "grad_norm": 6.985161781311035, "learning_rate": 6.786094610651612e-06, "loss": 0.4027, "step": 7860 }, { "epoch": 0.38994989830844784, "grad_norm": 5.720831871032715, "learning_rate": 6.785359429802005e-06, "loss": 0.2163, "step": 7861 }, { "epoch": 0.389999503943648, "grad_norm": 10.011801719665527, "learning_rate": 6.784624204710384e-06, "loss": 0.4058, "step": 7862 }, { "epoch": 0.39004910957884814, "grad_norm": 11.390349388122559, "learning_rate": 6.783888935394968e-06, "loss": 0.4176, "step": 7863 }, { "epoch": 0.3900987152140483, "grad_norm": 8.165139198303223, "learning_rate": 6.783153621873976e-06, "loss": 0.3663, "step": 7864 }, { "epoch": 0.3901483208492485, "grad_norm": 8.451294898986816, "learning_rate": 6.782418264165631e-06, "loss": 0.3833, "step": 7865 }, { "epoch": 0.3901979264844486, "grad_norm": 4.612209796905518, "learning_rate": 6.781682862288154e-06, "loss": 0.2578, "step": 7866 }, { "epoch": 0.3902475321196488, "grad_norm": 7.1979780197143555, "learning_rate": 6.780947416259773e-06, "loss": 0.222, "step": 7867 }, { "epoch": 0.390297137754849, "grad_norm": 4.897851943969727, "learning_rate": 6.780211926098707e-06, "loss": 0.3766, "step": 7868 }, { "epoch": 0.3903467433900491, "grad_norm": 4.2451395988464355, "learning_rate": 6.779476391823184e-06, "loss": 0.3554, "step": 7869 }, { "epoch": 0.3903963490252493, "grad_norm": 10.31413745880127, "learning_rate": 6.778740813451432e-06, "loss": 0.3273, "step": 7870 }, { "epoch": 0.3904459546604494, "grad_norm": 6.857425689697266, "learning_rate": 6.778005191001677e-06, "loss": 0.3464, "step": 7871 }, { "epoch": 0.3904955602956496, "grad_norm": 5.995386600494385, "learning_rate": 6.777269524492152e-06, "loss": 0.3026, "step": 7872 }, { "epoch": 0.39054516593084976, "grad_norm": 6.745243072509766, "learning_rate": 6.776533813941081e-06, "loss": 0.329, "step": 7873 }, { "epoch": 0.3905947715660499, "grad_norm": 4.7189249992370605, "learning_rate": 6.7757980593666995e-06, "loss": 0.2542, "step": 7874 }, { "epoch": 0.39064437720125006, "grad_norm": 7.230837821960449, "learning_rate": 6.775062260787239e-06, "loss": 0.3734, "step": 7875 }, { "epoch": 0.39069398283645024, "grad_norm": 6.197508811950684, "learning_rate": 6.774326418220934e-06, "loss": 0.3552, "step": 7876 }, { "epoch": 0.39074358847165036, "grad_norm": 6.7679972648620605, "learning_rate": 6.773590531686015e-06, "loss": 0.3005, "step": 7877 }, { "epoch": 0.39079319410685054, "grad_norm": 5.119418621063232, "learning_rate": 6.772854601200721e-06, "loss": 0.1682, "step": 7878 }, { "epoch": 0.3908427997420507, "grad_norm": 9.976882934570312, "learning_rate": 6.772118626783288e-06, "loss": 0.4195, "step": 7879 }, { "epoch": 0.39089240537725084, "grad_norm": 8.905230522155762, "learning_rate": 6.771382608451956e-06, "loss": 0.3091, "step": 7880 }, { "epoch": 0.390942011012451, "grad_norm": 9.872408866882324, "learning_rate": 6.77064654622496e-06, "loss": 0.3441, "step": 7881 }, { "epoch": 0.3909916166476512, "grad_norm": 6.203339099884033, "learning_rate": 6.769910440120542e-06, "loss": 0.2593, "step": 7882 }, { "epoch": 0.3910412222828513, "grad_norm": 7.087265968322754, "learning_rate": 6.7691742901569425e-06, "loss": 0.2478, "step": 7883 }, { "epoch": 0.3910908279180515, "grad_norm": 5.088888168334961, "learning_rate": 6.768438096352404e-06, "loss": 0.3544, "step": 7884 }, { "epoch": 0.3911404335532517, "grad_norm": 8.610106468200684, "learning_rate": 6.76770185872517e-06, "loss": 0.2008, "step": 7885 }, { "epoch": 0.3911900391884518, "grad_norm": 9.84471321105957, "learning_rate": 6.766965577293483e-06, "loss": 0.4356, "step": 7886 }, { "epoch": 0.391239644823652, "grad_norm": 6.038639068603516, "learning_rate": 6.766229252075591e-06, "loss": 0.2097, "step": 7887 }, { "epoch": 0.3912892504588521, "grad_norm": 6.6395158767700195, "learning_rate": 6.7654928830897385e-06, "loss": 0.3007, "step": 7888 }, { "epoch": 0.3913388560940523, "grad_norm": 9.106294631958008, "learning_rate": 6.764756470354173e-06, "loss": 0.3475, "step": 7889 }, { "epoch": 0.39138846172925246, "grad_norm": 6.906870365142822, "learning_rate": 6.7640200138871455e-06, "loss": 0.3361, "step": 7890 }, { "epoch": 0.3914380673644526, "grad_norm": 6.4360575675964355, "learning_rate": 6.763283513706904e-06, "loss": 0.3462, "step": 7891 }, { "epoch": 0.39148767299965276, "grad_norm": 5.61082124710083, "learning_rate": 6.762546969831698e-06, "loss": 0.3147, "step": 7892 }, { "epoch": 0.39153727863485294, "grad_norm": 5.312451362609863, "learning_rate": 6.761810382279781e-06, "loss": 0.2331, "step": 7893 }, { "epoch": 0.39158688427005306, "grad_norm": 5.667461395263672, "learning_rate": 6.761073751069407e-06, "loss": 0.2895, "step": 7894 }, { "epoch": 0.39163648990525324, "grad_norm": 13.249974250793457, "learning_rate": 6.760337076218828e-06, "loss": 0.3419, "step": 7895 }, { "epoch": 0.3916860955404534, "grad_norm": 8.976489067077637, "learning_rate": 6.7596003577462995e-06, "loss": 0.3944, "step": 7896 }, { "epoch": 0.39173570117565354, "grad_norm": 4.768313407897949, "learning_rate": 6.75886359567008e-06, "loss": 0.3099, "step": 7897 }, { "epoch": 0.3917853068108537, "grad_norm": 5.826797008514404, "learning_rate": 6.7581267900084234e-06, "loss": 0.2979, "step": 7898 }, { "epoch": 0.3918349124460539, "grad_norm": 8.41641616821289, "learning_rate": 6.75738994077959e-06, "loss": 0.3658, "step": 7899 }, { "epoch": 0.391884518081254, "grad_norm": 5.7982497215271, "learning_rate": 6.75665304800184e-06, "loss": 0.3402, "step": 7900 }, { "epoch": 0.3919341237164542, "grad_norm": 9.959863662719727, "learning_rate": 6.755916111693432e-06, "loss": 0.3434, "step": 7901 }, { "epoch": 0.3919837293516544, "grad_norm": 9.10842227935791, "learning_rate": 6.755179131872627e-06, "loss": 0.2348, "step": 7902 }, { "epoch": 0.3920333349868545, "grad_norm": 5.958962440490723, "learning_rate": 6.754442108557691e-06, "loss": 0.2296, "step": 7903 }, { "epoch": 0.3920829406220547, "grad_norm": 6.263080596923828, "learning_rate": 6.753705041766885e-06, "loss": 0.3185, "step": 7904 }, { "epoch": 0.3921325462572548, "grad_norm": 6.382467269897461, "learning_rate": 6.752967931518475e-06, "loss": 0.3184, "step": 7905 }, { "epoch": 0.392182151892455, "grad_norm": 4.947244644165039, "learning_rate": 6.752230777830726e-06, "loss": 0.3235, "step": 7906 }, { "epoch": 0.39223175752765516, "grad_norm": 7.208501815795898, "learning_rate": 6.751493580721908e-06, "loss": 0.3049, "step": 7907 }, { "epoch": 0.3922813631628553, "grad_norm": 6.102543830871582, "learning_rate": 6.750756340210285e-06, "loss": 0.3426, "step": 7908 }, { "epoch": 0.39233096879805546, "grad_norm": 10.201926231384277, "learning_rate": 6.7500190563141275e-06, "loss": 0.2845, "step": 7909 }, { "epoch": 0.39238057443325564, "grad_norm": 4.9410481452941895, "learning_rate": 6.749281729051706e-06, "loss": 0.2492, "step": 7910 }, { "epoch": 0.39243018006845576, "grad_norm": 4.364724636077881, "learning_rate": 6.748544358441295e-06, "loss": 0.2205, "step": 7911 }, { "epoch": 0.39247978570365594, "grad_norm": 4.508846282958984, "learning_rate": 6.74780694450116e-06, "loss": 0.1658, "step": 7912 }, { "epoch": 0.3925293913388561, "grad_norm": 4.377569675445557, "learning_rate": 6.747069487249581e-06, "loss": 0.2733, "step": 7913 }, { "epoch": 0.39257899697405624, "grad_norm": 4.585017681121826, "learning_rate": 6.7463319867048295e-06, "loss": 0.2454, "step": 7914 }, { "epoch": 0.3926286026092564, "grad_norm": 4.410924911499023, "learning_rate": 6.74559444288518e-06, "loss": 0.2907, "step": 7915 }, { "epoch": 0.3926782082444566, "grad_norm": 7.962091445922852, "learning_rate": 6.7448568558089125e-06, "loss": 0.4026, "step": 7916 }, { "epoch": 0.3927278138796567, "grad_norm": 5.101713180541992, "learning_rate": 6.7441192254943014e-06, "loss": 0.2114, "step": 7917 }, { "epoch": 0.3927774195148569, "grad_norm": 9.987007141113281, "learning_rate": 6.743381551959627e-06, "loss": 0.4149, "step": 7918 }, { "epoch": 0.3928270251500571, "grad_norm": 7.616781711578369, "learning_rate": 6.742643835223169e-06, "loss": 0.3211, "step": 7919 }, { "epoch": 0.3928766307852572, "grad_norm": 10.511234283447266, "learning_rate": 6.741906075303208e-06, "loss": 0.3507, "step": 7920 }, { "epoch": 0.3929262364204574, "grad_norm": 5.78715705871582, "learning_rate": 6.741168272218028e-06, "loss": 0.2538, "step": 7921 }, { "epoch": 0.3929758420556575, "grad_norm": 12.884017944335938, "learning_rate": 6.740430425985911e-06, "loss": 0.4664, "step": 7922 }, { "epoch": 0.3930254476908577, "grad_norm": 9.968151092529297, "learning_rate": 6.739692536625138e-06, "loss": 0.4723, "step": 7923 }, { "epoch": 0.39307505332605785, "grad_norm": 5.365957736968994, "learning_rate": 6.738954604154e-06, "loss": 0.2541, "step": 7924 }, { "epoch": 0.393124658961258, "grad_norm": 6.310296058654785, "learning_rate": 6.738216628590779e-06, "loss": 0.3421, "step": 7925 }, { "epoch": 0.39317426459645816, "grad_norm": 5.94690465927124, "learning_rate": 6.7374786099537634e-06, "loss": 0.1942, "step": 7926 }, { "epoch": 0.39322387023165833, "grad_norm": 4.55031156539917, "learning_rate": 6.736740548261242e-06, "loss": 0.2554, "step": 7927 }, { "epoch": 0.39327347586685846, "grad_norm": 7.168657302856445, "learning_rate": 6.736002443531504e-06, "loss": 0.2706, "step": 7928 }, { "epoch": 0.39332308150205864, "grad_norm": 13.05323600769043, "learning_rate": 6.735264295782839e-06, "loss": 0.3711, "step": 7929 }, { "epoch": 0.3933726871372588, "grad_norm": 8.357601165771484, "learning_rate": 6.734526105033543e-06, "loss": 0.3479, "step": 7930 }, { "epoch": 0.39342229277245894, "grad_norm": 4.9684553146362305, "learning_rate": 6.733787871301903e-06, "loss": 0.2873, "step": 7931 }, { "epoch": 0.3934718984076591, "grad_norm": 8.475502967834473, "learning_rate": 6.733049594606217e-06, "loss": 0.3598, "step": 7932 }, { "epoch": 0.3935215040428593, "grad_norm": 6.63123083114624, "learning_rate": 6.732311274964776e-06, "loss": 0.3372, "step": 7933 }, { "epoch": 0.3935711096780594, "grad_norm": 5.549365043640137, "learning_rate": 6.73157291239588e-06, "loss": 0.2633, "step": 7934 }, { "epoch": 0.3936207153132596, "grad_norm": 5.999533176422119, "learning_rate": 6.7308345069178225e-06, "loss": 0.2297, "step": 7935 }, { "epoch": 0.3936703209484597, "grad_norm": 6.565506935119629, "learning_rate": 6.730096058548904e-06, "loss": 0.2527, "step": 7936 }, { "epoch": 0.3937199265836599, "grad_norm": 4.593297481536865, "learning_rate": 6.7293575673074215e-06, "loss": 0.37, "step": 7937 }, { "epoch": 0.3937695322188601, "grad_norm": 5.439312934875488, "learning_rate": 6.728619033211679e-06, "loss": 0.2001, "step": 7938 }, { "epoch": 0.3938191378540602, "grad_norm": 8.838671684265137, "learning_rate": 6.727880456279971e-06, "loss": 0.282, "step": 7939 }, { "epoch": 0.3938687434892604, "grad_norm": 6.556765556335449, "learning_rate": 6.727141836530606e-06, "loss": 0.3637, "step": 7940 }, { "epoch": 0.39391834912446055, "grad_norm": 4.794878005981445, "learning_rate": 6.726403173981886e-06, "loss": 0.2612, "step": 7941 }, { "epoch": 0.3939679547596607, "grad_norm": 9.87076473236084, "learning_rate": 6.725664468652112e-06, "loss": 0.3955, "step": 7942 }, { "epoch": 0.39401756039486086, "grad_norm": 6.756847858428955, "learning_rate": 6.724925720559592e-06, "loss": 0.3609, "step": 7943 }, { "epoch": 0.39406716603006103, "grad_norm": 7.864449501037598, "learning_rate": 6.724186929722634e-06, "loss": 0.2842, "step": 7944 }, { "epoch": 0.39411677166526116, "grad_norm": 7.175579071044922, "learning_rate": 6.723448096159543e-06, "loss": 0.3021, "step": 7945 }, { "epoch": 0.39416637730046133, "grad_norm": 6.739511966705322, "learning_rate": 6.722709219888628e-06, "loss": 0.2945, "step": 7946 }, { "epoch": 0.3942159829356615, "grad_norm": 14.071764945983887, "learning_rate": 6.7219703009282e-06, "loss": 0.5724, "step": 7947 }, { "epoch": 0.39426558857086164, "grad_norm": 7.275821208953857, "learning_rate": 6.72123133929657e-06, "loss": 0.2978, "step": 7948 }, { "epoch": 0.3943151942060618, "grad_norm": 24.490312576293945, "learning_rate": 6.7204923350120475e-06, "loss": 0.3112, "step": 7949 }, { "epoch": 0.394364799841262, "grad_norm": 12.276838302612305, "learning_rate": 6.719753288092947e-06, "loss": 0.4384, "step": 7950 }, { "epoch": 0.3944144054764621, "grad_norm": 4.618952751159668, "learning_rate": 6.719014198557583e-06, "loss": 0.2682, "step": 7951 }, { "epoch": 0.3944640111116623, "grad_norm": 8.247342109680176, "learning_rate": 6.718275066424268e-06, "loss": 0.3345, "step": 7952 }, { "epoch": 0.3945136167468624, "grad_norm": 4.797314167022705, "learning_rate": 6.71753589171132e-06, "loss": 0.3603, "step": 7953 }, { "epoch": 0.3945632223820626, "grad_norm": 5.793436050415039, "learning_rate": 6.716796674437055e-06, "loss": 0.2921, "step": 7954 }, { "epoch": 0.3946128280172628, "grad_norm": 4.975821495056152, "learning_rate": 6.716057414619794e-06, "loss": 0.3026, "step": 7955 }, { "epoch": 0.3946624336524629, "grad_norm": 8.58674144744873, "learning_rate": 6.715318112277852e-06, "loss": 0.2671, "step": 7956 }, { "epoch": 0.3947120392876631, "grad_norm": 6.383212566375732, "learning_rate": 6.714578767429551e-06, "loss": 0.2631, "step": 7957 }, { "epoch": 0.39476164492286325, "grad_norm": 5.705106735229492, "learning_rate": 6.713839380093214e-06, "loss": 0.224, "step": 7958 }, { "epoch": 0.3948112505580634, "grad_norm": 11.201098442077637, "learning_rate": 6.713099950287162e-06, "loss": 0.3899, "step": 7959 }, { "epoch": 0.39486085619326355, "grad_norm": 5.9021735191345215, "learning_rate": 6.712360478029718e-06, "loss": 0.3426, "step": 7960 }, { "epoch": 0.39491046182846373, "grad_norm": 7.0229902267456055, "learning_rate": 6.711620963339207e-06, "loss": 0.2766, "step": 7961 }, { "epoch": 0.39496006746366386, "grad_norm": 8.845996856689453, "learning_rate": 6.710881406233953e-06, "loss": 0.3704, "step": 7962 }, { "epoch": 0.39500967309886403, "grad_norm": 5.271673679351807, "learning_rate": 6.710141806732284e-06, "loss": 0.3132, "step": 7963 }, { "epoch": 0.3950592787340642, "grad_norm": 7.813311576843262, "learning_rate": 6.709402164852529e-06, "loss": 0.2952, "step": 7964 }, { "epoch": 0.39510888436926433, "grad_norm": 4.922377109527588, "learning_rate": 6.708662480613015e-06, "loss": 0.2247, "step": 7965 }, { "epoch": 0.3951584900044645, "grad_norm": 4.876836776733398, "learning_rate": 6.70792275403207e-06, "loss": 0.2804, "step": 7966 }, { "epoch": 0.3952080956396647, "grad_norm": 6.757436275482178, "learning_rate": 6.707182985128028e-06, "loss": 0.2564, "step": 7967 }, { "epoch": 0.3952577012748648, "grad_norm": 11.390239715576172, "learning_rate": 6.706443173919219e-06, "loss": 0.4355, "step": 7968 }, { "epoch": 0.395307306910065, "grad_norm": 9.09697151184082, "learning_rate": 6.705703320423977e-06, "loss": 0.2556, "step": 7969 }, { "epoch": 0.3953569125452651, "grad_norm": 10.86653995513916, "learning_rate": 6.704963424660633e-06, "loss": 0.2682, "step": 7970 }, { "epoch": 0.3954065181804653, "grad_norm": 6.549184799194336, "learning_rate": 6.704223486647526e-06, "loss": 0.2799, "step": 7971 }, { "epoch": 0.3954561238156655, "grad_norm": 7.404897212982178, "learning_rate": 6.70348350640299e-06, "loss": 0.3608, "step": 7972 }, { "epoch": 0.3955057294508656, "grad_norm": 6.269264221191406, "learning_rate": 6.702743483945361e-06, "loss": 0.3073, "step": 7973 }, { "epoch": 0.3955553350860658, "grad_norm": 4.084358215332031, "learning_rate": 6.702003419292979e-06, "loss": 0.3152, "step": 7974 }, { "epoch": 0.39560494072126595, "grad_norm": 7.723450660705566, "learning_rate": 6.701263312464184e-06, "loss": 0.193, "step": 7975 }, { "epoch": 0.3956545463564661, "grad_norm": 18.2601261138916, "learning_rate": 6.7005231634773114e-06, "loss": 0.6104, "step": 7976 }, { "epoch": 0.39570415199166625, "grad_norm": 5.857794284820557, "learning_rate": 6.699782972350707e-06, "loss": 0.2706, "step": 7977 }, { "epoch": 0.39575375762686643, "grad_norm": 4.197115898132324, "learning_rate": 6.699042739102711e-06, "loss": 0.263, "step": 7978 }, { "epoch": 0.39580336326206655, "grad_norm": 8.444401741027832, "learning_rate": 6.698302463751669e-06, "loss": 0.4932, "step": 7979 }, { "epoch": 0.39585296889726673, "grad_norm": 7.347057342529297, "learning_rate": 6.697562146315923e-06, "loss": 0.338, "step": 7980 }, { "epoch": 0.3959025745324669, "grad_norm": 6.686666488647461, "learning_rate": 6.696821786813818e-06, "loss": 0.353, "step": 7981 }, { "epoch": 0.39595218016766703, "grad_norm": 7.227753639221191, "learning_rate": 6.6960813852637015e-06, "loss": 0.2679, "step": 7982 }, { "epoch": 0.3960017858028672, "grad_norm": 10.250999450683594, "learning_rate": 6.695340941683922e-06, "loss": 0.3765, "step": 7983 }, { "epoch": 0.3960513914380674, "grad_norm": 9.974032402038574, "learning_rate": 6.694600456092828e-06, "loss": 0.401, "step": 7984 }, { "epoch": 0.3961009970732675, "grad_norm": 6.947900295257568, "learning_rate": 6.693859928508767e-06, "loss": 0.4133, "step": 7985 }, { "epoch": 0.3961506027084677, "grad_norm": 8.95491886138916, "learning_rate": 6.693119358950091e-06, "loss": 0.3999, "step": 7986 }, { "epoch": 0.3962002083436678, "grad_norm": 4.803222179412842, "learning_rate": 6.692378747435151e-06, "loss": 0.2448, "step": 7987 }, { "epoch": 0.396249813978868, "grad_norm": 7.2605695724487305, "learning_rate": 6.691638093982301e-06, "loss": 0.2565, "step": 7988 }, { "epoch": 0.39629941961406817, "grad_norm": 5.966193675994873, "learning_rate": 6.690897398609893e-06, "loss": 0.2893, "step": 7989 }, { "epoch": 0.3963490252492683, "grad_norm": 6.903596878051758, "learning_rate": 6.690156661336282e-06, "loss": 0.2418, "step": 7990 }, { "epoch": 0.3963986308844685, "grad_norm": 5.882294178009033, "learning_rate": 6.6894158821798264e-06, "loss": 0.3175, "step": 7991 }, { "epoch": 0.39644823651966865, "grad_norm": 6.414381504058838, "learning_rate": 6.688675061158881e-06, "loss": 0.2386, "step": 7992 }, { "epoch": 0.3964978421548688, "grad_norm": 4.235260009765625, "learning_rate": 6.687934198291801e-06, "loss": 0.2349, "step": 7993 }, { "epoch": 0.39654744779006895, "grad_norm": 9.306212425231934, "learning_rate": 6.68719329359695e-06, "loss": 0.2956, "step": 7994 }, { "epoch": 0.39659705342526913, "grad_norm": 6.885573387145996, "learning_rate": 6.686452347092687e-06, "loss": 0.2554, "step": 7995 }, { "epoch": 0.39664665906046925, "grad_norm": 11.696515083312988, "learning_rate": 6.68571135879737e-06, "loss": 0.4239, "step": 7996 }, { "epoch": 0.39669626469566943, "grad_norm": 5.711118698120117, "learning_rate": 6.6849703287293634e-06, "loss": 0.2941, "step": 7997 }, { "epoch": 0.3967458703308696, "grad_norm": 6.459647178649902, "learning_rate": 6.68422925690703e-06, "loss": 0.3563, "step": 7998 }, { "epoch": 0.39679547596606973, "grad_norm": 6.098774433135986, "learning_rate": 6.683488143348733e-06, "loss": 0.2027, "step": 7999 }, { "epoch": 0.3968450816012699, "grad_norm": 8.65111255645752, "learning_rate": 6.682746988072839e-06, "loss": 0.4596, "step": 8000 }, { "epoch": 0.3968946872364701, "grad_norm": 6.240878105163574, "learning_rate": 6.682005791097715e-06, "loss": 0.2894, "step": 8001 }, { "epoch": 0.3969442928716702, "grad_norm": 10.234781265258789, "learning_rate": 6.681264552441724e-06, "loss": 0.3536, "step": 8002 }, { "epoch": 0.3969938985068704, "grad_norm": 6.883139610290527, "learning_rate": 6.680523272123239e-06, "loss": 0.2764, "step": 8003 }, { "epoch": 0.3970435041420705, "grad_norm": 6.424339771270752, "learning_rate": 6.679781950160625e-06, "loss": 0.2145, "step": 8004 }, { "epoch": 0.3970931097772707, "grad_norm": 7.277004718780518, "learning_rate": 6.679040586572257e-06, "loss": 0.2749, "step": 8005 }, { "epoch": 0.39714271541247087, "grad_norm": 14.069175720214844, "learning_rate": 6.678299181376502e-06, "loss": 0.3691, "step": 8006 }, { "epoch": 0.397192321047671, "grad_norm": 6.4461894035339355, "learning_rate": 6.677557734591734e-06, "loss": 0.3608, "step": 8007 }, { "epoch": 0.39724192668287117, "grad_norm": 4.860254287719727, "learning_rate": 6.6768162462363265e-06, "loss": 0.2432, "step": 8008 }, { "epoch": 0.39729153231807135, "grad_norm": 12.706910133361816, "learning_rate": 6.676074716328655e-06, "loss": 0.339, "step": 8009 }, { "epoch": 0.3973411379532715, "grad_norm": 6.035835266113281, "learning_rate": 6.675333144887093e-06, "loss": 0.233, "step": 8010 }, { "epoch": 0.39739074358847165, "grad_norm": 6.7540602684021, "learning_rate": 6.674591531930018e-06, "loss": 0.3173, "step": 8011 }, { "epoch": 0.39744034922367183, "grad_norm": 6.086275100708008, "learning_rate": 6.673849877475809e-06, "loss": 0.2396, "step": 8012 }, { "epoch": 0.39748995485887195, "grad_norm": 6.775099277496338, "learning_rate": 6.67310818154284e-06, "loss": 0.2061, "step": 8013 }, { "epoch": 0.39753956049407213, "grad_norm": 6.250968933105469, "learning_rate": 6.672366444149495e-06, "loss": 0.3123, "step": 8014 }, { "epoch": 0.3975891661292723, "grad_norm": 4.894866943359375, "learning_rate": 6.671624665314153e-06, "loss": 0.2288, "step": 8015 }, { "epoch": 0.39763877176447243, "grad_norm": 6.011033058166504, "learning_rate": 6.6708828450551955e-06, "loss": 0.2934, "step": 8016 }, { "epoch": 0.3976883773996726, "grad_norm": 12.324115753173828, "learning_rate": 6.670140983391005e-06, "loss": 0.4868, "step": 8017 }, { "epoch": 0.3977379830348728, "grad_norm": 12.104131698608398, "learning_rate": 6.669399080339965e-06, "loss": 0.4806, "step": 8018 }, { "epoch": 0.3977875886700729, "grad_norm": 5.004607200622559, "learning_rate": 6.6686571359204625e-06, "loss": 0.2787, "step": 8019 }, { "epoch": 0.3978371943052731, "grad_norm": 11.25570011138916, "learning_rate": 6.66791515015088e-06, "loss": 0.472, "step": 8020 }, { "epoch": 0.3978867999404732, "grad_norm": 7.729058742523193, "learning_rate": 6.667173123049605e-06, "loss": 0.3742, "step": 8021 }, { "epoch": 0.3979364055756734, "grad_norm": 3.8864049911499023, "learning_rate": 6.666431054635028e-06, "loss": 0.1826, "step": 8022 }, { "epoch": 0.39798601121087357, "grad_norm": 9.157069206237793, "learning_rate": 6.6656889449255335e-06, "loss": 0.3705, "step": 8023 }, { "epoch": 0.3980356168460737, "grad_norm": 9.917397499084473, "learning_rate": 6.664946793939513e-06, "loss": 0.2894, "step": 8024 }, { "epoch": 0.39808522248127387, "grad_norm": 7.202164649963379, "learning_rate": 6.664204601695361e-06, "loss": 0.2539, "step": 8025 }, { "epoch": 0.39813482811647405, "grad_norm": 6.0861992835998535, "learning_rate": 6.663462368211463e-06, "loss": 0.3325, "step": 8026 }, { "epoch": 0.39818443375167417, "grad_norm": 4.874293327331543, "learning_rate": 6.662720093506216e-06, "loss": 0.2839, "step": 8027 }, { "epoch": 0.39823403938687435, "grad_norm": 7.213565349578857, "learning_rate": 6.661977777598013e-06, "loss": 0.3702, "step": 8028 }, { "epoch": 0.39828364502207453, "grad_norm": 13.094592094421387, "learning_rate": 6.66123542050525e-06, "loss": 0.3999, "step": 8029 }, { "epoch": 0.39833325065727465, "grad_norm": 4.474545955657959, "learning_rate": 6.66049302224632e-06, "loss": 0.2216, "step": 8030 }, { "epoch": 0.39838285629247483, "grad_norm": 11.027804374694824, "learning_rate": 6.659750582839622e-06, "loss": 0.2053, "step": 8031 }, { "epoch": 0.398432461927675, "grad_norm": 5.286520004272461, "learning_rate": 6.659008102303555e-06, "loss": 0.2883, "step": 8032 }, { "epoch": 0.39848206756287513, "grad_norm": 12.132208824157715, "learning_rate": 6.658265580656517e-06, "loss": 0.3053, "step": 8033 }, { "epoch": 0.3985316731980753, "grad_norm": 12.94210147857666, "learning_rate": 6.657523017916907e-06, "loss": 0.4154, "step": 8034 }, { "epoch": 0.3985812788332755, "grad_norm": 4.090688228607178, "learning_rate": 6.656780414103125e-06, "loss": 0.2562, "step": 8035 }, { "epoch": 0.3986308844684756, "grad_norm": 7.939277172088623, "learning_rate": 6.656037769233577e-06, "loss": 0.2526, "step": 8036 }, { "epoch": 0.3986804901036758, "grad_norm": 30.385662078857422, "learning_rate": 6.655295083326664e-06, "loss": 0.5154, "step": 8037 }, { "epoch": 0.3987300957388759, "grad_norm": 9.596624374389648, "learning_rate": 6.654552356400791e-06, "loss": 0.2997, "step": 8038 }, { "epoch": 0.3987797013740761, "grad_norm": 6.747781276702881, "learning_rate": 6.653809588474361e-06, "loss": 0.2727, "step": 8039 }, { "epoch": 0.39882930700927627, "grad_norm": 8.213046073913574, "learning_rate": 6.65306677956578e-06, "loss": 0.3377, "step": 8040 }, { "epoch": 0.3988789126444764, "grad_norm": 7.194550037384033, "learning_rate": 6.652323929693457e-06, "loss": 0.3261, "step": 8041 }, { "epoch": 0.39892851827967657, "grad_norm": 12.226893424987793, "learning_rate": 6.651581038875802e-06, "loss": 0.3446, "step": 8042 }, { "epoch": 0.39897812391487675, "grad_norm": 4.704514980316162, "learning_rate": 6.650838107131219e-06, "loss": 0.2566, "step": 8043 }, { "epoch": 0.39902772955007687, "grad_norm": 7.446937561035156, "learning_rate": 6.650095134478122e-06, "loss": 0.3559, "step": 8044 }, { "epoch": 0.39907733518527705, "grad_norm": 4.6394734382629395, "learning_rate": 6.649352120934921e-06, "loss": 0.2162, "step": 8045 }, { "epoch": 0.3991269408204772, "grad_norm": 6.825143337249756, "learning_rate": 6.648609066520029e-06, "loss": 0.2882, "step": 8046 }, { "epoch": 0.39917654645567735, "grad_norm": 4.559693336486816, "learning_rate": 6.6478659712518574e-06, "loss": 0.3381, "step": 8047 }, { "epoch": 0.39922615209087753, "grad_norm": 9.407470703125, "learning_rate": 6.647122835148822e-06, "loss": 0.2717, "step": 8048 }, { "epoch": 0.3992757577260777, "grad_norm": 7.786904335021973, "learning_rate": 6.646379658229339e-06, "loss": 0.3194, "step": 8049 }, { "epoch": 0.39932536336127783, "grad_norm": 5.522005558013916, "learning_rate": 6.645636440511823e-06, "loss": 0.3207, "step": 8050 }, { "epoch": 0.399374968996478, "grad_norm": 6.373376846313477, "learning_rate": 6.64489318201469e-06, "loss": 0.3231, "step": 8051 }, { "epoch": 0.39942457463167813, "grad_norm": 10.593857765197754, "learning_rate": 6.644149882756361e-06, "loss": 0.4146, "step": 8052 }, { "epoch": 0.3994741802668783, "grad_norm": 4.877021789550781, "learning_rate": 6.643406542755254e-06, "loss": 0.31, "step": 8053 }, { "epoch": 0.3995237859020785, "grad_norm": 6.020054817199707, "learning_rate": 6.64266316202979e-06, "loss": 0.3268, "step": 8054 }, { "epoch": 0.3995733915372786, "grad_norm": 9.753057479858398, "learning_rate": 6.641919740598388e-06, "loss": 0.2209, "step": 8055 }, { "epoch": 0.3996229971724788, "grad_norm": 5.851746559143066, "learning_rate": 6.641176278479475e-06, "loss": 0.2829, "step": 8056 }, { "epoch": 0.39967260280767897, "grad_norm": 8.185468673706055, "learning_rate": 6.6404327756914685e-06, "loss": 0.2588, "step": 8057 }, { "epoch": 0.3997222084428791, "grad_norm": 8.413805961608887, "learning_rate": 6.639689232252797e-06, "loss": 0.4098, "step": 8058 }, { "epoch": 0.39977181407807927, "grad_norm": 9.302755355834961, "learning_rate": 6.638945648181885e-06, "loss": 0.3313, "step": 8059 }, { "epoch": 0.39982141971327945, "grad_norm": 7.428669452667236, "learning_rate": 6.638202023497158e-06, "loss": 0.308, "step": 8060 }, { "epoch": 0.39987102534847957, "grad_norm": 7.81510591506958, "learning_rate": 6.637458358217044e-06, "loss": 0.3172, "step": 8061 }, { "epoch": 0.39992063098367975, "grad_norm": 29.447662353515625, "learning_rate": 6.63671465235997e-06, "loss": 0.3459, "step": 8062 }, { "epoch": 0.3999702366188799, "grad_norm": 6.402780055999756, "learning_rate": 6.635970905944369e-06, "loss": 0.27, "step": 8063 }, { "epoch": 0.40001984225408005, "grad_norm": 5.922914981842041, "learning_rate": 6.635227118988668e-06, "loss": 0.2693, "step": 8064 }, { "epoch": 0.40001984225408005, "eval_loss": 0.3079777657985687, "eval_runtime": 35.5879, "eval_samples_per_second": 45.774, "eval_steps_per_second": 5.732, "step": 8064 }, { "epoch": 0.4000694478892802, "grad_norm": 6.312528610229492, "learning_rate": 6.634483291511298e-06, "loss": 0.3749, "step": 8065 }, { "epoch": 0.4001190535244804, "grad_norm": 6.170041084289551, "learning_rate": 6.633739423530694e-06, "loss": 0.308, "step": 8066 }, { "epoch": 0.40016865915968053, "grad_norm": 11.615614891052246, "learning_rate": 6.632995515065288e-06, "loss": 0.4327, "step": 8067 }, { "epoch": 0.4002182647948807, "grad_norm": 6.004881858825684, "learning_rate": 6.632251566133514e-06, "loss": 0.4112, "step": 8068 }, { "epoch": 0.40026787043008083, "grad_norm": 8.540292739868164, "learning_rate": 6.631507576753809e-06, "loss": 0.4311, "step": 8069 }, { "epoch": 0.400317476065281, "grad_norm": 9.978060722351074, "learning_rate": 6.630763546944607e-06, "loss": 0.2902, "step": 8070 }, { "epoch": 0.4003670817004812, "grad_norm": 4.662056922912598, "learning_rate": 6.630019476724345e-06, "loss": 0.2683, "step": 8071 }, { "epoch": 0.4004166873356813, "grad_norm": 13.938081741333008, "learning_rate": 6.629275366111465e-06, "loss": 0.4849, "step": 8072 }, { "epoch": 0.4004662929708815, "grad_norm": 7.864598274230957, "learning_rate": 6.628531215124405e-06, "loss": 0.2475, "step": 8073 }, { "epoch": 0.40051589860608167, "grad_norm": 7.162006378173828, "learning_rate": 6.627787023781604e-06, "loss": 0.2594, "step": 8074 }, { "epoch": 0.4005655042412818, "grad_norm": 4.748510837554932, "learning_rate": 6.627042792101504e-06, "loss": 0.2538, "step": 8075 }, { "epoch": 0.40061510987648197, "grad_norm": 11.78288459777832, "learning_rate": 6.626298520102549e-06, "loss": 0.3761, "step": 8076 }, { "epoch": 0.40066471551168215, "grad_norm": 16.956466674804688, "learning_rate": 6.62555420780318e-06, "loss": 0.4308, "step": 8077 }, { "epoch": 0.40071432114688227, "grad_norm": 8.373991012573242, "learning_rate": 6.624809855221841e-06, "loss": 0.3784, "step": 8078 }, { "epoch": 0.40076392678208245, "grad_norm": 12.835128784179688, "learning_rate": 6.624065462376981e-06, "loss": 0.3824, "step": 8079 }, { "epoch": 0.4008135324172826, "grad_norm": 8.933627128601074, "learning_rate": 6.623321029287043e-06, "loss": 0.3893, "step": 8080 }, { "epoch": 0.40086313805248275, "grad_norm": 5.3355021476745605, "learning_rate": 6.622576555970476e-06, "loss": 0.336, "step": 8081 }, { "epoch": 0.4009127436876829, "grad_norm": 3.9776484966278076, "learning_rate": 6.621832042445727e-06, "loss": 0.2618, "step": 8082 }, { "epoch": 0.4009623493228831, "grad_norm": 5.009521961212158, "learning_rate": 6.621087488731248e-06, "loss": 0.2681, "step": 8083 }, { "epoch": 0.4010119549580832, "grad_norm": 6.369012355804443, "learning_rate": 6.620342894845487e-06, "loss": 0.2856, "step": 8084 }, { "epoch": 0.4010615605932834, "grad_norm": 6.256905555725098, "learning_rate": 6.6195982608068964e-06, "loss": 0.2459, "step": 8085 }, { "epoch": 0.40111116622848353, "grad_norm": 12.84496021270752, "learning_rate": 6.618853586633929e-06, "loss": 0.3523, "step": 8086 }, { "epoch": 0.4011607718636837, "grad_norm": 15.418498992919922, "learning_rate": 6.618108872345035e-06, "loss": 0.3945, "step": 8087 }, { "epoch": 0.4012103774988839, "grad_norm": 6.7798566818237305, "learning_rate": 6.6173641179586724e-06, "loss": 0.3274, "step": 8088 }, { "epoch": 0.401259983134084, "grad_norm": 7.716045379638672, "learning_rate": 6.616619323493295e-06, "loss": 0.4646, "step": 8089 }, { "epoch": 0.4013095887692842, "grad_norm": 6.771888256072998, "learning_rate": 6.61587448896736e-06, "loss": 0.3115, "step": 8090 }, { "epoch": 0.40135919440448437, "grad_norm": 6.2733154296875, "learning_rate": 6.615129614399325e-06, "loss": 0.2605, "step": 8091 }, { "epoch": 0.4014088000396845, "grad_norm": 5.427724361419678, "learning_rate": 6.614384699807647e-06, "loss": 0.386, "step": 8092 }, { "epoch": 0.40145840567488467, "grad_norm": 7.475636005401611, "learning_rate": 6.613639745210785e-06, "loss": 0.3602, "step": 8093 }, { "epoch": 0.40150801131008484, "grad_norm": 6.51065731048584, "learning_rate": 6.612894750627201e-06, "loss": 0.2472, "step": 8094 }, { "epoch": 0.40155761694528497, "grad_norm": 7.076779365539551, "learning_rate": 6.612149716075355e-06, "loss": 0.4031, "step": 8095 }, { "epoch": 0.40160722258048515, "grad_norm": 4.876071929931641, "learning_rate": 6.611404641573711e-06, "loss": 0.2543, "step": 8096 }, { "epoch": 0.4016568282156853, "grad_norm": 5.737143516540527, "learning_rate": 6.610659527140729e-06, "loss": 0.2268, "step": 8097 }, { "epoch": 0.40170643385088545, "grad_norm": 8.001469612121582, "learning_rate": 6.609914372794877e-06, "loss": 0.3869, "step": 8098 }, { "epoch": 0.4017560394860856, "grad_norm": 6.542433261871338, "learning_rate": 6.609169178554618e-06, "loss": 0.1905, "step": 8099 }, { "epoch": 0.4018056451212858, "grad_norm": 14.04908561706543, "learning_rate": 6.608423944438419e-06, "loss": 0.3266, "step": 8100 }, { "epoch": 0.4018552507564859, "grad_norm": 5.048221111297607, "learning_rate": 6.607678670464748e-06, "loss": 0.2848, "step": 8101 }, { "epoch": 0.4019048563916861, "grad_norm": 9.461974143981934, "learning_rate": 6.60693335665207e-06, "loss": 0.2555, "step": 8102 }, { "epoch": 0.40195446202688623, "grad_norm": 10.286386489868164, "learning_rate": 6.6061880030188585e-06, "loss": 0.2694, "step": 8103 }, { "epoch": 0.4020040676620864, "grad_norm": 6.055221080780029, "learning_rate": 6.605442609583581e-06, "loss": 0.3453, "step": 8104 }, { "epoch": 0.4020536732972866, "grad_norm": 6.043844699859619, "learning_rate": 6.604697176364709e-06, "loss": 0.3447, "step": 8105 }, { "epoch": 0.4021032789324867, "grad_norm": 4.936936855316162, "learning_rate": 6.6039517033807155e-06, "loss": 0.277, "step": 8106 }, { "epoch": 0.4021528845676869, "grad_norm": 5.745471477508545, "learning_rate": 6.6032061906500744e-06, "loss": 0.2638, "step": 8107 }, { "epoch": 0.40220249020288706, "grad_norm": 11.08815860748291, "learning_rate": 6.602460638191257e-06, "loss": 0.4157, "step": 8108 }, { "epoch": 0.4022520958380872, "grad_norm": 20.238542556762695, "learning_rate": 6.601715046022741e-06, "loss": 0.347, "step": 8109 }, { "epoch": 0.40230170147328737, "grad_norm": 7.207237720489502, "learning_rate": 6.600969414163002e-06, "loss": 0.3711, "step": 8110 }, { "epoch": 0.40235130710848754, "grad_norm": 7.986015319824219, "learning_rate": 6.600223742630516e-06, "loss": 0.3063, "step": 8111 }, { "epoch": 0.40240091274368767, "grad_norm": 7.878209590911865, "learning_rate": 6.599478031443761e-06, "loss": 0.3215, "step": 8112 }, { "epoch": 0.40245051837888784, "grad_norm": 7.281239986419678, "learning_rate": 6.5987322806212184e-06, "loss": 0.2616, "step": 8113 }, { "epoch": 0.402500124014088, "grad_norm": 5.581003665924072, "learning_rate": 6.597986490181366e-06, "loss": 0.3769, "step": 8114 }, { "epoch": 0.40254972964928815, "grad_norm": 3.7927300930023193, "learning_rate": 6.597240660142685e-06, "loss": 0.1773, "step": 8115 }, { "epoch": 0.4025993352844883, "grad_norm": 8.001687049865723, "learning_rate": 6.596494790523659e-06, "loss": 0.2918, "step": 8116 }, { "epoch": 0.4026489409196885, "grad_norm": 11.078349113464355, "learning_rate": 6.5957488813427705e-06, "loss": 0.2895, "step": 8117 }, { "epoch": 0.4026985465548886, "grad_norm": 10.508216857910156, "learning_rate": 6.595002932618501e-06, "loss": 0.43, "step": 8118 }, { "epoch": 0.4027481521900888, "grad_norm": 5.022364616394043, "learning_rate": 6.594256944369338e-06, "loss": 0.2462, "step": 8119 }, { "epoch": 0.4027977578252889, "grad_norm": 6.556063652038574, "learning_rate": 6.5935109166137675e-06, "loss": 0.3134, "step": 8120 }, { "epoch": 0.4028473634604891, "grad_norm": 6.172582149505615, "learning_rate": 6.592764849370274e-06, "loss": 0.3179, "step": 8121 }, { "epoch": 0.4028969690956893, "grad_norm": 6.50973653793335, "learning_rate": 6.5920187426573486e-06, "loss": 0.2256, "step": 8122 }, { "epoch": 0.4029465747308894, "grad_norm": 7.002032279968262, "learning_rate": 6.591272596493479e-06, "loss": 0.3429, "step": 8123 }, { "epoch": 0.4029961803660896, "grad_norm": 8.78823184967041, "learning_rate": 6.590526410897153e-06, "loss": 0.3229, "step": 8124 }, { "epoch": 0.40304578600128976, "grad_norm": 9.163922309875488, "learning_rate": 6.589780185886865e-06, "loss": 0.3717, "step": 8125 }, { "epoch": 0.4030953916364899, "grad_norm": 6.4039106369018555, "learning_rate": 6.5890339214811025e-06, "loss": 0.2929, "step": 8126 }, { "epoch": 0.40314499727169006, "grad_norm": 6.841647624969482, "learning_rate": 6.588287617698364e-06, "loss": 0.3513, "step": 8127 }, { "epoch": 0.40319460290689024, "grad_norm": 42.67544937133789, "learning_rate": 6.587541274557136e-06, "loss": 0.3827, "step": 8128 }, { "epoch": 0.40324420854209037, "grad_norm": 9.726941108703613, "learning_rate": 6.586794892075919e-06, "loss": 0.4758, "step": 8129 }, { "epoch": 0.40329381417729054, "grad_norm": 4.722309589385986, "learning_rate": 6.586048470273207e-06, "loss": 0.3019, "step": 8130 }, { "epoch": 0.4033434198124907, "grad_norm": 6.444636344909668, "learning_rate": 6.585302009167496e-06, "loss": 0.3397, "step": 8131 }, { "epoch": 0.40339302544769085, "grad_norm": 9.898844718933105, "learning_rate": 6.584555508777284e-06, "loss": 0.4859, "step": 8132 }, { "epoch": 0.403442631082891, "grad_norm": 7.539127826690674, "learning_rate": 6.583808969121069e-06, "loss": 0.2966, "step": 8133 }, { "epoch": 0.4034922367180912, "grad_norm": 6.40886926651001, "learning_rate": 6.583062390217353e-06, "loss": 0.3474, "step": 8134 }, { "epoch": 0.4035418423532913, "grad_norm": 4.589723110198975, "learning_rate": 6.582315772084634e-06, "loss": 0.2692, "step": 8135 }, { "epoch": 0.4035914479884915, "grad_norm": 5.539412498474121, "learning_rate": 6.581569114741414e-06, "loss": 0.272, "step": 8136 }, { "epoch": 0.4036410536236916, "grad_norm": 15.098658561706543, "learning_rate": 6.580822418206198e-06, "loss": 0.5268, "step": 8137 }, { "epoch": 0.4036906592588918, "grad_norm": 8.455954551696777, "learning_rate": 6.580075682497484e-06, "loss": 0.2855, "step": 8138 }, { "epoch": 0.403740264894092, "grad_norm": 7.746862411499023, "learning_rate": 6.579328907633782e-06, "loss": 0.2726, "step": 8139 }, { "epoch": 0.4037898705292921, "grad_norm": 4.838782787322998, "learning_rate": 6.578582093633596e-06, "loss": 0.2755, "step": 8140 }, { "epoch": 0.4038394761644923, "grad_norm": 13.97804069519043, "learning_rate": 6.57783524051543e-06, "loss": 0.4448, "step": 8141 }, { "epoch": 0.40388908179969246, "grad_norm": 8.719269752502441, "learning_rate": 6.577088348297793e-06, "loss": 0.4025, "step": 8142 }, { "epoch": 0.4039386874348926, "grad_norm": 8.420600891113281, "learning_rate": 6.5763414169991934e-06, "loss": 0.3167, "step": 8143 }, { "epoch": 0.40398829307009276, "grad_norm": 5.734579086303711, "learning_rate": 6.575594446638141e-06, "loss": 0.2908, "step": 8144 }, { "epoch": 0.40403789870529294, "grad_norm": 6.917837619781494, "learning_rate": 6.574847437233146e-06, "loss": 0.2568, "step": 8145 }, { "epoch": 0.40408750434049306, "grad_norm": 7.226398468017578, "learning_rate": 6.574100388802719e-06, "loss": 0.4181, "step": 8146 }, { "epoch": 0.40413710997569324, "grad_norm": 13.352898597717285, "learning_rate": 6.5733533013653706e-06, "loss": 0.2614, "step": 8147 }, { "epoch": 0.4041867156108934, "grad_norm": 6.256410121917725, "learning_rate": 6.572606174939617e-06, "loss": 0.3403, "step": 8148 }, { "epoch": 0.40423632124609354, "grad_norm": 11.283366203308105, "learning_rate": 6.571859009543971e-06, "loss": 0.3825, "step": 8149 }, { "epoch": 0.4042859268812937, "grad_norm": 7.063945293426514, "learning_rate": 6.571111805196948e-06, "loss": 0.2332, "step": 8150 }, { "epoch": 0.4043355325164939, "grad_norm": 8.557994842529297, "learning_rate": 6.570364561917063e-06, "loss": 0.4272, "step": 8151 }, { "epoch": 0.404385138151694, "grad_norm": 12.783883094787598, "learning_rate": 6.5696172797228345e-06, "loss": 0.3728, "step": 8152 }, { "epoch": 0.4044347437868942, "grad_norm": 8.9168701171875, "learning_rate": 6.568869958632779e-06, "loss": 0.2691, "step": 8153 }, { "epoch": 0.4044843494220943, "grad_norm": 4.750243663787842, "learning_rate": 6.568122598665417e-06, "loss": 0.2798, "step": 8154 }, { "epoch": 0.4045339550572945, "grad_norm": 5.383848190307617, "learning_rate": 6.567375199839268e-06, "loss": 0.2885, "step": 8155 }, { "epoch": 0.4045835606924947, "grad_norm": 5.828566551208496, "learning_rate": 6.566627762172851e-06, "loss": 0.2342, "step": 8156 }, { "epoch": 0.4046331663276948, "grad_norm": 12.496689796447754, "learning_rate": 6.565880285684691e-06, "loss": 0.3869, "step": 8157 }, { "epoch": 0.404682771962895, "grad_norm": 10.856395721435547, "learning_rate": 6.565132770393309e-06, "loss": 0.4351, "step": 8158 }, { "epoch": 0.40473237759809516, "grad_norm": 10.206257820129395, "learning_rate": 6.564385216317228e-06, "loss": 0.3691, "step": 8159 }, { "epoch": 0.4047819832332953, "grad_norm": 8.681084632873535, "learning_rate": 6.5636376234749745e-06, "loss": 0.3309, "step": 8160 }, { "epoch": 0.40483158886849546, "grad_norm": 7.725153923034668, "learning_rate": 6.562889991885073e-06, "loss": 0.3703, "step": 8161 }, { "epoch": 0.40488119450369564, "grad_norm": 5.909244060516357, "learning_rate": 6.562142321566051e-06, "loss": 0.3282, "step": 8162 }, { "epoch": 0.40493080013889576, "grad_norm": 5.806608200073242, "learning_rate": 6.561394612536434e-06, "loss": 0.2545, "step": 8163 }, { "epoch": 0.40498040577409594, "grad_norm": 8.739251136779785, "learning_rate": 6.560646864814755e-06, "loss": 0.3748, "step": 8164 }, { "epoch": 0.4050300114092961, "grad_norm": 8.076193809509277, "learning_rate": 6.559899078419538e-06, "loss": 0.4338, "step": 8165 }, { "epoch": 0.40507961704449624, "grad_norm": 15.273876190185547, "learning_rate": 6.559151253369318e-06, "loss": 0.4152, "step": 8166 }, { "epoch": 0.4051292226796964, "grad_norm": 8.808553695678711, "learning_rate": 6.558403389682625e-06, "loss": 0.311, "step": 8167 }, { "epoch": 0.4051788283148966, "grad_norm": 6.605782985687256, "learning_rate": 6.557655487377989e-06, "loss": 0.2823, "step": 8168 }, { "epoch": 0.4052284339500967, "grad_norm": 5.751838684082031, "learning_rate": 6.556907546473947e-06, "loss": 0.2975, "step": 8169 }, { "epoch": 0.4052780395852969, "grad_norm": 7.33817195892334, "learning_rate": 6.556159566989031e-06, "loss": 0.3016, "step": 8170 }, { "epoch": 0.405327645220497, "grad_norm": 9.118626594543457, "learning_rate": 6.555411548941778e-06, "loss": 0.3043, "step": 8171 }, { "epoch": 0.4053772508556972, "grad_norm": 5.691238880157471, "learning_rate": 6.554663492350723e-06, "loss": 0.294, "step": 8172 }, { "epoch": 0.4054268564908974, "grad_norm": 10.81389045715332, "learning_rate": 6.553915397234402e-06, "loss": 0.4362, "step": 8173 }, { "epoch": 0.4054764621260975, "grad_norm": 10.73210620880127, "learning_rate": 6.553167263611357e-06, "loss": 0.3199, "step": 8174 }, { "epoch": 0.4055260677612977, "grad_norm": 6.295868396759033, "learning_rate": 6.552419091500122e-06, "loss": 0.3454, "step": 8175 }, { "epoch": 0.40557567339649786, "grad_norm": 10.842802047729492, "learning_rate": 6.5516708809192405e-06, "loss": 0.3119, "step": 8176 }, { "epoch": 0.405625279031698, "grad_norm": 8.414471626281738, "learning_rate": 6.550922631887253e-06, "loss": 0.3026, "step": 8177 }, { "epoch": 0.40567488466689816, "grad_norm": 11.856038093566895, "learning_rate": 6.550174344422702e-06, "loss": 0.2909, "step": 8178 }, { "epoch": 0.40572449030209834, "grad_norm": 6.6994853019714355, "learning_rate": 6.5494260185441285e-06, "loss": 0.3372, "step": 8179 }, { "epoch": 0.40577409593729846, "grad_norm": 6.695154190063477, "learning_rate": 6.548677654270077e-06, "loss": 0.3063, "step": 8180 }, { "epoch": 0.40582370157249864, "grad_norm": 9.366320610046387, "learning_rate": 6.547929251619094e-06, "loss": 0.3871, "step": 8181 }, { "epoch": 0.4058733072076988, "grad_norm": 9.5745849609375, "learning_rate": 6.5471808106097225e-06, "loss": 0.3838, "step": 8182 }, { "epoch": 0.40592291284289894, "grad_norm": 5.485726833343506, "learning_rate": 6.54643233126051e-06, "loss": 0.3768, "step": 8183 }, { "epoch": 0.4059725184780991, "grad_norm": 21.12229347229004, "learning_rate": 6.545683813590008e-06, "loss": 0.3586, "step": 8184 }, { "epoch": 0.40602212411329924, "grad_norm": 7.10235071182251, "learning_rate": 6.544935257616759e-06, "loss": 0.2578, "step": 8185 }, { "epoch": 0.4060717297484994, "grad_norm": 5.554854393005371, "learning_rate": 6.544186663359317e-06, "loss": 0.266, "step": 8186 }, { "epoch": 0.4061213353836996, "grad_norm": 13.668720245361328, "learning_rate": 6.54343803083623e-06, "loss": 0.3138, "step": 8187 }, { "epoch": 0.4061709410188997, "grad_norm": 5.422760009765625, "learning_rate": 6.542689360066051e-06, "loss": 0.2214, "step": 8188 }, { "epoch": 0.4062205466540999, "grad_norm": 8.089689254760742, "learning_rate": 6.541940651067331e-06, "loss": 0.3879, "step": 8189 }, { "epoch": 0.4062701522893001, "grad_norm": 9.688043594360352, "learning_rate": 6.5411919038586256e-06, "loss": 0.4115, "step": 8190 }, { "epoch": 0.4063197579245002, "grad_norm": 4.465935230255127, "learning_rate": 6.540443118458487e-06, "loss": 0.274, "step": 8191 }, { "epoch": 0.4063693635597004, "grad_norm": 8.489799499511719, "learning_rate": 6.539694294885471e-06, "loss": 0.4533, "step": 8192 }, { "epoch": 0.40641896919490056, "grad_norm": 9.7196683883667, "learning_rate": 6.5389454331581336e-06, "loss": 0.3488, "step": 8193 }, { "epoch": 0.4064685748301007, "grad_norm": 19.22551155090332, "learning_rate": 6.538196533295034e-06, "loss": 0.3503, "step": 8194 }, { "epoch": 0.40651818046530086, "grad_norm": 38.90424346923828, "learning_rate": 6.537447595314726e-06, "loss": 0.3708, "step": 8195 }, { "epoch": 0.40656778610050104, "grad_norm": 7.62680721282959, "learning_rate": 6.536698619235771e-06, "loss": 0.392, "step": 8196 }, { "epoch": 0.40661739173570116, "grad_norm": 14.489943504333496, "learning_rate": 6.535949605076732e-06, "loss": 0.2544, "step": 8197 }, { "epoch": 0.40666699737090134, "grad_norm": 6.301872253417969, "learning_rate": 6.535200552856165e-06, "loss": 0.3001, "step": 8198 }, { "epoch": 0.4067166030061015, "grad_norm": 12.007772445678711, "learning_rate": 6.5344514625926336e-06, "loss": 0.3004, "step": 8199 }, { "epoch": 0.40676620864130164, "grad_norm": 7.203836917877197, "learning_rate": 6.5337023343047015e-06, "loss": 0.2691, "step": 8200 }, { "epoch": 0.4068158142765018, "grad_norm": 7.230950832366943, "learning_rate": 6.532953168010931e-06, "loss": 0.3179, "step": 8201 }, { "epoch": 0.40686541991170194, "grad_norm": 8.551226615905762, "learning_rate": 6.532203963729888e-06, "loss": 0.3861, "step": 8202 }, { "epoch": 0.4069150255469021, "grad_norm": 6.232387542724609, "learning_rate": 6.531454721480139e-06, "loss": 0.3406, "step": 8203 }, { "epoch": 0.4069646311821023, "grad_norm": 6.886396884918213, "learning_rate": 6.530705441280247e-06, "loss": 0.2889, "step": 8204 }, { "epoch": 0.4070142368173024, "grad_norm": 13.667840957641602, "learning_rate": 6.5299561231487825e-06, "loss": 0.3204, "step": 8205 }, { "epoch": 0.4070638424525026, "grad_norm": 12.636908531188965, "learning_rate": 6.5292067671043136e-06, "loss": 0.4016, "step": 8206 }, { "epoch": 0.4071134480877028, "grad_norm": 7.628061771392822, "learning_rate": 6.528457373165409e-06, "loss": 0.3646, "step": 8207 }, { "epoch": 0.4071630537229029, "grad_norm": 6.518974304199219, "learning_rate": 6.52770794135064e-06, "loss": 0.2601, "step": 8208 }, { "epoch": 0.4072126593581031, "grad_norm": 11.973231315612793, "learning_rate": 6.526958471678577e-06, "loss": 0.288, "step": 8209 }, { "epoch": 0.40726226499330326, "grad_norm": 10.17873477935791, "learning_rate": 6.5262089641677915e-06, "loss": 0.3422, "step": 8210 }, { "epoch": 0.4073118706285034, "grad_norm": 9.208586692810059, "learning_rate": 6.525459418836859e-06, "loss": 0.2395, "step": 8211 }, { "epoch": 0.40736147626370356, "grad_norm": 16.52077293395996, "learning_rate": 6.52470983570435e-06, "loss": 0.3292, "step": 8212 }, { "epoch": 0.40741108189890374, "grad_norm": 12.98205852508545, "learning_rate": 6.523960214788844e-06, "loss": 0.4711, "step": 8213 }, { "epoch": 0.40746068753410386, "grad_norm": 8.382335662841797, "learning_rate": 6.523210556108912e-06, "loss": 0.306, "step": 8214 }, { "epoch": 0.40751029316930404, "grad_norm": 6.974193572998047, "learning_rate": 6.522460859683135e-06, "loss": 0.2557, "step": 8215 }, { "epoch": 0.4075598988045042, "grad_norm": 5.78615665435791, "learning_rate": 6.5217111255300895e-06, "loss": 0.3426, "step": 8216 }, { "epoch": 0.40760950443970434, "grad_norm": 6.397883415222168, "learning_rate": 6.520961353668353e-06, "loss": 0.3091, "step": 8217 }, { "epoch": 0.4076591100749045, "grad_norm": 10.219032287597656, "learning_rate": 6.520211544116507e-06, "loss": 0.2913, "step": 8218 }, { "epoch": 0.40770871571010464, "grad_norm": 4.890500545501709, "learning_rate": 6.51946169689313e-06, "loss": 0.2863, "step": 8219 }, { "epoch": 0.4077583213453048, "grad_norm": 6.095818042755127, "learning_rate": 6.518711812016806e-06, "loss": 0.2828, "step": 8220 }, { "epoch": 0.407807926980505, "grad_norm": 6.511114120483398, "learning_rate": 6.517961889506116e-06, "loss": 0.2883, "step": 8221 }, { "epoch": 0.4078575326157051, "grad_norm": 7.14715051651001, "learning_rate": 6.517211929379643e-06, "loss": 0.2514, "step": 8222 }, { "epoch": 0.4079071382509053, "grad_norm": 6.646841049194336, "learning_rate": 6.516461931655973e-06, "loss": 0.3204, "step": 8223 }, { "epoch": 0.4079567438861055, "grad_norm": 5.070342063903809, "learning_rate": 6.515711896353689e-06, "loss": 0.2955, "step": 8224 }, { "epoch": 0.4080063495213056, "grad_norm": 6.626026630401611, "learning_rate": 6.514961823491381e-06, "loss": 0.3144, "step": 8225 }, { "epoch": 0.4080559551565058, "grad_norm": 6.770035743713379, "learning_rate": 6.514211713087631e-06, "loss": 0.3846, "step": 8226 }, { "epoch": 0.40810556079170596, "grad_norm": 7.905810832977295, "learning_rate": 6.513461565161033e-06, "loss": 0.3109, "step": 8227 }, { "epoch": 0.4081551664269061, "grad_norm": 7.660275936126709, "learning_rate": 6.51271137973017e-06, "loss": 0.426, "step": 8228 }, { "epoch": 0.40820477206210626, "grad_norm": 4.771656513214111, "learning_rate": 6.511961156813635e-06, "loss": 0.2849, "step": 8229 }, { "epoch": 0.40825437769730644, "grad_norm": 5.366342544555664, "learning_rate": 6.51121089643002e-06, "loss": 0.256, "step": 8230 }, { "epoch": 0.40830398333250656, "grad_norm": 7.065442085266113, "learning_rate": 6.510460598597914e-06, "loss": 0.2095, "step": 8231 }, { "epoch": 0.40835358896770674, "grad_norm": 5.3019633293151855, "learning_rate": 6.509710263335912e-06, "loss": 0.3522, "step": 8232 }, { "epoch": 0.4084031946029069, "grad_norm": 5.81487512588501, "learning_rate": 6.5089598906626074e-06, "loss": 0.379, "step": 8233 }, { "epoch": 0.40845280023810704, "grad_norm": 5.0504302978515625, "learning_rate": 6.508209480596593e-06, "loss": 0.2675, "step": 8234 }, { "epoch": 0.4085024058733072, "grad_norm": 16.426589965820312, "learning_rate": 6.5074590331564655e-06, "loss": 0.531, "step": 8235 }, { "epoch": 0.40855201150850734, "grad_norm": 6.393742084503174, "learning_rate": 6.506708548360822e-06, "loss": 0.3233, "step": 8236 }, { "epoch": 0.4086016171437075, "grad_norm": 9.279458999633789, "learning_rate": 6.505958026228258e-06, "loss": 0.3405, "step": 8237 }, { "epoch": 0.4086512227789077, "grad_norm": 9.002007484436035, "learning_rate": 6.5052074667773736e-06, "loss": 0.3687, "step": 8238 }, { "epoch": 0.4087008284141078, "grad_norm": 19.319047927856445, "learning_rate": 6.504456870026768e-06, "loss": 0.4609, "step": 8239 }, { "epoch": 0.408750434049308, "grad_norm": 5.799563884735107, "learning_rate": 6.503706235995038e-06, "loss": 0.3291, "step": 8240 }, { "epoch": 0.4088000396845082, "grad_norm": 11.726917266845703, "learning_rate": 6.502955564700789e-06, "loss": 0.3485, "step": 8241 }, { "epoch": 0.4088496453197083, "grad_norm": 10.011758804321289, "learning_rate": 6.502204856162622e-06, "loss": 0.3553, "step": 8242 }, { "epoch": 0.4088992509549085, "grad_norm": 5.085601806640625, "learning_rate": 6.501454110399138e-06, "loss": 0.2257, "step": 8243 }, { "epoch": 0.40894885659010866, "grad_norm": 3.7906644344329834, "learning_rate": 6.500703327428942e-06, "loss": 0.2615, "step": 8244 }, { "epoch": 0.4089984622253088, "grad_norm": 7.723247528076172, "learning_rate": 6.499952507270641e-06, "loss": 0.3384, "step": 8245 }, { "epoch": 0.40904806786050896, "grad_norm": 5.480218410491943, "learning_rate": 6.499201649942836e-06, "loss": 0.2463, "step": 8246 }, { "epoch": 0.40909767349570914, "grad_norm": 11.306356430053711, "learning_rate": 6.498450755464135e-06, "loss": 0.2693, "step": 8247 }, { "epoch": 0.40914727913090926, "grad_norm": 9.435400009155273, "learning_rate": 6.49769982385315e-06, "loss": 0.3016, "step": 8248 }, { "epoch": 0.40919688476610944, "grad_norm": 6.993593692779541, "learning_rate": 6.496948855128482e-06, "loss": 0.3701, "step": 8249 }, { "epoch": 0.4092464904013096, "grad_norm": 8.278627395629883, "learning_rate": 6.496197849308746e-06, "loss": 0.3288, "step": 8250 }, { "epoch": 0.40929609603650974, "grad_norm": 8.149149894714355, "learning_rate": 6.495446806412551e-06, "loss": 0.3694, "step": 8251 }, { "epoch": 0.4093457016717099, "grad_norm": 5.5268940925598145, "learning_rate": 6.494695726458507e-06, "loss": 0.2674, "step": 8252 }, { "epoch": 0.40939530730691004, "grad_norm": 7.497143745422363, "learning_rate": 6.493944609465227e-06, "loss": 0.4073, "step": 8253 }, { "epoch": 0.4094449129421102, "grad_norm": 5.614459991455078, "learning_rate": 6.493193455451323e-06, "loss": 0.3503, "step": 8254 }, { "epoch": 0.4094945185773104, "grad_norm": 6.133059024810791, "learning_rate": 6.49244226443541e-06, "loss": 0.1679, "step": 8255 }, { "epoch": 0.4095441242125105, "grad_norm": 4.839748859405518, "learning_rate": 6.491691036436103e-06, "loss": 0.2617, "step": 8256 }, { "epoch": 0.4095937298477107, "grad_norm": 8.055593490600586, "learning_rate": 6.490939771472018e-06, "loss": 0.3887, "step": 8257 }, { "epoch": 0.4096433354829109, "grad_norm": 4.994906902313232, "learning_rate": 6.49018846956177e-06, "loss": 0.284, "step": 8258 }, { "epoch": 0.409692941118111, "grad_norm": 6.309365272521973, "learning_rate": 6.4894371307239776e-06, "loss": 0.294, "step": 8259 }, { "epoch": 0.4097425467533112, "grad_norm": 8.448390007019043, "learning_rate": 6.48868575497726e-06, "loss": 0.2723, "step": 8260 }, { "epoch": 0.40979215238851135, "grad_norm": 8.259685516357422, "learning_rate": 6.487934342340235e-06, "loss": 0.4093, "step": 8261 }, { "epoch": 0.4098417580237115, "grad_norm": 3.7750234603881836, "learning_rate": 6.487182892831526e-06, "loss": 0.3577, "step": 8262 }, { "epoch": 0.40989136365891166, "grad_norm": 7.020105361938477, "learning_rate": 6.486431406469751e-06, "loss": 0.3413, "step": 8263 }, { "epoch": 0.40994096929411183, "grad_norm": 6.619856834411621, "learning_rate": 6.485679883273533e-06, "loss": 0.3323, "step": 8264 }, { "epoch": 0.40999057492931196, "grad_norm": 4.477511882781982, "learning_rate": 6.484928323261496e-06, "loss": 0.2566, "step": 8265 }, { "epoch": 0.41004018056451214, "grad_norm": 9.768450736999512, "learning_rate": 6.484176726452262e-06, "loss": 0.4109, "step": 8266 }, { "epoch": 0.4100897861997123, "grad_norm": 10.565398216247559, "learning_rate": 6.483425092864459e-06, "loss": 0.3459, "step": 8267 }, { "epoch": 0.41013939183491244, "grad_norm": 6.564810752868652, "learning_rate": 6.48267342251671e-06, "loss": 0.3335, "step": 8268 }, { "epoch": 0.4101889974701126, "grad_norm": 7.856904983520508, "learning_rate": 6.481921715427645e-06, "loss": 0.3608, "step": 8269 }, { "epoch": 0.41023860310531274, "grad_norm": 4.970837593078613, "learning_rate": 6.481169971615887e-06, "loss": 0.2437, "step": 8270 }, { "epoch": 0.4102882087405129, "grad_norm": 7.279654502868652, "learning_rate": 6.480418191100069e-06, "loss": 0.3005, "step": 8271 }, { "epoch": 0.4103378143757131, "grad_norm": 7.050360202789307, "learning_rate": 6.479666373898818e-06, "loss": 0.3464, "step": 8272 }, { "epoch": 0.4103874200109132, "grad_norm": 6.128451824188232, "learning_rate": 6.478914520030764e-06, "loss": 0.2837, "step": 8273 }, { "epoch": 0.4104370256461134, "grad_norm": 15.837666511535645, "learning_rate": 6.478162629514539e-06, "loss": 0.4464, "step": 8274 }, { "epoch": 0.4104866312813136, "grad_norm": 4.48174524307251, "learning_rate": 6.477410702368777e-06, "loss": 0.3164, "step": 8275 }, { "epoch": 0.4105362369165137, "grad_norm": 12.713321685791016, "learning_rate": 6.476658738612109e-06, "loss": 0.4677, "step": 8276 }, { "epoch": 0.4105858425517139, "grad_norm": 6.265842914581299, "learning_rate": 6.475906738263169e-06, "loss": 0.2238, "step": 8277 }, { "epoch": 0.41063544818691405, "grad_norm": 3.611445188522339, "learning_rate": 6.475154701340592e-06, "loss": 0.2455, "step": 8278 }, { "epoch": 0.4106850538221142, "grad_norm": 14.285157203674316, "learning_rate": 6.474402627863017e-06, "loss": 0.2956, "step": 8279 }, { "epoch": 0.41073465945731436, "grad_norm": 14.67897891998291, "learning_rate": 6.473650517849075e-06, "loss": 0.4676, "step": 8280 }, { "epoch": 0.41078426509251453, "grad_norm": 4.83132791519165, "learning_rate": 6.4728983713174066e-06, "loss": 0.2479, "step": 8281 }, { "epoch": 0.41083387072771466, "grad_norm": 39.649147033691406, "learning_rate": 6.472146188286652e-06, "loss": 0.273, "step": 8282 }, { "epoch": 0.41088347636291483, "grad_norm": 9.521241188049316, "learning_rate": 6.471393968775448e-06, "loss": 0.2497, "step": 8283 }, { "epoch": 0.410933081998115, "grad_norm": 5.333329200744629, "learning_rate": 6.470641712802436e-06, "loss": 0.2884, "step": 8284 }, { "epoch": 0.41098268763331514, "grad_norm": 19.7803955078125, "learning_rate": 6.469889420386256e-06, "loss": 0.5328, "step": 8285 }, { "epoch": 0.4110322932685153, "grad_norm": 8.690640449523926, "learning_rate": 6.469137091545553e-06, "loss": 0.3499, "step": 8286 }, { "epoch": 0.41108189890371544, "grad_norm": 6.68502140045166, "learning_rate": 6.4683847262989675e-06, "loss": 0.2936, "step": 8287 }, { "epoch": 0.4111315045389156, "grad_norm": 13.8678560256958, "learning_rate": 6.467632324665144e-06, "loss": 0.4425, "step": 8288 }, { "epoch": 0.4111811101741158, "grad_norm": 10.337818145751953, "learning_rate": 6.4668798866627295e-06, "loss": 0.32, "step": 8289 }, { "epoch": 0.4112307158093159, "grad_norm": 4.791990280151367, "learning_rate": 6.466127412310365e-06, "loss": 0.2858, "step": 8290 }, { "epoch": 0.4112803214445161, "grad_norm": 5.790664196014404, "learning_rate": 6.465374901626702e-06, "loss": 0.3162, "step": 8291 }, { "epoch": 0.4113299270797163, "grad_norm": 5.874716758728027, "learning_rate": 6.464622354630385e-06, "loss": 0.2671, "step": 8292 }, { "epoch": 0.4113795327149164, "grad_norm": 8.41309928894043, "learning_rate": 6.463869771340064e-06, "loss": 0.3104, "step": 8293 }, { "epoch": 0.4114291383501166, "grad_norm": 6.655261516571045, "learning_rate": 6.463117151774387e-06, "loss": 0.3325, "step": 8294 }, { "epoch": 0.41147874398531675, "grad_norm": 7.724358558654785, "learning_rate": 6.462364495952007e-06, "loss": 0.3199, "step": 8295 }, { "epoch": 0.4115283496205169, "grad_norm": 8.883708953857422, "learning_rate": 6.461611803891572e-06, "loss": 0.3311, "step": 8296 }, { "epoch": 0.41157795525571705, "grad_norm": 10.934976577758789, "learning_rate": 6.460859075611736e-06, "loss": 0.2992, "step": 8297 }, { "epoch": 0.41162756089091723, "grad_norm": 9.30780029296875, "learning_rate": 6.46010631113115e-06, "loss": 0.2999, "step": 8298 }, { "epoch": 0.41167716652611736, "grad_norm": 6.893355369567871, "learning_rate": 6.4593535104684715e-06, "loss": 0.3478, "step": 8299 }, { "epoch": 0.41172677216131753, "grad_norm": 5.357611656188965, "learning_rate": 6.458600673642351e-06, "loss": 0.3324, "step": 8300 }, { "epoch": 0.4117763777965177, "grad_norm": 6.95661735534668, "learning_rate": 6.457847800671446e-06, "loss": 0.316, "step": 8301 }, { "epoch": 0.41182598343171783, "grad_norm": 6.166412830352783, "learning_rate": 6.4570948915744135e-06, "loss": 0.2326, "step": 8302 }, { "epoch": 0.411875589066918, "grad_norm": 5.190500736236572, "learning_rate": 6.456341946369911e-06, "loss": 0.2658, "step": 8303 }, { "epoch": 0.41192519470211814, "grad_norm": 8.906079292297363, "learning_rate": 6.455588965076596e-06, "loss": 0.3677, "step": 8304 }, { "epoch": 0.4119748003373183, "grad_norm": 17.631977081298828, "learning_rate": 6.454835947713127e-06, "loss": 0.3495, "step": 8305 }, { "epoch": 0.4120244059725185, "grad_norm": 10.77230167388916, "learning_rate": 6.4540828942981655e-06, "loss": 0.3687, "step": 8306 }, { "epoch": 0.4120740116077186, "grad_norm": 6.525575160980225, "learning_rate": 6.453329804850373e-06, "loss": 0.3483, "step": 8307 }, { "epoch": 0.4121236172429188, "grad_norm": 13.506216049194336, "learning_rate": 6.452576679388409e-06, "loss": 0.3978, "step": 8308 }, { "epoch": 0.41217322287811897, "grad_norm": 13.020296096801758, "learning_rate": 6.4518235179309395e-06, "loss": 0.3193, "step": 8309 }, { "epoch": 0.4122228285133191, "grad_norm": 7.219079494476318, "learning_rate": 6.4510703204966254e-06, "loss": 0.3418, "step": 8310 }, { "epoch": 0.4122724341485193, "grad_norm": 11.17712116241455, "learning_rate": 6.450317087104133e-06, "loss": 0.3676, "step": 8311 }, { "epoch": 0.41232203978371945, "grad_norm": 8.071907997131348, "learning_rate": 6.449563817772127e-06, "loss": 0.3011, "step": 8312 }, { "epoch": 0.4123716454189196, "grad_norm": 6.9069504737854, "learning_rate": 6.4488105125192736e-06, "loss": 0.3919, "step": 8313 }, { "epoch": 0.41242125105411975, "grad_norm": 6.092918395996094, "learning_rate": 6.44805717136424e-06, "loss": 0.3363, "step": 8314 }, { "epoch": 0.41247085668931993, "grad_norm": 7.871789932250977, "learning_rate": 6.447303794325697e-06, "loss": 0.2891, "step": 8315 }, { "epoch": 0.41252046232452005, "grad_norm": 5.1174468994140625, "learning_rate": 6.446550381422308e-06, "loss": 0.2942, "step": 8316 }, { "epoch": 0.41257006795972023, "grad_norm": 9.361279487609863, "learning_rate": 6.445796932672748e-06, "loss": 0.3483, "step": 8317 }, { "epoch": 0.41261967359492036, "grad_norm": 7.990181922912598, "learning_rate": 6.445043448095684e-06, "loss": 0.3664, "step": 8318 }, { "epoch": 0.41266927923012053, "grad_norm": 23.777042388916016, "learning_rate": 6.444289927709792e-06, "loss": 0.2596, "step": 8319 }, { "epoch": 0.4127188848653207, "grad_norm": 4.801300525665283, "learning_rate": 6.443536371533743e-06, "loss": 0.3719, "step": 8320 }, { "epoch": 0.41276849050052083, "grad_norm": 6.871494770050049, "learning_rate": 6.442782779586208e-06, "loss": 0.2154, "step": 8321 }, { "epoch": 0.412818096135721, "grad_norm": 5.1887311935424805, "learning_rate": 6.4420291518858625e-06, "loss": 0.3092, "step": 8322 }, { "epoch": 0.4128677017709212, "grad_norm": 5.008848190307617, "learning_rate": 6.441275488451384e-06, "loss": 0.2576, "step": 8323 }, { "epoch": 0.4129173074061213, "grad_norm": 6.217060565948486, "learning_rate": 6.440521789301446e-06, "loss": 0.2625, "step": 8324 }, { "epoch": 0.4129669130413215, "grad_norm": 5.747735977172852, "learning_rate": 6.439768054454727e-06, "loss": 0.3125, "step": 8325 }, { "epoch": 0.41301651867652167, "grad_norm": 5.059518814086914, "learning_rate": 6.439014283929904e-06, "loss": 0.2431, "step": 8326 }, { "epoch": 0.4130661243117218, "grad_norm": 8.706039428710938, "learning_rate": 6.438260477745656e-06, "loss": 0.3545, "step": 8327 }, { "epoch": 0.413115729946922, "grad_norm": 13.716561317443848, "learning_rate": 6.437506635920663e-06, "loss": 0.2793, "step": 8328 }, { "epoch": 0.41316533558212215, "grad_norm": 13.598962783813477, "learning_rate": 6.436752758473605e-06, "loss": 0.4121, "step": 8329 }, { "epoch": 0.4132149412173223, "grad_norm": 6.029038906097412, "learning_rate": 6.435998845423164e-06, "loss": 0.3101, "step": 8330 }, { "epoch": 0.41326454685252245, "grad_norm": 14.358681678771973, "learning_rate": 6.435244896788022e-06, "loss": 0.4133, "step": 8331 }, { "epoch": 0.41331415248772263, "grad_norm": 4.393636226654053, "learning_rate": 6.434490912586862e-06, "loss": 0.2353, "step": 8332 }, { "epoch": 0.41336375812292275, "grad_norm": 5.525365352630615, "learning_rate": 6.433736892838369e-06, "loss": 0.3219, "step": 8333 }, { "epoch": 0.41341336375812293, "grad_norm": 7.871824264526367, "learning_rate": 6.432982837561226e-06, "loss": 0.3059, "step": 8334 }, { "epoch": 0.41346296939332305, "grad_norm": 8.397173881530762, "learning_rate": 6.432228746774121e-06, "loss": 0.2838, "step": 8335 }, { "epoch": 0.41351257502852323, "grad_norm": 6.6539459228515625, "learning_rate": 6.4314746204957404e-06, "loss": 0.2385, "step": 8336 }, { "epoch": 0.4135621806637234, "grad_norm": 4.774646759033203, "learning_rate": 6.43072045874477e-06, "loss": 0.3053, "step": 8337 }, { "epoch": 0.41361178629892353, "grad_norm": 6.062375545501709, "learning_rate": 6.429966261539899e-06, "loss": 0.3103, "step": 8338 }, { "epoch": 0.4136613919341237, "grad_norm": 6.837894916534424, "learning_rate": 6.4292120288998186e-06, "loss": 0.2236, "step": 8339 }, { "epoch": 0.4137109975693239, "grad_norm": 10.47966194152832, "learning_rate": 6.428457760843217e-06, "loss": 0.4306, "step": 8340 }, { "epoch": 0.413760603204524, "grad_norm": 6.852570533752441, "learning_rate": 6.427703457388785e-06, "loss": 0.3727, "step": 8341 }, { "epoch": 0.4138102088397242, "grad_norm": 9.850144386291504, "learning_rate": 6.4269491185552165e-06, "loss": 0.3413, "step": 8342 }, { "epoch": 0.41385981447492437, "grad_norm": 7.577626705169678, "learning_rate": 6.426194744361204e-06, "loss": 0.3401, "step": 8343 }, { "epoch": 0.4139094201101245, "grad_norm": 6.090060234069824, "learning_rate": 6.42544033482544e-06, "loss": 0.357, "step": 8344 }, { "epoch": 0.41395902574532467, "grad_norm": 4.161980628967285, "learning_rate": 6.424685889966618e-06, "loss": 0.1961, "step": 8345 }, { "epoch": 0.41400863138052485, "grad_norm": 6.129615306854248, "learning_rate": 6.423931409803439e-06, "loss": 0.2577, "step": 8346 }, { "epoch": 0.414058237015725, "grad_norm": 7.367713928222656, "learning_rate": 6.4231768943545926e-06, "loss": 0.3299, "step": 8347 }, { "epoch": 0.41410784265092515, "grad_norm": 9.02859878540039, "learning_rate": 6.422422343638779e-06, "loss": 0.4051, "step": 8348 }, { "epoch": 0.41415744828612533, "grad_norm": 8.378251075744629, "learning_rate": 6.421667757674697e-06, "loss": 0.3704, "step": 8349 }, { "epoch": 0.41420705392132545, "grad_norm": 7.233518123626709, "learning_rate": 6.420913136481046e-06, "loss": 0.3547, "step": 8350 }, { "epoch": 0.41425665955652563, "grad_norm": 9.305235862731934, "learning_rate": 6.420158480076524e-06, "loss": 0.4074, "step": 8351 }, { "epoch": 0.41430626519172575, "grad_norm": 10.170196533203125, "learning_rate": 6.419403788479831e-06, "loss": 0.2486, "step": 8352 }, { "epoch": 0.41435587082692593, "grad_norm": 3.9899003505706787, "learning_rate": 6.418649061709672e-06, "loss": 0.3104, "step": 8353 }, { "epoch": 0.4144054764621261, "grad_norm": 5.483023166656494, "learning_rate": 6.417894299784746e-06, "loss": 0.2323, "step": 8354 }, { "epoch": 0.41445508209732623, "grad_norm": 4.867655277252197, "learning_rate": 6.417139502723759e-06, "loss": 0.2138, "step": 8355 }, { "epoch": 0.4145046877325264, "grad_norm": 8.078376770019531, "learning_rate": 6.416384670545413e-06, "loss": 0.3423, "step": 8356 }, { "epoch": 0.4145542933677266, "grad_norm": 5.7334771156311035, "learning_rate": 6.415629803268415e-06, "loss": 0.2402, "step": 8357 }, { "epoch": 0.4146038990029267, "grad_norm": 6.006180286407471, "learning_rate": 6.4148749009114695e-06, "loss": 0.3103, "step": 8358 }, { "epoch": 0.4146535046381269, "grad_norm": 9.802151679992676, "learning_rate": 6.414119963493284e-06, "loss": 0.3876, "step": 8359 }, { "epoch": 0.41470311027332707, "grad_norm": 4.612269878387451, "learning_rate": 6.413364991032568e-06, "loss": 0.2655, "step": 8360 }, { "epoch": 0.4147527159085272, "grad_norm": 11.387715339660645, "learning_rate": 6.412609983548026e-06, "loss": 0.371, "step": 8361 }, { "epoch": 0.41480232154372737, "grad_norm": 5.451358318328857, "learning_rate": 6.411854941058369e-06, "loss": 0.2919, "step": 8362 }, { "epoch": 0.41485192717892755, "grad_norm": 6.5319905281066895, "learning_rate": 6.411099863582309e-06, "loss": 0.3388, "step": 8363 }, { "epoch": 0.41490153281412767, "grad_norm": 4.919743537902832, "learning_rate": 6.410344751138557e-06, "loss": 0.3022, "step": 8364 }, { "epoch": 0.41495113844932785, "grad_norm": 22.86598014831543, "learning_rate": 6.409589603745824e-06, "loss": 0.3307, "step": 8365 }, { "epoch": 0.41500074408452803, "grad_norm": 10.787084579467773, "learning_rate": 6.408834421422822e-06, "loss": 0.3317, "step": 8366 }, { "epoch": 0.41505034971972815, "grad_norm": 8.102203369140625, "learning_rate": 6.408079204188266e-06, "loss": 0.3682, "step": 8367 }, { "epoch": 0.41509995535492833, "grad_norm": 4.763258457183838, "learning_rate": 6.40732395206087e-06, "loss": 0.2615, "step": 8368 }, { "epoch": 0.41514956099012845, "grad_norm": 14.211158752441406, "learning_rate": 6.406568665059351e-06, "loss": 0.3867, "step": 8369 }, { "epoch": 0.41519916662532863, "grad_norm": 8.318552017211914, "learning_rate": 6.405813343202423e-06, "loss": 0.3889, "step": 8370 }, { "epoch": 0.4152487722605288, "grad_norm": 5.432484149932861, "learning_rate": 6.405057986508805e-06, "loss": 0.2032, "step": 8371 }, { "epoch": 0.41529837789572893, "grad_norm": 20.294950485229492, "learning_rate": 6.404302594997214e-06, "loss": 0.3842, "step": 8372 }, { "epoch": 0.4153479835309291, "grad_norm": 3.9946866035461426, "learning_rate": 6.40354716868637e-06, "loss": 0.286, "step": 8373 }, { "epoch": 0.4153975891661293, "grad_norm": 8.271482467651367, "learning_rate": 6.402791707594993e-06, "loss": 0.3212, "step": 8374 }, { "epoch": 0.4154471948013294, "grad_norm": 5.197501182556152, "learning_rate": 6.402036211741801e-06, "loss": 0.3424, "step": 8375 }, { "epoch": 0.4154968004365296, "grad_norm": 9.07332992553711, "learning_rate": 6.401280681145518e-06, "loss": 0.2747, "step": 8376 }, { "epoch": 0.41554640607172977, "grad_norm": 5.037099361419678, "learning_rate": 6.400525115824868e-06, "loss": 0.1913, "step": 8377 }, { "epoch": 0.4155960117069299, "grad_norm": 5.831675052642822, "learning_rate": 6.3997695157985706e-06, "loss": 0.2307, "step": 8378 }, { "epoch": 0.41564561734213007, "grad_norm": 6.014793395996094, "learning_rate": 6.399013881085351e-06, "loss": 0.3007, "step": 8379 }, { "epoch": 0.41569522297733025, "grad_norm": 5.130073547363281, "learning_rate": 6.398258211703936e-06, "loss": 0.3003, "step": 8380 }, { "epoch": 0.41574482861253037, "grad_norm": 10.051375389099121, "learning_rate": 6.397502507673048e-06, "loss": 0.2814, "step": 8381 }, { "epoch": 0.41579443424773055, "grad_norm": 7.162989616394043, "learning_rate": 6.396746769011417e-06, "loss": 0.3687, "step": 8382 }, { "epoch": 0.4158440398829307, "grad_norm": 7.888028144836426, "learning_rate": 6.395990995737769e-06, "loss": 0.3542, "step": 8383 }, { "epoch": 0.41589364551813085, "grad_norm": 14.592212677001953, "learning_rate": 6.395235187870832e-06, "loss": 0.3949, "step": 8384 }, { "epoch": 0.41594325115333103, "grad_norm": 8.774496078491211, "learning_rate": 6.394479345429337e-06, "loss": 0.2132, "step": 8385 }, { "epoch": 0.41599285678853115, "grad_norm": 6.411208629608154, "learning_rate": 6.393723468432012e-06, "loss": 0.3846, "step": 8386 }, { "epoch": 0.41604246242373133, "grad_norm": 8.005069732666016, "learning_rate": 6.39296755689759e-06, "loss": 0.3197, "step": 8387 }, { "epoch": 0.4160920680589315, "grad_norm": 8.071687698364258, "learning_rate": 6.392211610844801e-06, "loss": 0.2599, "step": 8388 }, { "epoch": 0.41614167369413163, "grad_norm": 3.831601142883301, "learning_rate": 6.391455630292379e-06, "loss": 0.1988, "step": 8389 }, { "epoch": 0.4161912793293318, "grad_norm": 6.325007915496826, "learning_rate": 6.390699615259057e-06, "loss": 0.2784, "step": 8390 }, { "epoch": 0.416240884964532, "grad_norm": 7.372008800506592, "learning_rate": 6.38994356576357e-06, "loss": 0.3833, "step": 8391 }, { "epoch": 0.4162904905997321, "grad_norm": 8.193634033203125, "learning_rate": 6.389187481824651e-06, "loss": 0.3144, "step": 8392 }, { "epoch": 0.4163400962349323, "grad_norm": 4.522326469421387, "learning_rate": 6.388431363461038e-06, "loss": 0.2731, "step": 8393 }, { "epoch": 0.41638970187013247, "grad_norm": 10.472796440124512, "learning_rate": 6.387675210691471e-06, "loss": 0.3611, "step": 8394 }, { "epoch": 0.4164393075053326, "grad_norm": 12.884713172912598, "learning_rate": 6.386919023534681e-06, "loss": 0.314, "step": 8395 }, { "epoch": 0.41648891314053277, "grad_norm": 12.8996000289917, "learning_rate": 6.386162802009412e-06, "loss": 0.4776, "step": 8396 }, { "epoch": 0.41653851877573295, "grad_norm": 6.477384090423584, "learning_rate": 6.385406546134402e-06, "loss": 0.3477, "step": 8397 }, { "epoch": 0.41658812441093307, "grad_norm": 8.572202682495117, "learning_rate": 6.384650255928389e-06, "loss": 0.2587, "step": 8398 }, { "epoch": 0.41663773004613325, "grad_norm": 3.6415865421295166, "learning_rate": 6.383893931410118e-06, "loss": 0.2051, "step": 8399 }, { "epoch": 0.4166873356813334, "grad_norm": 5.388212203979492, "learning_rate": 6.383137572598328e-06, "loss": 0.2525, "step": 8400 }, { "epoch": 0.41673694131653355, "grad_norm": 8.629003524780273, "learning_rate": 6.382381179511765e-06, "loss": 0.406, "step": 8401 }, { "epoch": 0.4167865469517337, "grad_norm": 5.329893589019775, "learning_rate": 6.38162475216917e-06, "loss": 0.271, "step": 8402 }, { "epoch": 0.41683615258693385, "grad_norm": 5.426332473754883, "learning_rate": 6.380868290589289e-06, "loss": 0.3426, "step": 8403 }, { "epoch": 0.41688575822213403, "grad_norm": 5.961349964141846, "learning_rate": 6.380111794790867e-06, "loss": 0.3307, "step": 8404 }, { "epoch": 0.4169353638573342, "grad_norm": 10.30838680267334, "learning_rate": 6.379355264792651e-06, "loss": 0.2464, "step": 8405 }, { "epoch": 0.41698496949253433, "grad_norm": 10.798534393310547, "learning_rate": 6.378598700613387e-06, "loss": 0.3901, "step": 8406 }, { "epoch": 0.4170345751277345, "grad_norm": 11.477741241455078, "learning_rate": 6.377842102271824e-06, "loss": 0.3354, "step": 8407 }, { "epoch": 0.4170841807629347, "grad_norm": 4.6219401359558105, "learning_rate": 6.37708546978671e-06, "loss": 0.2328, "step": 8408 }, { "epoch": 0.4171337863981348, "grad_norm": 5.059953689575195, "learning_rate": 6.376328803176794e-06, "loss": 0.2837, "step": 8409 }, { "epoch": 0.417183392033335, "grad_norm": 4.8148512840271, "learning_rate": 6.375572102460829e-06, "loss": 0.2819, "step": 8410 }, { "epoch": 0.41723299766853517, "grad_norm": 8.878453254699707, "learning_rate": 6.374815367657566e-06, "loss": 0.2868, "step": 8411 }, { "epoch": 0.4172826033037353, "grad_norm": 4.628575325012207, "learning_rate": 6.374058598785755e-06, "loss": 0.2883, "step": 8412 }, { "epoch": 0.41733220893893547, "grad_norm": 8.90162181854248, "learning_rate": 6.373301795864151e-06, "loss": 0.3855, "step": 8413 }, { "epoch": 0.41738181457413565, "grad_norm": 5.0022993087768555, "learning_rate": 6.372544958911508e-06, "loss": 0.2963, "step": 8414 }, { "epoch": 0.41743142020933577, "grad_norm": 9.436027526855469, "learning_rate": 6.371788087946577e-06, "loss": 0.3577, "step": 8415 }, { "epoch": 0.41748102584453595, "grad_norm": 4.953300476074219, "learning_rate": 6.37103118298812e-06, "loss": 0.2919, "step": 8416 }, { "epoch": 0.4175306314797361, "grad_norm": 7.774862289428711, "learning_rate": 6.3702742440548886e-06, "loss": 0.3072, "step": 8417 }, { "epoch": 0.41758023711493625, "grad_norm": 6.588710784912109, "learning_rate": 6.369517271165642e-06, "loss": 0.2651, "step": 8418 }, { "epoch": 0.4176298427501364, "grad_norm": 3.9526925086975098, "learning_rate": 6.368760264339137e-06, "loss": 0.201, "step": 8419 }, { "epoch": 0.41767944838533655, "grad_norm": 9.587284088134766, "learning_rate": 6.3680032235941345e-06, "loss": 0.3506, "step": 8420 }, { "epoch": 0.4177290540205367, "grad_norm": 4.785582542419434, "learning_rate": 6.367246148949393e-06, "loss": 0.3026, "step": 8421 }, { "epoch": 0.4177786596557369, "grad_norm": 5.069439888000488, "learning_rate": 6.366489040423672e-06, "loss": 0.2999, "step": 8422 }, { "epoch": 0.41782826529093703, "grad_norm": 5.7969255447387695, "learning_rate": 6.365731898035737e-06, "loss": 0.2889, "step": 8423 }, { "epoch": 0.4178778709261372, "grad_norm": 5.350911617279053, "learning_rate": 6.364974721804347e-06, "loss": 0.2675, "step": 8424 }, { "epoch": 0.4179274765613374, "grad_norm": 6.99228048324585, "learning_rate": 6.364217511748265e-06, "loss": 0.2757, "step": 8425 }, { "epoch": 0.4179770821965375, "grad_norm": 7.182861804962158, "learning_rate": 6.363460267886257e-06, "loss": 0.2967, "step": 8426 }, { "epoch": 0.4180266878317377, "grad_norm": 5.8087849617004395, "learning_rate": 6.3627029902370855e-06, "loss": 0.2847, "step": 8427 }, { "epoch": 0.41807629346693786, "grad_norm": 6.443467617034912, "learning_rate": 6.361945678819519e-06, "loss": 0.3155, "step": 8428 }, { "epoch": 0.418125899102138, "grad_norm": 6.107285499572754, "learning_rate": 6.361188333652321e-06, "loss": 0.2622, "step": 8429 }, { "epoch": 0.41817550473733817, "grad_norm": 6.874649524688721, "learning_rate": 6.360430954754261e-06, "loss": 0.3312, "step": 8430 }, { "epoch": 0.41822511037253834, "grad_norm": 7.352455139160156, "learning_rate": 6.359673542144108e-06, "loss": 0.3479, "step": 8431 }, { "epoch": 0.41827471600773847, "grad_norm": 5.430283546447754, "learning_rate": 6.358916095840628e-06, "loss": 0.2871, "step": 8432 }, { "epoch": 0.41832432164293865, "grad_norm": 10.360660552978516, "learning_rate": 6.358158615862592e-06, "loss": 0.3912, "step": 8433 }, { "epoch": 0.4183739272781388, "grad_norm": 6.552742004394531, "learning_rate": 6.357401102228773e-06, "loss": 0.3133, "step": 8434 }, { "epoch": 0.41842353291333895, "grad_norm": 8.606554985046387, "learning_rate": 6.35664355495794e-06, "loss": 0.2987, "step": 8435 }, { "epoch": 0.4184731385485391, "grad_norm": 7.141168594360352, "learning_rate": 6.355885974068865e-06, "loss": 0.3053, "step": 8436 }, { "epoch": 0.41852274418373925, "grad_norm": 4.839234352111816, "learning_rate": 6.355128359580322e-06, "loss": 0.2388, "step": 8437 }, { "epoch": 0.4185723498189394, "grad_norm": 5.191694259643555, "learning_rate": 6.354370711511087e-06, "loss": 0.2868, "step": 8438 }, { "epoch": 0.4186219554541396, "grad_norm": 9.794175148010254, "learning_rate": 6.353613029879931e-06, "loss": 0.2796, "step": 8439 }, { "epoch": 0.4186715610893397, "grad_norm": 11.125190734863281, "learning_rate": 6.352855314705634e-06, "loss": 0.3963, "step": 8440 }, { "epoch": 0.4187211667245399, "grad_norm": 5.4440717697143555, "learning_rate": 6.35209756600697e-06, "loss": 0.2011, "step": 8441 }, { "epoch": 0.4187707723597401, "grad_norm": 8.733673095703125, "learning_rate": 6.351339783802716e-06, "loss": 0.3036, "step": 8442 }, { "epoch": 0.4188203779949402, "grad_norm": 10.59074878692627, "learning_rate": 6.3505819681116496e-06, "loss": 0.3916, "step": 8443 }, { "epoch": 0.4188699836301404, "grad_norm": 6.792966365814209, "learning_rate": 6.349824118952553e-06, "loss": 0.2628, "step": 8444 }, { "epoch": 0.41891958926534056, "grad_norm": 5.386927127838135, "learning_rate": 6.3490662363442035e-06, "loss": 0.3533, "step": 8445 }, { "epoch": 0.4189691949005407, "grad_norm": 5.081223964691162, "learning_rate": 6.348308320305383e-06, "loss": 0.2196, "step": 8446 }, { "epoch": 0.41901880053574087, "grad_norm": 17.3938045501709, "learning_rate": 6.3475503708548716e-06, "loss": 0.3463, "step": 8447 }, { "epoch": 0.41906840617094104, "grad_norm": 7.316923141479492, "learning_rate": 6.3467923880114534e-06, "loss": 0.313, "step": 8448 }, { "epoch": 0.41911801180614117, "grad_norm": 5.964839458465576, "learning_rate": 6.34603437179391e-06, "loss": 0.2455, "step": 8449 }, { "epoch": 0.41916761744134134, "grad_norm": 8.260625839233398, "learning_rate": 6.345276322221026e-06, "loss": 0.3297, "step": 8450 }, { "epoch": 0.41921722307654147, "grad_norm": 13.366207122802734, "learning_rate": 6.344518239311586e-06, "loss": 0.4056, "step": 8451 }, { "epoch": 0.41926682871174165, "grad_norm": 5.082314491271973, "learning_rate": 6.343760123084376e-06, "loss": 0.2521, "step": 8452 }, { "epoch": 0.4193164343469418, "grad_norm": 4.911306381225586, "learning_rate": 6.3430019735581815e-06, "loss": 0.246, "step": 8453 }, { "epoch": 0.41936603998214195, "grad_norm": 5.327590465545654, "learning_rate": 6.3422437907517916e-06, "loss": 0.2581, "step": 8454 }, { "epoch": 0.4194156456173421, "grad_norm": 7.042530059814453, "learning_rate": 6.341485574683994e-06, "loss": 0.2832, "step": 8455 }, { "epoch": 0.4194652512525423, "grad_norm": 7.139641761779785, "learning_rate": 6.3407273253735754e-06, "loss": 0.2874, "step": 8456 }, { "epoch": 0.4195148568877424, "grad_norm": 6.920899868011475, "learning_rate": 6.339969042839328e-06, "loss": 0.3339, "step": 8457 }, { "epoch": 0.4195644625229426, "grad_norm": 11.458678245544434, "learning_rate": 6.339210727100042e-06, "loss": 0.437, "step": 8458 }, { "epoch": 0.4196140681581428, "grad_norm": 18.83873176574707, "learning_rate": 6.338452378174508e-06, "loss": 0.436, "step": 8459 }, { "epoch": 0.4196636737933429, "grad_norm": 33.50217819213867, "learning_rate": 6.337693996081518e-06, "loss": 0.2915, "step": 8460 }, { "epoch": 0.4197132794285431, "grad_norm": 7.765169620513916, "learning_rate": 6.336935580839867e-06, "loss": 0.3249, "step": 8461 }, { "epoch": 0.41976288506374326, "grad_norm": 8.031329154968262, "learning_rate": 6.336177132468347e-06, "loss": 0.3452, "step": 8462 }, { "epoch": 0.4198124906989434, "grad_norm": 21.0769100189209, "learning_rate": 6.335418650985753e-06, "loss": 0.4196, "step": 8463 }, { "epoch": 0.41986209633414356, "grad_norm": 6.2808451652526855, "learning_rate": 6.3346601364108795e-06, "loss": 0.316, "step": 8464 }, { "epoch": 0.41991170196934374, "grad_norm": 6.95659875869751, "learning_rate": 6.333901588762527e-06, "loss": 0.2719, "step": 8465 }, { "epoch": 0.41996130760454387, "grad_norm": 5.1356987953186035, "learning_rate": 6.333143008059488e-06, "loss": 0.2908, "step": 8466 }, { "epoch": 0.42001091323974404, "grad_norm": 7.393054962158203, "learning_rate": 6.332384394320562e-06, "loss": 0.2932, "step": 8467 }, { "epoch": 0.42006051887494417, "grad_norm": 7.509903430938721, "learning_rate": 6.331625747564549e-06, "loss": 0.3011, "step": 8468 }, { "epoch": 0.42011012451014434, "grad_norm": 9.484272003173828, "learning_rate": 6.330867067810247e-06, "loss": 0.3171, "step": 8469 }, { "epoch": 0.4201597301453445, "grad_norm": 18.759113311767578, "learning_rate": 6.330108355076456e-06, "loss": 0.3115, "step": 8470 }, { "epoch": 0.42020933578054465, "grad_norm": 10.213891983032227, "learning_rate": 6.329349609381979e-06, "loss": 0.3357, "step": 8471 }, { "epoch": 0.4202589414157448, "grad_norm": 9.695723533630371, "learning_rate": 6.328590830745619e-06, "loss": 0.2881, "step": 8472 }, { "epoch": 0.420308547050945, "grad_norm": 6.629559516906738, "learning_rate": 6.327832019186175e-06, "loss": 0.3244, "step": 8473 }, { "epoch": 0.4203581526861451, "grad_norm": 8.018431663513184, "learning_rate": 6.327073174722453e-06, "loss": 0.4093, "step": 8474 }, { "epoch": 0.4204077583213453, "grad_norm": 5.9171552658081055, "learning_rate": 6.326314297373259e-06, "loss": 0.3144, "step": 8475 }, { "epoch": 0.4204573639565455, "grad_norm": 13.397637367248535, "learning_rate": 6.325555387157396e-06, "loss": 0.3002, "step": 8476 }, { "epoch": 0.4205069695917456, "grad_norm": 7.826318740844727, "learning_rate": 6.324796444093672e-06, "loss": 0.2926, "step": 8477 }, { "epoch": 0.4205565752269458, "grad_norm": 14.985815048217773, "learning_rate": 6.324037468200891e-06, "loss": 0.2657, "step": 8478 }, { "epoch": 0.42060618086214596, "grad_norm": 6.871674060821533, "learning_rate": 6.323278459497862e-06, "loss": 0.3236, "step": 8479 }, { "epoch": 0.4206557864973461, "grad_norm": 7.371098518371582, "learning_rate": 6.3225194180033966e-06, "loss": 0.3198, "step": 8480 }, { "epoch": 0.42070539213254626, "grad_norm": 5.8589324951171875, "learning_rate": 6.3217603437363e-06, "loss": 0.4036, "step": 8481 }, { "epoch": 0.42075499776774644, "grad_norm": 7.832577705383301, "learning_rate": 6.321001236715385e-06, "loss": 0.2774, "step": 8482 }, { "epoch": 0.42080460340294656, "grad_norm": 10.621140480041504, "learning_rate": 6.320242096959462e-06, "loss": 0.325, "step": 8483 }, { "epoch": 0.42085420903814674, "grad_norm": 8.398102760314941, "learning_rate": 6.319482924487341e-06, "loss": 0.3429, "step": 8484 }, { "epoch": 0.42090381467334687, "grad_norm": 7.695755481719971, "learning_rate": 6.318723719317839e-06, "loss": 0.196, "step": 8485 }, { "epoch": 0.42095342030854704, "grad_norm": 7.2109222412109375, "learning_rate": 6.317964481469765e-06, "loss": 0.2917, "step": 8486 }, { "epoch": 0.4210030259437472, "grad_norm": 18.133892059326172, "learning_rate": 6.317205210961935e-06, "loss": 0.3674, "step": 8487 }, { "epoch": 0.42105263157894735, "grad_norm": 4.802558422088623, "learning_rate": 6.316445907813166e-06, "loss": 0.2496, "step": 8488 }, { "epoch": 0.4211022372141475, "grad_norm": 35.714500427246094, "learning_rate": 6.315686572042269e-06, "loss": 0.2909, "step": 8489 }, { "epoch": 0.4211518428493477, "grad_norm": 6.28798770904541, "learning_rate": 6.314927203668064e-06, "loss": 0.3758, "step": 8490 }, { "epoch": 0.4212014484845478, "grad_norm": 7.19911003112793, "learning_rate": 6.31416780270937e-06, "loss": 0.3316, "step": 8491 }, { "epoch": 0.421251054119748, "grad_norm": 6.07077169418335, "learning_rate": 6.313408369185003e-06, "loss": 0.2588, "step": 8492 }, { "epoch": 0.4213006597549482, "grad_norm": 14.336094856262207, "learning_rate": 6.312648903113781e-06, "loss": 0.4094, "step": 8493 }, { "epoch": 0.4213502653901483, "grad_norm": 5.902555465698242, "learning_rate": 6.3118894045145265e-06, "loss": 0.2732, "step": 8494 }, { "epoch": 0.4213998710253485, "grad_norm": 4.073240280151367, "learning_rate": 6.311129873406059e-06, "loss": 0.1733, "step": 8495 }, { "epoch": 0.42144947666054866, "grad_norm": 12.651930809020996, "learning_rate": 6.3103703098072e-06, "loss": 0.6523, "step": 8496 }, { "epoch": 0.4214990822957488, "grad_norm": 6.226548671722412, "learning_rate": 6.309610713736771e-06, "loss": 0.3285, "step": 8497 }, { "epoch": 0.42154868793094896, "grad_norm": 17.650453567504883, "learning_rate": 6.308851085213599e-06, "loss": 0.3846, "step": 8498 }, { "epoch": 0.42159829356614914, "grad_norm": 9.612112998962402, "learning_rate": 6.308091424256502e-06, "loss": 0.325, "step": 8499 }, { "epoch": 0.42164789920134926, "grad_norm": 12.337507247924805, "learning_rate": 6.307331730884309e-06, "loss": 0.5527, "step": 8500 }, { "epoch": 0.42169750483654944, "grad_norm": 11.206459999084473, "learning_rate": 6.306572005115843e-06, "loss": 0.5057, "step": 8501 }, { "epoch": 0.42174711047174956, "grad_norm": 5.715447902679443, "learning_rate": 6.305812246969934e-06, "loss": 0.2743, "step": 8502 }, { "epoch": 0.42179671610694974, "grad_norm": 10.078139305114746, "learning_rate": 6.305052456465406e-06, "loss": 0.3681, "step": 8503 }, { "epoch": 0.4218463217421499, "grad_norm": 6.669373035430908, "learning_rate": 6.3042926336210875e-06, "loss": 0.3292, "step": 8504 }, { "epoch": 0.42189592737735004, "grad_norm": 4.971192359924316, "learning_rate": 6.303532778455808e-06, "loss": 0.3334, "step": 8505 }, { "epoch": 0.4219455330125502, "grad_norm": 6.673951148986816, "learning_rate": 6.302772890988396e-06, "loss": 0.2898, "step": 8506 }, { "epoch": 0.4219951386477504, "grad_norm": 6.368217468261719, "learning_rate": 6.302012971237682e-06, "loss": 0.2577, "step": 8507 }, { "epoch": 0.4220447442829505, "grad_norm": 8.35300064086914, "learning_rate": 6.301253019222498e-06, "loss": 0.2644, "step": 8508 }, { "epoch": 0.4220943499181507, "grad_norm": 4.964402198791504, "learning_rate": 6.300493034961676e-06, "loss": 0.2649, "step": 8509 }, { "epoch": 0.4221439555533509, "grad_norm": 5.764467716217041, "learning_rate": 6.299733018474048e-06, "loss": 0.2873, "step": 8510 }, { "epoch": 0.422193561188551, "grad_norm": 15.586702346801758, "learning_rate": 6.298972969778448e-06, "loss": 0.4271, "step": 8511 }, { "epoch": 0.4222431668237512, "grad_norm": 5.434998989105225, "learning_rate": 6.298212888893711e-06, "loss": 0.3186, "step": 8512 }, { "epoch": 0.42229277245895136, "grad_norm": 4.791537761688232, "learning_rate": 6.297452775838671e-06, "loss": 0.2232, "step": 8513 }, { "epoch": 0.4223423780941515, "grad_norm": 13.499798774719238, "learning_rate": 6.296692630632163e-06, "loss": 0.307, "step": 8514 }, { "epoch": 0.42239198372935166, "grad_norm": 5.066573143005371, "learning_rate": 6.295932453293027e-06, "loss": 0.2009, "step": 8515 }, { "epoch": 0.42244158936455184, "grad_norm": 7.605664253234863, "learning_rate": 6.295172243840096e-06, "loss": 0.2861, "step": 8516 }, { "epoch": 0.42249119499975196, "grad_norm": 4.968080043792725, "learning_rate": 6.294412002292213e-06, "loss": 0.2817, "step": 8517 }, { "epoch": 0.42254080063495214, "grad_norm": 6.0252685546875, "learning_rate": 6.293651728668214e-06, "loss": 0.2115, "step": 8518 }, { "epoch": 0.42259040627015226, "grad_norm": 8.229557037353516, "learning_rate": 6.292891422986942e-06, "loss": 0.255, "step": 8519 }, { "epoch": 0.42264001190535244, "grad_norm": 9.376084327697754, "learning_rate": 6.292131085267233e-06, "loss": 0.3907, "step": 8520 }, { "epoch": 0.4226896175405526, "grad_norm": 7.268956184387207, "learning_rate": 6.291370715527932e-06, "loss": 0.3179, "step": 8521 }, { "epoch": 0.42273922317575274, "grad_norm": 12.730164527893066, "learning_rate": 6.290610313787881e-06, "loss": 0.3872, "step": 8522 }, { "epoch": 0.4227888288109529, "grad_norm": 6.456518650054932, "learning_rate": 6.289849880065924e-06, "loss": 0.2122, "step": 8523 }, { "epoch": 0.4228384344461531, "grad_norm": 3.786478042602539, "learning_rate": 6.289089414380901e-06, "loss": 0.2338, "step": 8524 }, { "epoch": 0.4228880400813532, "grad_norm": 14.468828201293945, "learning_rate": 6.28832891675166e-06, "loss": 0.4131, "step": 8525 }, { "epoch": 0.4229376457165534, "grad_norm": 7.720047950744629, "learning_rate": 6.287568387197047e-06, "loss": 0.2911, "step": 8526 }, { "epoch": 0.4229872513517536, "grad_norm": 6.950695037841797, "learning_rate": 6.286807825735905e-06, "loss": 0.3446, "step": 8527 }, { "epoch": 0.4230368569869537, "grad_norm": 8.416082382202148, "learning_rate": 6.286047232387084e-06, "loss": 0.3193, "step": 8528 }, { "epoch": 0.4230864626221539, "grad_norm": 13.597891807556152, "learning_rate": 6.28528660716943e-06, "loss": 0.3977, "step": 8529 }, { "epoch": 0.42313606825735406, "grad_norm": 8.336374282836914, "learning_rate": 6.284525950101794e-06, "loss": 0.3092, "step": 8530 }, { "epoch": 0.4231856738925542, "grad_norm": 14.839431762695312, "learning_rate": 6.283765261203023e-06, "loss": 0.335, "step": 8531 }, { "epoch": 0.42323527952775436, "grad_norm": 5.473494052886963, "learning_rate": 6.283004540491968e-06, "loss": 0.2253, "step": 8532 }, { "epoch": 0.42328488516295454, "grad_norm": 19.128299713134766, "learning_rate": 6.28224378798748e-06, "loss": 0.2624, "step": 8533 }, { "epoch": 0.42333449079815466, "grad_norm": 5.112053871154785, "learning_rate": 6.28148300370841e-06, "loss": 0.3865, "step": 8534 }, { "epoch": 0.42338409643335484, "grad_norm": 5.762014865875244, "learning_rate": 6.280722187673615e-06, "loss": 0.3705, "step": 8535 }, { "epoch": 0.42343370206855496, "grad_norm": 4.003426551818848, "learning_rate": 6.279961339901941e-06, "loss": 0.202, "step": 8536 }, { "epoch": 0.42348330770375514, "grad_norm": 10.134846687316895, "learning_rate": 6.279200460412248e-06, "loss": 0.2895, "step": 8537 }, { "epoch": 0.4235329133389553, "grad_norm": 5.462307453155518, "learning_rate": 6.278439549223388e-06, "loss": 0.2153, "step": 8538 }, { "epoch": 0.42358251897415544, "grad_norm": 5.471109867095947, "learning_rate": 6.27767860635422e-06, "loss": 0.2897, "step": 8539 }, { "epoch": 0.4236321246093556, "grad_norm": 9.43460464477539, "learning_rate": 6.276917631823596e-06, "loss": 0.3189, "step": 8540 }, { "epoch": 0.4236817302445558, "grad_norm": 9.332839965820312, "learning_rate": 6.276156625650375e-06, "loss": 0.3124, "step": 8541 }, { "epoch": 0.4237313358797559, "grad_norm": 17.427978515625, "learning_rate": 6.2753955878534165e-06, "loss": 0.3244, "step": 8542 }, { "epoch": 0.4237809415149561, "grad_norm": 5.639006614685059, "learning_rate": 6.274634518451579e-06, "loss": 0.3007, "step": 8543 }, { "epoch": 0.4238305471501563, "grad_norm": 6.46972131729126, "learning_rate": 6.273873417463721e-06, "loss": 0.3292, "step": 8544 }, { "epoch": 0.4238801527853564, "grad_norm": 8.451179504394531, "learning_rate": 6.273112284908702e-06, "loss": 0.3096, "step": 8545 }, { "epoch": 0.4239297584205566, "grad_norm": 8.239972114562988, "learning_rate": 6.272351120805387e-06, "loss": 0.3546, "step": 8546 }, { "epoch": 0.42397936405575676, "grad_norm": 8.722862243652344, "learning_rate": 6.271589925172635e-06, "loss": 0.4293, "step": 8547 }, { "epoch": 0.4240289696909569, "grad_norm": 14.26556396484375, "learning_rate": 6.270828698029307e-06, "loss": 0.4054, "step": 8548 }, { "epoch": 0.42407857532615706, "grad_norm": 7.403923034667969, "learning_rate": 6.270067439394273e-06, "loss": 0.2264, "step": 8549 }, { "epoch": 0.42412818096135724, "grad_norm": 11.565949440002441, "learning_rate": 6.269306149286392e-06, "loss": 0.3145, "step": 8550 }, { "epoch": 0.42417778659655736, "grad_norm": 9.031753540039062, "learning_rate": 6.2685448277245286e-06, "loss": 0.3313, "step": 8551 }, { "epoch": 0.42422739223175754, "grad_norm": 7.011336803436279, "learning_rate": 6.267783474727551e-06, "loss": 0.2584, "step": 8552 }, { "epoch": 0.42427699786695766, "grad_norm": 8.132736206054688, "learning_rate": 6.267022090314328e-06, "loss": 0.3343, "step": 8553 }, { "epoch": 0.42432660350215784, "grad_norm": 6.52031135559082, "learning_rate": 6.266260674503723e-06, "loss": 0.3786, "step": 8554 }, { "epoch": 0.424376209137358, "grad_norm": 8.462186813354492, "learning_rate": 6.265499227314604e-06, "loss": 0.3104, "step": 8555 }, { "epoch": 0.42442581477255814, "grad_norm": 16.135557174682617, "learning_rate": 6.2647377487658445e-06, "loss": 0.2759, "step": 8556 }, { "epoch": 0.4244754204077583, "grad_norm": 12.22468376159668, "learning_rate": 6.263976238876311e-06, "loss": 0.417, "step": 8557 }, { "epoch": 0.4245250260429585, "grad_norm": 7.393308639526367, "learning_rate": 6.2632146976648724e-06, "loss": 0.3083, "step": 8558 }, { "epoch": 0.4245746316781586, "grad_norm": 6.559920787811279, "learning_rate": 6.262453125150404e-06, "loss": 0.3516, "step": 8559 }, { "epoch": 0.4246242373133588, "grad_norm": 8.566852569580078, "learning_rate": 6.261691521351775e-06, "loss": 0.3828, "step": 8560 }, { "epoch": 0.424673842948559, "grad_norm": 5.8687591552734375, "learning_rate": 6.26092988628786e-06, "loss": 0.2502, "step": 8561 }, { "epoch": 0.4247234485837591, "grad_norm": 5.177310466766357, "learning_rate": 6.260168219977532e-06, "loss": 0.221, "step": 8562 }, { "epoch": 0.4247730542189593, "grad_norm": 4.70844030380249, "learning_rate": 6.259406522439666e-06, "loss": 0.2762, "step": 8563 }, { "epoch": 0.42482265985415946, "grad_norm": 6.481001853942871, "learning_rate": 6.258644793693136e-06, "loss": 0.2977, "step": 8564 }, { "epoch": 0.4248722654893596, "grad_norm": 6.984325408935547, "learning_rate": 6.257883033756818e-06, "loss": 0.3153, "step": 8565 }, { "epoch": 0.42492187112455976, "grad_norm": 25.919553756713867, "learning_rate": 6.257121242649591e-06, "loss": 0.4517, "step": 8566 }, { "epoch": 0.42497147675975994, "grad_norm": 6.223647594451904, "learning_rate": 6.25635942039033e-06, "loss": 0.2858, "step": 8567 }, { "epoch": 0.42502108239496006, "grad_norm": 7.142419815063477, "learning_rate": 6.255597566997914e-06, "loss": 0.2692, "step": 8568 }, { "epoch": 0.42507068803016024, "grad_norm": 5.908512592315674, "learning_rate": 6.2548356824912235e-06, "loss": 0.3687, "step": 8569 }, { "epoch": 0.42512029366536036, "grad_norm": 5.145002365112305, "learning_rate": 6.254073766889136e-06, "loss": 0.2486, "step": 8570 }, { "epoch": 0.42516989930056054, "grad_norm": 3.259946823120117, "learning_rate": 6.2533118202105334e-06, "loss": 0.1637, "step": 8571 }, { "epoch": 0.4252195049357607, "grad_norm": 6.499113082885742, "learning_rate": 6.252549842474297e-06, "loss": 0.2784, "step": 8572 }, { "epoch": 0.42526911057096084, "grad_norm": 5.525660514831543, "learning_rate": 6.25178783369931e-06, "loss": 0.3239, "step": 8573 }, { "epoch": 0.425318716206161, "grad_norm": 33.881065368652344, "learning_rate": 6.251025793904453e-06, "loss": 0.4115, "step": 8574 }, { "epoch": 0.4253683218413612, "grad_norm": 5.959908962249756, "learning_rate": 6.250263723108611e-06, "loss": 0.2605, "step": 8575 }, { "epoch": 0.4254179274765613, "grad_norm": 5.103996753692627, "learning_rate": 6.249501621330669e-06, "loss": 0.2567, "step": 8576 }, { "epoch": 0.4254675331117615, "grad_norm": 5.147141933441162, "learning_rate": 6.248739488589511e-06, "loss": 0.2979, "step": 8577 }, { "epoch": 0.4255171387469617, "grad_norm": 5.067723751068115, "learning_rate": 6.247977324904024e-06, "loss": 0.2681, "step": 8578 }, { "epoch": 0.4255667443821618, "grad_norm": 7.431972980499268, "learning_rate": 6.247215130293094e-06, "loss": 0.2961, "step": 8579 }, { "epoch": 0.425616350017362, "grad_norm": 13.41210651397705, "learning_rate": 6.246452904775609e-06, "loss": 0.405, "step": 8580 }, { "epoch": 0.42566595565256216, "grad_norm": 19.605628967285156, "learning_rate": 6.2456906483704585e-06, "loss": 0.351, "step": 8581 }, { "epoch": 0.4257155612877623, "grad_norm": 5.527100086212158, "learning_rate": 6.244928361096529e-06, "loss": 0.2483, "step": 8582 }, { "epoch": 0.42576516692296246, "grad_norm": 11.170734405517578, "learning_rate": 6.24416604297271e-06, "loss": 0.38, "step": 8583 }, { "epoch": 0.4258147725581626, "grad_norm": 6.049726963043213, "learning_rate": 6.243403694017896e-06, "loss": 0.3403, "step": 8584 }, { "epoch": 0.42586437819336276, "grad_norm": 6.338733673095703, "learning_rate": 6.242641314250976e-06, "loss": 0.1926, "step": 8585 }, { "epoch": 0.42591398382856294, "grad_norm": 6.355752944946289, "learning_rate": 6.2418789036908415e-06, "loss": 0.3356, "step": 8586 }, { "epoch": 0.42596358946376306, "grad_norm": 4.044278144836426, "learning_rate": 6.241116462356385e-06, "loss": 0.3189, "step": 8587 }, { "epoch": 0.42601319509896324, "grad_norm": 13.496872901916504, "learning_rate": 6.240353990266503e-06, "loss": 0.2836, "step": 8588 }, { "epoch": 0.4260628007341634, "grad_norm": 16.862302780151367, "learning_rate": 6.239591487440087e-06, "loss": 0.4936, "step": 8589 }, { "epoch": 0.42611240636936354, "grad_norm": 16.100191116333008, "learning_rate": 6.2388289538960336e-06, "loss": 0.363, "step": 8590 }, { "epoch": 0.4261620120045637, "grad_norm": 9.942829132080078, "learning_rate": 6.2380663896532365e-06, "loss": 0.2879, "step": 8591 }, { "epoch": 0.4262116176397639, "grad_norm": 8.630945205688477, "learning_rate": 6.237303794730596e-06, "loss": 0.2904, "step": 8592 }, { "epoch": 0.426261223274964, "grad_norm": 7.965048789978027, "learning_rate": 6.236541169147009e-06, "loss": 0.345, "step": 8593 }, { "epoch": 0.4263108289101642, "grad_norm": 6.830381870269775, "learning_rate": 6.23577851292137e-06, "loss": 0.3603, "step": 8594 }, { "epoch": 0.4263604345453644, "grad_norm": 7.487889289855957, "learning_rate": 6.2350158260725814e-06, "loss": 0.2178, "step": 8595 }, { "epoch": 0.4264100401805645, "grad_norm": 4.45108699798584, "learning_rate": 6.234253108619543e-06, "loss": 0.2901, "step": 8596 }, { "epoch": 0.4264596458157647, "grad_norm": 8.801383018493652, "learning_rate": 6.233490360581154e-06, "loss": 0.3388, "step": 8597 }, { "epoch": 0.42650925145096485, "grad_norm": 4.872724533081055, "learning_rate": 6.232727581976314e-06, "loss": 0.2894, "step": 8598 }, { "epoch": 0.426558857086165, "grad_norm": 4.561777114868164, "learning_rate": 6.231964772823929e-06, "loss": 0.2754, "step": 8599 }, { "epoch": 0.42660846272136516, "grad_norm": 10.562121391296387, "learning_rate": 6.2312019331428986e-06, "loss": 0.2036, "step": 8600 }, { "epoch": 0.4266580683565653, "grad_norm": 6.9673919677734375, "learning_rate": 6.230439062952127e-06, "loss": 0.2624, "step": 8601 }, { "epoch": 0.42670767399176546, "grad_norm": 5.384194374084473, "learning_rate": 6.229676162270519e-06, "loss": 0.279, "step": 8602 }, { "epoch": 0.42675727962696564, "grad_norm": 7.949892997741699, "learning_rate": 6.228913231116981e-06, "loss": 0.3533, "step": 8603 }, { "epoch": 0.42680688526216576, "grad_norm": 4.995456695556641, "learning_rate": 6.228150269510416e-06, "loss": 0.2756, "step": 8604 }, { "epoch": 0.42685649089736594, "grad_norm": 12.4676513671875, "learning_rate": 6.227387277469731e-06, "loss": 0.2375, "step": 8605 }, { "epoch": 0.4269060965325661, "grad_norm": 9.91335678100586, "learning_rate": 6.226624255013835e-06, "loss": 0.3539, "step": 8606 }, { "epoch": 0.42695570216776624, "grad_norm": 5.433670520782471, "learning_rate": 6.225861202161636e-06, "loss": 0.2435, "step": 8607 }, { "epoch": 0.4270053078029664, "grad_norm": 7.173501968383789, "learning_rate": 6.22509811893204e-06, "loss": 0.3367, "step": 8608 }, { "epoch": 0.4270549134381666, "grad_norm": 4.594831943511963, "learning_rate": 6.22433500534396e-06, "loss": 0.2978, "step": 8609 }, { "epoch": 0.4271045190733667, "grad_norm": 5.9485764503479, "learning_rate": 6.223571861416304e-06, "loss": 0.2887, "step": 8610 }, { "epoch": 0.4271541247085669, "grad_norm": 5.258517742156982, "learning_rate": 6.222808687167984e-06, "loss": 0.2672, "step": 8611 }, { "epoch": 0.4272037303437671, "grad_norm": 10.203166961669922, "learning_rate": 6.222045482617911e-06, "loss": 0.2543, "step": 8612 }, { "epoch": 0.4272533359789672, "grad_norm": 3.616776466369629, "learning_rate": 6.2212822477849985e-06, "loss": 0.2381, "step": 8613 }, { "epoch": 0.4273029416141674, "grad_norm": 6.515583515167236, "learning_rate": 6.22051898268816e-06, "loss": 0.2696, "step": 8614 }, { "epoch": 0.42735254724936755, "grad_norm": 5.066302299499512, "learning_rate": 6.219755687346309e-06, "loss": 0.2552, "step": 8615 }, { "epoch": 0.4274021528845677, "grad_norm": 6.130136966705322, "learning_rate": 6.218992361778359e-06, "loss": 0.262, "step": 8616 }, { "epoch": 0.42745175851976785, "grad_norm": 5.256589412689209, "learning_rate": 6.218229006003229e-06, "loss": 0.2677, "step": 8617 }, { "epoch": 0.427501364154968, "grad_norm": 8.801947593688965, "learning_rate": 6.217465620039831e-06, "loss": 0.2628, "step": 8618 }, { "epoch": 0.42755096979016816, "grad_norm": 10.146979331970215, "learning_rate": 6.216702203907086e-06, "loss": 0.4023, "step": 8619 }, { "epoch": 0.42760057542536833, "grad_norm": 6.401999473571777, "learning_rate": 6.215938757623909e-06, "loss": 0.3452, "step": 8620 }, { "epoch": 0.42765018106056846, "grad_norm": 6.826318740844727, "learning_rate": 6.2151752812092195e-06, "loss": 0.3939, "step": 8621 }, { "epoch": 0.42769978669576864, "grad_norm": 11.51347827911377, "learning_rate": 6.214411774681937e-06, "loss": 0.2703, "step": 8622 }, { "epoch": 0.4277493923309688, "grad_norm": 12.875349998474121, "learning_rate": 6.213648238060982e-06, "loss": 0.2463, "step": 8623 }, { "epoch": 0.42779899796616894, "grad_norm": 11.293842315673828, "learning_rate": 6.212884671365274e-06, "loss": 0.382, "step": 8624 }, { "epoch": 0.4278486036013691, "grad_norm": 5.331315994262695, "learning_rate": 6.212121074613735e-06, "loss": 0.2589, "step": 8625 }, { "epoch": 0.4278982092365693, "grad_norm": 8.870234489440918, "learning_rate": 6.211357447825287e-06, "loss": 0.3957, "step": 8626 }, { "epoch": 0.4279478148717694, "grad_norm": 12.210384368896484, "learning_rate": 6.210593791018855e-06, "loss": 0.2769, "step": 8627 }, { "epoch": 0.4279974205069696, "grad_norm": 5.561174392700195, "learning_rate": 6.20983010421336e-06, "loss": 0.2827, "step": 8628 }, { "epoch": 0.4280470261421698, "grad_norm": 8.58786678314209, "learning_rate": 6.209066387427729e-06, "loss": 0.3542, "step": 8629 }, { "epoch": 0.4280966317773699, "grad_norm": 4.410723686218262, "learning_rate": 6.2083026406808855e-06, "loss": 0.2702, "step": 8630 }, { "epoch": 0.4281462374125701, "grad_norm": 4.865283966064453, "learning_rate": 6.207538863991756e-06, "loss": 0.1792, "step": 8631 }, { "epoch": 0.42819584304777025, "grad_norm": 7.856048107147217, "learning_rate": 6.206775057379266e-06, "loss": 0.3297, "step": 8632 }, { "epoch": 0.4282454486829704, "grad_norm": 2.8013579845428467, "learning_rate": 6.206011220862345e-06, "loss": 0.1219, "step": 8633 }, { "epoch": 0.42829505431817055, "grad_norm": 6.206772327423096, "learning_rate": 6.205247354459921e-06, "loss": 0.3838, "step": 8634 }, { "epoch": 0.4283446599533707, "grad_norm": 12.215152740478516, "learning_rate": 6.204483458190922e-06, "loss": 0.3449, "step": 8635 }, { "epoch": 0.42839426558857086, "grad_norm": 13.803248405456543, "learning_rate": 6.203719532074277e-06, "loss": 0.4325, "step": 8636 }, { "epoch": 0.42844387122377103, "grad_norm": 9.63785457611084, "learning_rate": 6.20295557612892e-06, "loss": 0.2441, "step": 8637 }, { "epoch": 0.42849347685897116, "grad_norm": 11.55812931060791, "learning_rate": 6.2021915903737786e-06, "loss": 0.4315, "step": 8638 }, { "epoch": 0.42854308249417133, "grad_norm": 7.375522136688232, "learning_rate": 6.201427574827784e-06, "loss": 0.3649, "step": 8639 }, { "epoch": 0.4285926881293715, "grad_norm": 12.090837478637695, "learning_rate": 6.200663529509871e-06, "loss": 0.4748, "step": 8640 }, { "epoch": 0.42864229376457164, "grad_norm": 8.276494026184082, "learning_rate": 6.1998994544389745e-06, "loss": 0.3642, "step": 8641 }, { "epoch": 0.4286918993997718, "grad_norm": 9.90369701385498, "learning_rate": 6.199135349634027e-06, "loss": 0.3075, "step": 8642 }, { "epoch": 0.428741505034972, "grad_norm": 3.4173991680145264, "learning_rate": 6.198371215113961e-06, "loss": 0.2063, "step": 8643 }, { "epoch": 0.4287911106701721, "grad_norm": 15.555800437927246, "learning_rate": 6.197607050897715e-06, "loss": 0.3146, "step": 8644 }, { "epoch": 0.4288407163053723, "grad_norm": 4.39076042175293, "learning_rate": 6.196842857004224e-06, "loss": 0.3382, "step": 8645 }, { "epoch": 0.42889032194057247, "grad_norm": 8.625283241271973, "learning_rate": 6.196078633452426e-06, "loss": 0.3672, "step": 8646 }, { "epoch": 0.4289399275757726, "grad_norm": 8.224259376525879, "learning_rate": 6.19531438026126e-06, "loss": 0.3161, "step": 8647 }, { "epoch": 0.4289895332109728, "grad_norm": 4.775639533996582, "learning_rate": 6.194550097449661e-06, "loss": 0.3316, "step": 8648 }, { "epoch": 0.42903913884617295, "grad_norm": 6.900202751159668, "learning_rate": 6.193785785036571e-06, "loss": 0.3107, "step": 8649 }, { "epoch": 0.4290887444813731, "grad_norm": 3.914682388305664, "learning_rate": 6.1930214430409284e-06, "loss": 0.2574, "step": 8650 }, { "epoch": 0.42913835011657325, "grad_norm": 4.112392425537109, "learning_rate": 6.192257071481677e-06, "loss": 0.2468, "step": 8651 }, { "epoch": 0.4291879557517734, "grad_norm": 3.825263738632202, "learning_rate": 6.1914926703777545e-06, "loss": 0.316, "step": 8652 }, { "epoch": 0.42923756138697355, "grad_norm": 7.615800857543945, "learning_rate": 6.190728239748104e-06, "loss": 0.334, "step": 8653 }, { "epoch": 0.42928716702217373, "grad_norm": 6.8584465980529785, "learning_rate": 6.1899637796116705e-06, "loss": 0.2775, "step": 8654 }, { "epoch": 0.42933677265737386, "grad_norm": 7.59799861907959, "learning_rate": 6.189199289987395e-06, "loss": 0.3669, "step": 8655 }, { "epoch": 0.42938637829257403, "grad_norm": 5.763672351837158, "learning_rate": 6.188434770894224e-06, "loss": 0.2945, "step": 8656 }, { "epoch": 0.4294359839277742, "grad_norm": 5.892768859863281, "learning_rate": 6.187670222351102e-06, "loss": 0.3128, "step": 8657 }, { "epoch": 0.42948558956297433, "grad_norm": 4.5424323081970215, "learning_rate": 6.1869056443769734e-06, "loss": 0.2945, "step": 8658 }, { "epoch": 0.4295351951981745, "grad_norm": 7.013113021850586, "learning_rate": 6.186141036990787e-06, "loss": 0.2794, "step": 8659 }, { "epoch": 0.4295848008333747, "grad_norm": 5.566855430603027, "learning_rate": 6.185376400211489e-06, "loss": 0.2189, "step": 8660 }, { "epoch": 0.4296344064685748, "grad_norm": 13.249335289001465, "learning_rate": 6.184611734058028e-06, "loss": 0.3286, "step": 8661 }, { "epoch": 0.429684012103775, "grad_norm": 5.909994125366211, "learning_rate": 6.183847038549352e-06, "loss": 0.2596, "step": 8662 }, { "epoch": 0.42973361773897517, "grad_norm": 8.020061492919922, "learning_rate": 6.18308231370441e-06, "loss": 0.3263, "step": 8663 }, { "epoch": 0.4297832233741753, "grad_norm": 6.8781046867370605, "learning_rate": 6.182317559542154e-06, "loss": 0.3023, "step": 8664 }, { "epoch": 0.42983282900937547, "grad_norm": 15.330312728881836, "learning_rate": 6.181552776081534e-06, "loss": 0.3785, "step": 8665 }, { "epoch": 0.42988243464457565, "grad_norm": 6.48189640045166, "learning_rate": 6.180787963341502e-06, "loss": 0.392, "step": 8666 }, { "epoch": 0.4299320402797758, "grad_norm": 12.321941375732422, "learning_rate": 6.180023121341011e-06, "loss": 0.4248, "step": 8667 }, { "epoch": 0.42998164591497595, "grad_norm": 4.891861915588379, "learning_rate": 6.179258250099012e-06, "loss": 0.3564, "step": 8668 }, { "epoch": 0.4300312515501761, "grad_norm": 14.818894386291504, "learning_rate": 6.17849334963446e-06, "loss": 0.3785, "step": 8669 }, { "epoch": 0.43008085718537625, "grad_norm": 5.5429463386535645, "learning_rate": 6.177728419966309e-06, "loss": 0.313, "step": 8670 }, { "epoch": 0.43013046282057643, "grad_norm": 4.772147178649902, "learning_rate": 6.176963461113517e-06, "loss": 0.3259, "step": 8671 }, { "epoch": 0.43018006845577655, "grad_norm": 9.418457984924316, "learning_rate": 6.176198473095036e-06, "loss": 0.3768, "step": 8672 }, { "epoch": 0.43022967409097673, "grad_norm": 6.58732271194458, "learning_rate": 6.175433455929827e-06, "loss": 0.2109, "step": 8673 }, { "epoch": 0.4302792797261769, "grad_norm": 11.562808990478516, "learning_rate": 6.174668409636844e-06, "loss": 0.3177, "step": 8674 }, { "epoch": 0.43032888536137703, "grad_norm": 6.239884853363037, "learning_rate": 6.1739033342350465e-06, "loss": 0.2798, "step": 8675 }, { "epoch": 0.4303784909965772, "grad_norm": 8.394800186157227, "learning_rate": 6.1731382297433924e-06, "loss": 0.3303, "step": 8676 }, { "epoch": 0.4304280966317774, "grad_norm": 7.0248026847839355, "learning_rate": 6.1723730961808434e-06, "loss": 0.3533, "step": 8677 }, { "epoch": 0.4304777022669775, "grad_norm": 7.248839855194092, "learning_rate": 6.1716079335663595e-06, "loss": 0.2965, "step": 8678 }, { "epoch": 0.4305273079021777, "grad_norm": 4.0746941566467285, "learning_rate": 6.1708427419189e-06, "loss": 0.2635, "step": 8679 }, { "epoch": 0.43057691353737787, "grad_norm": 6.014649391174316, "learning_rate": 6.170077521257427e-06, "loss": 0.2488, "step": 8680 }, { "epoch": 0.430626519172578, "grad_norm": 5.953792572021484, "learning_rate": 6.169312271600906e-06, "loss": 0.3593, "step": 8681 }, { "epoch": 0.43067612480777817, "grad_norm": 14.503729820251465, "learning_rate": 6.168546992968296e-06, "loss": 0.2773, "step": 8682 }, { "epoch": 0.43072573044297835, "grad_norm": 6.930776119232178, "learning_rate": 6.167781685378564e-06, "loss": 0.3444, "step": 8683 }, { "epoch": 0.4307753360781785, "grad_norm": 6.402989387512207, "learning_rate": 6.167016348850673e-06, "loss": 0.2792, "step": 8684 }, { "epoch": 0.43082494171337865, "grad_norm": 7.870118141174316, "learning_rate": 6.166250983403589e-06, "loss": 0.2519, "step": 8685 }, { "epoch": 0.4308745473485788, "grad_norm": 18.209203720092773, "learning_rate": 6.165485589056277e-06, "loss": 0.3684, "step": 8686 }, { "epoch": 0.43092415298377895, "grad_norm": 8.518010139465332, "learning_rate": 6.164720165827707e-06, "loss": 0.2567, "step": 8687 }, { "epoch": 0.43097375861897913, "grad_norm": 5.946651935577393, "learning_rate": 6.163954713736844e-06, "loss": 0.2913, "step": 8688 }, { "epoch": 0.43102336425417925, "grad_norm": 7.172557830810547, "learning_rate": 6.163189232802656e-06, "loss": 0.3384, "step": 8689 }, { "epoch": 0.43107296988937943, "grad_norm": 21.419897079467773, "learning_rate": 6.162423723044112e-06, "loss": 0.3687, "step": 8690 }, { "epoch": 0.4311225755245796, "grad_norm": 12.554505348205566, "learning_rate": 6.1616581844801836e-06, "loss": 0.427, "step": 8691 }, { "epoch": 0.43117218115977973, "grad_norm": 5.367879867553711, "learning_rate": 6.160892617129839e-06, "loss": 0.2749, "step": 8692 }, { "epoch": 0.4312217867949799, "grad_norm": 5.743899345397949, "learning_rate": 6.160127021012051e-06, "loss": 0.3382, "step": 8693 }, { "epoch": 0.4312713924301801, "grad_norm": 7.980472564697266, "learning_rate": 6.15936139614579e-06, "loss": 0.3028, "step": 8694 }, { "epoch": 0.4313209980653802, "grad_norm": 4.905912399291992, "learning_rate": 6.158595742550028e-06, "loss": 0.2972, "step": 8695 }, { "epoch": 0.4313706037005804, "grad_norm": 6.207144260406494, "learning_rate": 6.157830060243742e-06, "loss": 0.1985, "step": 8696 }, { "epoch": 0.43142020933578057, "grad_norm": 5.404829978942871, "learning_rate": 6.157064349245902e-06, "loss": 0.328, "step": 8697 }, { "epoch": 0.4314698149709807, "grad_norm": 6.573134899139404, "learning_rate": 6.156298609575484e-06, "loss": 0.3777, "step": 8698 }, { "epoch": 0.43151942060618087, "grad_norm": 4.989375114440918, "learning_rate": 6.1555328412514635e-06, "loss": 0.3651, "step": 8699 }, { "epoch": 0.43156902624138105, "grad_norm": 5.081705093383789, "learning_rate": 6.154767044292816e-06, "loss": 0.28, "step": 8700 }, { "epoch": 0.43161863187658117, "grad_norm": 7.768953800201416, "learning_rate": 6.154001218718521e-06, "loss": 0.3681, "step": 8701 }, { "epoch": 0.43166823751178135, "grad_norm": 5.871734619140625, "learning_rate": 6.1532353645475515e-06, "loss": 0.3883, "step": 8702 }, { "epoch": 0.4317178431469815, "grad_norm": 4.580519676208496, "learning_rate": 6.152469481798888e-06, "loss": 0.2203, "step": 8703 }, { "epoch": 0.43176744878218165, "grad_norm": 6.356497287750244, "learning_rate": 6.15170357049151e-06, "loss": 0.2516, "step": 8704 }, { "epoch": 0.43181705441738183, "grad_norm": 3.7245371341705322, "learning_rate": 6.150937630644398e-06, "loss": 0.1721, "step": 8705 }, { "epoch": 0.43186666005258195, "grad_norm": 8.84260082244873, "learning_rate": 6.15017166227653e-06, "loss": 0.2765, "step": 8706 }, { "epoch": 0.43191626568778213, "grad_norm": 29.30375862121582, "learning_rate": 6.149405665406887e-06, "loss": 0.4411, "step": 8707 }, { "epoch": 0.4319658713229823, "grad_norm": 7.326137065887451, "learning_rate": 6.148639640054452e-06, "loss": 0.3171, "step": 8708 }, { "epoch": 0.43201547695818243, "grad_norm": 5.285912036895752, "learning_rate": 6.147873586238207e-06, "loss": 0.226, "step": 8709 }, { "epoch": 0.4320650825933826, "grad_norm": 11.101164817810059, "learning_rate": 6.147107503977135e-06, "loss": 0.3288, "step": 8710 }, { "epoch": 0.4321146882285828, "grad_norm": 15.547804832458496, "learning_rate": 6.146341393290223e-06, "loss": 0.4735, "step": 8711 }, { "epoch": 0.4321642938637829, "grad_norm": 14.389471054077148, "learning_rate": 6.14557525419645e-06, "loss": 0.3551, "step": 8712 }, { "epoch": 0.4322138994989831, "grad_norm": 5.053379058837891, "learning_rate": 6.144809086714804e-06, "loss": 0.2857, "step": 8713 }, { "epoch": 0.43226350513418327, "grad_norm": 21.94467544555664, "learning_rate": 6.144042890864271e-06, "loss": 0.2747, "step": 8714 }, { "epoch": 0.4323131107693834, "grad_norm": 9.830965995788574, "learning_rate": 6.143276666663839e-06, "loss": 0.2462, "step": 8715 }, { "epoch": 0.43236271640458357, "grad_norm": 4.608011722564697, "learning_rate": 6.142510414132493e-06, "loss": 0.2565, "step": 8716 }, { "epoch": 0.4324123220397837, "grad_norm": 6.916198253631592, "learning_rate": 6.141744133289223e-06, "loss": 0.2366, "step": 8717 }, { "epoch": 0.43246192767498387, "grad_norm": 11.071134567260742, "learning_rate": 6.140977824153016e-06, "loss": 0.3258, "step": 8718 }, { "epoch": 0.43251153331018405, "grad_norm": 6.957414627075195, "learning_rate": 6.140211486742862e-06, "loss": 0.3002, "step": 8719 }, { "epoch": 0.43256113894538417, "grad_norm": 13.133056640625, "learning_rate": 6.139445121077751e-06, "loss": 0.303, "step": 8720 }, { "epoch": 0.43261074458058435, "grad_norm": 4.875195503234863, "learning_rate": 6.138678727176678e-06, "loss": 0.1952, "step": 8721 }, { "epoch": 0.43266035021578453, "grad_norm": 17.321735382080078, "learning_rate": 6.137912305058628e-06, "loss": 0.3674, "step": 8722 }, { "epoch": 0.43270995585098465, "grad_norm": 6.177785396575928, "learning_rate": 6.137145854742597e-06, "loss": 0.3096, "step": 8723 }, { "epoch": 0.43275956148618483, "grad_norm": 6.776732921600342, "learning_rate": 6.136379376247577e-06, "loss": 0.3404, "step": 8724 }, { "epoch": 0.432809167121385, "grad_norm": 11.32190990447998, "learning_rate": 6.135612869592563e-06, "loss": 0.3776, "step": 8725 }, { "epoch": 0.43285877275658513, "grad_norm": 10.280698776245117, "learning_rate": 6.134846334796547e-06, "loss": 0.4569, "step": 8726 }, { "epoch": 0.4329083783917853, "grad_norm": 6.066988468170166, "learning_rate": 6.134079771878526e-06, "loss": 0.2838, "step": 8727 }, { "epoch": 0.4329579840269855, "grad_norm": 9.376895904541016, "learning_rate": 6.1333131808574965e-06, "loss": 0.3474, "step": 8728 }, { "epoch": 0.4330075896621856, "grad_norm": 6.765527248382568, "learning_rate": 6.132546561752452e-06, "loss": 0.3017, "step": 8729 }, { "epoch": 0.4330571952973858, "grad_norm": 6.252688407897949, "learning_rate": 6.131779914582393e-06, "loss": 0.4194, "step": 8730 }, { "epoch": 0.43310680093258597, "grad_norm": 6.549815654754639, "learning_rate": 6.131013239366314e-06, "loss": 0.2966, "step": 8731 }, { "epoch": 0.4331564065677861, "grad_norm": 6.2953386306762695, "learning_rate": 6.1302465361232175e-06, "loss": 0.3036, "step": 8732 }, { "epoch": 0.43320601220298627, "grad_norm": 12.331210136413574, "learning_rate": 6.1294798048720996e-06, "loss": 0.4444, "step": 8733 }, { "epoch": 0.4332556178381864, "grad_norm": 5.321897506713867, "learning_rate": 6.1287130456319595e-06, "loss": 0.2301, "step": 8734 }, { "epoch": 0.43330522347338657, "grad_norm": 4.315062999725342, "learning_rate": 6.127946258421802e-06, "loss": 0.3537, "step": 8735 }, { "epoch": 0.43335482910858675, "grad_norm": 21.465831756591797, "learning_rate": 6.127179443260625e-06, "loss": 0.4762, "step": 8736 }, { "epoch": 0.43340443474378687, "grad_norm": 4.41867208480835, "learning_rate": 6.126412600167431e-06, "loss": 0.423, "step": 8737 }, { "epoch": 0.43345404037898705, "grad_norm": 4.407602787017822, "learning_rate": 6.125645729161225e-06, "loss": 0.2566, "step": 8738 }, { "epoch": 0.4335036460141872, "grad_norm": 7.607431888580322, "learning_rate": 6.1248788302610065e-06, "loss": 0.2635, "step": 8739 }, { "epoch": 0.43355325164938735, "grad_norm": 4.069340229034424, "learning_rate": 6.124111903485782e-06, "loss": 0.2804, "step": 8740 }, { "epoch": 0.43360285728458753, "grad_norm": 6.919893264770508, "learning_rate": 6.1233449488545574e-06, "loss": 0.3696, "step": 8741 }, { "epoch": 0.4336524629197877, "grad_norm": 4.846510410308838, "learning_rate": 6.122577966386336e-06, "loss": 0.3232, "step": 8742 }, { "epoch": 0.43370206855498783, "grad_norm": 10.308487892150879, "learning_rate": 6.121810956100125e-06, "loss": 0.3987, "step": 8743 }, { "epoch": 0.433751674190188, "grad_norm": 9.166557312011719, "learning_rate": 6.1210439180149304e-06, "loss": 0.3262, "step": 8744 }, { "epoch": 0.4338012798253882, "grad_norm": 6.911062717437744, "learning_rate": 6.120276852149762e-06, "loss": 0.3496, "step": 8745 }, { "epoch": 0.4338508854605883, "grad_norm": 7.497730255126953, "learning_rate": 6.119509758523625e-06, "loss": 0.1822, "step": 8746 }, { "epoch": 0.4339004910957885, "grad_norm": 8.035295486450195, "learning_rate": 6.1187426371555304e-06, "loss": 0.3576, "step": 8747 }, { "epoch": 0.43395009673098867, "grad_norm": 19.105371475219727, "learning_rate": 6.117975488064486e-06, "loss": 0.2533, "step": 8748 }, { "epoch": 0.4339997023661888, "grad_norm": 7.527079105377197, "learning_rate": 6.1172083112695045e-06, "loss": 0.3775, "step": 8749 }, { "epoch": 0.43404930800138897, "grad_norm": 10.229854583740234, "learning_rate": 6.116441106789596e-06, "loss": 0.4495, "step": 8750 }, { "epoch": 0.4340989136365891, "grad_norm": 7.450472831726074, "learning_rate": 6.11567387464377e-06, "loss": 0.3998, "step": 8751 }, { "epoch": 0.43414851927178927, "grad_norm": 5.173519134521484, "learning_rate": 6.114906614851043e-06, "loss": 0.1838, "step": 8752 }, { "epoch": 0.43419812490698945, "grad_norm": 5.374610424041748, "learning_rate": 6.114139327430424e-06, "loss": 0.281, "step": 8753 }, { "epoch": 0.43424773054218957, "grad_norm": 12.819060325622559, "learning_rate": 6.11337201240093e-06, "loss": 0.314, "step": 8754 }, { "epoch": 0.43429733617738975, "grad_norm": 9.232683181762695, "learning_rate": 6.112604669781572e-06, "loss": 0.2978, "step": 8755 }, { "epoch": 0.4343469418125899, "grad_norm": 10.830647468566895, "learning_rate": 6.111837299591369e-06, "loss": 0.3879, "step": 8756 }, { "epoch": 0.43439654744779005, "grad_norm": 11.155150413513184, "learning_rate": 6.111069901849333e-06, "loss": 0.3687, "step": 8757 }, { "epoch": 0.4344461530829902, "grad_norm": 8.778119087219238, "learning_rate": 6.110302476574483e-06, "loss": 0.2695, "step": 8758 }, { "epoch": 0.4344957587181904, "grad_norm": 3.9413657188415527, "learning_rate": 6.109535023785837e-06, "loss": 0.2684, "step": 8759 }, { "epoch": 0.43454536435339053, "grad_norm": 4.669241905212402, "learning_rate": 6.108767543502409e-06, "loss": 0.2715, "step": 8760 }, { "epoch": 0.4345949699885907, "grad_norm": 3.7468645572662354, "learning_rate": 6.10800003574322e-06, "loss": 0.2922, "step": 8761 }, { "epoch": 0.4346445756237909, "grad_norm": 4.837864398956299, "learning_rate": 6.10723250052729e-06, "loss": 0.2938, "step": 8762 }, { "epoch": 0.434694181258991, "grad_norm": 2.9001753330230713, "learning_rate": 6.1064649378736375e-06, "loss": 0.2949, "step": 8763 }, { "epoch": 0.4347437868941912, "grad_norm": 5.400076866149902, "learning_rate": 6.105697347801283e-06, "loss": 0.2719, "step": 8764 }, { "epoch": 0.43479339252939136, "grad_norm": 8.440720558166504, "learning_rate": 6.1049297303292475e-06, "loss": 0.2754, "step": 8765 }, { "epoch": 0.4348429981645915, "grad_norm": 5.361318111419678, "learning_rate": 6.104162085476555e-06, "loss": 0.2843, "step": 8766 }, { "epoch": 0.43489260379979167, "grad_norm": 3.808074951171875, "learning_rate": 6.103394413262224e-06, "loss": 0.2452, "step": 8767 }, { "epoch": 0.4349422094349918, "grad_norm": 4.158984661102295, "learning_rate": 6.102626713705282e-06, "loss": 0.2487, "step": 8768 }, { "epoch": 0.43499181507019197, "grad_norm": 6.1868414878845215, "learning_rate": 6.101858986824753e-06, "loss": 0.2841, "step": 8769 }, { "epoch": 0.43504142070539215, "grad_norm": 9.656038284301758, "learning_rate": 6.101091232639657e-06, "loss": 0.3211, "step": 8770 }, { "epoch": 0.43509102634059227, "grad_norm": 6.018657207489014, "learning_rate": 6.100323451169022e-06, "loss": 0.2631, "step": 8771 }, { "epoch": 0.43514063197579245, "grad_norm": 5.390365123748779, "learning_rate": 6.0995556424318765e-06, "loss": 0.2524, "step": 8772 }, { "epoch": 0.4351902376109926, "grad_norm": 9.574929237365723, "learning_rate": 6.0987878064472435e-06, "loss": 0.3533, "step": 8773 }, { "epoch": 0.43523984324619275, "grad_norm": 8.9114990234375, "learning_rate": 6.098019943234151e-06, "loss": 0.3237, "step": 8774 }, { "epoch": 0.4352894488813929, "grad_norm": 4.912973403930664, "learning_rate": 6.097252052811629e-06, "loss": 0.3086, "step": 8775 }, { "epoch": 0.4353390545165931, "grad_norm": 6.336334228515625, "learning_rate": 6.096484135198704e-06, "loss": 0.3315, "step": 8776 }, { "epoch": 0.4353886601517932, "grad_norm": 6.262817859649658, "learning_rate": 6.095716190414407e-06, "loss": 0.2067, "step": 8777 }, { "epoch": 0.4354382657869934, "grad_norm": 7.08755350112915, "learning_rate": 6.0949482184777665e-06, "loss": 0.2958, "step": 8778 }, { "epoch": 0.4354878714221936, "grad_norm": 7.152904033660889, "learning_rate": 6.0941802194078146e-06, "loss": 0.4166, "step": 8779 }, { "epoch": 0.4355374770573937, "grad_norm": 16.139297485351562, "learning_rate": 6.09341219322358e-06, "loss": 0.454, "step": 8780 }, { "epoch": 0.4355870826925939, "grad_norm": 5.734100818634033, "learning_rate": 6.092644139944099e-06, "loss": 0.2529, "step": 8781 }, { "epoch": 0.43563668832779406, "grad_norm": 7.253421783447266, "learning_rate": 6.0918760595884005e-06, "loss": 0.3313, "step": 8782 }, { "epoch": 0.4356862939629942, "grad_norm": 3.3381576538085938, "learning_rate": 6.091107952175519e-06, "loss": 0.1641, "step": 8783 }, { "epoch": 0.43573589959819437, "grad_norm": 11.115202903747559, "learning_rate": 6.09033981772449e-06, "loss": 0.4233, "step": 8784 }, { "epoch": 0.4357855052333945, "grad_norm": 8.437799453735352, "learning_rate": 6.089571656254346e-06, "loss": 0.4016, "step": 8785 }, { "epoch": 0.43583511086859467, "grad_norm": 5.922760963439941, "learning_rate": 6.088803467784125e-06, "loss": 0.2464, "step": 8786 }, { "epoch": 0.43588471650379484, "grad_norm": 10.044414520263672, "learning_rate": 6.08803525233286e-06, "loss": 0.3795, "step": 8787 }, { "epoch": 0.43593432213899497, "grad_norm": 5.32659387588501, "learning_rate": 6.08726700991959e-06, "loss": 0.237, "step": 8788 }, { "epoch": 0.43598392777419515, "grad_norm": 9.571271896362305, "learning_rate": 6.086498740563352e-06, "loss": 0.2853, "step": 8789 }, { "epoch": 0.4360335334093953, "grad_norm": 8.889852523803711, "learning_rate": 6.0857304442831825e-06, "loss": 0.2451, "step": 8790 }, { "epoch": 0.43608313904459545, "grad_norm": 7.290403842926025, "learning_rate": 6.084962121098122e-06, "loss": 0.3148, "step": 8791 }, { "epoch": 0.4361327446797956, "grad_norm": 5.3149189949035645, "learning_rate": 6.084193771027211e-06, "loss": 0.2705, "step": 8792 }, { "epoch": 0.4361823503149958, "grad_norm": 6.141618728637695, "learning_rate": 6.083425394089486e-06, "loss": 0.308, "step": 8793 }, { "epoch": 0.4362319559501959, "grad_norm": 4.193218231201172, "learning_rate": 6.08265699030399e-06, "loss": 0.3084, "step": 8794 }, { "epoch": 0.4362815615853961, "grad_norm": 7.473574161529541, "learning_rate": 6.081888559689763e-06, "loss": 0.3397, "step": 8795 }, { "epoch": 0.4363311672205963, "grad_norm": 14.166943550109863, "learning_rate": 6.081120102265851e-06, "loss": 0.4619, "step": 8796 }, { "epoch": 0.4363807728557964, "grad_norm": 7.070124626159668, "learning_rate": 6.080351618051291e-06, "loss": 0.3359, "step": 8797 }, { "epoch": 0.4364303784909966, "grad_norm": 6.2369771003723145, "learning_rate": 6.07958310706513e-06, "loss": 0.291, "step": 8798 }, { "epoch": 0.43647998412619676, "grad_norm": 13.027420043945312, "learning_rate": 6.078814569326413e-06, "loss": 0.4173, "step": 8799 }, { "epoch": 0.4365295897613969, "grad_norm": 9.628006935119629, "learning_rate": 6.078046004854182e-06, "loss": 0.2871, "step": 8800 }, { "epoch": 0.43657919539659706, "grad_norm": 45.11951446533203, "learning_rate": 6.077277413667482e-06, "loss": 0.4543, "step": 8801 }, { "epoch": 0.4366288010317972, "grad_norm": 7.573087692260742, "learning_rate": 6.0765087957853605e-06, "loss": 0.1838, "step": 8802 }, { "epoch": 0.43667840666699737, "grad_norm": 4.787253379821777, "learning_rate": 6.075740151226866e-06, "loss": 0.3156, "step": 8803 }, { "epoch": 0.43672801230219754, "grad_norm": 6.2127203941345215, "learning_rate": 6.074971480011043e-06, "loss": 0.36, "step": 8804 }, { "epoch": 0.43677761793739767, "grad_norm": 8.163783073425293, "learning_rate": 6.0742027821569395e-06, "loss": 0.3224, "step": 8805 }, { "epoch": 0.43682722357259784, "grad_norm": 13.747164726257324, "learning_rate": 6.073434057683606e-06, "loss": 0.5181, "step": 8806 }, { "epoch": 0.436876829207798, "grad_norm": 11.941906929016113, "learning_rate": 6.07266530661009e-06, "loss": 0.4358, "step": 8807 }, { "epoch": 0.43692643484299815, "grad_norm": 6.411843299865723, "learning_rate": 6.071896528955444e-06, "loss": 0.3311, "step": 8808 }, { "epoch": 0.4369760404781983, "grad_norm": 7.489734172821045, "learning_rate": 6.0711277247387165e-06, "loss": 0.4199, "step": 8809 }, { "epoch": 0.4370256461133985, "grad_norm": 5.587981224060059, "learning_rate": 6.070358893978959e-06, "loss": 0.3165, "step": 8810 }, { "epoch": 0.4370752517485986, "grad_norm": 16.62832260131836, "learning_rate": 6.0695900366952245e-06, "loss": 0.3789, "step": 8811 }, { "epoch": 0.4371248573837988, "grad_norm": 5.965003967285156, "learning_rate": 6.068821152906564e-06, "loss": 0.271, "step": 8812 }, { "epoch": 0.437174463018999, "grad_norm": 6.337334632873535, "learning_rate": 6.0680522426320345e-06, "loss": 0.2382, "step": 8813 }, { "epoch": 0.4372240686541991, "grad_norm": 5.942561149597168, "learning_rate": 6.067283305890686e-06, "loss": 0.2613, "step": 8814 }, { "epoch": 0.4372736742893993, "grad_norm": 4.248175621032715, "learning_rate": 6.066514342701573e-06, "loss": 0.282, "step": 8815 }, { "epoch": 0.43732327992459946, "grad_norm": 8.174845695495605, "learning_rate": 6.065745353083754e-06, "loss": 0.3658, "step": 8816 }, { "epoch": 0.4373728855597996, "grad_norm": 4.492249011993408, "learning_rate": 6.064976337056282e-06, "loss": 0.2844, "step": 8817 }, { "epoch": 0.43742249119499976, "grad_norm": 7.252639293670654, "learning_rate": 6.064207294638215e-06, "loss": 0.307, "step": 8818 }, { "epoch": 0.4374720968301999, "grad_norm": 7.421159267425537, "learning_rate": 6.0634382258486104e-06, "loss": 0.2837, "step": 8819 }, { "epoch": 0.43752170246540006, "grad_norm": 5.284374237060547, "learning_rate": 6.062669130706525e-06, "loss": 0.3272, "step": 8820 }, { "epoch": 0.43757130810060024, "grad_norm": 7.258668422698975, "learning_rate": 6.061900009231016e-06, "loss": 0.3779, "step": 8821 }, { "epoch": 0.43762091373580037, "grad_norm": 6.062292575836182, "learning_rate": 6.061130861441146e-06, "loss": 0.3057, "step": 8822 }, { "epoch": 0.43767051937100054, "grad_norm": 8.059529304504395, "learning_rate": 6.060361687355974e-06, "loss": 0.1944, "step": 8823 }, { "epoch": 0.4377201250062007, "grad_norm": 12.380656242370605, "learning_rate": 6.059592486994559e-06, "loss": 0.3257, "step": 8824 }, { "epoch": 0.43776973064140084, "grad_norm": 6.50783634185791, "learning_rate": 6.058823260375964e-06, "loss": 0.2532, "step": 8825 }, { "epoch": 0.437819336276601, "grad_norm": 14.361563682556152, "learning_rate": 6.058054007519248e-06, "loss": 0.4129, "step": 8826 }, { "epoch": 0.4378689419118012, "grad_norm": 7.188479423522949, "learning_rate": 6.057284728443475e-06, "loss": 0.2511, "step": 8827 }, { "epoch": 0.4379185475470013, "grad_norm": 4.416321754455566, "learning_rate": 6.056515423167709e-06, "loss": 0.2933, "step": 8828 }, { "epoch": 0.4379681531822015, "grad_norm": 5.308863162994385, "learning_rate": 6.055746091711012e-06, "loss": 0.3284, "step": 8829 }, { "epoch": 0.4380177588174017, "grad_norm": 5.518740653991699, "learning_rate": 6.054976734092451e-06, "loss": 0.2517, "step": 8830 }, { "epoch": 0.4380673644526018, "grad_norm": 6.571074962615967, "learning_rate": 6.054207350331088e-06, "loss": 0.3155, "step": 8831 }, { "epoch": 0.438116970087802, "grad_norm": 4.9237380027771, "learning_rate": 6.053437940445991e-06, "loss": 0.205, "step": 8832 }, { "epoch": 0.43816657572300216, "grad_norm": 11.611102104187012, "learning_rate": 6.052668504456225e-06, "loss": 0.3931, "step": 8833 }, { "epoch": 0.4382161813582023, "grad_norm": 7.047473907470703, "learning_rate": 6.051899042380857e-06, "loss": 0.3685, "step": 8834 }, { "epoch": 0.43826578699340246, "grad_norm": 10.424734115600586, "learning_rate": 6.051129554238954e-06, "loss": 0.2084, "step": 8835 }, { "epoch": 0.4383153926286026, "grad_norm": 4.554659366607666, "learning_rate": 6.050360040049587e-06, "loss": 0.2832, "step": 8836 }, { "epoch": 0.43836499826380276, "grad_norm": 4.253595352172852, "learning_rate": 6.049590499831821e-06, "loss": 0.3533, "step": 8837 }, { "epoch": 0.43841460389900294, "grad_norm": 6.518942832946777, "learning_rate": 6.048820933604729e-06, "loss": 0.263, "step": 8838 }, { "epoch": 0.43846420953420306, "grad_norm": 8.789047241210938, "learning_rate": 6.04805134138738e-06, "loss": 0.3539, "step": 8839 }, { "epoch": 0.43851381516940324, "grad_norm": 6.714591026306152, "learning_rate": 6.047281723198845e-06, "loss": 0.3024, "step": 8840 }, { "epoch": 0.4385634208046034, "grad_norm": 7.8033223152160645, "learning_rate": 6.046512079058194e-06, "loss": 0.3302, "step": 8841 }, { "epoch": 0.43861302643980354, "grad_norm": 5.201801776885986, "learning_rate": 6.0457424089845e-06, "loss": 0.3106, "step": 8842 }, { "epoch": 0.4386626320750037, "grad_norm": 9.782855987548828, "learning_rate": 6.044972712996837e-06, "loss": 0.4069, "step": 8843 }, { "epoch": 0.4387122377102039, "grad_norm": 4.745899677276611, "learning_rate": 6.0442029911142775e-06, "loss": 0.212, "step": 8844 }, { "epoch": 0.438761843345404, "grad_norm": 5.4340009689331055, "learning_rate": 6.043433243355895e-06, "loss": 0.2644, "step": 8845 }, { "epoch": 0.4388114489806042, "grad_norm": 7.23086404800415, "learning_rate": 6.042663469740766e-06, "loss": 0.3259, "step": 8846 }, { "epoch": 0.4388610546158044, "grad_norm": 11.420696258544922, "learning_rate": 6.041893670287963e-06, "loss": 0.3997, "step": 8847 }, { "epoch": 0.4389106602510045, "grad_norm": 5.609861850738525, "learning_rate": 6.041123845016563e-06, "loss": 0.3268, "step": 8848 }, { "epoch": 0.4389602658862047, "grad_norm": 5.682490348815918, "learning_rate": 6.040353993945645e-06, "loss": 0.2982, "step": 8849 }, { "epoch": 0.4390098715214048, "grad_norm": 4.602017879486084, "learning_rate": 6.039584117094284e-06, "loss": 0.2348, "step": 8850 }, { "epoch": 0.439059477156605, "grad_norm": 8.862855911254883, "learning_rate": 6.0388142144815575e-06, "loss": 0.2634, "step": 8851 }, { "epoch": 0.43910908279180516, "grad_norm": 4.93411111831665, "learning_rate": 6.038044286126545e-06, "loss": 0.2238, "step": 8852 }, { "epoch": 0.4391586884270053, "grad_norm": 5.397759914398193, "learning_rate": 6.037274332048328e-06, "loss": 0.3135, "step": 8853 }, { "epoch": 0.43920829406220546, "grad_norm": 5.996488094329834, "learning_rate": 6.0365043522659805e-06, "loss": 0.3074, "step": 8854 }, { "epoch": 0.43925789969740564, "grad_norm": 4.701621055603027, "learning_rate": 6.035734346798587e-06, "loss": 0.2828, "step": 8855 }, { "epoch": 0.43930750533260576, "grad_norm": 5.577280044555664, "learning_rate": 6.034964315665228e-06, "loss": 0.2304, "step": 8856 }, { "epoch": 0.43935711096780594, "grad_norm": 8.43515396118164, "learning_rate": 6.034194258884987e-06, "loss": 0.3767, "step": 8857 }, { "epoch": 0.4394067166030061, "grad_norm": 7.617384910583496, "learning_rate": 6.0334241764769415e-06, "loss": 0.2476, "step": 8858 }, { "epoch": 0.43945632223820624, "grad_norm": 6.024167537689209, "learning_rate": 6.03265406846018e-06, "loss": 0.2968, "step": 8859 }, { "epoch": 0.4395059278734064, "grad_norm": 5.526983261108398, "learning_rate": 6.0318839348537814e-06, "loss": 0.2827, "step": 8860 }, { "epoch": 0.4395555335086066, "grad_norm": 5.6200785636901855, "learning_rate": 6.031113775676834e-06, "loss": 0.2361, "step": 8861 }, { "epoch": 0.4396051391438067, "grad_norm": 11.639988899230957, "learning_rate": 6.03034359094842e-06, "loss": 0.3835, "step": 8862 }, { "epoch": 0.4396547447790069, "grad_norm": 11.020915031433105, "learning_rate": 6.029573380687626e-06, "loss": 0.4716, "step": 8863 }, { "epoch": 0.4397043504142071, "grad_norm": 5.155727863311768, "learning_rate": 6.028803144913536e-06, "loss": 0.3083, "step": 8864 }, { "epoch": 0.4397539560494072, "grad_norm": 5.034865856170654, "learning_rate": 6.0280328836452406e-06, "loss": 0.2297, "step": 8865 }, { "epoch": 0.4398035616846074, "grad_norm": 11.518389701843262, "learning_rate": 6.027262596901826e-06, "loss": 0.3495, "step": 8866 }, { "epoch": 0.4398531673198075, "grad_norm": 6.831076622009277, "learning_rate": 6.0264922847023786e-06, "loss": 0.329, "step": 8867 }, { "epoch": 0.4399027729550077, "grad_norm": 8.30316162109375, "learning_rate": 6.025721947065988e-06, "loss": 0.2822, "step": 8868 }, { "epoch": 0.43995237859020786, "grad_norm": 13.704652786254883, "learning_rate": 6.024951584011742e-06, "loss": 0.3665, "step": 8869 }, { "epoch": 0.440001984225408, "grad_norm": 8.511970520019531, "learning_rate": 6.024181195558735e-06, "loss": 0.3579, "step": 8870 }, { "epoch": 0.44005158986060816, "grad_norm": 6.219891548156738, "learning_rate": 6.023410781726054e-06, "loss": 0.3117, "step": 8871 }, { "epoch": 0.44010119549580834, "grad_norm": 8.50201416015625, "learning_rate": 6.022640342532789e-06, "loss": 0.3207, "step": 8872 }, { "epoch": 0.44015080113100846, "grad_norm": 5.294954776763916, "learning_rate": 6.021869877998036e-06, "loss": 0.2627, "step": 8873 }, { "epoch": 0.44020040676620864, "grad_norm": 4.708144187927246, "learning_rate": 6.021099388140884e-06, "loss": 0.2871, "step": 8874 }, { "epoch": 0.4402500124014088, "grad_norm": 7.313092231750488, "learning_rate": 6.020328872980427e-06, "loss": 0.3877, "step": 8875 }, { "epoch": 0.44029961803660894, "grad_norm": 4.868210792541504, "learning_rate": 6.01955833253576e-06, "loss": 0.256, "step": 8876 }, { "epoch": 0.4403492236718091, "grad_norm": 4.550297260284424, "learning_rate": 6.018787766825976e-06, "loss": 0.2725, "step": 8877 }, { "epoch": 0.4403988293070093, "grad_norm": 5.007236957550049, "learning_rate": 6.01801717587017e-06, "loss": 0.3773, "step": 8878 }, { "epoch": 0.4404484349422094, "grad_norm": 4.156529426574707, "learning_rate": 6.017246559687437e-06, "loss": 0.1973, "step": 8879 }, { "epoch": 0.4404980405774096, "grad_norm": 4.062533378601074, "learning_rate": 6.016475918296877e-06, "loss": 0.2859, "step": 8880 }, { "epoch": 0.4405476462126098, "grad_norm": 10.259379386901855, "learning_rate": 6.01570525171758e-06, "loss": 0.4294, "step": 8881 }, { "epoch": 0.4405972518478099, "grad_norm": 23.857158660888672, "learning_rate": 6.01493455996865e-06, "loss": 0.256, "step": 8882 }, { "epoch": 0.4406468574830101, "grad_norm": 7.544006824493408, "learning_rate": 6.014163843069182e-06, "loss": 0.2451, "step": 8883 }, { "epoch": 0.4406964631182102, "grad_norm": 12.23297119140625, "learning_rate": 6.013393101038276e-06, "loss": 0.2724, "step": 8884 }, { "epoch": 0.4407460687534104, "grad_norm": 8.297977447509766, "learning_rate": 6.0126223338950295e-06, "loss": 0.3407, "step": 8885 }, { "epoch": 0.44079567438861056, "grad_norm": 5.267806053161621, "learning_rate": 6.011851541658543e-06, "loss": 0.2655, "step": 8886 }, { "epoch": 0.4408452800238107, "grad_norm": 6.488036632537842, "learning_rate": 6.0110807243479195e-06, "loss": 0.2719, "step": 8887 }, { "epoch": 0.44089488565901086, "grad_norm": 7.7956976890563965, "learning_rate": 6.010309881982257e-06, "loss": 0.38, "step": 8888 }, { "epoch": 0.44094449129421104, "grad_norm": 4.341121196746826, "learning_rate": 6.009539014580657e-06, "loss": 0.204, "step": 8889 }, { "epoch": 0.44099409692941116, "grad_norm": 8.35672664642334, "learning_rate": 6.008768122162227e-06, "loss": 0.2856, "step": 8890 }, { "epoch": 0.44104370256461134, "grad_norm": 7.812633514404297, "learning_rate": 6.0079972047460644e-06, "loss": 0.3286, "step": 8891 }, { "epoch": 0.4410933081998115, "grad_norm": 7.789712429046631, "learning_rate": 6.007226262351275e-06, "loss": 0.2668, "step": 8892 }, { "epoch": 0.44114291383501164, "grad_norm": 7.511255741119385, "learning_rate": 6.0064552949969635e-06, "loss": 0.291, "step": 8893 }, { "epoch": 0.4411925194702118, "grad_norm": 18.567007064819336, "learning_rate": 6.005684302702236e-06, "loss": 0.3178, "step": 8894 }, { "epoch": 0.441242125105412, "grad_norm": 8.238615036010742, "learning_rate": 6.004913285486195e-06, "loss": 0.2666, "step": 8895 }, { "epoch": 0.4412917307406121, "grad_norm": 8.614384651184082, "learning_rate": 6.004142243367948e-06, "loss": 0.2319, "step": 8896 }, { "epoch": 0.4413413363758123, "grad_norm": 10.307069778442383, "learning_rate": 6.003371176366604e-06, "loss": 0.4112, "step": 8897 }, { "epoch": 0.4413909420110125, "grad_norm": 9.277326583862305, "learning_rate": 6.0026000845012654e-06, "loss": 0.327, "step": 8898 }, { "epoch": 0.4414405476462126, "grad_norm": 5.475791931152344, "learning_rate": 6.0018289677910445e-06, "loss": 0.2616, "step": 8899 }, { "epoch": 0.4414901532814128, "grad_norm": 6.260609149932861, "learning_rate": 6.0010578262550485e-06, "loss": 0.3498, "step": 8900 }, { "epoch": 0.4415397589166129, "grad_norm": 6.047708034515381, "learning_rate": 6.000286659912389e-06, "loss": 0.3144, "step": 8901 }, { "epoch": 0.4415893645518131, "grad_norm": 6.382195472717285, "learning_rate": 5.999515468782171e-06, "loss": 0.2808, "step": 8902 }, { "epoch": 0.44163897018701326, "grad_norm": 16.834819793701172, "learning_rate": 5.998744252883507e-06, "loss": 0.4569, "step": 8903 }, { "epoch": 0.4416885758222134, "grad_norm": 6.870162487030029, "learning_rate": 5.997973012235511e-06, "loss": 0.2912, "step": 8904 }, { "epoch": 0.44173818145741356, "grad_norm": 9.23453140258789, "learning_rate": 5.997201746857291e-06, "loss": 0.3922, "step": 8905 }, { "epoch": 0.44178778709261374, "grad_norm": 5.196774005889893, "learning_rate": 5.99643045676796e-06, "loss": 0.4125, "step": 8906 }, { "epoch": 0.44183739272781386, "grad_norm": 7.844715118408203, "learning_rate": 5.995659141986634e-06, "loss": 0.2921, "step": 8907 }, { "epoch": 0.44188699836301404, "grad_norm": 5.0302863121032715, "learning_rate": 5.994887802532422e-06, "loss": 0.338, "step": 8908 }, { "epoch": 0.4419366039982142, "grad_norm": 12.360093116760254, "learning_rate": 5.99411643842444e-06, "loss": 0.4467, "step": 8909 }, { "epoch": 0.44198620963341434, "grad_norm": 7.4065656661987305, "learning_rate": 5.993345049681803e-06, "loss": 0.2976, "step": 8910 }, { "epoch": 0.4420358152686145, "grad_norm": 5.619332790374756, "learning_rate": 5.992573636323627e-06, "loss": 0.3283, "step": 8911 }, { "epoch": 0.4420854209038147, "grad_norm": 6.775236129760742, "learning_rate": 5.991802198369027e-06, "loss": 0.2189, "step": 8912 }, { "epoch": 0.4421350265390148, "grad_norm": 9.931320190429688, "learning_rate": 5.99103073583712e-06, "loss": 0.3946, "step": 8913 }, { "epoch": 0.442184632174215, "grad_norm": 6.351436138153076, "learning_rate": 5.990259248747023e-06, "loss": 0.2469, "step": 8914 }, { "epoch": 0.4422342378094152, "grad_norm": 7.590844631195068, "learning_rate": 5.989487737117855e-06, "loss": 0.3074, "step": 8915 }, { "epoch": 0.4422838434446153, "grad_norm": 14.438575744628906, "learning_rate": 5.9887162009687315e-06, "loss": 0.5241, "step": 8916 }, { "epoch": 0.4423334490798155, "grad_norm": 5.076465129852295, "learning_rate": 5.987944640318775e-06, "loss": 0.24, "step": 8917 }, { "epoch": 0.4423830547150156, "grad_norm": 7.171439170837402, "learning_rate": 5.987173055187101e-06, "loss": 0.4236, "step": 8918 }, { "epoch": 0.4424326603502158, "grad_norm": 4.140522480010986, "learning_rate": 5.986401445592834e-06, "loss": 0.2825, "step": 8919 }, { "epoch": 0.44248226598541596, "grad_norm": 13.851717948913574, "learning_rate": 5.9856298115550935e-06, "loss": 0.4367, "step": 8920 }, { "epoch": 0.4425318716206161, "grad_norm": 10.118217468261719, "learning_rate": 5.984858153092999e-06, "loss": 0.3991, "step": 8921 }, { "epoch": 0.44258147725581626, "grad_norm": 10.361886978149414, "learning_rate": 5.984086470225674e-06, "loss": 0.3763, "step": 8922 }, { "epoch": 0.44263108289101644, "grad_norm": 4.51092529296875, "learning_rate": 5.983314762972242e-06, "loss": 0.2723, "step": 8923 }, { "epoch": 0.44268068852621656, "grad_norm": 5.32133150100708, "learning_rate": 5.982543031351825e-06, "loss": 0.2933, "step": 8924 }, { "epoch": 0.44273029416141674, "grad_norm": 5.182078838348389, "learning_rate": 5.981771275383548e-06, "loss": 0.2956, "step": 8925 }, { "epoch": 0.4427798997966169, "grad_norm": 5.682119846343994, "learning_rate": 5.980999495086533e-06, "loss": 0.3164, "step": 8926 }, { "epoch": 0.44282950543181704, "grad_norm": 5.5861287117004395, "learning_rate": 5.980227690479907e-06, "loss": 0.3805, "step": 8927 }, { "epoch": 0.4428791110670172, "grad_norm": 7.369940757751465, "learning_rate": 5.979455861582797e-06, "loss": 0.3308, "step": 8928 }, { "epoch": 0.4429287167022174, "grad_norm": 4.54078483581543, "learning_rate": 5.9786840084143275e-06, "loss": 0.3185, "step": 8929 }, { "epoch": 0.4429783223374175, "grad_norm": 8.66247272491455, "learning_rate": 5.9779121309936236e-06, "loss": 0.3118, "step": 8930 }, { "epoch": 0.4430279279726177, "grad_norm": 8.655308723449707, "learning_rate": 5.977140229339817e-06, "loss": 0.1903, "step": 8931 }, { "epoch": 0.4430775336078179, "grad_norm": 6.146754741668701, "learning_rate": 5.976368303472032e-06, "loss": 0.2625, "step": 8932 }, { "epoch": 0.443127139243018, "grad_norm": 10.838533401489258, "learning_rate": 5.9755963534094e-06, "loss": 0.3262, "step": 8933 }, { "epoch": 0.4431767448782182, "grad_norm": 6.624478816986084, "learning_rate": 5.974824379171049e-06, "loss": 0.3051, "step": 8934 }, { "epoch": 0.4432263505134183, "grad_norm": 5.525616645812988, "learning_rate": 5.974052380776107e-06, "loss": 0.2445, "step": 8935 }, { "epoch": 0.4432759561486185, "grad_norm": 4.878185272216797, "learning_rate": 5.9732803582437085e-06, "loss": 0.288, "step": 8936 }, { "epoch": 0.44332556178381866, "grad_norm": 7.708505153656006, "learning_rate": 5.972508311592981e-06, "loss": 0.2853, "step": 8937 }, { "epoch": 0.4433751674190188, "grad_norm": 3.930884838104248, "learning_rate": 5.971736240843059e-06, "loss": 0.291, "step": 8938 }, { "epoch": 0.44342477305421896, "grad_norm": 8.985316276550293, "learning_rate": 5.970964146013072e-06, "loss": 0.3699, "step": 8939 }, { "epoch": 0.44347437868941914, "grad_norm": 4.501840114593506, "learning_rate": 5.970192027122155e-06, "loss": 0.3484, "step": 8940 }, { "epoch": 0.44352398432461926, "grad_norm": 5.745943546295166, "learning_rate": 5.969419884189441e-06, "loss": 0.3407, "step": 8941 }, { "epoch": 0.44357358995981944, "grad_norm": 7.9413981437683105, "learning_rate": 5.968647717234063e-06, "loss": 0.2978, "step": 8942 }, { "epoch": 0.4436231955950196, "grad_norm": 3.916822671890259, "learning_rate": 5.9678755262751566e-06, "loss": 0.2783, "step": 8943 }, { "epoch": 0.44367280123021974, "grad_norm": 6.769519805908203, "learning_rate": 5.967103311331858e-06, "loss": 0.2371, "step": 8944 }, { "epoch": 0.4437224068654199, "grad_norm": 7.5290985107421875, "learning_rate": 5.9663310724233005e-06, "loss": 0.4059, "step": 8945 }, { "epoch": 0.4437720125006201, "grad_norm": 7.193803787231445, "learning_rate": 5.965558809568623e-06, "loss": 0.3838, "step": 8946 }, { "epoch": 0.4438216181358202, "grad_norm": 7.088768482208252, "learning_rate": 5.96478652278696e-06, "loss": 0.3893, "step": 8947 }, { "epoch": 0.4438712237710204, "grad_norm": 8.906062126159668, "learning_rate": 5.964014212097451e-06, "loss": 0.3665, "step": 8948 }, { "epoch": 0.4439208294062206, "grad_norm": 7.1974592208862305, "learning_rate": 5.963241877519235e-06, "loss": 0.2422, "step": 8949 }, { "epoch": 0.4439704350414207, "grad_norm": 3.6125404834747314, "learning_rate": 5.962469519071447e-06, "loss": 0.1966, "step": 8950 }, { "epoch": 0.4440200406766209, "grad_norm": 9.715620040893555, "learning_rate": 5.961697136773232e-06, "loss": 0.3344, "step": 8951 }, { "epoch": 0.444069646311821, "grad_norm": 3.9548180103302, "learning_rate": 5.960924730643724e-06, "loss": 0.3407, "step": 8952 }, { "epoch": 0.4441192519470212, "grad_norm": 11.100028038024902, "learning_rate": 5.960152300702069e-06, "loss": 0.3541, "step": 8953 }, { "epoch": 0.44416885758222135, "grad_norm": 5.047741889953613, "learning_rate": 5.959379846967404e-06, "loss": 0.2594, "step": 8954 }, { "epoch": 0.4442184632174215, "grad_norm": 8.913220405578613, "learning_rate": 5.958607369458873e-06, "loss": 0.2808, "step": 8955 }, { "epoch": 0.44426806885262166, "grad_norm": 5.743430137634277, "learning_rate": 5.957834868195618e-06, "loss": 0.3088, "step": 8956 }, { "epoch": 0.44431767448782183, "grad_norm": 11.99303913116455, "learning_rate": 5.957062343196781e-06, "loss": 0.3129, "step": 8957 }, { "epoch": 0.44436728012302196, "grad_norm": 17.820425033569336, "learning_rate": 5.956289794481508e-06, "loss": 0.4102, "step": 8958 }, { "epoch": 0.44441688575822214, "grad_norm": 5.411531448364258, "learning_rate": 5.955517222068939e-06, "loss": 0.3005, "step": 8959 }, { "epoch": 0.4444664913934223, "grad_norm": 5.723550796508789, "learning_rate": 5.954744625978222e-06, "loss": 0.283, "step": 8960 }, { "epoch": 0.44451609702862244, "grad_norm": 7.902830600738525, "learning_rate": 5.953972006228502e-06, "loss": 0.2804, "step": 8961 }, { "epoch": 0.4445657026638226, "grad_norm": 9.96273422241211, "learning_rate": 5.953199362838923e-06, "loss": 0.3198, "step": 8962 }, { "epoch": 0.4446153082990228, "grad_norm": 11.2589750289917, "learning_rate": 5.952426695828633e-06, "loss": 0.36, "step": 8963 }, { "epoch": 0.4446649139342229, "grad_norm": 17.174890518188477, "learning_rate": 5.951654005216778e-06, "loss": 0.3175, "step": 8964 }, { "epoch": 0.4447145195694231, "grad_norm": 5.5887770652771, "learning_rate": 5.9508812910225074e-06, "loss": 0.3035, "step": 8965 }, { "epoch": 0.4447641252046233, "grad_norm": 7.254083156585693, "learning_rate": 5.950108553264968e-06, "loss": 0.2859, "step": 8966 }, { "epoch": 0.4448137308398234, "grad_norm": 5.780615329742432, "learning_rate": 5.949335791963308e-06, "loss": 0.3557, "step": 8967 }, { "epoch": 0.4448633364750236, "grad_norm": 8.50731086730957, "learning_rate": 5.948563007136678e-06, "loss": 0.3529, "step": 8968 }, { "epoch": 0.4449129421102237, "grad_norm": 5.408071041107178, "learning_rate": 5.94779019880423e-06, "loss": 0.2593, "step": 8969 }, { "epoch": 0.4449625477454239, "grad_norm": 6.335920810699463, "learning_rate": 5.947017366985109e-06, "loss": 0.2806, "step": 8970 }, { "epoch": 0.44501215338062405, "grad_norm": 11.890430450439453, "learning_rate": 5.94624451169847e-06, "loss": 0.6006, "step": 8971 }, { "epoch": 0.4450617590158242, "grad_norm": 8.529147148132324, "learning_rate": 5.945471632963464e-06, "loss": 0.4062, "step": 8972 }, { "epoch": 0.44511136465102435, "grad_norm": 8.999983787536621, "learning_rate": 5.944698730799243e-06, "loss": 0.2914, "step": 8973 }, { "epoch": 0.44516097028622453, "grad_norm": 7.712264060974121, "learning_rate": 5.943925805224961e-06, "loss": 0.3235, "step": 8974 }, { "epoch": 0.44521057592142466, "grad_norm": 4.619207859039307, "learning_rate": 5.943152856259771e-06, "loss": 0.2726, "step": 8975 }, { "epoch": 0.44526018155662483, "grad_norm": 9.173768043518066, "learning_rate": 5.9423798839228255e-06, "loss": 0.3515, "step": 8976 }, { "epoch": 0.445309787191825, "grad_norm": 5.797523498535156, "learning_rate": 5.94160688823328e-06, "loss": 0.2279, "step": 8977 }, { "epoch": 0.44535939282702514, "grad_norm": 4.567713737487793, "learning_rate": 5.940833869210292e-06, "loss": 0.3123, "step": 8978 }, { "epoch": 0.4454089984622253, "grad_norm": 3.3604576587677, "learning_rate": 5.940060826873013e-06, "loss": 0.2352, "step": 8979 }, { "epoch": 0.4454586040974255, "grad_norm": 4.272314548492432, "learning_rate": 5.939287761240602e-06, "loss": 0.2036, "step": 8980 }, { "epoch": 0.4455082097326256, "grad_norm": 8.649518013000488, "learning_rate": 5.9385146723322155e-06, "loss": 0.2867, "step": 8981 }, { "epoch": 0.4455578153678258, "grad_norm": 8.24367618560791, "learning_rate": 5.937741560167011e-06, "loss": 0.3329, "step": 8982 }, { "epoch": 0.4456074210030259, "grad_norm": 9.817475318908691, "learning_rate": 5.9369684247641456e-06, "loss": 0.3877, "step": 8983 }, { "epoch": 0.4456570266382261, "grad_norm": 6.127562999725342, "learning_rate": 5.936195266142779e-06, "loss": 0.2767, "step": 8984 }, { "epoch": 0.4457066322734263, "grad_norm": 6.460101127624512, "learning_rate": 5.9354220843220724e-06, "loss": 0.201, "step": 8985 }, { "epoch": 0.4457562379086264, "grad_norm": 8.181743621826172, "learning_rate": 5.934648879321181e-06, "loss": 0.3182, "step": 8986 }, { "epoch": 0.4458058435438266, "grad_norm": 6.100225448608398, "learning_rate": 5.9338756511592675e-06, "loss": 0.3352, "step": 8987 }, { "epoch": 0.44585544917902675, "grad_norm": 5.192868709564209, "learning_rate": 5.933102399855495e-06, "loss": 0.3057, "step": 8988 }, { "epoch": 0.4459050548142269, "grad_norm": 6.4385247230529785, "learning_rate": 5.932329125429022e-06, "loss": 0.2862, "step": 8989 }, { "epoch": 0.44595466044942705, "grad_norm": 5.449522018432617, "learning_rate": 5.93155582789901e-06, "loss": 0.2511, "step": 8990 }, { "epoch": 0.44600426608462723, "grad_norm": 10.492161750793457, "learning_rate": 5.930782507284624e-06, "loss": 0.3024, "step": 8991 }, { "epoch": 0.44605387171982736, "grad_norm": 6.9560227394104, "learning_rate": 5.9300091636050274e-06, "loss": 0.3969, "step": 8992 }, { "epoch": 0.44610347735502753, "grad_norm": 9.093035697937012, "learning_rate": 5.9292357968793826e-06, "loss": 0.3512, "step": 8993 }, { "epoch": 0.4461530829902277, "grad_norm": 6.658531665802002, "learning_rate": 5.928462407126854e-06, "loss": 0.2935, "step": 8994 }, { "epoch": 0.44620268862542783, "grad_norm": 10.405705451965332, "learning_rate": 5.927688994366607e-06, "loss": 0.3555, "step": 8995 }, { "epoch": 0.446252294260628, "grad_norm": 10.660264015197754, "learning_rate": 5.926915558617807e-06, "loss": 0.3209, "step": 8996 }, { "epoch": 0.4463018998958282, "grad_norm": 8.676729202270508, "learning_rate": 5.926142099899621e-06, "loss": 0.3446, "step": 8997 }, { "epoch": 0.4463515055310283, "grad_norm": 6.661323070526123, "learning_rate": 5.925368618231214e-06, "loss": 0.3397, "step": 8998 }, { "epoch": 0.4464011111662285, "grad_norm": 11.374516487121582, "learning_rate": 5.924595113631755e-06, "loss": 0.3698, "step": 8999 }, { "epoch": 0.4464507168014286, "grad_norm": 5.9280686378479, "learning_rate": 5.923821586120409e-06, "loss": 0.2878, "step": 9000 }, { "epoch": 0.4465003224366288, "grad_norm": 8.061600685119629, "learning_rate": 5.923048035716348e-06, "loss": 0.315, "step": 9001 }, { "epoch": 0.44654992807182897, "grad_norm": 8.693730354309082, "learning_rate": 5.922274462438739e-06, "loss": 0.2144, "step": 9002 }, { "epoch": 0.4465995337070291, "grad_norm": 8.08780574798584, "learning_rate": 5.921500866306751e-06, "loss": 0.3863, "step": 9003 }, { "epoch": 0.4466491393422293, "grad_norm": 5.665900230407715, "learning_rate": 5.920727247339555e-06, "loss": 0.247, "step": 9004 }, { "epoch": 0.44669874497742945, "grad_norm": 11.22594928741455, "learning_rate": 5.919953605556321e-06, "loss": 0.3765, "step": 9005 }, { "epoch": 0.4467483506126296, "grad_norm": 7.598670482635498, "learning_rate": 5.91917994097622e-06, "loss": 0.3582, "step": 9006 }, { "epoch": 0.44679795624782975, "grad_norm": 13.79748249053955, "learning_rate": 5.918406253618425e-06, "loss": 0.3999, "step": 9007 }, { "epoch": 0.44684756188302993, "grad_norm": 9.586971282958984, "learning_rate": 5.9176325435021075e-06, "loss": 0.368, "step": 9008 }, { "epoch": 0.44689716751823005, "grad_norm": 5.408568382263184, "learning_rate": 5.916858810646442e-06, "loss": 0.3442, "step": 9009 }, { "epoch": 0.44694677315343023, "grad_norm": 7.491800308227539, "learning_rate": 5.916085055070598e-06, "loss": 0.308, "step": 9010 }, { "epoch": 0.4469963787886304, "grad_norm": 5.834086894989014, "learning_rate": 5.915311276793753e-06, "loss": 0.2493, "step": 9011 }, { "epoch": 0.44704598442383053, "grad_norm": 6.429961204528809, "learning_rate": 5.914537475835082e-06, "loss": 0.3142, "step": 9012 }, { "epoch": 0.4470955900590307, "grad_norm": 5.951792240142822, "learning_rate": 5.9137636522137554e-06, "loss": 0.344, "step": 9013 }, { "epoch": 0.4471451956942309, "grad_norm": 4.674448013305664, "learning_rate": 5.912989805948954e-06, "loss": 0.2868, "step": 9014 }, { "epoch": 0.447194801329431, "grad_norm": 6.331020355224609, "learning_rate": 5.9122159370598535e-06, "loss": 0.3628, "step": 9015 }, { "epoch": 0.4472444069646312, "grad_norm": 3.922646999359131, "learning_rate": 5.911442045565627e-06, "loss": 0.2686, "step": 9016 }, { "epoch": 0.4472940125998313, "grad_norm": 5.609221935272217, "learning_rate": 5.9106681314854555e-06, "loss": 0.2389, "step": 9017 }, { "epoch": 0.4473436182350315, "grad_norm": 7.5317864418029785, "learning_rate": 5.909894194838515e-06, "loss": 0.2798, "step": 9018 }, { "epoch": 0.44739322387023167, "grad_norm": 9.526137351989746, "learning_rate": 5.909120235643986e-06, "loss": 0.3752, "step": 9019 }, { "epoch": 0.4474428295054318, "grad_norm": 4.256668567657471, "learning_rate": 5.908346253921046e-06, "loss": 0.2218, "step": 9020 }, { "epoch": 0.447492435140632, "grad_norm": 13.019966125488281, "learning_rate": 5.907572249688873e-06, "loss": 0.3458, "step": 9021 }, { "epoch": 0.44754204077583215, "grad_norm": 4.984771251678467, "learning_rate": 5.9067982229666506e-06, "loss": 0.2756, "step": 9022 }, { "epoch": 0.4475916464110323, "grad_norm": 11.64152717590332, "learning_rate": 5.906024173773559e-06, "loss": 0.2227, "step": 9023 }, { "epoch": 0.44764125204623245, "grad_norm": 5.97252082824707, "learning_rate": 5.9052501021287775e-06, "loss": 0.2364, "step": 9024 }, { "epoch": 0.44769085768143263, "grad_norm": 6.849527835845947, "learning_rate": 5.90447600805149e-06, "loss": 0.2766, "step": 9025 }, { "epoch": 0.44774046331663275, "grad_norm": 5.412698745727539, "learning_rate": 5.903701891560876e-06, "loss": 0.3037, "step": 9026 }, { "epoch": 0.44779006895183293, "grad_norm": 8.90514850616455, "learning_rate": 5.902927752676121e-06, "loss": 0.3873, "step": 9027 }, { "epoch": 0.4478396745870331, "grad_norm": 23.376373291015625, "learning_rate": 5.902153591416409e-06, "loss": 0.4062, "step": 9028 }, { "epoch": 0.44788928022223323, "grad_norm": 8.086099624633789, "learning_rate": 5.9013794078009225e-06, "loss": 0.3477, "step": 9029 }, { "epoch": 0.4479388858574334, "grad_norm": 8.96780014038086, "learning_rate": 5.900605201848846e-06, "loss": 0.4158, "step": 9030 }, { "epoch": 0.4479884914926336, "grad_norm": 7.622201442718506, "learning_rate": 5.899830973579365e-06, "loss": 0.4222, "step": 9031 }, { "epoch": 0.4480380971278337, "grad_norm": 3.7610514163970947, "learning_rate": 5.899056723011666e-06, "loss": 0.3056, "step": 9032 }, { "epoch": 0.4480877027630339, "grad_norm": 5.723994731903076, "learning_rate": 5.898282450164934e-06, "loss": 0.2222, "step": 9033 }, { "epoch": 0.448137308398234, "grad_norm": 10.162191390991211, "learning_rate": 5.897508155058357e-06, "loss": 0.385, "step": 9034 }, { "epoch": 0.4481869140334342, "grad_norm": 20.161699295043945, "learning_rate": 5.896733837711122e-06, "loss": 0.4006, "step": 9035 }, { "epoch": 0.44823651966863437, "grad_norm": 6.7653632164001465, "learning_rate": 5.895959498142417e-06, "loss": 0.2907, "step": 9036 }, { "epoch": 0.4482861253038345, "grad_norm": 25.170162200927734, "learning_rate": 5.895185136371431e-06, "loss": 0.3322, "step": 9037 }, { "epoch": 0.44833573093903467, "grad_norm": 4.12122917175293, "learning_rate": 5.894410752417351e-06, "loss": 0.2623, "step": 9038 }, { "epoch": 0.44838533657423485, "grad_norm": 6.659789085388184, "learning_rate": 5.8936363462993695e-06, "loss": 0.2553, "step": 9039 }, { "epoch": 0.448434942209435, "grad_norm": 6.826360702514648, "learning_rate": 5.892861918036674e-06, "loss": 0.3978, "step": 9040 }, { "epoch": 0.44848454784463515, "grad_norm": 5.640498161315918, "learning_rate": 5.8920874676484565e-06, "loss": 0.3819, "step": 9041 }, { "epoch": 0.44853415347983533, "grad_norm": 9.508804321289062, "learning_rate": 5.891312995153908e-06, "loss": 0.2697, "step": 9042 }, { "epoch": 0.44858375911503545, "grad_norm": 7.159795761108398, "learning_rate": 5.890538500572221e-06, "loss": 0.3309, "step": 9043 }, { "epoch": 0.44863336475023563, "grad_norm": 7.196769714355469, "learning_rate": 5.889763983922585e-06, "loss": 0.3438, "step": 9044 }, { "epoch": 0.4486829703854358, "grad_norm": 5.411342620849609, "learning_rate": 5.8889894452241965e-06, "loss": 0.3542, "step": 9045 }, { "epoch": 0.44873257602063593, "grad_norm": 6.7242255210876465, "learning_rate": 5.888214884496249e-06, "loss": 0.2176, "step": 9046 }, { "epoch": 0.4487821816558361, "grad_norm": 6.1782026290893555, "learning_rate": 5.887440301757932e-06, "loss": 0.2537, "step": 9047 }, { "epoch": 0.4488317872910363, "grad_norm": 8.860457420349121, "learning_rate": 5.8866656970284445e-06, "loss": 0.3139, "step": 9048 }, { "epoch": 0.4488813929262364, "grad_norm": 7.258164405822754, "learning_rate": 5.88589107032698e-06, "loss": 0.3378, "step": 9049 }, { "epoch": 0.4489309985614366, "grad_norm": 6.810018539428711, "learning_rate": 5.885116421672733e-06, "loss": 0.381, "step": 9050 }, { "epoch": 0.4489806041966367, "grad_norm": 4.0737762451171875, "learning_rate": 5.884341751084901e-06, "loss": 0.1312, "step": 9051 }, { "epoch": 0.4490302098318369, "grad_norm": 7.054722785949707, "learning_rate": 5.8835670585826795e-06, "loss": 0.1766, "step": 9052 }, { "epoch": 0.44907981546703707, "grad_norm": 13.663512229919434, "learning_rate": 5.88279234418527e-06, "loss": 0.2747, "step": 9053 }, { "epoch": 0.4491294211022372, "grad_norm": 7.597311019897461, "learning_rate": 5.882017607911864e-06, "loss": 0.2026, "step": 9054 }, { "epoch": 0.44917902673743737, "grad_norm": 6.282041072845459, "learning_rate": 5.881242849781663e-06, "loss": 0.3755, "step": 9055 }, { "epoch": 0.44922863237263755, "grad_norm": 5.712904930114746, "learning_rate": 5.8804680698138665e-06, "loss": 0.3277, "step": 9056 }, { "epoch": 0.44927823800783767, "grad_norm": 5.4127116203308105, "learning_rate": 5.879693268027672e-06, "loss": 0.2585, "step": 9057 }, { "epoch": 0.44932784364303785, "grad_norm": 8.754392623901367, "learning_rate": 5.878918444442281e-06, "loss": 0.2613, "step": 9058 }, { "epoch": 0.44937744927823803, "grad_norm": 6.337591648101807, "learning_rate": 5.878143599076893e-06, "loss": 0.2793, "step": 9059 }, { "epoch": 0.44942705491343815, "grad_norm": 7.981420040130615, "learning_rate": 5.87736873195071e-06, "loss": 0.3736, "step": 9060 }, { "epoch": 0.44947666054863833, "grad_norm": 6.647456169128418, "learning_rate": 5.876593843082932e-06, "loss": 0.344, "step": 9061 }, { "epoch": 0.4495262661838385, "grad_norm": 5.065123081207275, "learning_rate": 5.875818932492762e-06, "loss": 0.3202, "step": 9062 }, { "epoch": 0.44957587181903863, "grad_norm": 12.702837944030762, "learning_rate": 5.8750440001994034e-06, "loss": 0.4464, "step": 9063 }, { "epoch": 0.4496254774542388, "grad_norm": 8.018211364746094, "learning_rate": 5.8742690462220586e-06, "loss": 0.3023, "step": 9064 }, { "epoch": 0.449675083089439, "grad_norm": 8.448225975036621, "learning_rate": 5.87349407057993e-06, "loss": 0.2463, "step": 9065 }, { "epoch": 0.4497246887246391, "grad_norm": 7.179837226867676, "learning_rate": 5.872719073292225e-06, "loss": 0.2656, "step": 9066 }, { "epoch": 0.4497742943598393, "grad_norm": 5.080188274383545, "learning_rate": 5.871944054378147e-06, "loss": 0.2561, "step": 9067 }, { "epoch": 0.4498238999950394, "grad_norm": 5.154647350311279, "learning_rate": 5.871169013856899e-06, "loss": 0.2646, "step": 9068 }, { "epoch": 0.4498735056302396, "grad_norm": 6.024061679840088, "learning_rate": 5.870393951747691e-06, "loss": 0.3044, "step": 9069 }, { "epoch": 0.44992311126543977, "grad_norm": 4.764162540435791, "learning_rate": 5.869618868069725e-06, "loss": 0.2614, "step": 9070 }, { "epoch": 0.4499727169006399, "grad_norm": 5.0171709060668945, "learning_rate": 5.8688437628422105e-06, "loss": 0.2401, "step": 9071 }, { "epoch": 0.45002232253584007, "grad_norm": 5.335446357727051, "learning_rate": 5.868068636084354e-06, "loss": 0.3317, "step": 9072 }, { "epoch": 0.45007192817104025, "grad_norm": 9.470858573913574, "learning_rate": 5.867293487815366e-06, "loss": 0.356, "step": 9073 }, { "epoch": 0.45012153380624037, "grad_norm": 13.48417854309082, "learning_rate": 5.866518318054453e-06, "loss": 0.4133, "step": 9074 }, { "epoch": 0.45017113944144055, "grad_norm": 6.386463642120361, "learning_rate": 5.865743126820822e-06, "loss": 0.2199, "step": 9075 }, { "epoch": 0.4502207450766407, "grad_norm": 4.825361251831055, "learning_rate": 5.864967914133686e-06, "loss": 0.2926, "step": 9076 }, { "epoch": 0.45027035071184085, "grad_norm": 3.7415952682495117, "learning_rate": 5.864192680012254e-06, "loss": 0.2417, "step": 9077 }, { "epoch": 0.45031995634704103, "grad_norm": 5.403868675231934, "learning_rate": 5.863417424475736e-06, "loss": 0.2582, "step": 9078 }, { "epoch": 0.4503695619822412, "grad_norm": 7.404202461242676, "learning_rate": 5.862642147543343e-06, "loss": 0.3236, "step": 9079 }, { "epoch": 0.45041916761744133, "grad_norm": 8.149617195129395, "learning_rate": 5.861866849234288e-06, "loss": 0.2588, "step": 9080 }, { "epoch": 0.4504687732526415, "grad_norm": 5.408395290374756, "learning_rate": 5.861091529567784e-06, "loss": 0.3394, "step": 9081 }, { "epoch": 0.4505183788878417, "grad_norm": 8.477558135986328, "learning_rate": 5.86031618856304e-06, "loss": 0.2784, "step": 9082 }, { "epoch": 0.4505679845230418, "grad_norm": 4.561506748199463, "learning_rate": 5.8595408262392725e-06, "loss": 0.2538, "step": 9083 }, { "epoch": 0.450617590158242, "grad_norm": 5.379908561706543, "learning_rate": 5.858765442615695e-06, "loss": 0.2657, "step": 9084 }, { "epoch": 0.4506671957934421, "grad_norm": 5.261281490325928, "learning_rate": 5.857990037711521e-06, "loss": 0.2648, "step": 9085 }, { "epoch": 0.4507168014286423, "grad_norm": 8.574357986450195, "learning_rate": 5.857214611545966e-06, "loss": 0.2861, "step": 9086 }, { "epoch": 0.45076640706384247, "grad_norm": 8.616096496582031, "learning_rate": 5.856439164138246e-06, "loss": 0.3498, "step": 9087 }, { "epoch": 0.4508160126990426, "grad_norm": 15.452136993408203, "learning_rate": 5.855663695507574e-06, "loss": 0.4286, "step": 9088 }, { "epoch": 0.45086561833424277, "grad_norm": 10.545865058898926, "learning_rate": 5.854888205673169e-06, "loss": 0.3013, "step": 9089 }, { "epoch": 0.45091522396944295, "grad_norm": 7.035226345062256, "learning_rate": 5.854112694654249e-06, "loss": 0.2735, "step": 9090 }, { "epoch": 0.45096482960464307, "grad_norm": 8.581293106079102, "learning_rate": 5.85333716247003e-06, "loss": 0.2367, "step": 9091 }, { "epoch": 0.45101443523984325, "grad_norm": 8.545720100402832, "learning_rate": 5.852561609139729e-06, "loss": 0.3442, "step": 9092 }, { "epoch": 0.4510640408750434, "grad_norm": 19.480749130249023, "learning_rate": 5.8517860346825675e-06, "loss": 0.4394, "step": 9093 }, { "epoch": 0.45111364651024355, "grad_norm": 4.535495758056641, "learning_rate": 5.85101043911776e-06, "loss": 0.2644, "step": 9094 }, { "epoch": 0.4511632521454437, "grad_norm": 12.745451927185059, "learning_rate": 5.85023482246453e-06, "loss": 0.5138, "step": 9095 }, { "epoch": 0.4512128577806439, "grad_norm": 7.784082889556885, "learning_rate": 5.849459184742098e-06, "loss": 0.3097, "step": 9096 }, { "epoch": 0.45126246341584403, "grad_norm": 6.190426349639893, "learning_rate": 5.8486835259696815e-06, "loss": 0.2123, "step": 9097 }, { "epoch": 0.4513120690510442, "grad_norm": 7.485413074493408, "learning_rate": 5.847907846166503e-06, "loss": 0.2719, "step": 9098 }, { "epoch": 0.45136167468624433, "grad_norm": 4.552088737487793, "learning_rate": 5.847132145351786e-06, "loss": 0.2867, "step": 9099 }, { "epoch": 0.4514112803214445, "grad_norm": 12.601725578308105, "learning_rate": 5.846356423544751e-06, "loss": 0.3323, "step": 9100 }, { "epoch": 0.4514608859566447, "grad_norm": 12.104616165161133, "learning_rate": 5.84558068076462e-06, "loss": 0.3229, "step": 9101 }, { "epoch": 0.4515104915918448, "grad_norm": 6.198175430297852, "learning_rate": 5.844804917030618e-06, "loss": 0.2852, "step": 9102 }, { "epoch": 0.451560097227045, "grad_norm": 5.410532474517822, "learning_rate": 5.844029132361968e-06, "loss": 0.2663, "step": 9103 }, { "epoch": 0.45160970286224517, "grad_norm": 4.930001258850098, "learning_rate": 5.843253326777894e-06, "loss": 0.2401, "step": 9104 }, { "epoch": 0.4516593084974453, "grad_norm": 5.52925968170166, "learning_rate": 5.842477500297621e-06, "loss": 0.3627, "step": 9105 }, { "epoch": 0.45170891413264547, "grad_norm": 4.216784954071045, "learning_rate": 5.841701652940373e-06, "loss": 0.2317, "step": 9106 }, { "epoch": 0.45175851976784565, "grad_norm": 4.864006519317627, "learning_rate": 5.840925784725381e-06, "loss": 0.3324, "step": 9107 }, { "epoch": 0.45180812540304577, "grad_norm": 7.233694076538086, "learning_rate": 5.840149895671865e-06, "loss": 0.4071, "step": 9108 }, { "epoch": 0.45185773103824595, "grad_norm": 5.253827095031738, "learning_rate": 5.839373985799055e-06, "loss": 0.2267, "step": 9109 }, { "epoch": 0.4519073366734461, "grad_norm": 8.319368362426758, "learning_rate": 5.838598055126179e-06, "loss": 0.3293, "step": 9110 }, { "epoch": 0.45195694230864625, "grad_norm": 4.8397135734558105, "learning_rate": 5.837822103672462e-06, "loss": 0.2234, "step": 9111 }, { "epoch": 0.4520065479438464, "grad_norm": 7.145486831665039, "learning_rate": 5.837046131457135e-06, "loss": 0.3037, "step": 9112 }, { "epoch": 0.4520561535790466, "grad_norm": 5.987228870391846, "learning_rate": 5.836270138499427e-06, "loss": 0.2758, "step": 9113 }, { "epoch": 0.4521057592142467, "grad_norm": 8.890654563903809, "learning_rate": 5.835494124818566e-06, "loss": 0.3834, "step": 9114 }, { "epoch": 0.4521553648494469, "grad_norm": 4.026060581207275, "learning_rate": 5.834718090433782e-06, "loss": 0.2747, "step": 9115 }, { "epoch": 0.45220497048464703, "grad_norm": 6.390992641448975, "learning_rate": 5.8339420353643075e-06, "loss": 0.2609, "step": 9116 }, { "epoch": 0.4522545761198472, "grad_norm": 4.890384197235107, "learning_rate": 5.833165959629372e-06, "loss": 0.3289, "step": 9117 }, { "epoch": 0.4523041817550474, "grad_norm": 15.502182960510254, "learning_rate": 5.832389863248206e-06, "loss": 0.4237, "step": 9118 }, { "epoch": 0.4523537873902475, "grad_norm": 15.476162910461426, "learning_rate": 5.831613746240043e-06, "loss": 0.3357, "step": 9119 }, { "epoch": 0.4524033930254477, "grad_norm": 6.806601047515869, "learning_rate": 5.830837608624116e-06, "loss": 0.2467, "step": 9120 }, { "epoch": 0.45245299866064786, "grad_norm": 10.620415687561035, "learning_rate": 5.830061450419657e-06, "loss": 0.4172, "step": 9121 }, { "epoch": 0.452502604295848, "grad_norm": 9.921995162963867, "learning_rate": 5.829285271645899e-06, "loss": 0.422, "step": 9122 }, { "epoch": 0.45255220993104817, "grad_norm": 4.157822132110596, "learning_rate": 5.8285090723220776e-06, "loss": 0.2256, "step": 9123 }, { "epoch": 0.45260181556624834, "grad_norm": 7.8845534324646, "learning_rate": 5.827732852467427e-06, "loss": 0.33, "step": 9124 }, { "epoch": 0.45265142120144847, "grad_norm": 26.288753509521484, "learning_rate": 5.82695661210118e-06, "loss": 0.2831, "step": 9125 }, { "epoch": 0.45270102683664865, "grad_norm": 6.5394392013549805, "learning_rate": 5.826180351242576e-06, "loss": 0.2968, "step": 9126 }, { "epoch": 0.4527506324718488, "grad_norm": 6.921393871307373, "learning_rate": 5.82540406991085e-06, "loss": 0.1556, "step": 9127 }, { "epoch": 0.45280023810704895, "grad_norm": 4.7838358879089355, "learning_rate": 5.824627768125235e-06, "loss": 0.2288, "step": 9128 }, { "epoch": 0.4528498437422491, "grad_norm": 4.468137264251709, "learning_rate": 5.823851445904971e-06, "loss": 0.3031, "step": 9129 }, { "epoch": 0.4528994493774493, "grad_norm": 6.147755146026611, "learning_rate": 5.823075103269298e-06, "loss": 0.3283, "step": 9130 }, { "epoch": 0.4529490550126494, "grad_norm": 8.605644226074219, "learning_rate": 5.82229874023745e-06, "loss": 0.3994, "step": 9131 }, { "epoch": 0.4529986606478496, "grad_norm": 6.972653388977051, "learning_rate": 5.821522356828667e-06, "loss": 0.3475, "step": 9132 }, { "epoch": 0.4530482662830497, "grad_norm": 6.598848819732666, "learning_rate": 5.820745953062189e-06, "loss": 0.2786, "step": 9133 }, { "epoch": 0.4530978719182499, "grad_norm": 7.881361961364746, "learning_rate": 5.8199695289572546e-06, "loss": 0.3365, "step": 9134 }, { "epoch": 0.4531474775534501, "grad_norm": 4.381480693817139, "learning_rate": 5.819193084533106e-06, "loss": 0.3152, "step": 9135 }, { "epoch": 0.4531970831886502, "grad_norm": 6.162791728973389, "learning_rate": 5.8184166198089795e-06, "loss": 0.2872, "step": 9136 }, { "epoch": 0.4532466888238504, "grad_norm": 5.190123081207275, "learning_rate": 5.8176401348041215e-06, "loss": 0.3327, "step": 9137 }, { "epoch": 0.45329629445905056, "grad_norm": 9.415966033935547, "learning_rate": 5.816863629537769e-06, "loss": 0.2954, "step": 9138 }, { "epoch": 0.4533459000942507, "grad_norm": 8.96059799194336, "learning_rate": 5.816087104029167e-06, "loss": 0.4085, "step": 9139 }, { "epoch": 0.45339550572945087, "grad_norm": 11.439013481140137, "learning_rate": 5.815310558297558e-06, "loss": 0.3713, "step": 9140 }, { "epoch": 0.45344511136465104, "grad_norm": 9.672640800476074, "learning_rate": 5.814533992362183e-06, "loss": 0.3375, "step": 9141 }, { "epoch": 0.45349471699985117, "grad_norm": 5.0584869384765625, "learning_rate": 5.813757406242289e-06, "loss": 0.2663, "step": 9142 }, { "epoch": 0.45354432263505134, "grad_norm": 4.75980806350708, "learning_rate": 5.812980799957117e-06, "loss": 0.2417, "step": 9143 }, { "epoch": 0.4535939282702515, "grad_norm": 6.9673261642456055, "learning_rate": 5.812204173525915e-06, "loss": 0.288, "step": 9144 }, { "epoch": 0.45364353390545165, "grad_norm": 7.335731029510498, "learning_rate": 5.811427526967926e-06, "loss": 0.3737, "step": 9145 }, { "epoch": 0.4536931395406518, "grad_norm": 7.289527893066406, "learning_rate": 5.8106508603023935e-06, "loss": 0.3102, "step": 9146 }, { "epoch": 0.453742745175852, "grad_norm": 7.378514289855957, "learning_rate": 5.809874173548568e-06, "loss": 0.2347, "step": 9147 }, { "epoch": 0.4537923508110521, "grad_norm": 4.3491997718811035, "learning_rate": 5.809097466725693e-06, "loss": 0.2064, "step": 9148 }, { "epoch": 0.4538419564462523, "grad_norm": 21.927793502807617, "learning_rate": 5.808320739853017e-06, "loss": 0.3274, "step": 9149 }, { "epoch": 0.4538915620814524, "grad_norm": 4.525214672088623, "learning_rate": 5.807543992949787e-06, "loss": 0.3144, "step": 9150 }, { "epoch": 0.4539411677166526, "grad_norm": 7.254501819610596, "learning_rate": 5.806767226035254e-06, "loss": 0.3179, "step": 9151 }, { "epoch": 0.4539907733518528, "grad_norm": 6.549398422241211, "learning_rate": 5.8059904391286615e-06, "loss": 0.3461, "step": 9152 }, { "epoch": 0.4540403789870529, "grad_norm": 7.212302207946777, "learning_rate": 5.805213632249262e-06, "loss": 0.3883, "step": 9153 }, { "epoch": 0.4540899846222531, "grad_norm": 6.346226215362549, "learning_rate": 5.804436805416305e-06, "loss": 0.3185, "step": 9154 }, { "epoch": 0.45413959025745326, "grad_norm": 10.000271797180176, "learning_rate": 5.80365995864904e-06, "loss": 0.3658, "step": 9155 }, { "epoch": 0.4541891958926534, "grad_norm": 9.70993709564209, "learning_rate": 5.802883091966717e-06, "loss": 0.3069, "step": 9156 }, { "epoch": 0.45423880152785356, "grad_norm": 6.8924560546875, "learning_rate": 5.802106205388589e-06, "loss": 0.3032, "step": 9157 }, { "epoch": 0.45428840716305374, "grad_norm": 5.132268905639648, "learning_rate": 5.8013292989339055e-06, "loss": 0.2346, "step": 9158 }, { "epoch": 0.45433801279825387, "grad_norm": 4.702400207519531, "learning_rate": 5.800552372621919e-06, "loss": 0.2998, "step": 9159 }, { "epoch": 0.45438761843345404, "grad_norm": 12.10404109954834, "learning_rate": 5.799775426471883e-06, "loss": 0.4241, "step": 9160 }, { "epoch": 0.4544372240686542, "grad_norm": 4.65830135345459, "learning_rate": 5.798998460503051e-06, "loss": 0.3599, "step": 9161 }, { "epoch": 0.45448682970385434, "grad_norm": 4.287590503692627, "learning_rate": 5.798221474734675e-06, "loss": 0.2215, "step": 9162 }, { "epoch": 0.4545364353390545, "grad_norm": 5.056127071380615, "learning_rate": 5.79744446918601e-06, "loss": 0.2639, "step": 9163 }, { "epoch": 0.4545860409742547, "grad_norm": 14.489352226257324, "learning_rate": 5.79666744387631e-06, "loss": 0.4014, "step": 9164 }, { "epoch": 0.4546356466094548, "grad_norm": 7.6287360191345215, "learning_rate": 5.7958903988248305e-06, "loss": 0.3821, "step": 9165 }, { "epoch": 0.454685252244655, "grad_norm": 4.805913925170898, "learning_rate": 5.795113334050827e-06, "loss": 0.1643, "step": 9166 }, { "epoch": 0.4547348578798551, "grad_norm": 8.606518745422363, "learning_rate": 5.794336249573556e-06, "loss": 0.3248, "step": 9167 }, { "epoch": 0.4547844635150553, "grad_norm": 4.62847900390625, "learning_rate": 5.793559145412273e-06, "loss": 0.239, "step": 9168 }, { "epoch": 0.4548340691502555, "grad_norm": 5.940776348114014, "learning_rate": 5.792782021586234e-06, "loss": 0.3121, "step": 9169 }, { "epoch": 0.4548836747854556, "grad_norm": 3.9234776496887207, "learning_rate": 5.792004878114698e-06, "loss": 0.2098, "step": 9170 }, { "epoch": 0.4549332804206558, "grad_norm": 7.656253337860107, "learning_rate": 5.791227715016926e-06, "loss": 0.2843, "step": 9171 }, { "epoch": 0.45498288605585596, "grad_norm": 4.955218315124512, "learning_rate": 5.790450532312171e-06, "loss": 0.2923, "step": 9172 }, { "epoch": 0.4550324916910561, "grad_norm": 10.322042465209961, "learning_rate": 5.789673330019694e-06, "loss": 0.4559, "step": 9173 }, { "epoch": 0.45508209732625626, "grad_norm": 7.968462944030762, "learning_rate": 5.788896108158756e-06, "loss": 0.37, "step": 9174 }, { "epoch": 0.45513170296145644, "grad_norm": 7.167356014251709, "learning_rate": 5.788118866748615e-06, "loss": 0.3224, "step": 9175 }, { "epoch": 0.45518130859665656, "grad_norm": 9.995973587036133, "learning_rate": 5.787341605808531e-06, "loss": 0.337, "step": 9176 }, { "epoch": 0.45523091423185674, "grad_norm": 4.961999416351318, "learning_rate": 5.786564325357766e-06, "loss": 0.1702, "step": 9177 }, { "epoch": 0.4552805198670569, "grad_norm": 11.484383583068848, "learning_rate": 5.785787025415583e-06, "loss": 0.3169, "step": 9178 }, { "epoch": 0.45533012550225704, "grad_norm": 4.919294834136963, "learning_rate": 5.78500970600124e-06, "loss": 0.2849, "step": 9179 }, { "epoch": 0.4553797311374572, "grad_norm": 5.138538837432861, "learning_rate": 5.784232367134002e-06, "loss": 0.2772, "step": 9180 }, { "epoch": 0.4554293367726574, "grad_norm": 8.279857635498047, "learning_rate": 5.783455008833132e-06, "loss": 0.2812, "step": 9181 }, { "epoch": 0.4554789424078575, "grad_norm": 9.467475891113281, "learning_rate": 5.782677631117892e-06, "loss": 0.2952, "step": 9182 }, { "epoch": 0.4555285480430577, "grad_norm": 6.194747447967529, "learning_rate": 5.781900234007546e-06, "loss": 0.3187, "step": 9183 }, { "epoch": 0.4555781536782578, "grad_norm": 8.230257034301758, "learning_rate": 5.7811228175213595e-06, "loss": 0.2601, "step": 9184 }, { "epoch": 0.455627759313458, "grad_norm": 7.004148483276367, "learning_rate": 5.780345381678595e-06, "loss": 0.3448, "step": 9185 }, { "epoch": 0.4556773649486582, "grad_norm": 27.487953186035156, "learning_rate": 5.779567926498519e-06, "loss": 0.3185, "step": 9186 }, { "epoch": 0.4557269705838583, "grad_norm": 5.29823637008667, "learning_rate": 5.778790452000398e-06, "loss": 0.3329, "step": 9187 }, { "epoch": 0.4557765762190585, "grad_norm": 6.57350492477417, "learning_rate": 5.7780129582034965e-06, "loss": 0.2981, "step": 9188 }, { "epoch": 0.45582618185425866, "grad_norm": 6.766585826873779, "learning_rate": 5.777235445127083e-06, "loss": 0.1634, "step": 9189 }, { "epoch": 0.4558757874894588, "grad_norm": 5.951061248779297, "learning_rate": 5.776457912790422e-06, "loss": 0.2683, "step": 9190 }, { "epoch": 0.45592539312465896, "grad_norm": 6.2344584465026855, "learning_rate": 5.775680361212783e-06, "loss": 0.2802, "step": 9191 }, { "epoch": 0.45597499875985914, "grad_norm": 5.199272632598877, "learning_rate": 5.774902790413435e-06, "loss": 0.2401, "step": 9192 }, { "epoch": 0.45602460439505926, "grad_norm": 6.522409915924072, "learning_rate": 5.7741252004116456e-06, "loss": 0.2849, "step": 9193 }, { "epoch": 0.45607421003025944, "grad_norm": 5.035708904266357, "learning_rate": 5.773347591226682e-06, "loss": 0.3384, "step": 9194 }, { "epoch": 0.4561238156654596, "grad_norm": 5.883544445037842, "learning_rate": 5.7725699628778155e-06, "loss": 0.335, "step": 9195 }, { "epoch": 0.45617342130065974, "grad_norm": 5.076176166534424, "learning_rate": 5.7717923153843155e-06, "loss": 0.2176, "step": 9196 }, { "epoch": 0.4562230269358599, "grad_norm": 5.575733661651611, "learning_rate": 5.771014648765454e-06, "loss": 0.3165, "step": 9197 }, { "epoch": 0.4562726325710601, "grad_norm": 4.333879470825195, "learning_rate": 5.770236963040501e-06, "loss": 0.2188, "step": 9198 }, { "epoch": 0.4563222382062602, "grad_norm": 8.125870704650879, "learning_rate": 5.769459258228727e-06, "loss": 0.3219, "step": 9199 }, { "epoch": 0.4563718438414604, "grad_norm": 7.281040191650391, "learning_rate": 5.768681534349404e-06, "loss": 0.27, "step": 9200 }, { "epoch": 0.4564214494766605, "grad_norm": 4.932793617248535, "learning_rate": 5.7679037914218065e-06, "loss": 0.3326, "step": 9201 }, { "epoch": 0.4564710551118607, "grad_norm": 8.236610412597656, "learning_rate": 5.767126029465203e-06, "loss": 0.3362, "step": 9202 }, { "epoch": 0.4565206607470609, "grad_norm": 23.537700653076172, "learning_rate": 5.766348248498871e-06, "loss": 0.4328, "step": 9203 }, { "epoch": 0.456570266382261, "grad_norm": 4.014273643493652, "learning_rate": 5.765570448542082e-06, "loss": 0.2551, "step": 9204 }, { "epoch": 0.4566198720174612, "grad_norm": 7.4780473709106445, "learning_rate": 5.764792629614113e-06, "loss": 0.4184, "step": 9205 }, { "epoch": 0.45666947765266136, "grad_norm": 6.035645961761475, "learning_rate": 5.764014791734236e-06, "loss": 0.2869, "step": 9206 }, { "epoch": 0.4567190832878615, "grad_norm": 5.441429138183594, "learning_rate": 5.763236934921726e-06, "loss": 0.2756, "step": 9207 }, { "epoch": 0.45676868892306166, "grad_norm": 6.069753646850586, "learning_rate": 5.7624590591958596e-06, "loss": 0.2163, "step": 9208 }, { "epoch": 0.45681829455826184, "grad_norm": 8.339583396911621, "learning_rate": 5.761681164575913e-06, "loss": 0.2686, "step": 9209 }, { "epoch": 0.45686790019346196, "grad_norm": 8.003843307495117, "learning_rate": 5.760903251081162e-06, "loss": 0.2778, "step": 9210 }, { "epoch": 0.45691750582866214, "grad_norm": 7.229156970977783, "learning_rate": 5.760125318730886e-06, "loss": 0.2655, "step": 9211 }, { "epoch": 0.4569671114638623, "grad_norm": 5.480672836303711, "learning_rate": 5.759347367544359e-06, "loss": 0.2893, "step": 9212 }, { "epoch": 0.45701671709906244, "grad_norm": 9.705568313598633, "learning_rate": 5.758569397540861e-06, "loss": 0.4251, "step": 9213 }, { "epoch": 0.4570663227342626, "grad_norm": 8.52051830291748, "learning_rate": 5.75779140873967e-06, "loss": 0.3512, "step": 9214 }, { "epoch": 0.4571159283694628, "grad_norm": 10.452427864074707, "learning_rate": 5.757013401160066e-06, "loss": 0.2686, "step": 9215 }, { "epoch": 0.4571655340046629, "grad_norm": 5.5892157554626465, "learning_rate": 5.756235374821327e-06, "loss": 0.306, "step": 9216 }, { "epoch": 0.4572151396398631, "grad_norm": 8.611551284790039, "learning_rate": 5.755457329742734e-06, "loss": 0.3789, "step": 9217 }, { "epoch": 0.4572647452750632, "grad_norm": 7.936030387878418, "learning_rate": 5.7546792659435665e-06, "loss": 0.3136, "step": 9218 }, { "epoch": 0.4573143509102634, "grad_norm": 5.977659702301025, "learning_rate": 5.753901183443105e-06, "loss": 0.378, "step": 9219 }, { "epoch": 0.4573639565454636, "grad_norm": 6.489945411682129, "learning_rate": 5.753123082260631e-06, "loss": 0.2606, "step": 9220 }, { "epoch": 0.4574135621806637, "grad_norm": 12.159387588500977, "learning_rate": 5.752344962415427e-06, "loss": 0.4322, "step": 9221 }, { "epoch": 0.4574631678158639, "grad_norm": 6.981513023376465, "learning_rate": 5.751566823926774e-06, "loss": 0.2543, "step": 9222 }, { "epoch": 0.45751277345106406, "grad_norm": 10.622247695922852, "learning_rate": 5.750788666813955e-06, "loss": 0.2795, "step": 9223 }, { "epoch": 0.4575623790862642, "grad_norm": 12.985194206237793, "learning_rate": 5.750010491096253e-06, "loss": 0.4076, "step": 9224 }, { "epoch": 0.45761198472146436, "grad_norm": 6.042830944061279, "learning_rate": 5.749232296792953e-06, "loss": 0.28, "step": 9225 }, { "epoch": 0.45766159035666454, "grad_norm": 29.66745376586914, "learning_rate": 5.748454083923336e-06, "loss": 0.3281, "step": 9226 }, { "epoch": 0.45771119599186466, "grad_norm": 13.897444725036621, "learning_rate": 5.747675852506689e-06, "loss": 0.5305, "step": 9227 }, { "epoch": 0.45776080162706484, "grad_norm": 10.036155700683594, "learning_rate": 5.746897602562297e-06, "loss": 0.3018, "step": 9228 }, { "epoch": 0.457810407262265, "grad_norm": 4.5076212882995605, "learning_rate": 5.746119334109443e-06, "loss": 0.2913, "step": 9229 }, { "epoch": 0.45786001289746514, "grad_norm": 4.223785400390625, "learning_rate": 5.7453410471674154e-06, "loss": 0.2657, "step": 9230 }, { "epoch": 0.4579096185326653, "grad_norm": 9.804956436157227, "learning_rate": 5.7445627417554995e-06, "loss": 0.3885, "step": 9231 }, { "epoch": 0.45795922416786544, "grad_norm": 5.4949517250061035, "learning_rate": 5.7437844178929824e-06, "loss": 0.2602, "step": 9232 }, { "epoch": 0.4580088298030656, "grad_norm": 8.08132266998291, "learning_rate": 5.7430060755991505e-06, "loss": 0.2831, "step": 9233 }, { "epoch": 0.4580584354382658, "grad_norm": 7.043701171875, "learning_rate": 5.742227714893291e-06, "loss": 0.2801, "step": 9234 }, { "epoch": 0.4581080410734659, "grad_norm": 10.544901847839355, "learning_rate": 5.7414493357946945e-06, "loss": 0.2335, "step": 9235 }, { "epoch": 0.4581576467086661, "grad_norm": 7.913941383361816, "learning_rate": 5.740670938322648e-06, "loss": 0.3788, "step": 9236 }, { "epoch": 0.4582072523438663, "grad_norm": 6.32103157043457, "learning_rate": 5.73989252249644e-06, "loss": 0.2822, "step": 9237 }, { "epoch": 0.4582568579790664, "grad_norm": 8.533464431762695, "learning_rate": 5.73911408833536e-06, "loss": 0.3153, "step": 9238 }, { "epoch": 0.4583064636142666, "grad_norm": 8.140231132507324, "learning_rate": 5.7383356358586995e-06, "loss": 0.3043, "step": 9239 }, { "epoch": 0.45835606924946676, "grad_norm": 7.051755428314209, "learning_rate": 5.737557165085748e-06, "loss": 0.3036, "step": 9240 }, { "epoch": 0.4584056748846669, "grad_norm": 8.31494426727295, "learning_rate": 5.736778676035795e-06, "loss": 0.3614, "step": 9241 }, { "epoch": 0.45845528051986706, "grad_norm": 9.608396530151367, "learning_rate": 5.736000168728135e-06, "loss": 0.2899, "step": 9242 }, { "epoch": 0.45850488615506724, "grad_norm": 10.168566703796387, "learning_rate": 5.735221643182057e-06, "loss": 0.4026, "step": 9243 }, { "epoch": 0.45855449179026736, "grad_norm": 6.4823832511901855, "learning_rate": 5.734443099416853e-06, "loss": 0.3374, "step": 9244 }, { "epoch": 0.45860409742546754, "grad_norm": 6.914775371551514, "learning_rate": 5.733664537451819e-06, "loss": 0.2301, "step": 9245 }, { "epoch": 0.4586537030606677, "grad_norm": 8.050521850585938, "learning_rate": 5.7328859573062435e-06, "loss": 0.2839, "step": 9246 }, { "epoch": 0.45870330869586784, "grad_norm": 7.4482102394104, "learning_rate": 5.732107358999425e-06, "loss": 0.2799, "step": 9247 }, { "epoch": 0.458752914331068, "grad_norm": 7.836096286773682, "learning_rate": 5.731328742550653e-06, "loss": 0.3057, "step": 9248 }, { "epoch": 0.45880251996626814, "grad_norm": 13.652012825012207, "learning_rate": 5.7305501079792245e-06, "loss": 0.4219, "step": 9249 }, { "epoch": 0.4588521256014683, "grad_norm": 8.389177322387695, "learning_rate": 5.729771455304434e-06, "loss": 0.3031, "step": 9250 }, { "epoch": 0.4589017312366685, "grad_norm": 4.642100811004639, "learning_rate": 5.728992784545576e-06, "loss": 0.2541, "step": 9251 }, { "epoch": 0.4589513368718686, "grad_norm": 7.309360027313232, "learning_rate": 5.728214095721949e-06, "loss": 0.293, "step": 9252 }, { "epoch": 0.4590009425070688, "grad_norm": 4.575575351715088, "learning_rate": 5.727435388852846e-06, "loss": 0.2408, "step": 9253 }, { "epoch": 0.459050548142269, "grad_norm": 9.462696075439453, "learning_rate": 5.726656663957564e-06, "loss": 0.373, "step": 9254 }, { "epoch": 0.4591001537774691, "grad_norm": 7.617127418518066, "learning_rate": 5.725877921055403e-06, "loss": 0.3153, "step": 9255 }, { "epoch": 0.4591497594126693, "grad_norm": 12.691338539123535, "learning_rate": 5.7250991601656584e-06, "loss": 0.3532, "step": 9256 }, { "epoch": 0.45919936504786946, "grad_norm": 6.316711902618408, "learning_rate": 5.724320381307628e-06, "loss": 0.2649, "step": 9257 }, { "epoch": 0.4592489706830696, "grad_norm": 3.6005241870880127, "learning_rate": 5.7235415845006104e-06, "loss": 0.2502, "step": 9258 }, { "epoch": 0.45929857631826976, "grad_norm": 5.7121405601501465, "learning_rate": 5.7227627697639064e-06, "loss": 0.304, "step": 9259 }, { "epoch": 0.45934818195346994, "grad_norm": 4.587778091430664, "learning_rate": 5.721983937116813e-06, "loss": 0.2774, "step": 9260 }, { "epoch": 0.45939778758867006, "grad_norm": 4.167481899261475, "learning_rate": 5.721205086578631e-06, "loss": 0.2617, "step": 9261 }, { "epoch": 0.45944739322387024, "grad_norm": 5.211420059204102, "learning_rate": 5.720426218168662e-06, "loss": 0.2609, "step": 9262 }, { "epoch": 0.4594969988590704, "grad_norm": 6.0397419929504395, "learning_rate": 5.719647331906204e-06, "loss": 0.3122, "step": 9263 }, { "epoch": 0.45954660449427054, "grad_norm": 8.000877380371094, "learning_rate": 5.71886842781056e-06, "loss": 0.3329, "step": 9264 }, { "epoch": 0.4595962101294707, "grad_norm": 12.290239334106445, "learning_rate": 5.718089505901032e-06, "loss": 0.3988, "step": 9265 }, { "epoch": 0.45964581576467084, "grad_norm": 7.01215124130249, "learning_rate": 5.717310566196921e-06, "loss": 0.3574, "step": 9266 }, { "epoch": 0.459695421399871, "grad_norm": 12.046013832092285, "learning_rate": 5.716531608717528e-06, "loss": 0.3811, "step": 9267 }, { "epoch": 0.4597450270350712, "grad_norm": 8.975059509277344, "learning_rate": 5.715752633482159e-06, "loss": 0.3484, "step": 9268 }, { "epoch": 0.4597946326702713, "grad_norm": 4.5241312980651855, "learning_rate": 5.714973640510117e-06, "loss": 0.3189, "step": 9269 }, { "epoch": 0.4598442383054715, "grad_norm": 6.304019927978516, "learning_rate": 5.714194629820703e-06, "loss": 0.1947, "step": 9270 }, { "epoch": 0.4598938439406717, "grad_norm": 5.1098527908325195, "learning_rate": 5.713415601433223e-06, "loss": 0.26, "step": 9271 }, { "epoch": 0.4599434495758718, "grad_norm": 5.667726039886475, "learning_rate": 5.712636555366984e-06, "loss": 0.2239, "step": 9272 }, { "epoch": 0.459993055211072, "grad_norm": 7.194477558135986, "learning_rate": 5.7118574916412865e-06, "loss": 0.3227, "step": 9273 }, { "epoch": 0.46004266084627216, "grad_norm": 5.006032943725586, "learning_rate": 5.711078410275439e-06, "loss": 0.3323, "step": 9274 }, { "epoch": 0.4600922664814723, "grad_norm": 8.640336990356445, "learning_rate": 5.710299311288746e-06, "loss": 0.3257, "step": 9275 }, { "epoch": 0.46014187211667246, "grad_norm": 4.499107360839844, "learning_rate": 5.709520194700516e-06, "loss": 0.2502, "step": 9276 }, { "epoch": 0.46019147775187264, "grad_norm": 10.95711612701416, "learning_rate": 5.708741060530054e-06, "loss": 0.318, "step": 9277 }, { "epoch": 0.46024108338707276, "grad_norm": 8.869858741760254, "learning_rate": 5.707961908796669e-06, "loss": 0.3704, "step": 9278 }, { "epoch": 0.46029068902227294, "grad_norm": 9.842293739318848, "learning_rate": 5.707182739519667e-06, "loss": 0.2985, "step": 9279 }, { "epoch": 0.4603402946574731, "grad_norm": 5.978453636169434, "learning_rate": 5.706403552718356e-06, "loss": 0.3333, "step": 9280 }, { "epoch": 0.46038990029267324, "grad_norm": 5.784607410430908, "learning_rate": 5.705624348412046e-06, "loss": 0.2921, "step": 9281 }, { "epoch": 0.4604395059278734, "grad_norm": 10.633378982543945, "learning_rate": 5.7048451266200455e-06, "loss": 0.2289, "step": 9282 }, { "epoch": 0.46048911156307354, "grad_norm": 10.794290542602539, "learning_rate": 5.704065887361664e-06, "loss": 0.3861, "step": 9283 }, { "epoch": 0.4605387171982737, "grad_norm": 10.201202392578125, "learning_rate": 5.703286630656211e-06, "loss": 0.3015, "step": 9284 }, { "epoch": 0.4605883228334739, "grad_norm": 14.330257415771484, "learning_rate": 5.702507356522997e-06, "loss": 0.3288, "step": 9285 }, { "epoch": 0.460637928468674, "grad_norm": 5.517146110534668, "learning_rate": 5.701728064981334e-06, "loss": 0.2717, "step": 9286 }, { "epoch": 0.4606875341038742, "grad_norm": 9.282094955444336, "learning_rate": 5.700948756050531e-06, "loss": 0.3763, "step": 9287 }, { "epoch": 0.4607371397390744, "grad_norm": 6.035167694091797, "learning_rate": 5.700169429749901e-06, "loss": 0.3274, "step": 9288 }, { "epoch": 0.4607867453742745, "grad_norm": 7.167582035064697, "learning_rate": 5.699390086098757e-06, "loss": 0.3775, "step": 9289 }, { "epoch": 0.4608363510094747, "grad_norm": 5.165111064910889, "learning_rate": 5.698610725116409e-06, "loss": 0.2618, "step": 9290 }, { "epoch": 0.46088595664467485, "grad_norm": 10.740407943725586, "learning_rate": 5.6978313468221716e-06, "loss": 0.3149, "step": 9291 }, { "epoch": 0.460935562279875, "grad_norm": 5.97990608215332, "learning_rate": 5.697051951235357e-06, "loss": 0.2848, "step": 9292 }, { "epoch": 0.46098516791507516, "grad_norm": 12.963329315185547, "learning_rate": 5.696272538375281e-06, "loss": 0.3167, "step": 9293 }, { "epoch": 0.46103477355027533, "grad_norm": 9.079500198364258, "learning_rate": 5.695493108261255e-06, "loss": 0.2703, "step": 9294 }, { "epoch": 0.46108437918547546, "grad_norm": 10.063251495361328, "learning_rate": 5.6947136609125966e-06, "loss": 0.2584, "step": 9295 }, { "epoch": 0.46113398482067564, "grad_norm": 11.05923080444336, "learning_rate": 5.693934196348619e-06, "loss": 0.2401, "step": 9296 }, { "epoch": 0.4611835904558758, "grad_norm": 11.268662452697754, "learning_rate": 5.693154714588638e-06, "loss": 0.3702, "step": 9297 }, { "epoch": 0.46123319609107594, "grad_norm": 6.317438125610352, "learning_rate": 5.692375215651969e-06, "loss": 0.328, "step": 9298 }, { "epoch": 0.4612828017262761, "grad_norm": 5.247861862182617, "learning_rate": 5.69159569955793e-06, "loss": 0.3145, "step": 9299 }, { "epoch": 0.46133240736147624, "grad_norm": 8.370365142822266, "learning_rate": 5.690816166325837e-06, "loss": 0.2178, "step": 9300 }, { "epoch": 0.4613820129966764, "grad_norm": 7.804893493652344, "learning_rate": 5.690036615975006e-06, "loss": 0.3921, "step": 9301 }, { "epoch": 0.4614316186318766, "grad_norm": 4.626971244812012, "learning_rate": 5.6892570485247555e-06, "loss": 0.2766, "step": 9302 }, { "epoch": 0.4614812242670767, "grad_norm": 11.035076141357422, "learning_rate": 5.688477463994404e-06, "loss": 0.2338, "step": 9303 }, { "epoch": 0.4615308299022769, "grad_norm": 12.227349281311035, "learning_rate": 5.687697862403269e-06, "loss": 0.3822, "step": 9304 }, { "epoch": 0.4615804355374771, "grad_norm": 5.8646559715271, "learning_rate": 5.68691824377067e-06, "loss": 0.2692, "step": 9305 }, { "epoch": 0.4616300411726772, "grad_norm": 4.229273796081543, "learning_rate": 5.6861386081159266e-06, "loss": 0.2955, "step": 9306 }, { "epoch": 0.4616796468078774, "grad_norm": 5.863032817840576, "learning_rate": 5.685358955458358e-06, "loss": 0.183, "step": 9307 }, { "epoch": 0.46172925244307755, "grad_norm": 7.545392990112305, "learning_rate": 5.684579285817284e-06, "loss": 0.2807, "step": 9308 }, { "epoch": 0.4617788580782777, "grad_norm": 4.995242118835449, "learning_rate": 5.683799599212026e-06, "loss": 0.195, "step": 9309 }, { "epoch": 0.46182846371347785, "grad_norm": 5.743918418884277, "learning_rate": 5.683019895661906e-06, "loss": 0.3566, "step": 9310 }, { "epoch": 0.46187806934867803, "grad_norm": 9.207231521606445, "learning_rate": 5.682240175186242e-06, "loss": 0.3639, "step": 9311 }, { "epoch": 0.46192767498387816, "grad_norm": 8.528246879577637, "learning_rate": 5.681460437804358e-06, "loss": 0.2901, "step": 9312 }, { "epoch": 0.46197728061907833, "grad_norm": 4.17577600479126, "learning_rate": 5.680680683535577e-06, "loss": 0.2444, "step": 9313 }, { "epoch": 0.4620268862542785, "grad_norm": 7.493597030639648, "learning_rate": 5.67990091239922e-06, "loss": 0.3705, "step": 9314 }, { "epoch": 0.46207649188947864, "grad_norm": 12.847817420959473, "learning_rate": 5.679121124414612e-06, "loss": 0.303, "step": 9315 }, { "epoch": 0.4621260975246788, "grad_norm": 7.268572807312012, "learning_rate": 5.678341319601075e-06, "loss": 0.2595, "step": 9316 }, { "epoch": 0.46217570315987894, "grad_norm": 3.145756959915161, "learning_rate": 5.677561497977933e-06, "loss": 0.2616, "step": 9317 }, { "epoch": 0.4622253087950791, "grad_norm": 4.079695701599121, "learning_rate": 5.676781659564509e-06, "loss": 0.2802, "step": 9318 }, { "epoch": 0.4622749144302793, "grad_norm": 11.729508399963379, "learning_rate": 5.676001804380131e-06, "loss": 0.2908, "step": 9319 }, { "epoch": 0.4623245200654794, "grad_norm": 5.25407075881958, "learning_rate": 5.675221932444123e-06, "loss": 0.3265, "step": 9320 }, { "epoch": 0.4623741257006796, "grad_norm": 8.315386772155762, "learning_rate": 5.674442043775809e-06, "loss": 0.3604, "step": 9321 }, { "epoch": 0.4624237313358798, "grad_norm": 10.754791259765625, "learning_rate": 5.673662138394516e-06, "loss": 0.3485, "step": 9322 }, { "epoch": 0.4624733369710799, "grad_norm": 5.425196647644043, "learning_rate": 5.672882216319572e-06, "loss": 0.3075, "step": 9323 }, { "epoch": 0.4625229426062801, "grad_norm": 6.099789619445801, "learning_rate": 5.6721022775703005e-06, "loss": 0.2947, "step": 9324 }, { "epoch": 0.46257254824148025, "grad_norm": 7.858355522155762, "learning_rate": 5.671322322166031e-06, "loss": 0.2427, "step": 9325 }, { "epoch": 0.4626221538766804, "grad_norm": 6.094968795776367, "learning_rate": 5.670542350126092e-06, "loss": 0.2893, "step": 9326 }, { "epoch": 0.46267175951188055, "grad_norm": 6.166507720947266, "learning_rate": 5.669762361469808e-06, "loss": 0.3004, "step": 9327 }, { "epoch": 0.46272136514708073, "grad_norm": 4.695074081420898, "learning_rate": 5.66898235621651e-06, "loss": 0.35, "step": 9328 }, { "epoch": 0.46277097078228085, "grad_norm": 6.658891201019287, "learning_rate": 5.6682023343855275e-06, "loss": 0.2517, "step": 9329 }, { "epoch": 0.46282057641748103, "grad_norm": 9.112495422363281, "learning_rate": 5.66742229599619e-06, "loss": 0.3393, "step": 9330 }, { "epoch": 0.4628701820526812, "grad_norm": 4.562136173248291, "learning_rate": 5.666642241067825e-06, "loss": 0.2401, "step": 9331 }, { "epoch": 0.46291978768788133, "grad_norm": 8.373275756835938, "learning_rate": 5.665862169619763e-06, "loss": 0.3228, "step": 9332 }, { "epoch": 0.4629693933230815, "grad_norm": 8.508440971374512, "learning_rate": 5.665082081671337e-06, "loss": 0.3792, "step": 9333 }, { "epoch": 0.46301899895828164, "grad_norm": 21.025976181030273, "learning_rate": 5.664301977241874e-06, "loss": 0.3959, "step": 9334 }, { "epoch": 0.4630686045934818, "grad_norm": 4.339075088500977, "learning_rate": 5.663521856350709e-06, "loss": 0.2809, "step": 9335 }, { "epoch": 0.463118210228682, "grad_norm": 6.891913890838623, "learning_rate": 5.662741719017172e-06, "loss": 0.3272, "step": 9336 }, { "epoch": 0.4631678158638821, "grad_norm": 5.8923869132995605, "learning_rate": 5.661961565260595e-06, "loss": 0.2857, "step": 9337 }, { "epoch": 0.4632174214990823, "grad_norm": 5.929880619049072, "learning_rate": 5.66118139510031e-06, "loss": 0.3411, "step": 9338 }, { "epoch": 0.46326702713428247, "grad_norm": 7.3543620109558105, "learning_rate": 5.660401208555653e-06, "loss": 0.3123, "step": 9339 }, { "epoch": 0.4633166327694826, "grad_norm": 5.644719123840332, "learning_rate": 5.659621005645955e-06, "loss": 0.3244, "step": 9340 }, { "epoch": 0.4633662384046828, "grad_norm": 7.7704057693481445, "learning_rate": 5.658840786390549e-06, "loss": 0.3938, "step": 9341 }, { "epoch": 0.46341584403988295, "grad_norm": 10.173004150390625, "learning_rate": 5.65806055080877e-06, "loss": 0.3112, "step": 9342 }, { "epoch": 0.4634654496750831, "grad_norm": 4.7747297286987305, "learning_rate": 5.657280298919955e-06, "loss": 0.3168, "step": 9343 }, { "epoch": 0.46351505531028325, "grad_norm": 4.835946559906006, "learning_rate": 5.656500030743436e-06, "loss": 0.2885, "step": 9344 }, { "epoch": 0.46356466094548343, "grad_norm": 3.5514755249023438, "learning_rate": 5.655719746298548e-06, "loss": 0.2837, "step": 9345 }, { "epoch": 0.46361426658068355, "grad_norm": 6.536111831665039, "learning_rate": 5.654939445604629e-06, "loss": 0.3499, "step": 9346 }, { "epoch": 0.46366387221588373, "grad_norm": 6.469220161437988, "learning_rate": 5.654159128681015e-06, "loss": 0.2692, "step": 9347 }, { "epoch": 0.4637134778510839, "grad_norm": 7.21028995513916, "learning_rate": 5.653378795547041e-06, "loss": 0.2636, "step": 9348 }, { "epoch": 0.46376308348628403, "grad_norm": 13.273734092712402, "learning_rate": 5.652598446222044e-06, "loss": 0.4173, "step": 9349 }, { "epoch": 0.4638126891214842, "grad_norm": 4.063197612762451, "learning_rate": 5.6518180807253645e-06, "loss": 0.22, "step": 9350 }, { "epoch": 0.46386229475668433, "grad_norm": 4.569796562194824, "learning_rate": 5.651037699076336e-06, "loss": 0.234, "step": 9351 }, { "epoch": 0.4639119003918845, "grad_norm": 3.894385814666748, "learning_rate": 5.6502573012943e-06, "loss": 0.2611, "step": 9352 }, { "epoch": 0.4639615060270847, "grad_norm": 5.735062122344971, "learning_rate": 5.649476887398593e-06, "loss": 0.3358, "step": 9353 }, { "epoch": 0.4640111116622848, "grad_norm": 4.50909423828125, "learning_rate": 5.648696457408557e-06, "loss": 0.2613, "step": 9354 }, { "epoch": 0.464060717297485, "grad_norm": 7.8886260986328125, "learning_rate": 5.647916011343527e-06, "loss": 0.3562, "step": 9355 }, { "epoch": 0.46411032293268517, "grad_norm": 12.406942367553711, "learning_rate": 5.6471355492228465e-06, "loss": 0.4504, "step": 9356 }, { "epoch": 0.4641599285678853, "grad_norm": 6.933345794677734, "learning_rate": 5.6463550710658534e-06, "loss": 0.271, "step": 9357 }, { "epoch": 0.46420953420308547, "grad_norm": 6.133172512054443, "learning_rate": 5.645574576891889e-06, "loss": 0.2757, "step": 9358 }, { "epoch": 0.46425913983828565, "grad_norm": 6.919116020202637, "learning_rate": 5.644794066720296e-06, "loss": 0.2771, "step": 9359 }, { "epoch": 0.4643087454734858, "grad_norm": 4.205618858337402, "learning_rate": 5.644013540570414e-06, "loss": 0.1981, "step": 9360 }, { "epoch": 0.46435835110868595, "grad_norm": 8.444815635681152, "learning_rate": 5.643232998461584e-06, "loss": 0.3042, "step": 9361 }, { "epoch": 0.46440795674388613, "grad_norm": 10.737570762634277, "learning_rate": 5.6424524404131505e-06, "loss": 0.3151, "step": 9362 }, { "epoch": 0.46445756237908625, "grad_norm": 9.040989875793457, "learning_rate": 5.641671866444456e-06, "loss": 0.2117, "step": 9363 }, { "epoch": 0.46450716801428643, "grad_norm": 7.916448593139648, "learning_rate": 5.64089127657484e-06, "loss": 0.4363, "step": 9364 }, { "epoch": 0.46455677364948655, "grad_norm": 3.986171007156372, "learning_rate": 5.6401106708236484e-06, "loss": 0.2399, "step": 9365 }, { "epoch": 0.46460637928468673, "grad_norm": 9.662897109985352, "learning_rate": 5.6393300492102255e-06, "loss": 0.4224, "step": 9366 }, { "epoch": 0.4646559849198869, "grad_norm": 6.350343704223633, "learning_rate": 5.638549411753916e-06, "loss": 0.3123, "step": 9367 }, { "epoch": 0.46470559055508703, "grad_norm": 7.894628524780273, "learning_rate": 5.637768758474062e-06, "loss": 0.2647, "step": 9368 }, { "epoch": 0.4647551961902872, "grad_norm": 4.059651851654053, "learning_rate": 5.636988089390009e-06, "loss": 0.1528, "step": 9369 }, { "epoch": 0.4648048018254874, "grad_norm": 6.588113784790039, "learning_rate": 5.636207404521105e-06, "loss": 0.3289, "step": 9370 }, { "epoch": 0.4648544074606875, "grad_norm": 6.889043807983398, "learning_rate": 5.635426703886693e-06, "loss": 0.283, "step": 9371 }, { "epoch": 0.4649040130958877, "grad_norm": 7.093593597412109, "learning_rate": 5.6346459875061186e-06, "loss": 0.2835, "step": 9372 }, { "epoch": 0.46495361873108787, "grad_norm": 10.82973575592041, "learning_rate": 5.633865255398731e-06, "loss": 0.4224, "step": 9373 }, { "epoch": 0.465003224366288, "grad_norm": 6.009505271911621, "learning_rate": 5.633084507583874e-06, "loss": 0.333, "step": 9374 }, { "epoch": 0.46505283000148817, "grad_norm": 11.333259582519531, "learning_rate": 5.632303744080898e-06, "loss": 0.4068, "step": 9375 }, { "epoch": 0.46510243563668835, "grad_norm": 7.124782085418701, "learning_rate": 5.6315229649091485e-06, "loss": 0.4154, "step": 9376 }, { "epoch": 0.4651520412718885, "grad_norm": 17.213640213012695, "learning_rate": 5.630742170087976e-06, "loss": 0.3607, "step": 9377 }, { "epoch": 0.46520164690708865, "grad_norm": 6.072514057159424, "learning_rate": 5.629961359636725e-06, "loss": 0.3145, "step": 9378 }, { "epoch": 0.46525125254228883, "grad_norm": 3.1511874198913574, "learning_rate": 5.629180533574747e-06, "loss": 0.1499, "step": 9379 }, { "epoch": 0.46530085817748895, "grad_norm": 3.925194025039673, "learning_rate": 5.628399691921392e-06, "loss": 0.2102, "step": 9380 }, { "epoch": 0.46535046381268913, "grad_norm": 6.563648700714111, "learning_rate": 5.627618834696007e-06, "loss": 0.3384, "step": 9381 }, { "epoch": 0.46540006944788925, "grad_norm": 3.722698450088501, "learning_rate": 5.626837961917944e-06, "loss": 0.2437, "step": 9382 }, { "epoch": 0.46544967508308943, "grad_norm": 5.141061305999756, "learning_rate": 5.6260570736065525e-06, "loss": 0.2877, "step": 9383 }, { "epoch": 0.4654992807182896, "grad_norm": 4.703892230987549, "learning_rate": 5.625276169781184e-06, "loss": 0.2586, "step": 9384 }, { "epoch": 0.46554888635348973, "grad_norm": 6.926461696624756, "learning_rate": 5.624495250461188e-06, "loss": 0.3407, "step": 9385 }, { "epoch": 0.4655984919886899, "grad_norm": 5.366989612579346, "learning_rate": 5.623714315665918e-06, "loss": 0.1957, "step": 9386 }, { "epoch": 0.4656480976238901, "grad_norm": 8.01173210144043, "learning_rate": 5.622933365414726e-06, "loss": 0.2724, "step": 9387 }, { "epoch": 0.4656977032590902, "grad_norm": 5.017162799835205, "learning_rate": 5.622152399726963e-06, "loss": 0.3199, "step": 9388 }, { "epoch": 0.4657473088942904, "grad_norm": 7.245622634887695, "learning_rate": 5.62137141862198e-06, "loss": 0.2658, "step": 9389 }, { "epoch": 0.46579691452949057, "grad_norm": 9.195611000061035, "learning_rate": 5.6205904221191346e-06, "loss": 0.3276, "step": 9390 }, { "epoch": 0.4658465201646907, "grad_norm": 16.710285186767578, "learning_rate": 5.6198094102377766e-06, "loss": 0.3766, "step": 9391 }, { "epoch": 0.46589612579989087, "grad_norm": 6.218952655792236, "learning_rate": 5.619028382997262e-06, "loss": 0.2809, "step": 9392 }, { "epoch": 0.46594573143509105, "grad_norm": 8.006235122680664, "learning_rate": 5.618247340416943e-06, "loss": 0.3049, "step": 9393 }, { "epoch": 0.46599533707029117, "grad_norm": 7.915230751037598, "learning_rate": 5.617466282516176e-06, "loss": 0.2785, "step": 9394 }, { "epoch": 0.46604494270549135, "grad_norm": 4.571648120880127, "learning_rate": 5.616685209314314e-06, "loss": 0.2326, "step": 9395 }, { "epoch": 0.46609454834069153, "grad_norm": 14.986124992370605, "learning_rate": 5.615904120830715e-06, "loss": 0.4645, "step": 9396 }, { "epoch": 0.46614415397589165, "grad_norm": 5.10359525680542, "learning_rate": 5.6151230170847336e-06, "loss": 0.2722, "step": 9397 }, { "epoch": 0.46619375961109183, "grad_norm": 4.742559909820557, "learning_rate": 5.614341898095724e-06, "loss": 0.2625, "step": 9398 }, { "epoch": 0.46624336524629195, "grad_norm": 9.790063858032227, "learning_rate": 5.6135607638830445e-06, "loss": 0.241, "step": 9399 }, { "epoch": 0.46629297088149213, "grad_norm": 4.251075267791748, "learning_rate": 5.612779614466052e-06, "loss": 0.211, "step": 9400 }, { "epoch": 0.4663425765166923, "grad_norm": 13.300817489624023, "learning_rate": 5.6119984498641045e-06, "loss": 0.3187, "step": 9401 }, { "epoch": 0.46639218215189243, "grad_norm": 4.843626976013184, "learning_rate": 5.6112172700965574e-06, "loss": 0.2612, "step": 9402 }, { "epoch": 0.4664417877870926, "grad_norm": 7.6158223152160645, "learning_rate": 5.6104360751827704e-06, "loss": 0.3454, "step": 9403 }, { "epoch": 0.4664913934222928, "grad_norm": 4.606966495513916, "learning_rate": 5.609654865142102e-06, "loss": 0.3032, "step": 9404 }, { "epoch": 0.4665409990574929, "grad_norm": 18.88844108581543, "learning_rate": 5.608873639993909e-06, "loss": 0.3609, "step": 9405 }, { "epoch": 0.4665906046926931, "grad_norm": 10.03258991241455, "learning_rate": 5.608092399757552e-06, "loss": 0.4373, "step": 9406 }, { "epoch": 0.46664021032789327, "grad_norm": 6.32174015045166, "learning_rate": 5.6073111444523896e-06, "loss": 0.2289, "step": 9407 }, { "epoch": 0.4666898159630934, "grad_norm": 4.448331832885742, "learning_rate": 5.606529874097785e-06, "loss": 0.251, "step": 9408 }, { "epoch": 0.46673942159829357, "grad_norm": 10.083165168762207, "learning_rate": 5.605748588713093e-06, "loss": 0.3304, "step": 9409 }, { "epoch": 0.46678902723349375, "grad_norm": 12.388532638549805, "learning_rate": 5.604967288317678e-06, "loss": 0.4853, "step": 9410 }, { "epoch": 0.46683863286869387, "grad_norm": 5.071529865264893, "learning_rate": 5.6041859729308985e-06, "loss": 0.2358, "step": 9411 }, { "epoch": 0.46688823850389405, "grad_norm": 7.046151161193848, "learning_rate": 5.603404642572119e-06, "loss": 0.4102, "step": 9412 }, { "epoch": 0.4669378441390942, "grad_norm": 6.99114465713501, "learning_rate": 5.6026232972606994e-06, "loss": 0.2861, "step": 9413 }, { "epoch": 0.46698744977429435, "grad_norm": 13.205087661743164, "learning_rate": 5.601841937016002e-06, "loss": 0.3635, "step": 9414 }, { "epoch": 0.46703705540949453, "grad_norm": 6.109428882598877, "learning_rate": 5.601060561857389e-06, "loss": 0.3017, "step": 9415 }, { "epoch": 0.46708666104469465, "grad_norm": 4.7439961433410645, "learning_rate": 5.6002791718042225e-06, "loss": 0.2158, "step": 9416 }, { "epoch": 0.46713626667989483, "grad_norm": 7.7563629150390625, "learning_rate": 5.599497766875868e-06, "loss": 0.2674, "step": 9417 }, { "epoch": 0.467185872315095, "grad_norm": 4.243001461029053, "learning_rate": 5.598716347091686e-06, "loss": 0.2605, "step": 9418 }, { "epoch": 0.46723547795029513, "grad_norm": 6.116101264953613, "learning_rate": 5.597934912471043e-06, "loss": 0.3124, "step": 9419 }, { "epoch": 0.4672850835854953, "grad_norm": 7.391090393066406, "learning_rate": 5.597153463033303e-06, "loss": 0.2921, "step": 9420 }, { "epoch": 0.4673346892206955, "grad_norm": 4.070187568664551, "learning_rate": 5.596371998797829e-06, "loss": 0.1843, "step": 9421 }, { "epoch": 0.4673842948558956, "grad_norm": 6.6020026206970215, "learning_rate": 5.595590519783988e-06, "loss": 0.2707, "step": 9422 }, { "epoch": 0.4674339004910958, "grad_norm": 9.730122566223145, "learning_rate": 5.594809026011144e-06, "loss": 0.3508, "step": 9423 }, { "epoch": 0.46748350612629597, "grad_norm": 3.907715320587158, "learning_rate": 5.5940275174986634e-06, "loss": 0.2093, "step": 9424 }, { "epoch": 0.4675331117614961, "grad_norm": 7.1537089347839355, "learning_rate": 5.593245994265912e-06, "loss": 0.3042, "step": 9425 }, { "epoch": 0.46758271739669627, "grad_norm": 6.4341254234313965, "learning_rate": 5.592464456332257e-06, "loss": 0.2577, "step": 9426 }, { "epoch": 0.46763232303189645, "grad_norm": 7.435670852661133, "learning_rate": 5.591682903717064e-06, "loss": 0.2769, "step": 9427 }, { "epoch": 0.46768192866709657, "grad_norm": 5.107468605041504, "learning_rate": 5.590901336439702e-06, "loss": 0.2459, "step": 9428 }, { "epoch": 0.46773153430229675, "grad_norm": 6.777842044830322, "learning_rate": 5.590119754519537e-06, "loss": 0.2882, "step": 9429 }, { "epoch": 0.4677811399374969, "grad_norm": 4.395033359527588, "learning_rate": 5.589338157975937e-06, "loss": 0.3101, "step": 9430 }, { "epoch": 0.46783074557269705, "grad_norm": 8.10539436340332, "learning_rate": 5.588556546828272e-06, "loss": 0.3709, "step": 9431 }, { "epoch": 0.4678803512078972, "grad_norm": 12.010564804077148, "learning_rate": 5.58777492109591e-06, "loss": 0.4533, "step": 9432 }, { "epoch": 0.46792995684309735, "grad_norm": 8.551619529724121, "learning_rate": 5.586993280798218e-06, "loss": 0.2849, "step": 9433 }, { "epoch": 0.46797956247829753, "grad_norm": 7.111112117767334, "learning_rate": 5.586211625954567e-06, "loss": 0.3302, "step": 9434 }, { "epoch": 0.4680291681134977, "grad_norm": 4.821036338806152, "learning_rate": 5.585429956584327e-06, "loss": 0.207, "step": 9435 }, { "epoch": 0.46807877374869783, "grad_norm": 3.8880679607391357, "learning_rate": 5.584648272706868e-06, "loss": 0.2132, "step": 9436 }, { "epoch": 0.468128379383898, "grad_norm": 6.063905715942383, "learning_rate": 5.583866574341561e-06, "loss": 0.3466, "step": 9437 }, { "epoch": 0.4681779850190982, "grad_norm": 10.423737525939941, "learning_rate": 5.583084861507776e-06, "loss": 0.3674, "step": 9438 }, { "epoch": 0.4682275906542983, "grad_norm": 6.166195869445801, "learning_rate": 5.5823031342248835e-06, "loss": 0.2971, "step": 9439 }, { "epoch": 0.4682771962894985, "grad_norm": 4.433352470397949, "learning_rate": 5.581521392512257e-06, "loss": 0.2594, "step": 9440 }, { "epoch": 0.46832680192469867, "grad_norm": 20.96809959411621, "learning_rate": 5.5807396363892675e-06, "loss": 0.2465, "step": 9441 }, { "epoch": 0.4683764075598988, "grad_norm": 7.167990207672119, "learning_rate": 5.5799578658752865e-06, "loss": 0.3244, "step": 9442 }, { "epoch": 0.46842601319509897, "grad_norm": 8.56279182434082, "learning_rate": 5.5791760809896855e-06, "loss": 0.2221, "step": 9443 }, { "epoch": 0.46847561883029915, "grad_norm": 11.192667961120605, "learning_rate": 5.578394281751842e-06, "loss": 0.4894, "step": 9444 }, { "epoch": 0.46852522446549927, "grad_norm": 13.666899681091309, "learning_rate": 5.577612468181125e-06, "loss": 0.3462, "step": 9445 }, { "epoch": 0.46857483010069945, "grad_norm": 7.368583679199219, "learning_rate": 5.576830640296911e-06, "loss": 0.3554, "step": 9446 }, { "epoch": 0.4686244357358996, "grad_norm": 6.928732395172119, "learning_rate": 5.576048798118571e-06, "loss": 0.3311, "step": 9447 }, { "epoch": 0.46867404137109975, "grad_norm": 9.113760948181152, "learning_rate": 5.5752669416654825e-06, "loss": 0.2755, "step": 9448 }, { "epoch": 0.4687236470062999, "grad_norm": 7.834217071533203, "learning_rate": 5.5744850709570185e-06, "loss": 0.2693, "step": 9449 }, { "epoch": 0.46877325264150005, "grad_norm": 5.372514724731445, "learning_rate": 5.573703186012555e-06, "loss": 0.3354, "step": 9450 }, { "epoch": 0.4688228582767002, "grad_norm": 6.716320514678955, "learning_rate": 5.572921286851467e-06, "loss": 0.2928, "step": 9451 }, { "epoch": 0.4688724639119004, "grad_norm": 8.863229751586914, "learning_rate": 5.572139373493131e-06, "loss": 0.3299, "step": 9452 }, { "epoch": 0.46892206954710053, "grad_norm": 5.33536958694458, "learning_rate": 5.5713574459569205e-06, "loss": 0.2782, "step": 9453 }, { "epoch": 0.4689716751823007, "grad_norm": 8.397418022155762, "learning_rate": 5.570575504262215e-06, "loss": 0.3309, "step": 9454 }, { "epoch": 0.4690212808175009, "grad_norm": 5.025681495666504, "learning_rate": 5.5697935484283915e-06, "loss": 0.3265, "step": 9455 }, { "epoch": 0.469070886452701, "grad_norm": 9.2190580368042, "learning_rate": 5.5690115784748245e-06, "loss": 0.3407, "step": 9456 }, { "epoch": 0.4691204920879012, "grad_norm": 7.6368279457092285, "learning_rate": 5.568229594420893e-06, "loss": 0.4329, "step": 9457 }, { "epoch": 0.46917009772310136, "grad_norm": 5.783238410949707, "learning_rate": 5.5674475962859775e-06, "loss": 0.2812, "step": 9458 }, { "epoch": 0.4692197033583015, "grad_norm": 8.398724555969238, "learning_rate": 5.566665584089453e-06, "loss": 0.257, "step": 9459 }, { "epoch": 0.46926930899350167, "grad_norm": 3.353734254837036, "learning_rate": 5.565883557850698e-06, "loss": 0.2113, "step": 9460 }, { "epoch": 0.46931891462870184, "grad_norm": 7.443633079528809, "learning_rate": 5.565101517589092e-06, "loss": 0.3735, "step": 9461 }, { "epoch": 0.46936852026390197, "grad_norm": 7.4648661613464355, "learning_rate": 5.564319463324016e-06, "loss": 0.3254, "step": 9462 }, { "epoch": 0.46941812589910215, "grad_norm": 17.997594833374023, "learning_rate": 5.563537395074848e-06, "loss": 0.4299, "step": 9463 }, { "epoch": 0.4694677315343023, "grad_norm": 8.752801895141602, "learning_rate": 5.5627553128609675e-06, "loss": 0.3321, "step": 9464 }, { "epoch": 0.46951733716950245, "grad_norm": 9.010555267333984, "learning_rate": 5.561973216701757e-06, "loss": 0.2962, "step": 9465 }, { "epoch": 0.4695669428047026, "grad_norm": 5.077010631561279, "learning_rate": 5.561191106616596e-06, "loss": 0.2711, "step": 9466 }, { "epoch": 0.46961654843990275, "grad_norm": 8.390408515930176, "learning_rate": 5.5604089826248654e-06, "loss": 0.3115, "step": 9467 }, { "epoch": 0.4696661540751029, "grad_norm": 19.021806716918945, "learning_rate": 5.559626844745947e-06, "loss": 0.4425, "step": 9468 }, { "epoch": 0.4697157597103031, "grad_norm": 5.085488319396973, "learning_rate": 5.558844692999221e-06, "loss": 0.2831, "step": 9469 }, { "epoch": 0.4697653653455032, "grad_norm": 9.346527099609375, "learning_rate": 5.5580625274040715e-06, "loss": 0.3239, "step": 9470 }, { "epoch": 0.4698149709807034, "grad_norm": 6.0657734870910645, "learning_rate": 5.557280347979881e-06, "loss": 0.2205, "step": 9471 }, { "epoch": 0.4698645766159036, "grad_norm": 4.4953532218933105, "learning_rate": 5.556498154746029e-06, "loss": 0.3354, "step": 9472 }, { "epoch": 0.4699141822511037, "grad_norm": 11.734251022338867, "learning_rate": 5.555715947721903e-06, "loss": 0.3799, "step": 9473 }, { "epoch": 0.4699637878863039, "grad_norm": 7.8415937423706055, "learning_rate": 5.554933726926883e-06, "loss": 0.3366, "step": 9474 }, { "epoch": 0.47001339352150406, "grad_norm": 7.891895294189453, "learning_rate": 5.554151492380357e-06, "loss": 0.3018, "step": 9475 }, { "epoch": 0.4700629991567042, "grad_norm": 9.662041664123535, "learning_rate": 5.553369244101704e-06, "loss": 0.3847, "step": 9476 }, { "epoch": 0.47011260479190436, "grad_norm": 13.911373138427734, "learning_rate": 5.55258698211031e-06, "loss": 0.3728, "step": 9477 }, { "epoch": 0.47016221042710454, "grad_norm": 7.189542293548584, "learning_rate": 5.5518047064255645e-06, "loss": 0.1945, "step": 9478 }, { "epoch": 0.47021181606230467, "grad_norm": 4.173105239868164, "learning_rate": 5.551022417066845e-06, "loss": 0.304, "step": 9479 }, { "epoch": 0.47026142169750484, "grad_norm": 14.945610046386719, "learning_rate": 5.550240114053542e-06, "loss": 0.3889, "step": 9480 }, { "epoch": 0.470311027332705, "grad_norm": 8.4440336227417, "learning_rate": 5.54945779740504e-06, "loss": 0.3267, "step": 9481 }, { "epoch": 0.47036063296790515, "grad_norm": 9.659040451049805, "learning_rate": 5.548675467140726e-06, "loss": 0.3829, "step": 9482 }, { "epoch": 0.4704102386031053, "grad_norm": 9.653624534606934, "learning_rate": 5.547893123279985e-06, "loss": 0.424, "step": 9483 }, { "epoch": 0.47045984423830545, "grad_norm": 5.900607109069824, "learning_rate": 5.547110765842204e-06, "loss": 0.3459, "step": 9484 }, { "epoch": 0.4705094498735056, "grad_norm": 9.492606163024902, "learning_rate": 5.546328394846771e-06, "loss": 0.3821, "step": 9485 }, { "epoch": 0.4705590555087058, "grad_norm": 5.17764949798584, "learning_rate": 5.545546010313074e-06, "loss": 0.3322, "step": 9486 }, { "epoch": 0.4706086611439059, "grad_norm": 8.267780303955078, "learning_rate": 5.5447636122604995e-06, "loss": 0.3592, "step": 9487 }, { "epoch": 0.4706582667791061, "grad_norm": 12.80454158782959, "learning_rate": 5.543981200708437e-06, "loss": 0.3331, "step": 9488 }, { "epoch": 0.4707078724143063, "grad_norm": 10.303215026855469, "learning_rate": 5.543198775676274e-06, "loss": 0.2963, "step": 9489 }, { "epoch": 0.4707574780495064, "grad_norm": 5.42100191116333, "learning_rate": 5.5424163371833995e-06, "loss": 0.2137, "step": 9490 }, { "epoch": 0.4708070836847066, "grad_norm": 3.7802672386169434, "learning_rate": 5.541633885249201e-06, "loss": 0.2741, "step": 9491 }, { "epoch": 0.47085668931990676, "grad_norm": 8.036043167114258, "learning_rate": 5.540851419893073e-06, "loss": 0.3334, "step": 9492 }, { "epoch": 0.4709062949551069, "grad_norm": 17.480812072753906, "learning_rate": 5.5400689411344e-06, "loss": 0.3444, "step": 9493 }, { "epoch": 0.47095590059030706, "grad_norm": 8.752887725830078, "learning_rate": 5.539286448992575e-06, "loss": 0.3015, "step": 9494 }, { "epoch": 0.47100550622550724, "grad_norm": 6.501951217651367, "learning_rate": 5.538503943486989e-06, "loss": 0.3496, "step": 9495 }, { "epoch": 0.47105511186070737, "grad_norm": 4.624177932739258, "learning_rate": 5.53772142463703e-06, "loss": 0.2998, "step": 9496 }, { "epoch": 0.47110471749590754, "grad_norm": 14.460553169250488, "learning_rate": 5.5369388924620916e-06, "loss": 0.3285, "step": 9497 }, { "epoch": 0.47115432313110767, "grad_norm": 9.01535415649414, "learning_rate": 5.536156346981564e-06, "loss": 0.3023, "step": 9498 }, { "epoch": 0.47120392876630784, "grad_norm": 9.330466270446777, "learning_rate": 5.53537378821484e-06, "loss": 0.2247, "step": 9499 }, { "epoch": 0.471253534401508, "grad_norm": 8.786763191223145, "learning_rate": 5.534591216181311e-06, "loss": 0.3814, "step": 9500 }, { "epoch": 0.47130314003670815, "grad_norm": 5.796733379364014, "learning_rate": 5.53380863090037e-06, "loss": 0.2935, "step": 9501 }, { "epoch": 0.4713527456719083, "grad_norm": 4.178159713745117, "learning_rate": 5.533026032391411e-06, "loss": 0.2078, "step": 9502 }, { "epoch": 0.4714023513071085, "grad_norm": 4.555753231048584, "learning_rate": 5.532243420673824e-06, "loss": 0.2853, "step": 9503 }, { "epoch": 0.4714519569423086, "grad_norm": 5.212914943695068, "learning_rate": 5.531460795767004e-06, "loss": 0.2314, "step": 9504 }, { "epoch": 0.4715015625775088, "grad_norm": 7.171915531158447, "learning_rate": 5.530678157690347e-06, "loss": 0.2747, "step": 9505 }, { "epoch": 0.471551168212709, "grad_norm": 4.647363185882568, "learning_rate": 5.529895506463245e-06, "loss": 0.309, "step": 9506 }, { "epoch": 0.4716007738479091, "grad_norm": 15.651006698608398, "learning_rate": 5.52911284210509e-06, "loss": 0.3271, "step": 9507 }, { "epoch": 0.4716503794831093, "grad_norm": 5.7006916999816895, "learning_rate": 5.528330164635281e-06, "loss": 0.3349, "step": 9508 }, { "epoch": 0.47169998511830946, "grad_norm": 8.189679145812988, "learning_rate": 5.5275474740732125e-06, "loss": 0.314, "step": 9509 }, { "epoch": 0.4717495907535096, "grad_norm": 6.253200531005859, "learning_rate": 5.526764770438278e-06, "loss": 0.2585, "step": 9510 }, { "epoch": 0.47179919638870976, "grad_norm": 9.062397956848145, "learning_rate": 5.525982053749874e-06, "loss": 0.3277, "step": 9511 }, { "epoch": 0.47184880202390994, "grad_norm": 4.886408805847168, "learning_rate": 5.525199324027398e-06, "loss": 0.2559, "step": 9512 }, { "epoch": 0.47189840765911006, "grad_norm": 5.131567001342773, "learning_rate": 5.524416581290244e-06, "loss": 0.3477, "step": 9513 }, { "epoch": 0.47194801329431024, "grad_norm": 5.3200812339782715, "learning_rate": 5.52363382555781e-06, "loss": 0.3145, "step": 9514 }, { "epoch": 0.47199761892951037, "grad_norm": 5.1066718101501465, "learning_rate": 5.522851056849494e-06, "loss": 0.3286, "step": 9515 }, { "epoch": 0.47204722456471054, "grad_norm": 8.611263275146484, "learning_rate": 5.522068275184691e-06, "loss": 0.3193, "step": 9516 }, { "epoch": 0.4720968301999107, "grad_norm": 7.333741664886475, "learning_rate": 5.521285480582801e-06, "loss": 0.2503, "step": 9517 }, { "epoch": 0.47214643583511084, "grad_norm": 16.814205169677734, "learning_rate": 5.52050267306322e-06, "loss": 0.4077, "step": 9518 }, { "epoch": 0.472196041470311, "grad_norm": 6.672906875610352, "learning_rate": 5.519719852645346e-06, "loss": 0.3042, "step": 9519 }, { "epoch": 0.4722456471055112, "grad_norm": 9.078001976013184, "learning_rate": 5.518937019348582e-06, "loss": 0.3808, "step": 9520 }, { "epoch": 0.4722952527407113, "grad_norm": 7.543233871459961, "learning_rate": 5.518154173192323e-06, "loss": 0.3083, "step": 9521 }, { "epoch": 0.4723448583759115, "grad_norm": 8.984679222106934, "learning_rate": 5.5173713141959685e-06, "loss": 0.3916, "step": 9522 }, { "epoch": 0.4723944640111117, "grad_norm": 4.801708221435547, "learning_rate": 5.5165884423789185e-06, "loss": 0.2499, "step": 9523 }, { "epoch": 0.4724440696463118, "grad_norm": 10.35431957244873, "learning_rate": 5.515805557760574e-06, "loss": 0.4363, "step": 9524 }, { "epoch": 0.472493675281512, "grad_norm": 4.094499111175537, "learning_rate": 5.515022660360335e-06, "loss": 0.2326, "step": 9525 }, { "epoch": 0.47254328091671216, "grad_norm": 7.7023606300354, "learning_rate": 5.514239750197601e-06, "loss": 0.3385, "step": 9526 }, { "epoch": 0.4725928865519123, "grad_norm": 10.150032043457031, "learning_rate": 5.513456827291772e-06, "loss": 0.2377, "step": 9527 }, { "epoch": 0.47264249218711246, "grad_norm": 6.801474571228027, "learning_rate": 5.512673891662253e-06, "loss": 0.3233, "step": 9528 }, { "epoch": 0.47269209782231264, "grad_norm": 13.203396797180176, "learning_rate": 5.511890943328442e-06, "loss": 0.4498, "step": 9529 }, { "epoch": 0.47274170345751276, "grad_norm": 5.66383171081543, "learning_rate": 5.511107982309741e-06, "loss": 0.3078, "step": 9530 }, { "epoch": 0.47279130909271294, "grad_norm": 6.578400135040283, "learning_rate": 5.510325008625553e-06, "loss": 0.289, "step": 9531 }, { "epoch": 0.47284091472791306, "grad_norm": 7.863454341888428, "learning_rate": 5.509542022295282e-06, "loss": 0.2364, "step": 9532 }, { "epoch": 0.47289052036311324, "grad_norm": 4.722522258758545, "learning_rate": 5.5087590233383285e-06, "loss": 0.2346, "step": 9533 }, { "epoch": 0.4729401259983134, "grad_norm": 4.740359306335449, "learning_rate": 5.507976011774096e-06, "loss": 0.3115, "step": 9534 }, { "epoch": 0.47298973163351354, "grad_norm": 8.888649940490723, "learning_rate": 5.507192987621988e-06, "loss": 0.369, "step": 9535 }, { "epoch": 0.4730393372687137, "grad_norm": 4.546367645263672, "learning_rate": 5.506409950901409e-06, "loss": 0.3141, "step": 9536 }, { "epoch": 0.4730889429039139, "grad_norm": 4.515886306762695, "learning_rate": 5.505626901631762e-06, "loss": 0.1498, "step": 9537 }, { "epoch": 0.473138548539114, "grad_norm": 8.667075157165527, "learning_rate": 5.504843839832451e-06, "loss": 0.4408, "step": 9538 }, { "epoch": 0.4731881541743142, "grad_norm": 6.243340969085693, "learning_rate": 5.504060765522883e-06, "loss": 0.3085, "step": 9539 }, { "epoch": 0.4732377598095144, "grad_norm": 3.883096694946289, "learning_rate": 5.50327767872246e-06, "loss": 0.3192, "step": 9540 }, { "epoch": 0.4732873654447145, "grad_norm": 4.1744866371154785, "learning_rate": 5.502494579450588e-06, "loss": 0.3286, "step": 9541 }, { "epoch": 0.4733369710799147, "grad_norm": 14.013768196105957, "learning_rate": 5.501711467726673e-06, "loss": 0.4613, "step": 9542 }, { "epoch": 0.47338657671511486, "grad_norm": 7.501332759857178, "learning_rate": 5.500928343570122e-06, "loss": 0.3058, "step": 9543 }, { "epoch": 0.473436182350315, "grad_norm": 5.717423915863037, "learning_rate": 5.500145207000339e-06, "loss": 0.2585, "step": 9544 }, { "epoch": 0.47348578798551516, "grad_norm": 16.80199432373047, "learning_rate": 5.499362058036732e-06, "loss": 0.4871, "step": 9545 }, { "epoch": 0.47353539362071534, "grad_norm": 6.454015731811523, "learning_rate": 5.498578896698709e-06, "loss": 0.2647, "step": 9546 }, { "epoch": 0.47358499925591546, "grad_norm": 10.090903282165527, "learning_rate": 5.4977957230056725e-06, "loss": 0.3047, "step": 9547 }, { "epoch": 0.47363460489111564, "grad_norm": 8.603079795837402, "learning_rate": 5.497012536977034e-06, "loss": 0.2417, "step": 9548 }, { "epoch": 0.47368421052631576, "grad_norm": 4.19808292388916, "learning_rate": 5.496229338632201e-06, "loss": 0.2298, "step": 9549 }, { "epoch": 0.47373381616151594, "grad_norm": 6.222384452819824, "learning_rate": 5.495446127990579e-06, "loss": 0.2835, "step": 9550 }, { "epoch": 0.4737834217967161, "grad_norm": 14.758467674255371, "learning_rate": 5.494662905071578e-06, "loss": 0.2598, "step": 9551 }, { "epoch": 0.47383302743191624, "grad_norm": 5.879430294036865, "learning_rate": 5.493879669894606e-06, "loss": 0.3231, "step": 9552 }, { "epoch": 0.4738826330671164, "grad_norm": 18.391908645629883, "learning_rate": 5.493096422479075e-06, "loss": 0.3379, "step": 9553 }, { "epoch": 0.4739322387023166, "grad_norm": 3.9731621742248535, "learning_rate": 5.492313162844389e-06, "loss": 0.3034, "step": 9554 }, { "epoch": 0.4739818443375167, "grad_norm": 3.228135824203491, "learning_rate": 5.491529891009961e-06, "loss": 0.2426, "step": 9555 }, { "epoch": 0.4740314499727169, "grad_norm": 9.494800567626953, "learning_rate": 5.490746606995201e-06, "loss": 0.2983, "step": 9556 }, { "epoch": 0.4740810556079171, "grad_norm": 9.330263137817383, "learning_rate": 5.489963310819516e-06, "loss": 0.3345, "step": 9557 }, { "epoch": 0.4741306612431172, "grad_norm": 14.08650016784668, "learning_rate": 5.48918000250232e-06, "loss": 0.3598, "step": 9558 }, { "epoch": 0.4741802668783174, "grad_norm": 6.116645336151123, "learning_rate": 5.488396682063022e-06, "loss": 0.2639, "step": 9559 }, { "epoch": 0.47422987251351756, "grad_norm": 7.293928623199463, "learning_rate": 5.487613349521033e-06, "loss": 0.3007, "step": 9560 }, { "epoch": 0.4742794781487177, "grad_norm": 11.260488510131836, "learning_rate": 5.486830004895764e-06, "loss": 0.3379, "step": 9561 }, { "epoch": 0.47432908378391786, "grad_norm": 11.006999015808105, "learning_rate": 5.486046648206628e-06, "loss": 0.3036, "step": 9562 }, { "epoch": 0.47437868941911804, "grad_norm": 12.053488731384277, "learning_rate": 5.485263279473037e-06, "loss": 0.3121, "step": 9563 }, { "epoch": 0.47442829505431816, "grad_norm": 10.567327499389648, "learning_rate": 5.484479898714401e-06, "loss": 0.3632, "step": 9564 }, { "epoch": 0.47447790068951834, "grad_norm": 5.754229545593262, "learning_rate": 5.483696505950135e-06, "loss": 0.2688, "step": 9565 }, { "epoch": 0.47452750632471846, "grad_norm": 4.173362731933594, "learning_rate": 5.48291310119965e-06, "loss": 0.1869, "step": 9566 }, { "epoch": 0.47457711195991864, "grad_norm": 10.792895317077637, "learning_rate": 5.48212968448236e-06, "loss": 0.3068, "step": 9567 }, { "epoch": 0.4746267175951188, "grad_norm": 4.225205421447754, "learning_rate": 5.481346255817679e-06, "loss": 0.2541, "step": 9568 }, { "epoch": 0.47467632323031894, "grad_norm": 11.0111722946167, "learning_rate": 5.48056281522502e-06, "loss": 0.3223, "step": 9569 }, { "epoch": 0.4747259288655191, "grad_norm": 6.100055694580078, "learning_rate": 5.479779362723797e-06, "loss": 0.3117, "step": 9570 }, { "epoch": 0.4747755345007193, "grad_norm": 13.999682426452637, "learning_rate": 5.478995898333423e-06, "loss": 0.2636, "step": 9571 }, { "epoch": 0.4748251401359194, "grad_norm": 5.928342819213867, "learning_rate": 5.478212422073316e-06, "loss": 0.2546, "step": 9572 }, { "epoch": 0.4748747457711196, "grad_norm": 5.606867790222168, "learning_rate": 5.477428933962887e-06, "loss": 0.2738, "step": 9573 }, { "epoch": 0.4749243514063198, "grad_norm": 7.845541000366211, "learning_rate": 5.476645434021555e-06, "loss": 0.2617, "step": 9574 }, { "epoch": 0.4749739570415199, "grad_norm": 9.354289054870605, "learning_rate": 5.4758619222687315e-06, "loss": 0.3415, "step": 9575 }, { "epoch": 0.4750235626767201, "grad_norm": 10.85647964477539, "learning_rate": 5.4750783987238355e-06, "loss": 0.4133, "step": 9576 }, { "epoch": 0.47507316831192026, "grad_norm": 5.5162200927734375, "learning_rate": 5.474294863406281e-06, "loss": 0.2314, "step": 9577 }, { "epoch": 0.4751227739471204, "grad_norm": 7.24168586730957, "learning_rate": 5.473511316335486e-06, "loss": 0.342, "step": 9578 }, { "epoch": 0.47517237958232056, "grad_norm": 12.123202323913574, "learning_rate": 5.472727757530867e-06, "loss": 0.3423, "step": 9579 }, { "epoch": 0.47522198521752074, "grad_norm": 4.688409805297852, "learning_rate": 5.471944187011839e-06, "loss": 0.3296, "step": 9580 }, { "epoch": 0.47527159085272086, "grad_norm": 8.114408493041992, "learning_rate": 5.47116060479782e-06, "loss": 0.3779, "step": 9581 }, { "epoch": 0.47532119648792104, "grad_norm": 6.325111389160156, "learning_rate": 5.47037701090823e-06, "loss": 0.3506, "step": 9582 }, { "epoch": 0.47537080212312116, "grad_norm": 6.453873157501221, "learning_rate": 5.469593405362484e-06, "loss": 0.3215, "step": 9583 }, { "epoch": 0.47542040775832134, "grad_norm": 6.337785720825195, "learning_rate": 5.468809788180002e-06, "loss": 0.2786, "step": 9584 }, { "epoch": 0.4754700133935215, "grad_norm": 7.444343090057373, "learning_rate": 5.4680261593802e-06, "loss": 0.2973, "step": 9585 }, { "epoch": 0.47551961902872164, "grad_norm": 5.5345916748046875, "learning_rate": 5.4672425189825e-06, "loss": 0.2992, "step": 9586 }, { "epoch": 0.4755692246639218, "grad_norm": 6.818549156188965, "learning_rate": 5.466458867006316e-06, "loss": 0.2401, "step": 9587 }, { "epoch": 0.475618830299122, "grad_norm": 12.94998550415039, "learning_rate": 5.465675203471072e-06, "loss": 0.3211, "step": 9588 }, { "epoch": 0.4756684359343221, "grad_norm": 6.187343597412109, "learning_rate": 5.464891528396186e-06, "loss": 0.3087, "step": 9589 }, { "epoch": 0.4757180415695223, "grad_norm": 5.137815952301025, "learning_rate": 5.464107841801077e-06, "loss": 0.2703, "step": 9590 }, { "epoch": 0.4757676472047225, "grad_norm": 7.042447090148926, "learning_rate": 5.463324143705167e-06, "loss": 0.3601, "step": 9591 }, { "epoch": 0.4758172528399226, "grad_norm": 6.4111328125, "learning_rate": 5.462540434127873e-06, "loss": 0.2077, "step": 9592 }, { "epoch": 0.4758668584751228, "grad_norm": 6.386376857757568, "learning_rate": 5.46175671308862e-06, "loss": 0.2235, "step": 9593 }, { "epoch": 0.47591646411032296, "grad_norm": 7.104724407196045, "learning_rate": 5.460972980606826e-06, "loss": 0.3146, "step": 9594 }, { "epoch": 0.4759660697455231, "grad_norm": 6.081597328186035, "learning_rate": 5.460189236701912e-06, "loss": 0.3177, "step": 9595 }, { "epoch": 0.47601567538072326, "grad_norm": 6.139565944671631, "learning_rate": 5.459405481393301e-06, "loss": 0.3026, "step": 9596 }, { "epoch": 0.47606528101592344, "grad_norm": 5.188162326812744, "learning_rate": 5.458621714700415e-06, "loss": 0.276, "step": 9597 }, { "epoch": 0.47611488665112356, "grad_norm": 7.226741313934326, "learning_rate": 5.457837936642676e-06, "loss": 0.3251, "step": 9598 }, { "epoch": 0.47616449228632374, "grad_norm": 12.108394622802734, "learning_rate": 5.457054147239504e-06, "loss": 0.4527, "step": 9599 }, { "epoch": 0.47621409792152386, "grad_norm": 4.232000350952148, "learning_rate": 5.456270346510325e-06, "loss": 0.3232, "step": 9600 }, { "epoch": 0.47626370355672404, "grad_norm": 4.528927326202393, "learning_rate": 5.45548653447456e-06, "loss": 0.2935, "step": 9601 }, { "epoch": 0.4763133091919242, "grad_norm": 7.857845783233643, "learning_rate": 5.4547027111516314e-06, "loss": 0.2422, "step": 9602 }, { "epoch": 0.47636291482712434, "grad_norm": 8.397017478942871, "learning_rate": 5.453918876560966e-06, "loss": 0.2544, "step": 9603 }, { "epoch": 0.4764125204623245, "grad_norm": 4.373483657836914, "learning_rate": 5.453135030721984e-06, "loss": 0.2375, "step": 9604 }, { "epoch": 0.4764621260975247, "grad_norm": 5.402435302734375, "learning_rate": 5.452351173654112e-06, "loss": 0.3163, "step": 9605 }, { "epoch": 0.4765117317327248, "grad_norm": 5.681273460388184, "learning_rate": 5.451567305376773e-06, "loss": 0.2084, "step": 9606 }, { "epoch": 0.476561337367925, "grad_norm": 4.569599151611328, "learning_rate": 5.450783425909391e-06, "loss": 0.2971, "step": 9607 }, { "epoch": 0.4766109430031252, "grad_norm": 8.449057579040527, "learning_rate": 5.449999535271393e-06, "loss": 0.1579, "step": 9608 }, { "epoch": 0.4766605486383253, "grad_norm": 5.1378631591796875, "learning_rate": 5.449215633482202e-06, "loss": 0.3857, "step": 9609 }, { "epoch": 0.4767101542735255, "grad_norm": 11.73900032043457, "learning_rate": 5.448431720561246e-06, "loss": 0.3265, "step": 9610 }, { "epoch": 0.47675975990872566, "grad_norm": 10.287335395812988, "learning_rate": 5.447647796527948e-06, "loss": 0.2646, "step": 9611 }, { "epoch": 0.4768093655439258, "grad_norm": 12.272908210754395, "learning_rate": 5.446863861401735e-06, "loss": 0.3093, "step": 9612 }, { "epoch": 0.47685897117912596, "grad_norm": 5.628060817718506, "learning_rate": 5.446079915202034e-06, "loss": 0.3042, "step": 9613 }, { "epoch": 0.47690857681432614, "grad_norm": 10.664519309997559, "learning_rate": 5.44529595794827e-06, "loss": 0.4313, "step": 9614 }, { "epoch": 0.47695818244952626, "grad_norm": 9.79456901550293, "learning_rate": 5.444511989659871e-06, "loss": 0.3508, "step": 9615 }, { "epoch": 0.47700778808472644, "grad_norm": 4.714568138122559, "learning_rate": 5.4437280103562636e-06, "loss": 0.3005, "step": 9616 }, { "epoch": 0.47705739371992656, "grad_norm": 6.457592487335205, "learning_rate": 5.442944020056876e-06, "loss": 0.2489, "step": 9617 }, { "epoch": 0.47710699935512674, "grad_norm": 12.011041641235352, "learning_rate": 5.442160018781135e-06, "loss": 0.3519, "step": 9618 }, { "epoch": 0.4771566049903269, "grad_norm": 11.102298736572266, "learning_rate": 5.441376006548469e-06, "loss": 0.2674, "step": 9619 }, { "epoch": 0.47720621062552704, "grad_norm": 4.272066116333008, "learning_rate": 5.440591983378306e-06, "loss": 0.2535, "step": 9620 }, { "epoch": 0.4772558162607272, "grad_norm": 6.0008931159973145, "learning_rate": 5.439807949290073e-06, "loss": 0.3024, "step": 9621 }, { "epoch": 0.4773054218959274, "grad_norm": 9.67658519744873, "learning_rate": 5.439023904303201e-06, "loss": 0.3801, "step": 9622 }, { "epoch": 0.4773550275311275, "grad_norm": 5.763215065002441, "learning_rate": 5.438239848437118e-06, "loss": 0.3027, "step": 9623 }, { "epoch": 0.4774046331663277, "grad_norm": 10.728683471679688, "learning_rate": 5.437455781711254e-06, "loss": 0.4302, "step": 9624 }, { "epoch": 0.4774542388015279, "grad_norm": 6.604825496673584, "learning_rate": 5.436671704145038e-06, "loss": 0.2595, "step": 9625 }, { "epoch": 0.477503844436728, "grad_norm": 6.5503716468811035, "learning_rate": 5.4358876157578975e-06, "loss": 0.3825, "step": 9626 }, { "epoch": 0.4775534500719282, "grad_norm": 8.679424285888672, "learning_rate": 5.435103516569267e-06, "loss": 0.2449, "step": 9627 }, { "epoch": 0.47760305570712835, "grad_norm": 7.483638763427734, "learning_rate": 5.434319406598574e-06, "loss": 0.2982, "step": 9628 }, { "epoch": 0.4776526613423285, "grad_norm": 4.97355842590332, "learning_rate": 5.433535285865249e-06, "loss": 0.2472, "step": 9629 }, { "epoch": 0.47770226697752866, "grad_norm": 5.95224142074585, "learning_rate": 5.432751154388723e-06, "loss": 0.28, "step": 9630 }, { "epoch": 0.4777518726127288, "grad_norm": 4.796210289001465, "learning_rate": 5.4319670121884284e-06, "loss": 0.2329, "step": 9631 }, { "epoch": 0.47780147824792896, "grad_norm": 5.23527717590332, "learning_rate": 5.431182859283797e-06, "loss": 0.2471, "step": 9632 }, { "epoch": 0.47785108388312914, "grad_norm": 4.975475311279297, "learning_rate": 5.430398695694257e-06, "loss": 0.3013, "step": 9633 }, { "epoch": 0.47790068951832926, "grad_norm": 8.13860034942627, "learning_rate": 5.4296145214392435e-06, "loss": 0.3432, "step": 9634 }, { "epoch": 0.47795029515352944, "grad_norm": 4.733998775482178, "learning_rate": 5.428830336538188e-06, "loss": 0.2914, "step": 9635 }, { "epoch": 0.4779999007887296, "grad_norm": 5.988987445831299, "learning_rate": 5.428046141010523e-06, "loss": 0.2496, "step": 9636 }, { "epoch": 0.47804950642392974, "grad_norm": 7.0165510177612305, "learning_rate": 5.427261934875682e-06, "loss": 0.328, "step": 9637 }, { "epoch": 0.4780991120591299, "grad_norm": 8.702064514160156, "learning_rate": 5.426477718153095e-06, "loss": 0.2889, "step": 9638 }, { "epoch": 0.4781487176943301, "grad_norm": 5.810704231262207, "learning_rate": 5.425693490862197e-06, "loss": 0.1865, "step": 9639 }, { "epoch": 0.4781983233295302, "grad_norm": 6.377196311950684, "learning_rate": 5.424909253022424e-06, "loss": 0.302, "step": 9640 }, { "epoch": 0.4782479289647304, "grad_norm": 5.775785446166992, "learning_rate": 5.424125004653207e-06, "loss": 0.2812, "step": 9641 }, { "epoch": 0.4782975345999306, "grad_norm": 14.517753601074219, "learning_rate": 5.423340745773978e-06, "loss": 0.3502, "step": 9642 }, { "epoch": 0.4783471402351307, "grad_norm": 8.403218269348145, "learning_rate": 5.422556476404176e-06, "loss": 0.4103, "step": 9643 }, { "epoch": 0.4783967458703309, "grad_norm": 4.146883487701416, "learning_rate": 5.421772196563234e-06, "loss": 0.2651, "step": 9644 }, { "epoch": 0.47844635150553105, "grad_norm": 4.472796440124512, "learning_rate": 5.420987906270585e-06, "loss": 0.2144, "step": 9645 }, { "epoch": 0.4784959571407312, "grad_norm": 6.2599897384643555, "learning_rate": 5.420203605545665e-06, "loss": 0.273, "step": 9646 }, { "epoch": 0.47854556277593135, "grad_norm": 10.863405227661133, "learning_rate": 5.419419294407912e-06, "loss": 0.2921, "step": 9647 }, { "epoch": 0.4785951684111315, "grad_norm": 4.6862287521362305, "learning_rate": 5.4186349728767565e-06, "loss": 0.2746, "step": 9648 }, { "epoch": 0.47864477404633166, "grad_norm": 4.881191253662109, "learning_rate": 5.417850640971637e-06, "loss": 0.2461, "step": 9649 }, { "epoch": 0.47869437968153183, "grad_norm": 5.4461469650268555, "learning_rate": 5.41706629871199e-06, "loss": 0.2984, "step": 9650 }, { "epoch": 0.47874398531673196, "grad_norm": 6.982808589935303, "learning_rate": 5.416281946117252e-06, "loss": 0.2657, "step": 9651 }, { "epoch": 0.47879359095193214, "grad_norm": 8.955222129821777, "learning_rate": 5.415497583206859e-06, "loss": 0.3009, "step": 9652 }, { "epoch": 0.4788431965871323, "grad_norm": 9.723810195922852, "learning_rate": 5.414713210000248e-06, "loss": 0.3093, "step": 9653 }, { "epoch": 0.47889280222233244, "grad_norm": 7.7090582847595215, "learning_rate": 5.413928826516856e-06, "loss": 0.3124, "step": 9654 }, { "epoch": 0.4789424078575326, "grad_norm": 5.379755020141602, "learning_rate": 5.41314443277612e-06, "loss": 0.3497, "step": 9655 }, { "epoch": 0.4789920134927328, "grad_norm": 9.31131362915039, "learning_rate": 5.4123600287974785e-06, "loss": 0.2718, "step": 9656 }, { "epoch": 0.4790416191279329, "grad_norm": 5.767278671264648, "learning_rate": 5.411575614600369e-06, "loss": 0.3629, "step": 9657 }, { "epoch": 0.4790912247631331, "grad_norm": 9.497929573059082, "learning_rate": 5.41079119020423e-06, "loss": 0.3195, "step": 9658 }, { "epoch": 0.4791408303983333, "grad_norm": 5.311008930206299, "learning_rate": 5.410006755628498e-06, "loss": 0.2476, "step": 9659 }, { "epoch": 0.4791904360335334, "grad_norm": 12.212689399719238, "learning_rate": 5.409222310892613e-06, "loss": 0.3647, "step": 9660 }, { "epoch": 0.4792400416687336, "grad_norm": 12.58884334564209, "learning_rate": 5.408437856016016e-06, "loss": 0.3504, "step": 9661 }, { "epoch": 0.47928964730393375, "grad_norm": 11.51585578918457, "learning_rate": 5.407653391018144e-06, "loss": 0.277, "step": 9662 }, { "epoch": 0.4793392529391339, "grad_norm": 10.66556167602539, "learning_rate": 5.406868915918435e-06, "loss": 0.3906, "step": 9663 }, { "epoch": 0.47938885857433405, "grad_norm": 3.541616439819336, "learning_rate": 5.406084430736331e-06, "loss": 0.2309, "step": 9664 }, { "epoch": 0.4794384642095342, "grad_norm": 16.973955154418945, "learning_rate": 5.4052999354912715e-06, "loss": 0.3564, "step": 9665 }, { "epoch": 0.47948806984473435, "grad_norm": 4.846611499786377, "learning_rate": 5.404515430202696e-06, "loss": 0.2859, "step": 9666 }, { "epoch": 0.47953767547993453, "grad_norm": 9.980337142944336, "learning_rate": 5.403730914890045e-06, "loss": 0.3363, "step": 9667 }, { "epoch": 0.47958728111513466, "grad_norm": 5.343125820159912, "learning_rate": 5.4029463895727595e-06, "loss": 0.318, "step": 9668 }, { "epoch": 0.47963688675033483, "grad_norm": 11.020160675048828, "learning_rate": 5.40216185427028e-06, "loss": 0.3513, "step": 9669 }, { "epoch": 0.479686492385535, "grad_norm": 4.937611103057861, "learning_rate": 5.4013773090020475e-06, "loss": 0.2764, "step": 9670 }, { "epoch": 0.47973609802073514, "grad_norm": 5.936680793762207, "learning_rate": 5.400592753787505e-06, "loss": 0.2524, "step": 9671 }, { "epoch": 0.4797857036559353, "grad_norm": 6.779977798461914, "learning_rate": 5.399808188646092e-06, "loss": 0.3103, "step": 9672 }, { "epoch": 0.4798353092911355, "grad_norm": 8.678297996520996, "learning_rate": 5.399023613597251e-06, "loss": 0.3649, "step": 9673 }, { "epoch": 0.4798849149263356, "grad_norm": 9.724115371704102, "learning_rate": 5.398239028660425e-06, "loss": 0.3433, "step": 9674 }, { "epoch": 0.4799345205615358, "grad_norm": 4.0417022705078125, "learning_rate": 5.397454433855055e-06, "loss": 0.2219, "step": 9675 }, { "epoch": 0.47998412619673597, "grad_norm": 5.089617729187012, "learning_rate": 5.396669829200583e-06, "loss": 0.2366, "step": 9676 }, { "epoch": 0.4800337318319361, "grad_norm": 6.518103122711182, "learning_rate": 5.3958852147164554e-06, "loss": 0.3686, "step": 9677 }, { "epoch": 0.4800833374671363, "grad_norm": 14.065682411193848, "learning_rate": 5.395100590422113e-06, "loss": 0.4307, "step": 9678 }, { "epoch": 0.48013294310233645, "grad_norm": 6.500454902648926, "learning_rate": 5.394315956336999e-06, "loss": 0.2046, "step": 9679 }, { "epoch": 0.4801825487375366, "grad_norm": 7.121054172515869, "learning_rate": 5.393531312480557e-06, "loss": 0.2879, "step": 9680 }, { "epoch": 0.48023215437273675, "grad_norm": 5.340768337249756, "learning_rate": 5.392746658872231e-06, "loss": 0.2542, "step": 9681 }, { "epoch": 0.4802817600079369, "grad_norm": 11.763627052307129, "learning_rate": 5.391961995531465e-06, "loss": 0.2695, "step": 9682 }, { "epoch": 0.48033136564313705, "grad_norm": 15.055892944335938, "learning_rate": 5.391177322477703e-06, "loss": 0.456, "step": 9683 }, { "epoch": 0.48038097127833723, "grad_norm": 5.304052352905273, "learning_rate": 5.390392639730391e-06, "loss": 0.2618, "step": 9684 }, { "epoch": 0.48043057691353735, "grad_norm": 11.649343490600586, "learning_rate": 5.389607947308972e-06, "loss": 0.4652, "step": 9685 }, { "epoch": 0.48048018254873753, "grad_norm": 7.411330223083496, "learning_rate": 5.388823245232893e-06, "loss": 0.3533, "step": 9686 }, { "epoch": 0.4805297881839377, "grad_norm": 5.269749641418457, "learning_rate": 5.388038533521597e-06, "loss": 0.3788, "step": 9687 }, { "epoch": 0.48057939381913783, "grad_norm": 6.261430740356445, "learning_rate": 5.38725381219453e-06, "loss": 0.2371, "step": 9688 }, { "epoch": 0.480628999454338, "grad_norm": 6.155971050262451, "learning_rate": 5.3864690812711395e-06, "loss": 0.2797, "step": 9689 }, { "epoch": 0.4806786050895382, "grad_norm": 14.562604904174805, "learning_rate": 5.385684340770871e-06, "loss": 0.2328, "step": 9690 }, { "epoch": 0.4807282107247383, "grad_norm": 9.833511352539062, "learning_rate": 5.38489959071317e-06, "loss": 0.3056, "step": 9691 }, { "epoch": 0.4807778163599385, "grad_norm": 3.85983943939209, "learning_rate": 5.384114831117482e-06, "loss": 0.2494, "step": 9692 }, { "epoch": 0.48082742199513867, "grad_norm": 6.4443359375, "learning_rate": 5.383330062003253e-06, "loss": 0.2801, "step": 9693 }, { "epoch": 0.4808770276303388, "grad_norm": 7.673934459686279, "learning_rate": 5.382545283389935e-06, "loss": 0.2621, "step": 9694 }, { "epoch": 0.48092663326553897, "grad_norm": 3.2954399585723877, "learning_rate": 5.381760495296971e-06, "loss": 0.1631, "step": 9695 }, { "epoch": 0.48097623890073915, "grad_norm": 10.007936477661133, "learning_rate": 5.380975697743808e-06, "loss": 0.4135, "step": 9696 }, { "epoch": 0.4810258445359393, "grad_norm": 6.344486713409424, "learning_rate": 5.380190890749896e-06, "loss": 0.2066, "step": 9697 }, { "epoch": 0.48107545017113945, "grad_norm": 8.91202163696289, "learning_rate": 5.379406074334681e-06, "loss": 0.2137, "step": 9698 }, { "epoch": 0.4811250558063396, "grad_norm": 6.695014953613281, "learning_rate": 5.378621248517613e-06, "loss": 0.3911, "step": 9699 }, { "epoch": 0.48117466144153975, "grad_norm": 7.143740653991699, "learning_rate": 5.377836413318137e-06, "loss": 0.1661, "step": 9700 }, { "epoch": 0.48122426707673993, "grad_norm": 7.4358134269714355, "learning_rate": 5.377051568755707e-06, "loss": 0.3247, "step": 9701 }, { "epoch": 0.48127387271194005, "grad_norm": 11.364277839660645, "learning_rate": 5.376266714849766e-06, "loss": 0.2123, "step": 9702 }, { "epoch": 0.48132347834714023, "grad_norm": 5.345329761505127, "learning_rate": 5.375481851619766e-06, "loss": 0.3048, "step": 9703 }, { "epoch": 0.4813730839823404, "grad_norm": 6.991562843322754, "learning_rate": 5.374696979085155e-06, "loss": 0.3146, "step": 9704 }, { "epoch": 0.48142268961754053, "grad_norm": 3.9437036514282227, "learning_rate": 5.373912097265386e-06, "loss": 0.1515, "step": 9705 }, { "epoch": 0.4814722952527407, "grad_norm": 8.163289070129395, "learning_rate": 5.373127206179905e-06, "loss": 0.3123, "step": 9706 }, { "epoch": 0.4815219008879409, "grad_norm": 7.113397121429443, "learning_rate": 5.372342305848162e-06, "loss": 0.3584, "step": 9707 }, { "epoch": 0.481571506523141, "grad_norm": 5.3707098960876465, "learning_rate": 5.3715573962896094e-06, "loss": 0.3103, "step": 9708 }, { "epoch": 0.4816211121583412, "grad_norm": 9.778843879699707, "learning_rate": 5.370772477523695e-06, "loss": 0.2468, "step": 9709 }, { "epoch": 0.48167071779354137, "grad_norm": 10.565662384033203, "learning_rate": 5.369987549569873e-06, "loss": 0.3929, "step": 9710 }, { "epoch": 0.4817203234287415, "grad_norm": 10.985623359680176, "learning_rate": 5.369202612447592e-06, "loss": 0.4835, "step": 9711 }, { "epoch": 0.48176992906394167, "grad_norm": 6.617645740509033, "learning_rate": 5.368417666176301e-06, "loss": 0.3533, "step": 9712 }, { "epoch": 0.48181953469914185, "grad_norm": 5.040589332580566, "learning_rate": 5.367632710775456e-06, "loss": 0.2422, "step": 9713 }, { "epoch": 0.48186914033434197, "grad_norm": 8.990386962890625, "learning_rate": 5.366847746264504e-06, "loss": 0.3224, "step": 9714 }, { "epoch": 0.48191874596954215, "grad_norm": 16.100244522094727, "learning_rate": 5.3660627726629e-06, "loss": 0.3917, "step": 9715 }, { "epoch": 0.4819683516047423, "grad_norm": 4.478385925292969, "learning_rate": 5.365277789990095e-06, "loss": 0.2466, "step": 9716 }, { "epoch": 0.48201795723994245, "grad_norm": 6.556861877441406, "learning_rate": 5.36449279826554e-06, "loss": 0.2754, "step": 9717 }, { "epoch": 0.48206756287514263, "grad_norm": 8.790719985961914, "learning_rate": 5.36370779750869e-06, "loss": 0.3619, "step": 9718 }, { "epoch": 0.48211716851034275, "grad_norm": 5.7069597244262695, "learning_rate": 5.362922787738997e-06, "loss": 0.2825, "step": 9719 }, { "epoch": 0.48216677414554293, "grad_norm": 12.173112869262695, "learning_rate": 5.362137768975911e-06, "loss": 0.3373, "step": 9720 }, { "epoch": 0.4822163797807431, "grad_norm": 10.893575668334961, "learning_rate": 5.3613527412388875e-06, "loss": 0.4003, "step": 9721 }, { "epoch": 0.48226598541594323, "grad_norm": 5.760580062866211, "learning_rate": 5.360567704547381e-06, "loss": 0.3736, "step": 9722 }, { "epoch": 0.4823155910511434, "grad_norm": 12.616345405578613, "learning_rate": 5.359782658920844e-06, "loss": 0.4228, "step": 9723 }, { "epoch": 0.4823651966863436, "grad_norm": 5.19741678237915, "learning_rate": 5.358997604378729e-06, "loss": 0.25, "step": 9724 }, { "epoch": 0.4824148023215437, "grad_norm": 5.691134929656982, "learning_rate": 5.35821254094049e-06, "loss": 0.3289, "step": 9725 }, { "epoch": 0.4824644079567439, "grad_norm": 4.839534759521484, "learning_rate": 5.357427468625584e-06, "loss": 0.2352, "step": 9726 }, { "epoch": 0.48251401359194407, "grad_norm": 3.730569839477539, "learning_rate": 5.356642387453463e-06, "loss": 0.2012, "step": 9727 }, { "epoch": 0.4825636192271442, "grad_norm": 4.892361164093018, "learning_rate": 5.355857297443582e-06, "loss": 0.3163, "step": 9728 }, { "epoch": 0.48261322486234437, "grad_norm": 8.549574851989746, "learning_rate": 5.355072198615396e-06, "loss": 0.4038, "step": 9729 }, { "epoch": 0.48266283049754455, "grad_norm": 12.578008651733398, "learning_rate": 5.3542870909883605e-06, "loss": 0.457, "step": 9730 }, { "epoch": 0.48271243613274467, "grad_norm": 13.563281059265137, "learning_rate": 5.35350197458193e-06, "loss": 0.4214, "step": 9731 }, { "epoch": 0.48276204176794485, "grad_norm": 9.795700073242188, "learning_rate": 5.352716849415562e-06, "loss": 0.2197, "step": 9732 }, { "epoch": 0.482811647403145, "grad_norm": 7.421695232391357, "learning_rate": 5.3519317155087105e-06, "loss": 0.2667, "step": 9733 }, { "epoch": 0.48286125303834515, "grad_norm": 4.356932163238525, "learning_rate": 5.351146572880831e-06, "loss": 0.2313, "step": 9734 }, { "epoch": 0.48291085867354533, "grad_norm": 5.846103668212891, "learning_rate": 5.350361421551382e-06, "loss": 0.2977, "step": 9735 }, { "epoch": 0.48296046430874545, "grad_norm": 4.979421138763428, "learning_rate": 5.349576261539817e-06, "loss": 0.3199, "step": 9736 }, { "epoch": 0.48301006994394563, "grad_norm": 3.5288991928100586, "learning_rate": 5.3487910928655955e-06, "loss": 0.2315, "step": 9737 }, { "epoch": 0.4830596755791458, "grad_norm": 9.718048095703125, "learning_rate": 5.348005915548171e-06, "loss": 0.3304, "step": 9738 }, { "epoch": 0.48310928121434593, "grad_norm": 7.549717903137207, "learning_rate": 5.3472207296070045e-06, "loss": 0.1983, "step": 9739 }, { "epoch": 0.4831588868495461, "grad_norm": 10.6748046875, "learning_rate": 5.3464355350615505e-06, "loss": 0.2611, "step": 9740 }, { "epoch": 0.4832084924847463, "grad_norm": 8.623848915100098, "learning_rate": 5.345650331931267e-06, "loss": 0.3582, "step": 9741 }, { "epoch": 0.4832580981199464, "grad_norm": 4.874463081359863, "learning_rate": 5.344865120235613e-06, "loss": 0.2668, "step": 9742 }, { "epoch": 0.4833077037551466, "grad_norm": 7.674007892608643, "learning_rate": 5.344079899994043e-06, "loss": 0.2639, "step": 9743 }, { "epoch": 0.48335730939034677, "grad_norm": 5.27474308013916, "learning_rate": 5.3432946712260206e-06, "loss": 0.2437, "step": 9744 }, { "epoch": 0.4834069150255469, "grad_norm": 12.753223419189453, "learning_rate": 5.3425094339509985e-06, "loss": 0.4699, "step": 9745 }, { "epoch": 0.48345652066074707, "grad_norm": 9.400514602661133, "learning_rate": 5.341724188188439e-06, "loss": 0.2495, "step": 9746 }, { "epoch": 0.48350612629594725, "grad_norm": 12.121274948120117, "learning_rate": 5.340938933957797e-06, "loss": 0.3759, "step": 9747 }, { "epoch": 0.48355573193114737, "grad_norm": 4.613625526428223, "learning_rate": 5.340153671278537e-06, "loss": 0.2891, "step": 9748 }, { "epoch": 0.48360533756634755, "grad_norm": 8.196296691894531, "learning_rate": 5.339368400170115e-06, "loss": 0.2522, "step": 9749 }, { "epoch": 0.48365494320154767, "grad_norm": 6.851958751678467, "learning_rate": 5.338583120651989e-06, "loss": 0.3107, "step": 9750 }, { "epoch": 0.48370454883674785, "grad_norm": 9.180004119873047, "learning_rate": 5.337797832743621e-06, "loss": 0.353, "step": 9751 }, { "epoch": 0.48375415447194803, "grad_norm": 8.088887214660645, "learning_rate": 5.337012536464471e-06, "loss": 0.4222, "step": 9752 }, { "epoch": 0.48380376010714815, "grad_norm": 10.658079147338867, "learning_rate": 5.3362272318339955e-06, "loss": 0.3263, "step": 9753 }, { "epoch": 0.48385336574234833, "grad_norm": 7.1572651863098145, "learning_rate": 5.3354419188716575e-06, "loss": 0.2634, "step": 9754 }, { "epoch": 0.4839029713775485, "grad_norm": 10.498228073120117, "learning_rate": 5.334656597596919e-06, "loss": 0.4343, "step": 9755 }, { "epoch": 0.48395257701274863, "grad_norm": 7.823907375335693, "learning_rate": 5.333871268029237e-06, "loss": 0.2816, "step": 9756 }, { "epoch": 0.4840021826479488, "grad_norm": 3.7002298831939697, "learning_rate": 5.333085930188075e-06, "loss": 0.2458, "step": 9757 }, { "epoch": 0.484051788283149, "grad_norm": 4.386672019958496, "learning_rate": 5.332300584092891e-06, "loss": 0.3439, "step": 9758 }, { "epoch": 0.4841013939183491, "grad_norm": 7.267522811889648, "learning_rate": 5.331515229763151e-06, "loss": 0.2344, "step": 9759 }, { "epoch": 0.4841509995535493, "grad_norm": 6.700265407562256, "learning_rate": 5.330729867218311e-06, "loss": 0.289, "step": 9760 }, { "epoch": 0.48420060518874947, "grad_norm": 9.900642395019531, "learning_rate": 5.329944496477837e-06, "loss": 0.263, "step": 9761 }, { "epoch": 0.4842502108239496, "grad_norm": 8.119362831115723, "learning_rate": 5.329159117561189e-06, "loss": 0.3187, "step": 9762 }, { "epoch": 0.48429981645914977, "grad_norm": 6.080886363983154, "learning_rate": 5.328373730487829e-06, "loss": 0.2264, "step": 9763 }, { "epoch": 0.4843494220943499, "grad_norm": 4.76543664932251, "learning_rate": 5.327588335277219e-06, "loss": 0.2297, "step": 9764 }, { "epoch": 0.48439902772955007, "grad_norm": 9.987161636352539, "learning_rate": 5.326802931948823e-06, "loss": 0.3023, "step": 9765 }, { "epoch": 0.48444863336475025, "grad_norm": 7.525223255157471, "learning_rate": 5.326017520522101e-06, "loss": 0.2433, "step": 9766 }, { "epoch": 0.48449823899995037, "grad_norm": 6.622978687286377, "learning_rate": 5.325232101016518e-06, "loss": 0.2437, "step": 9767 }, { "epoch": 0.48454784463515055, "grad_norm": 6.843924522399902, "learning_rate": 5.324446673451535e-06, "loss": 0.3239, "step": 9768 }, { "epoch": 0.4845974502703507, "grad_norm": 15.999227523803711, "learning_rate": 5.323661237846619e-06, "loss": 0.4332, "step": 9769 }, { "epoch": 0.48464705590555085, "grad_norm": 7.977175235748291, "learning_rate": 5.322875794221229e-06, "loss": 0.2642, "step": 9770 }, { "epoch": 0.48469666154075103, "grad_norm": 7.271999359130859, "learning_rate": 5.3220903425948324e-06, "loss": 0.2002, "step": 9771 }, { "epoch": 0.4847462671759512, "grad_norm": 5.82863712310791, "learning_rate": 5.3213048829868915e-06, "loss": 0.3198, "step": 9772 }, { "epoch": 0.48479587281115133, "grad_norm": 6.330735683441162, "learning_rate": 5.32051941541687e-06, "loss": 0.2892, "step": 9773 }, { "epoch": 0.4848454784463515, "grad_norm": 9.447638511657715, "learning_rate": 5.319733939904231e-06, "loss": 0.3007, "step": 9774 }, { "epoch": 0.4848950840815517, "grad_norm": 11.344344139099121, "learning_rate": 5.318948456468441e-06, "loss": 0.2046, "step": 9775 }, { "epoch": 0.4849446897167518, "grad_norm": 4.340096473693848, "learning_rate": 5.318162965128965e-06, "loss": 0.2736, "step": 9776 }, { "epoch": 0.484994295351952, "grad_norm": 6.683979034423828, "learning_rate": 5.3173774659052665e-06, "loss": 0.3808, "step": 9777 }, { "epoch": 0.48504390098715217, "grad_norm": 19.181312561035156, "learning_rate": 5.31659195881681e-06, "loss": 0.4846, "step": 9778 }, { "epoch": 0.4850935066223523, "grad_norm": 3.7914044857025146, "learning_rate": 5.315806443883064e-06, "loss": 0.195, "step": 9779 }, { "epoch": 0.48514311225755247, "grad_norm": 7.136967658996582, "learning_rate": 5.315020921123488e-06, "loss": 0.3365, "step": 9780 }, { "epoch": 0.4851927178927526, "grad_norm": 4.392636299133301, "learning_rate": 5.314235390557552e-06, "loss": 0.2232, "step": 9781 }, { "epoch": 0.48524232352795277, "grad_norm": 16.11343765258789, "learning_rate": 5.313449852204723e-06, "loss": 0.5599, "step": 9782 }, { "epoch": 0.48529192916315295, "grad_norm": 7.648800849914551, "learning_rate": 5.312664306084463e-06, "loss": 0.1844, "step": 9783 }, { "epoch": 0.48534153479835307, "grad_norm": 7.953958988189697, "learning_rate": 5.311878752216241e-06, "loss": 0.3393, "step": 9784 }, { "epoch": 0.48539114043355325, "grad_norm": 10.23384952545166, "learning_rate": 5.311093190619521e-06, "loss": 0.2955, "step": 9785 }, { "epoch": 0.4854407460687534, "grad_norm": 7.870073318481445, "learning_rate": 5.3103076213137726e-06, "loss": 0.2979, "step": 9786 }, { "epoch": 0.48549035170395355, "grad_norm": 5.988476276397705, "learning_rate": 5.30952204431846e-06, "loss": 0.3087, "step": 9787 }, { "epoch": 0.4855399573391537, "grad_norm": 4.520995140075684, "learning_rate": 5.308736459653051e-06, "loss": 0.2591, "step": 9788 }, { "epoch": 0.4855895629743539, "grad_norm": 4.939100742340088, "learning_rate": 5.307950867337014e-06, "loss": 0.2561, "step": 9789 }, { "epoch": 0.48563916860955403, "grad_norm": 14.803253173828125, "learning_rate": 5.307165267389815e-06, "loss": 0.3753, "step": 9790 }, { "epoch": 0.4856887742447542, "grad_norm": 10.692473411560059, "learning_rate": 5.30637965983092e-06, "loss": 0.4074, "step": 9791 }, { "epoch": 0.4857383798799544, "grad_norm": 6.2906999588012695, "learning_rate": 5.3055940446797995e-06, "loss": 0.3022, "step": 9792 }, { "epoch": 0.4857879855151545, "grad_norm": 5.592400550842285, "learning_rate": 5.304808421955921e-06, "loss": 0.3074, "step": 9793 }, { "epoch": 0.4858375911503547, "grad_norm": 5.389010429382324, "learning_rate": 5.304022791678751e-06, "loss": 0.2289, "step": 9794 }, { "epoch": 0.48588719678555486, "grad_norm": 5.950264930725098, "learning_rate": 5.30323715386776e-06, "loss": 0.2387, "step": 9795 }, { "epoch": 0.485936802420755, "grad_norm": 6.768879413604736, "learning_rate": 5.302451508542413e-06, "loss": 0.278, "step": 9796 }, { "epoch": 0.48598640805595517, "grad_norm": 7.062521457672119, "learning_rate": 5.301665855722182e-06, "loss": 0.3647, "step": 9797 }, { "epoch": 0.4860360136911553, "grad_norm": 5.162258625030518, "learning_rate": 5.300880195426535e-06, "loss": 0.2415, "step": 9798 }, { "epoch": 0.48608561932635547, "grad_norm": 12.345131874084473, "learning_rate": 5.300094527674941e-06, "loss": 0.3541, "step": 9799 }, { "epoch": 0.48613522496155565, "grad_norm": 7.308206558227539, "learning_rate": 5.2993088524868675e-06, "loss": 0.2521, "step": 9800 }, { "epoch": 0.48618483059675577, "grad_norm": 12.558944702148438, "learning_rate": 5.298523169881785e-06, "loss": 0.3154, "step": 9801 }, { "epoch": 0.48623443623195595, "grad_norm": 4.569591045379639, "learning_rate": 5.2977374798791645e-06, "loss": 0.1928, "step": 9802 }, { "epoch": 0.4862840418671561, "grad_norm": 7.595170974731445, "learning_rate": 5.296951782498475e-06, "loss": 0.3337, "step": 9803 }, { "epoch": 0.48633364750235625, "grad_norm": 7.241195201873779, "learning_rate": 5.296166077759185e-06, "loss": 0.2729, "step": 9804 }, { "epoch": 0.4863832531375564, "grad_norm": 7.139465808868408, "learning_rate": 5.2953803656807654e-06, "loss": 0.2977, "step": 9805 }, { "epoch": 0.4864328587727566, "grad_norm": 5.023504734039307, "learning_rate": 5.294594646282688e-06, "loss": 0.2826, "step": 9806 }, { "epoch": 0.4864824644079567, "grad_norm": 5.270866394042969, "learning_rate": 5.293808919584421e-06, "loss": 0.2808, "step": 9807 }, { "epoch": 0.4865320700431569, "grad_norm": 9.19725513458252, "learning_rate": 5.293023185605436e-06, "loss": 0.25, "step": 9808 }, { "epoch": 0.4865816756783571, "grad_norm": 6.088191509246826, "learning_rate": 5.292237444365206e-06, "loss": 0.3508, "step": 9809 }, { "epoch": 0.4866312813135572, "grad_norm": 12.216445922851562, "learning_rate": 5.291451695883198e-06, "loss": 0.3935, "step": 9810 }, { "epoch": 0.4866808869487574, "grad_norm": 7.219388008117676, "learning_rate": 5.290665940178884e-06, "loss": 0.2981, "step": 9811 }, { "epoch": 0.48673049258395756, "grad_norm": 6.733118534088135, "learning_rate": 5.289880177271738e-06, "loss": 0.3681, "step": 9812 }, { "epoch": 0.4867800982191577, "grad_norm": 6.955750942230225, "learning_rate": 5.28909440718123e-06, "loss": 0.2402, "step": 9813 }, { "epoch": 0.48682970385435786, "grad_norm": 6.973575115203857, "learning_rate": 5.288308629926832e-06, "loss": 0.2757, "step": 9814 }, { "epoch": 0.486879309489558, "grad_norm": 7.019506454467773, "learning_rate": 5.2875228455280145e-06, "loss": 0.3875, "step": 9815 }, { "epoch": 0.48692891512475817, "grad_norm": 11.68516731262207, "learning_rate": 5.286737054004253e-06, "loss": 0.5173, "step": 9816 }, { "epoch": 0.48697852075995834, "grad_norm": 5.894135475158691, "learning_rate": 5.285951255375017e-06, "loss": 0.247, "step": 9817 }, { "epoch": 0.48702812639515847, "grad_norm": 5.585081100463867, "learning_rate": 5.285165449659778e-06, "loss": 0.2857, "step": 9818 }, { "epoch": 0.48707773203035865, "grad_norm": 13.812097549438477, "learning_rate": 5.284379636878012e-06, "loss": 0.5062, "step": 9819 }, { "epoch": 0.4871273376655588, "grad_norm": 11.556662559509277, "learning_rate": 5.2835938170491885e-06, "loss": 0.4424, "step": 9820 }, { "epoch": 0.48717694330075895, "grad_norm": 14.598160743713379, "learning_rate": 5.282807990192782e-06, "loss": 0.2726, "step": 9821 }, { "epoch": 0.4872265489359591, "grad_norm": 6.207862377166748, "learning_rate": 5.282022156328266e-06, "loss": 0.3138, "step": 9822 }, { "epoch": 0.4872761545711593, "grad_norm": 9.21894645690918, "learning_rate": 5.281236315475114e-06, "loss": 0.3111, "step": 9823 }, { "epoch": 0.4873257602063594, "grad_norm": 7.733770847320557, "learning_rate": 5.280450467652799e-06, "loss": 0.3927, "step": 9824 }, { "epoch": 0.4873753658415596, "grad_norm": 8.217011451721191, "learning_rate": 5.279664612880793e-06, "loss": 0.3531, "step": 9825 }, { "epoch": 0.4874249714767598, "grad_norm": 8.713177680969238, "learning_rate": 5.278878751178573e-06, "loss": 0.3151, "step": 9826 }, { "epoch": 0.4874745771119599, "grad_norm": 7.562319278717041, "learning_rate": 5.278092882565611e-06, "loss": 0.3788, "step": 9827 }, { "epoch": 0.4875241827471601, "grad_norm": 7.455821990966797, "learning_rate": 5.277307007061381e-06, "loss": 0.2716, "step": 9828 }, { "epoch": 0.48757378838236026, "grad_norm": 4.996312618255615, "learning_rate": 5.276521124685357e-06, "loss": 0.2862, "step": 9829 }, { "epoch": 0.4876233940175604, "grad_norm": 3.933717966079712, "learning_rate": 5.275735235457017e-06, "loss": 0.1848, "step": 9830 }, { "epoch": 0.48767299965276056, "grad_norm": 8.029291152954102, "learning_rate": 5.274949339395832e-06, "loss": 0.3999, "step": 9831 }, { "epoch": 0.4877226052879607, "grad_norm": 8.128795623779297, "learning_rate": 5.274163436521277e-06, "loss": 0.2581, "step": 9832 }, { "epoch": 0.48777221092316086, "grad_norm": 5.60612154006958, "learning_rate": 5.273377526852829e-06, "loss": 0.2185, "step": 9833 }, { "epoch": 0.48782181655836104, "grad_norm": 6.405293941497803, "learning_rate": 5.272591610409961e-06, "loss": 0.2354, "step": 9834 }, { "epoch": 0.48787142219356117, "grad_norm": 15.113642692565918, "learning_rate": 5.271805687212151e-06, "loss": 0.4047, "step": 9835 }, { "epoch": 0.48792102782876134, "grad_norm": 16.178937911987305, "learning_rate": 5.271019757278873e-06, "loss": 0.3717, "step": 9836 }, { "epoch": 0.4879706334639615, "grad_norm": 8.9334716796875, "learning_rate": 5.270233820629603e-06, "loss": 0.3159, "step": 9837 }, { "epoch": 0.48802023909916165, "grad_norm": 7.697169780731201, "learning_rate": 5.269447877283817e-06, "loss": 0.3241, "step": 9838 }, { "epoch": 0.4880698447343618, "grad_norm": 9.38213062286377, "learning_rate": 5.268661927260988e-06, "loss": 0.4202, "step": 9839 }, { "epoch": 0.488119450369562, "grad_norm": 12.815366744995117, "learning_rate": 5.267875970580599e-06, "loss": 0.518, "step": 9840 }, { "epoch": 0.4881690560047621, "grad_norm": 5.795200824737549, "learning_rate": 5.267090007262119e-06, "loss": 0.3043, "step": 9841 }, { "epoch": 0.4882186616399623, "grad_norm": 7.736761093139648, "learning_rate": 5.266304037325029e-06, "loss": 0.3506, "step": 9842 }, { "epoch": 0.4882682672751625, "grad_norm": 4.77734899520874, "learning_rate": 5.265518060788806e-06, "loss": 0.2008, "step": 9843 }, { "epoch": 0.4883178729103626, "grad_norm": 5.75441837310791, "learning_rate": 5.264732077672923e-06, "loss": 0.3594, "step": 9844 }, { "epoch": 0.4883674785455628, "grad_norm": 4.593273162841797, "learning_rate": 5.263946087996859e-06, "loss": 0.2632, "step": 9845 }, { "epoch": 0.48841708418076296, "grad_norm": 7.643134117126465, "learning_rate": 5.263160091780093e-06, "loss": 0.2429, "step": 9846 }, { "epoch": 0.4884666898159631, "grad_norm": 12.274948120117188, "learning_rate": 5.262374089042102e-06, "loss": 0.3906, "step": 9847 }, { "epoch": 0.48851629545116326, "grad_norm": 7.800933837890625, "learning_rate": 5.26158807980236e-06, "loss": 0.3156, "step": 9848 }, { "epoch": 0.4885659010863634, "grad_norm": 3.919342041015625, "learning_rate": 5.260802064080348e-06, "loss": 0.1948, "step": 9849 }, { "epoch": 0.48861550672156356, "grad_norm": 6.061631679534912, "learning_rate": 5.260016041895542e-06, "loss": 0.26, "step": 9850 }, { "epoch": 0.48866511235676374, "grad_norm": 5.999487400054932, "learning_rate": 5.259230013267421e-06, "loss": 0.2864, "step": 9851 }, { "epoch": 0.48871471799196387, "grad_norm": 5.963387489318848, "learning_rate": 5.258443978215465e-06, "loss": 0.2849, "step": 9852 }, { "epoch": 0.48876432362716404, "grad_norm": 5.540299892425537, "learning_rate": 5.2576579367591486e-06, "loss": 0.2606, "step": 9853 }, { "epoch": 0.4888139292623642, "grad_norm": 10.293599128723145, "learning_rate": 5.256871888917953e-06, "loss": 0.3065, "step": 9854 }, { "epoch": 0.48886353489756434, "grad_norm": 5.000648498535156, "learning_rate": 5.256085834711354e-06, "loss": 0.1796, "step": 9855 }, { "epoch": 0.4889131405327645, "grad_norm": 5.496933460235596, "learning_rate": 5.2552997741588345e-06, "loss": 0.2275, "step": 9856 }, { "epoch": 0.4889627461679647, "grad_norm": 7.546912670135498, "learning_rate": 5.25451370727987e-06, "loss": 0.2206, "step": 9857 }, { "epoch": 0.4890123518031648, "grad_norm": 5.141834259033203, "learning_rate": 5.2537276340939405e-06, "loss": 0.2274, "step": 9858 }, { "epoch": 0.489061957438365, "grad_norm": 11.77934741973877, "learning_rate": 5.252941554620525e-06, "loss": 0.3662, "step": 9859 }, { "epoch": 0.4891115630735652, "grad_norm": 4.3122148513793945, "learning_rate": 5.252155468879105e-06, "loss": 0.2776, "step": 9860 }, { "epoch": 0.4891611687087653, "grad_norm": 11.972551345825195, "learning_rate": 5.251369376889157e-06, "loss": 0.3734, "step": 9861 }, { "epoch": 0.4892107743439655, "grad_norm": 4.449801445007324, "learning_rate": 5.250583278670163e-06, "loss": 0.2233, "step": 9862 }, { "epoch": 0.48926037997916566, "grad_norm": 9.044787406921387, "learning_rate": 5.249797174241602e-06, "loss": 0.3529, "step": 9863 }, { "epoch": 0.4893099856143658, "grad_norm": 5.437643051147461, "learning_rate": 5.249011063622953e-06, "loss": 0.2984, "step": 9864 }, { "epoch": 0.48935959124956596, "grad_norm": 6.052347183227539, "learning_rate": 5.248224946833698e-06, "loss": 0.3296, "step": 9865 }, { "epoch": 0.4894091968847661, "grad_norm": 5.398989200592041, "learning_rate": 5.247438823893316e-06, "loss": 0.2474, "step": 9866 }, { "epoch": 0.48945880251996626, "grad_norm": 8.963708877563477, "learning_rate": 5.246652694821289e-06, "loss": 0.3142, "step": 9867 }, { "epoch": 0.48950840815516644, "grad_norm": 13.41919231414795, "learning_rate": 5.245866559637096e-06, "loss": 0.3907, "step": 9868 }, { "epoch": 0.48955801379036656, "grad_norm": 7.157254695892334, "learning_rate": 5.245080418360218e-06, "loss": 0.2597, "step": 9869 }, { "epoch": 0.48960761942556674, "grad_norm": 9.58812141418457, "learning_rate": 5.244294271010137e-06, "loss": 0.3826, "step": 9870 }, { "epoch": 0.4896572250607669, "grad_norm": 5.7512526512146, "learning_rate": 5.243508117606334e-06, "loss": 0.3792, "step": 9871 }, { "epoch": 0.48970683069596704, "grad_norm": 4.8651933670043945, "learning_rate": 5.2427219581682885e-06, "loss": 0.286, "step": 9872 }, { "epoch": 0.4897564363311672, "grad_norm": 6.450568199157715, "learning_rate": 5.241935792715484e-06, "loss": 0.2152, "step": 9873 }, { "epoch": 0.4898060419663674, "grad_norm": 3.6908910274505615, "learning_rate": 5.241149621267401e-06, "loss": 0.244, "step": 9874 }, { "epoch": 0.4898556476015675, "grad_norm": 6.347245693206787, "learning_rate": 5.240363443843521e-06, "loss": 0.3063, "step": 9875 }, { "epoch": 0.4899052532367677, "grad_norm": 14.878560066223145, "learning_rate": 5.239577260463325e-06, "loss": 0.3726, "step": 9876 }, { "epoch": 0.4899548588719679, "grad_norm": 21.676862716674805, "learning_rate": 5.238791071146299e-06, "loss": 0.4165, "step": 9877 }, { "epoch": 0.490004464507168, "grad_norm": 5.762522220611572, "learning_rate": 5.2380048759119196e-06, "loss": 0.2947, "step": 9878 }, { "epoch": 0.4900540701423682, "grad_norm": 6.39026403427124, "learning_rate": 5.237218674779672e-06, "loss": 0.2823, "step": 9879 }, { "epoch": 0.49010367577756836, "grad_norm": 4.227600574493408, "learning_rate": 5.2364324677690395e-06, "loss": 0.2536, "step": 9880 }, { "epoch": 0.4901532814127685, "grad_norm": 5.438297748565674, "learning_rate": 5.235646254899502e-06, "loss": 0.3226, "step": 9881 }, { "epoch": 0.49020288704796866, "grad_norm": 6.146712303161621, "learning_rate": 5.234860036190546e-06, "loss": 0.2991, "step": 9882 }, { "epoch": 0.4902524926831688, "grad_norm": 8.422111511230469, "learning_rate": 5.23407381166165e-06, "loss": 0.2046, "step": 9883 }, { "epoch": 0.49030209831836896, "grad_norm": 4.85667085647583, "learning_rate": 5.2332875813323004e-06, "loss": 0.2902, "step": 9884 }, { "epoch": 0.49035170395356914, "grad_norm": 6.133353233337402, "learning_rate": 5.232501345221977e-06, "loss": 0.2989, "step": 9885 }, { "epoch": 0.49040130958876926, "grad_norm": 4.667693138122559, "learning_rate": 5.2317151033501665e-06, "loss": 0.2948, "step": 9886 }, { "epoch": 0.49045091522396944, "grad_norm": 4.735659122467041, "learning_rate": 5.230928855736352e-06, "loss": 0.25, "step": 9887 }, { "epoch": 0.4905005208591696, "grad_norm": 13.959145545959473, "learning_rate": 5.230142602400015e-06, "loss": 0.3073, "step": 9888 }, { "epoch": 0.49055012649436974, "grad_norm": 5.9273786544799805, "learning_rate": 5.2293563433606395e-06, "loss": 0.2928, "step": 9889 }, { "epoch": 0.4905997321295699, "grad_norm": 5.3704447746276855, "learning_rate": 5.228570078637711e-06, "loss": 0.4129, "step": 9890 }, { "epoch": 0.4906493377647701, "grad_norm": 10.031989097595215, "learning_rate": 5.227783808250712e-06, "loss": 0.3983, "step": 9891 }, { "epoch": 0.4906989433999702, "grad_norm": 5.779898166656494, "learning_rate": 5.226997532219128e-06, "loss": 0.2795, "step": 9892 }, { "epoch": 0.4907485490351704, "grad_norm": 10.429482460021973, "learning_rate": 5.226211250562441e-06, "loss": 0.5063, "step": 9893 }, { "epoch": 0.4907981546703706, "grad_norm": 4.896081924438477, "learning_rate": 5.2254249633001385e-06, "loss": 0.3429, "step": 9894 }, { "epoch": 0.4908477603055707, "grad_norm": 6.207757472991943, "learning_rate": 5.224638670451703e-06, "loss": 0.2602, "step": 9895 }, { "epoch": 0.4908973659407709, "grad_norm": 6.418350696563721, "learning_rate": 5.223852372036619e-06, "loss": 0.194, "step": 9896 }, { "epoch": 0.490946971575971, "grad_norm": 7.398200988769531, "learning_rate": 5.223066068074373e-06, "loss": 0.2827, "step": 9897 }, { "epoch": 0.4909965772111712, "grad_norm": 4.757523059844971, "learning_rate": 5.222279758584447e-06, "loss": 0.2304, "step": 9898 }, { "epoch": 0.49104618284637136, "grad_norm": 6.49528169631958, "learning_rate": 5.22149344358633e-06, "loss": 0.2655, "step": 9899 }, { "epoch": 0.4910957884815715, "grad_norm": 7.384490966796875, "learning_rate": 5.220707123099505e-06, "loss": 0.3671, "step": 9900 }, { "epoch": 0.49114539411677166, "grad_norm": 10.868736267089844, "learning_rate": 5.219920797143457e-06, "loss": 0.3963, "step": 9901 }, { "epoch": 0.49119499975197184, "grad_norm": 9.800521850585938, "learning_rate": 5.219134465737672e-06, "loss": 0.3954, "step": 9902 }, { "epoch": 0.49124460538717196, "grad_norm": 6.709048271179199, "learning_rate": 5.218348128901636e-06, "loss": 0.2957, "step": 9903 }, { "epoch": 0.49129421102237214, "grad_norm": 7.886453151702881, "learning_rate": 5.217561786654834e-06, "loss": 0.3496, "step": 9904 }, { "epoch": 0.4913438166575723, "grad_norm": 10.909893035888672, "learning_rate": 5.2167754390167544e-06, "loss": 0.2591, "step": 9905 }, { "epoch": 0.49139342229277244, "grad_norm": 18.365478515625, "learning_rate": 5.21598908600688e-06, "loss": 0.4869, "step": 9906 }, { "epoch": 0.4914430279279726, "grad_norm": 5.708981990814209, "learning_rate": 5.215202727644698e-06, "loss": 0.3357, "step": 9907 }, { "epoch": 0.4914926335631728, "grad_norm": 9.774896621704102, "learning_rate": 5.214416363949695e-06, "loss": 0.3071, "step": 9908 }, { "epoch": 0.4915422391983729, "grad_norm": 10.089506149291992, "learning_rate": 5.213629994941358e-06, "loss": 0.3472, "step": 9909 }, { "epoch": 0.4915918448335731, "grad_norm": 6.348788738250732, "learning_rate": 5.212843620639174e-06, "loss": 0.2221, "step": 9910 }, { "epoch": 0.4916414504687733, "grad_norm": 8.257039070129395, "learning_rate": 5.212057241062628e-06, "loss": 0.3445, "step": 9911 }, { "epoch": 0.4916910561039734, "grad_norm": 7.405411243438721, "learning_rate": 5.2112708562312075e-06, "loss": 0.2973, "step": 9912 }, { "epoch": 0.4917406617391736, "grad_norm": 7.43672513961792, "learning_rate": 5.2104844661643995e-06, "loss": 0.3313, "step": 9913 }, { "epoch": 0.4917902673743737, "grad_norm": 7.916833877563477, "learning_rate": 5.209698070881692e-06, "loss": 0.2807, "step": 9914 }, { "epoch": 0.4918398730095739, "grad_norm": 5.775954246520996, "learning_rate": 5.2089116704025715e-06, "loss": 0.3334, "step": 9915 }, { "epoch": 0.49188947864477406, "grad_norm": 5.1548171043396, "learning_rate": 5.208125264746524e-06, "loss": 0.2707, "step": 9916 }, { "epoch": 0.4919390842799742, "grad_norm": 9.240477561950684, "learning_rate": 5.207338853933039e-06, "loss": 0.4432, "step": 9917 }, { "epoch": 0.49198868991517436, "grad_norm": 5.406302452087402, "learning_rate": 5.206552437981603e-06, "loss": 0.382, "step": 9918 }, { "epoch": 0.49203829555037454, "grad_norm": 4.5282979011535645, "learning_rate": 5.205766016911705e-06, "loss": 0.244, "step": 9919 }, { "epoch": 0.49208790118557466, "grad_norm": 9.885600090026855, "learning_rate": 5.204979590742831e-06, "loss": 0.3223, "step": 9920 }, { "epoch": 0.49213750682077484, "grad_norm": 6.728142738342285, "learning_rate": 5.204193159494472e-06, "loss": 0.2656, "step": 9921 }, { "epoch": 0.492187112455975, "grad_norm": 4.9963250160217285, "learning_rate": 5.2034067231861126e-06, "loss": 0.2551, "step": 9922 }, { "epoch": 0.49223671809117514, "grad_norm": 7.640858173370361, "learning_rate": 5.202620281837243e-06, "loss": 0.2694, "step": 9923 }, { "epoch": 0.4922863237263753, "grad_norm": 5.05973482131958, "learning_rate": 5.201833835467351e-06, "loss": 0.2795, "step": 9924 }, { "epoch": 0.4923359293615755, "grad_norm": 6.763050079345703, "learning_rate": 5.201047384095925e-06, "loss": 0.2941, "step": 9925 }, { "epoch": 0.4923855349967756, "grad_norm": 5.341095924377441, "learning_rate": 5.200260927742454e-06, "loss": 0.2666, "step": 9926 }, { "epoch": 0.4924351406319758, "grad_norm": 4.916743278503418, "learning_rate": 5.199474466426426e-06, "loss": 0.2407, "step": 9927 }, { "epoch": 0.492484746267176, "grad_norm": 5.1827168464660645, "learning_rate": 5.198688000167333e-06, "loss": 0.2666, "step": 9928 }, { "epoch": 0.4925343519023761, "grad_norm": 7.123518943786621, "learning_rate": 5.197901528984659e-06, "loss": 0.3016, "step": 9929 }, { "epoch": 0.4925839575375763, "grad_norm": 4.973653316497803, "learning_rate": 5.197115052897895e-06, "loss": 0.2413, "step": 9930 }, { "epoch": 0.4926335631727764, "grad_norm": 5.428023815155029, "learning_rate": 5.196328571926533e-06, "loss": 0.327, "step": 9931 }, { "epoch": 0.4926831688079766, "grad_norm": 5.9196248054504395, "learning_rate": 5.19554208609006e-06, "loss": 0.3009, "step": 9932 }, { "epoch": 0.49273277444317676, "grad_norm": 7.5355753898620605, "learning_rate": 5.194755595407964e-06, "loss": 0.3032, "step": 9933 }, { "epoch": 0.4927823800783769, "grad_norm": 7.586380481719971, "learning_rate": 5.193969099899738e-06, "loss": 0.2825, "step": 9934 }, { "epoch": 0.49283198571357706, "grad_norm": 11.911052703857422, "learning_rate": 5.193182599584868e-06, "loss": 0.3588, "step": 9935 }, { "epoch": 0.49288159134877724, "grad_norm": 9.773468971252441, "learning_rate": 5.192396094482848e-06, "loss": 0.3098, "step": 9936 }, { "epoch": 0.49293119698397736, "grad_norm": 5.382143974304199, "learning_rate": 5.191609584613164e-06, "loss": 0.2569, "step": 9937 }, { "epoch": 0.49298080261917754, "grad_norm": 6.764337062835693, "learning_rate": 5.190823069995309e-06, "loss": 0.2636, "step": 9938 }, { "epoch": 0.4930304082543777, "grad_norm": 7.316099643707275, "learning_rate": 5.1900365506487714e-06, "loss": 0.2526, "step": 9939 }, { "epoch": 0.49308001388957784, "grad_norm": 5.397401332855225, "learning_rate": 5.189250026593043e-06, "loss": 0.2214, "step": 9940 }, { "epoch": 0.493129619524778, "grad_norm": 6.65148401260376, "learning_rate": 5.188463497847612e-06, "loss": 0.3336, "step": 9941 }, { "epoch": 0.4931792251599782, "grad_norm": 6.124547004699707, "learning_rate": 5.187676964431972e-06, "loss": 0.2493, "step": 9942 }, { "epoch": 0.4932288307951783, "grad_norm": 11.226873397827148, "learning_rate": 5.186890426365609e-06, "loss": 0.4003, "step": 9943 }, { "epoch": 0.4932784364303785, "grad_norm": 8.824226379394531, "learning_rate": 5.1861038836680185e-06, "loss": 0.2989, "step": 9944 }, { "epoch": 0.4933280420655787, "grad_norm": 14.023744583129883, "learning_rate": 5.185317336358691e-06, "loss": 0.5326, "step": 9945 }, { "epoch": 0.4933776477007788, "grad_norm": 7.977948188781738, "learning_rate": 5.184530784457113e-06, "loss": 0.2199, "step": 9946 }, { "epoch": 0.493427253335979, "grad_norm": 8.28648567199707, "learning_rate": 5.183744227982781e-06, "loss": 0.2158, "step": 9947 }, { "epoch": 0.4934768589711791, "grad_norm": 5.242259502410889, "learning_rate": 5.182957666955184e-06, "loss": 0.2601, "step": 9948 }, { "epoch": 0.4935264646063793, "grad_norm": 9.764296531677246, "learning_rate": 5.182171101393811e-06, "loss": 0.408, "step": 9949 }, { "epoch": 0.49357607024157946, "grad_norm": 9.483511924743652, "learning_rate": 5.181384531318157e-06, "loss": 0.3415, "step": 9950 }, { "epoch": 0.4936256758767796, "grad_norm": 8.689580917358398, "learning_rate": 5.180597956747714e-06, "loss": 0.3319, "step": 9951 }, { "epoch": 0.49367528151197976, "grad_norm": 11.979104995727539, "learning_rate": 5.17981137770197e-06, "loss": 0.5167, "step": 9952 }, { "epoch": 0.49372488714717994, "grad_norm": 6.082956790924072, "learning_rate": 5.179024794200419e-06, "loss": 0.2687, "step": 9953 }, { "epoch": 0.49377449278238006, "grad_norm": 4.99788761138916, "learning_rate": 5.178238206262553e-06, "loss": 0.2719, "step": 9954 }, { "epoch": 0.49382409841758024, "grad_norm": 6.2609734535217285, "learning_rate": 5.177451613907863e-06, "loss": 0.2685, "step": 9955 }, { "epoch": 0.4938737040527804, "grad_norm": 6.478408336639404, "learning_rate": 5.176665017155842e-06, "loss": 0.3095, "step": 9956 }, { "epoch": 0.49392330968798054, "grad_norm": 5.715626239776611, "learning_rate": 5.175878416025982e-06, "loss": 0.3006, "step": 9957 }, { "epoch": 0.4939729153231807, "grad_norm": 9.064571380615234, "learning_rate": 5.175091810537775e-06, "loss": 0.3939, "step": 9958 }, { "epoch": 0.4940225209583809, "grad_norm": 6.790001392364502, "learning_rate": 5.174305200710714e-06, "loss": 0.3207, "step": 9959 }, { "epoch": 0.494072126593581, "grad_norm": 6.3738837242126465, "learning_rate": 5.173518586564291e-06, "loss": 0.2037, "step": 9960 }, { "epoch": 0.4941217322287812, "grad_norm": 11.232192993164062, "learning_rate": 5.172731968118e-06, "loss": 0.3627, "step": 9961 }, { "epoch": 0.4941713378639814, "grad_norm": 5.248261451721191, "learning_rate": 5.171945345391332e-06, "loss": 0.2899, "step": 9962 }, { "epoch": 0.4942209434991815, "grad_norm": 6.640726089477539, "learning_rate": 5.171158718403779e-06, "loss": 0.2624, "step": 9963 }, { "epoch": 0.4942705491343817, "grad_norm": 5.652749538421631, "learning_rate": 5.170372087174838e-06, "loss": 0.3332, "step": 9964 }, { "epoch": 0.4943201547695818, "grad_norm": 15.91501235961914, "learning_rate": 5.169585451723998e-06, "loss": 0.4073, "step": 9965 }, { "epoch": 0.494369760404782, "grad_norm": 9.81600284576416, "learning_rate": 5.168798812070754e-06, "loss": 0.2854, "step": 9966 }, { "epoch": 0.49441936603998216, "grad_norm": 5.728621006011963, "learning_rate": 5.1680121682346e-06, "loss": 0.2866, "step": 9967 }, { "epoch": 0.4944689716751823, "grad_norm": 6.43895149230957, "learning_rate": 5.1672255202350284e-06, "loss": 0.2605, "step": 9968 }, { "epoch": 0.49451857731038246, "grad_norm": 5.602861404418945, "learning_rate": 5.166438868091532e-06, "loss": 0.2712, "step": 9969 }, { "epoch": 0.49456818294558264, "grad_norm": 21.26321029663086, "learning_rate": 5.1656522118236065e-06, "loss": 0.3525, "step": 9970 }, { "epoch": 0.49461778858078276, "grad_norm": 8.7938871383667, "learning_rate": 5.164865551450743e-06, "loss": 0.2942, "step": 9971 }, { "epoch": 0.49466739421598294, "grad_norm": 34.971885681152344, "learning_rate": 5.164078886992437e-06, "loss": 0.3536, "step": 9972 }, { "epoch": 0.4947169998511831, "grad_norm": 7.9521708488464355, "learning_rate": 5.163292218468181e-06, "loss": 0.3076, "step": 9973 }, { "epoch": 0.49476660548638324, "grad_norm": 10.005535125732422, "learning_rate": 5.1625055458974714e-06, "loss": 0.3654, "step": 9974 }, { "epoch": 0.4948162111215834, "grad_norm": 6.058260917663574, "learning_rate": 5.161718869299801e-06, "loss": 0.2408, "step": 9975 }, { "epoch": 0.4948658167567836, "grad_norm": 3.160898447036743, "learning_rate": 5.160932188694663e-06, "loss": 0.1219, "step": 9976 }, { "epoch": 0.4949154223919837, "grad_norm": 11.4862699508667, "learning_rate": 5.1601455041015515e-06, "loss": 0.5054, "step": 9977 }, { "epoch": 0.4949650280271839, "grad_norm": 5.436927795410156, "learning_rate": 5.159358815539964e-06, "loss": 0.2844, "step": 9978 }, { "epoch": 0.4950146336623841, "grad_norm": 11.840625762939453, "learning_rate": 5.158572123029392e-06, "loss": 0.358, "step": 9979 }, { "epoch": 0.4950642392975842, "grad_norm": 6.481263160705566, "learning_rate": 5.157785426589331e-06, "loss": 0.3568, "step": 9980 }, { "epoch": 0.4951138449327844, "grad_norm": 8.068035125732422, "learning_rate": 5.156998726239275e-06, "loss": 0.3628, "step": 9981 }, { "epoch": 0.4951634505679845, "grad_norm": 9.371216773986816, "learning_rate": 5.156212021998722e-06, "loss": 0.4482, "step": 9982 }, { "epoch": 0.4952130562031847, "grad_norm": 19.336185455322266, "learning_rate": 5.1554253138871624e-06, "loss": 0.3588, "step": 9983 }, { "epoch": 0.49526266183838485, "grad_norm": 9.024352073669434, "learning_rate": 5.154638601924094e-06, "loss": 0.3702, "step": 9984 }, { "epoch": 0.495312267473585, "grad_norm": 6.002664089202881, "learning_rate": 5.153851886129011e-06, "loss": 0.3123, "step": 9985 }, { "epoch": 0.49536187310878516, "grad_norm": 4.505930423736572, "learning_rate": 5.153065166521408e-06, "loss": 0.3207, "step": 9986 }, { "epoch": 0.49541147874398533, "grad_norm": 6.227695465087891, "learning_rate": 5.1522784431207814e-06, "loss": 0.2978, "step": 9987 }, { "epoch": 0.49546108437918546, "grad_norm": 7.213475227355957, "learning_rate": 5.151491715946627e-06, "loss": 0.3129, "step": 9988 }, { "epoch": 0.49551069001438564, "grad_norm": 4.808908462524414, "learning_rate": 5.150704985018438e-06, "loss": 0.3297, "step": 9989 }, { "epoch": 0.4955602956495858, "grad_norm": 6.16365385055542, "learning_rate": 5.149918250355711e-06, "loss": 0.2831, "step": 9990 }, { "epoch": 0.49560990128478594, "grad_norm": 7.794902801513672, "learning_rate": 5.149131511977943e-06, "loss": 0.439, "step": 9991 }, { "epoch": 0.4956595069199861, "grad_norm": 5.148605823516846, "learning_rate": 5.14834476990463e-06, "loss": 0.2546, "step": 9992 }, { "epoch": 0.4957091125551863, "grad_norm": 6.293702602386475, "learning_rate": 5.1475580241552635e-06, "loss": 0.2753, "step": 9993 }, { "epoch": 0.4957587181903864, "grad_norm": 12.849392890930176, "learning_rate": 5.146771274749344e-06, "loss": 0.4016, "step": 9994 }, { "epoch": 0.4958083238255866, "grad_norm": 4.906828880310059, "learning_rate": 5.145984521706367e-06, "loss": 0.2549, "step": 9995 }, { "epoch": 0.4958579294607868, "grad_norm": 11.338976860046387, "learning_rate": 5.145197765045825e-06, "loss": 0.3729, "step": 9996 }, { "epoch": 0.4959075350959869, "grad_norm": 4.1654839515686035, "learning_rate": 5.144411004787218e-06, "loss": 0.2551, "step": 9997 }, { "epoch": 0.4959571407311871, "grad_norm": 6.850991725921631, "learning_rate": 5.143624240950041e-06, "loss": 0.2844, "step": 9998 }, { "epoch": 0.4960067463663872, "grad_norm": 4.50486421585083, "learning_rate": 5.14283747355379e-06, "loss": 0.3049, "step": 9999 }, { "epoch": 0.4960563520015874, "grad_norm": 7.809747219085693, "learning_rate": 5.142050702617963e-06, "loss": 0.3314, "step": 10000 }, { "epoch": 0.49610595763678755, "grad_norm": 11.018082618713379, "learning_rate": 5.141263928162053e-06, "loss": 0.3414, "step": 10001 }, { "epoch": 0.4961555632719877, "grad_norm": 4.855249404907227, "learning_rate": 5.140477150205562e-06, "loss": 0.1765, "step": 10002 }, { "epoch": 0.49620516890718785, "grad_norm": 10.54318904876709, "learning_rate": 5.139690368767981e-06, "loss": 0.2741, "step": 10003 }, { "epoch": 0.49625477454238803, "grad_norm": 4.63976526260376, "learning_rate": 5.138903583868811e-06, "loss": 0.2545, "step": 10004 }, { "epoch": 0.49630438017758816, "grad_norm": 4.77744722366333, "learning_rate": 5.1381167955275465e-06, "loss": 0.2648, "step": 10005 }, { "epoch": 0.49635398581278833, "grad_norm": 6.039743900299072, "learning_rate": 5.137330003763685e-06, "loss": 0.3721, "step": 10006 }, { "epoch": 0.4964035914479885, "grad_norm": 5.6725006103515625, "learning_rate": 5.1365432085967244e-06, "loss": 0.2789, "step": 10007 }, { "epoch": 0.49645319708318864, "grad_norm": 7.981740474700928, "learning_rate": 5.135756410046162e-06, "loss": 0.3364, "step": 10008 }, { "epoch": 0.4965028027183888, "grad_norm": 5.6883039474487305, "learning_rate": 5.134969608131495e-06, "loss": 0.3401, "step": 10009 }, { "epoch": 0.496552408353589, "grad_norm": 8.13473892211914, "learning_rate": 5.134182802872218e-06, "loss": 0.3167, "step": 10010 }, { "epoch": 0.4966020139887891, "grad_norm": 12.476479530334473, "learning_rate": 5.133395994287831e-06, "loss": 0.3399, "step": 10011 }, { "epoch": 0.4966516196239893, "grad_norm": 7.159725189208984, "learning_rate": 5.1326091823978305e-06, "loss": 0.4101, "step": 10012 }, { "epoch": 0.49670122525918947, "grad_norm": 5.906008243560791, "learning_rate": 5.131822367221716e-06, "loss": 0.2748, "step": 10013 }, { "epoch": 0.4967508308943896, "grad_norm": 4.926085472106934, "learning_rate": 5.1310355487789835e-06, "loss": 0.2309, "step": 10014 }, { "epoch": 0.4968004365295898, "grad_norm": 6.935176372528076, "learning_rate": 5.130248727089129e-06, "loss": 0.2333, "step": 10015 }, { "epoch": 0.4968500421647899, "grad_norm": 12.125812530517578, "learning_rate": 5.129461902171653e-06, "loss": 0.3859, "step": 10016 }, { "epoch": 0.4968996477999901, "grad_norm": 8.543200492858887, "learning_rate": 5.128675074046052e-06, "loss": 0.2154, "step": 10017 }, { "epoch": 0.49694925343519025, "grad_norm": 5.586706161499023, "learning_rate": 5.127888242731826e-06, "loss": 0.2961, "step": 10018 }, { "epoch": 0.4969988590703904, "grad_norm": 5.817416667938232, "learning_rate": 5.127101408248472e-06, "loss": 0.258, "step": 10019 }, { "epoch": 0.49704846470559055, "grad_norm": 7.4753618240356445, "learning_rate": 5.126314570615485e-06, "loss": 0.2953, "step": 10020 }, { "epoch": 0.49709807034079073, "grad_norm": 5.505138397216797, "learning_rate": 5.125527729852368e-06, "loss": 0.2391, "step": 10021 }, { "epoch": 0.49714767597599085, "grad_norm": 13.422934532165527, "learning_rate": 5.124740885978616e-06, "loss": 0.352, "step": 10022 }, { "epoch": 0.49719728161119103, "grad_norm": 7.825465202331543, "learning_rate": 5.123954039013729e-06, "loss": 0.2846, "step": 10023 }, { "epoch": 0.4972468872463912, "grad_norm": 3.3713388442993164, "learning_rate": 5.123167188977204e-06, "loss": 0.1792, "step": 10024 }, { "epoch": 0.49729649288159133, "grad_norm": 7.875011920928955, "learning_rate": 5.122380335888541e-06, "loss": 0.3554, "step": 10025 }, { "epoch": 0.4973460985167915, "grad_norm": 5.225671768188477, "learning_rate": 5.121593479767237e-06, "loss": 0.1791, "step": 10026 }, { "epoch": 0.4973957041519917, "grad_norm": 6.09761381149292, "learning_rate": 5.120806620632792e-06, "loss": 0.2665, "step": 10027 }, { "epoch": 0.4974453097871918, "grad_norm": 7.9993696212768555, "learning_rate": 5.120019758504705e-06, "loss": 0.3999, "step": 10028 }, { "epoch": 0.497494915422392, "grad_norm": 12.270045280456543, "learning_rate": 5.119232893402475e-06, "loss": 0.5641, "step": 10029 }, { "epoch": 0.4975445210575921, "grad_norm": 8.409976959228516, "learning_rate": 5.118446025345598e-06, "loss": 0.3602, "step": 10030 }, { "epoch": 0.4975941266927923, "grad_norm": 8.48727798461914, "learning_rate": 5.117659154353574e-06, "loss": 0.3445, "step": 10031 }, { "epoch": 0.49764373232799247, "grad_norm": 7.101213455200195, "learning_rate": 5.116872280445907e-06, "loss": 0.3359, "step": 10032 }, { "epoch": 0.4976933379631926, "grad_norm": 8.269536972045898, "learning_rate": 5.11608540364209e-06, "loss": 0.1884, "step": 10033 }, { "epoch": 0.4977429435983928, "grad_norm": 6.487573146820068, "learning_rate": 5.115298523961623e-06, "loss": 0.2929, "step": 10034 }, { "epoch": 0.49779254923359295, "grad_norm": 5.7520833015441895, "learning_rate": 5.114511641424006e-06, "loss": 0.2541, "step": 10035 }, { "epoch": 0.4978421548687931, "grad_norm": 5.767866611480713, "learning_rate": 5.11372475604874e-06, "loss": 0.316, "step": 10036 }, { "epoch": 0.49789176050399325, "grad_norm": 8.493309020996094, "learning_rate": 5.112937867855323e-06, "loss": 0.2678, "step": 10037 }, { "epoch": 0.49794136613919343, "grad_norm": 9.971631050109863, "learning_rate": 5.112150976863255e-06, "loss": 0.3675, "step": 10038 }, { "epoch": 0.49799097177439355, "grad_norm": 14.375434875488281, "learning_rate": 5.111364083092035e-06, "loss": 0.2853, "step": 10039 }, { "epoch": 0.49804057740959373, "grad_norm": 12.844331741333008, "learning_rate": 5.110577186561162e-06, "loss": 0.3081, "step": 10040 }, { "epoch": 0.4980901830447939, "grad_norm": 6.969056606292725, "learning_rate": 5.1097902872901365e-06, "loss": 0.2647, "step": 10041 }, { "epoch": 0.49813978867999403, "grad_norm": 8.076080322265625, "learning_rate": 5.1090033852984584e-06, "loss": 0.4454, "step": 10042 }, { "epoch": 0.4981893943151942, "grad_norm": 9.003023147583008, "learning_rate": 5.108216480605627e-06, "loss": 0.3432, "step": 10043 }, { "epoch": 0.4982389999503944, "grad_norm": 5.900435447692871, "learning_rate": 5.107429573231141e-06, "loss": 0.3603, "step": 10044 }, { "epoch": 0.4982886055855945, "grad_norm": 5.488587856292725, "learning_rate": 5.106642663194502e-06, "loss": 0.2519, "step": 10045 }, { "epoch": 0.4983382112207947, "grad_norm": 6.576782703399658, "learning_rate": 5.1058557505152115e-06, "loss": 0.2944, "step": 10046 }, { "epoch": 0.4983878168559948, "grad_norm": 9.8087739944458, "learning_rate": 5.105068835212766e-06, "loss": 0.312, "step": 10047 }, { "epoch": 0.498437422491195, "grad_norm": 5.660604000091553, "learning_rate": 5.104281917306667e-06, "loss": 0.3217, "step": 10048 }, { "epoch": 0.49848702812639517, "grad_norm": 10.309857368469238, "learning_rate": 5.103494996816416e-06, "loss": 0.2407, "step": 10049 }, { "epoch": 0.4985366337615953, "grad_norm": 8.688982963562012, "learning_rate": 5.102708073761512e-06, "loss": 0.2723, "step": 10050 }, { "epoch": 0.49858623939679547, "grad_norm": 5.898219108581543, "learning_rate": 5.101921148161454e-06, "loss": 0.2577, "step": 10051 }, { "epoch": 0.49863584503199565, "grad_norm": 6.941312789916992, "learning_rate": 5.101134220035745e-06, "loss": 0.244, "step": 10052 }, { "epoch": 0.4986854506671958, "grad_norm": 4.776365280151367, "learning_rate": 5.100347289403886e-06, "loss": 0.2788, "step": 10053 }, { "epoch": 0.49873505630239595, "grad_norm": 7.418647766113281, "learning_rate": 5.099560356285372e-06, "loss": 0.3647, "step": 10054 }, { "epoch": 0.49878466193759613, "grad_norm": 6.596678256988525, "learning_rate": 5.09877342069971e-06, "loss": 0.3269, "step": 10055 }, { "epoch": 0.49883426757279625, "grad_norm": 7.527460098266602, "learning_rate": 5.097986482666397e-06, "loss": 0.2735, "step": 10056 }, { "epoch": 0.49888387320799643, "grad_norm": 7.308705806732178, "learning_rate": 5.097199542204935e-06, "loss": 0.2878, "step": 10057 }, { "epoch": 0.4989334788431966, "grad_norm": 8.410953521728516, "learning_rate": 5.096412599334824e-06, "loss": 0.207, "step": 10058 }, { "epoch": 0.49898308447839673, "grad_norm": 9.801460266113281, "learning_rate": 5.095625654075566e-06, "loss": 0.3782, "step": 10059 }, { "epoch": 0.4990326901135969, "grad_norm": 5.208006858825684, "learning_rate": 5.094838706446661e-06, "loss": 0.2996, "step": 10060 }, { "epoch": 0.4990822957487971, "grad_norm": 7.2384114265441895, "learning_rate": 5.094051756467609e-06, "loss": 0.3009, "step": 10061 }, { "epoch": 0.4991319013839972, "grad_norm": 7.692325592041016, "learning_rate": 5.093264804157911e-06, "loss": 0.3567, "step": 10062 }, { "epoch": 0.4991815070191974, "grad_norm": 4.687165260314941, "learning_rate": 5.092477849537072e-06, "loss": 0.2616, "step": 10063 }, { "epoch": 0.4992311126543975, "grad_norm": 3.985992193222046, "learning_rate": 5.091690892624588e-06, "loss": 0.2983, "step": 10064 }, { "epoch": 0.4992807182895977, "grad_norm": 5.719496250152588, "learning_rate": 5.090903933439962e-06, "loss": 0.2879, "step": 10065 }, { "epoch": 0.49933032392479787, "grad_norm": 8.352676391601562, "learning_rate": 5.090116972002695e-06, "loss": 0.3521, "step": 10066 }, { "epoch": 0.499379929559998, "grad_norm": 4.662708759307861, "learning_rate": 5.089330008332292e-06, "loss": 0.2613, "step": 10067 }, { "epoch": 0.49942953519519817, "grad_norm": 9.482650756835938, "learning_rate": 5.088543042448247e-06, "loss": 0.2892, "step": 10068 }, { "epoch": 0.49947914083039835, "grad_norm": 17.713687896728516, "learning_rate": 5.087756074370067e-06, "loss": 0.2735, "step": 10069 }, { "epoch": 0.49952874646559847, "grad_norm": 8.520698547363281, "learning_rate": 5.086969104117252e-06, "loss": 0.3716, "step": 10070 }, { "epoch": 0.49957835210079865, "grad_norm": 5.912116050720215, "learning_rate": 5.086182131709304e-06, "loss": 0.3327, "step": 10071 }, { "epoch": 0.49962795773599883, "grad_norm": 10.183931350708008, "learning_rate": 5.085395157165723e-06, "loss": 0.351, "step": 10072 }, { "epoch": 0.49967756337119895, "grad_norm": 9.19064712524414, "learning_rate": 5.08460818050601e-06, "loss": 0.4094, "step": 10073 }, { "epoch": 0.49972716900639913, "grad_norm": 7.0406270027160645, "learning_rate": 5.083821201749669e-06, "loss": 0.2038, "step": 10074 }, { "epoch": 0.4997767746415993, "grad_norm": 7.432027339935303, "learning_rate": 5.083034220916201e-06, "loss": 0.3379, "step": 10075 }, { "epoch": 0.49982638027679943, "grad_norm": 6.198999881744385, "learning_rate": 5.082247238025108e-06, "loss": 0.2503, "step": 10076 }, { "epoch": 0.4998759859119996, "grad_norm": 13.374300003051758, "learning_rate": 5.08146025309589e-06, "loss": 0.4569, "step": 10077 }, { "epoch": 0.4999255915471998, "grad_norm": 9.191315650939941, "learning_rate": 5.080673266148051e-06, "loss": 0.2873, "step": 10078 }, { "epoch": 0.4999751971823999, "grad_norm": 4.9951395988464355, "learning_rate": 5.079886277201092e-06, "loss": 0.1615, "step": 10079 }, { "epoch": 0.5000248028176001, "grad_norm": 9.215527534484863, "learning_rate": 5.0790992862745144e-06, "loss": 0.2712, "step": 10080 }, { "epoch": 0.5000248028176001, "eval_loss": 0.3055925667285919, "eval_runtime": 35.6007, "eval_samples_per_second": 45.758, "eval_steps_per_second": 5.73, "step": 10080 }, { "epoch": 0.5000744084528003, "grad_norm": 6.341675758361816, "learning_rate": 5.0783122933878206e-06, "loss": 0.3315, "step": 10081 }, { "epoch": 0.5001240140880004, "grad_norm": 7.141168117523193, "learning_rate": 5.077525298560513e-06, "loss": 0.2874, "step": 10082 }, { "epoch": 0.5001736197232005, "grad_norm": 7.34470272064209, "learning_rate": 5.0767383018120945e-06, "loss": 0.2784, "step": 10083 }, { "epoch": 0.5002232253584007, "grad_norm": 5.617773532867432, "learning_rate": 5.075951303162065e-06, "loss": 0.3526, "step": 10084 }, { "epoch": 0.5002728309936009, "grad_norm": 5.7267022132873535, "learning_rate": 5.075164302629927e-06, "loss": 0.2274, "step": 10085 }, { "epoch": 0.500322436628801, "grad_norm": 3.5687317848205566, "learning_rate": 5.074377300235186e-06, "loss": 0.3086, "step": 10086 }, { "epoch": 0.5003720422640012, "grad_norm": 6.640989303588867, "learning_rate": 5.073590295997339e-06, "loss": 0.2312, "step": 10087 }, { "epoch": 0.5004216478992013, "grad_norm": 6.554194450378418, "learning_rate": 5.072803289935893e-06, "loss": 0.3088, "step": 10088 }, { "epoch": 0.5004712535344015, "grad_norm": 7.539403915405273, "learning_rate": 5.072016282070348e-06, "loss": 0.3175, "step": 10089 }, { "epoch": 0.5005208591696017, "grad_norm": 11.808662414550781, "learning_rate": 5.071229272420207e-06, "loss": 0.4545, "step": 10090 }, { "epoch": 0.5005704648048018, "grad_norm": 4.380859375, "learning_rate": 5.0704422610049734e-06, "loss": 0.2521, "step": 10091 }, { "epoch": 0.500620070440002, "grad_norm": 12.085180282592773, "learning_rate": 5.069655247844146e-06, "loss": 0.3474, "step": 10092 }, { "epoch": 0.5006696760752022, "grad_norm": 4.993369102478027, "learning_rate": 5.0688682329572335e-06, "loss": 0.2796, "step": 10093 }, { "epoch": 0.5007192817104023, "grad_norm": 4.086522102355957, "learning_rate": 5.068081216363732e-06, "loss": 0.2764, "step": 10094 }, { "epoch": 0.5007688873456024, "grad_norm": 7.527854919433594, "learning_rate": 5.067294198083149e-06, "loss": 0.2371, "step": 10095 }, { "epoch": 0.5008184929808026, "grad_norm": 7.956541538238525, "learning_rate": 5.066507178134985e-06, "loss": 0.3573, "step": 10096 }, { "epoch": 0.5008680986160028, "grad_norm": 53.041664123535156, "learning_rate": 5.065720156538744e-06, "loss": 0.3614, "step": 10097 }, { "epoch": 0.500917704251203, "grad_norm": 3.900899887084961, "learning_rate": 5.064933133313926e-06, "loss": 0.3274, "step": 10098 }, { "epoch": 0.5009673098864031, "grad_norm": 5.209844589233398, "learning_rate": 5.064146108480036e-06, "loss": 0.261, "step": 10099 }, { "epoch": 0.5010169155216032, "grad_norm": 6.638406753540039, "learning_rate": 5.0633590820565785e-06, "loss": 0.2859, "step": 10100 }, { "epoch": 0.5010665211568034, "grad_norm": 5.830708026885986, "learning_rate": 5.0625720540630515e-06, "loss": 0.2999, "step": 10101 }, { "epoch": 0.5011161267920036, "grad_norm": 8.176077842712402, "learning_rate": 5.061785024518961e-06, "loss": 0.343, "step": 10102 }, { "epoch": 0.5011657324272037, "grad_norm": 6.141563415527344, "learning_rate": 5.0609979934438115e-06, "loss": 0.2268, "step": 10103 }, { "epoch": 0.5012153380624039, "grad_norm": 6.872922420501709, "learning_rate": 5.060210960857103e-06, "loss": 0.3178, "step": 10104 }, { "epoch": 0.501264943697604, "grad_norm": 10.606734275817871, "learning_rate": 5.0594239267783385e-06, "loss": 0.3751, "step": 10105 }, { "epoch": 0.5013145493328042, "grad_norm": 4.381717681884766, "learning_rate": 5.058636891227024e-06, "loss": 0.3583, "step": 10106 }, { "epoch": 0.5013641549680043, "grad_norm": 4.719518184661865, "learning_rate": 5.05784985422266e-06, "loss": 0.3007, "step": 10107 }, { "epoch": 0.5014137606032045, "grad_norm": 5.821807861328125, "learning_rate": 5.057062815784751e-06, "loss": 0.3316, "step": 10108 }, { "epoch": 0.5014633662384047, "grad_norm": 10.081191062927246, "learning_rate": 5.0562757759327985e-06, "loss": 0.2714, "step": 10109 }, { "epoch": 0.5015129718736049, "grad_norm": 5.513812065124512, "learning_rate": 5.055488734686308e-06, "loss": 0.253, "step": 10110 }, { "epoch": 0.501562577508805, "grad_norm": 5.575324535369873, "learning_rate": 5.054701692064781e-06, "loss": 0.2933, "step": 10111 }, { "epoch": 0.5016121831440051, "grad_norm": 8.990559577941895, "learning_rate": 5.053914648087721e-06, "loss": 0.3201, "step": 10112 }, { "epoch": 0.5016617887792053, "grad_norm": 5.413699626922607, "learning_rate": 5.0531276027746336e-06, "loss": 0.39, "step": 10113 }, { "epoch": 0.5017113944144055, "grad_norm": 5.34440279006958, "learning_rate": 5.052340556145018e-06, "loss": 0.3136, "step": 10114 }, { "epoch": 0.5017610000496057, "grad_norm": 6.7003655433654785, "learning_rate": 5.05155350821838e-06, "loss": 0.3867, "step": 10115 }, { "epoch": 0.5018106056848058, "grad_norm": 10.216938972473145, "learning_rate": 5.050766459014222e-06, "loss": 0.2881, "step": 10116 }, { "epoch": 0.5018602113200059, "grad_norm": 5.371045112609863, "learning_rate": 5.04997940855205e-06, "loss": 0.2002, "step": 10117 }, { "epoch": 0.5019098169552061, "grad_norm": 8.116275787353516, "learning_rate": 5.049192356851366e-06, "loss": 0.3267, "step": 10118 }, { "epoch": 0.5019594225904063, "grad_norm": 17.200727462768555, "learning_rate": 5.048405303931672e-06, "loss": 0.2797, "step": 10119 }, { "epoch": 0.5020090282256064, "grad_norm": 4.468558311462402, "learning_rate": 5.0476182498124725e-06, "loss": 0.2157, "step": 10120 }, { "epoch": 0.5020586338608066, "grad_norm": 5.593733787536621, "learning_rate": 5.046831194513272e-06, "loss": 0.2936, "step": 10121 }, { "epoch": 0.5021082394960067, "grad_norm": 5.9118242263793945, "learning_rate": 5.046044138053572e-06, "loss": 0.2684, "step": 10122 }, { "epoch": 0.5021578451312069, "grad_norm": 7.691667556762695, "learning_rate": 5.045257080452878e-06, "loss": 0.3973, "step": 10123 }, { "epoch": 0.502207450766407, "grad_norm": 8.31061840057373, "learning_rate": 5.044470021730693e-06, "loss": 0.242, "step": 10124 }, { "epoch": 0.5022570564016072, "grad_norm": 4.866999626159668, "learning_rate": 5.043682961906522e-06, "loss": 0.276, "step": 10125 }, { "epoch": 0.5023066620368074, "grad_norm": 18.093786239624023, "learning_rate": 5.042895900999866e-06, "loss": 0.5393, "step": 10126 }, { "epoch": 0.5023562676720076, "grad_norm": 5.879266262054443, "learning_rate": 5.04210883903023e-06, "loss": 0.3082, "step": 10127 }, { "epoch": 0.5024058733072077, "grad_norm": 8.415984153747559, "learning_rate": 5.041321776017118e-06, "loss": 0.4289, "step": 10128 }, { "epoch": 0.5024554789424078, "grad_norm": 6.228621959686279, "learning_rate": 5.040534711980034e-06, "loss": 0.2048, "step": 10129 }, { "epoch": 0.502505084577608, "grad_norm": 6.070095539093018, "learning_rate": 5.039747646938483e-06, "loss": 0.2972, "step": 10130 }, { "epoch": 0.5025546902128082, "grad_norm": 5.500394821166992, "learning_rate": 5.038960580911966e-06, "loss": 0.2542, "step": 10131 }, { "epoch": 0.5026042958480084, "grad_norm": 3.7257323265075684, "learning_rate": 5.038173513919988e-06, "loss": 0.3217, "step": 10132 }, { "epoch": 0.5026539014832085, "grad_norm": 9.525280952453613, "learning_rate": 5.0373864459820535e-06, "loss": 0.3821, "step": 10133 }, { "epoch": 0.5027035071184086, "grad_norm": 7.491353511810303, "learning_rate": 5.0365993771176655e-06, "loss": 0.3747, "step": 10134 }, { "epoch": 0.5027531127536088, "grad_norm": 6.895884990692139, "learning_rate": 5.03581230734633e-06, "loss": 0.3278, "step": 10135 }, { "epoch": 0.502802718388809, "grad_norm": 5.713080406188965, "learning_rate": 5.0350252366875465e-06, "loss": 0.3396, "step": 10136 }, { "epoch": 0.5028523240240091, "grad_norm": 5.822707653045654, "learning_rate": 5.034238165160824e-06, "loss": 0.341, "step": 10137 }, { "epoch": 0.5029019296592093, "grad_norm": 12.3209228515625, "learning_rate": 5.033451092785664e-06, "loss": 0.4441, "step": 10138 }, { "epoch": 0.5029515352944094, "grad_norm": 4.682898044586182, "learning_rate": 5.032664019581569e-06, "loss": 0.2748, "step": 10139 }, { "epoch": 0.5030011409296096, "grad_norm": 5.505782127380371, "learning_rate": 5.031876945568047e-06, "loss": 0.2634, "step": 10140 }, { "epoch": 0.5030507465648097, "grad_norm": 4.238722801208496, "learning_rate": 5.0310898707646e-06, "loss": 0.219, "step": 10141 }, { "epoch": 0.5031003522000099, "grad_norm": 3.9116830825805664, "learning_rate": 5.03030279519073e-06, "loss": 0.2705, "step": 10142 }, { "epoch": 0.5031499578352101, "grad_norm": 13.13309097290039, "learning_rate": 5.029515718865944e-06, "loss": 0.3524, "step": 10143 }, { "epoch": 0.5031995634704103, "grad_norm": 10.152663230895996, "learning_rate": 5.0287286418097455e-06, "loss": 0.3132, "step": 10144 }, { "epoch": 0.5032491691056103, "grad_norm": 8.029993057250977, "learning_rate": 5.027941564041638e-06, "loss": 0.4176, "step": 10145 }, { "epoch": 0.5032987747408105, "grad_norm": 5.160091400146484, "learning_rate": 5.027154485581125e-06, "loss": 0.3304, "step": 10146 }, { "epoch": 0.5033483803760107, "grad_norm": 3.537871837615967, "learning_rate": 5.026367406447713e-06, "loss": 0.2249, "step": 10147 }, { "epoch": 0.5033979860112109, "grad_norm": 8.253304481506348, "learning_rate": 5.025580326660904e-06, "loss": 0.3263, "step": 10148 }, { "epoch": 0.5034475916464111, "grad_norm": 7.755084991455078, "learning_rate": 5.0247932462402025e-06, "loss": 0.2692, "step": 10149 }, { "epoch": 0.5034971972816112, "grad_norm": 10.235527992248535, "learning_rate": 5.024006165205114e-06, "loss": 0.4511, "step": 10150 }, { "epoch": 0.5035468029168113, "grad_norm": 7.580565929412842, "learning_rate": 5.0232190835751425e-06, "loss": 0.2593, "step": 10151 }, { "epoch": 0.5035964085520115, "grad_norm": 7.549787521362305, "learning_rate": 5.02243200136979e-06, "loss": 0.3345, "step": 10152 }, { "epoch": 0.5036460141872117, "grad_norm": 5.138314247131348, "learning_rate": 5.021644918608563e-06, "loss": 0.3198, "step": 10153 }, { "epoch": 0.5036956198224118, "grad_norm": 6.617100715637207, "learning_rate": 5.020857835310966e-06, "loss": 0.2964, "step": 10154 }, { "epoch": 0.503745225457612, "grad_norm": 5.044167518615723, "learning_rate": 5.020070751496501e-06, "loss": 0.2075, "step": 10155 }, { "epoch": 0.5037948310928121, "grad_norm": 12.882962226867676, "learning_rate": 5.019283667184675e-06, "loss": 0.3308, "step": 10156 }, { "epoch": 0.5038444367280123, "grad_norm": 8.691221237182617, "learning_rate": 5.01849658239499e-06, "loss": 0.3221, "step": 10157 }, { "epoch": 0.5038940423632124, "grad_norm": 7.174479961395264, "learning_rate": 5.017709497146952e-06, "loss": 0.2351, "step": 10158 }, { "epoch": 0.5039436479984126, "grad_norm": 4.153695583343506, "learning_rate": 5.016922411460064e-06, "loss": 0.2259, "step": 10159 }, { "epoch": 0.5039932536336128, "grad_norm": 11.885159492492676, "learning_rate": 5.0161353253538305e-06, "loss": 0.315, "step": 10160 }, { "epoch": 0.504042859268813, "grad_norm": 7.544816970825195, "learning_rate": 5.015348238847758e-06, "loss": 0.3681, "step": 10161 }, { "epoch": 0.504092464904013, "grad_norm": 6.7614922523498535, "learning_rate": 5.014561151961348e-06, "loss": 0.2083, "step": 10162 }, { "epoch": 0.5041420705392132, "grad_norm": 7.881591796875, "learning_rate": 5.013774064714106e-06, "loss": 0.2873, "step": 10163 }, { "epoch": 0.5041916761744134, "grad_norm": 5.005392551422119, "learning_rate": 5.012986977125537e-06, "loss": 0.2509, "step": 10164 }, { "epoch": 0.5042412818096136, "grad_norm": 5.574682712554932, "learning_rate": 5.012199889215145e-06, "loss": 0.253, "step": 10165 }, { "epoch": 0.5042908874448138, "grad_norm": 5.959031581878662, "learning_rate": 5.011412801002433e-06, "loss": 0.2482, "step": 10166 }, { "epoch": 0.5043404930800139, "grad_norm": 5.706813812255859, "learning_rate": 5.010625712506908e-06, "loss": 0.286, "step": 10167 }, { "epoch": 0.504390098715214, "grad_norm": 13.605709075927734, "learning_rate": 5.009838623748072e-06, "loss": 0.381, "step": 10168 }, { "epoch": 0.5044397043504142, "grad_norm": 8.464911460876465, "learning_rate": 5.009051534745432e-06, "loss": 0.3055, "step": 10169 }, { "epoch": 0.5044893099856144, "grad_norm": 5.33907413482666, "learning_rate": 5.008264445518489e-06, "loss": 0.3574, "step": 10170 }, { "epoch": 0.5045389156208145, "grad_norm": 9.846304893493652, "learning_rate": 5.007477356086751e-06, "loss": 0.2005, "step": 10171 }, { "epoch": 0.5045885212560147, "grad_norm": 6.005799770355225, "learning_rate": 5.0066902664697195e-06, "loss": 0.2013, "step": 10172 }, { "epoch": 0.5046381268912148, "grad_norm": 10.137453079223633, "learning_rate": 5.0059031766869e-06, "loss": 0.366, "step": 10173 }, { "epoch": 0.504687732526415, "grad_norm": 9.976432800292969, "learning_rate": 5.005116086757799e-06, "loss": 0.286, "step": 10174 }, { "epoch": 0.5047373381616151, "grad_norm": 7.556794166564941, "learning_rate": 5.004328996701917e-06, "loss": 0.3103, "step": 10175 }, { "epoch": 0.5047869437968153, "grad_norm": 9.018052101135254, "learning_rate": 5.00354190653876e-06, "loss": 0.3003, "step": 10176 }, { "epoch": 0.5048365494320155, "grad_norm": 8.43010425567627, "learning_rate": 5.002754816287835e-06, "loss": 0.3795, "step": 10177 }, { "epoch": 0.5048861550672157, "grad_norm": 8.508604049682617, "learning_rate": 5.001967725968643e-06, "loss": 0.4051, "step": 10178 }, { "epoch": 0.5049357607024157, "grad_norm": 4.752413272857666, "learning_rate": 5.001180635600691e-06, "loss": 0.2114, "step": 10179 }, { "epoch": 0.5049853663376159, "grad_norm": 4.9821858406066895, "learning_rate": 5.000393545203481e-06, "loss": 0.2642, "step": 10180 }, { "epoch": 0.5050349719728161, "grad_norm": 7.840715408325195, "learning_rate": 4.99960645479652e-06, "loss": 0.3852, "step": 10181 }, { "epoch": 0.5050845776080163, "grad_norm": 13.259681701660156, "learning_rate": 4.998819364399311e-06, "loss": 0.3101, "step": 10182 }, { "epoch": 0.5051341832432165, "grad_norm": 9.285529136657715, "learning_rate": 4.998032274031358e-06, "loss": 0.2113, "step": 10183 }, { "epoch": 0.5051837888784166, "grad_norm": 7.192688941955566, "learning_rate": 4.997245183712167e-06, "loss": 0.3619, "step": 10184 }, { "epoch": 0.5052333945136167, "grad_norm": 6.4968976974487305, "learning_rate": 4.996458093461241e-06, "loss": 0.2924, "step": 10185 }, { "epoch": 0.5052830001488169, "grad_norm": 9.199323654174805, "learning_rate": 4.9956710032980855e-06, "loss": 0.2571, "step": 10186 }, { "epoch": 0.5053326057840171, "grad_norm": 6.610884189605713, "learning_rate": 4.994883913242203e-06, "loss": 0.2202, "step": 10187 }, { "epoch": 0.5053822114192172, "grad_norm": 11.319299697875977, "learning_rate": 4.994096823313102e-06, "loss": 0.3072, "step": 10188 }, { "epoch": 0.5054318170544174, "grad_norm": 6.635526657104492, "learning_rate": 4.993309733530283e-06, "loss": 0.3181, "step": 10189 }, { "epoch": 0.5054814226896175, "grad_norm": 7.351611614227295, "learning_rate": 4.992522643913251e-06, "loss": 0.2633, "step": 10190 }, { "epoch": 0.5055310283248177, "grad_norm": 5.346571445465088, "learning_rate": 4.991735554481513e-06, "loss": 0.3139, "step": 10191 }, { "epoch": 0.5055806339600178, "grad_norm": 7.193594932556152, "learning_rate": 4.990948465254569e-06, "loss": 0.1938, "step": 10192 }, { "epoch": 0.505630239595218, "grad_norm": 14.929519653320312, "learning_rate": 4.990161376251929e-06, "loss": 0.3605, "step": 10193 }, { "epoch": 0.5056798452304182, "grad_norm": 6.67909049987793, "learning_rate": 4.989374287493094e-06, "loss": 0.3171, "step": 10194 }, { "epoch": 0.5057294508656184, "grad_norm": 8.448286056518555, "learning_rate": 4.988587198997567e-06, "loss": 0.4211, "step": 10195 }, { "epoch": 0.5057790565008184, "grad_norm": 8.39950180053711, "learning_rate": 4.987800110784857e-06, "loss": 0.3469, "step": 10196 }, { "epoch": 0.5058286621360186, "grad_norm": 12.084235191345215, "learning_rate": 4.987013022874465e-06, "loss": 0.4532, "step": 10197 }, { "epoch": 0.5058782677712188, "grad_norm": 5.4263787269592285, "learning_rate": 4.986225935285894e-06, "loss": 0.2729, "step": 10198 }, { "epoch": 0.505927873406419, "grad_norm": 9.548151969909668, "learning_rate": 4.9854388480386536e-06, "loss": 0.2802, "step": 10199 }, { "epoch": 0.5059774790416192, "grad_norm": 5.699795246124268, "learning_rate": 4.9846517611522446e-06, "loss": 0.2299, "step": 10200 }, { "epoch": 0.5060270846768193, "grad_norm": 7.92634916305542, "learning_rate": 4.98386467464617e-06, "loss": 0.2879, "step": 10201 }, { "epoch": 0.5060766903120194, "grad_norm": 12.017353057861328, "learning_rate": 4.983077588539938e-06, "loss": 0.3957, "step": 10202 }, { "epoch": 0.5061262959472196, "grad_norm": 6.9896769523620605, "learning_rate": 4.982290502853051e-06, "loss": 0.2219, "step": 10203 }, { "epoch": 0.5061759015824198, "grad_norm": 6.472829341888428, "learning_rate": 4.981503417605012e-06, "loss": 0.3602, "step": 10204 }, { "epoch": 0.5062255072176199, "grad_norm": 6.4791669845581055, "learning_rate": 4.980716332815328e-06, "loss": 0.3149, "step": 10205 }, { "epoch": 0.5062751128528201, "grad_norm": 14.98845100402832, "learning_rate": 4.9799292485035015e-06, "loss": 0.6017, "step": 10206 }, { "epoch": 0.5063247184880202, "grad_norm": 13.760946273803711, "learning_rate": 4.9791421646890354e-06, "loss": 0.3985, "step": 10207 }, { "epoch": 0.5063743241232204, "grad_norm": 7.713657379150391, "learning_rate": 4.978355081391438e-06, "loss": 0.2799, "step": 10208 }, { "epoch": 0.5064239297584205, "grad_norm": 5.712226390838623, "learning_rate": 4.97756799863021e-06, "loss": 0.257, "step": 10209 }, { "epoch": 0.5064735353936207, "grad_norm": 4.367496013641357, "learning_rate": 4.976780916424859e-06, "loss": 0.2653, "step": 10210 }, { "epoch": 0.5065231410288209, "grad_norm": 4.744803428649902, "learning_rate": 4.975993834794888e-06, "loss": 0.2493, "step": 10211 }, { "epoch": 0.5065727466640211, "grad_norm": 4.191468715667725, "learning_rate": 4.9752067537597975e-06, "loss": 0.2292, "step": 10212 }, { "epoch": 0.5066223522992211, "grad_norm": 8.786563873291016, "learning_rate": 4.974419673339097e-06, "loss": 0.3384, "step": 10213 }, { "epoch": 0.5066719579344213, "grad_norm": 8.785992622375488, "learning_rate": 4.973632593552289e-06, "loss": 0.364, "step": 10214 }, { "epoch": 0.5067215635696215, "grad_norm": 7.2477216720581055, "learning_rate": 4.972845514418875e-06, "loss": 0.311, "step": 10215 }, { "epoch": 0.5067711692048217, "grad_norm": 5.1205854415893555, "learning_rate": 4.972058435958363e-06, "loss": 0.2727, "step": 10216 }, { "epoch": 0.5068207748400219, "grad_norm": 4.145666122436523, "learning_rate": 4.971271358190257e-06, "loss": 0.3067, "step": 10217 }, { "epoch": 0.506870380475222, "grad_norm": 11.531705856323242, "learning_rate": 4.970484281134057e-06, "loss": 0.2901, "step": 10218 }, { "epoch": 0.5069199861104221, "grad_norm": 23.82900619506836, "learning_rate": 4.969697204809272e-06, "loss": 0.402, "step": 10219 }, { "epoch": 0.5069695917456223, "grad_norm": 4.62525749206543, "learning_rate": 4.968910129235403e-06, "loss": 0.2957, "step": 10220 }, { "epoch": 0.5070191973808225, "grad_norm": 12.85830307006836, "learning_rate": 4.9681230544319545e-06, "loss": 0.3613, "step": 10221 }, { "epoch": 0.5070688030160226, "grad_norm": 5.585264682769775, "learning_rate": 4.967335980418432e-06, "loss": 0.3468, "step": 10222 }, { "epoch": 0.5071184086512228, "grad_norm": 7.606163024902344, "learning_rate": 4.9665489072143394e-06, "loss": 0.3439, "step": 10223 }, { "epoch": 0.5071680142864229, "grad_norm": 4.637982368469238, "learning_rate": 4.965761834839177e-06, "loss": 0.2363, "step": 10224 }, { "epoch": 0.5072176199216231, "grad_norm": 3.47261643409729, "learning_rate": 4.964974763312455e-06, "loss": 0.243, "step": 10225 }, { "epoch": 0.5072672255568232, "grad_norm": 7.0913496017456055, "learning_rate": 4.964187692653671e-06, "loss": 0.3065, "step": 10226 }, { "epoch": 0.5073168311920234, "grad_norm": 4.839111328125, "learning_rate": 4.963400622882335e-06, "loss": 0.1971, "step": 10227 }, { "epoch": 0.5073664368272236, "grad_norm": 5.847415447235107, "learning_rate": 4.962613554017949e-06, "loss": 0.3453, "step": 10228 }, { "epoch": 0.5074160424624238, "grad_norm": 7.6446051597595215, "learning_rate": 4.961826486080013e-06, "loss": 0.3278, "step": 10229 }, { "epoch": 0.5074656480976238, "grad_norm": 5.8647236824035645, "learning_rate": 4.961039419088036e-06, "loss": 0.2876, "step": 10230 }, { "epoch": 0.507515253732824, "grad_norm": 9.098711967468262, "learning_rate": 4.960252353061518e-06, "loss": 0.3358, "step": 10231 }, { "epoch": 0.5075648593680242, "grad_norm": 10.666495323181152, "learning_rate": 4.959465288019965e-06, "loss": 0.3713, "step": 10232 }, { "epoch": 0.5076144650032244, "grad_norm": 6.008532524108887, "learning_rate": 4.9586782239828826e-06, "loss": 0.2633, "step": 10233 }, { "epoch": 0.5076640706384246, "grad_norm": 5.619235515594482, "learning_rate": 4.957891160969771e-06, "loss": 0.2261, "step": 10234 }, { "epoch": 0.5077136762736247, "grad_norm": 5.747548580169678, "learning_rate": 4.957104099000136e-06, "loss": 0.2287, "step": 10235 }, { "epoch": 0.5077632819088248, "grad_norm": 9.100489616394043, "learning_rate": 4.95631703809348e-06, "loss": 0.3243, "step": 10236 }, { "epoch": 0.507812887544025, "grad_norm": 8.417930603027344, "learning_rate": 4.9555299782693086e-06, "loss": 0.3718, "step": 10237 }, { "epoch": 0.5078624931792252, "grad_norm": 5.062253952026367, "learning_rate": 4.954742919547124e-06, "loss": 0.3111, "step": 10238 }, { "epoch": 0.5079120988144253, "grad_norm": 10.773720741271973, "learning_rate": 4.95395586194643e-06, "loss": 0.3145, "step": 10239 }, { "epoch": 0.5079617044496255, "grad_norm": 8.010977745056152, "learning_rate": 4.953168805486731e-06, "loss": 0.2795, "step": 10240 }, { "epoch": 0.5080113100848256, "grad_norm": 3.7649424076080322, "learning_rate": 4.952381750187529e-06, "loss": 0.214, "step": 10241 }, { "epoch": 0.5080609157200258, "grad_norm": 6.5291242599487305, "learning_rate": 4.9515946960683306e-06, "loss": 0.2989, "step": 10242 }, { "epoch": 0.5081105213552259, "grad_norm": 4.33674955368042, "learning_rate": 4.9508076431486365e-06, "loss": 0.2428, "step": 10243 }, { "epoch": 0.5081601269904261, "grad_norm": 8.001078605651855, "learning_rate": 4.950020591447951e-06, "loss": 0.2992, "step": 10244 }, { "epoch": 0.5082097326256263, "grad_norm": 12.38687801361084, "learning_rate": 4.949233540985779e-06, "loss": 0.3167, "step": 10245 }, { "epoch": 0.5082593382608265, "grad_norm": 15.134125709533691, "learning_rate": 4.94844649178162e-06, "loss": 0.4666, "step": 10246 }, { "epoch": 0.5083089438960265, "grad_norm": 6.389484882354736, "learning_rate": 4.947659443854984e-06, "loss": 0.3107, "step": 10247 }, { "epoch": 0.5083585495312267, "grad_norm": 7.51671838760376, "learning_rate": 4.94687239722537e-06, "loss": 0.2884, "step": 10248 }, { "epoch": 0.5084081551664269, "grad_norm": 8.703864097595215, "learning_rate": 4.946085351912279e-06, "loss": 0.3001, "step": 10249 }, { "epoch": 0.5084577608016271, "grad_norm": 13.007613182067871, "learning_rate": 4.94529830793522e-06, "loss": 0.3541, "step": 10250 }, { "epoch": 0.5085073664368273, "grad_norm": 5.4985527992248535, "learning_rate": 4.944511265313694e-06, "loss": 0.2298, "step": 10251 }, { "epoch": 0.5085569720720274, "grad_norm": 9.718242645263672, "learning_rate": 4.9437242240672015e-06, "loss": 0.3246, "step": 10252 }, { "epoch": 0.5086065777072275, "grad_norm": 5.182747840881348, "learning_rate": 4.9429371842152505e-06, "loss": 0.2525, "step": 10253 }, { "epoch": 0.5086561833424277, "grad_norm": 5.511587619781494, "learning_rate": 4.942150145777342e-06, "loss": 0.2356, "step": 10254 }, { "epoch": 0.5087057889776279, "grad_norm": 7.553070545196533, "learning_rate": 4.941363108772977e-06, "loss": 0.3255, "step": 10255 }, { "epoch": 0.508755394612828, "grad_norm": 4.027078151702881, "learning_rate": 4.940576073221662e-06, "loss": 0.2762, "step": 10256 }, { "epoch": 0.5088050002480282, "grad_norm": 10.021049499511719, "learning_rate": 4.9397890391429e-06, "loss": 0.3059, "step": 10257 }, { "epoch": 0.5088546058832283, "grad_norm": 4.949598789215088, "learning_rate": 4.93900200655619e-06, "loss": 0.2333, "step": 10258 }, { "epoch": 0.5089042115184285, "grad_norm": 8.689262390136719, "learning_rate": 4.93821497548104e-06, "loss": 0.3539, "step": 10259 }, { "epoch": 0.5089538171536286, "grad_norm": 11.891173362731934, "learning_rate": 4.937427945936951e-06, "loss": 0.4133, "step": 10260 }, { "epoch": 0.5090034227888288, "grad_norm": 9.311477661132812, "learning_rate": 4.936640917943424e-06, "loss": 0.31, "step": 10261 }, { "epoch": 0.509053028424029, "grad_norm": 8.273197174072266, "learning_rate": 4.935853891519966e-06, "loss": 0.4231, "step": 10262 }, { "epoch": 0.5091026340592292, "grad_norm": 8.312949180603027, "learning_rate": 4.935066866686074e-06, "loss": 0.348, "step": 10263 }, { "epoch": 0.5091522396944292, "grad_norm": 23.52509117126465, "learning_rate": 4.934279843461258e-06, "loss": 0.3568, "step": 10264 }, { "epoch": 0.5092018453296294, "grad_norm": 6.565138339996338, "learning_rate": 4.933492821865017e-06, "loss": 0.2532, "step": 10265 }, { "epoch": 0.5092514509648296, "grad_norm": 32.74956130981445, "learning_rate": 4.932705801916851e-06, "loss": 0.4287, "step": 10266 }, { "epoch": 0.5093010566000298, "grad_norm": 8.839552879333496, "learning_rate": 4.9319187836362685e-06, "loss": 0.2504, "step": 10267 }, { "epoch": 0.50935066223523, "grad_norm": 7.145938396453857, "learning_rate": 4.931131767042769e-06, "loss": 0.3059, "step": 10268 }, { "epoch": 0.5094002678704301, "grad_norm": 11.881983757019043, "learning_rate": 4.930344752155854e-06, "loss": 0.2838, "step": 10269 }, { "epoch": 0.5094498735056302, "grad_norm": 11.014067649841309, "learning_rate": 4.929557738995029e-06, "loss": 0.3669, "step": 10270 }, { "epoch": 0.5094994791408304, "grad_norm": 6.598433494567871, "learning_rate": 4.928770727579795e-06, "loss": 0.3115, "step": 10271 }, { "epoch": 0.5095490847760306, "grad_norm": 6.593930721282959, "learning_rate": 4.927983717929653e-06, "loss": 0.3044, "step": 10272 }, { "epoch": 0.5095986904112307, "grad_norm": 10.770648002624512, "learning_rate": 4.927196710064109e-06, "loss": 0.2965, "step": 10273 }, { "epoch": 0.5096482960464309, "grad_norm": 6.829646587371826, "learning_rate": 4.9264097040026635e-06, "loss": 0.2018, "step": 10274 }, { "epoch": 0.509697901681631, "grad_norm": 5.97036600112915, "learning_rate": 4.925622699764816e-06, "loss": 0.2489, "step": 10275 }, { "epoch": 0.5097475073168312, "grad_norm": 7.651604175567627, "learning_rate": 4.924835697370075e-06, "loss": 0.2291, "step": 10276 }, { "epoch": 0.5097971129520313, "grad_norm": 5.205723762512207, "learning_rate": 4.924048696837938e-06, "loss": 0.248, "step": 10277 }, { "epoch": 0.5098467185872315, "grad_norm": 4.040471076965332, "learning_rate": 4.923261698187907e-06, "loss": 0.2313, "step": 10278 }, { "epoch": 0.5098963242224317, "grad_norm": 8.980449676513672, "learning_rate": 4.9224747014394885e-06, "loss": 0.3482, "step": 10279 }, { "epoch": 0.5099459298576319, "grad_norm": 4.742276191711426, "learning_rate": 4.92168770661218e-06, "loss": 0.2599, "step": 10280 }, { "epoch": 0.5099955354928319, "grad_norm": 8.021442413330078, "learning_rate": 4.920900713725487e-06, "loss": 0.425, "step": 10281 }, { "epoch": 0.5100451411280321, "grad_norm": 14.470251083374023, "learning_rate": 4.920113722798911e-06, "loss": 0.4483, "step": 10282 }, { "epoch": 0.5100947467632323, "grad_norm": 7.30640983581543, "learning_rate": 4.91932673385195e-06, "loss": 0.268, "step": 10283 }, { "epoch": 0.5101443523984325, "grad_norm": 9.613506317138672, "learning_rate": 4.9185397469041115e-06, "loss": 0.2734, "step": 10284 }, { "epoch": 0.5101939580336327, "grad_norm": 9.492326736450195, "learning_rate": 4.917752761974893e-06, "loss": 0.3183, "step": 10285 }, { "epoch": 0.5102435636688328, "grad_norm": 16.34928321838379, "learning_rate": 4.9169657790838e-06, "loss": 0.4898, "step": 10286 }, { "epoch": 0.5102931693040329, "grad_norm": 6.3059258460998535, "learning_rate": 4.916178798250332e-06, "loss": 0.1911, "step": 10287 }, { "epoch": 0.5103427749392331, "grad_norm": 5.5887908935546875, "learning_rate": 4.915391819493991e-06, "loss": 0.1886, "step": 10288 }, { "epoch": 0.5103923805744333, "grad_norm": 5.798867702484131, "learning_rate": 4.914604842834279e-06, "loss": 0.3304, "step": 10289 }, { "epoch": 0.5104419862096334, "grad_norm": 5.447047233581543, "learning_rate": 4.913817868290698e-06, "loss": 0.3372, "step": 10290 }, { "epoch": 0.5104915918448336, "grad_norm": 11.87225341796875, "learning_rate": 4.913030895882749e-06, "loss": 0.4191, "step": 10291 }, { "epoch": 0.5105411974800337, "grad_norm": 4.600724697113037, "learning_rate": 4.9122439256299335e-06, "loss": 0.2985, "step": 10292 }, { "epoch": 0.5105908031152339, "grad_norm": 5.2253031730651855, "learning_rate": 4.911456957551754e-06, "loss": 0.3258, "step": 10293 }, { "epoch": 0.510640408750434, "grad_norm": 6.0046467781066895, "learning_rate": 4.9106699916677105e-06, "loss": 0.2483, "step": 10294 }, { "epoch": 0.5106900143856342, "grad_norm": 5.24971866607666, "learning_rate": 4.909883027997306e-06, "loss": 0.3451, "step": 10295 }, { "epoch": 0.5107396200208344, "grad_norm": 5.971646308898926, "learning_rate": 4.909096066560039e-06, "loss": 0.3509, "step": 10296 }, { "epoch": 0.5107892256560346, "grad_norm": 5.567930698394775, "learning_rate": 4.908309107375414e-06, "loss": 0.291, "step": 10297 }, { "epoch": 0.5108388312912346, "grad_norm": 9.648993492126465, "learning_rate": 4.90752215046293e-06, "loss": 0.3393, "step": 10298 }, { "epoch": 0.5108884369264348, "grad_norm": 10.287022590637207, "learning_rate": 4.9067351958420895e-06, "loss": 0.3722, "step": 10299 }, { "epoch": 0.510938042561635, "grad_norm": 13.316868782043457, "learning_rate": 4.905948243532392e-06, "loss": 0.3385, "step": 10300 }, { "epoch": 0.5109876481968352, "grad_norm": 6.291504383087158, "learning_rate": 4.905161293553341e-06, "loss": 0.3461, "step": 10301 }, { "epoch": 0.5110372538320354, "grad_norm": 14.458639144897461, "learning_rate": 4.904374345924436e-06, "loss": 0.3892, "step": 10302 }, { "epoch": 0.5110868594672355, "grad_norm": 5.658604621887207, "learning_rate": 4.903587400665177e-06, "loss": 0.3385, "step": 10303 }, { "epoch": 0.5111364651024356, "grad_norm": 9.993212699890137, "learning_rate": 4.902800457795066e-06, "loss": 0.455, "step": 10304 }, { "epoch": 0.5111860707376358, "grad_norm": 8.006503105163574, "learning_rate": 4.902013517333604e-06, "loss": 0.3136, "step": 10305 }, { "epoch": 0.511235676372836, "grad_norm": 8.496750831604004, "learning_rate": 4.901226579300291e-06, "loss": 0.3499, "step": 10306 }, { "epoch": 0.5112852820080361, "grad_norm": 9.215598106384277, "learning_rate": 4.900439643714629e-06, "loss": 0.3101, "step": 10307 }, { "epoch": 0.5113348876432363, "grad_norm": 6.730443954467773, "learning_rate": 4.899652710596118e-06, "loss": 0.2989, "step": 10308 }, { "epoch": 0.5113844932784364, "grad_norm": 5.291771411895752, "learning_rate": 4.898865779964255e-06, "loss": 0.2361, "step": 10309 }, { "epoch": 0.5114340989136366, "grad_norm": 5.26599645614624, "learning_rate": 4.898078851838547e-06, "loss": 0.2925, "step": 10310 }, { "epoch": 0.5114837045488367, "grad_norm": 9.897513389587402, "learning_rate": 4.897291926238491e-06, "loss": 0.3178, "step": 10311 }, { "epoch": 0.5115333101840369, "grad_norm": 7.08079195022583, "learning_rate": 4.896505003183585e-06, "loss": 0.3221, "step": 10312 }, { "epoch": 0.5115829158192371, "grad_norm": 10.436441421508789, "learning_rate": 4.895718082693334e-06, "loss": 0.3759, "step": 10313 }, { "epoch": 0.5116325214544373, "grad_norm": 4.674803256988525, "learning_rate": 4.894931164787237e-06, "loss": 0.2563, "step": 10314 }, { "epoch": 0.5116821270896373, "grad_norm": 4.672872543334961, "learning_rate": 4.89414424948479e-06, "loss": 0.3264, "step": 10315 }, { "epoch": 0.5117317327248375, "grad_norm": 6.166086196899414, "learning_rate": 4.8933573368054985e-06, "loss": 0.3743, "step": 10316 }, { "epoch": 0.5117813383600377, "grad_norm": 4.853763103485107, "learning_rate": 4.892570426768859e-06, "loss": 0.2668, "step": 10317 }, { "epoch": 0.5118309439952379, "grad_norm": 4.599005222320557, "learning_rate": 4.891783519394375e-06, "loss": 0.3411, "step": 10318 }, { "epoch": 0.511880549630438, "grad_norm": 6.177680015563965, "learning_rate": 4.890996614701544e-06, "loss": 0.3792, "step": 10319 }, { "epoch": 0.5119301552656382, "grad_norm": 6.290494918823242, "learning_rate": 4.890209712709864e-06, "loss": 0.3293, "step": 10320 }, { "epoch": 0.5119797609008383, "grad_norm": 6.130918979644775, "learning_rate": 4.889422813438839e-06, "loss": 0.351, "step": 10321 }, { "epoch": 0.5120293665360385, "grad_norm": 6.653745174407959, "learning_rate": 4.888635916907968e-06, "loss": 0.3335, "step": 10322 }, { "epoch": 0.5120789721712387, "grad_norm": 6.402971267700195, "learning_rate": 4.8878490231367455e-06, "loss": 0.2998, "step": 10323 }, { "epoch": 0.5121285778064388, "grad_norm": 6.15738582611084, "learning_rate": 4.887062132144678e-06, "loss": 0.2984, "step": 10324 }, { "epoch": 0.512178183441639, "grad_norm": 8.049633979797363, "learning_rate": 4.886275243951261e-06, "loss": 0.3239, "step": 10325 }, { "epoch": 0.5122277890768391, "grad_norm": 4.7978129386901855, "learning_rate": 4.885488358575995e-06, "loss": 0.2217, "step": 10326 }, { "epoch": 0.5122773947120393, "grad_norm": 6.115696430206299, "learning_rate": 4.884701476038379e-06, "loss": 0.1674, "step": 10327 }, { "epoch": 0.5123270003472394, "grad_norm": 3.901543378829956, "learning_rate": 4.883914596357914e-06, "loss": 0.2254, "step": 10328 }, { "epoch": 0.5123766059824396, "grad_norm": 6.838134765625, "learning_rate": 4.883127719554095e-06, "loss": 0.2889, "step": 10329 }, { "epoch": 0.5124262116176398, "grad_norm": 4.531999588012695, "learning_rate": 4.882340845646426e-06, "loss": 0.3117, "step": 10330 }, { "epoch": 0.51247581725284, "grad_norm": 6.961310863494873, "learning_rate": 4.881553974654404e-06, "loss": 0.249, "step": 10331 }, { "epoch": 0.51252542288804, "grad_norm": 8.23759651184082, "learning_rate": 4.880767106597527e-06, "loss": 0.234, "step": 10332 }, { "epoch": 0.5125750285232402, "grad_norm": 13.883113861083984, "learning_rate": 4.879980241495296e-06, "loss": 0.3309, "step": 10333 }, { "epoch": 0.5126246341584404, "grad_norm": 6.512644290924072, "learning_rate": 4.879193379367208e-06, "loss": 0.3642, "step": 10334 }, { "epoch": 0.5126742397936406, "grad_norm": 4.082782745361328, "learning_rate": 4.8784065202327635e-06, "loss": 0.312, "step": 10335 }, { "epoch": 0.5127238454288408, "grad_norm": 8.591170310974121, "learning_rate": 4.877619664111462e-06, "loss": 0.2459, "step": 10336 }, { "epoch": 0.5127734510640409, "grad_norm": 15.82168197631836, "learning_rate": 4.876832811022797e-06, "loss": 0.3404, "step": 10337 }, { "epoch": 0.512823056699241, "grad_norm": 4.943836212158203, "learning_rate": 4.876045960986273e-06, "loss": 0.1833, "step": 10338 }, { "epoch": 0.5128726623344412, "grad_norm": 7.832231521606445, "learning_rate": 4.875259114021385e-06, "loss": 0.3759, "step": 10339 }, { "epoch": 0.5129222679696414, "grad_norm": 8.333767890930176, "learning_rate": 4.874472270147633e-06, "loss": 0.4569, "step": 10340 }, { "epoch": 0.5129718736048415, "grad_norm": 15.525585174560547, "learning_rate": 4.873685429384516e-06, "loss": 0.2851, "step": 10341 }, { "epoch": 0.5130214792400417, "grad_norm": 6.477675914764404, "learning_rate": 4.87289859175153e-06, "loss": 0.241, "step": 10342 }, { "epoch": 0.5130710848752418, "grad_norm": 6.425060749053955, "learning_rate": 4.8721117572681745e-06, "loss": 0.2716, "step": 10343 }, { "epoch": 0.513120690510442, "grad_norm": 5.381856918334961, "learning_rate": 4.871324925953949e-06, "loss": 0.2046, "step": 10344 }, { "epoch": 0.5131702961456421, "grad_norm": 11.051490783691406, "learning_rate": 4.870538097828348e-06, "loss": 0.4884, "step": 10345 }, { "epoch": 0.5132199017808423, "grad_norm": 6.71872091293335, "learning_rate": 4.869751272910872e-06, "loss": 0.3158, "step": 10346 }, { "epoch": 0.5132695074160425, "grad_norm": 4.748212814331055, "learning_rate": 4.868964451221019e-06, "loss": 0.2737, "step": 10347 }, { "epoch": 0.5133191130512427, "grad_norm": 5.305292129516602, "learning_rate": 4.868177632778286e-06, "loss": 0.2197, "step": 10348 }, { "epoch": 0.5133687186864427, "grad_norm": 5.669645309448242, "learning_rate": 4.86739081760217e-06, "loss": 0.2184, "step": 10349 }, { "epoch": 0.5134183243216429, "grad_norm": 4.087382793426514, "learning_rate": 4.86660400571217e-06, "loss": 0.2664, "step": 10350 }, { "epoch": 0.5134679299568431, "grad_norm": 6.657211780548096, "learning_rate": 4.865817197127783e-06, "loss": 0.3202, "step": 10351 }, { "epoch": 0.5135175355920433, "grad_norm": 5.982981204986572, "learning_rate": 4.865030391868507e-06, "loss": 0.3041, "step": 10352 }, { "epoch": 0.5135671412272435, "grad_norm": 7.288539409637451, "learning_rate": 4.86424358995384e-06, "loss": 0.284, "step": 10353 }, { "epoch": 0.5136167468624435, "grad_norm": 12.384819984436035, "learning_rate": 4.863456791403276e-06, "loss": 0.4716, "step": 10354 }, { "epoch": 0.5136663524976437, "grad_norm": 6.35036563873291, "learning_rate": 4.8626699962363155e-06, "loss": 0.2821, "step": 10355 }, { "epoch": 0.5137159581328439, "grad_norm": 7.268754005432129, "learning_rate": 4.861883204472456e-06, "loss": 0.3499, "step": 10356 }, { "epoch": 0.513765563768044, "grad_norm": 8.663239479064941, "learning_rate": 4.86109641613119e-06, "loss": 0.3989, "step": 10357 }, { "epoch": 0.5138151694032442, "grad_norm": 8.739177703857422, "learning_rate": 4.86030963123202e-06, "loss": 0.2615, "step": 10358 }, { "epoch": 0.5138647750384444, "grad_norm": 6.31417989730835, "learning_rate": 4.8595228497944415e-06, "loss": 0.2517, "step": 10359 }, { "epoch": 0.5139143806736445, "grad_norm": 15.415276527404785, "learning_rate": 4.8587360718379475e-06, "loss": 0.3912, "step": 10360 }, { "epoch": 0.5139639863088447, "grad_norm": 8.606066703796387, "learning_rate": 4.857949297382039e-06, "loss": 0.2573, "step": 10361 }, { "epoch": 0.5140135919440448, "grad_norm": 9.810953140258789, "learning_rate": 4.857162526446212e-06, "loss": 0.3071, "step": 10362 }, { "epoch": 0.514063197579245, "grad_norm": 5.939596176147461, "learning_rate": 4.85637575904996e-06, "loss": 0.3157, "step": 10363 }, { "epoch": 0.5141128032144452, "grad_norm": 6.197330474853516, "learning_rate": 4.855588995212784e-06, "loss": 0.2311, "step": 10364 }, { "epoch": 0.5141624088496454, "grad_norm": 8.2662992477417, "learning_rate": 4.8548022349541775e-06, "loss": 0.3042, "step": 10365 }, { "epoch": 0.5142120144848454, "grad_norm": 6.486877918243408, "learning_rate": 4.854015478293636e-06, "loss": 0.3142, "step": 10366 }, { "epoch": 0.5142616201200456, "grad_norm": 13.157611846923828, "learning_rate": 4.853228725250658e-06, "loss": 0.3443, "step": 10367 }, { "epoch": 0.5143112257552458, "grad_norm": 8.453258514404297, "learning_rate": 4.8524419758447365e-06, "loss": 0.348, "step": 10368 }, { "epoch": 0.514360831390446, "grad_norm": 6.562157154083252, "learning_rate": 4.851655230095373e-06, "loss": 0.3174, "step": 10369 }, { "epoch": 0.5144104370256462, "grad_norm": 8.450716972351074, "learning_rate": 4.850868488022059e-06, "loss": 0.4046, "step": 10370 }, { "epoch": 0.5144600426608462, "grad_norm": 8.487569808959961, "learning_rate": 4.850081749644289e-06, "loss": 0.4252, "step": 10371 }, { "epoch": 0.5145096482960464, "grad_norm": 4.651614189147949, "learning_rate": 4.849295014981563e-06, "loss": 0.3107, "step": 10372 }, { "epoch": 0.5145592539312466, "grad_norm": 7.387716293334961, "learning_rate": 4.848508284053375e-06, "loss": 0.2306, "step": 10373 }, { "epoch": 0.5146088595664468, "grad_norm": 15.94942855834961, "learning_rate": 4.847721556879219e-06, "loss": 0.3487, "step": 10374 }, { "epoch": 0.5146584652016469, "grad_norm": 9.263590812683105, "learning_rate": 4.846934833478593e-06, "loss": 0.3445, "step": 10375 }, { "epoch": 0.5147080708368471, "grad_norm": 8.206082344055176, "learning_rate": 4.846148113870992e-06, "loss": 0.4182, "step": 10376 }, { "epoch": 0.5147576764720472, "grad_norm": 6.031832218170166, "learning_rate": 4.845361398075907e-06, "loss": 0.2309, "step": 10377 }, { "epoch": 0.5148072821072474, "grad_norm": 7.461982250213623, "learning_rate": 4.844574686112839e-06, "loss": 0.3274, "step": 10378 }, { "epoch": 0.5148568877424475, "grad_norm": 7.802023887634277, "learning_rate": 4.843787978001281e-06, "loss": 0.31, "step": 10379 }, { "epoch": 0.5149064933776477, "grad_norm": 6.672614097595215, "learning_rate": 4.8430012737607244e-06, "loss": 0.2842, "step": 10380 }, { "epoch": 0.5149560990128479, "grad_norm": 11.369341850280762, "learning_rate": 4.842214573410671e-06, "loss": 0.3762, "step": 10381 }, { "epoch": 0.5150057046480481, "grad_norm": 9.75640869140625, "learning_rate": 4.841427876970611e-06, "loss": 0.384, "step": 10382 }, { "epoch": 0.5150553102832481, "grad_norm": 7.778446197509766, "learning_rate": 4.840641184460037e-06, "loss": 0.2594, "step": 10383 }, { "epoch": 0.5151049159184483, "grad_norm": 7.780797481536865, "learning_rate": 4.839854495898449e-06, "loss": 0.3163, "step": 10384 }, { "epoch": 0.5151545215536485, "grad_norm": 7.271726608276367, "learning_rate": 4.83906781130534e-06, "loss": 0.294, "step": 10385 }, { "epoch": 0.5152041271888487, "grad_norm": 6.332975387573242, "learning_rate": 4.8382811307002005e-06, "loss": 0.3224, "step": 10386 }, { "epoch": 0.5152537328240488, "grad_norm": 6.169857501983643, "learning_rate": 4.83749445410253e-06, "loss": 0.2928, "step": 10387 }, { "epoch": 0.5153033384592489, "grad_norm": 6.398167610168457, "learning_rate": 4.836707781531819e-06, "loss": 0.2977, "step": 10388 }, { "epoch": 0.5153529440944491, "grad_norm": 8.167325019836426, "learning_rate": 4.835921113007564e-06, "loss": 0.2376, "step": 10389 }, { "epoch": 0.5154025497296493, "grad_norm": 7.824323654174805, "learning_rate": 4.8351344485492595e-06, "loss": 0.3753, "step": 10390 }, { "epoch": 0.5154521553648495, "grad_norm": 5.867006301879883, "learning_rate": 4.834347788176394e-06, "loss": 0.2774, "step": 10391 }, { "epoch": 0.5155017610000496, "grad_norm": 8.159173965454102, "learning_rate": 4.8335611319084686e-06, "loss": 0.2204, "step": 10392 }, { "epoch": 0.5155513666352498, "grad_norm": 5.697804927825928, "learning_rate": 4.832774479764973e-06, "loss": 0.2914, "step": 10393 }, { "epoch": 0.5156009722704499, "grad_norm": 10.970108032226562, "learning_rate": 4.8319878317654e-06, "loss": 0.3482, "step": 10394 }, { "epoch": 0.51565057790565, "grad_norm": 10.836755752563477, "learning_rate": 4.8312011879292466e-06, "loss": 0.3878, "step": 10395 }, { "epoch": 0.5157001835408502, "grad_norm": 10.659506797790527, "learning_rate": 4.8304145482760025e-06, "loss": 0.4162, "step": 10396 }, { "epoch": 0.5157497891760504, "grad_norm": 18.080799102783203, "learning_rate": 4.829627912825163e-06, "loss": 0.3354, "step": 10397 }, { "epoch": 0.5157993948112506, "grad_norm": 8.310121536254883, "learning_rate": 4.828841281596222e-06, "loss": 0.366, "step": 10398 }, { "epoch": 0.5158490004464508, "grad_norm": 7.375701427459717, "learning_rate": 4.828054654608671e-06, "loss": 0.3131, "step": 10399 }, { "epoch": 0.5158986060816508, "grad_norm": 11.727570533752441, "learning_rate": 4.827268031882002e-06, "loss": 0.3728, "step": 10400 }, { "epoch": 0.515948211716851, "grad_norm": 5.040974140167236, "learning_rate": 4.82648141343571e-06, "loss": 0.2784, "step": 10401 }, { "epoch": 0.5159978173520512, "grad_norm": 7.488670349121094, "learning_rate": 4.825694799289288e-06, "loss": 0.3901, "step": 10402 }, { "epoch": 0.5160474229872514, "grad_norm": 8.548012733459473, "learning_rate": 4.824908189462227e-06, "loss": 0.3563, "step": 10403 }, { "epoch": 0.5160970286224515, "grad_norm": 4.019193649291992, "learning_rate": 4.8241215839740194e-06, "loss": 0.3172, "step": 10404 }, { "epoch": 0.5161466342576516, "grad_norm": 3.7792251110076904, "learning_rate": 4.823334982844159e-06, "loss": 0.2675, "step": 10405 }, { "epoch": 0.5161962398928518, "grad_norm": 6.727725982666016, "learning_rate": 4.822548386092138e-06, "loss": 0.4065, "step": 10406 }, { "epoch": 0.516245845528052, "grad_norm": 5.223032474517822, "learning_rate": 4.82176179373745e-06, "loss": 0.2956, "step": 10407 }, { "epoch": 0.5162954511632522, "grad_norm": 5.314462184906006, "learning_rate": 4.820975205799583e-06, "loss": 0.2873, "step": 10408 }, { "epoch": 0.5163450567984523, "grad_norm": 5.043349266052246, "learning_rate": 4.8201886222980316e-06, "loss": 0.2831, "step": 10409 }, { "epoch": 0.5163946624336525, "grad_norm": 4.363898754119873, "learning_rate": 4.819402043252289e-06, "loss": 0.276, "step": 10410 }, { "epoch": 0.5164442680688526, "grad_norm": 8.011933326721191, "learning_rate": 4.818615468681843e-06, "loss": 0.3167, "step": 10411 }, { "epoch": 0.5164938737040528, "grad_norm": 22.413799285888672, "learning_rate": 4.8178288986061896e-06, "loss": 0.4684, "step": 10412 }, { "epoch": 0.5165434793392529, "grad_norm": 5.38362455368042, "learning_rate": 4.817042333044819e-06, "loss": 0.2903, "step": 10413 }, { "epoch": 0.5165930849744531, "grad_norm": 3.981039524078369, "learning_rate": 4.81625577201722e-06, "loss": 0.2107, "step": 10414 }, { "epoch": 0.5166426906096533, "grad_norm": 8.072474479675293, "learning_rate": 4.815469215542888e-06, "loss": 0.3546, "step": 10415 }, { "epoch": 0.5166922962448535, "grad_norm": 5.2152018547058105, "learning_rate": 4.8146826636413125e-06, "loss": 0.2045, "step": 10416 }, { "epoch": 0.5167419018800535, "grad_norm": 4.573492527008057, "learning_rate": 4.813896116331982e-06, "loss": 0.2703, "step": 10417 }, { "epoch": 0.5167915075152537, "grad_norm": 5.085043907165527, "learning_rate": 4.813109573634392e-06, "loss": 0.2586, "step": 10418 }, { "epoch": 0.5168411131504539, "grad_norm": 5.565545082092285, "learning_rate": 4.812323035568032e-06, "loss": 0.3148, "step": 10419 }, { "epoch": 0.5168907187856541, "grad_norm": 7.5592145919799805, "learning_rate": 4.811536502152389e-06, "loss": 0.2541, "step": 10420 }, { "epoch": 0.5169403244208542, "grad_norm": 5.243061065673828, "learning_rate": 4.81074997340696e-06, "loss": 0.1699, "step": 10421 }, { "epoch": 0.5169899300560543, "grad_norm": 16.46040916442871, "learning_rate": 4.8099634493512285e-06, "loss": 0.2934, "step": 10422 }, { "epoch": 0.5170395356912545, "grad_norm": 6.897676944732666, "learning_rate": 4.809176930004692e-06, "loss": 0.2807, "step": 10423 }, { "epoch": 0.5170891413264547, "grad_norm": 3.2130885124206543, "learning_rate": 4.8083904153868375e-06, "loss": 0.2063, "step": 10424 }, { "epoch": 0.5171387469616548, "grad_norm": 5.394913673400879, "learning_rate": 4.807603905517153e-06, "loss": 0.3307, "step": 10425 }, { "epoch": 0.517188352596855, "grad_norm": 7.075695514678955, "learning_rate": 4.8068174004151324e-06, "loss": 0.3275, "step": 10426 }, { "epoch": 0.5172379582320552, "grad_norm": 10.38586139678955, "learning_rate": 4.806030900100265e-06, "loss": 0.2291, "step": 10427 }, { "epoch": 0.5172875638672553, "grad_norm": 4.582618713378906, "learning_rate": 4.805244404592036e-06, "loss": 0.266, "step": 10428 }, { "epoch": 0.5173371695024555, "grad_norm": 6.839280605316162, "learning_rate": 4.804457913909942e-06, "loss": 0.274, "step": 10429 }, { "epoch": 0.5173867751376556, "grad_norm": 7.759975910186768, "learning_rate": 4.8036714280734695e-06, "loss": 0.4131, "step": 10430 }, { "epoch": 0.5174363807728558, "grad_norm": 4.381221294403076, "learning_rate": 4.802884947102105e-06, "loss": 0.2172, "step": 10431 }, { "epoch": 0.517485986408056, "grad_norm": 4.3958659172058105, "learning_rate": 4.8020984710153425e-06, "loss": 0.247, "step": 10432 }, { "epoch": 0.5175355920432562, "grad_norm": 6.283215045928955, "learning_rate": 4.80131199983267e-06, "loss": 0.265, "step": 10433 }, { "epoch": 0.5175851976784562, "grad_norm": 7.9851789474487305, "learning_rate": 4.800525533573574e-06, "loss": 0.3564, "step": 10434 }, { "epoch": 0.5176348033136564, "grad_norm": 5.504475116729736, "learning_rate": 4.799739072257547e-06, "loss": 0.3214, "step": 10435 }, { "epoch": 0.5176844089488566, "grad_norm": 15.875043869018555, "learning_rate": 4.798952615904078e-06, "loss": 0.3424, "step": 10436 }, { "epoch": 0.5177340145840568, "grad_norm": 6.650902271270752, "learning_rate": 4.7981661645326505e-06, "loss": 0.3317, "step": 10437 }, { "epoch": 0.517783620219257, "grad_norm": 7.219059467315674, "learning_rate": 4.797379718162759e-06, "loss": 0.3153, "step": 10438 }, { "epoch": 0.517833225854457, "grad_norm": 5.928733825683594, "learning_rate": 4.79659327681389e-06, "loss": 0.2931, "step": 10439 }, { "epoch": 0.5178828314896572, "grad_norm": 3.440492630004883, "learning_rate": 4.79580684050553e-06, "loss": 0.2132, "step": 10440 }, { "epoch": 0.5179324371248574, "grad_norm": 5.5186872482299805, "learning_rate": 4.79502040925717e-06, "loss": 0.1866, "step": 10441 }, { "epoch": 0.5179820427600575, "grad_norm": 6.592552185058594, "learning_rate": 4.794233983088295e-06, "loss": 0.3063, "step": 10442 }, { "epoch": 0.5180316483952577, "grad_norm": 4.118333339691162, "learning_rate": 4.793447562018398e-06, "loss": 0.2431, "step": 10443 }, { "epoch": 0.5180812540304579, "grad_norm": 6.223860740661621, "learning_rate": 4.792661146066963e-06, "loss": 0.3088, "step": 10444 }, { "epoch": 0.518130859665658, "grad_norm": 12.748936653137207, "learning_rate": 4.7918747352534765e-06, "loss": 0.3818, "step": 10445 }, { "epoch": 0.5181804653008582, "grad_norm": 5.018454074859619, "learning_rate": 4.791088329597431e-06, "loss": 0.2212, "step": 10446 }, { "epoch": 0.5182300709360583, "grad_norm": 4.806131839752197, "learning_rate": 4.79030192911831e-06, "loss": 0.3114, "step": 10447 }, { "epoch": 0.5182796765712585, "grad_norm": 10.943425178527832, "learning_rate": 4.7895155338356005e-06, "loss": 0.411, "step": 10448 }, { "epoch": 0.5183292822064587, "grad_norm": 5.550948619842529, "learning_rate": 4.788729143768794e-06, "loss": 0.3169, "step": 10449 }, { "epoch": 0.5183788878416589, "grad_norm": 4.523853778839111, "learning_rate": 4.787942758937373e-06, "loss": 0.2881, "step": 10450 }, { "epoch": 0.5184284934768589, "grad_norm": 5.798242092132568, "learning_rate": 4.7871563793608275e-06, "loss": 0.3618, "step": 10451 }, { "epoch": 0.5184780991120591, "grad_norm": 4.954835891723633, "learning_rate": 4.7863700050586434e-06, "loss": 0.2695, "step": 10452 }, { "epoch": 0.5185277047472593, "grad_norm": 6.571004390716553, "learning_rate": 4.785583636050306e-06, "loss": 0.2847, "step": 10453 }, { "epoch": 0.5185773103824595, "grad_norm": 5.825112819671631, "learning_rate": 4.784797272355303e-06, "loss": 0.282, "step": 10454 }, { "epoch": 0.5186269160176596, "grad_norm": 39.51676940917969, "learning_rate": 4.784010913993122e-06, "loss": 0.3855, "step": 10455 }, { "epoch": 0.5186765216528597, "grad_norm": 10.142683982849121, "learning_rate": 4.783224560983248e-06, "loss": 0.3578, "step": 10456 }, { "epoch": 0.5187261272880599, "grad_norm": 7.0430498123168945, "learning_rate": 4.782438213345167e-06, "loss": 0.2237, "step": 10457 }, { "epoch": 0.5187757329232601, "grad_norm": 15.709895133972168, "learning_rate": 4.781651871098366e-06, "loss": 0.4063, "step": 10458 }, { "epoch": 0.5188253385584602, "grad_norm": 5.156065464019775, "learning_rate": 4.7808655342623294e-06, "loss": 0.2724, "step": 10459 }, { "epoch": 0.5188749441936604, "grad_norm": 5.293581485748291, "learning_rate": 4.780079202856544e-06, "loss": 0.2804, "step": 10460 }, { "epoch": 0.5189245498288606, "grad_norm": 9.140508651733398, "learning_rate": 4.779292876900498e-06, "loss": 0.3326, "step": 10461 }, { "epoch": 0.5189741554640607, "grad_norm": 4.942971229553223, "learning_rate": 4.778506556413672e-06, "loss": 0.2595, "step": 10462 }, { "epoch": 0.5190237610992608, "grad_norm": 6.071933746337891, "learning_rate": 4.777720241415554e-06, "loss": 0.3108, "step": 10463 }, { "epoch": 0.519073366734461, "grad_norm": 13.611039161682129, "learning_rate": 4.7769339319256295e-06, "loss": 0.3746, "step": 10464 }, { "epoch": 0.5191229723696612, "grad_norm": 5.958398818969727, "learning_rate": 4.776147627963381e-06, "loss": 0.2397, "step": 10465 }, { "epoch": 0.5191725780048614, "grad_norm": 7.373706340789795, "learning_rate": 4.775361329548299e-06, "loss": 0.2939, "step": 10466 }, { "epoch": 0.5192221836400616, "grad_norm": 6.606337070465088, "learning_rate": 4.774575036699864e-06, "loss": 0.254, "step": 10467 }, { "epoch": 0.5192717892752616, "grad_norm": 5.873845100402832, "learning_rate": 4.773788749437559e-06, "loss": 0.2939, "step": 10468 }, { "epoch": 0.5193213949104618, "grad_norm": 7.900230884552002, "learning_rate": 4.773002467780874e-06, "loss": 0.2795, "step": 10469 }, { "epoch": 0.519371000545662, "grad_norm": 5.807553768157959, "learning_rate": 4.77221619174929e-06, "loss": 0.3253, "step": 10470 }, { "epoch": 0.5194206061808622, "grad_norm": 10.579700469970703, "learning_rate": 4.77142992136229e-06, "loss": 0.2587, "step": 10471 }, { "epoch": 0.5194702118160623, "grad_norm": 6.421810150146484, "learning_rate": 4.770643656639362e-06, "loss": 0.35, "step": 10472 }, { "epoch": 0.5195198174512624, "grad_norm": 11.062715530395508, "learning_rate": 4.7698573975999875e-06, "loss": 0.4155, "step": 10473 }, { "epoch": 0.5195694230864626, "grad_norm": 15.351991653442383, "learning_rate": 4.769071144263649e-06, "loss": 0.4125, "step": 10474 }, { "epoch": 0.5196190287216628, "grad_norm": 10.160426139831543, "learning_rate": 4.768284896649834e-06, "loss": 0.3449, "step": 10475 }, { "epoch": 0.519668634356863, "grad_norm": 5.826309680938721, "learning_rate": 4.767498654778022e-06, "loss": 0.2725, "step": 10476 }, { "epoch": 0.5197182399920631, "grad_norm": 4.3864641189575195, "learning_rate": 4.766712418667701e-06, "loss": 0.229, "step": 10477 }, { "epoch": 0.5197678456272633, "grad_norm": 6.309800148010254, "learning_rate": 4.765926188338352e-06, "loss": 0.3126, "step": 10478 }, { "epoch": 0.5198174512624634, "grad_norm": 3.245845079421997, "learning_rate": 4.765139963809455e-06, "loss": 0.1503, "step": 10479 }, { "epoch": 0.5198670568976635, "grad_norm": 6.943321704864502, "learning_rate": 4.764353745100499e-06, "loss": 0.2686, "step": 10480 }, { "epoch": 0.5199166625328637, "grad_norm": 5.836849689483643, "learning_rate": 4.763567532230963e-06, "loss": 0.3293, "step": 10481 }, { "epoch": 0.5199662681680639, "grad_norm": 8.07847785949707, "learning_rate": 4.762781325220328e-06, "loss": 0.324, "step": 10482 }, { "epoch": 0.5200158738032641, "grad_norm": 10.40938949584961, "learning_rate": 4.761995124088082e-06, "loss": 0.2314, "step": 10483 }, { "epoch": 0.5200654794384643, "grad_norm": 9.048964500427246, "learning_rate": 4.761208928853705e-06, "loss": 0.3268, "step": 10484 }, { "epoch": 0.5201150850736643, "grad_norm": 10.853528022766113, "learning_rate": 4.760422739536676e-06, "loss": 0.4654, "step": 10485 }, { "epoch": 0.5201646907088645, "grad_norm": 6.639156818389893, "learning_rate": 4.759636556156481e-06, "loss": 0.2539, "step": 10486 }, { "epoch": 0.5202142963440647, "grad_norm": 9.963400840759277, "learning_rate": 4.758850378732602e-06, "loss": 0.3193, "step": 10487 }, { "epoch": 0.5202639019792649, "grad_norm": 4.866506099700928, "learning_rate": 4.758064207284517e-06, "loss": 0.2308, "step": 10488 }, { "epoch": 0.520313507614465, "grad_norm": 11.981291770935059, "learning_rate": 4.757278041831713e-06, "loss": 0.3516, "step": 10489 }, { "epoch": 0.5203631132496651, "grad_norm": 8.806666374206543, "learning_rate": 4.756491882393669e-06, "loss": 0.3708, "step": 10490 }, { "epoch": 0.5204127188848653, "grad_norm": 13.623820304870605, "learning_rate": 4.7557057289898635e-06, "loss": 0.4262, "step": 10491 }, { "epoch": 0.5204623245200655, "grad_norm": 5.735342502593994, "learning_rate": 4.754919581639783e-06, "loss": 0.2601, "step": 10492 }, { "epoch": 0.5205119301552656, "grad_norm": 10.431693077087402, "learning_rate": 4.754133440362905e-06, "loss": 0.4411, "step": 10493 }, { "epoch": 0.5205615357904658, "grad_norm": 17.206722259521484, "learning_rate": 4.753347305178712e-06, "loss": 0.4385, "step": 10494 }, { "epoch": 0.520611141425666, "grad_norm": 10.32861042022705, "learning_rate": 4.752561176106685e-06, "loss": 0.2536, "step": 10495 }, { "epoch": 0.5206607470608661, "grad_norm": 7.851759910583496, "learning_rate": 4.751775053166302e-06, "loss": 0.4145, "step": 10496 }, { "epoch": 0.5207103526960662, "grad_norm": 5.625084400177002, "learning_rate": 4.750988936377048e-06, "loss": 0.3453, "step": 10497 }, { "epoch": 0.5207599583312664, "grad_norm": 5.046080589294434, "learning_rate": 4.7502028257584005e-06, "loss": 0.2696, "step": 10498 }, { "epoch": 0.5208095639664666, "grad_norm": 6.290425777435303, "learning_rate": 4.749416721329838e-06, "loss": 0.3235, "step": 10499 }, { "epoch": 0.5208591696016668, "grad_norm": 8.86238956451416, "learning_rate": 4.748630623110844e-06, "loss": 0.2664, "step": 10500 }, { "epoch": 0.520908775236867, "grad_norm": 7.927257537841797, "learning_rate": 4.747844531120897e-06, "loss": 0.3499, "step": 10501 }, { "epoch": 0.520958380872067, "grad_norm": 11.966971397399902, "learning_rate": 4.747058445379475e-06, "loss": 0.2909, "step": 10502 }, { "epoch": 0.5210079865072672, "grad_norm": 4.486312389373779, "learning_rate": 4.746272365906061e-06, "loss": 0.2246, "step": 10503 }, { "epoch": 0.5210575921424674, "grad_norm": 6.991075038909912, "learning_rate": 4.745486292720132e-06, "loss": 0.3226, "step": 10504 }, { "epoch": 0.5211071977776676, "grad_norm": 5.0695719718933105, "learning_rate": 4.744700225841166e-06, "loss": 0.2542, "step": 10505 }, { "epoch": 0.5211568034128677, "grad_norm": 7.001497268676758, "learning_rate": 4.743914165288647e-06, "loss": 0.3675, "step": 10506 }, { "epoch": 0.5212064090480678, "grad_norm": 5.122183322906494, "learning_rate": 4.743128111082049e-06, "loss": 0.2552, "step": 10507 }, { "epoch": 0.521256014683268, "grad_norm": 3.905103921890259, "learning_rate": 4.742342063240852e-06, "loss": 0.24, "step": 10508 }, { "epoch": 0.5213056203184682, "grad_norm": 7.164942741394043, "learning_rate": 4.741556021784536e-06, "loss": 0.2996, "step": 10509 }, { "epoch": 0.5213552259536683, "grad_norm": 8.40855884552002, "learning_rate": 4.74076998673258e-06, "loss": 0.3194, "step": 10510 }, { "epoch": 0.5214048315888685, "grad_norm": 4.9937896728515625, "learning_rate": 4.7399839581044595e-06, "loss": 0.278, "step": 10511 }, { "epoch": 0.5214544372240687, "grad_norm": 8.324784278869629, "learning_rate": 4.7391979359196534e-06, "loss": 0.3114, "step": 10512 }, { "epoch": 0.5215040428592688, "grad_norm": 4.273870468139648, "learning_rate": 4.738411920197641e-06, "loss": 0.2125, "step": 10513 }, { "epoch": 0.521553648494469, "grad_norm": 17.016374588012695, "learning_rate": 4.737625910957901e-06, "loss": 0.4382, "step": 10514 }, { "epoch": 0.5216032541296691, "grad_norm": 8.862078666687012, "learning_rate": 4.736839908219909e-06, "loss": 0.3056, "step": 10515 }, { "epoch": 0.5216528597648693, "grad_norm": 7.375319957733154, "learning_rate": 4.736053912003142e-06, "loss": 0.3291, "step": 10516 }, { "epoch": 0.5217024654000695, "grad_norm": 7.875176429748535, "learning_rate": 4.735267922327079e-06, "loss": 0.2445, "step": 10517 }, { "epoch": 0.5217520710352697, "grad_norm": 5.375563144683838, "learning_rate": 4.7344819392111975e-06, "loss": 0.2337, "step": 10518 }, { "epoch": 0.5218016766704697, "grad_norm": 6.398791313171387, "learning_rate": 4.733695962674971e-06, "loss": 0.2977, "step": 10519 }, { "epoch": 0.5218512823056699, "grad_norm": 3.8489274978637695, "learning_rate": 4.732909992737882e-06, "loss": 0.1418, "step": 10520 }, { "epoch": 0.5219008879408701, "grad_norm": 6.341809272766113, "learning_rate": 4.732124029419404e-06, "loss": 0.2763, "step": 10521 }, { "epoch": 0.5219504935760703, "grad_norm": 5.7610697746276855, "learning_rate": 4.731338072739012e-06, "loss": 0.2746, "step": 10522 }, { "epoch": 0.5220000992112704, "grad_norm": 6.150766372680664, "learning_rate": 4.730552122716186e-06, "loss": 0.1957, "step": 10523 }, { "epoch": 0.5220497048464705, "grad_norm": 7.774879455566406, "learning_rate": 4.729766179370399e-06, "loss": 0.2634, "step": 10524 }, { "epoch": 0.5220993104816707, "grad_norm": 5.06270694732666, "learning_rate": 4.7289802427211275e-06, "loss": 0.2833, "step": 10525 }, { "epoch": 0.5221489161168709, "grad_norm": 13.14627456665039, "learning_rate": 4.7281943127878495e-06, "loss": 0.3514, "step": 10526 }, { "epoch": 0.522198521752071, "grad_norm": 4.662899017333984, "learning_rate": 4.72740838959004e-06, "loss": 0.2581, "step": 10527 }, { "epoch": 0.5222481273872712, "grad_norm": 6.45599365234375, "learning_rate": 4.726622473147172e-06, "loss": 0.2655, "step": 10528 }, { "epoch": 0.5222977330224714, "grad_norm": 4.767107009887695, "learning_rate": 4.725836563478725e-06, "loss": 0.2648, "step": 10529 }, { "epoch": 0.5223473386576715, "grad_norm": 7.779543876647949, "learning_rate": 4.725050660604169e-06, "loss": 0.3178, "step": 10530 }, { "epoch": 0.5223969442928716, "grad_norm": 8.653056144714355, "learning_rate": 4.724264764542985e-06, "loss": 0.4244, "step": 10531 }, { "epoch": 0.5224465499280718, "grad_norm": 2.9395594596862793, "learning_rate": 4.723478875314643e-06, "loss": 0.2257, "step": 10532 }, { "epoch": 0.522496155563272, "grad_norm": 5.040225505828857, "learning_rate": 4.72269299293862e-06, "loss": 0.2171, "step": 10533 }, { "epoch": 0.5225457611984722, "grad_norm": 11.37391471862793, "learning_rate": 4.721907117434391e-06, "loss": 0.3785, "step": 10534 }, { "epoch": 0.5225953668336724, "grad_norm": 12.15206527709961, "learning_rate": 4.721121248821429e-06, "loss": 0.3492, "step": 10535 }, { "epoch": 0.5226449724688724, "grad_norm": 6.561086654663086, "learning_rate": 4.720335387119207e-06, "loss": 0.2994, "step": 10536 }, { "epoch": 0.5226945781040726, "grad_norm": 11.165656089782715, "learning_rate": 4.719549532347203e-06, "loss": 0.3954, "step": 10537 }, { "epoch": 0.5227441837392728, "grad_norm": 4.087449550628662, "learning_rate": 4.718763684524888e-06, "loss": 0.2035, "step": 10538 }, { "epoch": 0.522793789374473, "grad_norm": 6.567822456359863, "learning_rate": 4.717977843671734e-06, "loss": 0.2509, "step": 10539 }, { "epoch": 0.5228433950096731, "grad_norm": 4.98523473739624, "learning_rate": 4.717192009807219e-06, "loss": 0.1835, "step": 10540 }, { "epoch": 0.5228930006448732, "grad_norm": 4.646263122558594, "learning_rate": 4.716406182950814e-06, "loss": 0.3234, "step": 10541 }, { "epoch": 0.5229426062800734, "grad_norm": 8.1995267868042, "learning_rate": 4.715620363121989e-06, "loss": 0.3314, "step": 10542 }, { "epoch": 0.5229922119152736, "grad_norm": 9.169836044311523, "learning_rate": 4.714834550340223e-06, "loss": 0.2743, "step": 10543 }, { "epoch": 0.5230418175504737, "grad_norm": 5.391364574432373, "learning_rate": 4.714048744624986e-06, "loss": 0.2956, "step": 10544 }, { "epoch": 0.5230914231856739, "grad_norm": 8.508194923400879, "learning_rate": 4.713262945995748e-06, "loss": 0.3561, "step": 10545 }, { "epoch": 0.5231410288208741, "grad_norm": 6.245769023895264, "learning_rate": 4.712477154471986e-06, "loss": 0.2807, "step": 10546 }, { "epoch": 0.5231906344560742, "grad_norm": 8.875441551208496, "learning_rate": 4.711691370073168e-06, "loss": 0.3686, "step": 10547 }, { "epoch": 0.5232402400912743, "grad_norm": 7.7892165184021, "learning_rate": 4.710905592818771e-06, "loss": 0.4568, "step": 10548 }, { "epoch": 0.5232898457264745, "grad_norm": 9.018012046813965, "learning_rate": 4.710119822728264e-06, "loss": 0.2992, "step": 10549 }, { "epoch": 0.5233394513616747, "grad_norm": 7.477789878845215, "learning_rate": 4.709334059821116e-06, "loss": 0.2853, "step": 10550 }, { "epoch": 0.5233890569968749, "grad_norm": 9.397988319396973, "learning_rate": 4.708548304116804e-06, "loss": 0.3124, "step": 10551 }, { "epoch": 0.5234386626320751, "grad_norm": 6.571567058563232, "learning_rate": 4.707762555634797e-06, "loss": 0.2898, "step": 10552 }, { "epoch": 0.5234882682672751, "grad_norm": 6.7659759521484375, "learning_rate": 4.706976814394564e-06, "loss": 0.3549, "step": 10553 }, { "epoch": 0.5235378739024753, "grad_norm": 6.269765377044678, "learning_rate": 4.70619108041558e-06, "loss": 0.2199, "step": 10554 }, { "epoch": 0.5235874795376755, "grad_norm": 5.432462215423584, "learning_rate": 4.705405353717314e-06, "loss": 0.287, "step": 10555 }, { "epoch": 0.5236370851728757, "grad_norm": 7.759955883026123, "learning_rate": 4.7046196343192345e-06, "loss": 0.3654, "step": 10556 }, { "epoch": 0.5236866908080758, "grad_norm": 12.416790008544922, "learning_rate": 4.703833922240816e-06, "loss": 0.3467, "step": 10557 }, { "epoch": 0.5237362964432759, "grad_norm": 6.2011637687683105, "learning_rate": 4.703048217501526e-06, "loss": 0.2378, "step": 10558 }, { "epoch": 0.5237859020784761, "grad_norm": 6.139786243438721, "learning_rate": 4.702262520120836e-06, "loss": 0.3101, "step": 10559 }, { "epoch": 0.5238355077136763, "grad_norm": 5.441642761230469, "learning_rate": 4.701476830118216e-06, "loss": 0.2459, "step": 10560 }, { "epoch": 0.5238851133488764, "grad_norm": 4.3095502853393555, "learning_rate": 4.700691147513134e-06, "loss": 0.2506, "step": 10561 }, { "epoch": 0.5239347189840766, "grad_norm": 6.242241859436035, "learning_rate": 4.699905472325061e-06, "loss": 0.3237, "step": 10562 }, { "epoch": 0.5239843246192768, "grad_norm": 5.843463897705078, "learning_rate": 4.699119804573466e-06, "loss": 0.2684, "step": 10563 }, { "epoch": 0.5240339302544769, "grad_norm": 5.746583938598633, "learning_rate": 4.69833414427782e-06, "loss": 0.3161, "step": 10564 }, { "epoch": 0.524083535889677, "grad_norm": 5.887850284576416, "learning_rate": 4.6975484914575885e-06, "loss": 0.277, "step": 10565 }, { "epoch": 0.5241331415248772, "grad_norm": 10.398542404174805, "learning_rate": 4.696762846132242e-06, "loss": 0.3884, "step": 10566 }, { "epoch": 0.5241827471600774, "grad_norm": 5.981441497802734, "learning_rate": 4.69597720832125e-06, "loss": 0.3413, "step": 10567 }, { "epoch": 0.5242323527952776, "grad_norm": 5.731645584106445, "learning_rate": 4.6951915780440806e-06, "loss": 0.2627, "step": 10568 }, { "epoch": 0.5242819584304778, "grad_norm": 5.200125217437744, "learning_rate": 4.694405955320202e-06, "loss": 0.2731, "step": 10569 }, { "epoch": 0.5243315640656778, "grad_norm": 7.182010173797607, "learning_rate": 4.693620340169081e-06, "loss": 0.3059, "step": 10570 }, { "epoch": 0.524381169700878, "grad_norm": 10.828187942504883, "learning_rate": 4.692834732610188e-06, "loss": 0.4313, "step": 10571 }, { "epoch": 0.5244307753360782, "grad_norm": 5.100094795227051, "learning_rate": 4.692049132662989e-06, "loss": 0.2759, "step": 10572 }, { "epoch": 0.5244803809712784, "grad_norm": 7.995307445526123, "learning_rate": 4.69126354034695e-06, "loss": 0.2445, "step": 10573 }, { "epoch": 0.5245299866064785, "grad_norm": 7.6199846267700195, "learning_rate": 4.690477955681541e-06, "loss": 0.2978, "step": 10574 }, { "epoch": 0.5245795922416786, "grad_norm": 4.836353778839111, "learning_rate": 4.68969237868623e-06, "loss": 0.241, "step": 10575 }, { "epoch": 0.5246291978768788, "grad_norm": 4.459336757659912, "learning_rate": 4.688906809380479e-06, "loss": 0.3268, "step": 10576 }, { "epoch": 0.524678803512079, "grad_norm": 11.095070838928223, "learning_rate": 4.688121247783761e-06, "loss": 0.288, "step": 10577 }, { "epoch": 0.5247284091472791, "grad_norm": 6.6074604988098145, "learning_rate": 4.68733569391554e-06, "loss": 0.3179, "step": 10578 }, { "epoch": 0.5247780147824793, "grad_norm": 14.501882553100586, "learning_rate": 4.686550147795278e-06, "loss": 0.3081, "step": 10579 }, { "epoch": 0.5248276204176795, "grad_norm": 6.4823737144470215, "learning_rate": 4.6857646094424484e-06, "loss": 0.2745, "step": 10580 }, { "epoch": 0.5248772260528796, "grad_norm": 6.443480014801025, "learning_rate": 4.684979078876514e-06, "loss": 0.2576, "step": 10581 }, { "epoch": 0.5249268316880797, "grad_norm": 5.5720109939575195, "learning_rate": 4.684193556116938e-06, "loss": 0.2834, "step": 10582 }, { "epoch": 0.5249764373232799, "grad_norm": 4.5817975997924805, "learning_rate": 4.683408041183191e-06, "loss": 0.2471, "step": 10583 }, { "epoch": 0.5250260429584801, "grad_norm": 11.153985977172852, "learning_rate": 4.6826225340947335e-06, "loss": 0.3238, "step": 10584 }, { "epoch": 0.5250756485936803, "grad_norm": 9.81482219696045, "learning_rate": 4.681837034871035e-06, "loss": 0.4012, "step": 10585 }, { "epoch": 0.5251252542288805, "grad_norm": 4.076764106750488, "learning_rate": 4.68105154353156e-06, "loss": 0.2283, "step": 10586 }, { "epoch": 0.5251748598640805, "grad_norm": 5.746432781219482, "learning_rate": 4.680266060095769e-06, "loss": 0.2228, "step": 10587 }, { "epoch": 0.5252244654992807, "grad_norm": 9.502705574035645, "learning_rate": 4.679480584583131e-06, "loss": 0.3239, "step": 10588 }, { "epoch": 0.5252740711344809, "grad_norm": 3.9870054721832275, "learning_rate": 4.678695117013111e-06, "loss": 0.2021, "step": 10589 }, { "epoch": 0.5253236767696811, "grad_norm": 7.505858898162842, "learning_rate": 4.6779096574051675e-06, "loss": 0.3358, "step": 10590 }, { "epoch": 0.5253732824048812, "grad_norm": 6.6169304847717285, "learning_rate": 4.6771242057787715e-06, "loss": 0.2508, "step": 10591 }, { "epoch": 0.5254228880400813, "grad_norm": 7.715702533721924, "learning_rate": 4.6763387621533835e-06, "loss": 0.3087, "step": 10592 }, { "epoch": 0.5254724936752815, "grad_norm": 13.982013702392578, "learning_rate": 4.675553326548465e-06, "loss": 0.4477, "step": 10593 }, { "epoch": 0.5255220993104817, "grad_norm": 12.810423851013184, "learning_rate": 4.674767898983485e-06, "loss": 0.2945, "step": 10594 }, { "epoch": 0.5255717049456818, "grad_norm": 10.77485179901123, "learning_rate": 4.673982479477902e-06, "loss": 0.3871, "step": 10595 }, { "epoch": 0.525621310580882, "grad_norm": 15.71967887878418, "learning_rate": 4.673197068051179e-06, "loss": 0.3153, "step": 10596 }, { "epoch": 0.5256709162160822, "grad_norm": 5.645265579223633, "learning_rate": 4.672411664722783e-06, "loss": 0.2968, "step": 10597 }, { "epoch": 0.5257205218512823, "grad_norm": 6.810037612915039, "learning_rate": 4.6716262695121735e-06, "loss": 0.3247, "step": 10598 }, { "epoch": 0.5257701274864824, "grad_norm": 4.9627227783203125, "learning_rate": 4.670840882438812e-06, "loss": 0.2786, "step": 10599 }, { "epoch": 0.5258197331216826, "grad_norm": 5.242947101593018, "learning_rate": 4.670055503522165e-06, "loss": 0.3058, "step": 10600 }, { "epoch": 0.5258693387568828, "grad_norm": 5.746077537536621, "learning_rate": 4.669270132781689e-06, "loss": 0.2322, "step": 10601 }, { "epoch": 0.525918944392083, "grad_norm": 4.644970417022705, "learning_rate": 4.668484770236851e-06, "loss": 0.2116, "step": 10602 }, { "epoch": 0.5259685500272832, "grad_norm": 7.13942289352417, "learning_rate": 4.66769941590711e-06, "loss": 0.255, "step": 10603 }, { "epoch": 0.5260181556624832, "grad_norm": 7.906946659088135, "learning_rate": 4.666914069811926e-06, "loss": 0.345, "step": 10604 }, { "epoch": 0.5260677612976834, "grad_norm": 6.650978088378906, "learning_rate": 4.6661287319707635e-06, "loss": 0.3053, "step": 10605 }, { "epoch": 0.5261173669328836, "grad_norm": 7.480732440948486, "learning_rate": 4.665343402403083e-06, "loss": 0.3111, "step": 10606 }, { "epoch": 0.5261669725680838, "grad_norm": 7.642515659332275, "learning_rate": 4.664558081128342e-06, "loss": 0.2795, "step": 10607 }, { "epoch": 0.5262165782032839, "grad_norm": 5.245926856994629, "learning_rate": 4.663772768166005e-06, "loss": 0.3647, "step": 10608 }, { "epoch": 0.526266183838484, "grad_norm": 7.846590042114258, "learning_rate": 4.662987463535532e-06, "loss": 0.3143, "step": 10609 }, { "epoch": 0.5263157894736842, "grad_norm": 15.716629028320312, "learning_rate": 4.662202167256379e-06, "loss": 0.3612, "step": 10610 }, { "epoch": 0.5263653951088844, "grad_norm": 6.584947109222412, "learning_rate": 4.661416879348012e-06, "loss": 0.3273, "step": 10611 }, { "epoch": 0.5264150007440845, "grad_norm": 8.859326362609863, "learning_rate": 4.660631599829887e-06, "loss": 0.2695, "step": 10612 }, { "epoch": 0.5264646063792847, "grad_norm": 5.772480487823486, "learning_rate": 4.659846328721464e-06, "loss": 0.2615, "step": 10613 }, { "epoch": 0.5265142120144849, "grad_norm": 4.854238033294678, "learning_rate": 4.659061066042203e-06, "loss": 0.1841, "step": 10614 }, { "epoch": 0.526563817649685, "grad_norm": 15.978251457214355, "learning_rate": 4.658275811811563e-06, "loss": 0.5613, "step": 10615 }, { "epoch": 0.5266134232848851, "grad_norm": 5.496284008026123, "learning_rate": 4.657490566049003e-06, "loss": 0.2695, "step": 10616 }, { "epoch": 0.5266630289200853, "grad_norm": 4.707395553588867, "learning_rate": 4.656705328773981e-06, "loss": 0.2766, "step": 10617 }, { "epoch": 0.5267126345552855, "grad_norm": 13.338929176330566, "learning_rate": 4.655920100005957e-06, "loss": 0.3143, "step": 10618 }, { "epoch": 0.5267622401904857, "grad_norm": 10.417559623718262, "learning_rate": 4.655134879764389e-06, "loss": 0.3097, "step": 10619 }, { "epoch": 0.5268118458256857, "grad_norm": 9.569541931152344, "learning_rate": 4.654349668068734e-06, "loss": 0.3296, "step": 10620 }, { "epoch": 0.5268614514608859, "grad_norm": 7.300375461578369, "learning_rate": 4.65356446493845e-06, "loss": 0.3528, "step": 10621 }, { "epoch": 0.5269110570960861, "grad_norm": 11.236503601074219, "learning_rate": 4.652779270392997e-06, "loss": 0.3616, "step": 10622 }, { "epoch": 0.5269606627312863, "grad_norm": 5.780148983001709, "learning_rate": 4.65199408445183e-06, "loss": 0.3631, "step": 10623 }, { "epoch": 0.5270102683664865, "grad_norm": 5.030618190765381, "learning_rate": 4.651208907134406e-06, "loss": 0.3497, "step": 10624 }, { "epoch": 0.5270598740016866, "grad_norm": 8.501974105834961, "learning_rate": 4.6504237384601844e-06, "loss": 0.2275, "step": 10625 }, { "epoch": 0.5271094796368867, "grad_norm": 8.108919143676758, "learning_rate": 4.64963857844862e-06, "loss": 0.4595, "step": 10626 }, { "epoch": 0.5271590852720869, "grad_norm": 5.513978958129883, "learning_rate": 4.64885342711917e-06, "loss": 0.2914, "step": 10627 }, { "epoch": 0.5272086909072871, "grad_norm": 7.558984279632568, "learning_rate": 4.648068284491291e-06, "loss": 0.2907, "step": 10628 }, { "epoch": 0.5272582965424872, "grad_norm": 4.2203049659729, "learning_rate": 4.6472831505844405e-06, "loss": 0.2751, "step": 10629 }, { "epoch": 0.5273079021776874, "grad_norm": 8.013784408569336, "learning_rate": 4.64649802541807e-06, "loss": 0.2912, "step": 10630 }, { "epoch": 0.5273575078128876, "grad_norm": 5.966136932373047, "learning_rate": 4.645712909011641e-06, "loss": 0.2571, "step": 10631 }, { "epoch": 0.5274071134480877, "grad_norm": 8.103470802307129, "learning_rate": 4.644927801384607e-06, "loss": 0.3966, "step": 10632 }, { "epoch": 0.5274567190832878, "grad_norm": 5.366774559020996, "learning_rate": 4.64414270255642e-06, "loss": 0.3449, "step": 10633 }, { "epoch": 0.527506324718488, "grad_norm": 4.8904571533203125, "learning_rate": 4.64335761254654e-06, "loss": 0.1793, "step": 10634 }, { "epoch": 0.5275559303536882, "grad_norm": 7.78325891494751, "learning_rate": 4.642572531374419e-06, "loss": 0.3289, "step": 10635 }, { "epoch": 0.5276055359888884, "grad_norm": 7.708919525146484, "learning_rate": 4.6417874590595105e-06, "loss": 0.3091, "step": 10636 }, { "epoch": 0.5276551416240884, "grad_norm": 8.479117393493652, "learning_rate": 4.641002395621274e-06, "loss": 0.3572, "step": 10637 }, { "epoch": 0.5277047472592886, "grad_norm": 6.762635707855225, "learning_rate": 4.640217341079157e-06, "loss": 0.2785, "step": 10638 }, { "epoch": 0.5277543528944888, "grad_norm": 5.822909832000732, "learning_rate": 4.63943229545262e-06, "loss": 0.243, "step": 10639 }, { "epoch": 0.527803958529689, "grad_norm": 6.8337907791137695, "learning_rate": 4.638647258761113e-06, "loss": 0.3903, "step": 10640 }, { "epoch": 0.5278535641648892, "grad_norm": 8.902597427368164, "learning_rate": 4.63786223102409e-06, "loss": 0.4411, "step": 10641 }, { "epoch": 0.5279031698000893, "grad_norm": 8.850556373596191, "learning_rate": 4.6370772122610055e-06, "loss": 0.3282, "step": 10642 }, { "epoch": 0.5279527754352894, "grad_norm": 5.063500881195068, "learning_rate": 4.6362922024913116e-06, "loss": 0.276, "step": 10643 }, { "epoch": 0.5280023810704896, "grad_norm": 4.47258186340332, "learning_rate": 4.63550720173446e-06, "loss": 0.2684, "step": 10644 }, { "epoch": 0.5280519867056898, "grad_norm": 6.841836929321289, "learning_rate": 4.634722210009907e-06, "loss": 0.2847, "step": 10645 }, { "epoch": 0.5281015923408899, "grad_norm": 8.276463508605957, "learning_rate": 4.633937227337102e-06, "loss": 0.5305, "step": 10646 }, { "epoch": 0.5281511979760901, "grad_norm": 12.564784049987793, "learning_rate": 4.633152253735497e-06, "loss": 0.4046, "step": 10647 }, { "epoch": 0.5282008036112903, "grad_norm": 7.762738227844238, "learning_rate": 4.632367289224546e-06, "loss": 0.3706, "step": 10648 }, { "epoch": 0.5282504092464904, "grad_norm": 9.309874534606934, "learning_rate": 4.631582333823701e-06, "loss": 0.3204, "step": 10649 }, { "epoch": 0.5283000148816905, "grad_norm": 8.885226249694824, "learning_rate": 4.63079738755241e-06, "loss": 0.3045, "step": 10650 }, { "epoch": 0.5283496205168907, "grad_norm": 4.90794038772583, "learning_rate": 4.630012450430128e-06, "loss": 0.254, "step": 10651 }, { "epoch": 0.5283992261520909, "grad_norm": 6.190596580505371, "learning_rate": 4.629227522476306e-06, "loss": 0.323, "step": 10652 }, { "epoch": 0.5284488317872911, "grad_norm": 6.992832183837891, "learning_rate": 4.628442603710391e-06, "loss": 0.3451, "step": 10653 }, { "epoch": 0.5284984374224911, "grad_norm": 6.703396320343018, "learning_rate": 4.627657694151839e-06, "loss": 0.2606, "step": 10654 }, { "epoch": 0.5285480430576913, "grad_norm": 9.012629508972168, "learning_rate": 4.626872793820096e-06, "loss": 0.3606, "step": 10655 }, { "epoch": 0.5285976486928915, "grad_norm": 5.933569431304932, "learning_rate": 4.626087902734615e-06, "loss": 0.2567, "step": 10656 }, { "epoch": 0.5286472543280917, "grad_norm": 7.6901535987854, "learning_rate": 4.6253030209148455e-06, "loss": 0.2986, "step": 10657 }, { "epoch": 0.5286968599632919, "grad_norm": 8.134573936462402, "learning_rate": 4.6245181483802345e-06, "loss": 0.3386, "step": 10658 }, { "epoch": 0.528746465598492, "grad_norm": 5.2861857414245605, "learning_rate": 4.6237332851502355e-06, "loss": 0.2846, "step": 10659 }, { "epoch": 0.5287960712336921, "grad_norm": 6.475879669189453, "learning_rate": 4.6229484312442964e-06, "loss": 0.2346, "step": 10660 }, { "epoch": 0.5288456768688923, "grad_norm": 6.057240962982178, "learning_rate": 4.622163586681863e-06, "loss": 0.2967, "step": 10661 }, { "epoch": 0.5288952825040925, "grad_norm": 6.792957305908203, "learning_rate": 4.621378751482389e-06, "loss": 0.3109, "step": 10662 }, { "epoch": 0.5289448881392926, "grad_norm": 7.5419769287109375, "learning_rate": 4.620593925665321e-06, "loss": 0.303, "step": 10663 }, { "epoch": 0.5289944937744928, "grad_norm": 6.676485061645508, "learning_rate": 4.619809109250105e-06, "loss": 0.3197, "step": 10664 }, { "epoch": 0.529044099409693, "grad_norm": 4.903591632843018, "learning_rate": 4.619024302256193e-06, "loss": 0.3043, "step": 10665 }, { "epoch": 0.5290937050448931, "grad_norm": 8.429607391357422, "learning_rate": 4.618239504703031e-06, "loss": 0.2669, "step": 10666 }, { "epoch": 0.5291433106800932, "grad_norm": 6.587955951690674, "learning_rate": 4.617454716610066e-06, "loss": 0.2776, "step": 10667 }, { "epoch": 0.5291929163152934, "grad_norm": 8.485993385314941, "learning_rate": 4.616669937996748e-06, "loss": 0.3096, "step": 10668 }, { "epoch": 0.5292425219504936, "grad_norm": 4.503844261169434, "learning_rate": 4.61588516888252e-06, "loss": 0.2417, "step": 10669 }, { "epoch": 0.5292921275856938, "grad_norm": 6.243682384490967, "learning_rate": 4.615100409286833e-06, "loss": 0.2985, "step": 10670 }, { "epoch": 0.5293417332208938, "grad_norm": 8.762250900268555, "learning_rate": 4.614315659229131e-06, "loss": 0.2798, "step": 10671 }, { "epoch": 0.529391338856094, "grad_norm": 4.97249174118042, "learning_rate": 4.6135309187288605e-06, "loss": 0.2126, "step": 10672 }, { "epoch": 0.5294409444912942, "grad_norm": 9.158804893493652, "learning_rate": 4.6127461878054706e-06, "loss": 0.2933, "step": 10673 }, { "epoch": 0.5294905501264944, "grad_norm": 10.585591316223145, "learning_rate": 4.611961466478404e-06, "loss": 0.3335, "step": 10674 }, { "epoch": 0.5295401557616946, "grad_norm": 7.225994110107422, "learning_rate": 4.611176754767108e-06, "loss": 0.3631, "step": 10675 }, { "epoch": 0.5295897613968947, "grad_norm": 5.855802059173584, "learning_rate": 4.610392052691029e-06, "loss": 0.311, "step": 10676 }, { "epoch": 0.5296393670320948, "grad_norm": 11.129549026489258, "learning_rate": 4.609607360269611e-06, "loss": 0.4986, "step": 10677 }, { "epoch": 0.529688972667295, "grad_norm": 6.594967842102051, "learning_rate": 4.608822677522298e-06, "loss": 0.2914, "step": 10678 }, { "epoch": 0.5297385783024952, "grad_norm": 5.292237281799316, "learning_rate": 4.608038004468536e-06, "loss": 0.2426, "step": 10679 }, { "epoch": 0.5297881839376953, "grad_norm": 4.787255764007568, "learning_rate": 4.607253341127771e-06, "loss": 0.2825, "step": 10680 }, { "epoch": 0.5298377895728955, "grad_norm": 7.6102800369262695, "learning_rate": 4.606468687519445e-06, "loss": 0.2444, "step": 10681 }, { "epoch": 0.5298873952080957, "grad_norm": 16.17925453186035, "learning_rate": 4.605684043663002e-06, "loss": 0.4711, "step": 10682 }, { "epoch": 0.5299370008432958, "grad_norm": 8.588025093078613, "learning_rate": 4.6048994095778895e-06, "loss": 0.2241, "step": 10683 }, { "epoch": 0.5299866064784959, "grad_norm": 4.922279357910156, "learning_rate": 4.6041147852835445e-06, "loss": 0.3056, "step": 10684 }, { "epoch": 0.5300362121136961, "grad_norm": 7.256928443908691, "learning_rate": 4.603330170799417e-06, "loss": 0.1874, "step": 10685 }, { "epoch": 0.5300858177488963, "grad_norm": 9.120828628540039, "learning_rate": 4.602545566144948e-06, "loss": 0.3467, "step": 10686 }, { "epoch": 0.5301354233840965, "grad_norm": 5.2639994621276855, "learning_rate": 4.601760971339577e-06, "loss": 0.2349, "step": 10687 }, { "epoch": 0.5301850290192965, "grad_norm": 10.72339153289795, "learning_rate": 4.600976386402752e-06, "loss": 0.2505, "step": 10688 }, { "epoch": 0.5302346346544967, "grad_norm": 5.642702579498291, "learning_rate": 4.600191811353911e-06, "loss": 0.3286, "step": 10689 }, { "epoch": 0.5302842402896969, "grad_norm": 5.424497127532959, "learning_rate": 4.599407246212497e-06, "loss": 0.3156, "step": 10690 }, { "epoch": 0.5303338459248971, "grad_norm": 6.092266082763672, "learning_rate": 4.598622690997953e-06, "loss": 0.2049, "step": 10691 }, { "epoch": 0.5303834515600973, "grad_norm": 5.129354476928711, "learning_rate": 4.597838145729721e-06, "loss": 0.2625, "step": 10692 }, { "epoch": 0.5304330571952974, "grad_norm": 10.55755615234375, "learning_rate": 4.597053610427241e-06, "loss": 0.3449, "step": 10693 }, { "epoch": 0.5304826628304975, "grad_norm": 9.58154010772705, "learning_rate": 4.596269085109957e-06, "loss": 0.377, "step": 10694 }, { "epoch": 0.5305322684656977, "grad_norm": 5.0415191650390625, "learning_rate": 4.595484569797304e-06, "loss": 0.2598, "step": 10695 }, { "epoch": 0.5305818741008979, "grad_norm": 8.640406608581543, "learning_rate": 4.59470006450873e-06, "loss": 0.3315, "step": 10696 }, { "epoch": 0.530631479736098, "grad_norm": 15.65603256225586, "learning_rate": 4.5939155692636706e-06, "loss": 0.3363, "step": 10697 }, { "epoch": 0.5306810853712982, "grad_norm": 16.511638641357422, "learning_rate": 4.593131084081565e-06, "loss": 0.334, "step": 10698 }, { "epoch": 0.5307306910064984, "grad_norm": 6.411453723907471, "learning_rate": 4.592346608981858e-06, "loss": 0.2888, "step": 10699 }, { "epoch": 0.5307802966416985, "grad_norm": 5.069234371185303, "learning_rate": 4.591562143983986e-06, "loss": 0.3946, "step": 10700 }, { "epoch": 0.5308299022768986, "grad_norm": 4.823694705963135, "learning_rate": 4.590777689107386e-06, "loss": 0.1753, "step": 10701 }, { "epoch": 0.5308795079120988, "grad_norm": 8.019407272338867, "learning_rate": 4.589993244371503e-06, "loss": 0.4319, "step": 10702 }, { "epoch": 0.530929113547299, "grad_norm": 8.021768569946289, "learning_rate": 4.589208809795773e-06, "loss": 0.3135, "step": 10703 }, { "epoch": 0.5309787191824992, "grad_norm": 6.404660701751709, "learning_rate": 4.588424385399632e-06, "loss": 0.2872, "step": 10704 }, { "epoch": 0.5310283248176992, "grad_norm": 10.66586971282959, "learning_rate": 4.587639971202523e-06, "loss": 0.2525, "step": 10705 }, { "epoch": 0.5310779304528994, "grad_norm": 9.577766418457031, "learning_rate": 4.586855567223882e-06, "loss": 0.3484, "step": 10706 }, { "epoch": 0.5311275360880996, "grad_norm": 8.758386611938477, "learning_rate": 4.5860711734831454e-06, "loss": 0.3766, "step": 10707 }, { "epoch": 0.5311771417232998, "grad_norm": 6.568032264709473, "learning_rate": 4.585286789999754e-06, "loss": 0.3041, "step": 10708 }, { "epoch": 0.5312267473585, "grad_norm": 5.004272937774658, "learning_rate": 4.5845024167931415e-06, "loss": 0.2995, "step": 10709 }, { "epoch": 0.5312763529937001, "grad_norm": 6.38985013961792, "learning_rate": 4.583718053882749e-06, "loss": 0.3157, "step": 10710 }, { "epoch": 0.5313259586289002, "grad_norm": 9.268235206604004, "learning_rate": 4.5829337012880115e-06, "loss": 0.3327, "step": 10711 }, { "epoch": 0.5313755642641004, "grad_norm": 5.875737190246582, "learning_rate": 4.5821493590283635e-06, "loss": 0.2069, "step": 10712 }, { "epoch": 0.5314251698993006, "grad_norm": 5.756457805633545, "learning_rate": 4.581365027123246e-06, "loss": 0.2759, "step": 10713 }, { "epoch": 0.5314747755345007, "grad_norm": 8.5639066696167, "learning_rate": 4.580580705592092e-06, "loss": 0.3453, "step": 10714 }, { "epoch": 0.5315243811697009, "grad_norm": 7.1085309982299805, "learning_rate": 4.579796394454335e-06, "loss": 0.3198, "step": 10715 }, { "epoch": 0.5315739868049011, "grad_norm": 6.998960494995117, "learning_rate": 4.579012093729417e-06, "loss": 0.2994, "step": 10716 }, { "epoch": 0.5316235924401012, "grad_norm": 6.703194618225098, "learning_rate": 4.578227803436768e-06, "loss": 0.2147, "step": 10717 }, { "epoch": 0.5316731980753013, "grad_norm": 6.791098117828369, "learning_rate": 4.577443523595824e-06, "loss": 0.2605, "step": 10718 }, { "epoch": 0.5317228037105015, "grad_norm": 7.68022346496582, "learning_rate": 4.576659254226023e-06, "loss": 0.3246, "step": 10719 }, { "epoch": 0.5317724093457017, "grad_norm": 11.55704116821289, "learning_rate": 4.575874995346796e-06, "loss": 0.2822, "step": 10720 }, { "epoch": 0.5318220149809019, "grad_norm": 11.907225608825684, "learning_rate": 4.575090746977577e-06, "loss": 0.2958, "step": 10721 }, { "epoch": 0.5318716206161019, "grad_norm": 15.94967269897461, "learning_rate": 4.574306509137804e-06, "loss": 0.454, "step": 10722 }, { "epoch": 0.5319212262513021, "grad_norm": 3.5309503078460693, "learning_rate": 4.573522281846907e-06, "loss": 0.2277, "step": 10723 }, { "epoch": 0.5319708318865023, "grad_norm": 5.126935005187988, "learning_rate": 4.5727380651243205e-06, "loss": 0.276, "step": 10724 }, { "epoch": 0.5320204375217025, "grad_norm": 11.558815956115723, "learning_rate": 4.571953858989479e-06, "loss": 0.2692, "step": 10725 }, { "epoch": 0.5320700431569027, "grad_norm": 7.227250099182129, "learning_rate": 4.571169663461813e-06, "loss": 0.2544, "step": 10726 }, { "epoch": 0.5321196487921028, "grad_norm": 5.113390922546387, "learning_rate": 4.570385478560757e-06, "loss": 0.2571, "step": 10727 }, { "epoch": 0.5321692544273029, "grad_norm": 14.763543128967285, "learning_rate": 4.569601304305744e-06, "loss": 0.4433, "step": 10728 }, { "epoch": 0.5322188600625031, "grad_norm": 6.510784149169922, "learning_rate": 4.5688171407162054e-06, "loss": 0.2526, "step": 10729 }, { "epoch": 0.5322684656977033, "grad_norm": 6.512320041656494, "learning_rate": 4.568032987811573e-06, "loss": 0.313, "step": 10730 }, { "epoch": 0.5323180713329034, "grad_norm": 10.160172462463379, "learning_rate": 4.567248845611278e-06, "loss": 0.3739, "step": 10731 }, { "epoch": 0.5323676769681036, "grad_norm": 8.375785827636719, "learning_rate": 4.566464714134753e-06, "loss": 0.4236, "step": 10732 }, { "epoch": 0.5324172826033038, "grad_norm": 7.606430530548096, "learning_rate": 4.565680593401428e-06, "loss": 0.2873, "step": 10733 }, { "epoch": 0.5324668882385039, "grad_norm": 7.112310886383057, "learning_rate": 4.564896483430735e-06, "loss": 0.3493, "step": 10734 }, { "epoch": 0.532516493873704, "grad_norm": 7.950531482696533, "learning_rate": 4.564112384242103e-06, "loss": 0.2522, "step": 10735 }, { "epoch": 0.5325660995089042, "grad_norm": 7.420924186706543, "learning_rate": 4.563328295854964e-06, "loss": 0.3539, "step": 10736 }, { "epoch": 0.5326157051441044, "grad_norm": 7.651327133178711, "learning_rate": 4.562544218288748e-06, "loss": 0.269, "step": 10737 }, { "epoch": 0.5326653107793046, "grad_norm": 7.7921833992004395, "learning_rate": 4.561760151562882e-06, "loss": 0.3078, "step": 10738 }, { "epoch": 0.5327149164145046, "grad_norm": 6.983124256134033, "learning_rate": 4.5609760956968e-06, "loss": 0.3127, "step": 10739 }, { "epoch": 0.5327645220497048, "grad_norm": 10.251583099365234, "learning_rate": 4.560192050709929e-06, "loss": 0.3804, "step": 10740 }, { "epoch": 0.532814127684905, "grad_norm": 5.800336837768555, "learning_rate": 4.559408016621695e-06, "loss": 0.317, "step": 10741 }, { "epoch": 0.5328637333201052, "grad_norm": 7.418840408325195, "learning_rate": 4.558623993451533e-06, "loss": 0.3073, "step": 10742 }, { "epoch": 0.5329133389553053, "grad_norm": 9.639972686767578, "learning_rate": 4.557839981218865e-06, "loss": 0.3948, "step": 10743 }, { "epoch": 0.5329629445905055, "grad_norm": 12.186348915100098, "learning_rate": 4.557055979943125e-06, "loss": 0.3228, "step": 10744 }, { "epoch": 0.5330125502257056, "grad_norm": 6.319822311401367, "learning_rate": 4.556271989643738e-06, "loss": 0.3661, "step": 10745 }, { "epoch": 0.5330621558609058, "grad_norm": 6.796303749084473, "learning_rate": 4.55548801034013e-06, "loss": 0.343, "step": 10746 }, { "epoch": 0.533111761496106, "grad_norm": 6.9757771492004395, "learning_rate": 4.5547040420517315e-06, "loss": 0.2635, "step": 10747 }, { "epoch": 0.5331613671313061, "grad_norm": 20.1612548828125, "learning_rate": 4.553920084797969e-06, "loss": 0.3219, "step": 10748 }, { "epoch": 0.5332109727665063, "grad_norm": 7.304527759552002, "learning_rate": 4.553136138598265e-06, "loss": 0.3224, "step": 10749 }, { "epoch": 0.5332605784017065, "grad_norm": 15.2355375289917, "learning_rate": 4.552352203472053e-06, "loss": 0.3396, "step": 10750 }, { "epoch": 0.5333101840369066, "grad_norm": 6.112907886505127, "learning_rate": 4.551568279438756e-06, "loss": 0.3819, "step": 10751 }, { "epoch": 0.5333597896721067, "grad_norm": 7.36737585067749, "learning_rate": 4.550784366517797e-06, "loss": 0.263, "step": 10752 }, { "epoch": 0.5334093953073069, "grad_norm": 5.977711200714111, "learning_rate": 4.550000464728608e-06, "loss": 0.3369, "step": 10753 }, { "epoch": 0.5334590009425071, "grad_norm": 2.960876941680908, "learning_rate": 4.54921657409061e-06, "loss": 0.2143, "step": 10754 }, { "epoch": 0.5335086065777073, "grad_norm": 12.218278884887695, "learning_rate": 4.548432694623228e-06, "loss": 0.3814, "step": 10755 }, { "epoch": 0.5335582122129073, "grad_norm": 5.129814147949219, "learning_rate": 4.5476488263458895e-06, "loss": 0.285, "step": 10756 }, { "epoch": 0.5336078178481075, "grad_norm": 8.331990242004395, "learning_rate": 4.546864969278017e-06, "loss": 0.4022, "step": 10757 }, { "epoch": 0.5336574234833077, "grad_norm": 4.303906440734863, "learning_rate": 4.546081123439035e-06, "loss": 0.3313, "step": 10758 }, { "epoch": 0.5337070291185079, "grad_norm": 4.774893760681152, "learning_rate": 4.545297288848369e-06, "loss": 0.2391, "step": 10759 }, { "epoch": 0.533756634753708, "grad_norm": 10.580562591552734, "learning_rate": 4.544513465525443e-06, "loss": 0.2431, "step": 10760 }, { "epoch": 0.5338062403889082, "grad_norm": 8.79404354095459, "learning_rate": 4.543729653489677e-06, "loss": 0.368, "step": 10761 }, { "epoch": 0.5338558460241083, "grad_norm": 4.513195514678955, "learning_rate": 4.5429458527604976e-06, "loss": 0.2567, "step": 10762 }, { "epoch": 0.5339054516593085, "grad_norm": 7.2688140869140625, "learning_rate": 4.542162063357325e-06, "loss": 0.3232, "step": 10763 }, { "epoch": 0.5339550572945087, "grad_norm": 5.366124629974365, "learning_rate": 4.5413782852995855e-06, "loss": 0.2837, "step": 10764 }, { "epoch": 0.5340046629297088, "grad_norm": 6.809199333190918, "learning_rate": 4.5405945186067e-06, "loss": 0.3387, "step": 10765 }, { "epoch": 0.534054268564909, "grad_norm": 6.763419151306152, "learning_rate": 4.539810763298088e-06, "loss": 0.2067, "step": 10766 }, { "epoch": 0.5341038742001092, "grad_norm": 6.505075454711914, "learning_rate": 4.5390270193931765e-06, "loss": 0.2977, "step": 10767 }, { "epoch": 0.5341534798353093, "grad_norm": 6.619537353515625, "learning_rate": 4.538243286911383e-06, "loss": 0.3115, "step": 10768 }, { "epoch": 0.5342030854705094, "grad_norm": 5.234086990356445, "learning_rate": 4.537459565872127e-06, "loss": 0.2892, "step": 10769 }, { "epoch": 0.5342526911057096, "grad_norm": 6.619757652282715, "learning_rate": 4.536675856294835e-06, "loss": 0.3333, "step": 10770 }, { "epoch": 0.5343022967409098, "grad_norm": 9.198715209960938, "learning_rate": 4.5358921581989255e-06, "loss": 0.4116, "step": 10771 }, { "epoch": 0.53435190237611, "grad_norm": 8.396726608276367, "learning_rate": 4.535108471603815e-06, "loss": 0.2809, "step": 10772 }, { "epoch": 0.53440150801131, "grad_norm": 10.918152809143066, "learning_rate": 4.534324796528929e-06, "loss": 0.3029, "step": 10773 }, { "epoch": 0.5344511136465102, "grad_norm": 22.50365447998047, "learning_rate": 4.533541132993686e-06, "loss": 0.3619, "step": 10774 }, { "epoch": 0.5345007192817104, "grad_norm": 6.857941627502441, "learning_rate": 4.5327574810175025e-06, "loss": 0.3549, "step": 10775 }, { "epoch": 0.5345503249169106, "grad_norm": 7.84494686126709, "learning_rate": 4.531973840619802e-06, "loss": 0.3797, "step": 10776 }, { "epoch": 0.5345999305521107, "grad_norm": 4.540009498596191, "learning_rate": 4.53119021182e-06, "loss": 0.2981, "step": 10777 }, { "epoch": 0.5346495361873109, "grad_norm": 6.306864261627197, "learning_rate": 4.530406594637517e-06, "loss": 0.3142, "step": 10778 }, { "epoch": 0.534699141822511, "grad_norm": 5.740719318389893, "learning_rate": 4.529622989091772e-06, "loss": 0.3324, "step": 10779 }, { "epoch": 0.5347487474577112, "grad_norm": 8.68764877319336, "learning_rate": 4.52883939520218e-06, "loss": 0.4124, "step": 10780 }, { "epoch": 0.5347983530929113, "grad_norm": 9.536020278930664, "learning_rate": 4.528055812988162e-06, "loss": 0.3645, "step": 10781 }, { "epoch": 0.5348479587281115, "grad_norm": 6.528567314147949, "learning_rate": 4.527272242469135e-06, "loss": 0.2946, "step": 10782 }, { "epoch": 0.5348975643633117, "grad_norm": 6.669495105743408, "learning_rate": 4.526488683664515e-06, "loss": 0.3554, "step": 10783 }, { "epoch": 0.5349471699985119, "grad_norm": 14.337128639221191, "learning_rate": 4.525705136593721e-06, "loss": 0.406, "step": 10784 }, { "epoch": 0.534996775633712, "grad_norm": 5.419307231903076, "learning_rate": 4.524921601276166e-06, "loss": 0.2729, "step": 10785 }, { "epoch": 0.5350463812689121, "grad_norm": 5.1836137771606445, "learning_rate": 4.52413807773127e-06, "loss": 0.2857, "step": 10786 }, { "epoch": 0.5350959869041123, "grad_norm": 4.91018533706665, "learning_rate": 4.523354565978447e-06, "loss": 0.3334, "step": 10787 }, { "epoch": 0.5351455925393125, "grad_norm": 13.764915466308594, "learning_rate": 4.522571066037115e-06, "loss": 0.3843, "step": 10788 }, { "epoch": 0.5351951981745127, "grad_norm": 3.3844919204711914, "learning_rate": 4.521787577926686e-06, "loss": 0.2484, "step": 10789 }, { "epoch": 0.5352448038097127, "grad_norm": 6.71306848526001, "learning_rate": 4.521004101666578e-06, "loss": 0.2786, "step": 10790 }, { "epoch": 0.5352944094449129, "grad_norm": 3.987978219985962, "learning_rate": 4.520220637276206e-06, "loss": 0.2475, "step": 10791 }, { "epoch": 0.5353440150801131, "grad_norm": 5.716221809387207, "learning_rate": 4.519437184774981e-06, "loss": 0.2682, "step": 10792 }, { "epoch": 0.5353936207153133, "grad_norm": 8.56722640991211, "learning_rate": 4.5186537441823225e-06, "loss": 0.3417, "step": 10793 }, { "epoch": 0.5354432263505134, "grad_norm": 7.184267997741699, "learning_rate": 4.517870315517642e-06, "loss": 0.1998, "step": 10794 }, { "epoch": 0.5354928319857136, "grad_norm": 9.200834274291992, "learning_rate": 4.5170868988003505e-06, "loss": 0.4012, "step": 10795 }, { "epoch": 0.5355424376209137, "grad_norm": 7.991308212280273, "learning_rate": 4.516303494049867e-06, "loss": 0.2974, "step": 10796 }, { "epoch": 0.5355920432561139, "grad_norm": 7.952462673187256, "learning_rate": 4.5155201012855985e-06, "loss": 0.2417, "step": 10797 }, { "epoch": 0.535641648891314, "grad_norm": 10.974203109741211, "learning_rate": 4.514736720526964e-06, "loss": 0.4475, "step": 10798 }, { "epoch": 0.5356912545265142, "grad_norm": 8.035144805908203, "learning_rate": 4.513953351793373e-06, "loss": 0.3906, "step": 10799 }, { "epoch": 0.5357408601617144, "grad_norm": 8.0643949508667, "learning_rate": 4.513169995104236e-06, "loss": 0.2719, "step": 10800 }, { "epoch": 0.5357904657969146, "grad_norm": 6.405031204223633, "learning_rate": 4.512386650478968e-06, "loss": 0.314, "step": 10801 }, { "epoch": 0.5358400714321147, "grad_norm": 4.250925064086914, "learning_rate": 4.5116033179369804e-06, "loss": 0.2245, "step": 10802 }, { "epoch": 0.5358896770673148, "grad_norm": 6.72841215133667, "learning_rate": 4.510819997497681e-06, "loss": 0.23, "step": 10803 }, { "epoch": 0.535939282702515, "grad_norm": 5.224693298339844, "learning_rate": 4.5100366891804845e-06, "loss": 0.2617, "step": 10804 }, { "epoch": 0.5359888883377152, "grad_norm": 6.336586952209473, "learning_rate": 4.509253393004802e-06, "loss": 0.2433, "step": 10805 }, { "epoch": 0.5360384939729154, "grad_norm": 6.252139568328857, "learning_rate": 4.50847010899004e-06, "loss": 0.2897, "step": 10806 }, { "epoch": 0.5360880996081154, "grad_norm": 5.6839985847473145, "learning_rate": 4.507686837155612e-06, "loss": 0.307, "step": 10807 }, { "epoch": 0.5361377052433156, "grad_norm": 9.00601863861084, "learning_rate": 4.506903577520928e-06, "loss": 0.3198, "step": 10808 }, { "epoch": 0.5361873108785158, "grad_norm": 5.5337934494018555, "learning_rate": 4.5061203301053935e-06, "loss": 0.2633, "step": 10809 }, { "epoch": 0.536236916513716, "grad_norm": 13.800924301147461, "learning_rate": 4.5053370949284235e-06, "loss": 0.342, "step": 10810 }, { "epoch": 0.5362865221489161, "grad_norm": 5.2435302734375, "learning_rate": 4.504553872009424e-06, "loss": 0.2512, "step": 10811 }, { "epoch": 0.5363361277841163, "grad_norm": 9.735013961791992, "learning_rate": 4.5037706613678005e-06, "loss": 0.3186, "step": 10812 }, { "epoch": 0.5363857334193164, "grad_norm": 4.17939567565918, "learning_rate": 4.502987463022967e-06, "loss": 0.3128, "step": 10813 }, { "epoch": 0.5364353390545166, "grad_norm": 5.893474102020264, "learning_rate": 4.50220427699433e-06, "loss": 0.2319, "step": 10814 }, { "epoch": 0.5364849446897167, "grad_norm": 6.8205461502075195, "learning_rate": 4.501421103301294e-06, "loss": 0.2424, "step": 10815 }, { "epoch": 0.5365345503249169, "grad_norm": 13.094473838806152, "learning_rate": 4.50063794196327e-06, "loss": 0.3078, "step": 10816 }, { "epoch": 0.5365841559601171, "grad_norm": 6.925594329833984, "learning_rate": 4.499854792999661e-06, "loss": 0.3592, "step": 10817 }, { "epoch": 0.5366337615953173, "grad_norm": 4.971644401550293, "learning_rate": 4.499071656429879e-06, "loss": 0.1974, "step": 10818 }, { "epoch": 0.5366833672305173, "grad_norm": 4.755084991455078, "learning_rate": 4.498288532273329e-06, "loss": 0.2821, "step": 10819 }, { "epoch": 0.5367329728657175, "grad_norm": 9.404694557189941, "learning_rate": 4.497505420549413e-06, "loss": 0.2963, "step": 10820 }, { "epoch": 0.5367825785009177, "grad_norm": 5.284237384796143, "learning_rate": 4.496722321277543e-06, "loss": 0.2574, "step": 10821 }, { "epoch": 0.5368321841361179, "grad_norm": 7.10806131362915, "learning_rate": 4.49593923447712e-06, "loss": 0.3526, "step": 10822 }, { "epoch": 0.5368817897713181, "grad_norm": 7.603214740753174, "learning_rate": 4.49515616016755e-06, "loss": 0.2676, "step": 10823 }, { "epoch": 0.5369313954065181, "grad_norm": 16.779972076416016, "learning_rate": 4.49437309836824e-06, "loss": 0.2154, "step": 10824 }, { "epoch": 0.5369810010417183, "grad_norm": 5.601620674133301, "learning_rate": 4.493590049098593e-06, "loss": 0.2439, "step": 10825 }, { "epoch": 0.5370306066769185, "grad_norm": 5.073659896850586, "learning_rate": 4.492807012378013e-06, "loss": 0.2548, "step": 10826 }, { "epoch": 0.5370802123121187, "grad_norm": 9.225974082946777, "learning_rate": 4.492023988225906e-06, "loss": 0.3827, "step": 10827 }, { "epoch": 0.5371298179473188, "grad_norm": 3.6506619453430176, "learning_rate": 4.491240976661674e-06, "loss": 0.1859, "step": 10828 }, { "epoch": 0.537179423582519, "grad_norm": 9.708152770996094, "learning_rate": 4.4904579777047195e-06, "loss": 0.3721, "step": 10829 }, { "epoch": 0.5372290292177191, "grad_norm": 6.548347473144531, "learning_rate": 4.489674991374448e-06, "loss": 0.323, "step": 10830 }, { "epoch": 0.5372786348529193, "grad_norm": 8.375101089477539, "learning_rate": 4.48889201769026e-06, "loss": 0.3708, "step": 10831 }, { "epoch": 0.5373282404881194, "grad_norm": 7.127508163452148, "learning_rate": 4.48810905667156e-06, "loss": 0.356, "step": 10832 }, { "epoch": 0.5373778461233196, "grad_norm": 10.872597694396973, "learning_rate": 4.48732610833775e-06, "loss": 0.3892, "step": 10833 }, { "epoch": 0.5374274517585198, "grad_norm": 6.509859561920166, "learning_rate": 4.486543172708228e-06, "loss": 0.2677, "step": 10834 }, { "epoch": 0.53747705739372, "grad_norm": 7.191247463226318, "learning_rate": 4.485760249802401e-06, "loss": 0.326, "step": 10835 }, { "epoch": 0.53752666302892, "grad_norm": 6.295041084289551, "learning_rate": 4.484977339639666e-06, "loss": 0.1951, "step": 10836 }, { "epoch": 0.5375762686641202, "grad_norm": 7.63931941986084, "learning_rate": 4.484194442239426e-06, "loss": 0.3189, "step": 10837 }, { "epoch": 0.5376258742993204, "grad_norm": 8.611138343811035, "learning_rate": 4.483411557621082e-06, "loss": 0.2032, "step": 10838 }, { "epoch": 0.5376754799345206, "grad_norm": 8.185550689697266, "learning_rate": 4.482628685804033e-06, "loss": 0.3009, "step": 10839 }, { "epoch": 0.5377250855697208, "grad_norm": 12.067953109741211, "learning_rate": 4.481845826807678e-06, "loss": 0.3948, "step": 10840 }, { "epoch": 0.5377746912049208, "grad_norm": 7.258534908294678, "learning_rate": 4.4810629806514185e-06, "loss": 0.3183, "step": 10841 }, { "epoch": 0.537824296840121, "grad_norm": 22.88905143737793, "learning_rate": 4.480280147354655e-06, "loss": 0.5174, "step": 10842 }, { "epoch": 0.5378739024753212, "grad_norm": 9.187211036682129, "learning_rate": 4.479497326936782e-06, "loss": 0.2952, "step": 10843 }, { "epoch": 0.5379235081105214, "grad_norm": 4.446256160736084, "learning_rate": 4.478714519417201e-06, "loss": 0.2808, "step": 10844 }, { "epoch": 0.5379731137457215, "grad_norm": 4.8049139976501465, "learning_rate": 4.477931724815311e-06, "loss": 0.2929, "step": 10845 }, { "epoch": 0.5380227193809217, "grad_norm": 5.81348991394043, "learning_rate": 4.477148943150507e-06, "loss": 0.2428, "step": 10846 }, { "epoch": 0.5380723250161218, "grad_norm": 4.549961566925049, "learning_rate": 4.476366174442191e-06, "loss": 0.2975, "step": 10847 }, { "epoch": 0.538121930651322, "grad_norm": 8.923849105834961, "learning_rate": 4.475583418709758e-06, "loss": 0.3394, "step": 10848 }, { "epoch": 0.5381715362865221, "grad_norm": 6.469094276428223, "learning_rate": 4.474800675972603e-06, "loss": 0.3335, "step": 10849 }, { "epoch": 0.5382211419217223, "grad_norm": 5.2021942138671875, "learning_rate": 4.4740179462501275e-06, "loss": 0.37, "step": 10850 }, { "epoch": 0.5382707475569225, "grad_norm": 14.355632781982422, "learning_rate": 4.473235229561723e-06, "loss": 0.327, "step": 10851 }, { "epoch": 0.5383203531921227, "grad_norm": 13.964637756347656, "learning_rate": 4.472452525926788e-06, "loss": 0.3803, "step": 10852 }, { "epoch": 0.5383699588273227, "grad_norm": 8.743815422058105, "learning_rate": 4.47166983536472e-06, "loss": 0.2317, "step": 10853 }, { "epoch": 0.5384195644625229, "grad_norm": 7.890249729156494, "learning_rate": 4.4708871578949095e-06, "loss": 0.3208, "step": 10854 }, { "epoch": 0.5384691700977231, "grad_norm": 4.395336151123047, "learning_rate": 4.470104493536758e-06, "loss": 0.2243, "step": 10855 }, { "epoch": 0.5385187757329233, "grad_norm": 8.002692222595215, "learning_rate": 4.469321842309656e-06, "loss": 0.3044, "step": 10856 }, { "epoch": 0.5385683813681235, "grad_norm": 6.034592628479004, "learning_rate": 4.468539204232996e-06, "loss": 0.3446, "step": 10857 }, { "epoch": 0.5386179870033235, "grad_norm": 5.5588788986206055, "learning_rate": 4.467756579326178e-06, "loss": 0.2805, "step": 10858 }, { "epoch": 0.5386675926385237, "grad_norm": 11.281806945800781, "learning_rate": 4.466973967608592e-06, "loss": 0.3967, "step": 10859 }, { "epoch": 0.5387171982737239, "grad_norm": 6.1476826667785645, "learning_rate": 4.466191369099631e-06, "loss": 0.3014, "step": 10860 }, { "epoch": 0.5387668039089241, "grad_norm": 4.668106555938721, "learning_rate": 4.46540878381869e-06, "loss": 0.3158, "step": 10861 }, { "epoch": 0.5388164095441242, "grad_norm": 3.9596378803253174, "learning_rate": 4.464626211785162e-06, "loss": 0.2516, "step": 10862 }, { "epoch": 0.5388660151793244, "grad_norm": 15.059908866882324, "learning_rate": 4.463843653018437e-06, "loss": 0.3697, "step": 10863 }, { "epoch": 0.5389156208145245, "grad_norm": 6.382205009460449, "learning_rate": 4.46306110753791e-06, "loss": 0.311, "step": 10864 }, { "epoch": 0.5389652264497247, "grad_norm": 4.472252368927002, "learning_rate": 4.462278575362973e-06, "loss": 0.1974, "step": 10865 }, { "epoch": 0.5390148320849248, "grad_norm": 13.543780326843262, "learning_rate": 4.461496056513013e-06, "loss": 0.3613, "step": 10866 }, { "epoch": 0.539064437720125, "grad_norm": 7.485466480255127, "learning_rate": 4.460713551007426e-06, "loss": 0.2506, "step": 10867 }, { "epoch": 0.5391140433553252, "grad_norm": 8.774378776550293, "learning_rate": 4.4599310588656e-06, "loss": 0.2643, "step": 10868 }, { "epoch": 0.5391636489905253, "grad_norm": 7.226963996887207, "learning_rate": 4.4591485801069284e-06, "loss": 0.3179, "step": 10869 }, { "epoch": 0.5392132546257254, "grad_norm": 12.106852531433105, "learning_rate": 4.4583661147508e-06, "loss": 0.4541, "step": 10870 }, { "epoch": 0.5392628602609256, "grad_norm": 4.12429666519165, "learning_rate": 4.457583662816601e-06, "loss": 0.2486, "step": 10871 }, { "epoch": 0.5393124658961258, "grad_norm": 10.824067115783691, "learning_rate": 4.456801224323727e-06, "loss": 0.339, "step": 10872 }, { "epoch": 0.539362071531326, "grad_norm": 7.416697978973389, "learning_rate": 4.456018799291565e-06, "loss": 0.3282, "step": 10873 }, { "epoch": 0.5394116771665262, "grad_norm": 5.216249942779541, "learning_rate": 4.4552363877395005e-06, "loss": 0.1992, "step": 10874 }, { "epoch": 0.5394612828017262, "grad_norm": 7.346091270446777, "learning_rate": 4.454453989686927e-06, "loss": 0.3316, "step": 10875 }, { "epoch": 0.5395108884369264, "grad_norm": 4.3180694580078125, "learning_rate": 4.45367160515323e-06, "loss": 0.23, "step": 10876 }, { "epoch": 0.5395604940721266, "grad_norm": 9.215753555297852, "learning_rate": 4.452889234157796e-06, "loss": 0.3691, "step": 10877 }, { "epoch": 0.5396100997073268, "grad_norm": 10.392279624938965, "learning_rate": 4.452106876720017e-06, "loss": 0.4047, "step": 10878 }, { "epoch": 0.5396597053425269, "grad_norm": 8.63752269744873, "learning_rate": 4.4513245328592765e-06, "loss": 0.2317, "step": 10879 }, { "epoch": 0.5397093109777271, "grad_norm": 9.299932479858398, "learning_rate": 4.450542202594961e-06, "loss": 0.3907, "step": 10880 }, { "epoch": 0.5397589166129272, "grad_norm": 6.184115409851074, "learning_rate": 4.44975988594646e-06, "loss": 0.315, "step": 10881 }, { "epoch": 0.5398085222481274, "grad_norm": 5.601175785064697, "learning_rate": 4.448977582933157e-06, "loss": 0.2532, "step": 10882 }, { "epoch": 0.5398581278833275, "grad_norm": 4.412271976470947, "learning_rate": 4.448195293574438e-06, "loss": 0.2548, "step": 10883 }, { "epoch": 0.5399077335185277, "grad_norm": 8.028067588806152, "learning_rate": 4.4474130178896905e-06, "loss": 0.2933, "step": 10884 }, { "epoch": 0.5399573391537279, "grad_norm": 9.56920051574707, "learning_rate": 4.446630755898298e-06, "loss": 0.4102, "step": 10885 }, { "epoch": 0.540006944788928, "grad_norm": 4.7387847900390625, "learning_rate": 4.445848507619644e-06, "loss": 0.258, "step": 10886 }, { "epoch": 0.5400565504241281, "grad_norm": 5.850640296936035, "learning_rate": 4.4450662730731175e-06, "loss": 0.2851, "step": 10887 }, { "epoch": 0.5401061560593283, "grad_norm": 4.525616645812988, "learning_rate": 4.444284052278097e-06, "loss": 0.2552, "step": 10888 }, { "epoch": 0.5401557616945285, "grad_norm": 6.325769901275635, "learning_rate": 4.4435018452539715e-06, "loss": 0.3021, "step": 10889 }, { "epoch": 0.5402053673297287, "grad_norm": 7.63157844543457, "learning_rate": 4.442719652020121e-06, "loss": 0.3567, "step": 10890 }, { "epoch": 0.5402549729649289, "grad_norm": 5.425041198730469, "learning_rate": 4.441937472595929e-06, "loss": 0.3106, "step": 10891 }, { "epoch": 0.5403045786001289, "grad_norm": 7.228229522705078, "learning_rate": 4.441155307000781e-06, "loss": 0.3019, "step": 10892 }, { "epoch": 0.5403541842353291, "grad_norm": 7.66350793838501, "learning_rate": 4.440373155254055e-06, "loss": 0.3003, "step": 10893 }, { "epoch": 0.5404037898705293, "grad_norm": 4.193587779998779, "learning_rate": 4.439591017375136e-06, "loss": 0.2237, "step": 10894 }, { "epoch": 0.5404533955057295, "grad_norm": 5.663127422332764, "learning_rate": 4.438808893383406e-06, "loss": 0.3089, "step": 10895 }, { "epoch": 0.5405030011409296, "grad_norm": 20.02022933959961, "learning_rate": 4.4380267832982445e-06, "loss": 0.5069, "step": 10896 }, { "epoch": 0.5405526067761298, "grad_norm": 4.666128635406494, "learning_rate": 4.437244687139033e-06, "loss": 0.2156, "step": 10897 }, { "epoch": 0.5406022124113299, "grad_norm": 6.031500339508057, "learning_rate": 4.436462604925153e-06, "loss": 0.2075, "step": 10898 }, { "epoch": 0.5406518180465301, "grad_norm": 5.489628791809082, "learning_rate": 4.435680536675987e-06, "loss": 0.2582, "step": 10899 }, { "epoch": 0.5407014236817302, "grad_norm": 10.540227890014648, "learning_rate": 4.43489848241091e-06, "loss": 0.3385, "step": 10900 }, { "epoch": 0.5407510293169304, "grad_norm": 7.578526020050049, "learning_rate": 4.4341164421493046e-06, "loss": 0.3961, "step": 10901 }, { "epoch": 0.5408006349521306, "grad_norm": 3.944629430770874, "learning_rate": 4.433334415910551e-06, "loss": 0.2356, "step": 10902 }, { "epoch": 0.5408502405873307, "grad_norm": 6.9875102043151855, "learning_rate": 4.432552403714024e-06, "loss": 0.3603, "step": 10903 }, { "epoch": 0.5408998462225308, "grad_norm": 8.006089210510254, "learning_rate": 4.431770405579108e-06, "loss": 0.3282, "step": 10904 }, { "epoch": 0.540949451857731, "grad_norm": 9.372171401977539, "learning_rate": 4.430988421525177e-06, "loss": 0.4535, "step": 10905 }, { "epoch": 0.5409990574929312, "grad_norm": 5.4071831703186035, "learning_rate": 4.43020645157161e-06, "loss": 0.2701, "step": 10906 }, { "epoch": 0.5410486631281314, "grad_norm": 9.852680206298828, "learning_rate": 4.429424495737787e-06, "loss": 0.4531, "step": 10907 }, { "epoch": 0.5410982687633316, "grad_norm": 5.202315330505371, "learning_rate": 4.42864255404308e-06, "loss": 0.2611, "step": 10908 }, { "epoch": 0.5411478743985316, "grad_norm": 6.123688697814941, "learning_rate": 4.427860626506871e-06, "loss": 0.2147, "step": 10909 }, { "epoch": 0.5411974800337318, "grad_norm": 4.910115718841553, "learning_rate": 4.427078713148535e-06, "loss": 0.2481, "step": 10910 }, { "epoch": 0.541247085668932, "grad_norm": 4.385800361633301, "learning_rate": 4.426296813987446e-06, "loss": 0.1931, "step": 10911 }, { "epoch": 0.5412966913041322, "grad_norm": 7.8649678230285645, "learning_rate": 4.425514929042982e-06, "loss": 0.3484, "step": 10912 }, { "epoch": 0.5413462969393323, "grad_norm": 9.555788040161133, "learning_rate": 4.424733058334519e-06, "loss": 0.3479, "step": 10913 }, { "epoch": 0.5413959025745325, "grad_norm": 5.4689555168151855, "learning_rate": 4.42395120188143e-06, "loss": 0.2919, "step": 10914 }, { "epoch": 0.5414455082097326, "grad_norm": 4.959980487823486, "learning_rate": 4.423169359703091e-06, "loss": 0.2378, "step": 10915 }, { "epoch": 0.5414951138449328, "grad_norm": 15.287491798400879, "learning_rate": 4.422387531818877e-06, "loss": 0.2634, "step": 10916 }, { "epoch": 0.5415447194801329, "grad_norm": 9.982941627502441, "learning_rate": 4.421605718248159e-06, "loss": 0.2513, "step": 10917 }, { "epoch": 0.5415943251153331, "grad_norm": 14.037216186523438, "learning_rate": 4.420823919010315e-06, "loss": 0.3775, "step": 10918 }, { "epoch": 0.5416439307505333, "grad_norm": 10.446613311767578, "learning_rate": 4.420042134124717e-06, "loss": 0.3488, "step": 10919 }, { "epoch": 0.5416935363857334, "grad_norm": 7.264710903167725, "learning_rate": 4.419260363610734e-06, "loss": 0.2581, "step": 10920 }, { "epoch": 0.5417431420209335, "grad_norm": 5.251979827880859, "learning_rate": 4.4184786074877454e-06, "loss": 0.3254, "step": 10921 }, { "epoch": 0.5417927476561337, "grad_norm": 6.766683578491211, "learning_rate": 4.4176968657751165e-06, "loss": 0.2896, "step": 10922 }, { "epoch": 0.5418423532913339, "grad_norm": 7.059072971343994, "learning_rate": 4.416915138492226e-06, "loss": 0.3511, "step": 10923 }, { "epoch": 0.5418919589265341, "grad_norm": 5.116444110870361, "learning_rate": 4.416133425658441e-06, "loss": 0.3192, "step": 10924 }, { "epoch": 0.5419415645617343, "grad_norm": 6.934267997741699, "learning_rate": 4.415351727293132e-06, "loss": 0.3035, "step": 10925 }, { "epoch": 0.5419911701969343, "grad_norm": 23.335947036743164, "learning_rate": 4.414570043415674e-06, "loss": 0.3629, "step": 10926 }, { "epoch": 0.5420407758321345, "grad_norm": 6.283985137939453, "learning_rate": 4.413788374045434e-06, "loss": 0.286, "step": 10927 }, { "epoch": 0.5420903814673347, "grad_norm": 6.998326778411865, "learning_rate": 4.413006719201783e-06, "loss": 0.3608, "step": 10928 }, { "epoch": 0.5421399871025349, "grad_norm": 8.625761032104492, "learning_rate": 4.412225078904092e-06, "loss": 0.2756, "step": 10929 }, { "epoch": 0.542189592737735, "grad_norm": 7.682481288909912, "learning_rate": 4.41144345317173e-06, "loss": 0.3575, "step": 10930 }, { "epoch": 0.5422391983729352, "grad_norm": 23.964330673217773, "learning_rate": 4.410661842024064e-06, "loss": 0.2332, "step": 10931 }, { "epoch": 0.5422888040081353, "grad_norm": 7.389254570007324, "learning_rate": 4.409880245480464e-06, "loss": 0.2932, "step": 10932 }, { "epoch": 0.5423384096433355, "grad_norm": 5.018232822418213, "learning_rate": 4.4090986635603e-06, "loss": 0.2747, "step": 10933 }, { "epoch": 0.5423880152785356, "grad_norm": 4.7328009605407715, "learning_rate": 4.408317096282936e-06, "loss": 0.3036, "step": 10934 }, { "epoch": 0.5424376209137358, "grad_norm": 6.812518119812012, "learning_rate": 4.4075355436677445e-06, "loss": 0.3756, "step": 10935 }, { "epoch": 0.542487226548936, "grad_norm": 6.870270252227783, "learning_rate": 4.40675400573409e-06, "loss": 0.2233, "step": 10936 }, { "epoch": 0.5425368321841361, "grad_norm": 5.695530414581299, "learning_rate": 4.405972482501337e-06, "loss": 0.3285, "step": 10937 }, { "epoch": 0.5425864378193362, "grad_norm": 7.601919174194336, "learning_rate": 4.4051909739888576e-06, "loss": 0.2951, "step": 10938 }, { "epoch": 0.5426360434545364, "grad_norm": 11.494412422180176, "learning_rate": 4.404409480216013e-06, "loss": 0.4802, "step": 10939 }, { "epoch": 0.5426856490897366, "grad_norm": 6.417433261871338, "learning_rate": 4.403628001202171e-06, "loss": 0.303, "step": 10940 }, { "epoch": 0.5427352547249368, "grad_norm": 8.242059707641602, "learning_rate": 4.402846536966699e-06, "loss": 0.2517, "step": 10941 }, { "epoch": 0.542784860360137, "grad_norm": 3.890918016433716, "learning_rate": 4.4020650875289575e-06, "loss": 0.2309, "step": 10942 }, { "epoch": 0.542834465995337, "grad_norm": 7.15536642074585, "learning_rate": 4.401283652908315e-06, "loss": 0.265, "step": 10943 }, { "epoch": 0.5428840716305372, "grad_norm": 8.653519630432129, "learning_rate": 4.400502233124134e-06, "loss": 0.3512, "step": 10944 }, { "epoch": 0.5429336772657374, "grad_norm": 6.354589462280273, "learning_rate": 4.399720828195778e-06, "loss": 0.2424, "step": 10945 }, { "epoch": 0.5429832829009376, "grad_norm": 7.288376331329346, "learning_rate": 4.398939438142613e-06, "loss": 0.2944, "step": 10946 }, { "epoch": 0.5430328885361377, "grad_norm": 13.625127792358398, "learning_rate": 4.398158062983999e-06, "loss": 0.2753, "step": 10947 }, { "epoch": 0.5430824941713379, "grad_norm": 5.1603617668151855, "learning_rate": 4.397376702739301e-06, "loss": 0.2787, "step": 10948 }, { "epoch": 0.543132099806538, "grad_norm": 6.833254814147949, "learning_rate": 4.3965953574278825e-06, "loss": 0.3082, "step": 10949 }, { "epoch": 0.5431817054417382, "grad_norm": 15.72286319732666, "learning_rate": 4.395814027069102e-06, "loss": 0.3456, "step": 10950 }, { "epoch": 0.5432313110769383, "grad_norm": 6.919778347015381, "learning_rate": 4.395032711682324e-06, "loss": 0.3466, "step": 10951 }, { "epoch": 0.5432809167121385, "grad_norm": 7.949720859527588, "learning_rate": 4.394251411286908e-06, "loss": 0.3484, "step": 10952 }, { "epoch": 0.5433305223473387, "grad_norm": 5.172987937927246, "learning_rate": 4.393470125902218e-06, "loss": 0.2369, "step": 10953 }, { "epoch": 0.5433801279825388, "grad_norm": 6.807690620422363, "learning_rate": 4.392688855547611e-06, "loss": 0.2648, "step": 10954 }, { "epoch": 0.5434297336177389, "grad_norm": 19.049909591674805, "learning_rate": 4.3919076002424495e-06, "loss": 0.4394, "step": 10955 }, { "epoch": 0.5434793392529391, "grad_norm": 6.330134868621826, "learning_rate": 4.391126360006094e-06, "loss": 0.1661, "step": 10956 }, { "epoch": 0.5435289448881393, "grad_norm": 12.719806671142578, "learning_rate": 4.390345134857899e-06, "loss": 0.3487, "step": 10957 }, { "epoch": 0.5435785505233395, "grad_norm": 5.608982563018799, "learning_rate": 4.389563924817231e-06, "loss": 0.3029, "step": 10958 }, { "epoch": 0.5436281561585397, "grad_norm": 5.337765693664551, "learning_rate": 4.388782729903443e-06, "loss": 0.3281, "step": 10959 }, { "epoch": 0.5436777617937397, "grad_norm": 6.184189796447754, "learning_rate": 4.388001550135897e-06, "loss": 0.2724, "step": 10960 }, { "epoch": 0.5437273674289399, "grad_norm": 5.860700607299805, "learning_rate": 4.387220385533949e-06, "loss": 0.2651, "step": 10961 }, { "epoch": 0.5437769730641401, "grad_norm": 10.613343238830566, "learning_rate": 4.386439236116955e-06, "loss": 0.2773, "step": 10962 }, { "epoch": 0.5438265786993403, "grad_norm": 9.690171241760254, "learning_rate": 4.385658101904277e-06, "loss": 0.2555, "step": 10963 }, { "epoch": 0.5438761843345404, "grad_norm": 4.3856000900268555, "learning_rate": 4.384876982915269e-06, "loss": 0.258, "step": 10964 }, { "epoch": 0.5439257899697406, "grad_norm": 4.426192283630371, "learning_rate": 4.3840958791692855e-06, "loss": 0.2833, "step": 10965 }, { "epoch": 0.5439753956049407, "grad_norm": 5.052237033843994, "learning_rate": 4.383314790685687e-06, "loss": 0.2847, "step": 10966 }, { "epoch": 0.5440250012401409, "grad_norm": 5.840511322021484, "learning_rate": 4.382533717483827e-06, "loss": 0.2596, "step": 10967 }, { "epoch": 0.544074606875341, "grad_norm": 7.357421398162842, "learning_rate": 4.381752659583058e-06, "loss": 0.3147, "step": 10968 }, { "epoch": 0.5441242125105412, "grad_norm": 4.077954292297363, "learning_rate": 4.380971617002741e-06, "loss": 0.1269, "step": 10969 }, { "epoch": 0.5441738181457414, "grad_norm": 4.38040018081665, "learning_rate": 4.380190589762226e-06, "loss": 0.2803, "step": 10970 }, { "epoch": 0.5442234237809415, "grad_norm": 15.51369571685791, "learning_rate": 4.379409577880866e-06, "loss": 0.2823, "step": 10971 }, { "epoch": 0.5442730294161416, "grad_norm": 6.439256191253662, "learning_rate": 4.378628581378022e-06, "loss": 0.2715, "step": 10972 }, { "epoch": 0.5443226350513418, "grad_norm": 5.950763702392578, "learning_rate": 4.377847600273041e-06, "loss": 0.2493, "step": 10973 }, { "epoch": 0.544372240686542, "grad_norm": 3.2418103218078613, "learning_rate": 4.377066634585276e-06, "loss": 0.1747, "step": 10974 }, { "epoch": 0.5444218463217422, "grad_norm": 11.445634841918945, "learning_rate": 4.376285684334084e-06, "loss": 0.4248, "step": 10975 }, { "epoch": 0.5444714519569424, "grad_norm": 6.427708625793457, "learning_rate": 4.375504749538813e-06, "loss": 0.1961, "step": 10976 }, { "epoch": 0.5445210575921424, "grad_norm": 7.883870601654053, "learning_rate": 4.3747238302188175e-06, "loss": 0.3073, "step": 10977 }, { "epoch": 0.5445706632273426, "grad_norm": 9.683600425720215, "learning_rate": 4.37394292639345e-06, "loss": 0.3519, "step": 10978 }, { "epoch": 0.5446202688625428, "grad_norm": 9.77814769744873, "learning_rate": 4.373162038082057e-06, "loss": 0.2574, "step": 10979 }, { "epoch": 0.544669874497743, "grad_norm": 12.231993675231934, "learning_rate": 4.372381165303994e-06, "loss": 0.3943, "step": 10980 }, { "epoch": 0.5447194801329431, "grad_norm": 8.834158897399902, "learning_rate": 4.371600308078611e-06, "loss": 0.3811, "step": 10981 }, { "epoch": 0.5447690857681433, "grad_norm": 6.059332370758057, "learning_rate": 4.3708194664252536e-06, "loss": 0.286, "step": 10982 }, { "epoch": 0.5448186914033434, "grad_norm": 7.174818515777588, "learning_rate": 4.370038640363276e-06, "loss": 0.1991, "step": 10983 }, { "epoch": 0.5448682970385436, "grad_norm": 13.032937049865723, "learning_rate": 4.369257829912027e-06, "loss": 0.4055, "step": 10984 }, { "epoch": 0.5449179026737437, "grad_norm": 9.094573020935059, "learning_rate": 4.3684770350908515e-06, "loss": 0.3161, "step": 10985 }, { "epoch": 0.5449675083089439, "grad_norm": 8.894719123840332, "learning_rate": 4.367696255919103e-06, "loss": 0.3433, "step": 10986 }, { "epoch": 0.5450171139441441, "grad_norm": 9.513327598571777, "learning_rate": 4.366915492416127e-06, "loss": 0.3298, "step": 10987 }, { "epoch": 0.5450667195793442, "grad_norm": 4.362967491149902, "learning_rate": 4.36613474460127e-06, "loss": 0.2063, "step": 10988 }, { "epoch": 0.5451163252145443, "grad_norm": 6.321534156799316, "learning_rate": 4.365354012493882e-06, "loss": 0.357, "step": 10989 }, { "epoch": 0.5451659308497445, "grad_norm": 13.570571899414062, "learning_rate": 4.3645732961133095e-06, "loss": 0.4588, "step": 10990 }, { "epoch": 0.5452155364849447, "grad_norm": 5.302047252655029, "learning_rate": 4.363792595478896e-06, "loss": 0.2662, "step": 10991 }, { "epoch": 0.5452651421201449, "grad_norm": 9.368884086608887, "learning_rate": 4.363011910609991e-06, "loss": 0.328, "step": 10992 }, { "epoch": 0.545314747755345, "grad_norm": 12.420676231384277, "learning_rate": 4.3622312415259385e-06, "loss": 0.2259, "step": 10993 }, { "epoch": 0.5453643533905451, "grad_norm": 4.956462383270264, "learning_rate": 4.361450588246085e-06, "loss": 0.2588, "step": 10994 }, { "epoch": 0.5454139590257453, "grad_norm": 4.856260299682617, "learning_rate": 4.360669950789775e-06, "loss": 0.1447, "step": 10995 }, { "epoch": 0.5454635646609455, "grad_norm": 10.39501667022705, "learning_rate": 4.3598893291763515e-06, "loss": 0.345, "step": 10996 }, { "epoch": 0.5455131702961457, "grad_norm": 9.69607925415039, "learning_rate": 4.359108723425161e-06, "loss": 0.3312, "step": 10997 }, { "epoch": 0.5455627759313458, "grad_norm": 8.237412452697754, "learning_rate": 4.3583281335555475e-06, "loss": 0.3483, "step": 10998 }, { "epoch": 0.545612381566546, "grad_norm": 8.777947425842285, "learning_rate": 4.35754755958685e-06, "loss": 0.3168, "step": 10999 }, { "epoch": 0.5456619872017461, "grad_norm": 8.951638221740723, "learning_rate": 4.356767001538417e-06, "loss": 0.3723, "step": 11000 }, { "epoch": 0.5457115928369463, "grad_norm": 8.98077392578125, "learning_rate": 4.355986459429588e-06, "loss": 0.2717, "step": 11001 }, { "epoch": 0.5457611984721464, "grad_norm": 8.18182373046875, "learning_rate": 4.355205933279705e-06, "loss": 0.2992, "step": 11002 }, { "epoch": 0.5458108041073466, "grad_norm": 5.8274407386779785, "learning_rate": 4.354425423108112e-06, "loss": 0.2694, "step": 11003 }, { "epoch": 0.5458604097425468, "grad_norm": 7.681480407714844, "learning_rate": 4.353644928934148e-06, "loss": 0.2165, "step": 11004 }, { "epoch": 0.5459100153777469, "grad_norm": 6.432393550872803, "learning_rate": 4.352864450777156e-06, "loss": 0.3701, "step": 11005 }, { "epoch": 0.545959621012947, "grad_norm": 8.981651306152344, "learning_rate": 4.352083988656474e-06, "loss": 0.3574, "step": 11006 }, { "epoch": 0.5460092266481472, "grad_norm": 6.032965660095215, "learning_rate": 4.3513035425914455e-06, "loss": 0.2479, "step": 11007 }, { "epoch": 0.5460588322833474, "grad_norm": 6.606602191925049, "learning_rate": 4.350523112601408e-06, "loss": 0.2777, "step": 11008 }, { "epoch": 0.5461084379185476, "grad_norm": 9.38372802734375, "learning_rate": 4.349742698705701e-06, "loss": 0.2689, "step": 11009 }, { "epoch": 0.5461580435537478, "grad_norm": 28.886903762817383, "learning_rate": 4.348962300923666e-06, "loss": 0.3716, "step": 11010 }, { "epoch": 0.5462076491889478, "grad_norm": 8.321449279785156, "learning_rate": 4.348181919274637e-06, "loss": 0.2745, "step": 11011 }, { "epoch": 0.546257254824148, "grad_norm": 13.232657432556152, "learning_rate": 4.347401553777957e-06, "loss": 0.4195, "step": 11012 }, { "epoch": 0.5463068604593482, "grad_norm": 8.193022727966309, "learning_rate": 4.346621204452961e-06, "loss": 0.2957, "step": 11013 }, { "epoch": 0.5463564660945484, "grad_norm": 5.822330474853516, "learning_rate": 4.345840871318986e-06, "loss": 0.3028, "step": 11014 }, { "epoch": 0.5464060717297485, "grad_norm": 5.429199695587158, "learning_rate": 4.345060554395373e-06, "loss": 0.2825, "step": 11015 }, { "epoch": 0.5464556773649487, "grad_norm": 5.388004302978516, "learning_rate": 4.344280253701452e-06, "loss": 0.3138, "step": 11016 }, { "epoch": 0.5465052830001488, "grad_norm": 4.296051025390625, "learning_rate": 4.343499969256566e-06, "loss": 0.2586, "step": 11017 }, { "epoch": 0.546554888635349, "grad_norm": 4.403627395629883, "learning_rate": 4.342719701080047e-06, "loss": 0.2405, "step": 11018 }, { "epoch": 0.5466044942705491, "grad_norm": 9.11667537689209, "learning_rate": 4.341939449191229e-06, "loss": 0.4375, "step": 11019 }, { "epoch": 0.5466540999057493, "grad_norm": 22.526803970336914, "learning_rate": 4.341159213609452e-06, "loss": 0.4924, "step": 11020 }, { "epoch": 0.5467037055409495, "grad_norm": 8.17431926727295, "learning_rate": 4.340378994354048e-06, "loss": 0.3963, "step": 11021 }, { "epoch": 0.5467533111761496, "grad_norm": 5.583975791931152, "learning_rate": 4.339598791444348e-06, "loss": 0.3036, "step": 11022 }, { "epoch": 0.5468029168113497, "grad_norm": 6.599963665008545, "learning_rate": 4.3388186048996905e-06, "loss": 0.3357, "step": 11023 }, { "epoch": 0.5468525224465499, "grad_norm": 9.232654571533203, "learning_rate": 4.338038434739408e-06, "loss": 0.3922, "step": 11024 }, { "epoch": 0.5469021280817501, "grad_norm": 6.6061506271362305, "learning_rate": 4.3372582809828295e-06, "loss": 0.3326, "step": 11025 }, { "epoch": 0.5469517337169503, "grad_norm": 8.288792610168457, "learning_rate": 4.336478143649293e-06, "loss": 0.3937, "step": 11026 }, { "epoch": 0.5470013393521505, "grad_norm": 12.414575576782227, "learning_rate": 4.335698022758128e-06, "loss": 0.5009, "step": 11027 }, { "epoch": 0.5470509449873505, "grad_norm": 8.881677627563477, "learning_rate": 4.334917918328665e-06, "loss": 0.2626, "step": 11028 }, { "epoch": 0.5471005506225507, "grad_norm": 9.030701637268066, "learning_rate": 4.334137830380238e-06, "loss": 0.3548, "step": 11029 }, { "epoch": 0.5471501562577509, "grad_norm": 5.996098041534424, "learning_rate": 4.333357758932176e-06, "loss": 0.2361, "step": 11030 }, { "epoch": 0.547199761892951, "grad_norm": 8.370743751525879, "learning_rate": 4.332577704003811e-06, "loss": 0.3743, "step": 11031 }, { "epoch": 0.5472493675281512, "grad_norm": 8.486995697021484, "learning_rate": 4.331797665614473e-06, "loss": 0.3494, "step": 11032 }, { "epoch": 0.5472989731633514, "grad_norm": 4.587002754211426, "learning_rate": 4.33101764378349e-06, "loss": 0.3091, "step": 11033 }, { "epoch": 0.5473485787985515, "grad_norm": 7.918156147003174, "learning_rate": 4.3302376385301935e-06, "loss": 0.2507, "step": 11034 }, { "epoch": 0.5473981844337517, "grad_norm": 7.864791393280029, "learning_rate": 4.329457649873912e-06, "loss": 0.3432, "step": 11035 }, { "epoch": 0.5474477900689518, "grad_norm": 19.1275577545166, "learning_rate": 4.328677677833969e-06, "loss": 0.3321, "step": 11036 }, { "epoch": 0.547497395704152, "grad_norm": 5.100338459014893, "learning_rate": 4.327897722429701e-06, "loss": 0.3085, "step": 11037 }, { "epoch": 0.5475470013393522, "grad_norm": 7.487661838531494, "learning_rate": 4.327117783680431e-06, "loss": 0.3138, "step": 11038 }, { "epoch": 0.5475966069745523, "grad_norm": 5.144364356994629, "learning_rate": 4.326337861605485e-06, "loss": 0.2435, "step": 11039 }, { "epoch": 0.5476462126097524, "grad_norm": 3.4148693084716797, "learning_rate": 4.325557956224193e-06, "loss": 0.2203, "step": 11040 }, { "epoch": 0.5476958182449526, "grad_norm": 4.873998165130615, "learning_rate": 4.32477806755588e-06, "loss": 0.2423, "step": 11041 }, { "epoch": 0.5477454238801528, "grad_norm": 8.164387702941895, "learning_rate": 4.32399819561987e-06, "loss": 0.3292, "step": 11042 }, { "epoch": 0.547795029515353, "grad_norm": 6.0243730545043945, "learning_rate": 4.323218340435492e-06, "loss": 0.2835, "step": 11043 }, { "epoch": 0.5478446351505531, "grad_norm": 6.632997035980225, "learning_rate": 4.322438502022071e-06, "loss": 0.229, "step": 11044 }, { "epoch": 0.5478942407857532, "grad_norm": 5.684804916381836, "learning_rate": 4.321658680398927e-06, "loss": 0.2924, "step": 11045 }, { "epoch": 0.5479438464209534, "grad_norm": 5.194449424743652, "learning_rate": 4.3208788755853905e-06, "loss": 0.3971, "step": 11046 }, { "epoch": 0.5479934520561536, "grad_norm": 8.828899383544922, "learning_rate": 4.320099087600779e-06, "loss": 0.3771, "step": 11047 }, { "epoch": 0.5480430576913538, "grad_norm": 4.702301502227783, "learning_rate": 4.319319316464424e-06, "loss": 0.2407, "step": 11048 }, { "epoch": 0.5480926633265539, "grad_norm": 6.32316780090332, "learning_rate": 4.318539562195644e-06, "loss": 0.3286, "step": 11049 }, { "epoch": 0.5481422689617541, "grad_norm": 8.521183013916016, "learning_rate": 4.3177598248137585e-06, "loss": 0.2196, "step": 11050 }, { "epoch": 0.5481918745969542, "grad_norm": 8.353105545043945, "learning_rate": 4.316980104338096e-06, "loss": 0.3997, "step": 11051 }, { "epoch": 0.5482414802321544, "grad_norm": 7.183534622192383, "learning_rate": 4.316200400787975e-06, "loss": 0.3238, "step": 11052 }, { "epoch": 0.5482910858673545, "grad_norm": 12.48697280883789, "learning_rate": 4.315420714182717e-06, "loss": 0.3673, "step": 11053 }, { "epoch": 0.5483406915025547, "grad_norm": 5.539642810821533, "learning_rate": 4.314641044541644e-06, "loss": 0.3381, "step": 11054 }, { "epoch": 0.5483902971377549, "grad_norm": 48.11575698852539, "learning_rate": 4.313861391884074e-06, "loss": 0.2646, "step": 11055 }, { "epoch": 0.548439902772955, "grad_norm": 6.829254150390625, "learning_rate": 4.3130817562293305e-06, "loss": 0.2861, "step": 11056 }, { "epoch": 0.5484895084081551, "grad_norm": 7.044797897338867, "learning_rate": 4.312302137596733e-06, "loss": 0.2359, "step": 11057 }, { "epoch": 0.5485391140433553, "grad_norm": 6.908275604248047, "learning_rate": 4.311522536005598e-06, "loss": 0.3604, "step": 11058 }, { "epoch": 0.5485887196785555, "grad_norm": 8.746980667114258, "learning_rate": 4.310742951475246e-06, "loss": 0.2879, "step": 11059 }, { "epoch": 0.5486383253137557, "grad_norm": 5.49228572845459, "learning_rate": 4.3099633840249955e-06, "loss": 0.2136, "step": 11060 }, { "epoch": 0.5486879309489558, "grad_norm": 6.758436679840088, "learning_rate": 4.309183833674166e-06, "loss": 0.2952, "step": 11061 }, { "epoch": 0.5487375365841559, "grad_norm": 8.261552810668945, "learning_rate": 4.308404300442072e-06, "loss": 0.2822, "step": 11062 }, { "epoch": 0.5487871422193561, "grad_norm": 15.852977752685547, "learning_rate": 4.307624784348032e-06, "loss": 0.4037, "step": 11063 }, { "epoch": 0.5488367478545563, "grad_norm": 9.180527687072754, "learning_rate": 4.306845285411364e-06, "loss": 0.3392, "step": 11064 }, { "epoch": 0.5488863534897565, "grad_norm": 4.517227649688721, "learning_rate": 4.306065803651382e-06, "loss": 0.2366, "step": 11065 }, { "epoch": 0.5489359591249566, "grad_norm": 7.899402618408203, "learning_rate": 4.305286339087405e-06, "loss": 0.3089, "step": 11066 }, { "epoch": 0.5489855647601568, "grad_norm": 9.249143600463867, "learning_rate": 4.3045068917387454e-06, "loss": 0.3237, "step": 11067 }, { "epoch": 0.5490351703953569, "grad_norm": 6.606931209564209, "learning_rate": 4.30372746162472e-06, "loss": 0.3799, "step": 11068 }, { "epoch": 0.549084776030557, "grad_norm": 9.232329368591309, "learning_rate": 4.302948048764644e-06, "loss": 0.2943, "step": 11069 }, { "epoch": 0.5491343816657572, "grad_norm": 9.670470237731934, "learning_rate": 4.302168653177829e-06, "loss": 0.3205, "step": 11070 }, { "epoch": 0.5491839873009574, "grad_norm": 6.017814636230469, "learning_rate": 4.3013892748835924e-06, "loss": 0.2972, "step": 11071 }, { "epoch": 0.5492335929361576, "grad_norm": 6.927311897277832, "learning_rate": 4.300609913901245e-06, "loss": 0.2419, "step": 11072 }, { "epoch": 0.5492831985713577, "grad_norm": 7.427954196929932, "learning_rate": 4.299830570250099e-06, "loss": 0.2129, "step": 11073 }, { "epoch": 0.5493328042065578, "grad_norm": 11.93533706665039, "learning_rate": 4.29905124394947e-06, "loss": 0.4111, "step": 11074 }, { "epoch": 0.549382409841758, "grad_norm": 4.273714065551758, "learning_rate": 4.298271935018669e-06, "loss": 0.2026, "step": 11075 }, { "epoch": 0.5494320154769582, "grad_norm": 5.148862838745117, "learning_rate": 4.297492643477004e-06, "loss": 0.308, "step": 11076 }, { "epoch": 0.5494816211121584, "grad_norm": 3.700882911682129, "learning_rate": 4.296713369343791e-06, "loss": 0.2861, "step": 11077 }, { "epoch": 0.5495312267473585, "grad_norm": 13.193868637084961, "learning_rate": 4.295934112638338e-06, "loss": 0.4347, "step": 11078 }, { "epoch": 0.5495808323825586, "grad_norm": 5.700117588043213, "learning_rate": 4.295154873379955e-06, "loss": 0.3003, "step": 11079 }, { "epoch": 0.5496304380177588, "grad_norm": 4.878114223480225, "learning_rate": 4.294375651587956e-06, "loss": 0.2231, "step": 11080 }, { "epoch": 0.549680043652959, "grad_norm": 4.43227481842041, "learning_rate": 4.293596447281646e-06, "loss": 0.2385, "step": 11081 }, { "epoch": 0.5497296492881592, "grad_norm": 13.374710083007812, "learning_rate": 4.292817260480335e-06, "loss": 0.5632, "step": 11082 }, { "epoch": 0.5497792549233593, "grad_norm": 8.158445358276367, "learning_rate": 4.292038091203334e-06, "loss": 0.355, "step": 11083 }, { "epoch": 0.5498288605585595, "grad_norm": 6.228793144226074, "learning_rate": 4.291258939469946e-06, "loss": 0.2735, "step": 11084 }, { "epoch": 0.5498784661937596, "grad_norm": 9.431523323059082, "learning_rate": 4.290479805299485e-06, "loss": 0.3559, "step": 11085 }, { "epoch": 0.5499280718289598, "grad_norm": 5.2161545753479, "learning_rate": 4.289700688711255e-06, "loss": 0.2754, "step": 11086 }, { "epoch": 0.5499776774641599, "grad_norm": 12.692468643188477, "learning_rate": 4.288921589724562e-06, "loss": 0.3862, "step": 11087 }, { "epoch": 0.5500272830993601, "grad_norm": 3.9070777893066406, "learning_rate": 4.288142508358714e-06, "loss": 0.2339, "step": 11088 }, { "epoch": 0.5500768887345603, "grad_norm": 5.481528282165527, "learning_rate": 4.287363444633019e-06, "loss": 0.2641, "step": 11089 }, { "epoch": 0.5501264943697604, "grad_norm": 7.7662763595581055, "learning_rate": 4.286584398566777e-06, "loss": 0.3047, "step": 11090 }, { "epoch": 0.5501761000049605, "grad_norm": 8.500102043151855, "learning_rate": 4.2858053701792986e-06, "loss": 0.2716, "step": 11091 }, { "epoch": 0.5502257056401607, "grad_norm": 5.8470587730407715, "learning_rate": 4.285026359489886e-06, "loss": 0.3393, "step": 11092 }, { "epoch": 0.5502753112753609, "grad_norm": 4.964484691619873, "learning_rate": 4.284247366517842e-06, "loss": 0.2167, "step": 11093 }, { "epoch": 0.5503249169105611, "grad_norm": 12.073624610900879, "learning_rate": 4.283468391282473e-06, "loss": 0.4536, "step": 11094 }, { "epoch": 0.5503745225457612, "grad_norm": 5.565489768981934, "learning_rate": 4.282689433803082e-06, "loss": 0.3169, "step": 11095 }, { "epoch": 0.5504241281809613, "grad_norm": 5.264599323272705, "learning_rate": 4.281910494098969e-06, "loss": 0.2348, "step": 11096 }, { "epoch": 0.5504737338161615, "grad_norm": 8.278343200683594, "learning_rate": 4.281131572189442e-06, "loss": 0.2605, "step": 11097 }, { "epoch": 0.5505233394513617, "grad_norm": 5.055073261260986, "learning_rate": 4.280352668093798e-06, "loss": 0.3156, "step": 11098 }, { "epoch": 0.5505729450865618, "grad_norm": 6.059590816497803, "learning_rate": 4.279573781831339e-06, "loss": 0.3585, "step": 11099 }, { "epoch": 0.550622550721762, "grad_norm": 4.23112678527832, "learning_rate": 4.27879491342137e-06, "loss": 0.2375, "step": 11100 }, { "epoch": 0.5506721563569622, "grad_norm": 7.517363548278809, "learning_rate": 4.278016062883187e-06, "loss": 0.3389, "step": 11101 }, { "epoch": 0.5507217619921623, "grad_norm": 9.351032257080078, "learning_rate": 4.277237230236095e-06, "loss": 0.421, "step": 11102 }, { "epoch": 0.5507713676273625, "grad_norm": 9.674006462097168, "learning_rate": 4.276458415499391e-06, "loss": 0.2831, "step": 11103 }, { "epoch": 0.5508209732625626, "grad_norm": 4.178666591644287, "learning_rate": 4.275679618692373e-06, "loss": 0.279, "step": 11104 }, { "epoch": 0.5508705788977628, "grad_norm": 8.015949249267578, "learning_rate": 4.274900839834344e-06, "loss": 0.3431, "step": 11105 }, { "epoch": 0.550920184532963, "grad_norm": 8.686012268066406, "learning_rate": 4.274122078944599e-06, "loss": 0.3068, "step": 11106 }, { "epoch": 0.550969790168163, "grad_norm": 6.082414150238037, "learning_rate": 4.273343336042436e-06, "loss": 0.2722, "step": 11107 }, { "epoch": 0.5510193958033632, "grad_norm": 8.12363052368164, "learning_rate": 4.272564611147157e-06, "loss": 0.4304, "step": 11108 }, { "epoch": 0.5510690014385634, "grad_norm": 5.840245723724365, "learning_rate": 4.271785904278053e-06, "loss": 0.3558, "step": 11109 }, { "epoch": 0.5511186070737636, "grad_norm": 6.887963771820068, "learning_rate": 4.2710072154544246e-06, "loss": 0.3506, "step": 11110 }, { "epoch": 0.5511682127089638, "grad_norm": 19.84710693359375, "learning_rate": 4.270228544695569e-06, "loss": 0.4368, "step": 11111 }, { "epoch": 0.551217818344164, "grad_norm": 8.935236930847168, "learning_rate": 4.269449892020777e-06, "loss": 0.3406, "step": 11112 }, { "epoch": 0.551267423979364, "grad_norm": 5.645052433013916, "learning_rate": 4.268671257449348e-06, "loss": 0.2908, "step": 11113 }, { "epoch": 0.5513170296145642, "grad_norm": 9.56423282623291, "learning_rate": 4.267892641000577e-06, "loss": 0.277, "step": 11114 }, { "epoch": 0.5513666352497644, "grad_norm": 11.070547103881836, "learning_rate": 4.267114042693758e-06, "loss": 0.371, "step": 11115 }, { "epoch": 0.5514162408849645, "grad_norm": 5.2844085693359375, "learning_rate": 4.266335462548183e-06, "loss": 0.1804, "step": 11116 }, { "epoch": 0.5514658465201647, "grad_norm": 20.29521942138672, "learning_rate": 4.265556900583148e-06, "loss": 0.4575, "step": 11117 }, { "epoch": 0.5515154521553649, "grad_norm": 6.642126560211182, "learning_rate": 4.264778356817944e-06, "loss": 0.233, "step": 11118 }, { "epoch": 0.551565057790565, "grad_norm": 7.591246128082275, "learning_rate": 4.263999831271866e-06, "loss": 0.3015, "step": 11119 }, { "epoch": 0.5516146634257652, "grad_norm": 7.950838088989258, "learning_rate": 4.263221323964206e-06, "loss": 0.187, "step": 11120 }, { "epoch": 0.5516642690609653, "grad_norm": 7.388998031616211, "learning_rate": 4.2624428349142545e-06, "loss": 0.3395, "step": 11121 }, { "epoch": 0.5517138746961655, "grad_norm": 4.805627822875977, "learning_rate": 4.261664364141302e-06, "loss": 0.2813, "step": 11122 }, { "epoch": 0.5517634803313657, "grad_norm": 9.860970497131348, "learning_rate": 4.260885911664642e-06, "loss": 0.3826, "step": 11123 }, { "epoch": 0.5518130859665658, "grad_norm": 6.292735576629639, "learning_rate": 4.260107477503562e-06, "loss": 0.3167, "step": 11124 }, { "epoch": 0.5518626916017659, "grad_norm": 5.500087738037109, "learning_rate": 4.259329061677354e-06, "loss": 0.3403, "step": 11125 }, { "epoch": 0.5519122972369661, "grad_norm": 9.601019859313965, "learning_rate": 4.258550664205308e-06, "loss": 0.3672, "step": 11126 }, { "epoch": 0.5519619028721663, "grad_norm": 8.02966594696045, "learning_rate": 4.2577722851067095e-06, "loss": 0.3639, "step": 11127 }, { "epoch": 0.5520115085073665, "grad_norm": 6.655842304229736, "learning_rate": 4.256993924400851e-06, "loss": 0.3227, "step": 11128 }, { "epoch": 0.5520611141425666, "grad_norm": 4.977867603302002, "learning_rate": 4.256215582107021e-06, "loss": 0.2171, "step": 11129 }, { "epoch": 0.5521107197777667, "grad_norm": 6.5722784996032715, "learning_rate": 4.255437258244501e-06, "loss": 0.2991, "step": 11130 }, { "epoch": 0.5521603254129669, "grad_norm": 9.977785110473633, "learning_rate": 4.254658952832586e-06, "loss": 0.306, "step": 11131 }, { "epoch": 0.5522099310481671, "grad_norm": 6.101598262786865, "learning_rate": 4.253880665890559e-06, "loss": 0.2739, "step": 11132 }, { "epoch": 0.5522595366833672, "grad_norm": 4.204874515533447, "learning_rate": 4.253102397437704e-06, "loss": 0.2647, "step": 11133 }, { "epoch": 0.5523091423185674, "grad_norm": 11.14865779876709, "learning_rate": 4.252324147493312e-06, "loss": 0.1982, "step": 11134 }, { "epoch": 0.5523587479537675, "grad_norm": 18.41822052001953, "learning_rate": 4.251545916076667e-06, "loss": 0.2707, "step": 11135 }, { "epoch": 0.5524083535889677, "grad_norm": 7.050839900970459, "learning_rate": 4.250767703207049e-06, "loss": 0.2257, "step": 11136 }, { "epoch": 0.5524579592241678, "grad_norm": 8.526309967041016, "learning_rate": 4.249989508903749e-06, "loss": 0.3597, "step": 11137 }, { "epoch": 0.552507564859368, "grad_norm": 10.589630126953125, "learning_rate": 4.249211333186046e-06, "loss": 0.4542, "step": 11138 }, { "epoch": 0.5525571704945682, "grad_norm": 14.50174617767334, "learning_rate": 4.248433176073228e-06, "loss": 0.3468, "step": 11139 }, { "epoch": 0.5526067761297684, "grad_norm": 8.06253719329834, "learning_rate": 4.247655037584576e-06, "loss": 0.3332, "step": 11140 }, { "epoch": 0.5526563817649685, "grad_norm": 3.8091962337493896, "learning_rate": 4.24687691773937e-06, "loss": 0.255, "step": 11141 }, { "epoch": 0.5527059874001686, "grad_norm": 27.75954818725586, "learning_rate": 4.246098816556897e-06, "loss": 0.3739, "step": 11142 }, { "epoch": 0.5527555930353688, "grad_norm": 7.383983612060547, "learning_rate": 4.245320734056436e-06, "loss": 0.2797, "step": 11143 }, { "epoch": 0.552805198670569, "grad_norm": 9.515812873840332, "learning_rate": 4.2445426702572665e-06, "loss": 0.2607, "step": 11144 }, { "epoch": 0.5528548043057692, "grad_norm": 10.619962692260742, "learning_rate": 4.243764625178674e-06, "loss": 0.3927, "step": 11145 }, { "epoch": 0.5529044099409693, "grad_norm": 4.01969575881958, "learning_rate": 4.242986598839935e-06, "loss": 0.2322, "step": 11146 }, { "epoch": 0.5529540155761694, "grad_norm": 6.91994571685791, "learning_rate": 4.24220859126033e-06, "loss": 0.3035, "step": 11147 }, { "epoch": 0.5530036212113696, "grad_norm": 14.114572525024414, "learning_rate": 4.24143060245914e-06, "loss": 0.4455, "step": 11148 }, { "epoch": 0.5530532268465698, "grad_norm": 6.731154441833496, "learning_rate": 4.240652632455643e-06, "loss": 0.3115, "step": 11149 }, { "epoch": 0.55310283248177, "grad_norm": 6.512929916381836, "learning_rate": 4.239874681269116e-06, "loss": 0.3289, "step": 11150 }, { "epoch": 0.5531524381169701, "grad_norm": 20.13624382019043, "learning_rate": 4.239096748918839e-06, "loss": 0.3395, "step": 11151 }, { "epoch": 0.5532020437521702, "grad_norm": 6.910833835601807, "learning_rate": 4.2383188354240895e-06, "loss": 0.3106, "step": 11152 }, { "epoch": 0.5532516493873704, "grad_norm": 11.549677848815918, "learning_rate": 4.237540940804141e-06, "loss": 0.204, "step": 11153 }, { "epoch": 0.5533012550225705, "grad_norm": 15.395511627197266, "learning_rate": 4.236763065078276e-06, "loss": 0.3973, "step": 11154 }, { "epoch": 0.5533508606577707, "grad_norm": 6.808315753936768, "learning_rate": 4.2359852082657645e-06, "loss": 0.3088, "step": 11155 }, { "epoch": 0.5534004662929709, "grad_norm": 5.458040714263916, "learning_rate": 4.235207370385888e-06, "loss": 0.2751, "step": 11156 }, { "epoch": 0.5534500719281711, "grad_norm": 6.430706024169922, "learning_rate": 4.2344295514579185e-06, "loss": 0.2972, "step": 11157 }, { "epoch": 0.5534996775633712, "grad_norm": 4.7319536209106445, "learning_rate": 4.233651751501129e-06, "loss": 0.1973, "step": 11158 }, { "epoch": 0.5535492831985713, "grad_norm": 6.610912799835205, "learning_rate": 4.232873970534798e-06, "loss": 0.312, "step": 11159 }, { "epoch": 0.5535988888337715, "grad_norm": 5.842156410217285, "learning_rate": 4.232096208578197e-06, "loss": 0.2665, "step": 11160 }, { "epoch": 0.5536484944689717, "grad_norm": 5.800335884094238, "learning_rate": 4.231318465650597e-06, "loss": 0.3906, "step": 11161 }, { "epoch": 0.5536981001041719, "grad_norm": 6.9182538986206055, "learning_rate": 4.230540741771275e-06, "loss": 0.3002, "step": 11162 }, { "epoch": 0.553747705739372, "grad_norm": 7.970698356628418, "learning_rate": 4.2297630369595e-06, "loss": 0.1814, "step": 11163 }, { "epoch": 0.5537973113745721, "grad_norm": 5.055023193359375, "learning_rate": 4.228985351234546e-06, "loss": 0.2583, "step": 11164 }, { "epoch": 0.5538469170097723, "grad_norm": 5.770730018615723, "learning_rate": 4.228207684615685e-06, "loss": 0.2982, "step": 11165 }, { "epoch": 0.5538965226449725, "grad_norm": 4.721622943878174, "learning_rate": 4.227430037122186e-06, "loss": 0.2113, "step": 11166 }, { "epoch": 0.5539461282801726, "grad_norm": 6.938100814819336, "learning_rate": 4.22665240877332e-06, "loss": 0.3893, "step": 11167 }, { "epoch": 0.5539957339153728, "grad_norm": 8.106532096862793, "learning_rate": 4.225874799588356e-06, "loss": 0.3139, "step": 11168 }, { "epoch": 0.5540453395505729, "grad_norm": 10.079482078552246, "learning_rate": 4.225097209586567e-06, "loss": 0.3322, "step": 11169 }, { "epoch": 0.5540949451857731, "grad_norm": 6.689660549163818, "learning_rate": 4.224319638787218e-06, "loss": 0.2035, "step": 11170 }, { "epoch": 0.5541445508209732, "grad_norm": 10.842501640319824, "learning_rate": 4.223542087209579e-06, "loss": 0.3151, "step": 11171 }, { "epoch": 0.5541941564561734, "grad_norm": 14.499110221862793, "learning_rate": 4.222764554872919e-06, "loss": 0.3879, "step": 11172 }, { "epoch": 0.5542437620913736, "grad_norm": 5.559630870819092, "learning_rate": 4.221987041796505e-06, "loss": 0.2281, "step": 11173 }, { "epoch": 0.5542933677265738, "grad_norm": 4.894851207733154, "learning_rate": 4.221209547999604e-06, "loss": 0.3147, "step": 11174 }, { "epoch": 0.5543429733617738, "grad_norm": 8.165075302124023, "learning_rate": 4.220432073501482e-06, "loss": 0.3606, "step": 11175 }, { "epoch": 0.554392578996974, "grad_norm": 11.05027961730957, "learning_rate": 4.219654618321406e-06, "loss": 0.3077, "step": 11176 }, { "epoch": 0.5544421846321742, "grad_norm": 5.207669258117676, "learning_rate": 4.218877182478643e-06, "loss": 0.1487, "step": 11177 }, { "epoch": 0.5544917902673744, "grad_norm": 5.161727428436279, "learning_rate": 4.218099765992455e-06, "loss": 0.305, "step": 11178 }, { "epoch": 0.5545413959025746, "grad_norm": 17.80870246887207, "learning_rate": 4.2173223688821095e-06, "loss": 0.3833, "step": 11179 }, { "epoch": 0.5545910015377747, "grad_norm": 5.94107723236084, "learning_rate": 4.21654499116687e-06, "loss": 0.2755, "step": 11180 }, { "epoch": 0.5546406071729748, "grad_norm": 4.226952075958252, "learning_rate": 4.215767632865998e-06, "loss": 0.1327, "step": 11181 }, { "epoch": 0.554690212808175, "grad_norm": 4.593118667602539, "learning_rate": 4.214990293998761e-06, "loss": 0.2205, "step": 11182 }, { "epoch": 0.5547398184433752, "grad_norm": 7.917368412017822, "learning_rate": 4.21421297458442e-06, "loss": 0.2453, "step": 11183 }, { "epoch": 0.5547894240785753, "grad_norm": 8.716678619384766, "learning_rate": 4.2134356746422346e-06, "loss": 0.2765, "step": 11184 }, { "epoch": 0.5548390297137755, "grad_norm": 14.089031219482422, "learning_rate": 4.212658394191471e-06, "loss": 0.4344, "step": 11185 }, { "epoch": 0.5548886353489756, "grad_norm": 5.974073886871338, "learning_rate": 4.211881133251388e-06, "loss": 0.2465, "step": 11186 }, { "epoch": 0.5549382409841758, "grad_norm": 3.8379523754119873, "learning_rate": 4.211103891841245e-06, "loss": 0.1806, "step": 11187 }, { "epoch": 0.554987846619376, "grad_norm": 6.5653839111328125, "learning_rate": 4.210326669980308e-06, "loss": 0.2506, "step": 11188 }, { "epoch": 0.5550374522545761, "grad_norm": 7.454394340515137, "learning_rate": 4.2095494676878315e-06, "loss": 0.2306, "step": 11189 }, { "epoch": 0.5550870578897763, "grad_norm": 6.835746765136719, "learning_rate": 4.208772284983076e-06, "loss": 0.3394, "step": 11190 }, { "epoch": 0.5551366635249765, "grad_norm": 8.769814491271973, "learning_rate": 4.2079951218853025e-06, "loss": 0.2936, "step": 11191 }, { "epoch": 0.5551862691601765, "grad_norm": 10.956603050231934, "learning_rate": 4.207217978413765e-06, "loss": 0.3841, "step": 11192 }, { "epoch": 0.5552358747953767, "grad_norm": 6.525020122528076, "learning_rate": 4.2064408545877285e-06, "loss": 0.3475, "step": 11193 }, { "epoch": 0.5552854804305769, "grad_norm": 7.759938716888428, "learning_rate": 4.205663750426447e-06, "loss": 0.2746, "step": 11194 }, { "epoch": 0.5553350860657771, "grad_norm": 8.937170028686523, "learning_rate": 4.204886665949174e-06, "loss": 0.1913, "step": 11195 }, { "epoch": 0.5553846917009773, "grad_norm": 8.199328422546387, "learning_rate": 4.204109601175171e-06, "loss": 0.346, "step": 11196 }, { "epoch": 0.5554342973361774, "grad_norm": 4.764952182769775, "learning_rate": 4.203332556123692e-06, "loss": 0.2394, "step": 11197 }, { "epoch": 0.5554839029713775, "grad_norm": 8.193346977233887, "learning_rate": 4.202555530813991e-06, "loss": 0.3639, "step": 11198 }, { "epoch": 0.5555335086065777, "grad_norm": 11.577363014221191, "learning_rate": 4.201778525265326e-06, "loss": 0.4418, "step": 11199 }, { "epoch": 0.5555831142417779, "grad_norm": 7.844071388244629, "learning_rate": 4.2010015394969515e-06, "loss": 0.311, "step": 11200 }, { "epoch": 0.555632719876978, "grad_norm": 7.24234676361084, "learning_rate": 4.2002245735281176e-06, "loss": 0.3615, "step": 11201 }, { "epoch": 0.5556823255121782, "grad_norm": 7.135445594787598, "learning_rate": 4.1994476273780825e-06, "loss": 0.3565, "step": 11202 }, { "epoch": 0.5557319311473783, "grad_norm": 15.216772079467773, "learning_rate": 4.198670701066098e-06, "loss": 0.6308, "step": 11203 }, { "epoch": 0.5557815367825785, "grad_norm": 6.866748809814453, "learning_rate": 4.197893794611413e-06, "loss": 0.3501, "step": 11204 }, { "epoch": 0.5558311424177786, "grad_norm": 8.765559196472168, "learning_rate": 4.197116908033285e-06, "loss": 0.2587, "step": 11205 }, { "epoch": 0.5558807480529788, "grad_norm": 8.818377494812012, "learning_rate": 4.196340041350963e-06, "loss": 0.2768, "step": 11206 }, { "epoch": 0.555930353688179, "grad_norm": 7.178250312805176, "learning_rate": 4.195563194583696e-06, "loss": 0.2964, "step": 11207 }, { "epoch": 0.5559799593233792, "grad_norm": 6.223727703094482, "learning_rate": 4.194786367750739e-06, "loss": 0.2853, "step": 11208 }, { "epoch": 0.5560295649585792, "grad_norm": 7.3742756843566895, "learning_rate": 4.1940095608713385e-06, "loss": 0.2949, "step": 11209 }, { "epoch": 0.5560791705937794, "grad_norm": 5.107048034667969, "learning_rate": 4.193232773964749e-06, "loss": 0.1738, "step": 11210 }, { "epoch": 0.5561287762289796, "grad_norm": 8.636017799377441, "learning_rate": 4.192456007050214e-06, "loss": 0.3003, "step": 11211 }, { "epoch": 0.5561783818641798, "grad_norm": 4.676325798034668, "learning_rate": 4.191679260146984e-06, "loss": 0.302, "step": 11212 }, { "epoch": 0.55622798749938, "grad_norm": 7.180301189422607, "learning_rate": 4.190902533274308e-06, "loss": 0.2379, "step": 11213 }, { "epoch": 0.5562775931345801, "grad_norm": 17.27634620666504, "learning_rate": 4.190125826451434e-06, "loss": 0.4077, "step": 11214 }, { "epoch": 0.5563271987697802, "grad_norm": 8.282910346984863, "learning_rate": 4.189349139697607e-06, "loss": 0.3892, "step": 11215 }, { "epoch": 0.5563768044049804, "grad_norm": 13.177501678466797, "learning_rate": 4.188572473032077e-06, "loss": 0.4376, "step": 11216 }, { "epoch": 0.5564264100401806, "grad_norm": 6.40566349029541, "learning_rate": 4.187795826474086e-06, "loss": 0.1904, "step": 11217 }, { "epoch": 0.5564760156753807, "grad_norm": 4.437664031982422, "learning_rate": 4.187019200042883e-06, "loss": 0.2997, "step": 11218 }, { "epoch": 0.5565256213105809, "grad_norm": 8.593194961547852, "learning_rate": 4.186242593757712e-06, "loss": 0.3603, "step": 11219 }, { "epoch": 0.556575226945781, "grad_norm": 4.005207538604736, "learning_rate": 4.1854660076378175e-06, "loss": 0.1981, "step": 11220 }, { "epoch": 0.5566248325809812, "grad_norm": 9.254968643188477, "learning_rate": 4.184689441702443e-06, "loss": 0.355, "step": 11221 }, { "epoch": 0.5566744382161813, "grad_norm": 8.070926666259766, "learning_rate": 4.183912895970835e-06, "loss": 0.188, "step": 11222 }, { "epoch": 0.5567240438513815, "grad_norm": 6.3075852394104, "learning_rate": 4.183136370462232e-06, "loss": 0.3221, "step": 11223 }, { "epoch": 0.5567736494865817, "grad_norm": 4.8291015625, "learning_rate": 4.182359865195881e-06, "loss": 0.2905, "step": 11224 }, { "epoch": 0.5568232551217819, "grad_norm": 8.531338691711426, "learning_rate": 4.181583380191021e-06, "loss": 0.2973, "step": 11225 }, { "epoch": 0.556872860756982, "grad_norm": 7.656320571899414, "learning_rate": 4.180806915466896e-06, "loss": 0.259, "step": 11226 }, { "epoch": 0.5569224663921821, "grad_norm": 5.174907684326172, "learning_rate": 4.180030471042747e-06, "loss": 0.3061, "step": 11227 }, { "epoch": 0.5569720720273823, "grad_norm": 19.135765075683594, "learning_rate": 4.179254046937812e-06, "loss": 0.4209, "step": 11228 }, { "epoch": 0.5570216776625825, "grad_norm": 13.57231330871582, "learning_rate": 4.178477643171334e-06, "loss": 0.4294, "step": 11229 }, { "epoch": 0.5570712832977827, "grad_norm": 5.155337333679199, "learning_rate": 4.177701259762551e-06, "loss": 0.3365, "step": 11230 }, { "epoch": 0.5571208889329828, "grad_norm": 5.082943916320801, "learning_rate": 4.176924896730704e-06, "loss": 0.2538, "step": 11231 }, { "epoch": 0.5571704945681829, "grad_norm": 6.404871463775635, "learning_rate": 4.17614855409503e-06, "loss": 0.3491, "step": 11232 }, { "epoch": 0.5572201002033831, "grad_norm": 3.9562253952026367, "learning_rate": 4.175372231874767e-06, "loss": 0.2405, "step": 11233 }, { "epoch": 0.5572697058385833, "grad_norm": 10.891340255737305, "learning_rate": 4.174595930089154e-06, "loss": 0.2794, "step": 11234 }, { "epoch": 0.5573193114737834, "grad_norm": 9.817455291748047, "learning_rate": 4.173819648757424e-06, "loss": 0.3022, "step": 11235 }, { "epoch": 0.5573689171089836, "grad_norm": 5.981777667999268, "learning_rate": 4.173043387898821e-06, "loss": 0.2646, "step": 11236 }, { "epoch": 0.5574185227441837, "grad_norm": 8.34392261505127, "learning_rate": 4.172267147532576e-06, "loss": 0.2957, "step": 11237 }, { "epoch": 0.5574681283793839, "grad_norm": 4.63228702545166, "learning_rate": 4.171490927677923e-06, "loss": 0.2712, "step": 11238 }, { "epoch": 0.557517734014584, "grad_norm": 8.970864295959473, "learning_rate": 4.170714728354103e-06, "loss": 0.4181, "step": 11239 }, { "epoch": 0.5575673396497842, "grad_norm": 6.050970554351807, "learning_rate": 4.169938549580346e-06, "loss": 0.28, "step": 11240 }, { "epoch": 0.5576169452849844, "grad_norm": 8.741495132446289, "learning_rate": 4.169162391375885e-06, "loss": 0.2097, "step": 11241 }, { "epoch": 0.5576665509201846, "grad_norm": 4.480506420135498, "learning_rate": 4.1683862537599586e-06, "loss": 0.2693, "step": 11242 }, { "epoch": 0.5577161565553846, "grad_norm": 5.426706790924072, "learning_rate": 4.167610136751794e-06, "loss": 0.3145, "step": 11243 }, { "epoch": 0.5577657621905848, "grad_norm": 7.936056137084961, "learning_rate": 4.16683404037063e-06, "loss": 0.3524, "step": 11244 }, { "epoch": 0.557815367825785, "grad_norm": 5.857827663421631, "learning_rate": 4.166057964635695e-06, "loss": 0.2889, "step": 11245 }, { "epoch": 0.5578649734609852, "grad_norm": 5.235998630523682, "learning_rate": 4.165281909566218e-06, "loss": 0.3436, "step": 11246 }, { "epoch": 0.5579145790961854, "grad_norm": 7.124301910400391, "learning_rate": 4.164505875181435e-06, "loss": 0.3166, "step": 11247 }, { "epoch": 0.5579641847313855, "grad_norm": 10.993990898132324, "learning_rate": 4.1637298615005756e-06, "loss": 0.4283, "step": 11248 }, { "epoch": 0.5580137903665856, "grad_norm": 5.429108619689941, "learning_rate": 4.162953868542866e-06, "loss": 0.231, "step": 11249 }, { "epoch": 0.5580633960017858, "grad_norm": 4.160581588745117, "learning_rate": 4.162177896327539e-06, "loss": 0.3266, "step": 11250 }, { "epoch": 0.558113001636986, "grad_norm": 5.1007399559021, "learning_rate": 4.161401944873824e-06, "loss": 0.1962, "step": 11251 }, { "epoch": 0.5581626072721861, "grad_norm": 8.445093154907227, "learning_rate": 4.160626014200946e-06, "loss": 0.2623, "step": 11252 }, { "epoch": 0.5582122129073863, "grad_norm": 4.04754114151001, "learning_rate": 4.159850104328137e-06, "loss": 0.2525, "step": 11253 }, { "epoch": 0.5582618185425864, "grad_norm": 4.833948612213135, "learning_rate": 4.159074215274622e-06, "loss": 0.3411, "step": 11254 }, { "epoch": 0.5583114241777866, "grad_norm": 6.681700229644775, "learning_rate": 4.1582983470596266e-06, "loss": 0.363, "step": 11255 }, { "epoch": 0.5583610298129867, "grad_norm": 11.122176170349121, "learning_rate": 4.157522499702381e-06, "loss": 0.3336, "step": 11256 }, { "epoch": 0.5584106354481869, "grad_norm": 5.060137748718262, "learning_rate": 4.156746673222109e-06, "loss": 0.1749, "step": 11257 }, { "epoch": 0.5584602410833871, "grad_norm": 7.152345180511475, "learning_rate": 4.1559708676380335e-06, "loss": 0.2337, "step": 11258 }, { "epoch": 0.5585098467185873, "grad_norm": 12.772370338439941, "learning_rate": 4.1551950829693845e-06, "loss": 0.3325, "step": 11259 }, { "epoch": 0.5585594523537873, "grad_norm": 11.436156272888184, "learning_rate": 4.154419319235383e-06, "loss": 0.2803, "step": 11260 }, { "epoch": 0.5586090579889875, "grad_norm": 6.764664649963379, "learning_rate": 4.153643576455251e-06, "loss": 0.311, "step": 11261 }, { "epoch": 0.5586586636241877, "grad_norm": 5.956857681274414, "learning_rate": 4.152867854648216e-06, "loss": 0.3589, "step": 11262 }, { "epoch": 0.5587082692593879, "grad_norm": 5.688439846038818, "learning_rate": 4.152092153833497e-06, "loss": 0.3429, "step": 11263 }, { "epoch": 0.5587578748945881, "grad_norm": 7.105591297149658, "learning_rate": 4.15131647403032e-06, "loss": 0.2205, "step": 11264 }, { "epoch": 0.5588074805297882, "grad_norm": 5.079868316650391, "learning_rate": 4.1505408152579044e-06, "loss": 0.2691, "step": 11265 }, { "epoch": 0.5588570861649883, "grad_norm": 5.675685405731201, "learning_rate": 4.1497651775354695e-06, "loss": 0.2644, "step": 11266 }, { "epoch": 0.5589066918001885, "grad_norm": 5.3329362869262695, "learning_rate": 4.148989560882241e-06, "loss": 0.3506, "step": 11267 }, { "epoch": 0.5589562974353887, "grad_norm": 8.428781509399414, "learning_rate": 4.148213965317436e-06, "loss": 0.3107, "step": 11268 }, { "epoch": 0.5590059030705888, "grad_norm": 7.608543872833252, "learning_rate": 4.147438390860272e-06, "loss": 0.3007, "step": 11269 }, { "epoch": 0.559055508705789, "grad_norm": 6.317837715148926, "learning_rate": 4.146662837529972e-06, "loss": 0.3301, "step": 11270 }, { "epoch": 0.5591051143409891, "grad_norm": 5.290679454803467, "learning_rate": 4.1458873053457525e-06, "loss": 0.2577, "step": 11271 }, { "epoch": 0.5591547199761893, "grad_norm": 4.199256896972656, "learning_rate": 4.145111794326831e-06, "loss": 0.2957, "step": 11272 }, { "epoch": 0.5592043256113894, "grad_norm": 4.8304643630981445, "learning_rate": 4.1443363044924275e-06, "loss": 0.3126, "step": 11273 }, { "epoch": 0.5592539312465896, "grad_norm": 18.52402687072754, "learning_rate": 4.143560835861756e-06, "loss": 0.3646, "step": 11274 }, { "epoch": 0.5593035368817898, "grad_norm": 6.625092029571533, "learning_rate": 4.142785388454035e-06, "loss": 0.2658, "step": 11275 }, { "epoch": 0.55935314251699, "grad_norm": 5.133432865142822, "learning_rate": 4.142009962288481e-06, "loss": 0.2401, "step": 11276 }, { "epoch": 0.55940274815219, "grad_norm": 3.2677571773529053, "learning_rate": 4.141234557384307e-06, "loss": 0.1847, "step": 11277 }, { "epoch": 0.5594523537873902, "grad_norm": 4.101136207580566, "learning_rate": 4.140459173760728e-06, "loss": 0.2883, "step": 11278 }, { "epoch": 0.5595019594225904, "grad_norm": 6.6038408279418945, "learning_rate": 4.139683811436962e-06, "loss": 0.289, "step": 11279 }, { "epoch": 0.5595515650577906, "grad_norm": 14.136503219604492, "learning_rate": 4.138908470432218e-06, "loss": 0.1797, "step": 11280 }, { "epoch": 0.5596011706929908, "grad_norm": 6.6932783126831055, "learning_rate": 4.138133150765713e-06, "loss": 0.3762, "step": 11281 }, { "epoch": 0.5596507763281909, "grad_norm": 4.563506126403809, "learning_rate": 4.137357852456658e-06, "loss": 0.2592, "step": 11282 }, { "epoch": 0.559700381963391, "grad_norm": 6.930047035217285, "learning_rate": 4.136582575524266e-06, "loss": 0.3937, "step": 11283 }, { "epoch": 0.5597499875985912, "grad_norm": 10.611886978149414, "learning_rate": 4.135807319987747e-06, "loss": 0.3644, "step": 11284 }, { "epoch": 0.5597995932337914, "grad_norm": 5.46597146987915, "learning_rate": 4.135032085866316e-06, "loss": 0.3102, "step": 11285 }, { "epoch": 0.5598491988689915, "grad_norm": 8.5311918258667, "learning_rate": 4.13425687317918e-06, "loss": 0.2542, "step": 11286 }, { "epoch": 0.5598988045041917, "grad_norm": 16.35192108154297, "learning_rate": 4.133481681945549e-06, "loss": 0.3194, "step": 11287 }, { "epoch": 0.5599484101393918, "grad_norm": 8.131421089172363, "learning_rate": 4.132706512184636e-06, "loss": 0.3052, "step": 11288 }, { "epoch": 0.559998015774592, "grad_norm": 9.84302043914795, "learning_rate": 4.131931363915646e-06, "loss": 0.3826, "step": 11289 }, { "epoch": 0.5600476214097921, "grad_norm": 10.947486877441406, "learning_rate": 4.13115623715779e-06, "loss": 0.3644, "step": 11290 }, { "epoch": 0.5600972270449923, "grad_norm": 10.535238265991211, "learning_rate": 4.130381131930277e-06, "loss": 0.3838, "step": 11291 }, { "epoch": 0.5601468326801925, "grad_norm": 6.056267261505127, "learning_rate": 4.129606048252311e-06, "loss": 0.3044, "step": 11292 }, { "epoch": 0.5601964383153927, "grad_norm": 18.165807723999023, "learning_rate": 4.128830986143102e-06, "loss": 0.3946, "step": 11293 }, { "epoch": 0.5602460439505927, "grad_norm": 5.919218063354492, "learning_rate": 4.128055945621856e-06, "loss": 0.2364, "step": 11294 }, { "epoch": 0.5602956495857929, "grad_norm": 5.608696460723877, "learning_rate": 4.1272809267077756e-06, "loss": 0.291, "step": 11295 }, { "epoch": 0.5603452552209931, "grad_norm": 10.987523078918457, "learning_rate": 4.126505929420071e-06, "loss": 0.3749, "step": 11296 }, { "epoch": 0.5603948608561933, "grad_norm": 7.3489460945129395, "learning_rate": 4.125730953777942e-06, "loss": 0.3867, "step": 11297 }, { "epoch": 0.5604444664913935, "grad_norm": 7.609782695770264, "learning_rate": 4.124955999800597e-06, "loss": 0.2539, "step": 11298 }, { "epoch": 0.5604940721265936, "grad_norm": 7.924513339996338, "learning_rate": 4.12418106750724e-06, "loss": 0.386, "step": 11299 }, { "epoch": 0.5605436777617937, "grad_norm": 7.826423168182373, "learning_rate": 4.123406156917068e-06, "loss": 0.2654, "step": 11300 }, { "epoch": 0.5605932833969939, "grad_norm": 4.527715682983398, "learning_rate": 4.122631268049292e-06, "loss": 0.2435, "step": 11301 }, { "epoch": 0.5606428890321941, "grad_norm": 8.45197582244873, "learning_rate": 4.12185640092311e-06, "loss": 0.2491, "step": 11302 }, { "epoch": 0.5606924946673942, "grad_norm": 11.616361618041992, "learning_rate": 4.12108155555772e-06, "loss": 0.3214, "step": 11303 }, { "epoch": 0.5607421003025944, "grad_norm": 6.455585479736328, "learning_rate": 4.12030673197233e-06, "loss": 0.2792, "step": 11304 }, { "epoch": 0.5607917059377945, "grad_norm": 6.714523792266846, "learning_rate": 4.119531930186136e-06, "loss": 0.2403, "step": 11305 }, { "epoch": 0.5608413115729947, "grad_norm": 7.221638202667236, "learning_rate": 4.118757150218338e-06, "loss": 0.2948, "step": 11306 }, { "epoch": 0.5608909172081948, "grad_norm": 10.385945320129395, "learning_rate": 4.117982392088138e-06, "loss": 0.2762, "step": 11307 }, { "epoch": 0.560940522843395, "grad_norm": 11.117990493774414, "learning_rate": 4.1172076558147335e-06, "loss": 0.3997, "step": 11308 }, { "epoch": 0.5609901284785952, "grad_norm": 5.150323867797852, "learning_rate": 4.11643294141732e-06, "loss": 0.2663, "step": 11309 }, { "epoch": 0.5610397341137954, "grad_norm": 5.193942070007324, "learning_rate": 4.1156582489151e-06, "loss": 0.2292, "step": 11310 }, { "epoch": 0.5610893397489954, "grad_norm": 5.616779327392578, "learning_rate": 4.1148835783272695e-06, "loss": 0.2769, "step": 11311 }, { "epoch": 0.5611389453841956, "grad_norm": 7.158596515655518, "learning_rate": 4.114108929673022e-06, "loss": 0.2138, "step": 11312 }, { "epoch": 0.5611885510193958, "grad_norm": 4.5255303382873535, "learning_rate": 4.113334302971558e-06, "loss": 0.3644, "step": 11313 }, { "epoch": 0.561238156654596, "grad_norm": 5.212838172912598, "learning_rate": 4.1125596982420705e-06, "loss": 0.1819, "step": 11314 }, { "epoch": 0.5612877622897962, "grad_norm": 3.971644878387451, "learning_rate": 4.111785115503754e-06, "loss": 0.1953, "step": 11315 }, { "epoch": 0.5613373679249963, "grad_norm": 9.28511905670166, "learning_rate": 4.111010554775805e-06, "loss": 0.3905, "step": 11316 }, { "epoch": 0.5613869735601964, "grad_norm": 9.580215454101562, "learning_rate": 4.110236016077415e-06, "loss": 0.3807, "step": 11317 }, { "epoch": 0.5614365791953966, "grad_norm": 9.63843059539795, "learning_rate": 4.109461499427782e-06, "loss": 0.3743, "step": 11318 }, { "epoch": 0.5614861848305968, "grad_norm": 4.720014572143555, "learning_rate": 4.1086870048460945e-06, "loss": 0.2566, "step": 11319 }, { "epoch": 0.5615357904657969, "grad_norm": 5.511103630065918, "learning_rate": 4.107912532351544e-06, "loss": 0.3103, "step": 11320 }, { "epoch": 0.5615853961009971, "grad_norm": 7.306469440460205, "learning_rate": 4.107138081963328e-06, "loss": 0.275, "step": 11321 }, { "epoch": 0.5616350017361972, "grad_norm": 3.826725721359253, "learning_rate": 4.106363653700634e-06, "loss": 0.1904, "step": 11322 }, { "epoch": 0.5616846073713974, "grad_norm": 7.623654842376709, "learning_rate": 4.10558924758265e-06, "loss": 0.293, "step": 11323 }, { "epoch": 0.5617342130065975, "grad_norm": 5.367191791534424, "learning_rate": 4.104814863628572e-06, "loss": 0.3521, "step": 11324 }, { "epoch": 0.5617838186417977, "grad_norm": 5.011760234832764, "learning_rate": 4.104040501857585e-06, "loss": 0.2631, "step": 11325 }, { "epoch": 0.5618334242769979, "grad_norm": 4.1980743408203125, "learning_rate": 4.103266162288878e-06, "loss": 0.1737, "step": 11326 }, { "epoch": 0.5618830299121981, "grad_norm": 15.986410140991211, "learning_rate": 4.102491844941644e-06, "loss": 0.5875, "step": 11327 }, { "epoch": 0.5619326355473981, "grad_norm": 5.159412384033203, "learning_rate": 4.101717549835067e-06, "loss": 0.2214, "step": 11328 }, { "epoch": 0.5619822411825983, "grad_norm": 10.060382843017578, "learning_rate": 4.100943276988335e-06, "loss": 0.2014, "step": 11329 }, { "epoch": 0.5620318468177985, "grad_norm": 5.948298454284668, "learning_rate": 4.100169026420637e-06, "loss": 0.3057, "step": 11330 }, { "epoch": 0.5620814524529987, "grad_norm": 5.697675704956055, "learning_rate": 4.0993947981511565e-06, "loss": 0.311, "step": 11331 }, { "epoch": 0.5621310580881989, "grad_norm": 9.83688735961914, "learning_rate": 4.098620592199079e-06, "loss": 0.2557, "step": 11332 }, { "epoch": 0.562180663723399, "grad_norm": 10.025067329406738, "learning_rate": 4.097846408583592e-06, "loss": 0.3644, "step": 11333 }, { "epoch": 0.5622302693585991, "grad_norm": 8.468324661254883, "learning_rate": 4.097072247323879e-06, "loss": 0.3763, "step": 11334 }, { "epoch": 0.5622798749937993, "grad_norm": 7.036801815032959, "learning_rate": 4.096298108439125e-06, "loss": 0.2318, "step": 11335 }, { "epoch": 0.5623294806289995, "grad_norm": 12.678126335144043, "learning_rate": 4.095523991948512e-06, "loss": 0.3449, "step": 11336 }, { "epoch": 0.5623790862641996, "grad_norm": 6.9363884925842285, "learning_rate": 4.094749897871223e-06, "loss": 0.3947, "step": 11337 }, { "epoch": 0.5624286918993998, "grad_norm": 7.738152980804443, "learning_rate": 4.093975826226442e-06, "loss": 0.3647, "step": 11338 }, { "epoch": 0.5624782975345999, "grad_norm": 8.012369155883789, "learning_rate": 4.09320177703335e-06, "loss": 0.346, "step": 11339 }, { "epoch": 0.5625279031698001, "grad_norm": 6.977533340454102, "learning_rate": 4.092427750311128e-06, "loss": 0.3215, "step": 11340 }, { "epoch": 0.5625775088050002, "grad_norm": 11.79633903503418, "learning_rate": 4.091653746078956e-06, "loss": 0.2534, "step": 11341 }, { "epoch": 0.5626271144402004, "grad_norm": 4.33922815322876, "learning_rate": 4.090879764356016e-06, "loss": 0.2631, "step": 11342 }, { "epoch": 0.5626767200754006, "grad_norm": 6.870533466339111, "learning_rate": 4.090105805161486e-06, "loss": 0.2297, "step": 11343 }, { "epoch": 0.5627263257106008, "grad_norm": 6.274921894073486, "learning_rate": 4.089331868514546e-06, "loss": 0.3757, "step": 11344 }, { "epoch": 0.5627759313458008, "grad_norm": 5.217308044433594, "learning_rate": 4.088557954434375e-06, "loss": 0.1967, "step": 11345 }, { "epoch": 0.562825536981001, "grad_norm": 5.484460830688477, "learning_rate": 4.087784062940149e-06, "loss": 0.2199, "step": 11346 }, { "epoch": 0.5628751426162012, "grad_norm": 5.418939590454102, "learning_rate": 4.087010194051048e-06, "loss": 0.3427, "step": 11347 }, { "epoch": 0.5629247482514014, "grad_norm": 4.979115009307861, "learning_rate": 4.086236347786246e-06, "loss": 0.2203, "step": 11348 }, { "epoch": 0.5629743538866016, "grad_norm": 11.981547355651855, "learning_rate": 4.085462524164921e-06, "loss": 0.3426, "step": 11349 }, { "epoch": 0.5630239595218017, "grad_norm": 7.52151346206665, "learning_rate": 4.084688723206248e-06, "loss": 0.3469, "step": 11350 }, { "epoch": 0.5630735651570018, "grad_norm": 6.617217540740967, "learning_rate": 4.083914944929402e-06, "loss": 0.3783, "step": 11351 }, { "epoch": 0.563123170792202, "grad_norm": 7.700706958770752, "learning_rate": 4.08314118935356e-06, "loss": 0.2779, "step": 11352 }, { "epoch": 0.5631727764274022, "grad_norm": 10.264089584350586, "learning_rate": 4.082367456497894e-06, "loss": 0.3882, "step": 11353 }, { "epoch": 0.5632223820626023, "grad_norm": 6.7062458992004395, "learning_rate": 4.081593746381575e-06, "loss": 0.2737, "step": 11354 }, { "epoch": 0.5632719876978025, "grad_norm": 8.304418563842773, "learning_rate": 4.0808200590237804e-06, "loss": 0.3909, "step": 11355 }, { "epoch": 0.5633215933330026, "grad_norm": 4.98399543762207, "learning_rate": 4.08004639444368e-06, "loss": 0.2779, "step": 11356 }, { "epoch": 0.5633711989682028, "grad_norm": 3.611935615539551, "learning_rate": 4.0792727526604455e-06, "loss": 0.2568, "step": 11357 }, { "epoch": 0.5634208046034029, "grad_norm": 4.475366592407227, "learning_rate": 4.07849913369325e-06, "loss": 0.3187, "step": 11358 }, { "epoch": 0.5634704102386031, "grad_norm": 5.074863910675049, "learning_rate": 4.077725537561263e-06, "loss": 0.3641, "step": 11359 }, { "epoch": 0.5635200158738033, "grad_norm": 9.48813247680664, "learning_rate": 4.076951964283653e-06, "loss": 0.4557, "step": 11360 }, { "epoch": 0.5635696215090035, "grad_norm": 9.990453720092773, "learning_rate": 4.076178413879592e-06, "loss": 0.1919, "step": 11361 }, { "epoch": 0.5636192271442035, "grad_norm": 5.051096439361572, "learning_rate": 4.075404886368248e-06, "loss": 0.2407, "step": 11362 }, { "epoch": 0.5636688327794037, "grad_norm": 6.176168441772461, "learning_rate": 4.074631381768787e-06, "loss": 0.2975, "step": 11363 }, { "epoch": 0.5637184384146039, "grad_norm": 4.702520370483398, "learning_rate": 4.0738579001003804e-06, "loss": 0.2613, "step": 11364 }, { "epoch": 0.5637680440498041, "grad_norm": 6.592056751251221, "learning_rate": 4.073084441382195e-06, "loss": 0.2475, "step": 11365 }, { "epoch": 0.5638176496850043, "grad_norm": 5.0294060707092285, "learning_rate": 4.072311005633393e-06, "loss": 0.2353, "step": 11366 }, { "epoch": 0.5638672553202044, "grad_norm": 13.525528907775879, "learning_rate": 4.071537592873148e-06, "loss": 0.2911, "step": 11367 }, { "epoch": 0.5639168609554045, "grad_norm": 16.348796844482422, "learning_rate": 4.070764203120618e-06, "loss": 0.4883, "step": 11368 }, { "epoch": 0.5639664665906047, "grad_norm": 8.638710975646973, "learning_rate": 4.069990836394973e-06, "loss": 0.2705, "step": 11369 }, { "epoch": 0.5640160722258049, "grad_norm": 4.6225810050964355, "learning_rate": 4.069217492715377e-06, "loss": 0.264, "step": 11370 }, { "epoch": 0.564065677861005, "grad_norm": 5.2621660232543945, "learning_rate": 4.0684441721009905e-06, "loss": 0.265, "step": 11371 }, { "epoch": 0.5641152834962052, "grad_norm": 7.844603061676025, "learning_rate": 4.067670874570981e-06, "loss": 0.3737, "step": 11372 }, { "epoch": 0.5641648891314053, "grad_norm": 8.568170547485352, "learning_rate": 4.066897600144508e-06, "loss": 0.3807, "step": 11373 }, { "epoch": 0.5642144947666055, "grad_norm": 6.807919025421143, "learning_rate": 4.0661243488407324e-06, "loss": 0.2983, "step": 11374 }, { "epoch": 0.5642641004018056, "grad_norm": 6.061502456665039, "learning_rate": 4.0653511206788205e-06, "loss": 0.3241, "step": 11375 }, { "epoch": 0.5643137060370058, "grad_norm": 4.333796501159668, "learning_rate": 4.064577915677931e-06, "loss": 0.2079, "step": 11376 }, { "epoch": 0.564363311672206, "grad_norm": 7.2612481117248535, "learning_rate": 4.063804733857221e-06, "loss": 0.2746, "step": 11377 }, { "epoch": 0.5644129173074062, "grad_norm": 4.021237373352051, "learning_rate": 4.063031575235856e-06, "loss": 0.2959, "step": 11378 }, { "epoch": 0.5644625229426062, "grad_norm": 4.121581554412842, "learning_rate": 4.0622584398329914e-06, "loss": 0.2231, "step": 11379 }, { "epoch": 0.5645121285778064, "grad_norm": 7.135910987854004, "learning_rate": 4.061485327667785e-06, "loss": 0.1944, "step": 11380 }, { "epoch": 0.5645617342130066, "grad_norm": 5.073259353637695, "learning_rate": 4.0607122387594e-06, "loss": 0.3405, "step": 11381 }, { "epoch": 0.5646113398482068, "grad_norm": 4.868438243865967, "learning_rate": 4.0599391731269884e-06, "loss": 0.2544, "step": 11382 }, { "epoch": 0.564660945483407, "grad_norm": 5.059095859527588, "learning_rate": 4.05916613078971e-06, "loss": 0.2441, "step": 11383 }, { "epoch": 0.5647105511186071, "grad_norm": 10.716753005981445, "learning_rate": 4.058393111766721e-06, "loss": 0.3427, "step": 11384 }, { "epoch": 0.5647601567538072, "grad_norm": 5.973891735076904, "learning_rate": 4.057620116077175e-06, "loss": 0.3531, "step": 11385 }, { "epoch": 0.5648097623890074, "grad_norm": 8.983691215515137, "learning_rate": 4.05684714374023e-06, "loss": 0.3443, "step": 11386 }, { "epoch": 0.5648593680242076, "grad_norm": 5.404172420501709, "learning_rate": 4.05607419477504e-06, "loss": 0.2528, "step": 11387 }, { "epoch": 0.5649089736594077, "grad_norm": 4.678372383117676, "learning_rate": 4.055301269200756e-06, "loss": 0.2288, "step": 11388 }, { "epoch": 0.5649585792946079, "grad_norm": 11.355387687683105, "learning_rate": 4.054528367036537e-06, "loss": 0.2518, "step": 11389 }, { "epoch": 0.565008184929808, "grad_norm": 7.634647369384766, "learning_rate": 4.053755488301531e-06, "loss": 0.3302, "step": 11390 }, { "epoch": 0.5650577905650082, "grad_norm": 6.8792853355407715, "learning_rate": 4.0529826330148925e-06, "loss": 0.351, "step": 11391 }, { "epoch": 0.5651073962002083, "grad_norm": 6.882616996765137, "learning_rate": 4.052209801195772e-06, "loss": 0.2318, "step": 11392 }, { "epoch": 0.5651570018354085, "grad_norm": 4.179417133331299, "learning_rate": 4.0514369928633225e-06, "loss": 0.2326, "step": 11393 }, { "epoch": 0.5652066074706087, "grad_norm": 9.854165077209473, "learning_rate": 4.050664208036692e-06, "loss": 0.2838, "step": 11394 }, { "epoch": 0.5652562131058089, "grad_norm": 4.061609745025635, "learning_rate": 4.049891446735033e-06, "loss": 0.2298, "step": 11395 }, { "epoch": 0.5653058187410089, "grad_norm": 6.279567718505859, "learning_rate": 4.049118708977494e-06, "loss": 0.352, "step": 11396 }, { "epoch": 0.5653554243762091, "grad_norm": 5.4404616355896, "learning_rate": 4.048345994783222e-06, "loss": 0.3536, "step": 11397 }, { "epoch": 0.5654050300114093, "grad_norm": 8.90314769744873, "learning_rate": 4.047573304171368e-06, "loss": 0.4805, "step": 11398 }, { "epoch": 0.5654546356466095, "grad_norm": 6.786163330078125, "learning_rate": 4.046800637161079e-06, "loss": 0.2841, "step": 11399 }, { "epoch": 0.5655042412818096, "grad_norm": 8.095514297485352, "learning_rate": 4.0460279937715e-06, "loss": 0.3795, "step": 11400 }, { "epoch": 0.5655538469170097, "grad_norm": 6.361336708068848, "learning_rate": 4.045255374021779e-06, "loss": 0.2518, "step": 11401 }, { "epoch": 0.5656034525522099, "grad_norm": 6.134932518005371, "learning_rate": 4.044482777931063e-06, "loss": 0.2077, "step": 11402 }, { "epoch": 0.5656530581874101, "grad_norm": 5.930050849914551, "learning_rate": 4.043710205518494e-06, "loss": 0.4073, "step": 11403 }, { "epoch": 0.5657026638226103, "grad_norm": 9.377429008483887, "learning_rate": 4.04293765680322e-06, "loss": 0.3469, "step": 11404 }, { "epoch": 0.5657522694578104, "grad_norm": 5.831650733947754, "learning_rate": 4.042165131804382e-06, "loss": 0.3683, "step": 11405 }, { "epoch": 0.5658018750930106, "grad_norm": 13.014750480651855, "learning_rate": 4.0413926305411275e-06, "loss": 0.5087, "step": 11406 }, { "epoch": 0.5658514807282107, "grad_norm": 7.150186061859131, "learning_rate": 4.040620153032598e-06, "loss": 0.3391, "step": 11407 }, { "epoch": 0.5659010863634109, "grad_norm": 24.581693649291992, "learning_rate": 4.039847699297932e-06, "loss": 0.3618, "step": 11408 }, { "epoch": 0.565950691998611, "grad_norm": 6.273971080780029, "learning_rate": 4.0390752693562764e-06, "loss": 0.3072, "step": 11409 }, { "epoch": 0.5660002976338112, "grad_norm": 5.614506244659424, "learning_rate": 4.038302863226771e-06, "loss": 0.2331, "step": 11410 }, { "epoch": 0.5660499032690114, "grad_norm": 7.362651824951172, "learning_rate": 4.0375304809285534e-06, "loss": 0.2935, "step": 11411 }, { "epoch": 0.5660995089042116, "grad_norm": 4.637369632720947, "learning_rate": 4.036758122480767e-06, "loss": 0.2709, "step": 11412 }, { "epoch": 0.5661491145394116, "grad_norm": 6.300980091094971, "learning_rate": 4.035985787902551e-06, "loss": 0.3338, "step": 11413 }, { "epoch": 0.5661987201746118, "grad_norm": 5.673692226409912, "learning_rate": 4.035213477213041e-06, "loss": 0.2491, "step": 11414 }, { "epoch": 0.566248325809812, "grad_norm": 12.018936157226562, "learning_rate": 4.03444119043138e-06, "loss": 0.3531, "step": 11415 }, { "epoch": 0.5662979314450122, "grad_norm": 7.995858192443848, "learning_rate": 4.033668927576702e-06, "loss": 0.3411, "step": 11416 }, { "epoch": 0.5663475370802123, "grad_norm": 4.219913482666016, "learning_rate": 4.032896688668144e-06, "loss": 0.2121, "step": 11417 }, { "epoch": 0.5663971427154124, "grad_norm": 14.023330688476562, "learning_rate": 4.032124473724844e-06, "loss": 0.5021, "step": 11418 }, { "epoch": 0.5664467483506126, "grad_norm": 17.94431495666504, "learning_rate": 4.031352282765939e-06, "loss": 0.3604, "step": 11419 }, { "epoch": 0.5664963539858128, "grad_norm": 9.417386054992676, "learning_rate": 4.03058011581056e-06, "loss": 0.4179, "step": 11420 }, { "epoch": 0.566545959621013, "grad_norm": 7.692218780517578, "learning_rate": 4.029807972877847e-06, "loss": 0.2964, "step": 11421 }, { "epoch": 0.5665955652562131, "grad_norm": 8.653599739074707, "learning_rate": 4.0290358539869274e-06, "loss": 0.2857, "step": 11422 }, { "epoch": 0.5666451708914133, "grad_norm": 8.178667068481445, "learning_rate": 4.028263759156942e-06, "loss": 0.3032, "step": 11423 }, { "epoch": 0.5666947765266134, "grad_norm": 12.25601577758789, "learning_rate": 4.02749168840702e-06, "loss": 0.3454, "step": 11424 }, { "epoch": 0.5667443821618136, "grad_norm": 6.557626247406006, "learning_rate": 4.026719641756292e-06, "loss": 0.2741, "step": 11425 }, { "epoch": 0.5667939877970137, "grad_norm": 15.933838844299316, "learning_rate": 4.025947619223894e-06, "loss": 0.3547, "step": 11426 }, { "epoch": 0.5668435934322139, "grad_norm": 6.9979963302612305, "learning_rate": 4.025175620828954e-06, "loss": 0.3684, "step": 11427 }, { "epoch": 0.5668931990674141, "grad_norm": 6.874111652374268, "learning_rate": 4.024403646590601e-06, "loss": 0.383, "step": 11428 }, { "epoch": 0.5669428047026143, "grad_norm": 8.0521879196167, "learning_rate": 4.023631696527969e-06, "loss": 0.3374, "step": 11429 }, { "epoch": 0.5669924103378143, "grad_norm": 4.783707618713379, "learning_rate": 4.0228597706601856e-06, "loss": 0.3047, "step": 11430 }, { "epoch": 0.5670420159730145, "grad_norm": 8.972503662109375, "learning_rate": 4.022087869006376e-06, "loss": 0.3665, "step": 11431 }, { "epoch": 0.5670916216082147, "grad_norm": 12.701032638549805, "learning_rate": 4.021315991585675e-06, "loss": 0.363, "step": 11432 }, { "epoch": 0.5671412272434149, "grad_norm": 7.372990608215332, "learning_rate": 4.020544138417205e-06, "loss": 0.3944, "step": 11433 }, { "epoch": 0.567190832878615, "grad_norm": 5.561150550842285, "learning_rate": 4.019772309520093e-06, "loss": 0.2506, "step": 11434 }, { "epoch": 0.5672404385138151, "grad_norm": 7.367397785186768, "learning_rate": 4.019000504913468e-06, "loss": 0.2696, "step": 11435 }, { "epoch": 0.5672900441490153, "grad_norm": 6.286561012268066, "learning_rate": 4.018228724616454e-06, "loss": 0.3418, "step": 11436 }, { "epoch": 0.5673396497842155, "grad_norm": 6.627023220062256, "learning_rate": 4.017456968648176e-06, "loss": 0.3914, "step": 11437 }, { "epoch": 0.5673892554194157, "grad_norm": 13.647171020507812, "learning_rate": 4.0166852370277595e-06, "loss": 0.4237, "step": 11438 }, { "epoch": 0.5674388610546158, "grad_norm": 6.333795070648193, "learning_rate": 4.0159135297743256e-06, "loss": 0.4487, "step": 11439 }, { "epoch": 0.567488466689816, "grad_norm": 9.172331809997559, "learning_rate": 4.015141846907002e-06, "loss": 0.2417, "step": 11440 }, { "epoch": 0.5675380723250161, "grad_norm": 5.297286033630371, "learning_rate": 4.014370188444908e-06, "loss": 0.2767, "step": 11441 }, { "epoch": 0.5675876779602163, "grad_norm": 6.239570140838623, "learning_rate": 4.013598554407167e-06, "loss": 0.3833, "step": 11442 }, { "epoch": 0.5676372835954164, "grad_norm": 7.576669216156006, "learning_rate": 4.0128269448128995e-06, "loss": 0.2854, "step": 11443 }, { "epoch": 0.5676868892306166, "grad_norm": 4.793759822845459, "learning_rate": 4.0120553596812276e-06, "loss": 0.2692, "step": 11444 }, { "epoch": 0.5677364948658168, "grad_norm": 3.7097926139831543, "learning_rate": 4.011283799031269e-06, "loss": 0.2864, "step": 11445 }, { "epoch": 0.567786100501017, "grad_norm": 5.586594581604004, "learning_rate": 4.0105122628821465e-06, "loss": 0.2844, "step": 11446 }, { "epoch": 0.567835706136217, "grad_norm": 6.941286087036133, "learning_rate": 4.009740751252978e-06, "loss": 0.3992, "step": 11447 }, { "epoch": 0.5678853117714172, "grad_norm": 6.970587730407715, "learning_rate": 4.008969264162881e-06, "loss": 0.2894, "step": 11448 }, { "epoch": 0.5679349174066174, "grad_norm": 5.765650272369385, "learning_rate": 4.0081978016309735e-06, "loss": 0.284, "step": 11449 }, { "epoch": 0.5679845230418176, "grad_norm": 8.95673942565918, "learning_rate": 4.007426363676375e-06, "loss": 0.252, "step": 11450 }, { "epoch": 0.5680341286770177, "grad_norm": 9.576391220092773, "learning_rate": 4.0066549503181985e-06, "loss": 0.396, "step": 11451 }, { "epoch": 0.5680837343122178, "grad_norm": 6.079186916351318, "learning_rate": 4.0058835615755615e-06, "loss": 0.3195, "step": 11452 }, { "epoch": 0.568133339947418, "grad_norm": 5.79182767868042, "learning_rate": 4.005112197467581e-06, "loss": 0.1942, "step": 11453 }, { "epoch": 0.5681829455826182, "grad_norm": 3.7822723388671875, "learning_rate": 4.004340858013368e-06, "loss": 0.2634, "step": 11454 }, { "epoch": 0.5682325512178183, "grad_norm": 9.330608367919922, "learning_rate": 4.003569543232041e-06, "loss": 0.3885, "step": 11455 }, { "epoch": 0.5682821568530185, "grad_norm": 9.880620002746582, "learning_rate": 4.002798253142712e-06, "loss": 0.3369, "step": 11456 }, { "epoch": 0.5683317624882187, "grad_norm": 9.731945037841797, "learning_rate": 4.00202698776449e-06, "loss": 0.3068, "step": 11457 }, { "epoch": 0.5683813681234188, "grad_norm": 6.797420501708984, "learning_rate": 4.001255747116494e-06, "loss": 0.3462, "step": 11458 }, { "epoch": 0.568430973758619, "grad_norm": 4.6403985023498535, "learning_rate": 4.00048453121783e-06, "loss": 0.3074, "step": 11459 }, { "epoch": 0.5684805793938191, "grad_norm": 11.11966323852539, "learning_rate": 3.999713340087613e-06, "loss": 0.43, "step": 11460 }, { "epoch": 0.5685301850290193, "grad_norm": 4.351626396179199, "learning_rate": 3.998942173744952e-06, "loss": 0.2985, "step": 11461 }, { "epoch": 0.5685797906642195, "grad_norm": 4.015268802642822, "learning_rate": 3.9981710322089555e-06, "loss": 0.223, "step": 11462 }, { "epoch": 0.5686293962994197, "grad_norm": 4.094025135040283, "learning_rate": 3.997399915498735e-06, "loss": 0.2107, "step": 11463 }, { "epoch": 0.5686790019346197, "grad_norm": 8.600432395935059, "learning_rate": 3.996628823633399e-06, "loss": 0.3512, "step": 11464 }, { "epoch": 0.5687286075698199, "grad_norm": 6.967907428741455, "learning_rate": 3.995857756632052e-06, "loss": 0.3218, "step": 11465 }, { "epoch": 0.5687782132050201, "grad_norm": 11.597882270812988, "learning_rate": 3.995086714513807e-06, "loss": 0.3582, "step": 11466 }, { "epoch": 0.5688278188402203, "grad_norm": 4.275875091552734, "learning_rate": 3.994315697297767e-06, "loss": 0.2674, "step": 11467 }, { "epoch": 0.5688774244754204, "grad_norm": 7.145211696624756, "learning_rate": 3.9935447050030364e-06, "loss": 0.3043, "step": 11468 }, { "epoch": 0.5689270301106205, "grad_norm": 5.089864253997803, "learning_rate": 3.9927737376487265e-06, "loss": 0.2871, "step": 11469 }, { "epoch": 0.5689766357458207, "grad_norm": 5.259527206420898, "learning_rate": 3.992002795253938e-06, "loss": 0.3374, "step": 11470 }, { "epoch": 0.5690262413810209, "grad_norm": 9.904674530029297, "learning_rate": 3.991231877837774e-06, "loss": 0.2872, "step": 11471 }, { "epoch": 0.569075847016221, "grad_norm": 7.228680610656738, "learning_rate": 3.9904609854193436e-06, "loss": 0.3145, "step": 11472 }, { "epoch": 0.5691254526514212, "grad_norm": 4.921694278717041, "learning_rate": 3.989690118017746e-06, "loss": 0.3148, "step": 11473 }, { "epoch": 0.5691750582866214, "grad_norm": 9.501524925231934, "learning_rate": 3.988919275652082e-06, "loss": 0.3468, "step": 11474 }, { "epoch": 0.5692246639218215, "grad_norm": 8.360940933227539, "learning_rate": 3.988148458341458e-06, "loss": 0.3663, "step": 11475 }, { "epoch": 0.5692742695570217, "grad_norm": 3.781407356262207, "learning_rate": 3.987377666104971e-06, "loss": 0.2643, "step": 11476 }, { "epoch": 0.5693238751922218, "grad_norm": 6.433865070343018, "learning_rate": 3.986606898961726e-06, "loss": 0.3316, "step": 11477 }, { "epoch": 0.569373480827422, "grad_norm": 6.397068023681641, "learning_rate": 3.985836156930819e-06, "loss": 0.2979, "step": 11478 }, { "epoch": 0.5694230864626222, "grad_norm": 8.216242790222168, "learning_rate": 3.98506544003135e-06, "loss": 0.3551, "step": 11479 }, { "epoch": 0.5694726920978224, "grad_norm": 6.143893241882324, "learning_rate": 3.98429474828242e-06, "loss": 0.3021, "step": 11480 }, { "epoch": 0.5695222977330224, "grad_norm": 6.3991498947143555, "learning_rate": 3.983524081703126e-06, "loss": 0.2641, "step": 11481 }, { "epoch": 0.5695719033682226, "grad_norm": 5.530511379241943, "learning_rate": 3.982753440312563e-06, "loss": 0.3419, "step": 11482 }, { "epoch": 0.5696215090034228, "grad_norm": 7.861701965332031, "learning_rate": 3.981982824129831e-06, "loss": 0.282, "step": 11483 }, { "epoch": 0.569671114638623, "grad_norm": 17.05508804321289, "learning_rate": 3.981212233174026e-06, "loss": 0.3456, "step": 11484 }, { "epoch": 0.5697207202738231, "grad_norm": 4.955872535705566, "learning_rate": 3.980441667464241e-06, "loss": 0.3253, "step": 11485 }, { "epoch": 0.5697703259090232, "grad_norm": 5.515517234802246, "learning_rate": 3.979671127019574e-06, "loss": 0.3051, "step": 11486 }, { "epoch": 0.5698199315442234, "grad_norm": 5.858051300048828, "learning_rate": 3.978900611859118e-06, "loss": 0.2958, "step": 11487 }, { "epoch": 0.5698695371794236, "grad_norm": 5.195208549499512, "learning_rate": 3.978130122001965e-06, "loss": 0.2378, "step": 11488 }, { "epoch": 0.5699191428146237, "grad_norm": 7.694495677947998, "learning_rate": 3.977359657467212e-06, "loss": 0.3672, "step": 11489 }, { "epoch": 0.5699687484498239, "grad_norm": 3.5555419921875, "learning_rate": 3.976589218273948e-06, "loss": 0.1879, "step": 11490 }, { "epoch": 0.5700183540850241, "grad_norm": 4.501830101013184, "learning_rate": 3.975818804441266e-06, "loss": 0.186, "step": 11491 }, { "epoch": 0.5700679597202242, "grad_norm": 5.599359512329102, "learning_rate": 3.975048415988259e-06, "loss": 0.3147, "step": 11492 }, { "epoch": 0.5701175653554243, "grad_norm": 10.993048667907715, "learning_rate": 3.974278052934013e-06, "loss": 0.3148, "step": 11493 }, { "epoch": 0.5701671709906245, "grad_norm": 9.273904800415039, "learning_rate": 3.973507715297623e-06, "loss": 0.3968, "step": 11494 }, { "epoch": 0.5702167766258247, "grad_norm": 4.904282569885254, "learning_rate": 3.972737403098176e-06, "loss": 0.2302, "step": 11495 }, { "epoch": 0.5702663822610249, "grad_norm": 5.766441345214844, "learning_rate": 3.971967116354759e-06, "loss": 0.2082, "step": 11496 }, { "epoch": 0.5703159878962251, "grad_norm": 6.743128299713135, "learning_rate": 3.971196855086465e-06, "loss": 0.2636, "step": 11497 }, { "epoch": 0.5703655935314251, "grad_norm": 9.29189395904541, "learning_rate": 3.970426619312377e-06, "loss": 0.3032, "step": 11498 }, { "epoch": 0.5704151991666253, "grad_norm": 6.739627838134766, "learning_rate": 3.969656409051582e-06, "loss": 0.2838, "step": 11499 }, { "epoch": 0.5704648048018255, "grad_norm": 4.859581470489502, "learning_rate": 3.968886224323169e-06, "loss": 0.2524, "step": 11500 }, { "epoch": 0.5705144104370257, "grad_norm": 8.722298622131348, "learning_rate": 3.968116065146219e-06, "loss": 0.4128, "step": 11501 }, { "epoch": 0.5705640160722258, "grad_norm": 10.03711223602295, "learning_rate": 3.9673459315398225e-06, "loss": 0.3765, "step": 11502 }, { "epoch": 0.5706136217074259, "grad_norm": 7.926247596740723, "learning_rate": 3.966575823523059e-06, "loss": 0.2686, "step": 11503 }, { "epoch": 0.5706632273426261, "grad_norm": 13.737171173095703, "learning_rate": 3.965805741115016e-06, "loss": 0.3868, "step": 11504 }, { "epoch": 0.5707128329778263, "grad_norm": 6.205898761749268, "learning_rate": 3.965035684334773e-06, "loss": 0.264, "step": 11505 }, { "epoch": 0.5707624386130264, "grad_norm": 11.2457857131958, "learning_rate": 3.964265653201414e-06, "loss": 0.2876, "step": 11506 }, { "epoch": 0.5708120442482266, "grad_norm": 10.696184158325195, "learning_rate": 3.963495647734022e-06, "loss": 0.2435, "step": 11507 }, { "epoch": 0.5708616498834268, "grad_norm": 4.006843090057373, "learning_rate": 3.962725667951675e-06, "loss": 0.195, "step": 11508 }, { "epoch": 0.5709112555186269, "grad_norm": 14.349632263183594, "learning_rate": 3.961955713873456e-06, "loss": 0.3341, "step": 11509 }, { "epoch": 0.570960861153827, "grad_norm": 3.953152656555176, "learning_rate": 3.961185785518445e-06, "loss": 0.2762, "step": 11510 }, { "epoch": 0.5710104667890272, "grad_norm": 7.307978630065918, "learning_rate": 3.960415882905718e-06, "loss": 0.2444, "step": 11511 }, { "epoch": 0.5710600724242274, "grad_norm": 8.035611152648926, "learning_rate": 3.959646006054357e-06, "loss": 0.3622, "step": 11512 }, { "epoch": 0.5711096780594276, "grad_norm": 8.33228588104248, "learning_rate": 3.958876154983437e-06, "loss": 0.2484, "step": 11513 }, { "epoch": 0.5711592836946278, "grad_norm": 5.074070453643799, "learning_rate": 3.958106329712039e-06, "loss": 0.2856, "step": 11514 }, { "epoch": 0.5712088893298278, "grad_norm": 8.870854377746582, "learning_rate": 3.957336530259237e-06, "loss": 0.3809, "step": 11515 }, { "epoch": 0.571258494965028, "grad_norm": 7.181217670440674, "learning_rate": 3.9565667566441055e-06, "loss": 0.3302, "step": 11516 }, { "epoch": 0.5713081006002282, "grad_norm": 9.313570976257324, "learning_rate": 3.955797008885724e-06, "loss": 0.409, "step": 11517 }, { "epoch": 0.5713577062354284, "grad_norm": 15.447761535644531, "learning_rate": 3.955027287003165e-06, "loss": 0.3631, "step": 11518 }, { "epoch": 0.5714073118706285, "grad_norm": 7.099970817565918, "learning_rate": 3.9542575910155e-06, "loss": 0.3085, "step": 11519 }, { "epoch": 0.5714569175058286, "grad_norm": 6.444973945617676, "learning_rate": 3.953487920941808e-06, "loss": 0.3175, "step": 11520 }, { "epoch": 0.5715065231410288, "grad_norm": 5.956220626831055, "learning_rate": 3.952718276801159e-06, "loss": 0.2571, "step": 11521 }, { "epoch": 0.571556128776229, "grad_norm": 4.432501316070557, "learning_rate": 3.951948658612621e-06, "loss": 0.2243, "step": 11522 }, { "epoch": 0.5716057344114291, "grad_norm": 5.3808512687683105, "learning_rate": 3.951179066395273e-06, "loss": 0.2192, "step": 11523 }, { "epoch": 0.5716553400466293, "grad_norm": 6.1691575050354, "learning_rate": 3.9504095001681806e-06, "loss": 0.2345, "step": 11524 }, { "epoch": 0.5717049456818295, "grad_norm": 7.755115032196045, "learning_rate": 3.949639959950414e-06, "loss": 0.2452, "step": 11525 }, { "epoch": 0.5717545513170296, "grad_norm": 7.874980926513672, "learning_rate": 3.948870445761047e-06, "loss": 0.2684, "step": 11526 }, { "epoch": 0.5718041569522297, "grad_norm": 11.687155723571777, "learning_rate": 3.948100957619146e-06, "loss": 0.3398, "step": 11527 }, { "epoch": 0.5718537625874299, "grad_norm": 7.063784122467041, "learning_rate": 3.947331495543776e-06, "loss": 0.3418, "step": 11528 }, { "epoch": 0.5719033682226301, "grad_norm": 5.4465203285217285, "learning_rate": 3.946562059554011e-06, "loss": 0.2088, "step": 11529 }, { "epoch": 0.5719529738578303, "grad_norm": 5.064638614654541, "learning_rate": 3.9457926496689116e-06, "loss": 0.2715, "step": 11530 }, { "epoch": 0.5720025794930305, "grad_norm": 21.41080093383789, "learning_rate": 3.9450232659075496e-06, "loss": 0.3543, "step": 11531 }, { "epoch": 0.5720521851282305, "grad_norm": 5.851556777954102, "learning_rate": 3.944253908288989e-06, "loss": 0.2468, "step": 11532 }, { "epoch": 0.5721017907634307, "grad_norm": 6.465248107910156, "learning_rate": 3.943484576832292e-06, "loss": 0.3566, "step": 11533 }, { "epoch": 0.5721513963986309, "grad_norm": 12.263740539550781, "learning_rate": 3.942715271556526e-06, "loss": 0.4084, "step": 11534 }, { "epoch": 0.5722010020338311, "grad_norm": 5.29705286026001, "learning_rate": 3.941945992480755e-06, "loss": 0.2216, "step": 11535 }, { "epoch": 0.5722506076690312, "grad_norm": 8.103906631469727, "learning_rate": 3.941176739624037e-06, "loss": 0.3235, "step": 11536 }, { "epoch": 0.5723002133042313, "grad_norm": 16.500606536865234, "learning_rate": 3.940407513005442e-06, "loss": 0.3518, "step": 11537 }, { "epoch": 0.5723498189394315, "grad_norm": 4.8856916427612305, "learning_rate": 3.939638312644028e-06, "loss": 0.2814, "step": 11538 }, { "epoch": 0.5723994245746317, "grad_norm": 15.250792503356934, "learning_rate": 3.938869138558854e-06, "loss": 0.3146, "step": 11539 }, { "epoch": 0.5724490302098318, "grad_norm": 4.8471808433532715, "learning_rate": 3.938099990768985e-06, "loss": 0.2112, "step": 11540 }, { "epoch": 0.572498635845032, "grad_norm": 5.288674354553223, "learning_rate": 3.937330869293479e-06, "loss": 0.2822, "step": 11541 }, { "epoch": 0.5725482414802322, "grad_norm": 11.062284469604492, "learning_rate": 3.936561774151392e-06, "loss": 0.3898, "step": 11542 }, { "epoch": 0.5725978471154323, "grad_norm": 6.820223331451416, "learning_rate": 3.935792705361787e-06, "loss": 0.258, "step": 11543 }, { "epoch": 0.5726474527506324, "grad_norm": 7.628211498260498, "learning_rate": 3.93502366294372e-06, "loss": 0.2176, "step": 11544 }, { "epoch": 0.5726970583858326, "grad_norm": 7.087192058563232, "learning_rate": 3.934254646916247e-06, "loss": 0.2953, "step": 11545 }, { "epoch": 0.5727466640210328, "grad_norm": 5.915410995483398, "learning_rate": 3.933485657298429e-06, "loss": 0.3483, "step": 11546 }, { "epoch": 0.572796269656233, "grad_norm": 9.101157188415527, "learning_rate": 3.932716694109315e-06, "loss": 0.2483, "step": 11547 }, { "epoch": 0.5728458752914332, "grad_norm": 6.228373050689697, "learning_rate": 3.931947757367967e-06, "loss": 0.3269, "step": 11548 }, { "epoch": 0.5728954809266332, "grad_norm": 7.410955429077148, "learning_rate": 3.931178847093437e-06, "loss": 0.3143, "step": 11549 }, { "epoch": 0.5729450865618334, "grad_norm": 5.7185959815979, "learning_rate": 3.930409963304776e-06, "loss": 0.2816, "step": 11550 }, { "epoch": 0.5729946921970336, "grad_norm": 9.633979797363281, "learning_rate": 3.929641106021042e-06, "loss": 0.2601, "step": 11551 }, { "epoch": 0.5730442978322338, "grad_norm": 4.529703617095947, "learning_rate": 3.928872275261284e-06, "loss": 0.2942, "step": 11552 }, { "epoch": 0.5730939034674339, "grad_norm": 8.40658950805664, "learning_rate": 3.9281034710445574e-06, "loss": 0.3159, "step": 11553 }, { "epoch": 0.573143509102634, "grad_norm": 6.334110736846924, "learning_rate": 3.927334693389911e-06, "loss": 0.315, "step": 11554 }, { "epoch": 0.5731931147378342, "grad_norm": 9.137600898742676, "learning_rate": 3.926565942316396e-06, "loss": 0.3813, "step": 11555 }, { "epoch": 0.5732427203730344, "grad_norm": 9.791411399841309, "learning_rate": 3.925797217843062e-06, "loss": 0.319, "step": 11556 }, { "epoch": 0.5732923260082345, "grad_norm": 5.454487323760986, "learning_rate": 3.925028519988959e-06, "loss": 0.291, "step": 11557 }, { "epoch": 0.5733419316434347, "grad_norm": 5.148478984832764, "learning_rate": 3.924259848773137e-06, "loss": 0.366, "step": 11558 }, { "epoch": 0.5733915372786349, "grad_norm": 11.932148933410645, "learning_rate": 3.92349120421464e-06, "loss": 0.409, "step": 11559 }, { "epoch": 0.573441142913835, "grad_norm": 8.900344848632812, "learning_rate": 3.92272258633252e-06, "loss": 0.3629, "step": 11560 }, { "epoch": 0.5734907485490351, "grad_norm": 6.160059452056885, "learning_rate": 3.921953995145821e-06, "loss": 0.2436, "step": 11561 }, { "epoch": 0.5735403541842353, "grad_norm": 4.759520530700684, "learning_rate": 3.921185430673588e-06, "loss": 0.271, "step": 11562 }, { "epoch": 0.5735899598194355, "grad_norm": 6.749697208404541, "learning_rate": 3.920416892934871e-06, "loss": 0.3152, "step": 11563 }, { "epoch": 0.5736395654546357, "grad_norm": 7.029835224151611, "learning_rate": 3.91964838194871e-06, "loss": 0.2855, "step": 11564 }, { "epoch": 0.5736891710898359, "grad_norm": 10.165952682495117, "learning_rate": 3.918879897734151e-06, "loss": 0.4325, "step": 11565 }, { "epoch": 0.5737387767250359, "grad_norm": 8.075937271118164, "learning_rate": 3.9181114403102375e-06, "loss": 0.3017, "step": 11566 }, { "epoch": 0.5737883823602361, "grad_norm": 11.31454849243164, "learning_rate": 3.917343009696011e-06, "loss": 0.5264, "step": 11567 }, { "epoch": 0.5738379879954363, "grad_norm": 5.4519219398498535, "learning_rate": 3.9165746059105155e-06, "loss": 0.2166, "step": 11568 }, { "epoch": 0.5738875936306365, "grad_norm": 5.412806034088135, "learning_rate": 3.915806228972792e-06, "loss": 0.3329, "step": 11569 }, { "epoch": 0.5739371992658366, "grad_norm": 5.479946613311768, "learning_rate": 3.9150378789018785e-06, "loss": 0.2257, "step": 11570 }, { "epoch": 0.5739868049010367, "grad_norm": 8.591588973999023, "learning_rate": 3.914269555716818e-06, "loss": 0.4189, "step": 11571 }, { "epoch": 0.5740364105362369, "grad_norm": 7.364262104034424, "learning_rate": 3.913501259436651e-06, "loss": 0.374, "step": 11572 }, { "epoch": 0.5740860161714371, "grad_norm": 10.444422721862793, "learning_rate": 3.912732990080411e-06, "loss": 0.3917, "step": 11573 }, { "epoch": 0.5741356218066372, "grad_norm": 4.958600044250488, "learning_rate": 3.911964747667142e-06, "loss": 0.2676, "step": 11574 }, { "epoch": 0.5741852274418374, "grad_norm": 9.579185485839844, "learning_rate": 3.911196532215878e-06, "loss": 0.392, "step": 11575 }, { "epoch": 0.5742348330770376, "grad_norm": 5.606718063354492, "learning_rate": 3.910428343745655e-06, "loss": 0.3145, "step": 11576 }, { "epoch": 0.5742844387122377, "grad_norm": 6.452996253967285, "learning_rate": 3.909660182275512e-06, "loss": 0.2424, "step": 11577 }, { "epoch": 0.5743340443474378, "grad_norm": 7.86223840713501, "learning_rate": 3.908892047824483e-06, "loss": 0.3719, "step": 11578 }, { "epoch": 0.574383649982638, "grad_norm": 6.457560062408447, "learning_rate": 3.908123940411601e-06, "loss": 0.2789, "step": 11579 }, { "epoch": 0.5744332556178382, "grad_norm": 5.999075412750244, "learning_rate": 3.907355860055904e-06, "loss": 0.265, "step": 11580 }, { "epoch": 0.5744828612530384, "grad_norm": 5.328132152557373, "learning_rate": 3.906587806776422e-06, "loss": 0.3079, "step": 11581 }, { "epoch": 0.5745324668882386, "grad_norm": 9.334151268005371, "learning_rate": 3.905819780592187e-06, "loss": 0.3127, "step": 11582 }, { "epoch": 0.5745820725234386, "grad_norm": 6.028980255126953, "learning_rate": 3.905051781522235e-06, "loss": 0.2591, "step": 11583 }, { "epoch": 0.5746316781586388, "grad_norm": 5.369102954864502, "learning_rate": 3.904283809585594e-06, "loss": 0.3705, "step": 11584 }, { "epoch": 0.574681283793839, "grad_norm": 6.817761421203613, "learning_rate": 3.9035158648012965e-06, "loss": 0.3602, "step": 11585 }, { "epoch": 0.5747308894290392, "grad_norm": 7.0530571937561035, "learning_rate": 3.902747947188372e-06, "loss": 0.3625, "step": 11586 }, { "epoch": 0.5747804950642393, "grad_norm": 6.088279724121094, "learning_rate": 3.901980056765848e-06, "loss": 0.2783, "step": 11587 }, { "epoch": 0.5748301006994394, "grad_norm": 6.172895908355713, "learning_rate": 3.901212193552757e-06, "loss": 0.2312, "step": 11588 }, { "epoch": 0.5748797063346396, "grad_norm": 6.46592378616333, "learning_rate": 3.900444357568125e-06, "loss": 0.3678, "step": 11589 }, { "epoch": 0.5749293119698398, "grad_norm": 9.907959938049316, "learning_rate": 3.899676548830978e-06, "loss": 0.4339, "step": 11590 }, { "epoch": 0.5749789176050399, "grad_norm": 5.570113182067871, "learning_rate": 3.898908767360344e-06, "loss": 0.2871, "step": 11591 }, { "epoch": 0.5750285232402401, "grad_norm": 11.575867652893066, "learning_rate": 3.898141013175251e-06, "loss": 0.2912, "step": 11592 }, { "epoch": 0.5750781288754403, "grad_norm": 5.162176132202148, "learning_rate": 3.897373286294718e-06, "loss": 0.242, "step": 11593 }, { "epoch": 0.5751277345106404, "grad_norm": 5.025694370269775, "learning_rate": 3.8966055867377765e-06, "loss": 0.3148, "step": 11594 }, { "epoch": 0.5751773401458405, "grad_norm": 4.218784809112549, "learning_rate": 3.895837914523448e-06, "loss": 0.2517, "step": 11595 }, { "epoch": 0.5752269457810407, "grad_norm": 4.781207084655762, "learning_rate": 3.895070269670753e-06, "loss": 0.2654, "step": 11596 }, { "epoch": 0.5752765514162409, "grad_norm": 7.558439254760742, "learning_rate": 3.894302652198719e-06, "loss": 0.3969, "step": 11597 }, { "epoch": 0.5753261570514411, "grad_norm": 5.867618083953857, "learning_rate": 3.893535062126366e-06, "loss": 0.3076, "step": 11598 }, { "epoch": 0.5753757626866413, "grad_norm": 6.3039727210998535, "learning_rate": 3.892767499472711e-06, "loss": 0.2476, "step": 11599 }, { "epoch": 0.5754253683218413, "grad_norm": 10.076108932495117, "learning_rate": 3.891999964256781e-06, "loss": 0.248, "step": 11600 }, { "epoch": 0.5754749739570415, "grad_norm": 4.909493923187256, "learning_rate": 3.891232456497591e-06, "loss": 0.2512, "step": 11601 }, { "epoch": 0.5755245795922417, "grad_norm": 9.930110931396484, "learning_rate": 3.890464976214165e-06, "loss": 0.3782, "step": 11602 }, { "epoch": 0.5755741852274419, "grad_norm": 5.521365642547607, "learning_rate": 3.889697523425519e-06, "loss": 0.3131, "step": 11603 }, { "epoch": 0.575623790862642, "grad_norm": 5.639077186584473, "learning_rate": 3.8889300981506675e-06, "loss": 0.2128, "step": 11604 }, { "epoch": 0.5756733964978421, "grad_norm": 6.472206115722656, "learning_rate": 3.888162700408633e-06, "loss": 0.3169, "step": 11605 }, { "epoch": 0.5757230021330423, "grad_norm": 6.749134540557861, "learning_rate": 3.887395330218429e-06, "loss": 0.3349, "step": 11606 }, { "epoch": 0.5757726077682425, "grad_norm": 4.505013942718506, "learning_rate": 3.886627987599072e-06, "loss": 0.3187, "step": 11607 }, { "epoch": 0.5758222134034426, "grad_norm": 7.578272819519043, "learning_rate": 3.885860672569577e-06, "loss": 0.3222, "step": 11608 }, { "epoch": 0.5758718190386428, "grad_norm": 4.5767292976379395, "learning_rate": 3.8850933851489595e-06, "loss": 0.3879, "step": 11609 }, { "epoch": 0.575921424673843, "grad_norm": 5.985333442687988, "learning_rate": 3.884326125356231e-06, "loss": 0.2557, "step": 11610 }, { "epoch": 0.5759710303090431, "grad_norm": 4.998510360717773, "learning_rate": 3.883558893210406e-06, "loss": 0.1848, "step": 11611 }, { "epoch": 0.5760206359442432, "grad_norm": 8.656773567199707, "learning_rate": 3.882791688730497e-06, "loss": 0.3207, "step": 11612 }, { "epoch": 0.5760702415794434, "grad_norm": 6.719494819641113, "learning_rate": 3.882024511935516e-06, "loss": 0.3108, "step": 11613 }, { "epoch": 0.5761198472146436, "grad_norm": 5.8116960525512695, "learning_rate": 3.881257362844471e-06, "loss": 0.2889, "step": 11614 }, { "epoch": 0.5761694528498438, "grad_norm": 6.3332109451293945, "learning_rate": 3.880490241476377e-06, "loss": 0.2673, "step": 11615 }, { "epoch": 0.576219058485044, "grad_norm": 4.699657440185547, "learning_rate": 3.8797231478502395e-06, "loss": 0.2744, "step": 11616 }, { "epoch": 0.576268664120244, "grad_norm": 7.426095485687256, "learning_rate": 3.87895608198507e-06, "loss": 0.2009, "step": 11617 }, { "epoch": 0.5763182697554442, "grad_norm": 11.77184772491455, "learning_rate": 3.8781890438998765e-06, "loss": 0.3161, "step": 11618 }, { "epoch": 0.5763678753906444, "grad_norm": 10.247734069824219, "learning_rate": 3.8774220336136645e-06, "loss": 0.3198, "step": 11619 }, { "epoch": 0.5764174810258446, "grad_norm": 8.372584342956543, "learning_rate": 3.876655051145445e-06, "loss": 0.3653, "step": 11620 }, { "epoch": 0.5764670866610447, "grad_norm": 5.14470100402832, "learning_rate": 3.875888096514217e-06, "loss": 0.3524, "step": 11621 }, { "epoch": 0.5765166922962448, "grad_norm": 6.706952095031738, "learning_rate": 3.875121169738994e-06, "loss": 0.298, "step": 11622 }, { "epoch": 0.576566297931445, "grad_norm": 5.804941177368164, "learning_rate": 3.874354270838778e-06, "loss": 0.2195, "step": 11623 }, { "epoch": 0.5766159035666452, "grad_norm": 5.122827529907227, "learning_rate": 3.873587399832569e-06, "loss": 0.3162, "step": 11624 }, { "epoch": 0.5766655092018453, "grad_norm": 9.945686340332031, "learning_rate": 3.872820556739376e-06, "loss": 0.3898, "step": 11625 }, { "epoch": 0.5767151148370455, "grad_norm": 5.227128505706787, "learning_rate": 3.8720537415782e-06, "loss": 0.29, "step": 11626 }, { "epoch": 0.5767647204722457, "grad_norm": 7.710998058319092, "learning_rate": 3.8712869543680405e-06, "loss": 0.2588, "step": 11627 }, { "epoch": 0.5768143261074458, "grad_norm": 5.778407573699951, "learning_rate": 3.870520195127903e-06, "loss": 0.1901, "step": 11628 }, { "epoch": 0.5768639317426459, "grad_norm": 14.786620140075684, "learning_rate": 3.869753463876785e-06, "loss": 0.2655, "step": 11629 }, { "epoch": 0.5769135373778461, "grad_norm": 4.659163951873779, "learning_rate": 3.8689867606336865e-06, "loss": 0.2696, "step": 11630 }, { "epoch": 0.5769631430130463, "grad_norm": 7.285162448883057, "learning_rate": 3.868220085417609e-06, "loss": 0.3898, "step": 11631 }, { "epoch": 0.5770127486482465, "grad_norm": 5.41463041305542, "learning_rate": 3.867453438247549e-06, "loss": 0.2541, "step": 11632 }, { "epoch": 0.5770623542834467, "grad_norm": 6.398648738861084, "learning_rate": 3.866686819142504e-06, "loss": 0.2447, "step": 11633 }, { "epoch": 0.5771119599186467, "grad_norm": 4.119485855102539, "learning_rate": 3.8659202281214745e-06, "loss": 0.2317, "step": 11634 }, { "epoch": 0.5771615655538469, "grad_norm": 13.34753131866455, "learning_rate": 3.865153665203455e-06, "loss": 0.2706, "step": 11635 }, { "epoch": 0.5772111711890471, "grad_norm": 12.765371322631836, "learning_rate": 3.8643871304074384e-06, "loss": 0.3084, "step": 11636 }, { "epoch": 0.5772607768242473, "grad_norm": 10.332772254943848, "learning_rate": 3.863620623752424e-06, "loss": 0.3828, "step": 11637 }, { "epoch": 0.5773103824594474, "grad_norm": 12.287019729614258, "learning_rate": 3.862854145257404e-06, "loss": 0.3311, "step": 11638 }, { "epoch": 0.5773599880946475, "grad_norm": 12.20170783996582, "learning_rate": 3.862087694941373e-06, "loss": 0.3403, "step": 11639 }, { "epoch": 0.5774095937298477, "grad_norm": 7.105610370635986, "learning_rate": 3.861321272823325e-06, "loss": 0.396, "step": 11640 }, { "epoch": 0.5774591993650479, "grad_norm": 6.323184967041016, "learning_rate": 3.860554878922248e-06, "loss": 0.279, "step": 11641 }, { "epoch": 0.577508805000248, "grad_norm": 5.930279731750488, "learning_rate": 3.85978851325714e-06, "loss": 0.2542, "step": 11642 }, { "epoch": 0.5775584106354482, "grad_norm": 9.343290328979492, "learning_rate": 3.859022175846987e-06, "loss": 0.3639, "step": 11643 }, { "epoch": 0.5776080162706484, "grad_norm": 5.8747992515563965, "learning_rate": 3.858255866710778e-06, "loss": 0.3534, "step": 11644 }, { "epoch": 0.5776576219058485, "grad_norm": 3.8394994735717773, "learning_rate": 3.857489585867509e-06, "loss": 0.1952, "step": 11645 }, { "epoch": 0.5777072275410486, "grad_norm": 9.26518726348877, "learning_rate": 3.856723333336163e-06, "loss": 0.3104, "step": 11646 }, { "epoch": 0.5777568331762488, "grad_norm": 6.541755199432373, "learning_rate": 3.85595710913573e-06, "loss": 0.3688, "step": 11647 }, { "epoch": 0.577806438811449, "grad_norm": 7.526782989501953, "learning_rate": 3.855190913285197e-06, "loss": 0.2954, "step": 11648 }, { "epoch": 0.5778560444466492, "grad_norm": 4.383493423461914, "learning_rate": 3.854424745803552e-06, "loss": 0.2506, "step": 11649 }, { "epoch": 0.5779056500818494, "grad_norm": 44.389225006103516, "learning_rate": 3.853658606709779e-06, "loss": 0.4886, "step": 11650 }, { "epoch": 0.5779552557170494, "grad_norm": 6.122336387634277, "learning_rate": 3.8528924960228655e-06, "loss": 0.2555, "step": 11651 }, { "epoch": 0.5780048613522496, "grad_norm": 4.866530895233154, "learning_rate": 3.852126413761795e-06, "loss": 0.3135, "step": 11652 }, { "epoch": 0.5780544669874498, "grad_norm": 6.364356517791748, "learning_rate": 3.851360359945549e-06, "loss": 0.2179, "step": 11653 }, { "epoch": 0.57810407262265, "grad_norm": 4.551207065582275, "learning_rate": 3.850594334593115e-06, "loss": 0.2792, "step": 11654 }, { "epoch": 0.5781536782578501, "grad_norm": 8.096631050109863, "learning_rate": 3.849828337723471e-06, "loss": 0.2854, "step": 11655 }, { "epoch": 0.5782032838930502, "grad_norm": 6.608756065368652, "learning_rate": 3.849062369355603e-06, "loss": 0.3621, "step": 11656 }, { "epoch": 0.5782528895282504, "grad_norm": 6.5360212326049805, "learning_rate": 3.8482964295084905e-06, "loss": 0.3375, "step": 11657 }, { "epoch": 0.5783024951634506, "grad_norm": 6.557048797607422, "learning_rate": 3.847530518201111e-06, "loss": 0.3107, "step": 11658 }, { "epoch": 0.5783521007986507, "grad_norm": 7.786978244781494, "learning_rate": 3.846764635452449e-06, "loss": 0.2633, "step": 11659 }, { "epoch": 0.5784017064338509, "grad_norm": 4.753570079803467, "learning_rate": 3.84599878128148e-06, "loss": 0.2939, "step": 11660 }, { "epoch": 0.5784513120690511, "grad_norm": 8.260186195373535, "learning_rate": 3.845232955707184e-06, "loss": 0.2914, "step": 11661 }, { "epoch": 0.5785009177042512, "grad_norm": 7.0517120361328125, "learning_rate": 3.844467158748538e-06, "loss": 0.309, "step": 11662 }, { "epoch": 0.5785505233394513, "grad_norm": 8.658403396606445, "learning_rate": 3.843701390424517e-06, "loss": 0.2878, "step": 11663 }, { "epoch": 0.5786001289746515, "grad_norm": 5.9580254554748535, "learning_rate": 3.842935650754099e-06, "loss": 0.2945, "step": 11664 }, { "epoch": 0.5786497346098517, "grad_norm": 9.602745056152344, "learning_rate": 3.842169939756259e-06, "loss": 0.3233, "step": 11665 }, { "epoch": 0.5786993402450519, "grad_norm": 10.09674072265625, "learning_rate": 3.8414042574499726e-06, "loss": 0.4089, "step": 11666 }, { "epoch": 0.5787489458802519, "grad_norm": 5.800755977630615, "learning_rate": 3.840638603854211e-06, "loss": 0.2884, "step": 11667 }, { "epoch": 0.5787985515154521, "grad_norm": 4.775224208831787, "learning_rate": 3.839872978987951e-06, "loss": 0.2731, "step": 11668 }, { "epoch": 0.5788481571506523, "grad_norm": 8.559344291687012, "learning_rate": 3.839107382870163e-06, "loss": 0.3583, "step": 11669 }, { "epoch": 0.5788977627858525, "grad_norm": 7.112378120422363, "learning_rate": 3.838341815519817e-06, "loss": 0.3669, "step": 11670 }, { "epoch": 0.5789473684210527, "grad_norm": 7.116771221160889, "learning_rate": 3.83757627695589e-06, "loss": 0.253, "step": 11671 }, { "epoch": 0.5789969740562528, "grad_norm": 3.416501998901367, "learning_rate": 3.836810767197346e-06, "loss": 0.2106, "step": 11672 }, { "epoch": 0.5790465796914529, "grad_norm": 6.7902984619140625, "learning_rate": 3.836045286263158e-06, "loss": 0.31, "step": 11673 }, { "epoch": 0.5790961853266531, "grad_norm": 8.274746894836426, "learning_rate": 3.835279834172295e-06, "loss": 0.2969, "step": 11674 }, { "epoch": 0.5791457909618533, "grad_norm": 8.685790061950684, "learning_rate": 3.8345144109437225e-06, "loss": 0.3046, "step": 11675 }, { "epoch": 0.5791953965970534, "grad_norm": 5.441175937652588, "learning_rate": 3.833749016596412e-06, "loss": 0.326, "step": 11676 }, { "epoch": 0.5792450022322536, "grad_norm": 4.951529502868652, "learning_rate": 3.83298365114933e-06, "loss": 0.1912, "step": 11677 }, { "epoch": 0.5792946078674538, "grad_norm": 8.500354766845703, "learning_rate": 3.832218314621437e-06, "loss": 0.4016, "step": 11678 }, { "epoch": 0.5793442135026539, "grad_norm": 5.63857889175415, "learning_rate": 3.831453007031706e-06, "loss": 0.2096, "step": 11679 }, { "epoch": 0.579393819137854, "grad_norm": 7.293982982635498, "learning_rate": 3.8306877283990965e-06, "loss": 0.2877, "step": 11680 }, { "epoch": 0.5794434247730542, "grad_norm": 6.775599002838135, "learning_rate": 3.8299224787425734e-06, "loss": 0.2415, "step": 11681 }, { "epoch": 0.5794930304082544, "grad_norm": 9.774175643920898, "learning_rate": 3.829157258081101e-06, "loss": 0.3311, "step": 11682 }, { "epoch": 0.5795426360434546, "grad_norm": 5.765109539031982, "learning_rate": 3.828392066433643e-06, "loss": 0.2838, "step": 11683 }, { "epoch": 0.5795922416786546, "grad_norm": 7.922440528869629, "learning_rate": 3.827626903819157e-06, "loss": 0.4822, "step": 11684 }, { "epoch": 0.5796418473138548, "grad_norm": 6.03005838394165, "learning_rate": 3.826861770256608e-06, "loss": 0.2943, "step": 11685 }, { "epoch": 0.579691452949055, "grad_norm": 7.5790791511535645, "learning_rate": 3.826096665764956e-06, "loss": 0.3524, "step": 11686 }, { "epoch": 0.5797410585842552, "grad_norm": 9.66236400604248, "learning_rate": 3.825331590363157e-06, "loss": 0.2683, "step": 11687 }, { "epoch": 0.5797906642194554, "grad_norm": 17.79505729675293, "learning_rate": 3.824566544070175e-06, "loss": 0.4421, "step": 11688 }, { "epoch": 0.5798402698546555, "grad_norm": 9.465518951416016, "learning_rate": 3.823801526904964e-06, "loss": 0.2962, "step": 11689 }, { "epoch": 0.5798898754898556, "grad_norm": 6.198391437530518, "learning_rate": 3.823036538886484e-06, "loss": 0.2374, "step": 11690 }, { "epoch": 0.5799394811250558, "grad_norm": 7.7480974197387695, "learning_rate": 3.822271580033692e-06, "loss": 0.235, "step": 11691 }, { "epoch": 0.579989086760256, "grad_norm": 14.53093147277832, "learning_rate": 3.82150665036554e-06, "loss": 0.3363, "step": 11692 }, { "epoch": 0.5800386923954561, "grad_norm": 5.434554576873779, "learning_rate": 3.82074174990099e-06, "loss": 0.2128, "step": 11693 }, { "epoch": 0.5800882980306563, "grad_norm": 6.171929359436035, "learning_rate": 3.819976878658992e-06, "loss": 0.2844, "step": 11694 }, { "epoch": 0.5801379036658565, "grad_norm": 3.8712849617004395, "learning_rate": 3.8192120366584985e-06, "loss": 0.2691, "step": 11695 }, { "epoch": 0.5801875093010566, "grad_norm": 13.870869636535645, "learning_rate": 3.818447223918467e-06, "loss": 0.4349, "step": 11696 }, { "epoch": 0.5802371149362567, "grad_norm": 15.575552940368652, "learning_rate": 3.817682440457847e-06, "loss": 0.4649, "step": 11697 }, { "epoch": 0.5802867205714569, "grad_norm": 5.900088310241699, "learning_rate": 3.81691768629559e-06, "loss": 0.3782, "step": 11698 }, { "epoch": 0.5803363262066571, "grad_norm": 9.76626205444336, "learning_rate": 3.81615296145065e-06, "loss": 0.3849, "step": 11699 }, { "epoch": 0.5803859318418573, "grad_norm": 6.577112674713135, "learning_rate": 3.815388265941974e-06, "loss": 0.2537, "step": 11700 }, { "epoch": 0.5804355374770573, "grad_norm": 5.11937141418457, "learning_rate": 3.814623599788512e-06, "loss": 0.3188, "step": 11701 }, { "epoch": 0.5804851431122575, "grad_norm": 6.540706157684326, "learning_rate": 3.8138589630092144e-06, "loss": 0.2489, "step": 11702 }, { "epoch": 0.5805347487474577, "grad_norm": 9.104968070983887, "learning_rate": 3.8130943556230282e-06, "loss": 0.3672, "step": 11703 }, { "epoch": 0.5805843543826579, "grad_norm": 8.485969543457031, "learning_rate": 3.8123297776488995e-06, "loss": 0.3002, "step": 11704 }, { "epoch": 0.580633960017858, "grad_norm": 4.997097015380859, "learning_rate": 3.8115652291057775e-06, "loss": 0.2602, "step": 11705 }, { "epoch": 0.5806835656530582, "grad_norm": 7.192585468292236, "learning_rate": 3.810800710012607e-06, "loss": 0.3136, "step": 11706 }, { "epoch": 0.5807331712882583, "grad_norm": 10.718961715698242, "learning_rate": 3.810036220388331e-06, "loss": 0.4067, "step": 11707 }, { "epoch": 0.5807827769234585, "grad_norm": 4.981945037841797, "learning_rate": 3.8092717602518976e-06, "loss": 0.2292, "step": 11708 }, { "epoch": 0.5808323825586587, "grad_norm": 10.500033378601074, "learning_rate": 3.8085073296222463e-06, "loss": 0.386, "step": 11709 }, { "epoch": 0.5808819881938588, "grad_norm": 5.308446407318115, "learning_rate": 3.8077429285183247e-06, "loss": 0.2851, "step": 11710 }, { "epoch": 0.580931593829059, "grad_norm": 5.5842084884643555, "learning_rate": 3.806978556959073e-06, "loss": 0.2803, "step": 11711 }, { "epoch": 0.5809811994642592, "grad_norm": 16.209178924560547, "learning_rate": 3.8062142149634294e-06, "loss": 0.281, "step": 11712 }, { "epoch": 0.5810308050994593, "grad_norm": 4.042043685913086, "learning_rate": 3.80544990255034e-06, "loss": 0.2754, "step": 11713 }, { "epoch": 0.5810804107346594, "grad_norm": 14.257519721984863, "learning_rate": 3.8046856197387417e-06, "loss": 0.4826, "step": 11714 }, { "epoch": 0.5811300163698596, "grad_norm": 5.5040202140808105, "learning_rate": 3.8039213665475737e-06, "loss": 0.2273, "step": 11715 }, { "epoch": 0.5811796220050598, "grad_norm": 15.415044784545898, "learning_rate": 3.803157142995777e-06, "loss": 0.2943, "step": 11716 }, { "epoch": 0.58122922764026, "grad_norm": 5.734277248382568, "learning_rate": 3.8023929491022864e-06, "loss": 0.3004, "step": 11717 }, { "epoch": 0.58127883327546, "grad_norm": 11.041919708251953, "learning_rate": 3.80162878488604e-06, "loss": 0.3818, "step": 11718 }, { "epoch": 0.5813284389106602, "grad_norm": 6.473453998565674, "learning_rate": 3.800864650365975e-06, "loss": 0.3813, "step": 11719 }, { "epoch": 0.5813780445458604, "grad_norm": 5.019258975982666, "learning_rate": 3.800100545561027e-06, "loss": 0.2933, "step": 11720 }, { "epoch": 0.5814276501810606, "grad_norm": 12.482170104980469, "learning_rate": 3.7993364704901294e-06, "loss": 0.5344, "step": 11721 }, { "epoch": 0.5814772558162608, "grad_norm": 8.092920303344727, "learning_rate": 3.7985724251722172e-06, "loss": 0.3663, "step": 11722 }, { "epoch": 0.5815268614514609, "grad_norm": 5.42675256729126, "learning_rate": 3.797808409626225e-06, "loss": 0.2863, "step": 11723 }, { "epoch": 0.581576467086661, "grad_norm": 6.789963245391846, "learning_rate": 3.7970444238710825e-06, "loss": 0.2464, "step": 11724 }, { "epoch": 0.5816260727218612, "grad_norm": 8.725544929504395, "learning_rate": 3.7962804679257235e-06, "loss": 0.3272, "step": 11725 }, { "epoch": 0.5816756783570614, "grad_norm": 8.939336776733398, "learning_rate": 3.7955165418090796e-06, "loss": 0.296, "step": 11726 }, { "epoch": 0.5817252839922615, "grad_norm": 6.407647132873535, "learning_rate": 3.79475264554008e-06, "loss": 0.2746, "step": 11727 }, { "epoch": 0.5817748896274617, "grad_norm": 8.76551342010498, "learning_rate": 3.793988779137656e-06, "loss": 0.2569, "step": 11728 }, { "epoch": 0.5818244952626619, "grad_norm": 5.985288619995117, "learning_rate": 3.793224942620735e-06, "loss": 0.2842, "step": 11729 }, { "epoch": 0.581874100897862, "grad_norm": 7.5344557762146, "learning_rate": 3.792461136008246e-06, "loss": 0.3108, "step": 11730 }, { "epoch": 0.5819237065330621, "grad_norm": 9.558079719543457, "learning_rate": 3.791697359319117e-06, "loss": 0.3495, "step": 11731 }, { "epoch": 0.5819733121682623, "grad_norm": 16.31782341003418, "learning_rate": 3.790933612572272e-06, "loss": 0.3172, "step": 11732 }, { "epoch": 0.5820229178034625, "grad_norm": 3.7186203002929688, "learning_rate": 3.7901698957866407e-06, "loss": 0.2861, "step": 11733 }, { "epoch": 0.5820725234386627, "grad_norm": 11.437681198120117, "learning_rate": 3.7894062089811472e-06, "loss": 0.2667, "step": 11734 }, { "epoch": 0.5821221290738627, "grad_norm": 10.223920822143555, "learning_rate": 3.7886425521747126e-06, "loss": 0.3425, "step": 11735 }, { "epoch": 0.5821717347090629, "grad_norm": 5.6949567794799805, "learning_rate": 3.787878925386267e-06, "loss": 0.3399, "step": 11736 }, { "epoch": 0.5822213403442631, "grad_norm": 4.9446702003479, "learning_rate": 3.7871153286347286e-06, "loss": 0.2608, "step": 11737 }, { "epoch": 0.5822709459794633, "grad_norm": 5.666995525360107, "learning_rate": 3.7863517619390196e-06, "loss": 0.2419, "step": 11738 }, { "epoch": 0.5823205516146635, "grad_norm": 22.123754501342773, "learning_rate": 3.785588225318065e-06, "loss": 0.3865, "step": 11739 }, { "epoch": 0.5823701572498636, "grad_norm": 6.338542461395264, "learning_rate": 3.7848247187907826e-06, "loss": 0.3569, "step": 11740 }, { "epoch": 0.5824197628850637, "grad_norm": 7.582590579986572, "learning_rate": 3.7840612423760925e-06, "loss": 0.4243, "step": 11741 }, { "epoch": 0.5824693685202639, "grad_norm": 8.026276588439941, "learning_rate": 3.7832977960929164e-06, "loss": 0.384, "step": 11742 }, { "epoch": 0.582518974155464, "grad_norm": 6.804299354553223, "learning_rate": 3.7825343799601687e-06, "loss": 0.1979, "step": 11743 }, { "epoch": 0.5825685797906642, "grad_norm": 13.609169960021973, "learning_rate": 3.7817709939967728e-06, "loss": 0.2867, "step": 11744 }, { "epoch": 0.5826181854258644, "grad_norm": 17.28516387939453, "learning_rate": 3.781007638221642e-06, "loss": 0.254, "step": 11745 }, { "epoch": 0.5826677910610646, "grad_norm": 7.065984725952148, "learning_rate": 3.780244312653692e-06, "loss": 0.2762, "step": 11746 }, { "epoch": 0.5827173966962647, "grad_norm": 4.488358974456787, "learning_rate": 3.779481017311841e-06, "loss": 0.2717, "step": 11747 }, { "epoch": 0.5827670023314648, "grad_norm": 9.239686012268066, "learning_rate": 3.778717752215003e-06, "loss": 0.2928, "step": 11748 }, { "epoch": 0.582816607966665, "grad_norm": 8.309404373168945, "learning_rate": 3.777954517382089e-06, "loss": 0.3318, "step": 11749 }, { "epoch": 0.5828662136018652, "grad_norm": 10.017191886901855, "learning_rate": 3.7771913128320174e-06, "loss": 0.2834, "step": 11750 }, { "epoch": 0.5829158192370654, "grad_norm": 7.628421306610107, "learning_rate": 3.776428138583698e-06, "loss": 0.3215, "step": 11751 }, { "epoch": 0.5829654248722654, "grad_norm": 20.605430603027344, "learning_rate": 3.775664994656041e-06, "loss": 0.3949, "step": 11752 }, { "epoch": 0.5830150305074656, "grad_norm": 6.436947345733643, "learning_rate": 3.7749018810679613e-06, "loss": 0.3628, "step": 11753 }, { "epoch": 0.5830646361426658, "grad_norm": 37.24808120727539, "learning_rate": 3.774138797838367e-06, "loss": 0.2636, "step": 11754 }, { "epoch": 0.583114241777866, "grad_norm": 12.186457633972168, "learning_rate": 3.773375744986165e-06, "loss": 0.3428, "step": 11755 }, { "epoch": 0.5831638474130661, "grad_norm": 6.086390972137451, "learning_rate": 3.77261272253027e-06, "loss": 0.277, "step": 11756 }, { "epoch": 0.5832134530482663, "grad_norm": 5.441783905029297, "learning_rate": 3.771849730489587e-06, "loss": 0.287, "step": 11757 }, { "epoch": 0.5832630586834664, "grad_norm": 9.368772506713867, "learning_rate": 3.7710867688830205e-06, "loss": 0.3455, "step": 11758 }, { "epoch": 0.5833126643186666, "grad_norm": 7.114223957061768, "learning_rate": 3.7703238377294822e-06, "loss": 0.2961, "step": 11759 }, { "epoch": 0.5833622699538668, "grad_norm": 6.360827445983887, "learning_rate": 3.769560937047875e-06, "loss": 0.3925, "step": 11760 }, { "epoch": 0.5834118755890669, "grad_norm": 16.74336051940918, "learning_rate": 3.7687980668571027e-06, "loss": 0.3302, "step": 11761 }, { "epoch": 0.5834614812242671, "grad_norm": 5.559027671813965, "learning_rate": 3.7680352271760733e-06, "loss": 0.297, "step": 11762 }, { "epoch": 0.5835110868594673, "grad_norm": 9.106410026550293, "learning_rate": 3.7672724180236864e-06, "loss": 0.3483, "step": 11763 }, { "epoch": 0.5835606924946674, "grad_norm": 4.725825786590576, "learning_rate": 3.7665096394188482e-06, "loss": 0.2633, "step": 11764 }, { "epoch": 0.5836102981298675, "grad_norm": 19.0721492767334, "learning_rate": 3.7657468913804594e-06, "loss": 0.2843, "step": 11765 }, { "epoch": 0.5836599037650677, "grad_norm": 7.550282955169678, "learning_rate": 3.7649841739274185e-06, "loss": 0.2788, "step": 11766 }, { "epoch": 0.5837095094002679, "grad_norm": 5.003753185272217, "learning_rate": 3.764221487078631e-06, "loss": 0.2636, "step": 11767 }, { "epoch": 0.5837591150354681, "grad_norm": 4.83036470413208, "learning_rate": 3.763458830852993e-06, "loss": 0.2669, "step": 11768 }, { "epoch": 0.5838087206706681, "grad_norm": 9.392552375793457, "learning_rate": 3.762696205269404e-06, "loss": 0.2984, "step": 11769 }, { "epoch": 0.5838583263058683, "grad_norm": 4.063835144042969, "learning_rate": 3.761933610346764e-06, "loss": 0.1641, "step": 11770 }, { "epoch": 0.5839079319410685, "grad_norm": 6.569011688232422, "learning_rate": 3.7611710461039685e-06, "loss": 0.2525, "step": 11771 }, { "epoch": 0.5839575375762687, "grad_norm": 7.520838737487793, "learning_rate": 3.760408512559914e-06, "loss": 0.2488, "step": 11772 }, { "epoch": 0.5840071432114688, "grad_norm": 5.317506790161133, "learning_rate": 3.7596460097334995e-06, "loss": 0.2929, "step": 11773 }, { "epoch": 0.584056748846669, "grad_norm": 7.925000190734863, "learning_rate": 3.7588835376436163e-06, "loss": 0.254, "step": 11774 }, { "epoch": 0.5841063544818691, "grad_norm": 9.855985641479492, "learning_rate": 3.7581210963091606e-06, "loss": 0.4524, "step": 11775 }, { "epoch": 0.5841559601170693, "grad_norm": 8.641801834106445, "learning_rate": 3.757358685749026e-06, "loss": 0.2824, "step": 11776 }, { "epoch": 0.5842055657522695, "grad_norm": 5.261013507843018, "learning_rate": 3.7565963059821057e-06, "loss": 0.2672, "step": 11777 }, { "epoch": 0.5842551713874696, "grad_norm": 7.723582744598389, "learning_rate": 3.7558339570272905e-06, "loss": 0.3058, "step": 11778 }, { "epoch": 0.5843047770226698, "grad_norm": 4.680980682373047, "learning_rate": 3.755071638903473e-06, "loss": 0.2142, "step": 11779 }, { "epoch": 0.58435438265787, "grad_norm": 6.071273326873779, "learning_rate": 3.7543093516295436e-06, "loss": 0.2691, "step": 11780 }, { "epoch": 0.58440398829307, "grad_norm": 4.169956684112549, "learning_rate": 3.7535470952243913e-06, "loss": 0.2821, "step": 11781 }, { "epoch": 0.5844535939282702, "grad_norm": 4.742163181304932, "learning_rate": 3.7527848697069074e-06, "loss": 0.2581, "step": 11782 }, { "epoch": 0.5845031995634704, "grad_norm": 7.178006649017334, "learning_rate": 3.7520226750959777e-06, "loss": 0.2387, "step": 11783 }, { "epoch": 0.5845528051986706, "grad_norm": 6.3169450759887695, "learning_rate": 3.75126051141049e-06, "loss": 0.3033, "step": 11784 }, { "epoch": 0.5846024108338708, "grad_norm": 7.343147277832031, "learning_rate": 3.7504983786693332e-06, "loss": 0.2771, "step": 11785 }, { "epoch": 0.5846520164690708, "grad_norm": 6.490142822265625, "learning_rate": 3.7497362768913893e-06, "loss": 0.3565, "step": 11786 }, { "epoch": 0.584701622104271, "grad_norm": 6.889679908752441, "learning_rate": 3.7489742060955485e-06, "loss": 0.2678, "step": 11787 }, { "epoch": 0.5847512277394712, "grad_norm": 5.902353286743164, "learning_rate": 3.7482121663006927e-06, "loss": 0.3137, "step": 11788 }, { "epoch": 0.5848008333746714, "grad_norm": 4.684499740600586, "learning_rate": 3.7474501575257034e-06, "loss": 0.2789, "step": 11789 }, { "epoch": 0.5848504390098715, "grad_norm": 7.6470794677734375, "learning_rate": 3.7466881797894682e-06, "loss": 0.3806, "step": 11790 }, { "epoch": 0.5849000446450717, "grad_norm": 7.635977268218994, "learning_rate": 3.7459262331108666e-06, "loss": 0.3255, "step": 11791 }, { "epoch": 0.5849496502802718, "grad_norm": 5.624349594116211, "learning_rate": 3.745164317508778e-06, "loss": 0.1596, "step": 11792 }, { "epoch": 0.584999255915472, "grad_norm": 5.942594051361084, "learning_rate": 3.744402433002088e-06, "loss": 0.3477, "step": 11793 }, { "epoch": 0.5850488615506722, "grad_norm": 7.114222526550293, "learning_rate": 3.7436405796096727e-06, "loss": 0.3067, "step": 11794 }, { "epoch": 0.5850984671858723, "grad_norm": 3.866971492767334, "learning_rate": 3.7428787573504104e-06, "loss": 0.1759, "step": 11795 }, { "epoch": 0.5851480728210725, "grad_norm": 6.923457145690918, "learning_rate": 3.7421169662431832e-06, "loss": 0.3283, "step": 11796 }, { "epoch": 0.5851976784562727, "grad_norm": 8.551892280578613, "learning_rate": 3.7413552063068646e-06, "loss": 0.303, "step": 11797 }, { "epoch": 0.5852472840914728, "grad_norm": 10.764947891235352, "learning_rate": 3.7405934775603358e-06, "loss": 0.3679, "step": 11798 }, { "epoch": 0.5852968897266729, "grad_norm": 5.012413501739502, "learning_rate": 3.73983178002247e-06, "loss": 0.238, "step": 11799 }, { "epoch": 0.5853464953618731, "grad_norm": 9.846202850341797, "learning_rate": 3.7390701137121406e-06, "loss": 0.385, "step": 11800 }, { "epoch": 0.5853961009970733, "grad_norm": 4.600373268127441, "learning_rate": 3.738308478648226e-06, "loss": 0.2023, "step": 11801 }, { "epoch": 0.5854457066322735, "grad_norm": 17.387006759643555, "learning_rate": 3.7375468748495984e-06, "loss": 0.5039, "step": 11802 }, { "epoch": 0.5854953122674735, "grad_norm": 7.4232635498046875, "learning_rate": 3.736785302335129e-06, "loss": 0.2473, "step": 11803 }, { "epoch": 0.5855449179026737, "grad_norm": 12.284346580505371, "learning_rate": 3.7360237611236914e-06, "loss": 0.4027, "step": 11804 }, { "epoch": 0.5855945235378739, "grad_norm": 6.2134785652160645, "learning_rate": 3.735262251234158e-06, "loss": 0.2545, "step": 11805 }, { "epoch": 0.5856441291730741, "grad_norm": 6.75538969039917, "learning_rate": 3.7345007726853956e-06, "loss": 0.2224, "step": 11806 }, { "epoch": 0.5856937348082742, "grad_norm": 5.653261661529541, "learning_rate": 3.7337393254962793e-06, "loss": 0.2174, "step": 11807 }, { "epoch": 0.5857433404434744, "grad_norm": 6.079581260681152, "learning_rate": 3.7329779096856743e-06, "loss": 0.3242, "step": 11808 }, { "epoch": 0.5857929460786745, "grad_norm": 19.526105880737305, "learning_rate": 3.7322165252724483e-06, "loss": 0.3383, "step": 11809 }, { "epoch": 0.5858425517138747, "grad_norm": 8.75289535522461, "learning_rate": 3.7314551722754727e-06, "loss": 0.4339, "step": 11810 }, { "epoch": 0.5858921573490748, "grad_norm": 7.516482353210449, "learning_rate": 3.7306938507136113e-06, "loss": 0.2075, "step": 11811 }, { "epoch": 0.585941762984275, "grad_norm": 7.663663387298584, "learning_rate": 3.7299325606057285e-06, "loss": 0.2839, "step": 11812 }, { "epoch": 0.5859913686194752, "grad_norm": 4.295419216156006, "learning_rate": 3.729171301970693e-06, "loss": 0.2237, "step": 11813 }, { "epoch": 0.5860409742546754, "grad_norm": 8.480179786682129, "learning_rate": 3.728410074827366e-06, "loss": 0.2384, "step": 11814 }, { "epoch": 0.5860905798898755, "grad_norm": 7.058616638183594, "learning_rate": 3.7276488791946148e-06, "loss": 0.3158, "step": 11815 }, { "epoch": 0.5861401855250756, "grad_norm": 13.467755317687988, "learning_rate": 3.7268877150913e-06, "loss": 0.3675, "step": 11816 }, { "epoch": 0.5861897911602758, "grad_norm": 6.415237903594971, "learning_rate": 3.72612658253628e-06, "loss": 0.1488, "step": 11817 }, { "epoch": 0.586239396795476, "grad_norm": 6.065080642700195, "learning_rate": 3.725365481548423e-06, "loss": 0.2855, "step": 11818 }, { "epoch": 0.5862890024306762, "grad_norm": 6.352389335632324, "learning_rate": 3.724604412146585e-06, "loss": 0.2997, "step": 11819 }, { "epoch": 0.5863386080658762, "grad_norm": 10.206319808959961, "learning_rate": 3.7238433743496254e-06, "loss": 0.3013, "step": 11820 }, { "epoch": 0.5863882137010764, "grad_norm": 4.804101943969727, "learning_rate": 3.723082368176406e-06, "loss": 0.2663, "step": 11821 }, { "epoch": 0.5864378193362766, "grad_norm": 9.22696590423584, "learning_rate": 3.7223213936457838e-06, "loss": 0.2993, "step": 11822 }, { "epoch": 0.5864874249714768, "grad_norm": 9.397296905517578, "learning_rate": 3.7215604507766123e-06, "loss": 0.3033, "step": 11823 }, { "epoch": 0.586537030606677, "grad_norm": 10.24038028717041, "learning_rate": 3.7207995395877533e-06, "loss": 0.3823, "step": 11824 }, { "epoch": 0.5865866362418771, "grad_norm": 11.316567420959473, "learning_rate": 3.7200386600980597e-06, "loss": 0.4851, "step": 11825 }, { "epoch": 0.5866362418770772, "grad_norm": 15.396384239196777, "learning_rate": 3.7192778123263873e-06, "loss": 0.2835, "step": 11826 }, { "epoch": 0.5866858475122774, "grad_norm": 8.934938430786133, "learning_rate": 3.71851699629159e-06, "loss": 0.4118, "step": 11827 }, { "epoch": 0.5867354531474775, "grad_norm": 10.960221290588379, "learning_rate": 3.717756212012521e-06, "loss": 0.3156, "step": 11828 }, { "epoch": 0.5867850587826777, "grad_norm": 7.604511260986328, "learning_rate": 3.7169954595080337e-06, "loss": 0.2803, "step": 11829 }, { "epoch": 0.5868346644178779, "grad_norm": 5.063733100891113, "learning_rate": 3.716234738796978e-06, "loss": 0.2028, "step": 11830 }, { "epoch": 0.5868842700530781, "grad_norm": 15.750545501708984, "learning_rate": 3.7154740498982077e-06, "loss": 0.3463, "step": 11831 }, { "epoch": 0.5869338756882782, "grad_norm": 6.7861127853393555, "learning_rate": 3.7147133928305714e-06, "loss": 0.2676, "step": 11832 }, { "epoch": 0.5869834813234783, "grad_norm": 6.970937728881836, "learning_rate": 3.713952767612917e-06, "loss": 0.2651, "step": 11833 }, { "epoch": 0.5870330869586785, "grad_norm": 6.376475811004639, "learning_rate": 3.713192174264096e-06, "loss": 0.2738, "step": 11834 }, { "epoch": 0.5870826925938787, "grad_norm": 7.4042134284973145, "learning_rate": 3.7124316128029548e-06, "loss": 0.2968, "step": 11835 }, { "epoch": 0.5871322982290789, "grad_norm": 8.192151069641113, "learning_rate": 3.7116710832483414e-06, "loss": 0.3533, "step": 11836 }, { "epoch": 0.5871819038642789, "grad_norm": 4.970168590545654, "learning_rate": 3.7109105856191003e-06, "loss": 0.2077, "step": 11837 }, { "epoch": 0.5872315094994791, "grad_norm": 3.946354627609253, "learning_rate": 3.7101501199340784e-06, "loss": 0.2533, "step": 11838 }, { "epoch": 0.5872811151346793, "grad_norm": 9.324336051940918, "learning_rate": 3.7093896862121204e-06, "loss": 0.2007, "step": 11839 }, { "epoch": 0.5873307207698795, "grad_norm": 5.894373893737793, "learning_rate": 3.708629284472068e-06, "loss": 0.3176, "step": 11840 }, { "epoch": 0.5873803264050796, "grad_norm": 8.038956642150879, "learning_rate": 3.7078689147327684e-06, "loss": 0.289, "step": 11841 }, { "epoch": 0.5874299320402798, "grad_norm": 5.092499732971191, "learning_rate": 3.707108577013061e-06, "loss": 0.3332, "step": 11842 }, { "epoch": 0.5874795376754799, "grad_norm": 7.750570774078369, "learning_rate": 3.706348271331786e-06, "loss": 0.3193, "step": 11843 }, { "epoch": 0.5875291433106801, "grad_norm": 8.32852554321289, "learning_rate": 3.7055879977077885e-06, "loss": 0.2905, "step": 11844 }, { "epoch": 0.5875787489458802, "grad_norm": 4.902286529541016, "learning_rate": 3.7048277561599054e-06, "loss": 0.2466, "step": 11845 }, { "epoch": 0.5876283545810804, "grad_norm": 5.03441047668457, "learning_rate": 3.704067546706975e-06, "loss": 0.3168, "step": 11846 }, { "epoch": 0.5876779602162806, "grad_norm": 4.86492395401001, "learning_rate": 3.703307369367839e-06, "loss": 0.2913, "step": 11847 }, { "epoch": 0.5877275658514808, "grad_norm": 5.539889812469482, "learning_rate": 3.7025472241613326e-06, "loss": 0.2853, "step": 11848 }, { "epoch": 0.5877771714866808, "grad_norm": 6.8692474365234375, "learning_rate": 3.701787111106291e-06, "loss": 0.2744, "step": 11849 }, { "epoch": 0.587826777121881, "grad_norm": 5.148409366607666, "learning_rate": 3.7010270302215536e-06, "loss": 0.3097, "step": 11850 }, { "epoch": 0.5878763827570812, "grad_norm": 13.398333549499512, "learning_rate": 3.700266981525952e-06, "loss": 0.3751, "step": 11851 }, { "epoch": 0.5879259883922814, "grad_norm": 18.738903045654297, "learning_rate": 3.6995069650383253e-06, "loss": 0.3755, "step": 11852 }, { "epoch": 0.5879755940274816, "grad_norm": 4.571303367614746, "learning_rate": 3.6987469807775034e-06, "loss": 0.2477, "step": 11853 }, { "epoch": 0.5880251996626816, "grad_norm": 8.24675464630127, "learning_rate": 3.6979870287623186e-06, "loss": 0.3089, "step": 11854 }, { "epoch": 0.5880748052978818, "grad_norm": 4.863914966583252, "learning_rate": 3.697227109011606e-06, "loss": 0.1996, "step": 11855 }, { "epoch": 0.588124410933082, "grad_norm": 13.993760108947754, "learning_rate": 3.696467221544195e-06, "loss": 0.3316, "step": 11856 }, { "epoch": 0.5881740165682822, "grad_norm": 7.345485687255859, "learning_rate": 3.6957073663789133e-06, "loss": 0.3818, "step": 11857 }, { "epoch": 0.5882236222034823, "grad_norm": 12.807315826416016, "learning_rate": 3.694947543534596e-06, "loss": 0.4131, "step": 11858 }, { "epoch": 0.5882732278386825, "grad_norm": 8.193756103515625, "learning_rate": 3.6941877530300685e-06, "loss": 0.2635, "step": 11859 }, { "epoch": 0.5883228334738826, "grad_norm": 6.0647172927856445, "learning_rate": 3.693427994884157e-06, "loss": 0.2132, "step": 11860 }, { "epoch": 0.5883724391090828, "grad_norm": 5.545293807983398, "learning_rate": 3.692668269115693e-06, "loss": 0.2738, "step": 11861 }, { "epoch": 0.588422044744283, "grad_norm": 6.745517730712891, "learning_rate": 3.6919085757435e-06, "loss": 0.313, "step": 11862 }, { "epoch": 0.5884716503794831, "grad_norm": 12.921019554138184, "learning_rate": 3.691148914786403e-06, "loss": 0.4586, "step": 11863 }, { "epoch": 0.5885212560146833, "grad_norm": 7.381965637207031, "learning_rate": 3.69038928626323e-06, "loss": 0.2894, "step": 11864 }, { "epoch": 0.5885708616498835, "grad_norm": 4.435610294342041, "learning_rate": 3.689629690192803e-06, "loss": 0.3038, "step": 11865 }, { "epoch": 0.5886204672850835, "grad_norm": 6.549757480621338, "learning_rate": 3.6888701265939427e-06, "loss": 0.382, "step": 11866 }, { "epoch": 0.5886700729202837, "grad_norm": 7.985335350036621, "learning_rate": 3.6881105954854756e-06, "loss": 0.293, "step": 11867 }, { "epoch": 0.5887196785554839, "grad_norm": 7.1913957595825195, "learning_rate": 3.6873510968862193e-06, "loss": 0.2274, "step": 11868 }, { "epoch": 0.5887692841906841, "grad_norm": 4.772411346435547, "learning_rate": 3.686591630814999e-06, "loss": 0.1845, "step": 11869 }, { "epoch": 0.5888188898258843, "grad_norm": 5.010271072387695, "learning_rate": 3.685832197290632e-06, "loss": 0.267, "step": 11870 }, { "epoch": 0.5888684954610843, "grad_norm": 5.806896209716797, "learning_rate": 3.6850727963319354e-06, "loss": 0.25, "step": 11871 }, { "epoch": 0.5889181010962845, "grad_norm": 7.886273384094238, "learning_rate": 3.684313427957732e-06, "loss": 0.2308, "step": 11872 }, { "epoch": 0.5889677067314847, "grad_norm": 9.103680610656738, "learning_rate": 3.6835540921868374e-06, "loss": 0.298, "step": 11873 }, { "epoch": 0.5890173123666849, "grad_norm": 13.308272361755371, "learning_rate": 3.682794789038065e-06, "loss": 0.4172, "step": 11874 }, { "epoch": 0.589066918001885, "grad_norm": 10.159418106079102, "learning_rate": 3.682035518530236e-06, "loss": 0.3532, "step": 11875 }, { "epoch": 0.5891165236370852, "grad_norm": 4.018378734588623, "learning_rate": 3.6812762806821632e-06, "loss": 0.2583, "step": 11876 }, { "epoch": 0.5891661292722853, "grad_norm": 8.352696418762207, "learning_rate": 3.6805170755126586e-06, "loss": 0.288, "step": 11877 }, { "epoch": 0.5892157349074855, "grad_norm": 6.37325382232666, "learning_rate": 3.6797579030405396e-06, "loss": 0.2963, "step": 11878 }, { "epoch": 0.5892653405426856, "grad_norm": 5.374775409698486, "learning_rate": 3.678998763284616e-06, "loss": 0.1947, "step": 11879 }, { "epoch": 0.5893149461778858, "grad_norm": 6.445427417755127, "learning_rate": 3.6782396562637e-06, "loss": 0.2532, "step": 11880 }, { "epoch": 0.589364551813086, "grad_norm": 7.500645160675049, "learning_rate": 3.677480581996605e-06, "loss": 0.3298, "step": 11881 }, { "epoch": 0.5894141574482862, "grad_norm": 5.570127964019775, "learning_rate": 3.676721540502138e-06, "loss": 0.2819, "step": 11882 }, { "epoch": 0.5894637630834862, "grad_norm": 5.095396041870117, "learning_rate": 3.675962531799111e-06, "loss": 0.2733, "step": 11883 }, { "epoch": 0.5895133687186864, "grad_norm": 7.4795074462890625, "learning_rate": 3.67520355590633e-06, "loss": 0.3092, "step": 11884 }, { "epoch": 0.5895629743538866, "grad_norm": 7.754242897033691, "learning_rate": 3.674444612842606e-06, "loss": 0.3522, "step": 11885 }, { "epoch": 0.5896125799890868, "grad_norm": 5.900182247161865, "learning_rate": 3.6736857026267426e-06, "loss": 0.3342, "step": 11886 }, { "epoch": 0.589662185624287, "grad_norm": 7.0623931884765625, "learning_rate": 3.6729268252775475e-06, "loss": 0.2286, "step": 11887 }, { "epoch": 0.589711791259487, "grad_norm": 5.888876438140869, "learning_rate": 3.672167980813826e-06, "loss": 0.2056, "step": 11888 }, { "epoch": 0.5897613968946872, "grad_norm": 5.829260349273682, "learning_rate": 3.671409169254383e-06, "loss": 0.258, "step": 11889 }, { "epoch": 0.5898110025298874, "grad_norm": 8.440779685974121, "learning_rate": 3.6706503906180224e-06, "loss": 0.3081, "step": 11890 }, { "epoch": 0.5898606081650876, "grad_norm": 7.5629801750183105, "learning_rate": 3.669891644923545e-06, "loss": 0.2742, "step": 11891 }, { "epoch": 0.5899102138002877, "grad_norm": 6.214521884918213, "learning_rate": 3.669132932189755e-06, "loss": 0.1852, "step": 11892 }, { "epoch": 0.5899598194354879, "grad_norm": 8.12473201751709, "learning_rate": 3.668374252435454e-06, "loss": 0.3148, "step": 11893 }, { "epoch": 0.590009425070688, "grad_norm": 9.941009521484375, "learning_rate": 3.6676156056794386e-06, "loss": 0.2993, "step": 11894 }, { "epoch": 0.5900590307058882, "grad_norm": 6.349015712738037, "learning_rate": 3.666856991940514e-06, "loss": 0.2107, "step": 11895 }, { "epoch": 0.5901086363410883, "grad_norm": 19.24252700805664, "learning_rate": 3.666098411237475e-06, "loss": 0.3512, "step": 11896 }, { "epoch": 0.5901582419762885, "grad_norm": 8.662467956542969, "learning_rate": 3.66533986358912e-06, "loss": 0.3903, "step": 11897 }, { "epoch": 0.5902078476114887, "grad_norm": 15.511611938476562, "learning_rate": 3.664581349014249e-06, "loss": 0.3604, "step": 11898 }, { "epoch": 0.5902574532466889, "grad_norm": 11.669614791870117, "learning_rate": 3.6638228675316555e-06, "loss": 0.3825, "step": 11899 }, { "epoch": 0.590307058881889, "grad_norm": 15.263090133666992, "learning_rate": 3.663064419160134e-06, "loss": 0.4067, "step": 11900 }, { "epoch": 0.5903566645170891, "grad_norm": 4.673924922943115, "learning_rate": 3.662306003918483e-06, "loss": 0.2529, "step": 11901 }, { "epoch": 0.5904062701522893, "grad_norm": 7.090181350708008, "learning_rate": 3.661547621825494e-06, "loss": 0.3146, "step": 11902 }, { "epoch": 0.5904558757874895, "grad_norm": 6.953658103942871, "learning_rate": 3.6607892728999596e-06, "loss": 0.3178, "step": 11903 }, { "epoch": 0.5905054814226897, "grad_norm": 6.743746280670166, "learning_rate": 3.6600309571606733e-06, "loss": 0.3608, "step": 11904 }, { "epoch": 0.5905550870578897, "grad_norm": 7.711552143096924, "learning_rate": 3.659272674626425e-06, "loss": 0.3348, "step": 11905 }, { "epoch": 0.5906046926930899, "grad_norm": 4.126347541809082, "learning_rate": 3.6585144253160076e-06, "loss": 0.2187, "step": 11906 }, { "epoch": 0.5906542983282901, "grad_norm": 15.588701248168945, "learning_rate": 3.65775620924821e-06, "loss": 0.3335, "step": 11907 }, { "epoch": 0.5907039039634903, "grad_norm": 5.924596786499023, "learning_rate": 3.656998026441819e-06, "loss": 0.2133, "step": 11908 }, { "epoch": 0.5907535095986904, "grad_norm": 8.55700397491455, "learning_rate": 3.656239876915626e-06, "loss": 0.2903, "step": 11909 }, { "epoch": 0.5908031152338906, "grad_norm": 7.00544548034668, "learning_rate": 3.655481760688416e-06, "loss": 0.3915, "step": 11910 }, { "epoch": 0.5908527208690907, "grad_norm": 5.372525691986084, "learning_rate": 3.6547236777789753e-06, "loss": 0.2372, "step": 11911 }, { "epoch": 0.5909023265042909, "grad_norm": 7.096423149108887, "learning_rate": 3.653965628206092e-06, "loss": 0.3016, "step": 11912 }, { "epoch": 0.590951932139491, "grad_norm": 5.332462787628174, "learning_rate": 3.6532076119885495e-06, "loss": 0.3029, "step": 11913 }, { "epoch": 0.5910015377746912, "grad_norm": 5.5614399909973145, "learning_rate": 3.6524496291451297e-06, "loss": 0.2659, "step": 11914 }, { "epoch": 0.5910511434098914, "grad_norm": 10.190068244934082, "learning_rate": 3.651691679694619e-06, "loss": 0.2802, "step": 11915 }, { "epoch": 0.5911007490450915, "grad_norm": 8.333039283752441, "learning_rate": 3.6509337636557986e-06, "loss": 0.335, "step": 11916 }, { "epoch": 0.5911503546802916, "grad_norm": 4.261068344116211, "learning_rate": 3.6501758810474473e-06, "loss": 0.2446, "step": 11917 }, { "epoch": 0.5911999603154918, "grad_norm": 7.414557456970215, "learning_rate": 3.649418031888351e-06, "loss": 0.2954, "step": 11918 }, { "epoch": 0.591249565950692, "grad_norm": 4.1078877449035645, "learning_rate": 3.648660216197287e-06, "loss": 0.2389, "step": 11919 }, { "epoch": 0.5912991715858922, "grad_norm": 10.18527889251709, "learning_rate": 3.6479024339930314e-06, "loss": 0.4001, "step": 11920 }, { "epoch": 0.5913487772210924, "grad_norm": 8.637131690979004, "learning_rate": 3.647144685294368e-06, "loss": 0.3593, "step": 11921 }, { "epoch": 0.5913983828562924, "grad_norm": 6.9704976081848145, "learning_rate": 3.646386970120068e-06, "loss": 0.3177, "step": 11922 }, { "epoch": 0.5914479884914926, "grad_norm": 8.44443130493164, "learning_rate": 3.6456292884889144e-06, "loss": 0.2156, "step": 11923 }, { "epoch": 0.5914975941266928, "grad_norm": 8.841656684875488, "learning_rate": 3.6448716404196795e-06, "loss": 0.3369, "step": 11924 }, { "epoch": 0.591547199761893, "grad_norm": 3.5029494762420654, "learning_rate": 3.644114025931136e-06, "loss": 0.233, "step": 11925 }, { "epoch": 0.5915968053970931, "grad_norm": 4.862738132476807, "learning_rate": 3.6433564450420624e-06, "loss": 0.2221, "step": 11926 }, { "epoch": 0.5916464110322933, "grad_norm": 13.20947265625, "learning_rate": 3.6425988977712296e-06, "loss": 0.4769, "step": 11927 }, { "epoch": 0.5916960166674934, "grad_norm": 4.710252285003662, "learning_rate": 3.641841384137408e-06, "loss": 0.2409, "step": 11928 }, { "epoch": 0.5917456223026936, "grad_norm": 8.588932037353516, "learning_rate": 3.6410839041593737e-06, "loss": 0.3136, "step": 11929 }, { "epoch": 0.5917952279378937, "grad_norm": 6.8565497398376465, "learning_rate": 3.640326457855895e-06, "loss": 0.3037, "step": 11930 }, { "epoch": 0.5918448335730939, "grad_norm": 8.893604278564453, "learning_rate": 3.6395690452457388e-06, "loss": 0.3103, "step": 11931 }, { "epoch": 0.5918944392082941, "grad_norm": 6.207949161529541, "learning_rate": 3.6388116663476803e-06, "loss": 0.1655, "step": 11932 }, { "epoch": 0.5919440448434942, "grad_norm": 11.933550834655762, "learning_rate": 3.638054321180483e-06, "loss": 0.3037, "step": 11933 }, { "epoch": 0.5919936504786943, "grad_norm": 5.625124931335449, "learning_rate": 3.6372970097629153e-06, "loss": 0.2015, "step": 11934 }, { "epoch": 0.5920432561138945, "grad_norm": 8.450048446655273, "learning_rate": 3.636539732113745e-06, "loss": 0.2572, "step": 11935 }, { "epoch": 0.5920928617490947, "grad_norm": 5.610557556152344, "learning_rate": 3.635782488251737e-06, "loss": 0.2795, "step": 11936 }, { "epoch": 0.5921424673842949, "grad_norm": 9.437423706054688, "learning_rate": 3.6350252781956552e-06, "loss": 0.2675, "step": 11937 }, { "epoch": 0.5921920730194951, "grad_norm": 7.407161712646484, "learning_rate": 3.6342681019642643e-06, "loss": 0.324, "step": 11938 }, { "epoch": 0.5922416786546951, "grad_norm": 5.892401218414307, "learning_rate": 3.633510959576328e-06, "loss": 0.2549, "step": 11939 }, { "epoch": 0.5922912842898953, "grad_norm": 7.25777530670166, "learning_rate": 3.6327538510506087e-06, "loss": 0.3143, "step": 11940 }, { "epoch": 0.5923408899250955, "grad_norm": 5.405200958251953, "learning_rate": 3.631996776405867e-06, "loss": 0.2688, "step": 11941 }, { "epoch": 0.5923904955602957, "grad_norm": 9.360905647277832, "learning_rate": 3.6312397356608643e-06, "loss": 0.3503, "step": 11942 }, { "epoch": 0.5924401011954958, "grad_norm": 5.5869975090026855, "learning_rate": 3.630482728834359e-06, "loss": 0.262, "step": 11943 }, { "epoch": 0.592489706830696, "grad_norm": 8.111056327819824, "learning_rate": 3.629725755945113e-06, "loss": 0.3161, "step": 11944 }, { "epoch": 0.5925393124658961, "grad_norm": 8.597941398620605, "learning_rate": 3.628968817011882e-06, "loss": 0.4012, "step": 11945 }, { "epoch": 0.5925889181010963, "grad_norm": 3.9181079864501953, "learning_rate": 3.628211912053423e-06, "loss": 0.2028, "step": 11946 }, { "epoch": 0.5926385237362964, "grad_norm": 5.8238091468811035, "learning_rate": 3.6274550410884945e-06, "loss": 0.2363, "step": 11947 }, { "epoch": 0.5926881293714966, "grad_norm": 5.879827499389648, "learning_rate": 3.6266982041358495e-06, "loss": 0.2928, "step": 11948 }, { "epoch": 0.5927377350066968, "grad_norm": 12.509892463684082, "learning_rate": 3.6259414012142456e-06, "loss": 0.294, "step": 11949 }, { "epoch": 0.5927873406418969, "grad_norm": 10.324999809265137, "learning_rate": 3.625184632342436e-06, "loss": 0.3662, "step": 11950 }, { "epoch": 0.592836946277097, "grad_norm": 12.941465377807617, "learning_rate": 3.6244278975391706e-06, "loss": 0.4331, "step": 11951 }, { "epoch": 0.5928865519122972, "grad_norm": 3.9996259212493896, "learning_rate": 3.623671196823206e-06, "loss": 0.2407, "step": 11952 }, { "epoch": 0.5929361575474974, "grad_norm": 6.707558631896973, "learning_rate": 3.6229145302132923e-06, "loss": 0.2727, "step": 11953 }, { "epoch": 0.5929857631826976, "grad_norm": 5.488124370574951, "learning_rate": 3.6221578977281767e-06, "loss": 0.3284, "step": 11954 }, { "epoch": 0.5930353688178978, "grad_norm": 9.307931900024414, "learning_rate": 3.621401299386614e-06, "loss": 0.2108, "step": 11955 }, { "epoch": 0.5930849744530978, "grad_norm": 5.320235252380371, "learning_rate": 3.6206447352073514e-06, "loss": 0.2911, "step": 11956 }, { "epoch": 0.593134580088298, "grad_norm": 4.420381546020508, "learning_rate": 3.6198882052091334e-06, "loss": 0.2571, "step": 11957 }, { "epoch": 0.5931841857234982, "grad_norm": 8.11988353729248, "learning_rate": 3.6191317094107127e-06, "loss": 0.3057, "step": 11958 }, { "epoch": 0.5932337913586984, "grad_norm": 5.622486591339111, "learning_rate": 3.6183752478308297e-06, "loss": 0.2338, "step": 11959 }, { "epoch": 0.5932833969938985, "grad_norm": 5.462512016296387, "learning_rate": 3.617618820488236e-06, "loss": 0.1472, "step": 11960 }, { "epoch": 0.5933330026290987, "grad_norm": 11.011625289916992, "learning_rate": 3.616862427401673e-06, "loss": 0.3778, "step": 11961 }, { "epoch": 0.5933826082642988, "grad_norm": 12.242464065551758, "learning_rate": 3.616106068589883e-06, "loss": 0.4349, "step": 11962 }, { "epoch": 0.593432213899499, "grad_norm": 6.578057289123535, "learning_rate": 3.6153497440716124e-06, "loss": 0.3359, "step": 11963 }, { "epoch": 0.5934818195346991, "grad_norm": 8.431621551513672, "learning_rate": 3.6145934538656016e-06, "loss": 0.3133, "step": 11964 }, { "epoch": 0.5935314251698993, "grad_norm": 7.245099067687988, "learning_rate": 3.613837197990589e-06, "loss": 0.333, "step": 11965 }, { "epoch": 0.5935810308050995, "grad_norm": 4.114533424377441, "learning_rate": 3.6130809764653203e-06, "loss": 0.212, "step": 11966 }, { "epoch": 0.5936306364402996, "grad_norm": 6.012314319610596, "learning_rate": 3.6123247893085323e-06, "loss": 0.3575, "step": 11967 }, { "epoch": 0.5936802420754997, "grad_norm": 9.953532218933105, "learning_rate": 3.611568636538961e-06, "loss": 0.2418, "step": 11968 }, { "epoch": 0.5937298477106999, "grad_norm": 6.754824638366699, "learning_rate": 3.61081251817535e-06, "loss": 0.1798, "step": 11969 }, { "epoch": 0.5937794533459001, "grad_norm": 7.97776460647583, "learning_rate": 3.610056434236433e-06, "loss": 0.2481, "step": 11970 }, { "epoch": 0.5938290589811003, "grad_norm": 5.796882629394531, "learning_rate": 3.6093003847409434e-06, "loss": 0.2787, "step": 11971 }, { "epoch": 0.5938786646163005, "grad_norm": 10.096419334411621, "learning_rate": 3.608544369707623e-06, "loss": 0.3814, "step": 11972 }, { "epoch": 0.5939282702515005, "grad_norm": 6.8440656661987305, "learning_rate": 3.607788389155201e-06, "loss": 0.3475, "step": 11973 }, { "epoch": 0.5939778758867007, "grad_norm": 7.796276569366455, "learning_rate": 3.6070324431024114e-06, "loss": 0.3065, "step": 11974 }, { "epoch": 0.5940274815219009, "grad_norm": 19.08188819885254, "learning_rate": 3.606276531567989e-06, "loss": 0.3959, "step": 11975 }, { "epoch": 0.5940770871571011, "grad_norm": 6.148930549621582, "learning_rate": 3.6055206545706632e-06, "loss": 0.2571, "step": 11976 }, { "epoch": 0.5941266927923012, "grad_norm": 10.271753311157227, "learning_rate": 3.6047648121291687e-06, "loss": 0.3883, "step": 11977 }, { "epoch": 0.5941762984275014, "grad_norm": 5.640108108520508, "learning_rate": 3.604009004262233e-06, "loss": 0.2958, "step": 11978 }, { "epoch": 0.5942259040627015, "grad_norm": 6.110240936279297, "learning_rate": 3.603253230988584e-06, "loss": 0.3134, "step": 11979 }, { "epoch": 0.5942755096979017, "grad_norm": 12.853639602661133, "learning_rate": 3.6024974923269535e-06, "loss": 0.3582, "step": 11980 }, { "epoch": 0.5943251153331018, "grad_norm": 7.728140830993652, "learning_rate": 3.601741788296067e-06, "loss": 0.3494, "step": 11981 }, { "epoch": 0.594374720968302, "grad_norm": 11.138729095458984, "learning_rate": 3.60098611891465e-06, "loss": 0.463, "step": 11982 }, { "epoch": 0.5944243266035022, "grad_norm": 4.982840538024902, "learning_rate": 3.600230484201431e-06, "loss": 0.2602, "step": 11983 }, { "epoch": 0.5944739322387023, "grad_norm": 8.060646057128906, "learning_rate": 3.5994748841751346e-06, "loss": 0.3477, "step": 11984 }, { "epoch": 0.5945235378739024, "grad_norm": 12.917595863342285, "learning_rate": 3.5987193188544817e-06, "loss": 0.4883, "step": 11985 }, { "epoch": 0.5945731435091026, "grad_norm": 4.686222553253174, "learning_rate": 3.5979637882582e-06, "loss": 0.2945, "step": 11986 }, { "epoch": 0.5946227491443028, "grad_norm": 9.75456714630127, "learning_rate": 3.597208292405009e-06, "loss": 0.4007, "step": 11987 }, { "epoch": 0.594672354779503, "grad_norm": 5.436427593231201, "learning_rate": 3.5964528313136304e-06, "loss": 0.2644, "step": 11988 }, { "epoch": 0.5947219604147032, "grad_norm": 4.578980922698975, "learning_rate": 3.5956974050027875e-06, "loss": 0.2012, "step": 11989 }, { "epoch": 0.5947715660499032, "grad_norm": 5.083197593688965, "learning_rate": 3.5949420134911964e-06, "loss": 0.3334, "step": 11990 }, { "epoch": 0.5948211716851034, "grad_norm": 5.9382500648498535, "learning_rate": 3.594186656797578e-06, "loss": 0.3019, "step": 11991 }, { "epoch": 0.5948707773203036, "grad_norm": 11.395363807678223, "learning_rate": 3.59343133494065e-06, "loss": 0.3531, "step": 11992 }, { "epoch": 0.5949203829555038, "grad_norm": 5.9068217277526855, "learning_rate": 3.592676047939131e-06, "loss": 0.2645, "step": 11993 }, { "epoch": 0.5949699885907039, "grad_norm": 6.064560890197754, "learning_rate": 3.591920795811736e-06, "loss": 0.2944, "step": 11994 }, { "epoch": 0.5950195942259041, "grad_norm": 5.5876946449279785, "learning_rate": 3.591165578577179e-06, "loss": 0.2852, "step": 11995 }, { "epoch": 0.5950691998611042, "grad_norm": 4.438133716583252, "learning_rate": 3.5904103962541776e-06, "loss": 0.2603, "step": 11996 }, { "epoch": 0.5951188054963044, "grad_norm": 5.539375305175781, "learning_rate": 3.5896552488614435e-06, "loss": 0.2056, "step": 11997 }, { "epoch": 0.5951684111315045, "grad_norm": 4.890854358673096, "learning_rate": 3.5889001364176916e-06, "loss": 0.2172, "step": 11998 }, { "epoch": 0.5952180167667047, "grad_norm": 5.630950450897217, "learning_rate": 3.5881450589416315e-06, "loss": 0.2595, "step": 11999 }, { "epoch": 0.5952676224019049, "grad_norm": 7.333557605743408, "learning_rate": 3.5873900164519755e-06, "loss": 0.3186, "step": 12000 }, { "epoch": 0.595317228037105, "grad_norm": 4.210605621337891, "learning_rate": 3.586635008967435e-06, "loss": 0.2205, "step": 12001 }, { "epoch": 0.5953668336723051, "grad_norm": 7.106311321258545, "learning_rate": 3.585880036506717e-06, "loss": 0.3317, "step": 12002 }, { "epoch": 0.5954164393075053, "grad_norm": 6.808037281036377, "learning_rate": 3.5851250990885318e-06, "loss": 0.294, "step": 12003 }, { "epoch": 0.5954660449427055, "grad_norm": 9.028541564941406, "learning_rate": 3.584370196731587e-06, "loss": 0.2799, "step": 12004 }, { "epoch": 0.5955156505779057, "grad_norm": 9.93371295928955, "learning_rate": 3.5836153294545872e-06, "loss": 0.3402, "step": 12005 }, { "epoch": 0.5955652562131059, "grad_norm": 7.072675704956055, "learning_rate": 3.5828604972762426e-06, "loss": 0.4229, "step": 12006 }, { "epoch": 0.5956148618483059, "grad_norm": 5.553213596343994, "learning_rate": 3.582105700215256e-06, "loss": 0.3484, "step": 12007 }, { "epoch": 0.5956644674835061, "grad_norm": 6.895506858825684, "learning_rate": 3.5813509382903293e-06, "loss": 0.2643, "step": 12008 }, { "epoch": 0.5957140731187063, "grad_norm": 7.0535759925842285, "learning_rate": 3.5805962115201705e-06, "loss": 0.1509, "step": 12009 }, { "epoch": 0.5957636787539065, "grad_norm": 5.4695868492126465, "learning_rate": 3.5798415199234793e-06, "loss": 0.2363, "step": 12010 }, { "epoch": 0.5958132843891066, "grad_norm": 5.673434257507324, "learning_rate": 3.5790868635189555e-06, "loss": 0.2688, "step": 12011 }, { "epoch": 0.5958628900243068, "grad_norm": 7.669707775115967, "learning_rate": 3.5783322423253042e-06, "loss": 0.312, "step": 12012 }, { "epoch": 0.5959124956595069, "grad_norm": 8.472294807434082, "learning_rate": 3.5775776563612207e-06, "loss": 0.4295, "step": 12013 }, { "epoch": 0.5959621012947071, "grad_norm": 11.588003158569336, "learning_rate": 3.5768231056454083e-06, "loss": 0.3828, "step": 12014 }, { "epoch": 0.5960117069299072, "grad_norm": 7.466774940490723, "learning_rate": 3.5760685901965643e-06, "loss": 0.349, "step": 12015 }, { "epoch": 0.5960613125651074, "grad_norm": 10.178526878356934, "learning_rate": 3.575314110033381e-06, "loss": 0.4282, "step": 12016 }, { "epoch": 0.5961109182003076, "grad_norm": 6.6935625076293945, "learning_rate": 3.574559665174562e-06, "loss": 0.3226, "step": 12017 }, { "epoch": 0.5961605238355077, "grad_norm": 5.015941619873047, "learning_rate": 3.573805255638798e-06, "loss": 0.2957, "step": 12018 }, { "epoch": 0.5962101294707078, "grad_norm": 5.360622406005859, "learning_rate": 3.573050881444784e-06, "loss": 0.2873, "step": 12019 }, { "epoch": 0.596259735105908, "grad_norm": 6.981957912445068, "learning_rate": 3.5722965426112156e-06, "loss": 0.2893, "step": 12020 }, { "epoch": 0.5963093407411082, "grad_norm": 8.927078247070312, "learning_rate": 3.5715422391567854e-06, "loss": 0.4059, "step": 12021 }, { "epoch": 0.5963589463763084, "grad_norm": 6.281236171722412, "learning_rate": 3.5707879711001827e-06, "loss": 0.2425, "step": 12022 }, { "epoch": 0.5964085520115086, "grad_norm": 10.015690803527832, "learning_rate": 3.5700337384601027e-06, "loss": 0.2943, "step": 12023 }, { "epoch": 0.5964581576467086, "grad_norm": 26.96123695373535, "learning_rate": 3.5692795412552327e-06, "loss": 0.3839, "step": 12024 }, { "epoch": 0.5965077632819088, "grad_norm": 6.426826477050781, "learning_rate": 3.5685253795042616e-06, "loss": 0.2691, "step": 12025 }, { "epoch": 0.596557368917109, "grad_norm": 6.2594804763793945, "learning_rate": 3.5677712532258806e-06, "loss": 0.302, "step": 12026 }, { "epoch": 0.5966069745523092, "grad_norm": 8.551894187927246, "learning_rate": 3.5670171624387763e-06, "loss": 0.3695, "step": 12027 }, { "epoch": 0.5966565801875093, "grad_norm": 22.533185958862305, "learning_rate": 3.5662631071616327e-06, "loss": 0.4594, "step": 12028 }, { "epoch": 0.5967061858227095, "grad_norm": 13.244804382324219, "learning_rate": 3.56550908741314e-06, "loss": 0.4798, "step": 12029 }, { "epoch": 0.5967557914579096, "grad_norm": 10.486337661743164, "learning_rate": 3.564755103211979e-06, "loss": 0.3962, "step": 12030 }, { "epoch": 0.5968053970931098, "grad_norm": 7.4478559494018555, "learning_rate": 3.5640011545768373e-06, "loss": 0.3394, "step": 12031 }, { "epoch": 0.5968550027283099, "grad_norm": 7.612657070159912, "learning_rate": 3.563247241526397e-06, "loss": 0.3703, "step": 12032 }, { "epoch": 0.5969046083635101, "grad_norm": 11.364404678344727, "learning_rate": 3.562493364079338e-06, "loss": 0.2982, "step": 12033 }, { "epoch": 0.5969542139987103, "grad_norm": 6.2926764488220215, "learning_rate": 3.5617395222543455e-06, "loss": 0.3411, "step": 12034 }, { "epoch": 0.5970038196339104, "grad_norm": 6.561472415924072, "learning_rate": 3.5609857160700985e-06, "loss": 0.3661, "step": 12035 }, { "epoch": 0.5970534252691105, "grad_norm": 13.678047180175781, "learning_rate": 3.560231945545274e-06, "loss": 0.3689, "step": 12036 }, { "epoch": 0.5971030309043107, "grad_norm": 4.664301872253418, "learning_rate": 3.559478210698555e-06, "loss": 0.2618, "step": 12037 }, { "epoch": 0.5971526365395109, "grad_norm": 8.91763973236084, "learning_rate": 3.558724511548618e-06, "loss": 0.4557, "step": 12038 }, { "epoch": 0.5972022421747111, "grad_norm": 12.043627738952637, "learning_rate": 3.5579708481141375e-06, "loss": 0.2915, "step": 12039 }, { "epoch": 0.5972518478099113, "grad_norm": 6.115500450134277, "learning_rate": 3.557217220413794e-06, "loss": 0.2872, "step": 12040 }, { "epoch": 0.5973014534451113, "grad_norm": 5.840587615966797, "learning_rate": 3.556463628466259e-06, "loss": 0.2794, "step": 12041 }, { "epoch": 0.5973510590803115, "grad_norm": 12.915740013122559, "learning_rate": 3.555710072290208e-06, "loss": 0.3508, "step": 12042 }, { "epoch": 0.5974006647155117, "grad_norm": 4.837343692779541, "learning_rate": 3.554956551904316e-06, "loss": 0.2525, "step": 12043 }, { "epoch": 0.5974502703507119, "grad_norm": 5.859275817871094, "learning_rate": 3.5542030673272537e-06, "loss": 0.2658, "step": 12044 }, { "epoch": 0.597499875985912, "grad_norm": 21.32600212097168, "learning_rate": 3.5534496185776933e-06, "loss": 0.4383, "step": 12045 }, { "epoch": 0.5975494816211122, "grad_norm": 14.262266159057617, "learning_rate": 3.552696205674305e-06, "loss": 0.3462, "step": 12046 }, { "epoch": 0.5975990872563123, "grad_norm": 12.449713706970215, "learning_rate": 3.5519428286357607e-06, "loss": 0.4287, "step": 12047 }, { "epoch": 0.5976486928915125, "grad_norm": 4.65377140045166, "learning_rate": 3.5511894874807277e-06, "loss": 0.3099, "step": 12048 }, { "epoch": 0.5976982985267126, "grad_norm": 18.373029708862305, "learning_rate": 3.5504361822278744e-06, "loss": 0.2155, "step": 12049 }, { "epoch": 0.5977479041619128, "grad_norm": 6.019840240478516, "learning_rate": 3.549682912895868e-06, "loss": 0.2872, "step": 12050 }, { "epoch": 0.597797509797113, "grad_norm": 14.326711654663086, "learning_rate": 3.5489296795033762e-06, "loss": 0.2718, "step": 12051 }, { "epoch": 0.5978471154323131, "grad_norm": 17.385784149169922, "learning_rate": 3.548176482069062e-06, "loss": 0.2957, "step": 12052 }, { "epoch": 0.5978967210675132, "grad_norm": 7.6747846603393555, "learning_rate": 3.5474233206115917e-06, "loss": 0.2941, "step": 12053 }, { "epoch": 0.5979463267027134, "grad_norm": 7.280041694641113, "learning_rate": 3.5466701951496286e-06, "loss": 0.217, "step": 12054 }, { "epoch": 0.5979959323379136, "grad_norm": 9.040243148803711, "learning_rate": 3.545917105701836e-06, "loss": 0.3601, "step": 12055 }, { "epoch": 0.5980455379731138, "grad_norm": 6.257031440734863, "learning_rate": 3.545164052286875e-06, "loss": 0.3741, "step": 12056 }, { "epoch": 0.598095143608314, "grad_norm": 5.830136775970459, "learning_rate": 3.5444110349234064e-06, "loss": 0.3015, "step": 12057 }, { "epoch": 0.598144749243514, "grad_norm": 6.568459510803223, "learning_rate": 3.543658053630091e-06, "loss": 0.265, "step": 12058 }, { "epoch": 0.5981943548787142, "grad_norm": 6.606704235076904, "learning_rate": 3.5429051084255874e-06, "loss": 0.2144, "step": 12059 }, { "epoch": 0.5982439605139144, "grad_norm": 4.282029151916504, "learning_rate": 3.5421521993285557e-06, "loss": 0.2201, "step": 12060 }, { "epoch": 0.5982935661491146, "grad_norm": 8.127429008483887, "learning_rate": 3.541399326357652e-06, "loss": 0.3886, "step": 12061 }, { "epoch": 0.5983431717843147, "grad_norm": 4.291781425476074, "learning_rate": 3.54064648953153e-06, "loss": 0.1859, "step": 12062 }, { "epoch": 0.5983927774195149, "grad_norm": 5.532707214355469, "learning_rate": 3.5398936888688506e-06, "loss": 0.3225, "step": 12063 }, { "epoch": 0.598442383054715, "grad_norm": 6.127763271331787, "learning_rate": 3.5391409243882646e-06, "loss": 0.2577, "step": 12064 }, { "epoch": 0.5984919886899152, "grad_norm": 4.067842960357666, "learning_rate": 3.538388196108429e-06, "loss": 0.2166, "step": 12065 }, { "epoch": 0.5985415943251153, "grad_norm": 5.540229320526123, "learning_rate": 3.537635504047995e-06, "loss": 0.2533, "step": 12066 }, { "epoch": 0.5985911999603155, "grad_norm": 3.910639524459839, "learning_rate": 3.5368828482256123e-06, "loss": 0.2741, "step": 12067 }, { "epoch": 0.5986408055955157, "grad_norm": 5.69687032699585, "learning_rate": 3.536130228659937e-06, "loss": 0.2667, "step": 12068 }, { "epoch": 0.5986904112307158, "grad_norm": 5.888288974761963, "learning_rate": 3.5353776453696166e-06, "loss": 0.2862, "step": 12069 }, { "epoch": 0.5987400168659159, "grad_norm": 6.366528034210205, "learning_rate": 3.5346250983732986e-06, "loss": 0.2831, "step": 12070 }, { "epoch": 0.5987896225011161, "grad_norm": 8.35936450958252, "learning_rate": 3.5338725876896364e-06, "loss": 0.2854, "step": 12071 }, { "epoch": 0.5988392281363163, "grad_norm": 5.485378265380859, "learning_rate": 3.5331201133372734e-06, "loss": 0.329, "step": 12072 }, { "epoch": 0.5988888337715165, "grad_norm": 5.037443161010742, "learning_rate": 3.532367675334856e-06, "loss": 0.3108, "step": 12073 }, { "epoch": 0.5989384394067166, "grad_norm": 10.543755531311035, "learning_rate": 3.5316152737010333e-06, "loss": 0.2678, "step": 12074 }, { "epoch": 0.5989880450419167, "grad_norm": 7.000532150268555, "learning_rate": 3.530862908454449e-06, "loss": 0.2707, "step": 12075 }, { "epoch": 0.5990376506771169, "grad_norm": 7.57923698425293, "learning_rate": 3.530110579613744e-06, "loss": 0.2455, "step": 12076 }, { "epoch": 0.5990872563123171, "grad_norm": 5.173208236694336, "learning_rate": 3.529358287197566e-06, "loss": 0.2412, "step": 12077 }, { "epoch": 0.5991368619475173, "grad_norm": 7.769003868103027, "learning_rate": 3.528606031224554e-06, "loss": 0.3492, "step": 12078 }, { "epoch": 0.5991864675827174, "grad_norm": 6.111391544342041, "learning_rate": 3.5278538117133487e-06, "loss": 0.21, "step": 12079 }, { "epoch": 0.5992360732179176, "grad_norm": 3.7546706199645996, "learning_rate": 3.5271016286825943e-06, "loss": 0.2676, "step": 12080 }, { "epoch": 0.5992856788531177, "grad_norm": 5.191736698150635, "learning_rate": 3.5263494821509274e-06, "loss": 0.2052, "step": 12081 }, { "epoch": 0.5993352844883179, "grad_norm": 8.6690673828125, "learning_rate": 3.525597372136985e-06, "loss": 0.3009, "step": 12082 }, { "epoch": 0.599384890123518, "grad_norm": 5.195207595825195, "learning_rate": 3.5248452986594083e-06, "loss": 0.3138, "step": 12083 }, { "epoch": 0.5994344957587182, "grad_norm": 12.388768196105957, "learning_rate": 3.5240932617368304e-06, "loss": 0.4098, "step": 12084 }, { "epoch": 0.5994841013939184, "grad_norm": 5.279168128967285, "learning_rate": 3.523341261387892e-06, "loss": 0.2536, "step": 12085 }, { "epoch": 0.5995337070291185, "grad_norm": 10.290849685668945, "learning_rate": 3.5225892976312246e-06, "loss": 0.3319, "step": 12086 }, { "epoch": 0.5995833126643186, "grad_norm": 11.142820358276367, "learning_rate": 3.52183737048546e-06, "loss": 0.3858, "step": 12087 }, { "epoch": 0.5996329182995188, "grad_norm": 14.106515884399414, "learning_rate": 3.5210854799692374e-06, "loss": 0.4253, "step": 12088 }, { "epoch": 0.599682523934719, "grad_norm": 12.369729042053223, "learning_rate": 3.5203336261011846e-06, "loss": 0.3289, "step": 12089 }, { "epoch": 0.5997321295699192, "grad_norm": 11.389459609985352, "learning_rate": 3.519581808899932e-06, "loss": 0.3232, "step": 12090 }, { "epoch": 0.5997817352051193, "grad_norm": 6.385156154632568, "learning_rate": 3.5188300283841135e-06, "loss": 0.2583, "step": 12091 }, { "epoch": 0.5998313408403194, "grad_norm": 4.896692752838135, "learning_rate": 3.518078284572358e-06, "loss": 0.3422, "step": 12092 }, { "epoch": 0.5998809464755196, "grad_norm": 10.359559059143066, "learning_rate": 3.51732657748329e-06, "loss": 0.3692, "step": 12093 }, { "epoch": 0.5999305521107198, "grad_norm": 9.492865562438965, "learning_rate": 3.5165749071355427e-06, "loss": 0.1979, "step": 12094 }, { "epoch": 0.59998015774592, "grad_norm": 4.738468170166016, "learning_rate": 3.515823273547739e-06, "loss": 0.3139, "step": 12095 }, { "epoch": 0.6000297633811201, "grad_norm": 6.318220138549805, "learning_rate": 3.5150716767385058e-06, "loss": 0.2738, "step": 12096 }, { "epoch": 0.6000297633811201, "eval_loss": 0.2924826741218567, "eval_runtime": 35.5856, "eval_samples_per_second": 45.777, "eval_steps_per_second": 5.733, "step": 12096 }, { "epoch": 0.6000793690163203, "grad_norm": 9.605340003967285, "learning_rate": 3.514320116726469e-06, "loss": 0.3299, "step": 12097 }, { "epoch": 0.6001289746515204, "grad_norm": 12.066939353942871, "learning_rate": 3.513568593530251e-06, "loss": 0.3751, "step": 12098 }, { "epoch": 0.6001785802867206, "grad_norm": 6.097601890563965, "learning_rate": 3.5128171071684757e-06, "loss": 0.2916, "step": 12099 }, { "epoch": 0.6002281859219207, "grad_norm": 4.992825508117676, "learning_rate": 3.5120656576597663e-06, "loss": 0.3355, "step": 12100 }, { "epoch": 0.6002777915571209, "grad_norm": 8.106810569763184, "learning_rate": 3.5113142450227404e-06, "loss": 0.3379, "step": 12101 }, { "epoch": 0.6003273971923211, "grad_norm": 6.217286586761475, "learning_rate": 3.5105628692760237e-06, "loss": 0.3361, "step": 12102 }, { "epoch": 0.6003770028275212, "grad_norm": 8.144830703735352, "learning_rate": 3.5098115304382307e-06, "loss": 0.341, "step": 12103 }, { "epoch": 0.6004266084627213, "grad_norm": 8.490524291992188, "learning_rate": 3.509060228527983e-06, "loss": 0.2984, "step": 12104 }, { "epoch": 0.6004762140979215, "grad_norm": 5.623712539672852, "learning_rate": 3.5083089635638986e-06, "loss": 0.2703, "step": 12105 }, { "epoch": 0.6005258197331217, "grad_norm": 7.801790237426758, "learning_rate": 3.507557735564591e-06, "loss": 0.321, "step": 12106 }, { "epoch": 0.6005754253683219, "grad_norm": 7.447831630706787, "learning_rate": 3.5068065445486784e-06, "loss": 0.3025, "step": 12107 }, { "epoch": 0.600625031003522, "grad_norm": 5.221307754516602, "learning_rate": 3.506055390534775e-06, "loss": 0.3493, "step": 12108 }, { "epoch": 0.6006746366387221, "grad_norm": 8.148722648620605, "learning_rate": 3.5053042735414953e-06, "loss": 0.311, "step": 12109 }, { "epoch": 0.6007242422739223, "grad_norm": 7.884100437164307, "learning_rate": 3.504553193587451e-06, "loss": 0.3521, "step": 12110 }, { "epoch": 0.6007738479091225, "grad_norm": 9.318422317504883, "learning_rate": 3.5038021506912557e-06, "loss": 0.3331, "step": 12111 }, { "epoch": 0.6008234535443226, "grad_norm": 13.044696807861328, "learning_rate": 3.50305114487152e-06, "loss": 0.4852, "step": 12112 }, { "epoch": 0.6008730591795228, "grad_norm": 8.407564163208008, "learning_rate": 3.5023001761468523e-06, "loss": 0.3128, "step": 12113 }, { "epoch": 0.600922664814723, "grad_norm": 15.930521965026855, "learning_rate": 3.501549244535866e-06, "loss": 0.2375, "step": 12114 }, { "epoch": 0.6009722704499231, "grad_norm": 8.745824813842773, "learning_rate": 3.5007983500571673e-06, "loss": 0.2466, "step": 12115 }, { "epoch": 0.6010218760851233, "grad_norm": 5.572402477264404, "learning_rate": 3.5000474927293617e-06, "loss": 0.2873, "step": 12116 }, { "epoch": 0.6010714817203234, "grad_norm": 5.016987323760986, "learning_rate": 3.4992966725710585e-06, "loss": 0.2078, "step": 12117 }, { "epoch": 0.6011210873555236, "grad_norm": 7.894840240478516, "learning_rate": 3.498545889600862e-06, "loss": 0.3291, "step": 12118 }, { "epoch": 0.6011706929907238, "grad_norm": 6.035170555114746, "learning_rate": 3.4977951438373794e-06, "loss": 0.3614, "step": 12119 }, { "epoch": 0.6012202986259239, "grad_norm": 9.864411354064941, "learning_rate": 3.4970444352992124e-06, "loss": 0.4537, "step": 12120 }, { "epoch": 0.601269904261124, "grad_norm": 5.802935600280762, "learning_rate": 3.496293764004962e-06, "loss": 0.3427, "step": 12121 }, { "epoch": 0.6013195098963242, "grad_norm": 7.415651321411133, "learning_rate": 3.4955431299732344e-06, "loss": 0.1817, "step": 12122 }, { "epoch": 0.6013691155315244, "grad_norm": 10.908242225646973, "learning_rate": 3.4947925332226285e-06, "loss": 0.2503, "step": 12123 }, { "epoch": 0.6014187211667246, "grad_norm": 3.246551990509033, "learning_rate": 3.4940419737717428e-06, "loss": 0.2159, "step": 12124 }, { "epoch": 0.6014683268019247, "grad_norm": 6.442008018493652, "learning_rate": 3.4932914516391805e-06, "loss": 0.3521, "step": 12125 }, { "epoch": 0.6015179324371248, "grad_norm": 4.542228698730469, "learning_rate": 3.4925409668435366e-06, "loss": 0.3088, "step": 12126 }, { "epoch": 0.601567538072325, "grad_norm": 11.130040168762207, "learning_rate": 3.4917905194034085e-06, "loss": 0.4485, "step": 12127 }, { "epoch": 0.6016171437075252, "grad_norm": 6.11757230758667, "learning_rate": 3.4910401093373946e-06, "loss": 0.4023, "step": 12128 }, { "epoch": 0.6016667493427253, "grad_norm": 6.0751118659973145, "learning_rate": 3.49028973666409e-06, "loss": 0.2916, "step": 12129 }, { "epoch": 0.6017163549779255, "grad_norm": 11.050909996032715, "learning_rate": 3.489539401402087e-06, "loss": 0.2985, "step": 12130 }, { "epoch": 0.6017659606131257, "grad_norm": 9.47195053100586, "learning_rate": 3.4887891035699815e-06, "loss": 0.3035, "step": 12131 }, { "epoch": 0.6018155662483258, "grad_norm": 11.478535652160645, "learning_rate": 3.4880388431863666e-06, "loss": 0.3037, "step": 12132 }, { "epoch": 0.601865171883526, "grad_norm": 6.880842685699463, "learning_rate": 3.4872886202698307e-06, "loss": 0.2408, "step": 12133 }, { "epoch": 0.6019147775187261, "grad_norm": 4.629567623138428, "learning_rate": 3.48653843483897e-06, "loss": 0.2801, "step": 12134 }, { "epoch": 0.6019643831539263, "grad_norm": 5.645016193389893, "learning_rate": 3.4857882869123703e-06, "loss": 0.2591, "step": 12135 }, { "epoch": 0.6020139887891265, "grad_norm": 8.14920425415039, "learning_rate": 3.4850381765086205e-06, "loss": 0.3109, "step": 12136 }, { "epoch": 0.6020635944243266, "grad_norm": 5.729235649108887, "learning_rate": 3.4842881036463115e-06, "loss": 0.3376, "step": 12137 }, { "epoch": 0.6021132000595267, "grad_norm": 9.41067886352539, "learning_rate": 3.4835380683440277e-06, "loss": 0.2796, "step": 12138 }, { "epoch": 0.6021628056947269, "grad_norm": 5.880199432373047, "learning_rate": 3.4827880706203577e-06, "loss": 0.3028, "step": 12139 }, { "epoch": 0.6022124113299271, "grad_norm": 5.821329116821289, "learning_rate": 3.482038110493886e-06, "loss": 0.356, "step": 12140 }, { "epoch": 0.6022620169651273, "grad_norm": 6.655956745147705, "learning_rate": 3.481288187983195e-06, "loss": 0.22, "step": 12141 }, { "epoch": 0.6023116226003274, "grad_norm": 11.458246231079102, "learning_rate": 3.4805383031068707e-06, "loss": 0.3825, "step": 12142 }, { "epoch": 0.6023612282355275, "grad_norm": 4.9122233390808105, "learning_rate": 3.4797884558834955e-06, "loss": 0.2854, "step": 12143 }, { "epoch": 0.6024108338707277, "grad_norm": 5.133555889129639, "learning_rate": 3.479038646331647e-06, "loss": 0.2009, "step": 12144 }, { "epoch": 0.6024604395059279, "grad_norm": 4.786219120025635, "learning_rate": 3.478288874469912e-06, "loss": 0.294, "step": 12145 }, { "epoch": 0.602510045141128, "grad_norm": 5.9930949211120605, "learning_rate": 3.477539140316867e-06, "loss": 0.2707, "step": 12146 }, { "epoch": 0.6025596507763282, "grad_norm": 8.993919372558594, "learning_rate": 3.476789443891088e-06, "loss": 0.2613, "step": 12147 }, { "epoch": 0.6026092564115284, "grad_norm": 8.255716323852539, "learning_rate": 3.476039785211158e-06, "loss": 0.1848, "step": 12148 }, { "epoch": 0.6026588620467285, "grad_norm": 4.008978843688965, "learning_rate": 3.4752901642956517e-06, "loss": 0.2431, "step": 12149 }, { "epoch": 0.6027084676819287, "grad_norm": 11.440550804138184, "learning_rate": 3.474540581163142e-06, "loss": 0.4556, "step": 12150 }, { "epoch": 0.6027580733171288, "grad_norm": 5.2700886726379395, "learning_rate": 3.4737910358322097e-06, "loss": 0.3331, "step": 12151 }, { "epoch": 0.602807678952329, "grad_norm": 5.941044330596924, "learning_rate": 3.4730415283214248e-06, "loss": 0.2134, "step": 12152 }, { "epoch": 0.6028572845875292, "grad_norm": 8.570199012756348, "learning_rate": 3.472292058649361e-06, "loss": 0.2879, "step": 12153 }, { "epoch": 0.6029068902227293, "grad_norm": 6.6881513595581055, "learning_rate": 3.471542626834592e-06, "loss": 0.3257, "step": 12154 }, { "epoch": 0.6029564958579294, "grad_norm": 4.609913349151611, "learning_rate": 3.470793232895686e-06, "loss": 0.2702, "step": 12155 }, { "epoch": 0.6030061014931296, "grad_norm": 5.062346458435059, "learning_rate": 3.470043876851218e-06, "loss": 0.2232, "step": 12156 }, { "epoch": 0.6030557071283298, "grad_norm": 5.03138542175293, "learning_rate": 3.4692945587197535e-06, "loss": 0.2258, "step": 12157 }, { "epoch": 0.60310531276353, "grad_norm": 6.091907501220703, "learning_rate": 3.4685452785198627e-06, "loss": 0.3618, "step": 12158 }, { "epoch": 0.6031549183987301, "grad_norm": 7.416131496429443, "learning_rate": 3.4677960362701134e-06, "loss": 0.2889, "step": 12159 }, { "epoch": 0.6032045240339302, "grad_norm": 5.676039218902588, "learning_rate": 3.4670468319890704e-06, "loss": 0.2811, "step": 12160 }, { "epoch": 0.6032541296691304, "grad_norm": 3.784433364868164, "learning_rate": 3.4662976656953e-06, "loss": 0.2312, "step": 12161 }, { "epoch": 0.6033037353043306, "grad_norm": 4.057555198669434, "learning_rate": 3.4655485374073673e-06, "loss": 0.1806, "step": 12162 }, { "epoch": 0.6033533409395307, "grad_norm": 6.88074254989624, "learning_rate": 3.4647994471438373e-06, "loss": 0.2131, "step": 12163 }, { "epoch": 0.6034029465747309, "grad_norm": 7.313006401062012, "learning_rate": 3.464050394923271e-06, "loss": 0.2742, "step": 12164 }, { "epoch": 0.6034525522099311, "grad_norm": 6.577213764190674, "learning_rate": 3.463301380764229e-06, "loss": 0.2985, "step": 12165 }, { "epoch": 0.6035021578451312, "grad_norm": 11.800395965576172, "learning_rate": 3.4625524046852766e-06, "loss": 0.3981, "step": 12166 }, { "epoch": 0.6035517634803313, "grad_norm": 5.90423583984375, "learning_rate": 3.4618034667049683e-06, "loss": 0.3092, "step": 12167 }, { "epoch": 0.6036013691155315, "grad_norm": 16.186851501464844, "learning_rate": 3.4610545668418677e-06, "loss": 0.6125, "step": 12168 }, { "epoch": 0.6036509747507317, "grad_norm": 6.779093265533447, "learning_rate": 3.4603057051145315e-06, "loss": 0.2595, "step": 12169 }, { "epoch": 0.6037005803859319, "grad_norm": 12.83569049835205, "learning_rate": 3.4595568815415137e-06, "loss": 0.3398, "step": 12170 }, { "epoch": 0.603750186021132, "grad_norm": 12.7854585647583, "learning_rate": 3.458808096141376e-06, "loss": 0.3592, "step": 12171 }, { "epoch": 0.6037997916563321, "grad_norm": 4.247618675231934, "learning_rate": 3.4580593489326687e-06, "loss": 0.1709, "step": 12172 }, { "epoch": 0.6038493972915323, "grad_norm": 6.745856285095215, "learning_rate": 3.45731063993395e-06, "loss": 0.3153, "step": 12173 }, { "epoch": 0.6038990029267325, "grad_norm": 5.275351524353027, "learning_rate": 3.4565619691637718e-06, "loss": 0.2297, "step": 12174 }, { "epoch": 0.6039486085619327, "grad_norm": 5.573775768280029, "learning_rate": 3.4558133366406835e-06, "loss": 0.319, "step": 12175 }, { "epoch": 0.6039982141971328, "grad_norm": 12.451326370239258, "learning_rate": 3.4550647423832417e-06, "loss": 0.3329, "step": 12176 }, { "epoch": 0.6040478198323329, "grad_norm": 4.8848981857299805, "learning_rate": 3.454316186409995e-06, "loss": 0.2783, "step": 12177 }, { "epoch": 0.6040974254675331, "grad_norm": 14.173917770385742, "learning_rate": 3.4535676687394893e-06, "loss": 0.2023, "step": 12178 }, { "epoch": 0.6041470311027333, "grad_norm": 8.104881286621094, "learning_rate": 3.452819189390279e-06, "loss": 0.2708, "step": 12179 }, { "epoch": 0.6041966367379334, "grad_norm": 7.788670539855957, "learning_rate": 3.452070748380909e-06, "loss": 0.3228, "step": 12180 }, { "epoch": 0.6042462423731336, "grad_norm": 5.1152238845825195, "learning_rate": 3.451322345729924e-06, "loss": 0.2497, "step": 12181 }, { "epoch": 0.6042958480083337, "grad_norm": 5.6356706619262695, "learning_rate": 3.4505739814558736e-06, "loss": 0.3449, "step": 12182 }, { "epoch": 0.6043454536435339, "grad_norm": 5.109768390655518, "learning_rate": 3.449825655577301e-06, "loss": 0.1924, "step": 12183 }, { "epoch": 0.604395059278734, "grad_norm": 8.698917388916016, "learning_rate": 3.4490773681127475e-06, "loss": 0.4187, "step": 12184 }, { "epoch": 0.6044446649139342, "grad_norm": 6.265610694885254, "learning_rate": 3.448329119080761e-06, "loss": 0.3101, "step": 12185 }, { "epoch": 0.6044942705491344, "grad_norm": 6.734457015991211, "learning_rate": 3.4475809084998803e-06, "loss": 0.2836, "step": 12186 }, { "epoch": 0.6045438761843346, "grad_norm": 5.462390422821045, "learning_rate": 3.446832736388645e-06, "loss": 0.3612, "step": 12187 }, { "epoch": 0.6045934818195347, "grad_norm": 5.864140510559082, "learning_rate": 3.4460846027655994e-06, "loss": 0.2705, "step": 12188 }, { "epoch": 0.6046430874547348, "grad_norm": 9.091833114624023, "learning_rate": 3.4453365076492785e-06, "loss": 0.2374, "step": 12189 }, { "epoch": 0.604692693089935, "grad_norm": 8.272453308105469, "learning_rate": 3.4445884510582235e-06, "loss": 0.3211, "step": 12190 }, { "epoch": 0.6047422987251352, "grad_norm": 11.001702308654785, "learning_rate": 3.4438404330109703e-06, "loss": 0.357, "step": 12191 }, { "epoch": 0.6047919043603354, "grad_norm": 4.6457624435424805, "learning_rate": 3.4430924535260534e-06, "loss": 0.1861, "step": 12192 }, { "epoch": 0.6048415099955355, "grad_norm": 3.481842279434204, "learning_rate": 3.4423445126220116e-06, "loss": 0.1632, "step": 12193 }, { "epoch": 0.6048911156307356, "grad_norm": 6.478178024291992, "learning_rate": 3.4415966103173777e-06, "loss": 0.2702, "step": 12194 }, { "epoch": 0.6049407212659358, "grad_norm": 9.2267427444458, "learning_rate": 3.4408487466306823e-06, "loss": 0.3826, "step": 12195 }, { "epoch": 0.604990326901136, "grad_norm": 5.803177833557129, "learning_rate": 3.4401009215804625e-06, "loss": 0.162, "step": 12196 }, { "epoch": 0.6050399325363361, "grad_norm": 5.80661153793335, "learning_rate": 3.4393531351852474e-06, "loss": 0.2638, "step": 12197 }, { "epoch": 0.6050895381715363, "grad_norm": 5.900233745574951, "learning_rate": 3.4386053874635656e-06, "loss": 0.3013, "step": 12198 }, { "epoch": 0.6051391438067364, "grad_norm": 10.842208862304688, "learning_rate": 3.437857678433951e-06, "loss": 0.3122, "step": 12199 }, { "epoch": 0.6051887494419366, "grad_norm": 12.320728302001953, "learning_rate": 3.437110008114929e-06, "loss": 0.5476, "step": 12200 }, { "epoch": 0.6052383550771367, "grad_norm": 5.234588146209717, "learning_rate": 3.4363623765250263e-06, "loss": 0.2703, "step": 12201 }, { "epoch": 0.6052879607123369, "grad_norm": 9.5606689453125, "learning_rate": 3.4356147836827735e-06, "loss": 0.3512, "step": 12202 }, { "epoch": 0.6053375663475371, "grad_norm": 5.5930585861206055, "learning_rate": 3.4348672296066937e-06, "loss": 0.2011, "step": 12203 }, { "epoch": 0.6053871719827373, "grad_norm": 8.452102661132812, "learning_rate": 3.4341197143153097e-06, "loss": 0.2406, "step": 12204 }, { "epoch": 0.6054367776179373, "grad_norm": 5.652596950531006, "learning_rate": 3.43337223782715e-06, "loss": 0.3472, "step": 12205 }, { "epoch": 0.6054863832531375, "grad_norm": 4.502270698547363, "learning_rate": 3.4326248001607335e-06, "loss": 0.2986, "step": 12206 }, { "epoch": 0.6055359888883377, "grad_norm": 5.870769023895264, "learning_rate": 3.431877401334584e-06, "loss": 0.3153, "step": 12207 }, { "epoch": 0.6055855945235379, "grad_norm": 10.441142082214355, "learning_rate": 3.4311300413672223e-06, "loss": 0.2753, "step": 12208 }, { "epoch": 0.6056352001587381, "grad_norm": 8.591368675231934, "learning_rate": 3.430382720277166e-06, "loss": 0.3445, "step": 12209 }, { "epoch": 0.6056848057939382, "grad_norm": 8.998652458190918, "learning_rate": 3.429635438082938e-06, "loss": 0.3742, "step": 12210 }, { "epoch": 0.6057344114291383, "grad_norm": 6.604259490966797, "learning_rate": 3.4288881948030534e-06, "loss": 0.1685, "step": 12211 }, { "epoch": 0.6057840170643385, "grad_norm": 10.180866241455078, "learning_rate": 3.42814099045603e-06, "loss": 0.3394, "step": 12212 }, { "epoch": 0.6058336226995387, "grad_norm": 12.012577056884766, "learning_rate": 3.4273938250603845e-06, "loss": 0.3194, "step": 12213 }, { "epoch": 0.6058832283347388, "grad_norm": 15.04316520690918, "learning_rate": 3.4266466986346303e-06, "loss": 0.3025, "step": 12214 }, { "epoch": 0.605932833969939, "grad_norm": 8.647842407226562, "learning_rate": 3.425899611197283e-06, "loss": 0.2672, "step": 12215 }, { "epoch": 0.6059824396051391, "grad_norm": 5.408130645751953, "learning_rate": 3.4251525627668553e-06, "loss": 0.2388, "step": 12216 }, { "epoch": 0.6060320452403393, "grad_norm": 4.399398326873779, "learning_rate": 3.4244055533618604e-06, "loss": 0.2872, "step": 12217 }, { "epoch": 0.6060816508755394, "grad_norm": 13.887922286987305, "learning_rate": 3.4236585830008074e-06, "loss": 0.2359, "step": 12218 }, { "epoch": 0.6061312565107396, "grad_norm": 8.522356986999512, "learning_rate": 3.422911651702208e-06, "loss": 0.2921, "step": 12219 }, { "epoch": 0.6061808621459398, "grad_norm": 4.086092472076416, "learning_rate": 3.4221647594845725e-06, "loss": 0.1877, "step": 12220 }, { "epoch": 0.60623046778114, "grad_norm": 11.75588607788086, "learning_rate": 3.4214179063664056e-06, "loss": 0.3443, "step": 12221 }, { "epoch": 0.60628007341634, "grad_norm": 16.030109405517578, "learning_rate": 3.420671092366219e-06, "loss": 0.3079, "step": 12222 }, { "epoch": 0.6063296790515402, "grad_norm": 6.030050277709961, "learning_rate": 3.4199243175025176e-06, "loss": 0.3015, "step": 12223 }, { "epoch": 0.6063792846867404, "grad_norm": 9.28581428527832, "learning_rate": 3.4191775817938042e-06, "loss": 0.2564, "step": 12224 }, { "epoch": 0.6064288903219406, "grad_norm": 4.034977436065674, "learning_rate": 3.4184308852585867e-06, "loss": 0.2024, "step": 12225 }, { "epoch": 0.6064784959571408, "grad_norm": 7.8313798904418945, "learning_rate": 3.4176842279153665e-06, "loss": 0.3158, "step": 12226 }, { "epoch": 0.6065281015923409, "grad_norm": 7.685020923614502, "learning_rate": 3.416937609782648e-06, "loss": 0.2997, "step": 12227 }, { "epoch": 0.606577707227541, "grad_norm": 4.00276517868042, "learning_rate": 3.416191030878932e-06, "loss": 0.2578, "step": 12228 }, { "epoch": 0.6066273128627412, "grad_norm": 6.550366401672363, "learning_rate": 3.415444491222717e-06, "loss": 0.3386, "step": 12229 }, { "epoch": 0.6066769184979414, "grad_norm": 5.997395038604736, "learning_rate": 3.4146979908325057e-06, "loss": 0.2892, "step": 12230 }, { "epoch": 0.6067265241331415, "grad_norm": 6.1900105476379395, "learning_rate": 3.4139515297267954e-06, "loss": 0.2775, "step": 12231 }, { "epoch": 0.6067761297683417, "grad_norm": 5.271017551422119, "learning_rate": 3.4132051079240814e-06, "loss": 0.3184, "step": 12232 }, { "epoch": 0.6068257354035418, "grad_norm": 8.550287246704102, "learning_rate": 3.4124587254428653e-06, "loss": 0.4094, "step": 12233 }, { "epoch": 0.606875341038742, "grad_norm": 24.472511291503906, "learning_rate": 3.4117123823016396e-06, "loss": 0.4819, "step": 12234 }, { "epoch": 0.6069249466739421, "grad_norm": 4.94782018661499, "learning_rate": 3.410966078518898e-06, "loss": 0.244, "step": 12235 }, { "epoch": 0.6069745523091423, "grad_norm": 11.488768577575684, "learning_rate": 3.410219814113137e-06, "loss": 0.3802, "step": 12236 }, { "epoch": 0.6070241579443425, "grad_norm": 6.662415981292725, "learning_rate": 3.4094735891028484e-06, "loss": 0.3015, "step": 12237 }, { "epoch": 0.6070737635795427, "grad_norm": 7.551517486572266, "learning_rate": 3.408727403506522e-06, "loss": 0.3918, "step": 12238 }, { "epoch": 0.6071233692147427, "grad_norm": 4.806136608123779, "learning_rate": 3.4079812573426523e-06, "loss": 0.2615, "step": 12239 }, { "epoch": 0.6071729748499429, "grad_norm": 5.35907506942749, "learning_rate": 3.4072351506297275e-06, "loss": 0.3115, "step": 12240 }, { "epoch": 0.6072225804851431, "grad_norm": 13.874096870422363, "learning_rate": 3.4064890833862337e-06, "loss": 0.3066, "step": 12241 }, { "epoch": 0.6072721861203433, "grad_norm": 10.751632690429688, "learning_rate": 3.4057430556306637e-06, "loss": 0.3249, "step": 12242 }, { "epoch": 0.6073217917555435, "grad_norm": 13.377286911010742, "learning_rate": 3.404997067381499e-06, "loss": 0.4206, "step": 12243 }, { "epoch": 0.6073713973907436, "grad_norm": 5.987766742706299, "learning_rate": 3.404251118657231e-06, "loss": 0.293, "step": 12244 }, { "epoch": 0.6074210030259437, "grad_norm": 4.377339839935303, "learning_rate": 3.403505209476342e-06, "loss": 0.2467, "step": 12245 }, { "epoch": 0.6074706086611439, "grad_norm": 6.904313087463379, "learning_rate": 3.4027593398573145e-06, "loss": 0.2523, "step": 12246 }, { "epoch": 0.6075202142963441, "grad_norm": 8.987482070922852, "learning_rate": 3.4020135098186345e-06, "loss": 0.2492, "step": 12247 }, { "epoch": 0.6075698199315442, "grad_norm": 8.284893989562988, "learning_rate": 3.4012677193787837e-06, "loss": 0.3741, "step": 12248 }, { "epoch": 0.6076194255667444, "grad_norm": 6.5403313636779785, "learning_rate": 3.4005219685562384e-06, "loss": 0.2629, "step": 12249 }, { "epoch": 0.6076690312019445, "grad_norm": 4.089576721191406, "learning_rate": 3.3997762573694857e-06, "loss": 0.2818, "step": 12250 }, { "epoch": 0.6077186368371447, "grad_norm": 5.990725517272949, "learning_rate": 3.3990305858370006e-06, "loss": 0.2828, "step": 12251 }, { "epoch": 0.6077682424723448, "grad_norm": 4.826239585876465, "learning_rate": 3.3982849539772593e-06, "loss": 0.2545, "step": 12252 }, { "epoch": 0.607817848107545, "grad_norm": 9.538763999938965, "learning_rate": 3.3975393618087437e-06, "loss": 0.3781, "step": 12253 }, { "epoch": 0.6078674537427452, "grad_norm": 11.678705215454102, "learning_rate": 3.396793809349928e-06, "loss": 0.2589, "step": 12254 }, { "epoch": 0.6079170593779454, "grad_norm": 10.325611114501953, "learning_rate": 3.3960482966192845e-06, "loss": 0.3631, "step": 12255 }, { "epoch": 0.6079666650131454, "grad_norm": 9.749505043029785, "learning_rate": 3.3953028236352917e-06, "loss": 0.3629, "step": 12256 }, { "epoch": 0.6080162706483456, "grad_norm": 6.964395523071289, "learning_rate": 3.3945573904164208e-06, "loss": 0.3173, "step": 12257 }, { "epoch": 0.6080658762835458, "grad_norm": 5.562943458557129, "learning_rate": 3.3938119969811423e-06, "loss": 0.2476, "step": 12258 }, { "epoch": 0.608115481918746, "grad_norm": 11.93132495880127, "learning_rate": 3.393066643347931e-06, "loss": 0.4278, "step": 12259 }, { "epoch": 0.6081650875539462, "grad_norm": 11.633759498596191, "learning_rate": 3.392321329535254e-06, "loss": 0.4317, "step": 12260 }, { "epoch": 0.6082146931891463, "grad_norm": 7.540884971618652, "learning_rate": 3.391576055561582e-06, "loss": 0.2925, "step": 12261 }, { "epoch": 0.6082642988243464, "grad_norm": 4.892962455749512, "learning_rate": 3.390830821445384e-06, "loss": 0.2471, "step": 12262 }, { "epoch": 0.6083139044595466, "grad_norm": 5.759237766265869, "learning_rate": 3.3900856272051234e-06, "loss": 0.2069, "step": 12263 }, { "epoch": 0.6083635100947468, "grad_norm": 7.3497138023376465, "learning_rate": 3.3893404728592715e-06, "loss": 0.2302, "step": 12264 }, { "epoch": 0.6084131157299469, "grad_norm": 6.320952415466309, "learning_rate": 3.388595358426291e-06, "loss": 0.2508, "step": 12265 }, { "epoch": 0.6084627213651471, "grad_norm": 6.919135570526123, "learning_rate": 3.3878502839246453e-06, "loss": 0.3046, "step": 12266 }, { "epoch": 0.6085123270003472, "grad_norm": 6.170307159423828, "learning_rate": 3.387105249372801e-06, "loss": 0.2678, "step": 12267 }, { "epoch": 0.6085619326355474, "grad_norm": 3.940805435180664, "learning_rate": 3.386360254789216e-06, "loss": 0.3104, "step": 12268 }, { "epoch": 0.6086115382707475, "grad_norm": 7.598642826080322, "learning_rate": 3.3856153001923547e-06, "loss": 0.3111, "step": 12269 }, { "epoch": 0.6086611439059477, "grad_norm": 6.89300012588501, "learning_rate": 3.384870385600676e-06, "loss": 0.3253, "step": 12270 }, { "epoch": 0.6087107495411479, "grad_norm": 7.917054176330566, "learning_rate": 3.384125511032641e-06, "loss": 0.259, "step": 12271 }, { "epoch": 0.6087603551763481, "grad_norm": 14.249140739440918, "learning_rate": 3.3833806765067056e-06, "loss": 0.2449, "step": 12272 }, { "epoch": 0.6088099608115481, "grad_norm": 8.741415977478027, "learning_rate": 3.3826358820413284e-06, "loss": 0.2448, "step": 12273 }, { "epoch": 0.6088595664467483, "grad_norm": 6.589578151702881, "learning_rate": 3.3818911276549672e-06, "loss": 0.3011, "step": 12274 }, { "epoch": 0.6089091720819485, "grad_norm": 6.842193603515625, "learning_rate": 3.381146413366073e-06, "loss": 0.3615, "step": 12275 }, { "epoch": 0.6089587777171487, "grad_norm": 6.275721549987793, "learning_rate": 3.3804017391931056e-06, "loss": 0.3003, "step": 12276 }, { "epoch": 0.6090083833523489, "grad_norm": 9.165011405944824, "learning_rate": 3.379657105154516e-06, "loss": 0.3331, "step": 12277 }, { "epoch": 0.609057988987549, "grad_norm": 5.3302435874938965, "learning_rate": 3.3789125112687527e-06, "loss": 0.2401, "step": 12278 }, { "epoch": 0.6091075946227491, "grad_norm": 7.010791301727295, "learning_rate": 3.3781679575542736e-06, "loss": 0.2755, "step": 12279 }, { "epoch": 0.6091572002579493, "grad_norm": 6.862645149230957, "learning_rate": 3.3774234440295254e-06, "loss": 0.3141, "step": 12280 }, { "epoch": 0.6092068058931495, "grad_norm": 4.387575149536133, "learning_rate": 3.376678970712958e-06, "loss": 0.2396, "step": 12281 }, { "epoch": 0.6092564115283496, "grad_norm": 5.190865993499756, "learning_rate": 3.375934537623021e-06, "loss": 0.2994, "step": 12282 }, { "epoch": 0.6093060171635498, "grad_norm": 8.340461730957031, "learning_rate": 3.3751901447781587e-06, "loss": 0.1923, "step": 12283 }, { "epoch": 0.6093556227987499, "grad_norm": 11.91701889038086, "learning_rate": 3.374445792196822e-06, "loss": 0.3109, "step": 12284 }, { "epoch": 0.6094052284339501, "grad_norm": 11.52563190460205, "learning_rate": 3.3737014798974542e-06, "loss": 0.3623, "step": 12285 }, { "epoch": 0.6094548340691502, "grad_norm": 7.29479455947876, "learning_rate": 3.372957207898496e-06, "loss": 0.2588, "step": 12286 }, { "epoch": 0.6095044397043504, "grad_norm": 6.302746295928955, "learning_rate": 3.3722129762183974e-06, "loss": 0.2623, "step": 12287 }, { "epoch": 0.6095540453395506, "grad_norm": 5.76970911026001, "learning_rate": 3.371468784875597e-06, "loss": 0.2181, "step": 12288 }, { "epoch": 0.6096036509747508, "grad_norm": 7.557234287261963, "learning_rate": 3.370724633888535e-06, "loss": 0.3487, "step": 12289 }, { "epoch": 0.6096532566099508, "grad_norm": 5.21614933013916, "learning_rate": 3.3699805232756555e-06, "loss": 0.2915, "step": 12290 }, { "epoch": 0.609702862245151, "grad_norm": 6.634405136108398, "learning_rate": 3.3692364530553967e-06, "loss": 0.3897, "step": 12291 }, { "epoch": 0.6097524678803512, "grad_norm": 11.357643127441406, "learning_rate": 3.3684924232461937e-06, "loss": 0.3663, "step": 12292 }, { "epoch": 0.6098020735155514, "grad_norm": 4.792917728424072, "learning_rate": 3.367748433866488e-06, "loss": 0.2927, "step": 12293 }, { "epoch": 0.6098516791507516, "grad_norm": 4.049340724945068, "learning_rate": 3.367004484934715e-06, "loss": 0.2107, "step": 12294 }, { "epoch": 0.6099012847859517, "grad_norm": 4.757158279418945, "learning_rate": 3.3662605764693066e-06, "loss": 0.229, "step": 12295 }, { "epoch": 0.6099508904211518, "grad_norm": 6.035076141357422, "learning_rate": 3.365516708488703e-06, "loss": 0.2941, "step": 12296 }, { "epoch": 0.610000496056352, "grad_norm": 7.838718891143799, "learning_rate": 3.3647728810113326e-06, "loss": 0.274, "step": 12297 }, { "epoch": 0.6100501016915522, "grad_norm": 11.788344383239746, "learning_rate": 3.364029094055632e-06, "loss": 0.3531, "step": 12298 }, { "epoch": 0.6100997073267523, "grad_norm": 4.31644344329834, "learning_rate": 3.3632853476400307e-06, "loss": 0.1011, "step": 12299 }, { "epoch": 0.6101493129619525, "grad_norm": 3.263286590576172, "learning_rate": 3.3625416417829563e-06, "loss": 0.2246, "step": 12300 }, { "epoch": 0.6101989185971526, "grad_norm": 14.042218208312988, "learning_rate": 3.361797976502843e-06, "loss": 0.357, "step": 12301 }, { "epoch": 0.6102485242323528, "grad_norm": 4.217031478881836, "learning_rate": 3.361054351818117e-06, "loss": 0.2264, "step": 12302 }, { "epoch": 0.6102981298675529, "grad_norm": 8.034811019897461, "learning_rate": 3.3603107677472034e-06, "loss": 0.3292, "step": 12303 }, { "epoch": 0.6103477355027531, "grad_norm": 5.787865161895752, "learning_rate": 3.3595672243085327e-06, "loss": 0.2817, "step": 12304 }, { "epoch": 0.6103973411379533, "grad_norm": 8.882912635803223, "learning_rate": 3.358823721520528e-06, "loss": 0.2365, "step": 12305 }, { "epoch": 0.6104469467731535, "grad_norm": 7.103363513946533, "learning_rate": 3.358080259401612e-06, "loss": 0.2787, "step": 12306 }, { "epoch": 0.6104965524083535, "grad_norm": 10.462430000305176, "learning_rate": 3.357336837970212e-06, "loss": 0.3823, "step": 12307 }, { "epoch": 0.6105461580435537, "grad_norm": 4.197661876678467, "learning_rate": 3.3565934572447485e-06, "loss": 0.3211, "step": 12308 }, { "epoch": 0.6105957636787539, "grad_norm": 28.535160064697266, "learning_rate": 3.35585011724364e-06, "loss": 0.2558, "step": 12309 }, { "epoch": 0.6106453693139541, "grad_norm": 5.3393049240112305, "learning_rate": 3.3551068179853114e-06, "loss": 0.2928, "step": 12310 }, { "epoch": 0.6106949749491543, "grad_norm": 6.425682067871094, "learning_rate": 3.3543635594881803e-06, "loss": 0.2921, "step": 12311 }, { "epoch": 0.6107445805843544, "grad_norm": 5.687534809112549, "learning_rate": 3.353620341770662e-06, "loss": 0.2649, "step": 12312 }, { "epoch": 0.6107941862195545, "grad_norm": 10.064062118530273, "learning_rate": 3.3528771648511786e-06, "loss": 0.3743, "step": 12313 }, { "epoch": 0.6108437918547547, "grad_norm": 9.03697395324707, "learning_rate": 3.3521340287481425e-06, "loss": 0.3087, "step": 12314 }, { "epoch": 0.6108933974899549, "grad_norm": 5.61173152923584, "learning_rate": 3.3513909334799723e-06, "loss": 0.2531, "step": 12315 }, { "epoch": 0.610943003125155, "grad_norm": 3.386455535888672, "learning_rate": 3.3506478790650804e-06, "loss": 0.2121, "step": 12316 }, { "epoch": 0.6109926087603552, "grad_norm": 6.179776191711426, "learning_rate": 3.349904865521878e-06, "loss": 0.3064, "step": 12317 }, { "epoch": 0.6110422143955553, "grad_norm": 5.568449974060059, "learning_rate": 3.349161892868782e-06, "loss": 0.2662, "step": 12318 }, { "epoch": 0.6110918200307555, "grad_norm": 5.9657206535339355, "learning_rate": 3.3484189611242e-06, "loss": 0.3039, "step": 12319 }, { "epoch": 0.6111414256659556, "grad_norm": 12.202512741088867, "learning_rate": 3.3476760703065424e-06, "loss": 0.2829, "step": 12320 }, { "epoch": 0.6111910313011558, "grad_norm": 5.542985916137695, "learning_rate": 3.3469332204342207e-06, "loss": 0.288, "step": 12321 }, { "epoch": 0.611240636936356, "grad_norm": 18.704227447509766, "learning_rate": 3.346190411525641e-06, "loss": 0.3665, "step": 12322 }, { "epoch": 0.6112902425715562, "grad_norm": 5.334096431732178, "learning_rate": 3.3454476435992113e-06, "loss": 0.337, "step": 12323 }, { "epoch": 0.6113398482067562, "grad_norm": 5.227132797241211, "learning_rate": 3.344704916673337e-06, "loss": 0.2601, "step": 12324 }, { "epoch": 0.6113894538419564, "grad_norm": 5.783672332763672, "learning_rate": 3.343962230766424e-06, "loss": 0.2973, "step": 12325 }, { "epoch": 0.6114390594771566, "grad_norm": 9.538211822509766, "learning_rate": 3.343219585896875e-06, "loss": 0.3452, "step": 12326 }, { "epoch": 0.6114886651123568, "grad_norm": 8.740364074707031, "learning_rate": 3.342476982083095e-06, "loss": 0.3917, "step": 12327 }, { "epoch": 0.611538270747557, "grad_norm": 4.196592330932617, "learning_rate": 3.341734419343485e-06, "loss": 0.283, "step": 12328 }, { "epoch": 0.6115878763827571, "grad_norm": 6.524099826812744, "learning_rate": 3.340991897696446e-06, "loss": 0.2409, "step": 12329 }, { "epoch": 0.6116374820179572, "grad_norm": 6.935571670532227, "learning_rate": 3.3402494171603783e-06, "loss": 0.2774, "step": 12330 }, { "epoch": 0.6116870876531574, "grad_norm": 9.992898941040039, "learning_rate": 3.339506977753682e-06, "loss": 0.3868, "step": 12331 }, { "epoch": 0.6117366932883576, "grad_norm": 8.901748657226562, "learning_rate": 3.3387645794947516e-06, "loss": 0.2834, "step": 12332 }, { "epoch": 0.6117862989235577, "grad_norm": 8.645881652832031, "learning_rate": 3.3380222224019887e-06, "loss": 0.3515, "step": 12333 }, { "epoch": 0.6118359045587579, "grad_norm": 11.306553840637207, "learning_rate": 3.3372799064937854e-06, "loss": 0.3338, "step": 12334 }, { "epoch": 0.611885510193958, "grad_norm": 18.400863647460938, "learning_rate": 3.3365376317885377e-06, "loss": 0.4257, "step": 12335 }, { "epoch": 0.6119351158291582, "grad_norm": 5.015962600708008, "learning_rate": 3.335795398304642e-06, "loss": 0.2291, "step": 12336 }, { "epoch": 0.6119847214643583, "grad_norm": 7.511307716369629, "learning_rate": 3.335053206060487e-06, "loss": 0.2885, "step": 12337 }, { "epoch": 0.6120343270995585, "grad_norm": 6.329599380493164, "learning_rate": 3.3343110550744686e-06, "loss": 0.3, "step": 12338 }, { "epoch": 0.6120839327347587, "grad_norm": 19.505720138549805, "learning_rate": 3.3335689453649755e-06, "loss": 0.4717, "step": 12339 }, { "epoch": 0.6121335383699589, "grad_norm": 6.947023868560791, "learning_rate": 3.3328268769503957e-06, "loss": 0.2344, "step": 12340 }, { "epoch": 0.6121831440051589, "grad_norm": 12.508353233337402, "learning_rate": 3.332084849849122e-06, "loss": 0.4005, "step": 12341 }, { "epoch": 0.6122327496403591, "grad_norm": 15.884736061096191, "learning_rate": 3.3313428640795405e-06, "loss": 0.416, "step": 12342 }, { "epoch": 0.6122823552755593, "grad_norm": 10.802193641662598, "learning_rate": 3.330600919660035e-06, "loss": 0.2978, "step": 12343 }, { "epoch": 0.6123319609107595, "grad_norm": 6.664114952087402, "learning_rate": 3.329859016608996e-06, "loss": 0.2716, "step": 12344 }, { "epoch": 0.6123815665459597, "grad_norm": 4.310751914978027, "learning_rate": 3.329117154944807e-06, "loss": 0.2789, "step": 12345 }, { "epoch": 0.6124311721811598, "grad_norm": 11.399490356445312, "learning_rate": 3.3283753346858484e-06, "loss": 0.4082, "step": 12346 }, { "epoch": 0.6124807778163599, "grad_norm": 6.3253302574157715, "learning_rate": 3.327633555850507e-06, "loss": 0.3036, "step": 12347 }, { "epoch": 0.6125303834515601, "grad_norm": 9.208480834960938, "learning_rate": 3.326891818457162e-06, "loss": 0.3426, "step": 12348 }, { "epoch": 0.6125799890867603, "grad_norm": 11.868754386901855, "learning_rate": 3.3261501225241933e-06, "loss": 0.2599, "step": 12349 }, { "epoch": 0.6126295947219604, "grad_norm": 7.575316429138184, "learning_rate": 3.325408468069983e-06, "loss": 0.2813, "step": 12350 }, { "epoch": 0.6126792003571606, "grad_norm": 5.235661506652832, "learning_rate": 3.324666855112907e-06, "loss": 0.2895, "step": 12351 }, { "epoch": 0.6127288059923607, "grad_norm": 13.910449028015137, "learning_rate": 3.323925283671346e-06, "loss": 0.4591, "step": 12352 }, { "epoch": 0.6127784116275609, "grad_norm": 6.525235652923584, "learning_rate": 3.3231837537636748e-06, "loss": 0.1724, "step": 12353 }, { "epoch": 0.612828017262761, "grad_norm": 6.961276531219482, "learning_rate": 3.322442265408266e-06, "loss": 0.2207, "step": 12354 }, { "epoch": 0.6128776228979612, "grad_norm": 7.694730758666992, "learning_rate": 3.3217008186234994e-06, "loss": 0.2804, "step": 12355 }, { "epoch": 0.6129272285331614, "grad_norm": 4.03633975982666, "learning_rate": 3.320959413427746e-06, "loss": 0.2544, "step": 12356 }, { "epoch": 0.6129768341683616, "grad_norm": 4.604050159454346, "learning_rate": 3.320218049839375e-06, "loss": 0.3126, "step": 12357 }, { "epoch": 0.6130264398035616, "grad_norm": 11.609925270080566, "learning_rate": 3.3194767278767626e-06, "loss": 0.3232, "step": 12358 }, { "epoch": 0.6130760454387618, "grad_norm": 6.706599235534668, "learning_rate": 3.3187354475582778e-06, "loss": 0.3596, "step": 12359 }, { "epoch": 0.613125651073962, "grad_norm": 7.195318222045898, "learning_rate": 3.3179942089022866e-06, "loss": 0.3778, "step": 12360 }, { "epoch": 0.6131752567091622, "grad_norm": 12.74594497680664, "learning_rate": 3.3172530119271617e-06, "loss": 0.3852, "step": 12361 }, { "epoch": 0.6132248623443624, "grad_norm": 4.9015913009643555, "learning_rate": 3.316511856651269e-06, "loss": 0.2416, "step": 12362 }, { "epoch": 0.6132744679795625, "grad_norm": 5.171940803527832, "learning_rate": 3.3157707430929707e-06, "loss": 0.2978, "step": 12363 }, { "epoch": 0.6133240736147626, "grad_norm": 12.977076530456543, "learning_rate": 3.3150296712706387e-06, "loss": 0.3097, "step": 12364 }, { "epoch": 0.6133736792499628, "grad_norm": 5.507328510284424, "learning_rate": 3.314288641202633e-06, "loss": 0.3345, "step": 12365 }, { "epoch": 0.613423284885163, "grad_norm": 4.346947193145752, "learning_rate": 3.313547652907315e-06, "loss": 0.2293, "step": 12366 }, { "epoch": 0.6134728905203631, "grad_norm": 5.769890785217285, "learning_rate": 3.3128067064030516e-06, "loss": 0.2495, "step": 12367 }, { "epoch": 0.6135224961555633, "grad_norm": 7.0365986824035645, "learning_rate": 3.3120658017081987e-06, "loss": 0.2404, "step": 12368 }, { "epoch": 0.6135721017907634, "grad_norm": 8.149669647216797, "learning_rate": 3.311324938841122e-06, "loss": 0.3296, "step": 12369 }, { "epoch": 0.6136217074259636, "grad_norm": 9.349218368530273, "learning_rate": 3.310584117820176e-06, "loss": 0.3127, "step": 12370 }, { "epoch": 0.6136713130611637, "grad_norm": 7.109135627746582, "learning_rate": 3.3098433386637176e-06, "loss": 0.2175, "step": 12371 }, { "epoch": 0.6137209186963639, "grad_norm": 4.578463554382324, "learning_rate": 3.3091026013901085e-06, "loss": 0.3021, "step": 12372 }, { "epoch": 0.6137705243315641, "grad_norm": 10.870532035827637, "learning_rate": 3.3083619060177013e-06, "loss": 0.3414, "step": 12373 }, { "epoch": 0.6138201299667643, "grad_norm": 4.546528339385986, "learning_rate": 3.3076212525648493e-06, "loss": 0.2529, "step": 12374 }, { "epoch": 0.6138697356019643, "grad_norm": 4.015227317810059, "learning_rate": 3.3068806410499106e-06, "loss": 0.1565, "step": 12375 }, { "epoch": 0.6139193412371645, "grad_norm": 11.511367797851562, "learning_rate": 3.306140071491234e-06, "loss": 0.3515, "step": 12376 }, { "epoch": 0.6139689468723647, "grad_norm": 11.205129623413086, "learning_rate": 3.305399543907173e-06, "loss": 0.4433, "step": 12377 }, { "epoch": 0.6140185525075649, "grad_norm": 7.888876914978027, "learning_rate": 3.3046590583160786e-06, "loss": 0.3258, "step": 12378 }, { "epoch": 0.614068158142765, "grad_norm": 9.598498344421387, "learning_rate": 3.303918614736299e-06, "loss": 0.3621, "step": 12379 }, { "epoch": 0.6141177637779652, "grad_norm": 5.046672821044922, "learning_rate": 3.3031782131861834e-06, "loss": 0.2999, "step": 12380 }, { "epoch": 0.6141673694131653, "grad_norm": 9.336213111877441, "learning_rate": 3.302437853684079e-06, "loss": 0.2126, "step": 12381 }, { "epoch": 0.6142169750483655, "grad_norm": 6.357258319854736, "learning_rate": 3.301697536248334e-06, "loss": 0.2898, "step": 12382 }, { "epoch": 0.6142665806835657, "grad_norm": 6.676130294799805, "learning_rate": 3.3009572608972906e-06, "loss": 0.3067, "step": 12383 }, { "epoch": 0.6143161863187658, "grad_norm": 9.322975158691406, "learning_rate": 3.300217027649295e-06, "loss": 0.3207, "step": 12384 }, { "epoch": 0.614365791953966, "grad_norm": 5.298770904541016, "learning_rate": 3.2994768365226915e-06, "loss": 0.2817, "step": 12385 }, { "epoch": 0.6144153975891661, "grad_norm": 5.640140533447266, "learning_rate": 3.2987366875358183e-06, "loss": 0.2236, "step": 12386 }, { "epoch": 0.6144650032243663, "grad_norm": 10.346749305725098, "learning_rate": 3.2979965807070225e-06, "loss": 0.3367, "step": 12387 }, { "epoch": 0.6145146088595664, "grad_norm": 7.805069923400879, "learning_rate": 3.29725651605464e-06, "loss": 0.3063, "step": 12388 }, { "epoch": 0.6145642144947666, "grad_norm": 7.590855598449707, "learning_rate": 3.296516493597012e-06, "loss": 0.2474, "step": 12389 }, { "epoch": 0.6146138201299668, "grad_norm": 8.538549423217773, "learning_rate": 3.295776513352476e-06, "loss": 0.3982, "step": 12390 }, { "epoch": 0.614663425765167, "grad_norm": 6.296339511871338, "learning_rate": 3.295036575339367e-06, "loss": 0.2798, "step": 12391 }, { "epoch": 0.614713031400367, "grad_norm": 5.448702812194824, "learning_rate": 3.294296679576025e-06, "loss": 0.2972, "step": 12392 }, { "epoch": 0.6147626370355672, "grad_norm": 10.062487602233887, "learning_rate": 3.2935568260807832e-06, "loss": 0.2811, "step": 12393 }, { "epoch": 0.6148122426707674, "grad_norm": 10.521288871765137, "learning_rate": 3.2928170148719727e-06, "loss": 0.307, "step": 12394 }, { "epoch": 0.6148618483059676, "grad_norm": 5.4089250564575195, "learning_rate": 3.2920772459679314e-06, "loss": 0.215, "step": 12395 }, { "epoch": 0.6149114539411678, "grad_norm": 7.055565357208252, "learning_rate": 3.2913375193869877e-06, "loss": 0.2535, "step": 12396 }, { "epoch": 0.6149610595763679, "grad_norm": 7.809000492095947, "learning_rate": 3.290597835147472e-06, "loss": 0.2922, "step": 12397 }, { "epoch": 0.615010665211568, "grad_norm": 5.111225128173828, "learning_rate": 3.2898581932677165e-06, "loss": 0.2972, "step": 12398 }, { "epoch": 0.6150602708467682, "grad_norm": 4.914419174194336, "learning_rate": 3.2891185937660485e-06, "loss": 0.28, "step": 12399 }, { "epoch": 0.6151098764819684, "grad_norm": 6.488991737365723, "learning_rate": 3.288379036660795e-06, "loss": 0.2866, "step": 12400 }, { "epoch": 0.6151594821171685, "grad_norm": 16.44413948059082, "learning_rate": 3.287639521970284e-06, "loss": 0.3277, "step": 12401 }, { "epoch": 0.6152090877523687, "grad_norm": 14.161989212036133, "learning_rate": 3.28690004971284e-06, "loss": 0.3392, "step": 12402 }, { "epoch": 0.6152586933875688, "grad_norm": 8.470925331115723, "learning_rate": 3.2861606199067864e-06, "loss": 0.3017, "step": 12403 }, { "epoch": 0.615308299022769, "grad_norm": 4.802906036376953, "learning_rate": 3.2854212325704494e-06, "loss": 0.3203, "step": 12404 }, { "epoch": 0.6153579046579691, "grad_norm": 4.822380542755127, "learning_rate": 3.2846818877221482e-06, "loss": 0.3245, "step": 12405 }, { "epoch": 0.6154075102931693, "grad_norm": 4.7414870262146, "learning_rate": 3.2839425853802075e-06, "loss": 0.3033, "step": 12406 }, { "epoch": 0.6154571159283695, "grad_norm": 5.139854431152344, "learning_rate": 3.283203325562946e-06, "loss": 0.3086, "step": 12407 }, { "epoch": 0.6155067215635697, "grad_norm": 3.7655436992645264, "learning_rate": 3.282464108288681e-06, "loss": 0.3232, "step": 12408 }, { "epoch": 0.6155563271987697, "grad_norm": 5.690858840942383, "learning_rate": 3.2817249335757338e-06, "loss": 0.3241, "step": 12409 }, { "epoch": 0.6156059328339699, "grad_norm": 8.797368049621582, "learning_rate": 3.28098580144242e-06, "loss": 0.3798, "step": 12410 }, { "epoch": 0.6156555384691701, "grad_norm": 9.084277153015137, "learning_rate": 3.2802467119070534e-06, "loss": 0.4231, "step": 12411 }, { "epoch": 0.6157051441043703, "grad_norm": 9.848283767700195, "learning_rate": 3.279507664987954e-06, "loss": 0.3816, "step": 12412 }, { "epoch": 0.6157547497395705, "grad_norm": 8.007006645202637, "learning_rate": 3.2787686607034326e-06, "loss": 0.2669, "step": 12413 }, { "epoch": 0.6158043553747706, "grad_norm": 6.165809154510498, "learning_rate": 3.2780296990718e-06, "loss": 0.2889, "step": 12414 }, { "epoch": 0.6158539610099707, "grad_norm": 8.246429443359375, "learning_rate": 3.277290780111373e-06, "loss": 0.4367, "step": 12415 }, { "epoch": 0.6159035666451709, "grad_norm": 7.21616792678833, "learning_rate": 3.2765519038404594e-06, "loss": 0.3008, "step": 12416 }, { "epoch": 0.615953172280371, "grad_norm": 3.546130418777466, "learning_rate": 3.275813070277367e-06, "loss": 0.3162, "step": 12417 }, { "epoch": 0.6160027779155712, "grad_norm": 5.574624538421631, "learning_rate": 3.275074279440409e-06, "loss": 0.3891, "step": 12418 }, { "epoch": 0.6160523835507714, "grad_norm": 4.389509677886963, "learning_rate": 3.274335531347891e-06, "loss": 0.2971, "step": 12419 }, { "epoch": 0.6161019891859715, "grad_norm": 9.117106437683105, "learning_rate": 3.273596826018116e-06, "loss": 0.3592, "step": 12420 }, { "epoch": 0.6161515948211717, "grad_norm": 9.100326538085938, "learning_rate": 3.2728581634693956e-06, "loss": 0.2834, "step": 12421 }, { "epoch": 0.6162012004563718, "grad_norm": 5.196836948394775, "learning_rate": 3.2721195437200287e-06, "loss": 0.3184, "step": 12422 }, { "epoch": 0.616250806091572, "grad_norm": 5.846911430358887, "learning_rate": 3.2713809667883233e-06, "loss": 0.3253, "step": 12423 }, { "epoch": 0.6163004117267722, "grad_norm": 9.317440032958984, "learning_rate": 3.270642432692579e-06, "loss": 0.3579, "step": 12424 }, { "epoch": 0.6163500173619724, "grad_norm": 4.584410667419434, "learning_rate": 3.2699039414510963e-06, "loss": 0.2203, "step": 12425 }, { "epoch": 0.6163996229971724, "grad_norm": 4.545144557952881, "learning_rate": 3.269165493082178e-06, "loss": 0.2626, "step": 12426 }, { "epoch": 0.6164492286323726, "grad_norm": 6.582321643829346, "learning_rate": 3.2684270876041226e-06, "loss": 0.2924, "step": 12427 }, { "epoch": 0.6164988342675728, "grad_norm": 5.414124011993408, "learning_rate": 3.267688725035224e-06, "loss": 0.2836, "step": 12428 }, { "epoch": 0.616548439902773, "grad_norm": 6.635830879211426, "learning_rate": 3.266950405393785e-06, "loss": 0.2664, "step": 12429 }, { "epoch": 0.6165980455379731, "grad_norm": 5.335000038146973, "learning_rate": 3.2662121286980976e-06, "loss": 0.3523, "step": 12430 }, { "epoch": 0.6166476511731733, "grad_norm": 8.183130264282227, "learning_rate": 3.265473894966458e-06, "loss": 0.3856, "step": 12431 }, { "epoch": 0.6166972568083734, "grad_norm": 13.433562278747559, "learning_rate": 3.2647357042171612e-06, "loss": 0.2368, "step": 12432 }, { "epoch": 0.6167468624435736, "grad_norm": 5.899454593658447, "learning_rate": 3.2639975564684978e-06, "loss": 0.3092, "step": 12433 }, { "epoch": 0.6167964680787738, "grad_norm": 16.38134002685547, "learning_rate": 3.2632594517387594e-06, "loss": 0.3912, "step": 12434 }, { "epoch": 0.6168460737139739, "grad_norm": 6.276480674743652, "learning_rate": 3.2625213900462382e-06, "loss": 0.1723, "step": 12435 }, { "epoch": 0.6168956793491741, "grad_norm": 8.97282886505127, "learning_rate": 3.2617833714092234e-06, "loss": 0.3996, "step": 12436 }, { "epoch": 0.6169452849843742, "grad_norm": 5.513903617858887, "learning_rate": 3.261045395846002e-06, "loss": 0.1995, "step": 12437 }, { "epoch": 0.6169948906195744, "grad_norm": 6.063385963439941, "learning_rate": 3.260307463374862e-06, "loss": 0.2752, "step": 12438 }, { "epoch": 0.6170444962547745, "grad_norm": 4.5689263343811035, "learning_rate": 3.259569574014091e-06, "loss": 0.2345, "step": 12439 }, { "epoch": 0.6170941018899747, "grad_norm": 7.204367160797119, "learning_rate": 3.2588317277819724e-06, "loss": 0.3428, "step": 12440 }, { "epoch": 0.6171437075251749, "grad_norm": 9.04654598236084, "learning_rate": 3.2580939246967924e-06, "loss": 0.3937, "step": 12441 }, { "epoch": 0.6171933131603751, "grad_norm": 6.059236526489258, "learning_rate": 3.2573561647768327e-06, "loss": 0.2831, "step": 12442 }, { "epoch": 0.6172429187955751, "grad_norm": 5.853604793548584, "learning_rate": 3.256618448040374e-06, "loss": 0.3268, "step": 12443 }, { "epoch": 0.6172925244307753, "grad_norm": 7.63158655166626, "learning_rate": 3.2558807745057007e-06, "loss": 0.2303, "step": 12444 }, { "epoch": 0.6173421300659755, "grad_norm": 9.476428031921387, "learning_rate": 3.2551431441910887e-06, "loss": 0.2608, "step": 12445 }, { "epoch": 0.6173917357011757, "grad_norm": 9.616081237792969, "learning_rate": 3.2544055571148207e-06, "loss": 0.2463, "step": 12446 }, { "epoch": 0.6174413413363758, "grad_norm": 7.565371990203857, "learning_rate": 3.2536680132951725e-06, "loss": 0.3748, "step": 12447 }, { "epoch": 0.6174909469715759, "grad_norm": 9.050320625305176, "learning_rate": 3.2529305127504195e-06, "loss": 0.2915, "step": 12448 }, { "epoch": 0.6175405526067761, "grad_norm": 6.686338901519775, "learning_rate": 3.25219305549884e-06, "loss": 0.2899, "step": 12449 }, { "epoch": 0.6175901582419763, "grad_norm": 4.557035446166992, "learning_rate": 3.251455641558707e-06, "loss": 0.1746, "step": 12450 }, { "epoch": 0.6176397638771765, "grad_norm": 4.643307685852051, "learning_rate": 3.2507182709482933e-06, "loss": 0.255, "step": 12451 }, { "epoch": 0.6176893695123766, "grad_norm": 5.578122138977051, "learning_rate": 3.2499809436858733e-06, "loss": 0.2811, "step": 12452 }, { "epoch": 0.6177389751475768, "grad_norm": 6.601177215576172, "learning_rate": 3.249243659789717e-06, "loss": 0.2825, "step": 12453 }, { "epoch": 0.6177885807827769, "grad_norm": 13.31695556640625, "learning_rate": 3.2485064192780936e-06, "loss": 0.4156, "step": 12454 }, { "epoch": 0.617838186417977, "grad_norm": 7.857454776763916, "learning_rate": 3.247769222169275e-06, "loss": 0.2868, "step": 12455 }, { "epoch": 0.6178877920531772, "grad_norm": 8.212174415588379, "learning_rate": 3.247032068481527e-06, "loss": 0.3236, "step": 12456 }, { "epoch": 0.6179373976883774, "grad_norm": 6.125545501708984, "learning_rate": 3.246294958233116e-06, "loss": 0.2399, "step": 12457 }, { "epoch": 0.6179870033235776, "grad_norm": 8.59807300567627, "learning_rate": 3.2455578914423114e-06, "loss": 0.247, "step": 12458 }, { "epoch": 0.6180366089587778, "grad_norm": 11.428983688354492, "learning_rate": 3.244820868127373e-06, "loss": 0.3605, "step": 12459 }, { "epoch": 0.6180862145939778, "grad_norm": 7.780675888061523, "learning_rate": 3.2440838883065704e-06, "loss": 0.3879, "step": 12460 }, { "epoch": 0.618135820229178, "grad_norm": 8.107077598571777, "learning_rate": 3.243346951998163e-06, "loss": 0.4041, "step": 12461 }, { "epoch": 0.6181854258643782, "grad_norm": 8.885398864746094, "learning_rate": 3.24261005922041e-06, "loss": 0.2934, "step": 12462 }, { "epoch": 0.6182350314995784, "grad_norm": 9.9443941116333, "learning_rate": 3.241873209991578e-06, "loss": 0.2952, "step": 12463 }, { "epoch": 0.6182846371347785, "grad_norm": 4.978569507598877, "learning_rate": 3.241136404329922e-06, "loss": 0.2874, "step": 12464 }, { "epoch": 0.6183342427699786, "grad_norm": 12.49132251739502, "learning_rate": 3.2403996422537005e-06, "loss": 0.4109, "step": 12465 }, { "epoch": 0.6183838484051788, "grad_norm": 8.097740173339844, "learning_rate": 3.239662923781173e-06, "loss": 0.3217, "step": 12466 }, { "epoch": 0.618433454040379, "grad_norm": 5.209192276000977, "learning_rate": 3.2389262489305952e-06, "loss": 0.2909, "step": 12467 }, { "epoch": 0.6184830596755791, "grad_norm": 7.660445213317871, "learning_rate": 3.2381896177202187e-06, "loss": 0.3029, "step": 12468 }, { "epoch": 0.6185326653107793, "grad_norm": 5.76697301864624, "learning_rate": 3.2374530301683033e-06, "loss": 0.2576, "step": 12469 }, { "epoch": 0.6185822709459795, "grad_norm": 4.939356803894043, "learning_rate": 3.236716486293099e-06, "loss": 0.2482, "step": 12470 }, { "epoch": 0.6186318765811796, "grad_norm": 10.281028747558594, "learning_rate": 3.2359799861128553e-06, "loss": 0.4876, "step": 12471 }, { "epoch": 0.6186814822163798, "grad_norm": 5.289764881134033, "learning_rate": 3.2352435296458277e-06, "loss": 0.2456, "step": 12472 }, { "epoch": 0.6187310878515799, "grad_norm": 4.865142822265625, "learning_rate": 3.2345071169102636e-06, "loss": 0.2114, "step": 12473 }, { "epoch": 0.6187806934867801, "grad_norm": 11.004406929016113, "learning_rate": 3.233770747924411e-06, "loss": 0.338, "step": 12474 }, { "epoch": 0.6188302991219803, "grad_norm": 5.055028438568115, "learning_rate": 3.2330344227065186e-06, "loss": 0.3119, "step": 12475 }, { "epoch": 0.6188799047571805, "grad_norm": 7.8874711990356445, "learning_rate": 3.232298141274831e-06, "loss": 0.2163, "step": 12476 }, { "epoch": 0.6189295103923805, "grad_norm": 6.786172866821289, "learning_rate": 3.231561903647597e-06, "loss": 0.214, "step": 12477 }, { "epoch": 0.6189791160275807, "grad_norm": 9.789066314697266, "learning_rate": 3.2308257098430595e-06, "loss": 0.2984, "step": 12478 }, { "epoch": 0.6190287216627809, "grad_norm": 11.576436996459961, "learning_rate": 3.2300895598794584e-06, "loss": 0.2522, "step": 12479 }, { "epoch": 0.6190783272979811, "grad_norm": 5.594625473022461, "learning_rate": 3.2293534537750415e-06, "loss": 0.2719, "step": 12480 }, { "epoch": 0.6191279329331812, "grad_norm": 4.48685884475708, "learning_rate": 3.2286173915480464e-06, "loss": 0.3147, "step": 12481 }, { "epoch": 0.6191775385683813, "grad_norm": 6.912143707275391, "learning_rate": 3.2278813732167114e-06, "loss": 0.251, "step": 12482 }, { "epoch": 0.6192271442035815, "grad_norm": 5.350707530975342, "learning_rate": 3.2271453987992795e-06, "loss": 0.2077, "step": 12483 }, { "epoch": 0.6192767498387817, "grad_norm": 6.051810264587402, "learning_rate": 3.226409468313987e-06, "loss": 0.2447, "step": 12484 }, { "epoch": 0.6193263554739818, "grad_norm": 8.401904106140137, "learning_rate": 3.2256735817790684e-06, "loss": 0.2773, "step": 12485 }, { "epoch": 0.619375961109182, "grad_norm": 5.301617622375488, "learning_rate": 3.224937739212763e-06, "loss": 0.2831, "step": 12486 }, { "epoch": 0.6194255667443822, "grad_norm": 7.989412784576416, "learning_rate": 3.2242019406333026e-06, "loss": 0.403, "step": 12487 }, { "epoch": 0.6194751723795823, "grad_norm": 7.464160919189453, "learning_rate": 3.2234661860589205e-06, "loss": 0.3436, "step": 12488 }, { "epoch": 0.6195247780147825, "grad_norm": 4.174693584442139, "learning_rate": 3.2227304755078502e-06, "loss": 0.2572, "step": 12489 }, { "epoch": 0.6195743836499826, "grad_norm": 12.76414680480957, "learning_rate": 3.2219948089983245e-06, "loss": 0.2969, "step": 12490 }, { "epoch": 0.6196239892851828, "grad_norm": 6.931270599365234, "learning_rate": 3.22125918654857e-06, "loss": 0.2392, "step": 12491 }, { "epoch": 0.619673594920383, "grad_norm": 6.03961181640625, "learning_rate": 3.2205236081768173e-06, "loss": 0.2822, "step": 12492 }, { "epoch": 0.6197232005555832, "grad_norm": 5.334183216094971, "learning_rate": 3.219788073901294e-06, "loss": 0.3425, "step": 12493 }, { "epoch": 0.6197728061907832, "grad_norm": 10.165781021118164, "learning_rate": 3.2190525837402287e-06, "loss": 0.3124, "step": 12494 }, { "epoch": 0.6198224118259834, "grad_norm": 5.621868133544922, "learning_rate": 3.2183171377118465e-06, "loss": 0.2077, "step": 12495 }, { "epoch": 0.6198720174611836, "grad_norm": 6.843481540679932, "learning_rate": 3.217581735834371e-06, "loss": 0.2778, "step": 12496 }, { "epoch": 0.6199216230963838, "grad_norm": 5.2136993408203125, "learning_rate": 3.2168463781260255e-06, "loss": 0.3607, "step": 12497 }, { "epoch": 0.619971228731584, "grad_norm": 3.7211906909942627, "learning_rate": 3.216111064605035e-06, "loss": 0.2185, "step": 12498 }, { "epoch": 0.620020834366784, "grad_norm": 4.437489986419678, "learning_rate": 3.215375795289617e-06, "loss": 0.2597, "step": 12499 }, { "epoch": 0.6200704400019842, "grad_norm": 12.86941146850586, "learning_rate": 3.2146405701979966e-06, "loss": 0.2931, "step": 12500 }, { "epoch": 0.6201200456371844, "grad_norm": 7.599593162536621, "learning_rate": 3.2139053893483895e-06, "loss": 0.2822, "step": 12501 }, { "epoch": 0.6201696512723845, "grad_norm": 5.839239120483398, "learning_rate": 3.2131702527590136e-06, "loss": 0.3062, "step": 12502 }, { "epoch": 0.6202192569075847, "grad_norm": 6.349334239959717, "learning_rate": 3.2124351604480885e-06, "loss": 0.3668, "step": 12503 }, { "epoch": 0.6202688625427849, "grad_norm": 18.43491554260254, "learning_rate": 3.211700112433829e-06, "loss": 0.2305, "step": 12504 }, { "epoch": 0.620318468177985, "grad_norm": 5.153590202331543, "learning_rate": 3.2109651087344486e-06, "loss": 0.2184, "step": 12505 }, { "epoch": 0.6203680738131852, "grad_norm": 13.417588233947754, "learning_rate": 3.2102301493681626e-06, "loss": 0.3756, "step": 12506 }, { "epoch": 0.6204176794483853, "grad_norm": 5.941348552703857, "learning_rate": 3.2094952343531837e-06, "loss": 0.2708, "step": 12507 }, { "epoch": 0.6204672850835855, "grad_norm": 13.804841995239258, "learning_rate": 3.208760363707721e-06, "loss": 0.5168, "step": 12508 }, { "epoch": 0.6205168907187857, "grad_norm": 7.254275321960449, "learning_rate": 3.2080255374499886e-06, "loss": 0.2645, "step": 12509 }, { "epoch": 0.6205664963539859, "grad_norm": 7.2129364013671875, "learning_rate": 3.2072907555981935e-06, "loss": 0.3139, "step": 12510 }, { "epoch": 0.6206161019891859, "grad_norm": 7.401848316192627, "learning_rate": 3.2065560181705436e-06, "loss": 0.3478, "step": 12511 }, { "epoch": 0.6206657076243861, "grad_norm": 10.075668334960938, "learning_rate": 3.205821325185248e-06, "loss": 0.3487, "step": 12512 }, { "epoch": 0.6207153132595863, "grad_norm": 6.1473517417907715, "learning_rate": 3.2050866766605095e-06, "loss": 0.3301, "step": 12513 }, { "epoch": 0.6207649188947865, "grad_norm": 5.383710861206055, "learning_rate": 3.204352072614537e-06, "loss": 0.2251, "step": 12514 }, { "epoch": 0.6208145245299866, "grad_norm": 5.3056464195251465, "learning_rate": 3.203617513065533e-06, "loss": 0.2608, "step": 12515 }, { "epoch": 0.6208641301651867, "grad_norm": 9.7572021484375, "learning_rate": 3.2028829980316966e-06, "loss": 0.3624, "step": 12516 }, { "epoch": 0.6209137358003869, "grad_norm": 6.336960792541504, "learning_rate": 3.202148527531235e-06, "loss": 0.2318, "step": 12517 }, { "epoch": 0.6209633414355871, "grad_norm": 6.820227146148682, "learning_rate": 3.2014141015823463e-06, "loss": 0.4247, "step": 12518 }, { "epoch": 0.6210129470707872, "grad_norm": 11.852439880371094, "learning_rate": 3.200679720203228e-06, "loss": 0.2467, "step": 12519 }, { "epoch": 0.6210625527059874, "grad_norm": 5.348099708557129, "learning_rate": 3.199945383412082e-06, "loss": 0.3745, "step": 12520 }, { "epoch": 0.6211121583411876, "grad_norm": 6.303349018096924, "learning_rate": 3.1992110912271034e-06, "loss": 0.2565, "step": 12521 }, { "epoch": 0.6211617639763877, "grad_norm": 6.947108268737793, "learning_rate": 3.1984768436664865e-06, "loss": 0.2767, "step": 12522 }, { "epoch": 0.6212113696115878, "grad_norm": 9.42360782623291, "learning_rate": 3.1977426407484303e-06, "loss": 0.3958, "step": 12523 }, { "epoch": 0.621260975246788, "grad_norm": 5.223043918609619, "learning_rate": 3.197008482491127e-06, "loss": 0.2086, "step": 12524 }, { "epoch": 0.6213105808819882, "grad_norm": 5.201212406158447, "learning_rate": 3.1962743689127673e-06, "loss": 0.3036, "step": 12525 }, { "epoch": 0.6213601865171884, "grad_norm": 4.251094341278076, "learning_rate": 3.195540300031546e-06, "loss": 0.2531, "step": 12526 }, { "epoch": 0.6214097921523886, "grad_norm": 8.336753845214844, "learning_rate": 3.1948062758656527e-06, "loss": 0.2756, "step": 12527 }, { "epoch": 0.6214593977875886, "grad_norm": 7.639942169189453, "learning_rate": 3.1940722964332742e-06, "loss": 0.2595, "step": 12528 }, { "epoch": 0.6215090034227888, "grad_norm": 5.130302429199219, "learning_rate": 3.1933383617526037e-06, "loss": 0.2498, "step": 12529 }, { "epoch": 0.621558609057989, "grad_norm": 5.456473350524902, "learning_rate": 3.1926044718418232e-06, "loss": 0.249, "step": 12530 }, { "epoch": 0.6216082146931892, "grad_norm": 5.7996087074279785, "learning_rate": 3.1918706267191236e-06, "loss": 0.3372, "step": 12531 }, { "epoch": 0.6216578203283893, "grad_norm": 4.732886791229248, "learning_rate": 3.1911368264026877e-06, "loss": 0.25, "step": 12532 }, { "epoch": 0.6217074259635894, "grad_norm": 3.8921637535095215, "learning_rate": 3.190403070910698e-06, "loss": 0.3204, "step": 12533 }, { "epoch": 0.6217570315987896, "grad_norm": 3.6925485134124756, "learning_rate": 3.1896693602613405e-06, "loss": 0.2465, "step": 12534 }, { "epoch": 0.6218066372339898, "grad_norm": 11.7525053024292, "learning_rate": 3.1889356944727945e-06, "loss": 0.2679, "step": 12535 }, { "epoch": 0.62185624286919, "grad_norm": 3.818931818008423, "learning_rate": 3.18820207356324e-06, "loss": 0.2222, "step": 12536 }, { "epoch": 0.6219058485043901, "grad_norm": 9.311716079711914, "learning_rate": 3.1874684975508598e-06, "loss": 0.3986, "step": 12537 }, { "epoch": 0.6219554541395903, "grad_norm": 6.612570762634277, "learning_rate": 3.1867349664538283e-06, "loss": 0.3022, "step": 12538 }, { "epoch": 0.6220050597747904, "grad_norm": 8.532392501831055, "learning_rate": 3.186001480290325e-06, "loss": 0.3664, "step": 12539 }, { "epoch": 0.6220546654099905, "grad_norm": 6.45301628112793, "learning_rate": 3.185268039078527e-06, "loss": 0.2438, "step": 12540 }, { "epoch": 0.6221042710451907, "grad_norm": 7.4949870109558105, "learning_rate": 3.1845346428366062e-06, "loss": 0.3864, "step": 12541 }, { "epoch": 0.6221538766803909, "grad_norm": 7.280534267425537, "learning_rate": 3.183801291582739e-06, "loss": 0.2791, "step": 12542 }, { "epoch": 0.6222034823155911, "grad_norm": 6.479137897491455, "learning_rate": 3.1830679853350967e-06, "loss": 0.3144, "step": 12543 }, { "epoch": 0.6222530879507913, "grad_norm": 10.275840759277344, "learning_rate": 3.182334724111853e-06, "loss": 0.3794, "step": 12544 }, { "epoch": 0.6223026935859913, "grad_norm": 5.602231025695801, "learning_rate": 3.1816015079311757e-06, "loss": 0.2849, "step": 12545 }, { "epoch": 0.6223522992211915, "grad_norm": 7.056342601776123, "learning_rate": 3.180868336811236e-06, "loss": 0.2929, "step": 12546 }, { "epoch": 0.6224019048563917, "grad_norm": 8.658683776855469, "learning_rate": 3.180135210770201e-06, "loss": 0.201, "step": 12547 }, { "epoch": 0.6224515104915919, "grad_norm": 5.829548358917236, "learning_rate": 3.179402129826239e-06, "loss": 0.2775, "step": 12548 }, { "epoch": 0.622501116126792, "grad_norm": 5.326780319213867, "learning_rate": 3.1786690939975174e-06, "loss": 0.2795, "step": 12549 }, { "epoch": 0.6225507217619921, "grad_norm": 7.005577564239502, "learning_rate": 3.1779361033021984e-06, "loss": 0.2016, "step": 12550 }, { "epoch": 0.6226003273971923, "grad_norm": 10.322617530822754, "learning_rate": 3.177203157758446e-06, "loss": 0.3309, "step": 12551 }, { "epoch": 0.6226499330323925, "grad_norm": 7.26291036605835, "learning_rate": 3.176470257384426e-06, "loss": 0.2466, "step": 12552 }, { "epoch": 0.6226995386675926, "grad_norm": 7.02330207824707, "learning_rate": 3.1757374021982962e-06, "loss": 0.3244, "step": 12553 }, { "epoch": 0.6227491443027928, "grad_norm": 9.13199520111084, "learning_rate": 3.1750045922182192e-06, "loss": 0.2752, "step": 12554 }, { "epoch": 0.622798749937993, "grad_norm": 4.613415718078613, "learning_rate": 3.1742718274623544e-06, "loss": 0.2453, "step": 12555 }, { "epoch": 0.6228483555731931, "grad_norm": 21.205883026123047, "learning_rate": 3.173539107948858e-06, "loss": 0.329, "step": 12556 }, { "epoch": 0.6228979612083932, "grad_norm": 12.420485496520996, "learning_rate": 3.172806433695891e-06, "loss": 0.3072, "step": 12557 }, { "epoch": 0.6229475668435934, "grad_norm": 9.550141334533691, "learning_rate": 3.1720738047216064e-06, "loss": 0.2258, "step": 12558 }, { "epoch": 0.6229971724787936, "grad_norm": 5.733851909637451, "learning_rate": 3.1713412210441587e-06, "loss": 0.2408, "step": 12559 }, { "epoch": 0.6230467781139938, "grad_norm": 10.676692008972168, "learning_rate": 3.170608682681704e-06, "loss": 0.3487, "step": 12560 }, { "epoch": 0.623096383749194, "grad_norm": 13.255321502685547, "learning_rate": 3.1698761896523935e-06, "loss": 0.3008, "step": 12561 }, { "epoch": 0.623145989384394, "grad_norm": 7.613029479980469, "learning_rate": 3.1691437419743776e-06, "loss": 0.405, "step": 12562 }, { "epoch": 0.6231955950195942, "grad_norm": 5.51688289642334, "learning_rate": 3.1684113396658087e-06, "loss": 0.281, "step": 12563 }, { "epoch": 0.6232452006547944, "grad_norm": 6.533973693847656, "learning_rate": 3.1676789827448346e-06, "loss": 0.27, "step": 12564 }, { "epoch": 0.6232948062899946, "grad_norm": 5.75115442276001, "learning_rate": 3.1669466712296058e-06, "loss": 0.2763, "step": 12565 }, { "epoch": 0.6233444119251947, "grad_norm": 6.389923095703125, "learning_rate": 3.1662144051382675e-06, "loss": 0.2831, "step": 12566 }, { "epoch": 0.6233940175603948, "grad_norm": 5.629753589630127, "learning_rate": 3.165482184488964e-06, "loss": 0.3267, "step": 12567 }, { "epoch": 0.623443623195595, "grad_norm": 8.709600448608398, "learning_rate": 3.164750009299844e-06, "loss": 0.3236, "step": 12568 }, { "epoch": 0.6234932288307952, "grad_norm": 10.086874008178711, "learning_rate": 3.1640178795890486e-06, "loss": 0.3661, "step": 12569 }, { "epoch": 0.6235428344659953, "grad_norm": 5.187322616577148, "learning_rate": 3.1632857953747186e-06, "loss": 0.1915, "step": 12570 }, { "epoch": 0.6235924401011955, "grad_norm": 5.7554030418396, "learning_rate": 3.1625537566749993e-06, "loss": 0.2833, "step": 12571 }, { "epoch": 0.6236420457363957, "grad_norm": 5.667834281921387, "learning_rate": 3.1618217635080294e-06, "loss": 0.2341, "step": 12572 }, { "epoch": 0.6236916513715958, "grad_norm": 5.047766208648682, "learning_rate": 3.1610898158919457e-06, "loss": 0.268, "step": 12573 }, { "epoch": 0.623741257006796, "grad_norm": 4.889415740966797, "learning_rate": 3.16035791384489e-06, "loss": 0.1598, "step": 12574 }, { "epoch": 0.6237908626419961, "grad_norm": 6.383452415466309, "learning_rate": 3.159626057384997e-06, "loss": 0.2674, "step": 12575 }, { "epoch": 0.6238404682771963, "grad_norm": 6.218066215515137, "learning_rate": 3.158894246530401e-06, "loss": 0.2636, "step": 12576 }, { "epoch": 0.6238900739123965, "grad_norm": 5.9271769523620605, "learning_rate": 3.1581624812992405e-06, "loss": 0.3178, "step": 12577 }, { "epoch": 0.6239396795475967, "grad_norm": 9.699820518493652, "learning_rate": 3.1574307617096477e-06, "loss": 0.3824, "step": 12578 }, { "epoch": 0.6239892851827967, "grad_norm": 8.437421798706055, "learning_rate": 3.156699087779751e-06, "loss": 0.3529, "step": 12579 }, { "epoch": 0.6240388908179969, "grad_norm": 12.28696060180664, "learning_rate": 3.1559674595276873e-06, "loss": 0.3801, "step": 12580 }, { "epoch": 0.6240884964531971, "grad_norm": 4.576864242553711, "learning_rate": 3.155235876971584e-06, "loss": 0.3258, "step": 12581 }, { "epoch": 0.6241381020883973, "grad_norm": 7.1126909255981445, "learning_rate": 3.1545043401295678e-06, "loss": 0.3263, "step": 12582 }, { "epoch": 0.6241877077235974, "grad_norm": 6.4921464920043945, "learning_rate": 3.1537728490197713e-06, "loss": 0.258, "step": 12583 }, { "epoch": 0.6242373133587975, "grad_norm": 5.7147111892700195, "learning_rate": 3.1530414036603165e-06, "loss": 0.2393, "step": 12584 }, { "epoch": 0.6242869189939977, "grad_norm": 7.065891265869141, "learning_rate": 3.152310004069333e-06, "loss": 0.3251, "step": 12585 }, { "epoch": 0.6243365246291979, "grad_norm": 9.27868366241455, "learning_rate": 3.1515786502649437e-06, "loss": 0.2603, "step": 12586 }, { "epoch": 0.624386130264398, "grad_norm": 6.235972881317139, "learning_rate": 3.1508473422652695e-06, "loss": 0.3047, "step": 12587 }, { "epoch": 0.6244357358995982, "grad_norm": 9.612869262695312, "learning_rate": 3.1501160800884366e-06, "loss": 0.3451, "step": 12588 }, { "epoch": 0.6244853415347984, "grad_norm": 4.502534866333008, "learning_rate": 3.1493848637525643e-06, "loss": 0.3476, "step": 12589 }, { "epoch": 0.6245349471699985, "grad_norm": 6.482748985290527, "learning_rate": 3.1486536932757694e-06, "loss": 0.2642, "step": 12590 }, { "epoch": 0.6245845528051986, "grad_norm": 8.983951568603516, "learning_rate": 3.147922568676176e-06, "loss": 0.341, "step": 12591 }, { "epoch": 0.6246341584403988, "grad_norm": 7.304887294769287, "learning_rate": 3.147191489971899e-06, "loss": 0.3221, "step": 12592 }, { "epoch": 0.624683764075599, "grad_norm": 5.340207576751709, "learning_rate": 3.1464604571810536e-06, "loss": 0.2846, "step": 12593 }, { "epoch": 0.6247333697107992, "grad_norm": 7.2369160652160645, "learning_rate": 3.145729470321758e-06, "loss": 0.3499, "step": 12594 }, { "epoch": 0.6247829753459994, "grad_norm": 5.797089576721191, "learning_rate": 3.1449985294121243e-06, "loss": 0.3223, "step": 12595 }, { "epoch": 0.6248325809811994, "grad_norm": 8.881843566894531, "learning_rate": 3.1442676344702653e-06, "loss": 0.3876, "step": 12596 }, { "epoch": 0.6248821866163996, "grad_norm": 5.811243057250977, "learning_rate": 3.1435367855142947e-06, "loss": 0.2417, "step": 12597 }, { "epoch": 0.6249317922515998, "grad_norm": 4.746877670288086, "learning_rate": 3.1428059825623224e-06, "loss": 0.2851, "step": 12598 }, { "epoch": 0.6249813978868, "grad_norm": 8.070601463317871, "learning_rate": 3.1420752256324577e-06, "loss": 0.2999, "step": 12599 }, { "epoch": 0.6250310035220001, "grad_norm": 12.318321228027344, "learning_rate": 3.141344514742809e-06, "loss": 0.4432, "step": 12600 }, { "epoch": 0.6250806091572002, "grad_norm": 9.125027656555176, "learning_rate": 3.140613849911484e-06, "loss": 0.2834, "step": 12601 }, { "epoch": 0.6251302147924004, "grad_norm": 7.249276161193848, "learning_rate": 3.13988323115659e-06, "loss": 0.351, "step": 12602 }, { "epoch": 0.6251798204276006, "grad_norm": 8.482400894165039, "learning_rate": 3.13915265849623e-06, "loss": 0.3267, "step": 12603 }, { "epoch": 0.6252294260628007, "grad_norm": 6.3341522216796875, "learning_rate": 3.1384221319485086e-06, "loss": 0.3817, "step": 12604 }, { "epoch": 0.6252790316980009, "grad_norm": 15.504098892211914, "learning_rate": 3.137691651531529e-06, "loss": 0.2919, "step": 12605 }, { "epoch": 0.6253286373332011, "grad_norm": 12.764810562133789, "learning_rate": 3.136961217263394e-06, "loss": 0.2447, "step": 12606 }, { "epoch": 0.6253782429684012, "grad_norm": 7.3108954429626465, "learning_rate": 3.1362308291622014e-06, "loss": 0.1721, "step": 12607 }, { "epoch": 0.6254278486036013, "grad_norm": 4.095725059509277, "learning_rate": 3.135500487246052e-06, "loss": 0.1916, "step": 12608 }, { "epoch": 0.6254774542388015, "grad_norm": 7.634408473968506, "learning_rate": 3.1347701915330455e-06, "loss": 0.3036, "step": 12609 }, { "epoch": 0.6255270598740017, "grad_norm": 7.21493673324585, "learning_rate": 3.1340399420412745e-06, "loss": 0.2994, "step": 12610 }, { "epoch": 0.6255766655092019, "grad_norm": 7.402080059051514, "learning_rate": 3.13330973878884e-06, "loss": 0.3725, "step": 12611 }, { "epoch": 0.6256262711444021, "grad_norm": 7.781299114227295, "learning_rate": 3.132579581793835e-06, "loss": 0.2841, "step": 12612 }, { "epoch": 0.6256758767796021, "grad_norm": 5.308025360107422, "learning_rate": 3.1318494710743502e-06, "loss": 0.3172, "step": 12613 }, { "epoch": 0.6257254824148023, "grad_norm": 11.285568237304688, "learning_rate": 3.1311194066484828e-06, "loss": 0.3215, "step": 12614 }, { "epoch": 0.6257750880500025, "grad_norm": 5.872693061828613, "learning_rate": 3.1303893885343217e-06, "loss": 0.2306, "step": 12615 }, { "epoch": 0.6258246936852027, "grad_norm": 5.014469146728516, "learning_rate": 3.1296594167499554e-06, "loss": 0.2806, "step": 12616 }, { "epoch": 0.6258742993204028, "grad_norm": 6.210236549377441, "learning_rate": 3.1289294913134773e-06, "loss": 0.2457, "step": 12617 }, { "epoch": 0.6259239049556029, "grad_norm": 9.684288024902344, "learning_rate": 3.1281996122429704e-06, "loss": 0.365, "step": 12618 }, { "epoch": 0.6259735105908031, "grad_norm": 10.743510246276855, "learning_rate": 3.1274697795565255e-06, "loss": 0.3268, "step": 12619 }, { "epoch": 0.6260231162260033, "grad_norm": 5.450461387634277, "learning_rate": 3.1267399932722264e-06, "loss": 0.2605, "step": 12620 }, { "epoch": 0.6260727218612034, "grad_norm": 12.32923412322998, "learning_rate": 3.126010253408156e-06, "loss": 0.3509, "step": 12621 }, { "epoch": 0.6261223274964036, "grad_norm": 4.092384338378906, "learning_rate": 3.125280559982401e-06, "loss": 0.268, "step": 12622 }, { "epoch": 0.6261719331316038, "grad_norm": 9.953757286071777, "learning_rate": 3.1245509130130424e-06, "loss": 0.3523, "step": 12623 }, { "epoch": 0.6262215387668039, "grad_norm": 5.175746440887451, "learning_rate": 3.1238213125181583e-06, "loss": 0.2677, "step": 12624 }, { "epoch": 0.626271144402004, "grad_norm": 5.956027507781982, "learning_rate": 3.1230917585158322e-06, "loss": 0.2705, "step": 12625 }, { "epoch": 0.6263207500372042, "grad_norm": 6.415497303009033, "learning_rate": 3.122362251024141e-06, "loss": 0.2255, "step": 12626 }, { "epoch": 0.6263703556724044, "grad_norm": 7.3207526206970215, "learning_rate": 3.121632790061162e-06, "loss": 0.379, "step": 12627 }, { "epoch": 0.6264199613076046, "grad_norm": 5.33299446105957, "learning_rate": 3.1209033756449738e-06, "loss": 0.2856, "step": 12628 }, { "epoch": 0.6264695669428048, "grad_norm": 5.15509033203125, "learning_rate": 3.120174007793649e-06, "loss": 0.2002, "step": 12629 }, { "epoch": 0.6265191725780048, "grad_norm": 6.479535102844238, "learning_rate": 3.1194446865252615e-06, "loss": 0.3592, "step": 12630 }, { "epoch": 0.626568778213205, "grad_norm": 9.55368709564209, "learning_rate": 3.118715411857887e-06, "loss": 0.415, "step": 12631 }, { "epoch": 0.6266183838484052, "grad_norm": 11.095723152160645, "learning_rate": 3.1179861838095954e-06, "loss": 0.3538, "step": 12632 }, { "epoch": 0.6266679894836054, "grad_norm": 4.825717449188232, "learning_rate": 3.1172570023984556e-06, "loss": 0.2373, "step": 12633 }, { "epoch": 0.6267175951188055, "grad_norm": 5.841989040374756, "learning_rate": 3.116527867642541e-06, "loss": 0.3056, "step": 12634 }, { "epoch": 0.6267672007540056, "grad_norm": 8.76035213470459, "learning_rate": 3.1157987795599153e-06, "loss": 0.3058, "step": 12635 }, { "epoch": 0.6268168063892058, "grad_norm": 5.999666690826416, "learning_rate": 3.115069738168651e-06, "loss": 0.251, "step": 12636 }, { "epoch": 0.626866412024406, "grad_norm": 11.050877571105957, "learning_rate": 3.114340743486811e-06, "loss": 0.342, "step": 12637 }, { "epoch": 0.6269160176596061, "grad_norm": 6.483068466186523, "learning_rate": 3.1136117955324587e-06, "loss": 0.3642, "step": 12638 }, { "epoch": 0.6269656232948063, "grad_norm": 15.89300537109375, "learning_rate": 3.112882894323661e-06, "loss": 0.2684, "step": 12639 }, { "epoch": 0.6270152289300065, "grad_norm": 5.555983543395996, "learning_rate": 3.112154039878479e-06, "loss": 0.3004, "step": 12640 }, { "epoch": 0.6270648345652066, "grad_norm": 39.31712341308594, "learning_rate": 3.1114252322149715e-06, "loss": 0.3546, "step": 12641 }, { "epoch": 0.6271144402004067, "grad_norm": 8.335576057434082, "learning_rate": 3.1106964713512033e-06, "loss": 0.3044, "step": 12642 }, { "epoch": 0.6271640458356069, "grad_norm": 9.541888236999512, "learning_rate": 3.109967757305231e-06, "loss": 0.3854, "step": 12643 }, { "epoch": 0.6272136514708071, "grad_norm": 7.493049621582031, "learning_rate": 3.1092390900951107e-06, "loss": 0.2175, "step": 12644 }, { "epoch": 0.6272632571060073, "grad_norm": 5.345727443695068, "learning_rate": 3.108510469738903e-06, "loss": 0.3137, "step": 12645 }, { "epoch": 0.6273128627412075, "grad_norm": 7.92905330657959, "learning_rate": 3.10778189625466e-06, "loss": 0.3916, "step": 12646 }, { "epoch": 0.6273624683764075, "grad_norm": 8.136392593383789, "learning_rate": 3.107053369660439e-06, "loss": 0.3028, "step": 12647 }, { "epoch": 0.6274120740116077, "grad_norm": 7.253669738769531, "learning_rate": 3.1063248899742914e-06, "loss": 0.1757, "step": 12648 }, { "epoch": 0.6274616796468079, "grad_norm": 12.132457733154297, "learning_rate": 3.1055964572142694e-06, "loss": 0.2707, "step": 12649 }, { "epoch": 0.6275112852820081, "grad_norm": 7.052136421203613, "learning_rate": 3.1048680713984236e-06, "loss": 0.231, "step": 12650 }, { "epoch": 0.6275608909172082, "grad_norm": 4.318403720855713, "learning_rate": 3.1041397325448054e-06, "loss": 0.2337, "step": 12651 }, { "epoch": 0.6276104965524083, "grad_norm": 5.953775882720947, "learning_rate": 3.103411440671461e-06, "loss": 0.2606, "step": 12652 }, { "epoch": 0.6276601021876085, "grad_norm": 7.82682466506958, "learning_rate": 3.1026831957964398e-06, "loss": 0.2551, "step": 12653 }, { "epoch": 0.6277097078228087, "grad_norm": 4.005556583404541, "learning_rate": 3.101954997937786e-06, "loss": 0.2601, "step": 12654 }, { "epoch": 0.6277593134580088, "grad_norm": 7.009956359863281, "learning_rate": 3.101226847113547e-06, "loss": 0.323, "step": 12655 }, { "epoch": 0.627808919093209, "grad_norm": 9.172697067260742, "learning_rate": 3.1004987433417668e-06, "loss": 0.3615, "step": 12656 }, { "epoch": 0.6278585247284092, "grad_norm": 5.376890182495117, "learning_rate": 3.0997706866404854e-06, "loss": 0.3125, "step": 12657 }, { "epoch": 0.6279081303636093, "grad_norm": 7.610628604888916, "learning_rate": 3.099042677027746e-06, "loss": 0.2477, "step": 12658 }, { "epoch": 0.6279577359988094, "grad_norm": 6.495904922485352, "learning_rate": 3.0983147145215888e-06, "loss": 0.2976, "step": 12659 }, { "epoch": 0.6280073416340096, "grad_norm": 12.513968467712402, "learning_rate": 3.0975867991400542e-06, "loss": 0.4141, "step": 12660 }, { "epoch": 0.6280569472692098, "grad_norm": 6.676316261291504, "learning_rate": 3.096858930901179e-06, "loss": 0.2564, "step": 12661 }, { "epoch": 0.62810655290441, "grad_norm": 7.431310176849365, "learning_rate": 3.0961311098229996e-06, "loss": 0.2987, "step": 12662 }, { "epoch": 0.6281561585396102, "grad_norm": 11.233927726745605, "learning_rate": 3.0954033359235534e-06, "loss": 0.4584, "step": 12663 }, { "epoch": 0.6282057641748102, "grad_norm": 5.834957122802734, "learning_rate": 3.094675609220873e-06, "loss": 0.2497, "step": 12664 }, { "epoch": 0.6282553698100104, "grad_norm": 7.911571025848389, "learning_rate": 3.093947929732994e-06, "loss": 0.3742, "step": 12665 }, { "epoch": 0.6283049754452106, "grad_norm": 42.087589263916016, "learning_rate": 3.093220297477948e-06, "loss": 0.3541, "step": 12666 }, { "epoch": 0.6283545810804108, "grad_norm": 10.95927619934082, "learning_rate": 3.0924927124737636e-06, "loss": 0.3349, "step": 12667 }, { "epoch": 0.6284041867156109, "grad_norm": 4.672216892242432, "learning_rate": 3.091765174738474e-06, "loss": 0.2594, "step": 12668 }, { "epoch": 0.628453792350811, "grad_norm": 5.814990997314453, "learning_rate": 3.0910376842901067e-06, "loss": 0.2528, "step": 12669 }, { "epoch": 0.6285033979860112, "grad_norm": 9.161664962768555, "learning_rate": 3.090310241146688e-06, "loss": 0.3185, "step": 12670 }, { "epoch": 0.6285530036212114, "grad_norm": 11.777846336364746, "learning_rate": 3.0895828453262466e-06, "loss": 0.3993, "step": 12671 }, { "epoch": 0.6286026092564115, "grad_norm": 10.101808547973633, "learning_rate": 3.0888554968468044e-06, "loss": 0.4033, "step": 12672 }, { "epoch": 0.6286522148916117, "grad_norm": 4.14893102645874, "learning_rate": 3.0881281957263896e-06, "loss": 0.3022, "step": 12673 }, { "epoch": 0.6287018205268119, "grad_norm": 6.0823283195495605, "learning_rate": 3.087400941983023e-06, "loss": 0.2307, "step": 12674 }, { "epoch": 0.628751426162012, "grad_norm": 8.623312950134277, "learning_rate": 3.0866737356347243e-06, "loss": 0.2897, "step": 12675 }, { "epoch": 0.6288010317972121, "grad_norm": 9.299751281738281, "learning_rate": 3.085946576699518e-06, "loss": 0.3809, "step": 12676 }, { "epoch": 0.6288506374324123, "grad_norm": 3.064646005630493, "learning_rate": 3.0852194651954214e-06, "loss": 0.2399, "step": 12677 }, { "epoch": 0.6289002430676125, "grad_norm": 11.945345878601074, "learning_rate": 3.084492401140451e-06, "loss": 0.4038, "step": 12678 }, { "epoch": 0.6289498487028127, "grad_norm": 12.862204551696777, "learning_rate": 3.083765384552626e-06, "loss": 0.3493, "step": 12679 }, { "epoch": 0.6289994543380129, "grad_norm": 4.935543537139893, "learning_rate": 3.0830384154499626e-06, "loss": 0.2787, "step": 12680 }, { "epoch": 0.6290490599732129, "grad_norm": 5.4602952003479, "learning_rate": 3.082311493850473e-06, "loss": 0.227, "step": 12681 }, { "epoch": 0.6290986656084131, "grad_norm": 7.5723066329956055, "learning_rate": 3.0815846197721727e-06, "loss": 0.2758, "step": 12682 }, { "epoch": 0.6291482712436133, "grad_norm": 4.897893905639648, "learning_rate": 3.080857793233074e-06, "loss": 0.2974, "step": 12683 }, { "epoch": 0.6291978768788135, "grad_norm": 9.193093299865723, "learning_rate": 3.0801310142511855e-06, "loss": 0.3778, "step": 12684 }, { "epoch": 0.6292474825140136, "grad_norm": 6.640688896179199, "learning_rate": 3.0794042828445204e-06, "loss": 0.2158, "step": 12685 }, { "epoch": 0.6292970881492137, "grad_norm": 7.083708763122559, "learning_rate": 3.0786775990310857e-06, "loss": 0.3154, "step": 12686 }, { "epoch": 0.6293466937844139, "grad_norm": 10.707045555114746, "learning_rate": 3.077950962828888e-06, "loss": 0.3702, "step": 12687 }, { "epoch": 0.6293962994196141, "grad_norm": 7.738902568817139, "learning_rate": 3.077224374255936e-06, "loss": 0.3296, "step": 12688 }, { "epoch": 0.6294459050548142, "grad_norm": 4.601118087768555, "learning_rate": 3.0764978333302325e-06, "loss": 0.2742, "step": 12689 }, { "epoch": 0.6294955106900144, "grad_norm": 4.237093925476074, "learning_rate": 3.0757713400697846e-06, "loss": 0.2519, "step": 12690 }, { "epoch": 0.6295451163252146, "grad_norm": 8.018595695495605, "learning_rate": 3.0750448944925935e-06, "loss": 0.2714, "step": 12691 }, { "epoch": 0.6295947219604147, "grad_norm": 6.384039878845215, "learning_rate": 3.0743184966166583e-06, "loss": 0.395, "step": 12692 }, { "epoch": 0.6296443275956148, "grad_norm": 6.538338661193848, "learning_rate": 3.073592146459984e-06, "loss": 0.3178, "step": 12693 }, { "epoch": 0.629693933230815, "grad_norm": 6.667537689208984, "learning_rate": 3.072865844040568e-06, "loss": 0.3187, "step": 12694 }, { "epoch": 0.6297435388660152, "grad_norm": 15.760913848876953, "learning_rate": 3.072139589376406e-06, "loss": 0.2447, "step": 12695 }, { "epoch": 0.6297931445012154, "grad_norm": 10.775836944580078, "learning_rate": 3.0714133824854985e-06, "loss": 0.3006, "step": 12696 }, { "epoch": 0.6298427501364156, "grad_norm": 12.345747947692871, "learning_rate": 3.0706872233858405e-06, "loss": 0.4569, "step": 12697 }, { "epoch": 0.6298923557716156, "grad_norm": 8.088743209838867, "learning_rate": 3.069961112095423e-06, "loss": 0.2952, "step": 12698 }, { "epoch": 0.6299419614068158, "grad_norm": 7.53921365737915, "learning_rate": 3.069235048632245e-06, "loss": 0.2483, "step": 12699 }, { "epoch": 0.629991567042016, "grad_norm": 6.797684192657471, "learning_rate": 3.0685090330142954e-06, "loss": 0.2306, "step": 12700 }, { "epoch": 0.6300411726772162, "grad_norm": 8.19563102722168, "learning_rate": 3.0677830652595643e-06, "loss": 0.3112, "step": 12701 }, { "epoch": 0.6300907783124163, "grad_norm": 5.610637664794922, "learning_rate": 3.0670571453860443e-06, "loss": 0.2905, "step": 12702 }, { "epoch": 0.6301403839476164, "grad_norm": 4.717434883117676, "learning_rate": 3.066331273411721e-06, "loss": 0.2461, "step": 12703 }, { "epoch": 0.6301899895828166, "grad_norm": 4.681147575378418, "learning_rate": 3.065605449354585e-06, "loss": 0.2306, "step": 12704 }, { "epoch": 0.6302395952180168, "grad_norm": 9.885808944702148, "learning_rate": 3.064879673232621e-06, "loss": 0.2939, "step": 12705 }, { "epoch": 0.6302892008532169, "grad_norm": 4.392672538757324, "learning_rate": 3.064153945063813e-06, "loss": 0.207, "step": 12706 }, { "epoch": 0.6303388064884171, "grad_norm": 18.690671920776367, "learning_rate": 3.063428264866146e-06, "loss": 0.4537, "step": 12707 }, { "epoch": 0.6303884121236173, "grad_norm": 4.293299198150635, "learning_rate": 3.0627026326576026e-06, "loss": 0.2418, "step": 12708 }, { "epoch": 0.6304380177588174, "grad_norm": 5.836491584777832, "learning_rate": 3.061977048456165e-06, "loss": 0.2838, "step": 12709 }, { "epoch": 0.6304876233940175, "grad_norm": 8.407805442810059, "learning_rate": 3.061251512279813e-06, "loss": 0.2588, "step": 12710 }, { "epoch": 0.6305372290292177, "grad_norm": 11.30429458618164, "learning_rate": 3.060526024146525e-06, "loss": 0.3319, "step": 12711 }, { "epoch": 0.6305868346644179, "grad_norm": 5.7669172286987305, "learning_rate": 3.0598005840742788e-06, "loss": 0.2759, "step": 12712 }, { "epoch": 0.6306364402996181, "grad_norm": 7.847047328948975, "learning_rate": 3.0590751920810524e-06, "loss": 0.3188, "step": 12713 }, { "epoch": 0.6306860459348181, "grad_norm": 5.709244251251221, "learning_rate": 3.0583498481848217e-06, "loss": 0.2305, "step": 12714 }, { "epoch": 0.6307356515700183, "grad_norm": 4.292537689208984, "learning_rate": 3.0576245524035586e-06, "loss": 0.2862, "step": 12715 }, { "epoch": 0.6307852572052185, "grad_norm": 18.29916000366211, "learning_rate": 3.0568993047552374e-06, "loss": 0.4509, "step": 12716 }, { "epoch": 0.6308348628404187, "grad_norm": 5.452953815460205, "learning_rate": 3.056174105257832e-06, "loss": 0.2273, "step": 12717 }, { "epoch": 0.6308844684756189, "grad_norm": 8.891153335571289, "learning_rate": 3.05544895392931e-06, "loss": 0.2686, "step": 12718 }, { "epoch": 0.630934074110819, "grad_norm": 14.93078899383545, "learning_rate": 3.054723850787644e-06, "loss": 0.4548, "step": 12719 }, { "epoch": 0.6309836797460191, "grad_norm": 5.122641563415527, "learning_rate": 3.0539987958508013e-06, "loss": 0.2588, "step": 12720 }, { "epoch": 0.6310332853812193, "grad_norm": 9.37983226776123, "learning_rate": 3.053273789136746e-06, "loss": 0.3506, "step": 12721 }, { "epoch": 0.6310828910164195, "grad_norm": 8.812544822692871, "learning_rate": 3.0525488306634486e-06, "loss": 0.2588, "step": 12722 }, { "epoch": 0.6311324966516196, "grad_norm": 5.547215461730957, "learning_rate": 3.051823920448873e-06, "loss": 0.2649, "step": 12723 }, { "epoch": 0.6311821022868198, "grad_norm": 5.3203253746032715, "learning_rate": 3.05109905851098e-06, "loss": 0.3129, "step": 12724 }, { "epoch": 0.63123170792202, "grad_norm": 7.295323848724365, "learning_rate": 3.0503742448677355e-06, "loss": 0.3098, "step": 12725 }, { "epoch": 0.6312813135572201, "grad_norm": 12.415556907653809, "learning_rate": 3.049649479537097e-06, "loss": 0.3428, "step": 12726 }, { "epoch": 0.6313309191924202, "grad_norm": 9.072728157043457, "learning_rate": 3.048924762537029e-06, "loss": 0.3711, "step": 12727 }, { "epoch": 0.6313805248276204, "grad_norm": 9.062568664550781, "learning_rate": 3.048200093885487e-06, "loss": 0.3211, "step": 12728 }, { "epoch": 0.6314301304628206, "grad_norm": 15.013733863830566, "learning_rate": 3.0474754736004285e-06, "loss": 0.3319, "step": 12729 }, { "epoch": 0.6314797360980208, "grad_norm": 13.636754035949707, "learning_rate": 3.046750901699813e-06, "loss": 0.2971, "step": 12730 }, { "epoch": 0.6315293417332208, "grad_norm": 5.834914684295654, "learning_rate": 3.046026378201593e-06, "loss": 0.2644, "step": 12731 }, { "epoch": 0.631578947368421, "grad_norm": 8.07967758178711, "learning_rate": 3.045301903123722e-06, "loss": 0.2965, "step": 12732 }, { "epoch": 0.6316285530036212, "grad_norm": 5.823596477508545, "learning_rate": 3.044577476484156e-06, "loss": 0.2373, "step": 12733 }, { "epoch": 0.6316781586388214, "grad_norm": 5.230655193328857, "learning_rate": 3.043853098300844e-06, "loss": 0.1421, "step": 12734 }, { "epoch": 0.6317277642740216, "grad_norm": 11.205013275146484, "learning_rate": 3.0431287685917354e-06, "loss": 0.2404, "step": 12735 }, { "epoch": 0.6317773699092217, "grad_norm": 12.637558937072754, "learning_rate": 3.042404487374784e-06, "loss": 0.33, "step": 12736 }, { "epoch": 0.6318269755444218, "grad_norm": 9.77705192565918, "learning_rate": 3.0416802546679335e-06, "loss": 0.224, "step": 12737 }, { "epoch": 0.631876581179622, "grad_norm": 4.593222141265869, "learning_rate": 3.040956070489131e-06, "loss": 0.3178, "step": 12738 }, { "epoch": 0.6319261868148222, "grad_norm": 5.8008270263671875, "learning_rate": 3.0402319348563246e-06, "loss": 0.2632, "step": 12739 }, { "epoch": 0.6319757924500223, "grad_norm": 10.753878593444824, "learning_rate": 3.0395078477874586e-06, "loss": 0.2597, "step": 12740 }, { "epoch": 0.6320253980852225, "grad_norm": 10.20333194732666, "learning_rate": 3.038783809300472e-06, "loss": 0.3215, "step": 12741 }, { "epoch": 0.6320750037204227, "grad_norm": 6.872356414794922, "learning_rate": 3.0380598194133116e-06, "loss": 0.2658, "step": 12742 }, { "epoch": 0.6321246093556228, "grad_norm": 4.709959506988525, "learning_rate": 3.0373358781439143e-06, "loss": 0.2465, "step": 12743 }, { "epoch": 0.6321742149908229, "grad_norm": 8.36800479888916, "learning_rate": 3.0366119855102243e-06, "loss": 0.2118, "step": 12744 }, { "epoch": 0.6322238206260231, "grad_norm": 4.146790027618408, "learning_rate": 3.035888141530178e-06, "loss": 0.2151, "step": 12745 }, { "epoch": 0.6322734262612233, "grad_norm": 11.012872695922852, "learning_rate": 3.0351643462217086e-06, "loss": 0.382, "step": 12746 }, { "epoch": 0.6323230318964235, "grad_norm": 17.850366592407227, "learning_rate": 3.034440599602758e-06, "loss": 0.534, "step": 12747 }, { "epoch": 0.6323726375316235, "grad_norm": 8.700422286987305, "learning_rate": 3.0337169016912586e-06, "loss": 0.2262, "step": 12748 }, { "epoch": 0.6324222431668237, "grad_norm": 19.249103546142578, "learning_rate": 3.032993252505142e-06, "loss": 0.3572, "step": 12749 }, { "epoch": 0.6324718488020239, "grad_norm": 19.49064826965332, "learning_rate": 3.032269652062344e-06, "loss": 0.3087, "step": 12750 }, { "epoch": 0.6325214544372241, "grad_norm": 8.364962577819824, "learning_rate": 3.0315461003807943e-06, "loss": 0.2612, "step": 12751 }, { "epoch": 0.6325710600724243, "grad_norm": 7.300319194793701, "learning_rate": 3.030822597478421e-06, "loss": 0.2896, "step": 12752 }, { "epoch": 0.6326206657076244, "grad_norm": 8.315266609191895, "learning_rate": 3.0300991433731565e-06, "loss": 0.3526, "step": 12753 }, { "epoch": 0.6326702713428245, "grad_norm": 9.77087116241455, "learning_rate": 3.029375738082926e-06, "loss": 0.3219, "step": 12754 }, { "epoch": 0.6327198769780247, "grad_norm": 4.838967800140381, "learning_rate": 3.0286523816256535e-06, "loss": 0.2891, "step": 12755 }, { "epoch": 0.6327694826132249, "grad_norm": 8.273707389831543, "learning_rate": 3.0279290740192692e-06, "loss": 0.3349, "step": 12756 }, { "epoch": 0.632819088248425, "grad_norm": 6.531017780303955, "learning_rate": 3.027205815281694e-06, "loss": 0.2935, "step": 12757 }, { "epoch": 0.6328686938836252, "grad_norm": 9.229125022888184, "learning_rate": 3.0264826054308506e-06, "loss": 0.274, "step": 12758 }, { "epoch": 0.6329182995188254, "grad_norm": 6.529665946960449, "learning_rate": 3.0257594444846623e-06, "loss": 0.3127, "step": 12759 }, { "epoch": 0.6329679051540255, "grad_norm": 7.773346900939941, "learning_rate": 3.0250363324610455e-06, "loss": 0.2819, "step": 12760 }, { "epoch": 0.6330175107892256, "grad_norm": 4.420973300933838, "learning_rate": 3.0243132693779242e-06, "loss": 0.2697, "step": 12761 }, { "epoch": 0.6330671164244258, "grad_norm": 5.057072162628174, "learning_rate": 3.0235902552532125e-06, "loss": 0.212, "step": 12762 }, { "epoch": 0.633116722059626, "grad_norm": 6.90811014175415, "learning_rate": 3.0228672901048284e-06, "loss": 0.2519, "step": 12763 }, { "epoch": 0.6331663276948262, "grad_norm": 4.292438507080078, "learning_rate": 3.0221443739506884e-06, "loss": 0.2582, "step": 12764 }, { "epoch": 0.6332159333300262, "grad_norm": 6.771633625030518, "learning_rate": 3.0214215068087037e-06, "loss": 0.2549, "step": 12765 }, { "epoch": 0.6332655389652264, "grad_norm": 6.00813102722168, "learning_rate": 3.0206986886967905e-06, "loss": 0.1765, "step": 12766 }, { "epoch": 0.6333151446004266, "grad_norm": 3.8106484413146973, "learning_rate": 3.019975919632858e-06, "loss": 0.2239, "step": 12767 }, { "epoch": 0.6333647502356268, "grad_norm": 13.310051918029785, "learning_rate": 3.0192531996348196e-06, "loss": 0.3811, "step": 12768 }, { "epoch": 0.633414355870827, "grad_norm": 5.933167934417725, "learning_rate": 3.0185305287205813e-06, "loss": 0.2392, "step": 12769 }, { "epoch": 0.6334639615060271, "grad_norm": 5.433349132537842, "learning_rate": 3.017807906908053e-06, "loss": 0.2697, "step": 12770 }, { "epoch": 0.6335135671412272, "grad_norm": 12.777947425842285, "learning_rate": 3.0170853342151424e-06, "loss": 0.2615, "step": 12771 }, { "epoch": 0.6335631727764274, "grad_norm": 7.300866603851318, "learning_rate": 3.016362810659752e-06, "loss": 0.2811, "step": 12772 }, { "epoch": 0.6336127784116276, "grad_norm": 10.019171714782715, "learning_rate": 3.0156403362597907e-06, "loss": 0.2637, "step": 12773 }, { "epoch": 0.6336623840468277, "grad_norm": 7.4506072998046875, "learning_rate": 3.0149179110331596e-06, "loss": 0.2523, "step": 12774 }, { "epoch": 0.6337119896820279, "grad_norm": 20.436763763427734, "learning_rate": 3.014195534997758e-06, "loss": 0.3784, "step": 12775 }, { "epoch": 0.6337615953172281, "grad_norm": 5.276311874389648, "learning_rate": 3.0134732081714922e-06, "loss": 0.2414, "step": 12776 }, { "epoch": 0.6338112009524282, "grad_norm": 7.371756553649902, "learning_rate": 3.0127509305722587e-06, "loss": 0.2866, "step": 12777 }, { "epoch": 0.6338608065876283, "grad_norm": 6.217840194702148, "learning_rate": 3.012028702217954e-06, "loss": 0.3306, "step": 12778 }, { "epoch": 0.6339104122228285, "grad_norm": 7.352259159088135, "learning_rate": 3.0113065231264793e-06, "loss": 0.2544, "step": 12779 }, { "epoch": 0.6339600178580287, "grad_norm": 7.266221046447754, "learning_rate": 3.0105843933157265e-06, "loss": 0.1999, "step": 12780 }, { "epoch": 0.6340096234932289, "grad_norm": 5.080777645111084, "learning_rate": 3.0098623128035944e-06, "loss": 0.3338, "step": 12781 }, { "epoch": 0.6340592291284289, "grad_norm": 15.08170223236084, "learning_rate": 3.009140281607974e-06, "loss": 0.3979, "step": 12782 }, { "epoch": 0.6341088347636291, "grad_norm": 7.053506374359131, "learning_rate": 3.008418299746756e-06, "loss": 0.3298, "step": 12783 }, { "epoch": 0.6341584403988293, "grad_norm": 9.341382026672363, "learning_rate": 3.0076963672378355e-06, "loss": 0.3485, "step": 12784 }, { "epoch": 0.6342080460340295, "grad_norm": 6.880833148956299, "learning_rate": 3.0069744840991007e-06, "loss": 0.2871, "step": 12785 }, { "epoch": 0.6342576516692296, "grad_norm": 6.934995174407959, "learning_rate": 3.0062526503484375e-06, "loss": 0.278, "step": 12786 }, { "epoch": 0.6343072573044298, "grad_norm": 12.701546669006348, "learning_rate": 3.0055308660037373e-06, "loss": 0.279, "step": 12787 }, { "epoch": 0.6343568629396299, "grad_norm": 6.636744976043701, "learning_rate": 3.004809131082885e-06, "loss": 0.3403, "step": 12788 }, { "epoch": 0.6344064685748301, "grad_norm": 7.7516703605651855, "learning_rate": 3.0040874456037624e-06, "loss": 0.2932, "step": 12789 }, { "epoch": 0.6344560742100303, "grad_norm": 5.458218097686768, "learning_rate": 3.0033658095842578e-06, "loss": 0.1605, "step": 12790 }, { "epoch": 0.6345056798452304, "grad_norm": 9.096674919128418, "learning_rate": 3.0026442230422516e-06, "loss": 0.2991, "step": 12791 }, { "epoch": 0.6345552854804306, "grad_norm": 9.020706176757812, "learning_rate": 3.001922685995623e-06, "loss": 0.2693, "step": 12792 }, { "epoch": 0.6346048911156308, "grad_norm": 5.88248348236084, "learning_rate": 3.001201198462257e-06, "loss": 0.2794, "step": 12793 }, { "epoch": 0.6346544967508309, "grad_norm": 10.327500343322754, "learning_rate": 3.0004797604600284e-06, "loss": 0.3313, "step": 12794 }, { "epoch": 0.634704102386031, "grad_norm": 8.389033317565918, "learning_rate": 2.9997583720068134e-06, "loss": 0.3873, "step": 12795 }, { "epoch": 0.6347537080212312, "grad_norm": 13.679306983947754, "learning_rate": 2.9990370331204942e-06, "loss": 0.4731, "step": 12796 }, { "epoch": 0.6348033136564314, "grad_norm": 4.163003444671631, "learning_rate": 2.998315743818939e-06, "loss": 0.2521, "step": 12797 }, { "epoch": 0.6348529192916316, "grad_norm": 7.5323486328125, "learning_rate": 2.9975945041200282e-06, "loss": 0.3275, "step": 12798 }, { "epoch": 0.6349025249268316, "grad_norm": 9.380084991455078, "learning_rate": 2.9968733140416313e-06, "loss": 0.3604, "step": 12799 }, { "epoch": 0.6349521305620318, "grad_norm": 5.984190464019775, "learning_rate": 2.9961521736016173e-06, "loss": 0.2356, "step": 12800 }, { "epoch": 0.635001736197232, "grad_norm": 10.09390640258789, "learning_rate": 2.995431082817861e-06, "loss": 0.3545, "step": 12801 }, { "epoch": 0.6350513418324322, "grad_norm": 5.716314315795898, "learning_rate": 2.99471004170823e-06, "loss": 0.2372, "step": 12802 }, { "epoch": 0.6351009474676323, "grad_norm": 7.045124530792236, "learning_rate": 2.9939890502905877e-06, "loss": 0.2213, "step": 12803 }, { "epoch": 0.6351505531028325, "grad_norm": 4.5693769454956055, "learning_rate": 2.9932681085828075e-06, "loss": 0.2876, "step": 12804 }, { "epoch": 0.6352001587380326, "grad_norm": 7.006916522979736, "learning_rate": 2.9925472166027507e-06, "loss": 0.2941, "step": 12805 }, { "epoch": 0.6352497643732328, "grad_norm": 17.5648250579834, "learning_rate": 2.9918263743682802e-06, "loss": 0.3165, "step": 12806 }, { "epoch": 0.635299370008433, "grad_norm": 5.396187782287598, "learning_rate": 2.9911055818972623e-06, "loss": 0.308, "step": 12807 }, { "epoch": 0.6353489756436331, "grad_norm": 7.725216865539551, "learning_rate": 2.9903848392075573e-06, "loss": 0.3351, "step": 12808 }, { "epoch": 0.6353985812788333, "grad_norm": 5.3384199142456055, "learning_rate": 2.989664146317022e-06, "loss": 0.3396, "step": 12809 }, { "epoch": 0.6354481869140335, "grad_norm": 26.386137008666992, "learning_rate": 2.9889435032435214e-06, "loss": 0.6565, "step": 12810 }, { "epoch": 0.6354977925492336, "grad_norm": 11.228143692016602, "learning_rate": 2.9882229100049097e-06, "loss": 0.4101, "step": 12811 }, { "epoch": 0.6355473981844337, "grad_norm": 14.655854225158691, "learning_rate": 2.987502366619044e-06, "loss": 0.4101, "step": 12812 }, { "epoch": 0.6355970038196339, "grad_norm": 4.160569667816162, "learning_rate": 2.9867818731037807e-06, "loss": 0.2092, "step": 12813 }, { "epoch": 0.6356466094548341, "grad_norm": 6.842581272125244, "learning_rate": 2.9860614294769713e-06, "loss": 0.3086, "step": 12814 }, { "epoch": 0.6356962150900343, "grad_norm": 12.748705863952637, "learning_rate": 2.9853410357564728e-06, "loss": 0.3731, "step": 12815 }, { "epoch": 0.6357458207252343, "grad_norm": 6.161209583282471, "learning_rate": 2.984620691960134e-06, "loss": 0.3003, "step": 12816 }, { "epoch": 0.6357954263604345, "grad_norm": 4.778663635253906, "learning_rate": 2.9839003981058057e-06, "loss": 0.2566, "step": 12817 }, { "epoch": 0.6358450319956347, "grad_norm": 7.111082077026367, "learning_rate": 2.9831801542113393e-06, "loss": 0.3135, "step": 12818 }, { "epoch": 0.6358946376308349, "grad_norm": 20.611663818359375, "learning_rate": 2.9824599602945793e-06, "loss": 0.3361, "step": 12819 }, { "epoch": 0.635944243266035, "grad_norm": 7.959120273590088, "learning_rate": 2.981739816373374e-06, "loss": 0.3287, "step": 12820 }, { "epoch": 0.6359938489012352, "grad_norm": 7.708739280700684, "learning_rate": 2.9810197224655692e-06, "loss": 0.3435, "step": 12821 }, { "epoch": 0.6360434545364353, "grad_norm": 8.888609886169434, "learning_rate": 2.98029967858901e-06, "loss": 0.2955, "step": 12822 }, { "epoch": 0.6360930601716355, "grad_norm": 6.340817928314209, "learning_rate": 2.9795796847615375e-06, "loss": 0.3138, "step": 12823 }, { "epoch": 0.6361426658068356, "grad_norm": 10.579319953918457, "learning_rate": 2.9788597410009944e-06, "loss": 0.3608, "step": 12824 }, { "epoch": 0.6361922714420358, "grad_norm": 7.2156219482421875, "learning_rate": 2.9781398473252223e-06, "loss": 0.3917, "step": 12825 }, { "epoch": 0.636241877077236, "grad_norm": 6.7294464111328125, "learning_rate": 2.9774200037520563e-06, "loss": 0.2925, "step": 12826 }, { "epoch": 0.6362914827124362, "grad_norm": 4.7191925048828125, "learning_rate": 2.97670021029934e-06, "loss": 0.3016, "step": 12827 }, { "epoch": 0.6363410883476363, "grad_norm": 4.435173511505127, "learning_rate": 2.9759804669849084e-06, "loss": 0.3486, "step": 12828 }, { "epoch": 0.6363906939828364, "grad_norm": 10.538349151611328, "learning_rate": 2.9752607738265936e-06, "loss": 0.3313, "step": 12829 }, { "epoch": 0.6364402996180366, "grad_norm": 4.957266330718994, "learning_rate": 2.9745411308422358e-06, "loss": 0.2877, "step": 12830 }, { "epoch": 0.6364899052532368, "grad_norm": 5.60081672668457, "learning_rate": 2.9738215380496643e-06, "loss": 0.2927, "step": 12831 }, { "epoch": 0.636539510888437, "grad_norm": 18.006256103515625, "learning_rate": 2.9731019954667093e-06, "loss": 0.3757, "step": 12832 }, { "epoch": 0.636589116523637, "grad_norm": 5.42630672454834, "learning_rate": 2.9723825031112064e-06, "loss": 0.1976, "step": 12833 }, { "epoch": 0.6366387221588372, "grad_norm": 3.8762569427490234, "learning_rate": 2.97166306100098e-06, "loss": 0.2067, "step": 12834 }, { "epoch": 0.6366883277940374, "grad_norm": 11.598101615905762, "learning_rate": 2.9709436691538624e-06, "loss": 0.3083, "step": 12835 }, { "epoch": 0.6367379334292376, "grad_norm": 6.30031156539917, "learning_rate": 2.9702243275876785e-06, "loss": 0.3302, "step": 12836 }, { "epoch": 0.6367875390644377, "grad_norm": 4.336003303527832, "learning_rate": 2.9695050363202515e-06, "loss": 0.1734, "step": 12837 }, { "epoch": 0.6368371446996379, "grad_norm": 7.042449474334717, "learning_rate": 2.968785795369411e-06, "loss": 0.3311, "step": 12838 }, { "epoch": 0.636886750334838, "grad_norm": 7.4303059577941895, "learning_rate": 2.968066604752977e-06, "loss": 0.393, "step": 12839 }, { "epoch": 0.6369363559700382, "grad_norm": 6.435455322265625, "learning_rate": 2.9673474644887695e-06, "loss": 0.333, "step": 12840 }, { "epoch": 0.6369859616052383, "grad_norm": 4.204286575317383, "learning_rate": 2.9666283745946132e-06, "loss": 0.2419, "step": 12841 }, { "epoch": 0.6370355672404385, "grad_norm": 13.242663383483887, "learning_rate": 2.965909335088326e-06, "loss": 0.3792, "step": 12842 }, { "epoch": 0.6370851728756387, "grad_norm": 3.8528263568878174, "learning_rate": 2.9651903459877235e-06, "loss": 0.2137, "step": 12843 }, { "epoch": 0.6371347785108389, "grad_norm": 6.646613121032715, "learning_rate": 2.9644714073106263e-06, "loss": 0.2665, "step": 12844 }, { "epoch": 0.637184384146039, "grad_norm": 9.251696586608887, "learning_rate": 2.963752519074849e-06, "loss": 0.3138, "step": 12845 }, { "epoch": 0.6372339897812391, "grad_norm": 7.47880744934082, "learning_rate": 2.963033681298203e-06, "loss": 0.3494, "step": 12846 }, { "epoch": 0.6372835954164393, "grad_norm": 11.078496932983398, "learning_rate": 2.962314893998506e-06, "loss": 0.368, "step": 12847 }, { "epoch": 0.6373332010516395, "grad_norm": 8.21603012084961, "learning_rate": 2.961596157193567e-06, "loss": 0.227, "step": 12848 }, { "epoch": 0.6373828066868397, "grad_norm": 5.238711357116699, "learning_rate": 2.960877470901196e-06, "loss": 0.3005, "step": 12849 }, { "epoch": 0.6374324123220397, "grad_norm": 8.520681381225586, "learning_rate": 2.960158835139205e-06, "loss": 0.345, "step": 12850 }, { "epoch": 0.6374820179572399, "grad_norm": 4.332456588745117, "learning_rate": 2.9594402499253994e-06, "loss": 0.2395, "step": 12851 }, { "epoch": 0.6375316235924401, "grad_norm": 5.939816951751709, "learning_rate": 2.9587217152775895e-06, "loss": 0.2626, "step": 12852 }, { "epoch": 0.6375812292276403, "grad_norm": 6.835341930389404, "learning_rate": 2.9580032312135788e-06, "loss": 0.2905, "step": 12853 }, { "epoch": 0.6376308348628404, "grad_norm": 6.582479953765869, "learning_rate": 2.95728479775117e-06, "loss": 0.2087, "step": 12854 }, { "epoch": 0.6376804404980406, "grad_norm": 5.813101291656494, "learning_rate": 2.956566414908169e-06, "loss": 0.2274, "step": 12855 }, { "epoch": 0.6377300461332407, "grad_norm": 26.164531707763672, "learning_rate": 2.9558480827023774e-06, "loss": 0.3161, "step": 12856 }, { "epoch": 0.6377796517684409, "grad_norm": 8.515835762023926, "learning_rate": 2.955129801151593e-06, "loss": 0.3264, "step": 12857 }, { "epoch": 0.637829257403641, "grad_norm": 4.59199333190918, "learning_rate": 2.9544115702736186e-06, "loss": 0.2699, "step": 12858 }, { "epoch": 0.6378788630388412, "grad_norm": 9.70376205444336, "learning_rate": 2.953693390086251e-06, "loss": 0.3325, "step": 12859 }, { "epoch": 0.6379284686740414, "grad_norm": 7.180637359619141, "learning_rate": 2.952975260607285e-06, "loss": 0.2735, "step": 12860 }, { "epoch": 0.6379780743092416, "grad_norm": 5.612659931182861, "learning_rate": 2.95225718185452e-06, "loss": 0.262, "step": 12861 }, { "epoch": 0.6380276799444417, "grad_norm": 6.368954181671143, "learning_rate": 2.9515391538457477e-06, "loss": 0.3504, "step": 12862 }, { "epoch": 0.6380772855796418, "grad_norm": 5.991318225860596, "learning_rate": 2.9508211765987603e-06, "loss": 0.2858, "step": 12863 }, { "epoch": 0.638126891214842, "grad_norm": 6.3335981369018555, "learning_rate": 2.9501032501313533e-06, "loss": 0.243, "step": 12864 }, { "epoch": 0.6381764968500422, "grad_norm": 6.417099952697754, "learning_rate": 2.949385374461314e-06, "loss": 0.2363, "step": 12865 }, { "epoch": 0.6382261024852424, "grad_norm": 6.263367176055908, "learning_rate": 2.948667549606432e-06, "loss": 0.3095, "step": 12866 }, { "epoch": 0.6382757081204424, "grad_norm": 5.791743278503418, "learning_rate": 2.9479497755844976e-06, "loss": 0.2959, "step": 12867 }, { "epoch": 0.6383253137556426, "grad_norm": 4.784434795379639, "learning_rate": 2.947232052413294e-06, "loss": 0.3248, "step": 12868 }, { "epoch": 0.6383749193908428, "grad_norm": 5.215724945068359, "learning_rate": 2.9465143801106112e-06, "loss": 0.2706, "step": 12869 }, { "epoch": 0.638424525026043, "grad_norm": 13.460918426513672, "learning_rate": 2.945796758694229e-06, "loss": 0.3504, "step": 12870 }, { "epoch": 0.6384741306612431, "grad_norm": 4.684633731842041, "learning_rate": 2.945079188181933e-06, "loss": 0.2325, "step": 12871 }, { "epoch": 0.6385237362964433, "grad_norm": 9.61574935913086, "learning_rate": 2.944361668591505e-06, "loss": 0.391, "step": 12872 }, { "epoch": 0.6385733419316434, "grad_norm": 6.733206748962402, "learning_rate": 2.943644199940724e-06, "loss": 0.2451, "step": 12873 }, { "epoch": 0.6386229475668436, "grad_norm": 9.785527229309082, "learning_rate": 2.9429267822473694e-06, "loss": 0.3748, "step": 12874 }, { "epoch": 0.6386725532020437, "grad_norm": 5.94602632522583, "learning_rate": 2.9422094155292198e-06, "loss": 0.2783, "step": 12875 }, { "epoch": 0.6387221588372439, "grad_norm": 4.974300384521484, "learning_rate": 2.941492099804053e-06, "loss": 0.238, "step": 12876 }, { "epoch": 0.6387717644724441, "grad_norm": 6.248392105102539, "learning_rate": 2.9407748350896417e-06, "loss": 0.3069, "step": 12877 }, { "epoch": 0.6388213701076443, "grad_norm": 8.561201095581055, "learning_rate": 2.940057621403762e-06, "loss": 0.3466, "step": 12878 }, { "epoch": 0.6388709757428443, "grad_norm": 7.739823341369629, "learning_rate": 2.939340458764187e-06, "loss": 0.2146, "step": 12879 }, { "epoch": 0.6389205813780445, "grad_norm": 11.40695858001709, "learning_rate": 2.938623347188686e-06, "loss": 0.3089, "step": 12880 }, { "epoch": 0.6389701870132447, "grad_norm": 6.216095924377441, "learning_rate": 2.937906286695032e-06, "loss": 0.3636, "step": 12881 }, { "epoch": 0.6390197926484449, "grad_norm": 5.54612922668457, "learning_rate": 2.9371892773009936e-06, "loss": 0.2236, "step": 12882 }, { "epoch": 0.6390693982836451, "grad_norm": 4.73422384262085, "learning_rate": 2.936472319024336e-06, "loss": 0.1672, "step": 12883 }, { "epoch": 0.6391190039188451, "grad_norm": 8.17515754699707, "learning_rate": 2.935755411882829e-06, "loss": 0.2747, "step": 12884 }, { "epoch": 0.6391686095540453, "grad_norm": 4.806931972503662, "learning_rate": 2.935038555894237e-06, "loss": 0.2984, "step": 12885 }, { "epoch": 0.6392182151892455, "grad_norm": 8.480000495910645, "learning_rate": 2.9343217510763226e-06, "loss": 0.3592, "step": 12886 }, { "epoch": 0.6392678208244457, "grad_norm": 6.558422088623047, "learning_rate": 2.933604997446851e-06, "loss": 0.3081, "step": 12887 }, { "epoch": 0.6393174264596458, "grad_norm": 5.518346309661865, "learning_rate": 2.9328882950235805e-06, "loss": 0.3141, "step": 12888 }, { "epoch": 0.639367032094846, "grad_norm": 5.898379802703857, "learning_rate": 2.9321716438242747e-06, "loss": 0.2676, "step": 12889 }, { "epoch": 0.6394166377300461, "grad_norm": 6.422452926635742, "learning_rate": 2.931455043866691e-06, "loss": 0.3107, "step": 12890 }, { "epoch": 0.6394662433652463, "grad_norm": 8.11281681060791, "learning_rate": 2.930738495168586e-06, "loss": 0.2237, "step": 12891 }, { "epoch": 0.6395158490004464, "grad_norm": 7.988262176513672, "learning_rate": 2.930021997747718e-06, "loss": 0.3799, "step": 12892 }, { "epoch": 0.6395654546356466, "grad_norm": 15.76599407196045, "learning_rate": 2.9293055516218416e-06, "loss": 0.36, "step": 12893 }, { "epoch": 0.6396150602708468, "grad_norm": 10.26527214050293, "learning_rate": 2.9285891568087075e-06, "loss": 0.3987, "step": 12894 }, { "epoch": 0.639664665906047, "grad_norm": 6.090719223022461, "learning_rate": 2.9278728133260746e-06, "loss": 0.3096, "step": 12895 }, { "epoch": 0.639714271541247, "grad_norm": 5.979212760925293, "learning_rate": 2.9271565211916897e-06, "loss": 0.2738, "step": 12896 }, { "epoch": 0.6397638771764472, "grad_norm": 13.636995315551758, "learning_rate": 2.9264402804233022e-06, "loss": 0.3077, "step": 12897 }, { "epoch": 0.6398134828116474, "grad_norm": 8.385055541992188, "learning_rate": 2.925724091038664e-06, "loss": 0.3357, "step": 12898 }, { "epoch": 0.6398630884468476, "grad_norm": 9.55871295928955, "learning_rate": 2.925007953055522e-06, "loss": 0.4049, "step": 12899 }, { "epoch": 0.6399126940820478, "grad_norm": 9.83031940460205, "learning_rate": 2.924291866491619e-06, "loss": 0.4049, "step": 12900 }, { "epoch": 0.6399622997172478, "grad_norm": 6.383856773376465, "learning_rate": 2.9235758313647044e-06, "loss": 0.2931, "step": 12901 }, { "epoch": 0.640011905352448, "grad_norm": 6.815403938293457, "learning_rate": 2.9228598476925207e-06, "loss": 0.2886, "step": 12902 }, { "epoch": 0.6400615109876482, "grad_norm": 5.702167987823486, "learning_rate": 2.9221439154928064e-06, "loss": 0.2628, "step": 12903 }, { "epoch": 0.6401111166228484, "grad_norm": 9.546911239624023, "learning_rate": 2.921428034783309e-06, "loss": 0.3288, "step": 12904 }, { "epoch": 0.6401607222580485, "grad_norm": 9.41504955291748, "learning_rate": 2.9207122055817625e-06, "loss": 0.2476, "step": 12905 }, { "epoch": 0.6402103278932487, "grad_norm": 5.891968727111816, "learning_rate": 2.91999642790591e-06, "loss": 0.271, "step": 12906 }, { "epoch": 0.6402599335284488, "grad_norm": 13.501357078552246, "learning_rate": 2.919280701773487e-06, "loss": 0.2196, "step": 12907 }, { "epoch": 0.640309539163649, "grad_norm": 15.767390251159668, "learning_rate": 2.918565027202229e-06, "loss": 0.4526, "step": 12908 }, { "epoch": 0.6403591447988491, "grad_norm": 5.048727035522461, "learning_rate": 2.9178494042098705e-06, "loss": 0.217, "step": 12909 }, { "epoch": 0.6404087504340493, "grad_norm": 5.661229133605957, "learning_rate": 2.917133832814145e-06, "loss": 0.3233, "step": 12910 }, { "epoch": 0.6404583560692495, "grad_norm": 12.172372817993164, "learning_rate": 2.9164183130327865e-06, "loss": 0.3383, "step": 12911 }, { "epoch": 0.6405079617044497, "grad_norm": 6.879884243011475, "learning_rate": 2.9157028448835244e-06, "loss": 0.3013, "step": 12912 }, { "epoch": 0.6405575673396497, "grad_norm": 10.714016914367676, "learning_rate": 2.9149874283840907e-06, "loss": 0.2863, "step": 12913 }, { "epoch": 0.6406071729748499, "grad_norm": 11.876492500305176, "learning_rate": 2.9142720635522072e-06, "loss": 0.385, "step": 12914 }, { "epoch": 0.6406567786100501, "grad_norm": 4.795637607574463, "learning_rate": 2.91355675040561e-06, "loss": 0.21, "step": 12915 }, { "epoch": 0.6407063842452503, "grad_norm": 12.078939437866211, "learning_rate": 2.912841488962018e-06, "loss": 0.3292, "step": 12916 }, { "epoch": 0.6407559898804505, "grad_norm": 7.69764518737793, "learning_rate": 2.9121262792391582e-06, "loss": 0.334, "step": 12917 }, { "epoch": 0.6408055955156505, "grad_norm": 10.033510208129883, "learning_rate": 2.9114111212547534e-06, "loss": 0.2449, "step": 12918 }, { "epoch": 0.6408552011508507, "grad_norm": 10.283465385437012, "learning_rate": 2.9106960150265286e-06, "loss": 0.3017, "step": 12919 }, { "epoch": 0.6409048067860509, "grad_norm": 6.316714763641357, "learning_rate": 2.9099809605721962e-06, "loss": 0.2529, "step": 12920 }, { "epoch": 0.6409544124212511, "grad_norm": 4.607809066772461, "learning_rate": 2.9092659579094863e-06, "loss": 0.1765, "step": 12921 }, { "epoch": 0.6410040180564512, "grad_norm": 11.1845121383667, "learning_rate": 2.9085510070561073e-06, "loss": 0.2696, "step": 12922 }, { "epoch": 0.6410536236916514, "grad_norm": 5.4690260887146, "learning_rate": 2.907836108029784e-06, "loss": 0.2761, "step": 12923 }, { "epoch": 0.6411032293268515, "grad_norm": 5.477080345153809, "learning_rate": 2.907121260848227e-06, "loss": 0.2976, "step": 12924 }, { "epoch": 0.6411528349620517, "grad_norm": 6.77374792098999, "learning_rate": 2.906406465529151e-06, "loss": 0.3897, "step": 12925 }, { "epoch": 0.6412024405972518, "grad_norm": 5.179713249206543, "learning_rate": 2.9056917220902702e-06, "loss": 0.2207, "step": 12926 }, { "epoch": 0.641252046232452, "grad_norm": 4.345375061035156, "learning_rate": 2.9049770305492988e-06, "loss": 0.2784, "step": 12927 }, { "epoch": 0.6413016518676522, "grad_norm": 8.03972053527832, "learning_rate": 2.9042623909239385e-06, "loss": 0.2917, "step": 12928 }, { "epoch": 0.6413512575028524, "grad_norm": 7.576979160308838, "learning_rate": 2.9035478032319097e-06, "loss": 0.281, "step": 12929 }, { "epoch": 0.6414008631380524, "grad_norm": 6.473092079162598, "learning_rate": 2.902833267490912e-06, "loss": 0.2467, "step": 12930 }, { "epoch": 0.6414504687732526, "grad_norm": 5.965426445007324, "learning_rate": 2.9021187837186547e-06, "loss": 0.2436, "step": 12931 }, { "epoch": 0.6415000744084528, "grad_norm": 7.651880264282227, "learning_rate": 2.9014043519328426e-06, "loss": 0.2782, "step": 12932 }, { "epoch": 0.641549680043653, "grad_norm": 6.73362398147583, "learning_rate": 2.9006899721511827e-06, "loss": 0.3638, "step": 12933 }, { "epoch": 0.6415992856788532, "grad_norm": 13.431318283081055, "learning_rate": 2.8999756443913707e-06, "loss": 0.2974, "step": 12934 }, { "epoch": 0.6416488913140532, "grad_norm": 3.9812142848968506, "learning_rate": 2.8992613686711156e-06, "loss": 0.2565, "step": 12935 }, { "epoch": 0.6416984969492534, "grad_norm": 7.907314300537109, "learning_rate": 2.8985471450081126e-06, "loss": 0.3029, "step": 12936 }, { "epoch": 0.6417481025844536, "grad_norm": 11.734100341796875, "learning_rate": 2.8978329734200615e-06, "loss": 0.3132, "step": 12937 }, { "epoch": 0.6417977082196538, "grad_norm": 7.850022792816162, "learning_rate": 2.897118853924661e-06, "loss": 0.2625, "step": 12938 }, { "epoch": 0.6418473138548539, "grad_norm": 4.101490497589111, "learning_rate": 2.8964047865396054e-06, "loss": 0.257, "step": 12939 }, { "epoch": 0.6418969194900541, "grad_norm": 4.357964515686035, "learning_rate": 2.8956907712825915e-06, "loss": 0.2506, "step": 12940 }, { "epoch": 0.6419465251252542, "grad_norm": 9.325519561767578, "learning_rate": 2.894976808171314e-06, "loss": 0.3708, "step": 12941 }, { "epoch": 0.6419961307604544, "grad_norm": 17.147701263427734, "learning_rate": 2.89426289722346e-06, "loss": 0.5383, "step": 12942 }, { "epoch": 0.6420457363956545, "grad_norm": 5.772009372711182, "learning_rate": 2.8935490384567266e-06, "loss": 0.2443, "step": 12943 }, { "epoch": 0.6420953420308547, "grad_norm": 8.377269744873047, "learning_rate": 2.8928352318888e-06, "loss": 0.2009, "step": 12944 }, { "epoch": 0.6421449476660549, "grad_norm": 9.649176597595215, "learning_rate": 2.8921214775373692e-06, "loss": 0.3229, "step": 12945 }, { "epoch": 0.6421945533012551, "grad_norm": 5.505155563354492, "learning_rate": 2.891407775420122e-06, "loss": 0.2131, "step": 12946 }, { "epoch": 0.6422441589364551, "grad_norm": 4.33604097366333, "learning_rate": 2.890694125554745e-06, "loss": 0.2911, "step": 12947 }, { "epoch": 0.6422937645716553, "grad_norm": 6.885207653045654, "learning_rate": 2.889980527958919e-06, "loss": 0.2538, "step": 12948 }, { "epoch": 0.6423433702068555, "grad_norm": 23.743711471557617, "learning_rate": 2.8892669826503327e-06, "loss": 0.2829, "step": 12949 }, { "epoch": 0.6423929758420557, "grad_norm": 6.235145092010498, "learning_rate": 2.888553489646664e-06, "loss": 0.3474, "step": 12950 }, { "epoch": 0.6424425814772559, "grad_norm": 11.32606029510498, "learning_rate": 2.887840048965595e-06, "loss": 0.3352, "step": 12951 }, { "epoch": 0.6424921871124559, "grad_norm": 7.314900875091553, "learning_rate": 2.887126660624805e-06, "loss": 0.3327, "step": 12952 }, { "epoch": 0.6425417927476561, "grad_norm": 6.046982765197754, "learning_rate": 2.8864133246419723e-06, "loss": 0.2939, "step": 12953 }, { "epoch": 0.6425913983828563, "grad_norm": 7.101852893829346, "learning_rate": 2.885700041034773e-06, "loss": 0.3509, "step": 12954 }, { "epoch": 0.6426410040180565, "grad_norm": 7.06243896484375, "learning_rate": 2.884986809820883e-06, "loss": 0.2801, "step": 12955 }, { "epoch": 0.6426906096532566, "grad_norm": 12.655716896057129, "learning_rate": 2.8842736310179785e-06, "loss": 0.3552, "step": 12956 }, { "epoch": 0.6427402152884568, "grad_norm": 9.893261909484863, "learning_rate": 2.8835605046437295e-06, "loss": 0.2571, "step": 12957 }, { "epoch": 0.6427898209236569, "grad_norm": 7.622385501861572, "learning_rate": 2.8828474307158077e-06, "loss": 0.2765, "step": 12958 }, { "epoch": 0.6428394265588571, "grad_norm": 6.445067882537842, "learning_rate": 2.8821344092518844e-06, "loss": 0.2747, "step": 12959 }, { "epoch": 0.6428890321940572, "grad_norm": 5.744488716125488, "learning_rate": 2.8814214402696283e-06, "loss": 0.2473, "step": 12960 }, { "epoch": 0.6429386378292574, "grad_norm": 4.920361042022705, "learning_rate": 2.8807085237867076e-06, "loss": 0.2588, "step": 12961 }, { "epoch": 0.6429882434644576, "grad_norm": 5.500915050506592, "learning_rate": 2.8799956598207885e-06, "loss": 0.3268, "step": 12962 }, { "epoch": 0.6430378490996577, "grad_norm": 7.83236026763916, "learning_rate": 2.8792828483895354e-06, "loss": 0.4182, "step": 12963 }, { "epoch": 0.6430874547348578, "grad_norm": 11.201738357543945, "learning_rate": 2.878570089510615e-06, "loss": 0.3093, "step": 12964 }, { "epoch": 0.643137060370058, "grad_norm": 13.984370231628418, "learning_rate": 2.8778573832016833e-06, "loss": 0.4225, "step": 12965 }, { "epoch": 0.6431866660052582, "grad_norm": 16.109546661376953, "learning_rate": 2.87714472948041e-06, "loss": 0.4441, "step": 12966 }, { "epoch": 0.6432362716404584, "grad_norm": 8.63132095336914, "learning_rate": 2.876432128364448e-06, "loss": 0.3609, "step": 12967 }, { "epoch": 0.6432858772756586, "grad_norm": 5.890930652618408, "learning_rate": 2.8757195798714587e-06, "loss": 0.2518, "step": 12968 }, { "epoch": 0.6433354829108586, "grad_norm": 6.77294397354126, "learning_rate": 2.8750070840190988e-06, "loss": 0.2824, "step": 12969 }, { "epoch": 0.6433850885460588, "grad_norm": 4.681069850921631, "learning_rate": 2.874294640825027e-06, "loss": 0.2838, "step": 12970 }, { "epoch": 0.643434694181259, "grad_norm": 4.4395318031311035, "learning_rate": 2.873582250306891e-06, "loss": 0.2486, "step": 12971 }, { "epoch": 0.6434842998164592, "grad_norm": 4.226199150085449, "learning_rate": 2.8728699124823533e-06, "loss": 0.2759, "step": 12972 }, { "epoch": 0.6435339054516593, "grad_norm": 6.4857683181762695, "learning_rate": 2.872157627369059e-06, "loss": 0.2915, "step": 12973 }, { "epoch": 0.6435835110868595, "grad_norm": 12.240327835083008, "learning_rate": 2.8714453949846617e-06, "loss": 0.3075, "step": 12974 }, { "epoch": 0.6436331167220596, "grad_norm": 7.598708629608154, "learning_rate": 2.87073321534681e-06, "loss": 0.389, "step": 12975 }, { "epoch": 0.6436827223572598, "grad_norm": 7.191987037658691, "learning_rate": 2.870021088473152e-06, "loss": 0.2991, "step": 12976 }, { "epoch": 0.6437323279924599, "grad_norm": 5.933907508850098, "learning_rate": 2.8693090143813356e-06, "loss": 0.3215, "step": 12977 }, { "epoch": 0.6437819336276601, "grad_norm": 5.287552356719971, "learning_rate": 2.868596993089008e-06, "loss": 0.2207, "step": 12978 }, { "epoch": 0.6438315392628603, "grad_norm": 9.471674919128418, "learning_rate": 2.867885024613806e-06, "loss": 0.2971, "step": 12979 }, { "epoch": 0.6438811448980604, "grad_norm": 4.0315775871276855, "learning_rate": 2.8671731089733824e-06, "loss": 0.2051, "step": 12980 }, { "epoch": 0.6439307505332605, "grad_norm": 10.090470314025879, "learning_rate": 2.8664612461853713e-06, "loss": 0.3543, "step": 12981 }, { "epoch": 0.6439803561684607, "grad_norm": 13.492828369140625, "learning_rate": 2.8657494362674156e-06, "loss": 0.3781, "step": 12982 }, { "epoch": 0.6440299618036609, "grad_norm": 6.662865161895752, "learning_rate": 2.865037679237155e-06, "loss": 0.2304, "step": 12983 }, { "epoch": 0.6440795674388611, "grad_norm": 4.169682502746582, "learning_rate": 2.8643259751122278e-06, "loss": 0.2568, "step": 12984 }, { "epoch": 0.6441291730740613, "grad_norm": 9.598871231079102, "learning_rate": 2.863614323910265e-06, "loss": 0.2454, "step": 12985 }, { "epoch": 0.6441787787092613, "grad_norm": 6.181993007659912, "learning_rate": 2.86290272564891e-06, "loss": 0.3564, "step": 12986 }, { "epoch": 0.6442283843444615, "grad_norm": 6.079269886016846, "learning_rate": 2.8621911803457903e-06, "loss": 0.3026, "step": 12987 }, { "epoch": 0.6442779899796617, "grad_norm": 6.087027549743652, "learning_rate": 2.8614796880185403e-06, "loss": 0.2751, "step": 12988 }, { "epoch": 0.6443275956148619, "grad_norm": 8.725532531738281, "learning_rate": 2.86076824868479e-06, "loss": 0.239, "step": 12989 }, { "epoch": 0.644377201250062, "grad_norm": 6.415273189544678, "learning_rate": 2.8600568623621726e-06, "loss": 0.1925, "step": 12990 }, { "epoch": 0.6444268068852622, "grad_norm": 9.348224639892578, "learning_rate": 2.859345529068311e-06, "loss": 0.3388, "step": 12991 }, { "epoch": 0.6444764125204623, "grad_norm": 5.936551094055176, "learning_rate": 2.8586342488208386e-06, "loss": 0.2181, "step": 12992 }, { "epoch": 0.6445260181556625, "grad_norm": 6.3678789138793945, "learning_rate": 2.8579230216373743e-06, "loss": 0.2512, "step": 12993 }, { "epoch": 0.6445756237908626, "grad_norm": 4.608026504516602, "learning_rate": 2.857211847535551e-06, "loss": 0.2006, "step": 12994 }, { "epoch": 0.6446252294260628, "grad_norm": 4.519850254058838, "learning_rate": 2.856500726532985e-06, "loss": 0.2615, "step": 12995 }, { "epoch": 0.644674835061263, "grad_norm": 9.236504554748535, "learning_rate": 2.8557896586473007e-06, "loss": 0.2486, "step": 12996 }, { "epoch": 0.6447244406964631, "grad_norm": 7.350290775299072, "learning_rate": 2.8550786438961186e-06, "loss": 0.2789, "step": 12997 }, { "epoch": 0.6447740463316632, "grad_norm": 10.153239250183105, "learning_rate": 2.85436768229706e-06, "loss": 0.3334, "step": 12998 }, { "epoch": 0.6448236519668634, "grad_norm": 5.491581439971924, "learning_rate": 2.8536567738677366e-06, "loss": 0.321, "step": 12999 }, { "epoch": 0.6448732576020636, "grad_norm": 6.291962146759033, "learning_rate": 2.8529459186257737e-06, "loss": 0.2498, "step": 13000 }, { "epoch": 0.6449228632372638, "grad_norm": 5.783545017242432, "learning_rate": 2.8522351165887797e-06, "loss": 0.2738, "step": 13001 }, { "epoch": 0.644972468872464, "grad_norm": 4.300759792327881, "learning_rate": 2.8515243677743708e-06, "loss": 0.2189, "step": 13002 }, { "epoch": 0.645022074507664, "grad_norm": 7.745749473571777, "learning_rate": 2.85081367220016e-06, "loss": 0.2477, "step": 13003 }, { "epoch": 0.6450716801428642, "grad_norm": 8.34022045135498, "learning_rate": 2.850103029883759e-06, "loss": 0.2827, "step": 13004 }, { "epoch": 0.6451212857780644, "grad_norm": 6.43533992767334, "learning_rate": 2.8493924408427778e-06, "loss": 0.3214, "step": 13005 }, { "epoch": 0.6451708914132646, "grad_norm": 9.071188926696777, "learning_rate": 2.8486819050948256e-06, "loss": 0.3075, "step": 13006 }, { "epoch": 0.6452204970484647, "grad_norm": 6.572591304779053, "learning_rate": 2.8479714226575076e-06, "loss": 0.3137, "step": 13007 }, { "epoch": 0.6452701026836649, "grad_norm": 15.78787612915039, "learning_rate": 2.8472609935484315e-06, "loss": 0.3362, "step": 13008 }, { "epoch": 0.645319708318865, "grad_norm": 6.811252593994141, "learning_rate": 2.846550617785201e-06, "loss": 0.3436, "step": 13009 }, { "epoch": 0.6453693139540652, "grad_norm": 6.89621114730835, "learning_rate": 2.8458402953854213e-06, "loss": 0.3342, "step": 13010 }, { "epoch": 0.6454189195892653, "grad_norm": 10.598980903625488, "learning_rate": 2.8451300263666924e-06, "loss": 0.3459, "step": 13011 }, { "epoch": 0.6454685252244655, "grad_norm": 5.7982378005981445, "learning_rate": 2.844419810746617e-06, "loss": 0.2135, "step": 13012 }, { "epoch": 0.6455181308596657, "grad_norm": 5.11639928817749, "learning_rate": 2.8437096485427938e-06, "loss": 0.3184, "step": 13013 }, { "epoch": 0.6455677364948658, "grad_norm": 9.899833679199219, "learning_rate": 2.8429995397728204e-06, "loss": 0.3073, "step": 13014 }, { "epoch": 0.6456173421300659, "grad_norm": 11.598801612854004, "learning_rate": 2.8422894844542962e-06, "loss": 0.4167, "step": 13015 }, { "epoch": 0.6456669477652661, "grad_norm": 6.765955924987793, "learning_rate": 2.841579482604813e-06, "loss": 0.338, "step": 13016 }, { "epoch": 0.6457165534004663, "grad_norm": 8.770589828491211, "learning_rate": 2.840869534241967e-06, "loss": 0.3659, "step": 13017 }, { "epoch": 0.6457661590356665, "grad_norm": 9.491605758666992, "learning_rate": 2.84015963938335e-06, "loss": 0.3751, "step": 13018 }, { "epoch": 0.6458157646708667, "grad_norm": 6.744256496429443, "learning_rate": 2.8394497980465537e-06, "loss": 0.2917, "step": 13019 }, { "epoch": 0.6458653703060667, "grad_norm": 14.606733322143555, "learning_rate": 2.8387400102491693e-06, "loss": 0.3748, "step": 13020 }, { "epoch": 0.6459149759412669, "grad_norm": 7.077120780944824, "learning_rate": 2.838030276008786e-06, "loss": 0.2219, "step": 13021 }, { "epoch": 0.6459645815764671, "grad_norm": 9.834235191345215, "learning_rate": 2.8373205953429884e-06, "loss": 0.3677, "step": 13022 }, { "epoch": 0.6460141872116673, "grad_norm": 5.405792713165283, "learning_rate": 2.836610968269367e-06, "loss": 0.2428, "step": 13023 }, { "epoch": 0.6460637928468674, "grad_norm": 9.089099884033203, "learning_rate": 2.8359013948055038e-06, "loss": 0.3132, "step": 13024 }, { "epoch": 0.6461133984820676, "grad_norm": 4.535810470581055, "learning_rate": 2.8351918749689823e-06, "loss": 0.2372, "step": 13025 }, { "epoch": 0.6461630041172677, "grad_norm": 20.31692886352539, "learning_rate": 2.8344824087773847e-06, "loss": 0.3354, "step": 13026 }, { "epoch": 0.6462126097524679, "grad_norm": 10.244525909423828, "learning_rate": 2.833772996248295e-06, "loss": 0.2626, "step": 13027 }, { "epoch": 0.646262215387668, "grad_norm": 7.201874256134033, "learning_rate": 2.833063637399287e-06, "loss": 0.357, "step": 13028 }, { "epoch": 0.6463118210228682, "grad_norm": 11.230557441711426, "learning_rate": 2.8323543322479457e-06, "loss": 0.413, "step": 13029 }, { "epoch": 0.6463614266580684, "grad_norm": 7.940932273864746, "learning_rate": 2.831645080811841e-06, "loss": 0.2038, "step": 13030 }, { "epoch": 0.6464110322932685, "grad_norm": 7.472786903381348, "learning_rate": 2.8309358831085554e-06, "loss": 0.3144, "step": 13031 }, { "epoch": 0.6464606379284686, "grad_norm": 9.76806354522705, "learning_rate": 2.830226739155658e-06, "loss": 0.3149, "step": 13032 }, { "epoch": 0.6465102435636688, "grad_norm": 7.548492908477783, "learning_rate": 2.829517648970724e-06, "loss": 0.2636, "step": 13033 }, { "epoch": 0.646559849198869, "grad_norm": 7.3232741355896, "learning_rate": 2.8288086125713244e-06, "loss": 0.1856, "step": 13034 }, { "epoch": 0.6466094548340692, "grad_norm": 11.061275482177734, "learning_rate": 2.8280996299750314e-06, "loss": 0.3801, "step": 13035 }, { "epoch": 0.6466590604692694, "grad_norm": 9.497615814208984, "learning_rate": 2.827390701199407e-06, "loss": 0.2715, "step": 13036 }, { "epoch": 0.6467086661044694, "grad_norm": 5.430698871612549, "learning_rate": 2.8266818262620295e-06, "loss": 0.2029, "step": 13037 }, { "epoch": 0.6467582717396696, "grad_norm": 6.511326313018799, "learning_rate": 2.825973005180457e-06, "loss": 0.2499, "step": 13038 }, { "epoch": 0.6468078773748698, "grad_norm": 8.215846061706543, "learning_rate": 2.8252642379722562e-06, "loss": 0.2746, "step": 13039 }, { "epoch": 0.64685748301007, "grad_norm": 6.544079303741455, "learning_rate": 2.8245555246549916e-06, "loss": 0.3049, "step": 13040 }, { "epoch": 0.6469070886452701, "grad_norm": 7.744607925415039, "learning_rate": 2.823846865246227e-06, "loss": 0.3891, "step": 13041 }, { "epoch": 0.6469566942804703, "grad_norm": 13.241630554199219, "learning_rate": 2.8231382597635177e-06, "loss": 0.3926, "step": 13042 }, { "epoch": 0.6470062999156704, "grad_norm": 7.232774257659912, "learning_rate": 2.822429708224431e-06, "loss": 0.3051, "step": 13043 }, { "epoch": 0.6470559055508706, "grad_norm": 8.557819366455078, "learning_rate": 2.821721210646519e-06, "loss": 0.2409, "step": 13044 }, { "epoch": 0.6471055111860707, "grad_norm": 4.70639181137085, "learning_rate": 2.8210127670473407e-06, "loss": 0.2289, "step": 13045 }, { "epoch": 0.6471551168212709, "grad_norm": 5.356478691101074, "learning_rate": 2.820304377444452e-06, "loss": 0.3007, "step": 13046 }, { "epoch": 0.6472047224564711, "grad_norm": 8.129549980163574, "learning_rate": 2.8195960418554067e-06, "loss": 0.3956, "step": 13047 }, { "epoch": 0.6472543280916712, "grad_norm": 4.96527099609375, "learning_rate": 2.8188877602977582e-06, "loss": 0.2494, "step": 13048 }, { "epoch": 0.6473039337268713, "grad_norm": 4.180896282196045, "learning_rate": 2.818179532789059e-06, "loss": 0.2798, "step": 13049 }, { "epoch": 0.6473535393620715, "grad_norm": 5.910085201263428, "learning_rate": 2.8174713593468537e-06, "loss": 0.2286, "step": 13050 }, { "epoch": 0.6474031449972717, "grad_norm": 6.167849540710449, "learning_rate": 2.8167632399887e-06, "loss": 0.3095, "step": 13051 }, { "epoch": 0.6474527506324719, "grad_norm": 6.000280857086182, "learning_rate": 2.8160551747321387e-06, "loss": 0.3073, "step": 13052 }, { "epoch": 0.647502356267672, "grad_norm": 7.3043928146362305, "learning_rate": 2.8153471635947176e-06, "loss": 0.3008, "step": 13053 }, { "epoch": 0.6475519619028721, "grad_norm": 6.285303592681885, "learning_rate": 2.8146392065939825e-06, "loss": 0.3571, "step": 13054 }, { "epoch": 0.6476015675380723, "grad_norm": 18.8027286529541, "learning_rate": 2.8139313037474782e-06, "loss": 0.4049, "step": 13055 }, { "epoch": 0.6476511731732725, "grad_norm": 5.317712306976318, "learning_rate": 2.813223455072741e-06, "loss": 0.3138, "step": 13056 }, { "epoch": 0.6477007788084727, "grad_norm": 13.27608585357666, "learning_rate": 2.8125156605873206e-06, "loss": 0.3149, "step": 13057 }, { "epoch": 0.6477503844436728, "grad_norm": 6.366243362426758, "learning_rate": 2.811807920308749e-06, "loss": 0.2504, "step": 13058 }, { "epoch": 0.647799990078873, "grad_norm": 5.216480731964111, "learning_rate": 2.8111002342545667e-06, "loss": 0.2886, "step": 13059 }, { "epoch": 0.6478495957140731, "grad_norm": 12.654271125793457, "learning_rate": 2.810392602442311e-06, "loss": 0.4049, "step": 13060 }, { "epoch": 0.6478992013492733, "grad_norm": 4.822188854217529, "learning_rate": 2.809685024889517e-06, "loss": 0.1897, "step": 13061 }, { "epoch": 0.6479488069844734, "grad_norm": 7.064945220947266, "learning_rate": 2.8089775016137187e-06, "loss": 0.2432, "step": 13062 }, { "epoch": 0.6479984126196736, "grad_norm": 4.837804794311523, "learning_rate": 2.8082700326324507e-06, "loss": 0.2112, "step": 13063 }, { "epoch": 0.6480480182548738, "grad_norm": 7.19066858291626, "learning_rate": 2.8075626179632398e-06, "loss": 0.3227, "step": 13064 }, { "epoch": 0.6480976238900739, "grad_norm": 6.83731746673584, "learning_rate": 2.8068552576236225e-06, "loss": 0.2545, "step": 13065 }, { "epoch": 0.648147229525274, "grad_norm": 3.8349273204803467, "learning_rate": 2.806147951631122e-06, "loss": 0.2665, "step": 13066 }, { "epoch": 0.6481968351604742, "grad_norm": 4.870485782623291, "learning_rate": 2.8054407000032667e-06, "loss": 0.2578, "step": 13067 }, { "epoch": 0.6482464407956744, "grad_norm": 15.19212818145752, "learning_rate": 2.804733502757585e-06, "loss": 0.3324, "step": 13068 }, { "epoch": 0.6482960464308746, "grad_norm": 7.9451398849487305, "learning_rate": 2.804026359911599e-06, "loss": 0.3173, "step": 13069 }, { "epoch": 0.6483456520660748, "grad_norm": 7.989175796508789, "learning_rate": 2.803319271482834e-06, "loss": 0.2586, "step": 13070 }, { "epoch": 0.6483952577012748, "grad_norm": 8.626327514648438, "learning_rate": 2.8026122374888107e-06, "loss": 0.3576, "step": 13071 }, { "epoch": 0.648444863336475, "grad_norm": 5.291051864624023, "learning_rate": 2.8019052579470524e-06, "loss": 0.2493, "step": 13072 }, { "epoch": 0.6484944689716752, "grad_norm": 9.402264595031738, "learning_rate": 2.8011983328750742e-06, "loss": 0.3459, "step": 13073 }, { "epoch": 0.6485440746068754, "grad_norm": 6.20951509475708, "learning_rate": 2.8004914622903958e-06, "loss": 0.1847, "step": 13074 }, { "epoch": 0.6485936802420755, "grad_norm": 9.02790641784668, "learning_rate": 2.7997846462105337e-06, "loss": 0.3066, "step": 13075 }, { "epoch": 0.6486432858772757, "grad_norm": 18.758466720581055, "learning_rate": 2.799077884653003e-06, "loss": 0.3961, "step": 13076 }, { "epoch": 0.6486928915124758, "grad_norm": 7.4068098068237305, "learning_rate": 2.7983711776353185e-06, "loss": 0.4086, "step": 13077 }, { "epoch": 0.648742497147676, "grad_norm": 7.524087429046631, "learning_rate": 2.797664525174994e-06, "loss": 0.2487, "step": 13078 }, { "epoch": 0.6487921027828761, "grad_norm": 8.004130363464355, "learning_rate": 2.796957927289535e-06, "loss": 0.2913, "step": 13079 }, { "epoch": 0.6488417084180763, "grad_norm": 4.7198920249938965, "learning_rate": 2.796251383996459e-06, "loss": 0.3007, "step": 13080 }, { "epoch": 0.6488913140532765, "grad_norm": 12.919148445129395, "learning_rate": 2.795544895313268e-06, "loss": 0.3525, "step": 13081 }, { "epoch": 0.6489409196884766, "grad_norm": 7.23077392578125, "learning_rate": 2.7948384612574717e-06, "loss": 0.2774, "step": 13082 }, { "epoch": 0.6489905253236767, "grad_norm": 6.1408371925354, "learning_rate": 2.7941320818465767e-06, "loss": 0.1679, "step": 13083 }, { "epoch": 0.6490401309588769, "grad_norm": 10.866911888122559, "learning_rate": 2.793425757098086e-06, "loss": 0.2275, "step": 13084 }, { "epoch": 0.6490897365940771, "grad_norm": 10.453914642333984, "learning_rate": 2.792719487029503e-06, "loss": 0.2871, "step": 13085 }, { "epoch": 0.6491393422292773, "grad_norm": 13.310494422912598, "learning_rate": 2.7920132716583327e-06, "loss": 0.3586, "step": 13086 }, { "epoch": 0.6491889478644775, "grad_norm": 4.407365322113037, "learning_rate": 2.791307111002067e-06, "loss": 0.2043, "step": 13087 }, { "epoch": 0.6492385534996775, "grad_norm": 8.137064933776855, "learning_rate": 2.7906010050782143e-06, "loss": 0.321, "step": 13088 }, { "epoch": 0.6492881591348777, "grad_norm": 6.813154697418213, "learning_rate": 2.7898949539042665e-06, "loss": 0.2618, "step": 13089 }, { "epoch": 0.6493377647700779, "grad_norm": 6.258947849273682, "learning_rate": 2.7891889574977216e-06, "loss": 0.2971, "step": 13090 }, { "epoch": 0.649387370405278, "grad_norm": 3.9333155155181885, "learning_rate": 2.7884830158760747e-06, "loss": 0.18, "step": 13091 }, { "epoch": 0.6494369760404782, "grad_norm": 6.726078987121582, "learning_rate": 2.78777712905682e-06, "loss": 0.3258, "step": 13092 }, { "epoch": 0.6494865816756784, "grad_norm": 4.438406467437744, "learning_rate": 2.7870712970574453e-06, "loss": 0.1872, "step": 13093 }, { "epoch": 0.6495361873108785, "grad_norm": 10.1992769241333, "learning_rate": 2.7863655198954486e-06, "loss": 0.2808, "step": 13094 }, { "epoch": 0.6495857929460787, "grad_norm": 19.37770652770996, "learning_rate": 2.7856597975883144e-06, "loss": 0.3509, "step": 13095 }, { "epoch": 0.6496353985812788, "grad_norm": 5.101607322692871, "learning_rate": 2.7849541301535314e-06, "loss": 0.2939, "step": 13096 }, { "epoch": 0.649685004216479, "grad_norm": 8.861663818359375, "learning_rate": 2.7842485176085878e-06, "loss": 0.3248, "step": 13097 }, { "epoch": 0.6497346098516792, "grad_norm": 5.374127388000488, "learning_rate": 2.783542959970969e-06, "loss": 0.2341, "step": 13098 }, { "epoch": 0.6497842154868793, "grad_norm": 4.992153167724609, "learning_rate": 2.782837457258155e-06, "loss": 0.3251, "step": 13099 }, { "epoch": 0.6498338211220794, "grad_norm": 5.8084211349487305, "learning_rate": 2.782132009487636e-06, "loss": 0.2379, "step": 13100 }, { "epoch": 0.6498834267572796, "grad_norm": 14.031806945800781, "learning_rate": 2.7814266166768845e-06, "loss": 0.2773, "step": 13101 }, { "epoch": 0.6499330323924798, "grad_norm": 14.271657943725586, "learning_rate": 2.7807212788433892e-06, "loss": 0.5005, "step": 13102 }, { "epoch": 0.64998263802768, "grad_norm": 10.47962760925293, "learning_rate": 2.7800159960046223e-06, "loss": 0.3584, "step": 13103 }, { "epoch": 0.6500322436628801, "grad_norm": 14.572608947753906, "learning_rate": 2.779310768178064e-06, "loss": 0.3162, "step": 13104 }, { "epoch": 0.6500818492980802, "grad_norm": 7.533677101135254, "learning_rate": 2.7786055953811886e-06, "loss": 0.2974, "step": 13105 }, { "epoch": 0.6501314549332804, "grad_norm": 10.051874160766602, "learning_rate": 2.7779004776314733e-06, "loss": 0.34, "step": 13106 }, { "epoch": 0.6501810605684806, "grad_norm": 5.791616916656494, "learning_rate": 2.777195414946386e-06, "loss": 0.3479, "step": 13107 }, { "epoch": 0.6502306662036808, "grad_norm": 6.547043323516846, "learning_rate": 2.776490407343405e-06, "loss": 0.2544, "step": 13108 }, { "epoch": 0.6502802718388809, "grad_norm": 8.760059356689453, "learning_rate": 2.7757854548399964e-06, "loss": 0.3263, "step": 13109 }, { "epoch": 0.6503298774740811, "grad_norm": 7.9293107986450195, "learning_rate": 2.7750805574536298e-06, "loss": 0.3012, "step": 13110 }, { "epoch": 0.6503794831092812, "grad_norm": 5.174124240875244, "learning_rate": 2.7743757152017736e-06, "loss": 0.2453, "step": 13111 }, { "epoch": 0.6504290887444814, "grad_norm": 9.667176246643066, "learning_rate": 2.7736709281018957e-06, "loss": 0.2815, "step": 13112 }, { "epoch": 0.6504786943796815, "grad_norm": 11.620325088500977, "learning_rate": 2.7729661961714553e-06, "loss": 0.3301, "step": 13113 }, { "epoch": 0.6505283000148817, "grad_norm": 4.871365070343018, "learning_rate": 2.7722615194279244e-06, "loss": 0.2658, "step": 13114 }, { "epoch": 0.6505779056500819, "grad_norm": 16.840696334838867, "learning_rate": 2.7715568978887587e-06, "loss": 0.4926, "step": 13115 }, { "epoch": 0.650627511285282, "grad_norm": 8.331253051757812, "learning_rate": 2.7708523315714207e-06, "loss": 0.2751, "step": 13116 }, { "epoch": 0.6506771169204821, "grad_norm": 7.0203704833984375, "learning_rate": 2.7701478204933706e-06, "loss": 0.2459, "step": 13117 }, { "epoch": 0.6507267225556823, "grad_norm": 9.320216178894043, "learning_rate": 2.769443364672066e-06, "loss": 0.3237, "step": 13118 }, { "epoch": 0.6507763281908825, "grad_norm": 7.537779808044434, "learning_rate": 2.768738964124964e-06, "loss": 0.2697, "step": 13119 }, { "epoch": 0.6508259338260827, "grad_norm": 17.347299575805664, "learning_rate": 2.7680346188695196e-06, "loss": 0.3832, "step": 13120 }, { "epoch": 0.6508755394612828, "grad_norm": 7.260290622711182, "learning_rate": 2.7673303289231868e-06, "loss": 0.28, "step": 13121 }, { "epoch": 0.6509251450964829, "grad_norm": 19.06055450439453, "learning_rate": 2.7666260943034207e-06, "loss": 0.3108, "step": 13122 }, { "epoch": 0.6509747507316831, "grad_norm": 6.889955043792725, "learning_rate": 2.7659219150276684e-06, "loss": 0.2755, "step": 13123 }, { "epoch": 0.6510243563668833, "grad_norm": 6.1738972663879395, "learning_rate": 2.765217791113381e-06, "loss": 0.3005, "step": 13124 }, { "epoch": 0.6510739620020835, "grad_norm": 7.802089691162109, "learning_rate": 2.7645137225780085e-06, "loss": 0.2617, "step": 13125 }, { "epoch": 0.6511235676372836, "grad_norm": 11.511022567749023, "learning_rate": 2.763809709438996e-06, "loss": 0.3693, "step": 13126 }, { "epoch": 0.6511731732724838, "grad_norm": 13.708209991455078, "learning_rate": 2.7631057517137918e-06, "loss": 0.2681, "step": 13127 }, { "epoch": 0.6512227789076839, "grad_norm": 13.530247688293457, "learning_rate": 2.762401849419838e-06, "loss": 0.3162, "step": 13128 }, { "epoch": 0.651272384542884, "grad_norm": 3.9702086448669434, "learning_rate": 2.761698002574581e-06, "loss": 0.2516, "step": 13129 }, { "epoch": 0.6513219901780842, "grad_norm": 8.21789836883545, "learning_rate": 2.7609942111954556e-06, "loss": 0.3124, "step": 13130 }, { "epoch": 0.6513715958132844, "grad_norm": 7.750948905944824, "learning_rate": 2.7602904752999106e-06, "loss": 0.3751, "step": 13131 }, { "epoch": 0.6514212014484846, "grad_norm": 6.13088846206665, "learning_rate": 2.759586794905379e-06, "loss": 0.2635, "step": 13132 }, { "epoch": 0.6514708070836847, "grad_norm": 6.206066131591797, "learning_rate": 2.7588831700292994e-06, "loss": 0.3167, "step": 13133 }, { "epoch": 0.6515204127188848, "grad_norm": 10.064522743225098, "learning_rate": 2.7581796006891094e-06, "loss": 0.3158, "step": 13134 }, { "epoch": 0.651570018354085, "grad_norm": 5.640672206878662, "learning_rate": 2.7574760869022427e-06, "loss": 0.2571, "step": 13135 }, { "epoch": 0.6516196239892852, "grad_norm": 5.509579181671143, "learning_rate": 2.756772628686133e-06, "loss": 0.2123, "step": 13136 }, { "epoch": 0.6516692296244854, "grad_norm": 5.186765670776367, "learning_rate": 2.7560692260582146e-06, "loss": 0.3066, "step": 13137 }, { "epoch": 0.6517188352596855, "grad_norm": 6.174731254577637, "learning_rate": 2.755365879035911e-06, "loss": 0.3392, "step": 13138 }, { "epoch": 0.6517684408948856, "grad_norm": 7.779106140136719, "learning_rate": 2.7546625876366604e-06, "loss": 0.3421, "step": 13139 }, { "epoch": 0.6518180465300858, "grad_norm": 10.69587230682373, "learning_rate": 2.753959351877885e-06, "loss": 0.2281, "step": 13140 }, { "epoch": 0.651867652165286, "grad_norm": 9.640599250793457, "learning_rate": 2.753256171777013e-06, "loss": 0.3627, "step": 13141 }, { "epoch": 0.6519172578004861, "grad_norm": 4.867688179016113, "learning_rate": 2.7525530473514693e-06, "loss": 0.2857, "step": 13142 }, { "epoch": 0.6519668634356863, "grad_norm": 5.623103618621826, "learning_rate": 2.751849978618679e-06, "loss": 0.2883, "step": 13143 }, { "epoch": 0.6520164690708865, "grad_norm": 6.766514301300049, "learning_rate": 2.75114696559606e-06, "loss": 0.1827, "step": 13144 }, { "epoch": 0.6520660747060866, "grad_norm": 15.220476150512695, "learning_rate": 2.75044400830104e-06, "loss": 0.3863, "step": 13145 }, { "epoch": 0.6521156803412868, "grad_norm": 9.82820987701416, "learning_rate": 2.749741106751034e-06, "loss": 0.3761, "step": 13146 }, { "epoch": 0.6521652859764869, "grad_norm": 6.686192035675049, "learning_rate": 2.7490382609634603e-06, "loss": 0.3568, "step": 13147 }, { "epoch": 0.6522148916116871, "grad_norm": 10.050235748291016, "learning_rate": 2.7483354709557362e-06, "loss": 0.436, "step": 13148 }, { "epoch": 0.6522644972468873, "grad_norm": 11.362510681152344, "learning_rate": 2.7476327367452805e-06, "loss": 0.3206, "step": 13149 }, { "epoch": 0.6523141028820874, "grad_norm": 7.714383125305176, "learning_rate": 2.7469300583495005e-06, "loss": 0.3022, "step": 13150 }, { "epoch": 0.6523637085172875, "grad_norm": 8.434412002563477, "learning_rate": 2.7462274357858163e-06, "loss": 0.339, "step": 13151 }, { "epoch": 0.6524133141524877, "grad_norm": 5.80583381652832, "learning_rate": 2.7455248690716345e-06, "loss": 0.3007, "step": 13152 }, { "epoch": 0.6524629197876879, "grad_norm": 5.888000011444092, "learning_rate": 2.744822358224366e-06, "loss": 0.3195, "step": 13153 }, { "epoch": 0.6525125254228881, "grad_norm": 8.525125503540039, "learning_rate": 2.7441199032614198e-06, "loss": 0.304, "step": 13154 }, { "epoch": 0.6525621310580882, "grad_norm": 4.495177745819092, "learning_rate": 2.743417504200202e-06, "loss": 0.257, "step": 13155 }, { "epoch": 0.6526117366932883, "grad_norm": 3.408655881881714, "learning_rate": 2.74271516105812e-06, "loss": 0.2333, "step": 13156 }, { "epoch": 0.6526613423284885, "grad_norm": 9.222383499145508, "learning_rate": 2.7420128738525787e-06, "loss": 0.2604, "step": 13157 }, { "epoch": 0.6527109479636887, "grad_norm": 11.465506553649902, "learning_rate": 2.741310642600976e-06, "loss": 0.2499, "step": 13158 }, { "epoch": 0.6527605535988888, "grad_norm": 6.796521186828613, "learning_rate": 2.740608467320722e-06, "loss": 0.3488, "step": 13159 }, { "epoch": 0.652810159234089, "grad_norm": 11.605650901794434, "learning_rate": 2.7399063480292095e-06, "loss": 0.3504, "step": 13160 }, { "epoch": 0.6528597648692892, "grad_norm": 10.653334617614746, "learning_rate": 2.7392042847438405e-06, "loss": 0.3349, "step": 13161 }, { "epoch": 0.6529093705044893, "grad_norm": 6.225457191467285, "learning_rate": 2.7385022774820115e-06, "loss": 0.2466, "step": 13162 }, { "epoch": 0.6529589761396895, "grad_norm": 6.129083156585693, "learning_rate": 2.7378003262611208e-06, "loss": 0.2694, "step": 13163 }, { "epoch": 0.6530085817748896, "grad_norm": 6.06719970703125, "learning_rate": 2.737098431098558e-06, "loss": 0.3117, "step": 13164 }, { "epoch": 0.6530581874100898, "grad_norm": 4.036093235015869, "learning_rate": 2.7363965920117235e-06, "loss": 0.1996, "step": 13165 }, { "epoch": 0.65310779304529, "grad_norm": 8.165752410888672, "learning_rate": 2.7356948090180037e-06, "loss": 0.3288, "step": 13166 }, { "epoch": 0.65315739868049, "grad_norm": 9.258622169494629, "learning_rate": 2.7349930821347902e-06, "loss": 0.3213, "step": 13167 }, { "epoch": 0.6532070043156902, "grad_norm": 6.272636413574219, "learning_rate": 2.7342914113794726e-06, "loss": 0.1977, "step": 13168 }, { "epoch": 0.6532566099508904, "grad_norm": 4.786701202392578, "learning_rate": 2.733589796769439e-06, "loss": 0.2841, "step": 13169 }, { "epoch": 0.6533062155860906, "grad_norm": 6.935171127319336, "learning_rate": 2.7328882383220754e-06, "loss": 0.3553, "step": 13170 }, { "epoch": 0.6533558212212908, "grad_norm": 9.429559707641602, "learning_rate": 2.732186736054769e-06, "loss": 0.3428, "step": 13171 }, { "epoch": 0.653405426856491, "grad_norm": 6.445057392120361, "learning_rate": 2.7314852899848976e-06, "loss": 0.3851, "step": 13172 }, { "epoch": 0.653455032491691, "grad_norm": 9.478135108947754, "learning_rate": 2.7307839001298504e-06, "loss": 0.4389, "step": 13173 }, { "epoch": 0.6535046381268912, "grad_norm": 6.418191432952881, "learning_rate": 2.7300825665070026e-06, "loss": 0.2698, "step": 13174 }, { "epoch": 0.6535542437620914, "grad_norm": 7.339033603668213, "learning_rate": 2.7293812891337358e-06, "loss": 0.2433, "step": 13175 }, { "epoch": 0.6536038493972915, "grad_norm": 4.9111762046813965, "learning_rate": 2.7286800680274273e-06, "loss": 0.1373, "step": 13176 }, { "epoch": 0.6536534550324917, "grad_norm": 5.923558712005615, "learning_rate": 2.727978903205454e-06, "loss": 0.2975, "step": 13177 }, { "epoch": 0.6537030606676919, "grad_norm": 11.40497875213623, "learning_rate": 2.7272777946851915e-06, "loss": 0.384, "step": 13178 }, { "epoch": 0.653752666302892, "grad_norm": 8.107470512390137, "learning_rate": 2.726576742484014e-06, "loss": 0.3439, "step": 13179 }, { "epoch": 0.6538022719380921, "grad_norm": 7.031197547912598, "learning_rate": 2.725875746619294e-06, "loss": 0.2689, "step": 13180 }, { "epoch": 0.6538518775732923, "grad_norm": 5.837532997131348, "learning_rate": 2.7251748071084007e-06, "loss": 0.2604, "step": 13181 }, { "epoch": 0.6539014832084925, "grad_norm": 12.967511177062988, "learning_rate": 2.724473923968704e-06, "loss": 0.1959, "step": 13182 }, { "epoch": 0.6539510888436927, "grad_norm": 6.9413676261901855, "learning_rate": 2.723773097217573e-06, "loss": 0.2494, "step": 13183 }, { "epoch": 0.6540006944788928, "grad_norm": 5.052828788757324, "learning_rate": 2.7230723268723745e-06, "loss": 0.2719, "step": 13184 }, { "epoch": 0.6540503001140929, "grad_norm": 5.64795446395874, "learning_rate": 2.7223716129504734e-06, "loss": 0.2919, "step": 13185 }, { "epoch": 0.6540999057492931, "grad_norm": 9.706986427307129, "learning_rate": 2.7216709554692367e-06, "loss": 0.3894, "step": 13186 }, { "epoch": 0.6541495113844933, "grad_norm": 4.101590633392334, "learning_rate": 2.7209703544460197e-06, "loss": 0.3152, "step": 13187 }, { "epoch": 0.6541991170196935, "grad_norm": 4.27155876159668, "learning_rate": 2.7202698098981917e-06, "loss": 0.2149, "step": 13188 }, { "epoch": 0.6542487226548936, "grad_norm": 6.085710525512695, "learning_rate": 2.7195693218431063e-06, "loss": 0.3329, "step": 13189 }, { "epoch": 0.6542983282900937, "grad_norm": 8.848878860473633, "learning_rate": 2.7188688902981276e-06, "loss": 0.3237, "step": 13190 }, { "epoch": 0.6543479339252939, "grad_norm": 9.789176940917969, "learning_rate": 2.718168515280608e-06, "loss": 0.2464, "step": 13191 }, { "epoch": 0.6543975395604941, "grad_norm": 4.604921817779541, "learning_rate": 2.717468196807905e-06, "loss": 0.3175, "step": 13192 }, { "epoch": 0.6544471451956942, "grad_norm": 12.737570762634277, "learning_rate": 2.716767934897373e-06, "loss": 0.3497, "step": 13193 }, { "epoch": 0.6544967508308944, "grad_norm": 5.840379238128662, "learning_rate": 2.7160677295663655e-06, "loss": 0.2102, "step": 13194 }, { "epoch": 0.6545463564660946, "grad_norm": 8.679987907409668, "learning_rate": 2.7153675808322294e-06, "loss": 0.2224, "step": 13195 }, { "epoch": 0.6545959621012947, "grad_norm": 4.261106014251709, "learning_rate": 2.7146674887123215e-06, "loss": 0.197, "step": 13196 }, { "epoch": 0.6546455677364948, "grad_norm": 8.349296569824219, "learning_rate": 2.7139674532239858e-06, "loss": 0.2773, "step": 13197 }, { "epoch": 0.654695173371695, "grad_norm": 15.795682907104492, "learning_rate": 2.7132674743845698e-06, "loss": 0.3556, "step": 13198 }, { "epoch": 0.6547447790068952, "grad_norm": 7.639626502990723, "learning_rate": 2.712567552211421e-06, "loss": 0.3735, "step": 13199 }, { "epoch": 0.6547943846420954, "grad_norm": 6.8134260177612305, "learning_rate": 2.711867686721885e-06, "loss": 0.3834, "step": 13200 }, { "epoch": 0.6548439902772955, "grad_norm": 7.570075988769531, "learning_rate": 2.711167877933298e-06, "loss": 0.2216, "step": 13201 }, { "epoch": 0.6548935959124956, "grad_norm": 5.591924667358398, "learning_rate": 2.7104681258630106e-06, "loss": 0.17, "step": 13202 }, { "epoch": 0.6549432015476958, "grad_norm": 5.912632465362549, "learning_rate": 2.7097684305283566e-06, "loss": 0.2561, "step": 13203 }, { "epoch": 0.654992807182896, "grad_norm": 9.68108081817627, "learning_rate": 2.7090687919466775e-06, "loss": 0.3031, "step": 13204 }, { "epoch": 0.6550424128180962, "grad_norm": 6.849318981170654, "learning_rate": 2.70836921013531e-06, "loss": 0.2182, "step": 13205 }, { "epoch": 0.6550920184532963, "grad_norm": 8.337479591369629, "learning_rate": 2.7076696851115915e-06, "loss": 0.302, "step": 13206 }, { "epoch": 0.6551416240884964, "grad_norm": 7.162037372589111, "learning_rate": 2.7069702168928515e-06, "loss": 0.3176, "step": 13207 }, { "epoch": 0.6551912297236966, "grad_norm": 9.07577896118164, "learning_rate": 2.7062708054964305e-06, "loss": 0.2769, "step": 13208 }, { "epoch": 0.6552408353588968, "grad_norm": 5.046435356140137, "learning_rate": 2.7055714509396526e-06, "loss": 0.2567, "step": 13209 }, { "epoch": 0.655290440994097, "grad_norm": 5.990900993347168, "learning_rate": 2.704872153239856e-06, "loss": 0.2881, "step": 13210 }, { "epoch": 0.6553400466292971, "grad_norm": 5.261070728302002, "learning_rate": 2.7041729124143633e-06, "loss": 0.2711, "step": 13211 }, { "epoch": 0.6553896522644973, "grad_norm": 5.416801929473877, "learning_rate": 2.7034737284805053e-06, "loss": 0.1789, "step": 13212 }, { "epoch": 0.6554392578996974, "grad_norm": 12.477487564086914, "learning_rate": 2.7027746014556066e-06, "loss": 0.3686, "step": 13213 }, { "epoch": 0.6554888635348975, "grad_norm": 7.099362373352051, "learning_rate": 2.702075531356994e-06, "loss": 0.2198, "step": 13214 }, { "epoch": 0.6555384691700977, "grad_norm": 14.958950996398926, "learning_rate": 2.701376518201986e-06, "loss": 0.3014, "step": 13215 }, { "epoch": 0.6555880748052979, "grad_norm": 5.937378883361816, "learning_rate": 2.7006775620079112e-06, "loss": 0.3102, "step": 13216 }, { "epoch": 0.6556376804404981, "grad_norm": 5.056881904602051, "learning_rate": 2.6999786627920845e-06, "loss": 0.2468, "step": 13217 }, { "epoch": 0.6556872860756982, "grad_norm": 13.685261726379395, "learning_rate": 2.6992798205718275e-06, "loss": 0.3824, "step": 13218 }, { "epoch": 0.6557368917108983, "grad_norm": 7.690862655639648, "learning_rate": 2.6985810353644565e-06, "loss": 0.2596, "step": 13219 }, { "epoch": 0.6557864973460985, "grad_norm": 4.7462286949157715, "learning_rate": 2.6978823071872908e-06, "loss": 0.2591, "step": 13220 }, { "epoch": 0.6558361029812987, "grad_norm": 6.897166728973389, "learning_rate": 2.6971836360576386e-06, "loss": 0.2948, "step": 13221 }, { "epoch": 0.6558857086164989, "grad_norm": 32.77227020263672, "learning_rate": 2.696485021992822e-06, "loss": 0.6191, "step": 13222 }, { "epoch": 0.655935314251699, "grad_norm": 27.111780166625977, "learning_rate": 2.695786465010146e-06, "loss": 0.4227, "step": 13223 }, { "epoch": 0.6559849198868991, "grad_norm": 11.367124557495117, "learning_rate": 2.6950879651269246e-06, "loss": 0.2321, "step": 13224 }, { "epoch": 0.6560345255220993, "grad_norm": 3.504236936569214, "learning_rate": 2.6943895223604653e-06, "loss": 0.1911, "step": 13225 }, { "epoch": 0.6560841311572995, "grad_norm": 11.450220108032227, "learning_rate": 2.693691136728077e-06, "loss": 0.3429, "step": 13226 }, { "epoch": 0.6561337367924996, "grad_norm": 9.83563232421875, "learning_rate": 2.6929928082470656e-06, "loss": 0.3131, "step": 13227 }, { "epoch": 0.6561833424276998, "grad_norm": 7.003175258636475, "learning_rate": 2.6922945369347357e-06, "loss": 0.302, "step": 13228 }, { "epoch": 0.6562329480628999, "grad_norm": 9.019728660583496, "learning_rate": 2.6915963228083915e-06, "loss": 0.2143, "step": 13229 }, { "epoch": 0.6562825536981001, "grad_norm": 11.886300086975098, "learning_rate": 2.690898165885336e-06, "loss": 0.3408, "step": 13230 }, { "epoch": 0.6563321593333002, "grad_norm": 10.495363235473633, "learning_rate": 2.690200066182867e-06, "loss": 0.3201, "step": 13231 }, { "epoch": 0.6563817649685004, "grad_norm": 7.0301408767700195, "learning_rate": 2.6895020237182844e-06, "loss": 0.3222, "step": 13232 }, { "epoch": 0.6564313706037006, "grad_norm": 5.758048057556152, "learning_rate": 2.6888040385088877e-06, "loss": 0.2676, "step": 13233 }, { "epoch": 0.6564809762389008, "grad_norm": 8.489357948303223, "learning_rate": 2.6881061105719718e-06, "loss": 0.3451, "step": 13234 }, { "epoch": 0.6565305818741008, "grad_norm": 6.95176362991333, "learning_rate": 2.687408239924833e-06, "loss": 0.3103, "step": 13235 }, { "epoch": 0.656580187509301, "grad_norm": 4.741576671600342, "learning_rate": 2.686710426584764e-06, "loss": 0.2508, "step": 13236 }, { "epoch": 0.6566297931445012, "grad_norm": 8.409140586853027, "learning_rate": 2.686012670569058e-06, "loss": 0.3955, "step": 13237 }, { "epoch": 0.6566793987797014, "grad_norm": 6.009397029876709, "learning_rate": 2.685314971895002e-06, "loss": 0.2748, "step": 13238 }, { "epoch": 0.6567290044149016, "grad_norm": 6.510936260223389, "learning_rate": 2.684617330579891e-06, "loss": 0.2061, "step": 13239 }, { "epoch": 0.6567786100501017, "grad_norm": 6.723236083984375, "learning_rate": 2.6839197466410084e-06, "loss": 0.2259, "step": 13240 }, { "epoch": 0.6568282156853018, "grad_norm": 3.8948628902435303, "learning_rate": 2.6832222200956424e-06, "loss": 0.1987, "step": 13241 }, { "epoch": 0.656877821320502, "grad_norm": 20.164392471313477, "learning_rate": 2.6825247509610774e-06, "loss": 0.4398, "step": 13242 }, { "epoch": 0.6569274269557022, "grad_norm": 9.685561180114746, "learning_rate": 2.681827339254598e-06, "loss": 0.3604, "step": 13243 }, { "epoch": 0.6569770325909023, "grad_norm": 8.71842098236084, "learning_rate": 2.6811299849934845e-06, "loss": 0.348, "step": 13244 }, { "epoch": 0.6570266382261025, "grad_norm": 12.481903076171875, "learning_rate": 2.680432688195022e-06, "loss": 0.2819, "step": 13245 }, { "epoch": 0.6570762438613026, "grad_norm": 6.842386245727539, "learning_rate": 2.679735448876483e-06, "loss": 0.2578, "step": 13246 }, { "epoch": 0.6571258494965028, "grad_norm": 6.0630202293396, "learning_rate": 2.6790382670551517e-06, "loss": 0.2645, "step": 13247 }, { "epoch": 0.657175455131703, "grad_norm": 12.475591659545898, "learning_rate": 2.6783411427483013e-06, "loss": 0.2665, "step": 13248 }, { "epoch": 0.6572250607669031, "grad_norm": 13.175823211669922, "learning_rate": 2.677644075973207e-06, "loss": 0.3767, "step": 13249 }, { "epoch": 0.6572746664021033, "grad_norm": 11.351655006408691, "learning_rate": 2.676947066747143e-06, "loss": 0.2325, "step": 13250 }, { "epoch": 0.6573242720373035, "grad_norm": 6.577943325042725, "learning_rate": 2.6762501150873843e-06, "loss": 0.2798, "step": 13251 }, { "epoch": 0.6573738776725035, "grad_norm": 5.715504169464111, "learning_rate": 2.6755532210111947e-06, "loss": 0.172, "step": 13252 }, { "epoch": 0.6574234833077037, "grad_norm": 7.931102275848389, "learning_rate": 2.674856384535851e-06, "loss": 0.4375, "step": 13253 }, { "epoch": 0.6574730889429039, "grad_norm": 3.922391891479492, "learning_rate": 2.674159605678616e-06, "loss": 0.251, "step": 13254 }, { "epoch": 0.6575226945781041, "grad_norm": 6.633731365203857, "learning_rate": 2.6734628844567582e-06, "loss": 0.2634, "step": 13255 }, { "epoch": 0.6575723002133043, "grad_norm": 8.280743598937988, "learning_rate": 2.6727662208875426e-06, "loss": 0.2935, "step": 13256 }, { "epoch": 0.6576219058485044, "grad_norm": 8.173355102539062, "learning_rate": 2.6720696149882354e-06, "loss": 0.3665, "step": 13257 }, { "epoch": 0.6576715114837045, "grad_norm": 6.459513187408447, "learning_rate": 2.6713730667760913e-06, "loss": 0.3542, "step": 13258 }, { "epoch": 0.6577211171189047, "grad_norm": 11.600616455078125, "learning_rate": 2.67067657626838e-06, "loss": 0.3165, "step": 13259 }, { "epoch": 0.6577707227541049, "grad_norm": 6.0395073890686035, "learning_rate": 2.669980143482353e-06, "loss": 0.1994, "step": 13260 }, { "epoch": 0.657820328389305, "grad_norm": 6.834393501281738, "learning_rate": 2.6692837684352762e-06, "loss": 0.3276, "step": 13261 }, { "epoch": 0.6578699340245052, "grad_norm": 5.395860195159912, "learning_rate": 2.6685874511443997e-06, "loss": 0.2551, "step": 13262 }, { "epoch": 0.6579195396597053, "grad_norm": 24.245136260986328, "learning_rate": 2.6678911916269812e-06, "loss": 0.3012, "step": 13263 }, { "epoch": 0.6579691452949055, "grad_norm": 12.864550590515137, "learning_rate": 2.6671949899002736e-06, "loss": 0.3865, "step": 13264 }, { "epoch": 0.6580187509301056, "grad_norm": 8.627511024475098, "learning_rate": 2.666498845981531e-06, "loss": 0.3598, "step": 13265 }, { "epoch": 0.6580683565653058, "grad_norm": 18.836849212646484, "learning_rate": 2.6658027598879988e-06, "loss": 0.3404, "step": 13266 }, { "epoch": 0.658117962200506, "grad_norm": 9.944985389709473, "learning_rate": 2.6651067316369337e-06, "loss": 0.4095, "step": 13267 }, { "epoch": 0.6581675678357062, "grad_norm": 11.95029354095459, "learning_rate": 2.6644107612455792e-06, "loss": 0.334, "step": 13268 }, { "epoch": 0.6582171734709062, "grad_norm": 12.400373458862305, "learning_rate": 2.6637148487311814e-06, "loss": 0.3099, "step": 13269 }, { "epoch": 0.6582667791061064, "grad_norm": 5.660275936126709, "learning_rate": 2.6630189941109874e-06, "loss": 0.3508, "step": 13270 }, { "epoch": 0.6583163847413066, "grad_norm": 8.00371265411377, "learning_rate": 2.6623231974022412e-06, "loss": 0.2908, "step": 13271 }, { "epoch": 0.6583659903765068, "grad_norm": 12.847701072692871, "learning_rate": 2.6616274586221795e-06, "loss": 0.4554, "step": 13272 }, { "epoch": 0.658415596011707, "grad_norm": 7.837474346160889, "learning_rate": 2.6609317777880517e-06, "loss": 0.3214, "step": 13273 }, { "epoch": 0.6584652016469071, "grad_norm": 7.083164215087891, "learning_rate": 2.66023615491709e-06, "loss": 0.2977, "step": 13274 }, { "epoch": 0.6585148072821072, "grad_norm": 21.17890167236328, "learning_rate": 2.6595405900265348e-06, "loss": 0.3556, "step": 13275 }, { "epoch": 0.6585644129173074, "grad_norm": 9.556836128234863, "learning_rate": 2.658845083133623e-06, "loss": 0.3343, "step": 13276 }, { "epoch": 0.6586140185525076, "grad_norm": 6.172406196594238, "learning_rate": 2.6581496342555887e-06, "loss": 0.297, "step": 13277 }, { "epoch": 0.6586636241877077, "grad_norm": 13.43008041381836, "learning_rate": 2.657454243409665e-06, "loss": 0.3667, "step": 13278 }, { "epoch": 0.6587132298229079, "grad_norm": 5.925766944885254, "learning_rate": 2.6567589106130883e-06, "loss": 0.3435, "step": 13279 }, { "epoch": 0.658762835458108, "grad_norm": 11.642925262451172, "learning_rate": 2.6560636358830803e-06, "loss": 0.3551, "step": 13280 }, { "epoch": 0.6588124410933082, "grad_norm": 8.30454158782959, "learning_rate": 2.655368419236881e-06, "loss": 0.3607, "step": 13281 }, { "epoch": 0.6588620467285083, "grad_norm": 8.355473518371582, "learning_rate": 2.65467326069171e-06, "loss": 0.269, "step": 13282 }, { "epoch": 0.6589116523637085, "grad_norm": 4.410279273986816, "learning_rate": 2.653978160264797e-06, "loss": 0.2369, "step": 13283 }, { "epoch": 0.6589612579989087, "grad_norm": 7.502536773681641, "learning_rate": 2.6532831179733664e-06, "loss": 0.3089, "step": 13284 }, { "epoch": 0.6590108636341089, "grad_norm": 5.037219524383545, "learning_rate": 2.6525881338346416e-06, "loss": 0.2902, "step": 13285 }, { "epoch": 0.659060469269309, "grad_norm": 7.635717868804932, "learning_rate": 2.6518932078658443e-06, "loss": 0.2693, "step": 13286 }, { "epoch": 0.6591100749045091, "grad_norm": 15.920886993408203, "learning_rate": 2.6511983400841955e-06, "loss": 0.3236, "step": 13287 }, { "epoch": 0.6591596805397093, "grad_norm": 11.106342315673828, "learning_rate": 2.650503530506917e-06, "loss": 0.2677, "step": 13288 }, { "epoch": 0.6592092861749095, "grad_norm": 7.461771011352539, "learning_rate": 2.649808779151221e-06, "loss": 0.3447, "step": 13289 }, { "epoch": 0.6592588918101097, "grad_norm": 17.16832160949707, "learning_rate": 2.649114086034327e-06, "loss": 0.465, "step": 13290 }, { "epoch": 0.6593084974453098, "grad_norm": 5.615340232849121, "learning_rate": 2.6484194511734498e-06, "loss": 0.1805, "step": 13291 }, { "epoch": 0.6593581030805099, "grad_norm": 6.719357490539551, "learning_rate": 2.6477248745858015e-06, "loss": 0.2089, "step": 13292 }, { "epoch": 0.6594077087157101, "grad_norm": 11.308769226074219, "learning_rate": 2.6470303562885954e-06, "loss": 0.3168, "step": 13293 }, { "epoch": 0.6594573143509103, "grad_norm": 6.357223033905029, "learning_rate": 2.6463358962990426e-06, "loss": 0.3358, "step": 13294 }, { "epoch": 0.6595069199861104, "grad_norm": 4.6408796310424805, "learning_rate": 2.6456414946343477e-06, "loss": 0.3171, "step": 13295 }, { "epoch": 0.6595565256213106, "grad_norm": 6.251112461090088, "learning_rate": 2.644947151311726e-06, "loss": 0.3313, "step": 13296 }, { "epoch": 0.6596061312565107, "grad_norm": 5.810761451721191, "learning_rate": 2.6442528663483768e-06, "loss": 0.3201, "step": 13297 }, { "epoch": 0.6596557368917109, "grad_norm": 6.260602951049805, "learning_rate": 2.6435586397615063e-06, "loss": 0.2907, "step": 13298 }, { "epoch": 0.659705342526911, "grad_norm": 6.0533013343811035, "learning_rate": 2.6428644715683194e-06, "loss": 0.3607, "step": 13299 }, { "epoch": 0.6597549481621112, "grad_norm": 6.045809745788574, "learning_rate": 2.642170361786017e-06, "loss": 0.208, "step": 13300 }, { "epoch": 0.6598045537973114, "grad_norm": 8.749139785766602, "learning_rate": 2.6414763104318003e-06, "loss": 0.323, "step": 13301 }, { "epoch": 0.6598541594325116, "grad_norm": 5.4021100997924805, "learning_rate": 2.6407823175228687e-06, "loss": 0.2543, "step": 13302 }, { "epoch": 0.6599037650677116, "grad_norm": 7.64933443069458, "learning_rate": 2.6400883830764145e-06, "loss": 0.2802, "step": 13303 }, { "epoch": 0.6599533707029118, "grad_norm": 10.316797256469727, "learning_rate": 2.6393945071096422e-06, "loss": 0.4142, "step": 13304 }, { "epoch": 0.660002976338112, "grad_norm": 5.586462020874023, "learning_rate": 2.6387006896397395e-06, "loss": 0.3016, "step": 13305 }, { "epoch": 0.6600525819733122, "grad_norm": 8.80262279510498, "learning_rate": 2.638006930683902e-06, "loss": 0.3448, "step": 13306 }, { "epoch": 0.6601021876085124, "grad_norm": 6.0102081298828125, "learning_rate": 2.637313230259321e-06, "loss": 0.2119, "step": 13307 }, { "epoch": 0.6601517932437125, "grad_norm": 10.354825973510742, "learning_rate": 2.636619588383189e-06, "loss": 0.2734, "step": 13308 }, { "epoch": 0.6602013988789126, "grad_norm": 5.400297164916992, "learning_rate": 2.635926005072689e-06, "loss": 0.2503, "step": 13309 }, { "epoch": 0.6602510045141128, "grad_norm": 6.04689884185791, "learning_rate": 2.6352324803450146e-06, "loss": 0.2648, "step": 13310 }, { "epoch": 0.660300610149313, "grad_norm": 6.882610321044922, "learning_rate": 2.634539014217349e-06, "loss": 0.2961, "step": 13311 }, { "epoch": 0.6603502157845131, "grad_norm": 10.280855178833008, "learning_rate": 2.633845606706875e-06, "loss": 0.2797, "step": 13312 }, { "epoch": 0.6603998214197133, "grad_norm": 6.065847873687744, "learning_rate": 2.633152257830778e-06, "loss": 0.2899, "step": 13313 }, { "epoch": 0.6604494270549134, "grad_norm": 8.694743156433105, "learning_rate": 2.6324589676062385e-06, "loss": 0.2235, "step": 13314 }, { "epoch": 0.6604990326901136, "grad_norm": 7.55926513671875, "learning_rate": 2.6317657360504367e-06, "loss": 0.2727, "step": 13315 }, { "epoch": 0.6605486383253137, "grad_norm": 13.006030082702637, "learning_rate": 2.6310725631805533e-06, "loss": 0.3552, "step": 13316 }, { "epoch": 0.6605982439605139, "grad_norm": 8.378911018371582, "learning_rate": 2.63037944901376e-06, "loss": 0.3978, "step": 13317 }, { "epoch": 0.6606478495957141, "grad_norm": 19.778440475463867, "learning_rate": 2.629686393567239e-06, "loss": 0.5079, "step": 13318 }, { "epoch": 0.6606974552309143, "grad_norm": 7.4514288902282715, "learning_rate": 2.6289933968581595e-06, "loss": 0.3349, "step": 13319 }, { "epoch": 0.6607470608661143, "grad_norm": 6.401858806610107, "learning_rate": 2.6283004589036966e-06, "loss": 0.3508, "step": 13320 }, { "epoch": 0.6607966665013145, "grad_norm": 13.830193519592285, "learning_rate": 2.627607579721021e-06, "loss": 0.4287, "step": 13321 }, { "epoch": 0.6608462721365147, "grad_norm": 4.826198577880859, "learning_rate": 2.626914759327304e-06, "loss": 0.263, "step": 13322 }, { "epoch": 0.6608958777717149, "grad_norm": 9.583441734313965, "learning_rate": 2.6262219977397096e-06, "loss": 0.3295, "step": 13323 }, { "epoch": 0.6609454834069151, "grad_norm": 4.381011962890625, "learning_rate": 2.625529294975412e-06, "loss": 0.265, "step": 13324 }, { "epoch": 0.6609950890421152, "grad_norm": 6.368668556213379, "learning_rate": 2.62483665105157e-06, "loss": 0.2849, "step": 13325 }, { "epoch": 0.6610446946773153, "grad_norm": 6.521590709686279, "learning_rate": 2.62414406598535e-06, "loss": 0.3159, "step": 13326 }, { "epoch": 0.6610943003125155, "grad_norm": 9.822810173034668, "learning_rate": 2.623451539793915e-06, "loss": 0.2713, "step": 13327 }, { "epoch": 0.6611439059477157, "grad_norm": 8.157971382141113, "learning_rate": 2.622759072494428e-06, "loss": 0.2731, "step": 13328 }, { "epoch": 0.6611935115829158, "grad_norm": 10.982401847839355, "learning_rate": 2.622066664104043e-06, "loss": 0.3674, "step": 13329 }, { "epoch": 0.661243117218116, "grad_norm": 5.137200832366943, "learning_rate": 2.621374314639925e-06, "loss": 0.2087, "step": 13330 }, { "epoch": 0.6612927228533161, "grad_norm": 7.5478973388671875, "learning_rate": 2.6206820241192256e-06, "loss": 0.2867, "step": 13331 }, { "epoch": 0.6613423284885163, "grad_norm": 9.501402854919434, "learning_rate": 2.6199897925591016e-06, "loss": 0.2925, "step": 13332 }, { "epoch": 0.6613919341237164, "grad_norm": 3.738182544708252, "learning_rate": 2.6192976199767078e-06, "loss": 0.253, "step": 13333 }, { "epoch": 0.6614415397589166, "grad_norm": 4.998517990112305, "learning_rate": 2.6186055063891956e-06, "loss": 0.2743, "step": 13334 }, { "epoch": 0.6614911453941168, "grad_norm": 6.159597873687744, "learning_rate": 2.6179134518137163e-06, "loss": 0.2626, "step": 13335 }, { "epoch": 0.661540751029317, "grad_norm": 19.323522567749023, "learning_rate": 2.6172214562674193e-06, "loss": 0.3456, "step": 13336 }, { "epoch": 0.661590356664517, "grad_norm": 8.912368774414062, "learning_rate": 2.6165295197674515e-06, "loss": 0.4221, "step": 13337 }, { "epoch": 0.6616399622997172, "grad_norm": 3.836664915084839, "learning_rate": 2.6158376423309633e-06, "loss": 0.3181, "step": 13338 }, { "epoch": 0.6616895679349174, "grad_norm": 7.884790420532227, "learning_rate": 2.615145823975095e-06, "loss": 0.3238, "step": 13339 }, { "epoch": 0.6617391735701176, "grad_norm": 5.35994815826416, "learning_rate": 2.6144540647169913e-06, "loss": 0.3379, "step": 13340 }, { "epoch": 0.6617887792053178, "grad_norm": 6.417898178100586, "learning_rate": 2.6137623645737955e-06, "loss": 0.3292, "step": 13341 }, { "epoch": 0.6618383848405179, "grad_norm": 6.753115653991699, "learning_rate": 2.6130707235626474e-06, "loss": 0.272, "step": 13342 }, { "epoch": 0.661887990475718, "grad_norm": 6.224565029144287, "learning_rate": 2.612379141700686e-06, "loss": 0.3596, "step": 13343 }, { "epoch": 0.6619375961109182, "grad_norm": 8.688526153564453, "learning_rate": 2.6116876190050504e-06, "loss": 0.342, "step": 13344 }, { "epoch": 0.6619872017461184, "grad_norm": 7.4606099128723145, "learning_rate": 2.6109961554928777e-06, "loss": 0.2213, "step": 13345 }, { "epoch": 0.6620368073813185, "grad_norm": 7.077756404876709, "learning_rate": 2.6103047511812983e-06, "loss": 0.288, "step": 13346 }, { "epoch": 0.6620864130165187, "grad_norm": 5.138038635253906, "learning_rate": 2.609613406087449e-06, "loss": 0.3639, "step": 13347 }, { "epoch": 0.6621360186517188, "grad_norm": 6.9134697914123535, "learning_rate": 2.6089221202284608e-06, "loss": 0.1998, "step": 13348 }, { "epoch": 0.662185624286919, "grad_norm": 10.06604290008545, "learning_rate": 2.608230893621464e-06, "loss": 0.3274, "step": 13349 }, { "epoch": 0.6622352299221191, "grad_norm": 5.790460109710693, "learning_rate": 2.6075397262835866e-06, "loss": 0.2864, "step": 13350 }, { "epoch": 0.6622848355573193, "grad_norm": 5.676736354827881, "learning_rate": 2.606848618231958e-06, "loss": 0.2136, "step": 13351 }, { "epoch": 0.6623344411925195, "grad_norm": 5.582919120788574, "learning_rate": 2.6061575694837027e-06, "loss": 0.2831, "step": 13352 }, { "epoch": 0.6623840468277197, "grad_norm": 4.080046653747559, "learning_rate": 2.605466580055948e-06, "loss": 0.244, "step": 13353 }, { "epoch": 0.6624336524629197, "grad_norm": 5.357990264892578, "learning_rate": 2.6047756499658104e-06, "loss": 0.2364, "step": 13354 }, { "epoch": 0.6624832580981199, "grad_norm": 8.385110855102539, "learning_rate": 2.60408477923042e-06, "loss": 0.3589, "step": 13355 }, { "epoch": 0.6625328637333201, "grad_norm": 5.857019901275635, "learning_rate": 2.6033939678668905e-06, "loss": 0.228, "step": 13356 }, { "epoch": 0.6625824693685203, "grad_norm": 9.622618675231934, "learning_rate": 2.602703215892343e-06, "loss": 0.378, "step": 13357 }, { "epoch": 0.6626320750037205, "grad_norm": 8.562016487121582, "learning_rate": 2.602012523323893e-06, "loss": 0.3322, "step": 13358 }, { "epoch": 0.6626816806389206, "grad_norm": 10.009353637695312, "learning_rate": 2.6013218901786596e-06, "loss": 0.3412, "step": 13359 }, { "epoch": 0.6627312862741207, "grad_norm": 6.587222576141357, "learning_rate": 2.6006313164737517e-06, "loss": 0.3043, "step": 13360 }, { "epoch": 0.6627808919093209, "grad_norm": 12.8836669921875, "learning_rate": 2.5999408022262885e-06, "loss": 0.458, "step": 13361 }, { "epoch": 0.6628304975445211, "grad_norm": 7.3524885177612305, "learning_rate": 2.599250347453376e-06, "loss": 0.2509, "step": 13362 }, { "epoch": 0.6628801031797212, "grad_norm": 9.555909156799316, "learning_rate": 2.598559952172126e-06, "loss": 0.2206, "step": 13363 }, { "epoch": 0.6629297088149214, "grad_norm": 8.426440238952637, "learning_rate": 2.5978696163996463e-06, "loss": 0.3097, "step": 13364 }, { "epoch": 0.6629793144501215, "grad_norm": 6.16370964050293, "learning_rate": 2.5971793401530465e-06, "loss": 0.1971, "step": 13365 }, { "epoch": 0.6630289200853217, "grad_norm": 6.500400066375732, "learning_rate": 2.596489123449426e-06, "loss": 0.299, "step": 13366 }, { "epoch": 0.6630785257205218, "grad_norm": 7.598057270050049, "learning_rate": 2.5957989663058954e-06, "loss": 0.1935, "step": 13367 }, { "epoch": 0.663128131355722, "grad_norm": 8.95927619934082, "learning_rate": 2.5951088687395503e-06, "loss": 0.3373, "step": 13368 }, { "epoch": 0.6631777369909222, "grad_norm": 4.608347415924072, "learning_rate": 2.594418830767499e-06, "loss": 0.2948, "step": 13369 }, { "epoch": 0.6632273426261224, "grad_norm": 19.37450408935547, "learning_rate": 2.593728852406836e-06, "loss": 0.3043, "step": 13370 }, { "epoch": 0.6632769482613224, "grad_norm": 4.565113067626953, "learning_rate": 2.5930389336746597e-06, "loss": 0.1989, "step": 13371 }, { "epoch": 0.6633265538965226, "grad_norm": 6.968711853027344, "learning_rate": 2.5923490745880675e-06, "loss": 0.3198, "step": 13372 }, { "epoch": 0.6633761595317228, "grad_norm": 4.72227144241333, "learning_rate": 2.5916592751641565e-06, "loss": 0.2222, "step": 13373 }, { "epoch": 0.663425765166923, "grad_norm": 4.269300937652588, "learning_rate": 2.5909695354200144e-06, "loss": 0.2639, "step": 13374 }, { "epoch": 0.6634753708021232, "grad_norm": 4.0844244956970215, "learning_rate": 2.5902798553727405e-06, "loss": 0.2158, "step": 13375 }, { "epoch": 0.6635249764373233, "grad_norm": 5.2364501953125, "learning_rate": 2.5895902350394196e-06, "loss": 0.2708, "step": 13376 }, { "epoch": 0.6635745820725234, "grad_norm": 5.253846168518066, "learning_rate": 2.5889006744371436e-06, "loss": 0.3047, "step": 13377 }, { "epoch": 0.6636241877077236, "grad_norm": 11.950033187866211, "learning_rate": 2.5882111735829995e-06, "loss": 0.2676, "step": 13378 }, { "epoch": 0.6636737933429238, "grad_norm": 5.550020694732666, "learning_rate": 2.5875217324940754e-06, "loss": 0.3076, "step": 13379 }, { "epoch": 0.6637233989781239, "grad_norm": 10.553128242492676, "learning_rate": 2.5868323511874498e-06, "loss": 0.2883, "step": 13380 }, { "epoch": 0.6637730046133241, "grad_norm": 7.908451557159424, "learning_rate": 2.5861430296802147e-06, "loss": 0.3363, "step": 13381 }, { "epoch": 0.6638226102485242, "grad_norm": 5.973939418792725, "learning_rate": 2.5854537679894443e-06, "loss": 0.1977, "step": 13382 }, { "epoch": 0.6638722158837244, "grad_norm": 6.074922561645508, "learning_rate": 2.584764566132223e-06, "loss": 0.2843, "step": 13383 }, { "epoch": 0.6639218215189245, "grad_norm": 21.993345260620117, "learning_rate": 2.584075424125627e-06, "loss": 0.4298, "step": 13384 }, { "epoch": 0.6639714271541247, "grad_norm": 7.362412929534912, "learning_rate": 2.5833863419867354e-06, "loss": 0.3635, "step": 13385 }, { "epoch": 0.6640210327893249, "grad_norm": 5.198684215545654, "learning_rate": 2.5826973197326233e-06, "loss": 0.3293, "step": 13386 }, { "epoch": 0.6640706384245251, "grad_norm": 9.484452247619629, "learning_rate": 2.582008357380367e-06, "loss": 0.3697, "step": 13387 }, { "epoch": 0.6641202440597251, "grad_norm": 9.3378267288208, "learning_rate": 2.5813194549470343e-06, "loss": 0.3114, "step": 13388 }, { "epoch": 0.6641698496949253, "grad_norm": 5.284551620483398, "learning_rate": 2.5806306124497025e-06, "loss": 0.2337, "step": 13389 }, { "epoch": 0.6642194553301255, "grad_norm": 9.616307258605957, "learning_rate": 2.579941829905437e-06, "loss": 0.2262, "step": 13390 }, { "epoch": 0.6642690609653257, "grad_norm": 8.840410232543945, "learning_rate": 2.5792531073313075e-06, "loss": 0.29, "step": 13391 }, { "epoch": 0.6643186666005259, "grad_norm": 6.1632819175720215, "learning_rate": 2.5785644447443814e-06, "loss": 0.3803, "step": 13392 }, { "epoch": 0.664368272235726, "grad_norm": 6.730862140655518, "learning_rate": 2.5778758421617233e-06, "loss": 0.3672, "step": 13393 }, { "epoch": 0.6644178778709261, "grad_norm": 4.546518802642822, "learning_rate": 2.577187299600397e-06, "loss": 0.2423, "step": 13394 }, { "epoch": 0.6644674835061263, "grad_norm": 5.8108367919921875, "learning_rate": 2.5764988170774684e-06, "loss": 0.2384, "step": 13395 }, { "epoch": 0.6645170891413265, "grad_norm": 6.299357891082764, "learning_rate": 2.5758103946099927e-06, "loss": 0.2492, "step": 13396 }, { "epoch": 0.6645666947765266, "grad_norm": 16.510103225708008, "learning_rate": 2.575122032215032e-06, "loss": 0.3106, "step": 13397 }, { "epoch": 0.6646163004117268, "grad_norm": 9.743369102478027, "learning_rate": 2.5744337299096445e-06, "loss": 0.3182, "step": 13398 }, { "epoch": 0.6646659060469269, "grad_norm": 4.618340492248535, "learning_rate": 2.5737454877108863e-06, "loss": 0.2899, "step": 13399 }, { "epoch": 0.6647155116821271, "grad_norm": 9.389665603637695, "learning_rate": 2.573057305635812e-06, "loss": 0.3613, "step": 13400 }, { "epoch": 0.6647651173173272, "grad_norm": 11.330903053283691, "learning_rate": 2.5723691837014765e-06, "loss": 0.3678, "step": 13401 }, { "epoch": 0.6648147229525274, "grad_norm": 5.748371601104736, "learning_rate": 2.5716811219249323e-06, "loss": 0.235, "step": 13402 }, { "epoch": 0.6648643285877276, "grad_norm": 6.689421653747559, "learning_rate": 2.570993120323224e-06, "loss": 0.318, "step": 13403 }, { "epoch": 0.6649139342229278, "grad_norm": 22.492746353149414, "learning_rate": 2.570305178913409e-06, "loss": 0.3914, "step": 13404 }, { "epoch": 0.6649635398581278, "grad_norm": 5.548938751220703, "learning_rate": 2.569617297712529e-06, "loss": 0.2781, "step": 13405 }, { "epoch": 0.665013145493328, "grad_norm": 6.432525157928467, "learning_rate": 2.5689294767376317e-06, "loss": 0.3241, "step": 13406 }, { "epoch": 0.6650627511285282, "grad_norm": 11.282722473144531, "learning_rate": 2.5682417160057615e-06, "loss": 0.3283, "step": 13407 }, { "epoch": 0.6651123567637284, "grad_norm": 5.279489994049072, "learning_rate": 2.567554015533962e-06, "loss": 0.3098, "step": 13408 }, { "epoch": 0.6651619623989286, "grad_norm": 5.354060649871826, "learning_rate": 2.566866375339274e-06, "loss": 0.2796, "step": 13409 }, { "epoch": 0.6652115680341287, "grad_norm": 8.215617179870605, "learning_rate": 2.5661787954387397e-06, "loss": 0.2061, "step": 13410 }, { "epoch": 0.6652611736693288, "grad_norm": 10.44551944732666, "learning_rate": 2.565491275849392e-06, "loss": 0.3937, "step": 13411 }, { "epoch": 0.665310779304529, "grad_norm": 7.022096157073975, "learning_rate": 2.564803816588276e-06, "loss": 0.2083, "step": 13412 }, { "epoch": 0.6653603849397292, "grad_norm": 10.65783977508545, "learning_rate": 2.5641164176724204e-06, "loss": 0.3954, "step": 13413 }, { "epoch": 0.6654099905749293, "grad_norm": 11.497777938842773, "learning_rate": 2.563429079118862e-06, "loss": 0.3754, "step": 13414 }, { "epoch": 0.6654595962101295, "grad_norm": 9.599352836608887, "learning_rate": 2.5627418009446336e-06, "loss": 0.1757, "step": 13415 }, { "epoch": 0.6655092018453296, "grad_norm": 13.272927284240723, "learning_rate": 2.5620545831667678e-06, "loss": 0.2984, "step": 13416 }, { "epoch": 0.6655588074805298, "grad_norm": 7.650866508483887, "learning_rate": 2.5613674258022876e-06, "loss": 0.3547, "step": 13417 }, { "epoch": 0.6656084131157299, "grad_norm": 5.1910881996154785, "learning_rate": 2.56068032886823e-06, "loss": 0.2599, "step": 13418 }, { "epoch": 0.6656580187509301, "grad_norm": 4.583759784698486, "learning_rate": 2.559993292381615e-06, "loss": 0.2119, "step": 13419 }, { "epoch": 0.6657076243861303, "grad_norm": 8.716450691223145, "learning_rate": 2.5593063163594713e-06, "loss": 0.354, "step": 13420 }, { "epoch": 0.6657572300213305, "grad_norm": 12.143160820007324, "learning_rate": 2.558619400818819e-06, "loss": 0.3271, "step": 13421 }, { "epoch": 0.6658068356565305, "grad_norm": 14.185979843139648, "learning_rate": 2.5579325457766842e-06, "loss": 0.3499, "step": 13422 }, { "epoch": 0.6658564412917307, "grad_norm": 5.882006645202637, "learning_rate": 2.5572457512500847e-06, "loss": 0.2445, "step": 13423 }, { "epoch": 0.6659060469269309, "grad_norm": 6.057684898376465, "learning_rate": 2.556559017256043e-06, "loss": 0.2045, "step": 13424 }, { "epoch": 0.6659556525621311, "grad_norm": 7.495823860168457, "learning_rate": 2.5558723438115695e-06, "loss": 0.3746, "step": 13425 }, { "epoch": 0.6660052581973313, "grad_norm": 5.105282306671143, "learning_rate": 2.5551857309336893e-06, "loss": 0.1989, "step": 13426 }, { "epoch": 0.6660548638325314, "grad_norm": 5.733922004699707, "learning_rate": 2.5544991786394104e-06, "loss": 0.258, "step": 13427 }, { "epoch": 0.6661044694677315, "grad_norm": 10.28453254699707, "learning_rate": 2.553812686945748e-06, "loss": 0.2726, "step": 13428 }, { "epoch": 0.6661540751029317, "grad_norm": 5.852580547332764, "learning_rate": 2.5531262558697133e-06, "loss": 0.2668, "step": 13429 }, { "epoch": 0.6662036807381319, "grad_norm": 6.968725681304932, "learning_rate": 2.5524398854283196e-06, "loss": 0.3158, "step": 13430 }, { "epoch": 0.666253286373332, "grad_norm": 8.280372619628906, "learning_rate": 2.5517535756385685e-06, "loss": 0.2593, "step": 13431 }, { "epoch": 0.6663028920085322, "grad_norm": 5.844257354736328, "learning_rate": 2.5510673265174756e-06, "loss": 0.2807, "step": 13432 }, { "epoch": 0.6663524976437323, "grad_norm": 10.98126220703125, "learning_rate": 2.55038113808204e-06, "loss": 0.3385, "step": 13433 }, { "epoch": 0.6664021032789325, "grad_norm": 6.226656436920166, "learning_rate": 2.5496950103492675e-06, "loss": 0.3646, "step": 13434 }, { "epoch": 0.6664517089141326, "grad_norm": 9.524442672729492, "learning_rate": 2.5490089433361607e-06, "loss": 0.3753, "step": 13435 }, { "epoch": 0.6665013145493328, "grad_norm": 7.476489543914795, "learning_rate": 2.548322937059724e-06, "loss": 0.3813, "step": 13436 }, { "epoch": 0.666550920184533, "grad_norm": 8.241641998291016, "learning_rate": 2.547636991536949e-06, "loss": 0.2799, "step": 13437 }, { "epoch": 0.6666005258197332, "grad_norm": 7.4933061599731445, "learning_rate": 2.5469511067848427e-06, "loss": 0.1765, "step": 13438 }, { "epoch": 0.6666501314549332, "grad_norm": 7.944865703582764, "learning_rate": 2.5462652828203947e-06, "loss": 0.2844, "step": 13439 }, { "epoch": 0.6666997370901334, "grad_norm": 4.746970176696777, "learning_rate": 2.545579519660606e-06, "loss": 0.2316, "step": 13440 }, { "epoch": 0.6667493427253336, "grad_norm": 10.716114044189453, "learning_rate": 2.5448938173224653e-06, "loss": 0.4468, "step": 13441 }, { "epoch": 0.6667989483605338, "grad_norm": 5.874568939208984, "learning_rate": 2.5442081758229665e-06, "loss": 0.2417, "step": 13442 }, { "epoch": 0.666848553995734, "grad_norm": 6.223857402801514, "learning_rate": 2.5435225951791e-06, "loss": 0.287, "step": 13443 }, { "epoch": 0.6668981596309341, "grad_norm": 4.965235710144043, "learning_rate": 2.5428370754078565e-06, "loss": 0.2601, "step": 13444 }, { "epoch": 0.6669477652661342, "grad_norm": 15.43471622467041, "learning_rate": 2.5421516165262178e-06, "loss": 0.4489, "step": 13445 }, { "epoch": 0.6669973709013344, "grad_norm": 5.302281856536865, "learning_rate": 2.541466218551178e-06, "loss": 0.2777, "step": 13446 }, { "epoch": 0.6670469765365346, "grad_norm": 15.171712875366211, "learning_rate": 2.540780881499715e-06, "loss": 0.3219, "step": 13447 }, { "epoch": 0.6670965821717347, "grad_norm": 5.582612991333008, "learning_rate": 2.540095605388815e-06, "loss": 0.2845, "step": 13448 }, { "epoch": 0.6671461878069349, "grad_norm": 4.572443962097168, "learning_rate": 2.539410390235458e-06, "loss": 0.2086, "step": 13449 }, { "epoch": 0.667195793442135, "grad_norm": 12.728325843811035, "learning_rate": 2.538725236056625e-06, "loss": 0.3696, "step": 13450 }, { "epoch": 0.6672453990773352, "grad_norm": 5.220358848571777, "learning_rate": 2.5380401428692936e-06, "loss": 0.2217, "step": 13451 }, { "epoch": 0.6672950047125353, "grad_norm": 7.303105354309082, "learning_rate": 2.5373551106904416e-06, "loss": 0.2828, "step": 13452 }, { "epoch": 0.6673446103477355, "grad_norm": 5.309701919555664, "learning_rate": 2.536670139537045e-06, "loss": 0.3087, "step": 13453 }, { "epoch": 0.6673942159829357, "grad_norm": 5.49746036529541, "learning_rate": 2.5359852294260754e-06, "loss": 0.2151, "step": 13454 }, { "epoch": 0.6674438216181359, "grad_norm": 8.333145141601562, "learning_rate": 2.5353003803745057e-06, "loss": 0.2692, "step": 13455 }, { "epoch": 0.6674934272533359, "grad_norm": 3.8965768814086914, "learning_rate": 2.5346155923993088e-06, "loss": 0.2146, "step": 13456 }, { "epoch": 0.6675430328885361, "grad_norm": 13.781292915344238, "learning_rate": 2.5339308655174516e-06, "loss": 0.3466, "step": 13457 }, { "epoch": 0.6675926385237363, "grad_norm": 10.172515869140625, "learning_rate": 2.5332461997459033e-06, "loss": 0.3911, "step": 13458 }, { "epoch": 0.6676422441589365, "grad_norm": 5.265437126159668, "learning_rate": 2.5325615951016302e-06, "loss": 0.3235, "step": 13459 }, { "epoch": 0.6676918497941366, "grad_norm": 10.156432151794434, "learning_rate": 2.531877051601597e-06, "loss": 0.426, "step": 13460 }, { "epoch": 0.6677414554293368, "grad_norm": 8.625860214233398, "learning_rate": 2.5311925692627687e-06, "loss": 0.3611, "step": 13461 }, { "epoch": 0.6677910610645369, "grad_norm": 17.000635147094727, "learning_rate": 2.5305081481021014e-06, "loss": 0.3974, "step": 13462 }, { "epoch": 0.6678406666997371, "grad_norm": 8.280294418334961, "learning_rate": 2.5298237881365635e-06, "loss": 0.2879, "step": 13463 }, { "epoch": 0.6678902723349373, "grad_norm": 6.084366798400879, "learning_rate": 2.5291394893831067e-06, "loss": 0.2235, "step": 13464 }, { "epoch": 0.6679398779701374, "grad_norm": 4.8019022941589355, "learning_rate": 2.528455251858692e-06, "loss": 0.2275, "step": 13465 }, { "epoch": 0.6679894836053376, "grad_norm": 6.533609867095947, "learning_rate": 2.527771075580274e-06, "loss": 0.2715, "step": 13466 }, { "epoch": 0.6680390892405377, "grad_norm": 7.380979537963867, "learning_rate": 2.5270869605648084e-06, "loss": 0.2693, "step": 13467 }, { "epoch": 0.6680886948757379, "grad_norm": 6.388241291046143, "learning_rate": 2.526402906829243e-06, "loss": 0.2608, "step": 13468 }, { "epoch": 0.668138300510938, "grad_norm": 5.348221302032471, "learning_rate": 2.5257189143905364e-06, "loss": 0.2442, "step": 13469 }, { "epoch": 0.6681879061461382, "grad_norm": 14.90085220336914, "learning_rate": 2.5250349832656324e-06, "loss": 0.3283, "step": 13470 }, { "epoch": 0.6682375117813384, "grad_norm": 6.09769344329834, "learning_rate": 2.524351113471481e-06, "loss": 0.3406, "step": 13471 }, { "epoch": 0.6682871174165386, "grad_norm": 15.685836791992188, "learning_rate": 2.5236673050250294e-06, "loss": 0.3477, "step": 13472 }, { "epoch": 0.6683367230517386, "grad_norm": 13.96939754486084, "learning_rate": 2.522983557943223e-06, "loss": 0.3233, "step": 13473 }, { "epoch": 0.6683863286869388, "grad_norm": 8.196832656860352, "learning_rate": 2.5222998722430014e-06, "loss": 0.2, "step": 13474 }, { "epoch": 0.668435934322139, "grad_norm": 10.011246681213379, "learning_rate": 2.5216162479413135e-06, "loss": 0.2985, "step": 13475 }, { "epoch": 0.6684855399573392, "grad_norm": 9.317598342895508, "learning_rate": 2.520932685055092e-06, "loss": 0.3558, "step": 13476 }, { "epoch": 0.6685351455925393, "grad_norm": 13.047635078430176, "learning_rate": 2.5202491836012844e-06, "loss": 0.2988, "step": 13477 }, { "epoch": 0.6685847512277395, "grad_norm": 2.905377149581909, "learning_rate": 2.5195657435968214e-06, "loss": 0.1865, "step": 13478 }, { "epoch": 0.6686343568629396, "grad_norm": 7.760416507720947, "learning_rate": 2.5188823650586414e-06, "loss": 0.3263, "step": 13479 }, { "epoch": 0.6686839624981398, "grad_norm": 10.299233436584473, "learning_rate": 2.5181990480036785e-06, "loss": 0.4128, "step": 13480 }, { "epoch": 0.66873356813334, "grad_norm": 5.5091552734375, "learning_rate": 2.517515792448868e-06, "loss": 0.3096, "step": 13481 }, { "epoch": 0.6687831737685401, "grad_norm": 7.454553127288818, "learning_rate": 2.516832598411135e-06, "loss": 0.2597, "step": 13482 }, { "epoch": 0.6688327794037403, "grad_norm": 6.8330254554748535, "learning_rate": 2.516149465907417e-06, "loss": 0.3542, "step": 13483 }, { "epoch": 0.6688823850389404, "grad_norm": 4.025075435638428, "learning_rate": 2.5154663949546376e-06, "loss": 0.2429, "step": 13484 }, { "epoch": 0.6689319906741406, "grad_norm": 9.08064079284668, "learning_rate": 2.5147833855697248e-06, "loss": 0.2691, "step": 13485 }, { "epoch": 0.6689815963093407, "grad_norm": 10.171293258666992, "learning_rate": 2.514100437769603e-06, "loss": 0.3476, "step": 13486 }, { "epoch": 0.6690312019445409, "grad_norm": 6.399485111236572, "learning_rate": 2.5134175515712002e-06, "loss": 0.3787, "step": 13487 }, { "epoch": 0.6690808075797411, "grad_norm": 8.35201358795166, "learning_rate": 2.5127347269914305e-06, "loss": 0.3306, "step": 13488 }, { "epoch": 0.6691304132149413, "grad_norm": 7.24149751663208, "learning_rate": 2.5120519640472237e-06, "loss": 0.2265, "step": 13489 }, { "epoch": 0.6691800188501413, "grad_norm": 8.353520393371582, "learning_rate": 2.511369262755492e-06, "loss": 0.3102, "step": 13490 }, { "epoch": 0.6692296244853415, "grad_norm": 6.477950096130371, "learning_rate": 2.510686623133156e-06, "loss": 0.3393, "step": 13491 }, { "epoch": 0.6692792301205417, "grad_norm": 7.879878997802734, "learning_rate": 2.510004045197132e-06, "loss": 0.2936, "step": 13492 }, { "epoch": 0.6693288357557419, "grad_norm": 5.386488437652588, "learning_rate": 2.5093215289643335e-06, "loss": 0.3367, "step": 13493 }, { "epoch": 0.669378441390942, "grad_norm": 9.056804656982422, "learning_rate": 2.508639074451674e-06, "loss": 0.298, "step": 13494 }, { "epoch": 0.6694280470261421, "grad_norm": 11.247353553771973, "learning_rate": 2.507956681676068e-06, "loss": 0.3814, "step": 13495 }, { "epoch": 0.6694776526613423, "grad_norm": 4.009224891662598, "learning_rate": 2.5072743506544185e-06, "loss": 0.2014, "step": 13496 }, { "epoch": 0.6695272582965425, "grad_norm": 13.180218696594238, "learning_rate": 2.5065920814036415e-06, "loss": 0.3396, "step": 13497 }, { "epoch": 0.6695768639317426, "grad_norm": 8.57669734954834, "learning_rate": 2.5059098739406396e-06, "loss": 0.3983, "step": 13498 }, { "epoch": 0.6696264695669428, "grad_norm": 5.691537380218506, "learning_rate": 2.5052277282823186e-06, "loss": 0.2907, "step": 13499 }, { "epoch": 0.669676075202143, "grad_norm": 7.17349100112915, "learning_rate": 2.504545644445583e-06, "loss": 0.1755, "step": 13500 }, { "epoch": 0.6697256808373431, "grad_norm": 7.606552600860596, "learning_rate": 2.5038636224473357e-06, "loss": 0.304, "step": 13501 }, { "epoch": 0.6697752864725433, "grad_norm": 4.8301496505737305, "learning_rate": 2.503181662304476e-06, "loss": 0.2313, "step": 13502 }, { "epoch": 0.6698248921077434, "grad_norm": 5.2755889892578125, "learning_rate": 2.5024997640339065e-06, "loss": 0.2266, "step": 13503 }, { "epoch": 0.6698744977429436, "grad_norm": 8.436978340148926, "learning_rate": 2.501817927652521e-06, "loss": 0.2204, "step": 13504 }, { "epoch": 0.6699241033781438, "grad_norm": 11.374476432800293, "learning_rate": 2.5011361531772173e-06, "loss": 0.3444, "step": 13505 }, { "epoch": 0.669973709013344, "grad_norm": 8.29510498046875, "learning_rate": 2.50045444062489e-06, "loss": 0.364, "step": 13506 }, { "epoch": 0.670023314648544, "grad_norm": 4.95755672454834, "learning_rate": 2.499772790012432e-06, "loss": 0.2637, "step": 13507 }, { "epoch": 0.6700729202837442, "grad_norm": 6.048810005187988, "learning_rate": 2.4990912013567352e-06, "loss": 0.2423, "step": 13508 }, { "epoch": 0.6701225259189444, "grad_norm": 4.890799045562744, "learning_rate": 2.49840967467469e-06, "loss": 0.2458, "step": 13509 }, { "epoch": 0.6701721315541446, "grad_norm": 10.091191291809082, "learning_rate": 2.4977282099831846e-06, "loss": 0.3337, "step": 13510 }, { "epoch": 0.6702217371893447, "grad_norm": 5.712497234344482, "learning_rate": 2.497046807299106e-06, "loss": 0.281, "step": 13511 }, { "epoch": 0.6702713428245448, "grad_norm": 8.855037689208984, "learning_rate": 2.4963654666393416e-06, "loss": 0.3324, "step": 13512 }, { "epoch": 0.670320948459745, "grad_norm": 7.410315990447998, "learning_rate": 2.4956841880207717e-06, "loss": 0.2577, "step": 13513 }, { "epoch": 0.6703705540949452, "grad_norm": 5.625947952270508, "learning_rate": 2.4950029714602804e-06, "loss": 0.252, "step": 13514 }, { "epoch": 0.6704201597301453, "grad_norm": 26.044572830200195, "learning_rate": 2.4943218169747485e-06, "loss": 0.4052, "step": 13515 }, { "epoch": 0.6704697653653455, "grad_norm": 6.282968044281006, "learning_rate": 2.4936407245810558e-06, "loss": 0.2674, "step": 13516 }, { "epoch": 0.6705193710005457, "grad_norm": 4.97068977355957, "learning_rate": 2.492959694296079e-06, "loss": 0.2837, "step": 13517 }, { "epoch": 0.6705689766357458, "grad_norm": 9.830769538879395, "learning_rate": 2.492278726136697e-06, "loss": 0.2191, "step": 13518 }, { "epoch": 0.670618582270946, "grad_norm": 6.452939987182617, "learning_rate": 2.4915978201197795e-06, "loss": 0.2478, "step": 13519 }, { "epoch": 0.6706681879061461, "grad_norm": 9.453536987304688, "learning_rate": 2.490916976262206e-06, "loss": 0.3423, "step": 13520 }, { "epoch": 0.6707177935413463, "grad_norm": 3.9142072200775146, "learning_rate": 2.4902361945808428e-06, "loss": 0.2665, "step": 13521 }, { "epoch": 0.6707673991765465, "grad_norm": 4.8662190437316895, "learning_rate": 2.4895554750925623e-06, "loss": 0.2148, "step": 13522 }, { "epoch": 0.6708170048117467, "grad_norm": 5.214504241943359, "learning_rate": 2.4888748178142325e-06, "loss": 0.3018, "step": 13523 }, { "epoch": 0.6708666104469467, "grad_norm": 4.705939292907715, "learning_rate": 2.4881942227627224e-06, "loss": 0.2251, "step": 13524 }, { "epoch": 0.6709162160821469, "grad_norm": 9.455410957336426, "learning_rate": 2.4875136899548923e-06, "loss": 0.293, "step": 13525 }, { "epoch": 0.6709658217173471, "grad_norm": 5.224678993225098, "learning_rate": 2.486833219407613e-06, "loss": 0.284, "step": 13526 }, { "epoch": 0.6710154273525473, "grad_norm": 4.186324596405029, "learning_rate": 2.4861528111377416e-06, "loss": 0.1732, "step": 13527 }, { "epoch": 0.6710650329877474, "grad_norm": 6.895672798156738, "learning_rate": 2.4854724651621405e-06, "loss": 0.2781, "step": 13528 }, { "epoch": 0.6711146386229475, "grad_norm": 6.987334728240967, "learning_rate": 2.484792181497669e-06, "loss": 0.2969, "step": 13529 }, { "epoch": 0.6711642442581477, "grad_norm": 5.886073589324951, "learning_rate": 2.484111960161185e-06, "loss": 0.3065, "step": 13530 }, { "epoch": 0.6712138498933479, "grad_norm": 12.544018745422363, "learning_rate": 2.4834318011695454e-06, "loss": 0.3461, "step": 13531 }, { "epoch": 0.671263455528548, "grad_norm": 4.319141387939453, "learning_rate": 2.4827517045396056e-06, "loss": 0.2691, "step": 13532 }, { "epoch": 0.6713130611637482, "grad_norm": 7.925368309020996, "learning_rate": 2.482071670288213e-06, "loss": 0.3025, "step": 13533 }, { "epoch": 0.6713626667989484, "grad_norm": 9.310744285583496, "learning_rate": 2.481391698432228e-06, "loss": 0.2289, "step": 13534 }, { "epoch": 0.6714122724341485, "grad_norm": 5.049184322357178, "learning_rate": 2.4807117889884936e-06, "loss": 0.2476, "step": 13535 }, { "epoch": 0.6714618780693486, "grad_norm": 6.188129901885986, "learning_rate": 2.4800319419738606e-06, "loss": 0.2366, "step": 13536 }, { "epoch": 0.6715114837045488, "grad_norm": 4.582347393035889, "learning_rate": 2.479352157405176e-06, "loss": 0.2858, "step": 13537 }, { "epoch": 0.671561089339749, "grad_norm": 4.147350311279297, "learning_rate": 2.4786724352992874e-06, "loss": 0.2862, "step": 13538 }, { "epoch": 0.6716106949749492, "grad_norm": 4.989989757537842, "learning_rate": 2.477992775673032e-06, "loss": 0.2483, "step": 13539 }, { "epoch": 0.6716603006101494, "grad_norm": 10.354131698608398, "learning_rate": 2.477313178543261e-06, "loss": 0.3976, "step": 13540 }, { "epoch": 0.6717099062453494, "grad_norm": 4.930483818054199, "learning_rate": 2.476633643926808e-06, "loss": 0.2769, "step": 13541 }, { "epoch": 0.6717595118805496, "grad_norm": 4.1142706871032715, "learning_rate": 2.475954171840515e-06, "loss": 0.2879, "step": 13542 }, { "epoch": 0.6718091175157498, "grad_norm": 6.999690532684326, "learning_rate": 2.4752747623012197e-06, "loss": 0.3245, "step": 13543 }, { "epoch": 0.67185872315095, "grad_norm": 5.066011905670166, "learning_rate": 2.474595415325759e-06, "loss": 0.2372, "step": 13544 }, { "epoch": 0.6719083287861501, "grad_norm": 4.897459030151367, "learning_rate": 2.473916130930963e-06, "loss": 0.3391, "step": 13545 }, { "epoch": 0.6719579344213502, "grad_norm": 4.220517158508301, "learning_rate": 2.473236909133672e-06, "loss": 0.1881, "step": 13546 }, { "epoch": 0.6720075400565504, "grad_norm": 18.87428092956543, "learning_rate": 2.472557749950709e-06, "loss": 0.4856, "step": 13547 }, { "epoch": 0.6720571456917506, "grad_norm": 9.734892845153809, "learning_rate": 2.471878653398912e-06, "loss": 0.3882, "step": 13548 }, { "epoch": 0.6721067513269507, "grad_norm": 14.861517906188965, "learning_rate": 2.4711996194951045e-06, "loss": 0.4076, "step": 13549 }, { "epoch": 0.6721563569621509, "grad_norm": 11.279781341552734, "learning_rate": 2.4705206482561134e-06, "loss": 0.3327, "step": 13550 }, { "epoch": 0.6722059625973511, "grad_norm": 9.021882057189941, "learning_rate": 2.469841739698765e-06, "loss": 0.2822, "step": 13551 }, { "epoch": 0.6722555682325512, "grad_norm": 5.065105438232422, "learning_rate": 2.4691628938398852e-06, "loss": 0.2978, "step": 13552 }, { "epoch": 0.6723051738677513, "grad_norm": 5.598855972290039, "learning_rate": 2.46848411069629e-06, "loss": 0.2697, "step": 13553 }, { "epoch": 0.6723547795029515, "grad_norm": 5.740815162658691, "learning_rate": 2.4678053902848065e-06, "loss": 0.1753, "step": 13554 }, { "epoch": 0.6724043851381517, "grad_norm": 10.294878959655762, "learning_rate": 2.46712673262225e-06, "loss": 0.3061, "step": 13555 }, { "epoch": 0.6724539907733519, "grad_norm": 4.210179328918457, "learning_rate": 2.466448137725438e-06, "loss": 0.2784, "step": 13556 }, { "epoch": 0.6725035964085521, "grad_norm": 5.512451171875, "learning_rate": 2.4657696056111876e-06, "loss": 0.3148, "step": 13557 }, { "epoch": 0.6725532020437521, "grad_norm": 5.802242279052734, "learning_rate": 2.4650911362963124e-06, "loss": 0.1903, "step": 13558 }, { "epoch": 0.6726028076789523, "grad_norm": 6.139578342437744, "learning_rate": 2.4644127297976263e-06, "loss": 0.3104, "step": 13559 }, { "epoch": 0.6726524133141525, "grad_norm": 5.052922248840332, "learning_rate": 2.463734386131939e-06, "loss": 0.1493, "step": 13560 }, { "epoch": 0.6727020189493527, "grad_norm": 8.760028839111328, "learning_rate": 2.463056105316063e-06, "loss": 0.3498, "step": 13561 }, { "epoch": 0.6727516245845528, "grad_norm": 4.2470808029174805, "learning_rate": 2.4623778873668024e-06, "loss": 0.2241, "step": 13562 }, { "epoch": 0.6728012302197529, "grad_norm": 6.52480411529541, "learning_rate": 2.4616997323009656e-06, "loss": 0.2823, "step": 13563 }, { "epoch": 0.6728508358549531, "grad_norm": 11.68404483795166, "learning_rate": 2.4610216401353575e-06, "loss": 0.3896, "step": 13564 }, { "epoch": 0.6729004414901533, "grad_norm": 4.138712406158447, "learning_rate": 2.4603436108867825e-06, "loss": 0.2626, "step": 13565 }, { "epoch": 0.6729500471253534, "grad_norm": 15.15036678314209, "learning_rate": 2.4596656445720406e-06, "loss": 0.3989, "step": 13566 }, { "epoch": 0.6729996527605536, "grad_norm": 7.205172061920166, "learning_rate": 2.458987741207934e-06, "loss": 0.2073, "step": 13567 }, { "epoch": 0.6730492583957538, "grad_norm": 4.9185872077941895, "learning_rate": 2.45830990081126e-06, "loss": 0.273, "step": 13568 }, { "epoch": 0.6730988640309539, "grad_norm": 9.556853294372559, "learning_rate": 2.457632123398818e-06, "loss": 0.3053, "step": 13569 }, { "epoch": 0.673148469666154, "grad_norm": 7.432597637176514, "learning_rate": 2.456954408987401e-06, "loss": 0.264, "step": 13570 }, { "epoch": 0.6731980753013542, "grad_norm": 4.751245498657227, "learning_rate": 2.4562767575938035e-06, "loss": 0.2042, "step": 13571 }, { "epoch": 0.6732476809365544, "grad_norm": 9.045917510986328, "learning_rate": 2.4555991692348184e-06, "loss": 0.3123, "step": 13572 }, { "epoch": 0.6732972865717546, "grad_norm": 7.634855270385742, "learning_rate": 2.454921643927237e-06, "loss": 0.3752, "step": 13573 }, { "epoch": 0.6733468922069548, "grad_norm": 4.9509596824646, "learning_rate": 2.4542441816878486e-06, "loss": 0.2408, "step": 13574 }, { "epoch": 0.6733964978421548, "grad_norm": 8.300446510314941, "learning_rate": 2.4535667825334432e-06, "loss": 0.3897, "step": 13575 }, { "epoch": 0.673446103477355, "grad_norm": 6.468130111694336, "learning_rate": 2.4528894464808008e-06, "loss": 0.2918, "step": 13576 }, { "epoch": 0.6734957091125552, "grad_norm": 3.5022635459899902, "learning_rate": 2.452212173546714e-06, "loss": 0.2825, "step": 13577 }, { "epoch": 0.6735453147477554, "grad_norm": 7.579925537109375, "learning_rate": 2.4515349637479598e-06, "loss": 0.2888, "step": 13578 }, { "epoch": 0.6735949203829555, "grad_norm": 5.896969318389893, "learning_rate": 2.450857817101322e-06, "loss": 0.2646, "step": 13579 }, { "epoch": 0.6736445260181556, "grad_norm": 6.233915328979492, "learning_rate": 2.450180733623581e-06, "loss": 0.2931, "step": 13580 }, { "epoch": 0.6736941316533558, "grad_norm": 5.03082799911499, "learning_rate": 2.4495037133315162e-06, "loss": 0.3078, "step": 13581 }, { "epoch": 0.673743737288556, "grad_norm": 12.530790328979492, "learning_rate": 2.4488267562418997e-06, "loss": 0.5008, "step": 13582 }, { "epoch": 0.6737933429237561, "grad_norm": 5.251917839050293, "learning_rate": 2.448149862371514e-06, "loss": 0.2476, "step": 13583 }, { "epoch": 0.6738429485589563, "grad_norm": 6.516078948974609, "learning_rate": 2.4474730317371253e-06, "loss": 0.3258, "step": 13584 }, { "epoch": 0.6738925541941565, "grad_norm": 8.234854698181152, "learning_rate": 2.4467962643555133e-06, "loss": 0.2882, "step": 13585 }, { "epoch": 0.6739421598293566, "grad_norm": 6.803041934967041, "learning_rate": 2.4461195602434423e-06, "loss": 0.3134, "step": 13586 }, { "epoch": 0.6739917654645567, "grad_norm": 4.622198104858398, "learning_rate": 2.4454429194176845e-06, "loss": 0.2511, "step": 13587 }, { "epoch": 0.6740413710997569, "grad_norm": 20.977022171020508, "learning_rate": 2.4447663418950064e-06, "loss": 0.3974, "step": 13588 }, { "epoch": 0.6740909767349571, "grad_norm": 5.033609867095947, "learning_rate": 2.4440898276921763e-06, "loss": 0.3151, "step": 13589 }, { "epoch": 0.6741405823701573, "grad_norm": 5.697066307067871, "learning_rate": 2.443413376825953e-06, "loss": 0.3411, "step": 13590 }, { "epoch": 0.6741901880053575, "grad_norm": 7.778830528259277, "learning_rate": 2.442736989313106e-06, "loss": 0.2454, "step": 13591 }, { "epoch": 0.6742397936405575, "grad_norm": 7.497089385986328, "learning_rate": 2.442060665170391e-06, "loss": 0.2826, "step": 13592 }, { "epoch": 0.6742893992757577, "grad_norm": 5.901309490203857, "learning_rate": 2.4413844044145703e-06, "loss": 0.2489, "step": 13593 }, { "epoch": 0.6743390049109579, "grad_norm": 24.07619285583496, "learning_rate": 2.440708207062401e-06, "loss": 0.515, "step": 13594 }, { "epoch": 0.6743886105461581, "grad_norm": 5.295111179351807, "learning_rate": 2.4400320731306423e-06, "loss": 0.3184, "step": 13595 }, { "epoch": 0.6744382161813582, "grad_norm": 8.802497863769531, "learning_rate": 2.439356002636043e-06, "loss": 0.2759, "step": 13596 }, { "epoch": 0.6744878218165583, "grad_norm": 8.345726013183594, "learning_rate": 2.438679995595364e-06, "loss": 0.4082, "step": 13597 }, { "epoch": 0.6745374274517585, "grad_norm": 6.913260459899902, "learning_rate": 2.4380040520253514e-06, "loss": 0.2975, "step": 13598 }, { "epoch": 0.6745870330869587, "grad_norm": 4.732115268707275, "learning_rate": 2.437328171942757e-06, "loss": 0.2146, "step": 13599 }, { "epoch": 0.6746366387221588, "grad_norm": 7.857337474822998, "learning_rate": 2.43665235536433e-06, "loss": 0.2424, "step": 13600 }, { "epoch": 0.674686244357359, "grad_norm": 5.862181663513184, "learning_rate": 2.435976602306817e-06, "loss": 0.2793, "step": 13601 }, { "epoch": 0.6747358499925592, "grad_norm": 5.1681227684021, "learning_rate": 2.435300912786964e-06, "loss": 0.2642, "step": 13602 }, { "epoch": 0.6747854556277593, "grad_norm": 5.366426467895508, "learning_rate": 2.4346252868215165e-06, "loss": 0.2559, "step": 13603 }, { "epoch": 0.6748350612629594, "grad_norm": 6.084779262542725, "learning_rate": 2.4339497244272104e-06, "loss": 0.3088, "step": 13604 }, { "epoch": 0.6748846668981596, "grad_norm": 6.893925189971924, "learning_rate": 2.433274225620795e-06, "loss": 0.21, "step": 13605 }, { "epoch": 0.6749342725333598, "grad_norm": 8.683195114135742, "learning_rate": 2.4325987904190033e-06, "loss": 0.3156, "step": 13606 }, { "epoch": 0.67498387816856, "grad_norm": 5.211279392242432, "learning_rate": 2.4319234188385753e-06, "loss": 0.2733, "step": 13607 }, { "epoch": 0.6750334838037602, "grad_norm": 5.778254985809326, "learning_rate": 2.4312481108962468e-06, "loss": 0.2911, "step": 13608 }, { "epoch": 0.6750830894389602, "grad_norm": 3.4573848247528076, "learning_rate": 2.430572866608752e-06, "loss": 0.2821, "step": 13609 }, { "epoch": 0.6751326950741604, "grad_norm": 5.658249855041504, "learning_rate": 2.429897685992823e-06, "loss": 0.1919, "step": 13610 }, { "epoch": 0.6751823007093606, "grad_norm": 7.828802108764648, "learning_rate": 2.429222569065195e-06, "loss": 0.3329, "step": 13611 }, { "epoch": 0.6752319063445608, "grad_norm": 8.187485694885254, "learning_rate": 2.4285475158425918e-06, "loss": 0.2561, "step": 13612 }, { "epoch": 0.6752815119797609, "grad_norm": 10.162586212158203, "learning_rate": 2.4278725263417445e-06, "loss": 0.2931, "step": 13613 }, { "epoch": 0.675331117614961, "grad_norm": 12.062333106994629, "learning_rate": 2.4271976005793803e-06, "loss": 0.3074, "step": 13614 }, { "epoch": 0.6753807232501612, "grad_norm": 9.65295696258545, "learning_rate": 2.4265227385722225e-06, "loss": 0.4505, "step": 13615 }, { "epoch": 0.6754303288853614, "grad_norm": 8.1139497756958, "learning_rate": 2.4258479403369954e-06, "loss": 0.2584, "step": 13616 }, { "epoch": 0.6754799345205615, "grad_norm": 8.369340896606445, "learning_rate": 2.425173205890421e-06, "loss": 0.3176, "step": 13617 }, { "epoch": 0.6755295401557617, "grad_norm": 8.76125717163086, "learning_rate": 2.4244985352492197e-06, "loss": 0.2889, "step": 13618 }, { "epoch": 0.6755791457909619, "grad_norm": 4.534643173217773, "learning_rate": 2.4238239284301106e-06, "loss": 0.2858, "step": 13619 }, { "epoch": 0.675628751426162, "grad_norm": 6.815213680267334, "learning_rate": 2.423149385449809e-06, "loss": 0.3219, "step": 13620 }, { "epoch": 0.6756783570613621, "grad_norm": 5.336435794830322, "learning_rate": 2.4224749063250306e-06, "loss": 0.3106, "step": 13621 }, { "epoch": 0.6757279626965623, "grad_norm": 8.839349746704102, "learning_rate": 2.42180049107249e-06, "loss": 0.2754, "step": 13622 }, { "epoch": 0.6757775683317625, "grad_norm": 3.921574354171753, "learning_rate": 2.4211261397088997e-06, "loss": 0.3281, "step": 13623 }, { "epoch": 0.6758271739669627, "grad_norm": 6.572132587432861, "learning_rate": 2.4204518522509706e-06, "loss": 0.2785, "step": 13624 }, { "epoch": 0.6758767796021629, "grad_norm": 6.5986809730529785, "learning_rate": 2.4197776287154106e-06, "loss": 0.3208, "step": 13625 }, { "epoch": 0.6759263852373629, "grad_norm": 5.812570095062256, "learning_rate": 2.4191034691189307e-06, "loss": 0.2709, "step": 13626 }, { "epoch": 0.6759759908725631, "grad_norm": 16.22319221496582, "learning_rate": 2.4184293734782303e-06, "loss": 0.2992, "step": 13627 }, { "epoch": 0.6760255965077633, "grad_norm": 12.611016273498535, "learning_rate": 2.4177553418100215e-06, "loss": 0.3448, "step": 13628 }, { "epoch": 0.6760752021429635, "grad_norm": 9.01382827758789, "learning_rate": 2.417081374131002e-06, "loss": 0.3521, "step": 13629 }, { "epoch": 0.6761248077781636, "grad_norm": 9.428736686706543, "learning_rate": 2.416407470457874e-06, "loss": 0.2791, "step": 13630 }, { "epoch": 0.6761744134133637, "grad_norm": 5.230874538421631, "learning_rate": 2.415733630807337e-06, "loss": 0.2476, "step": 13631 }, { "epoch": 0.6762240190485639, "grad_norm": 14.365467071533203, "learning_rate": 2.4150598551960925e-06, "loss": 0.3306, "step": 13632 }, { "epoch": 0.6762736246837641, "grad_norm": 3.8160557746887207, "learning_rate": 2.4143861436408295e-06, "loss": 0.2553, "step": 13633 }, { "epoch": 0.6763232303189642, "grad_norm": 7.391506195068359, "learning_rate": 2.4137124961582515e-06, "loss": 0.3296, "step": 13634 }, { "epoch": 0.6763728359541644, "grad_norm": 7.544224262237549, "learning_rate": 2.4130389127650445e-06, "loss": 0.2186, "step": 13635 }, { "epoch": 0.6764224415893646, "grad_norm": 6.339008331298828, "learning_rate": 2.4123653934779067e-06, "loss": 0.3133, "step": 13636 }, { "epoch": 0.6764720472245647, "grad_norm": 11.007166862487793, "learning_rate": 2.411691938313523e-06, "loss": 0.2411, "step": 13637 }, { "epoch": 0.6765216528597648, "grad_norm": 6.076205730438232, "learning_rate": 2.4110185472885845e-06, "loss": 0.2897, "step": 13638 }, { "epoch": 0.676571258494965, "grad_norm": 5.837172031402588, "learning_rate": 2.410345220419777e-06, "loss": 0.333, "step": 13639 }, { "epoch": 0.6766208641301652, "grad_norm": 8.718120574951172, "learning_rate": 2.409671957723788e-06, "loss": 0.3035, "step": 13640 }, { "epoch": 0.6766704697653654, "grad_norm": 11.961341857910156, "learning_rate": 2.4089987592172965e-06, "loss": 0.29, "step": 13641 }, { "epoch": 0.6767200754005656, "grad_norm": 13.533978462219238, "learning_rate": 2.408325624916991e-06, "loss": 0.3798, "step": 13642 }, { "epoch": 0.6767696810357656, "grad_norm": 7.281721115112305, "learning_rate": 2.4076525548395476e-06, "loss": 0.2913, "step": 13643 }, { "epoch": 0.6768192866709658, "grad_norm": 11.705633163452148, "learning_rate": 2.4069795490016456e-06, "loss": 0.3095, "step": 13644 }, { "epoch": 0.676868892306166, "grad_norm": 9.332237243652344, "learning_rate": 2.406306607419964e-06, "loss": 0.3765, "step": 13645 }, { "epoch": 0.6769184979413662, "grad_norm": 6.897247791290283, "learning_rate": 2.40563373011118e-06, "loss": 0.2861, "step": 13646 }, { "epoch": 0.6769681035765663, "grad_norm": 4.340883255004883, "learning_rate": 2.4049609170919614e-06, "loss": 0.3339, "step": 13647 }, { "epoch": 0.6770177092117664, "grad_norm": 10.032035827636719, "learning_rate": 2.404288168378989e-06, "loss": 0.3373, "step": 13648 }, { "epoch": 0.6770673148469666, "grad_norm": 5.243113040924072, "learning_rate": 2.4036154839889284e-06, "loss": 0.2491, "step": 13649 }, { "epoch": 0.6771169204821668, "grad_norm": 17.434490203857422, "learning_rate": 2.4029428639384507e-06, "loss": 0.4207, "step": 13650 }, { "epoch": 0.6771665261173669, "grad_norm": 4.810489177703857, "learning_rate": 2.4022703082442235e-06, "loss": 0.2606, "step": 13651 }, { "epoch": 0.6772161317525671, "grad_norm": 4.065920829772949, "learning_rate": 2.401597816922914e-06, "loss": 0.2882, "step": 13652 }, { "epoch": 0.6772657373877673, "grad_norm": 5.168945789337158, "learning_rate": 2.400925389991184e-06, "loss": 0.2695, "step": 13653 }, { "epoch": 0.6773153430229674, "grad_norm": 12.127132415771484, "learning_rate": 2.400253027465701e-06, "loss": 0.4826, "step": 13654 }, { "epoch": 0.6773649486581675, "grad_norm": 6.508718967437744, "learning_rate": 2.3995807293631206e-06, "loss": 0.2206, "step": 13655 }, { "epoch": 0.6774145542933677, "grad_norm": 4.052859306335449, "learning_rate": 2.3989084957001096e-06, "loss": 0.2902, "step": 13656 }, { "epoch": 0.6774641599285679, "grad_norm": 6.0588860511779785, "learning_rate": 2.3982363264933213e-06, "loss": 0.3312, "step": 13657 }, { "epoch": 0.6775137655637681, "grad_norm": 5.2915167808532715, "learning_rate": 2.397564221759413e-06, "loss": 0.2981, "step": 13658 }, { "epoch": 0.6775633711989683, "grad_norm": 5.5272321701049805, "learning_rate": 2.3968921815150407e-06, "loss": 0.355, "step": 13659 }, { "epoch": 0.6776129768341683, "grad_norm": 13.951957702636719, "learning_rate": 2.39622020577686e-06, "loss": 0.3551, "step": 13660 }, { "epoch": 0.6776625824693685, "grad_norm": 10.864401817321777, "learning_rate": 2.3955482945615166e-06, "loss": 0.2587, "step": 13661 }, { "epoch": 0.6777121881045687, "grad_norm": 8.381282806396484, "learning_rate": 2.3948764478856678e-06, "loss": 0.385, "step": 13662 }, { "epoch": 0.6777617937397689, "grad_norm": 5.450739860534668, "learning_rate": 2.3942046657659572e-06, "loss": 0.3057, "step": 13663 }, { "epoch": 0.677811399374969, "grad_norm": 18.221452713012695, "learning_rate": 2.3935329482190344e-06, "loss": 0.4935, "step": 13664 }, { "epoch": 0.6778610050101691, "grad_norm": 5.784406661987305, "learning_rate": 2.3928612952615437e-06, "loss": 0.3117, "step": 13665 }, { "epoch": 0.6779106106453693, "grad_norm": 6.320915699005127, "learning_rate": 2.3921897069101296e-06, "loss": 0.2566, "step": 13666 }, { "epoch": 0.6779602162805695, "grad_norm": 5.04920768737793, "learning_rate": 2.3915181831814343e-06, "loss": 0.1961, "step": 13667 }, { "epoch": 0.6780098219157696, "grad_norm": 5.629092693328857, "learning_rate": 2.3908467240920997e-06, "loss": 0.2668, "step": 13668 }, { "epoch": 0.6780594275509698, "grad_norm": 13.558135032653809, "learning_rate": 2.3901753296587623e-06, "loss": 0.2986, "step": 13669 }, { "epoch": 0.67810903318617, "grad_norm": 6.625513553619385, "learning_rate": 2.3895039998980603e-06, "loss": 0.2986, "step": 13670 }, { "epoch": 0.6781586388213701, "grad_norm": 5.6314191818237305, "learning_rate": 2.3888327348266303e-06, "loss": 0.2734, "step": 13671 }, { "epoch": 0.6782082444565702, "grad_norm": 3.688445806503296, "learning_rate": 2.3881615344611064e-06, "loss": 0.21, "step": 13672 }, { "epoch": 0.6782578500917704, "grad_norm": 4.040998458862305, "learning_rate": 2.387490398818121e-06, "loss": 0.2077, "step": 13673 }, { "epoch": 0.6783074557269706, "grad_norm": 3.8794126510620117, "learning_rate": 2.386819327914305e-06, "loss": 0.2102, "step": 13674 }, { "epoch": 0.6783570613621708, "grad_norm": 9.125990867614746, "learning_rate": 2.3861483217662884e-06, "loss": 0.2984, "step": 13675 }, { "epoch": 0.678406666997371, "grad_norm": 6.15159797668457, "learning_rate": 2.385477380390699e-06, "loss": 0.2342, "step": 13676 }, { "epoch": 0.678456272632571, "grad_norm": 8.054415702819824, "learning_rate": 2.384806503804164e-06, "loss": 0.3127, "step": 13677 }, { "epoch": 0.6785058782677712, "grad_norm": 6.639970302581787, "learning_rate": 2.3841356920233053e-06, "loss": 0.2926, "step": 13678 }, { "epoch": 0.6785554839029714, "grad_norm": 6.6328911781311035, "learning_rate": 2.383464945064748e-06, "loss": 0.3171, "step": 13679 }, { "epoch": 0.6786050895381716, "grad_norm": 10.338805198669434, "learning_rate": 2.382794262945112e-06, "loss": 0.3472, "step": 13680 }, { "epoch": 0.6786546951733717, "grad_norm": 7.5637078285217285, "learning_rate": 2.382123645681019e-06, "loss": 0.2855, "step": 13681 }, { "epoch": 0.6787043008085718, "grad_norm": 9.129117965698242, "learning_rate": 2.381453093289085e-06, "loss": 0.2432, "step": 13682 }, { "epoch": 0.678753906443772, "grad_norm": 16.449909210205078, "learning_rate": 2.3807826057859306e-06, "loss": 0.3456, "step": 13683 }, { "epoch": 0.6788035120789722, "grad_norm": 6.66381311416626, "learning_rate": 2.3801121831881643e-06, "loss": 0.278, "step": 13684 }, { "epoch": 0.6788531177141723, "grad_norm": 3.923609972000122, "learning_rate": 2.3794418255124067e-06, "loss": 0.1956, "step": 13685 }, { "epoch": 0.6789027233493725, "grad_norm": 9.034074783325195, "learning_rate": 2.3787715327752643e-06, "loss": 0.289, "step": 13686 }, { "epoch": 0.6789523289845727, "grad_norm": 6.69782018661499, "learning_rate": 2.378101304993349e-06, "loss": 0.2287, "step": 13687 }, { "epoch": 0.6790019346197728, "grad_norm": 7.807420253753662, "learning_rate": 2.3774311421832695e-06, "loss": 0.3291, "step": 13688 }, { "epoch": 0.6790515402549729, "grad_norm": 11.967226028442383, "learning_rate": 2.3767610443616322e-06, "loss": 0.2816, "step": 13689 }, { "epoch": 0.6791011458901731, "grad_norm": 9.640352249145508, "learning_rate": 2.376091011545044e-06, "loss": 0.3049, "step": 13690 }, { "epoch": 0.6791507515253733, "grad_norm": 6.754454135894775, "learning_rate": 2.3754210437501078e-06, "loss": 0.3063, "step": 13691 }, { "epoch": 0.6792003571605735, "grad_norm": 13.098584175109863, "learning_rate": 2.374751140993422e-06, "loss": 0.4219, "step": 13692 }, { "epoch": 0.6792499627957737, "grad_norm": 14.1436767578125, "learning_rate": 2.374081303291595e-06, "loss": 0.4432, "step": 13693 }, { "epoch": 0.6792995684309737, "grad_norm": 9.318117141723633, "learning_rate": 2.3734115306612186e-06, "loss": 0.3282, "step": 13694 }, { "epoch": 0.6793491740661739, "grad_norm": 5.037833213806152, "learning_rate": 2.3727418231188925e-06, "loss": 0.2326, "step": 13695 }, { "epoch": 0.6793987797013741, "grad_norm": 8.093951225280762, "learning_rate": 2.372072180681213e-06, "loss": 0.2069, "step": 13696 }, { "epoch": 0.6794483853365743, "grad_norm": 7.822299003601074, "learning_rate": 2.3714026033647753e-06, "loss": 0.2598, "step": 13697 }, { "epoch": 0.6794979909717744, "grad_norm": 5.340963840484619, "learning_rate": 2.370733091186166e-06, "loss": 0.2721, "step": 13698 }, { "epoch": 0.6795475966069745, "grad_norm": 3.1883628368377686, "learning_rate": 2.370063644161984e-06, "loss": 0.2395, "step": 13699 }, { "epoch": 0.6795972022421747, "grad_norm": 10.3469877243042, "learning_rate": 2.369394262308813e-06, "loss": 0.3063, "step": 13700 }, { "epoch": 0.6796468078773749, "grad_norm": 6.231525897979736, "learning_rate": 2.368724945643242e-06, "loss": 0.2938, "step": 13701 }, { "epoch": 0.679696413512575, "grad_norm": 7.3235979080200195, "learning_rate": 2.3680556941818565e-06, "loss": 0.2573, "step": 13702 }, { "epoch": 0.6797460191477752, "grad_norm": 8.412363052368164, "learning_rate": 2.367386507941244e-06, "loss": 0.3064, "step": 13703 }, { "epoch": 0.6797956247829754, "grad_norm": 4.644865989685059, "learning_rate": 2.3667173869379813e-06, "loss": 0.1913, "step": 13704 }, { "epoch": 0.6798452304181755, "grad_norm": 6.151530742645264, "learning_rate": 2.366048331188656e-06, "loss": 0.3099, "step": 13705 }, { "epoch": 0.6798948360533756, "grad_norm": 5.544250965118408, "learning_rate": 2.3653793407098406e-06, "loss": 0.2865, "step": 13706 }, { "epoch": 0.6799444416885758, "grad_norm": 7.732665061950684, "learning_rate": 2.364710415518121e-06, "loss": 0.2163, "step": 13707 }, { "epoch": 0.679994047323776, "grad_norm": 6.003636360168457, "learning_rate": 2.3640415556300674e-06, "loss": 0.3203, "step": 13708 }, { "epoch": 0.6800436529589762, "grad_norm": 7.956758975982666, "learning_rate": 2.3633727610622564e-06, "loss": 0.3307, "step": 13709 }, { "epoch": 0.6800932585941764, "grad_norm": 7.009299278259277, "learning_rate": 2.362704031831261e-06, "loss": 0.2552, "step": 13710 }, { "epoch": 0.6801428642293764, "grad_norm": 4.807535171508789, "learning_rate": 2.362035367953655e-06, "loss": 0.1752, "step": 13711 }, { "epoch": 0.6801924698645766, "grad_norm": 8.772351264953613, "learning_rate": 2.361366769446002e-06, "loss": 0.3081, "step": 13712 }, { "epoch": 0.6802420754997768, "grad_norm": 3.948385000228882, "learning_rate": 2.3606982363248774e-06, "loss": 0.2624, "step": 13713 }, { "epoch": 0.680291681134977, "grad_norm": 3.8498642444610596, "learning_rate": 2.3600297686068426e-06, "loss": 0.288, "step": 13714 }, { "epoch": 0.6803412867701771, "grad_norm": 6.839674472808838, "learning_rate": 2.359361366308465e-06, "loss": 0.2972, "step": 13715 }, { "epoch": 0.6803908924053772, "grad_norm": 8.50862979888916, "learning_rate": 2.3586930294463063e-06, "loss": 0.2424, "step": 13716 }, { "epoch": 0.6804404980405774, "grad_norm": 8.60677433013916, "learning_rate": 2.3580247580369314e-06, "loss": 0.2339, "step": 13717 }, { "epoch": 0.6804901036757776, "grad_norm": 17.19880485534668, "learning_rate": 2.357356552096894e-06, "loss": 0.4487, "step": 13718 }, { "epoch": 0.6805397093109777, "grad_norm": 10.947078704833984, "learning_rate": 2.3566884116427606e-06, "loss": 0.2633, "step": 13719 }, { "epoch": 0.6805893149461779, "grad_norm": 8.855692863464355, "learning_rate": 2.3560203366910824e-06, "loss": 0.3551, "step": 13720 }, { "epoch": 0.6806389205813781, "grad_norm": 11.960600852966309, "learning_rate": 2.355352327258416e-06, "loss": 0.337, "step": 13721 }, { "epoch": 0.6806885262165782, "grad_norm": 7.456754207611084, "learning_rate": 2.3546843833613153e-06, "loss": 0.1935, "step": 13722 }, { "epoch": 0.6807381318517783, "grad_norm": 10.644522666931152, "learning_rate": 2.3540165050163324e-06, "loss": 0.4524, "step": 13723 }, { "epoch": 0.6807877374869785, "grad_norm": 11.116576194763184, "learning_rate": 2.3533486922400174e-06, "loss": 0.2473, "step": 13724 }, { "epoch": 0.6808373431221787, "grad_norm": 4.8516316413879395, "learning_rate": 2.3526809450489184e-06, "loss": 0.2551, "step": 13725 }, { "epoch": 0.6808869487573789, "grad_norm": 8.887216567993164, "learning_rate": 2.352013263459584e-06, "loss": 0.2351, "step": 13726 }, { "epoch": 0.680936554392579, "grad_norm": 12.342802047729492, "learning_rate": 2.35134564748856e-06, "loss": 0.326, "step": 13727 }, { "epoch": 0.6809861600277791, "grad_norm": 6.7179646492004395, "learning_rate": 2.3506780971523874e-06, "loss": 0.3014, "step": 13728 }, { "epoch": 0.6810357656629793, "grad_norm": 6.763086795806885, "learning_rate": 2.35001061246761e-06, "loss": 0.3282, "step": 13729 }, { "epoch": 0.6810853712981795, "grad_norm": 5.9156904220581055, "learning_rate": 2.349343193450768e-06, "loss": 0.2406, "step": 13730 }, { "epoch": 0.6811349769333797, "grad_norm": 5.301656246185303, "learning_rate": 2.3486758401184005e-06, "loss": 0.239, "step": 13731 }, { "epoch": 0.6811845825685798, "grad_norm": 9.38620376586914, "learning_rate": 2.348008552487046e-06, "loss": 0.4269, "step": 13732 }, { "epoch": 0.6812341882037799, "grad_norm": 5.124003887176514, "learning_rate": 2.3473413305732386e-06, "loss": 0.2536, "step": 13733 }, { "epoch": 0.6812837938389801, "grad_norm": 4.853054523468018, "learning_rate": 2.3466741743935146e-06, "loss": 0.2644, "step": 13734 }, { "epoch": 0.6813333994741803, "grad_norm": 7.983030796051025, "learning_rate": 2.3460070839644013e-06, "loss": 0.3542, "step": 13735 }, { "epoch": 0.6813830051093804, "grad_norm": 17.372684478759766, "learning_rate": 2.345340059302437e-06, "loss": 0.4922, "step": 13736 }, { "epoch": 0.6814326107445806, "grad_norm": 5.674596786499023, "learning_rate": 2.3446731004241446e-06, "loss": 0.2778, "step": 13737 }, { "epoch": 0.6814822163797808, "grad_norm": 6.603664875030518, "learning_rate": 2.344006207346054e-06, "loss": 0.293, "step": 13738 }, { "epoch": 0.6815318220149809, "grad_norm": 6.82617712020874, "learning_rate": 2.343339380084691e-06, "loss": 0.2841, "step": 13739 }, { "epoch": 0.681581427650181, "grad_norm": 6.63594913482666, "learning_rate": 2.342672618656582e-06, "loss": 0.2368, "step": 13740 }, { "epoch": 0.6816310332853812, "grad_norm": 4.622006893157959, "learning_rate": 2.342005923078243e-06, "loss": 0.1826, "step": 13741 }, { "epoch": 0.6816806389205814, "grad_norm": 3.69757342338562, "learning_rate": 2.341339293366204e-06, "loss": 0.2077, "step": 13742 }, { "epoch": 0.6817302445557816, "grad_norm": 4.574390888214111, "learning_rate": 2.3406727295369765e-06, "loss": 0.2424, "step": 13743 }, { "epoch": 0.6817798501909818, "grad_norm": 12.603082656860352, "learning_rate": 2.340006231607085e-06, "loss": 0.457, "step": 13744 }, { "epoch": 0.6818294558261818, "grad_norm": 13.490621566772461, "learning_rate": 2.3393397995930406e-06, "loss": 0.3067, "step": 13745 }, { "epoch": 0.681879061461382, "grad_norm": 6.088117599487305, "learning_rate": 2.338673433511359e-06, "loss": 0.2037, "step": 13746 }, { "epoch": 0.6819286670965822, "grad_norm": 4.479728698730469, "learning_rate": 2.338007133378554e-06, "loss": 0.2555, "step": 13747 }, { "epoch": 0.6819782727317824, "grad_norm": 4.522667407989502, "learning_rate": 2.3373408992111385e-06, "loss": 0.217, "step": 13748 }, { "epoch": 0.6820278783669825, "grad_norm": 6.2121734619140625, "learning_rate": 2.336674731025616e-06, "loss": 0.2567, "step": 13749 }, { "epoch": 0.6820774840021826, "grad_norm": 10.489960670471191, "learning_rate": 2.336008628838502e-06, "loss": 0.255, "step": 13750 }, { "epoch": 0.6821270896373828, "grad_norm": 7.623871326446533, "learning_rate": 2.3353425926662975e-06, "loss": 0.3676, "step": 13751 }, { "epoch": 0.682176695272583, "grad_norm": 13.900595664978027, "learning_rate": 2.334676622525509e-06, "loss": 0.4283, "step": 13752 }, { "epoch": 0.6822263009077831, "grad_norm": 5.874702453613281, "learning_rate": 2.3340107184326396e-06, "loss": 0.2684, "step": 13753 }, { "epoch": 0.6822759065429833, "grad_norm": 7.091479301452637, "learning_rate": 2.3333448804041918e-06, "loss": 0.332, "step": 13754 }, { "epoch": 0.6823255121781835, "grad_norm": 5.430492401123047, "learning_rate": 2.3326791084566606e-06, "loss": 0.199, "step": 13755 }, { "epoch": 0.6823751178133836, "grad_norm": 6.256924152374268, "learning_rate": 2.332013402606552e-06, "loss": 0.1814, "step": 13756 }, { "epoch": 0.6824247234485837, "grad_norm": 17.445444107055664, "learning_rate": 2.331347762870356e-06, "loss": 0.3904, "step": 13757 }, { "epoch": 0.6824743290837839, "grad_norm": 11.132499694824219, "learning_rate": 2.330682189264569e-06, "loss": 0.4112, "step": 13758 }, { "epoch": 0.6825239347189841, "grad_norm": 12.0953369140625, "learning_rate": 2.330016681805685e-06, "loss": 0.3497, "step": 13759 }, { "epoch": 0.6825735403541843, "grad_norm": 10.088865280151367, "learning_rate": 2.3293512405101957e-06, "loss": 0.2944, "step": 13760 }, { "epoch": 0.6826231459893843, "grad_norm": 4.3392229080200195, "learning_rate": 2.328685865394591e-06, "loss": 0.3213, "step": 13761 }, { "epoch": 0.6826727516245845, "grad_norm": 11.272287368774414, "learning_rate": 2.3280205564753604e-06, "loss": 0.359, "step": 13762 }, { "epoch": 0.6827223572597847, "grad_norm": 4.21820068359375, "learning_rate": 2.327355313768985e-06, "loss": 0.2346, "step": 13763 }, { "epoch": 0.6827719628949849, "grad_norm": 7.010478496551514, "learning_rate": 2.3266901372919585e-06, "loss": 0.2668, "step": 13764 }, { "epoch": 0.682821568530185, "grad_norm": 13.072840690612793, "learning_rate": 2.3260250270607578e-06, "loss": 0.222, "step": 13765 }, { "epoch": 0.6828711741653852, "grad_norm": 9.408472061157227, "learning_rate": 2.3253599830918654e-06, "loss": 0.3603, "step": 13766 }, { "epoch": 0.6829207798005853, "grad_norm": 4.527966022491455, "learning_rate": 2.3246950054017638e-06, "loss": 0.2932, "step": 13767 }, { "epoch": 0.6829703854357855, "grad_norm": 12.161079406738281, "learning_rate": 2.324030094006931e-06, "loss": 0.331, "step": 13768 }, { "epoch": 0.6830199910709857, "grad_norm": 6.339941024780273, "learning_rate": 2.3233652489238396e-06, "loss": 0.1931, "step": 13769 }, { "epoch": 0.6830695967061858, "grad_norm": 8.338038444519043, "learning_rate": 2.3227004701689717e-06, "loss": 0.3672, "step": 13770 }, { "epoch": 0.683119202341386, "grad_norm": 11.975375175476074, "learning_rate": 2.3220357577587954e-06, "loss": 0.3647, "step": 13771 }, { "epoch": 0.6831688079765862, "grad_norm": 14.789593696594238, "learning_rate": 2.3213711117097842e-06, "loss": 0.3554, "step": 13772 }, { "epoch": 0.6832184136117863, "grad_norm": 9.684221267700195, "learning_rate": 2.3207065320384088e-06, "loss": 0.3202, "step": 13773 }, { "epoch": 0.6832680192469864, "grad_norm": 6.169947147369385, "learning_rate": 2.320042018761138e-06, "loss": 0.3134, "step": 13774 }, { "epoch": 0.6833176248821866, "grad_norm": 9.441716194152832, "learning_rate": 2.3193775718944374e-06, "loss": 0.2432, "step": 13775 }, { "epoch": 0.6833672305173868, "grad_norm": 7.585815906524658, "learning_rate": 2.318713191454775e-06, "loss": 0.2698, "step": 13776 }, { "epoch": 0.683416836152587, "grad_norm": 7.833215236663818, "learning_rate": 2.3180488774586114e-06, "loss": 0.3244, "step": 13777 }, { "epoch": 0.683466441787787, "grad_norm": 11.356821060180664, "learning_rate": 2.3173846299224096e-06, "loss": 0.3653, "step": 13778 }, { "epoch": 0.6835160474229872, "grad_norm": 5.295669078826904, "learning_rate": 2.31672044886263e-06, "loss": 0.3047, "step": 13779 }, { "epoch": 0.6835656530581874, "grad_norm": 11.841883659362793, "learning_rate": 2.3160563342957313e-06, "loss": 0.3294, "step": 13780 }, { "epoch": 0.6836152586933876, "grad_norm": 9.2852201461792, "learning_rate": 2.3153922862381713e-06, "loss": 0.2642, "step": 13781 }, { "epoch": 0.6836648643285878, "grad_norm": 8.655116081237793, "learning_rate": 2.314728304706404e-06, "loss": 0.3219, "step": 13782 }, { "epoch": 0.6837144699637879, "grad_norm": 4.590231895446777, "learning_rate": 2.3140643897168845e-06, "loss": 0.2292, "step": 13783 }, { "epoch": 0.683764075598988, "grad_norm": 4.308256149291992, "learning_rate": 2.3134005412860645e-06, "loss": 0.2673, "step": 13784 }, { "epoch": 0.6838136812341882, "grad_norm": 4.584234714508057, "learning_rate": 2.3127367594303956e-06, "loss": 0.2338, "step": 13785 }, { "epoch": 0.6838632868693884, "grad_norm": 4.624063968658447, "learning_rate": 2.312073044166324e-06, "loss": 0.3211, "step": 13786 }, { "epoch": 0.6839128925045885, "grad_norm": 5.667932033538818, "learning_rate": 2.3114093955102983e-06, "loss": 0.2796, "step": 13787 }, { "epoch": 0.6839624981397887, "grad_norm": 8.811762809753418, "learning_rate": 2.3107458134787636e-06, "loss": 0.3579, "step": 13788 }, { "epoch": 0.6840121037749889, "grad_norm": 11.395095825195312, "learning_rate": 2.3100822980881648e-06, "loss": 0.3295, "step": 13789 }, { "epoch": 0.684061709410189, "grad_norm": 5.9278693199157715, "learning_rate": 2.309418849354943e-06, "loss": 0.3168, "step": 13790 }, { "epoch": 0.6841113150453891, "grad_norm": 15.4409818649292, "learning_rate": 2.308755467295541e-06, "loss": 0.3603, "step": 13791 }, { "epoch": 0.6841609206805893, "grad_norm": 6.425667762756348, "learning_rate": 2.3080921519263923e-06, "loss": 0.2328, "step": 13792 }, { "epoch": 0.6842105263157895, "grad_norm": 6.100076198577881, "learning_rate": 2.3074289032639414e-06, "loss": 0.2771, "step": 13793 }, { "epoch": 0.6842601319509897, "grad_norm": 8.859823226928711, "learning_rate": 2.3067657213246188e-06, "loss": 0.282, "step": 13794 }, { "epoch": 0.6843097375861897, "grad_norm": 8.856951713562012, "learning_rate": 2.306102606124859e-06, "loss": 0.3169, "step": 13795 }, { "epoch": 0.6843593432213899, "grad_norm": 4.933624267578125, "learning_rate": 2.3054395576810956e-06, "loss": 0.183, "step": 13796 }, { "epoch": 0.6844089488565901, "grad_norm": 10.460233688354492, "learning_rate": 2.304776576009759e-06, "loss": 0.261, "step": 13797 }, { "epoch": 0.6844585544917903, "grad_norm": 13.949156761169434, "learning_rate": 2.3041136611272786e-06, "loss": 0.4644, "step": 13798 }, { "epoch": 0.6845081601269905, "grad_norm": 5.576131820678711, "learning_rate": 2.303450813050082e-06, "loss": 0.2435, "step": 13799 }, { "epoch": 0.6845577657621906, "grad_norm": 4.569448471069336, "learning_rate": 2.302788031794591e-06, "loss": 0.2307, "step": 13800 }, { "epoch": 0.6846073713973907, "grad_norm": 5.25942325592041, "learning_rate": 2.302125317377236e-06, "loss": 0.3382, "step": 13801 }, { "epoch": 0.6846569770325909, "grad_norm": 15.468570709228516, "learning_rate": 2.3014626698144344e-06, "loss": 0.3489, "step": 13802 }, { "epoch": 0.684706582667791, "grad_norm": 16.813655853271484, "learning_rate": 2.3008000891226083e-06, "loss": 0.3167, "step": 13803 }, { "epoch": 0.6847561883029912, "grad_norm": 4.814126968383789, "learning_rate": 2.300137575318177e-06, "loss": 0.2403, "step": 13804 }, { "epoch": 0.6848057939381914, "grad_norm": 6.880331516265869, "learning_rate": 2.29947512841756e-06, "loss": 0.2913, "step": 13805 }, { "epoch": 0.6848553995733916, "grad_norm": 4.985332012176514, "learning_rate": 2.298812748437167e-06, "loss": 0.2563, "step": 13806 }, { "epoch": 0.6849050052085917, "grad_norm": 7.331652641296387, "learning_rate": 2.29815043539342e-06, "loss": 0.3202, "step": 13807 }, { "epoch": 0.6849546108437918, "grad_norm": 4.846229076385498, "learning_rate": 2.2974881893027243e-06, "loss": 0.2675, "step": 13808 }, { "epoch": 0.685004216478992, "grad_norm": 4.793744087219238, "learning_rate": 2.296826010181495e-06, "loss": 0.2513, "step": 13809 }, { "epoch": 0.6850538221141922, "grad_norm": 5.315390110015869, "learning_rate": 2.29616389804614e-06, "loss": 0.2705, "step": 13810 }, { "epoch": 0.6851034277493924, "grad_norm": 6.806072235107422, "learning_rate": 2.295501852913068e-06, "loss": 0.3249, "step": 13811 }, { "epoch": 0.6851530333845924, "grad_norm": 13.945836067199707, "learning_rate": 2.29483987479868e-06, "loss": 0.2255, "step": 13812 }, { "epoch": 0.6852026390197926, "grad_norm": 5.504438877105713, "learning_rate": 2.2941779637193866e-06, "loss": 0.2191, "step": 13813 }, { "epoch": 0.6852522446549928, "grad_norm": 5.49910831451416, "learning_rate": 2.2935161196915845e-06, "loss": 0.2679, "step": 13814 }, { "epoch": 0.685301850290193, "grad_norm": 5.100554943084717, "learning_rate": 2.292854342731681e-06, "loss": 0.258, "step": 13815 }, { "epoch": 0.6853514559253931, "grad_norm": 3.625877857208252, "learning_rate": 2.292192632856069e-06, "loss": 0.1851, "step": 13816 }, { "epoch": 0.6854010615605933, "grad_norm": 3.8456287384033203, "learning_rate": 2.2915309900811483e-06, "loss": 0.2271, "step": 13817 }, { "epoch": 0.6854506671957934, "grad_norm": 8.594796180725098, "learning_rate": 2.290869414423315e-06, "loss": 0.3701, "step": 13818 }, { "epoch": 0.6855002728309936, "grad_norm": 5.411508083343506, "learning_rate": 2.2902079058989653e-06, "loss": 0.2904, "step": 13819 }, { "epoch": 0.6855498784661938, "grad_norm": 5.443096160888672, "learning_rate": 2.289546464524485e-06, "loss": 0.236, "step": 13820 }, { "epoch": 0.6855994841013939, "grad_norm": 8.937355995178223, "learning_rate": 2.288885090316274e-06, "loss": 0.3262, "step": 13821 }, { "epoch": 0.6856490897365941, "grad_norm": 7.183876991271973, "learning_rate": 2.2882237832907144e-06, "loss": 0.2774, "step": 13822 }, { "epoch": 0.6856986953717943, "grad_norm": 9.917190551757812, "learning_rate": 2.287562543464197e-06, "loss": 0.3378, "step": 13823 }, { "epoch": 0.6857483010069944, "grad_norm": 7.81758975982666, "learning_rate": 2.2869013708531055e-06, "loss": 0.2345, "step": 13824 }, { "epoch": 0.6857979066421945, "grad_norm": 10.566558837890625, "learning_rate": 2.286240265473828e-06, "loss": 0.328, "step": 13825 }, { "epoch": 0.6858475122773947, "grad_norm": 7.745067119598389, "learning_rate": 2.2855792273427404e-06, "loss": 0.2395, "step": 13826 }, { "epoch": 0.6858971179125949, "grad_norm": 5.850806713104248, "learning_rate": 2.284918256476231e-06, "loss": 0.2361, "step": 13827 }, { "epoch": 0.6859467235477951, "grad_norm": 12.544349670410156, "learning_rate": 2.284257352890673e-06, "loss": 0.3548, "step": 13828 }, { "epoch": 0.6859963291829951, "grad_norm": 6.76073694229126, "learning_rate": 2.283596516602447e-06, "loss": 0.2742, "step": 13829 }, { "epoch": 0.6860459348181953, "grad_norm": 6.565341472625732, "learning_rate": 2.282935747627928e-06, "loss": 0.3222, "step": 13830 }, { "epoch": 0.6860955404533955, "grad_norm": 8.825401306152344, "learning_rate": 2.2822750459834897e-06, "loss": 0.2734, "step": 13831 }, { "epoch": 0.6861451460885957, "grad_norm": 10.004132270812988, "learning_rate": 2.281614411685506e-06, "loss": 0.3579, "step": 13832 }, { "epoch": 0.6861947517237958, "grad_norm": 4.897872447967529, "learning_rate": 2.280953844750346e-06, "loss": 0.3062, "step": 13833 }, { "epoch": 0.686244357358996, "grad_norm": 8.258305549621582, "learning_rate": 2.2802933451943804e-06, "loss": 0.3261, "step": 13834 }, { "epoch": 0.6862939629941961, "grad_norm": 6.4391279220581055, "learning_rate": 2.2796329130339775e-06, "loss": 0.3031, "step": 13835 }, { "epoch": 0.6863435686293963, "grad_norm": 8.632230758666992, "learning_rate": 2.2789725482854997e-06, "loss": 0.2204, "step": 13836 }, { "epoch": 0.6863931742645965, "grad_norm": 6.560237884521484, "learning_rate": 2.2783122509653127e-06, "loss": 0.3243, "step": 13837 }, { "epoch": 0.6864427798997966, "grad_norm": 9.792741775512695, "learning_rate": 2.2776520210897797e-06, "loss": 0.2508, "step": 13838 }, { "epoch": 0.6864923855349968, "grad_norm": 11.084111213684082, "learning_rate": 2.276991858675261e-06, "loss": 0.3302, "step": 13839 }, { "epoch": 0.686541991170197, "grad_norm": 5.502115249633789, "learning_rate": 2.2763317637381154e-06, "loss": 0.2187, "step": 13840 }, { "epoch": 0.686591596805397, "grad_norm": 3.5522499084472656, "learning_rate": 2.275671736294701e-06, "loss": 0.2614, "step": 13841 }, { "epoch": 0.6866412024405972, "grad_norm": 11.562288284301758, "learning_rate": 2.2750117763613754e-06, "loss": 0.3777, "step": 13842 }, { "epoch": 0.6866908080757974, "grad_norm": 9.85171890258789, "learning_rate": 2.2743518839544886e-06, "loss": 0.2588, "step": 13843 }, { "epoch": 0.6867404137109976, "grad_norm": 7.670006275177002, "learning_rate": 2.273692059090395e-06, "loss": 0.3537, "step": 13844 }, { "epoch": 0.6867900193461978, "grad_norm": 10.925511360168457, "learning_rate": 2.2730323017854454e-06, "loss": 0.226, "step": 13845 }, { "epoch": 0.6868396249813978, "grad_norm": 15.169028282165527, "learning_rate": 2.272372612055989e-06, "loss": 0.3077, "step": 13846 }, { "epoch": 0.686889230616598, "grad_norm": 5.570912837982178, "learning_rate": 2.2717129899183732e-06, "loss": 0.1716, "step": 13847 }, { "epoch": 0.6869388362517982, "grad_norm": 9.13288688659668, "learning_rate": 2.2710534353889456e-06, "loss": 0.36, "step": 13848 }, { "epoch": 0.6869884418869984, "grad_norm": 10.985036849975586, "learning_rate": 2.270393948484044e-06, "loss": 0.3582, "step": 13849 }, { "epoch": 0.6870380475221985, "grad_norm": 4.96009635925293, "learning_rate": 2.2697345292200196e-06, "loss": 0.251, "step": 13850 }, { "epoch": 0.6870876531573987, "grad_norm": 7.325268745422363, "learning_rate": 2.269075177613205e-06, "loss": 0.2484, "step": 13851 }, { "epoch": 0.6871372587925988, "grad_norm": 7.060774326324463, "learning_rate": 2.2684158936799457e-06, "loss": 0.3446, "step": 13852 }, { "epoch": 0.687186864427799, "grad_norm": 8.028717041015625, "learning_rate": 2.2677566774365743e-06, "loss": 0.3281, "step": 13853 }, { "epoch": 0.6872364700629991, "grad_norm": 10.344433784484863, "learning_rate": 2.267097528899429e-06, "loss": 0.3352, "step": 13854 }, { "epoch": 0.6872860756981993, "grad_norm": 5.720642566680908, "learning_rate": 2.2664384480848428e-06, "loss": 0.3292, "step": 13855 }, { "epoch": 0.6873356813333995, "grad_norm": 10.633625984191895, "learning_rate": 2.2657794350091505e-06, "loss": 0.4359, "step": 13856 }, { "epoch": 0.6873852869685997, "grad_norm": 5.254182815551758, "learning_rate": 2.265120489688677e-06, "loss": 0.2953, "step": 13857 }, { "epoch": 0.6874348926037998, "grad_norm": 6.581032752990723, "learning_rate": 2.2644616121397576e-06, "loss": 0.2752, "step": 13858 }, { "epoch": 0.6874844982389999, "grad_norm": 7.890392780303955, "learning_rate": 2.263802802378716e-06, "loss": 0.2743, "step": 13859 }, { "epoch": 0.6875341038742001, "grad_norm": 8.090984344482422, "learning_rate": 2.2631440604218785e-06, "loss": 0.2065, "step": 13860 }, { "epoch": 0.6875837095094003, "grad_norm": 7.150228023529053, "learning_rate": 2.2624853862855696e-06, "loss": 0.3605, "step": 13861 }, { "epoch": 0.6876333151446005, "grad_norm": 7.452660083770752, "learning_rate": 2.2618267799861123e-06, "loss": 0.2063, "step": 13862 }, { "epoch": 0.6876829207798005, "grad_norm": 6.473127841949463, "learning_rate": 2.261168241539822e-06, "loss": 0.3141, "step": 13863 }, { "epoch": 0.6877325264150007, "grad_norm": 8.51252269744873, "learning_rate": 2.2605097709630264e-06, "loss": 0.3684, "step": 13864 }, { "epoch": 0.6877821320502009, "grad_norm": 11.44321346282959, "learning_rate": 2.259851368272035e-06, "loss": 0.4112, "step": 13865 }, { "epoch": 0.6878317376854011, "grad_norm": 6.630028247833252, "learning_rate": 2.259193033483167e-06, "loss": 0.336, "step": 13866 }, { "epoch": 0.6878813433206012, "grad_norm": 6.3532233238220215, "learning_rate": 2.258534766612735e-06, "loss": 0.2968, "step": 13867 }, { "epoch": 0.6879309489558014, "grad_norm": 7.376615524291992, "learning_rate": 2.2578765676770515e-06, "loss": 0.2576, "step": 13868 }, { "epoch": 0.6879805545910015, "grad_norm": 4.8247222900390625, "learning_rate": 2.257218436692427e-06, "loss": 0.2467, "step": 13869 }, { "epoch": 0.6880301602262017, "grad_norm": 7.837281703948975, "learning_rate": 2.2565603736751728e-06, "loss": 0.3077, "step": 13870 }, { "epoch": 0.6880797658614018, "grad_norm": 5.120624542236328, "learning_rate": 2.2559023786415896e-06, "loss": 0.2157, "step": 13871 }, { "epoch": 0.688129371496602, "grad_norm": 4.054832935333252, "learning_rate": 2.25524445160799e-06, "loss": 0.2602, "step": 13872 }, { "epoch": 0.6881789771318022, "grad_norm": 5.926267147064209, "learning_rate": 2.254586592590673e-06, "loss": 0.2909, "step": 13873 }, { "epoch": 0.6882285827670024, "grad_norm": 6.16223669052124, "learning_rate": 2.253928801605942e-06, "loss": 0.3752, "step": 13874 }, { "epoch": 0.6882781884022025, "grad_norm": 11.72061538696289, "learning_rate": 2.2532710786700975e-06, "loss": 0.2132, "step": 13875 }, { "epoch": 0.6883277940374026, "grad_norm": 6.8302202224731445, "learning_rate": 2.25261342379944e-06, "loss": 0.1968, "step": 13876 }, { "epoch": 0.6883773996726028, "grad_norm": 21.45834732055664, "learning_rate": 2.2519558370102605e-06, "loss": 0.3309, "step": 13877 }, { "epoch": 0.688427005307803, "grad_norm": 6.663452625274658, "learning_rate": 2.251298318318863e-06, "loss": 0.3019, "step": 13878 }, { "epoch": 0.6884766109430032, "grad_norm": 12.057297706604004, "learning_rate": 2.2506408677415344e-06, "loss": 0.3388, "step": 13879 }, { "epoch": 0.6885262165782032, "grad_norm": 6.671481609344482, "learning_rate": 2.2499834852945686e-06, "loss": 0.3516, "step": 13880 }, { "epoch": 0.6885758222134034, "grad_norm": 11.001245498657227, "learning_rate": 2.249326170994256e-06, "loss": 0.2698, "step": 13881 }, { "epoch": 0.6886254278486036, "grad_norm": 10.39587116241455, "learning_rate": 2.248668924856885e-06, "loss": 0.3395, "step": 13882 }, { "epoch": 0.6886750334838038, "grad_norm": 5.157627582550049, "learning_rate": 2.248011746898743e-06, "loss": 0.1934, "step": 13883 }, { "epoch": 0.688724639119004, "grad_norm": 7.820099353790283, "learning_rate": 2.247354637136116e-06, "loss": 0.2377, "step": 13884 }, { "epoch": 0.6887742447542041, "grad_norm": 4.285958766937256, "learning_rate": 2.246697595585283e-06, "loss": 0.3212, "step": 13885 }, { "epoch": 0.6888238503894042, "grad_norm": 11.488134384155273, "learning_rate": 2.246040622262533e-06, "loss": 0.4051, "step": 13886 }, { "epoch": 0.6888734560246044, "grad_norm": 5.939720630645752, "learning_rate": 2.2453837171841397e-06, "loss": 0.2553, "step": 13887 }, { "epoch": 0.6889230616598045, "grad_norm": 6.897754192352295, "learning_rate": 2.244726880366384e-06, "loss": 0.3259, "step": 13888 }, { "epoch": 0.6889726672950047, "grad_norm": 4.259835243225098, "learning_rate": 2.2440701118255432e-06, "loss": 0.2439, "step": 13889 }, { "epoch": 0.6890222729302049, "grad_norm": 7.596325397491455, "learning_rate": 2.2434134115778912e-06, "loss": 0.2149, "step": 13890 }, { "epoch": 0.6890718785654051, "grad_norm": 5.1905364990234375, "learning_rate": 2.2427567796397017e-06, "loss": 0.2251, "step": 13891 }, { "epoch": 0.6891214842006051, "grad_norm": 6.527749061584473, "learning_rate": 2.2421002160272487e-06, "loss": 0.3347, "step": 13892 }, { "epoch": 0.6891710898358053, "grad_norm": 5.568769454956055, "learning_rate": 2.2414437207567973e-06, "loss": 0.2694, "step": 13893 }, { "epoch": 0.6892206954710055, "grad_norm": 10.206457138061523, "learning_rate": 2.240787293844619e-06, "loss": 0.3966, "step": 13894 }, { "epoch": 0.6892703011062057, "grad_norm": 6.581573963165283, "learning_rate": 2.2401309353069796e-06, "loss": 0.2152, "step": 13895 }, { "epoch": 0.6893199067414059, "grad_norm": 7.032397747039795, "learning_rate": 2.239474645160144e-06, "loss": 0.2102, "step": 13896 }, { "epoch": 0.6893695123766059, "grad_norm": 5.5201311111450195, "learning_rate": 2.2388184234203753e-06, "loss": 0.2308, "step": 13897 }, { "epoch": 0.6894191180118061, "grad_norm": 9.63512897491455, "learning_rate": 2.2381622701039353e-06, "loss": 0.4055, "step": 13898 }, { "epoch": 0.6894687236470063, "grad_norm": 13.369848251342773, "learning_rate": 2.2375061852270845e-06, "loss": 0.3823, "step": 13899 }, { "epoch": 0.6895183292822065, "grad_norm": 6.959100723266602, "learning_rate": 2.2368501688060774e-06, "loss": 0.3001, "step": 13900 }, { "epoch": 0.6895679349174066, "grad_norm": 11.143128395080566, "learning_rate": 2.2361942208571762e-06, "loss": 0.3649, "step": 13901 }, { "epoch": 0.6896175405526068, "grad_norm": 11.74400806427002, "learning_rate": 2.235538341396631e-06, "loss": 0.3171, "step": 13902 }, { "epoch": 0.6896671461878069, "grad_norm": 9.157085418701172, "learning_rate": 2.2348825304406955e-06, "loss": 0.285, "step": 13903 }, { "epoch": 0.6897167518230071, "grad_norm": 5.462715148925781, "learning_rate": 2.2342267880056225e-06, "loss": 0.3286, "step": 13904 }, { "epoch": 0.6897663574582072, "grad_norm": 6.8020124435424805, "learning_rate": 2.2335711141076606e-06, "loss": 0.248, "step": 13905 }, { "epoch": 0.6898159630934074, "grad_norm": 8.462099075317383, "learning_rate": 2.232915508763058e-06, "loss": 0.2727, "step": 13906 }, { "epoch": 0.6898655687286076, "grad_norm": 9.076436042785645, "learning_rate": 2.232259971988062e-06, "loss": 0.2643, "step": 13907 }, { "epoch": 0.6899151743638078, "grad_norm": 8.074848175048828, "learning_rate": 2.2316045037989125e-06, "loss": 0.2481, "step": 13908 }, { "epoch": 0.6899647799990078, "grad_norm": 6.315195560455322, "learning_rate": 2.23094910421186e-06, "loss": 0.3666, "step": 13909 }, { "epoch": 0.690014385634208, "grad_norm": 4.443307876586914, "learning_rate": 2.2302937732431385e-06, "loss": 0.316, "step": 13910 }, { "epoch": 0.6900639912694082, "grad_norm": 6.650001525878906, "learning_rate": 2.2296385109089904e-06, "loss": 0.2672, "step": 13911 }, { "epoch": 0.6901135969046084, "grad_norm": 5.238685607910156, "learning_rate": 2.228983317225653e-06, "loss": 0.2997, "step": 13912 }, { "epoch": 0.6901632025398086, "grad_norm": 4.953912734985352, "learning_rate": 2.2283281922093646e-06, "loss": 0.3124, "step": 13913 }, { "epoch": 0.6902128081750086, "grad_norm": 6.053486347198486, "learning_rate": 2.2276731358763538e-06, "loss": 0.3783, "step": 13914 }, { "epoch": 0.6902624138102088, "grad_norm": 9.33131217956543, "learning_rate": 2.2270181482428606e-06, "loss": 0.3086, "step": 13915 }, { "epoch": 0.690312019445409, "grad_norm": 22.759916305541992, "learning_rate": 2.2263632293251098e-06, "loss": 0.4977, "step": 13916 }, { "epoch": 0.6903616250806092, "grad_norm": 13.01187515258789, "learning_rate": 2.225708379139333e-06, "loss": 0.5379, "step": 13917 }, { "epoch": 0.6904112307158093, "grad_norm": 5.662591457366943, "learning_rate": 2.2250535977017573e-06, "loss": 0.3303, "step": 13918 }, { "epoch": 0.6904608363510095, "grad_norm": 6.693135738372803, "learning_rate": 2.224398885028611e-06, "loss": 0.2849, "step": 13919 }, { "epoch": 0.6905104419862096, "grad_norm": 6.260310173034668, "learning_rate": 2.2237442411361117e-06, "loss": 0.2251, "step": 13920 }, { "epoch": 0.6905600476214098, "grad_norm": 9.519325256347656, "learning_rate": 2.22308966604049e-06, "loss": 0.2105, "step": 13921 }, { "epoch": 0.69060965325661, "grad_norm": 8.773618698120117, "learning_rate": 2.2224351597579584e-06, "loss": 0.3526, "step": 13922 }, { "epoch": 0.6906592588918101, "grad_norm": 7.7921929359436035, "learning_rate": 2.221780722304744e-06, "loss": 0.3804, "step": 13923 }, { "epoch": 0.6907088645270103, "grad_norm": 6.480631351470947, "learning_rate": 2.2211263536970577e-06, "loss": 0.3133, "step": 13924 }, { "epoch": 0.6907584701622105, "grad_norm": 5.953845500946045, "learning_rate": 2.2204720539511175e-06, "loss": 0.3316, "step": 13925 }, { "epoch": 0.6908080757974105, "grad_norm": 6.059716701507568, "learning_rate": 2.219817823083138e-06, "loss": 0.2963, "step": 13926 }, { "epoch": 0.6908576814326107, "grad_norm": 5.607614040374756, "learning_rate": 2.2191636611093313e-06, "loss": 0.2696, "step": 13927 }, { "epoch": 0.6909072870678109, "grad_norm": 11.216512680053711, "learning_rate": 2.218509568045904e-06, "loss": 0.2905, "step": 13928 }, { "epoch": 0.6909568927030111, "grad_norm": 6.327474594116211, "learning_rate": 2.217855543909071e-06, "loss": 0.3141, "step": 13929 }, { "epoch": 0.6910064983382113, "grad_norm": 5.387349605560303, "learning_rate": 2.2172015887150343e-06, "loss": 0.1797, "step": 13930 }, { "epoch": 0.6910561039734113, "grad_norm": 5.5155439376831055, "learning_rate": 2.216547702480001e-06, "loss": 0.2121, "step": 13931 }, { "epoch": 0.6911057096086115, "grad_norm": 6.807157516479492, "learning_rate": 2.215893885220175e-06, "loss": 0.2784, "step": 13932 }, { "epoch": 0.6911553152438117, "grad_norm": 8.588613510131836, "learning_rate": 2.2152401369517596e-06, "loss": 0.2815, "step": 13933 }, { "epoch": 0.6912049208790119, "grad_norm": 7.204949855804443, "learning_rate": 2.21458645769095e-06, "loss": 0.2504, "step": 13934 }, { "epoch": 0.691254526514212, "grad_norm": 5.357787132263184, "learning_rate": 2.213932847453951e-06, "loss": 0.2738, "step": 13935 }, { "epoch": 0.6913041321494122, "grad_norm": 4.694430351257324, "learning_rate": 2.213279306256955e-06, "loss": 0.2773, "step": 13936 }, { "epoch": 0.6913537377846123, "grad_norm": 5.987150192260742, "learning_rate": 2.2126258341161576e-06, "loss": 0.3412, "step": 13937 }, { "epoch": 0.6914033434198125, "grad_norm": 4.743765830993652, "learning_rate": 2.211972431047754e-06, "loss": 0.3099, "step": 13938 }, { "epoch": 0.6914529490550126, "grad_norm": 4.682201862335205, "learning_rate": 2.2113190970679338e-06, "loss": 0.1957, "step": 13939 }, { "epoch": 0.6915025546902128, "grad_norm": 6.090686798095703, "learning_rate": 2.210665832192888e-06, "loss": 0.3043, "step": 13940 }, { "epoch": 0.691552160325413, "grad_norm": 5.029073238372803, "learning_rate": 2.210012636438807e-06, "loss": 0.2364, "step": 13941 }, { "epoch": 0.6916017659606132, "grad_norm": 6.08620023727417, "learning_rate": 2.2093595098218705e-06, "loss": 0.0998, "step": 13942 }, { "epoch": 0.6916513715958132, "grad_norm": 10.350813865661621, "learning_rate": 2.2087064523582723e-06, "loss": 0.3257, "step": 13943 }, { "epoch": 0.6917009772310134, "grad_norm": 3.9559223651885986, "learning_rate": 2.208053464064188e-06, "loss": 0.2189, "step": 13944 }, { "epoch": 0.6917505828662136, "grad_norm": 7.854551792144775, "learning_rate": 2.2074005449558027e-06, "loss": 0.3112, "step": 13945 }, { "epoch": 0.6918001885014138, "grad_norm": 7.313300132751465, "learning_rate": 2.2067476950492942e-06, "loss": 0.2879, "step": 13946 }, { "epoch": 0.691849794136614, "grad_norm": 5.2621378898620605, "learning_rate": 2.2060949143608423e-06, "loss": 0.258, "step": 13947 }, { "epoch": 0.691899399771814, "grad_norm": 6.054382801055908, "learning_rate": 2.205442202906622e-06, "loss": 0.3215, "step": 13948 }, { "epoch": 0.6919490054070142, "grad_norm": 6.404781341552734, "learning_rate": 2.2047895607028077e-06, "loss": 0.3141, "step": 13949 }, { "epoch": 0.6919986110422144, "grad_norm": 6.837968349456787, "learning_rate": 2.2041369877655744e-06, "loss": 0.389, "step": 13950 }, { "epoch": 0.6920482166774146, "grad_norm": 6.697152614593506, "learning_rate": 2.203484484111089e-06, "loss": 0.269, "step": 13951 }, { "epoch": 0.6920978223126147, "grad_norm": 8.32406234741211, "learning_rate": 2.2028320497555237e-06, "loss": 0.3882, "step": 13952 }, { "epoch": 0.6921474279478149, "grad_norm": 9.817344665527344, "learning_rate": 2.2021796847150456e-06, "loss": 0.3892, "step": 13953 }, { "epoch": 0.692197033583015, "grad_norm": 9.684867858886719, "learning_rate": 2.20152738900582e-06, "loss": 0.3333, "step": 13954 }, { "epoch": 0.6922466392182152, "grad_norm": 5.571559906005859, "learning_rate": 2.2008751626440117e-06, "loss": 0.2673, "step": 13955 }, { "epoch": 0.6922962448534153, "grad_norm": 12.793766021728516, "learning_rate": 2.2002230056457835e-06, "loss": 0.5111, "step": 13956 }, { "epoch": 0.6923458504886155, "grad_norm": 5.735838890075684, "learning_rate": 2.199570918027295e-06, "loss": 0.327, "step": 13957 }, { "epoch": 0.6923954561238157, "grad_norm": 6.577070236206055, "learning_rate": 2.1989188998047083e-06, "loss": 0.3571, "step": 13958 }, { "epoch": 0.6924450617590159, "grad_norm": 13.713894844055176, "learning_rate": 2.1982669509941744e-06, "loss": 0.3782, "step": 13959 }, { "epoch": 0.692494667394216, "grad_norm": 8.652400016784668, "learning_rate": 2.1976150716118564e-06, "loss": 0.2363, "step": 13960 }, { "epoch": 0.6925442730294161, "grad_norm": 7.278364658355713, "learning_rate": 2.1969632616739036e-06, "loss": 0.3018, "step": 13961 }, { "epoch": 0.6925938786646163, "grad_norm": 8.35763168334961, "learning_rate": 2.1963115211964682e-06, "loss": 0.313, "step": 13962 }, { "epoch": 0.6926434842998165, "grad_norm": 4.939387321472168, "learning_rate": 2.1956598501957017e-06, "loss": 0.2644, "step": 13963 }, { "epoch": 0.6926930899350167, "grad_norm": 14.606752395629883, "learning_rate": 2.1950082486877548e-06, "loss": 0.3808, "step": 13964 }, { "epoch": 0.6927426955702167, "grad_norm": 7.298062801361084, "learning_rate": 2.1943567166887685e-06, "loss": 0.3627, "step": 13965 }, { "epoch": 0.6927923012054169, "grad_norm": 6.414735794067383, "learning_rate": 2.1937052542148957e-06, "loss": 0.2505, "step": 13966 }, { "epoch": 0.6928419068406171, "grad_norm": 8.232548713684082, "learning_rate": 2.1930538612822738e-06, "loss": 0.3126, "step": 13967 }, { "epoch": 0.6928915124758173, "grad_norm": 7.287210941314697, "learning_rate": 2.1924025379070475e-06, "loss": 0.2878, "step": 13968 }, { "epoch": 0.6929411181110174, "grad_norm": 6.520226955413818, "learning_rate": 2.1917512841053555e-06, "loss": 0.3088, "step": 13969 }, { "epoch": 0.6929907237462176, "grad_norm": 6.339746952056885, "learning_rate": 2.1911000998933395e-06, "loss": 0.2244, "step": 13970 }, { "epoch": 0.6930403293814177, "grad_norm": 7.177554607391357, "learning_rate": 2.19044898528713e-06, "loss": 0.3628, "step": 13971 }, { "epoch": 0.6930899350166179, "grad_norm": 6.371558666229248, "learning_rate": 2.1897979403028693e-06, "loss": 0.2892, "step": 13972 }, { "epoch": 0.693139540651818, "grad_norm": 5.648375034332275, "learning_rate": 2.1891469649566848e-06, "loss": 0.2334, "step": 13973 }, { "epoch": 0.6931891462870182, "grad_norm": 5.538567543029785, "learning_rate": 2.1884960592647097e-06, "loss": 0.3119, "step": 13974 }, { "epoch": 0.6932387519222184, "grad_norm": 6.526549339294434, "learning_rate": 2.1878452232430746e-06, "loss": 0.3769, "step": 13975 }, { "epoch": 0.6932883575574186, "grad_norm": 15.683329582214355, "learning_rate": 2.187194456907906e-06, "loss": 0.4951, "step": 13976 }, { "epoch": 0.6933379631926186, "grad_norm": 6.879661560058594, "learning_rate": 2.186543760275332e-06, "loss": 0.2299, "step": 13977 }, { "epoch": 0.6933875688278188, "grad_norm": 11.613236427307129, "learning_rate": 2.185893133361478e-06, "loss": 0.3352, "step": 13978 }, { "epoch": 0.693437174463019, "grad_norm": 4.597204685211182, "learning_rate": 2.185242576182461e-06, "loss": 0.3131, "step": 13979 }, { "epoch": 0.6934867800982192, "grad_norm": 7.666230201721191, "learning_rate": 2.184592088754411e-06, "loss": 0.349, "step": 13980 }, { "epoch": 0.6935363857334194, "grad_norm": 4.6777801513671875, "learning_rate": 2.1839416710934396e-06, "loss": 0.2877, "step": 13981 }, { "epoch": 0.6935859913686194, "grad_norm": 4.760339736938477, "learning_rate": 2.1832913232156676e-06, "loss": 0.2364, "step": 13982 }, { "epoch": 0.6936355970038196, "grad_norm": 20.6015682220459, "learning_rate": 2.182641045137211e-06, "loss": 0.2871, "step": 13983 }, { "epoch": 0.6936852026390198, "grad_norm": 4.140800952911377, "learning_rate": 2.181990836874186e-06, "loss": 0.187, "step": 13984 }, { "epoch": 0.69373480827422, "grad_norm": 8.764185905456543, "learning_rate": 2.181340698442698e-06, "loss": 0.3826, "step": 13985 }, { "epoch": 0.6937844139094201, "grad_norm": 7.401824474334717, "learning_rate": 2.180690629858867e-06, "loss": 0.3551, "step": 13986 }, { "epoch": 0.6938340195446203, "grad_norm": 6.759864807128906, "learning_rate": 2.1800406311387957e-06, "loss": 0.2362, "step": 13987 }, { "epoch": 0.6938836251798204, "grad_norm": 9.382755279541016, "learning_rate": 2.179390702298593e-06, "loss": 0.2817, "step": 13988 }, { "epoch": 0.6939332308150206, "grad_norm": 7.783249855041504, "learning_rate": 2.1787408433543643e-06, "loss": 0.3947, "step": 13989 }, { "epoch": 0.6939828364502207, "grad_norm": 4.050327777862549, "learning_rate": 2.1780910543222163e-06, "loss": 0.2692, "step": 13990 }, { "epoch": 0.6940324420854209, "grad_norm": 4.805577278137207, "learning_rate": 2.1774413352182438e-06, "loss": 0.2701, "step": 13991 }, { "epoch": 0.6940820477206211, "grad_norm": 4.31103515625, "learning_rate": 2.176791686058556e-06, "loss": 0.2865, "step": 13992 }, { "epoch": 0.6941316533558213, "grad_norm": 3.924023389816284, "learning_rate": 2.1761421068592436e-06, "loss": 0.2557, "step": 13993 }, { "epoch": 0.6941812589910213, "grad_norm": 13.977189064025879, "learning_rate": 2.175492597636411e-06, "loss": 0.3549, "step": 13994 }, { "epoch": 0.6942308646262215, "grad_norm": 5.240283966064453, "learning_rate": 2.1748431584061476e-06, "loss": 0.2396, "step": 13995 }, { "epoch": 0.6942804702614217, "grad_norm": 6.531968116760254, "learning_rate": 2.1741937891845483e-06, "loss": 0.3397, "step": 13996 }, { "epoch": 0.6943300758966219, "grad_norm": 6.568396091461182, "learning_rate": 2.1735444899877056e-06, "loss": 0.3129, "step": 13997 }, { "epoch": 0.6943796815318221, "grad_norm": 16.82305145263672, "learning_rate": 2.172895260831708e-06, "loss": 0.2902, "step": 13998 }, { "epoch": 0.6944292871670221, "grad_norm": 4.720180988311768, "learning_rate": 2.172246101732646e-06, "loss": 0.3013, "step": 13999 }, { "epoch": 0.6944788928022223, "grad_norm": 8.129932403564453, "learning_rate": 2.171597012706606e-06, "loss": 0.3698, "step": 14000 }, { "epoch": 0.6945284984374225, "grad_norm": 13.539698600769043, "learning_rate": 2.17094799376967e-06, "loss": 0.3162, "step": 14001 }, { "epoch": 0.6945781040726227, "grad_norm": 4.712665557861328, "learning_rate": 2.170299044937922e-06, "loss": 0.2964, "step": 14002 }, { "epoch": 0.6946277097078228, "grad_norm": 9.577984809875488, "learning_rate": 2.169650166227443e-06, "loss": 0.3879, "step": 14003 }, { "epoch": 0.694677315343023, "grad_norm": 4.76395845413208, "learning_rate": 2.1690013576543144e-06, "loss": 0.2842, "step": 14004 }, { "epoch": 0.6947269209782231, "grad_norm": 9.2805814743042, "learning_rate": 2.168352619234612e-06, "loss": 0.352, "step": 14005 }, { "epoch": 0.6947765266134233, "grad_norm": 3.59885311126709, "learning_rate": 2.1677039509844134e-06, "loss": 0.3012, "step": 14006 }, { "epoch": 0.6948261322486234, "grad_norm": 7.531680107116699, "learning_rate": 2.1670553529197937e-06, "loss": 0.3071, "step": 14007 }, { "epoch": 0.6948757378838236, "grad_norm": 8.604480743408203, "learning_rate": 2.166406825056819e-06, "loss": 0.3952, "step": 14008 }, { "epoch": 0.6949253435190238, "grad_norm": 5.189638614654541, "learning_rate": 2.1657583674115703e-06, "loss": 0.2743, "step": 14009 }, { "epoch": 0.6949749491542239, "grad_norm": 8.325263977050781, "learning_rate": 2.1651099800001084e-06, "loss": 0.357, "step": 14010 }, { "epoch": 0.695024554789424, "grad_norm": 10.294210433959961, "learning_rate": 2.164461662838504e-06, "loss": 0.3794, "step": 14011 }, { "epoch": 0.6950741604246242, "grad_norm": 4.048187255859375, "learning_rate": 2.163813415942822e-06, "loss": 0.166, "step": 14012 }, { "epoch": 0.6951237660598244, "grad_norm": 4.841038703918457, "learning_rate": 2.163165239329126e-06, "loss": 0.2428, "step": 14013 }, { "epoch": 0.6951733716950246, "grad_norm": 5.587022304534912, "learning_rate": 2.16251713301348e-06, "loss": 0.2016, "step": 14014 }, { "epoch": 0.6952229773302248, "grad_norm": 10.605207443237305, "learning_rate": 2.1618690970119436e-06, "loss": 0.3839, "step": 14015 }, { "epoch": 0.6952725829654248, "grad_norm": 11.326889038085938, "learning_rate": 2.161221131340572e-06, "loss": 0.3467, "step": 14016 }, { "epoch": 0.695322188600625, "grad_norm": 3.763152599334717, "learning_rate": 2.1605732360154275e-06, "loss": 0.1773, "step": 14017 }, { "epoch": 0.6953717942358252, "grad_norm": 8.27088451385498, "learning_rate": 2.159925411052561e-06, "loss": 0.426, "step": 14018 }, { "epoch": 0.6954213998710254, "grad_norm": 12.310257911682129, "learning_rate": 2.1592776564680285e-06, "loss": 0.374, "step": 14019 }, { "epoch": 0.6954710055062255, "grad_norm": 4.055834770202637, "learning_rate": 2.1586299722778796e-06, "loss": 0.2378, "step": 14020 }, { "epoch": 0.6955206111414257, "grad_norm": 13.452219009399414, "learning_rate": 2.157982358498168e-06, "loss": 0.3817, "step": 14021 }, { "epoch": 0.6955702167766258, "grad_norm": 7.691503524780273, "learning_rate": 2.1573348151449352e-06, "loss": 0.298, "step": 14022 }, { "epoch": 0.695619822411826, "grad_norm": 11.433642387390137, "learning_rate": 2.1566873422342347e-06, "loss": 0.3081, "step": 14023 }, { "epoch": 0.6956694280470261, "grad_norm": 5.151022911071777, "learning_rate": 2.156039939782107e-06, "loss": 0.2596, "step": 14024 }, { "epoch": 0.6957190336822263, "grad_norm": 4.731309413909912, "learning_rate": 2.1553926078045954e-06, "loss": 0.2124, "step": 14025 }, { "epoch": 0.6957686393174265, "grad_norm": 3.460158586502075, "learning_rate": 2.154745346317742e-06, "loss": 0.1945, "step": 14026 }, { "epoch": 0.6958182449526266, "grad_norm": 6.241306781768799, "learning_rate": 2.154098155337588e-06, "loss": 0.3213, "step": 14027 }, { "epoch": 0.6958678505878267, "grad_norm": 10.245283126831055, "learning_rate": 2.1534510348801653e-06, "loss": 0.2916, "step": 14028 }, { "epoch": 0.6959174562230269, "grad_norm": 7.851865291595459, "learning_rate": 2.1528039849615167e-06, "loss": 0.2121, "step": 14029 }, { "epoch": 0.6959670618582271, "grad_norm": 5.339324951171875, "learning_rate": 2.15215700559767e-06, "loss": 0.2956, "step": 14030 }, { "epoch": 0.6960166674934273, "grad_norm": 5.087301731109619, "learning_rate": 2.1515100968046646e-06, "loss": 0.2244, "step": 14031 }, { "epoch": 0.6960662731286275, "grad_norm": 9.188570976257324, "learning_rate": 2.1508632585985257e-06, "loss": 0.2869, "step": 14032 }, { "epoch": 0.6961158787638275, "grad_norm": 6.461302757263184, "learning_rate": 2.1502164909952845e-06, "loss": 0.2471, "step": 14033 }, { "epoch": 0.6961654843990277, "grad_norm": 10.954986572265625, "learning_rate": 2.149569794010967e-06, "loss": 0.3075, "step": 14034 }, { "epoch": 0.6962150900342279, "grad_norm": 12.716704368591309, "learning_rate": 2.148923167661601e-06, "loss": 0.2544, "step": 14035 }, { "epoch": 0.6962646956694281, "grad_norm": 8.676475524902344, "learning_rate": 2.148276611963206e-06, "loss": 0.3028, "step": 14036 }, { "epoch": 0.6963143013046282, "grad_norm": 6.594921112060547, "learning_rate": 2.1476301269318093e-06, "loss": 0.3309, "step": 14037 }, { "epoch": 0.6963639069398284, "grad_norm": 4.9983954429626465, "learning_rate": 2.146983712583427e-06, "loss": 0.312, "step": 14038 }, { "epoch": 0.6964135125750285, "grad_norm": 5.718925476074219, "learning_rate": 2.146337368934079e-06, "loss": 0.2625, "step": 14039 }, { "epoch": 0.6964631182102287, "grad_norm": 5.987364292144775, "learning_rate": 2.1456910959997814e-06, "loss": 0.3821, "step": 14040 }, { "epoch": 0.6965127238454288, "grad_norm": 5.361938953399658, "learning_rate": 2.1450448937965514e-06, "loss": 0.1909, "step": 14041 }, { "epoch": 0.696562329480629, "grad_norm": 11.182299613952637, "learning_rate": 2.1443987623403973e-06, "loss": 0.3216, "step": 14042 }, { "epoch": 0.6966119351158292, "grad_norm": 5.2108025550842285, "learning_rate": 2.1437527016473364e-06, "loss": 0.2507, "step": 14043 }, { "epoch": 0.6966615407510293, "grad_norm": 4.567343711853027, "learning_rate": 2.1431067117333744e-06, "loss": 0.2823, "step": 14044 }, { "epoch": 0.6967111463862294, "grad_norm": 6.7562055587768555, "learning_rate": 2.14246079261452e-06, "loss": 0.2962, "step": 14045 }, { "epoch": 0.6967607520214296, "grad_norm": 6.645327568054199, "learning_rate": 2.1418149443067796e-06, "loss": 0.3324, "step": 14046 }, { "epoch": 0.6968103576566298, "grad_norm": 7.703057289123535, "learning_rate": 2.1411691668261588e-06, "loss": 0.3042, "step": 14047 }, { "epoch": 0.69685996329183, "grad_norm": 8.49443244934082, "learning_rate": 2.140523460188658e-06, "loss": 0.2681, "step": 14048 }, { "epoch": 0.6969095689270302, "grad_norm": 9.103952407836914, "learning_rate": 2.139877824410282e-06, "loss": 0.3034, "step": 14049 }, { "epoch": 0.6969591745622302, "grad_norm": 6.240116596221924, "learning_rate": 2.139232259507023e-06, "loss": 0.2156, "step": 14050 }, { "epoch": 0.6970087801974304, "grad_norm": 3.8595285415649414, "learning_rate": 2.1385867654948874e-06, "loss": 0.2408, "step": 14051 }, { "epoch": 0.6970583858326306, "grad_norm": 6.716099262237549, "learning_rate": 2.137941342389863e-06, "loss": 0.2612, "step": 14052 }, { "epoch": 0.6971079914678308, "grad_norm": 11.31003189086914, "learning_rate": 2.1372959902079478e-06, "loss": 0.4152, "step": 14053 }, { "epoch": 0.6971575971030309, "grad_norm": 7.639391899108887, "learning_rate": 2.1366507089651324e-06, "loss": 0.1816, "step": 14054 }, { "epoch": 0.6972072027382311, "grad_norm": 6.3756489753723145, "learning_rate": 2.1360054986774076e-06, "loss": 0.2086, "step": 14055 }, { "epoch": 0.6972568083734312, "grad_norm": 4.829122543334961, "learning_rate": 2.1353603593607617e-06, "loss": 0.2817, "step": 14056 }, { "epoch": 0.6973064140086314, "grad_norm": 4.520565032958984, "learning_rate": 2.1347152910311824e-06, "loss": 0.2735, "step": 14057 }, { "epoch": 0.6973560196438315, "grad_norm": 4.368551254272461, "learning_rate": 2.1340702937046566e-06, "loss": 0.2687, "step": 14058 }, { "epoch": 0.6974056252790317, "grad_norm": 8.248546600341797, "learning_rate": 2.1334253673971627e-06, "loss": 0.3335, "step": 14059 }, { "epoch": 0.6974552309142319, "grad_norm": 6.341497898101807, "learning_rate": 2.132780512124685e-06, "loss": 0.2278, "step": 14060 }, { "epoch": 0.697504836549432, "grad_norm": 5.221780776977539, "learning_rate": 2.132135727903204e-06, "loss": 0.2519, "step": 14061 }, { "epoch": 0.6975544421846321, "grad_norm": 11.001283645629883, "learning_rate": 2.1314910147486963e-06, "loss": 0.3708, "step": 14062 }, { "epoch": 0.6976040478198323, "grad_norm": 13.206406593322754, "learning_rate": 2.130846372677139e-06, "loss": 0.4208, "step": 14063 }, { "epoch": 0.6976536534550325, "grad_norm": 9.455016136169434, "learning_rate": 2.130201801704506e-06, "loss": 0.3532, "step": 14064 }, { "epoch": 0.6977032590902327, "grad_norm": 14.615620613098145, "learning_rate": 2.1295573018467707e-06, "loss": 0.2625, "step": 14065 }, { "epoch": 0.6977528647254329, "grad_norm": 8.737666130065918, "learning_rate": 2.128912873119906e-06, "loss": 0.3077, "step": 14066 }, { "epoch": 0.6978024703606329, "grad_norm": 5.213905334472656, "learning_rate": 2.1282685155398753e-06, "loss": 0.2514, "step": 14067 }, { "epoch": 0.6978520759958331, "grad_norm": 6.561092853546143, "learning_rate": 2.1276242291226536e-06, "loss": 0.282, "step": 14068 }, { "epoch": 0.6979016816310333, "grad_norm": 10.483190536499023, "learning_rate": 2.1269800138842007e-06, "loss": 0.2928, "step": 14069 }, { "epoch": 0.6979512872662335, "grad_norm": 11.375919342041016, "learning_rate": 2.1263358698404823e-06, "loss": 0.3079, "step": 14070 }, { "epoch": 0.6980008929014336, "grad_norm": 4.997050762176514, "learning_rate": 2.1256917970074615e-06, "loss": 0.2506, "step": 14071 }, { "epoch": 0.6980504985366338, "grad_norm": 11.526921272277832, "learning_rate": 2.1250477954011e-06, "loss": 0.421, "step": 14072 }, { "epoch": 0.6981001041718339, "grad_norm": 5.240835666656494, "learning_rate": 2.1244038650373506e-06, "loss": 0.1607, "step": 14073 }, { "epoch": 0.6981497098070341, "grad_norm": 9.615141868591309, "learning_rate": 2.1237600059321784e-06, "loss": 0.2185, "step": 14074 }, { "epoch": 0.6981993154422342, "grad_norm": 4.944425106048584, "learning_rate": 2.123116218101532e-06, "loss": 0.2236, "step": 14075 }, { "epoch": 0.6982489210774344, "grad_norm": 8.93644905090332, "learning_rate": 2.1224725015613674e-06, "loss": 0.3388, "step": 14076 }, { "epoch": 0.6982985267126346, "grad_norm": 6.8306498527526855, "learning_rate": 2.1218288563276355e-06, "loss": 0.1934, "step": 14077 }, { "epoch": 0.6983481323478347, "grad_norm": 6.450562953948975, "learning_rate": 2.1211852824162886e-06, "loss": 0.3692, "step": 14078 }, { "epoch": 0.6983977379830348, "grad_norm": 4.869367599487305, "learning_rate": 2.1205417798432686e-06, "loss": 0.2876, "step": 14079 }, { "epoch": 0.698447343618235, "grad_norm": 10.595829010009766, "learning_rate": 2.11989834862453e-06, "loss": 0.3124, "step": 14080 }, { "epoch": 0.6984969492534352, "grad_norm": 6.417768955230713, "learning_rate": 2.1192549887760096e-06, "loss": 0.3284, "step": 14081 }, { "epoch": 0.6985465548886354, "grad_norm": 7.368531227111816, "learning_rate": 2.1186117003136567e-06, "loss": 0.3369, "step": 14082 }, { "epoch": 0.6985961605238356, "grad_norm": 7.498658657073975, "learning_rate": 2.1179684832534077e-06, "loss": 0.3478, "step": 14083 }, { "epoch": 0.6986457661590356, "grad_norm": 9.413556098937988, "learning_rate": 2.1173253376112037e-06, "loss": 0.2558, "step": 14084 }, { "epoch": 0.6986953717942358, "grad_norm": 4.756252765655518, "learning_rate": 2.116682263402981e-06, "loss": 0.32, "step": 14085 }, { "epoch": 0.698744977429436, "grad_norm": 6.407703876495361, "learning_rate": 2.1160392606446783e-06, "loss": 0.3142, "step": 14086 }, { "epoch": 0.6987945830646362, "grad_norm": 11.665287971496582, "learning_rate": 2.115396329352224e-06, "loss": 0.3846, "step": 14087 }, { "epoch": 0.6988441886998363, "grad_norm": 7.573568820953369, "learning_rate": 2.1147534695415566e-06, "loss": 0.2322, "step": 14088 }, { "epoch": 0.6988937943350365, "grad_norm": 9.189308166503906, "learning_rate": 2.1141106812286022e-06, "loss": 0.3016, "step": 14089 }, { "epoch": 0.6989433999702366, "grad_norm": 6.671210289001465, "learning_rate": 2.11346796442929e-06, "loss": 0.2613, "step": 14090 }, { "epoch": 0.6989930056054368, "grad_norm": 6.880485534667969, "learning_rate": 2.1128253191595475e-06, "loss": 0.3083, "step": 14091 }, { "epoch": 0.6990426112406369, "grad_norm": 14.67988109588623, "learning_rate": 2.1121827454353015e-06, "loss": 0.2364, "step": 14092 }, { "epoch": 0.6990922168758371, "grad_norm": 8.098258018493652, "learning_rate": 2.1115402432724697e-06, "loss": 0.2687, "step": 14093 }, { "epoch": 0.6991418225110373, "grad_norm": 6.0317487716674805, "learning_rate": 2.110897812686981e-06, "loss": 0.3041, "step": 14094 }, { "epoch": 0.6991914281462374, "grad_norm": 5.595034599304199, "learning_rate": 2.1102554536947496e-06, "loss": 0.3102, "step": 14095 }, { "epoch": 0.6992410337814375, "grad_norm": 5.7937541007995605, "learning_rate": 2.1096131663116948e-06, "loss": 0.2538, "step": 14096 }, { "epoch": 0.6992906394166377, "grad_norm": 5.871214389801025, "learning_rate": 2.1089709505537337e-06, "loss": 0.2779, "step": 14097 }, { "epoch": 0.6993402450518379, "grad_norm": 7.451305866241455, "learning_rate": 2.1083288064367817e-06, "loss": 0.2119, "step": 14098 }, { "epoch": 0.6993898506870381, "grad_norm": 6.126989841461182, "learning_rate": 2.1076867339767465e-06, "loss": 0.2467, "step": 14099 }, { "epoch": 0.6994394563222383, "grad_norm": 4.825498580932617, "learning_rate": 2.1070447331895455e-06, "loss": 0.1881, "step": 14100 }, { "epoch": 0.6994890619574383, "grad_norm": 7.879096031188965, "learning_rate": 2.1064028040910817e-06, "loss": 0.2549, "step": 14101 }, { "epoch": 0.6995386675926385, "grad_norm": 7.752624988555908, "learning_rate": 2.1057609466972684e-06, "loss": 0.348, "step": 14102 }, { "epoch": 0.6995882732278387, "grad_norm": 5.4128289222717285, "learning_rate": 2.105119161024007e-06, "loss": 0.2567, "step": 14103 }, { "epoch": 0.6996378788630389, "grad_norm": 5.191481590270996, "learning_rate": 2.1044774470872016e-06, "loss": 0.2205, "step": 14104 }, { "epoch": 0.699687484498239, "grad_norm": 5.330770015716553, "learning_rate": 2.1038358049027545e-06, "loss": 0.3166, "step": 14105 }, { "epoch": 0.6997370901334392, "grad_norm": 5.0950446128845215, "learning_rate": 2.1031942344865667e-06, "loss": 0.2514, "step": 14106 }, { "epoch": 0.6997866957686393, "grad_norm": 5.270681381225586, "learning_rate": 2.1025527358545366e-06, "loss": 0.2308, "step": 14107 }, { "epoch": 0.6998363014038395, "grad_norm": 8.158958435058594, "learning_rate": 2.101911309022562e-06, "loss": 0.3168, "step": 14108 }, { "epoch": 0.6998859070390396, "grad_norm": 6.291012763977051, "learning_rate": 2.101269954006534e-06, "loss": 0.3413, "step": 14109 }, { "epoch": 0.6999355126742398, "grad_norm": 12.543680191040039, "learning_rate": 2.1006286708223477e-06, "loss": 0.3297, "step": 14110 }, { "epoch": 0.69998511830944, "grad_norm": 4.170675754547119, "learning_rate": 2.0999874594858942e-06, "loss": 0.2757, "step": 14111 }, { "epoch": 0.7000347239446401, "grad_norm": 5.530570983886719, "learning_rate": 2.099346320013064e-06, "loss": 0.1629, "step": 14112 }, { "epoch": 0.7000347239446401, "eval_loss": 0.2879594564437866, "eval_runtime": 35.5783, "eval_samples_per_second": 45.786, "eval_steps_per_second": 5.734, "step": 14112 }, { "epoch": 0.7000843295798402, "grad_norm": 8.727596282958984, "learning_rate": 2.0987052524197437e-06, "loss": 0.3276, "step": 14113 }, { "epoch": 0.7001339352150404, "grad_norm": 8.739775657653809, "learning_rate": 2.09806425672182e-06, "loss": 0.2722, "step": 14114 }, { "epoch": 0.7001835408502406, "grad_norm": 6.863175868988037, "learning_rate": 2.0974233329351777e-06, "loss": 0.3702, "step": 14115 }, { "epoch": 0.7002331464854408, "grad_norm": 5.339736461639404, "learning_rate": 2.0967824810756954e-06, "loss": 0.2604, "step": 14116 }, { "epoch": 0.700282752120641, "grad_norm": 5.245752334594727, "learning_rate": 2.09614170115926e-06, "loss": 0.2773, "step": 14117 }, { "epoch": 0.700332357755841, "grad_norm": 9.261123657226562, "learning_rate": 2.0955009932017452e-06, "loss": 0.3198, "step": 14118 }, { "epoch": 0.7003819633910412, "grad_norm": 7.198761463165283, "learning_rate": 2.094860357219029e-06, "loss": 0.3497, "step": 14119 }, { "epoch": 0.7004315690262414, "grad_norm": 4.40090274810791, "learning_rate": 2.0942197932269874e-06, "loss": 0.279, "step": 14120 }, { "epoch": 0.7004811746614416, "grad_norm": 6.1300530433654785, "learning_rate": 2.093579301241494e-06, "loss": 0.4009, "step": 14121 }, { "epoch": 0.7005307802966417, "grad_norm": 7.8033013343811035, "learning_rate": 2.09293888127842e-06, "loss": 0.2672, "step": 14122 }, { "epoch": 0.7005803859318419, "grad_norm": 5.255270481109619, "learning_rate": 2.092298533353637e-06, "loss": 0.3629, "step": 14123 }, { "epoch": 0.700629991567042, "grad_norm": 6.46696138381958, "learning_rate": 2.091658257483009e-06, "loss": 0.2782, "step": 14124 }, { "epoch": 0.7006795972022422, "grad_norm": 23.37333869934082, "learning_rate": 2.091018053682408e-06, "loss": 0.5695, "step": 14125 }, { "epoch": 0.7007292028374423, "grad_norm": 5.887888431549072, "learning_rate": 2.090377921967694e-06, "loss": 0.3974, "step": 14126 }, { "epoch": 0.7007788084726425, "grad_norm": 4.4065775871276855, "learning_rate": 2.089737862354731e-06, "loss": 0.2192, "step": 14127 }, { "epoch": 0.7008284141078427, "grad_norm": 4.722010612487793, "learning_rate": 2.08909787485938e-06, "loss": 0.2955, "step": 14128 }, { "epoch": 0.7008780197430428, "grad_norm": 8.98256778717041, "learning_rate": 2.0884579594975023e-06, "loss": 0.2004, "step": 14129 }, { "epoch": 0.7009276253782429, "grad_norm": 6.5849080085754395, "learning_rate": 2.08781811628495e-06, "loss": 0.2499, "step": 14130 }, { "epoch": 0.7009772310134431, "grad_norm": 8.88677978515625, "learning_rate": 2.087178345237586e-06, "loss": 0.3447, "step": 14131 }, { "epoch": 0.7010268366486433, "grad_norm": 11.794090270996094, "learning_rate": 2.086538646371258e-06, "loss": 0.2467, "step": 14132 }, { "epoch": 0.7010764422838435, "grad_norm": 11.926280975341797, "learning_rate": 2.0858990197018203e-06, "loss": 0.2833, "step": 14133 }, { "epoch": 0.7011260479190436, "grad_norm": 8.967726707458496, "learning_rate": 2.085259465245123e-06, "loss": 0.2923, "step": 14134 }, { "epoch": 0.7011756535542437, "grad_norm": 4.3995232582092285, "learning_rate": 2.0846199830170144e-06, "loss": 0.2887, "step": 14135 }, { "epoch": 0.7012252591894439, "grad_norm": 16.082956314086914, "learning_rate": 2.0839805730333418e-06, "loss": 0.2934, "step": 14136 }, { "epoch": 0.7012748648246441, "grad_norm": 3.9491846561431885, "learning_rate": 2.083341235309951e-06, "loss": 0.2382, "step": 14137 }, { "epoch": 0.7013244704598443, "grad_norm": 10.101219177246094, "learning_rate": 2.082701969862681e-06, "loss": 0.4761, "step": 14138 }, { "epoch": 0.7013740760950444, "grad_norm": 7.462838649749756, "learning_rate": 2.0820627767073792e-06, "loss": 0.3281, "step": 14139 }, { "epoch": 0.7014236817302446, "grad_norm": 5.632392883300781, "learning_rate": 2.0814236558598795e-06, "loss": 0.2087, "step": 14140 }, { "epoch": 0.7014732873654447, "grad_norm": 4.8488850593566895, "learning_rate": 2.080784607336022e-06, "loss": 0.2606, "step": 14141 }, { "epoch": 0.7015228930006449, "grad_norm": 8.279191017150879, "learning_rate": 2.080145631151642e-06, "loss": 0.2754, "step": 14142 }, { "epoch": 0.701572498635845, "grad_norm": 12.779390335083008, "learning_rate": 2.0795067273225765e-06, "loss": 0.3049, "step": 14143 }, { "epoch": 0.7016221042710452, "grad_norm": 11.60751724243164, "learning_rate": 2.078867895864652e-06, "loss": 0.3939, "step": 14144 }, { "epoch": 0.7016717099062454, "grad_norm": 14.415948867797852, "learning_rate": 2.078229136793705e-06, "loss": 0.3671, "step": 14145 }, { "epoch": 0.7017213155414455, "grad_norm": 10.973623275756836, "learning_rate": 2.0775904501255605e-06, "loss": 0.3595, "step": 14146 }, { "epoch": 0.7017709211766456, "grad_norm": 6.3771586418151855, "learning_rate": 2.0769518358760464e-06, "loss": 0.2176, "step": 14147 }, { "epoch": 0.7018205268118458, "grad_norm": 7.014854431152344, "learning_rate": 2.0763132940609886e-06, "loss": 0.197, "step": 14148 }, { "epoch": 0.701870132447046, "grad_norm": 8.267972946166992, "learning_rate": 2.075674824696211e-06, "loss": 0.3215, "step": 14149 }, { "epoch": 0.7019197380822462, "grad_norm": 6.321065902709961, "learning_rate": 2.0750364277975312e-06, "loss": 0.3099, "step": 14150 }, { "epoch": 0.7019693437174463, "grad_norm": 9.532145500183105, "learning_rate": 2.0743981033807748e-06, "loss": 0.247, "step": 14151 }, { "epoch": 0.7020189493526464, "grad_norm": 7.21836519241333, "learning_rate": 2.073759851461755e-06, "loss": 0.3331, "step": 14152 }, { "epoch": 0.7020685549878466, "grad_norm": 6.753483295440674, "learning_rate": 2.0731216720562895e-06, "loss": 0.3614, "step": 14153 }, { "epoch": 0.7021181606230468, "grad_norm": 7.5026445388793945, "learning_rate": 2.072483565180194e-06, "loss": 0.2102, "step": 14154 }, { "epoch": 0.702167766258247, "grad_norm": 7.972965240478516, "learning_rate": 2.0718455308492793e-06, "loss": 0.3657, "step": 14155 }, { "epoch": 0.7022173718934471, "grad_norm": 23.496829986572266, "learning_rate": 2.0712075690793564e-06, "loss": 0.457, "step": 14156 }, { "epoch": 0.7022669775286473, "grad_norm": 7.549759387969971, "learning_rate": 2.0705696798862374e-06, "loss": 0.2544, "step": 14157 }, { "epoch": 0.7023165831638474, "grad_norm": 10.382171630859375, "learning_rate": 2.0699318632857228e-06, "loss": 0.2645, "step": 14158 }, { "epoch": 0.7023661887990476, "grad_norm": 9.194740295410156, "learning_rate": 2.0692941192936263e-06, "loss": 0.2704, "step": 14159 }, { "epoch": 0.7024157944342477, "grad_norm": 7.887315273284912, "learning_rate": 2.0686564479257446e-06, "loss": 0.376, "step": 14160 }, { "epoch": 0.7024654000694479, "grad_norm": 4.5358686447143555, "learning_rate": 2.0680188491978826e-06, "loss": 0.2587, "step": 14161 }, { "epoch": 0.7025150057046481, "grad_norm": 12.66901969909668, "learning_rate": 2.0673813231258395e-06, "loss": 0.4157, "step": 14162 }, { "epoch": 0.7025646113398482, "grad_norm": 5.048694610595703, "learning_rate": 2.0667438697254137e-06, "loss": 0.2259, "step": 14163 }, { "epoch": 0.7026142169750483, "grad_norm": 8.741445541381836, "learning_rate": 2.066106489012402e-06, "loss": 0.3385, "step": 14164 }, { "epoch": 0.7026638226102485, "grad_norm": 3.774871349334717, "learning_rate": 2.0654691810025986e-06, "loss": 0.1731, "step": 14165 }, { "epoch": 0.7027134282454487, "grad_norm": 3.4057068824768066, "learning_rate": 2.064831945711798e-06, "loss": 0.2192, "step": 14166 }, { "epoch": 0.7027630338806489, "grad_norm": 4.364414215087891, "learning_rate": 2.064194783155788e-06, "loss": 0.2436, "step": 14167 }, { "epoch": 0.702812639515849, "grad_norm": 12.782976150512695, "learning_rate": 2.063557693350359e-06, "loss": 0.2638, "step": 14168 }, { "epoch": 0.7028622451510491, "grad_norm": 8.461113929748535, "learning_rate": 2.0629206763112984e-06, "loss": 0.2905, "step": 14169 }, { "epoch": 0.7029118507862493, "grad_norm": 6.884156703948975, "learning_rate": 2.0622837320543926e-06, "loss": 0.2589, "step": 14170 }, { "epoch": 0.7029614564214495, "grad_norm": 9.25714111328125, "learning_rate": 2.061646860595425e-06, "loss": 0.327, "step": 14171 }, { "epoch": 0.7030110620566496, "grad_norm": 7.035882472991943, "learning_rate": 2.061010061950177e-06, "loss": 0.2733, "step": 14172 }, { "epoch": 0.7030606676918498, "grad_norm": 12.372085571289062, "learning_rate": 2.0603733361344284e-06, "loss": 0.3885, "step": 14173 }, { "epoch": 0.70311027332705, "grad_norm": 6.163563251495361, "learning_rate": 2.0597366831639607e-06, "loss": 0.2688, "step": 14174 }, { "epoch": 0.7031598789622501, "grad_norm": 11.600146293640137, "learning_rate": 2.059100103054546e-06, "loss": 0.3161, "step": 14175 }, { "epoch": 0.7032094845974503, "grad_norm": 5.444424152374268, "learning_rate": 2.0584635958219617e-06, "loss": 0.2885, "step": 14176 }, { "epoch": 0.7032590902326504, "grad_norm": 4.8852691650390625, "learning_rate": 2.0578271614819793e-06, "loss": 0.2852, "step": 14177 }, { "epoch": 0.7033086958678506, "grad_norm": 7.517751216888428, "learning_rate": 2.0571908000503714e-06, "loss": 0.3107, "step": 14178 }, { "epoch": 0.7033583015030508, "grad_norm": 11.353267669677734, "learning_rate": 2.0565545115429064e-06, "loss": 0.3687, "step": 14179 }, { "epoch": 0.7034079071382509, "grad_norm": 4.109990119934082, "learning_rate": 2.055918295975354e-06, "loss": 0.1913, "step": 14180 }, { "epoch": 0.703457512773451, "grad_norm": 7.083715438842773, "learning_rate": 2.0552821533634737e-06, "loss": 0.2445, "step": 14181 }, { "epoch": 0.7035071184086512, "grad_norm": 10.959036827087402, "learning_rate": 2.054646083723038e-06, "loss": 0.3378, "step": 14182 }, { "epoch": 0.7035567240438514, "grad_norm": 8.9671049118042, "learning_rate": 2.0540100870698027e-06, "loss": 0.2655, "step": 14183 }, { "epoch": 0.7036063296790516, "grad_norm": 4.0024495124816895, "learning_rate": 2.0533741634195296e-06, "loss": 0.2236, "step": 14184 }, { "epoch": 0.7036559353142517, "grad_norm": 8.559237480163574, "learning_rate": 2.052738312787978e-06, "loss": 0.3207, "step": 14185 }, { "epoch": 0.7037055409494518, "grad_norm": 8.973555564880371, "learning_rate": 2.052102535190906e-06, "loss": 0.4031, "step": 14186 }, { "epoch": 0.703755146584652, "grad_norm": 5.781834125518799, "learning_rate": 2.0514668306440634e-06, "loss": 0.2559, "step": 14187 }, { "epoch": 0.7038047522198522, "grad_norm": 5.751011848449707, "learning_rate": 2.05083119916321e-06, "loss": 0.2679, "step": 14188 }, { "epoch": 0.7038543578550523, "grad_norm": 6.025510787963867, "learning_rate": 2.0501956407640895e-06, "loss": 0.2146, "step": 14189 }, { "epoch": 0.7039039634902525, "grad_norm": 8.2882661819458, "learning_rate": 2.049560155462459e-06, "loss": 0.3307, "step": 14190 }, { "epoch": 0.7039535691254527, "grad_norm": 10.260187149047852, "learning_rate": 2.048924743274061e-06, "loss": 0.389, "step": 14191 }, { "epoch": 0.7040031747606528, "grad_norm": 10.349814414978027, "learning_rate": 2.048289404214643e-06, "loss": 0.3388, "step": 14192 }, { "epoch": 0.704052780395853, "grad_norm": 7.736451625823975, "learning_rate": 2.047654138299948e-06, "loss": 0.3588, "step": 14193 }, { "epoch": 0.7041023860310531, "grad_norm": 7.210470676422119, "learning_rate": 2.0470189455457214e-06, "loss": 0.2666, "step": 14194 }, { "epoch": 0.7041519916662533, "grad_norm": 9.588035583496094, "learning_rate": 2.046383825967697e-06, "loss": 0.2789, "step": 14195 }, { "epoch": 0.7042015973014535, "grad_norm": 9.090933799743652, "learning_rate": 2.0457487795816213e-06, "loss": 0.2924, "step": 14196 }, { "epoch": 0.7042512029366536, "grad_norm": 9.362757682800293, "learning_rate": 2.0451138064032254e-06, "loss": 0.3212, "step": 14197 }, { "epoch": 0.7043008085718537, "grad_norm": 7.277215003967285, "learning_rate": 2.044478906448246e-06, "loss": 0.3851, "step": 14198 }, { "epoch": 0.7043504142070539, "grad_norm": 7.233058929443359, "learning_rate": 2.0438440797324164e-06, "loss": 0.3355, "step": 14199 }, { "epoch": 0.7044000198422541, "grad_norm": 10.66263198852539, "learning_rate": 2.0432093262714693e-06, "loss": 0.3494, "step": 14200 }, { "epoch": 0.7044496254774543, "grad_norm": 5.8565897941589355, "learning_rate": 2.042574646081129e-06, "loss": 0.2605, "step": 14201 }, { "epoch": 0.7044992311126544, "grad_norm": 5.434291362762451, "learning_rate": 2.0419400391771306e-06, "loss": 0.2509, "step": 14202 }, { "epoch": 0.7045488367478545, "grad_norm": 8.074952125549316, "learning_rate": 2.041305505575194e-06, "loss": 0.2748, "step": 14203 }, { "epoch": 0.7045984423830547, "grad_norm": 4.432398319244385, "learning_rate": 2.0406710452910457e-06, "loss": 0.2336, "step": 14204 }, { "epoch": 0.7046480480182549, "grad_norm": 7.484916687011719, "learning_rate": 2.0400366583404074e-06, "loss": 0.2839, "step": 14205 }, { "epoch": 0.704697653653455, "grad_norm": 12.265624046325684, "learning_rate": 2.0394023447389993e-06, "loss": 0.3567, "step": 14206 }, { "epoch": 0.7047472592886552, "grad_norm": 5.447330951690674, "learning_rate": 2.038768104502541e-06, "loss": 0.2176, "step": 14207 }, { "epoch": 0.7047968649238554, "grad_norm": 3.9300100803375244, "learning_rate": 2.0381339376467497e-06, "loss": 0.2854, "step": 14208 }, { "epoch": 0.7048464705590555, "grad_norm": 7.7105631828308105, "learning_rate": 2.037499844187336e-06, "loss": 0.2614, "step": 14209 }, { "epoch": 0.7048960761942556, "grad_norm": 4.867447853088379, "learning_rate": 2.0368658241400197e-06, "loss": 0.2373, "step": 14210 }, { "epoch": 0.7049456818294558, "grad_norm": 5.23500394821167, "learning_rate": 2.036231877520507e-06, "loss": 0.1721, "step": 14211 }, { "epoch": 0.704995287464656, "grad_norm": 8.16914176940918, "learning_rate": 2.035598004344509e-06, "loss": 0.2714, "step": 14212 }, { "epoch": 0.7050448930998562, "grad_norm": 6.61726713180542, "learning_rate": 2.034964204627733e-06, "loss": 0.2917, "step": 14213 }, { "epoch": 0.7050944987350563, "grad_norm": 13.349023818969727, "learning_rate": 2.0343304783858857e-06, "loss": 0.3114, "step": 14214 }, { "epoch": 0.7051441043702564, "grad_norm": 7.077350616455078, "learning_rate": 2.0336968256346707e-06, "loss": 0.2048, "step": 14215 }, { "epoch": 0.7051937100054566, "grad_norm": 6.350419044494629, "learning_rate": 2.0330632463897914e-06, "loss": 0.2114, "step": 14216 }, { "epoch": 0.7052433156406568, "grad_norm": 8.127896308898926, "learning_rate": 2.032429740666946e-06, "loss": 0.2778, "step": 14217 }, { "epoch": 0.705292921275857, "grad_norm": 4.580400466918945, "learning_rate": 2.0317963084818333e-06, "loss": 0.3271, "step": 14218 }, { "epoch": 0.7053425269110571, "grad_norm": 10.032039642333984, "learning_rate": 2.031162949850151e-06, "loss": 0.2884, "step": 14219 }, { "epoch": 0.7053921325462572, "grad_norm": 4.537448406219482, "learning_rate": 2.030529664787594e-06, "loss": 0.2647, "step": 14220 }, { "epoch": 0.7054417381814574, "grad_norm": 9.123893737792969, "learning_rate": 2.029896453309855e-06, "loss": 0.2705, "step": 14221 }, { "epoch": 0.7054913438166576, "grad_norm": 6.174714088439941, "learning_rate": 2.029263315432625e-06, "loss": 0.2889, "step": 14222 }, { "epoch": 0.7055409494518577, "grad_norm": 4.780806541442871, "learning_rate": 2.0286302511715954e-06, "loss": 0.2725, "step": 14223 }, { "epoch": 0.7055905550870579, "grad_norm": 5.0558576583862305, "learning_rate": 2.0279972605424515e-06, "loss": 0.1428, "step": 14224 }, { "epoch": 0.7056401607222581, "grad_norm": 7.865975379943848, "learning_rate": 2.027364343560879e-06, "loss": 0.4477, "step": 14225 }, { "epoch": 0.7056897663574582, "grad_norm": 8.633997917175293, "learning_rate": 2.0267315002425635e-06, "loss": 0.3664, "step": 14226 }, { "epoch": 0.7057393719926583, "grad_norm": 5.416625022888184, "learning_rate": 2.026098730603186e-06, "loss": 0.1995, "step": 14227 }, { "epoch": 0.7057889776278585, "grad_norm": 6.446342468261719, "learning_rate": 2.0254660346584272e-06, "loss": 0.2044, "step": 14228 }, { "epoch": 0.7058385832630587, "grad_norm": 18.261898040771484, "learning_rate": 2.0248334124239656e-06, "loss": 0.3313, "step": 14229 }, { "epoch": 0.7058881888982589, "grad_norm": 8.942487716674805, "learning_rate": 2.0242008639154777e-06, "loss": 0.3708, "step": 14230 }, { "epoch": 0.705937794533459, "grad_norm": 8.438197135925293, "learning_rate": 2.0235683891486403e-06, "loss": 0.377, "step": 14231 }, { "epoch": 0.7059874001686591, "grad_norm": 6.863096237182617, "learning_rate": 2.0229359881391216e-06, "loss": 0.2585, "step": 14232 }, { "epoch": 0.7060370058038593, "grad_norm": 5.577647686004639, "learning_rate": 2.0223036609025986e-06, "loss": 0.2238, "step": 14233 }, { "epoch": 0.7060866114390595, "grad_norm": 8.654276847839355, "learning_rate": 2.021671407454736e-06, "loss": 0.3057, "step": 14234 }, { "epoch": 0.7061362170742597, "grad_norm": 7.066442489624023, "learning_rate": 2.021039227811204e-06, "loss": 0.2861, "step": 14235 }, { "epoch": 0.7061858227094598, "grad_norm": 8.160123825073242, "learning_rate": 2.020407121987667e-06, "loss": 0.3132, "step": 14236 }, { "epoch": 0.7062354283446599, "grad_norm": 7.40518045425415, "learning_rate": 2.019775089999791e-06, "loss": 0.2995, "step": 14237 }, { "epoch": 0.7062850339798601, "grad_norm": 7.0272650718688965, "learning_rate": 2.019143131863233e-06, "loss": 0.2719, "step": 14238 }, { "epoch": 0.7063346396150603, "grad_norm": 3.336682081222534, "learning_rate": 2.01851124759366e-06, "loss": 0.192, "step": 14239 }, { "epoch": 0.7063842452502604, "grad_norm": 11.466010093688965, "learning_rate": 2.017879437206725e-06, "loss": 0.2112, "step": 14240 }, { "epoch": 0.7064338508854606, "grad_norm": 4.926674842834473, "learning_rate": 2.0172477007180867e-06, "loss": 0.1815, "step": 14241 }, { "epoch": 0.7064834565206608, "grad_norm": 12.684621810913086, "learning_rate": 2.0166160381434004e-06, "loss": 0.3674, "step": 14242 }, { "epoch": 0.7065330621558609, "grad_norm": 4.224963665008545, "learning_rate": 2.015984449498317e-06, "loss": 0.271, "step": 14243 }, { "epoch": 0.706582667791061, "grad_norm": 9.028959274291992, "learning_rate": 2.0153529347984894e-06, "loss": 0.2816, "step": 14244 }, { "epoch": 0.7066322734262612, "grad_norm": 7.006876468658447, "learning_rate": 2.014721494059568e-06, "loss": 0.2292, "step": 14245 }, { "epoch": 0.7066818790614614, "grad_norm": 4.028972625732422, "learning_rate": 2.014090127297194e-06, "loss": 0.2446, "step": 14246 }, { "epoch": 0.7067314846966616, "grad_norm": 10.916060447692871, "learning_rate": 2.0134588345270216e-06, "loss": 0.3096, "step": 14247 }, { "epoch": 0.7067810903318617, "grad_norm": 7.495400905609131, "learning_rate": 2.0128276157646886e-06, "loss": 0.2509, "step": 14248 }, { "epoch": 0.7068306959670618, "grad_norm": 7.758208274841309, "learning_rate": 2.0121964710258383e-06, "loss": 0.3057, "step": 14249 }, { "epoch": 0.706880301602262, "grad_norm": 8.550209999084473, "learning_rate": 2.011565400326111e-06, "loss": 0.2545, "step": 14250 }, { "epoch": 0.7069299072374622, "grad_norm": 29.097497940063477, "learning_rate": 2.0109344036811467e-06, "loss": 0.3047, "step": 14251 }, { "epoch": 0.7069795128726624, "grad_norm": 7.4489312171936035, "learning_rate": 2.0103034811065765e-06, "loss": 0.2829, "step": 14252 }, { "epoch": 0.7070291185078625, "grad_norm": 4.253005504608154, "learning_rate": 2.0096726326180417e-06, "loss": 0.2496, "step": 14253 }, { "epoch": 0.7070787241430626, "grad_norm": 8.047561645507812, "learning_rate": 2.00904185823117e-06, "loss": 0.3762, "step": 14254 }, { "epoch": 0.7071283297782628, "grad_norm": 7.1006293296813965, "learning_rate": 2.008411157961594e-06, "loss": 0.3681, "step": 14255 }, { "epoch": 0.707177935413463, "grad_norm": 10.509820938110352, "learning_rate": 2.0077805318249426e-06, "loss": 0.3853, "step": 14256 }, { "epoch": 0.7072275410486631, "grad_norm": 5.539376735687256, "learning_rate": 2.0071499798368456e-06, "loss": 0.2166, "step": 14257 }, { "epoch": 0.7072771466838633, "grad_norm": 6.6687912940979, "learning_rate": 2.006519502012922e-06, "loss": 0.2792, "step": 14258 }, { "epoch": 0.7073267523190635, "grad_norm": 6.180726528167725, "learning_rate": 2.005889098368802e-06, "loss": 0.3096, "step": 14259 }, { "epoch": 0.7073763579542636, "grad_norm": 6.8586602210998535, "learning_rate": 2.005258768920102e-06, "loss": 0.2865, "step": 14260 }, { "epoch": 0.7074259635894637, "grad_norm": 7.9140543937683105, "learning_rate": 2.004628513682448e-06, "loss": 0.2809, "step": 14261 }, { "epoch": 0.7074755692246639, "grad_norm": 6.891801357269287, "learning_rate": 2.003998332671452e-06, "loss": 0.3537, "step": 14262 }, { "epoch": 0.7075251748598641, "grad_norm": 11.187416076660156, "learning_rate": 2.0033682259027326e-06, "loss": 0.2895, "step": 14263 }, { "epoch": 0.7075747804950643, "grad_norm": 16.945302963256836, "learning_rate": 2.002738193391904e-06, "loss": 0.4015, "step": 14264 }, { "epoch": 0.7076243861302643, "grad_norm": 8.542041778564453, "learning_rate": 2.002108235154581e-06, "loss": 0.2864, "step": 14265 }, { "epoch": 0.7076739917654645, "grad_norm": 4.649970054626465, "learning_rate": 2.0014783512063683e-06, "loss": 0.1737, "step": 14266 }, { "epoch": 0.7077235974006647, "grad_norm": 8.519343376159668, "learning_rate": 2.0008485415628824e-06, "loss": 0.3901, "step": 14267 }, { "epoch": 0.7077732030358649, "grad_norm": 5.515195369720459, "learning_rate": 2.0002188062397236e-06, "loss": 0.2579, "step": 14268 }, { "epoch": 0.7078228086710651, "grad_norm": 9.4993257522583, "learning_rate": 1.9995891452525007e-06, "loss": 0.4642, "step": 14269 }, { "epoch": 0.7078724143062652, "grad_norm": 5.4308857917785645, "learning_rate": 1.998959558616815e-06, "loss": 0.2477, "step": 14270 }, { "epoch": 0.7079220199414653, "grad_norm": 27.14497184753418, "learning_rate": 1.9983300463482702e-06, "loss": 0.507, "step": 14271 }, { "epoch": 0.7079716255766655, "grad_norm": 8.993078231811523, "learning_rate": 1.997700608462464e-06, "loss": 0.3851, "step": 14272 }, { "epoch": 0.7080212312118657, "grad_norm": 9.988640785217285, "learning_rate": 1.997071244974996e-06, "loss": 0.3091, "step": 14273 }, { "epoch": 0.7080708368470658, "grad_norm": 5.44329309463501, "learning_rate": 1.9964419559014602e-06, "loss": 0.2078, "step": 14274 }, { "epoch": 0.708120442482266, "grad_norm": 7.4363555908203125, "learning_rate": 1.9958127412574505e-06, "loss": 0.3034, "step": 14275 }, { "epoch": 0.7081700481174661, "grad_norm": 7.539928436279297, "learning_rate": 1.9951836010585606e-06, "loss": 0.33, "step": 14276 }, { "epoch": 0.7082196537526663, "grad_norm": 7.796426773071289, "learning_rate": 1.9945545353203803e-06, "loss": 0.3599, "step": 14277 }, { "epoch": 0.7082692593878664, "grad_norm": 5.445972919464111, "learning_rate": 1.9939255440584978e-06, "loss": 0.2801, "step": 14278 }, { "epoch": 0.7083188650230666, "grad_norm": 6.256592750549316, "learning_rate": 1.9932966272885e-06, "loss": 0.3538, "step": 14279 }, { "epoch": 0.7083684706582668, "grad_norm": 5.5904951095581055, "learning_rate": 1.9926677850259725e-06, "loss": 0.2583, "step": 14280 }, { "epoch": 0.708418076293467, "grad_norm": 8.38350772857666, "learning_rate": 1.992039017286497e-06, "loss": 0.2682, "step": 14281 }, { "epoch": 0.708467681928667, "grad_norm": 6.35814094543457, "learning_rate": 1.9914103240856573e-06, "loss": 0.2884, "step": 14282 }, { "epoch": 0.7085172875638672, "grad_norm": 8.664619445800781, "learning_rate": 1.9907817054390287e-06, "loss": 0.2792, "step": 14283 }, { "epoch": 0.7085668931990674, "grad_norm": 5.986758708953857, "learning_rate": 1.990153161362191e-06, "loss": 0.1937, "step": 14284 }, { "epoch": 0.7086164988342676, "grad_norm": 4.508443355560303, "learning_rate": 1.9895246918707194e-06, "loss": 0.2479, "step": 14285 }, { "epoch": 0.7086661044694678, "grad_norm": 3.790755271911621, "learning_rate": 1.9888962969801876e-06, "loss": 0.2092, "step": 14286 }, { "epoch": 0.7087157101046679, "grad_norm": 8.714317321777344, "learning_rate": 1.988267976706168e-06, "loss": 0.3792, "step": 14287 }, { "epoch": 0.708765315739868, "grad_norm": 6.700689792633057, "learning_rate": 1.987639731064232e-06, "loss": 0.2674, "step": 14288 }, { "epoch": 0.7088149213750682, "grad_norm": 5.624876499176025, "learning_rate": 1.9870115600699423e-06, "loss": 0.2369, "step": 14289 }, { "epoch": 0.7088645270102684, "grad_norm": 4.198848724365234, "learning_rate": 1.9863834637388723e-06, "loss": 0.2298, "step": 14290 }, { "epoch": 0.7089141326454685, "grad_norm": 4.332439422607422, "learning_rate": 1.985755442086582e-06, "loss": 0.2697, "step": 14291 }, { "epoch": 0.7089637382806687, "grad_norm": 13.365583419799805, "learning_rate": 1.9851274951286352e-06, "loss": 0.4266, "step": 14292 }, { "epoch": 0.7090133439158688, "grad_norm": 4.737269401550293, "learning_rate": 1.984499622880593e-06, "loss": 0.206, "step": 14293 }, { "epoch": 0.709062949551069, "grad_norm": 18.892398834228516, "learning_rate": 1.9838718253580158e-06, "loss": 0.4127, "step": 14294 }, { "epoch": 0.7091125551862691, "grad_norm": 7.963271617889404, "learning_rate": 1.9832441025764555e-06, "loss": 0.3654, "step": 14295 }, { "epoch": 0.7091621608214693, "grad_norm": 5.803060531616211, "learning_rate": 1.982616454551475e-06, "loss": 0.2254, "step": 14296 }, { "epoch": 0.7092117664566695, "grad_norm": 10.251805305480957, "learning_rate": 1.9819888812986194e-06, "loss": 0.4175, "step": 14297 }, { "epoch": 0.7092613720918697, "grad_norm": 5.487999439239502, "learning_rate": 1.9813613828334483e-06, "loss": 0.2291, "step": 14298 }, { "epoch": 0.7093109777270697, "grad_norm": 6.9386887550354, "learning_rate": 1.980733959171506e-06, "loss": 0.2431, "step": 14299 }, { "epoch": 0.7093605833622699, "grad_norm": 5.607760906219482, "learning_rate": 1.9801066103283417e-06, "loss": 0.3068, "step": 14300 }, { "epoch": 0.7094101889974701, "grad_norm": 5.927387237548828, "learning_rate": 1.979479336319502e-06, "loss": 0.2525, "step": 14301 }, { "epoch": 0.7094597946326703, "grad_norm": 10.241433143615723, "learning_rate": 1.9788521371605324e-06, "loss": 0.3339, "step": 14302 }, { "epoch": 0.7095094002678705, "grad_norm": 7.441009998321533, "learning_rate": 1.9782250128669696e-06, "loss": 0.2659, "step": 14303 }, { "epoch": 0.7095590059030706, "grad_norm": 14.160561561584473, "learning_rate": 1.977597963454362e-06, "loss": 0.4589, "step": 14304 }, { "epoch": 0.7096086115382707, "grad_norm": 6.784664154052734, "learning_rate": 1.9769709889382415e-06, "loss": 0.2277, "step": 14305 }, { "epoch": 0.7096582171734709, "grad_norm": 9.643067359924316, "learning_rate": 1.976344089334148e-06, "loss": 0.3619, "step": 14306 }, { "epoch": 0.7097078228086711, "grad_norm": 12.027031898498535, "learning_rate": 1.975717264657615e-06, "loss": 0.4493, "step": 14307 }, { "epoch": 0.7097574284438712, "grad_norm": 14.885951042175293, "learning_rate": 1.9750905149241782e-06, "loss": 0.3644, "step": 14308 }, { "epoch": 0.7098070340790714, "grad_norm": 5.179102420806885, "learning_rate": 1.9744638401493637e-06, "loss": 0.2569, "step": 14309 }, { "epoch": 0.7098566397142715, "grad_norm": 9.663646697998047, "learning_rate": 1.973837240348707e-06, "loss": 0.3011, "step": 14310 }, { "epoch": 0.7099062453494717, "grad_norm": 6.423469066619873, "learning_rate": 1.9732107155377313e-06, "loss": 0.3441, "step": 14311 }, { "epoch": 0.7099558509846718, "grad_norm": 5.489946365356445, "learning_rate": 1.972584265731962e-06, "loss": 0.2825, "step": 14312 }, { "epoch": 0.710005456619872, "grad_norm": 3.7965540885925293, "learning_rate": 1.971957890946925e-06, "loss": 0.2064, "step": 14313 }, { "epoch": 0.7100550622550722, "grad_norm": 5.77136754989624, "learning_rate": 1.9713315911981405e-06, "loss": 0.2362, "step": 14314 }, { "epoch": 0.7101046678902724, "grad_norm": 4.326559066772461, "learning_rate": 1.97070536650113e-06, "loss": 0.297, "step": 14315 }, { "epoch": 0.7101542735254724, "grad_norm": 6.283170223236084, "learning_rate": 1.9700792168714122e-06, "loss": 0.2968, "step": 14316 }, { "epoch": 0.7102038791606726, "grad_norm": 4.79791259765625, "learning_rate": 1.9694531423244983e-06, "loss": 0.2745, "step": 14317 }, { "epoch": 0.7102534847958728, "grad_norm": 12.712295532226562, "learning_rate": 1.9688271428759105e-06, "loss": 0.248, "step": 14318 }, { "epoch": 0.710303090431073, "grad_norm": 11.829682350158691, "learning_rate": 1.9682012185411546e-06, "loss": 0.3262, "step": 14319 }, { "epoch": 0.7103526960662732, "grad_norm": 3.8147363662719727, "learning_rate": 1.9675753693357447e-06, "loss": 0.1612, "step": 14320 }, { "epoch": 0.7104023017014733, "grad_norm": 6.581294059753418, "learning_rate": 1.966949595275188e-06, "loss": 0.2272, "step": 14321 }, { "epoch": 0.7104519073366734, "grad_norm": 5.1523356437683105, "learning_rate": 1.9663238963749946e-06, "loss": 0.204, "step": 14322 }, { "epoch": 0.7105015129718736, "grad_norm": 8.753484725952148, "learning_rate": 1.965698272650664e-06, "loss": 0.2854, "step": 14323 }, { "epoch": 0.7105511186070738, "grad_norm": 4.920459747314453, "learning_rate": 1.9650727241177064e-06, "loss": 0.3194, "step": 14324 }, { "epoch": 0.7106007242422739, "grad_norm": 6.146880626678467, "learning_rate": 1.9644472507916174e-06, "loss": 0.2335, "step": 14325 }, { "epoch": 0.7106503298774741, "grad_norm": 5.29819393157959, "learning_rate": 1.963821852687899e-06, "loss": 0.3602, "step": 14326 }, { "epoch": 0.7106999355126742, "grad_norm": 6.5872578620910645, "learning_rate": 1.9631965298220477e-06, "loss": 0.2198, "step": 14327 }, { "epoch": 0.7107495411478744, "grad_norm": 4.392154216766357, "learning_rate": 1.962571282209561e-06, "loss": 0.2806, "step": 14328 }, { "epoch": 0.7107991467830745, "grad_norm": 6.42468786239624, "learning_rate": 1.9619461098659316e-06, "loss": 0.3132, "step": 14329 }, { "epoch": 0.7108487524182747, "grad_norm": 4.125019073486328, "learning_rate": 1.9613210128066517e-06, "loss": 0.1861, "step": 14330 }, { "epoch": 0.7108983580534749, "grad_norm": 11.916777610778809, "learning_rate": 1.9606959910472116e-06, "loss": 0.361, "step": 14331 }, { "epoch": 0.7109479636886751, "grad_norm": 13.374077796936035, "learning_rate": 1.960071044603102e-06, "loss": 0.2902, "step": 14332 }, { "epoch": 0.7109975693238751, "grad_norm": 7.396575450897217, "learning_rate": 1.9594461734898048e-06, "loss": 0.2325, "step": 14333 }, { "epoch": 0.7110471749590753, "grad_norm": 5.662094593048096, "learning_rate": 1.9588213777228064e-06, "loss": 0.2819, "step": 14334 }, { "epoch": 0.7110967805942755, "grad_norm": 4.038329601287842, "learning_rate": 1.958196657317591e-06, "loss": 0.3242, "step": 14335 }, { "epoch": 0.7111463862294757, "grad_norm": 6.053912162780762, "learning_rate": 1.9575720122896376e-06, "loss": 0.3007, "step": 14336 }, { "epoch": 0.7111959918646759, "grad_norm": 4.408926486968994, "learning_rate": 1.956947442654426e-06, "loss": 0.2229, "step": 14337 }, { "epoch": 0.711245597499876, "grad_norm": 8.4163818359375, "learning_rate": 1.956322948427433e-06, "loss": 0.2472, "step": 14338 }, { "epoch": 0.7112952031350761, "grad_norm": 10.711169242858887, "learning_rate": 1.955698529624137e-06, "loss": 0.3916, "step": 14339 }, { "epoch": 0.7113448087702763, "grad_norm": 11.813309669494629, "learning_rate": 1.9550741862600043e-06, "loss": 0.4159, "step": 14340 }, { "epoch": 0.7113944144054765, "grad_norm": 7.82228946685791, "learning_rate": 1.954449918350514e-06, "loss": 0.3122, "step": 14341 }, { "epoch": 0.7114440200406766, "grad_norm": 10.5927734375, "learning_rate": 1.9538257259111308e-06, "loss": 0.338, "step": 14342 }, { "epoch": 0.7114936256758768, "grad_norm": 5.7003278732299805, "learning_rate": 1.953201608957324e-06, "loss": 0.2369, "step": 14343 }, { "epoch": 0.7115432313110769, "grad_norm": 8.852663040161133, "learning_rate": 1.95257756750456e-06, "loss": 0.305, "step": 14344 }, { "epoch": 0.7115928369462771, "grad_norm": 4.556950569152832, "learning_rate": 1.951953601568304e-06, "loss": 0.2409, "step": 14345 }, { "epoch": 0.7116424425814772, "grad_norm": 4.12360954284668, "learning_rate": 1.9513297111640124e-06, "loss": 0.1925, "step": 14346 }, { "epoch": 0.7116920482166774, "grad_norm": 11.247364044189453, "learning_rate": 1.9507058963071536e-06, "loss": 0.2218, "step": 14347 }, { "epoch": 0.7117416538518776, "grad_norm": 8.1144380569458, "learning_rate": 1.95008215701318e-06, "loss": 0.2982, "step": 14348 }, { "epoch": 0.7117912594870778, "grad_norm": 11.894604682922363, "learning_rate": 1.94945849329755e-06, "loss": 0.3734, "step": 14349 }, { "epoch": 0.7118408651222778, "grad_norm": 8.02758502960205, "learning_rate": 1.9488349051757193e-06, "loss": 0.251, "step": 14350 }, { "epoch": 0.711890470757478, "grad_norm": 7.128538131713867, "learning_rate": 1.948211392663139e-06, "loss": 0.3155, "step": 14351 }, { "epoch": 0.7119400763926782, "grad_norm": 7.794830799102783, "learning_rate": 1.9475879557752615e-06, "loss": 0.2878, "step": 14352 }, { "epoch": 0.7119896820278784, "grad_norm": 12.107970237731934, "learning_rate": 1.9469645945275366e-06, "loss": 0.3381, "step": 14353 }, { "epoch": 0.7120392876630786, "grad_norm": 7.061124801635742, "learning_rate": 1.9463413089354067e-06, "loss": 0.2806, "step": 14354 }, { "epoch": 0.7120888932982787, "grad_norm": 6.267955303192139, "learning_rate": 1.945718099014324e-06, "loss": 0.2453, "step": 14355 }, { "epoch": 0.7121384989334788, "grad_norm": 11.537235260009766, "learning_rate": 1.9450949647797266e-06, "loss": 0.3324, "step": 14356 }, { "epoch": 0.712188104568679, "grad_norm": 5.585851669311523, "learning_rate": 1.944471906247058e-06, "loss": 0.1764, "step": 14357 }, { "epoch": 0.7122377102038792, "grad_norm": 5.088315963745117, "learning_rate": 1.9438489234317577e-06, "loss": 0.1953, "step": 14358 }, { "epoch": 0.7122873158390793, "grad_norm": 12.067902565002441, "learning_rate": 1.943226016349265e-06, "loss": 0.4088, "step": 14359 }, { "epoch": 0.7123369214742795, "grad_norm": 4.236334800720215, "learning_rate": 1.9426031850150114e-06, "loss": 0.2711, "step": 14360 }, { "epoch": 0.7123865271094796, "grad_norm": 4.472818374633789, "learning_rate": 1.9419804294444368e-06, "loss": 0.2281, "step": 14361 }, { "epoch": 0.7124361327446798, "grad_norm": 6.916825294494629, "learning_rate": 1.941357749652969e-06, "loss": 0.3715, "step": 14362 }, { "epoch": 0.7124857383798799, "grad_norm": 6.157216548919678, "learning_rate": 1.9407351456560395e-06, "loss": 0.2904, "step": 14363 }, { "epoch": 0.7125353440150801, "grad_norm": 6.092344760894775, "learning_rate": 1.940112617469077e-06, "loss": 0.2884, "step": 14364 }, { "epoch": 0.7125849496502803, "grad_norm": 5.043428421020508, "learning_rate": 1.9394901651075097e-06, "loss": 0.3629, "step": 14365 }, { "epoch": 0.7126345552854805, "grad_norm": 8.753046989440918, "learning_rate": 1.9388677885867573e-06, "loss": 0.2574, "step": 14366 }, { "epoch": 0.7126841609206805, "grad_norm": 20.060712814331055, "learning_rate": 1.938245487922249e-06, "loss": 0.2401, "step": 14367 }, { "epoch": 0.7127337665558807, "grad_norm": 4.685661792755127, "learning_rate": 1.9376232631293996e-06, "loss": 0.2508, "step": 14368 }, { "epoch": 0.7127833721910809, "grad_norm": 6.344277858734131, "learning_rate": 1.937001114223634e-06, "loss": 0.4201, "step": 14369 }, { "epoch": 0.7128329778262811, "grad_norm": 9.285913467407227, "learning_rate": 1.9363790412203644e-06, "loss": 0.3679, "step": 14370 }, { "epoch": 0.7128825834614813, "grad_norm": 4.898636341094971, "learning_rate": 1.9357570441350084e-06, "loss": 0.3198, "step": 14371 }, { "epoch": 0.7129321890966814, "grad_norm": 8.308454513549805, "learning_rate": 1.9351351229829784e-06, "loss": 0.3997, "step": 14372 }, { "epoch": 0.7129817947318815, "grad_norm": 8.260945320129395, "learning_rate": 1.934513277779689e-06, "loss": 0.2501, "step": 14373 }, { "epoch": 0.7130314003670817, "grad_norm": 20.694189071655273, "learning_rate": 1.933891508540543e-06, "loss": 0.4844, "step": 14374 }, { "epoch": 0.7130810060022819, "grad_norm": 5.8934149742126465, "learning_rate": 1.933269815280956e-06, "loss": 0.3316, "step": 14375 }, { "epoch": 0.713130611637482, "grad_norm": 8.51089859008789, "learning_rate": 1.9326481980163287e-06, "loss": 0.2578, "step": 14376 }, { "epoch": 0.7131802172726822, "grad_norm": 5.608602046966553, "learning_rate": 1.9320266567620667e-06, "loss": 0.2293, "step": 14377 }, { "epoch": 0.7132298229078823, "grad_norm": 5.830552577972412, "learning_rate": 1.931405191533572e-06, "loss": 0.3316, "step": 14378 }, { "epoch": 0.7132794285430825, "grad_norm": 9.195891380310059, "learning_rate": 1.9307838023462443e-06, "loss": 0.3173, "step": 14379 }, { "epoch": 0.7133290341782826, "grad_norm": 7.073063373565674, "learning_rate": 1.9301624892154826e-06, "loss": 0.2936, "step": 14380 }, { "epoch": 0.7133786398134828, "grad_norm": 9.158401489257812, "learning_rate": 1.929541252156684e-06, "loss": 0.3871, "step": 14381 }, { "epoch": 0.713428245448683, "grad_norm": 7.4857282638549805, "learning_rate": 1.9289200911852414e-06, "loss": 0.2626, "step": 14382 }, { "epoch": 0.7134778510838832, "grad_norm": 3.898247718811035, "learning_rate": 1.928299006316548e-06, "loss": 0.2673, "step": 14383 }, { "epoch": 0.7135274567190832, "grad_norm": 6.841618537902832, "learning_rate": 1.927677997565994e-06, "loss": 0.3898, "step": 14384 }, { "epoch": 0.7135770623542834, "grad_norm": 18.747556686401367, "learning_rate": 1.9270570649489702e-06, "loss": 0.3999, "step": 14385 }, { "epoch": 0.7136266679894836, "grad_norm": 9.10569953918457, "learning_rate": 1.9264362084808614e-06, "loss": 0.2821, "step": 14386 }, { "epoch": 0.7136762736246838, "grad_norm": 13.421346664428711, "learning_rate": 1.925815428177054e-06, "loss": 0.4004, "step": 14387 }, { "epoch": 0.713725879259884, "grad_norm": 6.205638885498047, "learning_rate": 1.925194724052931e-06, "loss": 0.2843, "step": 14388 }, { "epoch": 0.7137754848950841, "grad_norm": 6.569953918457031, "learning_rate": 1.9245740961238733e-06, "loss": 0.2697, "step": 14389 }, { "epoch": 0.7138250905302842, "grad_norm": 6.863967418670654, "learning_rate": 1.9239535444052627e-06, "loss": 0.3562, "step": 14390 }, { "epoch": 0.7138746961654844, "grad_norm": 6.519579887390137, "learning_rate": 1.9233330689124726e-06, "loss": 0.3381, "step": 14391 }, { "epoch": 0.7139243018006846, "grad_norm": 19.669118881225586, "learning_rate": 1.9227126696608813e-06, "loss": 0.2994, "step": 14392 }, { "epoch": 0.7139739074358847, "grad_norm": 9.293614387512207, "learning_rate": 1.9220923466658612e-06, "loss": 0.2809, "step": 14393 }, { "epoch": 0.7140235130710849, "grad_norm": 8.188010215759277, "learning_rate": 1.921472099942786e-06, "loss": 0.3294, "step": 14394 }, { "epoch": 0.714073118706285, "grad_norm": 21.516565322875977, "learning_rate": 1.9208519295070243e-06, "loss": 0.4332, "step": 14395 }, { "epoch": 0.7141227243414852, "grad_norm": 8.953855514526367, "learning_rate": 1.9202318353739462e-06, "loss": 0.2358, "step": 14396 }, { "epoch": 0.7141723299766853, "grad_norm": 6.75733757019043, "learning_rate": 1.9196118175589136e-06, "loss": 0.2634, "step": 14397 }, { "epoch": 0.7142219356118855, "grad_norm": 5.449706554412842, "learning_rate": 1.9189918760772963e-06, "loss": 0.3813, "step": 14398 }, { "epoch": 0.7142715412470857, "grad_norm": 5.60202169418335, "learning_rate": 1.9183720109444528e-06, "loss": 0.2825, "step": 14399 }, { "epoch": 0.7143211468822859, "grad_norm": 6.443346977233887, "learning_rate": 1.9177522221757443e-06, "loss": 0.3139, "step": 14400 }, { "epoch": 0.7143707525174859, "grad_norm": 7.928791522979736, "learning_rate": 1.91713250978653e-06, "loss": 0.3626, "step": 14401 }, { "epoch": 0.7144203581526861, "grad_norm": 8.707758903503418, "learning_rate": 1.916512873792169e-06, "loss": 0.2593, "step": 14402 }, { "epoch": 0.7144699637878863, "grad_norm": 4.955072402954102, "learning_rate": 1.91589331420801e-06, "loss": 0.2114, "step": 14403 }, { "epoch": 0.7145195694230865, "grad_norm": 7.484859466552734, "learning_rate": 1.915273831049413e-06, "loss": 0.2668, "step": 14404 }, { "epoch": 0.7145691750582867, "grad_norm": 7.973433494567871, "learning_rate": 1.914654424331722e-06, "loss": 0.3131, "step": 14405 }, { "epoch": 0.7146187806934868, "grad_norm": 10.042348861694336, "learning_rate": 1.9140350940702947e-06, "loss": 0.1828, "step": 14406 }, { "epoch": 0.7146683863286869, "grad_norm": 5.983306407928467, "learning_rate": 1.913415840280471e-06, "loss": 0.2708, "step": 14407 }, { "epoch": 0.7147179919638871, "grad_norm": 5.798889636993408, "learning_rate": 1.9127966629775998e-06, "loss": 0.2763, "step": 14408 }, { "epoch": 0.7147675975990873, "grad_norm": 5.266881465911865, "learning_rate": 1.912177562177023e-06, "loss": 0.2056, "step": 14409 }, { "epoch": 0.7148172032342874, "grad_norm": 4.9141011238098145, "learning_rate": 1.9115585378940847e-06, "loss": 0.2473, "step": 14410 }, { "epoch": 0.7148668088694876, "grad_norm": 6.9266204833984375, "learning_rate": 1.9109395901441197e-06, "loss": 0.2947, "step": 14411 }, { "epoch": 0.7149164145046877, "grad_norm": 6.928445339202881, "learning_rate": 1.9103207189424725e-06, "loss": 0.2498, "step": 14412 }, { "epoch": 0.7149660201398879, "grad_norm": 7.077402591705322, "learning_rate": 1.909701924304474e-06, "loss": 0.3292, "step": 14413 }, { "epoch": 0.715015625775088, "grad_norm": 6.017987251281738, "learning_rate": 1.9090832062454595e-06, "loss": 0.2828, "step": 14414 }, { "epoch": 0.7150652314102882, "grad_norm": 7.233903408050537, "learning_rate": 1.908464564780761e-06, "loss": 0.242, "step": 14415 }, { "epoch": 0.7151148370454884, "grad_norm": 10.465228080749512, "learning_rate": 1.907845999925711e-06, "loss": 0.3891, "step": 14416 }, { "epoch": 0.7151644426806886, "grad_norm": 8.160686492919922, "learning_rate": 1.9072275116956325e-06, "loss": 0.2918, "step": 14417 }, { "epoch": 0.7152140483158886, "grad_norm": 5.631200790405273, "learning_rate": 1.906609100105859e-06, "loss": 0.3001, "step": 14418 }, { "epoch": 0.7152636539510888, "grad_norm": 8.289953231811523, "learning_rate": 1.9059907651717091e-06, "loss": 0.1977, "step": 14419 }, { "epoch": 0.715313259586289, "grad_norm": 12.211578369140625, "learning_rate": 1.9053725069085076e-06, "loss": 0.3127, "step": 14420 }, { "epoch": 0.7153628652214892, "grad_norm": 8.077478408813477, "learning_rate": 1.9047543253315753e-06, "loss": 0.2652, "step": 14421 }, { "epoch": 0.7154124708566894, "grad_norm": 5.928051471710205, "learning_rate": 1.9041362204562314e-06, "loss": 0.2526, "step": 14422 }, { "epoch": 0.7154620764918895, "grad_norm": 5.832447052001953, "learning_rate": 1.9035181922977918e-06, "loss": 0.2117, "step": 14423 }, { "epoch": 0.7155116821270896, "grad_norm": 6.816305160522461, "learning_rate": 1.902900240871574e-06, "loss": 0.3829, "step": 14424 }, { "epoch": 0.7155612877622898, "grad_norm": 6.309579372406006, "learning_rate": 1.9022823661928857e-06, "loss": 0.3347, "step": 14425 }, { "epoch": 0.71561089339749, "grad_norm": 6.44206428527832, "learning_rate": 1.9016645682770452e-06, "loss": 0.3835, "step": 14426 }, { "epoch": 0.7156604990326901, "grad_norm": 8.06701374053955, "learning_rate": 1.901046847139356e-06, "loss": 0.3823, "step": 14427 }, { "epoch": 0.7157101046678903, "grad_norm": 6.299617767333984, "learning_rate": 1.9004292027951288e-06, "loss": 0.2353, "step": 14428 }, { "epoch": 0.7157597103030904, "grad_norm": 5.40315055847168, "learning_rate": 1.8998116352596674e-06, "loss": 0.3235, "step": 14429 }, { "epoch": 0.7158093159382906, "grad_norm": 7.831707954406738, "learning_rate": 1.8991941445482775e-06, "loss": 0.3768, "step": 14430 }, { "epoch": 0.7158589215734907, "grad_norm": 9.975370407104492, "learning_rate": 1.8985767306762559e-06, "loss": 0.396, "step": 14431 }, { "epoch": 0.7159085272086909, "grad_norm": 9.191678047180176, "learning_rate": 1.8979593936589097e-06, "loss": 0.4443, "step": 14432 }, { "epoch": 0.7159581328438911, "grad_norm": 4.376015663146973, "learning_rate": 1.8973421335115311e-06, "loss": 0.2814, "step": 14433 }, { "epoch": 0.7160077384790913, "grad_norm": 13.048155784606934, "learning_rate": 1.8967249502494178e-06, "loss": 0.2529, "step": 14434 }, { "epoch": 0.7160573441142913, "grad_norm": 7.859567165374756, "learning_rate": 1.8961078438878645e-06, "loss": 0.2689, "step": 14435 }, { "epoch": 0.7161069497494915, "grad_norm": 16.411968231201172, "learning_rate": 1.8954908144421625e-06, "loss": 0.494, "step": 14436 }, { "epoch": 0.7161565553846917, "grad_norm": 8.250659942626953, "learning_rate": 1.8948738619276019e-06, "loss": 0.379, "step": 14437 }, { "epoch": 0.7162061610198919, "grad_norm": 6.062843322753906, "learning_rate": 1.8942569863594724e-06, "loss": 0.2261, "step": 14438 }, { "epoch": 0.716255766655092, "grad_norm": 10.597864151000977, "learning_rate": 1.893640187753059e-06, "loss": 0.3157, "step": 14439 }, { "epoch": 0.7163053722902922, "grad_norm": 8.104476928710938, "learning_rate": 1.8930234661236491e-06, "loss": 0.3126, "step": 14440 }, { "epoch": 0.7163549779254923, "grad_norm": 11.231148719787598, "learning_rate": 1.8924068214865215e-06, "loss": 0.3321, "step": 14441 }, { "epoch": 0.7164045835606925, "grad_norm": 11.722311019897461, "learning_rate": 1.8917902538569582e-06, "loss": 0.3201, "step": 14442 }, { "epoch": 0.7164541891958927, "grad_norm": 10.746708869934082, "learning_rate": 1.8911737632502385e-06, "loss": 0.2604, "step": 14443 }, { "epoch": 0.7165037948310928, "grad_norm": 4.537089824676514, "learning_rate": 1.89055734968164e-06, "loss": 0.1923, "step": 14444 }, { "epoch": 0.716553400466293, "grad_norm": 8.346376419067383, "learning_rate": 1.889941013166436e-06, "loss": 0.2744, "step": 14445 }, { "epoch": 0.7166030061014931, "grad_norm": 5.200918674468994, "learning_rate": 1.8893247537199005e-06, "loss": 0.2669, "step": 14446 }, { "epoch": 0.7166526117366933, "grad_norm": 10.263457298278809, "learning_rate": 1.8887085713573067e-06, "loss": 0.3999, "step": 14447 }, { "epoch": 0.7167022173718934, "grad_norm": 4.659742832183838, "learning_rate": 1.8880924660939205e-06, "loss": 0.2587, "step": 14448 }, { "epoch": 0.7167518230070936, "grad_norm": 12.46827507019043, "learning_rate": 1.88747643794501e-06, "loss": 0.3819, "step": 14449 }, { "epoch": 0.7168014286422938, "grad_norm": 9.506512641906738, "learning_rate": 1.8868604869258416e-06, "loss": 0.2527, "step": 14450 }, { "epoch": 0.716851034277494, "grad_norm": 8.473999977111816, "learning_rate": 1.8862446130516787e-06, "loss": 0.3534, "step": 14451 }, { "epoch": 0.716900639912694, "grad_norm": 8.964364051818848, "learning_rate": 1.8856288163377829e-06, "loss": 0.3441, "step": 14452 }, { "epoch": 0.7169502455478942, "grad_norm": 9.336346626281738, "learning_rate": 1.8850130967994151e-06, "loss": 0.267, "step": 14453 }, { "epoch": 0.7169998511830944, "grad_norm": 11.076010704040527, "learning_rate": 1.884397454451829e-06, "loss": 0.344, "step": 14454 }, { "epoch": 0.7170494568182946, "grad_norm": 5.050295829772949, "learning_rate": 1.883781889310286e-06, "loss": 0.2673, "step": 14455 }, { "epoch": 0.7170990624534948, "grad_norm": 5.764426231384277, "learning_rate": 1.8831664013900348e-06, "loss": 0.218, "step": 14456 }, { "epoch": 0.7171486680886949, "grad_norm": 15.761356353759766, "learning_rate": 1.8825509907063328e-06, "loss": 0.3489, "step": 14457 }, { "epoch": 0.717198273723895, "grad_norm": 7.375361919403076, "learning_rate": 1.8819356572744264e-06, "loss": 0.4143, "step": 14458 }, { "epoch": 0.7172478793590952, "grad_norm": 8.164745330810547, "learning_rate": 1.8813204011095642e-06, "loss": 0.2247, "step": 14459 }, { "epoch": 0.7172974849942954, "grad_norm": 6.8976149559021, "learning_rate": 1.8807052222269933e-06, "loss": 0.2024, "step": 14460 }, { "epoch": 0.7173470906294955, "grad_norm": 6.352409362792969, "learning_rate": 1.8800901206419597e-06, "loss": 0.2876, "step": 14461 }, { "epoch": 0.7173966962646957, "grad_norm": 7.603401184082031, "learning_rate": 1.8794750963697011e-06, "loss": 0.3046, "step": 14462 }, { "epoch": 0.7174463018998958, "grad_norm": 5.757923603057861, "learning_rate": 1.8788601494254643e-06, "loss": 0.3657, "step": 14463 }, { "epoch": 0.717495907535096, "grad_norm": 4.999113082885742, "learning_rate": 1.8782452798244831e-06, "loss": 0.2787, "step": 14464 }, { "epoch": 0.7175455131702961, "grad_norm": 6.721248626708984, "learning_rate": 1.877630487581996e-06, "loss": 0.2862, "step": 14465 }, { "epoch": 0.7175951188054963, "grad_norm": 4.0732502937316895, "learning_rate": 1.8770157727132382e-06, "loss": 0.2526, "step": 14466 }, { "epoch": 0.7176447244406965, "grad_norm": 6.6133270263671875, "learning_rate": 1.876401135233444e-06, "loss": 0.311, "step": 14467 }, { "epoch": 0.7176943300758967, "grad_norm": 13.463861465454102, "learning_rate": 1.8757865751578387e-06, "loss": 0.2965, "step": 14468 }, { "epoch": 0.7177439357110967, "grad_norm": 19.130184173583984, "learning_rate": 1.8751720925016591e-06, "loss": 0.3012, "step": 14469 }, { "epoch": 0.7177935413462969, "grad_norm": 8.577343940734863, "learning_rate": 1.8745576872801262e-06, "loss": 0.3559, "step": 14470 }, { "epoch": 0.7178431469814971, "grad_norm": 10.839605331420898, "learning_rate": 1.8739433595084682e-06, "loss": 0.4152, "step": 14471 }, { "epoch": 0.7178927526166973, "grad_norm": 5.755329132080078, "learning_rate": 1.8733291092019078e-06, "loss": 0.1589, "step": 14472 }, { "epoch": 0.7179423582518975, "grad_norm": 4.97661018371582, "learning_rate": 1.872714936375668e-06, "loss": 0.3105, "step": 14473 }, { "epoch": 0.7179919638870976, "grad_norm": 9.559392929077148, "learning_rate": 1.8721008410449637e-06, "loss": 0.3436, "step": 14474 }, { "epoch": 0.7180415695222977, "grad_norm": 9.959102630615234, "learning_rate": 1.8714868232250183e-06, "loss": 0.3005, "step": 14475 }, { "epoch": 0.7180911751574979, "grad_norm": 10.085369110107422, "learning_rate": 1.8708728829310413e-06, "loss": 0.595, "step": 14476 }, { "epoch": 0.718140780792698, "grad_norm": 6.9671783447265625, "learning_rate": 1.8702590201782533e-06, "loss": 0.2383, "step": 14477 }, { "epoch": 0.7181903864278982, "grad_norm": 4.773549556732178, "learning_rate": 1.8696452349818605e-06, "loss": 0.2804, "step": 14478 }, { "epoch": 0.7182399920630984, "grad_norm": 7.214084148406982, "learning_rate": 1.8690315273570753e-06, "loss": 0.2622, "step": 14479 }, { "epoch": 0.7182895976982985, "grad_norm": 9.321855545043945, "learning_rate": 1.8684178973191046e-06, "loss": 0.3505, "step": 14480 }, { "epoch": 0.7183392033334987, "grad_norm": 7.7440266609191895, "learning_rate": 1.8678043448831572e-06, "loss": 0.3856, "step": 14481 }, { "epoch": 0.7183888089686988, "grad_norm": 10.866887092590332, "learning_rate": 1.8671908700644315e-06, "loss": 0.3727, "step": 14482 }, { "epoch": 0.718438414603899, "grad_norm": 8.69845199584961, "learning_rate": 1.866577472878136e-06, "loss": 0.2815, "step": 14483 }, { "epoch": 0.7184880202390992, "grad_norm": 4.619094371795654, "learning_rate": 1.8659641533394668e-06, "loss": 0.3001, "step": 14484 }, { "epoch": 0.7185376258742994, "grad_norm": 6.264893531799316, "learning_rate": 1.8653509114636238e-06, "loss": 0.2749, "step": 14485 }, { "epoch": 0.7185872315094994, "grad_norm": 5.975500106811523, "learning_rate": 1.864737747265803e-06, "loss": 0.2491, "step": 14486 }, { "epoch": 0.7186368371446996, "grad_norm": 4.038213729858398, "learning_rate": 1.864124660761199e-06, "loss": 0.2335, "step": 14487 }, { "epoch": 0.7186864427798998, "grad_norm": 9.441092491149902, "learning_rate": 1.8635116519650044e-06, "loss": 0.3833, "step": 14488 }, { "epoch": 0.7187360484151, "grad_norm": 8.176429748535156, "learning_rate": 1.8628987208924115e-06, "loss": 0.2988, "step": 14489 }, { "epoch": 0.7187856540503001, "grad_norm": 5.021081447601318, "learning_rate": 1.8622858675586058e-06, "loss": 0.1757, "step": 14490 }, { "epoch": 0.7188352596855003, "grad_norm": 9.770727157592773, "learning_rate": 1.8616730919787752e-06, "loss": 0.3036, "step": 14491 }, { "epoch": 0.7188848653207004, "grad_norm": 3.9269638061523438, "learning_rate": 1.8610603941681055e-06, "loss": 0.2229, "step": 14492 }, { "epoch": 0.7189344709559006, "grad_norm": 7.104534149169922, "learning_rate": 1.8604477741417792e-06, "loss": 0.2052, "step": 14493 }, { "epoch": 0.7189840765911008, "grad_norm": 4.781095027923584, "learning_rate": 1.859835231914977e-06, "loss": 0.1677, "step": 14494 }, { "epoch": 0.7190336822263009, "grad_norm": 5.126326560974121, "learning_rate": 1.859222767502878e-06, "loss": 0.2792, "step": 14495 }, { "epoch": 0.7190832878615011, "grad_norm": 7.922562599182129, "learning_rate": 1.8586103809206596e-06, "loss": 0.4206, "step": 14496 }, { "epoch": 0.7191328934967012, "grad_norm": 5.7469940185546875, "learning_rate": 1.857998072183499e-06, "loss": 0.2724, "step": 14497 }, { "epoch": 0.7191824991319014, "grad_norm": 11.367037773132324, "learning_rate": 1.8573858413065653e-06, "loss": 0.4746, "step": 14498 }, { "epoch": 0.7192321047671015, "grad_norm": 11.848991394042969, "learning_rate": 1.8567736883050325e-06, "loss": 0.4795, "step": 14499 }, { "epoch": 0.7192817104023017, "grad_norm": 8.548863410949707, "learning_rate": 1.8561616131940695e-06, "loss": 0.3018, "step": 14500 }, { "epoch": 0.7193313160375019, "grad_norm": 33.345245361328125, "learning_rate": 1.8555496159888436e-06, "loss": 0.5049, "step": 14501 }, { "epoch": 0.7193809216727021, "grad_norm": 6.0104570388793945, "learning_rate": 1.8549376967045207e-06, "loss": 0.2455, "step": 14502 }, { "epoch": 0.7194305273079021, "grad_norm": 13.545999526977539, "learning_rate": 1.8543258553562642e-06, "loss": 0.4316, "step": 14503 }, { "epoch": 0.7194801329431023, "grad_norm": 6.603177070617676, "learning_rate": 1.8537140919592378e-06, "loss": 0.2451, "step": 14504 }, { "epoch": 0.7195297385783025, "grad_norm": 6.7371063232421875, "learning_rate": 1.8531024065285963e-06, "loss": 0.2707, "step": 14505 }, { "epoch": 0.7195793442135027, "grad_norm": 6.667366981506348, "learning_rate": 1.8524907990795038e-06, "loss": 0.2049, "step": 14506 }, { "epoch": 0.7196289498487028, "grad_norm": 5.549371242523193, "learning_rate": 1.8518792696271109e-06, "loss": 0.3064, "step": 14507 }, { "epoch": 0.719678555483903, "grad_norm": 5.402060031890869, "learning_rate": 1.8512678181865746e-06, "loss": 0.2632, "step": 14508 }, { "epoch": 0.7197281611191031, "grad_norm": 5.981288909912109, "learning_rate": 1.8506564447730458e-06, "loss": 0.2884, "step": 14509 }, { "epoch": 0.7197777667543033, "grad_norm": 5.799255847930908, "learning_rate": 1.8500451494016747e-06, "loss": 0.2937, "step": 14510 }, { "epoch": 0.7198273723895035, "grad_norm": 12.174453735351562, "learning_rate": 1.8494339320876098e-06, "loss": 0.4029, "step": 14511 }, { "epoch": 0.7198769780247036, "grad_norm": 3.8227641582489014, "learning_rate": 1.8488227928459984e-06, "loss": 0.2781, "step": 14512 }, { "epoch": 0.7199265836599038, "grad_norm": 4.854291915893555, "learning_rate": 1.8482117316919813e-06, "loss": 0.2651, "step": 14513 }, { "epoch": 0.7199761892951039, "grad_norm": 7.972228527069092, "learning_rate": 1.8476007486407055e-06, "loss": 0.2556, "step": 14514 }, { "epoch": 0.720025794930304, "grad_norm": 7.771712303161621, "learning_rate": 1.846989843707308e-06, "loss": 0.278, "step": 14515 }, { "epoch": 0.7200754005655042, "grad_norm": 6.920408248901367, "learning_rate": 1.8463790169069285e-06, "loss": 0.318, "step": 14516 }, { "epoch": 0.7201250062007044, "grad_norm": 10.066374778747559, "learning_rate": 1.8457682682547035e-06, "loss": 0.382, "step": 14517 }, { "epoch": 0.7201746118359046, "grad_norm": 5.685598373413086, "learning_rate": 1.8451575977657694e-06, "loss": 0.3269, "step": 14518 }, { "epoch": 0.7202242174711048, "grad_norm": 19.8157958984375, "learning_rate": 1.8445470054552533e-06, "loss": 0.4025, "step": 14519 }, { "epoch": 0.7202738231063048, "grad_norm": 6.846034049987793, "learning_rate": 1.8439364913382935e-06, "loss": 0.316, "step": 14520 }, { "epoch": 0.720323428741505, "grad_norm": 9.674943923950195, "learning_rate": 1.8433260554300132e-06, "loss": 0.2553, "step": 14521 }, { "epoch": 0.7203730343767052, "grad_norm": 7.720970153808594, "learning_rate": 1.8427156977455413e-06, "loss": 0.2884, "step": 14522 }, { "epoch": 0.7204226400119054, "grad_norm": 5.4459943771362305, "learning_rate": 1.8421054183000026e-06, "loss": 0.2359, "step": 14523 }, { "epoch": 0.7204722456471055, "grad_norm": 5.284818649291992, "learning_rate": 1.8414952171085216e-06, "loss": 0.2445, "step": 14524 }, { "epoch": 0.7205218512823057, "grad_norm": 11.843229293823242, "learning_rate": 1.8408850941862144e-06, "loss": 0.4095, "step": 14525 }, { "epoch": 0.7205714569175058, "grad_norm": 7.559997081756592, "learning_rate": 1.8402750495482076e-06, "loss": 0.3108, "step": 14526 }, { "epoch": 0.720621062552706, "grad_norm": 5.312297821044922, "learning_rate": 1.8396650832096119e-06, "loss": 0.3502, "step": 14527 }, { "epoch": 0.7206706681879061, "grad_norm": 6.827541828155518, "learning_rate": 1.8390551951855456e-06, "loss": 0.2852, "step": 14528 }, { "epoch": 0.7207202738231063, "grad_norm": 14.277974128723145, "learning_rate": 1.838445385491121e-06, "loss": 0.4117, "step": 14529 }, { "epoch": 0.7207698794583065, "grad_norm": 5.282110214233398, "learning_rate": 1.83783565414145e-06, "loss": 0.2569, "step": 14530 }, { "epoch": 0.7208194850935066, "grad_norm": 11.100634574890137, "learning_rate": 1.837226001151642e-06, "loss": 0.3638, "step": 14531 }, { "epoch": 0.7208690907287068, "grad_norm": 9.508890151977539, "learning_rate": 1.8366164265368063e-06, "loss": 0.3711, "step": 14532 }, { "epoch": 0.7209186963639069, "grad_norm": 6.548701763153076, "learning_rate": 1.8360069303120425e-06, "loss": 0.256, "step": 14533 }, { "epoch": 0.7209683019991071, "grad_norm": 5.3069539070129395, "learning_rate": 1.8353975124924623e-06, "loss": 0.2425, "step": 14534 }, { "epoch": 0.7210179076343073, "grad_norm": 6.216127872467041, "learning_rate": 1.834788173093161e-06, "loss": 0.4035, "step": 14535 }, { "epoch": 0.7210675132695075, "grad_norm": 4.964069366455078, "learning_rate": 1.8341789121292408e-06, "loss": 0.2315, "step": 14536 }, { "epoch": 0.7211171189047075, "grad_norm": 9.909786224365234, "learning_rate": 1.833569729615799e-06, "loss": 0.3755, "step": 14537 }, { "epoch": 0.7211667245399077, "grad_norm": 5.199337005615234, "learning_rate": 1.8329606255679333e-06, "loss": 0.2137, "step": 14538 }, { "epoch": 0.7212163301751079, "grad_norm": 14.391104698181152, "learning_rate": 1.832351600000733e-06, "loss": 0.4607, "step": 14539 }, { "epoch": 0.7212659358103081, "grad_norm": 9.72086238861084, "learning_rate": 1.8317426529292958e-06, "loss": 0.304, "step": 14540 }, { "epoch": 0.7213155414455082, "grad_norm": 4.6667866706848145, "learning_rate": 1.8311337843687078e-06, "loss": 0.2783, "step": 14541 }, { "epoch": 0.7213651470807083, "grad_norm": 4.649808406829834, "learning_rate": 1.8305249943340574e-06, "loss": 0.2422, "step": 14542 }, { "epoch": 0.7214147527159085, "grad_norm": 10.188139915466309, "learning_rate": 1.8299162828404316e-06, "loss": 0.2715, "step": 14543 }, { "epoch": 0.7214643583511087, "grad_norm": 14.263688087463379, "learning_rate": 1.829307649902914e-06, "loss": 0.3249, "step": 14544 }, { "epoch": 0.7215139639863088, "grad_norm": 14.195290565490723, "learning_rate": 1.8286990955365875e-06, "loss": 0.29, "step": 14545 }, { "epoch": 0.721563569621509, "grad_norm": 4.804208755493164, "learning_rate": 1.8280906197565335e-06, "loss": 0.2415, "step": 14546 }, { "epoch": 0.7216131752567092, "grad_norm": 8.44432544708252, "learning_rate": 1.8274822225778255e-06, "loss": 0.2573, "step": 14547 }, { "epoch": 0.7216627808919093, "grad_norm": 8.402514457702637, "learning_rate": 1.8268739040155465e-06, "loss": 0.2839, "step": 14548 }, { "epoch": 0.7217123865271095, "grad_norm": 8.45332145690918, "learning_rate": 1.8262656640847653e-06, "loss": 0.2794, "step": 14549 }, { "epoch": 0.7217619921623096, "grad_norm": 5.4443359375, "learning_rate": 1.8256575028005568e-06, "loss": 0.2494, "step": 14550 }, { "epoch": 0.7218115977975098, "grad_norm": 12.048246383666992, "learning_rate": 1.8250494201779917e-06, "loss": 0.4029, "step": 14551 }, { "epoch": 0.72186120343271, "grad_norm": 5.328365802764893, "learning_rate": 1.8244414162321378e-06, "loss": 0.2449, "step": 14552 }, { "epoch": 0.7219108090679102, "grad_norm": 5.542122840881348, "learning_rate": 1.823833490978062e-06, "loss": 0.2936, "step": 14553 }, { "epoch": 0.7219604147031102, "grad_norm": 9.084733963012695, "learning_rate": 1.8232256444308294e-06, "loss": 0.3194, "step": 14554 }, { "epoch": 0.7220100203383104, "grad_norm": 6.378693580627441, "learning_rate": 1.8226178766055036e-06, "loss": 0.2515, "step": 14555 }, { "epoch": 0.7220596259735106, "grad_norm": 4.197410583496094, "learning_rate": 1.8220101875171431e-06, "loss": 0.3485, "step": 14556 }, { "epoch": 0.7221092316087108, "grad_norm": 8.060799598693848, "learning_rate": 1.821402577180807e-06, "loss": 0.3052, "step": 14557 }, { "epoch": 0.722158837243911, "grad_norm": 9.140559196472168, "learning_rate": 1.820795045611553e-06, "loss": 0.2704, "step": 14558 }, { "epoch": 0.722208442879111, "grad_norm": 13.491399765014648, "learning_rate": 1.820187592824436e-06, "loss": 0.3402, "step": 14559 }, { "epoch": 0.7222580485143112, "grad_norm": 5.090244293212891, "learning_rate": 1.819580218834509e-06, "loss": 0.2538, "step": 14560 }, { "epoch": 0.7223076541495114, "grad_norm": 8.211636543273926, "learning_rate": 1.8189729236568243e-06, "loss": 0.2778, "step": 14561 }, { "epoch": 0.7223572597847115, "grad_norm": 31.273557662963867, "learning_rate": 1.8183657073064265e-06, "loss": 0.4823, "step": 14562 }, { "epoch": 0.7224068654199117, "grad_norm": 10.03476619720459, "learning_rate": 1.8177585697983685e-06, "loss": 0.2753, "step": 14563 }, { "epoch": 0.7224564710551119, "grad_norm": 6.38016414642334, "learning_rate": 1.8171515111476894e-06, "loss": 0.2413, "step": 14564 }, { "epoch": 0.722506076690312, "grad_norm": 12.32502269744873, "learning_rate": 1.816544531369439e-06, "loss": 0.2995, "step": 14565 }, { "epoch": 0.7225556823255121, "grad_norm": 7.966108798980713, "learning_rate": 1.8159376304786536e-06, "loss": 0.2999, "step": 14566 }, { "epoch": 0.7226052879607123, "grad_norm": 7.304749488830566, "learning_rate": 1.8153308084903742e-06, "loss": 0.3072, "step": 14567 }, { "epoch": 0.7226548935959125, "grad_norm": 5.813510417938232, "learning_rate": 1.814724065419638e-06, "loss": 0.31, "step": 14568 }, { "epoch": 0.7227044992311127, "grad_norm": 4.5066142082214355, "learning_rate": 1.8141174012814816e-06, "loss": 0.2248, "step": 14569 }, { "epoch": 0.7227541048663129, "grad_norm": 8.057842254638672, "learning_rate": 1.8135108160909338e-06, "loss": 0.31, "step": 14570 }, { "epoch": 0.7228037105015129, "grad_norm": 19.945438385009766, "learning_rate": 1.8129043098630329e-06, "loss": 0.4358, "step": 14571 }, { "epoch": 0.7228533161367131, "grad_norm": 4.922873497009277, "learning_rate": 1.8122978826128029e-06, "loss": 0.287, "step": 14572 }, { "epoch": 0.7229029217719133, "grad_norm": 24.54037857055664, "learning_rate": 1.8116915343552737e-06, "loss": 0.285, "step": 14573 }, { "epoch": 0.7229525274071135, "grad_norm": 9.103763580322266, "learning_rate": 1.81108526510547e-06, "loss": 0.2907, "step": 14574 }, { "epoch": 0.7230021330423136, "grad_norm": 5.099106311798096, "learning_rate": 1.8104790748784185e-06, "loss": 0.2622, "step": 14575 }, { "epoch": 0.7230517386775137, "grad_norm": 15.639488220214844, "learning_rate": 1.8098729636891342e-06, "loss": 0.4135, "step": 14576 }, { "epoch": 0.7231013443127139, "grad_norm": 7.025987148284912, "learning_rate": 1.8092669315526445e-06, "loss": 0.3492, "step": 14577 }, { "epoch": 0.7231509499479141, "grad_norm": 7.357624053955078, "learning_rate": 1.808660978483962e-06, "loss": 0.3595, "step": 14578 }, { "epoch": 0.7232005555831142, "grad_norm": 5.267583847045898, "learning_rate": 1.8080551044981037e-06, "loss": 0.3236, "step": 14579 }, { "epoch": 0.7232501612183144, "grad_norm": 5.079762935638428, "learning_rate": 1.8074493096100843e-06, "loss": 0.2468, "step": 14580 }, { "epoch": 0.7232997668535146, "grad_norm": 13.479391098022461, "learning_rate": 1.8068435938349143e-06, "loss": 0.484, "step": 14581 }, { "epoch": 0.7233493724887147, "grad_norm": 8.795084953308105, "learning_rate": 1.8062379571876054e-06, "loss": 0.3654, "step": 14582 }, { "epoch": 0.7233989781239148, "grad_norm": 7.285422325134277, "learning_rate": 1.805632399683166e-06, "loss": 0.3442, "step": 14583 }, { "epoch": 0.723448583759115, "grad_norm": 8.037700653076172, "learning_rate": 1.8050269213365974e-06, "loss": 0.362, "step": 14584 }, { "epoch": 0.7234981893943152, "grad_norm": 7.4423441886901855, "learning_rate": 1.8044215221629096e-06, "loss": 0.2915, "step": 14585 }, { "epoch": 0.7235477950295154, "grad_norm": 6.750823974609375, "learning_rate": 1.8038162021771011e-06, "loss": 0.3406, "step": 14586 }, { "epoch": 0.7235974006647156, "grad_norm": 7.458082675933838, "learning_rate": 1.803210961394173e-06, "loss": 0.2519, "step": 14587 }, { "epoch": 0.7236470062999156, "grad_norm": 14.03824234008789, "learning_rate": 1.802605799829123e-06, "loss": 0.3358, "step": 14588 }, { "epoch": 0.7236966119351158, "grad_norm": 6.100589275360107, "learning_rate": 1.802000717496949e-06, "loss": 0.2684, "step": 14589 }, { "epoch": 0.723746217570316, "grad_norm": 7.1044020652771, "learning_rate": 1.801395714412641e-06, "loss": 0.3395, "step": 14590 }, { "epoch": 0.7237958232055162, "grad_norm": 5.5182342529296875, "learning_rate": 1.800790790591197e-06, "loss": 0.2142, "step": 14591 }, { "epoch": 0.7238454288407163, "grad_norm": 5.020960330963135, "learning_rate": 1.8001859460476028e-06, "loss": 0.2863, "step": 14592 }, { "epoch": 0.7238950344759164, "grad_norm": 5.9685540199279785, "learning_rate": 1.7995811807968488e-06, "loss": 0.2297, "step": 14593 }, { "epoch": 0.7239446401111166, "grad_norm": 4.462706089019775, "learning_rate": 1.79897649485392e-06, "loss": 0.2627, "step": 14594 }, { "epoch": 0.7239942457463168, "grad_norm": 5.928208827972412, "learning_rate": 1.798371888233804e-06, "loss": 0.2494, "step": 14595 }, { "epoch": 0.724043851381517, "grad_norm": 6.080728530883789, "learning_rate": 1.7977673609514773e-06, "loss": 0.2438, "step": 14596 }, { "epoch": 0.7240934570167171, "grad_norm": 6.210943222045898, "learning_rate": 1.7971629130219275e-06, "loss": 0.1742, "step": 14597 }, { "epoch": 0.7241430626519173, "grad_norm": 7.2583770751953125, "learning_rate": 1.7965585444601274e-06, "loss": 0.2941, "step": 14598 }, { "epoch": 0.7241926682871174, "grad_norm": 4.68846321105957, "learning_rate": 1.7959542552810561e-06, "loss": 0.3355, "step": 14599 }, { "epoch": 0.7242422739223175, "grad_norm": 4.52669095993042, "learning_rate": 1.7953500454996875e-06, "loss": 0.2661, "step": 14600 }, { "epoch": 0.7242918795575177, "grad_norm": 7.097938060760498, "learning_rate": 1.7947459151309942e-06, "loss": 0.3562, "step": 14601 }, { "epoch": 0.7243414851927179, "grad_norm": 7.259729385375977, "learning_rate": 1.7941418641899473e-06, "loss": 0.2784, "step": 14602 }, { "epoch": 0.7243910908279181, "grad_norm": 4.741008281707764, "learning_rate": 1.7935378926915148e-06, "loss": 0.2805, "step": 14603 }, { "epoch": 0.7244406964631183, "grad_norm": 5.3687052726745605, "learning_rate": 1.792934000650664e-06, "loss": 0.2792, "step": 14604 }, { "epoch": 0.7244903020983183, "grad_norm": 6.741578578948975, "learning_rate": 1.7923301880823613e-06, "loss": 0.365, "step": 14605 }, { "epoch": 0.7245399077335185, "grad_norm": 7.054640293121338, "learning_rate": 1.7917264550015656e-06, "loss": 0.3678, "step": 14606 }, { "epoch": 0.7245895133687187, "grad_norm": 6.712400436401367, "learning_rate": 1.7911228014232396e-06, "loss": 0.2932, "step": 14607 }, { "epoch": 0.7246391190039189, "grad_norm": 6.88155460357666, "learning_rate": 1.7905192273623417e-06, "loss": 0.295, "step": 14608 }, { "epoch": 0.724688724639119, "grad_norm": 4.618218898773193, "learning_rate": 1.7899157328338295e-06, "loss": 0.2685, "step": 14609 }, { "epoch": 0.7247383302743191, "grad_norm": 7.241408824920654, "learning_rate": 1.7893123178526577e-06, "loss": 0.2135, "step": 14610 }, { "epoch": 0.7247879359095193, "grad_norm": 8.368706703186035, "learning_rate": 1.7887089824337788e-06, "loss": 0.357, "step": 14611 }, { "epoch": 0.7248375415447195, "grad_norm": 9.983975410461426, "learning_rate": 1.7881057265921458e-06, "loss": 0.2843, "step": 14612 }, { "epoch": 0.7248871471799196, "grad_norm": 8.931862831115723, "learning_rate": 1.7875025503427023e-06, "loss": 0.3012, "step": 14613 }, { "epoch": 0.7249367528151198, "grad_norm": 4.554818153381348, "learning_rate": 1.7868994537004025e-06, "loss": 0.2627, "step": 14614 }, { "epoch": 0.72498635845032, "grad_norm": 6.500936985015869, "learning_rate": 1.7862964366801854e-06, "loss": 0.2738, "step": 14615 }, { "epoch": 0.7250359640855201, "grad_norm": 6.7884602546691895, "learning_rate": 1.785693499296997e-06, "loss": 0.3245, "step": 14616 }, { "epoch": 0.7250855697207202, "grad_norm": 8.663045883178711, "learning_rate": 1.785090641565777e-06, "loss": 0.341, "step": 14617 }, { "epoch": 0.7251351753559204, "grad_norm": 10.915037155151367, "learning_rate": 1.7844878635014657e-06, "loss": 0.3194, "step": 14618 }, { "epoch": 0.7251847809911206, "grad_norm": 4.730015754699707, "learning_rate": 1.7838851651189993e-06, "loss": 0.2198, "step": 14619 }, { "epoch": 0.7252343866263208, "grad_norm": 4.525257110595703, "learning_rate": 1.7832825464333153e-06, "loss": 0.264, "step": 14620 }, { "epoch": 0.725283992261521, "grad_norm": 2.9244794845581055, "learning_rate": 1.782680007459341e-06, "loss": 0.1761, "step": 14621 }, { "epoch": 0.725333597896721, "grad_norm": 7.094211578369141, "learning_rate": 1.782077548212015e-06, "loss": 0.3402, "step": 14622 }, { "epoch": 0.7253832035319212, "grad_norm": 4.49505090713501, "learning_rate": 1.781475168706262e-06, "loss": 0.3327, "step": 14623 }, { "epoch": 0.7254328091671214, "grad_norm": 4.037456035614014, "learning_rate": 1.7808728689570092e-06, "loss": 0.2319, "step": 14624 }, { "epoch": 0.7254824148023216, "grad_norm": 13.607242584228516, "learning_rate": 1.7802706489791838e-06, "loss": 0.3051, "step": 14625 }, { "epoch": 0.7255320204375217, "grad_norm": 8.096634864807129, "learning_rate": 1.779668508787709e-06, "loss": 0.2394, "step": 14626 }, { "epoch": 0.7255816260727218, "grad_norm": 7.304274559020996, "learning_rate": 1.7790664483975023e-06, "loss": 0.3098, "step": 14627 }, { "epoch": 0.725631231707922, "grad_norm": 8.272173881530762, "learning_rate": 1.7784644678234896e-06, "loss": 0.3762, "step": 14628 }, { "epoch": 0.7256808373431222, "grad_norm": 9.968087196350098, "learning_rate": 1.7778625670805827e-06, "loss": 0.3492, "step": 14629 }, { "epoch": 0.7257304429783223, "grad_norm": 7.676144123077393, "learning_rate": 1.7772607461836988e-06, "loss": 0.2721, "step": 14630 }, { "epoch": 0.7257800486135225, "grad_norm": 7.902595043182373, "learning_rate": 1.7766590051477517e-06, "loss": 0.244, "step": 14631 }, { "epoch": 0.7258296542487227, "grad_norm": 11.205348014831543, "learning_rate": 1.7760573439876537e-06, "loss": 0.3197, "step": 14632 }, { "epoch": 0.7258792598839228, "grad_norm": 4.739011764526367, "learning_rate": 1.7754557627183106e-06, "loss": 0.1496, "step": 14633 }, { "epoch": 0.725928865519123, "grad_norm": 7.115948677062988, "learning_rate": 1.7748542613546348e-06, "loss": 0.2972, "step": 14634 }, { "epoch": 0.7259784711543231, "grad_norm": 4.865890026092529, "learning_rate": 1.7742528399115267e-06, "loss": 0.2697, "step": 14635 }, { "epoch": 0.7260280767895233, "grad_norm": 9.514301300048828, "learning_rate": 1.773651498403895e-06, "loss": 0.324, "step": 14636 }, { "epoch": 0.7260776824247235, "grad_norm": 6.196375370025635, "learning_rate": 1.7730502368466373e-06, "loss": 0.2783, "step": 14637 }, { "epoch": 0.7261272880599237, "grad_norm": 11.321942329406738, "learning_rate": 1.7724490552546547e-06, "loss": 0.3764, "step": 14638 }, { "epoch": 0.7261768936951237, "grad_norm": 6.348402976989746, "learning_rate": 1.771847953642844e-06, "loss": 0.2782, "step": 14639 }, { "epoch": 0.7262264993303239, "grad_norm": 11.402287483215332, "learning_rate": 1.7712469320261033e-06, "loss": 0.3026, "step": 14640 }, { "epoch": 0.7262761049655241, "grad_norm": 7.410990238189697, "learning_rate": 1.7706459904193207e-06, "loss": 0.3537, "step": 14641 }, { "epoch": 0.7263257106007243, "grad_norm": 11.476454734802246, "learning_rate": 1.7700451288373944e-06, "loss": 0.4189, "step": 14642 }, { "epoch": 0.7263753162359244, "grad_norm": 8.984256744384766, "learning_rate": 1.7694443472952094e-06, "loss": 0.3676, "step": 14643 }, { "epoch": 0.7264249218711245, "grad_norm": 6.7861104011535645, "learning_rate": 1.7688436458076547e-06, "loss": 0.1752, "step": 14644 }, { "epoch": 0.7264745275063247, "grad_norm": 16.098894119262695, "learning_rate": 1.768243024389616e-06, "loss": 0.4781, "step": 14645 }, { "epoch": 0.7265241331415249, "grad_norm": 4.815096378326416, "learning_rate": 1.767642483055979e-06, "loss": 0.3385, "step": 14646 }, { "epoch": 0.726573738776725, "grad_norm": 3.9996237754821777, "learning_rate": 1.7670420218216199e-06, "loss": 0.2314, "step": 14647 }, { "epoch": 0.7266233444119252, "grad_norm": 9.120519638061523, "learning_rate": 1.766441640701425e-06, "loss": 0.2844, "step": 14648 }, { "epoch": 0.7266729500471254, "grad_norm": 4.7076239585876465, "learning_rate": 1.7658413397102674e-06, "loss": 0.1723, "step": 14649 }, { "epoch": 0.7267225556823255, "grad_norm": 8.100176811218262, "learning_rate": 1.7652411188630246e-06, "loss": 0.2649, "step": 14650 }, { "epoch": 0.7267721613175256, "grad_norm": 16.145986557006836, "learning_rate": 1.7646409781745705e-06, "loss": 0.5312, "step": 14651 }, { "epoch": 0.7268217669527258, "grad_norm": 7.6196088790893555, "learning_rate": 1.7640409176597756e-06, "loss": 0.364, "step": 14652 }, { "epoch": 0.726871372587926, "grad_norm": 11.806659698486328, "learning_rate": 1.763440937333511e-06, "loss": 0.3695, "step": 14653 }, { "epoch": 0.7269209782231262, "grad_norm": 8.458757400512695, "learning_rate": 1.7628410372106458e-06, "loss": 0.4031, "step": 14654 }, { "epoch": 0.7269705838583264, "grad_norm": 3.946479082107544, "learning_rate": 1.7622412173060406e-06, "loss": 0.2451, "step": 14655 }, { "epoch": 0.7270201894935264, "grad_norm": 5.421837329864502, "learning_rate": 1.7616414776345657e-06, "loss": 0.3326, "step": 14656 }, { "epoch": 0.7270697951287266, "grad_norm": 4.213169097900391, "learning_rate": 1.7610418182110783e-06, "loss": 0.2533, "step": 14657 }, { "epoch": 0.7271194007639268, "grad_norm": 10.728120803833008, "learning_rate": 1.7604422390504399e-06, "loss": 0.3344, "step": 14658 }, { "epoch": 0.727169006399127, "grad_norm": 5.644918918609619, "learning_rate": 1.7598427401675083e-06, "loss": 0.2208, "step": 14659 }, { "epoch": 0.7272186120343271, "grad_norm": 8.873764991760254, "learning_rate": 1.7592433215771392e-06, "loss": 0.2567, "step": 14660 }, { "epoch": 0.7272682176695272, "grad_norm": 8.187687873840332, "learning_rate": 1.7586439832941866e-06, "loss": 0.3278, "step": 14661 }, { "epoch": 0.7273178233047274, "grad_norm": 7.661758899688721, "learning_rate": 1.758044725333502e-06, "loss": 0.3121, "step": 14662 }, { "epoch": 0.7273674289399276, "grad_norm": 6.702357769012451, "learning_rate": 1.7574455477099378e-06, "loss": 0.229, "step": 14663 }, { "epoch": 0.7274170345751277, "grad_norm": 26.810087203979492, "learning_rate": 1.7568464504383376e-06, "loss": 0.7771, "step": 14664 }, { "epoch": 0.7274666402103279, "grad_norm": 7.998668193817139, "learning_rate": 1.7562474335335495e-06, "loss": 0.2598, "step": 14665 }, { "epoch": 0.7275162458455281, "grad_norm": 7.648447036743164, "learning_rate": 1.7556484970104177e-06, "loss": 0.2336, "step": 14666 }, { "epoch": 0.7275658514807282, "grad_norm": 6.054469108581543, "learning_rate": 1.755049640883783e-06, "loss": 0.2979, "step": 14667 }, { "epoch": 0.7276154571159283, "grad_norm": 5.388190269470215, "learning_rate": 1.7544508651684866e-06, "loss": 0.2396, "step": 14668 }, { "epoch": 0.7276650627511285, "grad_norm": 4.356058597564697, "learning_rate": 1.753852169879367e-06, "loss": 0.3133, "step": 14669 }, { "epoch": 0.7277146683863287, "grad_norm": 5.259429931640625, "learning_rate": 1.7532535550312563e-06, "loss": 0.2195, "step": 14670 }, { "epoch": 0.7277642740215289, "grad_norm": 9.147750854492188, "learning_rate": 1.752655020638994e-06, "loss": 0.3137, "step": 14671 }, { "epoch": 0.7278138796567291, "grad_norm": 5.915578842163086, "learning_rate": 1.7520565667174082e-06, "loss": 0.2519, "step": 14672 }, { "epoch": 0.7278634852919291, "grad_norm": 12.894857406616211, "learning_rate": 1.7514581932813297e-06, "loss": 0.4397, "step": 14673 }, { "epoch": 0.7279130909271293, "grad_norm": 6.240900039672852, "learning_rate": 1.7508599003455861e-06, "loss": 0.1996, "step": 14674 }, { "epoch": 0.7279626965623295, "grad_norm": 5.566923141479492, "learning_rate": 1.7502616879250045e-06, "loss": 0.3012, "step": 14675 }, { "epoch": 0.7280123021975297, "grad_norm": 5.324392318725586, "learning_rate": 1.7496635560344076e-06, "loss": 0.2495, "step": 14676 }, { "epoch": 0.7280619078327298, "grad_norm": 4.459885597229004, "learning_rate": 1.7490655046886208e-06, "loss": 0.2291, "step": 14677 }, { "epoch": 0.7281115134679299, "grad_norm": 16.820661544799805, "learning_rate": 1.748467533902457e-06, "loss": 0.3003, "step": 14678 }, { "epoch": 0.7281611191031301, "grad_norm": 10.183398246765137, "learning_rate": 1.7478696436907422e-06, "loss": 0.3172, "step": 14679 }, { "epoch": 0.7282107247383303, "grad_norm": 6.8654961585998535, "learning_rate": 1.747271834068287e-06, "loss": 0.2965, "step": 14680 }, { "epoch": 0.7282603303735304, "grad_norm": 8.510756492614746, "learning_rate": 1.7466741050499075e-06, "loss": 0.3809, "step": 14681 }, { "epoch": 0.7283099360087306, "grad_norm": 10.539732933044434, "learning_rate": 1.746076456650415e-06, "loss": 0.3025, "step": 14682 }, { "epoch": 0.7283595416439308, "grad_norm": 6.9419097900390625, "learning_rate": 1.7454788888846214e-06, "loss": 0.2267, "step": 14683 }, { "epoch": 0.7284091472791309, "grad_norm": 5.423163890838623, "learning_rate": 1.7448814017673299e-06, "loss": 0.293, "step": 14684 }, { "epoch": 0.728458752914331, "grad_norm": 11.478523254394531, "learning_rate": 1.7442839953133533e-06, "loss": 0.228, "step": 14685 }, { "epoch": 0.7285083585495312, "grad_norm": 6.931241512298584, "learning_rate": 1.7436866695374898e-06, "loss": 0.3279, "step": 14686 }, { "epoch": 0.7285579641847314, "grad_norm": 6.275190830230713, "learning_rate": 1.7430894244545444e-06, "loss": 0.3545, "step": 14687 }, { "epoch": 0.7286075698199316, "grad_norm": 5.663975238800049, "learning_rate": 1.7424922600793159e-06, "loss": 0.3193, "step": 14688 }, { "epoch": 0.7286571754551318, "grad_norm": 13.145325660705566, "learning_rate": 1.7418951764266023e-06, "loss": 0.3057, "step": 14689 }, { "epoch": 0.7287067810903318, "grad_norm": 11.212374687194824, "learning_rate": 1.7412981735112005e-06, "loss": 0.3269, "step": 14690 }, { "epoch": 0.728756386725532, "grad_norm": 8.039827346801758, "learning_rate": 1.740701251347906e-06, "loss": 0.3324, "step": 14691 }, { "epoch": 0.7288059923607322, "grad_norm": 7.038829326629639, "learning_rate": 1.7401044099515052e-06, "loss": 0.3098, "step": 14692 }, { "epoch": 0.7288555979959324, "grad_norm": 10.446744918823242, "learning_rate": 1.7395076493367946e-06, "loss": 0.2741, "step": 14693 }, { "epoch": 0.7289052036311325, "grad_norm": 5.741715908050537, "learning_rate": 1.7389109695185579e-06, "loss": 0.2693, "step": 14694 }, { "epoch": 0.7289548092663326, "grad_norm": 8.322027206420898, "learning_rate": 1.7383143705115829e-06, "loss": 0.3427, "step": 14695 }, { "epoch": 0.7290044149015328, "grad_norm": 4.928307056427002, "learning_rate": 1.7377178523306531e-06, "loss": 0.2387, "step": 14696 }, { "epoch": 0.729054020536733, "grad_norm": 7.1681809425354, "learning_rate": 1.7371214149905523e-06, "loss": 0.2385, "step": 14697 }, { "epoch": 0.7291036261719331, "grad_norm": 10.524178504943848, "learning_rate": 1.736525058506055e-06, "loss": 0.2937, "step": 14698 }, { "epoch": 0.7291532318071333, "grad_norm": 5.048924446105957, "learning_rate": 1.7359287828919469e-06, "loss": 0.2755, "step": 14699 }, { "epoch": 0.7292028374423335, "grad_norm": 9.389189720153809, "learning_rate": 1.735332588162998e-06, "loss": 0.3928, "step": 14700 }, { "epoch": 0.7292524430775336, "grad_norm": 8.197488784790039, "learning_rate": 1.7347364743339845e-06, "loss": 0.3614, "step": 14701 }, { "epoch": 0.7293020487127337, "grad_norm": 7.638871669769287, "learning_rate": 1.7341404414196783e-06, "loss": 0.3416, "step": 14702 }, { "epoch": 0.7293516543479339, "grad_norm": 6.144380569458008, "learning_rate": 1.7335444894348508e-06, "loss": 0.2179, "step": 14703 }, { "epoch": 0.7294012599831341, "grad_norm": 5.739768981933594, "learning_rate": 1.7329486183942652e-06, "loss": 0.2985, "step": 14704 }, { "epoch": 0.7294508656183343, "grad_norm": 8.882933616638184, "learning_rate": 1.732352828312694e-06, "loss": 0.2803, "step": 14705 }, { "epoch": 0.7295004712535345, "grad_norm": 10.835201263427734, "learning_rate": 1.7317571192048944e-06, "loss": 0.2367, "step": 14706 }, { "epoch": 0.7295500768887345, "grad_norm": 25.11847496032715, "learning_rate": 1.731161491085635e-06, "loss": 0.636, "step": 14707 }, { "epoch": 0.7295996825239347, "grad_norm": 9.160165786743164, "learning_rate": 1.7305659439696708e-06, "loss": 0.3332, "step": 14708 }, { "epoch": 0.7296492881591349, "grad_norm": 11.342479705810547, "learning_rate": 1.7299704778717618e-06, "loss": 0.4282, "step": 14709 }, { "epoch": 0.7296988937943351, "grad_norm": 5.951350212097168, "learning_rate": 1.7293750928066633e-06, "loss": 0.3066, "step": 14710 }, { "epoch": 0.7297484994295352, "grad_norm": 5.258026599884033, "learning_rate": 1.72877978878913e-06, "loss": 0.2109, "step": 14711 }, { "epoch": 0.7297981050647353, "grad_norm": 7.418140411376953, "learning_rate": 1.7281845658339125e-06, "loss": 0.3357, "step": 14712 }, { "epoch": 0.7298477106999355, "grad_norm": 5.807824611663818, "learning_rate": 1.7275894239557634e-06, "loss": 0.1831, "step": 14713 }, { "epoch": 0.7298973163351357, "grad_norm": 7.453897953033447, "learning_rate": 1.7269943631694268e-06, "loss": 0.3205, "step": 14714 }, { "epoch": 0.7299469219703358, "grad_norm": 5.3293633460998535, "learning_rate": 1.7263993834896508e-06, "loss": 0.254, "step": 14715 }, { "epoch": 0.729996527605536, "grad_norm": 6.778207778930664, "learning_rate": 1.725804484931179e-06, "loss": 0.2985, "step": 14716 }, { "epoch": 0.7300461332407362, "grad_norm": 6.822697639465332, "learning_rate": 1.7252096675087526e-06, "loss": 0.1692, "step": 14717 }, { "epoch": 0.7300957388759363, "grad_norm": 4.989416599273682, "learning_rate": 1.7246149312371125e-06, "loss": 0.2472, "step": 14718 }, { "epoch": 0.7301453445111364, "grad_norm": 6.966651439666748, "learning_rate": 1.7240202761309953e-06, "loss": 0.2621, "step": 14719 }, { "epoch": 0.7301949501463366, "grad_norm": 7.3153486251831055, "learning_rate": 1.7234257022051399e-06, "loss": 0.2446, "step": 14720 }, { "epoch": 0.7302445557815368, "grad_norm": 7.624233722686768, "learning_rate": 1.722831209474276e-06, "loss": 0.3031, "step": 14721 }, { "epoch": 0.730294161416737, "grad_norm": 7.1392903327941895, "learning_rate": 1.7222367979531373e-06, "loss": 0.2606, "step": 14722 }, { "epoch": 0.7303437670519372, "grad_norm": 4.406327247619629, "learning_rate": 1.721642467656453e-06, "loss": 0.2858, "step": 14723 }, { "epoch": 0.7303933726871372, "grad_norm": 6.347781658172607, "learning_rate": 1.7210482185989513e-06, "loss": 0.2667, "step": 14724 }, { "epoch": 0.7304429783223374, "grad_norm": 6.979142665863037, "learning_rate": 1.7204540507953583e-06, "loss": 0.3733, "step": 14725 }, { "epoch": 0.7304925839575376, "grad_norm": 11.44902229309082, "learning_rate": 1.7198599642603975e-06, "loss": 0.3629, "step": 14726 }, { "epoch": 0.7305421895927378, "grad_norm": 7.303962707519531, "learning_rate": 1.7192659590087902e-06, "loss": 0.2722, "step": 14727 }, { "epoch": 0.7305917952279379, "grad_norm": 5.31099796295166, "learning_rate": 1.7186720350552584e-06, "loss": 0.2778, "step": 14728 }, { "epoch": 0.730641400863138, "grad_norm": 7.513054847717285, "learning_rate": 1.718078192414514e-06, "loss": 0.2858, "step": 14729 }, { "epoch": 0.7306910064983382, "grad_norm": 15.373307228088379, "learning_rate": 1.7174844311012806e-06, "loss": 0.3452, "step": 14730 }, { "epoch": 0.7307406121335384, "grad_norm": 12.980588912963867, "learning_rate": 1.7168907511302657e-06, "loss": 0.3195, "step": 14731 }, { "epoch": 0.7307902177687385, "grad_norm": 8.023297309875488, "learning_rate": 1.7162971525161836e-06, "loss": 0.2629, "step": 14732 }, { "epoch": 0.7308398234039387, "grad_norm": 4.536343574523926, "learning_rate": 1.7157036352737434e-06, "loss": 0.3372, "step": 14733 }, { "epoch": 0.7308894290391389, "grad_norm": 4.9826178550720215, "learning_rate": 1.7151101994176539e-06, "loss": 0.3306, "step": 14734 }, { "epoch": 0.730939034674339, "grad_norm": 8.731635093688965, "learning_rate": 1.7145168449626165e-06, "loss": 0.2797, "step": 14735 }, { "epoch": 0.7309886403095391, "grad_norm": 4.527810096740723, "learning_rate": 1.713923571923341e-06, "loss": 0.2179, "step": 14736 }, { "epoch": 0.7310382459447393, "grad_norm": 6.671092987060547, "learning_rate": 1.7133303803145246e-06, "loss": 0.3585, "step": 14737 }, { "epoch": 0.7310878515799395, "grad_norm": 8.490083694458008, "learning_rate": 1.7127372701508676e-06, "loss": 0.301, "step": 14738 }, { "epoch": 0.7311374572151397, "grad_norm": 7.45160436630249, "learning_rate": 1.7121442414470684e-06, "loss": 0.2938, "step": 14739 }, { "epoch": 0.7311870628503399, "grad_norm": 7.185000419616699, "learning_rate": 1.7115512942178242e-06, "loss": 0.3379, "step": 14740 }, { "epoch": 0.7312366684855399, "grad_norm": 4.356472015380859, "learning_rate": 1.7109584284778225e-06, "loss": 0.2897, "step": 14741 }, { "epoch": 0.7312862741207401, "grad_norm": 10.121278762817383, "learning_rate": 1.7103656442417621e-06, "loss": 0.3308, "step": 14742 }, { "epoch": 0.7313358797559403, "grad_norm": 7.277932167053223, "learning_rate": 1.7097729415243258e-06, "loss": 0.2147, "step": 14743 }, { "epoch": 0.7313854853911405, "grad_norm": 4.500103950500488, "learning_rate": 1.7091803203402079e-06, "loss": 0.2862, "step": 14744 }, { "epoch": 0.7314350910263406, "grad_norm": 4.807966232299805, "learning_rate": 1.7085877807040885e-06, "loss": 0.224, "step": 14745 }, { "epoch": 0.7314846966615407, "grad_norm": 5.702267646789551, "learning_rate": 1.7079953226306528e-06, "loss": 0.2125, "step": 14746 }, { "epoch": 0.7315343022967409, "grad_norm": 5.34137487411499, "learning_rate": 1.707402946134582e-06, "loss": 0.2455, "step": 14747 }, { "epoch": 0.7315839079319411, "grad_norm": 10.400245666503906, "learning_rate": 1.7068106512305572e-06, "loss": 0.3027, "step": 14748 }, { "epoch": 0.7316335135671412, "grad_norm": 5.563696384429932, "learning_rate": 1.7062184379332513e-06, "loss": 0.2183, "step": 14749 }, { "epoch": 0.7316831192023414, "grad_norm": 5.4527587890625, "learning_rate": 1.7056263062573453e-06, "loss": 0.2711, "step": 14750 }, { "epoch": 0.7317327248375416, "grad_norm": 5.238048076629639, "learning_rate": 1.7050342562175083e-06, "loss": 0.2025, "step": 14751 }, { "epoch": 0.7317823304727417, "grad_norm": 60.00263595581055, "learning_rate": 1.7044422878284123e-06, "loss": 0.2407, "step": 14752 }, { "epoch": 0.7318319361079418, "grad_norm": 8.0656099319458, "learning_rate": 1.7038504011047274e-06, "loss": 0.2254, "step": 14753 }, { "epoch": 0.731881541743142, "grad_norm": 6.931682109832764, "learning_rate": 1.7032585960611225e-06, "loss": 0.3123, "step": 14754 }, { "epoch": 0.7319311473783422, "grad_norm": 5.137237071990967, "learning_rate": 1.7026668727122574e-06, "loss": 0.2764, "step": 14755 }, { "epoch": 0.7319807530135424, "grad_norm": 3.620119094848633, "learning_rate": 1.7020752310728023e-06, "loss": 0.202, "step": 14756 }, { "epoch": 0.7320303586487426, "grad_norm": 8.475930213928223, "learning_rate": 1.701483671157413e-06, "loss": 0.2909, "step": 14757 }, { "epoch": 0.7320799642839426, "grad_norm": 7.115651607513428, "learning_rate": 1.7008921929807504e-06, "loss": 0.2264, "step": 14758 }, { "epoch": 0.7321295699191428, "grad_norm": 3.6562342643737793, "learning_rate": 1.7003007965574713e-06, "loss": 0.2798, "step": 14759 }, { "epoch": 0.732179175554343, "grad_norm": 10.236499786376953, "learning_rate": 1.6997094819022313e-06, "loss": 0.2498, "step": 14760 }, { "epoch": 0.7322287811895432, "grad_norm": 15.978084564208984, "learning_rate": 1.6991182490296832e-06, "loss": 0.3518, "step": 14761 }, { "epoch": 0.7322783868247433, "grad_norm": 6.82235860824585, "learning_rate": 1.69852709795448e-06, "loss": 0.2489, "step": 14762 }, { "epoch": 0.7323279924599434, "grad_norm": 6.194924354553223, "learning_rate": 1.697936028691265e-06, "loss": 0.2603, "step": 14763 }, { "epoch": 0.7323775980951436, "grad_norm": 5.330188751220703, "learning_rate": 1.6973450412546927e-06, "loss": 0.349, "step": 14764 }, { "epoch": 0.7324272037303438, "grad_norm": 7.043915271759033, "learning_rate": 1.696754135659402e-06, "loss": 0.3112, "step": 14765 }, { "epoch": 0.7324768093655439, "grad_norm": 6.930108547210693, "learning_rate": 1.6961633119200382e-06, "loss": 0.3419, "step": 14766 }, { "epoch": 0.7325264150007441, "grad_norm": 4.814866542816162, "learning_rate": 1.695572570051242e-06, "loss": 0.2634, "step": 14767 }, { "epoch": 0.7325760206359443, "grad_norm": 5.950831890106201, "learning_rate": 1.6949819100676524e-06, "loss": 0.2346, "step": 14768 }, { "epoch": 0.7326256262711444, "grad_norm": 4.91056489944458, "learning_rate": 1.694391331983906e-06, "loss": 0.2805, "step": 14769 }, { "epoch": 0.7326752319063445, "grad_norm": 5.753555774688721, "learning_rate": 1.6938008358146396e-06, "loss": 0.2651, "step": 14770 }, { "epoch": 0.7327248375415447, "grad_norm": 6.005724906921387, "learning_rate": 1.693210421574482e-06, "loss": 0.2829, "step": 14771 }, { "epoch": 0.7327744431767449, "grad_norm": 8.663961410522461, "learning_rate": 1.6926200892780663e-06, "loss": 0.329, "step": 14772 }, { "epoch": 0.7328240488119451, "grad_norm": 7.711694240570068, "learning_rate": 1.6920298389400207e-06, "loss": 0.3231, "step": 14773 }, { "epoch": 0.7328736544471453, "grad_norm": 10.336652755737305, "learning_rate": 1.691439670574972e-06, "loss": 0.2736, "step": 14774 }, { "epoch": 0.7329232600823453, "grad_norm": 11.832488059997559, "learning_rate": 1.6908495841975453e-06, "loss": 0.3553, "step": 14775 }, { "epoch": 0.7329728657175455, "grad_norm": 5.915650367736816, "learning_rate": 1.6902595798223626e-06, "loss": 0.2655, "step": 14776 }, { "epoch": 0.7330224713527457, "grad_norm": 5.33955192565918, "learning_rate": 1.6896696574640458e-06, "loss": 0.232, "step": 14777 }, { "epoch": 0.7330720769879459, "grad_norm": 18.455263137817383, "learning_rate": 1.6890798171372096e-06, "loss": 0.3173, "step": 14778 }, { "epoch": 0.733121682623146, "grad_norm": 4.710423469543457, "learning_rate": 1.6884900588564763e-06, "loss": 0.2028, "step": 14779 }, { "epoch": 0.7331712882583461, "grad_norm": 5.300871849060059, "learning_rate": 1.6879003826364553e-06, "loss": 0.3034, "step": 14780 }, { "epoch": 0.7332208938935463, "grad_norm": 25.87107276916504, "learning_rate": 1.6873107884917616e-06, "loss": 0.3743, "step": 14781 }, { "epoch": 0.7332704995287465, "grad_norm": 7.267427444458008, "learning_rate": 1.686721276437005e-06, "loss": 0.2993, "step": 14782 }, { "epoch": 0.7333201051639466, "grad_norm": 6.131269931793213, "learning_rate": 1.6861318464867938e-06, "loss": 0.1766, "step": 14783 }, { "epoch": 0.7333697107991468, "grad_norm": 11.954054832458496, "learning_rate": 1.6855424986557346e-06, "loss": 0.3506, "step": 14784 }, { "epoch": 0.733419316434347, "grad_norm": 5.978689670562744, "learning_rate": 1.6849532329584327e-06, "loss": 0.2893, "step": 14785 }, { "epoch": 0.7334689220695471, "grad_norm": 4.381046772003174, "learning_rate": 1.6843640494094865e-06, "loss": 0.2061, "step": 14786 }, { "epoch": 0.7335185277047472, "grad_norm": 5.250722408294678, "learning_rate": 1.6837749480235023e-06, "loss": 0.2979, "step": 14787 }, { "epoch": 0.7335681333399474, "grad_norm": 8.81259822845459, "learning_rate": 1.6831859288150731e-06, "loss": 0.3658, "step": 14788 }, { "epoch": 0.7336177389751476, "grad_norm": 3.3370187282562256, "learning_rate": 1.6825969917987972e-06, "loss": 0.1997, "step": 14789 }, { "epoch": 0.7336673446103478, "grad_norm": 18.775131225585938, "learning_rate": 1.6820081369892682e-06, "loss": 0.325, "step": 14790 }, { "epoch": 0.733716950245548, "grad_norm": 12.03469181060791, "learning_rate": 1.6814193644010801e-06, "loss": 0.2549, "step": 14791 }, { "epoch": 0.733766555880748, "grad_norm": 6.104595184326172, "learning_rate": 1.6808306740488185e-06, "loss": 0.2316, "step": 14792 }, { "epoch": 0.7338161615159482, "grad_norm": 8.313075065612793, "learning_rate": 1.680242065947077e-06, "loss": 0.2026, "step": 14793 }, { "epoch": 0.7338657671511484, "grad_norm": 5.991031169891357, "learning_rate": 1.6796535401104368e-06, "loss": 0.2375, "step": 14794 }, { "epoch": 0.7339153727863486, "grad_norm": 11.46576976776123, "learning_rate": 1.6790650965534843e-06, "loss": 0.3046, "step": 14795 }, { "epoch": 0.7339649784215487, "grad_norm": 10.056684494018555, "learning_rate": 1.6784767352908011e-06, "loss": 0.3178, "step": 14796 }, { "epoch": 0.7340145840567488, "grad_norm": 7.596285343170166, "learning_rate": 1.6778884563369668e-06, "loss": 0.3392, "step": 14797 }, { "epoch": 0.734064189691949, "grad_norm": 8.046589851379395, "learning_rate": 1.677300259706559e-06, "loss": 0.3227, "step": 14798 }, { "epoch": 0.7341137953271492, "grad_norm": 6.369363307952881, "learning_rate": 1.6767121454141556e-06, "loss": 0.249, "step": 14799 }, { "epoch": 0.7341634009623493, "grad_norm": 6.285698413848877, "learning_rate": 1.6761241134743256e-06, "loss": 0.2526, "step": 14800 }, { "epoch": 0.7342130065975495, "grad_norm": 4.299650192260742, "learning_rate": 1.6755361639016465e-06, "loss": 0.2326, "step": 14801 }, { "epoch": 0.7342626122327497, "grad_norm": 6.755467891693115, "learning_rate": 1.6749482967106834e-06, "loss": 0.2452, "step": 14802 }, { "epoch": 0.7343122178679498, "grad_norm": 12.690414428710938, "learning_rate": 1.6743605119160056e-06, "loss": 0.3281, "step": 14803 }, { "epoch": 0.7343618235031499, "grad_norm": 14.550504684448242, "learning_rate": 1.6737728095321786e-06, "loss": 0.3276, "step": 14804 }, { "epoch": 0.7344114291383501, "grad_norm": 6.838013648986816, "learning_rate": 1.6731851895737683e-06, "loss": 0.3833, "step": 14805 }, { "epoch": 0.7344610347735503, "grad_norm": 6.944370269775391, "learning_rate": 1.67259765205533e-06, "loss": 0.309, "step": 14806 }, { "epoch": 0.7345106404087505, "grad_norm": 7.109377384185791, "learning_rate": 1.6720101969914304e-06, "loss": 0.3409, "step": 14807 }, { "epoch": 0.7345602460439505, "grad_norm": 8.732752799987793, "learning_rate": 1.671422824396622e-06, "loss": 0.3233, "step": 14808 }, { "epoch": 0.7346098516791507, "grad_norm": 11.046456336975098, "learning_rate": 1.6708355342854614e-06, "loss": 0.3586, "step": 14809 }, { "epoch": 0.7346594573143509, "grad_norm": 5.448187351226807, "learning_rate": 1.6702483266725023e-06, "loss": 0.28, "step": 14810 }, { "epoch": 0.7347090629495511, "grad_norm": 6.596301078796387, "learning_rate": 1.6696612015722979e-06, "loss": 0.2715, "step": 14811 }, { "epoch": 0.7347586685847513, "grad_norm": 4.65579891204834, "learning_rate": 1.669074158999392e-06, "loss": 0.2452, "step": 14812 }, { "epoch": 0.7348082742199514, "grad_norm": 6.559710502624512, "learning_rate": 1.6684871989683382e-06, "loss": 0.3006, "step": 14813 }, { "epoch": 0.7348578798551515, "grad_norm": 4.58456563949585, "learning_rate": 1.667900321493675e-06, "loss": 0.2404, "step": 14814 }, { "epoch": 0.7349074854903517, "grad_norm": 7.748855113983154, "learning_rate": 1.6673135265899532e-06, "loss": 0.3172, "step": 14815 }, { "epoch": 0.7349570911255519, "grad_norm": 4.610682487487793, "learning_rate": 1.6667268142717076e-06, "loss": 0.2183, "step": 14816 }, { "epoch": 0.735006696760752, "grad_norm": 5.998157978057861, "learning_rate": 1.6661401845534798e-06, "loss": 0.2718, "step": 14817 }, { "epoch": 0.7350563023959522, "grad_norm": 8.262158393859863, "learning_rate": 1.665553637449806e-06, "loss": 0.281, "step": 14818 }, { "epoch": 0.7351059080311524, "grad_norm": 9.486367225646973, "learning_rate": 1.6649671729752226e-06, "loss": 0.2714, "step": 14819 }, { "epoch": 0.7351555136663525, "grad_norm": 6.872770309448242, "learning_rate": 1.6643807911442588e-06, "loss": 0.3022, "step": 14820 }, { "epoch": 0.7352051193015526, "grad_norm": 3.8677430152893066, "learning_rate": 1.6637944919714505e-06, "loss": 0.2948, "step": 14821 }, { "epoch": 0.7352547249367528, "grad_norm": 5.8982038497924805, "learning_rate": 1.6632082754713224e-06, "loss": 0.3214, "step": 14822 }, { "epoch": 0.735304330571953, "grad_norm": 5.502859115600586, "learning_rate": 1.6626221416584027e-06, "loss": 0.2673, "step": 14823 }, { "epoch": 0.7353539362071532, "grad_norm": 6.989733695983887, "learning_rate": 1.662036090547216e-06, "loss": 0.334, "step": 14824 }, { "epoch": 0.7354035418423532, "grad_norm": 5.867339134216309, "learning_rate": 1.6614501221522849e-06, "loss": 0.2405, "step": 14825 }, { "epoch": 0.7354531474775534, "grad_norm": 6.15355110168457, "learning_rate": 1.66086423648813e-06, "loss": 0.2231, "step": 14826 }, { "epoch": 0.7355027531127536, "grad_norm": 4.1184773445129395, "learning_rate": 1.6602784335692695e-06, "loss": 0.2839, "step": 14827 }, { "epoch": 0.7355523587479538, "grad_norm": 5.320981979370117, "learning_rate": 1.659692713410222e-06, "loss": 0.2515, "step": 14828 }, { "epoch": 0.735601964383154, "grad_norm": 5.849980354309082, "learning_rate": 1.6591070760254984e-06, "loss": 0.3229, "step": 14829 }, { "epoch": 0.7356515700183541, "grad_norm": 5.9726691246032715, "learning_rate": 1.6585215214296125e-06, "loss": 0.327, "step": 14830 }, { "epoch": 0.7357011756535542, "grad_norm": 12.301956176757812, "learning_rate": 1.657936049637075e-06, "loss": 0.3499, "step": 14831 }, { "epoch": 0.7357507812887544, "grad_norm": 5.6352620124816895, "learning_rate": 1.6573506606623941e-06, "loss": 0.3056, "step": 14832 }, { "epoch": 0.7358003869239546, "grad_norm": 7.050224304199219, "learning_rate": 1.6567653545200758e-06, "loss": 0.1926, "step": 14833 }, { "epoch": 0.7358499925591547, "grad_norm": 4.610777854919434, "learning_rate": 1.656180131224624e-06, "loss": 0.2375, "step": 14834 }, { "epoch": 0.7358995981943549, "grad_norm": 6.522546768188477, "learning_rate": 1.6555949907905417e-06, "loss": 0.3653, "step": 14835 }, { "epoch": 0.7359492038295551, "grad_norm": 8.368680000305176, "learning_rate": 1.6550099332323295e-06, "loss": 0.317, "step": 14836 }, { "epoch": 0.7359988094647552, "grad_norm": 8.45448112487793, "learning_rate": 1.654424958564481e-06, "loss": 0.3399, "step": 14837 }, { "epoch": 0.7360484150999553, "grad_norm": 6.532418251037598, "learning_rate": 1.6538400668014986e-06, "loss": 0.3245, "step": 14838 }, { "epoch": 0.7360980207351555, "grad_norm": 5.930272579193115, "learning_rate": 1.653255257957872e-06, "loss": 0.2524, "step": 14839 }, { "epoch": 0.7361476263703557, "grad_norm": 5.617612361907959, "learning_rate": 1.6526705320480934e-06, "loss": 0.3273, "step": 14840 }, { "epoch": 0.7361972320055559, "grad_norm": 6.068315029144287, "learning_rate": 1.6520858890866531e-06, "loss": 0.2643, "step": 14841 }, { "epoch": 0.7362468376407559, "grad_norm": 12.189260482788086, "learning_rate": 1.651501329088041e-06, "loss": 0.3894, "step": 14842 }, { "epoch": 0.7362964432759561, "grad_norm": 7.051865577697754, "learning_rate": 1.6509168520667374e-06, "loss": 0.3132, "step": 14843 }, { "epoch": 0.7363460489111563, "grad_norm": 27.771211624145508, "learning_rate": 1.650332458037232e-06, "loss": 0.353, "step": 14844 }, { "epoch": 0.7363956545463565, "grad_norm": 6.840780735015869, "learning_rate": 1.649748147014002e-06, "loss": 0.271, "step": 14845 }, { "epoch": 0.7364452601815566, "grad_norm": 4.08332633972168, "learning_rate": 1.6491639190115289e-06, "loss": 0.2513, "step": 14846 }, { "epoch": 0.7364948658167568, "grad_norm": 4.480690956115723, "learning_rate": 1.648579774044289e-06, "loss": 0.3122, "step": 14847 }, { "epoch": 0.7365444714519569, "grad_norm": 4.207684516906738, "learning_rate": 1.6479957121267598e-06, "loss": 0.2407, "step": 14848 }, { "epoch": 0.7365940770871571, "grad_norm": 5.220880508422852, "learning_rate": 1.6474117332734102e-06, "loss": 0.2975, "step": 14849 }, { "epoch": 0.7366436827223573, "grad_norm": 4.177001476287842, "learning_rate": 1.6468278374987179e-06, "loss": 0.1646, "step": 14850 }, { "epoch": 0.7366932883575574, "grad_norm": 6.518060207366943, "learning_rate": 1.6462440248171452e-06, "loss": 0.2623, "step": 14851 }, { "epoch": 0.7367428939927576, "grad_norm": 6.159896373748779, "learning_rate": 1.6456602952431654e-06, "loss": 0.2531, "step": 14852 }, { "epoch": 0.7367924996279578, "grad_norm": 9.968591690063477, "learning_rate": 1.64507664879124e-06, "loss": 0.3037, "step": 14853 }, { "epoch": 0.7368421052631579, "grad_norm": 7.868618488311768, "learning_rate": 1.6444930854758323e-06, "loss": 0.3392, "step": 14854 }, { "epoch": 0.736891710898358, "grad_norm": 7.3978657722473145, "learning_rate": 1.6439096053114039e-06, "loss": 0.3042, "step": 14855 }, { "epoch": 0.7369413165335582, "grad_norm": 6.05126428604126, "learning_rate": 1.6433262083124152e-06, "loss": 0.2535, "step": 14856 }, { "epoch": 0.7369909221687584, "grad_norm": 12.106348991394043, "learning_rate": 1.6427428944933183e-06, "loss": 0.3639, "step": 14857 }, { "epoch": 0.7370405278039586, "grad_norm": 5.238063335418701, "learning_rate": 1.6421596638685743e-06, "loss": 0.2033, "step": 14858 }, { "epoch": 0.7370901334391586, "grad_norm": 7.97248649597168, "learning_rate": 1.6415765164526316e-06, "loss": 0.3239, "step": 14859 }, { "epoch": 0.7371397390743588, "grad_norm": 4.627089500427246, "learning_rate": 1.6409934522599413e-06, "loss": 0.3117, "step": 14860 }, { "epoch": 0.737189344709559, "grad_norm": 8.751897811889648, "learning_rate": 1.6404104713049528e-06, "loss": 0.2084, "step": 14861 }, { "epoch": 0.7372389503447592, "grad_norm": 9.527108192443848, "learning_rate": 1.6398275736021146e-06, "loss": 0.3306, "step": 14862 }, { "epoch": 0.7372885559799593, "grad_norm": 7.5871195793151855, "learning_rate": 1.6392447591658655e-06, "loss": 0.2655, "step": 14863 }, { "epoch": 0.7373381616151595, "grad_norm": 6.15240478515625, "learning_rate": 1.6386620280106546e-06, "loss": 0.3345, "step": 14864 }, { "epoch": 0.7373877672503596, "grad_norm": 3.6081979274749756, "learning_rate": 1.6380793801509175e-06, "loss": 0.2805, "step": 14865 }, { "epoch": 0.7374373728855598, "grad_norm": 5.913540840148926, "learning_rate": 1.6374968156010946e-06, "loss": 0.2538, "step": 14866 }, { "epoch": 0.73748697852076, "grad_norm": 6.574421405792236, "learning_rate": 1.6369143343756217e-06, "loss": 0.3129, "step": 14867 }, { "epoch": 0.7375365841559601, "grad_norm": 3.9373714923858643, "learning_rate": 1.6363319364889323e-06, "loss": 0.242, "step": 14868 }, { "epoch": 0.7375861897911603, "grad_norm": 7.86109733581543, "learning_rate": 1.635749621955459e-06, "loss": 0.2848, "step": 14869 }, { "epoch": 0.7376357954263605, "grad_norm": 5.237738609313965, "learning_rate": 1.6351673907896343e-06, "loss": 0.2943, "step": 14870 }, { "epoch": 0.7376854010615606, "grad_norm": 12.206945419311523, "learning_rate": 1.63458524300588e-06, "loss": 0.2661, "step": 14871 }, { "epoch": 0.7377350066967607, "grad_norm": 12.038354873657227, "learning_rate": 1.6340031786186289e-06, "loss": 0.3793, "step": 14872 }, { "epoch": 0.7377846123319609, "grad_norm": 8.652737617492676, "learning_rate": 1.6334211976423002e-06, "loss": 0.3389, "step": 14873 }, { "epoch": 0.7378342179671611, "grad_norm": 6.349597454071045, "learning_rate": 1.6328393000913168e-06, "loss": 0.2821, "step": 14874 }, { "epoch": 0.7378838236023613, "grad_norm": 9.032872200012207, "learning_rate": 1.6322574859800987e-06, "loss": 0.3401, "step": 14875 }, { "epoch": 0.7379334292375613, "grad_norm": 6.014041900634766, "learning_rate": 1.6316757553230639e-06, "loss": 0.2376, "step": 14876 }, { "epoch": 0.7379830348727615, "grad_norm": 6.04191780090332, "learning_rate": 1.6310941081346276e-06, "loss": 0.2365, "step": 14877 }, { "epoch": 0.7380326405079617, "grad_norm": 8.425298690795898, "learning_rate": 1.6305125444292042e-06, "loss": 0.2379, "step": 14878 }, { "epoch": 0.7380822461431619, "grad_norm": 6.8529791831970215, "learning_rate": 1.629931064221203e-06, "loss": 0.3453, "step": 14879 }, { "epoch": 0.738131851778362, "grad_norm": 4.712704658508301, "learning_rate": 1.6293496675250343e-06, "loss": 0.1789, "step": 14880 }, { "epoch": 0.7381814574135622, "grad_norm": 5.6978983879089355, "learning_rate": 1.6287683543551058e-06, "loss": 0.3202, "step": 14881 }, { "epoch": 0.7382310630487623, "grad_norm": 8.823531150817871, "learning_rate": 1.6281871247258223e-06, "loss": 0.376, "step": 14882 }, { "epoch": 0.7382806686839625, "grad_norm": 5.262380123138428, "learning_rate": 1.6276059786515868e-06, "loss": 0.3091, "step": 14883 }, { "epoch": 0.7383302743191626, "grad_norm": 4.989582061767578, "learning_rate": 1.627024916146801e-06, "loss": 0.2562, "step": 14884 }, { "epoch": 0.7383798799543628, "grad_norm": 7.149993419647217, "learning_rate": 1.6264439372258633e-06, "loss": 0.3259, "step": 14885 }, { "epoch": 0.738429485589563, "grad_norm": 4.861989498138428, "learning_rate": 1.6258630419031712e-06, "loss": 0.2516, "step": 14886 }, { "epoch": 0.7384790912247632, "grad_norm": 4.372844696044922, "learning_rate": 1.6252822301931204e-06, "loss": 0.2198, "step": 14887 }, { "epoch": 0.7385286968599633, "grad_norm": 5.239736557006836, "learning_rate": 1.6247015021101014e-06, "loss": 0.1787, "step": 14888 }, { "epoch": 0.7385783024951634, "grad_norm": 4.591830253601074, "learning_rate": 1.6241208576685053e-06, "loss": 0.2194, "step": 14889 }, { "epoch": 0.7386279081303636, "grad_norm": 7.867653846740723, "learning_rate": 1.6235402968827219e-06, "loss": 0.3381, "step": 14890 }, { "epoch": 0.7386775137655638, "grad_norm": 6.126552581787109, "learning_rate": 1.622959819767137e-06, "loss": 0.1908, "step": 14891 }, { "epoch": 0.738727119400764, "grad_norm": 10.551281929016113, "learning_rate": 1.622379426336136e-06, "loss": 0.2849, "step": 14892 }, { "epoch": 0.738776725035964, "grad_norm": 6.620589733123779, "learning_rate": 1.621799116604102e-06, "loss": 0.2785, "step": 14893 }, { "epoch": 0.7388263306711642, "grad_norm": 5.503924369812012, "learning_rate": 1.6212188905854105e-06, "loss": 0.2794, "step": 14894 }, { "epoch": 0.7388759363063644, "grad_norm": 4.943545341491699, "learning_rate": 1.6206387482944464e-06, "loss": 0.2343, "step": 14895 }, { "epoch": 0.7389255419415646, "grad_norm": 4.939916133880615, "learning_rate": 1.6200586897455816e-06, "loss": 0.1965, "step": 14896 }, { "epoch": 0.7389751475767647, "grad_norm": 7.83104133605957, "learning_rate": 1.619478714953191e-06, "loss": 0.2379, "step": 14897 }, { "epoch": 0.7390247532119649, "grad_norm": 5.230563163757324, "learning_rate": 1.6188988239316472e-06, "loss": 0.2276, "step": 14898 }, { "epoch": 0.739074358847165, "grad_norm": 8.541706085205078, "learning_rate": 1.6183190166953217e-06, "loss": 0.2949, "step": 14899 }, { "epoch": 0.7391239644823652, "grad_norm": 5.371738910675049, "learning_rate": 1.6177392932585773e-06, "loss": 0.3094, "step": 14900 }, { "epoch": 0.7391735701175653, "grad_norm": 9.900911331176758, "learning_rate": 1.6171596536357863e-06, "loss": 0.3523, "step": 14901 }, { "epoch": 0.7392231757527655, "grad_norm": 9.785030364990234, "learning_rate": 1.6165800978413066e-06, "loss": 0.3835, "step": 14902 }, { "epoch": 0.7392727813879657, "grad_norm": 14.441457748413086, "learning_rate": 1.616000625889505e-06, "loss": 0.4569, "step": 14903 }, { "epoch": 0.7393223870231659, "grad_norm": 8.484085083007812, "learning_rate": 1.6154212377947376e-06, "loss": 0.3165, "step": 14904 }, { "epoch": 0.739371992658366, "grad_norm": 10.447416305541992, "learning_rate": 1.6148419335713628e-06, "loss": 0.3104, "step": 14905 }, { "epoch": 0.7394215982935661, "grad_norm": 9.135930061340332, "learning_rate": 1.6142627132337363e-06, "loss": 0.171, "step": 14906 }, { "epoch": 0.7394712039287663, "grad_norm": 4.099273204803467, "learning_rate": 1.6136835767962127e-06, "loss": 0.2247, "step": 14907 }, { "epoch": 0.7395208095639665, "grad_norm": 7.018921375274658, "learning_rate": 1.6131045242731386e-06, "loss": 0.3595, "step": 14908 }, { "epoch": 0.7395704151991667, "grad_norm": 7.132316589355469, "learning_rate": 1.6125255556788699e-06, "loss": 0.3175, "step": 14909 }, { "epoch": 0.7396200208343667, "grad_norm": 5.631629467010498, "learning_rate": 1.6119466710277481e-06, "loss": 0.2661, "step": 14910 }, { "epoch": 0.7396696264695669, "grad_norm": 5.380090713500977, "learning_rate": 1.6113678703341213e-06, "loss": 0.2376, "step": 14911 }, { "epoch": 0.7397192321047671, "grad_norm": 9.358868598937988, "learning_rate": 1.6107891536123305e-06, "loss": 0.3827, "step": 14912 }, { "epoch": 0.7397688377399673, "grad_norm": 14.204036712646484, "learning_rate": 1.6102105208767199e-06, "loss": 0.359, "step": 14913 }, { "epoch": 0.7398184433751674, "grad_norm": 13.372952461242676, "learning_rate": 1.6096319721416226e-06, "loss": 0.395, "step": 14914 }, { "epoch": 0.7398680490103676, "grad_norm": 4.786336898803711, "learning_rate": 1.6090535074213815e-06, "loss": 0.338, "step": 14915 }, { "epoch": 0.7399176546455677, "grad_norm": 7.20759391784668, "learning_rate": 1.6084751267303272e-06, "loss": 0.3281, "step": 14916 }, { "epoch": 0.7399672602807679, "grad_norm": 7.185991287231445, "learning_rate": 1.6078968300827935e-06, "loss": 0.3627, "step": 14917 }, { "epoch": 0.740016865915968, "grad_norm": 9.103119850158691, "learning_rate": 1.6073186174931105e-06, "loss": 0.3482, "step": 14918 }, { "epoch": 0.7400664715511682, "grad_norm": 5.200827121734619, "learning_rate": 1.6067404889756082e-06, "loss": 0.2386, "step": 14919 }, { "epoch": 0.7401160771863684, "grad_norm": 3.814826488494873, "learning_rate": 1.6061624445446088e-06, "loss": 0.2282, "step": 14920 }, { "epoch": 0.7401656828215686, "grad_norm": 4.961611270904541, "learning_rate": 1.6055844842144425e-06, "loss": 0.2314, "step": 14921 }, { "epoch": 0.7402152884567686, "grad_norm": 5.873098373413086, "learning_rate": 1.6050066079994242e-06, "loss": 0.2024, "step": 14922 }, { "epoch": 0.7402648940919688, "grad_norm": 33.861324310302734, "learning_rate": 1.6044288159138815e-06, "loss": 0.3946, "step": 14923 }, { "epoch": 0.740314499727169, "grad_norm": 4.779165744781494, "learning_rate": 1.6038511079721274e-06, "loss": 0.2413, "step": 14924 }, { "epoch": 0.7403641053623692, "grad_norm": 13.522871971130371, "learning_rate": 1.603273484188479e-06, "loss": 0.3827, "step": 14925 }, { "epoch": 0.7404137109975694, "grad_norm": 9.317435264587402, "learning_rate": 1.60269594457725e-06, "loss": 0.2202, "step": 14926 }, { "epoch": 0.7404633166327694, "grad_norm": 6.498319625854492, "learning_rate": 1.602118489152754e-06, "loss": 0.2431, "step": 14927 }, { "epoch": 0.7405129222679696, "grad_norm": 9.752352714538574, "learning_rate": 1.6015411179292955e-06, "loss": 0.3707, "step": 14928 }, { "epoch": 0.7405625279031698, "grad_norm": 6.866340160369873, "learning_rate": 1.6009638309211884e-06, "loss": 0.3322, "step": 14929 }, { "epoch": 0.74061213353837, "grad_norm": 5.164510250091553, "learning_rate": 1.6003866281427339e-06, "loss": 0.2181, "step": 14930 }, { "epoch": 0.7406617391735701, "grad_norm": 7.603199481964111, "learning_rate": 1.5998095096082366e-06, "loss": 0.3033, "step": 14931 }, { "epoch": 0.7407113448087703, "grad_norm": 5.585216999053955, "learning_rate": 1.5992324753319976e-06, "loss": 0.303, "step": 14932 }, { "epoch": 0.7407609504439704, "grad_norm": 8.200425148010254, "learning_rate": 1.5986555253283159e-06, "loss": 0.2551, "step": 14933 }, { "epoch": 0.7408105560791706, "grad_norm": 5.0712056159973145, "learning_rate": 1.5980786596114894e-06, "loss": 0.2554, "step": 14934 }, { "epoch": 0.7408601617143707, "grad_norm": 5.830718994140625, "learning_rate": 1.5975018781958124e-06, "loss": 0.3243, "step": 14935 }, { "epoch": 0.7409097673495709, "grad_norm": 21.82951545715332, "learning_rate": 1.5969251810955793e-06, "loss": 0.2727, "step": 14936 }, { "epoch": 0.7409593729847711, "grad_norm": 15.486612319946289, "learning_rate": 1.5963485683250784e-06, "loss": 0.2812, "step": 14937 }, { "epoch": 0.7410089786199713, "grad_norm": 5.73638391494751, "learning_rate": 1.5957720398985994e-06, "loss": 0.2574, "step": 14938 }, { "epoch": 0.7410585842551713, "grad_norm": 4.228328704833984, "learning_rate": 1.5951955958304288e-06, "loss": 0.258, "step": 14939 }, { "epoch": 0.7411081898903715, "grad_norm": 6.620536804199219, "learning_rate": 1.5946192361348524e-06, "loss": 0.2542, "step": 14940 }, { "epoch": 0.7411577955255717, "grad_norm": 12.629483222961426, "learning_rate": 1.594042960826151e-06, "loss": 0.3597, "step": 14941 }, { "epoch": 0.7412074011607719, "grad_norm": 6.308079242706299, "learning_rate": 1.5934667699186057e-06, "loss": 0.2941, "step": 14942 }, { "epoch": 0.7412570067959721, "grad_norm": 13.68693733215332, "learning_rate": 1.5928906634264952e-06, "loss": 0.447, "step": 14943 }, { "epoch": 0.7413066124311721, "grad_norm": 7.963371753692627, "learning_rate": 1.5923146413640967e-06, "loss": 0.2439, "step": 14944 }, { "epoch": 0.7413562180663723, "grad_norm": 4.402637481689453, "learning_rate": 1.5917387037456794e-06, "loss": 0.2235, "step": 14945 }, { "epoch": 0.7414058237015725, "grad_norm": 3.9542455673217773, "learning_rate": 1.591162850585522e-06, "loss": 0.3009, "step": 14946 }, { "epoch": 0.7414554293367727, "grad_norm": 7.239323139190674, "learning_rate": 1.5905870818978898e-06, "loss": 0.3251, "step": 14947 }, { "epoch": 0.7415050349719728, "grad_norm": 10.192192077636719, "learning_rate": 1.5900113976970515e-06, "loss": 0.4589, "step": 14948 }, { "epoch": 0.741554640607173, "grad_norm": 11.693818092346191, "learning_rate": 1.589435797997274e-06, "loss": 0.4073, "step": 14949 }, { "epoch": 0.7416042462423731, "grad_norm": 3.9979605674743652, "learning_rate": 1.5888602828128214e-06, "loss": 0.2055, "step": 14950 }, { "epoch": 0.7416538518775733, "grad_norm": 8.812530517578125, "learning_rate": 1.588284852157951e-06, "loss": 0.2397, "step": 14951 }, { "epoch": 0.7417034575127734, "grad_norm": 6.764960289001465, "learning_rate": 1.5877095060469284e-06, "loss": 0.2602, "step": 14952 }, { "epoch": 0.7417530631479736, "grad_norm": 8.92098617553711, "learning_rate": 1.5871342444940059e-06, "loss": 0.2899, "step": 14953 }, { "epoch": 0.7418026687831738, "grad_norm": 14.358063697814941, "learning_rate": 1.5865590675134407e-06, "loss": 0.3781, "step": 14954 }, { "epoch": 0.741852274418374, "grad_norm": 7.744163513183594, "learning_rate": 1.585983975119486e-06, "loss": 0.3267, "step": 14955 }, { "epoch": 0.741901880053574, "grad_norm": 12.3832368850708, "learning_rate": 1.5854089673263928e-06, "loss": 0.4217, "step": 14956 }, { "epoch": 0.7419514856887742, "grad_norm": 7.687440872192383, "learning_rate": 1.58483404414841e-06, "loss": 0.251, "step": 14957 }, { "epoch": 0.7420010913239744, "grad_norm": 6.754508972167969, "learning_rate": 1.5842592055997857e-06, "loss": 0.3309, "step": 14958 }, { "epoch": 0.7420506969591746, "grad_norm": 7.2446608543396, "learning_rate": 1.5836844516947604e-06, "loss": 0.1792, "step": 14959 }, { "epoch": 0.7421003025943748, "grad_norm": 5.9402618408203125, "learning_rate": 1.5831097824475826e-06, "loss": 0.2472, "step": 14960 }, { "epoch": 0.7421499082295748, "grad_norm": 6.449847221374512, "learning_rate": 1.582535197872489e-06, "loss": 0.2866, "step": 14961 }, { "epoch": 0.742199513864775, "grad_norm": 5.032795429229736, "learning_rate": 1.5819606979837188e-06, "loss": 0.3359, "step": 14962 }, { "epoch": 0.7422491194999752, "grad_norm": 8.89913272857666, "learning_rate": 1.581386282795509e-06, "loss": 0.4127, "step": 14963 }, { "epoch": 0.7422987251351754, "grad_norm": 6.050875186920166, "learning_rate": 1.580811952322095e-06, "loss": 0.2514, "step": 14964 }, { "epoch": 0.7423483307703755, "grad_norm": 7.895577907562256, "learning_rate": 1.5802377065777037e-06, "loss": 0.2765, "step": 14965 }, { "epoch": 0.7423979364055757, "grad_norm": 15.313552856445312, "learning_rate": 1.5796635455765729e-06, "loss": 0.34, "step": 14966 }, { "epoch": 0.7424475420407758, "grad_norm": 5.207653045654297, "learning_rate": 1.5790894693329245e-06, "loss": 0.3229, "step": 14967 }, { "epoch": 0.742497147675976, "grad_norm": 6.857075214385986, "learning_rate": 1.578515477860987e-06, "loss": 0.2873, "step": 14968 }, { "epoch": 0.7425467533111761, "grad_norm": 11.755793571472168, "learning_rate": 1.577941571174983e-06, "loss": 0.3095, "step": 14969 }, { "epoch": 0.7425963589463763, "grad_norm": 4.648157596588135, "learning_rate": 1.577367749289137e-06, "loss": 0.2847, "step": 14970 }, { "epoch": 0.7426459645815765, "grad_norm": 7.510339736938477, "learning_rate": 1.5767940122176629e-06, "loss": 0.2979, "step": 14971 }, { "epoch": 0.7426955702167767, "grad_norm": 5.155077934265137, "learning_rate": 1.5762203599747849e-06, "loss": 0.1673, "step": 14972 }, { "epoch": 0.7427451758519767, "grad_norm": 7.417139530181885, "learning_rate": 1.5756467925747137e-06, "loss": 0.3297, "step": 14973 }, { "epoch": 0.7427947814871769, "grad_norm": 4.418966770172119, "learning_rate": 1.575073310031664e-06, "loss": 0.2554, "step": 14974 }, { "epoch": 0.7428443871223771, "grad_norm": 5.469192981719971, "learning_rate": 1.5744999123598465e-06, "loss": 0.2827, "step": 14975 }, { "epoch": 0.7428939927575773, "grad_norm": 10.276026725769043, "learning_rate": 1.5739265995734709e-06, "loss": 0.292, "step": 14976 }, { "epoch": 0.7429435983927775, "grad_norm": 6.323184490203857, "learning_rate": 1.5733533716867444e-06, "loss": 0.3295, "step": 14977 }, { "epoch": 0.7429932040279775, "grad_norm": 7.959670066833496, "learning_rate": 1.5727802287138727e-06, "loss": 0.2187, "step": 14978 }, { "epoch": 0.7430428096631777, "grad_norm": 9.14306640625, "learning_rate": 1.5722071706690539e-06, "loss": 0.2689, "step": 14979 }, { "epoch": 0.7430924152983779, "grad_norm": 9.683172225952148, "learning_rate": 1.571634197566495e-06, "loss": 0.3645, "step": 14980 }, { "epoch": 0.7431420209335781, "grad_norm": 7.861766815185547, "learning_rate": 1.5710613094203903e-06, "loss": 0.2702, "step": 14981 }, { "epoch": 0.7431916265687782, "grad_norm": 4.8825812339782715, "learning_rate": 1.570488506244937e-06, "loss": 0.2403, "step": 14982 }, { "epoch": 0.7432412322039784, "grad_norm": 7.057222366333008, "learning_rate": 1.5699157880543303e-06, "loss": 0.2446, "step": 14983 }, { "epoch": 0.7432908378391785, "grad_norm": 5.298229694366455, "learning_rate": 1.569343154862762e-06, "loss": 0.3171, "step": 14984 }, { "epoch": 0.7433404434743787, "grad_norm": 4.876870632171631, "learning_rate": 1.5687706066844217e-06, "loss": 0.2806, "step": 14985 }, { "epoch": 0.7433900491095788, "grad_norm": 8.467951774597168, "learning_rate": 1.5681981435334997e-06, "loss": 0.2654, "step": 14986 }, { "epoch": 0.743439654744779, "grad_norm": 11.442179679870605, "learning_rate": 1.5676257654241789e-06, "loss": 0.2996, "step": 14987 }, { "epoch": 0.7434892603799792, "grad_norm": 7.283232688903809, "learning_rate": 1.5670534723706438e-06, "loss": 0.2663, "step": 14988 }, { "epoch": 0.7435388660151794, "grad_norm": 8.571529388427734, "learning_rate": 1.5664812643870764e-06, "loss": 0.2951, "step": 14989 }, { "epoch": 0.7435884716503794, "grad_norm": 11.247881889343262, "learning_rate": 1.5659091414876571e-06, "loss": 0.3606, "step": 14990 }, { "epoch": 0.7436380772855796, "grad_norm": 14.14065170288086, "learning_rate": 1.5653371036865616e-06, "loss": 0.3949, "step": 14991 }, { "epoch": 0.7436876829207798, "grad_norm": 4.894458770751953, "learning_rate": 1.5647651509979673e-06, "loss": 0.2791, "step": 14992 }, { "epoch": 0.74373728855598, "grad_norm": 6.6323113441467285, "learning_rate": 1.5641932834360463e-06, "loss": 0.2323, "step": 14993 }, { "epoch": 0.7437868941911802, "grad_norm": 14.60703182220459, "learning_rate": 1.5636215010149697e-06, "loss": 0.3652, "step": 14994 }, { "epoch": 0.7438364998263802, "grad_norm": 4.407090663909912, "learning_rate": 1.5630498037489085e-06, "loss": 0.2746, "step": 14995 }, { "epoch": 0.7438861054615804, "grad_norm": 10.237885475158691, "learning_rate": 1.562478191652026e-06, "loss": 0.4016, "step": 14996 }, { "epoch": 0.7439357110967806, "grad_norm": 6.256719589233398, "learning_rate": 1.561906664738489e-06, "loss": 0.2375, "step": 14997 }, { "epoch": 0.7439853167319808, "grad_norm": 7.553919315338135, "learning_rate": 1.5613352230224605e-06, "loss": 0.3605, "step": 14998 }, { "epoch": 0.7440349223671809, "grad_norm": 6.861316680908203, "learning_rate": 1.5607638665181001e-06, "loss": 0.2193, "step": 14999 }, { "epoch": 0.7440845280023811, "grad_norm": 7.02097749710083, "learning_rate": 1.560192595239567e-06, "loss": 0.2705, "step": 15000 }, { "epoch": 0.7441341336375812, "grad_norm": 5.0656819343566895, "learning_rate": 1.5596214092010186e-06, "loss": 0.257, "step": 15001 }, { "epoch": 0.7441837392727814, "grad_norm": 7.095611095428467, "learning_rate": 1.5590503084166053e-06, "loss": 0.2603, "step": 15002 }, { "epoch": 0.7442333449079815, "grad_norm": 11.028203964233398, "learning_rate": 1.558479292900485e-06, "loss": 0.4157, "step": 15003 }, { "epoch": 0.7442829505431817, "grad_norm": 7.3136396408081055, "learning_rate": 1.557908362666803e-06, "loss": 0.3444, "step": 15004 }, { "epoch": 0.7443325561783819, "grad_norm": 10.618703842163086, "learning_rate": 1.5573375177297084e-06, "loss": 0.3622, "step": 15005 }, { "epoch": 0.7443821618135821, "grad_norm": 6.337673187255859, "learning_rate": 1.556766758103348e-06, "loss": 0.3275, "step": 15006 }, { "epoch": 0.7444317674487821, "grad_norm": 4.4445624351501465, "learning_rate": 1.5561960838018659e-06, "loss": 0.2234, "step": 15007 }, { "epoch": 0.7444813730839823, "grad_norm": 6.940736770629883, "learning_rate": 1.5556254948394e-06, "loss": 0.3378, "step": 15008 }, { "epoch": 0.7445309787191825, "grad_norm": 9.824953079223633, "learning_rate": 1.555054991230095e-06, "loss": 0.2328, "step": 15009 }, { "epoch": 0.7445805843543827, "grad_norm": 6.293212890625, "learning_rate": 1.554484572988083e-06, "loss": 0.4163, "step": 15010 }, { "epoch": 0.7446301899895829, "grad_norm": 6.332909107208252, "learning_rate": 1.5539142401275048e-06, "loss": 0.2803, "step": 15011 }, { "epoch": 0.7446797956247829, "grad_norm": 5.993283271789551, "learning_rate": 1.5533439926624888e-06, "loss": 0.2482, "step": 15012 }, { "epoch": 0.7447294012599831, "grad_norm": 6.240283966064453, "learning_rate": 1.5527738306071677e-06, "loss": 0.2786, "step": 15013 }, { "epoch": 0.7447790068951833, "grad_norm": 6.064000129699707, "learning_rate": 1.5522037539756707e-06, "loss": 0.2603, "step": 15014 }, { "epoch": 0.7448286125303835, "grad_norm": 9.565983772277832, "learning_rate": 1.551633762782126e-06, "loss": 0.2698, "step": 15015 }, { "epoch": 0.7448782181655836, "grad_norm": 4.5449042320251465, "learning_rate": 1.5510638570406533e-06, "loss": 0.2369, "step": 15016 }, { "epoch": 0.7449278238007838, "grad_norm": 8.298060417175293, "learning_rate": 1.5504940367653815e-06, "loss": 0.2443, "step": 15017 }, { "epoch": 0.7449774294359839, "grad_norm": 11.054777145385742, "learning_rate": 1.5499243019704257e-06, "loss": 0.4145, "step": 15018 }, { "epoch": 0.7450270350711841, "grad_norm": 9.678346633911133, "learning_rate": 1.5493546526699071e-06, "loss": 0.1605, "step": 15019 }, { "epoch": 0.7450766407063842, "grad_norm": 10.346582412719727, "learning_rate": 1.5487850888779403e-06, "loss": 0.2837, "step": 15020 }, { "epoch": 0.7451262463415844, "grad_norm": 8.015579223632812, "learning_rate": 1.5482156106086427e-06, "loss": 0.283, "step": 15021 }, { "epoch": 0.7451758519767846, "grad_norm": 5.778368949890137, "learning_rate": 1.5476462178761197e-06, "loss": 0.2518, "step": 15022 }, { "epoch": 0.7452254576119848, "grad_norm": 4.905170917510986, "learning_rate": 1.5470769106944887e-06, "loss": 0.2529, "step": 15023 }, { "epoch": 0.7452750632471848, "grad_norm": 4.719887733459473, "learning_rate": 1.5465076890778525e-06, "loss": 0.3209, "step": 15024 }, { "epoch": 0.745324668882385, "grad_norm": 11.768182754516602, "learning_rate": 1.545938553040317e-06, "loss": 0.3092, "step": 15025 }, { "epoch": 0.7453742745175852, "grad_norm": 5.139139175415039, "learning_rate": 1.5453695025959876e-06, "loss": 0.2722, "step": 15026 }, { "epoch": 0.7454238801527854, "grad_norm": 7.3217034339904785, "learning_rate": 1.5448005377589637e-06, "loss": 0.282, "step": 15027 }, { "epoch": 0.7454734857879856, "grad_norm": 4.822870254516602, "learning_rate": 1.5442316585433458e-06, "loss": 0.1646, "step": 15028 }, { "epoch": 0.7455230914231856, "grad_norm": 6.532342910766602, "learning_rate": 1.5436628649632324e-06, "loss": 0.3247, "step": 15029 }, { "epoch": 0.7455726970583858, "grad_norm": 9.115348815917969, "learning_rate": 1.5430941570327134e-06, "loss": 0.3345, "step": 15030 }, { "epoch": 0.745622302693586, "grad_norm": 9.698514938354492, "learning_rate": 1.5425255347658886e-06, "loss": 0.3935, "step": 15031 }, { "epoch": 0.7456719083287862, "grad_norm": 6.163455009460449, "learning_rate": 1.5419569981768428e-06, "loss": 0.3707, "step": 15032 }, { "epoch": 0.7457215139639863, "grad_norm": 4.156561851501465, "learning_rate": 1.5413885472796668e-06, "loss": 0.1968, "step": 15033 }, { "epoch": 0.7457711195991865, "grad_norm": 5.006856441497803, "learning_rate": 1.5408201820884473e-06, "loss": 0.2678, "step": 15034 }, { "epoch": 0.7458207252343866, "grad_norm": 9.56129264831543, "learning_rate": 1.5402519026172703e-06, "loss": 0.2618, "step": 15035 }, { "epoch": 0.7458703308695868, "grad_norm": 6.13621187210083, "learning_rate": 1.5396837088802125e-06, "loss": 0.2488, "step": 15036 }, { "epoch": 0.7459199365047869, "grad_norm": 9.95310115814209, "learning_rate": 1.539115600891361e-06, "loss": 0.296, "step": 15037 }, { "epoch": 0.7459695421399871, "grad_norm": 6.783565521240234, "learning_rate": 1.5385475786647885e-06, "loss": 0.3555, "step": 15038 }, { "epoch": 0.7460191477751873, "grad_norm": 5.253169059753418, "learning_rate": 1.5379796422145731e-06, "loss": 0.208, "step": 15039 }, { "epoch": 0.7460687534103875, "grad_norm": 5.400167465209961, "learning_rate": 1.5374117915547888e-06, "loss": 0.2334, "step": 15040 }, { "epoch": 0.7461183590455875, "grad_norm": 6.9727702140808105, "learning_rate": 1.5368440266995062e-06, "loss": 0.1926, "step": 15041 }, { "epoch": 0.7461679646807877, "grad_norm": 7.579902648925781, "learning_rate": 1.5362763476627945e-06, "loss": 0.3581, "step": 15042 }, { "epoch": 0.7462175703159879, "grad_norm": 8.658913612365723, "learning_rate": 1.5357087544587223e-06, "loss": 0.317, "step": 15043 }, { "epoch": 0.7462671759511881, "grad_norm": 7.939488887786865, "learning_rate": 1.5351412471013561e-06, "loss": 0.2547, "step": 15044 }, { "epoch": 0.7463167815863883, "grad_norm": 8.633955955505371, "learning_rate": 1.534573825604755e-06, "loss": 0.2729, "step": 15045 }, { "epoch": 0.7463663872215883, "grad_norm": 4.656940460205078, "learning_rate": 1.5340064899829826e-06, "loss": 0.1999, "step": 15046 }, { "epoch": 0.7464159928567885, "grad_norm": 7.1102447509765625, "learning_rate": 1.533439240250097e-06, "loss": 0.3334, "step": 15047 }, { "epoch": 0.7464655984919887, "grad_norm": 5.652633190155029, "learning_rate": 1.532872076420155e-06, "loss": 0.2921, "step": 15048 }, { "epoch": 0.7465152041271889, "grad_norm": 12.866988182067871, "learning_rate": 1.5323049985072114e-06, "loss": 0.3669, "step": 15049 }, { "epoch": 0.746564809762389, "grad_norm": 3.5087661743164062, "learning_rate": 1.5317380065253185e-06, "loss": 0.2774, "step": 15050 }, { "epoch": 0.7466144153975892, "grad_norm": 6.215128421783447, "learning_rate": 1.5311711004885265e-06, "loss": 0.2375, "step": 15051 }, { "epoch": 0.7466640210327893, "grad_norm": 19.74625015258789, "learning_rate": 1.5306042804108856e-06, "loss": 0.398, "step": 15052 }, { "epoch": 0.7467136266679895, "grad_norm": 9.281557083129883, "learning_rate": 1.530037546306438e-06, "loss": 0.4336, "step": 15053 }, { "epoch": 0.7467632323031896, "grad_norm": 11.920563697814941, "learning_rate": 1.5294708981892304e-06, "loss": 0.3468, "step": 15054 }, { "epoch": 0.7468128379383898, "grad_norm": 4.244786262512207, "learning_rate": 1.528904336073303e-06, "loss": 0.1932, "step": 15055 }, { "epoch": 0.74686244357359, "grad_norm": 6.807100296020508, "learning_rate": 1.5283378599726967e-06, "loss": 0.2417, "step": 15056 }, { "epoch": 0.7469120492087901, "grad_norm": 5.699280261993408, "learning_rate": 1.527771469901449e-06, "loss": 0.2421, "step": 15057 }, { "epoch": 0.7469616548439902, "grad_norm": 4.82875394821167, "learning_rate": 1.5272051658735959e-06, "loss": 0.1854, "step": 15058 }, { "epoch": 0.7470112604791904, "grad_norm": 6.85322380065918, "learning_rate": 1.5266389479031667e-06, "loss": 0.1862, "step": 15059 }, { "epoch": 0.7470608661143906, "grad_norm": 5.035989284515381, "learning_rate": 1.5260728160041987e-06, "loss": 0.3248, "step": 15060 }, { "epoch": 0.7471104717495908, "grad_norm": 4.178426742553711, "learning_rate": 1.5255067701907167e-06, "loss": 0.2043, "step": 15061 }, { "epoch": 0.747160077384791, "grad_norm": 4.378105163574219, "learning_rate": 1.5249408104767476e-06, "loss": 0.1995, "step": 15062 }, { "epoch": 0.747209683019991, "grad_norm": 6.742382526397705, "learning_rate": 1.5243749368763177e-06, "loss": 0.2915, "step": 15063 }, { "epoch": 0.7472592886551912, "grad_norm": 5.870048999786377, "learning_rate": 1.5238091494034496e-06, "loss": 0.2794, "step": 15064 }, { "epoch": 0.7473088942903914, "grad_norm": 5.949400424957275, "learning_rate": 1.5232434480721625e-06, "loss": 0.3408, "step": 15065 }, { "epoch": 0.7473584999255916, "grad_norm": 6.548069477081299, "learning_rate": 1.5226778328964775e-06, "loss": 0.4105, "step": 15066 }, { "epoch": 0.7474081055607917, "grad_norm": 5.751976013183594, "learning_rate": 1.5221123038904051e-06, "loss": 0.2733, "step": 15067 }, { "epoch": 0.7474577111959919, "grad_norm": 2.7109262943267822, "learning_rate": 1.5215468610679668e-06, "loss": 0.1109, "step": 15068 }, { "epoch": 0.747507316831192, "grad_norm": 4.478705406188965, "learning_rate": 1.5209815044431687e-06, "loss": 0.3073, "step": 15069 }, { "epoch": 0.7475569224663922, "grad_norm": 5.678451061248779, "learning_rate": 1.5204162340300228e-06, "loss": 0.1978, "step": 15070 }, { "epoch": 0.7476065281015923, "grad_norm": 8.980857849121094, "learning_rate": 1.5198510498425367e-06, "loss": 0.3856, "step": 15071 }, { "epoch": 0.7476561337367925, "grad_norm": 9.732279777526855, "learning_rate": 1.5192859518947173e-06, "loss": 0.439, "step": 15072 }, { "epoch": 0.7477057393719927, "grad_norm": 8.195856094360352, "learning_rate": 1.5187209402005631e-06, "loss": 0.3412, "step": 15073 }, { "epoch": 0.7477553450071928, "grad_norm": 10.921333312988281, "learning_rate": 1.518156014774082e-06, "loss": 0.3211, "step": 15074 }, { "epoch": 0.7478049506423929, "grad_norm": 9.688446044921875, "learning_rate": 1.5175911756292688e-06, "loss": 0.3436, "step": 15075 }, { "epoch": 0.7478545562775931, "grad_norm": 11.8751802444458, "learning_rate": 1.5170264227801213e-06, "loss": 0.3705, "step": 15076 }, { "epoch": 0.7479041619127933, "grad_norm": 9.041420936584473, "learning_rate": 1.5164617562406342e-06, "loss": 0.3538, "step": 15077 }, { "epoch": 0.7479537675479935, "grad_norm": 16.45967674255371, "learning_rate": 1.5158971760248025e-06, "loss": 0.4066, "step": 15078 }, { "epoch": 0.7480033731831937, "grad_norm": 8.181487083435059, "learning_rate": 1.5153326821466113e-06, "loss": 0.2447, "step": 15079 }, { "epoch": 0.7480529788183937, "grad_norm": 11.621460914611816, "learning_rate": 1.5147682746200558e-06, "loss": 0.2503, "step": 15080 }, { "epoch": 0.7481025844535939, "grad_norm": 16.63606834411621, "learning_rate": 1.5142039534591163e-06, "loss": 0.3989, "step": 15081 }, { "epoch": 0.7481521900887941, "grad_norm": 5.115798473358154, "learning_rate": 1.5136397186777823e-06, "loss": 0.2584, "step": 15082 }, { "epoch": 0.7482017957239943, "grad_norm": 5.922465801239014, "learning_rate": 1.5130755702900318e-06, "loss": 0.2477, "step": 15083 }, { "epoch": 0.7482514013591944, "grad_norm": 12.092357635498047, "learning_rate": 1.5125115083098457e-06, "loss": 0.335, "step": 15084 }, { "epoch": 0.7483010069943946, "grad_norm": 5.005531311035156, "learning_rate": 1.5119475327512023e-06, "loss": 0.2701, "step": 15085 }, { "epoch": 0.7483506126295947, "grad_norm": 15.4310941696167, "learning_rate": 1.5113836436280782e-06, "loss": 0.357, "step": 15086 }, { "epoch": 0.7484002182647949, "grad_norm": 6.217838287353516, "learning_rate": 1.5108198409544428e-06, "loss": 0.3247, "step": 15087 }, { "epoch": 0.748449823899995, "grad_norm": 5.6227803230285645, "learning_rate": 1.5102561247442727e-06, "loss": 0.205, "step": 15088 }, { "epoch": 0.7484994295351952, "grad_norm": 7.82626485824585, "learning_rate": 1.5096924950115327e-06, "loss": 0.2625, "step": 15089 }, { "epoch": 0.7485490351703954, "grad_norm": 11.267915725708008, "learning_rate": 1.5091289517701918e-06, "loss": 0.3203, "step": 15090 }, { "epoch": 0.7485986408055955, "grad_norm": 7.35158634185791, "learning_rate": 1.508565495034215e-06, "loss": 0.2745, "step": 15091 }, { "epoch": 0.7486482464407956, "grad_norm": 8.115525245666504, "learning_rate": 1.5080021248175658e-06, "loss": 0.3774, "step": 15092 }, { "epoch": 0.7486978520759958, "grad_norm": 11.538653373718262, "learning_rate": 1.5074388411342e-06, "loss": 0.3439, "step": 15093 }, { "epoch": 0.748747457711196, "grad_norm": 4.243436336517334, "learning_rate": 1.506875643998083e-06, "loss": 0.2663, "step": 15094 }, { "epoch": 0.7487970633463962, "grad_norm": 4.610899448394775, "learning_rate": 1.506312533423166e-06, "loss": 0.1907, "step": 15095 }, { "epoch": 0.7488466689815964, "grad_norm": 9.66769027709961, "learning_rate": 1.5057495094234053e-06, "loss": 0.3383, "step": 15096 }, { "epoch": 0.7488962746167964, "grad_norm": 8.899218559265137, "learning_rate": 1.5051865720127517e-06, "loss": 0.3646, "step": 15097 }, { "epoch": 0.7489458802519966, "grad_norm": 3.9987144470214844, "learning_rate": 1.5046237212051562e-06, "loss": 0.2505, "step": 15098 }, { "epoch": 0.7489954858871968, "grad_norm": 9.654183387756348, "learning_rate": 1.5040609570145659e-06, "loss": 0.1934, "step": 15099 }, { "epoch": 0.749045091522397, "grad_norm": 5.782263278961182, "learning_rate": 1.5034982794549262e-06, "loss": 0.2969, "step": 15100 }, { "epoch": 0.7490946971575971, "grad_norm": 5.113363265991211, "learning_rate": 1.5029356885401808e-06, "loss": 0.2656, "step": 15101 }, { "epoch": 0.7491443027927973, "grad_norm": 7.263178825378418, "learning_rate": 1.502373184284273e-06, "loss": 0.303, "step": 15102 }, { "epoch": 0.7491939084279974, "grad_norm": 6.5921196937561035, "learning_rate": 1.5018107667011377e-06, "loss": 0.3383, "step": 15103 }, { "epoch": 0.7492435140631976, "grad_norm": 4.6563849449157715, "learning_rate": 1.501248435804714e-06, "loss": 0.2996, "step": 15104 }, { "epoch": 0.7492931196983977, "grad_norm": 10.407867431640625, "learning_rate": 1.5006861916089371e-06, "loss": 0.3968, "step": 15105 }, { "epoch": 0.7493427253335979, "grad_norm": 7.705767631530762, "learning_rate": 1.5001240341277395e-06, "loss": 0.3875, "step": 15106 }, { "epoch": 0.7493923309687981, "grad_norm": 7.032525539398193, "learning_rate": 1.4995619633750514e-06, "loss": 0.2602, "step": 15107 }, { "epoch": 0.7494419366039982, "grad_norm": 5.375176906585693, "learning_rate": 1.4989999793648008e-06, "loss": 0.2737, "step": 15108 }, { "epoch": 0.7494915422391983, "grad_norm": 6.2716593742370605, "learning_rate": 1.4984380821109162e-06, "loss": 0.3483, "step": 15109 }, { "epoch": 0.7495411478743985, "grad_norm": 7.572446823120117, "learning_rate": 1.4978762716273165e-06, "loss": 0.3288, "step": 15110 }, { "epoch": 0.7495907535095987, "grad_norm": 13.456073760986328, "learning_rate": 1.4973145479279305e-06, "loss": 0.2821, "step": 15111 }, { "epoch": 0.7496403591447989, "grad_norm": 11.268531799316406, "learning_rate": 1.4967529110266726e-06, "loss": 0.2831, "step": 15112 }, { "epoch": 0.749689964779999, "grad_norm": 5.213304042816162, "learning_rate": 1.4961913609374624e-06, "loss": 0.2526, "step": 15113 }, { "epoch": 0.7497395704151991, "grad_norm": 4.3408002853393555, "learning_rate": 1.495629897674215e-06, "loss": 0.2691, "step": 15114 }, { "epoch": 0.7497891760503993, "grad_norm": 6.5590620040893555, "learning_rate": 1.495068521250846e-06, "loss": 0.2677, "step": 15115 }, { "epoch": 0.7498387816855995, "grad_norm": 9.135056495666504, "learning_rate": 1.4945072316812609e-06, "loss": 0.2996, "step": 15116 }, { "epoch": 0.7498883873207997, "grad_norm": 7.04800271987915, "learning_rate": 1.4939460289793756e-06, "loss": 0.2983, "step": 15117 }, { "epoch": 0.7499379929559998, "grad_norm": 5.296252250671387, "learning_rate": 1.49338491315909e-06, "loss": 0.1742, "step": 15118 }, { "epoch": 0.7499875985912, "grad_norm": 6.11425256729126, "learning_rate": 1.4928238842343162e-06, "loss": 0.28, "step": 15119 }, { "epoch": 0.7500372042264001, "grad_norm": 8.803847312927246, "learning_rate": 1.4922629422189516e-06, "loss": 0.2665, "step": 15120 }, { "epoch": 0.7500868098616003, "grad_norm": 3.775205135345459, "learning_rate": 1.4917020871268977e-06, "loss": 0.2802, "step": 15121 }, { "epoch": 0.7501364154968004, "grad_norm": 5.514858245849609, "learning_rate": 1.491141318972053e-06, "loss": 0.2911, "step": 15122 }, { "epoch": 0.7501860211320006, "grad_norm": 8.88215446472168, "learning_rate": 1.4905806377683151e-06, "loss": 0.2845, "step": 15123 }, { "epoch": 0.7502356267672008, "grad_norm": 7.532403469085693, "learning_rate": 1.4900200435295736e-06, "loss": 0.2433, "step": 15124 }, { "epoch": 0.7502852324024009, "grad_norm": 5.244832515716553, "learning_rate": 1.4894595362697267e-06, "loss": 0.264, "step": 15125 }, { "epoch": 0.750334838037601, "grad_norm": 4.966854572296143, "learning_rate": 1.4888991160026584e-06, "loss": 0.2186, "step": 15126 }, { "epoch": 0.7503844436728012, "grad_norm": 4.741784572601318, "learning_rate": 1.488338782742258e-06, "loss": 0.2452, "step": 15127 }, { "epoch": 0.7504340493080014, "grad_norm": 8.145112991333008, "learning_rate": 1.4877785365024117e-06, "loss": 0.3374, "step": 15128 }, { "epoch": 0.7504836549432016, "grad_norm": 7.805062294006348, "learning_rate": 1.4872183772970034e-06, "loss": 0.2971, "step": 15129 }, { "epoch": 0.7505332605784018, "grad_norm": 10.163712501525879, "learning_rate": 1.4866583051399092e-06, "loss": 0.3799, "step": 15130 }, { "epoch": 0.7505828662136018, "grad_norm": 8.108796119689941, "learning_rate": 1.4860983200450146e-06, "loss": 0.3656, "step": 15131 }, { "epoch": 0.750632471848802, "grad_norm": 5.242129802703857, "learning_rate": 1.485538422026192e-06, "loss": 0.2636, "step": 15132 }, { "epoch": 0.7506820774840022, "grad_norm": 7.0350165367126465, "learning_rate": 1.4849786110973169e-06, "loss": 0.3624, "step": 15133 }, { "epoch": 0.7507316831192024, "grad_norm": 7.443050384521484, "learning_rate": 1.484418887272262e-06, "loss": 0.323, "step": 15134 }, { "epoch": 0.7507812887544025, "grad_norm": 5.534495830535889, "learning_rate": 1.483859250564897e-06, "loss": 0.2644, "step": 15135 }, { "epoch": 0.7508308943896027, "grad_norm": 6.524015426635742, "learning_rate": 1.4832997009890903e-06, "loss": 0.2684, "step": 15136 }, { "epoch": 0.7508805000248028, "grad_norm": 4.408677101135254, "learning_rate": 1.4827402385587098e-06, "loss": 0.2251, "step": 15137 }, { "epoch": 0.750930105660003, "grad_norm": 9.640246391296387, "learning_rate": 1.4821808632876133e-06, "loss": 0.2835, "step": 15138 }, { "epoch": 0.7509797112952031, "grad_norm": 11.219921112060547, "learning_rate": 1.48162157518967e-06, "loss": 0.4124, "step": 15139 }, { "epoch": 0.7510293169304033, "grad_norm": 8.574149131774902, "learning_rate": 1.481062374278734e-06, "loss": 0.3245, "step": 15140 }, { "epoch": 0.7510789225656035, "grad_norm": 4.359250068664551, "learning_rate": 1.4805032605686643e-06, "loss": 0.2973, "step": 15141 }, { "epoch": 0.7511285282008036, "grad_norm": 4.766926288604736, "learning_rate": 1.4799442340733155e-06, "loss": 0.2856, "step": 15142 }, { "epoch": 0.7511781338360037, "grad_norm": 4.152804374694824, "learning_rate": 1.4793852948065424e-06, "loss": 0.2094, "step": 15143 }, { "epoch": 0.7512277394712039, "grad_norm": 5.968939304351807, "learning_rate": 1.4788264427821914e-06, "loss": 0.2431, "step": 15144 }, { "epoch": 0.7512773451064041, "grad_norm": 8.154034614562988, "learning_rate": 1.478267678014117e-06, "loss": 0.3006, "step": 15145 }, { "epoch": 0.7513269507416043, "grad_norm": 5.470812797546387, "learning_rate": 1.477709000516161e-06, "loss": 0.2466, "step": 15146 }, { "epoch": 0.7513765563768044, "grad_norm": 4.997578144073486, "learning_rate": 1.4771504103021694e-06, "loss": 0.2567, "step": 15147 }, { "epoch": 0.7514261620120045, "grad_norm": 13.174375534057617, "learning_rate": 1.4765919073859842e-06, "loss": 0.3185, "step": 15148 }, { "epoch": 0.7514757676472047, "grad_norm": 6.117171764373779, "learning_rate": 1.476033491781445e-06, "loss": 0.2805, "step": 15149 }, { "epoch": 0.7515253732824049, "grad_norm": 5.4682817459106445, "learning_rate": 1.47547516350239e-06, "loss": 0.1828, "step": 15150 }, { "epoch": 0.751574978917605, "grad_norm": 8.658248901367188, "learning_rate": 1.4749169225626565e-06, "loss": 0.2271, "step": 15151 }, { "epoch": 0.7516245845528052, "grad_norm": 6.786111831665039, "learning_rate": 1.474358768976073e-06, "loss": 0.3225, "step": 15152 }, { "epoch": 0.7516741901880054, "grad_norm": 4.814149379730225, "learning_rate": 1.4738007027564777e-06, "loss": 0.2612, "step": 15153 }, { "epoch": 0.7517237958232055, "grad_norm": 7.896029472351074, "learning_rate": 1.473242723917694e-06, "loss": 0.2913, "step": 15154 }, { "epoch": 0.7517734014584057, "grad_norm": 4.076706886291504, "learning_rate": 1.472684832473551e-06, "loss": 0.1701, "step": 15155 }, { "epoch": 0.7518230070936058, "grad_norm": 10.050339698791504, "learning_rate": 1.4721270284378731e-06, "loss": 0.2329, "step": 15156 }, { "epoch": 0.751872612728806, "grad_norm": 4.283589839935303, "learning_rate": 1.4715693118244834e-06, "loss": 0.1884, "step": 15157 }, { "epoch": 0.7519222183640062, "grad_norm": 6.7355637550354, "learning_rate": 1.4710116826472026e-06, "loss": 0.362, "step": 15158 }, { "epoch": 0.7519718239992063, "grad_norm": 4.407650947570801, "learning_rate": 1.4704541409198486e-06, "loss": 0.2462, "step": 15159 }, { "epoch": 0.7520214296344064, "grad_norm": 7.644137382507324, "learning_rate": 1.4698966866562387e-06, "loss": 0.2794, "step": 15160 }, { "epoch": 0.7520710352696066, "grad_norm": 5.317810535430908, "learning_rate": 1.4693393198701843e-06, "loss": 0.2516, "step": 15161 }, { "epoch": 0.7521206409048068, "grad_norm": 4.320949077606201, "learning_rate": 1.4687820405754988e-06, "loss": 0.3162, "step": 15162 }, { "epoch": 0.752170246540007, "grad_norm": 8.07265853881836, "learning_rate": 1.4682248487859918e-06, "loss": 0.3066, "step": 15163 }, { "epoch": 0.7522198521752071, "grad_norm": 6.053341865539551, "learning_rate": 1.4676677445154708e-06, "loss": 0.2669, "step": 15164 }, { "epoch": 0.7522694578104072, "grad_norm": 5.5234880447387695, "learning_rate": 1.4671107277777403e-06, "loss": 0.2473, "step": 15165 }, { "epoch": 0.7523190634456074, "grad_norm": 6.510488510131836, "learning_rate": 1.4665537985866063e-06, "loss": 0.2745, "step": 15166 }, { "epoch": 0.7523686690808076, "grad_norm": 5.646311283111572, "learning_rate": 1.4659969569558642e-06, "loss": 0.262, "step": 15167 }, { "epoch": 0.7524182747160078, "grad_norm": 5.526706218719482, "learning_rate": 1.4654402028993187e-06, "loss": 0.3194, "step": 15168 }, { "epoch": 0.7524678803512079, "grad_norm": 5.561415672302246, "learning_rate": 1.4648835364307628e-06, "loss": 0.1901, "step": 15169 }, { "epoch": 0.7525174859864081, "grad_norm": 8.191391944885254, "learning_rate": 1.4643269575639918e-06, "loss": 0.3279, "step": 15170 }, { "epoch": 0.7525670916216082, "grad_norm": 4.8876237869262695, "learning_rate": 1.4637704663127983e-06, "loss": 0.2849, "step": 15171 }, { "epoch": 0.7526166972568084, "grad_norm": 9.461809158325195, "learning_rate": 1.4632140626909718e-06, "loss": 0.4129, "step": 15172 }, { "epoch": 0.7526663028920085, "grad_norm": 13.021437644958496, "learning_rate": 1.462657746712301e-06, "loss": 0.3123, "step": 15173 }, { "epoch": 0.7527159085272087, "grad_norm": 6.592841148376465, "learning_rate": 1.4621015183905734e-06, "loss": 0.2941, "step": 15174 }, { "epoch": 0.7527655141624089, "grad_norm": 6.438498497009277, "learning_rate": 1.4615453777395666e-06, "loss": 0.194, "step": 15175 }, { "epoch": 0.752815119797609, "grad_norm": 8.72634220123291, "learning_rate": 1.4609893247730695e-06, "loss": 0.2944, "step": 15176 }, { "epoch": 0.7528647254328091, "grad_norm": 12.486857414245605, "learning_rate": 1.4604333595048564e-06, "loss": 0.3831, "step": 15177 }, { "epoch": 0.7529143310680093, "grad_norm": 10.4002103805542, "learning_rate": 1.4598774819487054e-06, "loss": 0.525, "step": 15178 }, { "epoch": 0.7529639367032095, "grad_norm": 5.0661940574646, "learning_rate": 1.4593216921183922e-06, "loss": 0.272, "step": 15179 }, { "epoch": 0.7530135423384097, "grad_norm": 4.987582683563232, "learning_rate": 1.4587659900276901e-06, "loss": 0.2335, "step": 15180 }, { "epoch": 0.7530631479736098, "grad_norm": 4.341085433959961, "learning_rate": 1.4582103756903659e-06, "loss": 0.2765, "step": 15181 }, { "epoch": 0.7531127536088099, "grad_norm": 8.96285629272461, "learning_rate": 1.4576548491201936e-06, "loss": 0.2509, "step": 15182 }, { "epoch": 0.7531623592440101, "grad_norm": 5.163221836090088, "learning_rate": 1.457099410330935e-06, "loss": 0.2298, "step": 15183 }, { "epoch": 0.7532119648792103, "grad_norm": 6.940960884094238, "learning_rate": 1.4565440593363561e-06, "loss": 0.2054, "step": 15184 }, { "epoch": 0.7532615705144105, "grad_norm": 7.077507972717285, "learning_rate": 1.4559887961502178e-06, "loss": 0.2503, "step": 15185 }, { "epoch": 0.7533111761496106, "grad_norm": 4.787835121154785, "learning_rate": 1.455433620786282e-06, "loss": 0.2269, "step": 15186 }, { "epoch": 0.7533607817848108, "grad_norm": 7.0503950119018555, "learning_rate": 1.454878533258301e-06, "loss": 0.326, "step": 15187 }, { "epoch": 0.7534103874200109, "grad_norm": 9.363543510437012, "learning_rate": 1.454323533580037e-06, "loss": 0.2155, "step": 15188 }, { "epoch": 0.753459993055211, "grad_norm": 10.700912475585938, "learning_rate": 1.4537686217652368e-06, "loss": 0.2988, "step": 15189 }, { "epoch": 0.7535095986904112, "grad_norm": 4.414180755615234, "learning_rate": 1.4532137978276572e-06, "loss": 0.2601, "step": 15190 }, { "epoch": 0.7535592043256114, "grad_norm": 5.192310333251953, "learning_rate": 1.4526590617810426e-06, "loss": 0.1453, "step": 15191 }, { "epoch": 0.7536088099608116, "grad_norm": 5.630002498626709, "learning_rate": 1.452104413639141e-06, "loss": 0.2122, "step": 15192 }, { "epoch": 0.7536584155960117, "grad_norm": 6.051497936248779, "learning_rate": 1.451549853415697e-06, "loss": 0.3804, "step": 15193 }, { "epoch": 0.7537080212312118, "grad_norm": 8.213179588317871, "learning_rate": 1.4509953811244542e-06, "loss": 0.3133, "step": 15194 }, { "epoch": 0.753757626866412, "grad_norm": 6.07063627243042, "learning_rate": 1.4504409967791483e-06, "loss": 0.2994, "step": 15195 }, { "epoch": 0.7538072325016122, "grad_norm": 5.874039173126221, "learning_rate": 1.449886700393523e-06, "loss": 0.2494, "step": 15196 }, { "epoch": 0.7538568381368124, "grad_norm": 4.953341484069824, "learning_rate": 1.4493324919813095e-06, "loss": 0.2735, "step": 15197 }, { "epoch": 0.7539064437720125, "grad_norm": 4.303830623626709, "learning_rate": 1.4487783715562431e-06, "loss": 0.3098, "step": 15198 }, { "epoch": 0.7539560494072126, "grad_norm": 4.998583793640137, "learning_rate": 1.4482243391320555e-06, "loss": 0.2782, "step": 15199 }, { "epoch": 0.7540056550424128, "grad_norm": 6.627145767211914, "learning_rate": 1.4476703947224762e-06, "loss": 0.308, "step": 15200 }, { "epoch": 0.754055260677613, "grad_norm": 7.934408664703369, "learning_rate": 1.4471165383412289e-06, "loss": 0.2832, "step": 15201 }, { "epoch": 0.7541048663128131, "grad_norm": 9.560717582702637, "learning_rate": 1.446562770002044e-06, "loss": 0.3047, "step": 15202 }, { "epoch": 0.7541544719480133, "grad_norm": 9.96750259399414, "learning_rate": 1.4460090897186395e-06, "loss": 0.2744, "step": 15203 }, { "epoch": 0.7542040775832135, "grad_norm": 9.933916091918945, "learning_rate": 1.445455497504737e-06, "loss": 0.3121, "step": 15204 }, { "epoch": 0.7542536832184136, "grad_norm": 3.791074514389038, "learning_rate": 1.4449019933740555e-06, "loss": 0.2243, "step": 15205 }, { "epoch": 0.7543032888536138, "grad_norm": 7.575377941131592, "learning_rate": 1.444348577340311e-06, "loss": 0.2822, "step": 15206 }, { "epoch": 0.7543528944888139, "grad_norm": 6.650109767913818, "learning_rate": 1.4437952494172163e-06, "loss": 0.1848, "step": 15207 }, { "epoch": 0.7544025001240141, "grad_norm": 4.888904094696045, "learning_rate": 1.4432420096184846e-06, "loss": 0.2477, "step": 15208 }, { "epoch": 0.7544521057592143, "grad_norm": 7.170346260070801, "learning_rate": 1.4426888579578252e-06, "loss": 0.2847, "step": 15209 }, { "epoch": 0.7545017113944144, "grad_norm": 6.4811482429504395, "learning_rate": 1.442135794448946e-06, "loss": 0.2316, "step": 15210 }, { "epoch": 0.7545513170296145, "grad_norm": 14.05472183227539, "learning_rate": 1.4415828191055498e-06, "loss": 0.3781, "step": 15211 }, { "epoch": 0.7546009226648147, "grad_norm": 3.7869491577148438, "learning_rate": 1.4410299319413406e-06, "loss": 0.231, "step": 15212 }, { "epoch": 0.7546505283000149, "grad_norm": 6.404938697814941, "learning_rate": 1.44047713297002e-06, "loss": 0.2856, "step": 15213 }, { "epoch": 0.7547001339352151, "grad_norm": 16.975561141967773, "learning_rate": 1.439924422205286e-06, "loss": 0.3621, "step": 15214 }, { "epoch": 0.7547497395704152, "grad_norm": 11.657517433166504, "learning_rate": 1.4393717996608359e-06, "loss": 0.3075, "step": 15215 }, { "epoch": 0.7547993452056153, "grad_norm": 10.034467697143555, "learning_rate": 1.4388192653503625e-06, "loss": 0.3943, "step": 15216 }, { "epoch": 0.7548489508408155, "grad_norm": 5.895119667053223, "learning_rate": 1.4382668192875604e-06, "loss": 0.3125, "step": 15217 }, { "epoch": 0.7548985564760157, "grad_norm": 8.092145919799805, "learning_rate": 1.4377144614861144e-06, "loss": 0.2986, "step": 15218 }, { "epoch": 0.7549481621112158, "grad_norm": 4.865916728973389, "learning_rate": 1.4371621919597183e-06, "loss": 0.1743, "step": 15219 }, { "epoch": 0.754997767746416, "grad_norm": 10.62226676940918, "learning_rate": 1.4366100107220537e-06, "loss": 0.3501, "step": 15220 }, { "epoch": 0.7550473733816162, "grad_norm": 6.187139511108398, "learning_rate": 1.4360579177868051e-06, "loss": 0.159, "step": 15221 }, { "epoch": 0.7550969790168163, "grad_norm": 4.631897926330566, "learning_rate": 1.435505913167653e-06, "loss": 0.3535, "step": 15222 }, { "epoch": 0.7551465846520165, "grad_norm": 11.170002937316895, "learning_rate": 1.4349539968782783e-06, "loss": 0.4052, "step": 15223 }, { "epoch": 0.7551961902872166, "grad_norm": 14.45785903930664, "learning_rate": 1.4344021689323533e-06, "loss": 0.4119, "step": 15224 }, { "epoch": 0.7552457959224168, "grad_norm": 8.80215835571289, "learning_rate": 1.4338504293435585e-06, "loss": 0.322, "step": 15225 }, { "epoch": 0.755295401557617, "grad_norm": 6.6331682205200195, "learning_rate": 1.4332987781255603e-06, "loss": 0.3959, "step": 15226 }, { "epoch": 0.755345007192817, "grad_norm": 15.765204429626465, "learning_rate": 1.4327472152920346e-06, "loss": 0.4498, "step": 15227 }, { "epoch": 0.7553946128280172, "grad_norm": 13.02565860748291, "learning_rate": 1.4321957408566451e-06, "loss": 0.3598, "step": 15228 }, { "epoch": 0.7554442184632174, "grad_norm": 4.729529857635498, "learning_rate": 1.431644354833059e-06, "loss": 0.252, "step": 15229 }, { "epoch": 0.7554938240984176, "grad_norm": 10.831707000732422, "learning_rate": 1.4310930572349397e-06, "loss": 0.3044, "step": 15230 }, { "epoch": 0.7555434297336178, "grad_norm": 10.005325317382812, "learning_rate": 1.4305418480759508e-06, "loss": 0.4357, "step": 15231 }, { "epoch": 0.755593035368818, "grad_norm": 5.818222999572754, "learning_rate": 1.4299907273697462e-06, "loss": 0.3189, "step": 15232 }, { "epoch": 0.755642641004018, "grad_norm": 6.259545803070068, "learning_rate": 1.4294396951299894e-06, "loss": 0.265, "step": 15233 }, { "epoch": 0.7556922466392182, "grad_norm": 4.555484294891357, "learning_rate": 1.4288887513703304e-06, "loss": 0.2414, "step": 15234 }, { "epoch": 0.7557418522744184, "grad_norm": 9.738944053649902, "learning_rate": 1.4283378961044237e-06, "loss": 0.3201, "step": 15235 }, { "epoch": 0.7557914579096185, "grad_norm": 7.609431266784668, "learning_rate": 1.4277871293459195e-06, "loss": 0.3677, "step": 15236 }, { "epoch": 0.7558410635448187, "grad_norm": 4.295038223266602, "learning_rate": 1.427236451108468e-06, "loss": 0.2342, "step": 15237 }, { "epoch": 0.7558906691800189, "grad_norm": 3.4891304969787598, "learning_rate": 1.42668586140571e-06, "loss": 0.2214, "step": 15238 }, { "epoch": 0.755940274815219, "grad_norm": 8.550375938415527, "learning_rate": 1.4261353602512956e-06, "loss": 0.2569, "step": 15239 }, { "epoch": 0.7559898804504191, "grad_norm": 6.119481086730957, "learning_rate": 1.4255849476588623e-06, "loss": 0.1938, "step": 15240 }, { "epoch": 0.7560394860856193, "grad_norm": 18.24945831298828, "learning_rate": 1.425034623642051e-06, "loss": 0.3789, "step": 15241 }, { "epoch": 0.7560890917208195, "grad_norm": 7.445356369018555, "learning_rate": 1.4244843882144988e-06, "loss": 0.2964, "step": 15242 }, { "epoch": 0.7561386973560197, "grad_norm": 7.494801998138428, "learning_rate": 1.423934241389841e-06, "loss": 0.1904, "step": 15243 }, { "epoch": 0.7561883029912198, "grad_norm": 6.701160430908203, "learning_rate": 1.4233841831817096e-06, "loss": 0.2632, "step": 15244 }, { "epoch": 0.7562379086264199, "grad_norm": 6.977924823760986, "learning_rate": 1.4228342136037383e-06, "loss": 0.3004, "step": 15245 }, { "epoch": 0.7562875142616201, "grad_norm": 9.866889953613281, "learning_rate": 1.4222843326695502e-06, "loss": 0.3752, "step": 15246 }, { "epoch": 0.7563371198968203, "grad_norm": 4.289822578430176, "learning_rate": 1.4217345403927775e-06, "loss": 0.2797, "step": 15247 }, { "epoch": 0.7563867255320205, "grad_norm": 12.873724937438965, "learning_rate": 1.42118483678704e-06, "loss": 0.398, "step": 15248 }, { "epoch": 0.7564363311672206, "grad_norm": 5.126529693603516, "learning_rate": 1.4206352218659614e-06, "loss": 0.2405, "step": 15249 }, { "epoch": 0.7564859368024207, "grad_norm": 7.427640438079834, "learning_rate": 1.4200856956431609e-06, "loss": 0.3355, "step": 15250 }, { "epoch": 0.7565355424376209, "grad_norm": 12.074782371520996, "learning_rate": 1.419536258132258e-06, "loss": 0.3593, "step": 15251 }, { "epoch": 0.7565851480728211, "grad_norm": 4.393135070800781, "learning_rate": 1.4189869093468627e-06, "loss": 0.3195, "step": 15252 }, { "epoch": 0.7566347537080212, "grad_norm": 6.740605354309082, "learning_rate": 1.4184376493005952e-06, "loss": 0.2777, "step": 15253 }, { "epoch": 0.7566843593432214, "grad_norm": 7.818419933319092, "learning_rate": 1.4178884780070612e-06, "loss": 0.3106, "step": 15254 }, { "epoch": 0.7567339649784216, "grad_norm": 9.562472343444824, "learning_rate": 1.4173393954798709e-06, "loss": 0.2915, "step": 15255 }, { "epoch": 0.7567835706136217, "grad_norm": 5.609111785888672, "learning_rate": 1.4167904017326311e-06, "loss": 0.3153, "step": 15256 }, { "epoch": 0.7568331762488218, "grad_norm": 4.933338165283203, "learning_rate": 1.4162414967789462e-06, "loss": 0.2728, "step": 15257 }, { "epoch": 0.756882781884022, "grad_norm": 11.480425834655762, "learning_rate": 1.4156926806324179e-06, "loss": 0.4115, "step": 15258 }, { "epoch": 0.7569323875192222, "grad_norm": 4.575624942779541, "learning_rate": 1.415143953306648e-06, "loss": 0.1875, "step": 15259 }, { "epoch": 0.7569819931544224, "grad_norm": 4.966658115386963, "learning_rate": 1.4145953148152291e-06, "loss": 0.2856, "step": 15260 }, { "epoch": 0.7570315987896225, "grad_norm": 11.00710391998291, "learning_rate": 1.4140467651717637e-06, "loss": 0.3605, "step": 15261 }, { "epoch": 0.7570812044248226, "grad_norm": 24.090707778930664, "learning_rate": 1.4134983043898398e-06, "loss": 0.2995, "step": 15262 }, { "epoch": 0.7571308100600228, "grad_norm": 13.438766479492188, "learning_rate": 1.4129499324830503e-06, "loss": 0.2899, "step": 15263 }, { "epoch": 0.757180415695223, "grad_norm": 7.043218612670898, "learning_rate": 1.412401649464984e-06, "loss": 0.2855, "step": 15264 }, { "epoch": 0.7572300213304232, "grad_norm": 5.253851413726807, "learning_rate": 1.4118534553492275e-06, "loss": 0.2862, "step": 15265 }, { "epoch": 0.7572796269656233, "grad_norm": 5.144090175628662, "learning_rate": 1.4113053501493657e-06, "loss": 0.2795, "step": 15266 }, { "epoch": 0.7573292326008234, "grad_norm": 6.078547954559326, "learning_rate": 1.410757333878981e-06, "loss": 0.2924, "step": 15267 }, { "epoch": 0.7573788382360236, "grad_norm": 7.615826606750488, "learning_rate": 1.4102094065516542e-06, "loss": 0.2673, "step": 15268 }, { "epoch": 0.7574284438712238, "grad_norm": 8.479366302490234, "learning_rate": 1.4096615681809611e-06, "loss": 0.2631, "step": 15269 }, { "epoch": 0.757478049506424, "grad_norm": 10.285832405090332, "learning_rate": 1.409113818780478e-06, "loss": 0.351, "step": 15270 }, { "epoch": 0.7575276551416241, "grad_norm": 4.365536689758301, "learning_rate": 1.4085661583637788e-06, "loss": 0.2336, "step": 15271 }, { "epoch": 0.7575772607768243, "grad_norm": 5.8013505935668945, "learning_rate": 1.4080185869444352e-06, "loss": 0.2521, "step": 15272 }, { "epoch": 0.7576268664120244, "grad_norm": 5.202980995178223, "learning_rate": 1.407471104536015e-06, "loss": 0.2483, "step": 15273 }, { "epoch": 0.7576764720472245, "grad_norm": 4.112873077392578, "learning_rate": 1.4069237111520883e-06, "loss": 0.1907, "step": 15274 }, { "epoch": 0.7577260776824247, "grad_norm": 4.808740615844727, "learning_rate": 1.4063764068062142e-06, "loss": 0.2709, "step": 15275 }, { "epoch": 0.7577756833176249, "grad_norm": 5.972844123840332, "learning_rate": 1.405829191511961e-06, "loss": 0.2224, "step": 15276 }, { "epoch": 0.7578252889528251, "grad_norm": 7.723739147186279, "learning_rate": 1.4052820652828846e-06, "loss": 0.3826, "step": 15277 }, { "epoch": 0.7578748945880251, "grad_norm": 6.930663108825684, "learning_rate": 1.404735028132545e-06, "loss": 0.3086, "step": 15278 }, { "epoch": 0.7579245002232253, "grad_norm": 6.867050647735596, "learning_rate": 1.4041880800744978e-06, "loss": 0.3927, "step": 15279 }, { "epoch": 0.7579741058584255, "grad_norm": 3.7942798137664795, "learning_rate": 1.4036412211222967e-06, "loss": 0.1984, "step": 15280 }, { "epoch": 0.7580237114936257, "grad_norm": 10.423836708068848, "learning_rate": 1.4030944512894922e-06, "loss": 0.4099, "step": 15281 }, { "epoch": 0.7580733171288259, "grad_norm": 5.44551420211792, "learning_rate": 1.4025477705896368e-06, "loss": 0.3109, "step": 15282 }, { "epoch": 0.758122922764026, "grad_norm": 6.812054634094238, "learning_rate": 1.4020011790362708e-06, "loss": 0.2099, "step": 15283 }, { "epoch": 0.7581725283992261, "grad_norm": 5.3625288009643555, "learning_rate": 1.401454676642947e-06, "loss": 0.2782, "step": 15284 }, { "epoch": 0.7582221340344263, "grad_norm": 6.750328063964844, "learning_rate": 1.4009082634232025e-06, "loss": 0.2512, "step": 15285 }, { "epoch": 0.7582717396696265, "grad_norm": 6.7921881675720215, "learning_rate": 1.4003619393905788e-06, "loss": 0.2608, "step": 15286 }, { "epoch": 0.7583213453048266, "grad_norm": 12.328208923339844, "learning_rate": 1.3998157045586148e-06, "loss": 0.5198, "step": 15287 }, { "epoch": 0.7583709509400268, "grad_norm": 6.202857971191406, "learning_rate": 1.3992695589408478e-06, "loss": 0.2056, "step": 15288 }, { "epoch": 0.758420556575227, "grad_norm": 18.159360885620117, "learning_rate": 1.3987235025508066e-06, "loss": 0.4977, "step": 15289 }, { "epoch": 0.7584701622104271, "grad_norm": 20.662425994873047, "learning_rate": 1.39817753540203e-06, "loss": 0.2343, "step": 15290 }, { "epoch": 0.7585197678456272, "grad_norm": 6.219146251678467, "learning_rate": 1.3976316575080405e-06, "loss": 0.2581, "step": 15291 }, { "epoch": 0.7585693734808274, "grad_norm": 6.541816234588623, "learning_rate": 1.3970858688823692e-06, "loss": 0.3211, "step": 15292 }, { "epoch": 0.7586189791160276, "grad_norm": 5.498317241668701, "learning_rate": 1.396540169538539e-06, "loss": 0.2377, "step": 15293 }, { "epoch": 0.7586685847512278, "grad_norm": 16.435970306396484, "learning_rate": 1.3959945594900754e-06, "loss": 0.5216, "step": 15294 }, { "epoch": 0.7587181903864278, "grad_norm": 10.252490997314453, "learning_rate": 1.3954490387504937e-06, "loss": 0.2473, "step": 15295 }, { "epoch": 0.758767796021628, "grad_norm": 5.040031909942627, "learning_rate": 1.3949036073333183e-06, "loss": 0.2934, "step": 15296 }, { "epoch": 0.7588174016568282, "grad_norm": 5.491507530212402, "learning_rate": 1.3943582652520593e-06, "loss": 0.3077, "step": 15297 }, { "epoch": 0.7588670072920284, "grad_norm": 7.503713607788086, "learning_rate": 1.3938130125202371e-06, "loss": 0.3321, "step": 15298 }, { "epoch": 0.7589166129272286, "grad_norm": 9.506240844726562, "learning_rate": 1.3932678491513574e-06, "loss": 0.2499, "step": 15299 }, { "epoch": 0.7589662185624287, "grad_norm": 6.7765212059021, "learning_rate": 1.3927227751589323e-06, "loss": 0.2931, "step": 15300 }, { "epoch": 0.7590158241976288, "grad_norm": 5.812833786010742, "learning_rate": 1.3921777905564681e-06, "loss": 0.2649, "step": 15301 }, { "epoch": 0.759065429832829, "grad_norm": 6.594110012054443, "learning_rate": 1.391632895357472e-06, "loss": 0.3631, "step": 15302 }, { "epoch": 0.7591150354680292, "grad_norm": 9.510269165039062, "learning_rate": 1.3910880895754424e-06, "loss": 0.2985, "step": 15303 }, { "epoch": 0.7591646411032293, "grad_norm": 26.286523818969727, "learning_rate": 1.390543373223885e-06, "loss": 0.398, "step": 15304 }, { "epoch": 0.7592142467384295, "grad_norm": 5.47616720199585, "learning_rate": 1.3899987463162939e-06, "loss": 0.1892, "step": 15305 }, { "epoch": 0.7592638523736297, "grad_norm": 7.10435676574707, "learning_rate": 1.3894542088661673e-06, "loss": 0.333, "step": 15306 }, { "epoch": 0.7593134580088298, "grad_norm": 16.17997932434082, "learning_rate": 1.3889097608869983e-06, "loss": 0.493, "step": 15307 }, { "epoch": 0.75936306364403, "grad_norm": 6.633283615112305, "learning_rate": 1.388365402392281e-06, "loss": 0.2893, "step": 15308 }, { "epoch": 0.7594126692792301, "grad_norm": 4.6541595458984375, "learning_rate": 1.387821133395499e-06, "loss": 0.2748, "step": 15309 }, { "epoch": 0.7594622749144303, "grad_norm": 7.428289890289307, "learning_rate": 1.3872769539101466e-06, "loss": 0.2271, "step": 15310 }, { "epoch": 0.7595118805496305, "grad_norm": 5.940121650695801, "learning_rate": 1.386732863949704e-06, "loss": 0.2927, "step": 15311 }, { "epoch": 0.7595614861848305, "grad_norm": 5.612682819366455, "learning_rate": 1.3861888635276555e-06, "loss": 0.2806, "step": 15312 }, { "epoch": 0.7596110918200307, "grad_norm": 6.926743984222412, "learning_rate": 1.3856449526574817e-06, "loss": 0.2295, "step": 15313 }, { "epoch": 0.7596606974552309, "grad_norm": 5.3941545486450195, "learning_rate": 1.3851011313526607e-06, "loss": 0.3119, "step": 15314 }, { "epoch": 0.7597103030904311, "grad_norm": 7.0932512283325195, "learning_rate": 1.3845573996266686e-06, "loss": 0.2363, "step": 15315 }, { "epoch": 0.7597599087256313, "grad_norm": 7.531085014343262, "learning_rate": 1.3840137574929802e-06, "loss": 0.3026, "step": 15316 }, { "epoch": 0.7598095143608314, "grad_norm": 6.923582077026367, "learning_rate": 1.3834702049650667e-06, "loss": 0.2869, "step": 15317 }, { "epoch": 0.7598591199960315, "grad_norm": 3.336113214492798, "learning_rate": 1.3829267420563986e-06, "loss": 0.2106, "step": 15318 }, { "epoch": 0.7599087256312317, "grad_norm": 7.855454444885254, "learning_rate": 1.3823833687804406e-06, "loss": 0.2601, "step": 15319 }, { "epoch": 0.7599583312664319, "grad_norm": 5.302901268005371, "learning_rate": 1.3818400851506587e-06, "loss": 0.2899, "step": 15320 }, { "epoch": 0.760007936901632, "grad_norm": 6.7769246101379395, "learning_rate": 1.381296891180517e-06, "loss": 0.2775, "step": 15321 }, { "epoch": 0.7600575425368322, "grad_norm": 5.979465007781982, "learning_rate": 1.3807537868834747e-06, "loss": 0.2824, "step": 15322 }, { "epoch": 0.7601071481720323, "grad_norm": 4.0866217613220215, "learning_rate": 1.3802107722729907e-06, "loss": 0.2337, "step": 15323 }, { "epoch": 0.7601567538072325, "grad_norm": 5.858952045440674, "learning_rate": 1.3796678473625213e-06, "loss": 0.3181, "step": 15324 }, { "epoch": 0.7602063594424326, "grad_norm": 3.9911551475524902, "learning_rate": 1.3791250121655214e-06, "loss": 0.2813, "step": 15325 }, { "epoch": 0.7602559650776328, "grad_norm": 5.037984848022461, "learning_rate": 1.3785822666954408e-06, "loss": 0.2863, "step": 15326 }, { "epoch": 0.760305570712833, "grad_norm": 6.777380466461182, "learning_rate": 1.3780396109657297e-06, "loss": 0.1687, "step": 15327 }, { "epoch": 0.7603551763480332, "grad_norm": 5.035635471343994, "learning_rate": 1.377497044989835e-06, "loss": 0.1974, "step": 15328 }, { "epoch": 0.7604047819832332, "grad_norm": 8.118261337280273, "learning_rate": 1.3769545687812025e-06, "loss": 0.2449, "step": 15329 }, { "epoch": 0.7604543876184334, "grad_norm": 4.4131999015808105, "learning_rate": 1.3764121823532744e-06, "loss": 0.3095, "step": 15330 }, { "epoch": 0.7605039932536336, "grad_norm": 5.3165283203125, "learning_rate": 1.3758698857194913e-06, "loss": 0.2519, "step": 15331 }, { "epoch": 0.7605535988888338, "grad_norm": 8.883950233459473, "learning_rate": 1.3753276788932924e-06, "loss": 0.2464, "step": 15332 }, { "epoch": 0.760603204524034, "grad_norm": 10.394075393676758, "learning_rate": 1.3747855618881145e-06, "loss": 0.3236, "step": 15333 }, { "epoch": 0.7606528101592341, "grad_norm": 6.929300308227539, "learning_rate": 1.3742435347173872e-06, "loss": 0.3119, "step": 15334 }, { "epoch": 0.7607024157944342, "grad_norm": 4.2046217918396, "learning_rate": 1.3737015973945484e-06, "loss": 0.2666, "step": 15335 }, { "epoch": 0.7607520214296344, "grad_norm": 4.94038724899292, "learning_rate": 1.3731597499330223e-06, "loss": 0.2169, "step": 15336 }, { "epoch": 0.7608016270648346, "grad_norm": 7.443838596343994, "learning_rate": 1.372617992346239e-06, "loss": 0.1987, "step": 15337 }, { "epoch": 0.7608512327000347, "grad_norm": 6.568140983581543, "learning_rate": 1.3720763246476222e-06, "loss": 0.2872, "step": 15338 }, { "epoch": 0.7609008383352349, "grad_norm": 8.58154010772705, "learning_rate": 1.3715347468505968e-06, "loss": 0.2801, "step": 15339 }, { "epoch": 0.760950443970435, "grad_norm": 7.886511325836182, "learning_rate": 1.3709932589685788e-06, "loss": 0.3328, "step": 15340 }, { "epoch": 0.7610000496056352, "grad_norm": 4.694490432739258, "learning_rate": 1.3704518610149925e-06, "loss": 0.2911, "step": 15341 }, { "epoch": 0.7610496552408353, "grad_norm": 6.488954544067383, "learning_rate": 1.369910553003249e-06, "loss": 0.3215, "step": 15342 }, { "epoch": 0.7610992608760355, "grad_norm": 8.679045677185059, "learning_rate": 1.369369334946764e-06, "loss": 0.318, "step": 15343 }, { "epoch": 0.7611488665112357, "grad_norm": 8.930645942687988, "learning_rate": 1.3688282068589498e-06, "loss": 0.2931, "step": 15344 }, { "epoch": 0.7611984721464359, "grad_norm": 13.708151817321777, "learning_rate": 1.3682871687532161e-06, "loss": 0.2745, "step": 15345 }, { "epoch": 0.761248077781636, "grad_norm": 16.471935272216797, "learning_rate": 1.3677462206429664e-06, "loss": 0.2936, "step": 15346 }, { "epoch": 0.7612976834168361, "grad_norm": 9.79115104675293, "learning_rate": 1.3672053625416115e-06, "loss": 0.3463, "step": 15347 }, { "epoch": 0.7613472890520363, "grad_norm": 5.547995567321777, "learning_rate": 1.3666645944625495e-06, "loss": 0.2476, "step": 15348 }, { "epoch": 0.7613968946872365, "grad_norm": 9.518482208251953, "learning_rate": 1.3661239164191825e-06, "loss": 0.2418, "step": 15349 }, { "epoch": 0.7614465003224367, "grad_norm": 8.215341567993164, "learning_rate": 1.3655833284249088e-06, "loss": 0.2926, "step": 15350 }, { "epoch": 0.7614961059576368, "grad_norm": 5.526189804077148, "learning_rate": 1.3650428304931235e-06, "loss": 0.2553, "step": 15351 }, { "epoch": 0.7615457115928369, "grad_norm": 7.839972019195557, "learning_rate": 1.3645024226372217e-06, "loss": 0.3454, "step": 15352 }, { "epoch": 0.7615953172280371, "grad_norm": 5.706414699554443, "learning_rate": 1.3639621048705959e-06, "loss": 0.2917, "step": 15353 }, { "epoch": 0.7616449228632373, "grad_norm": 5.714510440826416, "learning_rate": 1.3634218772066305e-06, "loss": 0.3447, "step": 15354 }, { "epoch": 0.7616945284984374, "grad_norm": 6.651007652282715, "learning_rate": 1.3628817396587191e-06, "loss": 0.2907, "step": 15355 }, { "epoch": 0.7617441341336376, "grad_norm": 7.508284091949463, "learning_rate": 1.3623416922402422e-06, "loss": 0.3062, "step": 15356 }, { "epoch": 0.7617937397688377, "grad_norm": 8.539408683776855, "learning_rate": 1.3618017349645835e-06, "loss": 0.206, "step": 15357 }, { "epoch": 0.7618433454040379, "grad_norm": 3.848054885864258, "learning_rate": 1.3612618678451234e-06, "loss": 0.2256, "step": 15358 }, { "epoch": 0.761892951039238, "grad_norm": 4.598465442657471, "learning_rate": 1.3607220908952419e-06, "loss": 0.2929, "step": 15359 }, { "epoch": 0.7619425566744382, "grad_norm": 12.665260314941406, "learning_rate": 1.36018240412831e-06, "loss": 0.3542, "step": 15360 }, { "epoch": 0.7619921623096384, "grad_norm": 6.683730602264404, "learning_rate": 1.359642807557708e-06, "loss": 0.2711, "step": 15361 }, { "epoch": 0.7620417679448386, "grad_norm": 5.34312629699707, "learning_rate": 1.3591033011968018e-06, "loss": 0.3172, "step": 15362 }, { "epoch": 0.7620913735800386, "grad_norm": 5.067599773406982, "learning_rate": 1.3585638850589632e-06, "loss": 0.2508, "step": 15363 }, { "epoch": 0.7621409792152388, "grad_norm": 4.992459297180176, "learning_rate": 1.3580245591575585e-06, "loss": 0.2107, "step": 15364 }, { "epoch": 0.762190584850439, "grad_norm": 6.797126770019531, "learning_rate": 1.3574853235059533e-06, "loss": 0.2555, "step": 15365 }, { "epoch": 0.7622401904856392, "grad_norm": 5.7981038093566895, "learning_rate": 1.3569461781175086e-06, "loss": 0.2277, "step": 15366 }, { "epoch": 0.7622897961208394, "grad_norm": 3.7024362087249756, "learning_rate": 1.3564071230055876e-06, "loss": 0.2209, "step": 15367 }, { "epoch": 0.7623394017560395, "grad_norm": 6.701279640197754, "learning_rate": 1.355868158183543e-06, "loss": 0.2547, "step": 15368 }, { "epoch": 0.7623890073912396, "grad_norm": 10.82253360748291, "learning_rate": 1.3553292836647375e-06, "loss": 0.3245, "step": 15369 }, { "epoch": 0.7624386130264398, "grad_norm": 6.386322498321533, "learning_rate": 1.3547904994625188e-06, "loss": 0.3043, "step": 15370 }, { "epoch": 0.76248821866164, "grad_norm": 6.966365814208984, "learning_rate": 1.3542518055902404e-06, "loss": 0.2747, "step": 15371 }, { "epoch": 0.7625378242968401, "grad_norm": 6.170523643493652, "learning_rate": 1.353713202061252e-06, "loss": 0.3047, "step": 15372 }, { "epoch": 0.7625874299320403, "grad_norm": 5.735507488250732, "learning_rate": 1.3531746888888991e-06, "loss": 0.2813, "step": 15373 }, { "epoch": 0.7626370355672404, "grad_norm": 8.719460487365723, "learning_rate": 1.3526362660865278e-06, "loss": 0.3609, "step": 15374 }, { "epoch": 0.7626866412024406, "grad_norm": 11.937878608703613, "learning_rate": 1.3520979336674806e-06, "loss": 0.3332, "step": 15375 }, { "epoch": 0.7627362468376407, "grad_norm": 15.403190612792969, "learning_rate": 1.3515596916450957e-06, "loss": 0.4985, "step": 15376 }, { "epoch": 0.7627858524728409, "grad_norm": 5.376856327056885, "learning_rate": 1.3510215400327114e-06, "loss": 0.2724, "step": 15377 }, { "epoch": 0.7628354581080411, "grad_norm": 6.771918773651123, "learning_rate": 1.3504834788436643e-06, "loss": 0.2563, "step": 15378 }, { "epoch": 0.7628850637432413, "grad_norm": 12.445659637451172, "learning_rate": 1.3499455080912876e-06, "loss": 0.3657, "step": 15379 }, { "epoch": 0.7629346693784413, "grad_norm": 3.7534635066986084, "learning_rate": 1.3494076277889123e-06, "loss": 0.2313, "step": 15380 }, { "epoch": 0.7629842750136415, "grad_norm": 7.65077543258667, "learning_rate": 1.348869837949867e-06, "loss": 0.2139, "step": 15381 }, { "epoch": 0.7630338806488417, "grad_norm": 5.457548141479492, "learning_rate": 1.3483321385874804e-06, "loss": 0.2521, "step": 15382 }, { "epoch": 0.7630834862840419, "grad_norm": 11.131635665893555, "learning_rate": 1.3477945297150724e-06, "loss": 0.2806, "step": 15383 }, { "epoch": 0.7631330919192421, "grad_norm": 5.282505035400391, "learning_rate": 1.347257011345971e-06, "loss": 0.2535, "step": 15384 }, { "epoch": 0.7631826975544422, "grad_norm": 11.530799865722656, "learning_rate": 1.3467195834934915e-06, "loss": 0.2881, "step": 15385 }, { "epoch": 0.7632323031896423, "grad_norm": 5.097306251525879, "learning_rate": 1.3461822461709534e-06, "loss": 0.2885, "step": 15386 }, { "epoch": 0.7632819088248425, "grad_norm": 6.715343952178955, "learning_rate": 1.345644999391672e-06, "loss": 0.2092, "step": 15387 }, { "epoch": 0.7633315144600427, "grad_norm": 5.48936653137207, "learning_rate": 1.3451078431689602e-06, "loss": 0.3363, "step": 15388 }, { "epoch": 0.7633811200952428, "grad_norm": 11.70213508605957, "learning_rate": 1.3445707775161298e-06, "loss": 0.3403, "step": 15389 }, { "epoch": 0.763430725730443, "grad_norm": 5.378225326538086, "learning_rate": 1.3440338024464905e-06, "loss": 0.2216, "step": 15390 }, { "epoch": 0.7634803313656431, "grad_norm": 4.603983402252197, "learning_rate": 1.3434969179733443e-06, "loss": 0.2126, "step": 15391 }, { "epoch": 0.7635299370008433, "grad_norm": 9.163580894470215, "learning_rate": 1.3429601241100015e-06, "loss": 0.2847, "step": 15392 }, { "epoch": 0.7635795426360434, "grad_norm": 6.714589595794678, "learning_rate": 1.3424234208697596e-06, "loss": 0.2835, "step": 15393 }, { "epoch": 0.7636291482712436, "grad_norm": 7.29407262802124, "learning_rate": 1.3418868082659199e-06, "loss": 0.2267, "step": 15394 }, { "epoch": 0.7636787539064438, "grad_norm": 9.55265998840332, "learning_rate": 1.3413502863117793e-06, "loss": 0.289, "step": 15395 }, { "epoch": 0.763728359541644, "grad_norm": 3.4326605796813965, "learning_rate": 1.3408138550206357e-06, "loss": 0.1666, "step": 15396 }, { "epoch": 0.763777965176844, "grad_norm": 4.517385959625244, "learning_rate": 1.3402775144057767e-06, "loss": 0.2382, "step": 15397 }, { "epoch": 0.7638275708120442, "grad_norm": 18.626556396484375, "learning_rate": 1.3397412644804996e-06, "loss": 0.3913, "step": 15398 }, { "epoch": 0.7638771764472444, "grad_norm": 5.803714275360107, "learning_rate": 1.339205105258088e-06, "loss": 0.2261, "step": 15399 }, { "epoch": 0.7639267820824446, "grad_norm": 6.237634658813477, "learning_rate": 1.3386690367518302e-06, "loss": 0.2706, "step": 15400 }, { "epoch": 0.7639763877176448, "grad_norm": 20.003957748413086, "learning_rate": 1.338133058975009e-06, "loss": 0.3343, "step": 15401 }, { "epoch": 0.7640259933528449, "grad_norm": 11.0209321975708, "learning_rate": 1.337597171940908e-06, "loss": 0.3213, "step": 15402 }, { "epoch": 0.764075598988045, "grad_norm": 12.430421829223633, "learning_rate": 1.3370613756628054e-06, "loss": 0.4109, "step": 15403 }, { "epoch": 0.7641252046232452, "grad_norm": 6.9928364753723145, "learning_rate": 1.3365256701539802e-06, "loss": 0.3547, "step": 15404 }, { "epoch": 0.7641748102584454, "grad_norm": 7.6716718673706055, "learning_rate": 1.3359900554277034e-06, "loss": 0.3256, "step": 15405 }, { "epoch": 0.7642244158936455, "grad_norm": 5.7331719398498535, "learning_rate": 1.3354545314972533e-06, "loss": 0.2841, "step": 15406 }, { "epoch": 0.7642740215288457, "grad_norm": 6.044097423553467, "learning_rate": 1.3349190983758958e-06, "loss": 0.2546, "step": 15407 }, { "epoch": 0.7643236271640458, "grad_norm": 6.261798858642578, "learning_rate": 1.3343837560769013e-06, "loss": 0.2805, "step": 15408 }, { "epoch": 0.764373232799246, "grad_norm": 8.361985206604004, "learning_rate": 1.3338485046135352e-06, "loss": 0.3812, "step": 15409 }, { "epoch": 0.7644228384344461, "grad_norm": 6.514257431030273, "learning_rate": 1.333313343999063e-06, "loss": 0.3024, "step": 15410 }, { "epoch": 0.7644724440696463, "grad_norm": 13.72039794921875, "learning_rate": 1.3327782742467421e-06, "loss": 0.3939, "step": 15411 }, { "epoch": 0.7645220497048465, "grad_norm": 9.814217567443848, "learning_rate": 1.3322432953698372e-06, "loss": 0.4021, "step": 15412 }, { "epoch": 0.7645716553400467, "grad_norm": 6.96970272064209, "learning_rate": 1.331708407381601e-06, "loss": 0.3226, "step": 15413 }, { "epoch": 0.7646212609752467, "grad_norm": 13.620691299438477, "learning_rate": 1.33117361029529e-06, "loss": 0.3372, "step": 15414 }, { "epoch": 0.7646708666104469, "grad_norm": 6.407003879547119, "learning_rate": 1.3306389041241563e-06, "loss": 0.2935, "step": 15415 }, { "epoch": 0.7647204722456471, "grad_norm": 7.415825843811035, "learning_rate": 1.330104288881452e-06, "loss": 0.3026, "step": 15416 }, { "epoch": 0.7647700778808473, "grad_norm": 15.205933570861816, "learning_rate": 1.3295697645804206e-06, "loss": 0.3586, "step": 15417 }, { "epoch": 0.7648196835160475, "grad_norm": 5.705008506774902, "learning_rate": 1.3290353312343135e-06, "loss": 0.3121, "step": 15418 }, { "epoch": 0.7648692891512476, "grad_norm": 8.189777374267578, "learning_rate": 1.3285009888563698e-06, "loss": 0.2866, "step": 15419 }, { "epoch": 0.7649188947864477, "grad_norm": 15.508015632629395, "learning_rate": 1.3279667374598326e-06, "loss": 0.4176, "step": 15420 }, { "epoch": 0.7649685004216479, "grad_norm": 10.483165740966797, "learning_rate": 1.327432577057941e-06, "loss": 0.4165, "step": 15421 }, { "epoch": 0.7650181060568481, "grad_norm": 10.557506561279297, "learning_rate": 1.3268985076639313e-06, "loss": 0.3116, "step": 15422 }, { "epoch": 0.7650677116920482, "grad_norm": 10.783430099487305, "learning_rate": 1.3263645292910383e-06, "loss": 0.3649, "step": 15423 }, { "epoch": 0.7651173173272484, "grad_norm": 7.008248805999756, "learning_rate": 1.3258306419524946e-06, "loss": 0.2608, "step": 15424 }, { "epoch": 0.7651669229624485, "grad_norm": 9.576128005981445, "learning_rate": 1.3252968456615274e-06, "loss": 0.2371, "step": 15425 }, { "epoch": 0.7652165285976487, "grad_norm": 5.869774341583252, "learning_rate": 1.3247631404313692e-06, "loss": 0.2705, "step": 15426 }, { "epoch": 0.7652661342328488, "grad_norm": 9.382150650024414, "learning_rate": 1.324229526275242e-06, "loss": 0.2798, "step": 15427 }, { "epoch": 0.765315739868049, "grad_norm": 5.471782684326172, "learning_rate": 1.3236960032063696e-06, "loss": 0.2896, "step": 15428 }, { "epoch": 0.7653653455032492, "grad_norm": 8.995447158813477, "learning_rate": 1.323162571237973e-06, "loss": 0.3993, "step": 15429 }, { "epoch": 0.7654149511384494, "grad_norm": 4.23231315612793, "learning_rate": 1.322629230383271e-06, "loss": 0.2361, "step": 15430 }, { "epoch": 0.7654645567736494, "grad_norm": 4.026801109313965, "learning_rate": 1.3220959806554805e-06, "loss": 0.2055, "step": 15431 }, { "epoch": 0.7655141624088496, "grad_norm": 16.059249877929688, "learning_rate": 1.3215628220678152e-06, "loss": 0.3482, "step": 15432 }, { "epoch": 0.7655637680440498, "grad_norm": 7.358248710632324, "learning_rate": 1.3210297546334882e-06, "loss": 0.3642, "step": 15433 }, { "epoch": 0.76561337367925, "grad_norm": 6.240875244140625, "learning_rate": 1.320496778365707e-06, "loss": 0.2394, "step": 15434 }, { "epoch": 0.7656629793144502, "grad_norm": 10.65485668182373, "learning_rate": 1.31996389327768e-06, "loss": 0.3197, "step": 15435 }, { "epoch": 0.7657125849496503, "grad_norm": 8.181197166442871, "learning_rate": 1.3194310993826127e-06, "loss": 0.2475, "step": 15436 }, { "epoch": 0.7657621905848504, "grad_norm": 9.54472827911377, "learning_rate": 1.318898396693707e-06, "loss": 0.2614, "step": 15437 }, { "epoch": 0.7658117962200506, "grad_norm": 21.07317352294922, "learning_rate": 1.3183657852241643e-06, "loss": 0.3224, "step": 15438 }, { "epoch": 0.7658614018552508, "grad_norm": 5.9427947998046875, "learning_rate": 1.3178332649871833e-06, "loss": 0.3198, "step": 15439 }, { "epoch": 0.7659110074904509, "grad_norm": 6.994860649108887, "learning_rate": 1.3173008359959594e-06, "loss": 0.317, "step": 15440 }, { "epoch": 0.7659606131256511, "grad_norm": 10.583698272705078, "learning_rate": 1.316768498263688e-06, "loss": 0.3528, "step": 15441 }, { "epoch": 0.7660102187608512, "grad_norm": 3.9430503845214844, "learning_rate": 1.316236251803556e-06, "loss": 0.2126, "step": 15442 }, { "epoch": 0.7660598243960514, "grad_norm": 18.175796508789062, "learning_rate": 1.31570409662876e-06, "loss": 0.3579, "step": 15443 }, { "epoch": 0.7661094300312515, "grad_norm": 5.652496814727783, "learning_rate": 1.3151720327524809e-06, "loss": 0.2716, "step": 15444 }, { "epoch": 0.7661590356664517, "grad_norm": 12.096941947937012, "learning_rate": 1.3146400601879055e-06, "loss": 0.326, "step": 15445 }, { "epoch": 0.7662086413016519, "grad_norm": 13.50741195678711, "learning_rate": 1.3141081789482168e-06, "loss": 0.3436, "step": 15446 }, { "epoch": 0.7662582469368521, "grad_norm": 9.474241256713867, "learning_rate": 1.3135763890465957e-06, "loss": 0.3571, "step": 15447 }, { "epoch": 0.7663078525720521, "grad_norm": 4.937352180480957, "learning_rate": 1.3130446904962168e-06, "loss": 0.283, "step": 15448 }, { "epoch": 0.7663574582072523, "grad_norm": 14.149813652038574, "learning_rate": 1.3125130833102612e-06, "loss": 0.2203, "step": 15449 }, { "epoch": 0.7664070638424525, "grad_norm": 5.020172119140625, "learning_rate": 1.3119815675018982e-06, "loss": 0.261, "step": 15450 }, { "epoch": 0.7664566694776527, "grad_norm": 6.79613733291626, "learning_rate": 1.3114501430842996e-06, "loss": 0.2355, "step": 15451 }, { "epoch": 0.7665062751128529, "grad_norm": 9.564277648925781, "learning_rate": 1.3109188100706354e-06, "loss": 0.4093, "step": 15452 }, { "epoch": 0.766555880748053, "grad_norm": 6.58036470413208, "learning_rate": 1.3103875684740731e-06, "loss": 0.1714, "step": 15453 }, { "epoch": 0.7666054863832531, "grad_norm": 9.856040000915527, "learning_rate": 1.3098564183077728e-06, "loss": 0.2605, "step": 15454 }, { "epoch": 0.7666550920184533, "grad_norm": 21.96432876586914, "learning_rate": 1.3093253595849032e-06, "loss": 0.5635, "step": 15455 }, { "epoch": 0.7667046976536535, "grad_norm": 4.960902690887451, "learning_rate": 1.308794392318617e-06, "loss": 0.2501, "step": 15456 }, { "epoch": 0.7667543032888536, "grad_norm": 7.2379350662231445, "learning_rate": 1.3082635165220792e-06, "loss": 0.2886, "step": 15457 }, { "epoch": 0.7668039089240538, "grad_norm": 6.351375579833984, "learning_rate": 1.3077327322084393e-06, "loss": 0.3895, "step": 15458 }, { "epoch": 0.7668535145592539, "grad_norm": 9.701404571533203, "learning_rate": 1.3072020393908524e-06, "loss": 0.2825, "step": 15459 }, { "epoch": 0.7669031201944541, "grad_norm": 19.741485595703125, "learning_rate": 1.3066714380824697e-06, "loss": 0.302, "step": 15460 }, { "epoch": 0.7669527258296542, "grad_norm": 5.391019821166992, "learning_rate": 1.306140928296441e-06, "loss": 0.2271, "step": 15461 }, { "epoch": 0.7670023314648544, "grad_norm": 9.361543655395508, "learning_rate": 1.3056105100459082e-06, "loss": 0.1923, "step": 15462 }, { "epoch": 0.7670519371000546, "grad_norm": 9.966107368469238, "learning_rate": 1.3050801833440208e-06, "loss": 0.2405, "step": 15463 }, { "epoch": 0.7671015427352548, "grad_norm": 11.372961044311523, "learning_rate": 1.3045499482039165e-06, "loss": 0.2563, "step": 15464 }, { "epoch": 0.7671511483704548, "grad_norm": 6.627346992492676, "learning_rate": 1.3040198046387359e-06, "loss": 0.2917, "step": 15465 }, { "epoch": 0.767200754005655, "grad_norm": 4.913029670715332, "learning_rate": 1.3034897526616164e-06, "loss": 0.2742, "step": 15466 }, { "epoch": 0.7672503596408552, "grad_norm": 6.5830206871032715, "learning_rate": 1.3029597922856946e-06, "loss": 0.2487, "step": 15467 }, { "epoch": 0.7672999652760554, "grad_norm": 7.039596080780029, "learning_rate": 1.3024299235240983e-06, "loss": 0.3001, "step": 15468 }, { "epoch": 0.7673495709112556, "grad_norm": 8.857658386230469, "learning_rate": 1.3019001463899645e-06, "loss": 0.3494, "step": 15469 }, { "epoch": 0.7673991765464557, "grad_norm": 4.808359622955322, "learning_rate": 1.3013704608964162e-06, "loss": 0.322, "step": 15470 }, { "epoch": 0.7674487821816558, "grad_norm": 8.36349105834961, "learning_rate": 1.3008408670565808e-06, "loss": 0.3465, "step": 15471 }, { "epoch": 0.767498387816856, "grad_norm": 7.9117302894592285, "learning_rate": 1.300311364883582e-06, "loss": 0.2428, "step": 15472 }, { "epoch": 0.7675479934520562, "grad_norm": 11.651126861572266, "learning_rate": 1.2997819543905428e-06, "loss": 0.2424, "step": 15473 }, { "epoch": 0.7675975990872563, "grad_norm": 7.596479415893555, "learning_rate": 1.2992526355905776e-06, "loss": 0.2135, "step": 15474 }, { "epoch": 0.7676472047224565, "grad_norm": 7.295042037963867, "learning_rate": 1.2987234084968091e-06, "loss": 0.2611, "step": 15475 }, { "epoch": 0.7676968103576566, "grad_norm": 6.784264087677002, "learning_rate": 1.2981942731223456e-06, "loss": 0.2823, "step": 15476 }, { "epoch": 0.7677464159928568, "grad_norm": 3.874702215194702, "learning_rate": 1.297665229480306e-06, "loss": 0.1839, "step": 15477 }, { "epoch": 0.7677960216280569, "grad_norm": 9.478896141052246, "learning_rate": 1.2971362775837948e-06, "loss": 0.2967, "step": 15478 }, { "epoch": 0.7678456272632571, "grad_norm": 7.788932800292969, "learning_rate": 1.296607417445922e-06, "loss": 0.2883, "step": 15479 }, { "epoch": 0.7678952328984573, "grad_norm": 9.02302074432373, "learning_rate": 1.2960786490797921e-06, "loss": 0.3254, "step": 15480 }, { "epoch": 0.7679448385336575, "grad_norm": 6.205670356750488, "learning_rate": 1.2955499724985093e-06, "loss": 0.2417, "step": 15481 }, { "epoch": 0.7679944441688575, "grad_norm": 6.633292198181152, "learning_rate": 1.2950213877151735e-06, "loss": 0.2711, "step": 15482 }, { "epoch": 0.7680440498040577, "grad_norm": 9.230530738830566, "learning_rate": 1.2944928947428853e-06, "loss": 0.1851, "step": 15483 }, { "epoch": 0.7680936554392579, "grad_norm": 4.14544677734375, "learning_rate": 1.2939644935947377e-06, "loss": 0.1976, "step": 15484 }, { "epoch": 0.7681432610744581, "grad_norm": 9.719860076904297, "learning_rate": 1.2934361842838267e-06, "loss": 0.2573, "step": 15485 }, { "epoch": 0.7681928667096583, "grad_norm": 8.042455673217773, "learning_rate": 1.2929079668232436e-06, "loss": 0.2709, "step": 15486 }, { "epoch": 0.7682424723448584, "grad_norm": 7.8732829093933105, "learning_rate": 1.2923798412260775e-06, "loss": 0.3501, "step": 15487 }, { "epoch": 0.7682920779800585, "grad_norm": 5.086350917816162, "learning_rate": 1.2918518075054166e-06, "loss": 0.2972, "step": 15488 }, { "epoch": 0.7683416836152587, "grad_norm": 4.653799533843994, "learning_rate": 1.2913238656743455e-06, "loss": 0.182, "step": 15489 }, { "epoch": 0.7683912892504589, "grad_norm": 5.0820207595825195, "learning_rate": 1.2907960157459477e-06, "loss": 0.268, "step": 15490 }, { "epoch": 0.768440894885659, "grad_norm": 7.676180839538574, "learning_rate": 1.2902682577332992e-06, "loss": 0.3899, "step": 15491 }, { "epoch": 0.7684905005208592, "grad_norm": 10.54005241394043, "learning_rate": 1.2897405916494848e-06, "loss": 0.2466, "step": 15492 }, { "epoch": 0.7685401061560593, "grad_norm": 5.33302116394043, "learning_rate": 1.2892130175075752e-06, "loss": 0.2723, "step": 15493 }, { "epoch": 0.7685897117912595, "grad_norm": 6.359208106994629, "learning_rate": 1.2886855353206456e-06, "loss": 0.269, "step": 15494 }, { "epoch": 0.7686393174264596, "grad_norm": 4.072688102722168, "learning_rate": 1.2881581451017677e-06, "loss": 0.1925, "step": 15495 }, { "epoch": 0.7686889230616598, "grad_norm": 7.852564811706543, "learning_rate": 1.2876308468640097e-06, "loss": 0.316, "step": 15496 }, { "epoch": 0.76873852869686, "grad_norm": 7.931211471557617, "learning_rate": 1.2871036406204385e-06, "loss": 0.3594, "step": 15497 }, { "epoch": 0.7687881343320602, "grad_norm": 6.49329948425293, "learning_rate": 1.2865765263841202e-06, "loss": 0.2763, "step": 15498 }, { "epoch": 0.7688377399672602, "grad_norm": 5.1162896156311035, "learning_rate": 1.286049504168112e-06, "loss": 0.1767, "step": 15499 }, { "epoch": 0.7688873456024604, "grad_norm": 4.366115570068359, "learning_rate": 1.2855225739854803e-06, "loss": 0.2581, "step": 15500 }, { "epoch": 0.7689369512376606, "grad_norm": 8.15279483795166, "learning_rate": 1.284995735849278e-06, "loss": 0.276, "step": 15501 }, { "epoch": 0.7689865568728608, "grad_norm": 4.5080342292785645, "learning_rate": 1.2844689897725627e-06, "loss": 0.2582, "step": 15502 }, { "epoch": 0.769036162508061, "grad_norm": 17.231130599975586, "learning_rate": 1.2839423357683856e-06, "loss": 0.4252, "step": 15503 }, { "epoch": 0.7690857681432611, "grad_norm": 4.386404991149902, "learning_rate": 1.2834157738497998e-06, "loss": 0.2724, "step": 15504 }, { "epoch": 0.7691353737784612, "grad_norm": 7.6811699867248535, "learning_rate": 1.2828893040298496e-06, "loss": 0.2864, "step": 15505 }, { "epoch": 0.7691849794136614, "grad_norm": 4.201135635375977, "learning_rate": 1.282362926321587e-06, "loss": 0.2307, "step": 15506 }, { "epoch": 0.7692345850488616, "grad_norm": 5.5219807624816895, "learning_rate": 1.2818366407380512e-06, "loss": 0.2148, "step": 15507 }, { "epoch": 0.7692841906840617, "grad_norm": 10.977869033813477, "learning_rate": 1.281310447292285e-06, "loss": 0.2605, "step": 15508 }, { "epoch": 0.7693337963192619, "grad_norm": 4.981289386749268, "learning_rate": 1.2807843459973285e-06, "loss": 0.2124, "step": 15509 }, { "epoch": 0.769383401954462, "grad_norm": 5.115707874298096, "learning_rate": 1.280258336866218e-06, "loss": 0.2527, "step": 15510 }, { "epoch": 0.7694330075896622, "grad_norm": 5.075733661651611, "learning_rate": 1.2797324199119887e-06, "loss": 0.286, "step": 15511 }, { "epoch": 0.7694826132248623, "grad_norm": 4.38653039932251, "learning_rate": 1.279206595147674e-06, "loss": 0.1021, "step": 15512 }, { "epoch": 0.7695322188600625, "grad_norm": 5.135127544403076, "learning_rate": 1.2786808625863007e-06, "loss": 0.1958, "step": 15513 }, { "epoch": 0.7695818244952627, "grad_norm": 5.7158050537109375, "learning_rate": 1.2781552222409011e-06, "loss": 0.2725, "step": 15514 }, { "epoch": 0.7696314301304629, "grad_norm": 8.343243598937988, "learning_rate": 1.2776296741244977e-06, "loss": 0.368, "step": 15515 }, { "epoch": 0.7696810357656629, "grad_norm": 4.890343189239502, "learning_rate": 1.2771042182501147e-06, "loss": 0.263, "step": 15516 }, { "epoch": 0.7697306414008631, "grad_norm": 13.691330909729004, "learning_rate": 1.2765788546307733e-06, "loss": 0.4446, "step": 15517 }, { "epoch": 0.7697802470360633, "grad_norm": 5.8234758377075195, "learning_rate": 1.2760535832794934e-06, "loss": 0.2235, "step": 15518 }, { "epoch": 0.7698298526712635, "grad_norm": 5.796157360076904, "learning_rate": 1.2755284042092875e-06, "loss": 0.2561, "step": 15519 }, { "epoch": 0.7698794583064636, "grad_norm": 8.362332344055176, "learning_rate": 1.2750033174331755e-06, "loss": 0.2562, "step": 15520 }, { "epoch": 0.7699290639416638, "grad_norm": 9.514958381652832, "learning_rate": 1.2744783229641649e-06, "loss": 0.2643, "step": 15521 }, { "epoch": 0.7699786695768639, "grad_norm": 13.182063102722168, "learning_rate": 1.2739534208152665e-06, "loss": 0.2217, "step": 15522 }, { "epoch": 0.7700282752120641, "grad_norm": 12.40261173248291, "learning_rate": 1.2734286109994876e-06, "loss": 0.3573, "step": 15523 }, { "epoch": 0.7700778808472643, "grad_norm": 8.326191902160645, "learning_rate": 1.2729038935298354e-06, "loss": 0.3206, "step": 15524 }, { "epoch": 0.7701274864824644, "grad_norm": 13.506632804870605, "learning_rate": 1.2723792684193076e-06, "loss": 0.2842, "step": 15525 }, { "epoch": 0.7701770921176646, "grad_norm": 5.943816184997559, "learning_rate": 1.2718547356809102e-06, "loss": 0.2805, "step": 15526 }, { "epoch": 0.7702266977528647, "grad_norm": 9.338162422180176, "learning_rate": 1.2713302953276363e-06, "loss": 0.25, "step": 15527 }, { "epoch": 0.7702763033880649, "grad_norm": 5.345240116119385, "learning_rate": 1.270805947372487e-06, "loss": 0.2731, "step": 15528 }, { "epoch": 0.770325909023265, "grad_norm": 6.036740303039551, "learning_rate": 1.2702816918284521e-06, "loss": 0.3, "step": 15529 }, { "epoch": 0.7703755146584652, "grad_norm": 10.41943359375, "learning_rate": 1.269757528708524e-06, "loss": 0.3377, "step": 15530 }, { "epoch": 0.7704251202936654, "grad_norm": 8.216623306274414, "learning_rate": 1.2692334580256914e-06, "loss": 0.2569, "step": 15531 }, { "epoch": 0.7704747259288656, "grad_norm": 5.179793834686279, "learning_rate": 1.2687094797929433e-06, "loss": 0.2553, "step": 15532 }, { "epoch": 0.7705243315640656, "grad_norm": 11.801115989685059, "learning_rate": 1.268185594023259e-06, "loss": 0.3257, "step": 15533 }, { "epoch": 0.7705739371992658, "grad_norm": 8.314850807189941, "learning_rate": 1.2676618007296264e-06, "loss": 0.3598, "step": 15534 }, { "epoch": 0.770623542834466, "grad_norm": 5.056153774261475, "learning_rate": 1.2671380999250215e-06, "loss": 0.2383, "step": 15535 }, { "epoch": 0.7706731484696662, "grad_norm": 8.384413719177246, "learning_rate": 1.2666144916224227e-06, "loss": 0.2636, "step": 15536 }, { "epoch": 0.7707227541048663, "grad_norm": 12.868274688720703, "learning_rate": 1.266090975834806e-06, "loss": 0.4046, "step": 15537 }, { "epoch": 0.7707723597400665, "grad_norm": 8.624677658081055, "learning_rate": 1.2655675525751437e-06, "loss": 0.3249, "step": 15538 }, { "epoch": 0.7708219653752666, "grad_norm": 5.192154407501221, "learning_rate": 1.2650442218564068e-06, "loss": 0.3172, "step": 15539 }, { "epoch": 0.7708715710104668, "grad_norm": 14.661871910095215, "learning_rate": 1.264520983691564e-06, "loss": 0.408, "step": 15540 }, { "epoch": 0.770921176645667, "grad_norm": 6.72097635269165, "learning_rate": 1.263997838093582e-06, "loss": 0.2975, "step": 15541 }, { "epoch": 0.7709707822808671, "grad_norm": 21.9469051361084, "learning_rate": 1.263474785075422e-06, "loss": 0.4812, "step": 15542 }, { "epoch": 0.7710203879160673, "grad_norm": 7.064821243286133, "learning_rate": 1.2629518246500471e-06, "loss": 0.248, "step": 15543 }, { "epoch": 0.7710699935512674, "grad_norm": 11.80066967010498, "learning_rate": 1.2624289568304165e-06, "loss": 0.2706, "step": 15544 }, { "epoch": 0.7711195991864676, "grad_norm": 4.258457660675049, "learning_rate": 1.261906181629487e-06, "loss": 0.2473, "step": 15545 }, { "epoch": 0.7711692048216677, "grad_norm": 6.226539134979248, "learning_rate": 1.2613834990602135e-06, "loss": 0.2687, "step": 15546 }, { "epoch": 0.7712188104568679, "grad_norm": 14.41695499420166, "learning_rate": 1.2608609091355478e-06, "loss": 0.1839, "step": 15547 }, { "epoch": 0.7712684160920681, "grad_norm": 18.29676055908203, "learning_rate": 1.2603384118684404e-06, "loss": 0.3856, "step": 15548 }, { "epoch": 0.7713180217272683, "grad_norm": 3.978302001953125, "learning_rate": 1.2598160072718397e-06, "loss": 0.1898, "step": 15549 }, { "epoch": 0.7713676273624683, "grad_norm": 4.398130893707275, "learning_rate": 1.2592936953586893e-06, "loss": 0.1861, "step": 15550 }, { "epoch": 0.7714172329976685, "grad_norm": 7.296968460083008, "learning_rate": 1.2587714761419329e-06, "loss": 0.2559, "step": 15551 }, { "epoch": 0.7714668386328687, "grad_norm": 14.745165824890137, "learning_rate": 1.2582493496345116e-06, "loss": 0.4298, "step": 15552 }, { "epoch": 0.7715164442680689, "grad_norm": 15.394826889038086, "learning_rate": 1.2577273158493641e-06, "loss": 0.3684, "step": 15553 }, { "epoch": 0.771566049903269, "grad_norm": 5.909526348114014, "learning_rate": 1.2572053747994268e-06, "loss": 0.2888, "step": 15554 }, { "epoch": 0.7716156555384692, "grad_norm": 7.870988845825195, "learning_rate": 1.2566835264976345e-06, "loss": 0.329, "step": 15555 }, { "epoch": 0.7716652611736693, "grad_norm": 6.518347263336182, "learning_rate": 1.256161770956915e-06, "loss": 0.2753, "step": 15556 }, { "epoch": 0.7717148668088695, "grad_norm": 30.36943244934082, "learning_rate": 1.255640108190203e-06, "loss": 0.3588, "step": 15557 }, { "epoch": 0.7717644724440696, "grad_norm": 6.754211902618408, "learning_rate": 1.255118538210422e-06, "loss": 0.2723, "step": 15558 }, { "epoch": 0.7718140780792698, "grad_norm": 13.586187362670898, "learning_rate": 1.2545970610304968e-06, "loss": 0.3058, "step": 15559 }, { "epoch": 0.77186368371447, "grad_norm": 4.517536640167236, "learning_rate": 1.2540756766633516e-06, "loss": 0.2588, "step": 15560 }, { "epoch": 0.7719132893496701, "grad_norm": 9.12911605834961, "learning_rate": 1.2535543851219062e-06, "loss": 0.3111, "step": 15561 }, { "epoch": 0.7719628949848703, "grad_norm": 12.01794719696045, "learning_rate": 1.2530331864190753e-06, "loss": 0.2887, "step": 15562 }, { "epoch": 0.7720125006200704, "grad_norm": 6.9262800216674805, "learning_rate": 1.2525120805677798e-06, "loss": 0.265, "step": 15563 }, { "epoch": 0.7720621062552706, "grad_norm": 5.417079925537109, "learning_rate": 1.251991067580927e-06, "loss": 0.2464, "step": 15564 }, { "epoch": 0.7721117118904708, "grad_norm": 5.8053178787231445, "learning_rate": 1.2514701474714346e-06, "loss": 0.1851, "step": 15565 }, { "epoch": 0.772161317525671, "grad_norm": 7.094005584716797, "learning_rate": 1.2509493202522055e-06, "loss": 0.3274, "step": 15566 }, { "epoch": 0.772210923160871, "grad_norm": 9.902249336242676, "learning_rate": 1.2504285859361482e-06, "loss": 0.382, "step": 15567 }, { "epoch": 0.7722605287960712, "grad_norm": 5.593498229980469, "learning_rate": 1.2499079445361667e-06, "loss": 0.2807, "step": 15568 }, { "epoch": 0.7723101344312714, "grad_norm": 5.833631992340088, "learning_rate": 1.2493873960651642e-06, "loss": 0.2949, "step": 15569 }, { "epoch": 0.7723597400664716, "grad_norm": 5.6333231925964355, "learning_rate": 1.248866940536036e-06, "loss": 0.3219, "step": 15570 }, { "epoch": 0.7724093457016717, "grad_norm": 4.488767147064209, "learning_rate": 1.2483465779616844e-06, "loss": 0.2176, "step": 15571 }, { "epoch": 0.7724589513368719, "grad_norm": 5.877257347106934, "learning_rate": 1.2478263083550003e-06, "loss": 0.2263, "step": 15572 }, { "epoch": 0.772508556972072, "grad_norm": 8.398975372314453, "learning_rate": 1.247306131728877e-06, "loss": 0.3264, "step": 15573 }, { "epoch": 0.7725581626072722, "grad_norm": 6.487222671508789, "learning_rate": 1.2467860480962063e-06, "loss": 0.3154, "step": 15574 }, { "epoch": 0.7726077682424723, "grad_norm": 9.793002128601074, "learning_rate": 1.246266057469876e-06, "loss": 0.3623, "step": 15575 }, { "epoch": 0.7726573738776725, "grad_norm": 9.013934135437012, "learning_rate": 1.2457461598627675e-06, "loss": 0.2459, "step": 15576 }, { "epoch": 0.7727069795128727, "grad_norm": 4.639791011810303, "learning_rate": 1.2452263552877707e-06, "loss": 0.2487, "step": 15577 }, { "epoch": 0.7727565851480728, "grad_norm": 10.952377319335938, "learning_rate": 1.244706643757762e-06, "loss": 0.3447, "step": 15578 }, { "epoch": 0.772806190783273, "grad_norm": 5.817823886871338, "learning_rate": 1.2441870252856213e-06, "loss": 0.2984, "step": 15579 }, { "epoch": 0.7728557964184731, "grad_norm": 9.873229026794434, "learning_rate": 1.2436674998842247e-06, "loss": 0.4073, "step": 15580 }, { "epoch": 0.7729054020536733, "grad_norm": 6.692860126495361, "learning_rate": 1.243148067566447e-06, "loss": 0.393, "step": 15581 }, { "epoch": 0.7729550076888735, "grad_norm": 8.452564239501953, "learning_rate": 1.2426287283451594e-06, "loss": 0.2788, "step": 15582 }, { "epoch": 0.7730046133240737, "grad_norm": 4.9730658531188965, "learning_rate": 1.242109482233233e-06, "loss": 0.2161, "step": 15583 }, { "epoch": 0.7730542189592737, "grad_norm": 4.513768672943115, "learning_rate": 1.2415903292435311e-06, "loss": 0.2594, "step": 15584 }, { "epoch": 0.7731038245944739, "grad_norm": 4.946013450622559, "learning_rate": 1.2410712693889232e-06, "loss": 0.1749, "step": 15585 }, { "epoch": 0.7731534302296741, "grad_norm": 4.443120002746582, "learning_rate": 1.2405523026822685e-06, "loss": 0.2136, "step": 15586 }, { "epoch": 0.7732030358648743, "grad_norm": 5.037023544311523, "learning_rate": 1.240033429136428e-06, "loss": 0.2542, "step": 15587 }, { "epoch": 0.7732526415000744, "grad_norm": 7.114084243774414, "learning_rate": 1.2395146487642606e-06, "loss": 0.2884, "step": 15588 }, { "epoch": 0.7733022471352745, "grad_norm": 5.938544273376465, "learning_rate": 1.2389959615786207e-06, "loss": 0.2742, "step": 15589 }, { "epoch": 0.7733518527704747, "grad_norm": 6.100551128387451, "learning_rate": 1.2384773675923622e-06, "loss": 0.2958, "step": 15590 }, { "epoch": 0.7734014584056749, "grad_norm": 6.1386332511901855, "learning_rate": 1.2379588668183373e-06, "loss": 0.2302, "step": 15591 }, { "epoch": 0.773451064040875, "grad_norm": 5.8871917724609375, "learning_rate": 1.2374404592693922e-06, "loss": 0.2363, "step": 15592 }, { "epoch": 0.7735006696760752, "grad_norm": 13.063924789428711, "learning_rate": 1.2369221449583753e-06, "loss": 0.3263, "step": 15593 }, { "epoch": 0.7735502753112754, "grad_norm": 13.141125679016113, "learning_rate": 1.236403923898129e-06, "loss": 0.5091, "step": 15594 }, { "epoch": 0.7735998809464755, "grad_norm": 16.316621780395508, "learning_rate": 1.2358857961014958e-06, "loss": 0.2825, "step": 15595 }, { "epoch": 0.7736494865816756, "grad_norm": 4.868273735046387, "learning_rate": 1.2353677615813152e-06, "loss": 0.2817, "step": 15596 }, { "epoch": 0.7736990922168758, "grad_norm": 11.105340003967285, "learning_rate": 1.234849820350425e-06, "loss": 0.3701, "step": 15597 }, { "epoch": 0.773748697852076, "grad_norm": 3.9434640407562256, "learning_rate": 1.2343319724216608e-06, "loss": 0.1902, "step": 15598 }, { "epoch": 0.7737983034872762, "grad_norm": 4.849403381347656, "learning_rate": 1.2338142178078521e-06, "loss": 0.2161, "step": 15599 }, { "epoch": 0.7738479091224764, "grad_norm": 11.93364429473877, "learning_rate": 1.2332965565218309e-06, "loss": 0.3865, "step": 15600 }, { "epoch": 0.7738975147576764, "grad_norm": 10.242537498474121, "learning_rate": 1.2327789885764246e-06, "loss": 0.2793, "step": 15601 }, { "epoch": 0.7739471203928766, "grad_norm": 11.467297554016113, "learning_rate": 1.232261513984459e-06, "loss": 0.2193, "step": 15602 }, { "epoch": 0.7739967260280768, "grad_norm": 7.403697967529297, "learning_rate": 1.2317441327587575e-06, "loss": 0.3197, "step": 15603 }, { "epoch": 0.774046331663277, "grad_norm": 5.599695682525635, "learning_rate": 1.2312268449121411e-06, "loss": 0.2377, "step": 15604 }, { "epoch": 0.7740959372984771, "grad_norm": 16.49236488342285, "learning_rate": 1.2307096504574285e-06, "loss": 0.3009, "step": 15605 }, { "epoch": 0.7741455429336772, "grad_norm": 7.2521443367004395, "learning_rate": 1.2301925494074369e-06, "loss": 0.2714, "step": 15606 }, { "epoch": 0.7741951485688774, "grad_norm": 7.469662189483643, "learning_rate": 1.2296755417749762e-06, "loss": 0.1753, "step": 15607 }, { "epoch": 0.7742447542040776, "grad_norm": 8.176680564880371, "learning_rate": 1.229158627572864e-06, "loss": 0.2937, "step": 15608 }, { "epoch": 0.7742943598392777, "grad_norm": 6.039875507354736, "learning_rate": 1.2286418068139055e-06, "loss": 0.2363, "step": 15609 }, { "epoch": 0.7743439654744779, "grad_norm": 6.238750457763672, "learning_rate": 1.228125079510909e-06, "loss": 0.2854, "step": 15610 }, { "epoch": 0.7743935711096781, "grad_norm": 6.0516862869262695, "learning_rate": 1.227608445676679e-06, "loss": 0.2274, "step": 15611 }, { "epoch": 0.7744431767448782, "grad_norm": 8.625884056091309, "learning_rate": 1.2270919053240198e-06, "loss": 0.3025, "step": 15612 }, { "epoch": 0.7744927823800783, "grad_norm": 10.79043960571289, "learning_rate": 1.2265754584657268e-06, "loss": 0.3746, "step": 15613 }, { "epoch": 0.7745423880152785, "grad_norm": 5.390504360198975, "learning_rate": 1.2260591051146038e-06, "loss": 0.3114, "step": 15614 }, { "epoch": 0.7745919936504787, "grad_norm": 5.016014575958252, "learning_rate": 1.2255428452834423e-06, "loss": 0.2646, "step": 15615 }, { "epoch": 0.7746415992856789, "grad_norm": 5.17580509185791, "learning_rate": 1.2250266789850357e-06, "loss": 0.3032, "step": 15616 }, { "epoch": 0.7746912049208791, "grad_norm": 5.111824035644531, "learning_rate": 1.224510606232176e-06, "loss": 0.2518, "step": 15617 }, { "epoch": 0.7747408105560791, "grad_norm": 5.232761383056641, "learning_rate": 1.2239946270376512e-06, "loss": 0.1847, "step": 15618 }, { "epoch": 0.7747904161912793, "grad_norm": 10.174490928649902, "learning_rate": 1.223478741414248e-06, "loss": 0.2682, "step": 15619 }, { "epoch": 0.7748400218264795, "grad_norm": 5.507869243621826, "learning_rate": 1.2229629493747508e-06, "loss": 0.2836, "step": 15620 }, { "epoch": 0.7748896274616797, "grad_norm": 11.466777801513672, "learning_rate": 1.2224472509319379e-06, "loss": 0.3355, "step": 15621 }, { "epoch": 0.7749392330968798, "grad_norm": 9.271608352661133, "learning_rate": 1.2219316460985937e-06, "loss": 0.348, "step": 15622 }, { "epoch": 0.7749888387320799, "grad_norm": 5.988009452819824, "learning_rate": 1.2214161348874904e-06, "loss": 0.3322, "step": 15623 }, { "epoch": 0.7750384443672801, "grad_norm": 5.253501892089844, "learning_rate": 1.220900717311405e-06, "loss": 0.2362, "step": 15624 }, { "epoch": 0.7750880500024803, "grad_norm": 4.738804817199707, "learning_rate": 1.2203853933831088e-06, "loss": 0.2237, "step": 15625 }, { "epoch": 0.7751376556376804, "grad_norm": 10.28658390045166, "learning_rate": 1.2198701631153742e-06, "loss": 0.3332, "step": 15626 }, { "epoch": 0.7751872612728806, "grad_norm": 7.317825794219971, "learning_rate": 1.2193550265209636e-06, "loss": 0.3261, "step": 15627 }, { "epoch": 0.7752368669080808, "grad_norm": 4.052277088165283, "learning_rate": 1.2188399836126492e-06, "loss": 0.1997, "step": 15628 }, { "epoch": 0.7752864725432809, "grad_norm": 7.833864212036133, "learning_rate": 1.2183250344031883e-06, "loss": 0.2988, "step": 15629 }, { "epoch": 0.775336078178481, "grad_norm": 7.088281631469727, "learning_rate": 1.2178101789053436e-06, "loss": 0.2885, "step": 15630 }, { "epoch": 0.7753856838136812, "grad_norm": 8.14781379699707, "learning_rate": 1.2172954171318741e-06, "loss": 0.3378, "step": 15631 }, { "epoch": 0.7754352894488814, "grad_norm": 13.334183692932129, "learning_rate": 1.2167807490955363e-06, "loss": 0.4445, "step": 15632 }, { "epoch": 0.7754848950840816, "grad_norm": 7.6291351318359375, "learning_rate": 1.2162661748090804e-06, "loss": 0.2902, "step": 15633 }, { "epoch": 0.7755345007192818, "grad_norm": 4.924739360809326, "learning_rate": 1.2157516942852627e-06, "loss": 0.2518, "step": 15634 }, { "epoch": 0.7755841063544818, "grad_norm": 6.435081958770752, "learning_rate": 1.2152373075368268e-06, "loss": 0.3693, "step": 15635 }, { "epoch": 0.775633711989682, "grad_norm": 8.66520881652832, "learning_rate": 1.214723014576526e-06, "loss": 0.3214, "step": 15636 }, { "epoch": 0.7756833176248822, "grad_norm": 14.3638916015625, "learning_rate": 1.214208815417099e-06, "loss": 0.3624, "step": 15637 }, { "epoch": 0.7757329232600824, "grad_norm": 8.3821382522583, "learning_rate": 1.2136947100712903e-06, "loss": 0.2789, "step": 15638 }, { "epoch": 0.7757825288952825, "grad_norm": 6.7475175857543945, "learning_rate": 1.2131806985518396e-06, "loss": 0.2034, "step": 15639 }, { "epoch": 0.7758321345304826, "grad_norm": 6.006917953491211, "learning_rate": 1.212666780871486e-06, "loss": 0.3088, "step": 15640 }, { "epoch": 0.7758817401656828, "grad_norm": 8.923323631286621, "learning_rate": 1.2121529570429591e-06, "loss": 0.3461, "step": 15641 }, { "epoch": 0.775931345800883, "grad_norm": 12.708178520202637, "learning_rate": 1.2116392270789984e-06, "loss": 0.3265, "step": 15642 }, { "epoch": 0.7759809514360831, "grad_norm": 12.459080696105957, "learning_rate": 1.2111255909923302e-06, "loss": 0.3273, "step": 15643 }, { "epoch": 0.7760305570712833, "grad_norm": 9.068096160888672, "learning_rate": 1.2106120487956835e-06, "loss": 0.2353, "step": 15644 }, { "epoch": 0.7760801627064835, "grad_norm": 7.819863319396973, "learning_rate": 1.2100986005017845e-06, "loss": 0.2878, "step": 15645 }, { "epoch": 0.7761297683416836, "grad_norm": 6.6840291023254395, "learning_rate": 1.2095852461233565e-06, "loss": 0.2862, "step": 15646 }, { "epoch": 0.7761793739768837, "grad_norm": 9.895174980163574, "learning_rate": 1.2090719856731203e-06, "loss": 0.3722, "step": 15647 }, { "epoch": 0.7762289796120839, "grad_norm": 16.158405303955078, "learning_rate": 1.208558819163797e-06, "loss": 0.4645, "step": 15648 }, { "epoch": 0.7762785852472841, "grad_norm": 6.2287468910217285, "learning_rate": 1.2080457466080996e-06, "loss": 0.252, "step": 15649 }, { "epoch": 0.7763281908824843, "grad_norm": 13.247929573059082, "learning_rate": 1.2075327680187444e-06, "loss": 0.3223, "step": 15650 }, { "epoch": 0.7763777965176845, "grad_norm": 5.391646862030029, "learning_rate": 1.2070198834084417e-06, "loss": 0.3493, "step": 15651 }, { "epoch": 0.7764274021528845, "grad_norm": 7.8400468826293945, "learning_rate": 1.206507092789903e-06, "loss": 0.3561, "step": 15652 }, { "epoch": 0.7764770077880847, "grad_norm": 8.183643341064453, "learning_rate": 1.205994396175834e-06, "loss": 0.3357, "step": 15653 }, { "epoch": 0.7765266134232849, "grad_norm": 8.292891502380371, "learning_rate": 1.2054817935789403e-06, "loss": 0.2859, "step": 15654 }, { "epoch": 0.7765762190584851, "grad_norm": 6.311578273773193, "learning_rate": 1.2049692850119243e-06, "loss": 0.3606, "step": 15655 }, { "epoch": 0.7766258246936852, "grad_norm": 31.354150772094727, "learning_rate": 1.204456870487486e-06, "loss": 0.2372, "step": 15656 }, { "epoch": 0.7766754303288853, "grad_norm": 9.271430969238281, "learning_rate": 1.2039445500183243e-06, "loss": 0.3402, "step": 15657 }, { "epoch": 0.7767250359640855, "grad_norm": 7.362812519073486, "learning_rate": 1.203432323617133e-06, "loss": 0.2404, "step": 15658 }, { "epoch": 0.7767746415992857, "grad_norm": 9.192140579223633, "learning_rate": 1.202920191296606e-06, "loss": 0.2811, "step": 15659 }, { "epoch": 0.7768242472344858, "grad_norm": 9.152054786682129, "learning_rate": 1.2024081530694342e-06, "loss": 0.3687, "step": 15660 }, { "epoch": 0.776873852869686, "grad_norm": 7.855478763580322, "learning_rate": 1.201896208948306e-06, "loss": 0.3058, "step": 15661 }, { "epoch": 0.7769234585048862, "grad_norm": 7.802514553070068, "learning_rate": 1.2013843589459078e-06, "loss": 0.2236, "step": 15662 }, { "epoch": 0.7769730641400863, "grad_norm": 7.187837600708008, "learning_rate": 1.2008726030749252e-06, "loss": 0.2, "step": 15663 }, { "epoch": 0.7770226697752864, "grad_norm": 6.310245037078857, "learning_rate": 1.2003609413480349e-06, "loss": 0.274, "step": 15664 }, { "epoch": 0.7770722754104866, "grad_norm": 9.20567512512207, "learning_rate": 1.1998493737779227e-06, "loss": 0.3415, "step": 15665 }, { "epoch": 0.7771218810456868, "grad_norm": 7.781176567077637, "learning_rate": 1.1993379003772597e-06, "loss": 0.2707, "step": 15666 }, { "epoch": 0.777171486680887, "grad_norm": 5.059839725494385, "learning_rate": 1.1988265211587236e-06, "loss": 0.2094, "step": 15667 }, { "epoch": 0.7772210923160872, "grad_norm": 8.603282928466797, "learning_rate": 1.1983152361349853e-06, "loss": 0.26, "step": 15668 }, { "epoch": 0.7772706979512872, "grad_norm": 13.896211624145508, "learning_rate": 1.1978040453187162e-06, "loss": 0.3193, "step": 15669 }, { "epoch": 0.7773203035864874, "grad_norm": 5.975171089172363, "learning_rate": 1.1972929487225803e-06, "loss": 0.2976, "step": 15670 }, { "epoch": 0.7773699092216876, "grad_norm": 5.886837005615234, "learning_rate": 1.1967819463592483e-06, "loss": 0.2637, "step": 15671 }, { "epoch": 0.7774195148568878, "grad_norm": 5.8337883949279785, "learning_rate": 1.1962710382413773e-06, "loss": 0.3385, "step": 15672 }, { "epoch": 0.7774691204920879, "grad_norm": 11.672037124633789, "learning_rate": 1.195760224381633e-06, "loss": 0.3368, "step": 15673 }, { "epoch": 0.777518726127288, "grad_norm": 6.039656639099121, "learning_rate": 1.19524950479267e-06, "loss": 0.1707, "step": 15674 }, { "epoch": 0.7775683317624882, "grad_norm": 4.832677841186523, "learning_rate": 1.1947388794871457e-06, "loss": 0.252, "step": 15675 }, { "epoch": 0.7776179373976884, "grad_norm": 4.8973388671875, "learning_rate": 1.1942283484777129e-06, "loss": 0.2721, "step": 15676 }, { "epoch": 0.7776675430328885, "grad_norm": 7.138679027557373, "learning_rate": 1.1937179117770242e-06, "loss": 0.2616, "step": 15677 }, { "epoch": 0.7777171486680887, "grad_norm": 5.855010032653809, "learning_rate": 1.1932075693977253e-06, "loss": 0.2782, "step": 15678 }, { "epoch": 0.7777667543032889, "grad_norm": 11.115769386291504, "learning_rate": 1.1926973213524673e-06, "loss": 0.3594, "step": 15679 }, { "epoch": 0.777816359938489, "grad_norm": 6.073052883148193, "learning_rate": 1.1921871676538905e-06, "loss": 0.2285, "step": 15680 }, { "epoch": 0.7778659655736891, "grad_norm": 6.58043098449707, "learning_rate": 1.1916771083146377e-06, "loss": 0.318, "step": 15681 }, { "epoch": 0.7779155712088893, "grad_norm": 6.04773473739624, "learning_rate": 1.1911671433473492e-06, "loss": 0.2173, "step": 15682 }, { "epoch": 0.7779651768440895, "grad_norm": 4.281455039978027, "learning_rate": 1.1906572727646638e-06, "loss": 0.2638, "step": 15683 }, { "epoch": 0.7780147824792897, "grad_norm": 7.972263336181641, "learning_rate": 1.1901474965792108e-06, "loss": 0.2568, "step": 15684 }, { "epoch": 0.7780643881144899, "grad_norm": 7.347192764282227, "learning_rate": 1.1896378148036292e-06, "loss": 0.3596, "step": 15685 }, { "epoch": 0.7781139937496899, "grad_norm": 12.894655227661133, "learning_rate": 1.189128227450545e-06, "loss": 0.2332, "step": 15686 }, { "epoch": 0.7781635993848901, "grad_norm": 6.876845359802246, "learning_rate": 1.1886187345325873e-06, "loss": 0.2741, "step": 15687 }, { "epoch": 0.7782132050200903, "grad_norm": 7.022755146026611, "learning_rate": 1.1881093360623807e-06, "loss": 0.2886, "step": 15688 }, { "epoch": 0.7782628106552905, "grad_norm": 4.627801418304443, "learning_rate": 1.1876000320525493e-06, "loss": 0.2101, "step": 15689 }, { "epoch": 0.7783124162904906, "grad_norm": 4.871049880981445, "learning_rate": 1.187090822515714e-06, "loss": 0.2167, "step": 15690 }, { "epoch": 0.7783620219256907, "grad_norm": 8.545029640197754, "learning_rate": 1.1865817074644943e-06, "loss": 0.3741, "step": 15691 }, { "epoch": 0.7784116275608909, "grad_norm": 5.330714225769043, "learning_rate": 1.1860726869115019e-06, "loss": 0.2985, "step": 15692 }, { "epoch": 0.7784612331960911, "grad_norm": 8.124597549438477, "learning_rate": 1.1855637608693565e-06, "loss": 0.2703, "step": 15693 }, { "epoch": 0.7785108388312912, "grad_norm": 9.94852066040039, "learning_rate": 1.1850549293506646e-06, "loss": 0.3582, "step": 15694 }, { "epoch": 0.7785604444664914, "grad_norm": 6.374736785888672, "learning_rate": 1.1845461923680378e-06, "loss": 0.2473, "step": 15695 }, { "epoch": 0.7786100501016916, "grad_norm": 8.424836158752441, "learning_rate": 1.1840375499340823e-06, "loss": 0.2833, "step": 15696 }, { "epoch": 0.7786596557368917, "grad_norm": 9.676322937011719, "learning_rate": 1.1835290020614037e-06, "loss": 0.2674, "step": 15697 }, { "epoch": 0.7787092613720918, "grad_norm": 7.352841377258301, "learning_rate": 1.1830205487626e-06, "loss": 0.3237, "step": 15698 }, { "epoch": 0.778758867007292, "grad_norm": 6.614875793457031, "learning_rate": 1.1825121900502761e-06, "loss": 0.2639, "step": 15699 }, { "epoch": 0.7788084726424922, "grad_norm": 6.310088634490967, "learning_rate": 1.1820039259370264e-06, "loss": 0.3077, "step": 15700 }, { "epoch": 0.7788580782776924, "grad_norm": 4.988760471343994, "learning_rate": 1.1814957564354458e-06, "loss": 0.2645, "step": 15701 }, { "epoch": 0.7789076839128926, "grad_norm": 15.706215858459473, "learning_rate": 1.180987681558128e-06, "loss": 0.3503, "step": 15702 }, { "epoch": 0.7789572895480926, "grad_norm": 6.923367023468018, "learning_rate": 1.1804797013176627e-06, "loss": 0.3134, "step": 15703 }, { "epoch": 0.7790068951832928, "grad_norm": 5.231258392333984, "learning_rate": 1.1799718157266387e-06, "loss": 0.2253, "step": 15704 }, { "epoch": 0.779056500818493, "grad_norm": 7.666778087615967, "learning_rate": 1.1794640247976408e-06, "loss": 0.3318, "step": 15705 }, { "epoch": 0.7791061064536932, "grad_norm": 16.765277862548828, "learning_rate": 1.178956328543252e-06, "loss": 0.4051, "step": 15706 }, { "epoch": 0.7791557120888933, "grad_norm": 14.251544952392578, "learning_rate": 1.178448726976056e-06, "loss": 0.4837, "step": 15707 }, { "epoch": 0.7792053177240934, "grad_norm": 12.496367454528809, "learning_rate": 1.1779412201086277e-06, "loss": 0.3237, "step": 15708 }, { "epoch": 0.7792549233592936, "grad_norm": 8.745352745056152, "learning_rate": 1.1774338079535447e-06, "loss": 0.3448, "step": 15709 }, { "epoch": 0.7793045289944938, "grad_norm": 6.621058464050293, "learning_rate": 1.1769264905233813e-06, "loss": 0.2281, "step": 15710 }, { "epoch": 0.7793541346296939, "grad_norm": 11.706988334655762, "learning_rate": 1.1764192678307085e-06, "loss": 0.4279, "step": 15711 }, { "epoch": 0.7794037402648941, "grad_norm": 4.256229877471924, "learning_rate": 1.1759121398880958e-06, "loss": 0.238, "step": 15712 }, { "epoch": 0.7794533459000943, "grad_norm": 5.917830944061279, "learning_rate": 1.1754051067081101e-06, "loss": 0.323, "step": 15713 }, { "epoch": 0.7795029515352944, "grad_norm": 4.765740394592285, "learning_rate": 1.1748981683033177e-06, "loss": 0.2303, "step": 15714 }, { "epoch": 0.7795525571704945, "grad_norm": 6.810046195983887, "learning_rate": 1.1743913246862758e-06, "loss": 0.3439, "step": 15715 }, { "epoch": 0.7796021628056947, "grad_norm": 5.776363372802734, "learning_rate": 1.1738845758695504e-06, "loss": 0.1955, "step": 15716 }, { "epoch": 0.7796517684408949, "grad_norm": 14.804827690124512, "learning_rate": 1.1733779218656943e-06, "loss": 0.3043, "step": 15717 }, { "epoch": 0.7797013740760951, "grad_norm": 4.170810222625732, "learning_rate": 1.1728713626872644e-06, "loss": 0.2643, "step": 15718 }, { "epoch": 0.7797509797112953, "grad_norm": 10.94799518585205, "learning_rate": 1.172364898346814e-06, "loss": 0.3165, "step": 15719 }, { "epoch": 0.7798005853464953, "grad_norm": 10.52203369140625, "learning_rate": 1.1718585288568935e-06, "loss": 0.2547, "step": 15720 }, { "epoch": 0.7798501909816955, "grad_norm": 8.546582221984863, "learning_rate": 1.1713522542300475e-06, "loss": 0.3473, "step": 15721 }, { "epoch": 0.7798997966168957, "grad_norm": 5.9453511238098145, "learning_rate": 1.1708460744788274e-06, "loss": 0.2572, "step": 15722 }, { "epoch": 0.7799494022520959, "grad_norm": 9.9717435836792, "learning_rate": 1.1703399896157724e-06, "loss": 0.3249, "step": 15723 }, { "epoch": 0.779999007887296, "grad_norm": 6.828241348266602, "learning_rate": 1.1698339996534247e-06, "loss": 0.2641, "step": 15724 }, { "epoch": 0.7800486135224961, "grad_norm": 3.0863513946533203, "learning_rate": 1.1693281046043231e-06, "loss": 0.2388, "step": 15725 }, { "epoch": 0.7800982191576963, "grad_norm": 13.173210144042969, "learning_rate": 1.1688223044810037e-06, "loss": 0.3484, "step": 15726 }, { "epoch": 0.7801478247928965, "grad_norm": 4.973415374755859, "learning_rate": 1.168316599296001e-06, "loss": 0.2829, "step": 15727 }, { "epoch": 0.7801974304280966, "grad_norm": 10.971385955810547, "learning_rate": 1.1678109890618472e-06, "loss": 0.2332, "step": 15728 }, { "epoch": 0.7802470360632968, "grad_norm": 4.05854606628418, "learning_rate": 1.1673054737910683e-06, "loss": 0.2231, "step": 15729 }, { "epoch": 0.780296641698497, "grad_norm": 6.824887752532959, "learning_rate": 1.166800053496196e-06, "loss": 0.2354, "step": 15730 }, { "epoch": 0.7803462473336971, "grad_norm": 5.734982490539551, "learning_rate": 1.1662947281897507e-06, "loss": 0.195, "step": 15731 }, { "epoch": 0.7803958529688972, "grad_norm": 5.826078414916992, "learning_rate": 1.1657894978842565e-06, "loss": 0.3359, "step": 15732 }, { "epoch": 0.7804454586040974, "grad_norm": 9.27225399017334, "learning_rate": 1.1652843625922328e-06, "loss": 0.3503, "step": 15733 }, { "epoch": 0.7804950642392976, "grad_norm": 5.808552265167236, "learning_rate": 1.1647793223261987e-06, "loss": 0.2715, "step": 15734 }, { "epoch": 0.7805446698744978, "grad_norm": 6.40712308883667, "learning_rate": 1.1642743770986653e-06, "loss": 0.2922, "step": 15735 }, { "epoch": 0.780594275509698, "grad_norm": 11.440826416015625, "learning_rate": 1.1637695269221506e-06, "loss": 0.2198, "step": 15736 }, { "epoch": 0.780643881144898, "grad_norm": 7.712733268737793, "learning_rate": 1.1632647718091606e-06, "loss": 0.2816, "step": 15737 }, { "epoch": 0.7806934867800982, "grad_norm": 4.320632457733154, "learning_rate": 1.1627601117722054e-06, "loss": 0.2206, "step": 15738 }, { "epoch": 0.7807430924152984, "grad_norm": 6.104265213012695, "learning_rate": 1.1622555468237901e-06, "loss": 0.3091, "step": 15739 }, { "epoch": 0.7807926980504986, "grad_norm": 8.304536819458008, "learning_rate": 1.16175107697642e-06, "loss": 0.3554, "step": 15740 }, { "epoch": 0.7808423036856987, "grad_norm": 6.030738830566406, "learning_rate": 1.1612467022425917e-06, "loss": 0.3126, "step": 15741 }, { "epoch": 0.7808919093208988, "grad_norm": 7.145168781280518, "learning_rate": 1.1607424226348092e-06, "loss": 0.1951, "step": 15742 }, { "epoch": 0.780941514956099, "grad_norm": 6.254945755004883, "learning_rate": 1.1602382381655635e-06, "loss": 0.3323, "step": 15743 }, { "epoch": 0.7809911205912992, "grad_norm": 16.289766311645508, "learning_rate": 1.159734148847354e-06, "loss": 0.338, "step": 15744 }, { "epoch": 0.7810407262264993, "grad_norm": 12.096281051635742, "learning_rate": 1.1592301546926677e-06, "loss": 0.3144, "step": 15745 }, { "epoch": 0.7810903318616995, "grad_norm": 7.35152530670166, "learning_rate": 1.1587262557139961e-06, "loss": 0.2864, "step": 15746 }, { "epoch": 0.7811399374968997, "grad_norm": 6.83385705947876, "learning_rate": 1.158222451923825e-06, "loss": 0.3132, "step": 15747 }, { "epoch": 0.7811895431320998, "grad_norm": 4.136350631713867, "learning_rate": 1.157718743334641e-06, "loss": 0.2642, "step": 15748 }, { "epoch": 0.7812391487672999, "grad_norm": 11.59219741821289, "learning_rate": 1.1572151299589212e-06, "loss": 0.2858, "step": 15749 }, { "epoch": 0.7812887544025001, "grad_norm": 6.281523704528809, "learning_rate": 1.1567116118091525e-06, "loss": 0.2758, "step": 15750 }, { "epoch": 0.7813383600377003, "grad_norm": 9.572088241577148, "learning_rate": 1.1562081888978065e-06, "loss": 0.3963, "step": 15751 }, { "epoch": 0.7813879656729005, "grad_norm": 4.618424415588379, "learning_rate": 1.1557048612373606e-06, "loss": 0.2083, "step": 15752 }, { "epoch": 0.7814375713081007, "grad_norm": 6.733640670776367, "learning_rate": 1.1552016288402873e-06, "loss": 0.2758, "step": 15753 }, { "epoch": 0.7814871769433007, "grad_norm": 5.121525287628174, "learning_rate": 1.1546984917190567e-06, "loss": 0.2499, "step": 15754 }, { "epoch": 0.7815367825785009, "grad_norm": 17.01059913635254, "learning_rate": 1.1541954498861369e-06, "loss": 0.3004, "step": 15755 }, { "epoch": 0.7815863882137011, "grad_norm": 4.409400939941406, "learning_rate": 1.1536925033539948e-06, "loss": 0.2208, "step": 15756 }, { "epoch": 0.7816359938489013, "grad_norm": 13.267826080322266, "learning_rate": 1.1531896521350911e-06, "loss": 0.3329, "step": 15757 }, { "epoch": 0.7816855994841014, "grad_norm": 4.961825847625732, "learning_rate": 1.152686896241888e-06, "loss": 0.1765, "step": 15758 }, { "epoch": 0.7817352051193015, "grad_norm": 6.421370506286621, "learning_rate": 1.1521842356868434e-06, "loss": 0.2604, "step": 15759 }, { "epoch": 0.7817848107545017, "grad_norm": 5.563917636871338, "learning_rate": 1.1516816704824146e-06, "loss": 0.3143, "step": 15760 }, { "epoch": 0.7818344163897019, "grad_norm": 8.777959823608398, "learning_rate": 1.1511792006410545e-06, "loss": 0.2613, "step": 15761 }, { "epoch": 0.781884022024902, "grad_norm": 7.891472816467285, "learning_rate": 1.150676826175215e-06, "loss": 0.2452, "step": 15762 }, { "epoch": 0.7819336276601022, "grad_norm": 7.370481967926025, "learning_rate": 1.1501745470973447e-06, "loss": 0.3596, "step": 15763 }, { "epoch": 0.7819832332953024, "grad_norm": 4.613305568695068, "learning_rate": 1.149672363419891e-06, "loss": 0.2125, "step": 15764 }, { "epoch": 0.7820328389305025, "grad_norm": 5.179397106170654, "learning_rate": 1.1491702751552986e-06, "loss": 0.2361, "step": 15765 }, { "epoch": 0.7820824445657026, "grad_norm": 6.186812877655029, "learning_rate": 1.1486682823160077e-06, "loss": 0.2695, "step": 15766 }, { "epoch": 0.7821320502009028, "grad_norm": 6.939642429351807, "learning_rate": 1.1481663849144592e-06, "loss": 0.2182, "step": 15767 }, { "epoch": 0.782181655836103, "grad_norm": 8.90198802947998, "learning_rate": 1.1476645829630895e-06, "loss": 0.2738, "step": 15768 }, { "epoch": 0.7822312614713032, "grad_norm": 6.3086724281311035, "learning_rate": 1.1471628764743342e-06, "loss": 0.2188, "step": 15769 }, { "epoch": 0.7822808671065034, "grad_norm": 6.371999263763428, "learning_rate": 1.1466612654606258e-06, "loss": 0.3742, "step": 15770 }, { "epoch": 0.7823304727417034, "grad_norm": 8.267860412597656, "learning_rate": 1.1461597499343956e-06, "loss": 0.3006, "step": 15771 }, { "epoch": 0.7823800783769036, "grad_norm": 7.16400146484375, "learning_rate": 1.1456583299080676e-06, "loss": 0.2538, "step": 15772 }, { "epoch": 0.7824296840121038, "grad_norm": 3.762150526046753, "learning_rate": 1.145157005394072e-06, "loss": 0.2295, "step": 15773 }, { "epoch": 0.782479289647304, "grad_norm": 7.145707607269287, "learning_rate": 1.1446557764048283e-06, "loss": 0.2374, "step": 15774 }, { "epoch": 0.7825288952825041, "grad_norm": 7.213411331176758, "learning_rate": 1.144154642952759e-06, "loss": 0.3415, "step": 15775 }, { "epoch": 0.7825785009177042, "grad_norm": 9.29210090637207, "learning_rate": 1.143653605050281e-06, "loss": 0.3257, "step": 15776 }, { "epoch": 0.7826281065529044, "grad_norm": 5.241973876953125, "learning_rate": 1.143152662709811e-06, "loss": 0.247, "step": 15777 }, { "epoch": 0.7826777121881046, "grad_norm": 6.680607318878174, "learning_rate": 1.1426518159437634e-06, "loss": 0.2213, "step": 15778 }, { "epoch": 0.7827273178233047, "grad_norm": 12.4230318069458, "learning_rate": 1.1421510647645495e-06, "loss": 0.4875, "step": 15779 }, { "epoch": 0.7827769234585049, "grad_norm": 7.872467517852783, "learning_rate": 1.1416504091845748e-06, "loss": 0.3138, "step": 15780 }, { "epoch": 0.7828265290937051, "grad_norm": 4.432863235473633, "learning_rate": 1.141149849216251e-06, "loss": 0.3172, "step": 15781 }, { "epoch": 0.7828761347289052, "grad_norm": 5.59531831741333, "learning_rate": 1.1406493848719784e-06, "loss": 0.2698, "step": 15782 }, { "epoch": 0.7829257403641053, "grad_norm": 5.761030197143555, "learning_rate": 1.1401490161641593e-06, "loss": 0.2371, "step": 15783 }, { "epoch": 0.7829753459993055, "grad_norm": 12.607571601867676, "learning_rate": 1.1396487431051938e-06, "loss": 0.3862, "step": 15784 }, { "epoch": 0.7830249516345057, "grad_norm": 7.173558712005615, "learning_rate": 1.1391485657074802e-06, "loss": 0.3824, "step": 15785 }, { "epoch": 0.7830745572697059, "grad_norm": 4.588012218475342, "learning_rate": 1.1386484839834084e-06, "loss": 0.2479, "step": 15786 }, { "epoch": 0.783124162904906, "grad_norm": 7.183777332305908, "learning_rate": 1.138148497945377e-06, "loss": 0.3015, "step": 15787 }, { "epoch": 0.7831737685401061, "grad_norm": 6.661467552185059, "learning_rate": 1.1376486076057714e-06, "loss": 0.2526, "step": 15788 }, { "epoch": 0.7832233741753063, "grad_norm": 9.266399383544922, "learning_rate": 1.1371488129769804e-06, "loss": 0.3547, "step": 15789 }, { "epoch": 0.7832729798105065, "grad_norm": 5.251467704772949, "learning_rate": 1.1366491140713887e-06, "loss": 0.1321, "step": 15790 }, { "epoch": 0.7833225854457067, "grad_norm": 7.353694915771484, "learning_rate": 1.1361495109013814e-06, "loss": 0.3572, "step": 15791 }, { "epoch": 0.7833721910809068, "grad_norm": 4.512121200561523, "learning_rate": 1.1356500034793338e-06, "loss": 0.2695, "step": 15792 }, { "epoch": 0.7834217967161069, "grad_norm": 6.388039588928223, "learning_rate": 1.1351505918176303e-06, "loss": 0.282, "step": 15793 }, { "epoch": 0.7834714023513071, "grad_norm": 10.396442413330078, "learning_rate": 1.1346512759286421e-06, "loss": 0.2856, "step": 15794 }, { "epoch": 0.7835210079865073, "grad_norm": 4.786355495452881, "learning_rate": 1.1341520558247438e-06, "loss": 0.2392, "step": 15795 }, { "epoch": 0.7835706136217074, "grad_norm": 10.335394859313965, "learning_rate": 1.133652931518306e-06, "loss": 0.3791, "step": 15796 }, { "epoch": 0.7836202192569076, "grad_norm": 9.680686950683594, "learning_rate": 1.1331539030216976e-06, "loss": 0.3999, "step": 15797 }, { "epoch": 0.7836698248921078, "grad_norm": 10.145872116088867, "learning_rate": 1.1326549703472845e-06, "loss": 0.3575, "step": 15798 }, { "epoch": 0.7837194305273079, "grad_norm": 4.132721900939941, "learning_rate": 1.1321561335074322e-06, "loss": 0.2074, "step": 15799 }, { "epoch": 0.783769036162508, "grad_norm": 9.779601097106934, "learning_rate": 1.1316573925144981e-06, "loss": 0.3149, "step": 15800 }, { "epoch": 0.7838186417977082, "grad_norm": 5.669146537780762, "learning_rate": 1.1311587473808455e-06, "loss": 0.284, "step": 15801 }, { "epoch": 0.7838682474329084, "grad_norm": 8.167329788208008, "learning_rate": 1.1306601981188286e-06, "loss": 0.3275, "step": 15802 }, { "epoch": 0.7839178530681086, "grad_norm": 7.73535680770874, "learning_rate": 1.130161744740802e-06, "loss": 0.3374, "step": 15803 }, { "epoch": 0.7839674587033088, "grad_norm": 7.101192474365234, "learning_rate": 1.1296633872591178e-06, "loss": 0.2865, "step": 15804 }, { "epoch": 0.7840170643385088, "grad_norm": 9.803021430969238, "learning_rate": 1.1291651256861275e-06, "loss": 0.2349, "step": 15805 }, { "epoch": 0.784066669973709, "grad_norm": 7.177768230438232, "learning_rate": 1.1286669600341732e-06, "loss": 0.3059, "step": 15806 }, { "epoch": 0.7841162756089092, "grad_norm": 8.70563793182373, "learning_rate": 1.1281688903156052e-06, "loss": 0.3618, "step": 15807 }, { "epoch": 0.7841658812441094, "grad_norm": 8.320792198181152, "learning_rate": 1.127670916542763e-06, "loss": 0.2937, "step": 15808 }, { "epoch": 0.7842154868793095, "grad_norm": 6.208896160125732, "learning_rate": 1.1271730387279867e-06, "loss": 0.296, "step": 15809 }, { "epoch": 0.7842650925145096, "grad_norm": 10.313802719116211, "learning_rate": 1.1266752568836142e-06, "loss": 0.3579, "step": 15810 }, { "epoch": 0.7843146981497098, "grad_norm": 6.961588382720947, "learning_rate": 1.1261775710219807e-06, "loss": 0.306, "step": 15811 }, { "epoch": 0.78436430378491, "grad_norm": 5.298364162445068, "learning_rate": 1.1256799811554197e-06, "loss": 0.2421, "step": 15812 }, { "epoch": 0.7844139094201101, "grad_norm": 5.2483320236206055, "learning_rate": 1.1251824872962614e-06, "loss": 0.1971, "step": 15813 }, { "epoch": 0.7844635150553103, "grad_norm": 5.841900825500488, "learning_rate": 1.124685089456834e-06, "loss": 0.2736, "step": 15814 }, { "epoch": 0.7845131206905105, "grad_norm": 14.434966087341309, "learning_rate": 1.1241877876494638e-06, "loss": 0.3892, "step": 15815 }, { "epoch": 0.7845627263257106, "grad_norm": 5.270287990570068, "learning_rate": 1.1236905818864725e-06, "loss": 0.1705, "step": 15816 }, { "epoch": 0.7846123319609107, "grad_norm": 13.000925064086914, "learning_rate": 1.1231934721801818e-06, "loss": 0.3555, "step": 15817 }, { "epoch": 0.7846619375961109, "grad_norm": 7.06099271774292, "learning_rate": 1.1226964585429112e-06, "loss": 0.2561, "step": 15818 }, { "epoch": 0.7847115432313111, "grad_norm": 5.24870491027832, "learning_rate": 1.1221995409869758e-06, "loss": 0.1465, "step": 15819 }, { "epoch": 0.7847611488665113, "grad_norm": 6.1161041259765625, "learning_rate": 1.1217027195246893e-06, "loss": 0.271, "step": 15820 }, { "epoch": 0.7848107545017114, "grad_norm": 7.100076675415039, "learning_rate": 1.1212059941683644e-06, "loss": 0.3476, "step": 15821 }, { "epoch": 0.7848603601369115, "grad_norm": 6.911825656890869, "learning_rate": 1.1207093649303107e-06, "loss": 0.327, "step": 15822 }, { "epoch": 0.7849099657721117, "grad_norm": 6.63205099105835, "learning_rate": 1.120212831822831e-06, "loss": 0.2501, "step": 15823 }, { "epoch": 0.7849595714073119, "grad_norm": 7.532732009887695, "learning_rate": 1.1197163948582356e-06, "loss": 0.3183, "step": 15824 }, { "epoch": 0.785009177042512, "grad_norm": 5.6744303703308105, "learning_rate": 1.1192200540488212e-06, "loss": 0.279, "step": 15825 }, { "epoch": 0.7850587826777122, "grad_norm": 7.048790454864502, "learning_rate": 1.1187238094068897e-06, "loss": 0.3281, "step": 15826 }, { "epoch": 0.7851083883129123, "grad_norm": 9.857754707336426, "learning_rate": 1.1182276609447379e-06, "loss": 0.3564, "step": 15827 }, { "epoch": 0.7851579939481125, "grad_norm": 4.505868434906006, "learning_rate": 1.1177316086746615e-06, "loss": 0.302, "step": 15828 }, { "epoch": 0.7852075995833127, "grad_norm": 3.865710496902466, "learning_rate": 1.1172356526089496e-06, "loss": 0.2193, "step": 15829 }, { "epoch": 0.7852572052185128, "grad_norm": 10.642848014831543, "learning_rate": 1.1167397927598972e-06, "loss": 0.3445, "step": 15830 }, { "epoch": 0.785306810853713, "grad_norm": 16.25092124938965, "learning_rate": 1.1162440291397864e-06, "loss": 0.4041, "step": 15831 }, { "epoch": 0.7853564164889132, "grad_norm": 6.3023362159729, "learning_rate": 1.1157483617609083e-06, "loss": 0.2621, "step": 15832 }, { "epoch": 0.7854060221241133, "grad_norm": 8.4259033203125, "learning_rate": 1.1152527906355415e-06, "loss": 0.317, "step": 15833 }, { "epoch": 0.7854556277593134, "grad_norm": 7.969463348388672, "learning_rate": 1.114757315775968e-06, "loss": 0.2439, "step": 15834 }, { "epoch": 0.7855052333945136, "grad_norm": 10.824190139770508, "learning_rate": 1.1142619371944658e-06, "loss": 0.3121, "step": 15835 }, { "epoch": 0.7855548390297138, "grad_norm": 5.438721656799316, "learning_rate": 1.1137666549033115e-06, "loss": 0.1864, "step": 15836 }, { "epoch": 0.785604444664914, "grad_norm": 4.313404560089111, "learning_rate": 1.1132714689147745e-06, "loss": 0.2439, "step": 15837 }, { "epoch": 0.7856540503001141, "grad_norm": 5.087368011474609, "learning_rate": 1.1127763792411318e-06, "loss": 0.2914, "step": 15838 }, { "epoch": 0.7857036559353142, "grad_norm": 6.968357563018799, "learning_rate": 1.1122813858946475e-06, "loss": 0.2386, "step": 15839 }, { "epoch": 0.7857532615705144, "grad_norm": 8.414191246032715, "learning_rate": 1.1117864888875885e-06, "loss": 0.2479, "step": 15840 }, { "epoch": 0.7858028672057146, "grad_norm": 8.025920867919922, "learning_rate": 1.1112916882322194e-06, "loss": 0.2889, "step": 15841 }, { "epoch": 0.7858524728409148, "grad_norm": 6.483920097351074, "learning_rate": 1.1107969839408029e-06, "loss": 0.2447, "step": 15842 }, { "epoch": 0.7859020784761149, "grad_norm": 5.669736862182617, "learning_rate": 1.1103023760255939e-06, "loss": 0.2276, "step": 15843 }, { "epoch": 0.785951684111315, "grad_norm": 7.059454441070557, "learning_rate": 1.1098078644988536e-06, "loss": 0.3379, "step": 15844 }, { "epoch": 0.7860012897465152, "grad_norm": 6.266445159912109, "learning_rate": 1.109313449372833e-06, "loss": 0.2853, "step": 15845 }, { "epoch": 0.7860508953817154, "grad_norm": 8.827827453613281, "learning_rate": 1.1088191306597855e-06, "loss": 0.3299, "step": 15846 }, { "epoch": 0.7861005010169155, "grad_norm": 12.592585563659668, "learning_rate": 1.1083249083719599e-06, "loss": 0.3755, "step": 15847 }, { "epoch": 0.7861501066521157, "grad_norm": 6.417346000671387, "learning_rate": 1.1078307825216044e-06, "loss": 0.2311, "step": 15848 }, { "epoch": 0.7861997122873159, "grad_norm": 8.603357315063477, "learning_rate": 1.1073367531209605e-06, "loss": 0.2688, "step": 15849 }, { "epoch": 0.786249317922516, "grad_norm": 10.04808521270752, "learning_rate": 1.106842820182275e-06, "loss": 0.273, "step": 15850 }, { "epoch": 0.7862989235577161, "grad_norm": 9.627153396606445, "learning_rate": 1.1063489837177832e-06, "loss": 0.2476, "step": 15851 }, { "epoch": 0.7863485291929163, "grad_norm": 4.649212837219238, "learning_rate": 1.1058552437397275e-06, "loss": 0.279, "step": 15852 }, { "epoch": 0.7863981348281165, "grad_norm": 4.26762580871582, "learning_rate": 1.105361600260339e-06, "loss": 0.2579, "step": 15853 }, { "epoch": 0.7864477404633167, "grad_norm": 6.35495662689209, "learning_rate": 1.1048680532918516e-06, "loss": 0.3719, "step": 15854 }, { "epoch": 0.7864973460985167, "grad_norm": 7.607232093811035, "learning_rate": 1.1043746028464959e-06, "loss": 0.3019, "step": 15855 }, { "epoch": 0.7865469517337169, "grad_norm": 11.67849063873291, "learning_rate": 1.1038812489365013e-06, "loss": 0.2286, "step": 15856 }, { "epoch": 0.7865965573689171, "grad_norm": 9.024506568908691, "learning_rate": 1.1033879915740887e-06, "loss": 0.3447, "step": 15857 }, { "epoch": 0.7866461630041173, "grad_norm": 19.7503662109375, "learning_rate": 1.102894830771487e-06, "loss": 0.3665, "step": 15858 }, { "epoch": 0.7866957686393174, "grad_norm": 13.25075626373291, "learning_rate": 1.1024017665409132e-06, "loss": 0.4004, "step": 15859 }, { "epoch": 0.7867453742745176, "grad_norm": 10.050813674926758, "learning_rate": 1.1019087988945864e-06, "loss": 0.4152, "step": 15860 }, { "epoch": 0.7867949799097177, "grad_norm": 21.095565795898438, "learning_rate": 1.101415927844723e-06, "loss": 0.2822, "step": 15861 }, { "epoch": 0.7868445855449179, "grad_norm": 4.462348461151123, "learning_rate": 1.1009231534035364e-06, "loss": 0.265, "step": 15862 }, { "epoch": 0.786894191180118, "grad_norm": 4.901373386383057, "learning_rate": 1.1004304755832379e-06, "loss": 0.2681, "step": 15863 }, { "epoch": 0.7869437968153182, "grad_norm": 6.153973579406738, "learning_rate": 1.0999378943960375e-06, "loss": 0.2475, "step": 15864 }, { "epoch": 0.7869934024505184, "grad_norm": 10.757072448730469, "learning_rate": 1.099445409854139e-06, "loss": 0.3198, "step": 15865 }, { "epoch": 0.7870430080857186, "grad_norm": 8.103418350219727, "learning_rate": 1.0989530219697476e-06, "loss": 0.3325, "step": 15866 }, { "epoch": 0.7870926137209187, "grad_norm": 5.179365634918213, "learning_rate": 1.0984607307550649e-06, "loss": 0.2917, "step": 15867 }, { "epoch": 0.7871422193561188, "grad_norm": 5.591500759124756, "learning_rate": 1.0979685362222897e-06, "loss": 0.2424, "step": 15868 }, { "epoch": 0.787191824991319, "grad_norm": 5.020756244659424, "learning_rate": 1.0974764383836201e-06, "loss": 0.271, "step": 15869 }, { "epoch": 0.7872414306265192, "grad_norm": 4.0408525466918945, "learning_rate": 1.0969844372512488e-06, "loss": 0.1864, "step": 15870 }, { "epoch": 0.7872910362617194, "grad_norm": 9.98590087890625, "learning_rate": 1.0964925328373694e-06, "loss": 0.3611, "step": 15871 }, { "epoch": 0.7873406418969194, "grad_norm": 13.761107444763184, "learning_rate": 1.0960007251541705e-06, "loss": 0.3214, "step": 15872 }, { "epoch": 0.7873902475321196, "grad_norm": 6.0244460105896, "learning_rate": 1.095509014213841e-06, "loss": 0.2426, "step": 15873 }, { "epoch": 0.7874398531673198, "grad_norm": 5.450096130371094, "learning_rate": 1.095017400028563e-06, "loss": 0.1461, "step": 15874 }, { "epoch": 0.78748945880252, "grad_norm": 6.706722736358643, "learning_rate": 1.0945258826105199e-06, "loss": 0.249, "step": 15875 }, { "epoch": 0.7875390644377201, "grad_norm": 5.198423385620117, "learning_rate": 1.0940344619718924e-06, "loss": 0.2576, "step": 15876 }, { "epoch": 0.7875886700729203, "grad_norm": 9.990852355957031, "learning_rate": 1.093543138124858e-06, "loss": 0.2839, "step": 15877 }, { "epoch": 0.7876382757081204, "grad_norm": 4.530923366546631, "learning_rate": 1.0930519110815912e-06, "loss": 0.3029, "step": 15878 }, { "epoch": 0.7876878813433206, "grad_norm": 6.301232814788818, "learning_rate": 1.092560780854267e-06, "loss": 0.2638, "step": 15879 }, { "epoch": 0.7877374869785208, "grad_norm": 9.337545394897461, "learning_rate": 1.0920697474550512e-06, "loss": 0.353, "step": 15880 }, { "epoch": 0.7877870926137209, "grad_norm": 9.82725715637207, "learning_rate": 1.091578810896118e-06, "loss": 0.3052, "step": 15881 }, { "epoch": 0.7878366982489211, "grad_norm": 10.897449493408203, "learning_rate": 1.0910879711896283e-06, "loss": 0.4415, "step": 15882 }, { "epoch": 0.7878863038841213, "grad_norm": 17.49689483642578, "learning_rate": 1.0905972283477467e-06, "loss": 0.4033, "step": 15883 }, { "epoch": 0.7879359095193214, "grad_norm": 4.137862682342529, "learning_rate": 1.0901065823826345e-06, "loss": 0.2042, "step": 15884 }, { "epoch": 0.7879855151545215, "grad_norm": 5.086541175842285, "learning_rate": 1.0896160333064493e-06, "loss": 0.2771, "step": 15885 }, { "epoch": 0.7880351207897217, "grad_norm": 10.42568588256836, "learning_rate": 1.0891255811313484e-06, "loss": 0.3697, "step": 15886 }, { "epoch": 0.7880847264249219, "grad_norm": 9.872648239135742, "learning_rate": 1.0886352258694855e-06, "loss": 0.2676, "step": 15887 }, { "epoch": 0.7881343320601221, "grad_norm": 15.517576217651367, "learning_rate": 1.088144967533008e-06, "loss": 0.3217, "step": 15888 }, { "epoch": 0.7881839376953221, "grad_norm": 11.437292098999023, "learning_rate": 1.0876548061340709e-06, "loss": 0.2694, "step": 15889 }, { "epoch": 0.7882335433305223, "grad_norm": 9.576814651489258, "learning_rate": 1.0871647416848162e-06, "loss": 0.3441, "step": 15890 }, { "epoch": 0.7882831489657225, "grad_norm": 9.784337997436523, "learning_rate": 1.0866747741973888e-06, "loss": 0.3217, "step": 15891 }, { "epoch": 0.7883327546009227, "grad_norm": 7.087745666503906, "learning_rate": 1.086184903683931e-06, "loss": 0.2736, "step": 15892 }, { "epoch": 0.7883823602361228, "grad_norm": 7.695562839508057, "learning_rate": 1.0856951301565833e-06, "loss": 0.2283, "step": 15893 }, { "epoch": 0.788431965871323, "grad_norm": 4.364265441894531, "learning_rate": 1.0852054536274775e-06, "loss": 0.2736, "step": 15894 }, { "epoch": 0.7884815715065231, "grad_norm": 6.869189262390137, "learning_rate": 1.0847158741087543e-06, "loss": 0.2645, "step": 15895 }, { "epoch": 0.7885311771417233, "grad_norm": 10.335362434387207, "learning_rate": 1.0842263916125417e-06, "loss": 0.3328, "step": 15896 }, { "epoch": 0.7885807827769235, "grad_norm": 13.182558059692383, "learning_rate": 1.0837370061509706e-06, "loss": 0.4197, "step": 15897 }, { "epoch": 0.7886303884121236, "grad_norm": 11.088288307189941, "learning_rate": 1.0832477177361678e-06, "loss": 0.3206, "step": 15898 }, { "epoch": 0.7886799940473238, "grad_norm": 4.03133487701416, "learning_rate": 1.0827585263802593e-06, "loss": 0.2214, "step": 15899 }, { "epoch": 0.788729599682524, "grad_norm": 5.607151031494141, "learning_rate": 1.0822694320953641e-06, "loss": 0.1988, "step": 15900 }, { "epoch": 0.788779205317724, "grad_norm": 6.623896598815918, "learning_rate": 1.0817804348936072e-06, "loss": 0.3204, "step": 15901 }, { "epoch": 0.7888288109529242, "grad_norm": 6.160854816436768, "learning_rate": 1.081291534787101e-06, "loss": 0.3187, "step": 15902 }, { "epoch": 0.7888784165881244, "grad_norm": 5.323890209197998, "learning_rate": 1.0808027317879655e-06, "loss": 0.2509, "step": 15903 }, { "epoch": 0.7889280222233246, "grad_norm": 9.77994155883789, "learning_rate": 1.08031402590831e-06, "loss": 0.3458, "step": 15904 }, { "epoch": 0.7889776278585248, "grad_norm": 4.711495876312256, "learning_rate": 1.0798254171602464e-06, "loss": 0.2537, "step": 15905 }, { "epoch": 0.7890272334937248, "grad_norm": 6.934272289276123, "learning_rate": 1.0793369055558816e-06, "loss": 0.3151, "step": 15906 }, { "epoch": 0.789076839128925, "grad_norm": 12.448153495788574, "learning_rate": 1.0788484911073237e-06, "loss": 0.3448, "step": 15907 }, { "epoch": 0.7891264447641252, "grad_norm": 10.218048095703125, "learning_rate": 1.0783601738266709e-06, "loss": 0.3216, "step": 15908 }, { "epoch": 0.7891760503993254, "grad_norm": 6.8550333976745605, "learning_rate": 1.0778719537260295e-06, "loss": 0.2462, "step": 15909 }, { "epoch": 0.7892256560345255, "grad_norm": 6.03676176071167, "learning_rate": 1.0773838308174944e-06, "loss": 0.254, "step": 15910 }, { "epoch": 0.7892752616697257, "grad_norm": 4.854130744934082, "learning_rate": 1.0768958051131616e-06, "loss": 0.2233, "step": 15911 }, { "epoch": 0.7893248673049258, "grad_norm": 4.333075046539307, "learning_rate": 1.0764078766251262e-06, "loss": 0.2521, "step": 15912 }, { "epoch": 0.789374472940126, "grad_norm": 10.100418090820312, "learning_rate": 1.0759200453654794e-06, "loss": 0.2672, "step": 15913 }, { "epoch": 0.7894240785753261, "grad_norm": 6.941955089569092, "learning_rate": 1.0754323113463061e-06, "loss": 0.2791, "step": 15914 }, { "epoch": 0.7894736842105263, "grad_norm": 7.473562240600586, "learning_rate": 1.0749446745796983e-06, "loss": 0.2048, "step": 15915 }, { "epoch": 0.7895232898457265, "grad_norm": 7.356417179107666, "learning_rate": 1.0744571350777355e-06, "loss": 0.2832, "step": 15916 }, { "epoch": 0.7895728954809267, "grad_norm": 18.96600914001465, "learning_rate": 1.0739696928525007e-06, "loss": 0.3594, "step": 15917 }, { "epoch": 0.7896225011161268, "grad_norm": 9.476222038269043, "learning_rate": 1.0734823479160727e-06, "loss": 0.336, "step": 15918 }, { "epoch": 0.7896721067513269, "grad_norm": 3.6543498039245605, "learning_rate": 1.0729951002805288e-06, "loss": 0.2142, "step": 15919 }, { "epoch": 0.7897217123865271, "grad_norm": 19.508676528930664, "learning_rate": 1.0725079499579422e-06, "loss": 0.2619, "step": 15920 }, { "epoch": 0.7897713180217273, "grad_norm": 7.689435005187988, "learning_rate": 1.0720208969603868e-06, "loss": 0.2395, "step": 15921 }, { "epoch": 0.7898209236569275, "grad_norm": 4.979833602905273, "learning_rate": 1.0715339412999276e-06, "loss": 0.2483, "step": 15922 }, { "epoch": 0.7898705292921275, "grad_norm": 6.6604228019714355, "learning_rate": 1.0710470829886366e-06, "loss": 0.2532, "step": 15923 }, { "epoch": 0.7899201349273277, "grad_norm": 7.625380992889404, "learning_rate": 1.0705603220385752e-06, "loss": 0.3079, "step": 15924 }, { "epoch": 0.7899697405625279, "grad_norm": 6.2362518310546875, "learning_rate": 1.0700736584618065e-06, "loss": 0.2556, "step": 15925 }, { "epoch": 0.7900193461977281, "grad_norm": 5.290922164916992, "learning_rate": 1.0695870922703906e-06, "loss": 0.2676, "step": 15926 }, { "epoch": 0.7900689518329282, "grad_norm": 6.361478328704834, "learning_rate": 1.069100623476384e-06, "loss": 0.2733, "step": 15927 }, { "epoch": 0.7901185574681284, "grad_norm": 9.444369316101074, "learning_rate": 1.0686142520918424e-06, "loss": 0.2428, "step": 15928 }, { "epoch": 0.7901681631033285, "grad_norm": 6.77907133102417, "learning_rate": 1.0681279781288172e-06, "loss": 0.3002, "step": 15929 }, { "epoch": 0.7902177687385287, "grad_norm": 6.7041144371032715, "learning_rate": 1.0676418015993616e-06, "loss": 0.2572, "step": 15930 }, { "epoch": 0.7902673743737288, "grad_norm": 6.196723937988281, "learning_rate": 1.0671557225155193e-06, "loss": 0.2314, "step": 15931 }, { "epoch": 0.790316980008929, "grad_norm": 5.477088451385498, "learning_rate": 1.0666697408893372e-06, "loss": 0.2484, "step": 15932 }, { "epoch": 0.7903665856441292, "grad_norm": 5.13798713684082, "learning_rate": 1.0661838567328581e-06, "loss": 0.2705, "step": 15933 }, { "epoch": 0.7904161912793294, "grad_norm": 7.9109787940979, "learning_rate": 1.0656980700581226e-06, "loss": 0.2808, "step": 15934 }, { "epoch": 0.7904657969145295, "grad_norm": 9.235489845275879, "learning_rate": 1.065212380877168e-06, "loss": 0.2745, "step": 15935 }, { "epoch": 0.7905154025497296, "grad_norm": 8.535965919494629, "learning_rate": 1.0647267892020324e-06, "loss": 0.3039, "step": 15936 }, { "epoch": 0.7905650081849298, "grad_norm": 12.880135536193848, "learning_rate": 1.0642412950447444e-06, "loss": 0.2991, "step": 15937 }, { "epoch": 0.79061461382013, "grad_norm": 4.757865905761719, "learning_rate": 1.0637558984173401e-06, "loss": 0.1711, "step": 15938 }, { "epoch": 0.7906642194553302, "grad_norm": 7.3588738441467285, "learning_rate": 1.0632705993318425e-06, "loss": 0.2607, "step": 15939 }, { "epoch": 0.7907138250905302, "grad_norm": 11.285008430480957, "learning_rate": 1.0627853978002834e-06, "loss": 0.3536, "step": 15940 }, { "epoch": 0.7907634307257304, "grad_norm": 7.401544094085693, "learning_rate": 1.062300293834681e-06, "loss": 0.2864, "step": 15941 }, { "epoch": 0.7908130363609306, "grad_norm": 8.4957275390625, "learning_rate": 1.061815287447059e-06, "loss": 0.3167, "step": 15942 }, { "epoch": 0.7908626419961308, "grad_norm": 8.610700607299805, "learning_rate": 1.061330378649435e-06, "loss": 0.2817, "step": 15943 }, { "epoch": 0.790912247631331, "grad_norm": 5.541733264923096, "learning_rate": 1.060845567453828e-06, "loss": 0.2838, "step": 15944 }, { "epoch": 0.7909618532665311, "grad_norm": 5.9509711265563965, "learning_rate": 1.0603608538722466e-06, "loss": 0.2925, "step": 15945 }, { "epoch": 0.7910114589017312, "grad_norm": 6.352970600128174, "learning_rate": 1.0598762379167083e-06, "loss": 0.281, "step": 15946 }, { "epoch": 0.7910610645369314, "grad_norm": 12.516681671142578, "learning_rate": 1.0593917195992177e-06, "loss": 0.3172, "step": 15947 }, { "epoch": 0.7911106701721315, "grad_norm": 8.916092872619629, "learning_rate": 1.0589072989317821e-06, "loss": 0.2183, "step": 15948 }, { "epoch": 0.7911602758073317, "grad_norm": 7.027492046356201, "learning_rate": 1.0584229759264075e-06, "loss": 0.2982, "step": 15949 }, { "epoch": 0.7912098814425319, "grad_norm": 4.72108268737793, "learning_rate": 1.0579387505950949e-06, "loss": 0.2277, "step": 15950 }, { "epoch": 0.7912594870777321, "grad_norm": 10.462340354919434, "learning_rate": 1.0574546229498411e-06, "loss": 0.2711, "step": 15951 }, { "epoch": 0.7913090927129321, "grad_norm": 9.3051176071167, "learning_rate": 1.056970593002648e-06, "loss": 0.36, "step": 15952 }, { "epoch": 0.7913586983481323, "grad_norm": 12.252374649047852, "learning_rate": 1.0564866607655055e-06, "loss": 0.305, "step": 15953 }, { "epoch": 0.7914083039833325, "grad_norm": 10.3923978805542, "learning_rate": 1.0560028262504073e-06, "loss": 0.2369, "step": 15954 }, { "epoch": 0.7914579096185327, "grad_norm": 8.01872730255127, "learning_rate": 1.0555190894693435e-06, "loss": 0.3453, "step": 15955 }, { "epoch": 0.7915075152537329, "grad_norm": 10.727171897888184, "learning_rate": 1.0550354504343008e-06, "loss": 0.3115, "step": 15956 }, { "epoch": 0.7915571208889329, "grad_norm": 9.731435775756836, "learning_rate": 1.0545519091572643e-06, "loss": 0.3951, "step": 15957 }, { "epoch": 0.7916067265241331, "grad_norm": 5.727297306060791, "learning_rate": 1.0540684656502175e-06, "loss": 0.2893, "step": 15958 }, { "epoch": 0.7916563321593333, "grad_norm": 3.6798224449157715, "learning_rate": 1.0535851199251362e-06, "loss": 0.2012, "step": 15959 }, { "epoch": 0.7917059377945335, "grad_norm": 5.806107044219971, "learning_rate": 1.0531018719940034e-06, "loss": 0.2899, "step": 15960 }, { "epoch": 0.7917555434297336, "grad_norm": 5.950323581695557, "learning_rate": 1.0526187218687906e-06, "loss": 0.3266, "step": 15961 }, { "epoch": 0.7918051490649338, "grad_norm": 6.977194309234619, "learning_rate": 1.0521356695614714e-06, "loss": 0.3255, "step": 15962 }, { "epoch": 0.7918547547001339, "grad_norm": 5.3342719078063965, "learning_rate": 1.051652715084016e-06, "loss": 0.2719, "step": 15963 }, { "epoch": 0.7919043603353341, "grad_norm": 10.37738037109375, "learning_rate": 1.0511698584483937e-06, "loss": 0.2965, "step": 15964 }, { "epoch": 0.7919539659705342, "grad_norm": 5.88286018371582, "learning_rate": 1.0506870996665659e-06, "loss": 0.212, "step": 15965 }, { "epoch": 0.7920035716057344, "grad_norm": 10.954944610595703, "learning_rate": 1.0502044387505005e-06, "loss": 0.3173, "step": 15966 }, { "epoch": 0.7920531772409346, "grad_norm": 11.438520431518555, "learning_rate": 1.0497218757121552e-06, "loss": 0.3188, "step": 15967 }, { "epoch": 0.7921027828761348, "grad_norm": 10.28928279876709, "learning_rate": 1.0492394105634885e-06, "loss": 0.3284, "step": 15968 }, { "epoch": 0.7921523885113348, "grad_norm": 6.982916355133057, "learning_rate": 1.0487570433164557e-06, "loss": 0.3141, "step": 15969 }, { "epoch": 0.792201994146535, "grad_norm": 4.946432590484619, "learning_rate": 1.048274773983013e-06, "loss": 0.2406, "step": 15970 }, { "epoch": 0.7922515997817352, "grad_norm": 7.10854959487915, "learning_rate": 1.0477926025751055e-06, "loss": 0.2878, "step": 15971 }, { "epoch": 0.7923012054169354, "grad_norm": 6.759365081787109, "learning_rate": 1.047310529104688e-06, "loss": 0.2697, "step": 15972 }, { "epoch": 0.7923508110521356, "grad_norm": 7.698535919189453, "learning_rate": 1.0468285535837025e-06, "loss": 0.2208, "step": 15973 }, { "epoch": 0.7924004166873356, "grad_norm": 12.372162818908691, "learning_rate": 1.0463466760240936e-06, "loss": 0.3922, "step": 15974 }, { "epoch": 0.7924500223225358, "grad_norm": 5.832066059112549, "learning_rate": 1.045864896437802e-06, "loss": 0.3248, "step": 15975 }, { "epoch": 0.792499627957736, "grad_norm": 4.803136348724365, "learning_rate": 1.0453832148367676e-06, "loss": 0.2041, "step": 15976 }, { "epoch": 0.7925492335929362, "grad_norm": 9.932945251464844, "learning_rate": 1.0449016312329258e-06, "loss": 0.2541, "step": 15977 }, { "epoch": 0.7925988392281363, "grad_norm": 7.5081329345703125, "learning_rate": 1.0444201456382103e-06, "loss": 0.3481, "step": 15978 }, { "epoch": 0.7926484448633365, "grad_norm": 6.065725326538086, "learning_rate": 1.0439387580645533e-06, "loss": 0.2233, "step": 15979 }, { "epoch": 0.7926980504985366, "grad_norm": 4.598702430725098, "learning_rate": 1.0434574685238847e-06, "loss": 0.3272, "step": 15980 }, { "epoch": 0.7927476561337368, "grad_norm": 6.242412567138672, "learning_rate": 1.0429762770281283e-06, "loss": 0.2428, "step": 15981 }, { "epoch": 0.792797261768937, "grad_norm": 5.1500067710876465, "learning_rate": 1.0424951835892094e-06, "loss": 0.217, "step": 15982 }, { "epoch": 0.7928468674041371, "grad_norm": 8.200021743774414, "learning_rate": 1.0420141882190505e-06, "loss": 0.4086, "step": 15983 }, { "epoch": 0.7928964730393373, "grad_norm": 7.905765533447266, "learning_rate": 1.0415332909295705e-06, "loss": 0.2706, "step": 15984 }, { "epoch": 0.7929460786745375, "grad_norm": 4.851999759674072, "learning_rate": 1.0410524917326858e-06, "loss": 0.2931, "step": 15985 }, { "epoch": 0.7929956843097375, "grad_norm": 4.664021015167236, "learning_rate": 1.0405717906403112e-06, "loss": 0.1978, "step": 15986 }, { "epoch": 0.7930452899449377, "grad_norm": 4.125029563903809, "learning_rate": 1.0400911876643605e-06, "loss": 0.289, "step": 15987 }, { "epoch": 0.7930948955801379, "grad_norm": 3.896805763244629, "learning_rate": 1.039610682816738e-06, "loss": 0.2722, "step": 15988 }, { "epoch": 0.7931445012153381, "grad_norm": 7.062152862548828, "learning_rate": 1.0391302761093575e-06, "loss": 0.2768, "step": 15989 }, { "epoch": 0.7931941068505383, "grad_norm": 12.89831829071045, "learning_rate": 1.038649967554119e-06, "loss": 0.3796, "step": 15990 }, { "epoch": 0.7932437124857383, "grad_norm": 9.675996780395508, "learning_rate": 1.0381697571629263e-06, "loss": 0.2267, "step": 15991 }, { "epoch": 0.7932933181209385, "grad_norm": 5.869605541229248, "learning_rate": 1.0376896449476786e-06, "loss": 0.2656, "step": 15992 }, { "epoch": 0.7933429237561387, "grad_norm": 5.939295291900635, "learning_rate": 1.0372096309202744e-06, "loss": 0.3362, "step": 15993 }, { "epoch": 0.7933925293913389, "grad_norm": 6.201030731201172, "learning_rate": 1.036729715092608e-06, "loss": 0.3183, "step": 15994 }, { "epoch": 0.793442135026539, "grad_norm": 4.415543079376221, "learning_rate": 1.0362498974765733e-06, "loss": 0.1623, "step": 15995 }, { "epoch": 0.7934917406617392, "grad_norm": 7.770078659057617, "learning_rate": 1.0357701780840563e-06, "loss": 0.2642, "step": 15996 }, { "epoch": 0.7935413462969393, "grad_norm": 9.5863676071167, "learning_rate": 1.0352905569269506e-06, "loss": 0.2991, "step": 15997 }, { "epoch": 0.7935909519321395, "grad_norm": 14.428654670715332, "learning_rate": 1.034811034017137e-06, "loss": 0.3051, "step": 15998 }, { "epoch": 0.7936405575673396, "grad_norm": 5.811417579650879, "learning_rate": 1.034331609366499e-06, "loss": 0.3541, "step": 15999 }, { "epoch": 0.7936901632025398, "grad_norm": 8.434805870056152, "learning_rate": 1.033852282986918e-06, "loss": 0.3114, "step": 16000 }, { "epoch": 0.79373976883774, "grad_norm": 6.535741806030273, "learning_rate": 1.0333730548902731e-06, "loss": 0.203, "step": 16001 }, { "epoch": 0.7937893744729402, "grad_norm": 10.711945533752441, "learning_rate": 1.032893925088435e-06, "loss": 0.3201, "step": 16002 }, { "epoch": 0.7938389801081402, "grad_norm": 5.689957141876221, "learning_rate": 1.0324148935932832e-06, "loss": 0.2651, "step": 16003 }, { "epoch": 0.7938885857433404, "grad_norm": 4.199448108673096, "learning_rate": 1.0319359604166835e-06, "loss": 0.2454, "step": 16004 }, { "epoch": 0.7939381913785406, "grad_norm": 7.3861799240112305, "learning_rate": 1.0314571255705053e-06, "loss": 0.1689, "step": 16005 }, { "epoch": 0.7939877970137408, "grad_norm": 4.433871746063232, "learning_rate": 1.0309783890666153e-06, "loss": 0.197, "step": 16006 }, { "epoch": 0.794037402648941, "grad_norm": 7.16080379486084, "learning_rate": 1.0304997509168768e-06, "loss": 0.3026, "step": 16007 }, { "epoch": 0.794087008284141, "grad_norm": 6.621984004974365, "learning_rate": 1.0300212111331481e-06, "loss": 0.2499, "step": 16008 }, { "epoch": 0.7941366139193412, "grad_norm": 4.512199878692627, "learning_rate": 1.029542769727292e-06, "loss": 0.2199, "step": 16009 }, { "epoch": 0.7941862195545414, "grad_norm": 6.522659778594971, "learning_rate": 1.0290644267111593e-06, "loss": 0.2442, "step": 16010 }, { "epoch": 0.7942358251897416, "grad_norm": 5.420057773590088, "learning_rate": 1.0285861820966092e-06, "loss": 0.2736, "step": 16011 }, { "epoch": 0.7942854308249417, "grad_norm": 12.427887916564941, "learning_rate": 1.0281080358954887e-06, "loss": 0.3989, "step": 16012 }, { "epoch": 0.7943350364601419, "grad_norm": 6.091024875640869, "learning_rate": 1.0276299881196473e-06, "loss": 0.3338, "step": 16013 }, { "epoch": 0.794384642095342, "grad_norm": 6.877166271209717, "learning_rate": 1.027152038780932e-06, "loss": 0.3495, "step": 16014 }, { "epoch": 0.7944342477305422, "grad_norm": 6.034886837005615, "learning_rate": 1.0266741878911878e-06, "loss": 0.3097, "step": 16015 }, { "epoch": 0.7944838533657423, "grad_norm": 6.811802864074707, "learning_rate": 1.0261964354622516e-06, "loss": 0.3085, "step": 16016 }, { "epoch": 0.7945334590009425, "grad_norm": 9.878737449645996, "learning_rate": 1.0257187815059682e-06, "loss": 0.3324, "step": 16017 }, { "epoch": 0.7945830646361427, "grad_norm": 5.0816545486450195, "learning_rate": 1.02524122603417e-06, "loss": 0.279, "step": 16018 }, { "epoch": 0.7946326702713429, "grad_norm": 4.448415279388428, "learning_rate": 1.0247637690586914e-06, "loss": 0.2707, "step": 16019 }, { "epoch": 0.794682275906543, "grad_norm": 11.928644180297852, "learning_rate": 1.0242864105913657e-06, "loss": 0.2913, "step": 16020 }, { "epoch": 0.7947318815417431, "grad_norm": 8.366243362426758, "learning_rate": 1.0238091506440218e-06, "loss": 0.2998, "step": 16021 }, { "epoch": 0.7947814871769433, "grad_norm": 10.875696182250977, "learning_rate": 1.0233319892284839e-06, "loss": 0.3239, "step": 16022 }, { "epoch": 0.7948310928121435, "grad_norm": 7.900705814361572, "learning_rate": 1.02285492635658e-06, "loss": 0.2789, "step": 16023 }, { "epoch": 0.7948806984473437, "grad_norm": 6.289324760437012, "learning_rate": 1.022377962040129e-06, "loss": 0.2362, "step": 16024 }, { "epoch": 0.7949303040825437, "grad_norm": 7.925024032592773, "learning_rate": 1.0219010962909514e-06, "loss": 0.2731, "step": 16025 }, { "epoch": 0.7949799097177439, "grad_norm": 5.157288551330566, "learning_rate": 1.0214243291208641e-06, "loss": 0.2259, "step": 16026 }, { "epoch": 0.7950295153529441, "grad_norm": 3.9080374240875244, "learning_rate": 1.0209476605416817e-06, "loss": 0.2104, "step": 16027 }, { "epoch": 0.7950791209881443, "grad_norm": 12.442571640014648, "learning_rate": 1.0204710905652166e-06, "loss": 0.3907, "step": 16028 }, { "epoch": 0.7951287266233444, "grad_norm": 8.160958290100098, "learning_rate": 1.0199946192032788e-06, "loss": 0.3043, "step": 16029 }, { "epoch": 0.7951783322585446, "grad_norm": 10.260030746459961, "learning_rate": 1.0195182464676724e-06, "loss": 0.3514, "step": 16030 }, { "epoch": 0.7952279378937447, "grad_norm": 9.47597599029541, "learning_rate": 1.0190419723702067e-06, "loss": 0.2152, "step": 16031 }, { "epoch": 0.7952775435289449, "grad_norm": 8.397967338562012, "learning_rate": 1.0185657969226803e-06, "loss": 0.3018, "step": 16032 }, { "epoch": 0.795327149164145, "grad_norm": 7.168162822723389, "learning_rate": 1.018089720136895e-06, "loss": 0.3108, "step": 16033 }, { "epoch": 0.7953767547993452, "grad_norm": 15.084661483764648, "learning_rate": 1.0176137420246468e-06, "loss": 0.4483, "step": 16034 }, { "epoch": 0.7954263604345454, "grad_norm": 7.065231800079346, "learning_rate": 1.0171378625977324e-06, "loss": 0.3226, "step": 16035 }, { "epoch": 0.7954759660697456, "grad_norm": 5.65230655670166, "learning_rate": 1.016662081867943e-06, "loss": 0.2238, "step": 16036 }, { "epoch": 0.7955255717049456, "grad_norm": 5.740961074829102, "learning_rate": 1.016186399847069e-06, "loss": 0.2279, "step": 16037 }, { "epoch": 0.7955751773401458, "grad_norm": 4.221415996551514, "learning_rate": 1.0157108165468992e-06, "loss": 0.2111, "step": 16038 }, { "epoch": 0.795624782975346, "grad_norm": 6.100414752960205, "learning_rate": 1.0152353319792169e-06, "loss": 0.2234, "step": 16039 }, { "epoch": 0.7956743886105462, "grad_norm": 9.813333511352539, "learning_rate": 1.0147599461558055e-06, "loss": 0.2748, "step": 16040 }, { "epoch": 0.7957239942457464, "grad_norm": 6.538938999176025, "learning_rate": 1.0142846590884454e-06, "loss": 0.2649, "step": 16041 }, { "epoch": 0.7957735998809464, "grad_norm": 4.948318004608154, "learning_rate": 1.013809470788914e-06, "loss": 0.31, "step": 16042 }, { "epoch": 0.7958232055161466, "grad_norm": 17.89604377746582, "learning_rate": 1.0133343812689872e-06, "loss": 0.4152, "step": 16043 }, { "epoch": 0.7958728111513468, "grad_norm": 6.9351725578308105, "learning_rate": 1.0128593905404387e-06, "loss": 0.2877, "step": 16044 }, { "epoch": 0.795922416786547, "grad_norm": 10.986300468444824, "learning_rate": 1.012384498615036e-06, "loss": 0.3911, "step": 16045 }, { "epoch": 0.7959720224217471, "grad_norm": 7.546120643615723, "learning_rate": 1.0119097055045519e-06, "loss": 0.2565, "step": 16046 }, { "epoch": 0.7960216280569473, "grad_norm": 5.627783298492432, "learning_rate": 1.0114350112207465e-06, "loss": 0.2036, "step": 16047 }, { "epoch": 0.7960712336921474, "grad_norm": 12.374951362609863, "learning_rate": 1.0109604157753884e-06, "loss": 0.3565, "step": 16048 }, { "epoch": 0.7961208393273476, "grad_norm": 9.679491996765137, "learning_rate": 1.0104859191802346e-06, "loss": 0.3412, "step": 16049 }, { "epoch": 0.7961704449625477, "grad_norm": 5.324481010437012, "learning_rate": 1.0100115214470441e-06, "loss": 0.2023, "step": 16050 }, { "epoch": 0.7962200505977479, "grad_norm": 8.768057823181152, "learning_rate": 1.0095372225875727e-06, "loss": 0.2405, "step": 16051 }, { "epoch": 0.7962696562329481, "grad_norm": 11.757899284362793, "learning_rate": 1.0090630226135755e-06, "loss": 0.3638, "step": 16052 }, { "epoch": 0.7963192618681483, "grad_norm": 9.8599271774292, "learning_rate": 1.0085889215367994e-06, "loss": 0.2868, "step": 16053 }, { "epoch": 0.7963688675033483, "grad_norm": 9.045659065246582, "learning_rate": 1.0081149193689982e-06, "loss": 0.2706, "step": 16054 }, { "epoch": 0.7964184731385485, "grad_norm": 3.1611948013305664, "learning_rate": 1.007641016121913e-06, "loss": 0.1862, "step": 16055 }, { "epoch": 0.7964680787737487, "grad_norm": 10.077200889587402, "learning_rate": 1.00716721180729e-06, "loss": 0.3016, "step": 16056 }, { "epoch": 0.7965176844089489, "grad_norm": 8.132200241088867, "learning_rate": 1.0066935064368693e-06, "loss": 0.3565, "step": 16057 }, { "epoch": 0.7965672900441491, "grad_norm": 7.158997058868408, "learning_rate": 1.0062199000223915e-06, "loss": 0.238, "step": 16058 }, { "epoch": 0.7966168956793491, "grad_norm": 10.515727996826172, "learning_rate": 1.0057463925755884e-06, "loss": 0.4235, "step": 16059 }, { "epoch": 0.7966665013145493, "grad_norm": 4.176589488983154, "learning_rate": 1.0052729841081993e-06, "loss": 0.2778, "step": 16060 }, { "epoch": 0.7967161069497495, "grad_norm": 12.012320518493652, "learning_rate": 1.0047996746319511e-06, "loss": 0.4882, "step": 16061 }, { "epoch": 0.7967657125849497, "grad_norm": 6.492408275604248, "learning_rate": 1.0043264641585742e-06, "loss": 0.3293, "step": 16062 }, { "epoch": 0.7968153182201498, "grad_norm": 7.28901481628418, "learning_rate": 1.0038533526997945e-06, "loss": 0.2883, "step": 16063 }, { "epoch": 0.79686492385535, "grad_norm": 7.029445171356201, "learning_rate": 1.003380340267337e-06, "loss": 0.326, "step": 16064 }, { "epoch": 0.7969145294905501, "grad_norm": 9.911140441894531, "learning_rate": 1.0029074268729227e-06, "loss": 0.3448, "step": 16065 }, { "epoch": 0.7969641351257503, "grad_norm": 7.837885856628418, "learning_rate": 1.0024346125282713e-06, "loss": 0.2645, "step": 16066 }, { "epoch": 0.7970137407609504, "grad_norm": 8.672380447387695, "learning_rate": 1.0019618972450962e-06, "loss": 0.3393, "step": 16067 }, { "epoch": 0.7970633463961506, "grad_norm": 6.6357526779174805, "learning_rate": 1.0014892810351161e-06, "loss": 0.3004, "step": 16068 }, { "epoch": 0.7971129520313508, "grad_norm": 6.689123630523682, "learning_rate": 1.0010167639100392e-06, "loss": 0.2314, "step": 16069 }, { "epoch": 0.797162557666551, "grad_norm": 8.961877822875977, "learning_rate": 1.0005443458815756e-06, "loss": 0.3168, "step": 16070 }, { "epoch": 0.797212163301751, "grad_norm": 11.526925086975098, "learning_rate": 1.0000720269614323e-06, "loss": 0.2473, "step": 16071 }, { "epoch": 0.7972617689369512, "grad_norm": 11.687816619873047, "learning_rate": 9.99599807161315e-07, "loss": 0.3118, "step": 16072 }, { "epoch": 0.7973113745721514, "grad_norm": 5.773879051208496, "learning_rate": 9.991276864929217e-07, "loss": 0.2576, "step": 16073 }, { "epoch": 0.7973609802073516, "grad_norm": 3.9087650775909424, "learning_rate": 9.986556649679564e-07, "loss": 0.2023, "step": 16074 }, { "epoch": 0.7974105858425518, "grad_norm": 5.463048934936523, "learning_rate": 9.981837425981123e-07, "loss": 0.318, "step": 16075 }, { "epoch": 0.7974601914777518, "grad_norm": 5.6209635734558105, "learning_rate": 9.977119193950857e-07, "loss": 0.2655, "step": 16076 }, { "epoch": 0.797509797112952, "grad_norm": 7.08820915222168, "learning_rate": 9.97240195370568e-07, "loss": 0.2652, "step": 16077 }, { "epoch": 0.7975594027481522, "grad_norm": 6.366672992706299, "learning_rate": 9.9676857053625e-07, "loss": 0.2763, "step": 16078 }, { "epoch": 0.7976090083833524, "grad_norm": 4.71730375289917, "learning_rate": 9.96297044903815e-07, "loss": 0.2517, "step": 16079 }, { "epoch": 0.7976586140185525, "grad_norm": 14.98841381072998, "learning_rate": 9.958256184849535e-07, "loss": 0.4697, "step": 16080 }, { "epoch": 0.7977082196537527, "grad_norm": 7.950117111206055, "learning_rate": 9.953542912913411e-07, "loss": 0.3008, "step": 16081 }, { "epoch": 0.7977578252889528, "grad_norm": 7.145293712615967, "learning_rate": 9.94883063334664e-07, "loss": 0.333, "step": 16082 }, { "epoch": 0.797807430924153, "grad_norm": 8.67959213256836, "learning_rate": 9.944119346265952e-07, "loss": 0.2106, "step": 16083 }, { "epoch": 0.7978570365593531, "grad_norm": 8.14517879486084, "learning_rate": 9.9394090517881e-07, "loss": 0.3258, "step": 16084 }, { "epoch": 0.7979066421945533, "grad_norm": 4.669554710388184, "learning_rate": 9.934699750029813e-07, "loss": 0.2149, "step": 16085 }, { "epoch": 0.7979562478297535, "grad_norm": 8.622101783752441, "learning_rate": 9.929991441107796e-07, "loss": 0.3476, "step": 16086 }, { "epoch": 0.7980058534649537, "grad_norm": 27.39862823486328, "learning_rate": 9.925284125138711e-07, "loss": 0.4758, "step": 16087 }, { "epoch": 0.7980554591001537, "grad_norm": 8.002237319946289, "learning_rate": 9.920577802239228e-07, "loss": 0.3231, "step": 16088 }, { "epoch": 0.7981050647353539, "grad_norm": 4.3084797859191895, "learning_rate": 9.915872472525945e-07, "loss": 0.2587, "step": 16089 }, { "epoch": 0.7981546703705541, "grad_norm": 4.677121162414551, "learning_rate": 9.911168136115472e-07, "loss": 0.2413, "step": 16090 }, { "epoch": 0.7982042760057543, "grad_norm": 9.354449272155762, "learning_rate": 9.906464793124387e-07, "loss": 0.3054, "step": 16091 }, { "epoch": 0.7982538816409545, "grad_norm": 9.869890213012695, "learning_rate": 9.901762443669244e-07, "loss": 0.2733, "step": 16092 }, { "epoch": 0.7983034872761545, "grad_norm": 7.995340824127197, "learning_rate": 9.897061087866562e-07, "loss": 0.2382, "step": 16093 }, { "epoch": 0.7983530929113547, "grad_norm": 4.944733619689941, "learning_rate": 9.89236072583285e-07, "loss": 0.1987, "step": 16094 }, { "epoch": 0.7984026985465549, "grad_norm": 10.086779594421387, "learning_rate": 9.887661357684592e-07, "loss": 0.2551, "step": 16095 }, { "epoch": 0.7984523041817551, "grad_norm": 6.147800445556641, "learning_rate": 9.882962983538203e-07, "loss": 0.2329, "step": 16096 }, { "epoch": 0.7985019098169552, "grad_norm": 3.517690658569336, "learning_rate": 9.878265603510167e-07, "loss": 0.1237, "step": 16097 }, { "epoch": 0.7985515154521554, "grad_norm": 7.107211589813232, "learning_rate": 9.873569217716845e-07, "loss": 0.2657, "step": 16098 }, { "epoch": 0.7986011210873555, "grad_norm": 21.80063247680664, "learning_rate": 9.868873826274627e-07, "loss": 0.3031, "step": 16099 }, { "epoch": 0.7986507267225557, "grad_norm": 8.839211463928223, "learning_rate": 9.864179429299874e-07, "loss": 0.2296, "step": 16100 }, { "epoch": 0.7987003323577558, "grad_norm": 3.8977417945861816, "learning_rate": 9.859486026908903e-07, "loss": 0.2031, "step": 16101 }, { "epoch": 0.798749937992956, "grad_norm": 6.700416564941406, "learning_rate": 9.854793619218034e-07, "loss": 0.3473, "step": 16102 }, { "epoch": 0.7987995436281562, "grad_norm": 4.13779354095459, "learning_rate": 9.850102206343547e-07, "loss": 0.2017, "step": 16103 }, { "epoch": 0.7988491492633563, "grad_norm": 9.766128540039062, "learning_rate": 9.845411788401666e-07, "loss": 0.3363, "step": 16104 }, { "epoch": 0.7988987548985564, "grad_norm": 6.190100193023682, "learning_rate": 9.840722365508665e-07, "loss": 0.2836, "step": 16105 }, { "epoch": 0.7989483605337566, "grad_norm": 12.124312400817871, "learning_rate": 9.83603393778072e-07, "loss": 0.3863, "step": 16106 }, { "epoch": 0.7989979661689568, "grad_norm": 6.519669532775879, "learning_rate": 9.831346505334021e-07, "loss": 0.2737, "step": 16107 }, { "epoch": 0.799047571804157, "grad_norm": 11.76542854309082, "learning_rate": 9.826660068284733e-07, "loss": 0.3816, "step": 16108 }, { "epoch": 0.7990971774393572, "grad_norm": 5.456503868103027, "learning_rate": 9.821974626748987e-07, "loss": 0.2597, "step": 16109 }, { "epoch": 0.7991467830745572, "grad_norm": 5.985058307647705, "learning_rate": 9.817290180842858e-07, "loss": 0.2339, "step": 16110 }, { "epoch": 0.7991963887097574, "grad_norm": 8.586002349853516, "learning_rate": 9.812606730682484e-07, "loss": 0.2196, "step": 16111 }, { "epoch": 0.7992459943449576, "grad_norm": 5.784310340881348, "learning_rate": 9.80792427638388e-07, "loss": 0.1748, "step": 16112 }, { "epoch": 0.7992955999801578, "grad_norm": 16.664663314819336, "learning_rate": 9.803242818063091e-07, "loss": 0.3964, "step": 16113 }, { "epoch": 0.7993452056153579, "grad_norm": 13.626860618591309, "learning_rate": 9.798562355836128e-07, "loss": 0.3383, "step": 16114 }, { "epoch": 0.7993948112505581, "grad_norm": 4.975074291229248, "learning_rate": 9.79388288981899e-07, "loss": 0.2474, "step": 16115 }, { "epoch": 0.7994444168857582, "grad_norm": 5.721699237823486, "learning_rate": 9.789204420127596e-07, "loss": 0.3202, "step": 16116 }, { "epoch": 0.7994940225209584, "grad_norm": 5.501602649688721, "learning_rate": 9.78452694687793e-07, "loss": 0.2458, "step": 16117 }, { "epoch": 0.7995436281561585, "grad_norm": 19.11027717590332, "learning_rate": 9.779850470185852e-07, "loss": 0.4036, "step": 16118 }, { "epoch": 0.7995932337913587, "grad_norm": 10.18673038482666, "learning_rate": 9.775174990167296e-07, "loss": 0.2998, "step": 16119 }, { "epoch": 0.7996428394265589, "grad_norm": 11.216367721557617, "learning_rate": 9.770500506938091e-07, "loss": 0.358, "step": 16120 }, { "epoch": 0.799692445061759, "grad_norm": 9.135311126708984, "learning_rate": 9.765827020614082e-07, "loss": 0.3032, "step": 16121 }, { "epoch": 0.7997420506969591, "grad_norm": 6.124640941619873, "learning_rate": 9.761154531311079e-07, "loss": 0.3252, "step": 16122 }, { "epoch": 0.7997916563321593, "grad_norm": 4.785306930541992, "learning_rate": 9.756483039144877e-07, "loss": 0.1541, "step": 16123 }, { "epoch": 0.7998412619673595, "grad_norm": 7.658026695251465, "learning_rate": 9.751812544231205e-07, "loss": 0.2693, "step": 16124 }, { "epoch": 0.7998908676025597, "grad_norm": 11.483826637268066, "learning_rate": 9.747143046685853e-07, "loss": 0.3724, "step": 16125 }, { "epoch": 0.7999404732377599, "grad_norm": 15.035785675048828, "learning_rate": 9.742474546624491e-07, "loss": 0.3541, "step": 16126 }, { "epoch": 0.7999900788729599, "grad_norm": 5.06755256652832, "learning_rate": 9.737807044162817e-07, "loss": 0.2036, "step": 16127 }, { "epoch": 0.8000396845081601, "grad_norm": 5.366166591644287, "learning_rate": 9.733140539416502e-07, "loss": 0.2761, "step": 16128 }, { "epoch": 0.8000396845081601, "eval_loss": 0.2839374840259552, "eval_runtime": 35.5813, "eval_samples_per_second": 45.783, "eval_steps_per_second": 5.733, "step": 16128 }, { "epoch": 0.8000892901433603, "grad_norm": 5.187222957611084, "learning_rate": 9.72847503250119e-07, "loss": 0.2655, "step": 16129 }, { "epoch": 0.8001388957785605, "grad_norm": 8.097678184509277, "learning_rate": 9.723810523532456e-07, "loss": 0.3169, "step": 16130 }, { "epoch": 0.8001885014137606, "grad_norm": 4.344776630401611, "learning_rate": 9.71914701262594e-07, "loss": 0.226, "step": 16131 }, { "epoch": 0.8002381070489608, "grad_norm": 6.474576473236084, "learning_rate": 9.714484499897176e-07, "loss": 0.2336, "step": 16132 }, { "epoch": 0.8002877126841609, "grad_norm": 3.635444402694702, "learning_rate": 9.709822985461703e-07, "loss": 0.241, "step": 16133 }, { "epoch": 0.8003373183193611, "grad_norm": 4.74758243560791, "learning_rate": 9.705162469435042e-07, "loss": 0.2504, "step": 16134 }, { "epoch": 0.8003869239545612, "grad_norm": 5.798196792602539, "learning_rate": 9.700502951932688e-07, "loss": 0.3225, "step": 16135 }, { "epoch": 0.8004365295897614, "grad_norm": 5.65070104598999, "learning_rate": 9.695844433070094e-07, "loss": 0.2838, "step": 16136 }, { "epoch": 0.8004861352249616, "grad_norm": 16.56279754638672, "learning_rate": 9.691186912962725e-07, "loss": 0.4054, "step": 16137 }, { "epoch": 0.8005357408601617, "grad_norm": 12.476035118103027, "learning_rate": 9.686530391725946e-07, "loss": 0.3506, "step": 16138 }, { "epoch": 0.8005853464953618, "grad_norm": 16.849470138549805, "learning_rate": 9.681874869475212e-07, "loss": 0.3822, "step": 16139 }, { "epoch": 0.800634952130562, "grad_norm": 6.873945236206055, "learning_rate": 9.677220346325839e-07, "loss": 0.2978, "step": 16140 }, { "epoch": 0.8006845577657622, "grad_norm": 4.884576797485352, "learning_rate": 9.672566822393186e-07, "loss": 0.3101, "step": 16141 }, { "epoch": 0.8007341634009624, "grad_norm": 5.323185443878174, "learning_rate": 9.667914297792575e-07, "loss": 0.2393, "step": 16142 }, { "epoch": 0.8007837690361626, "grad_norm": 9.293495178222656, "learning_rate": 9.663262772639288e-07, "loss": 0.2599, "step": 16143 }, { "epoch": 0.8008333746713626, "grad_norm": 5.462319374084473, "learning_rate": 9.658612247048594e-07, "loss": 0.3277, "step": 16144 }, { "epoch": 0.8008829803065628, "grad_norm": 7.076474666595459, "learning_rate": 9.653962721135735e-07, "loss": 0.2265, "step": 16145 }, { "epoch": 0.800932585941763, "grad_norm": 7.270830154418945, "learning_rate": 9.649314195015947e-07, "loss": 0.2348, "step": 16146 }, { "epoch": 0.8009821915769632, "grad_norm": 5.581017017364502, "learning_rate": 9.644666668804391e-07, "loss": 0.2633, "step": 16147 }, { "epoch": 0.8010317972121633, "grad_norm": 6.864373207092285, "learning_rate": 9.640020142616253e-07, "loss": 0.2537, "step": 16148 }, { "epoch": 0.8010814028473635, "grad_norm": 5.004536151885986, "learning_rate": 9.635374616566668e-07, "loss": 0.223, "step": 16149 }, { "epoch": 0.8011310084825636, "grad_norm": 11.914731979370117, "learning_rate": 9.630730090770762e-07, "loss": 0.4112, "step": 16150 }, { "epoch": 0.8011806141177638, "grad_norm": 9.587773323059082, "learning_rate": 9.626086565343623e-07, "loss": 0.3328, "step": 16151 }, { "epoch": 0.8012302197529639, "grad_norm": 6.1270270347595215, "learning_rate": 9.621444040400325e-07, "loss": 0.2718, "step": 16152 }, { "epoch": 0.8012798253881641, "grad_norm": 6.269731521606445, "learning_rate": 9.616802516055907e-07, "loss": 0.2289, "step": 16153 }, { "epoch": 0.8013294310233643, "grad_norm": 6.530653476715088, "learning_rate": 9.612161992425407e-07, "loss": 0.3038, "step": 16154 }, { "epoch": 0.8013790366585644, "grad_norm": 6.774275779724121, "learning_rate": 9.607522469623787e-07, "loss": 0.2732, "step": 16155 }, { "epoch": 0.8014286422937645, "grad_norm": 9.52173900604248, "learning_rate": 9.602883947766028e-07, "loss": 0.3055, "step": 16156 }, { "epoch": 0.8014782479289647, "grad_norm": 7.313470363616943, "learning_rate": 9.598246426967078e-07, "loss": 0.2207, "step": 16157 }, { "epoch": 0.8015278535641649, "grad_norm": 9.505101203918457, "learning_rate": 9.593609907341862e-07, "loss": 0.262, "step": 16158 }, { "epoch": 0.8015774591993651, "grad_norm": 16.37501335144043, "learning_rate": 9.588974389005267e-07, "loss": 0.3096, "step": 16159 }, { "epoch": 0.8016270648345653, "grad_norm": 8.259608268737793, "learning_rate": 9.584339872072185e-07, "loss": 0.2187, "step": 16160 }, { "epoch": 0.8016766704697653, "grad_norm": 8.765667915344238, "learning_rate": 9.579706356657409e-07, "loss": 0.3338, "step": 16161 }, { "epoch": 0.8017262761049655, "grad_norm": 6.114516258239746, "learning_rate": 9.575073842875826e-07, "loss": 0.2872, "step": 16162 }, { "epoch": 0.8017758817401657, "grad_norm": 4.512874603271484, "learning_rate": 9.570442330842183e-07, "loss": 0.2089, "step": 16163 }, { "epoch": 0.8018254873753659, "grad_norm": 18.099123001098633, "learning_rate": 9.565811820671267e-07, "loss": 0.3249, "step": 16164 }, { "epoch": 0.801875093010566, "grad_norm": 6.573257923126221, "learning_rate": 9.56118231247783e-07, "loss": 0.2099, "step": 16165 }, { "epoch": 0.8019246986457662, "grad_norm": 4.893880844116211, "learning_rate": 9.556553806376595e-07, "loss": 0.2148, "step": 16166 }, { "epoch": 0.8019743042809663, "grad_norm": 6.722325801849365, "learning_rate": 9.551926302482223e-07, "loss": 0.3344, "step": 16167 }, { "epoch": 0.8020239099161665, "grad_norm": 7.8367815017700195, "learning_rate": 9.54729980090945e-07, "loss": 0.3389, "step": 16168 }, { "epoch": 0.8020735155513666, "grad_norm": 11.780189514160156, "learning_rate": 9.542674301772865e-07, "loss": 0.408, "step": 16169 }, { "epoch": 0.8021231211865668, "grad_norm": 11.87862777709961, "learning_rate": 9.53804980518711e-07, "loss": 0.4208, "step": 16170 }, { "epoch": 0.802172726821767, "grad_norm": 7.1551313400268555, "learning_rate": 9.533426311266792e-07, "loss": 0.2821, "step": 16171 }, { "epoch": 0.8022223324569671, "grad_norm": 4.507291793823242, "learning_rate": 9.528803820126469e-07, "loss": 0.1925, "step": 16172 }, { "epoch": 0.8022719380921672, "grad_norm": 6.934199333190918, "learning_rate": 9.524182331880693e-07, "loss": 0.3149, "step": 16173 }, { "epoch": 0.8023215437273674, "grad_norm": 7.0090813636779785, "learning_rate": 9.519561846644004e-07, "loss": 0.2968, "step": 16174 }, { "epoch": 0.8023711493625676, "grad_norm": 7.406027793884277, "learning_rate": 9.51494236453086e-07, "loss": 0.3029, "step": 16175 }, { "epoch": 0.8024207549977678, "grad_norm": 8.330042839050293, "learning_rate": 9.510323885655781e-07, "loss": 0.3176, "step": 16176 }, { "epoch": 0.802470360632968, "grad_norm": 6.8911051750183105, "learning_rate": 9.505706410133181e-07, "loss": 0.2561, "step": 16177 }, { "epoch": 0.802519966268168, "grad_norm": 9.138540267944336, "learning_rate": 9.50108993807749e-07, "loss": 0.3157, "step": 16178 }, { "epoch": 0.8025695719033682, "grad_norm": 4.058312892913818, "learning_rate": 9.496474469603111e-07, "loss": 0.2441, "step": 16179 }, { "epoch": 0.8026191775385684, "grad_norm": 12.5136079788208, "learning_rate": 9.491860004824433e-07, "loss": 0.3163, "step": 16180 }, { "epoch": 0.8026687831737686, "grad_norm": 5.088250637054443, "learning_rate": 9.48724654385576e-07, "loss": 0.2954, "step": 16181 }, { "epoch": 0.8027183888089687, "grad_norm": 11.649231910705566, "learning_rate": 9.482634086811465e-07, "loss": 0.3735, "step": 16182 }, { "epoch": 0.8027679944441689, "grad_norm": 19.27558135986328, "learning_rate": 9.478022633805817e-07, "loss": 0.2927, "step": 16183 }, { "epoch": 0.802817600079369, "grad_norm": 5.33994722366333, "learning_rate": 9.473412184953095e-07, "loss": 0.2273, "step": 16184 }, { "epoch": 0.8028672057145692, "grad_norm": 5.531368255615234, "learning_rate": 9.468802740367555e-07, "loss": 0.2591, "step": 16185 }, { "epoch": 0.8029168113497693, "grad_norm": 4.736954689025879, "learning_rate": 9.464194300163432e-07, "loss": 0.2591, "step": 16186 }, { "epoch": 0.8029664169849695, "grad_norm": 13.665432929992676, "learning_rate": 9.459586864454879e-07, "loss": 0.4641, "step": 16187 }, { "epoch": 0.8030160226201697, "grad_norm": 4.018740653991699, "learning_rate": 9.454980433356131e-07, "loss": 0.2638, "step": 16188 }, { "epoch": 0.8030656282553698, "grad_norm": 9.360610008239746, "learning_rate": 9.450375006981283e-07, "loss": 0.2535, "step": 16189 }, { "epoch": 0.8031152338905699, "grad_norm": 7.472098350524902, "learning_rate": 9.445770585444508e-07, "loss": 0.3055, "step": 16190 }, { "epoch": 0.8031648395257701, "grad_norm": 4.511664867401123, "learning_rate": 9.441167168859872e-07, "loss": 0.2797, "step": 16191 }, { "epoch": 0.8032144451609703, "grad_norm": 6.138382434844971, "learning_rate": 9.436564757341459e-07, "loss": 0.2245, "step": 16192 }, { "epoch": 0.8032640507961705, "grad_norm": 4.397697925567627, "learning_rate": 9.431963351003321e-07, "loss": 0.2231, "step": 16193 }, { "epoch": 0.8033136564313706, "grad_norm": 16.420930862426758, "learning_rate": 9.427362949959478e-07, "loss": 0.4277, "step": 16194 }, { "epoch": 0.8033632620665707, "grad_norm": 6.6907267570495605, "learning_rate": 9.422763554323939e-07, "loss": 0.3276, "step": 16195 }, { "epoch": 0.8034128677017709, "grad_norm": 4.138330936431885, "learning_rate": 9.418165164210686e-07, "loss": 0.1883, "step": 16196 }, { "epoch": 0.8034624733369711, "grad_norm": 7.867049694061279, "learning_rate": 9.413567779733645e-07, "loss": 0.2943, "step": 16197 }, { "epoch": 0.8035120789721713, "grad_norm": 22.860862731933594, "learning_rate": 9.408971401006755e-07, "loss": 0.4209, "step": 16198 }, { "epoch": 0.8035616846073714, "grad_norm": 5.646994590759277, "learning_rate": 9.404376028143913e-07, "loss": 0.2122, "step": 16199 }, { "epoch": 0.8036112902425716, "grad_norm": 14.908166885375977, "learning_rate": 9.399781661258995e-07, "loss": 0.2839, "step": 16200 }, { "epoch": 0.8036608958777717, "grad_norm": 8.536449432373047, "learning_rate": 9.395188300465852e-07, "loss": 0.2553, "step": 16201 }, { "epoch": 0.8037105015129719, "grad_norm": 5.553345203399658, "learning_rate": 9.390595945878317e-07, "loss": 0.2404, "step": 16202 }, { "epoch": 0.803760107148172, "grad_norm": 6.606256008148193, "learning_rate": 9.386004597610187e-07, "loss": 0.3059, "step": 16203 }, { "epoch": 0.8038097127833722, "grad_norm": 11.183846473693848, "learning_rate": 9.381414255775228e-07, "loss": 0.3321, "step": 16204 }, { "epoch": 0.8038593184185724, "grad_norm": 5.145853519439697, "learning_rate": 9.376824920487199e-07, "loss": 0.206, "step": 16205 }, { "epoch": 0.8039089240537725, "grad_norm": 6.416974067687988, "learning_rate": 9.372236591859823e-07, "loss": 0.2839, "step": 16206 }, { "epoch": 0.8039585296889726, "grad_norm": 5.486724376678467, "learning_rate": 9.367649270006801e-07, "loss": 0.1885, "step": 16207 }, { "epoch": 0.8040081353241728, "grad_norm": 11.978948593139648, "learning_rate": 9.363062955041813e-07, "loss": 0.5283, "step": 16208 }, { "epoch": 0.804057740959373, "grad_norm": 7.26023006439209, "learning_rate": 9.358477647078507e-07, "loss": 0.3469, "step": 16209 }, { "epoch": 0.8041073465945732, "grad_norm": 7.7555084228515625, "learning_rate": 9.353893346230503e-07, "loss": 0.3336, "step": 16210 }, { "epoch": 0.8041569522297733, "grad_norm": 3.713676929473877, "learning_rate": 9.349310052611427e-07, "loss": 0.2329, "step": 16211 }, { "epoch": 0.8042065578649734, "grad_norm": 7.406009197235107, "learning_rate": 9.34472776633481e-07, "loss": 0.2533, "step": 16212 }, { "epoch": 0.8042561635001736, "grad_norm": 6.948089122772217, "learning_rate": 9.340146487514251e-07, "loss": 0.2333, "step": 16213 }, { "epoch": 0.8043057691353738, "grad_norm": 5.356306076049805, "learning_rate": 9.335566216263242e-07, "loss": 0.2101, "step": 16214 }, { "epoch": 0.804355374770574, "grad_norm": 8.498551368713379, "learning_rate": 9.330986952695304e-07, "loss": 0.3929, "step": 16215 }, { "epoch": 0.8044049804057741, "grad_norm": 9.61155891418457, "learning_rate": 9.3264086969239e-07, "loss": 0.3864, "step": 16216 }, { "epoch": 0.8044545860409743, "grad_norm": 7.151148319244385, "learning_rate": 9.321831449062497e-07, "loss": 0.2422, "step": 16217 }, { "epoch": 0.8045041916761744, "grad_norm": 6.466748237609863, "learning_rate": 9.317255209224491e-07, "loss": 0.2859, "step": 16218 }, { "epoch": 0.8045537973113746, "grad_norm": 5.223620891571045, "learning_rate": 9.312679977523326e-07, "loss": 0.2138, "step": 16219 }, { "epoch": 0.8046034029465747, "grad_norm": 14.857125282287598, "learning_rate": 9.30810575407235e-07, "loss": 0.3238, "step": 16220 }, { "epoch": 0.8046530085817749, "grad_norm": 3.6927995681762695, "learning_rate": 9.303532538984911e-07, "loss": 0.1918, "step": 16221 }, { "epoch": 0.8047026142169751, "grad_norm": 14.299338340759277, "learning_rate": 9.298960332374352e-07, "loss": 0.324, "step": 16222 }, { "epoch": 0.8047522198521752, "grad_norm": 10.883392333984375, "learning_rate": 9.294389134353965e-07, "loss": 0.2948, "step": 16223 }, { "epoch": 0.8048018254873753, "grad_norm": 11.257235527038574, "learning_rate": 9.289818945037026e-07, "loss": 0.3004, "step": 16224 }, { "epoch": 0.8048514311225755, "grad_norm": 9.452537536621094, "learning_rate": 9.285249764536802e-07, "loss": 0.2632, "step": 16225 }, { "epoch": 0.8049010367577757, "grad_norm": 6.879701614379883, "learning_rate": 9.280681592966484e-07, "loss": 0.188, "step": 16226 }, { "epoch": 0.8049506423929759, "grad_norm": 9.259279251098633, "learning_rate": 9.276114430439315e-07, "loss": 0.3281, "step": 16227 }, { "epoch": 0.805000248028176, "grad_norm": 5.59070348739624, "learning_rate": 9.271548277068443e-07, "loss": 0.1571, "step": 16228 }, { "epoch": 0.8050498536633761, "grad_norm": 4.3036789894104, "learning_rate": 9.266983132967028e-07, "loss": 0.241, "step": 16229 }, { "epoch": 0.8050994592985763, "grad_norm": 6.804757118225098, "learning_rate": 9.262418998248196e-07, "loss": 0.2424, "step": 16230 }, { "epoch": 0.8051490649337765, "grad_norm": 6.164713382720947, "learning_rate": 9.257855873025062e-07, "loss": 0.2939, "step": 16231 }, { "epoch": 0.8051986705689766, "grad_norm": 6.538158416748047, "learning_rate": 9.253293757410664e-07, "loss": 0.223, "step": 16232 }, { "epoch": 0.8052482762041768, "grad_norm": 5.990468502044678, "learning_rate": 9.248732651518106e-07, "loss": 0.2736, "step": 16233 }, { "epoch": 0.805297881839377, "grad_norm": 16.03607749938965, "learning_rate": 9.244172555460374e-07, "loss": 0.544, "step": 16234 }, { "epoch": 0.8053474874745771, "grad_norm": 16.489927291870117, "learning_rate": 9.239613469350478e-07, "loss": 0.3565, "step": 16235 }, { "epoch": 0.8053970931097773, "grad_norm": 6.7880988121032715, "learning_rate": 9.235055393301401e-07, "loss": 0.2762, "step": 16236 }, { "epoch": 0.8054466987449774, "grad_norm": 8.757824897766113, "learning_rate": 9.230498327426102e-07, "loss": 0.2185, "step": 16237 }, { "epoch": 0.8054963043801776, "grad_norm": 12.412262916564941, "learning_rate": 9.225942271837468e-07, "loss": 0.4324, "step": 16238 }, { "epoch": 0.8055459100153778, "grad_norm": 4.7937164306640625, "learning_rate": 9.221387226648459e-07, "loss": 0.2342, "step": 16239 }, { "epoch": 0.8055955156505779, "grad_norm": 4.092941761016846, "learning_rate": 9.216833191971908e-07, "loss": 0.2188, "step": 16240 }, { "epoch": 0.805645121285778, "grad_norm": 7.080353260040283, "learning_rate": 9.212280167920679e-07, "loss": 0.3012, "step": 16241 }, { "epoch": 0.8056947269209782, "grad_norm": 3.8986570835113525, "learning_rate": 9.207728154607592e-07, "loss": 0.1653, "step": 16242 }, { "epoch": 0.8057443325561784, "grad_norm": 11.132038116455078, "learning_rate": 9.203177152145459e-07, "loss": 0.3577, "step": 16243 }, { "epoch": 0.8057939381913786, "grad_norm": 10.45143985748291, "learning_rate": 9.19862716064705e-07, "loss": 0.375, "step": 16244 }, { "epoch": 0.8058435438265787, "grad_norm": 15.553388595581055, "learning_rate": 9.19407818022513e-07, "loss": 0.4013, "step": 16245 }, { "epoch": 0.8058931494617788, "grad_norm": 5.233071327209473, "learning_rate": 9.189530210992387e-07, "loss": 0.2632, "step": 16246 }, { "epoch": 0.805942755096979, "grad_norm": 20.102407455444336, "learning_rate": 9.184983253061564e-07, "loss": 0.4461, "step": 16247 }, { "epoch": 0.8059923607321792, "grad_norm": 9.081986427307129, "learning_rate": 9.180437306545314e-07, "loss": 0.2529, "step": 16248 }, { "epoch": 0.8060419663673793, "grad_norm": 5.3193039894104, "learning_rate": 9.175892371556289e-07, "loss": 0.2358, "step": 16249 }, { "epoch": 0.8060915720025795, "grad_norm": 5.866185665130615, "learning_rate": 9.171348448207118e-07, "loss": 0.2828, "step": 16250 }, { "epoch": 0.8061411776377797, "grad_norm": 5.04028844833374, "learning_rate": 9.166805536610401e-07, "loss": 0.2543, "step": 16251 }, { "epoch": 0.8061907832729798, "grad_norm": 9.008773803710938, "learning_rate": 9.162263636878716e-07, "loss": 0.2478, "step": 16252 }, { "epoch": 0.80624038890818, "grad_norm": 7.121656894683838, "learning_rate": 9.15772274912462e-07, "loss": 0.3529, "step": 16253 }, { "epoch": 0.8062899945433801, "grad_norm": 8.867268562316895, "learning_rate": 9.153182873460615e-07, "loss": 0.2894, "step": 16254 }, { "epoch": 0.8063396001785803, "grad_norm": 5.578839302062988, "learning_rate": 9.148644009999219e-07, "loss": 0.2065, "step": 16255 }, { "epoch": 0.8063892058137805, "grad_norm": 7.103087425231934, "learning_rate": 9.144106158852905e-07, "loss": 0.3301, "step": 16256 }, { "epoch": 0.8064388114489806, "grad_norm": 5.9923834800720215, "learning_rate": 9.139569320134117e-07, "loss": 0.3118, "step": 16257 }, { "epoch": 0.8064884170841807, "grad_norm": 6.228634357452393, "learning_rate": 9.135033493955287e-07, "loss": 0.207, "step": 16258 }, { "epoch": 0.8065380227193809, "grad_norm": 4.625569820404053, "learning_rate": 9.130498680428812e-07, "loss": 0.2425, "step": 16259 }, { "epoch": 0.8065876283545811, "grad_norm": 5.7620038986206055, "learning_rate": 9.125964879667071e-07, "loss": 0.3175, "step": 16260 }, { "epoch": 0.8066372339897813, "grad_norm": 6.697577953338623, "learning_rate": 9.1214320917824e-07, "loss": 0.2498, "step": 16261 }, { "epoch": 0.8066868396249814, "grad_norm": 6.500033378601074, "learning_rate": 9.116900316887156e-07, "loss": 0.3641, "step": 16262 }, { "epoch": 0.8067364452601815, "grad_norm": 5.8515167236328125, "learning_rate": 9.112369555093597e-07, "loss": 0.3534, "step": 16263 }, { "epoch": 0.8067860508953817, "grad_norm": 8.567347526550293, "learning_rate": 9.107839806514018e-07, "loss": 0.3824, "step": 16264 }, { "epoch": 0.8068356565305819, "grad_norm": 4.088231086730957, "learning_rate": 9.103311071260667e-07, "loss": 0.1348, "step": 16265 }, { "epoch": 0.806885262165782, "grad_norm": 11.096065521240234, "learning_rate": 9.098783349445767e-07, "loss": 0.2317, "step": 16266 }, { "epoch": 0.8069348678009822, "grad_norm": 4.438343048095703, "learning_rate": 9.094256641181515e-07, "loss": 0.2905, "step": 16267 }, { "epoch": 0.8069844734361824, "grad_norm": 8.290349960327148, "learning_rate": 9.089730946580099e-07, "loss": 0.2653, "step": 16268 }, { "epoch": 0.8070340790713825, "grad_norm": 5.909565448760986, "learning_rate": 9.085206265753633e-07, "loss": 0.3078, "step": 16269 }, { "epoch": 0.8070836847065826, "grad_norm": 10.83862590789795, "learning_rate": 9.080682598814289e-07, "loss": 0.358, "step": 16270 }, { "epoch": 0.8071332903417828, "grad_norm": 7.614812850952148, "learning_rate": 9.076159945874125e-07, "loss": 0.2351, "step": 16271 }, { "epoch": 0.807182895976983, "grad_norm": 7.779858112335205, "learning_rate": 9.071638307045233e-07, "loss": 0.2047, "step": 16272 }, { "epoch": 0.8072325016121832, "grad_norm": 5.314227104187012, "learning_rate": 9.067117682439652e-07, "loss": 0.3151, "step": 16273 }, { "epoch": 0.8072821072473833, "grad_norm": 6.900272369384766, "learning_rate": 9.062598072169426e-07, "loss": 0.2344, "step": 16274 }, { "epoch": 0.8073317128825834, "grad_norm": 6.579193115234375, "learning_rate": 9.058079476346515e-07, "loss": 0.1673, "step": 16275 }, { "epoch": 0.8073813185177836, "grad_norm": 4.721131801605225, "learning_rate": 9.05356189508294e-07, "loss": 0.2387, "step": 16276 }, { "epoch": 0.8074309241529838, "grad_norm": 5.098461151123047, "learning_rate": 9.049045328490596e-07, "loss": 0.193, "step": 16277 }, { "epoch": 0.807480529788184, "grad_norm": 5.269759654998779, "learning_rate": 9.04452977668146e-07, "loss": 0.2717, "step": 16278 }, { "epoch": 0.8075301354233841, "grad_norm": 6.90714168548584, "learning_rate": 9.040015239767385e-07, "loss": 0.2577, "step": 16279 }, { "epoch": 0.8075797410585842, "grad_norm": 4.491454124450684, "learning_rate": 9.035501717860268e-07, "loss": 0.2315, "step": 16280 }, { "epoch": 0.8076293466937844, "grad_norm": 16.578100204467773, "learning_rate": 9.030989211071944e-07, "loss": 0.2894, "step": 16281 }, { "epoch": 0.8076789523289846, "grad_norm": 5.8446574211120605, "learning_rate": 9.026477719514254e-07, "loss": 0.2701, "step": 16282 }, { "epoch": 0.8077285579641847, "grad_norm": 4.688518047332764, "learning_rate": 9.021967243298952e-07, "loss": 0.2327, "step": 16283 }, { "epoch": 0.8077781635993849, "grad_norm": 8.796192169189453, "learning_rate": 9.017457782537864e-07, "loss": 0.2961, "step": 16284 }, { "epoch": 0.8078277692345851, "grad_norm": 4.96268892288208, "learning_rate": 9.012949337342702e-07, "loss": 0.2503, "step": 16285 }, { "epoch": 0.8078773748697852, "grad_norm": 6.726522445678711, "learning_rate": 9.008441907825194e-07, "loss": 0.2612, "step": 16286 }, { "epoch": 0.8079269805049853, "grad_norm": 7.702664375305176, "learning_rate": 9.003935494097033e-07, "loss": 0.3825, "step": 16287 }, { "epoch": 0.8079765861401855, "grad_norm": 6.409157752990723, "learning_rate": 8.999430096269913e-07, "loss": 0.3408, "step": 16288 }, { "epoch": 0.8080261917753857, "grad_norm": 5.3183088302612305, "learning_rate": 8.994925714455439e-07, "loss": 0.2836, "step": 16289 }, { "epoch": 0.8080757974105859, "grad_norm": 7.3840436935424805, "learning_rate": 8.990422348765277e-07, "loss": 0.3406, "step": 16290 }, { "epoch": 0.808125403045786, "grad_norm": 7.507861614227295, "learning_rate": 8.985919999310983e-07, "loss": 0.287, "step": 16291 }, { "epoch": 0.8081750086809861, "grad_norm": 4.024639129638672, "learning_rate": 8.981418666204144e-07, "loss": 0.2688, "step": 16292 }, { "epoch": 0.8082246143161863, "grad_norm": 6.831450462341309, "learning_rate": 8.976918349556313e-07, "loss": 0.3839, "step": 16293 }, { "epoch": 0.8082742199513865, "grad_norm": 10.890182495117188, "learning_rate": 8.972419049479008e-07, "loss": 0.4432, "step": 16294 }, { "epoch": 0.8083238255865867, "grad_norm": 8.096753120422363, "learning_rate": 8.967920766083699e-07, "loss": 0.3002, "step": 16295 }, { "epoch": 0.8083734312217868, "grad_norm": 6.27682638168335, "learning_rate": 8.963423499481893e-07, "loss": 0.2327, "step": 16296 }, { "epoch": 0.8084230368569869, "grad_norm": 8.795989036560059, "learning_rate": 8.958927249784993e-07, "loss": 0.3677, "step": 16297 }, { "epoch": 0.8084726424921871, "grad_norm": 9.913710594177246, "learning_rate": 8.954432017104464e-07, "loss": 0.3146, "step": 16298 }, { "epoch": 0.8085222481273873, "grad_norm": 10.817934036254883, "learning_rate": 8.949937801551667e-07, "loss": 0.4039, "step": 16299 }, { "epoch": 0.8085718537625874, "grad_norm": 5.1145219802856445, "learning_rate": 8.945444603237979e-07, "loss": 0.2712, "step": 16300 }, { "epoch": 0.8086214593977876, "grad_norm": 13.428095817565918, "learning_rate": 8.94095242227474e-07, "loss": 0.4105, "step": 16301 }, { "epoch": 0.8086710650329878, "grad_norm": 7.87785530090332, "learning_rate": 8.936461258773293e-07, "loss": 0.25, "step": 16302 }, { "epoch": 0.8087206706681879, "grad_norm": 4.093491554260254, "learning_rate": 8.931971112844884e-07, "loss": 0.247, "step": 16303 }, { "epoch": 0.808770276303388, "grad_norm": 9.208223342895508, "learning_rate": 8.927481984600833e-07, "loss": 0.2071, "step": 16304 }, { "epoch": 0.8088198819385882, "grad_norm": 5.790622234344482, "learning_rate": 8.922993874152341e-07, "loss": 0.2649, "step": 16305 }, { "epoch": 0.8088694875737884, "grad_norm": 11.256834983825684, "learning_rate": 8.918506781610647e-07, "loss": 0.4066, "step": 16306 }, { "epoch": 0.8089190932089886, "grad_norm": 8.208189010620117, "learning_rate": 8.914020707086935e-07, "loss": 0.2293, "step": 16307 }, { "epoch": 0.8089686988441886, "grad_norm": 11.731048583984375, "learning_rate": 8.909535650692375e-07, "loss": 0.32, "step": 16308 }, { "epoch": 0.8090183044793888, "grad_norm": 4.037943363189697, "learning_rate": 8.905051612538113e-07, "loss": 0.2791, "step": 16309 }, { "epoch": 0.809067910114589, "grad_norm": 6.248701572418213, "learning_rate": 8.900568592735253e-07, "loss": 0.2673, "step": 16310 }, { "epoch": 0.8091175157497892, "grad_norm": 9.67138957977295, "learning_rate": 8.896086591394915e-07, "loss": 0.28, "step": 16311 }, { "epoch": 0.8091671213849894, "grad_norm": 4.249716281890869, "learning_rate": 8.891605608628129e-07, "loss": 0.2672, "step": 16312 }, { "epoch": 0.8092167270201895, "grad_norm": 9.546774864196777, "learning_rate": 8.887125644545952e-07, "loss": 0.2803, "step": 16313 }, { "epoch": 0.8092663326553896, "grad_norm": 7.821985244750977, "learning_rate": 8.882646699259401e-07, "loss": 0.3955, "step": 16314 }, { "epoch": 0.8093159382905898, "grad_norm": 11.562128067016602, "learning_rate": 8.878168772879459e-07, "loss": 0.3484, "step": 16315 }, { "epoch": 0.80936554392579, "grad_norm": 5.659371852874756, "learning_rate": 8.873691865517098e-07, "loss": 0.3188, "step": 16316 }, { "epoch": 0.8094151495609901, "grad_norm": 9.747150421142578, "learning_rate": 8.869215977283258e-07, "loss": 0.2911, "step": 16317 }, { "epoch": 0.8094647551961903, "grad_norm": 5.466671943664551, "learning_rate": 8.86474110828885e-07, "loss": 0.2982, "step": 16318 }, { "epoch": 0.8095143608313905, "grad_norm": 6.617627143859863, "learning_rate": 8.860267258644778e-07, "loss": 0.3552, "step": 16319 }, { "epoch": 0.8095639664665906, "grad_norm": 18.968400955200195, "learning_rate": 8.855794428461867e-07, "loss": 0.4186, "step": 16320 }, { "epoch": 0.8096135721017907, "grad_norm": 6.1898651123046875, "learning_rate": 8.85132261785101e-07, "loss": 0.3045, "step": 16321 }, { "epoch": 0.8096631777369909, "grad_norm": 4.940182685852051, "learning_rate": 8.846851826922975e-07, "loss": 0.2078, "step": 16322 }, { "epoch": 0.8097127833721911, "grad_norm": 23.413862228393555, "learning_rate": 8.842382055788573e-07, "loss": 0.4395, "step": 16323 }, { "epoch": 0.8097623890073913, "grad_norm": 5.693296432495117, "learning_rate": 8.837913304558555e-07, "loss": 0.3483, "step": 16324 }, { "epoch": 0.8098119946425913, "grad_norm": 4.439686298370361, "learning_rate": 8.83344557334368e-07, "loss": 0.2163, "step": 16325 }, { "epoch": 0.8098616002777915, "grad_norm": 7.957606315612793, "learning_rate": 8.828978862254623e-07, "loss": 0.3605, "step": 16326 }, { "epoch": 0.8099112059129917, "grad_norm": 9.136099815368652, "learning_rate": 8.824513171402116e-07, "loss": 0.2701, "step": 16327 }, { "epoch": 0.8099608115481919, "grad_norm": 7.593206882476807, "learning_rate": 8.820048500896782e-07, "loss": 0.2414, "step": 16328 }, { "epoch": 0.8100104171833921, "grad_norm": 5.317447185516357, "learning_rate": 8.815584850849279e-07, "loss": 0.2361, "step": 16329 }, { "epoch": 0.8100600228185922, "grad_norm": 11.694700241088867, "learning_rate": 8.811122221370211e-07, "loss": 0.429, "step": 16330 }, { "epoch": 0.8101096284537923, "grad_norm": 4.480686187744141, "learning_rate": 8.806660612570167e-07, "loss": 0.1973, "step": 16331 }, { "epoch": 0.8101592340889925, "grad_norm": 13.548550605773926, "learning_rate": 8.80220002455971e-07, "loss": 0.3457, "step": 16332 }, { "epoch": 0.8102088397241927, "grad_norm": 5.9782795906066895, "learning_rate": 8.797740457449377e-07, "loss": 0.3125, "step": 16333 }, { "epoch": 0.8102584453593928, "grad_norm": 12.260634422302246, "learning_rate": 8.793281911349655e-07, "loss": 0.3863, "step": 16334 }, { "epoch": 0.810308050994593, "grad_norm": 11.731826782226562, "learning_rate": 8.788824386371069e-07, "loss": 0.3663, "step": 16335 }, { "epoch": 0.8103576566297932, "grad_norm": 7.1051859855651855, "learning_rate": 8.784367882624045e-07, "loss": 0.2968, "step": 16336 }, { "epoch": 0.8104072622649933, "grad_norm": 8.235369682312012, "learning_rate": 8.779912400219032e-07, "loss": 0.2518, "step": 16337 }, { "epoch": 0.8104568679001934, "grad_norm": 9.667095184326172, "learning_rate": 8.77545793926643e-07, "loss": 0.2394, "step": 16338 }, { "epoch": 0.8105064735353936, "grad_norm": 7.477657794952393, "learning_rate": 8.77100449987664e-07, "loss": 0.2826, "step": 16339 }, { "epoch": 0.8105560791705938, "grad_norm": 4.038305759429932, "learning_rate": 8.766552082159991e-07, "loss": 0.1489, "step": 16340 }, { "epoch": 0.810605684805794, "grad_norm": 11.236947059631348, "learning_rate": 8.762100686226849e-07, "loss": 0.3644, "step": 16341 }, { "epoch": 0.810655290440994, "grad_norm": 15.853628158569336, "learning_rate": 8.757650312187499e-07, "loss": 0.344, "step": 16342 }, { "epoch": 0.8107048960761942, "grad_norm": 8.785587310791016, "learning_rate": 8.75320096015223e-07, "loss": 0.3503, "step": 16343 }, { "epoch": 0.8107545017113944, "grad_norm": 8.630926132202148, "learning_rate": 8.7487526302313e-07, "loss": 0.2842, "step": 16344 }, { "epoch": 0.8108041073465946, "grad_norm": 5.280241012573242, "learning_rate": 8.744305322534946e-07, "loss": 0.3334, "step": 16345 }, { "epoch": 0.8108537129817948, "grad_norm": 13.93093204498291, "learning_rate": 8.739859037173349e-07, "loss": 0.3184, "step": 16346 }, { "epoch": 0.8109033186169949, "grad_norm": 4.405747413635254, "learning_rate": 8.735413774256735e-07, "loss": 0.1405, "step": 16347 }, { "epoch": 0.810952924252195, "grad_norm": 8.576274871826172, "learning_rate": 8.730969533895206e-07, "loss": 0.3084, "step": 16348 }, { "epoch": 0.8110025298873952, "grad_norm": 7.509439468383789, "learning_rate": 8.726526316198946e-07, "loss": 0.2822, "step": 16349 }, { "epoch": 0.8110521355225954, "grad_norm": 5.532066822052002, "learning_rate": 8.722084121278018e-07, "loss": 0.2771, "step": 16350 }, { "epoch": 0.8111017411577955, "grad_norm": 11.648731231689453, "learning_rate": 8.717642949242522e-07, "loss": 0.2781, "step": 16351 }, { "epoch": 0.8111513467929957, "grad_norm": 6.493833541870117, "learning_rate": 8.713202800202509e-07, "loss": 0.269, "step": 16352 }, { "epoch": 0.8112009524281959, "grad_norm": 5.583858489990234, "learning_rate": 8.708763674268012e-07, "loss": 0.3284, "step": 16353 }, { "epoch": 0.811250558063396, "grad_norm": 9.635909080505371, "learning_rate": 8.70432557154901e-07, "loss": 0.2639, "step": 16354 }, { "epoch": 0.8113001636985961, "grad_norm": 7.050295352935791, "learning_rate": 8.699888492155522e-07, "loss": 0.3378, "step": 16355 }, { "epoch": 0.8113497693337963, "grad_norm": 5.3002142906188965, "learning_rate": 8.695452436197471e-07, "loss": 0.2667, "step": 16356 }, { "epoch": 0.8113993749689965, "grad_norm": 6.147171974182129, "learning_rate": 8.69101740378479e-07, "loss": 0.271, "step": 16357 }, { "epoch": 0.8114489806041967, "grad_norm": 5.211670398712158, "learning_rate": 8.686583395027381e-07, "loss": 0.2973, "step": 16358 }, { "epoch": 0.8114985862393967, "grad_norm": 6.37416410446167, "learning_rate": 8.682150410035128e-07, "loss": 0.2976, "step": 16359 }, { "epoch": 0.8115481918745969, "grad_norm": 8.410940170288086, "learning_rate": 8.677718448917876e-07, "loss": 0.2733, "step": 16360 }, { "epoch": 0.8115977975097971, "grad_norm": 8.899688720703125, "learning_rate": 8.673287511785467e-07, "loss": 0.324, "step": 16361 }, { "epoch": 0.8116474031449973, "grad_norm": 7.4851202964782715, "learning_rate": 8.668857598747677e-07, "loss": 0.3089, "step": 16362 }, { "epoch": 0.8116970087801975, "grad_norm": 8.499402046203613, "learning_rate": 8.664428709914285e-07, "loss": 0.39, "step": 16363 }, { "epoch": 0.8117466144153976, "grad_norm": 4.897403240203857, "learning_rate": 8.66000084539505e-07, "loss": 0.2115, "step": 16364 }, { "epoch": 0.8117962200505977, "grad_norm": 7.3288679122924805, "learning_rate": 8.6555740052997e-07, "loss": 0.1766, "step": 16365 }, { "epoch": 0.8118458256857979, "grad_norm": 11.035696983337402, "learning_rate": 8.651148189737923e-07, "loss": 0.3167, "step": 16366 }, { "epoch": 0.8118954313209981, "grad_norm": 26.410850524902344, "learning_rate": 8.646723398819396e-07, "loss": 0.4854, "step": 16367 }, { "epoch": 0.8119450369561982, "grad_norm": 5.848120212554932, "learning_rate": 8.642299632653772e-07, "loss": 0.2516, "step": 16368 }, { "epoch": 0.8119946425913984, "grad_norm": 8.399893760681152, "learning_rate": 8.637876891350671e-07, "loss": 0.259, "step": 16369 }, { "epoch": 0.8120442482265985, "grad_norm": 4.650007724761963, "learning_rate": 8.633455175019706e-07, "loss": 0.3078, "step": 16370 }, { "epoch": 0.8120938538617987, "grad_norm": 8.43085765838623, "learning_rate": 8.62903448377042e-07, "loss": 0.3436, "step": 16371 }, { "epoch": 0.8121434594969988, "grad_norm": 13.676044464111328, "learning_rate": 8.624614817712373e-07, "loss": 0.2985, "step": 16372 }, { "epoch": 0.812193065132199, "grad_norm": 14.768702507019043, "learning_rate": 8.620196176955087e-07, "loss": 0.3167, "step": 16373 }, { "epoch": 0.8122426707673992, "grad_norm": 6.010178089141846, "learning_rate": 8.615778561608057e-07, "loss": 0.2017, "step": 16374 }, { "epoch": 0.8122922764025994, "grad_norm": 24.742481231689453, "learning_rate": 8.611361971780758e-07, "loss": 0.3539, "step": 16375 }, { "epoch": 0.8123418820377994, "grad_norm": 7.672418117523193, "learning_rate": 8.606946407582639e-07, "loss": 0.2633, "step": 16376 }, { "epoch": 0.8123914876729996, "grad_norm": 5.3782219886779785, "learning_rate": 8.602531869123088e-07, "loss": 0.287, "step": 16377 }, { "epoch": 0.8124410933081998, "grad_norm": 5.020875453948975, "learning_rate": 8.598118356511547e-07, "loss": 0.2531, "step": 16378 }, { "epoch": 0.8124906989434, "grad_norm": 5.857989311218262, "learning_rate": 8.593705869857356e-07, "loss": 0.2575, "step": 16379 }, { "epoch": 0.8125403045786002, "grad_norm": 7.667396068572998, "learning_rate": 8.589294409269855e-07, "loss": 0.3378, "step": 16380 }, { "epoch": 0.8125899102138003, "grad_norm": 5.970874786376953, "learning_rate": 8.584883974858376e-07, "loss": 0.31, "step": 16381 }, { "epoch": 0.8126395158490004, "grad_norm": 3.2546157836914062, "learning_rate": 8.580474566732216e-07, "loss": 0.1675, "step": 16382 }, { "epoch": 0.8126891214842006, "grad_norm": 4.731687068939209, "learning_rate": 8.576066185000609e-07, "loss": 0.1333, "step": 16383 }, { "epoch": 0.8127387271194008, "grad_norm": 4.980350017547607, "learning_rate": 8.571658829772844e-07, "loss": 0.2571, "step": 16384 }, { "epoch": 0.8127883327546009, "grad_norm": 6.325122833251953, "learning_rate": 8.567252501158091e-07, "loss": 0.2504, "step": 16385 }, { "epoch": 0.8128379383898011, "grad_norm": 7.843670845031738, "learning_rate": 8.562847199265583e-07, "loss": 0.2813, "step": 16386 }, { "epoch": 0.8128875440250012, "grad_norm": 4.778640270233154, "learning_rate": 8.558442924204457e-07, "loss": 0.2367, "step": 16387 }, { "epoch": 0.8129371496602014, "grad_norm": 6.386848449707031, "learning_rate": 8.554039676083858e-07, "loss": 0.2656, "step": 16388 }, { "epoch": 0.8129867552954015, "grad_norm": 10.081724166870117, "learning_rate": 8.54963745501291e-07, "loss": 0.3166, "step": 16389 }, { "epoch": 0.8130363609306017, "grad_norm": 4.786495685577393, "learning_rate": 8.545236261100703e-07, "loss": 0.2426, "step": 16390 }, { "epoch": 0.8130859665658019, "grad_norm": 6.809844970703125, "learning_rate": 8.540836094456267e-07, "loss": 0.3201, "step": 16391 }, { "epoch": 0.8131355722010021, "grad_norm": 4.9938530921936035, "learning_rate": 8.536436955188698e-07, "loss": 0.2466, "step": 16392 }, { "epoch": 0.8131851778362021, "grad_norm": 10.57382869720459, "learning_rate": 8.532038843406959e-07, "loss": 0.3009, "step": 16393 }, { "epoch": 0.8132347834714023, "grad_norm": 6.054563045501709, "learning_rate": 8.527641759220057e-07, "loss": 0.3056, "step": 16394 }, { "epoch": 0.8132843891066025, "grad_norm": 10.60114574432373, "learning_rate": 8.523245702736955e-07, "loss": 0.4016, "step": 16395 }, { "epoch": 0.8133339947418027, "grad_norm": 4.039577960968018, "learning_rate": 8.518850674066598e-07, "loss": 0.2584, "step": 16396 }, { "epoch": 0.8133836003770029, "grad_norm": 5.709846496582031, "learning_rate": 8.514456673317855e-07, "loss": 0.2468, "step": 16397 }, { "epoch": 0.813433206012203, "grad_norm": 5.157979965209961, "learning_rate": 8.510063700599674e-07, "loss": 0.2839, "step": 16398 }, { "epoch": 0.8134828116474031, "grad_norm": 16.915184020996094, "learning_rate": 8.505671756020867e-07, "loss": 0.4196, "step": 16399 }, { "epoch": 0.8135324172826033, "grad_norm": 6.65925407409668, "learning_rate": 8.501280839690285e-07, "loss": 0.2581, "step": 16400 }, { "epoch": 0.8135820229178035, "grad_norm": 5.30044412612915, "learning_rate": 8.496890951716735e-07, "loss": 0.2856, "step": 16401 }, { "epoch": 0.8136316285530036, "grad_norm": 11.363941192626953, "learning_rate": 8.492502092208998e-07, "loss": 0.3424, "step": 16402 }, { "epoch": 0.8136812341882038, "grad_norm": 5.499676704406738, "learning_rate": 8.488114261275837e-07, "loss": 0.2997, "step": 16403 }, { "epoch": 0.8137308398234039, "grad_norm": 4.122587203979492, "learning_rate": 8.483727459025998e-07, "loss": 0.179, "step": 16404 }, { "epoch": 0.8137804454586041, "grad_norm": 6.0851054191589355, "learning_rate": 8.479341685568143e-07, "loss": 0.3107, "step": 16405 }, { "epoch": 0.8138300510938042, "grad_norm": 11.772150993347168, "learning_rate": 8.474956941011009e-07, "loss": 0.3206, "step": 16406 }, { "epoch": 0.8138796567290044, "grad_norm": 11.694184303283691, "learning_rate": 8.470573225463213e-07, "loss": 0.3116, "step": 16407 }, { "epoch": 0.8139292623642046, "grad_norm": 7.867792129516602, "learning_rate": 8.466190539033404e-07, "loss": 0.3825, "step": 16408 }, { "epoch": 0.8139788679994048, "grad_norm": 5.587932109832764, "learning_rate": 8.461808881830174e-07, "loss": 0.2385, "step": 16409 }, { "epoch": 0.8140284736346048, "grad_norm": 4.7971978187561035, "learning_rate": 8.457428253962124e-07, "loss": 0.2917, "step": 16410 }, { "epoch": 0.814078079269805, "grad_norm": 5.215501308441162, "learning_rate": 8.453048655537771e-07, "loss": 0.3093, "step": 16411 }, { "epoch": 0.8141276849050052, "grad_norm": 10.968572616577148, "learning_rate": 8.448670086665689e-07, "loss": 0.4514, "step": 16412 }, { "epoch": 0.8141772905402054, "grad_norm": 6.503202438354492, "learning_rate": 8.444292547454347e-07, "loss": 0.266, "step": 16413 }, { "epoch": 0.8142268961754056, "grad_norm": 26.664690017700195, "learning_rate": 8.43991603801223e-07, "loss": 0.4253, "step": 16414 }, { "epoch": 0.8142765018106057, "grad_norm": 7.33685302734375, "learning_rate": 8.435540558447797e-07, "loss": 0.2832, "step": 16415 }, { "epoch": 0.8143261074458058, "grad_norm": 5.859673976898193, "learning_rate": 8.431166108869465e-07, "loss": 0.2584, "step": 16416 }, { "epoch": 0.814375713081006, "grad_norm": 13.80883502960205, "learning_rate": 8.426792689385649e-07, "loss": 0.422, "step": 16417 }, { "epoch": 0.8144253187162062, "grad_norm": 15.446252822875977, "learning_rate": 8.422420300104711e-07, "loss": 0.3911, "step": 16418 }, { "epoch": 0.8144749243514063, "grad_norm": 10.31094741821289, "learning_rate": 8.418048941135016e-07, "loss": 0.3502, "step": 16419 }, { "epoch": 0.8145245299866065, "grad_norm": 10.308298110961914, "learning_rate": 8.413678612584869e-07, "loss": 0.3145, "step": 16420 }, { "epoch": 0.8145741356218066, "grad_norm": 4.494985580444336, "learning_rate": 8.409309314562575e-07, "loss": 0.2293, "step": 16421 }, { "epoch": 0.8146237412570068, "grad_norm": 4.4169440269470215, "learning_rate": 8.404941047176413e-07, "loss": 0.2311, "step": 16422 }, { "epoch": 0.8146733468922069, "grad_norm": 10.897308349609375, "learning_rate": 8.400573810534624e-07, "loss": 0.3177, "step": 16423 }, { "epoch": 0.8147229525274071, "grad_norm": 7.203981876373291, "learning_rate": 8.396207604745432e-07, "loss": 0.2743, "step": 16424 }, { "epoch": 0.8147725581626073, "grad_norm": 5.190765380859375, "learning_rate": 8.391842429917041e-07, "loss": 0.2778, "step": 16425 }, { "epoch": 0.8148221637978075, "grad_norm": 6.61847448348999, "learning_rate": 8.387478286157613e-07, "loss": 0.1964, "step": 16426 }, { "epoch": 0.8148717694330075, "grad_norm": 9.729666709899902, "learning_rate": 8.383115173575306e-07, "loss": 0.3883, "step": 16427 }, { "epoch": 0.8149213750682077, "grad_norm": 4.8877434730529785, "learning_rate": 8.378753092278219e-07, "loss": 0.2464, "step": 16428 }, { "epoch": 0.8149709807034079, "grad_norm": 5.342201232910156, "learning_rate": 8.374392042374462e-07, "loss": 0.243, "step": 16429 }, { "epoch": 0.8150205863386081, "grad_norm": 6.11935567855835, "learning_rate": 8.370032023972097e-07, "loss": 0.2394, "step": 16430 }, { "epoch": 0.8150701919738083, "grad_norm": 8.798458099365234, "learning_rate": 8.365673037179167e-07, "loss": 0.3886, "step": 16431 }, { "epoch": 0.8151197976090084, "grad_norm": 6.723662853240967, "learning_rate": 8.361315082103694e-07, "loss": 0.311, "step": 16432 }, { "epoch": 0.8151694032442085, "grad_norm": 16.02187156677246, "learning_rate": 8.356958158853685e-07, "loss": 0.322, "step": 16433 }, { "epoch": 0.8152190088794087, "grad_norm": 12.382001876831055, "learning_rate": 8.352602267537063e-07, "loss": 0.3345, "step": 16434 }, { "epoch": 0.8152686145146089, "grad_norm": 6.025512218475342, "learning_rate": 8.348247408261817e-07, "loss": 0.3462, "step": 16435 }, { "epoch": 0.815318220149809, "grad_norm": 17.673728942871094, "learning_rate": 8.343893581135831e-07, "loss": 0.3526, "step": 16436 }, { "epoch": 0.8153678257850092, "grad_norm": 4.526487827301025, "learning_rate": 8.339540786267009e-07, "loss": 0.2399, "step": 16437 }, { "epoch": 0.8154174314202093, "grad_norm": 3.9210596084594727, "learning_rate": 8.335189023763208e-07, "loss": 0.2217, "step": 16438 }, { "epoch": 0.8154670370554095, "grad_norm": 10.614181518554688, "learning_rate": 8.330838293732269e-07, "loss": 0.2222, "step": 16439 }, { "epoch": 0.8155166426906096, "grad_norm": 15.262685775756836, "learning_rate": 8.326488596282007e-07, "loss": 0.3015, "step": 16440 }, { "epoch": 0.8155662483258098, "grad_norm": 8.713204383850098, "learning_rate": 8.322139931520223e-07, "loss": 0.2796, "step": 16441 }, { "epoch": 0.81561585396101, "grad_norm": 11.977999687194824, "learning_rate": 8.317792299554634e-07, "loss": 0.3263, "step": 16442 }, { "epoch": 0.8156654595962102, "grad_norm": 7.549305438995361, "learning_rate": 8.313445700493039e-07, "loss": 0.2364, "step": 16443 }, { "epoch": 0.8157150652314102, "grad_norm": 5.669318199157715, "learning_rate": 8.309100134443099e-07, "loss": 0.306, "step": 16444 }, { "epoch": 0.8157646708666104, "grad_norm": 5.746523380279541, "learning_rate": 8.304755601512515e-07, "loss": 0.2585, "step": 16445 }, { "epoch": 0.8158142765018106, "grad_norm": 9.46963119506836, "learning_rate": 8.300412101808952e-07, "loss": 0.203, "step": 16446 }, { "epoch": 0.8158638821370108, "grad_norm": 4.831437110900879, "learning_rate": 8.296069635440051e-07, "loss": 0.2698, "step": 16447 }, { "epoch": 0.815913487772211, "grad_norm": 8.435284614562988, "learning_rate": 8.291728202513388e-07, "loss": 0.3007, "step": 16448 }, { "epoch": 0.8159630934074111, "grad_norm": 9.22724723815918, "learning_rate": 8.287387803136587e-07, "loss": 0.3372, "step": 16449 }, { "epoch": 0.8160126990426112, "grad_norm": 11.03454875946045, "learning_rate": 8.283048437417174e-07, "loss": 0.3466, "step": 16450 }, { "epoch": 0.8160623046778114, "grad_norm": 11.667354583740234, "learning_rate": 8.278710105462689e-07, "loss": 0.3171, "step": 16451 }, { "epoch": 0.8161119103130116, "grad_norm": 5.346413612365723, "learning_rate": 8.274372807380643e-07, "loss": 0.2461, "step": 16452 }, { "epoch": 0.8161615159482117, "grad_norm": 6.313594818115234, "learning_rate": 8.270036543278526e-07, "loss": 0.2806, "step": 16453 }, { "epoch": 0.8162111215834119, "grad_norm": 6.666875839233398, "learning_rate": 8.265701313263758e-07, "loss": 0.2921, "step": 16454 }, { "epoch": 0.816260727218612, "grad_norm": 11.704919815063477, "learning_rate": 8.261367117443808e-07, "loss": 0.2562, "step": 16455 }, { "epoch": 0.8163103328538122, "grad_norm": 6.39930534362793, "learning_rate": 8.257033955926042e-07, "loss": 0.2363, "step": 16456 }, { "epoch": 0.8163599384890123, "grad_norm": 11.54876708984375, "learning_rate": 8.252701828817878e-07, "loss": 0.3001, "step": 16457 }, { "epoch": 0.8164095441242125, "grad_norm": 9.05066204071045, "learning_rate": 8.248370736226635e-07, "loss": 0.3602, "step": 16458 }, { "epoch": 0.8164591497594127, "grad_norm": 12.862936973571777, "learning_rate": 8.244040678259652e-07, "loss": 0.3273, "step": 16459 }, { "epoch": 0.8165087553946129, "grad_norm": 9.835339546203613, "learning_rate": 8.23971165502423e-07, "loss": 0.2694, "step": 16460 }, { "epoch": 0.8165583610298129, "grad_norm": 12.664724349975586, "learning_rate": 8.235383666627656e-07, "loss": 0.4517, "step": 16461 }, { "epoch": 0.8166079666650131, "grad_norm": 4.245538234710693, "learning_rate": 8.231056713177144e-07, "loss": 0.2062, "step": 16462 }, { "epoch": 0.8166575723002133, "grad_norm": 7.1464080810546875, "learning_rate": 8.226730794779963e-07, "loss": 0.2495, "step": 16463 }, { "epoch": 0.8167071779354135, "grad_norm": 7.931830406188965, "learning_rate": 8.222405911543274e-07, "loss": 0.266, "step": 16464 }, { "epoch": 0.8167567835706137, "grad_norm": 7.171144485473633, "learning_rate": 8.218082063574268e-07, "loss": 0.2478, "step": 16465 }, { "epoch": 0.8168063892058138, "grad_norm": 9.988303184509277, "learning_rate": 8.213759250980086e-07, "loss": 0.1943, "step": 16466 }, { "epoch": 0.8168559948410139, "grad_norm": 9.29581069946289, "learning_rate": 8.209437473867853e-07, "loss": 0.3833, "step": 16467 }, { "epoch": 0.8169056004762141, "grad_norm": 5.169480323791504, "learning_rate": 8.205116732344665e-07, "loss": 0.2689, "step": 16468 }, { "epoch": 0.8169552061114143, "grad_norm": 6.266747951507568, "learning_rate": 8.200797026517604e-07, "loss": 0.3046, "step": 16469 }, { "epoch": 0.8170048117466144, "grad_norm": 4.809295654296875, "learning_rate": 8.196478356493686e-07, "loss": 0.2451, "step": 16470 }, { "epoch": 0.8170544173818146, "grad_norm": 8.59589672088623, "learning_rate": 8.192160722379943e-07, "loss": 0.2969, "step": 16471 }, { "epoch": 0.8171040230170147, "grad_norm": 5.938992977142334, "learning_rate": 8.187844124283372e-07, "loss": 0.2855, "step": 16472 }, { "epoch": 0.8171536286522149, "grad_norm": 9.09764289855957, "learning_rate": 8.183528562310933e-07, "loss": 0.3504, "step": 16473 }, { "epoch": 0.817203234287415, "grad_norm": 4.461373805999756, "learning_rate": 8.179214036569578e-07, "loss": 0.1804, "step": 16474 }, { "epoch": 0.8172528399226152, "grad_norm": 8.953771591186523, "learning_rate": 8.174900547166209e-07, "loss": 0.2805, "step": 16475 }, { "epoch": 0.8173024455578154, "grad_norm": 7.46409797668457, "learning_rate": 8.170588094207727e-07, "loss": 0.2328, "step": 16476 }, { "epoch": 0.8173520511930156, "grad_norm": 9.796892166137695, "learning_rate": 8.166276677801011e-07, "loss": 0.3223, "step": 16477 }, { "epoch": 0.8174016568282156, "grad_norm": 5.719356060028076, "learning_rate": 8.161966298052865e-07, "loss": 0.2807, "step": 16478 }, { "epoch": 0.8174512624634158, "grad_norm": 8.48752498626709, "learning_rate": 8.157656955070121e-07, "loss": 0.3164, "step": 16479 }, { "epoch": 0.817500868098616, "grad_norm": 8.421636581420898, "learning_rate": 8.153348648959564e-07, "loss": 0.3387, "step": 16480 }, { "epoch": 0.8175504737338162, "grad_norm": 8.343432426452637, "learning_rate": 8.149041379827955e-07, "loss": 0.3287, "step": 16481 }, { "epoch": 0.8176000793690164, "grad_norm": 3.4241464138031006, "learning_rate": 8.144735147782034e-07, "loss": 0.2181, "step": 16482 }, { "epoch": 0.8176496850042165, "grad_norm": 16.926225662231445, "learning_rate": 8.140429952928508e-07, "loss": 0.4306, "step": 16483 }, { "epoch": 0.8176992906394166, "grad_norm": 8.05112361907959, "learning_rate": 8.136125795374078e-07, "loss": 0.2889, "step": 16484 }, { "epoch": 0.8177488962746168, "grad_norm": 6.880166530609131, "learning_rate": 8.131822675225359e-07, "loss": 0.3463, "step": 16485 }, { "epoch": 0.817798501909817, "grad_norm": 9.582306861877441, "learning_rate": 8.127520592589044e-07, "loss": 0.2659, "step": 16486 }, { "epoch": 0.8178481075450171, "grad_norm": 7.876351356506348, "learning_rate": 8.123219547571693e-07, "loss": 0.3035, "step": 16487 }, { "epoch": 0.8178977131802173, "grad_norm": 5.079440116882324, "learning_rate": 8.118919540279907e-07, "loss": 0.3048, "step": 16488 }, { "epoch": 0.8179473188154174, "grad_norm": 5.0158305168151855, "learning_rate": 8.114620570820236e-07, "loss": 0.2842, "step": 16489 }, { "epoch": 0.8179969244506176, "grad_norm": 6.341017723083496, "learning_rate": 8.110322639299228e-07, "loss": 0.2954, "step": 16490 }, { "epoch": 0.8180465300858177, "grad_norm": 6.250901699066162, "learning_rate": 8.106025745823349e-07, "loss": 0.2682, "step": 16491 }, { "epoch": 0.8180961357210179, "grad_norm": 5.572329998016357, "learning_rate": 8.101729890499127e-07, "loss": 0.2772, "step": 16492 }, { "epoch": 0.8181457413562181, "grad_norm": 4.706478118896484, "learning_rate": 8.097435073432969e-07, "loss": 0.2481, "step": 16493 }, { "epoch": 0.8181953469914183, "grad_norm": 4.851249694824219, "learning_rate": 8.09314129473135e-07, "loss": 0.2001, "step": 16494 }, { "epoch": 0.8182449526266183, "grad_norm": 25.02651596069336, "learning_rate": 8.088848554500628e-07, "loss": 0.2408, "step": 16495 }, { "epoch": 0.8182945582618185, "grad_norm": 11.725685119628906, "learning_rate": 8.084556852847203e-07, "loss": 0.3609, "step": 16496 }, { "epoch": 0.8183441638970187, "grad_norm": 12.910085678100586, "learning_rate": 8.080266189877417e-07, "loss": 0.3009, "step": 16497 }, { "epoch": 0.8183937695322189, "grad_norm": 7.722627639770508, "learning_rate": 8.075976565697607e-07, "loss": 0.2451, "step": 16498 }, { "epoch": 0.818443375167419, "grad_norm": 4.791321277618408, "learning_rate": 8.071687980414039e-07, "loss": 0.2402, "step": 16499 }, { "epoch": 0.8184929808026192, "grad_norm": 10.245950698852539, "learning_rate": 8.067400434133038e-07, "loss": 0.3645, "step": 16500 }, { "epoch": 0.8185425864378193, "grad_norm": 7.194405555725098, "learning_rate": 8.063113926960803e-07, "loss": 0.27, "step": 16501 }, { "epoch": 0.8185921920730195, "grad_norm": 6.652216911315918, "learning_rate": 8.058828459003581e-07, "loss": 0.2602, "step": 16502 }, { "epoch": 0.8186417977082197, "grad_norm": 5.4813032150268555, "learning_rate": 8.054544030367562e-07, "loss": 0.235, "step": 16503 }, { "epoch": 0.8186914033434198, "grad_norm": 5.21000862121582, "learning_rate": 8.050260641158924e-07, "loss": 0.2378, "step": 16504 }, { "epoch": 0.81874100897862, "grad_norm": 9.25662899017334, "learning_rate": 8.04597829148378e-07, "loss": 0.2181, "step": 16505 }, { "epoch": 0.8187906146138201, "grad_norm": 4.152161598205566, "learning_rate": 8.041696981448299e-07, "loss": 0.285, "step": 16506 }, { "epoch": 0.8188402202490203, "grad_norm": 4.866642951965332, "learning_rate": 8.03741671115853e-07, "loss": 0.1522, "step": 16507 }, { "epoch": 0.8188898258842204, "grad_norm": 7.12840461730957, "learning_rate": 8.033137480720559e-07, "loss": 0.3865, "step": 16508 }, { "epoch": 0.8189394315194206, "grad_norm": 11.823333740234375, "learning_rate": 8.028859290240426e-07, "loss": 0.384, "step": 16509 }, { "epoch": 0.8189890371546208, "grad_norm": 5.65138578414917, "learning_rate": 8.024582139824144e-07, "loss": 0.2834, "step": 16510 }, { "epoch": 0.819038642789821, "grad_norm": 11.708572387695312, "learning_rate": 8.020306029577707e-07, "loss": 0.3435, "step": 16511 }, { "epoch": 0.819088248425021, "grad_norm": 4.501149654388428, "learning_rate": 8.016030959607086e-07, "loss": 0.3086, "step": 16512 }, { "epoch": 0.8191378540602212, "grad_norm": 12.09151554107666, "learning_rate": 8.011756930018183e-07, "loss": 0.3104, "step": 16513 }, { "epoch": 0.8191874596954214, "grad_norm": 7.590608596801758, "learning_rate": 8.00748394091696e-07, "loss": 0.2742, "step": 16514 }, { "epoch": 0.8192370653306216, "grad_norm": 7.352795124053955, "learning_rate": 8.003211992409271e-07, "loss": 0.2642, "step": 16515 }, { "epoch": 0.8192866709658218, "grad_norm": 4.497827053070068, "learning_rate": 7.998941084600986e-07, "loss": 0.2069, "step": 16516 }, { "epoch": 0.8193362766010219, "grad_norm": 8.047414779663086, "learning_rate": 7.994671217597938e-07, "loss": 0.2826, "step": 16517 }, { "epoch": 0.819385882236222, "grad_norm": 7.330737113952637, "learning_rate": 7.990402391505952e-07, "loss": 0.3574, "step": 16518 }, { "epoch": 0.8194354878714222, "grad_norm": 12.954773902893066, "learning_rate": 7.986134606430773e-07, "loss": 0.5185, "step": 16519 }, { "epoch": 0.8194850935066224, "grad_norm": 6.249025344848633, "learning_rate": 7.981867862478209e-07, "loss": 0.286, "step": 16520 }, { "epoch": 0.8195346991418225, "grad_norm": 7.001260280609131, "learning_rate": 7.977602159753955e-07, "loss": 0.2894, "step": 16521 }, { "epoch": 0.8195843047770227, "grad_norm": 3.6750216484069824, "learning_rate": 7.973337498363726e-07, "loss": 0.2215, "step": 16522 }, { "epoch": 0.8196339104122228, "grad_norm": 6.6076226234436035, "learning_rate": 7.969073878413208e-07, "loss": 0.276, "step": 16523 }, { "epoch": 0.819683516047423, "grad_norm": 10.024205207824707, "learning_rate": 7.964811300008046e-07, "loss": 0.3171, "step": 16524 }, { "epoch": 0.8197331216826231, "grad_norm": 7.034392356872559, "learning_rate": 7.960549763253877e-07, "loss": 0.321, "step": 16525 }, { "epoch": 0.8197827273178233, "grad_norm": 4.7830915451049805, "learning_rate": 7.956289268256318e-07, "loss": 0.2591, "step": 16526 }, { "epoch": 0.8198323329530235, "grad_norm": 7.465545177459717, "learning_rate": 7.952029815120899e-07, "loss": 0.2968, "step": 16527 }, { "epoch": 0.8198819385882237, "grad_norm": 6.092264652252197, "learning_rate": 7.947771403953231e-07, "loss": 0.2835, "step": 16528 }, { "epoch": 0.8199315442234237, "grad_norm": 10.967089653015137, "learning_rate": 7.94351403485879e-07, "loss": 0.2461, "step": 16529 }, { "epoch": 0.8199811498586239, "grad_norm": 9.893938064575195, "learning_rate": 7.939257707943105e-07, "loss": 0.2437, "step": 16530 }, { "epoch": 0.8200307554938241, "grad_norm": 7.633327484130859, "learning_rate": 7.935002423311633e-07, "loss": 0.4016, "step": 16531 }, { "epoch": 0.8200803611290243, "grad_norm": 7.105104923248291, "learning_rate": 7.93074818106983e-07, "loss": 0.291, "step": 16532 }, { "epoch": 0.8201299667642244, "grad_norm": 6.907370567321777, "learning_rate": 7.926494981323119e-07, "loss": 0.277, "step": 16533 }, { "epoch": 0.8201795723994246, "grad_norm": 6.083693981170654, "learning_rate": 7.922242824176895e-07, "loss": 0.2907, "step": 16534 }, { "epoch": 0.8202291780346247, "grad_norm": 7.833483695983887, "learning_rate": 7.917991709736539e-07, "loss": 0.3467, "step": 16535 }, { "epoch": 0.8202787836698249, "grad_norm": 4.737788677215576, "learning_rate": 7.91374163810737e-07, "loss": 0.2387, "step": 16536 }, { "epoch": 0.820328389305025, "grad_norm": 7.9638543128967285, "learning_rate": 7.909492609394725e-07, "loss": 0.3204, "step": 16537 }, { "epoch": 0.8203779949402252, "grad_norm": 3.7937419414520264, "learning_rate": 7.90524462370389e-07, "loss": 0.2186, "step": 16538 }, { "epoch": 0.8204276005754254, "grad_norm": 9.037691116333008, "learning_rate": 7.900997681140132e-07, "loss": 0.2656, "step": 16539 }, { "epoch": 0.8204772062106255, "grad_norm": 10.497453689575195, "learning_rate": 7.896751781808698e-07, "loss": 0.429, "step": 16540 }, { "epoch": 0.8205268118458257, "grad_norm": 6.251776218414307, "learning_rate": 7.892506925814813e-07, "loss": 0.2857, "step": 16541 }, { "epoch": 0.8205764174810258, "grad_norm": 4.160506725311279, "learning_rate": 7.888263113263628e-07, "loss": 0.2702, "step": 16542 }, { "epoch": 0.820626023116226, "grad_norm": 6.348485946655273, "learning_rate": 7.884020344260357e-07, "loss": 0.2941, "step": 16543 }, { "epoch": 0.8206756287514262, "grad_norm": 8.74372386932373, "learning_rate": 7.879778618910105e-07, "loss": 0.3105, "step": 16544 }, { "epoch": 0.8207252343866264, "grad_norm": 6.153698444366455, "learning_rate": 7.875537937317984e-07, "loss": 0.3387, "step": 16545 }, { "epoch": 0.8207748400218264, "grad_norm": 7.622305393218994, "learning_rate": 7.871298299589097e-07, "loss": 0.2931, "step": 16546 }, { "epoch": 0.8208244456570266, "grad_norm": 11.12551498413086, "learning_rate": 7.86705970582849e-07, "loss": 0.3288, "step": 16547 }, { "epoch": 0.8208740512922268, "grad_norm": 7.143354415893555, "learning_rate": 7.862822156141198e-07, "loss": 0.243, "step": 16548 }, { "epoch": 0.820923656927427, "grad_norm": 11.598424911499023, "learning_rate": 7.85858565063225e-07, "loss": 0.3654, "step": 16549 }, { "epoch": 0.8209732625626271, "grad_norm": 5.68461799621582, "learning_rate": 7.85435018940659e-07, "loss": 0.1783, "step": 16550 }, { "epoch": 0.8210228681978273, "grad_norm": 7.814141750335693, "learning_rate": 7.850115772569217e-07, "loss": 0.2763, "step": 16551 }, { "epoch": 0.8210724738330274, "grad_norm": 11.913061141967773, "learning_rate": 7.845882400225035e-07, "loss": 0.294, "step": 16552 }, { "epoch": 0.8211220794682276, "grad_norm": 8.326904296875, "learning_rate": 7.841650072478952e-07, "loss": 0.2938, "step": 16553 }, { "epoch": 0.8211716851034278, "grad_norm": 11.816514015197754, "learning_rate": 7.837418789435846e-07, "loss": 0.4079, "step": 16554 }, { "epoch": 0.8212212907386279, "grad_norm": 5.929641246795654, "learning_rate": 7.833188551200594e-07, "loss": 0.2994, "step": 16555 }, { "epoch": 0.8212708963738281, "grad_norm": 13.266093254089355, "learning_rate": 7.82895935787798e-07, "loss": 0.2924, "step": 16556 }, { "epoch": 0.8213205020090282, "grad_norm": 5.805810451507568, "learning_rate": 7.82473120957285e-07, "loss": 0.2682, "step": 16557 }, { "epoch": 0.8213701076442284, "grad_norm": 4.9935526847839355, "learning_rate": 7.820504106389942e-07, "loss": 0.2529, "step": 16558 }, { "epoch": 0.8214197132794285, "grad_norm": 4.949250221252441, "learning_rate": 7.816278048434028e-07, "loss": 0.227, "step": 16559 }, { "epoch": 0.8214693189146287, "grad_norm": 11.313441276550293, "learning_rate": 7.812053035809824e-07, "loss": 0.3671, "step": 16560 }, { "epoch": 0.8215189245498289, "grad_norm": 11.640022277832031, "learning_rate": 7.807829068622042e-07, "loss": 0.2538, "step": 16561 }, { "epoch": 0.8215685301850291, "grad_norm": 5.994566917419434, "learning_rate": 7.803606146975318e-07, "loss": 0.2789, "step": 16562 }, { "epoch": 0.8216181358202291, "grad_norm": 9.661304473876953, "learning_rate": 7.799384270974347e-07, "loss": 0.2399, "step": 16563 }, { "epoch": 0.8216677414554293, "grad_norm": 5.5015645027160645, "learning_rate": 7.795163440723697e-07, "loss": 0.2884, "step": 16564 }, { "epoch": 0.8217173470906295, "grad_norm": 7.70307731628418, "learning_rate": 7.790943656328009e-07, "loss": 0.2927, "step": 16565 }, { "epoch": 0.8217669527258297, "grad_norm": 9.045125961303711, "learning_rate": 7.786724917891825e-07, "loss": 0.2264, "step": 16566 }, { "epoch": 0.8218165583610298, "grad_norm": 16.16279411315918, "learning_rate": 7.782507225519687e-07, "loss": 0.3445, "step": 16567 }, { "epoch": 0.82186616399623, "grad_norm": 4.6240057945251465, "learning_rate": 7.778290579316117e-07, "loss": 0.3145, "step": 16568 }, { "epoch": 0.8219157696314301, "grad_norm": 4.121523380279541, "learning_rate": 7.774074979385621e-07, "loss": 0.1664, "step": 16569 }, { "epoch": 0.8219653752666303, "grad_norm": 10.034299850463867, "learning_rate": 7.769860425832626e-07, "loss": 0.3729, "step": 16570 }, { "epoch": 0.8220149809018304, "grad_norm": 4.741670608520508, "learning_rate": 7.76564691876161e-07, "loss": 0.276, "step": 16571 }, { "epoch": 0.8220645865370306, "grad_norm": 11.200685501098633, "learning_rate": 7.761434458276956e-07, "loss": 0.3929, "step": 16572 }, { "epoch": 0.8221141921722308, "grad_norm": 11.903252601623535, "learning_rate": 7.75722304448307e-07, "loss": 0.2943, "step": 16573 }, { "epoch": 0.8221637978074309, "grad_norm": 6.845920085906982, "learning_rate": 7.753012677484301e-07, "loss": 0.2601, "step": 16574 }, { "epoch": 0.822213403442631, "grad_norm": 7.0284104347229, "learning_rate": 7.748803357384999e-07, "loss": 0.3624, "step": 16575 }, { "epoch": 0.8222630090778312, "grad_norm": 4.30138635635376, "learning_rate": 7.744595084289441e-07, "loss": 0.2264, "step": 16576 }, { "epoch": 0.8223126147130314, "grad_norm": 9.199411392211914, "learning_rate": 7.740387858301951e-07, "loss": 0.1783, "step": 16577 }, { "epoch": 0.8223622203482316, "grad_norm": 7.442093372344971, "learning_rate": 7.736181679526756e-07, "loss": 0.2352, "step": 16578 }, { "epoch": 0.8224118259834318, "grad_norm": 9.206981658935547, "learning_rate": 7.731976548068098e-07, "loss": 0.2786, "step": 16579 }, { "epoch": 0.8224614316186318, "grad_norm": 8.691659927368164, "learning_rate": 7.727772464030181e-07, "loss": 0.4001, "step": 16580 }, { "epoch": 0.822511037253832, "grad_norm": 5.958597183227539, "learning_rate": 7.72356942751718e-07, "loss": 0.2351, "step": 16581 }, { "epoch": 0.8225606428890322, "grad_norm": 9.223922729492188, "learning_rate": 7.719367438633257e-07, "loss": 0.3375, "step": 16582 }, { "epoch": 0.8226102485242324, "grad_norm": 4.457005500793457, "learning_rate": 7.715166497482534e-07, "loss": 0.2498, "step": 16583 }, { "epoch": 0.8226598541594325, "grad_norm": 12.226950645446777, "learning_rate": 7.710966604169112e-07, "loss": 0.297, "step": 16584 }, { "epoch": 0.8227094597946327, "grad_norm": 6.739090919494629, "learning_rate": 7.706767758797079e-07, "loss": 0.257, "step": 16585 }, { "epoch": 0.8227590654298328, "grad_norm": 8.545426368713379, "learning_rate": 7.702569961470463e-07, "loss": 0.3767, "step": 16586 }, { "epoch": 0.822808671065033, "grad_norm": 6.464001178741455, "learning_rate": 7.698373212293292e-07, "loss": 0.2385, "step": 16587 }, { "epoch": 0.8228582767002331, "grad_norm": 6.6261467933654785, "learning_rate": 7.694177511369572e-07, "loss": 0.3108, "step": 16588 }, { "epoch": 0.8229078823354333, "grad_norm": 8.97218132019043, "learning_rate": 7.689982858803263e-07, "loss": 0.2641, "step": 16589 }, { "epoch": 0.8229574879706335, "grad_norm": 8.436202049255371, "learning_rate": 7.685789254698323e-07, "loss": 0.2705, "step": 16590 }, { "epoch": 0.8230070936058336, "grad_norm": 6.716442108154297, "learning_rate": 7.681596699158667e-07, "loss": 0.2786, "step": 16591 }, { "epoch": 0.8230566992410338, "grad_norm": 5.646714687347412, "learning_rate": 7.677405192288201e-07, "loss": 0.2911, "step": 16592 }, { "epoch": 0.8231063048762339, "grad_norm": 8.224140167236328, "learning_rate": 7.673214734190749e-07, "loss": 0.2981, "step": 16593 }, { "epoch": 0.8231559105114341, "grad_norm": 4.879589557647705, "learning_rate": 7.669025324970208e-07, "loss": 0.2238, "step": 16594 }, { "epoch": 0.8232055161466343, "grad_norm": 3.792405843734741, "learning_rate": 7.66483696473036e-07, "loss": 0.2682, "step": 16595 }, { "epoch": 0.8232551217818345, "grad_norm": 14.655739784240723, "learning_rate": 7.660649653574997e-07, "loss": 0.3913, "step": 16596 }, { "epoch": 0.8233047274170345, "grad_norm": 10.992600440979004, "learning_rate": 7.656463391607894e-07, "loss": 0.339, "step": 16597 }, { "epoch": 0.8233543330522347, "grad_norm": 5.384742259979248, "learning_rate": 7.652278178932776e-07, "loss": 0.303, "step": 16598 }, { "epoch": 0.8234039386874349, "grad_norm": 8.648231506347656, "learning_rate": 7.648094015653362e-07, "loss": 0.3547, "step": 16599 }, { "epoch": 0.8234535443226351, "grad_norm": 6.824077129364014, "learning_rate": 7.643910901873352e-07, "loss": 0.2747, "step": 16600 }, { "epoch": 0.8235031499578352, "grad_norm": 5.038405418395996, "learning_rate": 7.639728837696364e-07, "loss": 0.3164, "step": 16601 }, { "epoch": 0.8235527555930354, "grad_norm": 17.022457122802734, "learning_rate": 7.63554782322608e-07, "loss": 0.377, "step": 16602 }, { "epoch": 0.8236023612282355, "grad_norm": 15.324675559997559, "learning_rate": 7.631367858566069e-07, "loss": 0.398, "step": 16603 }, { "epoch": 0.8236519668634357, "grad_norm": 5.458794116973877, "learning_rate": 7.627188943819935e-07, "loss": 0.2517, "step": 16604 }, { "epoch": 0.8237015724986358, "grad_norm": 4.068026542663574, "learning_rate": 7.623011079091219e-07, "loss": 0.2135, "step": 16605 }, { "epoch": 0.823751178133836, "grad_norm": 7.092925071716309, "learning_rate": 7.618834264483476e-07, "loss": 0.2839, "step": 16606 }, { "epoch": 0.8238007837690362, "grad_norm": 7.37889289855957, "learning_rate": 7.614658500100164e-07, "loss": 0.21, "step": 16607 }, { "epoch": 0.8238503894042363, "grad_norm": 13.14029312133789, "learning_rate": 7.610483786044809e-07, "loss": 0.4147, "step": 16608 }, { "epoch": 0.8238999950394365, "grad_norm": 17.81859588623047, "learning_rate": 7.606310122420835e-07, "loss": 0.1798, "step": 16609 }, { "epoch": 0.8239496006746366, "grad_norm": 5.722409248352051, "learning_rate": 7.602137509331669e-07, "loss": 0.3049, "step": 16610 }, { "epoch": 0.8239992063098368, "grad_norm": 9.737859725952148, "learning_rate": 7.597965946880714e-07, "loss": 0.2469, "step": 16611 }, { "epoch": 0.824048811945037, "grad_norm": 7.290888786315918, "learning_rate": 7.593795435171353e-07, "loss": 0.2725, "step": 16612 }, { "epoch": 0.8240984175802372, "grad_norm": 5.345469951629639, "learning_rate": 7.589625974306902e-07, "loss": 0.268, "step": 16613 }, { "epoch": 0.8241480232154372, "grad_norm": 7.006592750549316, "learning_rate": 7.585457564390731e-07, "loss": 0.269, "step": 16614 }, { "epoch": 0.8241976288506374, "grad_norm": 5.8763298988342285, "learning_rate": 7.581290205526093e-07, "loss": 0.2156, "step": 16615 }, { "epoch": 0.8242472344858376, "grad_norm": 5.642367839813232, "learning_rate": 7.577123897816269e-07, "loss": 0.2058, "step": 16616 }, { "epoch": 0.8242968401210378, "grad_norm": 12.568474769592285, "learning_rate": 7.572958641364508e-07, "loss": 0.3356, "step": 16617 }, { "epoch": 0.8243464457562379, "grad_norm": 8.789128303527832, "learning_rate": 7.568794436274024e-07, "loss": 0.3397, "step": 16618 }, { "epoch": 0.8243960513914381, "grad_norm": 9.649575233459473, "learning_rate": 7.564631282648006e-07, "loss": 0.3438, "step": 16619 }, { "epoch": 0.8244456570266382, "grad_norm": 6.637078285217285, "learning_rate": 7.560469180589635e-07, "loss": 0.3012, "step": 16620 }, { "epoch": 0.8244952626618384, "grad_norm": 4.691351890563965, "learning_rate": 7.556308130202006e-07, "loss": 0.2101, "step": 16621 }, { "epoch": 0.8245448682970385, "grad_norm": 5.179774284362793, "learning_rate": 7.552148131588289e-07, "loss": 0.2413, "step": 16622 }, { "epoch": 0.8245944739322387, "grad_norm": 5.581680774688721, "learning_rate": 7.547989184851529e-07, "loss": 0.2966, "step": 16623 }, { "epoch": 0.8246440795674389, "grad_norm": 5.095905780792236, "learning_rate": 7.543831290094799e-07, "loss": 0.2094, "step": 16624 }, { "epoch": 0.824693685202639, "grad_norm": 6.426541805267334, "learning_rate": 7.539674447421141e-07, "loss": 0.2916, "step": 16625 }, { "epoch": 0.8247432908378391, "grad_norm": 9.046972274780273, "learning_rate": 7.53551865693356e-07, "loss": 0.3556, "step": 16626 }, { "epoch": 0.8247928964730393, "grad_norm": 4.0973100662231445, "learning_rate": 7.531363918735019e-07, "loss": 0.2476, "step": 16627 }, { "epoch": 0.8248425021082395, "grad_norm": 9.14229679107666, "learning_rate": 7.527210232928511e-07, "loss": 0.3107, "step": 16628 }, { "epoch": 0.8248921077434397, "grad_norm": 11.467401504516602, "learning_rate": 7.523057599616934e-07, "loss": 0.3517, "step": 16629 }, { "epoch": 0.8249417133786399, "grad_norm": 7.392775058746338, "learning_rate": 7.518906018903205e-07, "loss": 0.2226, "step": 16630 }, { "epoch": 0.8249913190138399, "grad_norm": 8.380779266357422, "learning_rate": 7.514755490890203e-07, "loss": 0.3625, "step": 16631 }, { "epoch": 0.8250409246490401, "grad_norm": 5.886956691741943, "learning_rate": 7.510606015680777e-07, "loss": 0.1751, "step": 16632 }, { "epoch": 0.8250905302842403, "grad_norm": 6.087443828582764, "learning_rate": 7.506457593377753e-07, "loss": 0.33, "step": 16633 }, { "epoch": 0.8251401359194405, "grad_norm": 9.191996574401855, "learning_rate": 7.502310224083942e-07, "loss": 0.3355, "step": 16634 }, { "epoch": 0.8251897415546406, "grad_norm": 5.205461502075195, "learning_rate": 7.498163907902084e-07, "loss": 0.2984, "step": 16635 }, { "epoch": 0.8252393471898407, "grad_norm": 4.594411373138428, "learning_rate": 7.494018644934974e-07, "loss": 0.3104, "step": 16636 }, { "epoch": 0.8252889528250409, "grad_norm": 13.078023910522461, "learning_rate": 7.4898744352853e-07, "loss": 0.3579, "step": 16637 }, { "epoch": 0.8253385584602411, "grad_norm": 8.924664497375488, "learning_rate": 7.48573127905577e-07, "loss": 0.3577, "step": 16638 }, { "epoch": 0.8253881640954412, "grad_norm": 12.260505676269531, "learning_rate": 7.481589176349041e-07, "loss": 0.2873, "step": 16639 }, { "epoch": 0.8254377697306414, "grad_norm": 16.16437339782715, "learning_rate": 7.477448127267772e-07, "loss": 0.3538, "step": 16640 }, { "epoch": 0.8254873753658416, "grad_norm": 15.315472602844238, "learning_rate": 7.47330813191457e-07, "loss": 0.5375, "step": 16641 }, { "epoch": 0.8255369810010417, "grad_norm": 4.371968746185303, "learning_rate": 7.469169190392034e-07, "loss": 0.3465, "step": 16642 }, { "epoch": 0.8255865866362418, "grad_norm": 22.673301696777344, "learning_rate": 7.465031302802733e-07, "loss": 0.4945, "step": 16643 }, { "epoch": 0.825636192271442, "grad_norm": 5.721934795379639, "learning_rate": 7.460894469249186e-07, "loss": 0.2283, "step": 16644 }, { "epoch": 0.8256857979066422, "grad_norm": 12.507649421691895, "learning_rate": 7.456758689833915e-07, "loss": 0.2938, "step": 16645 }, { "epoch": 0.8257354035418424, "grad_norm": 12.171724319458008, "learning_rate": 7.452623964659406e-07, "loss": 0.3541, "step": 16646 }, { "epoch": 0.8257850091770426, "grad_norm": 7.0216779708862305, "learning_rate": 7.448490293828125e-07, "loss": 0.25, "step": 16647 }, { "epoch": 0.8258346148122426, "grad_norm": 3.5880327224731445, "learning_rate": 7.444357677442504e-07, "loss": 0.2173, "step": 16648 }, { "epoch": 0.8258842204474428, "grad_norm": 7.6861443519592285, "learning_rate": 7.44022611560496e-07, "loss": 0.1955, "step": 16649 }, { "epoch": 0.825933826082643, "grad_norm": 7.881358623504639, "learning_rate": 7.436095608417837e-07, "loss": 0.2343, "step": 16650 }, { "epoch": 0.8259834317178432, "grad_norm": 9.883520126342773, "learning_rate": 7.43196615598355e-07, "loss": 0.2683, "step": 16651 }, { "epoch": 0.8260330373530433, "grad_norm": 6.327159404754639, "learning_rate": 7.427837758404366e-07, "loss": 0.2426, "step": 16652 }, { "epoch": 0.8260826429882434, "grad_norm": 9.142383575439453, "learning_rate": 7.423710415782648e-07, "loss": 0.3804, "step": 16653 }, { "epoch": 0.8261322486234436, "grad_norm": 4.275259494781494, "learning_rate": 7.419584128220637e-07, "loss": 0.2453, "step": 16654 }, { "epoch": 0.8261818542586438, "grad_norm": 7.044580936431885, "learning_rate": 7.415458895820582e-07, "loss": 0.2948, "step": 16655 }, { "epoch": 0.826231459893844, "grad_norm": 6.142329216003418, "learning_rate": 7.411334718684726e-07, "loss": 0.2702, "step": 16656 }, { "epoch": 0.8262810655290441, "grad_norm": 8.836322784423828, "learning_rate": 7.407211596915271e-07, "loss": 0.2996, "step": 16657 }, { "epoch": 0.8263306711642443, "grad_norm": 6.7318434715271, "learning_rate": 7.40308953061436e-07, "loss": 0.2524, "step": 16658 }, { "epoch": 0.8263802767994444, "grad_norm": 11.319464683532715, "learning_rate": 7.398968519884176e-07, "loss": 0.3601, "step": 16659 }, { "epoch": 0.8264298824346445, "grad_norm": 10.039066314697266, "learning_rate": 7.394848564826812e-07, "loss": 0.338, "step": 16660 }, { "epoch": 0.8264794880698447, "grad_norm": 7.151019096374512, "learning_rate": 7.390729665544377e-07, "loss": 0.3868, "step": 16661 }, { "epoch": 0.8265290937050449, "grad_norm": 4.737135887145996, "learning_rate": 7.386611822138934e-07, "loss": 0.2624, "step": 16662 }, { "epoch": 0.8265786993402451, "grad_norm": 5.428291320800781, "learning_rate": 7.382495034712533e-07, "loss": 0.2676, "step": 16663 }, { "epoch": 0.8266283049754453, "grad_norm": 8.841798782348633, "learning_rate": 7.378379303367167e-07, "loss": 0.372, "step": 16664 }, { "epoch": 0.8266779106106453, "grad_norm": 7.53786039352417, "learning_rate": 7.374264628204858e-07, "loss": 0.303, "step": 16665 }, { "epoch": 0.8267275162458455, "grad_norm": 7.772427558898926, "learning_rate": 7.370151009327547e-07, "loss": 0.2915, "step": 16666 }, { "epoch": 0.8267771218810457, "grad_norm": 10.637174606323242, "learning_rate": 7.366038446837176e-07, "loss": 0.4278, "step": 16667 }, { "epoch": 0.8268267275162459, "grad_norm": 5.8406982421875, "learning_rate": 7.361926940835656e-07, "loss": 0.27, "step": 16668 }, { "epoch": 0.826876333151446, "grad_norm": 7.762337684631348, "learning_rate": 7.357816491424886e-07, "loss": 0.2847, "step": 16669 }, { "epoch": 0.8269259387866461, "grad_norm": 10.502009391784668, "learning_rate": 7.353707098706686e-07, "loss": 0.3669, "step": 16670 }, { "epoch": 0.8269755444218463, "grad_norm": 9.510405540466309, "learning_rate": 7.349598762782945e-07, "loss": 0.3396, "step": 16671 }, { "epoch": 0.8270251500570465, "grad_norm": 8.214187622070312, "learning_rate": 7.345491483755413e-07, "loss": 0.1597, "step": 16672 }, { "epoch": 0.8270747556922466, "grad_norm": 10.618736267089844, "learning_rate": 7.341385261725919e-07, "loss": 0.3409, "step": 16673 }, { "epoch": 0.8271243613274468, "grad_norm": 5.445907115936279, "learning_rate": 7.337280096796184e-07, "loss": 0.2311, "step": 16674 }, { "epoch": 0.827173966962647, "grad_norm": 9.80744743347168, "learning_rate": 7.333175989067947e-07, "loss": 0.415, "step": 16675 }, { "epoch": 0.8272235725978471, "grad_norm": 8.56112289428711, "learning_rate": 7.329072938642911e-07, "loss": 0.2937, "step": 16676 }, { "epoch": 0.8272731782330472, "grad_norm": 11.666001319885254, "learning_rate": 7.324970945622762e-07, "loss": 0.3925, "step": 16677 }, { "epoch": 0.8273227838682474, "grad_norm": 6.29841423034668, "learning_rate": 7.32087001010911e-07, "loss": 0.21, "step": 16678 }, { "epoch": 0.8273723895034476, "grad_norm": 8.664219856262207, "learning_rate": 7.316770132203632e-07, "loss": 0.2799, "step": 16679 }, { "epoch": 0.8274219951386478, "grad_norm": 10.096782684326172, "learning_rate": 7.31267131200788e-07, "loss": 0.3662, "step": 16680 }, { "epoch": 0.827471600773848, "grad_norm": 4.314714431762695, "learning_rate": 7.308573549623449e-07, "loss": 0.2711, "step": 16681 }, { "epoch": 0.827521206409048, "grad_norm": 5.565334796905518, "learning_rate": 7.304476845151876e-07, "loss": 0.2801, "step": 16682 }, { "epoch": 0.8275708120442482, "grad_norm": 10.455633163452148, "learning_rate": 7.300381198694689e-07, "loss": 0.3792, "step": 16683 }, { "epoch": 0.8276204176794484, "grad_norm": 5.478099346160889, "learning_rate": 7.296286610353348e-07, "loss": 0.2541, "step": 16684 }, { "epoch": 0.8276700233146486, "grad_norm": 16.905324935913086, "learning_rate": 7.292193080229365e-07, "loss": 0.2522, "step": 16685 }, { "epoch": 0.8277196289498487, "grad_norm": 3.6751062870025635, "learning_rate": 7.288100608424148e-07, "loss": 0.2162, "step": 16686 }, { "epoch": 0.8277692345850488, "grad_norm": 6.4897236824035645, "learning_rate": 7.284009195039116e-07, "loss": 0.3842, "step": 16687 }, { "epoch": 0.827818840220249, "grad_norm": 6.648192405700684, "learning_rate": 7.279918840175659e-07, "loss": 0.269, "step": 16688 }, { "epoch": 0.8278684458554492, "grad_norm": 16.434545516967773, "learning_rate": 7.275829543935136e-07, "loss": 0.2511, "step": 16689 }, { "epoch": 0.8279180514906493, "grad_norm": 5.973814010620117, "learning_rate": 7.271741306418883e-07, "loss": 0.177, "step": 16690 }, { "epoch": 0.8279676571258495, "grad_norm": 5.1854634284973145, "learning_rate": 7.267654127728213e-07, "loss": 0.2502, "step": 16691 }, { "epoch": 0.8280172627610497, "grad_norm": 6.738595962524414, "learning_rate": 7.2635680079644e-07, "loss": 0.3158, "step": 16692 }, { "epoch": 0.8280668683962498, "grad_norm": 4.251889705657959, "learning_rate": 7.259482947228713e-07, "loss": 0.2777, "step": 16693 }, { "epoch": 0.82811647403145, "grad_norm": 6.398914337158203, "learning_rate": 7.255398945622366e-07, "loss": 0.232, "step": 16694 }, { "epoch": 0.8281660796666501, "grad_norm": 3.6567721366882324, "learning_rate": 7.251316003246567e-07, "loss": 0.1592, "step": 16695 }, { "epoch": 0.8282156853018503, "grad_norm": 10.052175521850586, "learning_rate": 7.247234120202495e-07, "loss": 0.2537, "step": 16696 }, { "epoch": 0.8282652909370505, "grad_norm": 6.0050201416015625, "learning_rate": 7.243153296591299e-07, "loss": 0.3066, "step": 16697 }, { "epoch": 0.8283148965722507, "grad_norm": 4.850287914276123, "learning_rate": 7.239073532514107e-07, "loss": 0.2688, "step": 16698 }, { "epoch": 0.8283645022074507, "grad_norm": 5.788156032562256, "learning_rate": 7.234994828072012e-07, "loss": 0.2925, "step": 16699 }, { "epoch": 0.8284141078426509, "grad_norm": 12.2774658203125, "learning_rate": 7.230917183366104e-07, "loss": 0.2603, "step": 16700 }, { "epoch": 0.8284637134778511, "grad_norm": 16.349756240844727, "learning_rate": 7.226840598497403e-07, "loss": 0.3192, "step": 16701 }, { "epoch": 0.8285133191130513, "grad_norm": 15.881523132324219, "learning_rate": 7.22276507356694e-07, "loss": 0.3358, "step": 16702 }, { "epoch": 0.8285629247482514, "grad_norm": 8.041800498962402, "learning_rate": 7.21869060867571e-07, "loss": 0.3612, "step": 16703 }, { "epoch": 0.8286125303834515, "grad_norm": 7.49251127243042, "learning_rate": 7.214617203924679e-07, "loss": 0.2809, "step": 16704 }, { "epoch": 0.8286621360186517, "grad_norm": 7.766364574432373, "learning_rate": 7.210544859414786e-07, "loss": 0.2582, "step": 16705 }, { "epoch": 0.8287117416538519, "grad_norm": 6.228903770446777, "learning_rate": 7.206473575246948e-07, "loss": 0.2621, "step": 16706 }, { "epoch": 0.828761347289052, "grad_norm": 7.9412126541137695, "learning_rate": 7.202403351522052e-07, "loss": 0.3084, "step": 16707 }, { "epoch": 0.8288109529242522, "grad_norm": 8.309545516967773, "learning_rate": 7.198334188340972e-07, "loss": 0.322, "step": 16708 }, { "epoch": 0.8288605585594524, "grad_norm": 6.051477432250977, "learning_rate": 7.194266085804514e-07, "loss": 0.3127, "step": 16709 }, { "epoch": 0.8289101641946525, "grad_norm": 5.843040943145752, "learning_rate": 7.190199044013524e-07, "loss": 0.2541, "step": 16710 }, { "epoch": 0.8289597698298526, "grad_norm": 6.172683238983154, "learning_rate": 7.186133063068751e-07, "loss": 0.3195, "step": 16711 }, { "epoch": 0.8290093754650528, "grad_norm": 4.709130764007568, "learning_rate": 7.182068143070975e-07, "loss": 0.2854, "step": 16712 }, { "epoch": 0.829058981100253, "grad_norm": 10.445463180541992, "learning_rate": 7.178004284120915e-07, "loss": 0.2398, "step": 16713 }, { "epoch": 0.8291085867354532, "grad_norm": 5.326131820678711, "learning_rate": 7.173941486319291e-07, "loss": 0.2611, "step": 16714 }, { "epoch": 0.8291581923706534, "grad_norm": 8.466682434082031, "learning_rate": 7.169879749766745e-07, "loss": 0.3052, "step": 16715 }, { "epoch": 0.8292077980058534, "grad_norm": 12.551844596862793, "learning_rate": 7.165819074563979e-07, "loss": 0.4115, "step": 16716 }, { "epoch": 0.8292574036410536, "grad_norm": 9.98192024230957, "learning_rate": 7.161759460811579e-07, "loss": 0.3751, "step": 16717 }, { "epoch": 0.8293070092762538, "grad_norm": 13.926264762878418, "learning_rate": 7.15770090861016e-07, "loss": 0.3831, "step": 16718 }, { "epoch": 0.829356614911454, "grad_norm": 4.255605697631836, "learning_rate": 7.153643418060291e-07, "loss": 0.2655, "step": 16719 }, { "epoch": 0.8294062205466541, "grad_norm": 6.514428615570068, "learning_rate": 7.14958698926253e-07, "loss": 0.2914, "step": 16720 }, { "epoch": 0.8294558261818542, "grad_norm": 6.127142906188965, "learning_rate": 7.145531622317364e-07, "loss": 0.2145, "step": 16721 }, { "epoch": 0.8295054318170544, "grad_norm": 5.1387786865234375, "learning_rate": 7.141477317325329e-07, "loss": 0.3076, "step": 16722 }, { "epoch": 0.8295550374522546, "grad_norm": 6.8153228759765625, "learning_rate": 7.137424074386856e-07, "loss": 0.2293, "step": 16723 }, { "epoch": 0.8296046430874547, "grad_norm": 10.746614456176758, "learning_rate": 7.133371893602425e-07, "loss": 0.2813, "step": 16724 }, { "epoch": 0.8296542487226549, "grad_norm": 7.587164878845215, "learning_rate": 7.129320775072419e-07, "loss": 0.2895, "step": 16725 }, { "epoch": 0.8297038543578551, "grad_norm": 5.545583724975586, "learning_rate": 7.125270718897231e-07, "loss": 0.2823, "step": 16726 }, { "epoch": 0.8297534599930552, "grad_norm": 4.591845512390137, "learning_rate": 7.121221725177236e-07, "loss": 0.2315, "step": 16727 }, { "epoch": 0.8298030656282553, "grad_norm": 9.213038444519043, "learning_rate": 7.117173794012766e-07, "loss": 0.3589, "step": 16728 }, { "epoch": 0.8298526712634555, "grad_norm": 7.018911361694336, "learning_rate": 7.113126925504111e-07, "loss": 0.2189, "step": 16729 }, { "epoch": 0.8299022768986557, "grad_norm": 11.981236457824707, "learning_rate": 7.109081119751593e-07, "loss": 0.2356, "step": 16730 }, { "epoch": 0.8299518825338559, "grad_norm": 9.775952339172363, "learning_rate": 7.105036376855434e-07, "loss": 0.3003, "step": 16731 }, { "epoch": 0.8300014881690561, "grad_norm": 5.298151969909668, "learning_rate": 7.100992696915876e-07, "loss": 0.2853, "step": 16732 }, { "epoch": 0.8300510938042561, "grad_norm": 15.751189231872559, "learning_rate": 7.096950080033127e-07, "loss": 0.357, "step": 16733 }, { "epoch": 0.8301006994394563, "grad_norm": 5.706897258758545, "learning_rate": 7.092908526307368e-07, "loss": 0.3498, "step": 16734 }, { "epoch": 0.8301503050746565, "grad_norm": 9.469555854797363, "learning_rate": 7.08886803583873e-07, "loss": 0.2179, "step": 16735 }, { "epoch": 0.8301999107098567, "grad_norm": 4.3930134773254395, "learning_rate": 7.084828608727367e-07, "loss": 0.2287, "step": 16736 }, { "epoch": 0.8302495163450568, "grad_norm": 12.559610366821289, "learning_rate": 7.08079024507336e-07, "loss": 0.321, "step": 16737 }, { "epoch": 0.8302991219802569, "grad_norm": 5.830789089202881, "learning_rate": 7.076752944976783e-07, "loss": 0.2861, "step": 16738 }, { "epoch": 0.8303487276154571, "grad_norm": 15.456428527832031, "learning_rate": 7.072716708537681e-07, "loss": 0.4064, "step": 16739 }, { "epoch": 0.8303983332506573, "grad_norm": 10.791000366210938, "learning_rate": 7.068681535856081e-07, "loss": 0.3085, "step": 16740 }, { "epoch": 0.8304479388858574, "grad_norm": 18.6842098236084, "learning_rate": 7.064647427031967e-07, "loss": 0.3212, "step": 16741 }, { "epoch": 0.8304975445210576, "grad_norm": 8.38216781616211, "learning_rate": 7.060614382165332e-07, "loss": 0.3626, "step": 16742 }, { "epoch": 0.8305471501562578, "grad_norm": 10.259531021118164, "learning_rate": 7.056582401356066e-07, "loss": 0.2526, "step": 16743 }, { "epoch": 0.8305967557914579, "grad_norm": 12.371756553649902, "learning_rate": 7.05255148470414e-07, "loss": 0.2931, "step": 16744 }, { "epoch": 0.830646361426658, "grad_norm": 5.2901387214660645, "learning_rate": 7.048521632309402e-07, "loss": 0.2015, "step": 16745 }, { "epoch": 0.8306959670618582, "grad_norm": 12.820232391357422, "learning_rate": 7.044492844271727e-07, "loss": 0.3467, "step": 16746 }, { "epoch": 0.8307455726970584, "grad_norm": 7.172200679779053, "learning_rate": 7.040465120690948e-07, "loss": 0.3533, "step": 16747 }, { "epoch": 0.8307951783322586, "grad_norm": 4.846913814544678, "learning_rate": 7.036438461666878e-07, "loss": 0.2453, "step": 16748 }, { "epoch": 0.8308447839674588, "grad_norm": 9.210049629211426, "learning_rate": 7.032412867299298e-07, "loss": 0.2978, "step": 16749 }, { "epoch": 0.8308943896026588, "grad_norm": 11.2256441116333, "learning_rate": 7.02838833768798e-07, "loss": 0.3094, "step": 16750 }, { "epoch": 0.830943995237859, "grad_norm": 7.8708600997924805, "learning_rate": 7.024364872932621e-07, "loss": 0.2527, "step": 16751 }, { "epoch": 0.8309936008730592, "grad_norm": 11.837353706359863, "learning_rate": 7.02034247313294e-07, "loss": 0.2619, "step": 16752 }, { "epoch": 0.8310432065082594, "grad_norm": 5.24958610534668, "learning_rate": 7.016321138388615e-07, "loss": 0.239, "step": 16753 }, { "epoch": 0.8310928121434595, "grad_norm": 8.403358459472656, "learning_rate": 7.012300868799293e-07, "loss": 0.3076, "step": 16754 }, { "epoch": 0.8311424177786596, "grad_norm": 7.9360671043396, "learning_rate": 7.008281664464606e-07, "loss": 0.3164, "step": 16755 }, { "epoch": 0.8311920234138598, "grad_norm": 11.01785659790039, "learning_rate": 7.00426352548414e-07, "loss": 0.341, "step": 16756 }, { "epoch": 0.83124162904906, "grad_norm": 4.65323543548584, "learning_rate": 7.000246451957493e-07, "loss": 0.2058, "step": 16757 }, { "epoch": 0.8312912346842601, "grad_norm": 11.706035614013672, "learning_rate": 6.996230443984159e-07, "loss": 0.3217, "step": 16758 }, { "epoch": 0.8313408403194603, "grad_norm": 5.024621486663818, "learning_rate": 6.992215501663713e-07, "loss": 0.2796, "step": 16759 }, { "epoch": 0.8313904459546605, "grad_norm": 6.0816450119018555, "learning_rate": 6.988201625095614e-07, "loss": 0.2878, "step": 16760 }, { "epoch": 0.8314400515898606, "grad_norm": 15.285521507263184, "learning_rate": 6.984188814379328e-07, "loss": 0.4882, "step": 16761 }, { "epoch": 0.8314896572250607, "grad_norm": 4.140768051147461, "learning_rate": 6.980177069614308e-07, "loss": 0.2773, "step": 16762 }, { "epoch": 0.8315392628602609, "grad_norm": 4.541797637939453, "learning_rate": 6.976166390899958e-07, "loss": 0.1721, "step": 16763 }, { "epoch": 0.8315888684954611, "grad_norm": 12.241170883178711, "learning_rate": 6.972156778335665e-07, "loss": 0.237, "step": 16764 }, { "epoch": 0.8316384741306613, "grad_norm": 7.950922012329102, "learning_rate": 6.9681482320208e-07, "loss": 0.3583, "step": 16765 }, { "epoch": 0.8316880797658615, "grad_norm": 4.679736614227295, "learning_rate": 6.964140752054672e-07, "loss": 0.2553, "step": 16766 }, { "epoch": 0.8317376854010615, "grad_norm": 8.816436767578125, "learning_rate": 6.960134338536617e-07, "loss": 0.2818, "step": 16767 }, { "epoch": 0.8317872910362617, "grad_norm": 5.3140082359313965, "learning_rate": 6.956128991565897e-07, "loss": 0.2599, "step": 16768 }, { "epoch": 0.8318368966714619, "grad_norm": 21.447988510131836, "learning_rate": 6.95212471124177e-07, "loss": 0.4139, "step": 16769 }, { "epoch": 0.8318865023066621, "grad_norm": 7.693559169769287, "learning_rate": 6.948121497663462e-07, "loss": 0.2728, "step": 16770 }, { "epoch": 0.8319361079418622, "grad_norm": 6.903844833374023, "learning_rate": 6.944119350930195e-07, "loss": 0.358, "step": 16771 }, { "epoch": 0.8319857135770623, "grad_norm": 7.3772406578063965, "learning_rate": 6.9401182711411e-07, "loss": 0.2931, "step": 16772 }, { "epoch": 0.8320353192122625, "grad_norm": 12.1494722366333, "learning_rate": 6.936118258395375e-07, "loss": 0.4333, "step": 16773 }, { "epoch": 0.8320849248474627, "grad_norm": 5.721051216125488, "learning_rate": 6.932119312792112e-07, "loss": 0.287, "step": 16774 }, { "epoch": 0.8321345304826628, "grad_norm": 7.863195896148682, "learning_rate": 6.928121434430413e-07, "loss": 0.3273, "step": 16775 }, { "epoch": 0.832184136117863, "grad_norm": 7.021826267242432, "learning_rate": 6.924124623409345e-07, "loss": 0.3124, "step": 16776 }, { "epoch": 0.8322337417530632, "grad_norm": 8.81239128112793, "learning_rate": 6.920128879827959e-07, "loss": 0.3599, "step": 16777 }, { "epoch": 0.8322833473882633, "grad_norm": 7.706019401550293, "learning_rate": 6.916134203785263e-07, "loss": 0.1791, "step": 16778 }, { "epoch": 0.8323329530234634, "grad_norm": 8.601232528686523, "learning_rate": 6.912140595380262e-07, "loss": 0.2885, "step": 16779 }, { "epoch": 0.8323825586586636, "grad_norm": 3.8226797580718994, "learning_rate": 6.908148054711883e-07, "loss": 0.2374, "step": 16780 }, { "epoch": 0.8324321642938638, "grad_norm": 3.8471362590789795, "learning_rate": 6.904156581879112e-07, "loss": 0.2203, "step": 16781 }, { "epoch": 0.832481769929064, "grad_norm": 4.771932125091553, "learning_rate": 6.900166176980827e-07, "loss": 0.3069, "step": 16782 }, { "epoch": 0.8325313755642642, "grad_norm": 5.78900671005249, "learning_rate": 6.896176840115915e-07, "loss": 0.2403, "step": 16783 }, { "epoch": 0.8325809811994642, "grad_norm": 9.151007652282715, "learning_rate": 6.892188571383246e-07, "loss": 0.3425, "step": 16784 }, { "epoch": 0.8326305868346644, "grad_norm": 6.289120674133301, "learning_rate": 6.888201370881648e-07, "loss": 0.2568, "step": 16785 }, { "epoch": 0.8326801924698646, "grad_norm": 4.628958702087402, "learning_rate": 6.884215238709902e-07, "loss": 0.1884, "step": 16786 }, { "epoch": 0.8327297981050648, "grad_norm": 4.650777816772461, "learning_rate": 6.880230174966828e-07, "loss": 0.2867, "step": 16787 }, { "epoch": 0.8327794037402649, "grad_norm": 4.978809833526611, "learning_rate": 6.876246179751145e-07, "loss": 0.288, "step": 16788 }, { "epoch": 0.832829009375465, "grad_norm": 12.817635536193848, "learning_rate": 6.872263253161593e-07, "loss": 0.4667, "step": 16789 }, { "epoch": 0.8328786150106652, "grad_norm": 4.2394537925720215, "learning_rate": 6.868281395296861e-07, "loss": 0.3022, "step": 16790 }, { "epoch": 0.8329282206458654, "grad_norm": 8.402968406677246, "learning_rate": 6.864300606255636e-07, "loss": 0.2534, "step": 16791 }, { "epoch": 0.8329778262810655, "grad_norm": 10.996655464172363, "learning_rate": 6.860320886136535e-07, "loss": 0.3319, "step": 16792 }, { "epoch": 0.8330274319162657, "grad_norm": 7.877924919128418, "learning_rate": 6.856342235038222e-07, "loss": 0.3176, "step": 16793 }, { "epoch": 0.8330770375514659, "grad_norm": 11.121772766113281, "learning_rate": 6.852364653059245e-07, "loss": 0.3269, "step": 16794 }, { "epoch": 0.833126643186666, "grad_norm": 5.787689685821533, "learning_rate": 6.848388140298196e-07, "loss": 0.2396, "step": 16795 }, { "epoch": 0.8331762488218661, "grad_norm": 10.642431259155273, "learning_rate": 6.844412696853609e-07, "loss": 0.267, "step": 16796 }, { "epoch": 0.8332258544570663, "grad_norm": 4.819629192352295, "learning_rate": 6.840438322823994e-07, "loss": 0.2967, "step": 16797 }, { "epoch": 0.8332754600922665, "grad_norm": 5.85101842880249, "learning_rate": 6.836465018307842e-07, "loss": 0.2308, "step": 16798 }, { "epoch": 0.8333250657274667, "grad_norm": 5.455413341522217, "learning_rate": 6.832492783403621e-07, "loss": 0.2366, "step": 16799 }, { "epoch": 0.8333746713626669, "grad_norm": 13.754437446594238, "learning_rate": 6.828521618209738e-07, "loss": 0.3192, "step": 16800 }, { "epoch": 0.8334242769978669, "grad_norm": 4.9801025390625, "learning_rate": 6.824551522824635e-07, "loss": 0.2808, "step": 16801 }, { "epoch": 0.8334738826330671, "grad_norm": 7.692720413208008, "learning_rate": 6.820582497346667e-07, "loss": 0.2713, "step": 16802 }, { "epoch": 0.8335234882682673, "grad_norm": 4.622408866882324, "learning_rate": 6.816614541874195e-07, "loss": 0.2355, "step": 16803 }, { "epoch": 0.8335730939034675, "grad_norm": 12.55568790435791, "learning_rate": 6.81264765650555e-07, "loss": 0.2994, "step": 16804 }, { "epoch": 0.8336226995386676, "grad_norm": 4.993504047393799, "learning_rate": 6.808681841339032e-07, "loss": 0.3373, "step": 16805 }, { "epoch": 0.8336723051738677, "grad_norm": 8.045047760009766, "learning_rate": 6.804717096472918e-07, "loss": 0.2424, "step": 16806 }, { "epoch": 0.8337219108090679, "grad_norm": 5.926415920257568, "learning_rate": 6.800753422005452e-07, "loss": 0.2589, "step": 16807 }, { "epoch": 0.8337715164442681, "grad_norm": 5.574965000152588, "learning_rate": 6.796790818034865e-07, "loss": 0.2423, "step": 16808 }, { "epoch": 0.8338211220794682, "grad_norm": 6.475647926330566, "learning_rate": 6.792829284659336e-07, "loss": 0.3267, "step": 16809 }, { "epoch": 0.8338707277146684, "grad_norm": 5.766733646392822, "learning_rate": 6.788868821977041e-07, "loss": 0.1682, "step": 16810 }, { "epoch": 0.8339203333498686, "grad_norm": 3.4018964767456055, "learning_rate": 6.784909430086129e-07, "loss": 0.1819, "step": 16811 }, { "epoch": 0.8339699389850687, "grad_norm": 13.32458782196045, "learning_rate": 6.780951109084699e-07, "loss": 0.4282, "step": 16812 }, { "epoch": 0.8340195446202688, "grad_norm": 6.271411418914795, "learning_rate": 6.776993859070858e-07, "loss": 0.2207, "step": 16813 }, { "epoch": 0.834069150255469, "grad_norm": 6.720189094543457, "learning_rate": 6.773037680142658e-07, "loss": 0.3374, "step": 16814 }, { "epoch": 0.8341187558906692, "grad_norm": 6.93135404586792, "learning_rate": 6.769082572398139e-07, "loss": 0.3475, "step": 16815 }, { "epoch": 0.8341683615258694, "grad_norm": 13.159601211547852, "learning_rate": 6.765128535935322e-07, "loss": 0.4677, "step": 16816 }, { "epoch": 0.8342179671610696, "grad_norm": 5.828497886657715, "learning_rate": 6.761175570852158e-07, "loss": 0.2476, "step": 16817 }, { "epoch": 0.8342675727962696, "grad_norm": 12.249361038208008, "learning_rate": 6.757223677246638e-07, "loss": 0.4029, "step": 16818 }, { "epoch": 0.8343171784314698, "grad_norm": 7.446327209472656, "learning_rate": 6.753272855216669e-07, "loss": 0.2795, "step": 16819 }, { "epoch": 0.83436678406667, "grad_norm": 7.152336120605469, "learning_rate": 6.749323104860156e-07, "loss": 0.2908, "step": 16820 }, { "epoch": 0.8344163897018702, "grad_norm": 4.5261712074279785, "learning_rate": 6.745374426274987e-07, "loss": 0.2416, "step": 16821 }, { "epoch": 0.8344659953370703, "grad_norm": 6.541309356689453, "learning_rate": 6.741426819559016e-07, "loss": 0.2074, "step": 16822 }, { "epoch": 0.8345156009722704, "grad_norm": 6.812590599060059, "learning_rate": 6.737480284810033e-07, "loss": 0.2625, "step": 16823 }, { "epoch": 0.8345652066074706, "grad_norm": 6.049990177154541, "learning_rate": 6.733534822125876e-07, "loss": 0.2415, "step": 16824 }, { "epoch": 0.8346148122426708, "grad_norm": 9.115819931030273, "learning_rate": 6.729590431604293e-07, "loss": 0.2678, "step": 16825 }, { "epoch": 0.8346644178778709, "grad_norm": 7.219510555267334, "learning_rate": 6.725647113343031e-07, "loss": 0.31, "step": 16826 }, { "epoch": 0.8347140235130711, "grad_norm": 7.339024543762207, "learning_rate": 6.721704867439805e-07, "loss": 0.2722, "step": 16827 }, { "epoch": 0.8347636291482713, "grad_norm": 6.653574466705322, "learning_rate": 6.717763693992324e-07, "loss": 0.3124, "step": 16828 }, { "epoch": 0.8348132347834714, "grad_norm": 5.329239845275879, "learning_rate": 6.713823593098218e-07, "loss": 0.275, "step": 16829 }, { "epoch": 0.8348628404186715, "grad_norm": 5.006302833557129, "learning_rate": 6.709884564855156e-07, "loss": 0.3085, "step": 16830 }, { "epoch": 0.8349124460538717, "grad_norm": 10.84361457824707, "learning_rate": 6.705946609360725e-07, "loss": 0.3216, "step": 16831 }, { "epoch": 0.8349620516890719, "grad_norm": 10.585186958312988, "learning_rate": 6.702009726712538e-07, "loss": 0.278, "step": 16832 }, { "epoch": 0.8350116573242721, "grad_norm": 10.774072647094727, "learning_rate": 6.698073917008125e-07, "loss": 0.3175, "step": 16833 }, { "epoch": 0.8350612629594723, "grad_norm": 5.178319454193115, "learning_rate": 6.694139180345032e-07, "loss": 0.3053, "step": 16834 }, { "epoch": 0.8351108685946723, "grad_norm": 7.501613616943359, "learning_rate": 6.690205516820753e-07, "loss": 0.3219, "step": 16835 }, { "epoch": 0.8351604742298725, "grad_norm": 15.259232521057129, "learning_rate": 6.686272926532789e-07, "loss": 0.4723, "step": 16836 }, { "epoch": 0.8352100798650727, "grad_norm": 5.432710647583008, "learning_rate": 6.682341409578552e-07, "loss": 0.2699, "step": 16837 }, { "epoch": 0.8352596855002729, "grad_norm": 5.620805263519287, "learning_rate": 6.678410966055509e-07, "loss": 0.2982, "step": 16838 }, { "epoch": 0.835309291135473, "grad_norm": 7.145174503326416, "learning_rate": 6.674481596061027e-07, "loss": 0.219, "step": 16839 }, { "epoch": 0.8353588967706731, "grad_norm": 6.476187705993652, "learning_rate": 6.670553299692495e-07, "loss": 0.2516, "step": 16840 }, { "epoch": 0.8354085024058733, "grad_norm": 8.947511672973633, "learning_rate": 6.666626077047244e-07, "loss": 0.2956, "step": 16841 }, { "epoch": 0.8354581080410735, "grad_norm": 11.134811401367188, "learning_rate": 6.662699928222615e-07, "loss": 0.3356, "step": 16842 }, { "epoch": 0.8355077136762736, "grad_norm": 6.297872066497803, "learning_rate": 6.658774853315869e-07, "loss": 0.2823, "step": 16843 }, { "epoch": 0.8355573193114738, "grad_norm": 6.1600871086120605, "learning_rate": 6.654850852424305e-07, "loss": 0.3888, "step": 16844 }, { "epoch": 0.835606924946674, "grad_norm": 9.689661979675293, "learning_rate": 6.650927925645135e-07, "loss": 0.3458, "step": 16845 }, { "epoch": 0.8356565305818741, "grad_norm": 5.018188953399658, "learning_rate": 6.647006073075574e-07, "loss": 0.311, "step": 16846 }, { "epoch": 0.8357061362170742, "grad_norm": 12.932604789733887, "learning_rate": 6.643085294812817e-07, "loss": 0.2703, "step": 16847 }, { "epoch": 0.8357557418522744, "grad_norm": 4.835220813751221, "learning_rate": 6.639165590954022e-07, "loss": 0.2898, "step": 16848 }, { "epoch": 0.8358053474874746, "grad_norm": 5.742145538330078, "learning_rate": 6.635246961596314e-07, "loss": 0.3082, "step": 16849 }, { "epoch": 0.8358549531226748, "grad_norm": 4.525119304656982, "learning_rate": 6.631329406836812e-07, "loss": 0.29, "step": 16850 }, { "epoch": 0.835904558757875, "grad_norm": 7.9931111335754395, "learning_rate": 6.627412926772564e-07, "loss": 0.2245, "step": 16851 }, { "epoch": 0.835954164393075, "grad_norm": 7.684970855712891, "learning_rate": 6.623497521500672e-07, "loss": 0.3207, "step": 16852 }, { "epoch": 0.8360037700282752, "grad_norm": 6.8302435874938965, "learning_rate": 6.619583191118112e-07, "loss": 0.2466, "step": 16853 }, { "epoch": 0.8360533756634754, "grad_norm": 6.601719856262207, "learning_rate": 6.615669935721908e-07, "loss": 0.3186, "step": 16854 }, { "epoch": 0.8361029812986756, "grad_norm": 9.10964298248291, "learning_rate": 6.611757755409027e-07, "loss": 0.2062, "step": 16855 }, { "epoch": 0.8361525869338757, "grad_norm": 3.8534958362579346, "learning_rate": 6.607846650276417e-07, "loss": 0.2726, "step": 16856 }, { "epoch": 0.8362021925690758, "grad_norm": 4.632150173187256, "learning_rate": 6.603936620420998e-07, "loss": 0.195, "step": 16857 }, { "epoch": 0.836251798204276, "grad_norm": 5.194397449493408, "learning_rate": 6.600027665939662e-07, "loss": 0.1998, "step": 16858 }, { "epoch": 0.8363014038394762, "grad_norm": 4.130980491638184, "learning_rate": 6.596119786929267e-07, "loss": 0.2129, "step": 16859 }, { "epoch": 0.8363510094746763, "grad_norm": 9.562078475952148, "learning_rate": 6.592212983486662e-07, "loss": 0.3127, "step": 16860 }, { "epoch": 0.8364006151098765, "grad_norm": 9.721393585205078, "learning_rate": 6.588307255708649e-07, "loss": 0.2878, "step": 16861 }, { "epoch": 0.8364502207450767, "grad_norm": 8.030320167541504, "learning_rate": 6.584402603692019e-07, "loss": 0.2957, "step": 16862 }, { "epoch": 0.8364998263802768, "grad_norm": 4.383976936340332, "learning_rate": 6.580499027533533e-07, "loss": 0.2319, "step": 16863 }, { "epoch": 0.8365494320154769, "grad_norm": 4.40280818939209, "learning_rate": 6.576596527329921e-07, "loss": 0.2126, "step": 16864 }, { "epoch": 0.8365990376506771, "grad_norm": 7.759068489074707, "learning_rate": 6.572695103177901e-07, "loss": 0.2872, "step": 16865 }, { "epoch": 0.8366486432858773, "grad_norm": 7.257604122161865, "learning_rate": 6.568794755174119e-07, "loss": 0.2873, "step": 16866 }, { "epoch": 0.8366982489210775, "grad_norm": 9.518985748291016, "learning_rate": 6.564895483415274e-07, "loss": 0.3744, "step": 16867 }, { "epoch": 0.8367478545562776, "grad_norm": 7.197405815124512, "learning_rate": 6.56099728799795e-07, "loss": 0.2887, "step": 16868 }, { "epoch": 0.8367974601914777, "grad_norm": 6.958996295928955, "learning_rate": 6.557100169018766e-07, "loss": 0.2829, "step": 16869 }, { "epoch": 0.8368470658266779, "grad_norm": 4.964828968048096, "learning_rate": 6.553204126574292e-07, "loss": 0.249, "step": 16870 }, { "epoch": 0.8368966714618781, "grad_norm": 8.453819274902344, "learning_rate": 6.54930916076107e-07, "loss": 0.3214, "step": 16871 }, { "epoch": 0.8369462770970783, "grad_norm": 6.119584560394287, "learning_rate": 6.545415271675626e-07, "loss": 0.2079, "step": 16872 }, { "epoch": 0.8369958827322784, "grad_norm": 6.198323726654053, "learning_rate": 6.541522459414457e-07, "loss": 0.24, "step": 16873 }, { "epoch": 0.8370454883674785, "grad_norm": 4.464644908905029, "learning_rate": 6.537630724073995e-07, "loss": 0.2206, "step": 16874 }, { "epoch": 0.8370950940026787, "grad_norm": 4.4883713722229, "learning_rate": 6.533740065750727e-07, "loss": 0.2219, "step": 16875 }, { "epoch": 0.8371446996378789, "grad_norm": 5.405082702636719, "learning_rate": 6.529850484541034e-07, "loss": 0.2229, "step": 16876 }, { "epoch": 0.837194305273079, "grad_norm": 5.884715557098389, "learning_rate": 6.525961980541307e-07, "loss": 0.2925, "step": 16877 }, { "epoch": 0.8372439109082792, "grad_norm": 8.6102294921875, "learning_rate": 6.522074553847912e-07, "loss": 0.3127, "step": 16878 }, { "epoch": 0.8372935165434794, "grad_norm": 3.8057861328125, "learning_rate": 6.518188204557179e-07, "loss": 0.1738, "step": 16879 }, { "epoch": 0.8373431221786795, "grad_norm": 4.256016254425049, "learning_rate": 6.514302932765399e-07, "loss": 0.1797, "step": 16880 }, { "epoch": 0.8373927278138796, "grad_norm": 10.277345657348633, "learning_rate": 6.51041873856888e-07, "loss": 0.2356, "step": 16881 }, { "epoch": 0.8374423334490798, "grad_norm": 7.208406448364258, "learning_rate": 6.506535622063847e-07, "loss": 0.3194, "step": 16882 }, { "epoch": 0.83749193908428, "grad_norm": 8.226951599121094, "learning_rate": 6.502653583346536e-07, "loss": 0.2733, "step": 16883 }, { "epoch": 0.8375415447194802, "grad_norm": 8.618010520935059, "learning_rate": 6.498772622513144e-07, "loss": 0.3434, "step": 16884 }, { "epoch": 0.8375911503546803, "grad_norm": 12.426565170288086, "learning_rate": 6.494892739659847e-07, "loss": 0.4111, "step": 16885 }, { "epoch": 0.8376407559898804, "grad_norm": 5.222794055938721, "learning_rate": 6.491013934882784e-07, "loss": 0.2758, "step": 16886 }, { "epoch": 0.8376903616250806, "grad_norm": 4.479767799377441, "learning_rate": 6.487136208278089e-07, "loss": 0.247, "step": 16887 }, { "epoch": 0.8377399672602808, "grad_norm": 9.507922172546387, "learning_rate": 6.483259559941818e-07, "loss": 0.2406, "step": 16888 }, { "epoch": 0.837789572895481, "grad_norm": 8.569005012512207, "learning_rate": 6.479383989970084e-07, "loss": 0.2735, "step": 16889 }, { "epoch": 0.8378391785306811, "grad_norm": 6.445131301879883, "learning_rate": 6.475509498458892e-07, "loss": 0.3319, "step": 16890 }, { "epoch": 0.8378887841658812, "grad_norm": 5.509575366973877, "learning_rate": 6.471636085504263e-07, "loss": 0.2453, "step": 16891 }, { "epoch": 0.8379383898010814, "grad_norm": 5.287723064422607, "learning_rate": 6.467763751202177e-07, "loss": 0.238, "step": 16892 }, { "epoch": 0.8379879954362816, "grad_norm": 10.118480682373047, "learning_rate": 6.46389249564861e-07, "loss": 0.3144, "step": 16893 }, { "epoch": 0.8380376010714817, "grad_norm": 6.277280807495117, "learning_rate": 6.460022318939457e-07, "loss": 0.3276, "step": 16894 }, { "epoch": 0.8380872067066819, "grad_norm": 5.687836170196533, "learning_rate": 6.456153221170675e-07, "loss": 0.1937, "step": 16895 }, { "epoch": 0.8381368123418821, "grad_norm": 5.237370491027832, "learning_rate": 6.452285202438096e-07, "loss": 0.2658, "step": 16896 }, { "epoch": 0.8381864179770822, "grad_norm": 11.722803115844727, "learning_rate": 6.448418262837592e-07, "loss": 0.35, "step": 16897 }, { "epoch": 0.8382360236122823, "grad_norm": 8.967571258544922, "learning_rate": 6.444552402464982e-07, "loss": 0.3799, "step": 16898 }, { "epoch": 0.8382856292474825, "grad_norm": 6.504181385040283, "learning_rate": 6.440687621416075e-07, "loss": 0.2072, "step": 16899 }, { "epoch": 0.8383352348826827, "grad_norm": 6.516653537750244, "learning_rate": 6.436823919786611e-07, "loss": 0.2695, "step": 16900 }, { "epoch": 0.8383848405178829, "grad_norm": 9.572186470031738, "learning_rate": 6.43296129767238e-07, "loss": 0.2849, "step": 16901 }, { "epoch": 0.8384344461530829, "grad_norm": 4.267997741699219, "learning_rate": 6.429099755169054e-07, "loss": 0.1785, "step": 16902 }, { "epoch": 0.8384840517882831, "grad_norm": 8.883447647094727, "learning_rate": 6.425239292372365e-07, "loss": 0.2995, "step": 16903 }, { "epoch": 0.8385336574234833, "grad_norm": 11.96962833404541, "learning_rate": 6.421379909377951e-07, "loss": 0.4071, "step": 16904 }, { "epoch": 0.8385832630586835, "grad_norm": 10.629321098327637, "learning_rate": 6.417521606281452e-07, "loss": 0.3177, "step": 16905 }, { "epoch": 0.8386328686938836, "grad_norm": 4.180134296417236, "learning_rate": 6.413664383178481e-07, "loss": 0.1663, "step": 16906 }, { "epoch": 0.8386824743290838, "grad_norm": 5.89580774307251, "learning_rate": 6.409808240164639e-07, "loss": 0.2477, "step": 16907 }, { "epoch": 0.8387320799642839, "grad_norm": 7.786810874938965, "learning_rate": 6.405953177335439e-07, "loss": 0.2685, "step": 16908 }, { "epoch": 0.8387816855994841, "grad_norm": 7.673425197601318, "learning_rate": 6.402099194786466e-07, "loss": 0.2696, "step": 16909 }, { "epoch": 0.8388312912346843, "grad_norm": 7.040525436401367, "learning_rate": 6.39824629261318e-07, "loss": 0.2395, "step": 16910 }, { "epoch": 0.8388808968698844, "grad_norm": 5.132191181182861, "learning_rate": 6.394394470911075e-07, "loss": 0.2406, "step": 16911 }, { "epoch": 0.8389305025050846, "grad_norm": 9.46186637878418, "learning_rate": 6.390543729775605e-07, "loss": 0.3182, "step": 16912 }, { "epoch": 0.8389801081402848, "grad_norm": 7.500753879547119, "learning_rate": 6.386694069302185e-07, "loss": 0.2384, "step": 16913 }, { "epoch": 0.8390297137754849, "grad_norm": 4.312313556671143, "learning_rate": 6.382845489586214e-07, "loss": 0.2568, "step": 16914 }, { "epoch": 0.839079319410685, "grad_norm": 10.423419952392578, "learning_rate": 6.378997990723063e-07, "loss": 0.271, "step": 16915 }, { "epoch": 0.8391289250458852, "grad_norm": 3.990480899810791, "learning_rate": 6.37515157280808e-07, "loss": 0.2378, "step": 16916 }, { "epoch": 0.8391785306810854, "grad_norm": 8.11054515838623, "learning_rate": 6.371306235936569e-07, "loss": 0.3228, "step": 16917 }, { "epoch": 0.8392281363162856, "grad_norm": 5.5699782371521, "learning_rate": 6.367461980203826e-07, "loss": 0.2164, "step": 16918 }, { "epoch": 0.8392777419514856, "grad_norm": 5.900259017944336, "learning_rate": 6.363618805705107e-07, "loss": 0.3194, "step": 16919 }, { "epoch": 0.8393273475866858, "grad_norm": 6.436413764953613, "learning_rate": 6.359776712535654e-07, "loss": 0.3326, "step": 16920 }, { "epoch": 0.839376953221886, "grad_norm": 7.759904384613037, "learning_rate": 6.355935700790677e-07, "loss": 0.2623, "step": 16921 }, { "epoch": 0.8394265588570862, "grad_norm": 5.870636463165283, "learning_rate": 6.352095770565358e-07, "loss": 0.1217, "step": 16922 }, { "epoch": 0.8394761644922863, "grad_norm": 4.945733547210693, "learning_rate": 6.348256921954843e-07, "loss": 0.2546, "step": 16923 }, { "epoch": 0.8395257701274865, "grad_norm": 10.539031982421875, "learning_rate": 6.344419155054282e-07, "loss": 0.326, "step": 16924 }, { "epoch": 0.8395753757626866, "grad_norm": 5.3817620277404785, "learning_rate": 6.340582469958745e-07, "loss": 0.2796, "step": 16925 }, { "epoch": 0.8396249813978868, "grad_norm": 4.229772090911865, "learning_rate": 6.336746866763338e-07, "loss": 0.2188, "step": 16926 }, { "epoch": 0.839674587033087, "grad_norm": 3.425105094909668, "learning_rate": 6.332912345563086e-07, "loss": 0.1823, "step": 16927 }, { "epoch": 0.8397241926682871, "grad_norm": 5.663559436798096, "learning_rate": 6.329078906453018e-07, "loss": 0.3195, "step": 16928 }, { "epoch": 0.8397737983034873, "grad_norm": 7.898871898651123, "learning_rate": 6.32524654952813e-07, "loss": 0.3722, "step": 16929 }, { "epoch": 0.8398234039386875, "grad_norm": 7.743161201477051, "learning_rate": 6.321415274883403e-07, "loss": 0.3166, "step": 16930 }, { "epoch": 0.8398730095738876, "grad_norm": 6.468167781829834, "learning_rate": 6.317585082613736e-07, "loss": 0.2737, "step": 16931 }, { "epoch": 0.8399226152090877, "grad_norm": 9.124873161315918, "learning_rate": 6.313755972814095e-07, "loss": 0.2588, "step": 16932 }, { "epoch": 0.8399722208442879, "grad_norm": 6.653195381164551, "learning_rate": 6.309927945579336e-07, "loss": 0.2297, "step": 16933 }, { "epoch": 0.8400218264794881, "grad_norm": 3.6497576236724854, "learning_rate": 6.306101001004317e-07, "loss": 0.178, "step": 16934 }, { "epoch": 0.8400714321146883, "grad_norm": 9.807635307312012, "learning_rate": 6.302275139183889e-07, "loss": 0.2412, "step": 16935 }, { "epoch": 0.8401210377498883, "grad_norm": 3.8113036155700684, "learning_rate": 6.298450360212854e-07, "loss": 0.1619, "step": 16936 }, { "epoch": 0.8401706433850885, "grad_norm": 15.123814582824707, "learning_rate": 6.294626664185965e-07, "loss": 0.352, "step": 16937 }, { "epoch": 0.8402202490202887, "grad_norm": 6.774657249450684, "learning_rate": 6.290804051198024e-07, "loss": 0.2087, "step": 16938 }, { "epoch": 0.8402698546554889, "grad_norm": 6.080682277679443, "learning_rate": 6.286982521343704e-07, "loss": 0.2808, "step": 16939 }, { "epoch": 0.840319460290689, "grad_norm": 11.903654098510742, "learning_rate": 6.283162074717758e-07, "loss": 0.4088, "step": 16940 }, { "epoch": 0.8403690659258892, "grad_norm": 6.433358192443848, "learning_rate": 6.279342711414816e-07, "loss": 0.3694, "step": 16941 }, { "epoch": 0.8404186715610893, "grad_norm": 10.310050010681152, "learning_rate": 6.275524431529539e-07, "loss": 0.2533, "step": 16942 }, { "epoch": 0.8404682771962895, "grad_norm": 6.608763694763184, "learning_rate": 6.271707235156549e-07, "loss": 0.2259, "step": 16943 }, { "epoch": 0.8405178828314896, "grad_norm": 10.812410354614258, "learning_rate": 6.26789112239044e-07, "loss": 0.3409, "step": 16944 }, { "epoch": 0.8405674884666898, "grad_norm": 8.427349090576172, "learning_rate": 6.264076093325749e-07, "loss": 0.3785, "step": 16945 }, { "epoch": 0.84061709410189, "grad_norm": 9.10367488861084, "learning_rate": 6.260262148057061e-07, "loss": 0.353, "step": 16946 }, { "epoch": 0.8406666997370902, "grad_norm": 8.60661506652832, "learning_rate": 6.256449286678851e-07, "loss": 0.3368, "step": 16947 }, { "epoch": 0.8407163053722903, "grad_norm": 6.5172905921936035, "learning_rate": 6.252637509285614e-07, "loss": 0.3171, "step": 16948 }, { "epoch": 0.8407659110074904, "grad_norm": 8.355107307434082, "learning_rate": 6.248826815971814e-07, "loss": 0.2774, "step": 16949 }, { "epoch": 0.8408155166426906, "grad_norm": 4.366506099700928, "learning_rate": 6.245017206831883e-07, "loss": 0.216, "step": 16950 }, { "epoch": 0.8408651222778908, "grad_norm": 3.995732307434082, "learning_rate": 6.241208681960193e-07, "loss": 0.2341, "step": 16951 }, { "epoch": 0.840914727913091, "grad_norm": 10.873483657836914, "learning_rate": 6.237401241451174e-07, "loss": 0.3665, "step": 16952 }, { "epoch": 0.840964333548291, "grad_norm": 6.812661170959473, "learning_rate": 6.233594885399136e-07, "loss": 0.2333, "step": 16953 }, { "epoch": 0.8410139391834912, "grad_norm": 6.560635089874268, "learning_rate": 6.229789613898412e-07, "loss": 0.3134, "step": 16954 }, { "epoch": 0.8410635448186914, "grad_norm": 12.477943420410156, "learning_rate": 6.225985427043302e-07, "loss": 0.3447, "step": 16955 }, { "epoch": 0.8411131504538916, "grad_norm": 5.25577449798584, "learning_rate": 6.222182324928072e-07, "loss": 0.2699, "step": 16956 }, { "epoch": 0.8411627560890917, "grad_norm": 9.199825286865234, "learning_rate": 6.21838030764697e-07, "loss": 0.3496, "step": 16957 }, { "epoch": 0.8412123617242919, "grad_norm": 8.16813850402832, "learning_rate": 6.214579375294216e-07, "loss": 0.2991, "step": 16958 }, { "epoch": 0.841261967359492, "grad_norm": 6.988177299499512, "learning_rate": 6.210779527963972e-07, "loss": 0.2211, "step": 16959 }, { "epoch": 0.8413115729946922, "grad_norm": 8.285245895385742, "learning_rate": 6.206980765750443e-07, "loss": 0.3123, "step": 16960 }, { "epoch": 0.8413611786298923, "grad_norm": 21.290254592895508, "learning_rate": 6.203183088747727e-07, "loss": 0.3698, "step": 16961 }, { "epoch": 0.8414107842650925, "grad_norm": 5.747694969177246, "learning_rate": 6.199386497049947e-07, "loss": 0.2761, "step": 16962 }, { "epoch": 0.8414603899002927, "grad_norm": 11.207366943359375, "learning_rate": 6.195590990751182e-07, "loss": 0.2924, "step": 16963 }, { "epoch": 0.8415099955354929, "grad_norm": 10.736361503601074, "learning_rate": 6.191796569945485e-07, "loss": 0.27, "step": 16964 }, { "epoch": 0.841559601170693, "grad_norm": 4.874240875244141, "learning_rate": 6.188003234726892e-07, "loss": 0.3204, "step": 16965 }, { "epoch": 0.8416092068058931, "grad_norm": 8.034700393676758, "learning_rate": 6.184210985189399e-07, "loss": 0.3314, "step": 16966 }, { "epoch": 0.8416588124410933, "grad_norm": 16.503461837768555, "learning_rate": 6.180419821426975e-07, "loss": 0.3597, "step": 16967 }, { "epoch": 0.8417084180762935, "grad_norm": 5.317646026611328, "learning_rate": 6.176629743533563e-07, "loss": 0.2928, "step": 16968 }, { "epoch": 0.8417580237114937, "grad_norm": 4.952462673187256, "learning_rate": 6.172840751603093e-07, "loss": 0.1808, "step": 16969 }, { "epoch": 0.8418076293466937, "grad_norm": 5.201773166656494, "learning_rate": 6.169052845729457e-07, "loss": 0.3056, "step": 16970 }, { "epoch": 0.8418572349818939, "grad_norm": 6.746832370758057, "learning_rate": 6.165266026006512e-07, "loss": 0.3413, "step": 16971 }, { "epoch": 0.8419068406170941, "grad_norm": 9.963760375976562, "learning_rate": 6.16148029252811e-07, "loss": 0.2867, "step": 16972 }, { "epoch": 0.8419564462522943, "grad_norm": 12.32260799407959, "learning_rate": 6.157695645388051e-07, "loss": 0.3062, "step": 16973 }, { "epoch": 0.8420060518874944, "grad_norm": 6.096014976501465, "learning_rate": 6.153912084680131e-07, "loss": 0.3358, "step": 16974 }, { "epoch": 0.8420556575226946, "grad_norm": 11.555359840393066, "learning_rate": 6.150129610498112e-07, "loss": 0.2402, "step": 16975 }, { "epoch": 0.8421052631578947, "grad_norm": 5.992761611938477, "learning_rate": 6.146348222935705e-07, "loss": 0.2667, "step": 16976 }, { "epoch": 0.8421548687930949, "grad_norm": 5.945949554443359, "learning_rate": 6.142567922086623e-07, "loss": 0.2632, "step": 16977 }, { "epoch": 0.842204474428295, "grad_norm": 6.381432056427002, "learning_rate": 6.138788708044552e-07, "loss": 0.2143, "step": 16978 }, { "epoch": 0.8422540800634952, "grad_norm": 10.84124755859375, "learning_rate": 6.135010580903139e-07, "loss": 0.235, "step": 16979 }, { "epoch": 0.8423036856986954, "grad_norm": 9.748680114746094, "learning_rate": 6.131233540755998e-07, "loss": 0.3149, "step": 16980 }, { "epoch": 0.8423532913338956, "grad_norm": 7.355239391326904, "learning_rate": 6.127457587696751e-07, "loss": 0.2549, "step": 16981 }, { "epoch": 0.8424028969690956, "grad_norm": 6.145259857177734, "learning_rate": 6.12368272181893e-07, "loss": 0.1855, "step": 16982 }, { "epoch": 0.8424525026042958, "grad_norm": 5.930918216705322, "learning_rate": 6.119908943216119e-07, "loss": 0.2805, "step": 16983 }, { "epoch": 0.842502108239496, "grad_norm": 9.156641006469727, "learning_rate": 6.116136251981796e-07, "loss": 0.2843, "step": 16984 }, { "epoch": 0.8425517138746962, "grad_norm": 5.454034328460693, "learning_rate": 6.112364648209473e-07, "loss": 0.3394, "step": 16985 }, { "epoch": 0.8426013195098964, "grad_norm": 6.9781060218811035, "learning_rate": 6.108594131992607e-07, "loss": 0.2438, "step": 16986 }, { "epoch": 0.8426509251450964, "grad_norm": 9.311699867248535, "learning_rate": 6.104824703424645e-07, "loss": 0.4169, "step": 16987 }, { "epoch": 0.8427005307802966, "grad_norm": 6.073285102844238, "learning_rate": 6.101056362598962e-07, "loss": 0.2679, "step": 16988 }, { "epoch": 0.8427501364154968, "grad_norm": 7.125565528869629, "learning_rate": 6.097289109608978e-07, "loss": 0.3116, "step": 16989 }, { "epoch": 0.842799742050697, "grad_norm": 6.033172607421875, "learning_rate": 6.09352294454802e-07, "loss": 0.2721, "step": 16990 }, { "epoch": 0.8428493476858971, "grad_norm": 4.357297420501709, "learning_rate": 6.089757867509422e-07, "loss": 0.28, "step": 16991 }, { "epoch": 0.8428989533210973, "grad_norm": 6.577086448669434, "learning_rate": 6.085993878586488e-07, "loss": 0.3191, "step": 16992 }, { "epoch": 0.8429485589562974, "grad_norm": 5.6396164894104, "learning_rate": 6.082230977872488e-07, "loss": 0.2602, "step": 16993 }, { "epoch": 0.8429981645914976, "grad_norm": 8.050936698913574, "learning_rate": 6.078469165460676e-07, "loss": 0.2957, "step": 16994 }, { "epoch": 0.8430477702266977, "grad_norm": 6.576452732086182, "learning_rate": 6.074708441444272e-07, "loss": 0.2205, "step": 16995 }, { "epoch": 0.8430973758618979, "grad_norm": 8.640097618103027, "learning_rate": 6.070948805916443e-07, "loss": 0.2461, "step": 16996 }, { "epoch": 0.8431469814970981, "grad_norm": 7.375662803649902, "learning_rate": 6.067190258970396e-07, "loss": 0.2333, "step": 16997 }, { "epoch": 0.8431965871322983, "grad_norm": 5.262808799743652, "learning_rate": 6.063432800699237e-07, "loss": 0.3372, "step": 16998 }, { "epoch": 0.8432461927674983, "grad_norm": 5.254796504974365, "learning_rate": 6.059676431196087e-07, "loss": 0.2558, "step": 16999 }, { "epoch": 0.8432957984026985, "grad_norm": 4.568641185760498, "learning_rate": 6.055921150554028e-07, "loss": 0.2557, "step": 17000 }, { "epoch": 0.8433454040378987, "grad_norm": 7.453451633453369, "learning_rate": 6.05216695886614e-07, "loss": 0.2297, "step": 17001 }, { "epoch": 0.8433950096730989, "grad_norm": 13.220282554626465, "learning_rate": 6.048413856225404e-07, "loss": 0.3759, "step": 17002 }, { "epoch": 0.8434446153082991, "grad_norm": 5.435853958129883, "learning_rate": 6.04466184272488e-07, "loss": 0.2219, "step": 17003 }, { "epoch": 0.8434942209434991, "grad_norm": 7.838186264038086, "learning_rate": 6.04091091845751e-07, "loss": 0.2883, "step": 17004 }, { "epoch": 0.8435438265786993, "grad_norm": 5.326371669769287, "learning_rate": 6.037161083516247e-07, "loss": 0.1445, "step": 17005 }, { "epoch": 0.8435934322138995, "grad_norm": 12.227433204650879, "learning_rate": 6.033412337994021e-07, "loss": 0.3353, "step": 17006 }, { "epoch": 0.8436430378490997, "grad_norm": 7.570695400238037, "learning_rate": 6.029664681983737e-07, "loss": 0.2744, "step": 17007 }, { "epoch": 0.8436926434842998, "grad_norm": 8.974329948425293, "learning_rate": 6.025918115578233e-07, "loss": 0.3795, "step": 17008 }, { "epoch": 0.8437422491195, "grad_norm": 8.464073181152344, "learning_rate": 6.022172638870388e-07, "loss": 0.2613, "step": 17009 }, { "epoch": 0.8437918547547001, "grad_norm": 8.6527738571167, "learning_rate": 6.018428251952979e-07, "loss": 0.3866, "step": 17010 }, { "epoch": 0.8438414603899003, "grad_norm": 5.8680419921875, "learning_rate": 6.014684954918831e-07, "loss": 0.264, "step": 17011 }, { "epoch": 0.8438910660251004, "grad_norm": 7.889405250549316, "learning_rate": 6.010942747860677e-07, "loss": 0.3681, "step": 17012 }, { "epoch": 0.8439406716603006, "grad_norm": 7.714733600616455, "learning_rate": 6.007201630871262e-07, "loss": 0.273, "step": 17013 }, { "epoch": 0.8439902772955008, "grad_norm": 14.094736099243164, "learning_rate": 6.003461604043292e-07, "loss": 0.4009, "step": 17014 }, { "epoch": 0.844039882930701, "grad_norm": 8.630135536193848, "learning_rate": 5.999722667469448e-07, "loss": 0.2456, "step": 17015 }, { "epoch": 0.844089488565901, "grad_norm": 9.478797912597656, "learning_rate": 5.995984821242368e-07, "loss": 0.3716, "step": 17016 }, { "epoch": 0.8441390942011012, "grad_norm": 6.183563232421875, "learning_rate": 5.992248065454704e-07, "loss": 0.2759, "step": 17017 }, { "epoch": 0.8441886998363014, "grad_norm": 11.169540405273438, "learning_rate": 5.988512400199031e-07, "loss": 0.3678, "step": 17018 }, { "epoch": 0.8442383054715016, "grad_norm": 6.68020486831665, "learning_rate": 5.984777825567933e-07, "loss": 0.2729, "step": 17019 }, { "epoch": 0.8442879111067018, "grad_norm": 7.309153079986572, "learning_rate": 5.981044341653947e-07, "loss": 0.2661, "step": 17020 }, { "epoch": 0.8443375167419018, "grad_norm": 11.465194702148438, "learning_rate": 5.977311948549597e-07, "loss": 0.4962, "step": 17021 }, { "epoch": 0.844387122377102, "grad_norm": 3.4945826530456543, "learning_rate": 5.973580646347365e-07, "loss": 0.1864, "step": 17022 }, { "epoch": 0.8444367280123022, "grad_norm": 9.999767303466797, "learning_rate": 5.969850435139723e-07, "loss": 0.3442, "step": 17023 }, { "epoch": 0.8444863336475024, "grad_norm": 6.252696990966797, "learning_rate": 5.966121315019119e-07, "loss": 0.3225, "step": 17024 }, { "epoch": 0.8445359392827025, "grad_norm": 5.534495830535889, "learning_rate": 5.962393286077933e-07, "loss": 0.1803, "step": 17025 }, { "epoch": 0.8445855449179027, "grad_norm": 4.101344108581543, "learning_rate": 5.958666348408559e-07, "loss": 0.236, "step": 17026 }, { "epoch": 0.8446351505531028, "grad_norm": 7.630739212036133, "learning_rate": 5.954940502103362e-07, "loss": 0.4061, "step": 17027 }, { "epoch": 0.844684756188303, "grad_norm": 8.667587280273438, "learning_rate": 5.951215747254658e-07, "loss": 0.3475, "step": 17028 }, { "epoch": 0.8447343618235031, "grad_norm": 4.751222133636475, "learning_rate": 5.947492083954753e-07, "loss": 0.2403, "step": 17029 }, { "epoch": 0.8447839674587033, "grad_norm": 5.0957465171813965, "learning_rate": 5.943769512295922e-07, "loss": 0.2606, "step": 17030 }, { "epoch": 0.8448335730939035, "grad_norm": 8.847123146057129, "learning_rate": 5.94004803237041e-07, "loss": 0.2114, "step": 17031 }, { "epoch": 0.8448831787291037, "grad_norm": 4.891839504241943, "learning_rate": 5.936327644270451e-07, "loss": 0.2224, "step": 17032 }, { "epoch": 0.8449327843643037, "grad_norm": 6.33991813659668, "learning_rate": 5.932608348088215e-07, "loss": 0.2293, "step": 17033 }, { "epoch": 0.8449823899995039, "grad_norm": 9.790157318115234, "learning_rate": 5.928890143915877e-07, "loss": 0.3293, "step": 17034 }, { "epoch": 0.8450319956347041, "grad_norm": 7.8209357261657715, "learning_rate": 5.925173031845577e-07, "loss": 0.2743, "step": 17035 }, { "epoch": 0.8450816012699043, "grad_norm": 7.435624122619629, "learning_rate": 5.921457011969428e-07, "loss": 0.2922, "step": 17036 }, { "epoch": 0.8451312069051045, "grad_norm": 5.8227763175964355, "learning_rate": 5.917742084379513e-07, "loss": 0.2237, "step": 17037 }, { "epoch": 0.8451808125403045, "grad_norm": 5.01591682434082, "learning_rate": 5.914028249167896e-07, "loss": 0.2714, "step": 17038 }, { "epoch": 0.8452304181755047, "grad_norm": 5.09599494934082, "learning_rate": 5.910315506426584e-07, "loss": 0.3271, "step": 17039 }, { "epoch": 0.8452800238107049, "grad_norm": 5.476105690002441, "learning_rate": 5.906603856247616e-07, "loss": 0.2839, "step": 17040 }, { "epoch": 0.8453296294459051, "grad_norm": 6.567257404327393, "learning_rate": 5.902893298722945e-07, "loss": 0.2371, "step": 17041 }, { "epoch": 0.8453792350811052, "grad_norm": 9.496736526489258, "learning_rate": 5.899183833944522e-07, "loss": 0.3354, "step": 17042 }, { "epoch": 0.8454288407163054, "grad_norm": 9.680105209350586, "learning_rate": 5.895475462004269e-07, "loss": 0.3272, "step": 17043 }, { "epoch": 0.8454784463515055, "grad_norm": 6.357840538024902, "learning_rate": 5.891768182994101e-07, "loss": 0.277, "step": 17044 }, { "epoch": 0.8455280519867057, "grad_norm": 8.980484008789062, "learning_rate": 5.888061997005845e-07, "loss": 0.2758, "step": 17045 }, { "epoch": 0.8455776576219058, "grad_norm": 7.006725788116455, "learning_rate": 5.884356904131389e-07, "loss": 0.2308, "step": 17046 }, { "epoch": 0.845627263257106, "grad_norm": 7.650789260864258, "learning_rate": 5.880652904462503e-07, "loss": 0.294, "step": 17047 }, { "epoch": 0.8456768688923062, "grad_norm": 4.861139297485352, "learning_rate": 5.876949998091014e-07, "loss": 0.2682, "step": 17048 }, { "epoch": 0.8457264745275064, "grad_norm": 6.071254253387451, "learning_rate": 5.87324818510866e-07, "loss": 0.1905, "step": 17049 }, { "epoch": 0.8457760801627064, "grad_norm": 7.795627117156982, "learning_rate": 5.869547465607168e-07, "loss": 0.3288, "step": 17050 }, { "epoch": 0.8458256857979066, "grad_norm": 5.205495357513428, "learning_rate": 5.865847839678257e-07, "loss": 0.2655, "step": 17051 }, { "epoch": 0.8458752914331068, "grad_norm": 7.874948501586914, "learning_rate": 5.862149307413612e-07, "loss": 0.3739, "step": 17052 }, { "epoch": 0.845924897068307, "grad_norm": 38.8097038269043, "learning_rate": 5.858451868904852e-07, "loss": 0.5066, "step": 17053 }, { "epoch": 0.8459745027035072, "grad_norm": 5.570990562438965, "learning_rate": 5.854755524243638e-07, "loss": 0.3351, "step": 17054 }, { "epoch": 0.8460241083387072, "grad_norm": 10.03196907043457, "learning_rate": 5.851060273521548e-07, "loss": 0.2419, "step": 17055 }, { "epoch": 0.8460737139739074, "grad_norm": 5.078449249267578, "learning_rate": 5.84736611683015e-07, "loss": 0.2097, "step": 17056 }, { "epoch": 0.8461233196091076, "grad_norm": 19.411230087280273, "learning_rate": 5.84367305426099e-07, "loss": 0.2913, "step": 17057 }, { "epoch": 0.8461729252443078, "grad_norm": 3.72878098487854, "learning_rate": 5.839981085905599e-07, "loss": 0.1997, "step": 17058 }, { "epoch": 0.8462225308795079, "grad_norm": 7.946236610412598, "learning_rate": 5.836290211855433e-07, "loss": 0.2752, "step": 17059 }, { "epoch": 0.8462721365147081, "grad_norm": 3.224069833755493, "learning_rate": 5.832600432201995e-07, "loss": 0.2096, "step": 17060 }, { "epoch": 0.8463217421499082, "grad_norm": 8.810230255126953, "learning_rate": 5.828911747036681e-07, "loss": 0.3362, "step": 17061 }, { "epoch": 0.8463713477851084, "grad_norm": 4.676577568054199, "learning_rate": 5.825224156450921e-07, "loss": 0.1467, "step": 17062 }, { "epoch": 0.8464209534203085, "grad_norm": 10.108625411987305, "learning_rate": 5.821537660536086e-07, "loss": 0.304, "step": 17063 }, { "epoch": 0.8464705590555087, "grad_norm": 5.9913010597229, "learning_rate": 5.817852259383533e-07, "loss": 0.2586, "step": 17064 }, { "epoch": 0.8465201646907089, "grad_norm": 6.255080223083496, "learning_rate": 5.814167953084582e-07, "loss": 0.2773, "step": 17065 }, { "epoch": 0.8465697703259091, "grad_norm": 4.720077991485596, "learning_rate": 5.810484741730554e-07, "loss": 0.2683, "step": 17066 }, { "epoch": 0.8466193759611091, "grad_norm": 7.211032390594482, "learning_rate": 5.806802625412678e-07, "loss": 0.2286, "step": 17067 }, { "epoch": 0.8466689815963093, "grad_norm": 5.80319356918335, "learning_rate": 5.803121604222245e-07, "loss": 0.2289, "step": 17068 }, { "epoch": 0.8467185872315095, "grad_norm": 9.68513298034668, "learning_rate": 5.799441678250445e-07, "loss": 0.3079, "step": 17069 }, { "epoch": 0.8467681928667097, "grad_norm": 11.276652336120605, "learning_rate": 5.79576284758847e-07, "loss": 0.277, "step": 17070 }, { "epoch": 0.8468177985019099, "grad_norm": 4.933798789978027, "learning_rate": 5.792085112327489e-07, "loss": 0.2607, "step": 17071 }, { "epoch": 0.8468674041371099, "grad_norm": 18.511140823364258, "learning_rate": 5.788408472558632e-07, "loss": 0.3422, "step": 17072 }, { "epoch": 0.8469170097723101, "grad_norm": 4.623445510864258, "learning_rate": 5.784732928373015e-07, "loss": 0.2247, "step": 17073 }, { "epoch": 0.8469666154075103, "grad_norm": 8.07114315032959, "learning_rate": 5.781058479861729e-07, "loss": 0.339, "step": 17074 }, { "epoch": 0.8470162210427105, "grad_norm": 7.403608798980713, "learning_rate": 5.777385127115803e-07, "loss": 0.3785, "step": 17075 }, { "epoch": 0.8470658266779106, "grad_norm": 5.251389503479004, "learning_rate": 5.773712870226284e-07, "loss": 0.3241, "step": 17076 }, { "epoch": 0.8471154323131108, "grad_norm": 7.313183784484863, "learning_rate": 5.770041709284158e-07, "loss": 0.2851, "step": 17077 }, { "epoch": 0.8471650379483109, "grad_norm": 7.281365394592285, "learning_rate": 5.766371644380408e-07, "loss": 0.3625, "step": 17078 }, { "epoch": 0.8472146435835111, "grad_norm": 11.497570037841797, "learning_rate": 5.76270267560598e-07, "loss": 0.3301, "step": 17079 }, { "epoch": 0.8472642492187112, "grad_norm": 11.049169540405273, "learning_rate": 5.759034803051789e-07, "loss": 0.3489, "step": 17080 }, { "epoch": 0.8473138548539114, "grad_norm": 6.662930488586426, "learning_rate": 5.755368026808727e-07, "loss": 0.2515, "step": 17081 }, { "epoch": 0.8473634604891116, "grad_norm": 5.419757843017578, "learning_rate": 5.751702346967669e-07, "loss": 0.3121, "step": 17082 }, { "epoch": 0.8474130661243118, "grad_norm": 4.754396915435791, "learning_rate": 5.748037763619436e-07, "loss": 0.2974, "step": 17083 }, { "epoch": 0.8474626717595118, "grad_norm": 4.786002159118652, "learning_rate": 5.744374276854842e-07, "loss": 0.2178, "step": 17084 }, { "epoch": 0.847512277394712, "grad_norm": 7.2483344078063965, "learning_rate": 5.740711886764671e-07, "loss": 0.2673, "step": 17085 }, { "epoch": 0.8475618830299122, "grad_norm": 11.67420482635498, "learning_rate": 5.737050593439686e-07, "loss": 0.3387, "step": 17086 }, { "epoch": 0.8476114886651124, "grad_norm": 9.044415473937988, "learning_rate": 5.733390396970601e-07, "loss": 0.2634, "step": 17087 }, { "epoch": 0.8476610943003126, "grad_norm": 5.763453006744385, "learning_rate": 5.729731297448132e-07, "loss": 0.2642, "step": 17088 }, { "epoch": 0.8477106999355126, "grad_norm": 3.8522186279296875, "learning_rate": 5.726073294962958e-07, "loss": 0.2185, "step": 17089 }, { "epoch": 0.8477603055707128, "grad_norm": 6.121110916137695, "learning_rate": 5.72241638960569e-07, "loss": 0.1472, "step": 17090 }, { "epoch": 0.847809911205913, "grad_norm": 4.117712497711182, "learning_rate": 5.718760581466998e-07, "loss": 0.2918, "step": 17091 }, { "epoch": 0.8478595168411132, "grad_norm": 8.226236343383789, "learning_rate": 5.715105870637433e-07, "loss": 0.2721, "step": 17092 }, { "epoch": 0.8479091224763133, "grad_norm": 5.406557083129883, "learning_rate": 5.711452257207584e-07, "loss": 0.2912, "step": 17093 }, { "epoch": 0.8479587281115135, "grad_norm": 6.699110984802246, "learning_rate": 5.707799741267978e-07, "loss": 0.2908, "step": 17094 }, { "epoch": 0.8480083337467136, "grad_norm": 5.8166093826293945, "learning_rate": 5.704148322909137e-07, "loss": 0.2243, "step": 17095 }, { "epoch": 0.8480579393819138, "grad_norm": 7.542774200439453, "learning_rate": 5.700498002221521e-07, "loss": 0.2765, "step": 17096 }, { "epoch": 0.8481075450171139, "grad_norm": 6.860996246337891, "learning_rate": 5.696848779295628e-07, "loss": 0.2362, "step": 17097 }, { "epoch": 0.8481571506523141, "grad_norm": 8.528854370117188, "learning_rate": 5.693200654221836e-07, "loss": 0.302, "step": 17098 }, { "epoch": 0.8482067562875143, "grad_norm": 10.841065406799316, "learning_rate": 5.689553627090594e-07, "loss": 0.2468, "step": 17099 }, { "epoch": 0.8482563619227145, "grad_norm": 5.985565185546875, "learning_rate": 5.685907697992249e-07, "loss": 0.2484, "step": 17100 }, { "epoch": 0.8483059675579145, "grad_norm": 8.049798011779785, "learning_rate": 5.682262867017157e-07, "loss": 0.2568, "step": 17101 }, { "epoch": 0.8483555731931147, "grad_norm": 5.68327522277832, "learning_rate": 5.678619134255642e-07, "loss": 0.2241, "step": 17102 }, { "epoch": 0.8484051788283149, "grad_norm": 8.302522659301758, "learning_rate": 5.674976499797997e-07, "loss": 0.3095, "step": 17103 }, { "epoch": 0.8484547844635151, "grad_norm": 6.745124816894531, "learning_rate": 5.671334963734464e-07, "loss": 0.3388, "step": 17104 }, { "epoch": 0.8485043900987153, "grad_norm": 13.340888977050781, "learning_rate": 5.667694526155332e-07, "loss": 0.32, "step": 17105 }, { "epoch": 0.8485539957339153, "grad_norm": 5.604915618896484, "learning_rate": 5.664055187150763e-07, "loss": 0.2149, "step": 17106 }, { "epoch": 0.8486036013691155, "grad_norm": 5.473822116851807, "learning_rate": 5.660416946810971e-07, "loss": 0.2329, "step": 17107 }, { "epoch": 0.8486532070043157, "grad_norm": 7.294122695922852, "learning_rate": 5.656779805226098e-07, "loss": 0.2587, "step": 17108 }, { "epoch": 0.8487028126395159, "grad_norm": 3.528125524520874, "learning_rate": 5.65314376248629e-07, "loss": 0.2192, "step": 17109 }, { "epoch": 0.848752418274716, "grad_norm": 13.951112747192383, "learning_rate": 5.649508818681621e-07, "loss": 0.3836, "step": 17110 }, { "epoch": 0.8488020239099162, "grad_norm": 6.639777183532715, "learning_rate": 5.645874973902205e-07, "loss": 0.2287, "step": 17111 }, { "epoch": 0.8488516295451163, "grad_norm": 9.45584774017334, "learning_rate": 5.64224222823806e-07, "loss": 0.3073, "step": 17112 }, { "epoch": 0.8489012351803165, "grad_norm": 5.891988277435303, "learning_rate": 5.638610581779219e-07, "loss": 0.242, "step": 17113 }, { "epoch": 0.8489508408155166, "grad_norm": 11.327788352966309, "learning_rate": 5.634980034615673e-07, "loss": 0.3271, "step": 17114 }, { "epoch": 0.8490004464507168, "grad_norm": 5.297369956970215, "learning_rate": 5.631350586837403e-07, "loss": 0.275, "step": 17115 }, { "epoch": 0.849050052085917, "grad_norm": 10.683613777160645, "learning_rate": 5.627722238534316e-07, "loss": 0.3916, "step": 17116 }, { "epoch": 0.8490996577211172, "grad_norm": 6.081208229064941, "learning_rate": 5.624094989796363e-07, "loss": 0.3038, "step": 17117 }, { "epoch": 0.8491492633563172, "grad_norm": 9.215371131896973, "learning_rate": 5.620468840713389e-07, "loss": 0.3301, "step": 17118 }, { "epoch": 0.8491988689915174, "grad_norm": 8.697066307067871, "learning_rate": 5.616843791375288e-07, "loss": 0.3215, "step": 17119 }, { "epoch": 0.8492484746267176, "grad_norm": 6.241215705871582, "learning_rate": 5.61321984187187e-07, "loss": 0.2577, "step": 17120 }, { "epoch": 0.8492980802619178, "grad_norm": 6.797806739807129, "learning_rate": 5.609596992292943e-07, "loss": 0.2475, "step": 17121 }, { "epoch": 0.849347685897118, "grad_norm": 4.852354049682617, "learning_rate": 5.605975242728278e-07, "loss": 0.2833, "step": 17122 }, { "epoch": 0.849397291532318, "grad_norm": 5.200216293334961, "learning_rate": 5.60235459326764e-07, "loss": 0.2788, "step": 17123 }, { "epoch": 0.8494468971675182, "grad_norm": 11.727167129516602, "learning_rate": 5.598735044000719e-07, "loss": 0.333, "step": 17124 }, { "epoch": 0.8494965028027184, "grad_norm": 9.44141674041748, "learning_rate": 5.595116595017247e-07, "loss": 0.2907, "step": 17125 }, { "epoch": 0.8495461084379186, "grad_norm": 12.616087913513184, "learning_rate": 5.591499246406867e-07, "loss": 0.4498, "step": 17126 }, { "epoch": 0.8495957140731187, "grad_norm": 9.527790069580078, "learning_rate": 5.58788299825922e-07, "loss": 0.3131, "step": 17127 }, { "epoch": 0.8496453197083189, "grad_norm": 8.344648361206055, "learning_rate": 5.584267850663921e-07, "loss": 0.2672, "step": 17128 }, { "epoch": 0.849694925343519, "grad_norm": 6.317423343658447, "learning_rate": 5.580653803710556e-07, "loss": 0.2665, "step": 17129 }, { "epoch": 0.8497445309787192, "grad_norm": 5.972620010375977, "learning_rate": 5.577040857488686e-07, "loss": 0.3151, "step": 17130 }, { "epoch": 0.8497941366139193, "grad_norm": 6.0044403076171875, "learning_rate": 5.57342901208785e-07, "loss": 0.305, "step": 17131 }, { "epoch": 0.8498437422491195, "grad_norm": 10.217484474182129, "learning_rate": 5.569818267597521e-07, "loss": 0.3227, "step": 17132 }, { "epoch": 0.8498933478843197, "grad_norm": 5.979554176330566, "learning_rate": 5.5662086241072e-07, "loss": 0.2471, "step": 17133 }, { "epoch": 0.8499429535195199, "grad_norm": 3.4042911529541016, "learning_rate": 5.562600081706326e-07, "loss": 0.189, "step": 17134 }, { "epoch": 0.8499925591547199, "grad_norm": 5.948871612548828, "learning_rate": 5.558992640484328e-07, "loss": 0.247, "step": 17135 }, { "epoch": 0.8500421647899201, "grad_norm": 9.704477310180664, "learning_rate": 5.555386300530591e-07, "loss": 0.2891, "step": 17136 }, { "epoch": 0.8500917704251203, "grad_norm": 10.03368091583252, "learning_rate": 5.551781061934485e-07, "loss": 0.3495, "step": 17137 }, { "epoch": 0.8501413760603205, "grad_norm": 12.4933500289917, "learning_rate": 5.548176924785354e-07, "loss": 0.2026, "step": 17138 }, { "epoch": 0.8501909816955207, "grad_norm": 7.547188758850098, "learning_rate": 5.544573889172506e-07, "loss": 0.2348, "step": 17139 }, { "epoch": 0.8502405873307207, "grad_norm": 9.780852317810059, "learning_rate": 5.540971955185232e-07, "loss": 0.4048, "step": 17140 }, { "epoch": 0.8502901929659209, "grad_norm": 8.110733032226562, "learning_rate": 5.537371122912777e-07, "loss": 0.3192, "step": 17141 }, { "epoch": 0.8503397986011211, "grad_norm": 5.784004211425781, "learning_rate": 5.533771392444382e-07, "loss": 0.3215, "step": 17142 }, { "epoch": 0.8503894042363213, "grad_norm": 6.614655494689941, "learning_rate": 5.53017276386924e-07, "loss": 0.2886, "step": 17143 }, { "epoch": 0.8504390098715214, "grad_norm": 5.0616655349731445, "learning_rate": 5.526575237276533e-07, "loss": 0.3104, "step": 17144 }, { "epoch": 0.8504886155067216, "grad_norm": 7.475296497344971, "learning_rate": 5.522978812755414e-07, "loss": 0.2345, "step": 17145 }, { "epoch": 0.8505382211419217, "grad_norm": 38.89625549316406, "learning_rate": 5.519383490395008e-07, "loss": 0.397, "step": 17146 }, { "epoch": 0.8505878267771219, "grad_norm": 8.505025863647461, "learning_rate": 5.515789270284377e-07, "loss": 0.283, "step": 17147 }, { "epoch": 0.850637432412322, "grad_norm": 10.026585578918457, "learning_rate": 5.512196152512633e-07, "loss": 0.3097, "step": 17148 }, { "epoch": 0.8506870380475222, "grad_norm": 5.35164213180542, "learning_rate": 5.508604137168777e-07, "loss": 0.1796, "step": 17149 }, { "epoch": 0.8507366436827224, "grad_norm": 10.681633949279785, "learning_rate": 5.505013224341843e-07, "loss": 0.2879, "step": 17150 }, { "epoch": 0.8507862493179225, "grad_norm": 8.029250144958496, "learning_rate": 5.501423414120805e-07, "loss": 0.3203, "step": 17151 }, { "epoch": 0.8508358549531226, "grad_norm": 7.851545810699463, "learning_rate": 5.497834706594623e-07, "loss": 0.2643, "step": 17152 }, { "epoch": 0.8508854605883228, "grad_norm": 5.794688701629639, "learning_rate": 5.49424710185223e-07, "loss": 0.2042, "step": 17153 }, { "epoch": 0.850935066223523, "grad_norm": 10.081762313842773, "learning_rate": 5.490660599982534e-07, "loss": 0.2885, "step": 17154 }, { "epoch": 0.8509846718587232, "grad_norm": 10.045266151428223, "learning_rate": 5.487075201074382e-07, "loss": 0.328, "step": 17155 }, { "epoch": 0.8510342774939234, "grad_norm": 4.299908638000488, "learning_rate": 5.483490905216665e-07, "loss": 0.2129, "step": 17156 }, { "epoch": 0.8510838831291234, "grad_norm": 4.687142848968506, "learning_rate": 5.479907712498167e-07, "loss": 0.3265, "step": 17157 }, { "epoch": 0.8511334887643236, "grad_norm": 5.826653480529785, "learning_rate": 5.476325623007695e-07, "loss": 0.2435, "step": 17158 }, { "epoch": 0.8511830943995238, "grad_norm": 9.092941284179688, "learning_rate": 5.472744636834016e-07, "loss": 0.3837, "step": 17159 }, { "epoch": 0.851232700034724, "grad_norm": 5.232022285461426, "learning_rate": 5.469164754065875e-07, "loss": 0.2844, "step": 17160 }, { "epoch": 0.8512823056699241, "grad_norm": 6.5030598640441895, "learning_rate": 5.465585974791959e-07, "loss": 0.2963, "step": 17161 }, { "epoch": 0.8513319113051243, "grad_norm": 7.2170610427856445, "learning_rate": 5.462008299100985e-07, "loss": 0.3085, "step": 17162 }, { "epoch": 0.8513815169403244, "grad_norm": 6.073780059814453, "learning_rate": 5.458431727081581e-07, "loss": 0.1298, "step": 17163 }, { "epoch": 0.8514311225755246, "grad_norm": 7.824880599975586, "learning_rate": 5.454856258822383e-07, "loss": 0.3743, "step": 17164 }, { "epoch": 0.8514807282107247, "grad_norm": 10.462034225463867, "learning_rate": 5.451281894412003e-07, "loss": 0.4404, "step": 17165 }, { "epoch": 0.8515303338459249, "grad_norm": 8.7423677444458, "learning_rate": 5.447708633939019e-07, "loss": 0.3002, "step": 17166 }, { "epoch": 0.8515799394811251, "grad_norm": 3.850048303604126, "learning_rate": 5.444136477491946e-07, "loss": 0.2144, "step": 17167 }, { "epoch": 0.8516295451163252, "grad_norm": 7.991739273071289, "learning_rate": 5.44056542515935e-07, "loss": 0.2778, "step": 17168 }, { "epoch": 0.8516791507515253, "grad_norm": 7.588028907775879, "learning_rate": 5.436995477029682e-07, "loss": 0.3541, "step": 17169 }, { "epoch": 0.8517287563867255, "grad_norm": 4.717226505279541, "learning_rate": 5.43342663319143e-07, "loss": 0.2735, "step": 17170 }, { "epoch": 0.8517783620219257, "grad_norm": 13.230375289916992, "learning_rate": 5.429858893733025e-07, "loss": 0.2645, "step": 17171 }, { "epoch": 0.8518279676571259, "grad_norm": 4.529781341552734, "learning_rate": 5.426292258742876e-07, "loss": 0.2113, "step": 17172 }, { "epoch": 0.851877573292326, "grad_norm": 8.75092601776123, "learning_rate": 5.422726728309368e-07, "loss": 0.229, "step": 17173 }, { "epoch": 0.8519271789275261, "grad_norm": 4.2894606590271, "learning_rate": 5.419162302520864e-07, "loss": 0.2141, "step": 17174 }, { "epoch": 0.8519767845627263, "grad_norm": 6.957200527191162, "learning_rate": 5.415598981465669e-07, "loss": 0.3422, "step": 17175 }, { "epoch": 0.8520263901979265, "grad_norm": 12.171006202697754, "learning_rate": 5.412036765232115e-07, "loss": 0.3117, "step": 17176 }, { "epoch": 0.8520759958331267, "grad_norm": 7.857074737548828, "learning_rate": 5.408475653908452e-07, "loss": 0.2437, "step": 17177 }, { "epoch": 0.8521256014683268, "grad_norm": 4.854753494262695, "learning_rate": 5.404915647582932e-07, "loss": 0.2744, "step": 17178 }, { "epoch": 0.852175207103527, "grad_norm": 33.03129577636719, "learning_rate": 5.401356746343778e-07, "loss": 0.4174, "step": 17179 }, { "epoch": 0.8522248127387271, "grad_norm": 11.027949333190918, "learning_rate": 5.397798950279181e-07, "loss": 0.4008, "step": 17180 }, { "epoch": 0.8522744183739273, "grad_norm": 6.26650333404541, "learning_rate": 5.394242259477289e-07, "loss": 0.2633, "step": 17181 }, { "epoch": 0.8523240240091274, "grad_norm": 4.738563060760498, "learning_rate": 5.390686674026268e-07, "loss": 0.2032, "step": 17182 }, { "epoch": 0.8523736296443276, "grad_norm": 5.007403373718262, "learning_rate": 5.387132194014199e-07, "loss": 0.2379, "step": 17183 }, { "epoch": 0.8524232352795278, "grad_norm": 10.884538650512695, "learning_rate": 5.383578819529178e-07, "loss": 0.3461, "step": 17184 }, { "epoch": 0.8524728409147279, "grad_norm": 5.555176734924316, "learning_rate": 5.380026550659251e-07, "loss": 0.2338, "step": 17185 }, { "epoch": 0.852522446549928, "grad_norm": 11.13375186920166, "learning_rate": 5.376475387492458e-07, "loss": 0.288, "step": 17186 }, { "epoch": 0.8525720521851282, "grad_norm": 5.274056434631348, "learning_rate": 5.372925330116785e-07, "loss": 0.2325, "step": 17187 }, { "epoch": 0.8526216578203284, "grad_norm": 7.042200565338135, "learning_rate": 5.369376378620206e-07, "loss": 0.3258, "step": 17188 }, { "epoch": 0.8526712634555286, "grad_norm": 10.26651668548584, "learning_rate": 5.365828533090672e-07, "loss": 0.3677, "step": 17189 }, { "epoch": 0.8527208690907288, "grad_norm": 8.301427841186523, "learning_rate": 5.362281793616108e-07, "loss": 0.2743, "step": 17190 }, { "epoch": 0.8527704747259288, "grad_norm": 5.599545478820801, "learning_rate": 5.358736160284383e-07, "loss": 0.2305, "step": 17191 }, { "epoch": 0.852820080361129, "grad_norm": 6.722823143005371, "learning_rate": 5.355191633183371e-07, "loss": 0.231, "step": 17192 }, { "epoch": 0.8528696859963292, "grad_norm": 11.460514068603516, "learning_rate": 5.351648212400906e-07, "loss": 0.1957, "step": 17193 }, { "epoch": 0.8529192916315294, "grad_norm": 4.923346042633057, "learning_rate": 5.348105898024791e-07, "loss": 0.2748, "step": 17194 }, { "epoch": 0.8529688972667295, "grad_norm": 9.605106353759766, "learning_rate": 5.344564690142812e-07, "loss": 0.2958, "step": 17195 }, { "epoch": 0.8530185029019297, "grad_norm": 6.410858631134033, "learning_rate": 5.341024588842719e-07, "loss": 0.2403, "step": 17196 }, { "epoch": 0.8530681085371298, "grad_norm": 6.3080620765686035, "learning_rate": 5.33748559421225e-07, "loss": 0.2637, "step": 17197 }, { "epoch": 0.85311771417233, "grad_norm": 6.316153049468994, "learning_rate": 5.333947706339066e-07, "loss": 0.2322, "step": 17198 }, { "epoch": 0.8531673198075301, "grad_norm": 4.386523723602295, "learning_rate": 5.330410925310886e-07, "loss": 0.2034, "step": 17199 }, { "epoch": 0.8532169254427303, "grad_norm": 5.531962871551514, "learning_rate": 5.326875251215325e-07, "loss": 0.2299, "step": 17200 }, { "epoch": 0.8532665310779305, "grad_norm": 6.513710021972656, "learning_rate": 5.323340684139994e-07, "loss": 0.3269, "step": 17201 }, { "epoch": 0.8533161367131306, "grad_norm": 4.450096607208252, "learning_rate": 5.319807224172496e-07, "loss": 0.1994, "step": 17202 }, { "epoch": 0.8533657423483307, "grad_norm": 14.076040267944336, "learning_rate": 5.316274871400395e-07, "loss": 0.3489, "step": 17203 }, { "epoch": 0.8534153479835309, "grad_norm": 6.759541988372803, "learning_rate": 5.312743625911193e-07, "loss": 0.2564, "step": 17204 }, { "epoch": 0.8534649536187311, "grad_norm": 10.012937545776367, "learning_rate": 5.309213487792442e-07, "loss": 0.3351, "step": 17205 }, { "epoch": 0.8535145592539313, "grad_norm": 19.19255256652832, "learning_rate": 5.305684457131571e-07, "loss": 0.3113, "step": 17206 }, { "epoch": 0.8535641648891314, "grad_norm": 4.629810333251953, "learning_rate": 5.302156534016078e-07, "loss": 0.2101, "step": 17207 }, { "epoch": 0.8536137705243315, "grad_norm": 3.7230684757232666, "learning_rate": 5.29862971853336e-07, "loss": 0.2209, "step": 17208 }, { "epoch": 0.8536633761595317, "grad_norm": 11.03316593170166, "learning_rate": 5.295104010770813e-07, "loss": 0.4459, "step": 17209 }, { "epoch": 0.8537129817947319, "grad_norm": 10.987677574157715, "learning_rate": 5.291579410815817e-07, "loss": 0.4335, "step": 17210 }, { "epoch": 0.853762587429932, "grad_norm": 4.376277446746826, "learning_rate": 5.288055918755714e-07, "loss": 0.2702, "step": 17211 }, { "epoch": 0.8538121930651322, "grad_norm": 11.650742530822754, "learning_rate": 5.284533534677788e-07, "loss": 0.2723, "step": 17212 }, { "epoch": 0.8538617987003324, "grad_norm": 5.817903518676758, "learning_rate": 5.281012258669366e-07, "loss": 0.2686, "step": 17213 }, { "epoch": 0.8539114043355325, "grad_norm": 7.907988548278809, "learning_rate": 5.277492090817688e-07, "loss": 0.316, "step": 17214 }, { "epoch": 0.8539610099707327, "grad_norm": 10.519013404846191, "learning_rate": 5.273973031209978e-07, "loss": 0.3846, "step": 17215 }, { "epoch": 0.8540106156059328, "grad_norm": 5.628170967102051, "learning_rate": 5.270455079933451e-07, "loss": 0.2906, "step": 17216 }, { "epoch": 0.854060221241133, "grad_norm": 8.725749969482422, "learning_rate": 5.266938237075292e-07, "loss": 0.3126, "step": 17217 }, { "epoch": 0.8541098268763332, "grad_norm": 7.761303901672363, "learning_rate": 5.263422502722615e-07, "loss": 0.3669, "step": 17218 }, { "epoch": 0.8541594325115333, "grad_norm": 3.3056442737579346, "learning_rate": 5.259907876962589e-07, "loss": 0.2247, "step": 17219 }, { "epoch": 0.8542090381467334, "grad_norm": 5.037326335906982, "learning_rate": 5.256394359882272e-07, "loss": 0.2612, "step": 17220 }, { "epoch": 0.8542586437819336, "grad_norm": 14.095442771911621, "learning_rate": 5.252881951568739e-07, "loss": 0.35, "step": 17221 }, { "epoch": 0.8543082494171338, "grad_norm": 6.114685535430908, "learning_rate": 5.249370652109037e-07, "loss": 0.304, "step": 17222 }, { "epoch": 0.854357855052334, "grad_norm": 4.525545120239258, "learning_rate": 5.245860461590174e-07, "loss": 0.2605, "step": 17223 }, { "epoch": 0.8544074606875341, "grad_norm": 5.490660667419434, "learning_rate": 5.242351380099131e-07, "loss": 0.2263, "step": 17224 }, { "epoch": 0.8544570663227342, "grad_norm": 6.189167499542236, "learning_rate": 5.238843407722877e-07, "loss": 0.3212, "step": 17225 }, { "epoch": 0.8545066719579344, "grad_norm": 4.6472344398498535, "learning_rate": 5.235336544548308e-07, "loss": 0.2584, "step": 17226 }, { "epoch": 0.8545562775931346, "grad_norm": 4.87017822265625, "learning_rate": 5.231830790662373e-07, "loss": 0.2404, "step": 17227 }, { "epoch": 0.8546058832283348, "grad_norm": 4.210923194885254, "learning_rate": 5.228326146151913e-07, "loss": 0.2082, "step": 17228 }, { "epoch": 0.8546554888635349, "grad_norm": 5.681887149810791, "learning_rate": 5.22482261110378e-07, "loss": 0.3348, "step": 17229 }, { "epoch": 0.8547050944987351, "grad_norm": 4.91530704498291, "learning_rate": 5.221320185604794e-07, "loss": 0.2472, "step": 17230 }, { "epoch": 0.8547547001339352, "grad_norm": 8.683191299438477, "learning_rate": 5.217818869741765e-07, "loss": 0.2545, "step": 17231 }, { "epoch": 0.8548043057691354, "grad_norm": 5.224432468414307, "learning_rate": 5.21431866360142e-07, "loss": 0.2867, "step": 17232 }, { "epoch": 0.8548539114043355, "grad_norm": 4.7364983558654785, "learning_rate": 5.210819567270536e-07, "loss": 0.2608, "step": 17233 }, { "epoch": 0.8549035170395357, "grad_norm": 9.122270584106445, "learning_rate": 5.2073215808358e-07, "loss": 0.3404, "step": 17234 }, { "epoch": 0.8549531226747359, "grad_norm": 7.280991554260254, "learning_rate": 5.203824704383892e-07, "loss": 0.2946, "step": 17235 }, { "epoch": 0.855002728309936, "grad_norm": 25.09008026123047, "learning_rate": 5.20032893800147e-07, "loss": 0.3052, "step": 17236 }, { "epoch": 0.8550523339451361, "grad_norm": 6.089921951293945, "learning_rate": 5.19683428177516e-07, "loss": 0.2969, "step": 17237 }, { "epoch": 0.8551019395803363, "grad_norm": 13.090702056884766, "learning_rate": 5.193340735791569e-07, "loss": 0.4267, "step": 17238 }, { "epoch": 0.8551515452155365, "grad_norm": 10.740492820739746, "learning_rate": 5.189848300137268e-07, "loss": 0.2786, "step": 17239 }, { "epoch": 0.8552011508507367, "grad_norm": 7.282358169555664, "learning_rate": 5.186356974898788e-07, "loss": 0.2686, "step": 17240 }, { "epoch": 0.8552507564859368, "grad_norm": 6.03063440322876, "learning_rate": 5.182866760162658e-07, "loss": 0.2974, "step": 17241 }, { "epoch": 0.8553003621211369, "grad_norm": 7.364713191986084, "learning_rate": 5.179377656015355e-07, "loss": 0.2556, "step": 17242 }, { "epoch": 0.8553499677563371, "grad_norm": 3.7350013256073, "learning_rate": 5.175889662543354e-07, "loss": 0.1841, "step": 17243 }, { "epoch": 0.8553995733915373, "grad_norm": 6.182069301605225, "learning_rate": 5.17240277983308e-07, "loss": 0.2656, "step": 17244 }, { "epoch": 0.8554491790267374, "grad_norm": 7.038674354553223, "learning_rate": 5.168917007970942e-07, "loss": 0.343, "step": 17245 }, { "epoch": 0.8554987846619376, "grad_norm": 8.953299522399902, "learning_rate": 5.165432347043325e-07, "loss": 0.3417, "step": 17246 }, { "epoch": 0.8555483902971378, "grad_norm": 4.80424690246582, "learning_rate": 5.161948797136568e-07, "loss": 0.2533, "step": 17247 }, { "epoch": 0.8555979959323379, "grad_norm": 6.9185075759887695, "learning_rate": 5.158466358337017e-07, "loss": 0.2642, "step": 17248 }, { "epoch": 0.855647601567538, "grad_norm": 5.2438130378723145, "learning_rate": 5.154985030730947e-07, "loss": 0.2105, "step": 17249 }, { "epoch": 0.8556972072027382, "grad_norm": 5.689802646636963, "learning_rate": 5.151504814404629e-07, "loss": 0.2687, "step": 17250 }, { "epoch": 0.8557468128379384, "grad_norm": 11.776315689086914, "learning_rate": 5.14802570944431e-07, "loss": 0.4673, "step": 17251 }, { "epoch": 0.8557964184731386, "grad_norm": 6.558267116546631, "learning_rate": 5.144547715936199e-07, "loss": 0.2265, "step": 17252 }, { "epoch": 0.8558460241083387, "grad_norm": 8.37504768371582, "learning_rate": 5.141070833966488e-07, "loss": 0.2993, "step": 17253 }, { "epoch": 0.8558956297435388, "grad_norm": 7.485572338104248, "learning_rate": 5.137595063621348e-07, "loss": 0.2266, "step": 17254 }, { "epoch": 0.855945235378739, "grad_norm": 5.6268630027771, "learning_rate": 5.13412040498687e-07, "loss": 0.3378, "step": 17255 }, { "epoch": 0.8559948410139392, "grad_norm": 4.318729877471924, "learning_rate": 5.130646858149208e-07, "loss": 0.2408, "step": 17256 }, { "epoch": 0.8560444466491394, "grad_norm": 6.113436698913574, "learning_rate": 5.127174423194398e-07, "loss": 0.2539, "step": 17257 }, { "epoch": 0.8560940522843395, "grad_norm": 10.03061294555664, "learning_rate": 5.12370310020851e-07, "loss": 0.3507, "step": 17258 }, { "epoch": 0.8561436579195396, "grad_norm": 5.894167423248291, "learning_rate": 5.120232889277554e-07, "loss": 0.2538, "step": 17259 }, { "epoch": 0.8561932635547398, "grad_norm": 4.402042388916016, "learning_rate": 5.116763790487533e-07, "loss": 0.2829, "step": 17260 }, { "epoch": 0.85624286918994, "grad_norm": 7.072408199310303, "learning_rate": 5.113295803924407e-07, "loss": 0.2935, "step": 17261 }, { "epoch": 0.8562924748251401, "grad_norm": 13.071244239807129, "learning_rate": 5.109828929674122e-07, "loss": 0.4154, "step": 17262 }, { "epoch": 0.8563420804603403, "grad_norm": 19.341136932373047, "learning_rate": 5.106363167822565e-07, "loss": 0.5239, "step": 17263 }, { "epoch": 0.8563916860955405, "grad_norm": 9.483282089233398, "learning_rate": 5.102898518455657e-07, "loss": 0.292, "step": 17264 }, { "epoch": 0.8564412917307406, "grad_norm": 6.668995380401611, "learning_rate": 5.099434981659218e-07, "loss": 0.3069, "step": 17265 }, { "epoch": 0.8564908973659408, "grad_norm": 8.687195777893066, "learning_rate": 5.095972557519096e-07, "loss": 0.261, "step": 17266 }, { "epoch": 0.8565405030011409, "grad_norm": 4.14406156539917, "learning_rate": 5.092511246121085e-07, "loss": 0.2561, "step": 17267 }, { "epoch": 0.8565901086363411, "grad_norm": 7.252281665802002, "learning_rate": 5.089051047550969e-07, "loss": 0.2962, "step": 17268 }, { "epoch": 0.8566397142715413, "grad_norm": 4.270227909088135, "learning_rate": 5.085591961894465e-07, "loss": 0.2738, "step": 17269 }, { "epoch": 0.8566893199067414, "grad_norm": 9.177360534667969, "learning_rate": 5.082133989237326e-07, "loss": 0.2259, "step": 17270 }, { "epoch": 0.8567389255419415, "grad_norm": 6.722933769226074, "learning_rate": 5.078677129665216e-07, "loss": 0.2776, "step": 17271 }, { "epoch": 0.8567885311771417, "grad_norm": 6.597292423248291, "learning_rate": 5.075221383263806e-07, "loss": 0.2265, "step": 17272 }, { "epoch": 0.8568381368123419, "grad_norm": 6.174081325531006, "learning_rate": 5.071766750118734e-07, "loss": 0.2357, "step": 17273 }, { "epoch": 0.8568877424475421, "grad_norm": 6.231081485748291, "learning_rate": 5.068313230315614e-07, "loss": 0.2981, "step": 17274 }, { "epoch": 0.8569373480827422, "grad_norm": 5.213678359985352, "learning_rate": 5.064860823939999e-07, "loss": 0.2423, "step": 17275 }, { "epoch": 0.8569869537179423, "grad_norm": 6.063696384429932, "learning_rate": 5.061409531077477e-07, "loss": 0.2651, "step": 17276 }, { "epoch": 0.8570365593531425, "grad_norm": 7.444508075714111, "learning_rate": 5.057959351813535e-07, "loss": 0.3534, "step": 17277 }, { "epoch": 0.8570861649883427, "grad_norm": 4.231558799743652, "learning_rate": 5.05451028623371e-07, "loss": 0.1943, "step": 17278 }, { "epoch": 0.8571357706235428, "grad_norm": 6.823161602020264, "learning_rate": 5.051062334423445e-07, "loss": 0.1894, "step": 17279 }, { "epoch": 0.857185376258743, "grad_norm": 5.430460453033447, "learning_rate": 5.047615496468184e-07, "loss": 0.1811, "step": 17280 }, { "epoch": 0.8572349818939432, "grad_norm": 9.867083549499512, "learning_rate": 5.044169772453345e-07, "loss": 0.2783, "step": 17281 }, { "epoch": 0.8572845875291433, "grad_norm": 9.260415077209473, "learning_rate": 5.040725162464327e-07, "loss": 0.3132, "step": 17282 }, { "epoch": 0.8573341931643434, "grad_norm": 5.526832580566406, "learning_rate": 5.037281666586452e-07, "loss": 0.2086, "step": 17283 }, { "epoch": 0.8573837987995436, "grad_norm": 4.495924949645996, "learning_rate": 5.033839284905101e-07, "loss": 0.2218, "step": 17284 }, { "epoch": 0.8574334044347438, "grad_norm": 5.718414306640625, "learning_rate": 5.030398017505545e-07, "loss": 0.2209, "step": 17285 }, { "epoch": 0.857483010069944, "grad_norm": 8.49101448059082, "learning_rate": 5.026957864473059e-07, "loss": 0.3851, "step": 17286 }, { "epoch": 0.857532615705144, "grad_norm": 10.407415390014648, "learning_rate": 5.023518825892909e-07, "loss": 0.2489, "step": 17287 }, { "epoch": 0.8575822213403442, "grad_norm": 8.290863037109375, "learning_rate": 5.020080901850316e-07, "loss": 0.2711, "step": 17288 }, { "epoch": 0.8576318269755444, "grad_norm": 5.831336975097656, "learning_rate": 5.016644092430439e-07, "loss": 0.2448, "step": 17289 }, { "epoch": 0.8576814326107446, "grad_norm": 5.903230667114258, "learning_rate": 5.013208397718494e-07, "loss": 0.2336, "step": 17290 }, { "epoch": 0.8577310382459448, "grad_norm": 8.608803749084473, "learning_rate": 5.009773817799585e-07, "loss": 0.3561, "step": 17291 }, { "epoch": 0.8577806438811449, "grad_norm": 6.776717185974121, "learning_rate": 5.006340352758832e-07, "loss": 0.1944, "step": 17292 }, { "epoch": 0.857830249516345, "grad_norm": 7.292111873626709, "learning_rate": 5.002908002681323e-07, "loss": 0.2964, "step": 17293 }, { "epoch": 0.8578798551515452, "grad_norm": 15.815820693969727, "learning_rate": 4.999476767652101e-07, "loss": 0.3726, "step": 17294 }, { "epoch": 0.8579294607867454, "grad_norm": 8.121139526367188, "learning_rate": 4.996046647756203e-07, "loss": 0.326, "step": 17295 }, { "epoch": 0.8579790664219455, "grad_norm": 6.4902215003967285, "learning_rate": 4.992617643078623e-07, "loss": 0.2621, "step": 17296 }, { "epoch": 0.8580286720571457, "grad_norm": 12.718265533447266, "learning_rate": 4.989189753704343e-07, "loss": 0.3053, "step": 17297 }, { "epoch": 0.8580782776923459, "grad_norm": 6.5965962409973145, "learning_rate": 4.985762979718306e-07, "loss": 0.3068, "step": 17298 }, { "epoch": 0.858127883327546, "grad_norm": 7.172734260559082, "learning_rate": 4.982337321205416e-07, "loss": 0.2177, "step": 17299 }, { "epoch": 0.8581774889627461, "grad_norm": 13.699909210205078, "learning_rate": 4.978912778250571e-07, "loss": 0.4692, "step": 17300 }, { "epoch": 0.8582270945979463, "grad_norm": 5.5875091552734375, "learning_rate": 4.975489350938634e-07, "loss": 0.2746, "step": 17301 }, { "epoch": 0.8582767002331465, "grad_norm": 12.654264450073242, "learning_rate": 4.972067039354439e-07, "loss": 0.3253, "step": 17302 }, { "epoch": 0.8583263058683467, "grad_norm": 6.886008262634277, "learning_rate": 4.968645843582787e-07, "loss": 0.1733, "step": 17303 }, { "epoch": 0.8583759115035468, "grad_norm": 7.812591075897217, "learning_rate": 4.965225763708465e-07, "loss": 0.2854, "step": 17304 }, { "epoch": 0.8584255171387469, "grad_norm": 6.574864387512207, "learning_rate": 4.961806799816227e-07, "loss": 0.2318, "step": 17305 }, { "epoch": 0.8584751227739471, "grad_norm": 6.746219635009766, "learning_rate": 4.958388951990783e-07, "loss": 0.2673, "step": 17306 }, { "epoch": 0.8585247284091473, "grad_norm": 7.2406487464904785, "learning_rate": 4.954972220316834e-07, "loss": 0.2686, "step": 17307 }, { "epoch": 0.8585743340443475, "grad_norm": 4.836866855621338, "learning_rate": 4.951556604879049e-07, "loss": 0.2746, "step": 17308 }, { "epoch": 0.8586239396795476, "grad_norm": 3.5417227745056152, "learning_rate": 4.948142105762066e-07, "loss": 0.1774, "step": 17309 }, { "epoch": 0.8586735453147477, "grad_norm": 5.937604904174805, "learning_rate": 4.944728723050502e-07, "loss": 0.2841, "step": 17310 }, { "epoch": 0.8587231509499479, "grad_norm": 6.592759132385254, "learning_rate": 4.941316456828954e-07, "loss": 0.2473, "step": 17311 }, { "epoch": 0.8587727565851481, "grad_norm": 11.992201805114746, "learning_rate": 4.937905307181945e-07, "loss": 0.2682, "step": 17312 }, { "epoch": 0.8588223622203482, "grad_norm": 8.2783784866333, "learning_rate": 4.934495274194045e-07, "loss": 0.3435, "step": 17313 }, { "epoch": 0.8588719678555484, "grad_norm": 9.335787773132324, "learning_rate": 4.931086357949716e-07, "loss": 0.2359, "step": 17314 }, { "epoch": 0.8589215734907486, "grad_norm": 5.546092987060547, "learning_rate": 4.927678558533477e-07, "loss": 0.2919, "step": 17315 }, { "epoch": 0.8589711791259487, "grad_norm": 8.792269706726074, "learning_rate": 4.92427187602974e-07, "loss": 0.2423, "step": 17316 }, { "epoch": 0.8590207847611488, "grad_norm": 8.384398460388184, "learning_rate": 4.920866310522937e-07, "loss": 0.2146, "step": 17317 }, { "epoch": 0.859070390396349, "grad_norm": 6.635369777679443, "learning_rate": 4.917461862097461e-07, "loss": 0.2409, "step": 17318 }, { "epoch": 0.8591199960315492, "grad_norm": 7.928389072418213, "learning_rate": 4.914058530837679e-07, "loss": 0.2793, "step": 17319 }, { "epoch": 0.8591696016667494, "grad_norm": 6.681982040405273, "learning_rate": 4.910656316827905e-07, "loss": 0.3522, "step": 17320 }, { "epoch": 0.8592192073019495, "grad_norm": 4.573083400726318, "learning_rate": 4.907255220152479e-07, "loss": 0.204, "step": 17321 }, { "epoch": 0.8592688129371496, "grad_norm": 3.905820846557617, "learning_rate": 4.903855240895661e-07, "loss": 0.2214, "step": 17322 }, { "epoch": 0.8593184185723498, "grad_norm": 5.817005634307861, "learning_rate": 4.900456379141705e-07, "loss": 0.2601, "step": 17323 }, { "epoch": 0.85936802420755, "grad_norm": 8.328289031982422, "learning_rate": 4.897058634974844e-07, "loss": 0.343, "step": 17324 }, { "epoch": 0.8594176298427502, "grad_norm": 10.337827682495117, "learning_rate": 4.893662008479278e-07, "loss": 0.3661, "step": 17325 }, { "epoch": 0.8594672354779503, "grad_norm": 10.129132270812988, "learning_rate": 4.890266499739155e-07, "loss": 0.424, "step": 17326 }, { "epoch": 0.8595168411131504, "grad_norm": 6.839808464050293, "learning_rate": 4.886872108838653e-07, "loss": 0.2751, "step": 17327 }, { "epoch": 0.8595664467483506, "grad_norm": 4.646509647369385, "learning_rate": 4.883478835861854e-07, "loss": 0.2377, "step": 17328 }, { "epoch": 0.8596160523835508, "grad_norm": 6.497074604034424, "learning_rate": 4.88008668089286e-07, "loss": 0.3235, "step": 17329 }, { "epoch": 0.8596656580187509, "grad_norm": 11.15284538269043, "learning_rate": 4.876695644015733e-07, "loss": 0.3254, "step": 17330 }, { "epoch": 0.8597152636539511, "grad_norm": 7.672220706939697, "learning_rate": 4.873305725314492e-07, "loss": 0.3503, "step": 17331 }, { "epoch": 0.8597648692891513, "grad_norm": 5.4799017906188965, "learning_rate": 4.869916924873153e-07, "loss": 0.2823, "step": 17332 }, { "epoch": 0.8598144749243514, "grad_norm": 5.562465667724609, "learning_rate": 4.866529242775691e-07, "loss": 0.2701, "step": 17333 }, { "epoch": 0.8598640805595515, "grad_norm": 5.402460098266602, "learning_rate": 4.863142679106036e-07, "loss": 0.288, "step": 17334 }, { "epoch": 0.8599136861947517, "grad_norm": 5.793045520782471, "learning_rate": 4.859757233948142e-07, "loss": 0.248, "step": 17335 }, { "epoch": 0.8599632918299519, "grad_norm": 15.534688949584961, "learning_rate": 4.856372907385875e-07, "loss": 0.3413, "step": 17336 }, { "epoch": 0.8600128974651521, "grad_norm": 13.619118690490723, "learning_rate": 4.852989699503102e-07, "loss": 0.4328, "step": 17337 }, { "epoch": 0.8600625031003521, "grad_norm": 10.530603408813477, "learning_rate": 4.849607610383672e-07, "loss": 0.2812, "step": 17338 }, { "epoch": 0.8601121087355523, "grad_norm": 15.124011039733887, "learning_rate": 4.846226640111396e-07, "loss": 0.3276, "step": 17339 }, { "epoch": 0.8601617143707525, "grad_norm": 6.8596882820129395, "learning_rate": 4.842846788770034e-07, "loss": 0.2318, "step": 17340 }, { "epoch": 0.8602113200059527, "grad_norm": 8.345208168029785, "learning_rate": 4.839468056443369e-07, "loss": 0.295, "step": 17341 }, { "epoch": 0.8602609256411529, "grad_norm": 6.622158527374268, "learning_rate": 4.836090443215108e-07, "loss": 0.2623, "step": 17342 }, { "epoch": 0.860310531276353, "grad_norm": 13.733566284179688, "learning_rate": 4.832713949168955e-07, "loss": 0.3393, "step": 17343 }, { "epoch": 0.8603601369115531, "grad_norm": 6.949060916900635, "learning_rate": 4.829338574388581e-07, "loss": 0.2656, "step": 17344 }, { "epoch": 0.8604097425467533, "grad_norm": 6.536808490753174, "learning_rate": 4.825964318957627e-07, "loss": 0.2405, "step": 17345 }, { "epoch": 0.8604593481819535, "grad_norm": 8.81110954284668, "learning_rate": 4.822591182959718e-07, "loss": 0.2822, "step": 17346 }, { "epoch": 0.8605089538171536, "grad_norm": 5.546283721923828, "learning_rate": 4.819219166478439e-07, "loss": 0.2048, "step": 17347 }, { "epoch": 0.8605585594523538, "grad_norm": 5.3566484451293945, "learning_rate": 4.815848269597329e-07, "loss": 0.2626, "step": 17348 }, { "epoch": 0.860608165087554, "grad_norm": 5.0371222496032715, "learning_rate": 4.812478492399958e-07, "loss": 0.2704, "step": 17349 }, { "epoch": 0.8606577707227541, "grad_norm": 6.076406002044678, "learning_rate": 4.809109834969805e-07, "loss": 0.1894, "step": 17350 }, { "epoch": 0.8607073763579542, "grad_norm": 8.570001602172852, "learning_rate": 4.805742297390348e-07, "loss": 0.4256, "step": 17351 }, { "epoch": 0.8607569819931544, "grad_norm": 6.901970863342285, "learning_rate": 4.802375879745042e-07, "loss": 0.3281, "step": 17352 }, { "epoch": 0.8608065876283546, "grad_norm": 25.13019371032715, "learning_rate": 4.799010582117303e-07, "loss": 0.5483, "step": 17353 }, { "epoch": 0.8608561932635548, "grad_norm": 6.163936138153076, "learning_rate": 4.795646404590532e-07, "loss": 0.2379, "step": 17354 }, { "epoch": 0.8609057988987548, "grad_norm": 11.483922958374023, "learning_rate": 4.7922833472481e-07, "loss": 0.4414, "step": 17355 }, { "epoch": 0.860955404533955, "grad_norm": 4.61430549621582, "learning_rate": 4.788921410173325e-07, "loss": 0.2233, "step": 17356 }, { "epoch": 0.8610050101691552, "grad_norm": 7.769354343414307, "learning_rate": 4.785560593449529e-07, "loss": 0.2703, "step": 17357 }, { "epoch": 0.8610546158043554, "grad_norm": 4.918781757354736, "learning_rate": 4.78220089716e-07, "loss": 0.2965, "step": 17358 }, { "epoch": 0.8611042214395556, "grad_norm": 8.69690227508545, "learning_rate": 4.778842321387983e-07, "loss": 0.2894, "step": 17359 }, { "epoch": 0.8611538270747557, "grad_norm": 12.364546775817871, "learning_rate": 4.775484866216706e-07, "loss": 0.2543, "step": 17360 }, { "epoch": 0.8612034327099558, "grad_norm": 6.69206428527832, "learning_rate": 4.772128531729375e-07, "loss": 0.2273, "step": 17361 }, { "epoch": 0.861253038345156, "grad_norm": 3.913679361343384, "learning_rate": 4.768773318009167e-07, "loss": 0.2168, "step": 17362 }, { "epoch": 0.8613026439803562, "grad_norm": 5.3427629470825195, "learning_rate": 4.765419225139195e-07, "loss": 0.3165, "step": 17363 }, { "epoch": 0.8613522496155563, "grad_norm": 3.6194918155670166, "learning_rate": 4.762066253202619e-07, "loss": 0.2844, "step": 17364 }, { "epoch": 0.8614018552507565, "grad_norm": 6.450228214263916, "learning_rate": 4.758714402282494e-07, "loss": 0.2957, "step": 17365 }, { "epoch": 0.8614514608859567, "grad_norm": 8.838334083557129, "learning_rate": 4.755363672461888e-07, "loss": 0.2734, "step": 17366 }, { "epoch": 0.8615010665211568, "grad_norm": 5.970449924468994, "learning_rate": 4.75201406382384e-07, "loss": 0.3279, "step": 17367 }, { "epoch": 0.861550672156357, "grad_norm": 15.938116073608398, "learning_rate": 4.74866557645135e-07, "loss": 0.3567, "step": 17368 }, { "epoch": 0.8616002777915571, "grad_norm": 4.495236396789551, "learning_rate": 4.7453182104273963e-07, "loss": 0.2083, "step": 17369 }, { "epoch": 0.8616498834267573, "grad_norm": 6.899221420288086, "learning_rate": 4.741971965834935e-07, "loss": 0.2318, "step": 17370 }, { "epoch": 0.8616994890619575, "grad_norm": 12.861626625061035, "learning_rate": 4.738626842756866e-07, "loss": 0.3814, "step": 17371 }, { "epoch": 0.8617490946971575, "grad_norm": 5.227965831756592, "learning_rate": 4.7352828412761176e-07, "loss": 0.3262, "step": 17372 }, { "epoch": 0.8617987003323577, "grad_norm": 8.216184616088867, "learning_rate": 4.7319399614755235e-07, "loss": 0.3548, "step": 17373 }, { "epoch": 0.8618483059675579, "grad_norm": 14.742186546325684, "learning_rate": 4.728598203437934e-07, "loss": 0.5845, "step": 17374 }, { "epoch": 0.8618979116027581, "grad_norm": 4.33573579788208, "learning_rate": 4.725257567246161e-07, "loss": 0.2268, "step": 17375 }, { "epoch": 0.8619475172379583, "grad_norm": 4.765657424926758, "learning_rate": 4.721918052982993e-07, "loss": 0.2563, "step": 17376 }, { "epoch": 0.8619971228731584, "grad_norm": 5.337029457092285, "learning_rate": 4.7185796607311596e-07, "loss": 0.3079, "step": 17377 }, { "epoch": 0.8620467285083585, "grad_norm": 8.956920623779297, "learning_rate": 4.715242390573427e-07, "loss": 0.3249, "step": 17378 }, { "epoch": 0.8620963341435587, "grad_norm": 5.904597282409668, "learning_rate": 4.7119062425924567e-07, "loss": 0.226, "step": 17379 }, { "epoch": 0.8621459397787589, "grad_norm": 9.165789604187012, "learning_rate": 4.708571216870944e-07, "loss": 0.2817, "step": 17380 }, { "epoch": 0.862195545413959, "grad_norm": 6.3555474281311035, "learning_rate": 4.7052373134915173e-07, "loss": 0.1997, "step": 17381 }, { "epoch": 0.8622451510491592, "grad_norm": 5.240975379943848, "learning_rate": 4.701904532536811e-07, "loss": 0.3367, "step": 17382 }, { "epoch": 0.8622947566843594, "grad_norm": 4.900423526763916, "learning_rate": 4.69857287408938e-07, "loss": 0.2324, "step": 17383 }, { "epoch": 0.8623443623195595, "grad_norm": 5.827795028686523, "learning_rate": 4.6952423382318255e-07, "loss": 0.2847, "step": 17384 }, { "epoch": 0.8623939679547596, "grad_norm": 5.075106143951416, "learning_rate": 4.6919129250466436e-07, "loss": 0.3266, "step": 17385 }, { "epoch": 0.8624435735899598, "grad_norm": 7.795623302459717, "learning_rate": 4.688584634616367e-07, "loss": 0.2165, "step": 17386 }, { "epoch": 0.86249317922516, "grad_norm": 6.2300333976745605, "learning_rate": 4.6852574670234574e-07, "loss": 0.1749, "step": 17387 }, { "epoch": 0.8625427848603602, "grad_norm": 5.640110015869141, "learning_rate": 4.681931422350361e-07, "loss": 0.2947, "step": 17388 }, { "epoch": 0.8625923904955602, "grad_norm": 6.096749305725098, "learning_rate": 4.6786065006795056e-07, "loss": 0.2562, "step": 17389 }, { "epoch": 0.8626419961307604, "grad_norm": 4.802092552185059, "learning_rate": 4.6752827020932924e-07, "loss": 0.2568, "step": 17390 }, { "epoch": 0.8626916017659606, "grad_norm": 9.698390007019043, "learning_rate": 4.671960026674055e-07, "loss": 0.2302, "step": 17391 }, { "epoch": 0.8627412074011608, "grad_norm": 5.345358848571777, "learning_rate": 4.6686384745041725e-07, "loss": 0.2058, "step": 17392 }, { "epoch": 0.862790813036361, "grad_norm": 5.540051460266113, "learning_rate": 4.665318045665923e-07, "loss": 0.3454, "step": 17393 }, { "epoch": 0.8628404186715611, "grad_norm": 8.5226411819458, "learning_rate": 4.6619987402416024e-07, "loss": 0.2375, "step": 17394 }, { "epoch": 0.8628900243067612, "grad_norm": 4.790616035461426, "learning_rate": 4.6586805583134607e-07, "loss": 0.212, "step": 17395 }, { "epoch": 0.8629396299419614, "grad_norm": 8.317136764526367, "learning_rate": 4.6553634999637334e-07, "loss": 0.3433, "step": 17396 }, { "epoch": 0.8629892355771616, "grad_norm": 4.765612602233887, "learning_rate": 4.652047565274592e-07, "loss": 0.27, "step": 17397 }, { "epoch": 0.8630388412123617, "grad_norm": 10.050116539001465, "learning_rate": 4.648732754328239e-07, "loss": 0.2451, "step": 17398 }, { "epoch": 0.8630884468475619, "grad_norm": 4.285073757171631, "learning_rate": 4.6454190672068023e-07, "loss": 0.2009, "step": 17399 }, { "epoch": 0.8631380524827621, "grad_norm": 4.145475387573242, "learning_rate": 4.6421065039923884e-07, "loss": 0.1834, "step": 17400 }, { "epoch": 0.8631876581179622, "grad_norm": 5.789050102233887, "learning_rate": 4.638795064767099e-07, "loss": 0.2815, "step": 17401 }, { "epoch": 0.8632372637531623, "grad_norm": 8.965155601501465, "learning_rate": 4.63548474961299e-07, "loss": 0.2969, "step": 17402 }, { "epoch": 0.8632868693883625, "grad_norm": 3.280012607574463, "learning_rate": 4.6321755586120843e-07, "loss": 0.2024, "step": 17403 }, { "epoch": 0.8633364750235627, "grad_norm": 12.051353454589844, "learning_rate": 4.6288674918464005e-07, "loss": 0.4028, "step": 17404 }, { "epoch": 0.8633860806587629, "grad_norm": 11.865617752075195, "learning_rate": 4.625560549397884e-07, "loss": 0.2504, "step": 17405 }, { "epoch": 0.863435686293963, "grad_norm": 11.033466339111328, "learning_rate": 4.622254731348519e-07, "loss": 0.3852, "step": 17406 }, { "epoch": 0.8634852919291631, "grad_norm": 5.53373908996582, "learning_rate": 4.618950037780207e-07, "loss": 0.254, "step": 17407 }, { "epoch": 0.8635348975643633, "grad_norm": 8.726507186889648, "learning_rate": 4.615646468774831e-07, "loss": 0.1754, "step": 17408 }, { "epoch": 0.8635845031995635, "grad_norm": 7.401331901550293, "learning_rate": 4.6123440244142724e-07, "loss": 0.3864, "step": 17409 }, { "epoch": 0.8636341088347637, "grad_norm": 4.5028510093688965, "learning_rate": 4.609042704780359e-07, "loss": 0.238, "step": 17410 }, { "epoch": 0.8636837144699638, "grad_norm": 9.175704002380371, "learning_rate": 4.6057425099548967e-07, "loss": 0.2422, "step": 17411 }, { "epoch": 0.8637333201051639, "grad_norm": 7.017155170440674, "learning_rate": 4.6024434400196716e-07, "loss": 0.2549, "step": 17412 }, { "epoch": 0.8637829257403641, "grad_norm": 10.768067359924316, "learning_rate": 4.599145495056445e-07, "loss": 0.2696, "step": 17413 }, { "epoch": 0.8638325313755643, "grad_norm": 6.3253397941589355, "learning_rate": 4.5958486751469133e-07, "loss": 0.3247, "step": 17414 }, { "epoch": 0.8638821370107644, "grad_norm": 14.862377166748047, "learning_rate": 4.5925529803727997e-07, "loss": 0.3568, "step": 17415 }, { "epoch": 0.8639317426459646, "grad_norm": 8.589970588684082, "learning_rate": 4.5892584108157557e-07, "loss": 0.3673, "step": 17416 }, { "epoch": 0.8639813482811647, "grad_norm": 5.822839260101318, "learning_rate": 4.5859649665574325e-07, "loss": 0.2704, "step": 17417 }, { "epoch": 0.8640309539163649, "grad_norm": 5.912962436676025, "learning_rate": 4.582672647679443e-07, "loss": 0.2564, "step": 17418 }, { "epoch": 0.864080559551565, "grad_norm": 5.16497278213501, "learning_rate": 4.5793814542633654e-07, "loss": 0.2814, "step": 17419 }, { "epoch": 0.8641301651867652, "grad_norm": 6.971219539642334, "learning_rate": 4.576091386390763e-07, "loss": 0.1769, "step": 17420 }, { "epoch": 0.8641797708219654, "grad_norm": 6.1934709548950195, "learning_rate": 4.572802444143171e-07, "loss": 0.2701, "step": 17421 }, { "epoch": 0.8642293764571656, "grad_norm": 6.5731096267700195, "learning_rate": 4.5695146276020676e-07, "loss": 0.3469, "step": 17422 }, { "epoch": 0.8642789820923656, "grad_norm": 7.993266582489014, "learning_rate": 4.5662279368489606e-07, "loss": 0.3596, "step": 17423 }, { "epoch": 0.8643285877275658, "grad_norm": 5.9537577629089355, "learning_rate": 4.5629423719652677e-07, "loss": 0.2198, "step": 17424 }, { "epoch": 0.864378193362766, "grad_norm": 5.342778205871582, "learning_rate": 4.559657933032419e-07, "loss": 0.2347, "step": 17425 }, { "epoch": 0.8644277989979662, "grad_norm": 4.724422454833984, "learning_rate": 4.556374620131798e-07, "loss": 0.2683, "step": 17426 }, { "epoch": 0.8644774046331664, "grad_norm": 13.079713821411133, "learning_rate": 4.5530924333447804e-07, "loss": 0.3326, "step": 17427 }, { "epoch": 0.8645270102683665, "grad_norm": 6.453274726867676, "learning_rate": 4.549811372752666e-07, "loss": 0.3275, "step": 17428 }, { "epoch": 0.8645766159035666, "grad_norm": 8.331315994262695, "learning_rate": 4.546531438436813e-07, "loss": 0.2458, "step": 17429 }, { "epoch": 0.8646262215387668, "grad_norm": 10.685624122619629, "learning_rate": 4.5432526304784565e-07, "loss": 0.3498, "step": 17430 }, { "epoch": 0.864675827173967, "grad_norm": 10.520350456237793, "learning_rate": 4.539974948958864e-07, "loss": 0.35, "step": 17431 }, { "epoch": 0.8647254328091671, "grad_norm": 5.463348865509033, "learning_rate": 4.5366983939592544e-07, "loss": 0.2624, "step": 17432 }, { "epoch": 0.8647750384443673, "grad_norm": 14.086175918579102, "learning_rate": 4.533422965560835e-07, "loss": 0.3963, "step": 17433 }, { "epoch": 0.8648246440795674, "grad_norm": 6.30160665512085, "learning_rate": 4.530148663844747e-07, "loss": 0.231, "step": 17434 }, { "epoch": 0.8648742497147676, "grad_norm": 8.56164264678955, "learning_rate": 4.5268754888921573e-07, "loss": 0.325, "step": 17435 }, { "epoch": 0.8649238553499677, "grad_norm": 7.424067974090576, "learning_rate": 4.523603440784152e-07, "loss": 0.3364, "step": 17436 }, { "epoch": 0.8649734609851679, "grad_norm": 4.947346210479736, "learning_rate": 4.520332519601828e-07, "loss": 0.2812, "step": 17437 }, { "epoch": 0.8650230666203681, "grad_norm": 6.39620304107666, "learning_rate": 4.5170627254262354e-07, "loss": 0.2733, "step": 17438 }, { "epoch": 0.8650726722555683, "grad_norm": 14.204994201660156, "learning_rate": 4.5137940583384e-07, "loss": 0.3703, "step": 17439 }, { "epoch": 0.8651222778907683, "grad_norm": 6.710595607757568, "learning_rate": 4.510526518419328e-07, "loss": 0.3223, "step": 17440 }, { "epoch": 0.8651718835259685, "grad_norm": 7.409119129180908, "learning_rate": 4.507260105749994e-07, "loss": 0.4318, "step": 17441 }, { "epoch": 0.8652214891611687, "grad_norm": 5.300476551055908, "learning_rate": 4.5039948204113116e-07, "loss": 0.3106, "step": 17442 }, { "epoch": 0.8652710947963689, "grad_norm": 6.627967357635498, "learning_rate": 4.500730662484237e-07, "loss": 0.3047, "step": 17443 }, { "epoch": 0.8653207004315691, "grad_norm": 4.461027145385742, "learning_rate": 4.4974676320496347e-07, "loss": 0.2476, "step": 17444 }, { "epoch": 0.8653703060667692, "grad_norm": 5.833381652832031, "learning_rate": 4.494205729188361e-07, "loss": 0.2518, "step": 17445 }, { "epoch": 0.8654199117019693, "grad_norm": 11.706961631774902, "learning_rate": 4.4909449539812577e-07, "loss": 0.4364, "step": 17446 }, { "epoch": 0.8654695173371695, "grad_norm": 8.28516960144043, "learning_rate": 4.487685306509132e-07, "loss": 0.3628, "step": 17447 }, { "epoch": 0.8655191229723697, "grad_norm": 5.545381546020508, "learning_rate": 4.4844267868527304e-07, "loss": 0.2211, "step": 17448 }, { "epoch": 0.8655687286075698, "grad_norm": 5.220392227172852, "learning_rate": 4.4811693950928436e-07, "loss": 0.2641, "step": 17449 }, { "epoch": 0.86561833424277, "grad_norm": 5.146458148956299, "learning_rate": 4.477913131310163e-07, "loss": 0.3114, "step": 17450 }, { "epoch": 0.8656679398779701, "grad_norm": 6.458098888397217, "learning_rate": 4.4746579955853795e-07, "loss": 0.2151, "step": 17451 }, { "epoch": 0.8657175455131703, "grad_norm": 7.124593257904053, "learning_rate": 4.4714039879991736e-07, "loss": 0.2485, "step": 17452 }, { "epoch": 0.8657671511483704, "grad_norm": 12.65889835357666, "learning_rate": 4.468151108632174e-07, "loss": 0.3259, "step": 17453 }, { "epoch": 0.8658167567835706, "grad_norm": 6.874587535858154, "learning_rate": 4.4648993575649725e-07, "loss": 0.3363, "step": 17454 }, { "epoch": 0.8658663624187708, "grad_norm": 5.461682319641113, "learning_rate": 4.4616487348781767e-07, "loss": 0.2587, "step": 17455 }, { "epoch": 0.865915968053971, "grad_norm": 8.299301147460938, "learning_rate": 4.4583992406523055e-07, "loss": 0.3323, "step": 17456 }, { "epoch": 0.865965573689171, "grad_norm": 6.796627044677734, "learning_rate": 4.455150874967923e-07, "loss": 0.2868, "step": 17457 }, { "epoch": 0.8660151793243712, "grad_norm": 10.229436874389648, "learning_rate": 4.451903637905497e-07, "loss": 0.3758, "step": 17458 }, { "epoch": 0.8660647849595714, "grad_norm": 6.661122798919678, "learning_rate": 4.448657529545497e-07, "loss": 0.2348, "step": 17459 }, { "epoch": 0.8661143905947716, "grad_norm": 5.385020732879639, "learning_rate": 4.4454125499683753e-07, "loss": 0.1961, "step": 17460 }, { "epoch": 0.8661639962299718, "grad_norm": 6.16645622253418, "learning_rate": 4.4421686992545286e-07, "loss": 0.3012, "step": 17461 }, { "epoch": 0.8662136018651719, "grad_norm": 4.7850823402404785, "learning_rate": 4.438925977484354e-07, "loss": 0.2606, "step": 17462 }, { "epoch": 0.866263207500372, "grad_norm": 5.432106971740723, "learning_rate": 4.435684384738215e-07, "loss": 0.226, "step": 17463 }, { "epoch": 0.8663128131355722, "grad_norm": 5.901587963104248, "learning_rate": 4.432443921096419e-07, "loss": 0.1678, "step": 17464 }, { "epoch": 0.8663624187707724, "grad_norm": 5.4378743171691895, "learning_rate": 4.4292045866392684e-07, "loss": 0.2646, "step": 17465 }, { "epoch": 0.8664120244059725, "grad_norm": 8.135619163513184, "learning_rate": 4.42596638144705e-07, "loss": 0.356, "step": 17466 }, { "epoch": 0.8664616300411727, "grad_norm": 12.980676651000977, "learning_rate": 4.422729305599993e-07, "loss": 0.3017, "step": 17467 }, { "epoch": 0.8665112356763728, "grad_norm": 6.456593990325928, "learning_rate": 4.419493359178323e-07, "loss": 0.2917, "step": 17468 }, { "epoch": 0.866560841311573, "grad_norm": 4.458552837371826, "learning_rate": 4.416258542262231e-07, "loss": 0.237, "step": 17469 }, { "epoch": 0.8666104469467731, "grad_norm": 10.659979820251465, "learning_rate": 4.4130248549318745e-07, "loss": 0.1766, "step": 17470 }, { "epoch": 0.8666600525819733, "grad_norm": 7.493292808532715, "learning_rate": 4.409792297267368e-07, "loss": 0.3158, "step": 17471 }, { "epoch": 0.8667096582171735, "grad_norm": 3.6171834468841553, "learning_rate": 4.406560869348847e-07, "loss": 0.2403, "step": 17472 }, { "epoch": 0.8667592638523737, "grad_norm": 11.330897331237793, "learning_rate": 4.403330571256365e-07, "loss": 0.4145, "step": 17473 }, { "epoch": 0.8668088694875737, "grad_norm": 9.203960418701172, "learning_rate": 4.4001014030699784e-07, "loss": 0.4027, "step": 17474 }, { "epoch": 0.8668584751227739, "grad_norm": 9.177270889282227, "learning_rate": 4.3968733648697027e-07, "loss": 0.2695, "step": 17475 }, { "epoch": 0.8669080807579741, "grad_norm": 7.685533046722412, "learning_rate": 4.393646456735534e-07, "loss": 0.3234, "step": 17476 }, { "epoch": 0.8669576863931743, "grad_norm": 5.563616752624512, "learning_rate": 4.3904206787474366e-07, "loss": 0.1588, "step": 17477 }, { "epoch": 0.8670072920283745, "grad_norm": 5.4887800216674805, "learning_rate": 4.387196030985358e-07, "loss": 0.3547, "step": 17478 }, { "epoch": 0.8670568976635746, "grad_norm": 14.387791633605957, "learning_rate": 4.3839725135291775e-07, "loss": 0.341, "step": 17479 }, { "epoch": 0.8671065032987747, "grad_norm": 18.292634963989258, "learning_rate": 4.380750126458805e-07, "loss": 0.2659, "step": 17480 }, { "epoch": 0.8671561089339749, "grad_norm": 9.362858772277832, "learning_rate": 4.3775288698540753e-07, "loss": 0.1964, "step": 17481 }, { "epoch": 0.8672057145691751, "grad_norm": 7.485412120819092, "learning_rate": 4.374308743794814e-07, "loss": 0.2726, "step": 17482 }, { "epoch": 0.8672553202043752, "grad_norm": 6.71547269821167, "learning_rate": 4.371089748360824e-07, "loss": 0.2863, "step": 17483 }, { "epoch": 0.8673049258395754, "grad_norm": 4.752416133880615, "learning_rate": 4.3678718836318803e-07, "loss": 0.2465, "step": 17484 }, { "epoch": 0.8673545314747755, "grad_norm": 5.8809733390808105, "learning_rate": 4.364655149687691e-07, "loss": 0.2819, "step": 17485 }, { "epoch": 0.8674041371099757, "grad_norm": 11.664897918701172, "learning_rate": 4.3614395466080093e-07, "loss": 0.336, "step": 17486 }, { "epoch": 0.8674537427451758, "grad_norm": 6.74872350692749, "learning_rate": 4.358225074472494e-07, "loss": 0.3497, "step": 17487 }, { "epoch": 0.867503348380376, "grad_norm": 8.557906150817871, "learning_rate": 4.3550117333608035e-07, "loss": 0.3243, "step": 17488 }, { "epoch": 0.8675529540155762, "grad_norm": 5.495757579803467, "learning_rate": 4.3517995233525733e-07, "loss": 0.2975, "step": 17489 }, { "epoch": 0.8676025596507764, "grad_norm": 19.016258239746094, "learning_rate": 4.348588444527402e-07, "loss": 0.4509, "step": 17490 }, { "epoch": 0.8676521652859764, "grad_norm": 5.933291435241699, "learning_rate": 4.345378496964847e-07, "loss": 0.261, "step": 17491 }, { "epoch": 0.8677017709211766, "grad_norm": 6.895486831665039, "learning_rate": 4.3421696807444847e-07, "loss": 0.2711, "step": 17492 }, { "epoch": 0.8677513765563768, "grad_norm": 8.327628135681152, "learning_rate": 4.33896199594579e-07, "loss": 0.367, "step": 17493 }, { "epoch": 0.867800982191577, "grad_norm": 4.947776794433594, "learning_rate": 4.3357554426482875e-07, "loss": 0.2281, "step": 17494 }, { "epoch": 0.8678505878267772, "grad_norm": 4.684431076049805, "learning_rate": 4.332550020931414e-07, "loss": 0.1897, "step": 17495 }, { "epoch": 0.8679001934619773, "grad_norm": 7.381856441497803, "learning_rate": 4.3293457308746125e-07, "loss": 0.2681, "step": 17496 }, { "epoch": 0.8679497990971774, "grad_norm": 10.810538291931152, "learning_rate": 4.326142572557279e-07, "loss": 0.3478, "step": 17497 }, { "epoch": 0.8679994047323776, "grad_norm": 10.763398170471191, "learning_rate": 4.3229405460588013e-07, "loss": 0.2758, "step": 17498 }, { "epoch": 0.8680490103675778, "grad_norm": 7.277365207672119, "learning_rate": 4.3197396514585045e-07, "loss": 0.351, "step": 17499 }, { "epoch": 0.8680986160027779, "grad_norm": 5.067229270935059, "learning_rate": 4.3165398888357415e-07, "loss": 0.2468, "step": 17500 }, { "epoch": 0.8681482216379781, "grad_norm": 9.509544372558594, "learning_rate": 4.313341258269771e-07, "loss": 0.3535, "step": 17501 }, { "epoch": 0.8681978272731782, "grad_norm": 11.160103797912598, "learning_rate": 4.310143759839874e-07, "loss": 0.3744, "step": 17502 }, { "epoch": 0.8682474329083784, "grad_norm": 15.748355865478516, "learning_rate": 4.306947393625277e-07, "loss": 0.2553, "step": 17503 }, { "epoch": 0.8682970385435785, "grad_norm": 6.2640790939331055, "learning_rate": 4.30375215970521e-07, "loss": 0.2873, "step": 17504 }, { "epoch": 0.8683466441787787, "grad_norm": 16.346725463867188, "learning_rate": 4.3005580581588104e-07, "loss": 0.342, "step": 17505 }, { "epoch": 0.8683962498139789, "grad_norm": 6.738061904907227, "learning_rate": 4.297365089065275e-07, "loss": 0.2478, "step": 17506 }, { "epoch": 0.8684458554491791, "grad_norm": 8.384634971618652, "learning_rate": 4.2941732525036973e-07, "loss": 0.3479, "step": 17507 }, { "epoch": 0.8684954610843791, "grad_norm": 5.207651138305664, "learning_rate": 4.2909825485531795e-07, "loss": 0.3348, "step": 17508 }, { "epoch": 0.8685450667195793, "grad_norm": 6.739530086517334, "learning_rate": 4.2877929772927927e-07, "loss": 0.2235, "step": 17509 }, { "epoch": 0.8685946723547795, "grad_norm": 8.080657958984375, "learning_rate": 4.2846045388015735e-07, "loss": 0.2486, "step": 17510 }, { "epoch": 0.8686442779899797, "grad_norm": 4.159981727600098, "learning_rate": 4.281417233158536e-07, "loss": 0.2137, "step": 17511 }, { "epoch": 0.8686938836251799, "grad_norm": 10.706735610961914, "learning_rate": 4.278231060442661e-07, "loss": 0.3548, "step": 17512 }, { "epoch": 0.86874348926038, "grad_norm": 7.330031871795654, "learning_rate": 4.2750460207328926e-07, "loss": 0.2357, "step": 17513 }, { "epoch": 0.8687930948955801, "grad_norm": 11.315762519836426, "learning_rate": 4.271862114108183e-07, "loss": 0.3751, "step": 17514 }, { "epoch": 0.8688427005307803, "grad_norm": 8.450128555297852, "learning_rate": 4.2686793406474024e-07, "loss": 0.243, "step": 17515 }, { "epoch": 0.8688923061659805, "grad_norm": 8.153478622436523, "learning_rate": 4.2654977004294386e-07, "loss": 0.2994, "step": 17516 }, { "epoch": 0.8689419118011806, "grad_norm": 4.6822733879089355, "learning_rate": 4.2623171935331276e-07, "loss": 0.1905, "step": 17517 }, { "epoch": 0.8689915174363808, "grad_norm": 5.728858470916748, "learning_rate": 4.25913782003729e-07, "loss": 0.2975, "step": 17518 }, { "epoch": 0.8690411230715809, "grad_norm": 7.297357082366943, "learning_rate": 4.255959580020702e-07, "loss": 0.2885, "step": 17519 }, { "epoch": 0.8690907287067811, "grad_norm": 10.068336486816406, "learning_rate": 4.2527824735621335e-07, "loss": 0.2134, "step": 17520 }, { "epoch": 0.8691403343419812, "grad_norm": 10.790380477905273, "learning_rate": 4.249606500740316e-07, "loss": 0.3362, "step": 17521 }, { "epoch": 0.8691899399771814, "grad_norm": 10.026013374328613, "learning_rate": 4.2464316616339373e-07, "loss": 0.2859, "step": 17522 }, { "epoch": 0.8692395456123816, "grad_norm": 11.095174789428711, "learning_rate": 4.2432579563216725e-07, "loss": 0.3248, "step": 17523 }, { "epoch": 0.8692891512475818, "grad_norm": 8.06734561920166, "learning_rate": 4.240085384882181e-07, "loss": 0.3454, "step": 17524 }, { "epoch": 0.8693387568827818, "grad_norm": 11.869817733764648, "learning_rate": 4.236913947394067e-07, "loss": 0.3355, "step": 17525 }, { "epoch": 0.869388362517982, "grad_norm": 10.810439109802246, "learning_rate": 4.233743643935928e-07, "loss": 0.2022, "step": 17526 }, { "epoch": 0.8694379681531822, "grad_norm": 7.301629066467285, "learning_rate": 4.2305744745863243e-07, "loss": 0.24, "step": 17527 }, { "epoch": 0.8694875737883824, "grad_norm": 10.87171745300293, "learning_rate": 4.2274064394237925e-07, "loss": 0.3265, "step": 17528 }, { "epoch": 0.8695371794235826, "grad_norm": 8.235379219055176, "learning_rate": 4.224239538526842e-07, "loss": 0.2191, "step": 17529 }, { "epoch": 0.8695867850587827, "grad_norm": 11.252586364746094, "learning_rate": 4.221073771973927e-07, "loss": 0.4131, "step": 17530 }, { "epoch": 0.8696363906939828, "grad_norm": 11.57197380065918, "learning_rate": 4.217909139843518e-07, "loss": 0.4786, "step": 17531 }, { "epoch": 0.869685996329183, "grad_norm": 3.9616639614105225, "learning_rate": 4.21474564221403e-07, "loss": 0.2879, "step": 17532 }, { "epoch": 0.8697356019643832, "grad_norm": 6.031185626983643, "learning_rate": 4.2115832791638553e-07, "loss": 0.2712, "step": 17533 }, { "epoch": 0.8697852075995833, "grad_norm": 5.407458782196045, "learning_rate": 4.208422050771355e-07, "loss": 0.2505, "step": 17534 }, { "epoch": 0.8698348132347835, "grad_norm": 8.454463005065918, "learning_rate": 4.2052619571148865e-07, "loss": 0.3019, "step": 17535 }, { "epoch": 0.8698844188699836, "grad_norm": 8.917162895202637, "learning_rate": 4.2021029982727167e-07, "loss": 0.3585, "step": 17536 }, { "epoch": 0.8699340245051838, "grad_norm": 8.019266128540039, "learning_rate": 4.198945174323171e-07, "loss": 0.2585, "step": 17537 }, { "epoch": 0.8699836301403839, "grad_norm": 7.004737377166748, "learning_rate": 4.195788485344476e-07, "loss": 0.3324, "step": 17538 }, { "epoch": 0.8700332357755841, "grad_norm": 9.640609741210938, "learning_rate": 4.192632931414864e-07, "loss": 0.2691, "step": 17539 }, { "epoch": 0.8700828414107843, "grad_norm": 5.59218692779541, "learning_rate": 4.1894785126125213e-07, "loss": 0.2355, "step": 17540 }, { "epoch": 0.8701324470459845, "grad_norm": 7.115644931793213, "learning_rate": 4.1863252290156363e-07, "loss": 0.3256, "step": 17541 }, { "epoch": 0.8701820526811845, "grad_norm": 10.164793014526367, "learning_rate": 4.183173080702319e-07, "loss": 0.348, "step": 17542 }, { "epoch": 0.8702316583163847, "grad_norm": 6.309624671936035, "learning_rate": 4.1800220677507164e-07, "loss": 0.2976, "step": 17543 }, { "epoch": 0.8702812639515849, "grad_norm": 9.215672492980957, "learning_rate": 4.1768721902388734e-07, "loss": 0.2896, "step": 17544 }, { "epoch": 0.8703308695867851, "grad_norm": 4.8394880294799805, "learning_rate": 4.173723448244882e-07, "loss": 0.3197, "step": 17545 }, { "epoch": 0.8703804752219853, "grad_norm": 12.632296562194824, "learning_rate": 4.170575841846747e-07, "loss": 0.3928, "step": 17546 }, { "epoch": 0.8704300808571854, "grad_norm": 7.153964042663574, "learning_rate": 4.1674293711224724e-07, "loss": 0.3582, "step": 17547 }, { "epoch": 0.8704796864923855, "grad_norm": 6.888754844665527, "learning_rate": 4.1642840361500347e-07, "loss": 0.3073, "step": 17548 }, { "epoch": 0.8705292921275857, "grad_norm": 4.105193138122559, "learning_rate": 4.1611398370073774e-07, "loss": 0.2565, "step": 17549 }, { "epoch": 0.8705788977627859, "grad_norm": 5.8296732902526855, "learning_rate": 4.157996773772388e-07, "loss": 0.2471, "step": 17550 }, { "epoch": 0.870628503397986, "grad_norm": 5.939638137817383, "learning_rate": 4.1548548465229986e-07, "loss": 0.2408, "step": 17551 }, { "epoch": 0.8706781090331862, "grad_norm": 8.860990524291992, "learning_rate": 4.151714055337036e-07, "loss": 0.2634, "step": 17552 }, { "epoch": 0.8707277146683863, "grad_norm": 8.037761688232422, "learning_rate": 4.1485744002923325e-07, "loss": 0.2656, "step": 17553 }, { "epoch": 0.8707773203035865, "grad_norm": 7.306467533111572, "learning_rate": 4.1454358814667026e-07, "loss": 0.2249, "step": 17554 }, { "epoch": 0.8708269259387866, "grad_norm": 5.9689860343933105, "learning_rate": 4.1422984989379187e-07, "loss": 0.2928, "step": 17555 }, { "epoch": 0.8708765315739868, "grad_norm": 7.004425525665283, "learning_rate": 4.139162252783707e-07, "loss": 0.2828, "step": 17556 }, { "epoch": 0.870926137209187, "grad_norm": 8.765425682067871, "learning_rate": 4.1360271430818166e-07, "loss": 0.3687, "step": 17557 }, { "epoch": 0.8709757428443872, "grad_norm": 12.614635467529297, "learning_rate": 4.132893169909907e-07, "loss": 0.3476, "step": 17558 }, { "epoch": 0.8710253484795872, "grad_norm": 6.918558120727539, "learning_rate": 4.129760333345656e-07, "loss": 0.3122, "step": 17559 }, { "epoch": 0.8710749541147874, "grad_norm": 9.145313262939453, "learning_rate": 4.126628633466695e-07, "loss": 0.2611, "step": 17560 }, { "epoch": 0.8711245597499876, "grad_norm": 25.578136444091797, "learning_rate": 4.1234980703506346e-07, "loss": 0.3281, "step": 17561 }, { "epoch": 0.8711741653851878, "grad_norm": 9.840948104858398, "learning_rate": 4.1203686440750293e-07, "loss": 0.341, "step": 17562 }, { "epoch": 0.871223771020388, "grad_norm": 44.27722930908203, "learning_rate": 4.1172403547174553e-07, "loss": 0.3378, "step": 17563 }, { "epoch": 0.8712733766555881, "grad_norm": 11.081079483032227, "learning_rate": 4.114113202355408e-07, "loss": 0.3393, "step": 17564 }, { "epoch": 0.8713229822907882, "grad_norm": 11.087456703186035, "learning_rate": 4.1109871870664065e-07, "loss": 0.294, "step": 17565 }, { "epoch": 0.8713725879259884, "grad_norm": 7.100939750671387, "learning_rate": 4.10786230892789e-07, "loss": 0.2579, "step": 17566 }, { "epoch": 0.8714221935611886, "grad_norm": 6.533313274383545, "learning_rate": 4.104738568017308e-07, "loss": 0.2796, "step": 17567 }, { "epoch": 0.8714717991963887, "grad_norm": 8.795327186584473, "learning_rate": 4.101615964412059e-07, "loss": 0.2225, "step": 17568 }, { "epoch": 0.8715214048315889, "grad_norm": 7.505208492279053, "learning_rate": 4.098494498189537e-07, "loss": 0.3167, "step": 17569 }, { "epoch": 0.871571010466789, "grad_norm": 6.840094089508057, "learning_rate": 4.0953741694270796e-07, "loss": 0.3008, "step": 17570 }, { "epoch": 0.8716206161019892, "grad_norm": 6.106980800628662, "learning_rate": 4.092254978202026e-07, "loss": 0.3036, "step": 17571 }, { "epoch": 0.8716702217371893, "grad_norm": 5.018366813659668, "learning_rate": 4.089136924591647e-07, "loss": 0.2581, "step": 17572 }, { "epoch": 0.8717198273723895, "grad_norm": 10.023757934570312, "learning_rate": 4.086020008673225e-07, "loss": 0.2842, "step": 17573 }, { "epoch": 0.8717694330075897, "grad_norm": 8.087265968322754, "learning_rate": 4.0829042305239985e-07, "loss": 0.3176, "step": 17574 }, { "epoch": 0.8718190386427899, "grad_norm": 4.790767192840576, "learning_rate": 4.079789590221178e-07, "loss": 0.2415, "step": 17575 }, { "epoch": 0.8718686442779899, "grad_norm": 7.080511093139648, "learning_rate": 4.076676087841941e-07, "loss": 0.2872, "step": 17576 }, { "epoch": 0.8719182499131901, "grad_norm": 8.01223087310791, "learning_rate": 4.073563723463442e-07, "loss": 0.3474, "step": 17577 }, { "epoch": 0.8719678555483903, "grad_norm": 5.66435432434082, "learning_rate": 4.070452497162819e-07, "loss": 0.2809, "step": 17578 }, { "epoch": 0.8720174611835905, "grad_norm": 15.076351165771484, "learning_rate": 4.06734240901715e-07, "loss": 0.2657, "step": 17579 }, { "epoch": 0.8720670668187906, "grad_norm": 5.477118968963623, "learning_rate": 4.0642334591035117e-07, "loss": 0.2723, "step": 17580 }, { "epoch": 0.8721166724539908, "grad_norm": 5.627723693847656, "learning_rate": 4.061125647498954e-07, "loss": 0.3692, "step": 17581 }, { "epoch": 0.8721662780891909, "grad_norm": 7.195966720581055, "learning_rate": 4.0580189742804756e-07, "loss": 0.2699, "step": 17582 }, { "epoch": 0.8722158837243911, "grad_norm": 10.63100528717041, "learning_rate": 4.054913439525071e-07, "loss": 0.25, "step": 17583 }, { "epoch": 0.8722654893595913, "grad_norm": 10.800522804260254, "learning_rate": 4.051809043309696e-07, "loss": 0.264, "step": 17584 }, { "epoch": 0.8723150949947914, "grad_norm": 3.686460256576538, "learning_rate": 4.0487057857112767e-07, "loss": 0.1978, "step": 17585 }, { "epoch": 0.8723647006299916, "grad_norm": 11.011236190795898, "learning_rate": 4.0456036668067247e-07, "loss": 0.4099, "step": 17586 }, { "epoch": 0.8724143062651917, "grad_norm": 5.008779048919678, "learning_rate": 4.042502686672883e-07, "loss": 0.2524, "step": 17587 }, { "epoch": 0.8724639119003919, "grad_norm": 7.545248508453369, "learning_rate": 4.0394028453866306e-07, "loss": 0.2747, "step": 17588 }, { "epoch": 0.872513517535592, "grad_norm": 4.570380687713623, "learning_rate": 4.036304143024755e-07, "loss": 0.2926, "step": 17589 }, { "epoch": 0.8725631231707922, "grad_norm": 5.710977077484131, "learning_rate": 4.033206579664056e-07, "loss": 0.3602, "step": 17590 }, { "epoch": 0.8726127288059924, "grad_norm": 23.31020736694336, "learning_rate": 4.030110155381295e-07, "loss": 0.4122, "step": 17591 }, { "epoch": 0.8726623344411926, "grad_norm": 4.034876823425293, "learning_rate": 4.0270148702532043e-07, "loss": 0.2177, "step": 17592 }, { "epoch": 0.8727119400763926, "grad_norm": 8.058206558227539, "learning_rate": 4.0239207243564614e-07, "loss": 0.2285, "step": 17593 }, { "epoch": 0.8727615457115928, "grad_norm": 5.090205192565918, "learning_rate": 4.020827717767778e-07, "loss": 0.2828, "step": 17594 }, { "epoch": 0.872811151346793, "grad_norm": 6.478175640106201, "learning_rate": 4.017735850563775e-07, "loss": 0.2468, "step": 17595 }, { "epoch": 0.8728607569819932, "grad_norm": 6.801741600036621, "learning_rate": 4.0146451228210814e-07, "loss": 0.3582, "step": 17596 }, { "epoch": 0.8729103626171933, "grad_norm": 8.827781677246094, "learning_rate": 4.011555534616279e-07, "loss": 0.36, "step": 17597 }, { "epoch": 0.8729599682523935, "grad_norm": 6.597146987915039, "learning_rate": 4.00846708602593e-07, "loss": 0.3097, "step": 17598 }, { "epoch": 0.8730095738875936, "grad_norm": 10.13033390045166, "learning_rate": 4.0053797771265777e-07, "loss": 0.2873, "step": 17599 }, { "epoch": 0.8730591795227938, "grad_norm": 6.244838714599609, "learning_rate": 4.002293607994723e-07, "loss": 0.3543, "step": 17600 }, { "epoch": 0.873108785157994, "grad_norm": 8.50180435180664, "learning_rate": 3.9992085787068257e-07, "loss": 0.3441, "step": 17601 }, { "epoch": 0.8731583907931941, "grad_norm": 8.791033744812012, "learning_rate": 3.996124689339365e-07, "loss": 0.2456, "step": 17602 }, { "epoch": 0.8732079964283943, "grad_norm": 7.857614040374756, "learning_rate": 3.993041939968734e-07, "loss": 0.2733, "step": 17603 }, { "epoch": 0.8732576020635944, "grad_norm": 14.323375701904297, "learning_rate": 3.9899603306713384e-07, "loss": 0.3547, "step": 17604 }, { "epoch": 0.8733072076987946, "grad_norm": 6.5515642166137695, "learning_rate": 3.9868798615235347e-07, "loss": 0.2852, "step": 17605 }, { "epoch": 0.8733568133339947, "grad_norm": 8.623294830322266, "learning_rate": 3.9838005326016726e-07, "loss": 0.4156, "step": 17606 }, { "epoch": 0.8734064189691949, "grad_norm": 6.830049514770508, "learning_rate": 3.9807223439820355e-07, "loss": 0.3254, "step": 17607 }, { "epoch": 0.8734560246043951, "grad_norm": 7.122363567352295, "learning_rate": 3.9776452957409283e-07, "loss": 0.3042, "step": 17608 }, { "epoch": 0.8735056302395953, "grad_norm": 6.605335235595703, "learning_rate": 3.97456938795458e-07, "loss": 0.2421, "step": 17609 }, { "epoch": 0.8735552358747953, "grad_norm": 7.479638576507568, "learning_rate": 3.9714946206992234e-07, "loss": 0.3067, "step": 17610 }, { "epoch": 0.8736048415099955, "grad_norm": 6.526050090789795, "learning_rate": 3.9684209940510477e-07, "loss": 0.3199, "step": 17611 }, { "epoch": 0.8736544471451957, "grad_norm": 5.654921054840088, "learning_rate": 3.965348508086231e-07, "loss": 0.2156, "step": 17612 }, { "epoch": 0.8737040527803959, "grad_norm": 13.090489387512207, "learning_rate": 3.96227716288089e-07, "loss": 0.357, "step": 17613 }, { "epoch": 0.873753658415596, "grad_norm": 16.66387176513672, "learning_rate": 3.959206958511158e-07, "loss": 0.3911, "step": 17614 }, { "epoch": 0.8738032640507962, "grad_norm": 7.472156524658203, "learning_rate": 3.956137895053097e-07, "loss": 0.3009, "step": 17615 }, { "epoch": 0.8738528696859963, "grad_norm": 10.961577415466309, "learning_rate": 3.9530699725827683e-07, "loss": 0.296, "step": 17616 }, { "epoch": 0.8739024753211965, "grad_norm": 7.994786739349365, "learning_rate": 3.950003191176194e-07, "loss": 0.2567, "step": 17617 }, { "epoch": 0.8739520809563966, "grad_norm": 5.733460426330566, "learning_rate": 3.946937550909369e-07, "loss": 0.1989, "step": 17618 }, { "epoch": 0.8740016865915968, "grad_norm": 6.85154390335083, "learning_rate": 3.943873051858266e-07, "loss": 0.2743, "step": 17619 }, { "epoch": 0.874051292226797, "grad_norm": 5.817679405212402, "learning_rate": 3.940809694098835e-07, "loss": 0.3236, "step": 17620 }, { "epoch": 0.8741008978619971, "grad_norm": 11.800312042236328, "learning_rate": 3.937747477706949e-07, "loss": 0.3937, "step": 17621 }, { "epoch": 0.8741505034971973, "grad_norm": 13.924237251281738, "learning_rate": 3.934686402758542e-07, "loss": 0.3903, "step": 17622 }, { "epoch": 0.8742001091323974, "grad_norm": 8.978349685668945, "learning_rate": 3.93162646932943e-07, "loss": 0.2147, "step": 17623 }, { "epoch": 0.8742497147675976, "grad_norm": 7.306947231292725, "learning_rate": 3.928567677495454e-07, "loss": 0.3315, "step": 17624 }, { "epoch": 0.8742993204027978, "grad_norm": 6.5891523361206055, "learning_rate": 3.925510027332413e-07, "loss": 0.2852, "step": 17625 }, { "epoch": 0.874348926037998, "grad_norm": 5.708381652832031, "learning_rate": 3.9224535189160695e-07, "loss": 0.2421, "step": 17626 }, { "epoch": 0.874398531673198, "grad_norm": 12.046242713928223, "learning_rate": 3.919398152322179e-07, "loss": 0.2366, "step": 17627 }, { "epoch": 0.8744481373083982, "grad_norm": 20.58540916442871, "learning_rate": 3.916343927626448e-07, "loss": 0.3498, "step": 17628 }, { "epoch": 0.8744977429435984, "grad_norm": 7.203904151916504, "learning_rate": 3.91329084490456e-07, "loss": 0.3128, "step": 17629 }, { "epoch": 0.8745473485787986, "grad_norm": 7.40522575378418, "learning_rate": 3.9102389042321667e-07, "loss": 0.3444, "step": 17630 }, { "epoch": 0.8745969542139987, "grad_norm": 7.576371669769287, "learning_rate": 3.9071881056849005e-07, "loss": 0.355, "step": 17631 }, { "epoch": 0.8746465598491989, "grad_norm": 8.067410469055176, "learning_rate": 3.904138449338368e-07, "loss": 0.286, "step": 17632 }, { "epoch": 0.874696165484399, "grad_norm": 5.848062515258789, "learning_rate": 3.901089935268132e-07, "loss": 0.2617, "step": 17633 }, { "epoch": 0.8747457711195992, "grad_norm": 8.49581241607666, "learning_rate": 3.898042563549742e-07, "loss": 0.3357, "step": 17634 }, { "epoch": 0.8747953767547993, "grad_norm": 6.257934093475342, "learning_rate": 3.8949963342587105e-07, "loss": 0.2729, "step": 17635 }, { "epoch": 0.8748449823899995, "grad_norm": 5.903203964233398, "learning_rate": 3.891951247470527e-07, "loss": 0.2176, "step": 17636 }, { "epoch": 0.8748945880251997, "grad_norm": 11.605572700500488, "learning_rate": 3.8889073032606585e-07, "loss": 0.3548, "step": 17637 }, { "epoch": 0.8749441936603998, "grad_norm": 8.564011573791504, "learning_rate": 3.8858645017045114e-07, "loss": 0.3467, "step": 17638 }, { "epoch": 0.8749937992956, "grad_norm": 4.29807710647583, "learning_rate": 3.882822842877504e-07, "loss": 0.2791, "step": 17639 }, { "epoch": 0.8750434049308001, "grad_norm": 6.005732536315918, "learning_rate": 3.8797823268550086e-07, "loss": 0.2769, "step": 17640 }, { "epoch": 0.8750930105660003, "grad_norm": 10.432087898254395, "learning_rate": 3.87674295371237e-07, "loss": 0.3176, "step": 17641 }, { "epoch": 0.8751426162012005, "grad_norm": 3.9027552604675293, "learning_rate": 3.873704723524907e-07, "loss": 0.2636, "step": 17642 }, { "epoch": 0.8751922218364007, "grad_norm": 6.718380928039551, "learning_rate": 3.8706676363679084e-07, "loss": 0.2151, "step": 17643 }, { "epoch": 0.8752418274716007, "grad_norm": 9.095401763916016, "learning_rate": 3.8676316923166204e-07, "loss": 0.2889, "step": 17644 }, { "epoch": 0.8752914331068009, "grad_norm": 11.293816566467285, "learning_rate": 3.8645968914463036e-07, "loss": 0.4121, "step": 17645 }, { "epoch": 0.8753410387420011, "grad_norm": 4.6674065589904785, "learning_rate": 3.861563233832133e-07, "loss": 0.2301, "step": 17646 }, { "epoch": 0.8753906443772013, "grad_norm": 3.855123519897461, "learning_rate": 3.858530719549297e-07, "loss": 0.1461, "step": 17647 }, { "epoch": 0.8754402500124014, "grad_norm": 4.446688175201416, "learning_rate": 3.8554993486729475e-07, "loss": 0.1917, "step": 17648 }, { "epoch": 0.8754898556476016, "grad_norm": 7.775974750518799, "learning_rate": 3.852469121278202e-07, "loss": 0.3601, "step": 17649 }, { "epoch": 0.8755394612828017, "grad_norm": 12.926297187805176, "learning_rate": 3.8494400374401274e-07, "loss": 0.4205, "step": 17650 }, { "epoch": 0.8755890669180019, "grad_norm": 9.759720802307129, "learning_rate": 3.846412097233826e-07, "loss": 0.3226, "step": 17651 }, { "epoch": 0.875638672553202, "grad_norm": 4.965616703033447, "learning_rate": 3.8433853007342926e-07, "loss": 0.1938, "step": 17652 }, { "epoch": 0.8756882781884022, "grad_norm": 6.298069953918457, "learning_rate": 3.840359648016562e-07, "loss": 0.2058, "step": 17653 }, { "epoch": 0.8757378838236024, "grad_norm": 12.783682823181152, "learning_rate": 3.837335139155596e-07, "loss": 0.317, "step": 17654 }, { "epoch": 0.8757874894588025, "grad_norm": 8.198127746582031, "learning_rate": 3.834311774226346e-07, "loss": 0.2858, "step": 17655 }, { "epoch": 0.8758370950940026, "grad_norm": 10.268976211547852, "learning_rate": 3.8312895533037354e-07, "loss": 0.3478, "step": 17656 }, { "epoch": 0.8758867007292028, "grad_norm": 5.141524314880371, "learning_rate": 3.8282684764626596e-07, "loss": 0.294, "step": 17657 }, { "epoch": 0.875936306364403, "grad_norm": 5.6815266609191895, "learning_rate": 3.8252485437779596e-07, "loss": 0.287, "step": 17658 }, { "epoch": 0.8759859119996032, "grad_norm": 10.18425178527832, "learning_rate": 3.822229755324508e-07, "loss": 0.369, "step": 17659 }, { "epoch": 0.8760355176348034, "grad_norm": 6.1645827293396, "learning_rate": 3.8192121111770785e-07, "loss": 0.2347, "step": 17660 }, { "epoch": 0.8760851232700034, "grad_norm": 4.505553722381592, "learning_rate": 3.816195611410467e-07, "loss": 0.2364, "step": 17661 }, { "epoch": 0.8761347289052036, "grad_norm": 11.141251564025879, "learning_rate": 3.8131802560994134e-07, "loss": 0.4411, "step": 17662 }, { "epoch": 0.8761843345404038, "grad_norm": 4.730480194091797, "learning_rate": 3.8101660453186583e-07, "loss": 0.1932, "step": 17663 }, { "epoch": 0.876233940175604, "grad_norm": 10.448793411254883, "learning_rate": 3.8071529791428696e-07, "loss": 0.478, "step": 17664 }, { "epoch": 0.8762835458108041, "grad_norm": 6.971653461456299, "learning_rate": 3.8041410576467373e-07, "loss": 0.2587, "step": 17665 }, { "epoch": 0.8763331514460043, "grad_norm": 8.516736030578613, "learning_rate": 3.80113028090488e-07, "loss": 0.3079, "step": 17666 }, { "epoch": 0.8763827570812044, "grad_norm": 5.553275108337402, "learning_rate": 3.79812064899191e-07, "loss": 0.2928, "step": 17667 }, { "epoch": 0.8764323627164046, "grad_norm": 9.161176681518555, "learning_rate": 3.795112161982412e-07, "loss": 0.2649, "step": 17668 }, { "epoch": 0.8764819683516047, "grad_norm": 4.344233512878418, "learning_rate": 3.7921048199509316e-07, "loss": 0.2771, "step": 17669 }, { "epoch": 0.8765315739868049, "grad_norm": 8.456672668457031, "learning_rate": 3.7890986229719985e-07, "loss": 0.3238, "step": 17670 }, { "epoch": 0.8765811796220051, "grad_norm": 7.571868419647217, "learning_rate": 3.786093571120114e-07, "loss": 0.3014, "step": 17671 }, { "epoch": 0.8766307852572052, "grad_norm": 19.328887939453125, "learning_rate": 3.783089664469719e-07, "loss": 0.3214, "step": 17672 }, { "epoch": 0.8766803908924053, "grad_norm": 4.433091640472412, "learning_rate": 3.780086903095281e-07, "loss": 0.228, "step": 17673 }, { "epoch": 0.8767299965276055, "grad_norm": 4.042325019836426, "learning_rate": 3.7770852870711916e-07, "loss": 0.2166, "step": 17674 }, { "epoch": 0.8767796021628057, "grad_norm": 7.873967170715332, "learning_rate": 3.7740848164718403e-07, "loss": 0.373, "step": 17675 }, { "epoch": 0.8768292077980059, "grad_norm": 11.52640151977539, "learning_rate": 3.7710854913715734e-07, "loss": 0.4408, "step": 17676 }, { "epoch": 0.8768788134332061, "grad_norm": 6.597993850708008, "learning_rate": 3.7680873118447313e-07, "loss": 0.3869, "step": 17677 }, { "epoch": 0.8769284190684061, "grad_norm": 4.1096038818359375, "learning_rate": 3.765090277965583e-07, "loss": 0.2423, "step": 17678 }, { "epoch": 0.8769780247036063, "grad_norm": 6.007767200469971, "learning_rate": 3.7620943898084237e-07, "loss": 0.2791, "step": 17679 }, { "epoch": 0.8770276303388065, "grad_norm": 4.749589920043945, "learning_rate": 3.7590996474474727e-07, "loss": 0.2236, "step": 17680 }, { "epoch": 0.8770772359740067, "grad_norm": 3.798867702484131, "learning_rate": 3.7561060509569533e-07, "loss": 0.2676, "step": 17681 }, { "epoch": 0.8771268416092068, "grad_norm": 6.287576198577881, "learning_rate": 3.753113600411046e-07, "loss": 0.3931, "step": 17682 }, { "epoch": 0.8771764472444069, "grad_norm": 6.280789375305176, "learning_rate": 3.750122295883896e-07, "loss": 0.2278, "step": 17683 }, { "epoch": 0.8772260528796071, "grad_norm": 5.1954522132873535, "learning_rate": 3.7471321374496437e-07, "loss": 0.2676, "step": 17684 }, { "epoch": 0.8772756585148073, "grad_norm": 12.452921867370605, "learning_rate": 3.744143125182376e-07, "loss": 0.4013, "step": 17685 }, { "epoch": 0.8773252641500074, "grad_norm": 10.596847534179688, "learning_rate": 3.741155259156176e-07, "loss": 0.2934, "step": 17686 }, { "epoch": 0.8773748697852076, "grad_norm": 4.522667407989502, "learning_rate": 3.738168539445064e-07, "loss": 0.1922, "step": 17687 }, { "epoch": 0.8774244754204078, "grad_norm": 13.958528518676758, "learning_rate": 3.7351829661230576e-07, "loss": 0.3028, "step": 17688 }, { "epoch": 0.8774740810556079, "grad_norm": 4.63587760925293, "learning_rate": 3.7321985392641534e-07, "loss": 0.2355, "step": 17689 }, { "epoch": 0.877523686690808, "grad_norm": 4.202218532562256, "learning_rate": 3.7292152589422926e-07, "loss": 0.2777, "step": 17690 }, { "epoch": 0.8775732923260082, "grad_norm": 10.12447738647461, "learning_rate": 3.72623312523141e-07, "loss": 0.2876, "step": 17691 }, { "epoch": 0.8776228979612084, "grad_norm": 4.924129009246826, "learning_rate": 3.723252138205402e-07, "loss": 0.1778, "step": 17692 }, { "epoch": 0.8776725035964086, "grad_norm": 27.808263778686523, "learning_rate": 3.7202722979381433e-07, "loss": 0.3529, "step": 17693 }, { "epoch": 0.8777221092316088, "grad_norm": 3.920788049697876, "learning_rate": 3.717293604503475e-07, "loss": 0.2015, "step": 17694 }, { "epoch": 0.8777717148668088, "grad_norm": 8.569412231445312, "learning_rate": 3.7143160579751925e-07, "loss": 0.3312, "step": 17695 }, { "epoch": 0.877821320502009, "grad_norm": 4.536868095397949, "learning_rate": 3.7113396584271154e-07, "loss": 0.2544, "step": 17696 }, { "epoch": 0.8778709261372092, "grad_norm": 5.235662460327148, "learning_rate": 3.708364405932968e-07, "loss": 0.2866, "step": 17697 }, { "epoch": 0.8779205317724094, "grad_norm": 6.343418121337891, "learning_rate": 3.7053903005664915e-07, "loss": 0.2066, "step": 17698 }, { "epoch": 0.8779701374076095, "grad_norm": 6.125127792358398, "learning_rate": 3.702417342401382e-07, "loss": 0.3178, "step": 17699 }, { "epoch": 0.8780197430428096, "grad_norm": 15.879104614257812, "learning_rate": 3.6994455315113253e-07, "loss": 0.3977, "step": 17700 }, { "epoch": 0.8780693486780098, "grad_norm": 7.862165927886963, "learning_rate": 3.696474867969935e-07, "loss": 0.359, "step": 17701 }, { "epoch": 0.87811895431321, "grad_norm": 5.978221893310547, "learning_rate": 3.693505351850857e-07, "loss": 0.2814, "step": 17702 }, { "epoch": 0.8781685599484101, "grad_norm": 5.917963027954102, "learning_rate": 3.690536983227655e-07, "loss": 0.211, "step": 17703 }, { "epoch": 0.8782181655836103, "grad_norm": 10.333436012268066, "learning_rate": 3.687569762173898e-07, "loss": 0.3504, "step": 17704 }, { "epoch": 0.8782677712188105, "grad_norm": 9.27243423461914, "learning_rate": 3.684603688763111e-07, "loss": 0.377, "step": 17705 }, { "epoch": 0.8783173768540106, "grad_norm": 10.194204330444336, "learning_rate": 3.68163876306879e-07, "loss": 0.3092, "step": 17706 }, { "epoch": 0.8783669824892107, "grad_norm": 15.132067680358887, "learning_rate": 3.678674985164421e-07, "loss": 0.3555, "step": 17707 }, { "epoch": 0.8784165881244109, "grad_norm": 3.32370662689209, "learning_rate": 3.6757123551234395e-07, "loss": 0.1536, "step": 17708 }, { "epoch": 0.8784661937596111, "grad_norm": 11.927863121032715, "learning_rate": 3.6727508730192485e-07, "loss": 0.4182, "step": 17709 }, { "epoch": 0.8785157993948113, "grad_norm": 10.418317794799805, "learning_rate": 3.6697905389252664e-07, "loss": 0.3274, "step": 17710 }, { "epoch": 0.8785654050300115, "grad_norm": 6.4508748054504395, "learning_rate": 3.666831352914818e-07, "loss": 0.2455, "step": 17711 }, { "epoch": 0.8786150106652115, "grad_norm": 4.952939987182617, "learning_rate": 3.6638733150612505e-07, "loss": 0.2357, "step": 17712 }, { "epoch": 0.8786646163004117, "grad_norm": 5.73075008392334, "learning_rate": 3.6609164254378604e-07, "loss": 0.2085, "step": 17713 }, { "epoch": 0.8787142219356119, "grad_norm": 5.323554992675781, "learning_rate": 3.6579606841179336e-07, "loss": 0.2345, "step": 17714 }, { "epoch": 0.8787638275708121, "grad_norm": 5.3474931716918945, "learning_rate": 3.655006091174684e-07, "loss": 0.3001, "step": 17715 }, { "epoch": 0.8788134332060122, "grad_norm": 8.056768417358398, "learning_rate": 3.6520526466813646e-07, "loss": 0.3575, "step": 17716 }, { "epoch": 0.8788630388412123, "grad_norm": 4.188056945800781, "learning_rate": 3.649100350711143e-07, "loss": 0.2579, "step": 17717 }, { "epoch": 0.8789126444764125, "grad_norm": 14.624370574951172, "learning_rate": 3.6461492033371736e-07, "loss": 0.3424, "step": 17718 }, { "epoch": 0.8789622501116127, "grad_norm": 5.920345306396484, "learning_rate": 3.6431992046326026e-07, "loss": 0.2962, "step": 17719 }, { "epoch": 0.8790118557468128, "grad_norm": 3.8211171627044678, "learning_rate": 3.640250354670527e-07, "loss": 0.1673, "step": 17720 }, { "epoch": 0.879061461382013, "grad_norm": 7.439023494720459, "learning_rate": 3.6373026535240054e-07, "loss": 0.3286, "step": 17721 }, { "epoch": 0.8791110670172132, "grad_norm": 9.619903564453125, "learning_rate": 3.634356101266112e-07, "loss": 0.3367, "step": 17722 }, { "epoch": 0.8791606726524133, "grad_norm": 10.407310485839844, "learning_rate": 3.631410697969834e-07, "loss": 0.3295, "step": 17723 }, { "epoch": 0.8792102782876134, "grad_norm": 9.353336334228516, "learning_rate": 3.628466443708184e-07, "loss": 0.3285, "step": 17724 }, { "epoch": 0.8792598839228136, "grad_norm": 6.510465621948242, "learning_rate": 3.62552333855411e-07, "loss": 0.2773, "step": 17725 }, { "epoch": 0.8793094895580138, "grad_norm": 7.318095684051514, "learning_rate": 3.6225813825805424e-07, "loss": 0.3359, "step": 17726 }, { "epoch": 0.879359095193214, "grad_norm": 4.094930171966553, "learning_rate": 3.6196405758603837e-07, "loss": 0.2648, "step": 17727 }, { "epoch": 0.8794087008284142, "grad_norm": 4.852667331695557, "learning_rate": 3.6167009184665257e-07, "loss": 0.2312, "step": 17728 }, { "epoch": 0.8794583064636142, "grad_norm": 10.354692459106445, "learning_rate": 3.613762410471777e-07, "loss": 0.3202, "step": 17729 }, { "epoch": 0.8795079120988144, "grad_norm": 6.051602840423584, "learning_rate": 3.610825051949002e-07, "loss": 0.339, "step": 17730 }, { "epoch": 0.8795575177340146, "grad_norm": 5.3282389640808105, "learning_rate": 3.6078888429709524e-07, "loss": 0.17, "step": 17731 }, { "epoch": 0.8796071233692148, "grad_norm": 4.072934150695801, "learning_rate": 3.6049537836104043e-07, "loss": 0.2793, "step": 17732 }, { "epoch": 0.8796567290044149, "grad_norm": 9.373849868774414, "learning_rate": 3.6020198739400936e-07, "loss": 0.362, "step": 17733 }, { "epoch": 0.879706334639615, "grad_norm": 7.120912551879883, "learning_rate": 3.599087114032712e-07, "loss": 0.3032, "step": 17734 }, { "epoch": 0.8797559402748152, "grad_norm": 5.8917341232299805, "learning_rate": 3.596155503960941e-07, "loss": 0.2159, "step": 17735 }, { "epoch": 0.8798055459100154, "grad_norm": 7.1174163818359375, "learning_rate": 3.593225043797438e-07, "loss": 0.3784, "step": 17736 }, { "epoch": 0.8798551515452155, "grad_norm": 9.561861038208008, "learning_rate": 3.5902957336148014e-07, "loss": 0.2322, "step": 17737 }, { "epoch": 0.8799047571804157, "grad_norm": 6.022407531738281, "learning_rate": 3.587367573485634e-07, "loss": 0.3411, "step": 17738 }, { "epoch": 0.8799543628156159, "grad_norm": 6.522349834442139, "learning_rate": 3.584440563482489e-07, "loss": 0.289, "step": 17739 }, { "epoch": 0.880003968450816, "grad_norm": 9.484406471252441, "learning_rate": 3.581514703677902e-07, "loss": 0.3031, "step": 17740 }, { "epoch": 0.8800535740860161, "grad_norm": 6.015079498291016, "learning_rate": 3.5785899941443826e-07, "loss": 0.1767, "step": 17741 }, { "epoch": 0.8801031797212163, "grad_norm": 7.112480163574219, "learning_rate": 3.5756664349543946e-07, "loss": 0.2597, "step": 17742 }, { "epoch": 0.8801527853564165, "grad_norm": 5.739185810089111, "learning_rate": 3.572744026180397e-07, "loss": 0.2397, "step": 17743 }, { "epoch": 0.8802023909916167, "grad_norm": 5.044515609741211, "learning_rate": 3.569822767894804e-07, "loss": 0.2959, "step": 17744 }, { "epoch": 0.8802519966268169, "grad_norm": 3.5678937435150146, "learning_rate": 3.566902660170013e-07, "loss": 0.2138, "step": 17745 }, { "epoch": 0.8803016022620169, "grad_norm": 9.52962875366211, "learning_rate": 3.563983703078372e-07, "loss": 0.2937, "step": 17746 }, { "epoch": 0.8803512078972171, "grad_norm": 3.8470680713653564, "learning_rate": 3.561065896692217e-07, "loss": 0.2138, "step": 17747 }, { "epoch": 0.8804008135324173, "grad_norm": 6.537435531616211, "learning_rate": 3.5581492410838525e-07, "loss": 0.2735, "step": 17748 }, { "epoch": 0.8804504191676175, "grad_norm": 13.056386947631836, "learning_rate": 3.555233736325564e-07, "loss": 0.3782, "step": 17749 }, { "epoch": 0.8805000248028176, "grad_norm": 4.7196760177612305, "learning_rate": 3.5523193824895884e-07, "loss": 0.2654, "step": 17750 }, { "epoch": 0.8805496304380177, "grad_norm": 4.893336296081543, "learning_rate": 3.5494061796481626e-07, "loss": 0.2421, "step": 17751 }, { "epoch": 0.8805992360732179, "grad_norm": 14.79360580444336, "learning_rate": 3.54649412787344e-07, "loss": 0.3163, "step": 17752 }, { "epoch": 0.8806488417084181, "grad_norm": 8.377435684204102, "learning_rate": 3.543583227237629e-07, "loss": 0.316, "step": 17753 }, { "epoch": 0.8806984473436182, "grad_norm": 10.977540969848633, "learning_rate": 3.540673477812828e-07, "loss": 0.256, "step": 17754 }, { "epoch": 0.8807480529788184, "grad_norm": 9.295382499694824, "learning_rate": 3.5377648796711575e-07, "loss": 0.3315, "step": 17755 }, { "epoch": 0.8807976586140186, "grad_norm": 10.350001335144043, "learning_rate": 3.5348574328846865e-07, "loss": 0.3239, "step": 17756 }, { "epoch": 0.8808472642492187, "grad_norm": 5.37690544128418, "learning_rate": 3.53195113752548e-07, "loss": 0.2674, "step": 17757 }, { "epoch": 0.8808968698844188, "grad_norm": 8.374288558959961, "learning_rate": 3.5290459936655197e-07, "loss": 0.2969, "step": 17758 }, { "epoch": 0.880946475519619, "grad_norm": 9.316082000732422, "learning_rate": 3.526142001376842e-07, "loss": 0.2889, "step": 17759 }, { "epoch": 0.8809960811548192, "grad_norm": 8.27357292175293, "learning_rate": 3.523239160731373e-07, "loss": 0.2575, "step": 17760 }, { "epoch": 0.8810456867900194, "grad_norm": 6.002372741699219, "learning_rate": 3.5203374718010774e-07, "loss": 0.3257, "step": 17761 }, { "epoch": 0.8810952924252196, "grad_norm": 7.82688570022583, "learning_rate": 3.5174369346578363e-07, "loss": 0.3107, "step": 17762 }, { "epoch": 0.8811448980604196, "grad_norm": 7.058992862701416, "learning_rate": 3.5145375493735313e-07, "loss": 0.364, "step": 17763 }, { "epoch": 0.8811945036956198, "grad_norm": 8.985718727111816, "learning_rate": 3.5116393160200156e-07, "loss": 0.3568, "step": 17764 }, { "epoch": 0.88124410933082, "grad_norm": 6.2346601486206055, "learning_rate": 3.508742234669116e-07, "loss": 0.2717, "step": 17765 }, { "epoch": 0.8812937149660202, "grad_norm": 6.363485813140869, "learning_rate": 3.5058463053925963e-07, "loss": 0.3418, "step": 17766 }, { "epoch": 0.8813433206012203, "grad_norm": 4.949194431304932, "learning_rate": 3.5029515282622494e-07, "loss": 0.2028, "step": 17767 }, { "epoch": 0.8813929262364204, "grad_norm": 8.50450611114502, "learning_rate": 3.5000579033497964e-07, "loss": 0.3624, "step": 17768 }, { "epoch": 0.8814425318716206, "grad_norm": 3.5367465019226074, "learning_rate": 3.497165430726934e-07, "loss": 0.1408, "step": 17769 }, { "epoch": 0.8814921375068208, "grad_norm": 11.97311782836914, "learning_rate": 3.494274110465351e-07, "loss": 0.4458, "step": 17770 }, { "epoch": 0.8815417431420209, "grad_norm": 6.8280110359191895, "learning_rate": 3.4913839426367057e-07, "loss": 0.317, "step": 17771 }, { "epoch": 0.8815913487772211, "grad_norm": 9.728567123413086, "learning_rate": 3.48849492731258e-07, "loss": 0.3315, "step": 17772 }, { "epoch": 0.8816409544124213, "grad_norm": 13.463565826416016, "learning_rate": 3.485607064564611e-07, "loss": 0.5026, "step": 17773 }, { "epoch": 0.8816905600476214, "grad_norm": 9.525517463684082, "learning_rate": 3.4827203544643306e-07, "loss": 0.3893, "step": 17774 }, { "epoch": 0.8817401656828215, "grad_norm": 6.32841682434082, "learning_rate": 3.479834797083281e-07, "loss": 0.3388, "step": 17775 }, { "epoch": 0.8817897713180217, "grad_norm": 5.150657653808594, "learning_rate": 3.476950392492973e-07, "loss": 0.2512, "step": 17776 }, { "epoch": 0.8818393769532219, "grad_norm": 13.030820846557617, "learning_rate": 3.4740671407648697e-07, "loss": 0.3258, "step": 17777 }, { "epoch": 0.8818889825884221, "grad_norm": 5.60840368270874, "learning_rate": 3.471185041970437e-07, "loss": 0.2348, "step": 17778 }, { "epoch": 0.8819385882236223, "grad_norm": 8.12621784210205, "learning_rate": 3.468304096181091e-07, "loss": 0.2676, "step": 17779 }, { "epoch": 0.8819881938588223, "grad_norm": 8.241061210632324, "learning_rate": 3.4654243034682013e-07, "loss": 0.3053, "step": 17780 }, { "epoch": 0.8820377994940225, "grad_norm": 6.049731731414795, "learning_rate": 3.462545663903166e-07, "loss": 0.2802, "step": 17781 }, { "epoch": 0.8820874051292227, "grad_norm": 4.552029132843018, "learning_rate": 3.4596681775572906e-07, "loss": 0.3072, "step": 17782 }, { "epoch": 0.8821370107644229, "grad_norm": 5.4548540115356445, "learning_rate": 3.4567918445018887e-07, "loss": 0.2798, "step": 17783 }, { "epoch": 0.882186616399623, "grad_norm": 16.736623764038086, "learning_rate": 3.4539166648082376e-07, "loss": 0.3379, "step": 17784 }, { "epoch": 0.8822362220348231, "grad_norm": 15.069670677185059, "learning_rate": 3.451042638547597e-07, "loss": 0.4018, "step": 17785 }, { "epoch": 0.8822858276700233, "grad_norm": 13.951247215270996, "learning_rate": 3.448169765791165e-07, "loss": 0.331, "step": 17786 }, { "epoch": 0.8823354333052235, "grad_norm": 4.57267951965332, "learning_rate": 3.4452980466101515e-07, "loss": 0.2621, "step": 17787 }, { "epoch": 0.8823850389404236, "grad_norm": 8.146553039550781, "learning_rate": 3.442427481075711e-07, "loss": 0.3433, "step": 17788 }, { "epoch": 0.8824346445756238, "grad_norm": 6.790341854095459, "learning_rate": 3.439558069258969e-07, "loss": 0.3714, "step": 17789 }, { "epoch": 0.882484250210824, "grad_norm": 6.75014066696167, "learning_rate": 3.4366898112310477e-07, "loss": 0.2127, "step": 17790 }, { "epoch": 0.8825338558460241, "grad_norm": 5.4718217849731445, "learning_rate": 3.4338227070630115e-07, "loss": 0.21, "step": 17791 }, { "epoch": 0.8825834614812242, "grad_norm": 5.241774559020996, "learning_rate": 3.430956756825915e-07, "loss": 0.3052, "step": 17792 }, { "epoch": 0.8826330671164244, "grad_norm": 15.276106834411621, "learning_rate": 3.4280919605907735e-07, "loss": 0.4811, "step": 17793 }, { "epoch": 0.8826826727516246, "grad_norm": 7.493122100830078, "learning_rate": 3.4252283184285805e-07, "loss": 0.3331, "step": 17794 }, { "epoch": 0.8827322783868248, "grad_norm": 6.160067081451416, "learning_rate": 3.4223658304103067e-07, "loss": 0.2596, "step": 17795 }, { "epoch": 0.882781884022025, "grad_norm": 7.494685173034668, "learning_rate": 3.419504496606868e-07, "loss": 0.3368, "step": 17796 }, { "epoch": 0.882831489657225, "grad_norm": 6.881493091583252, "learning_rate": 3.4166443170891796e-07, "loss": 0.1985, "step": 17797 }, { "epoch": 0.8828810952924252, "grad_norm": 7.175377368927002, "learning_rate": 3.4137852919281177e-07, "loss": 0.3025, "step": 17798 }, { "epoch": 0.8829307009276254, "grad_norm": 4.815310478210449, "learning_rate": 3.4109274211945265e-07, "loss": 0.2576, "step": 17799 }, { "epoch": 0.8829803065628256, "grad_norm": 6.347103118896484, "learning_rate": 3.408070704959232e-07, "loss": 0.2387, "step": 17800 }, { "epoch": 0.8830299121980257, "grad_norm": 17.243574142456055, "learning_rate": 3.405215143293017e-07, "loss": 0.4807, "step": 17801 }, { "epoch": 0.8830795178332258, "grad_norm": 6.521469593048096, "learning_rate": 3.4023607362666576e-07, "loss": 0.2965, "step": 17802 }, { "epoch": 0.883129123468426, "grad_norm": 13.524070739746094, "learning_rate": 3.399507483950865e-07, "loss": 0.3176, "step": 17803 }, { "epoch": 0.8831787291036262, "grad_norm": 8.352486610412598, "learning_rate": 3.3966553864163697e-07, "loss": 0.2759, "step": 17804 }, { "epoch": 0.8832283347388263, "grad_norm": 5.142958641052246, "learning_rate": 3.3938044437338227e-07, "loss": 0.2311, "step": 17805 }, { "epoch": 0.8832779403740265, "grad_norm": 8.863936424255371, "learning_rate": 3.390954655973888e-07, "loss": 0.3235, "step": 17806 }, { "epoch": 0.8833275460092267, "grad_norm": 9.266159057617188, "learning_rate": 3.388106023207183e-07, "loss": 0.3934, "step": 17807 }, { "epoch": 0.8833771516444268, "grad_norm": 9.156590461730957, "learning_rate": 3.3852585455042996e-07, "loss": 0.3289, "step": 17808 }, { "epoch": 0.8834267572796269, "grad_norm": 10.264378547668457, "learning_rate": 3.3824122229357826e-07, "loss": 0.3592, "step": 17809 }, { "epoch": 0.8834763629148271, "grad_norm": 4.997419834136963, "learning_rate": 3.3795670555721914e-07, "loss": 0.2609, "step": 17810 }, { "epoch": 0.8835259685500273, "grad_norm": 5.521200180053711, "learning_rate": 3.3767230434840093e-07, "loss": 0.2023, "step": 17811 }, { "epoch": 0.8835755741852275, "grad_norm": 5.33400821685791, "learning_rate": 3.3738801867417236e-07, "loss": 0.2708, "step": 17812 }, { "epoch": 0.8836251798204277, "grad_norm": 6.890100479125977, "learning_rate": 3.3710384854157784e-07, "loss": 0.2363, "step": 17813 }, { "epoch": 0.8836747854556277, "grad_norm": 8.005352973937988, "learning_rate": 3.3681979395765896e-07, "loss": 0.4119, "step": 17814 }, { "epoch": 0.8837243910908279, "grad_norm": 12.36086654663086, "learning_rate": 3.3653585492945504e-07, "loss": 0.3426, "step": 17815 }, { "epoch": 0.8837739967260281, "grad_norm": 4.6193742752075195, "learning_rate": 3.3625203146400276e-07, "loss": 0.2549, "step": 17816 }, { "epoch": 0.8838236023612283, "grad_norm": 5.658348560333252, "learning_rate": 3.359683235683331e-07, "loss": 0.2307, "step": 17817 }, { "epoch": 0.8838732079964284, "grad_norm": 7.576824188232422, "learning_rate": 3.3568473124947985e-07, "loss": 0.2825, "step": 17818 }, { "epoch": 0.8839228136316285, "grad_norm": 5.954305171966553, "learning_rate": 3.354012545144686e-07, "loss": 0.2675, "step": 17819 }, { "epoch": 0.8839724192668287, "grad_norm": 3.715043783187866, "learning_rate": 3.3511789337032363e-07, "loss": 0.221, "step": 17820 }, { "epoch": 0.8840220249020289, "grad_norm": 5.659032821655273, "learning_rate": 3.348346478240677e-07, "loss": 0.2196, "step": 17821 }, { "epoch": 0.884071630537229, "grad_norm": 6.29849910736084, "learning_rate": 3.3455151788272023e-07, "loss": 0.2706, "step": 17822 }, { "epoch": 0.8841212361724292, "grad_norm": 6.030227184295654, "learning_rate": 3.34268503553295e-07, "loss": 0.3461, "step": 17823 }, { "epoch": 0.8841708418076294, "grad_norm": 5.134563446044922, "learning_rate": 3.339856048428086e-07, "loss": 0.2548, "step": 17824 }, { "epoch": 0.8842204474428295, "grad_norm": 11.409595489501953, "learning_rate": 3.337028217582683e-07, "loss": 0.2489, "step": 17825 }, { "epoch": 0.8842700530780296, "grad_norm": 8.252911567687988, "learning_rate": 3.3342015430668337e-07, "loss": 0.337, "step": 17826 }, { "epoch": 0.8843196587132298, "grad_norm": 3.722466230392456, "learning_rate": 3.331376024950578e-07, "loss": 0.1913, "step": 17827 }, { "epoch": 0.88436926434843, "grad_norm": 7.449653148651123, "learning_rate": 3.328551663303936e-07, "loss": 0.3138, "step": 17828 }, { "epoch": 0.8844188699836302, "grad_norm": 5.076942443847656, "learning_rate": 3.325728458196886e-07, "loss": 0.2727, "step": 17829 }, { "epoch": 0.8844684756188304, "grad_norm": 5.561312198638916, "learning_rate": 3.322906409699417e-07, "loss": 0.2089, "step": 17830 }, { "epoch": 0.8845180812540304, "grad_norm": 17.77351951599121, "learning_rate": 3.320085517881422e-07, "loss": 0.4806, "step": 17831 }, { "epoch": 0.8845676868892306, "grad_norm": 6.756974220275879, "learning_rate": 3.3172657828128397e-07, "loss": 0.3677, "step": 17832 }, { "epoch": 0.8846172925244308, "grad_norm": 12.638219833374023, "learning_rate": 3.31444720456352e-07, "loss": 0.3074, "step": 17833 }, { "epoch": 0.884666898159631, "grad_norm": 10.394108772277832, "learning_rate": 3.311629783203313e-07, "loss": 0.3382, "step": 17834 }, { "epoch": 0.8847165037948311, "grad_norm": 16.99224090576172, "learning_rate": 3.3088135188020454e-07, "loss": 0.2952, "step": 17835 }, { "epoch": 0.8847661094300312, "grad_norm": 4.683152198791504, "learning_rate": 3.3059984114295064e-07, "loss": 0.2891, "step": 17836 }, { "epoch": 0.8848157150652314, "grad_norm": 7.026031970977783, "learning_rate": 3.3031844611554285e-07, "loss": 0.1923, "step": 17837 }, { "epoch": 0.8848653207004316, "grad_norm": 9.981354713439941, "learning_rate": 3.3003716680495844e-07, "loss": 0.3089, "step": 17838 }, { "epoch": 0.8849149263356317, "grad_norm": 6.5360188484191895, "learning_rate": 3.2975600321816405e-07, "loss": 0.2517, "step": 17839 }, { "epoch": 0.8849645319708319, "grad_norm": 6.9541802406311035, "learning_rate": 3.2947495536212906e-07, "loss": 0.3029, "step": 17840 }, { "epoch": 0.8850141376060321, "grad_norm": 6.752969741821289, "learning_rate": 3.291940232438173e-07, "loss": 0.2552, "step": 17841 }, { "epoch": 0.8850637432412322, "grad_norm": 8.223093032836914, "learning_rate": 3.2891320687019e-07, "loss": 0.3079, "step": 17842 }, { "epoch": 0.8851133488764323, "grad_norm": 9.665032386779785, "learning_rate": 3.286325062482071e-07, "loss": 0.3025, "step": 17843 }, { "epoch": 0.8851629545116325, "grad_norm": 16.19131851196289, "learning_rate": 3.283519213848241e-07, "loss": 0.3622, "step": 17844 }, { "epoch": 0.8852125601468327, "grad_norm": 5.33306360244751, "learning_rate": 3.280714522869932e-07, "loss": 0.3036, "step": 17845 }, { "epoch": 0.8852621657820329, "grad_norm": 26.280210494995117, "learning_rate": 3.277910989616645e-07, "loss": 0.343, "step": 17846 }, { "epoch": 0.885311771417233, "grad_norm": 6.25399923324585, "learning_rate": 3.2751086141578624e-07, "loss": 0.281, "step": 17847 }, { "epoch": 0.8853613770524331, "grad_norm": 7.673150539398193, "learning_rate": 3.272307396563029e-07, "loss": 0.3213, "step": 17848 }, { "epoch": 0.8854109826876333, "grad_norm": 7.515946865081787, "learning_rate": 3.2695073369015505e-07, "loss": 0.2325, "step": 17849 }, { "epoch": 0.8854605883228335, "grad_norm": 12.13088607788086, "learning_rate": 3.2667084352428156e-07, "loss": 0.3178, "step": 17850 }, { "epoch": 0.8855101939580337, "grad_norm": 5.5868988037109375, "learning_rate": 3.2639106916561915e-07, "loss": 0.2219, "step": 17851 }, { "epoch": 0.8855597995932338, "grad_norm": 6.0823283195495605, "learning_rate": 3.2611141062109996e-07, "loss": 0.3213, "step": 17852 }, { "epoch": 0.8856094052284339, "grad_norm": 4.647308826446533, "learning_rate": 3.2583186789765465e-07, "loss": 0.2287, "step": 17853 }, { "epoch": 0.8856590108636341, "grad_norm": 4.144866466522217, "learning_rate": 3.255524410022098e-07, "loss": 0.26, "step": 17854 }, { "epoch": 0.8857086164988343, "grad_norm": 6.799834728240967, "learning_rate": 3.252731299416895e-07, "loss": 0.2411, "step": 17855 }, { "epoch": 0.8857582221340344, "grad_norm": 7.736921310424805, "learning_rate": 3.2499393472301576e-07, "loss": 0.2585, "step": 17856 }, { "epoch": 0.8858078277692346, "grad_norm": 6.071813106536865, "learning_rate": 3.247148553531071e-07, "loss": 0.2924, "step": 17857 }, { "epoch": 0.8858574334044348, "grad_norm": 11.352375030517578, "learning_rate": 3.244358918388796e-07, "loss": 0.3591, "step": 17858 }, { "epoch": 0.8859070390396349, "grad_norm": 7.642735481262207, "learning_rate": 3.2415704418724604e-07, "loss": 0.3144, "step": 17859 }, { "epoch": 0.885956644674835, "grad_norm": 13.522692680358887, "learning_rate": 3.2387831240511424e-07, "loss": 0.349, "step": 17860 }, { "epoch": 0.8860062503100352, "grad_norm": 5.3924241065979, "learning_rate": 3.235996964993954e-07, "loss": 0.2128, "step": 17861 }, { "epoch": 0.8860558559452354, "grad_norm": 5.959136962890625, "learning_rate": 3.2332119647699e-07, "loss": 0.302, "step": 17862 }, { "epoch": 0.8861054615804356, "grad_norm": 3.6807048320770264, "learning_rate": 3.2304281234480096e-07, "loss": 0.2905, "step": 17863 }, { "epoch": 0.8861550672156357, "grad_norm": 4.61195182800293, "learning_rate": 3.227645441097266e-07, "loss": 0.2704, "step": 17864 }, { "epoch": 0.8862046728508358, "grad_norm": 9.702911376953125, "learning_rate": 3.224863917786636e-07, "loss": 0.3508, "step": 17865 }, { "epoch": 0.886254278486036, "grad_norm": 4.740593433380127, "learning_rate": 3.22208355358502e-07, "loss": 0.2614, "step": 17866 }, { "epoch": 0.8863038841212362, "grad_norm": 4.45465612411499, "learning_rate": 3.219304348561353e-07, "loss": 0.2204, "step": 17867 }, { "epoch": 0.8863534897564364, "grad_norm": 9.776287078857422, "learning_rate": 3.216526302784462e-07, "loss": 0.2473, "step": 17868 }, { "epoch": 0.8864030953916365, "grad_norm": 5.90140438079834, "learning_rate": 3.2137494163232307e-07, "loss": 0.3599, "step": 17869 }, { "epoch": 0.8864527010268366, "grad_norm": 6.4694504737854, "learning_rate": 3.2109736892464495e-07, "loss": 0.3375, "step": 17870 }, { "epoch": 0.8865023066620368, "grad_norm": 5.5545830726623535, "learning_rate": 3.208199121622901e-07, "loss": 0.2611, "step": 17871 }, { "epoch": 0.886551912297237, "grad_norm": 7.446581840515137, "learning_rate": 3.205425713521343e-07, "loss": 0.317, "step": 17872 }, { "epoch": 0.8866015179324371, "grad_norm": 3.750213861465454, "learning_rate": 3.2026534650105133e-07, "loss": 0.2526, "step": 17873 }, { "epoch": 0.8866511235676373, "grad_norm": 8.548152923583984, "learning_rate": 3.199882376159086e-07, "loss": 0.3416, "step": 17874 }, { "epoch": 0.8867007292028375, "grad_norm": 5.407722473144531, "learning_rate": 3.197112447035755e-07, "loss": 0.3278, "step": 17875 }, { "epoch": 0.8867503348380376, "grad_norm": 6.737960338592529, "learning_rate": 3.194343677709144e-07, "loss": 0.2431, "step": 17876 }, { "epoch": 0.8867999404732377, "grad_norm": 7.32720422744751, "learning_rate": 3.191576068247865e-07, "loss": 0.3436, "step": 17877 }, { "epoch": 0.8868495461084379, "grad_norm": 5.597154140472412, "learning_rate": 3.1888096187205065e-07, "loss": 0.3062, "step": 17878 }, { "epoch": 0.8868991517436381, "grad_norm": 6.004022121429443, "learning_rate": 3.186044329195631e-07, "loss": 0.2962, "step": 17879 }, { "epoch": 0.8869487573788383, "grad_norm": 8.930183410644531, "learning_rate": 3.183280199741734e-07, "loss": 0.1839, "step": 17880 }, { "epoch": 0.8869983630140384, "grad_norm": 9.415979385375977, "learning_rate": 3.180517230427349e-07, "loss": 0.2714, "step": 17881 }, { "epoch": 0.8870479686492385, "grad_norm": 4.370400905609131, "learning_rate": 3.1777554213209215e-07, "loss": 0.284, "step": 17882 }, { "epoch": 0.8870975742844387, "grad_norm": 5.384340763092041, "learning_rate": 3.1749947724908914e-07, "loss": 0.3032, "step": 17883 }, { "epoch": 0.8871471799196389, "grad_norm": 4.42462158203125, "learning_rate": 3.1722352840056703e-07, "loss": 0.2321, "step": 17884 }, { "epoch": 0.887196785554839, "grad_norm": 3.8875679969787598, "learning_rate": 3.1694769559336425e-07, "loss": 0.1972, "step": 17885 }, { "epoch": 0.8872463911900392, "grad_norm": 5.59759521484375, "learning_rate": 3.166719788343159e-07, "loss": 0.2735, "step": 17886 }, { "epoch": 0.8872959968252393, "grad_norm": 4.710353851318359, "learning_rate": 3.1639637813025534e-07, "loss": 0.1707, "step": 17887 }, { "epoch": 0.8873456024604395, "grad_norm": 4.2593092918396, "learning_rate": 3.1612089348800947e-07, "loss": 0.247, "step": 17888 }, { "epoch": 0.8873952080956397, "grad_norm": 8.724449157714844, "learning_rate": 3.1584552491440825e-07, "loss": 0.2239, "step": 17889 }, { "epoch": 0.8874448137308398, "grad_norm": 6.641521453857422, "learning_rate": 3.155702724162729e-07, "loss": 0.2443, "step": 17890 }, { "epoch": 0.88749441936604, "grad_norm": 7.534241199493408, "learning_rate": 3.1529513600042527e-07, "loss": 0.2343, "step": 17891 }, { "epoch": 0.8875440250012402, "grad_norm": 6.5074639320373535, "learning_rate": 3.1502011567368317e-07, "loss": 0.3018, "step": 17892 }, { "epoch": 0.8875936306364403, "grad_norm": 6.920517921447754, "learning_rate": 3.1474521144286284e-07, "loss": 0.3397, "step": 17893 }, { "epoch": 0.8876432362716404, "grad_norm": 5.496623516082764, "learning_rate": 3.1447042331477384e-07, "loss": 0.2691, "step": 17894 }, { "epoch": 0.8876928419068406, "grad_norm": 7.214369773864746, "learning_rate": 3.1419575129622906e-07, "loss": 0.2813, "step": 17895 }, { "epoch": 0.8877424475420408, "grad_norm": 6.806845188140869, "learning_rate": 3.139211953940324e-07, "loss": 0.2856, "step": 17896 }, { "epoch": 0.887792053177241, "grad_norm": 5.912024021148682, "learning_rate": 3.1364675561498805e-07, "loss": 0.1518, "step": 17897 }, { "epoch": 0.8878416588124411, "grad_norm": 9.960453987121582, "learning_rate": 3.1337243196589706e-07, "loss": 0.3386, "step": 17898 }, { "epoch": 0.8878912644476412, "grad_norm": 10.608305931091309, "learning_rate": 3.1309822445355685e-07, "loss": 0.307, "step": 17899 }, { "epoch": 0.8879408700828414, "grad_norm": 6.527993679046631, "learning_rate": 3.128241330847631e-07, "loss": 0.2412, "step": 17900 }, { "epoch": 0.8879904757180416, "grad_norm": 11.731643676757812, "learning_rate": 3.125501578663076e-07, "loss": 0.4128, "step": 17901 }, { "epoch": 0.8880400813532418, "grad_norm": 6.9488325119018555, "learning_rate": 3.1227629880497936e-07, "loss": 0.2983, "step": 17902 }, { "epoch": 0.8880896869884419, "grad_norm": 5.255542755126953, "learning_rate": 3.1200255590756567e-07, "loss": 0.2413, "step": 17903 }, { "epoch": 0.888139292623642, "grad_norm": 12.743135452270508, "learning_rate": 3.11728929180849e-07, "loss": 0.4464, "step": 17904 }, { "epoch": 0.8881888982588422, "grad_norm": 4.940708160400391, "learning_rate": 3.114554186316099e-07, "loss": 0.2394, "step": 17905 }, { "epoch": 0.8882385038940424, "grad_norm": 5.678831577301025, "learning_rate": 3.111820242666264e-07, "loss": 0.3308, "step": 17906 }, { "epoch": 0.8882881095292425, "grad_norm": 6.080506801605225, "learning_rate": 3.1090874609267295e-07, "loss": 0.2713, "step": 17907 }, { "epoch": 0.8883377151644427, "grad_norm": 8.221390724182129, "learning_rate": 3.1063558411652204e-07, "loss": 0.2705, "step": 17908 }, { "epoch": 0.8883873207996429, "grad_norm": 17.80695915222168, "learning_rate": 3.1036253834494266e-07, "loss": 0.3664, "step": 17909 }, { "epoch": 0.888436926434843, "grad_norm": 6.102510929107666, "learning_rate": 3.100896087847022e-07, "loss": 0.1976, "step": 17910 }, { "epoch": 0.8884865320700431, "grad_norm": 12.597140312194824, "learning_rate": 3.098167954425613e-07, "loss": 0.2683, "step": 17911 }, { "epoch": 0.8885361377052433, "grad_norm": 9.711591720581055, "learning_rate": 3.0954409832528177e-07, "loss": 0.1822, "step": 17912 }, { "epoch": 0.8885857433404435, "grad_norm": 5.7281813621521, "learning_rate": 3.092715174396216e-07, "loss": 0.2598, "step": 17913 }, { "epoch": 0.8886353489756437, "grad_norm": 11.684870719909668, "learning_rate": 3.0899905279233477e-07, "loss": 0.3684, "step": 17914 }, { "epoch": 0.8886849546108438, "grad_norm": 4.3011393547058105, "learning_rate": 3.087267043901737e-07, "loss": 0.2712, "step": 17915 }, { "epoch": 0.8887345602460439, "grad_norm": 3.6485342979431152, "learning_rate": 3.0845447223988747e-07, "loss": 0.2391, "step": 17916 }, { "epoch": 0.8887841658812441, "grad_norm": 16.252309799194336, "learning_rate": 3.0818235634822003e-07, "loss": 0.2951, "step": 17917 }, { "epoch": 0.8888337715164443, "grad_norm": 8.028559684753418, "learning_rate": 3.0791035672191827e-07, "loss": 0.3181, "step": 17918 }, { "epoch": 0.8888833771516444, "grad_norm": 6.045014381408691, "learning_rate": 3.076384733677179e-07, "loss": 0.3155, "step": 17919 }, { "epoch": 0.8889329827868446, "grad_norm": 3.9945671558380127, "learning_rate": 3.073667062923608e-07, "loss": 0.2274, "step": 17920 }, { "epoch": 0.8889825884220447, "grad_norm": 11.03923225402832, "learning_rate": 3.070950555025787e-07, "loss": 0.325, "step": 17921 }, { "epoch": 0.8890321940572449, "grad_norm": 7.111626625061035, "learning_rate": 3.0682352100510407e-07, "loss": 0.2029, "step": 17922 }, { "epoch": 0.889081799692445, "grad_norm": 11.293696403503418, "learning_rate": 3.0655210280666537e-07, "loss": 0.4053, "step": 17923 }, { "epoch": 0.8891314053276452, "grad_norm": 14.424500465393066, "learning_rate": 3.062808009139895e-07, "loss": 0.3411, "step": 17924 }, { "epoch": 0.8891810109628454, "grad_norm": 4.563230037689209, "learning_rate": 3.060096153337966e-07, "loss": 0.2685, "step": 17925 }, { "epoch": 0.8892306165980456, "grad_norm": 7.587242603302002, "learning_rate": 3.0573854607281074e-07, "loss": 0.3001, "step": 17926 }, { "epoch": 0.8892802222332457, "grad_norm": 7.9546027183532715, "learning_rate": 3.0546759313774656e-07, "loss": 0.3552, "step": 17927 }, { "epoch": 0.8893298278684458, "grad_norm": 6.252732753753662, "learning_rate": 3.0519675653531867e-07, "loss": 0.2523, "step": 17928 }, { "epoch": 0.889379433503646, "grad_norm": 5.932989597320557, "learning_rate": 3.049260362722389e-07, "loss": 0.3051, "step": 17929 }, { "epoch": 0.8894290391388462, "grad_norm": 4.3826470375061035, "learning_rate": 3.046554323552164e-07, "loss": 0.2319, "step": 17930 }, { "epoch": 0.8894786447740464, "grad_norm": 11.766980171203613, "learning_rate": 3.043849447909547e-07, "loss": 0.2648, "step": 17931 }, { "epoch": 0.8895282504092465, "grad_norm": 10.798115730285645, "learning_rate": 3.0411457358616004e-07, "loss": 0.328, "step": 17932 }, { "epoch": 0.8895778560444466, "grad_norm": 5.955966472625732, "learning_rate": 3.038443187475293e-07, "loss": 0.3107, "step": 17933 }, { "epoch": 0.8896274616796468, "grad_norm": 11.541526794433594, "learning_rate": 3.0357418028176046e-07, "loss": 0.3828, "step": 17934 }, { "epoch": 0.889677067314847, "grad_norm": 7.043366432189941, "learning_rate": 3.0330415819554816e-07, "loss": 0.3026, "step": 17935 }, { "epoch": 0.8897266729500471, "grad_norm": 7.769618511199951, "learning_rate": 3.0303425249558426e-07, "loss": 0.1976, "step": 17936 }, { "epoch": 0.8897762785852473, "grad_norm": 4.0723466873168945, "learning_rate": 3.0276446318855455e-07, "loss": 0.2639, "step": 17937 }, { "epoch": 0.8898258842204474, "grad_norm": 4.77003812789917, "learning_rate": 3.0249479028114756e-07, "loss": 0.2991, "step": 17938 }, { "epoch": 0.8898754898556476, "grad_norm": 10.157652854919434, "learning_rate": 3.0222523378004354e-07, "loss": 0.2771, "step": 17939 }, { "epoch": 0.8899250954908478, "grad_norm": 6.281198501586914, "learning_rate": 3.019557936919243e-07, "loss": 0.2378, "step": 17940 }, { "epoch": 0.8899747011260479, "grad_norm": 6.073380947113037, "learning_rate": 3.016864700234651e-07, "loss": 0.2995, "step": 17941 }, { "epoch": 0.8900243067612481, "grad_norm": 5.220894813537598, "learning_rate": 3.0141726278134054e-07, "loss": 0.2617, "step": 17942 }, { "epoch": 0.8900739123964483, "grad_norm": 4.179598808288574, "learning_rate": 3.0114817197222145e-07, "loss": 0.2069, "step": 17943 }, { "epoch": 0.8901235180316484, "grad_norm": 7.704068660736084, "learning_rate": 3.0087919760277695e-07, "loss": 0.3116, "step": 17944 }, { "epoch": 0.8901731236668485, "grad_norm": 7.989761829376221, "learning_rate": 3.0061033967967004e-07, "loss": 0.3603, "step": 17945 }, { "epoch": 0.8902227293020487, "grad_norm": 8.7592191696167, "learning_rate": 3.003415982095664e-07, "loss": 0.2855, "step": 17946 }, { "epoch": 0.8902723349372489, "grad_norm": 6.733885765075684, "learning_rate": 3.000729731991231e-07, "loss": 0.3161, "step": 17947 }, { "epoch": 0.8903219405724491, "grad_norm": 5.968824863433838, "learning_rate": 2.9980446465499747e-07, "loss": 0.208, "step": 17948 }, { "epoch": 0.8903715462076491, "grad_norm": 4.869011878967285, "learning_rate": 2.995360725838431e-07, "loss": 0.2861, "step": 17949 }, { "epoch": 0.8904211518428493, "grad_norm": 6.725067138671875, "learning_rate": 2.992677969923119e-07, "loss": 0.2327, "step": 17950 }, { "epoch": 0.8904707574780495, "grad_norm": 5.946518421173096, "learning_rate": 2.9899963788704967e-07, "loss": 0.2453, "step": 17951 }, { "epoch": 0.8905203631132497, "grad_norm": 4.2199387550354, "learning_rate": 2.987315952747044e-07, "loss": 0.3025, "step": 17952 }, { "epoch": 0.8905699687484498, "grad_norm": 8.509011268615723, "learning_rate": 2.984636691619158e-07, "loss": 0.3599, "step": 17953 }, { "epoch": 0.89061957438365, "grad_norm": 6.810975074768066, "learning_rate": 2.981958595553247e-07, "loss": 0.2619, "step": 17954 }, { "epoch": 0.8906691800188501, "grad_norm": 7.463273048400879, "learning_rate": 2.979281664615669e-07, "loss": 0.3203, "step": 17955 }, { "epoch": 0.8907187856540503, "grad_norm": 6.946254730224609, "learning_rate": 2.9766058988727585e-07, "loss": 0.2701, "step": 17956 }, { "epoch": 0.8907683912892504, "grad_norm": 5.183581352233887, "learning_rate": 2.973931298390831e-07, "loss": 0.2261, "step": 17957 }, { "epoch": 0.8908179969244506, "grad_norm": 5.978809833526611, "learning_rate": 2.971257863236149e-07, "loss": 0.2734, "step": 17958 }, { "epoch": 0.8908676025596508, "grad_norm": 3.910830020904541, "learning_rate": 2.9685855934749764e-07, "loss": 0.2651, "step": 17959 }, { "epoch": 0.890917208194851, "grad_norm": 11.507580757141113, "learning_rate": 2.9659144891735383e-07, "loss": 0.374, "step": 17960 }, { "epoch": 0.890966813830051, "grad_norm": 4.560873508453369, "learning_rate": 2.9632445503980034e-07, "loss": 0.2053, "step": 17961 }, { "epoch": 0.8910164194652512, "grad_norm": 7.73089599609375, "learning_rate": 2.9605757772145414e-07, "loss": 0.4253, "step": 17962 }, { "epoch": 0.8910660251004514, "grad_norm": 7.026621341705322, "learning_rate": 2.9579081696892943e-07, "loss": 0.2649, "step": 17963 }, { "epoch": 0.8911156307356516, "grad_norm": 5.053976535797119, "learning_rate": 2.9552417278883637e-07, "loss": 0.2948, "step": 17964 }, { "epoch": 0.8911652363708518, "grad_norm": 4.7028279304504395, "learning_rate": 2.95257645187782e-07, "loss": 0.2176, "step": 17965 }, { "epoch": 0.8912148420060518, "grad_norm": 11.79819393157959, "learning_rate": 2.949912341723715e-07, "loss": 0.4215, "step": 17966 }, { "epoch": 0.891264447641252, "grad_norm": 15.368856430053711, "learning_rate": 2.947249397492075e-07, "loss": 0.3381, "step": 17967 }, { "epoch": 0.8913140532764522, "grad_norm": 9.802691459655762, "learning_rate": 2.944587619248862e-07, "loss": 0.3997, "step": 17968 }, { "epoch": 0.8913636589116524, "grad_norm": 17.233016967773438, "learning_rate": 2.941927007060069e-07, "loss": 0.3934, "step": 17969 }, { "epoch": 0.8914132645468525, "grad_norm": 5.920225620269775, "learning_rate": 2.939267560991604e-07, "loss": 0.2329, "step": 17970 }, { "epoch": 0.8914628701820527, "grad_norm": 4.239216327667236, "learning_rate": 2.9366092811093763e-07, "loss": 0.189, "step": 17971 }, { "epoch": 0.8915124758172528, "grad_norm": 7.985313892364502, "learning_rate": 2.9339521674792594e-07, "loss": 0.403, "step": 17972 }, { "epoch": 0.891562081452453, "grad_norm": 5.741793155670166, "learning_rate": 2.9312962201671014e-07, "loss": 0.2463, "step": 17973 }, { "epoch": 0.8916116870876531, "grad_norm": 6.703259468078613, "learning_rate": 2.928641439238711e-07, "loss": 0.2777, "step": 17974 }, { "epoch": 0.8916612927228533, "grad_norm": 8.81912612915039, "learning_rate": 2.925987824759885e-07, "loss": 0.288, "step": 17975 }, { "epoch": 0.8917108983580535, "grad_norm": 5.032670497894287, "learning_rate": 2.9233353767963604e-07, "loss": 0.2871, "step": 17976 }, { "epoch": 0.8917605039932537, "grad_norm": 9.323760986328125, "learning_rate": 2.9206840954138947e-07, "loss": 0.2994, "step": 17977 }, { "epoch": 0.8918101096284538, "grad_norm": 8.866644859313965, "learning_rate": 2.91803398067817e-07, "loss": 0.3029, "step": 17978 }, { "epoch": 0.8918597152636539, "grad_norm": 6.978926181793213, "learning_rate": 2.9153850326548606e-07, "loss": 0.2652, "step": 17979 }, { "epoch": 0.8919093208988541, "grad_norm": 5.593499660491943, "learning_rate": 2.912737251409603e-07, "loss": 0.3448, "step": 17980 }, { "epoch": 0.8919589265340543, "grad_norm": 13.224333763122559, "learning_rate": 2.910090637008028e-07, "loss": 0.3145, "step": 17981 }, { "epoch": 0.8920085321692545, "grad_norm": 3.9322664737701416, "learning_rate": 2.90744518951569e-07, "loss": 0.1798, "step": 17982 }, { "epoch": 0.8920581378044545, "grad_norm": 4.633730411529541, "learning_rate": 2.9048009089981787e-07, "loss": 0.2375, "step": 17983 }, { "epoch": 0.8921077434396547, "grad_norm": 9.983796119689941, "learning_rate": 2.902157795520999e-07, "loss": 0.2702, "step": 17984 }, { "epoch": 0.8921573490748549, "grad_norm": 20.581384658813477, "learning_rate": 2.899515849149653e-07, "loss": 0.3081, "step": 17985 }, { "epoch": 0.8922069547100551, "grad_norm": 5.492249011993408, "learning_rate": 2.8968750699496116e-07, "loss": 0.2638, "step": 17986 }, { "epoch": 0.8922565603452552, "grad_norm": 5.732814311981201, "learning_rate": 2.8942354579863163e-07, "loss": 0.3437, "step": 17987 }, { "epoch": 0.8923061659804554, "grad_norm": 12.855623245239258, "learning_rate": 2.891597013325165e-07, "loss": 0.4646, "step": 17988 }, { "epoch": 0.8923557716156555, "grad_norm": 8.989920616149902, "learning_rate": 2.88895973603156e-07, "loss": 0.2996, "step": 17989 }, { "epoch": 0.8924053772508557, "grad_norm": 6.633686542510986, "learning_rate": 2.8863236261708336e-07, "loss": 0.3185, "step": 17990 }, { "epoch": 0.8924549828860558, "grad_norm": 4.590590000152588, "learning_rate": 2.883688683808322e-07, "loss": 0.1748, "step": 17991 }, { "epoch": 0.892504588521256, "grad_norm": 18.385845184326172, "learning_rate": 2.881054909009323e-07, "loss": 0.3662, "step": 17992 }, { "epoch": 0.8925541941564562, "grad_norm": 6.624843120574951, "learning_rate": 2.87842230183909e-07, "loss": 0.2426, "step": 17993 }, { "epoch": 0.8926037997916564, "grad_norm": 13.91305160522461, "learning_rate": 2.8757908623628705e-07, "loss": 0.3028, "step": 17994 }, { "epoch": 0.8926534054268564, "grad_norm": 10.0743989944458, "learning_rate": 2.873160590645879e-07, "loss": 0.2782, "step": 17995 }, { "epoch": 0.8927030110620566, "grad_norm": 7.857527256011963, "learning_rate": 2.87053148675327e-07, "loss": 0.2016, "step": 17996 }, { "epoch": 0.8927526166972568, "grad_norm": 5.147023677825928, "learning_rate": 2.867903550750228e-07, "loss": 0.2798, "step": 17997 }, { "epoch": 0.892802222332457, "grad_norm": 7.5843706130981445, "learning_rate": 2.8652767827018425e-07, "loss": 0.3028, "step": 17998 }, { "epoch": 0.8928518279676572, "grad_norm": 6.844125270843506, "learning_rate": 2.8626511826732207e-07, "loss": 0.2735, "step": 17999 }, { "epoch": 0.8929014336028572, "grad_norm": 13.667299270629883, "learning_rate": 2.860026750729428e-07, "loss": 0.3799, "step": 18000 }, { "epoch": 0.8929510392380574, "grad_norm": 6.498020648956299, "learning_rate": 2.857403486935506e-07, "loss": 0.3181, "step": 18001 }, { "epoch": 0.8930006448732576, "grad_norm": 7.547567844390869, "learning_rate": 2.8547813913564313e-07, "loss": 0.237, "step": 18002 }, { "epoch": 0.8930502505084578, "grad_norm": 7.193349838256836, "learning_rate": 2.8521604640572133e-07, "loss": 0.29, "step": 18003 }, { "epoch": 0.8930998561436579, "grad_norm": 5.443418502807617, "learning_rate": 2.8495407051027823e-07, "loss": 0.2207, "step": 18004 }, { "epoch": 0.8931494617788581, "grad_norm": 7.354450702667236, "learning_rate": 2.8469221145580597e-07, "loss": 0.2753, "step": 18005 }, { "epoch": 0.8931990674140582, "grad_norm": 6.581933498382568, "learning_rate": 2.844304692487937e-07, "loss": 0.3173, "step": 18006 }, { "epoch": 0.8932486730492584, "grad_norm": 7.3170552253723145, "learning_rate": 2.841688438957274e-07, "loss": 0.286, "step": 18007 }, { "epoch": 0.8932982786844585, "grad_norm": 8.742256164550781, "learning_rate": 2.839073354030908e-07, "loss": 0.4373, "step": 18008 }, { "epoch": 0.8933478843196587, "grad_norm": 10.324867248535156, "learning_rate": 2.8364594377736367e-07, "loss": 0.3995, "step": 18009 }, { "epoch": 0.8933974899548589, "grad_norm": 14.150225639343262, "learning_rate": 2.833846690250225e-07, "loss": 0.2871, "step": 18010 }, { "epoch": 0.8934470955900591, "grad_norm": 27.544776916503906, "learning_rate": 2.831235111525443e-07, "loss": 0.3369, "step": 18011 }, { "epoch": 0.8934967012252591, "grad_norm": 6.011198997497559, "learning_rate": 2.8286247016639845e-07, "loss": 0.2295, "step": 18012 }, { "epoch": 0.8935463068604593, "grad_norm": 5.87554931640625, "learning_rate": 2.826015460730541e-07, "loss": 0.2945, "step": 18013 }, { "epoch": 0.8935959124956595, "grad_norm": 9.456157684326172, "learning_rate": 2.8234073887897784e-07, "loss": 0.2869, "step": 18014 }, { "epoch": 0.8936455181308597, "grad_norm": 4.617887020111084, "learning_rate": 2.8208004859063164e-07, "loss": 0.1911, "step": 18015 }, { "epoch": 0.8936951237660599, "grad_norm": 5.406991958618164, "learning_rate": 2.818194752144765e-07, "loss": 0.2413, "step": 18016 }, { "epoch": 0.8937447294012599, "grad_norm": 46.87500762939453, "learning_rate": 2.8155901875696833e-07, "loss": 0.3575, "step": 18017 }, { "epoch": 0.8937943350364601, "grad_norm": 7.713346004486084, "learning_rate": 2.8129867922456314e-07, "loss": 0.2614, "step": 18018 }, { "epoch": 0.8938439406716603, "grad_norm": 8.294185638427734, "learning_rate": 2.8103845662371064e-07, "loss": 0.3484, "step": 18019 }, { "epoch": 0.8938935463068605, "grad_norm": 4.997306823730469, "learning_rate": 2.807783509608597e-07, "loss": 0.2783, "step": 18020 }, { "epoch": 0.8939431519420606, "grad_norm": 5.754305839538574, "learning_rate": 2.805183622424557e-07, "loss": 0.3137, "step": 18021 }, { "epoch": 0.8939927575772608, "grad_norm": 6.6313557624816895, "learning_rate": 2.8025849047494167e-07, "loss": 0.3376, "step": 18022 }, { "epoch": 0.8940423632124609, "grad_norm": 4.344479084014893, "learning_rate": 2.799987356647571e-07, "loss": 0.3185, "step": 18023 }, { "epoch": 0.8940919688476611, "grad_norm": 13.207358360290527, "learning_rate": 2.7973909781834007e-07, "loss": 0.3226, "step": 18024 }, { "epoch": 0.8941415744828612, "grad_norm": 7.772857666015625, "learning_rate": 2.7947957694212156e-07, "loss": 0.241, "step": 18025 }, { "epoch": 0.8941911801180614, "grad_norm": 5.365719318389893, "learning_rate": 2.7922017304253643e-07, "loss": 0.3055, "step": 18026 }, { "epoch": 0.8942407857532616, "grad_norm": 5.273291110992432, "learning_rate": 2.78960886126009e-07, "loss": 0.2769, "step": 18027 }, { "epoch": 0.8942903913884618, "grad_norm": 5.613566875457764, "learning_rate": 2.7870171619896746e-07, "loss": 0.2389, "step": 18028 }, { "epoch": 0.8943399970236618, "grad_norm": 9.412496566772461, "learning_rate": 2.7844266326783274e-07, "loss": 0.2824, "step": 18029 }, { "epoch": 0.894389602658862, "grad_norm": 4.676949501037598, "learning_rate": 2.7818372733902477e-07, "loss": 0.2655, "step": 18030 }, { "epoch": 0.8944392082940622, "grad_norm": 5.3061652183532715, "learning_rate": 2.7792490841896e-07, "loss": 0.1879, "step": 18031 }, { "epoch": 0.8944888139292624, "grad_norm": 8.731086730957031, "learning_rate": 2.776662065140523e-07, "loss": 0.2937, "step": 18032 }, { "epoch": 0.8945384195644626, "grad_norm": 4.070481777191162, "learning_rate": 2.774076216307109e-07, "loss": 0.2696, "step": 18033 }, { "epoch": 0.8945880251996626, "grad_norm": 4.3631367683410645, "learning_rate": 2.771491537753468e-07, "loss": 0.161, "step": 18034 }, { "epoch": 0.8946376308348628, "grad_norm": 11.31812572479248, "learning_rate": 2.7689080295436153e-07, "loss": 0.3223, "step": 18035 }, { "epoch": 0.894687236470063, "grad_norm": 7.295368671417236, "learning_rate": 2.7663256917415893e-07, "loss": 0.2077, "step": 18036 }, { "epoch": 0.8947368421052632, "grad_norm": 8.108955383300781, "learning_rate": 2.7637445244113824e-07, "loss": 0.2551, "step": 18037 }, { "epoch": 0.8947864477404633, "grad_norm": 7.748248100280762, "learning_rate": 2.761164527616961e-07, "loss": 0.3785, "step": 18038 }, { "epoch": 0.8948360533756635, "grad_norm": 9.866857528686523, "learning_rate": 2.7585857014222285e-07, "loss": 0.3643, "step": 18039 }, { "epoch": 0.8948856590108636, "grad_norm": 5.801894664764404, "learning_rate": 2.7560080458911344e-07, "loss": 0.2469, "step": 18040 }, { "epoch": 0.8949352646460638, "grad_norm": 5.255162239074707, "learning_rate": 2.753431561087522e-07, "loss": 0.26, "step": 18041 }, { "epoch": 0.894984870281264, "grad_norm": 7.4123430252075195, "learning_rate": 2.750856247075245e-07, "loss": 0.2639, "step": 18042 }, { "epoch": 0.8950344759164641, "grad_norm": 8.072537422180176, "learning_rate": 2.748282103918126e-07, "loss": 0.3052, "step": 18043 }, { "epoch": 0.8950840815516643, "grad_norm": 5.954550743103027, "learning_rate": 2.745709131679952e-07, "loss": 0.2911, "step": 18044 }, { "epoch": 0.8951336871868645, "grad_norm": 8.503827095031738, "learning_rate": 2.743137330424478e-07, "loss": 0.2585, "step": 18045 }, { "epoch": 0.8951832928220645, "grad_norm": 6.990259647369385, "learning_rate": 2.740566700215447e-07, "loss": 0.2389, "step": 18046 }, { "epoch": 0.8952328984572647, "grad_norm": 11.37341594696045, "learning_rate": 2.737997241116536e-07, "loss": 0.2574, "step": 18047 }, { "epoch": 0.8952825040924649, "grad_norm": 5.083678245544434, "learning_rate": 2.735428953191449e-07, "loss": 0.3051, "step": 18048 }, { "epoch": 0.8953321097276651, "grad_norm": 9.289558410644531, "learning_rate": 2.732861836503803e-07, "loss": 0.2788, "step": 18049 }, { "epoch": 0.8953817153628653, "grad_norm": 7.742672443389893, "learning_rate": 2.7302958911172184e-07, "loss": 0.2691, "step": 18050 }, { "epoch": 0.8954313209980653, "grad_norm": 7.515446662902832, "learning_rate": 2.7277311170952894e-07, "loss": 0.262, "step": 18051 }, { "epoch": 0.8954809266332655, "grad_norm": 7.871843338012695, "learning_rate": 2.72516751450157e-07, "loss": 0.282, "step": 18052 }, { "epoch": 0.8955305322684657, "grad_norm": 9.93625545501709, "learning_rate": 2.722605083399571e-07, "loss": 0.3818, "step": 18053 }, { "epoch": 0.8955801379036659, "grad_norm": 4.635597229003906, "learning_rate": 2.7200438238528195e-07, "loss": 0.3143, "step": 18054 }, { "epoch": 0.895629743538866, "grad_norm": 6.4963226318359375, "learning_rate": 2.717483735924764e-07, "loss": 0.2995, "step": 18055 }, { "epoch": 0.8956793491740662, "grad_norm": 7.781663417816162, "learning_rate": 2.714924819678849e-07, "loss": 0.2386, "step": 18056 }, { "epoch": 0.8957289548092663, "grad_norm": 5.249287128448486, "learning_rate": 2.7123670751784847e-07, "loss": 0.3287, "step": 18057 }, { "epoch": 0.8957785604444665, "grad_norm": 17.046436309814453, "learning_rate": 2.709810502487059e-07, "loss": 0.4161, "step": 18058 }, { "epoch": 0.8958281660796666, "grad_norm": 4.821937561035156, "learning_rate": 2.7072551016679107e-07, "loss": 0.2445, "step": 18059 }, { "epoch": 0.8958777717148668, "grad_norm": 6.5252861976623535, "learning_rate": 2.704700872784388e-07, "loss": 0.2229, "step": 18060 }, { "epoch": 0.895927377350067, "grad_norm": 15.50204849243164, "learning_rate": 2.702147815899764e-07, "loss": 0.3601, "step": 18061 }, { "epoch": 0.8959769829852672, "grad_norm": 7.386551856994629, "learning_rate": 2.699595931077309e-07, "loss": 0.3388, "step": 18062 }, { "epoch": 0.8960265886204672, "grad_norm": 4.470968723297119, "learning_rate": 2.6970452183802674e-07, "loss": 0.2734, "step": 18063 }, { "epoch": 0.8960761942556674, "grad_norm": 6.437692165374756, "learning_rate": 2.694495677871839e-07, "loss": 0.2151, "step": 18064 }, { "epoch": 0.8961257998908676, "grad_norm": 6.19354248046875, "learning_rate": 2.6919473096152115e-07, "loss": 0.2667, "step": 18065 }, { "epoch": 0.8961754055260678, "grad_norm": 5.931463241577148, "learning_rate": 2.689400113673524e-07, "loss": 0.2692, "step": 18066 }, { "epoch": 0.896225011161268, "grad_norm": 7.049427509307861, "learning_rate": 2.686854090109908e-07, "loss": 0.2497, "step": 18067 }, { "epoch": 0.896274616796468, "grad_norm": 9.182486534118652, "learning_rate": 2.6843092389874536e-07, "loss": 0.3391, "step": 18068 }, { "epoch": 0.8963242224316682, "grad_norm": 7.456502437591553, "learning_rate": 2.6817655603692096e-07, "loss": 0.3586, "step": 18069 }, { "epoch": 0.8963738280668684, "grad_norm": 10.006381034851074, "learning_rate": 2.679223054318225e-07, "loss": 0.3924, "step": 18070 }, { "epoch": 0.8964234337020686, "grad_norm": 9.938789367675781, "learning_rate": 2.676681720897495e-07, "loss": 0.3679, "step": 18071 }, { "epoch": 0.8964730393372687, "grad_norm": 7.597586631774902, "learning_rate": 2.6741415601700013e-07, "loss": 0.2314, "step": 18072 }, { "epoch": 0.8965226449724689, "grad_norm": 5.0166521072387695, "learning_rate": 2.6716025721986836e-07, "loss": 0.3019, "step": 18073 }, { "epoch": 0.896572250607669, "grad_norm": 5.058136463165283, "learning_rate": 2.669064757046469e-07, "loss": 0.1566, "step": 18074 }, { "epoch": 0.8966218562428692, "grad_norm": 4.795787334442139, "learning_rate": 2.66652811477624e-07, "loss": 0.3053, "step": 18075 }, { "epoch": 0.8966714618780693, "grad_norm": 10.89262866973877, "learning_rate": 2.663992645450847e-07, "loss": 0.2663, "step": 18076 }, { "epoch": 0.8967210675132695, "grad_norm": 9.37427043914795, "learning_rate": 2.661458349133139e-07, "loss": 0.2283, "step": 18077 }, { "epoch": 0.8967706731484697, "grad_norm": 11.315134048461914, "learning_rate": 2.6589252258859e-07, "loss": 0.3113, "step": 18078 }, { "epoch": 0.8968202787836699, "grad_norm": 8.013832092285156, "learning_rate": 2.6563932757719126e-07, "loss": 0.301, "step": 18079 }, { "epoch": 0.89686988441887, "grad_norm": 4.851363182067871, "learning_rate": 2.6538624988539096e-07, "loss": 0.2537, "step": 18080 }, { "epoch": 0.8969194900540701, "grad_norm": 7.098740577697754, "learning_rate": 2.651332895194614e-07, "loss": 0.325, "step": 18081 }, { "epoch": 0.8969690956892703, "grad_norm": 28.809310913085938, "learning_rate": 2.648804464856708e-07, "loss": 0.6515, "step": 18082 }, { "epoch": 0.8970187013244705, "grad_norm": 6.0768914222717285, "learning_rate": 2.6462772079028534e-07, "loss": 0.2688, "step": 18083 }, { "epoch": 0.8970683069596707, "grad_norm": 15.69296646118164, "learning_rate": 2.6437511243956546e-07, "loss": 0.2747, "step": 18084 }, { "epoch": 0.8971179125948707, "grad_norm": 7.555765628814697, "learning_rate": 2.641226214397741e-07, "loss": 0.2709, "step": 18085 }, { "epoch": 0.8971675182300709, "grad_norm": 7.660199165344238, "learning_rate": 2.638702477971655e-07, "loss": 0.2128, "step": 18086 }, { "epoch": 0.8972171238652711, "grad_norm": 9.3507719039917, "learning_rate": 2.6361799151799484e-07, "loss": 0.282, "step": 18087 }, { "epoch": 0.8972667295004713, "grad_norm": 4.208372116088867, "learning_rate": 2.6336585260851254e-07, "loss": 0.2458, "step": 18088 }, { "epoch": 0.8973163351356714, "grad_norm": 8.567630767822266, "learning_rate": 2.6311383107496814e-07, "loss": 0.2843, "step": 18089 }, { "epoch": 0.8973659407708716, "grad_norm": 6.165302276611328, "learning_rate": 2.628619269236038e-07, "loss": 0.2953, "step": 18090 }, { "epoch": 0.8974155464060717, "grad_norm": 5.222751140594482, "learning_rate": 2.6261014016066566e-07, "loss": 0.2721, "step": 18091 }, { "epoch": 0.8974651520412719, "grad_norm": 6.2739129066467285, "learning_rate": 2.6235847079238983e-07, "loss": 0.3265, "step": 18092 }, { "epoch": 0.897514757676472, "grad_norm": 4.485351085662842, "learning_rate": 2.621069188250147e-07, "loss": 0.2887, "step": 18093 }, { "epoch": 0.8975643633116722, "grad_norm": 6.262730121612549, "learning_rate": 2.6185548426477357e-07, "loss": 0.304, "step": 18094 }, { "epoch": 0.8976139689468724, "grad_norm": 8.186176300048828, "learning_rate": 2.61604167117897e-07, "loss": 0.1965, "step": 18095 }, { "epoch": 0.8976635745820726, "grad_norm": 6.10750675201416, "learning_rate": 2.613529673906112e-07, "loss": 0.2712, "step": 18096 }, { "epoch": 0.8977131802172726, "grad_norm": 4.469847679138184, "learning_rate": 2.611018850891439e-07, "loss": 0.2129, "step": 18097 }, { "epoch": 0.8977627858524728, "grad_norm": 4.332308292388916, "learning_rate": 2.6085092021971406e-07, "loss": 0.2671, "step": 18098 }, { "epoch": 0.897812391487673, "grad_norm": 5.339920997619629, "learning_rate": 2.6060007278854383e-07, "loss": 0.2566, "step": 18099 }, { "epoch": 0.8978619971228732, "grad_norm": 5.392362594604492, "learning_rate": 2.6034934280184667e-07, "loss": 0.2838, "step": 18100 }, { "epoch": 0.8979116027580734, "grad_norm": 4.506789684295654, "learning_rate": 2.60098730265837e-07, "loss": 0.2041, "step": 18101 }, { "epoch": 0.8979612083932734, "grad_norm": 9.158905029296875, "learning_rate": 2.5984823518672486e-07, "loss": 0.329, "step": 18102 }, { "epoch": 0.8980108140284736, "grad_norm": 11.916828155517578, "learning_rate": 2.5959785757071807e-07, "loss": 0.4769, "step": 18103 }, { "epoch": 0.8980604196636738, "grad_norm": 10.2540283203125, "learning_rate": 2.5934759742401947e-07, "loss": 0.3171, "step": 18104 }, { "epoch": 0.898110025298874, "grad_norm": 13.061494827270508, "learning_rate": 2.590974547528335e-07, "loss": 0.347, "step": 18105 }, { "epoch": 0.8981596309340741, "grad_norm": 10.612540245056152, "learning_rate": 2.5884742956335585e-07, "loss": 0.1784, "step": 18106 }, { "epoch": 0.8982092365692743, "grad_norm": 5.737185955047607, "learning_rate": 2.585975218617842e-07, "loss": 0.2069, "step": 18107 }, { "epoch": 0.8982588422044744, "grad_norm": 11.36510944366455, "learning_rate": 2.5834773165431036e-07, "loss": 0.395, "step": 18108 }, { "epoch": 0.8983084478396746, "grad_norm": 13.775078773498535, "learning_rate": 2.5809805894712547e-07, "loss": 0.3942, "step": 18109 }, { "epoch": 0.8983580534748747, "grad_norm": 3.903156042098999, "learning_rate": 2.5784850374641403e-07, "loss": 0.2673, "step": 18110 }, { "epoch": 0.8984076591100749, "grad_norm": 12.728175163269043, "learning_rate": 2.5759906605836325e-07, "loss": 0.3495, "step": 18111 }, { "epoch": 0.8984572647452751, "grad_norm": 6.995873928070068, "learning_rate": 2.573497458891522e-07, "loss": 0.2897, "step": 18112 }, { "epoch": 0.8985068703804753, "grad_norm": 7.519039630889893, "learning_rate": 2.571005432449597e-07, "loss": 0.3032, "step": 18113 }, { "epoch": 0.8985564760156753, "grad_norm": 11.84531307220459, "learning_rate": 2.5685145813196146e-07, "loss": 0.3252, "step": 18114 }, { "epoch": 0.8986060816508755, "grad_norm": 10.390148162841797, "learning_rate": 2.566024905563291e-07, "loss": 0.2456, "step": 18115 }, { "epoch": 0.8986556872860757, "grad_norm": 10.02374267578125, "learning_rate": 2.563536405242334e-07, "loss": 0.2339, "step": 18116 }, { "epoch": 0.8987052929212759, "grad_norm": 5.220520496368408, "learning_rate": 2.561049080418404e-07, "loss": 0.2632, "step": 18117 }, { "epoch": 0.8987548985564761, "grad_norm": 6.765008926391602, "learning_rate": 2.5585629311531243e-07, "loss": 0.2327, "step": 18118 }, { "epoch": 0.8988045041916761, "grad_norm": 10.692605018615723, "learning_rate": 2.5560779575081296e-07, "loss": 0.4112, "step": 18119 }, { "epoch": 0.8988541098268763, "grad_norm": 6.304233074188232, "learning_rate": 2.553594159544981e-07, "loss": 0.2869, "step": 18120 }, { "epoch": 0.8989037154620765, "grad_norm": 5.244013786315918, "learning_rate": 2.5511115373252295e-07, "loss": 0.2317, "step": 18121 }, { "epoch": 0.8989533210972767, "grad_norm": 5.940886974334717, "learning_rate": 2.548630090910392e-07, "loss": 0.2529, "step": 18122 }, { "epoch": 0.8990029267324768, "grad_norm": 6.076381683349609, "learning_rate": 2.5461498203619705e-07, "loss": 0.2527, "step": 18123 }, { "epoch": 0.899052532367677, "grad_norm": 7.9124956130981445, "learning_rate": 2.5436707257414205e-07, "loss": 0.3059, "step": 18124 }, { "epoch": 0.8991021380028771, "grad_norm": 12.387482643127441, "learning_rate": 2.5411928071101823e-07, "loss": 0.4121, "step": 18125 }, { "epoch": 0.8991517436380773, "grad_norm": 8.955511093139648, "learning_rate": 2.538716064529656e-07, "loss": 0.327, "step": 18126 }, { "epoch": 0.8992013492732774, "grad_norm": 6.496259689331055, "learning_rate": 2.5362404980612045e-07, "loss": 0.3385, "step": 18127 }, { "epoch": 0.8992509549084776, "grad_norm": 18.58071517944336, "learning_rate": 2.533766107766189e-07, "loss": 0.2282, "step": 18128 }, { "epoch": 0.8993005605436778, "grad_norm": 5.311077117919922, "learning_rate": 2.531292893705917e-07, "loss": 0.2188, "step": 18129 }, { "epoch": 0.899350166178878, "grad_norm": 6.486027240753174, "learning_rate": 2.5288208559416827e-07, "loss": 0.3143, "step": 18130 }, { "epoch": 0.899399771814078, "grad_norm": 5.441786766052246, "learning_rate": 2.5263499945347437e-07, "loss": 0.2567, "step": 18131 }, { "epoch": 0.8994493774492782, "grad_norm": 5.868258953094482, "learning_rate": 2.523880309546328e-07, "loss": 0.2456, "step": 18132 }, { "epoch": 0.8994989830844784, "grad_norm": 6.000047206878662, "learning_rate": 2.521411801037621e-07, "loss": 0.2316, "step": 18133 }, { "epoch": 0.8995485887196786, "grad_norm": 5.300726890563965, "learning_rate": 2.518944469069823e-07, "loss": 0.2827, "step": 18134 }, { "epoch": 0.8995981943548788, "grad_norm": 5.853241920471191, "learning_rate": 2.5164783137040515e-07, "loss": 0.2646, "step": 18135 }, { "epoch": 0.8996477999900788, "grad_norm": 5.192758083343506, "learning_rate": 2.514013335001425e-07, "loss": 0.2798, "step": 18136 }, { "epoch": 0.899697405625279, "grad_norm": 4.2674126625061035, "learning_rate": 2.5115495330230277e-07, "loss": 0.1869, "step": 18137 }, { "epoch": 0.8997470112604792, "grad_norm": 8.353425979614258, "learning_rate": 2.509086907829916e-07, "loss": 0.2183, "step": 18138 }, { "epoch": 0.8997966168956794, "grad_norm": 8.242629051208496, "learning_rate": 2.506625459483114e-07, "loss": 0.3445, "step": 18139 }, { "epoch": 0.8998462225308795, "grad_norm": 4.6888275146484375, "learning_rate": 2.5041651880436224e-07, "loss": 0.2906, "step": 18140 }, { "epoch": 0.8998958281660797, "grad_norm": 12.160880088806152, "learning_rate": 2.501706093572387e-07, "loss": 0.331, "step": 18141 }, { "epoch": 0.8999454338012798, "grad_norm": 8.063919067382812, "learning_rate": 2.4992481761303756e-07, "loss": 0.1903, "step": 18142 }, { "epoch": 0.89999503943648, "grad_norm": 8.177062034606934, "learning_rate": 2.4967914357784726e-07, "loss": 0.3254, "step": 18143 }, { "epoch": 0.9000446450716801, "grad_norm": 3.9504268169403076, "learning_rate": 2.494335872577569e-07, "loss": 0.1861, "step": 18144 }, { "epoch": 0.9000446450716801, "eval_loss": 0.2812660038471222, "eval_runtime": 35.5845, "eval_samples_per_second": 45.778, "eval_steps_per_second": 5.733, "step": 18144 }, { "epoch": 0.9000942507068803, "grad_norm": 8.266641616821289, "learning_rate": 2.49188148658851e-07, "loss": 0.246, "step": 18145 }, { "epoch": 0.9001438563420805, "grad_norm": 28.026445388793945, "learning_rate": 2.4894282778721245e-07, "loss": 0.3591, "step": 18146 }, { "epoch": 0.9001934619772807, "grad_norm": 5.765142440795898, "learning_rate": 2.486976246489181e-07, "loss": 0.3445, "step": 18147 }, { "epoch": 0.9002430676124807, "grad_norm": 4.250341892242432, "learning_rate": 2.4845253925004807e-07, "loss": 0.2634, "step": 18148 }, { "epoch": 0.9002926732476809, "grad_norm": 4.672181606292725, "learning_rate": 2.48207571596672e-07, "loss": 0.1666, "step": 18149 }, { "epoch": 0.9003422788828811, "grad_norm": 8.712636947631836, "learning_rate": 2.4796272169486213e-07, "loss": 0.2644, "step": 18150 }, { "epoch": 0.9003918845180813, "grad_norm": 4.397940158843994, "learning_rate": 2.477179895506854e-07, "loss": 0.2263, "step": 18151 }, { "epoch": 0.9004414901532815, "grad_norm": 10.351309776306152, "learning_rate": 2.4747337517020687e-07, "loss": 0.3586, "step": 18152 }, { "epoch": 0.9004910957884815, "grad_norm": 12.656286239624023, "learning_rate": 2.4722887855948784e-07, "loss": 0.4728, "step": 18153 }, { "epoch": 0.9005407014236817, "grad_norm": 7.039549827575684, "learning_rate": 2.4698449972458737e-07, "loss": 0.2676, "step": 18154 }, { "epoch": 0.9005903070588819, "grad_norm": 10.202743530273438, "learning_rate": 2.4674023867156003e-07, "loss": 0.3391, "step": 18155 }, { "epoch": 0.9006399126940821, "grad_norm": 5.202947616577148, "learning_rate": 2.46496095406461e-07, "loss": 0.3269, "step": 18156 }, { "epoch": 0.9006895183292822, "grad_norm": 18.8669376373291, "learning_rate": 2.462520699353388e-07, "loss": 0.3692, "step": 18157 }, { "epoch": 0.9007391239644824, "grad_norm": 5.300055503845215, "learning_rate": 2.4600816226424015e-07, "loss": 0.3014, "step": 18158 }, { "epoch": 0.9007887295996825, "grad_norm": 9.153376579284668, "learning_rate": 2.457643723992098e-07, "loss": 0.2843, "step": 18159 }, { "epoch": 0.9008383352348827, "grad_norm": 10.055496215820312, "learning_rate": 2.455207003462895e-07, "loss": 0.3462, "step": 18160 }, { "epoch": 0.9008879408700828, "grad_norm": 5.217222690582275, "learning_rate": 2.4527714611151555e-07, "loss": 0.2859, "step": 18161 }, { "epoch": 0.900937546505283, "grad_norm": 8.279172897338867, "learning_rate": 2.450337097009259e-07, "loss": 0.29, "step": 18162 }, { "epoch": 0.9009871521404832, "grad_norm": 7.378831386566162, "learning_rate": 2.4479039112055126e-07, "loss": 0.2987, "step": 18163 }, { "epoch": 0.9010367577756834, "grad_norm": 4.815336227416992, "learning_rate": 2.4454719037642184e-07, "loss": 0.1614, "step": 18164 }, { "epoch": 0.9010863634108834, "grad_norm": 10.916125297546387, "learning_rate": 2.443041074745639e-07, "loss": 0.3564, "step": 18165 }, { "epoch": 0.9011359690460836, "grad_norm": 14.166976928710938, "learning_rate": 2.4406114242100263e-07, "loss": 0.3428, "step": 18166 }, { "epoch": 0.9011855746812838, "grad_norm": 5.942845821380615, "learning_rate": 2.4381829522175606e-07, "loss": 0.339, "step": 18167 }, { "epoch": 0.901235180316484, "grad_norm": 22.611772537231445, "learning_rate": 2.4357556588284483e-07, "loss": 0.2786, "step": 18168 }, { "epoch": 0.9012847859516842, "grad_norm": 4.935755729675293, "learning_rate": 2.433329544102814e-07, "loss": 0.2601, "step": 18169 }, { "epoch": 0.9013343915868842, "grad_norm": 9.682504653930664, "learning_rate": 2.4309046081007983e-07, "loss": 0.2385, "step": 18170 }, { "epoch": 0.9013839972220844, "grad_norm": 5.2444257736206055, "learning_rate": 2.428480850882486e-07, "loss": 0.3103, "step": 18171 }, { "epoch": 0.9014336028572846, "grad_norm": 9.418745994567871, "learning_rate": 2.426058272507931e-07, "loss": 0.2748, "step": 18172 }, { "epoch": 0.9014832084924848, "grad_norm": 9.536396980285645, "learning_rate": 2.4236368730371773e-07, "loss": 0.3445, "step": 18173 }, { "epoch": 0.9015328141276849, "grad_norm": 6.228531837463379, "learning_rate": 2.421216652530217e-07, "loss": 0.2053, "step": 18174 }, { "epoch": 0.9015824197628851, "grad_norm": 5.603036880493164, "learning_rate": 2.4187976110470357e-07, "loss": 0.2135, "step": 18175 }, { "epoch": 0.9016320253980852, "grad_norm": 8.767404556274414, "learning_rate": 2.4163797486475737e-07, "loss": 0.3239, "step": 18176 }, { "epoch": 0.9016816310332854, "grad_norm": 9.991186141967773, "learning_rate": 2.4139630653917445e-07, "loss": 0.2879, "step": 18177 }, { "epoch": 0.9017312366684855, "grad_norm": 6.674290180206299, "learning_rate": 2.4115475613394334e-07, "loss": 0.2347, "step": 18178 }, { "epoch": 0.9017808423036857, "grad_norm": 9.758295059204102, "learning_rate": 2.409133236550504e-07, "loss": 0.3326, "step": 18179 }, { "epoch": 0.9018304479388859, "grad_norm": 4.403385162353516, "learning_rate": 2.406720091084774e-07, "loss": 0.2135, "step": 18180 }, { "epoch": 0.9018800535740861, "grad_norm": 6.382279396057129, "learning_rate": 2.4043081250020527e-07, "loss": 0.2422, "step": 18181 }, { "epoch": 0.9019296592092861, "grad_norm": 4.312603950500488, "learning_rate": 2.401897338362108e-07, "loss": 0.3042, "step": 18182 }, { "epoch": 0.9019792648444863, "grad_norm": 6.986233711242676, "learning_rate": 2.399487731224681e-07, "loss": 0.3371, "step": 18183 }, { "epoch": 0.9020288704796865, "grad_norm": 13.608126640319824, "learning_rate": 2.3970793036494746e-07, "loss": 0.3091, "step": 18184 }, { "epoch": 0.9020784761148867, "grad_norm": 6.377904415130615, "learning_rate": 2.394672055696179e-07, "loss": 0.2424, "step": 18185 }, { "epoch": 0.9021280817500869, "grad_norm": 11.8283109664917, "learning_rate": 2.392265987424441e-07, "loss": 0.3758, "step": 18186 }, { "epoch": 0.9021776873852869, "grad_norm": 7.376835823059082, "learning_rate": 2.389861098893892e-07, "loss": 0.3128, "step": 18187 }, { "epoch": 0.9022272930204871, "grad_norm": 4.405696868896484, "learning_rate": 2.387457390164116e-07, "loss": 0.2419, "step": 18188 }, { "epoch": 0.9022768986556873, "grad_norm": 3.8155648708343506, "learning_rate": 2.385054861294683e-07, "loss": 0.1513, "step": 18189 }, { "epoch": 0.9023265042908875, "grad_norm": 5.991025447845459, "learning_rate": 2.3826535123451343e-07, "loss": 0.3316, "step": 18190 }, { "epoch": 0.9023761099260876, "grad_norm": 5.551610469818115, "learning_rate": 2.380253343374972e-07, "loss": 0.211, "step": 18191 }, { "epoch": 0.9024257155612878, "grad_norm": 5.127650737762451, "learning_rate": 2.3778543544436595e-07, "loss": 0.1364, "step": 18192 }, { "epoch": 0.9024753211964879, "grad_norm": 7.7813262939453125, "learning_rate": 2.375456545610677e-07, "loss": 0.28, "step": 18193 }, { "epoch": 0.9025249268316881, "grad_norm": 7.209781646728516, "learning_rate": 2.3730599169354106e-07, "loss": 0.3622, "step": 18194 }, { "epoch": 0.9025745324668882, "grad_norm": 6.512776851654053, "learning_rate": 2.3706644684772684e-07, "loss": 0.2151, "step": 18195 }, { "epoch": 0.9026241381020884, "grad_norm": 4.150611877441406, "learning_rate": 2.3682702002956081e-07, "loss": 0.2288, "step": 18196 }, { "epoch": 0.9026737437372886, "grad_norm": 4.249025344848633, "learning_rate": 2.3658771124497605e-07, "loss": 0.3123, "step": 18197 }, { "epoch": 0.9027233493724887, "grad_norm": 4.341790676116943, "learning_rate": 2.3634852049990108e-07, "loss": 0.2421, "step": 18198 }, { "epoch": 0.9027729550076888, "grad_norm": 5.100574970245361, "learning_rate": 2.3610944780026624e-07, "loss": 0.2954, "step": 18199 }, { "epoch": 0.902822560642889, "grad_norm": 5.537299633026123, "learning_rate": 2.358704931519934e-07, "loss": 0.2724, "step": 18200 }, { "epoch": 0.9028721662780892, "grad_norm": 9.385641098022461, "learning_rate": 2.3563165656100506e-07, "loss": 0.2568, "step": 18201 }, { "epoch": 0.9029217719132894, "grad_norm": 5.237985134124756, "learning_rate": 2.3539293803321926e-07, "loss": 0.2155, "step": 18202 }, { "epoch": 0.9029713775484896, "grad_norm": 7.339376926422119, "learning_rate": 2.3515433757455242e-07, "loss": 0.3215, "step": 18203 }, { "epoch": 0.9030209831836896, "grad_norm": 4.861443996429443, "learning_rate": 2.3491585519091476e-07, "loss": 0.1658, "step": 18204 }, { "epoch": 0.9030705888188898, "grad_norm": 4.877134323120117, "learning_rate": 2.3467749088821935e-07, "loss": 0.2415, "step": 18205 }, { "epoch": 0.90312019445409, "grad_norm": 9.241689682006836, "learning_rate": 2.344392446723698e-07, "loss": 0.3019, "step": 18206 }, { "epoch": 0.9031698000892902, "grad_norm": 7.252490997314453, "learning_rate": 2.34201116549273e-07, "loss": 0.2444, "step": 18207 }, { "epoch": 0.9032194057244903, "grad_norm": 6.8657307624816895, "learning_rate": 2.3396310652482768e-07, "loss": 0.2441, "step": 18208 }, { "epoch": 0.9032690113596905, "grad_norm": 7.643096923828125, "learning_rate": 2.3372521460493237e-07, "loss": 0.2692, "step": 18209 }, { "epoch": 0.9033186169948906, "grad_norm": 6.182608604431152, "learning_rate": 2.3348744079548235e-07, "loss": 0.2785, "step": 18210 }, { "epoch": 0.9033682226300908, "grad_norm": 7.159979820251465, "learning_rate": 2.3324978510237072e-07, "loss": 0.2994, "step": 18211 }, { "epoch": 0.9034178282652909, "grad_norm": 4.041317939758301, "learning_rate": 2.330122475314839e-07, "loss": 0.242, "step": 18212 }, { "epoch": 0.9034674339004911, "grad_norm": 7.1825361251831055, "learning_rate": 2.3277482808871098e-07, "loss": 0.2604, "step": 18213 }, { "epoch": 0.9035170395356913, "grad_norm": 7.1525797843933105, "learning_rate": 2.3253752677993403e-07, "loss": 0.2662, "step": 18214 }, { "epoch": 0.9035666451708914, "grad_norm": 8.6780424118042, "learning_rate": 2.3230034361103383e-07, "loss": 0.3473, "step": 18215 }, { "epoch": 0.9036162508060915, "grad_norm": 13.902705192565918, "learning_rate": 2.3206327858788735e-07, "loss": 0.3657, "step": 18216 }, { "epoch": 0.9036658564412917, "grad_norm": 9.096508026123047, "learning_rate": 2.3182633171637048e-07, "loss": 0.2531, "step": 18217 }, { "epoch": 0.9037154620764919, "grad_norm": 7.345362186431885, "learning_rate": 2.315895030023524e-07, "loss": 0.2669, "step": 18218 }, { "epoch": 0.9037650677116921, "grad_norm": 8.12875747680664, "learning_rate": 2.3135279245170505e-07, "loss": 0.3276, "step": 18219 }, { "epoch": 0.9038146733468923, "grad_norm": 9.961304664611816, "learning_rate": 2.3111620007029211e-07, "loss": 0.2935, "step": 18220 }, { "epoch": 0.9038642789820923, "grad_norm": 4.946739196777344, "learning_rate": 2.3087972586397667e-07, "loss": 0.2412, "step": 18221 }, { "epoch": 0.9039138846172925, "grad_norm": 4.485898971557617, "learning_rate": 2.3064336983861902e-07, "loss": 0.3096, "step": 18222 }, { "epoch": 0.9039634902524927, "grad_norm": 11.24146556854248, "learning_rate": 2.3040713200007614e-07, "loss": 0.3905, "step": 18223 }, { "epoch": 0.9040130958876929, "grad_norm": 6.9096832275390625, "learning_rate": 2.301710123542017e-07, "loss": 0.2147, "step": 18224 }, { "epoch": 0.904062701522893, "grad_norm": 9.77284049987793, "learning_rate": 2.2993501090684823e-07, "loss": 0.3435, "step": 18225 }, { "epoch": 0.9041123071580932, "grad_norm": 10.800780296325684, "learning_rate": 2.296991276638616e-07, "loss": 0.4917, "step": 18226 }, { "epoch": 0.9041619127932933, "grad_norm": 8.759330749511719, "learning_rate": 2.2946336263108937e-07, "loss": 0.3385, "step": 18227 }, { "epoch": 0.9042115184284935, "grad_norm": 3.747196674346924, "learning_rate": 2.292277158143724e-07, "loss": 0.226, "step": 18228 }, { "epoch": 0.9042611240636936, "grad_norm": 6.073301315307617, "learning_rate": 2.2899218721955108e-07, "loss": 0.2657, "step": 18229 }, { "epoch": 0.9043107296988938, "grad_norm": 6.564184188842773, "learning_rate": 2.2875677685246123e-07, "loss": 0.293, "step": 18230 }, { "epoch": 0.904360335334094, "grad_norm": 5.944050312042236, "learning_rate": 2.285214847189371e-07, "loss": 0.325, "step": 18231 }, { "epoch": 0.9044099409692941, "grad_norm": 4.421927452087402, "learning_rate": 2.2828631082480847e-07, "loss": 0.2596, "step": 18232 }, { "epoch": 0.9044595466044942, "grad_norm": 7.019112586975098, "learning_rate": 2.2805125517590453e-07, "loss": 0.239, "step": 18233 }, { "epoch": 0.9045091522396944, "grad_norm": 5.30724573135376, "learning_rate": 2.278163177780479e-07, "loss": 0.2419, "step": 18234 }, { "epoch": 0.9045587578748946, "grad_norm": 8.150565147399902, "learning_rate": 2.2758149863706225e-07, "loss": 0.2844, "step": 18235 }, { "epoch": 0.9046083635100948, "grad_norm": 5.937725067138672, "learning_rate": 2.273467977587651e-07, "loss": 0.3029, "step": 18236 }, { "epoch": 0.904657969145295, "grad_norm": 3.5708224773406982, "learning_rate": 2.2711221514897408e-07, "loss": 0.215, "step": 18237 }, { "epoch": 0.904707574780495, "grad_norm": 13.334083557128906, "learning_rate": 2.268777508135006e-07, "loss": 0.3929, "step": 18238 }, { "epoch": 0.9047571804156952, "grad_norm": 4.553377151489258, "learning_rate": 2.2664340475815617e-07, "loss": 0.2624, "step": 18239 }, { "epoch": 0.9048067860508954, "grad_norm": 5.740139484405518, "learning_rate": 2.2640917698874776e-07, "loss": 0.2512, "step": 18240 }, { "epoch": 0.9048563916860956, "grad_norm": 6.046375274658203, "learning_rate": 2.2617506751107798e-07, "loss": 0.2228, "step": 18241 }, { "epoch": 0.9049059973212957, "grad_norm": 7.519055366516113, "learning_rate": 2.2594107633095108e-07, "loss": 0.3256, "step": 18242 }, { "epoch": 0.9049556029564959, "grad_norm": 5.8747239112854, "learning_rate": 2.2570720345416297e-07, "loss": 0.2193, "step": 18243 }, { "epoch": 0.905005208591696, "grad_norm": 8.163007736206055, "learning_rate": 2.2547344888650956e-07, "loss": 0.2725, "step": 18244 }, { "epoch": 0.9050548142268962, "grad_norm": 6.480956554412842, "learning_rate": 2.2523981263378457e-07, "loss": 0.3002, "step": 18245 }, { "epoch": 0.9051044198620963, "grad_norm": 6.993973255157471, "learning_rate": 2.250062947017767e-07, "loss": 0.2933, "step": 18246 }, { "epoch": 0.9051540254972965, "grad_norm": 5.871047019958496, "learning_rate": 2.2477289509627243e-07, "loss": 0.2063, "step": 18247 }, { "epoch": 0.9052036311324967, "grad_norm": 6.664222240447998, "learning_rate": 2.2453961382305712e-07, "loss": 0.3271, "step": 18248 }, { "epoch": 0.9052532367676968, "grad_norm": 11.933865547180176, "learning_rate": 2.2430645088790836e-07, "loss": 0.2879, "step": 18249 }, { "epoch": 0.9053028424028969, "grad_norm": 6.515860080718994, "learning_rate": 2.2407340629660769e-07, "loss": 0.299, "step": 18250 }, { "epoch": 0.9053524480380971, "grad_norm": 5.331415176391602, "learning_rate": 2.2384048005492764e-07, "loss": 0.2942, "step": 18251 }, { "epoch": 0.9054020536732973, "grad_norm": 5.652408599853516, "learning_rate": 2.2360767216864143e-07, "loss": 0.197, "step": 18252 }, { "epoch": 0.9054516593084975, "grad_norm": 4.552584648132324, "learning_rate": 2.233749826435172e-07, "loss": 0.2418, "step": 18253 }, { "epoch": 0.9055012649436976, "grad_norm": 10.267909049987793, "learning_rate": 2.2314241148532257e-07, "loss": 0.3396, "step": 18254 }, { "epoch": 0.9055508705788977, "grad_norm": 5.814613342285156, "learning_rate": 2.2290995869981847e-07, "loss": 0.2876, "step": 18255 }, { "epoch": 0.9056004762140979, "grad_norm": 6.279578685760498, "learning_rate": 2.226776242927675e-07, "loss": 0.2816, "step": 18256 }, { "epoch": 0.9056500818492981, "grad_norm": 4.705801486968994, "learning_rate": 2.2244540826992566e-07, "loss": 0.2503, "step": 18257 }, { "epoch": 0.9056996874844983, "grad_norm": 4.043527126312256, "learning_rate": 2.2221331063704775e-07, "loss": 0.178, "step": 18258 }, { "epoch": 0.9057492931196984, "grad_norm": 7.252593994140625, "learning_rate": 2.2198133139988475e-07, "loss": 0.2386, "step": 18259 }, { "epoch": 0.9057988987548986, "grad_norm": 3.8495194911956787, "learning_rate": 2.2174947056418596e-07, "loss": 0.1717, "step": 18260 }, { "epoch": 0.9058485043900987, "grad_norm": 3.340855598449707, "learning_rate": 2.2151772813569672e-07, "loss": 0.1973, "step": 18261 }, { "epoch": 0.9058981100252989, "grad_norm": 6.646685600280762, "learning_rate": 2.2128610412016083e-07, "loss": 0.2851, "step": 18262 }, { "epoch": 0.905947715660499, "grad_norm": 5.773212432861328, "learning_rate": 2.2105459852331534e-07, "loss": 0.2873, "step": 18263 }, { "epoch": 0.9059973212956992, "grad_norm": 6.742265224456787, "learning_rate": 2.2082321135089956e-07, "loss": 0.2881, "step": 18264 }, { "epoch": 0.9060469269308994, "grad_norm": 7.917171955108643, "learning_rate": 2.2059194260864613e-07, "loss": 0.2975, "step": 18265 }, { "epoch": 0.9060965325660995, "grad_norm": 8.961396217346191, "learning_rate": 2.203607923022866e-07, "loss": 0.2554, "step": 18266 }, { "epoch": 0.9061461382012996, "grad_norm": 10.048196792602539, "learning_rate": 2.2012976043754797e-07, "loss": 0.2715, "step": 18267 }, { "epoch": 0.9061957438364998, "grad_norm": 7.617480754852295, "learning_rate": 2.198988470201574e-07, "loss": 0.217, "step": 18268 }, { "epoch": 0.9062453494717, "grad_norm": 11.352092742919922, "learning_rate": 2.196680520558342e-07, "loss": 0.3452, "step": 18269 }, { "epoch": 0.9062949551069002, "grad_norm": 10.325804710388184, "learning_rate": 2.1943737555029988e-07, "loss": 0.3891, "step": 18270 }, { "epoch": 0.9063445607421003, "grad_norm": 9.047883987426758, "learning_rate": 2.1920681750926986e-07, "loss": 0.3003, "step": 18271 }, { "epoch": 0.9063941663773004, "grad_norm": 5.702495098114014, "learning_rate": 2.1897637793845737e-07, "loss": 0.2264, "step": 18272 }, { "epoch": 0.9064437720125006, "grad_norm": 9.606496810913086, "learning_rate": 2.1874605684357285e-07, "loss": 0.3521, "step": 18273 }, { "epoch": 0.9064933776477008, "grad_norm": 5.582303524017334, "learning_rate": 2.185158542303245e-07, "loss": 0.2869, "step": 18274 }, { "epoch": 0.906542983282901, "grad_norm": 5.655004501342773, "learning_rate": 2.1828577010441443e-07, "loss": 0.2928, "step": 18275 }, { "epoch": 0.9065925889181011, "grad_norm": 7.492475986480713, "learning_rate": 2.1805580447154807e-07, "loss": 0.3235, "step": 18276 }, { "epoch": 0.9066421945533013, "grad_norm": 4.826060771942139, "learning_rate": 2.1782595733741974e-07, "loss": 0.2396, "step": 18277 }, { "epoch": 0.9066918001885014, "grad_norm": 7.007991790771484, "learning_rate": 2.1759622870772934e-07, "loss": 0.2374, "step": 18278 }, { "epoch": 0.9067414058237016, "grad_norm": 5.121593952178955, "learning_rate": 2.1736661858816732e-07, "loss": 0.2667, "step": 18279 }, { "epoch": 0.9067910114589017, "grad_norm": 15.665846824645996, "learning_rate": 2.1713712698442357e-07, "loss": 0.2716, "step": 18280 }, { "epoch": 0.9068406170941019, "grad_norm": 8.022333145141602, "learning_rate": 2.1690775390218522e-07, "loss": 0.3403, "step": 18281 }, { "epoch": 0.9068902227293021, "grad_norm": 5.0210041999816895, "learning_rate": 2.1667849934713714e-07, "loss": 0.2763, "step": 18282 }, { "epoch": 0.9069398283645022, "grad_norm": 6.872273921966553, "learning_rate": 2.1644936332495813e-07, "loss": 0.1886, "step": 18283 }, { "epoch": 0.9069894339997023, "grad_norm": 6.232670783996582, "learning_rate": 2.1622034584132923e-07, "loss": 0.3383, "step": 18284 }, { "epoch": 0.9070390396349025, "grad_norm": 11.16647720336914, "learning_rate": 2.1599144690192309e-07, "loss": 0.358, "step": 18285 }, { "epoch": 0.9070886452701027, "grad_norm": 10.764533996582031, "learning_rate": 2.157626665124135e-07, "loss": 0.356, "step": 18286 }, { "epoch": 0.9071382509053029, "grad_norm": 11.497307777404785, "learning_rate": 2.1553400467846875e-07, "loss": 0.4987, "step": 18287 }, { "epoch": 0.907187856540503, "grad_norm": 6.586008071899414, "learning_rate": 2.1530546140575593e-07, "loss": 0.2592, "step": 18288 }, { "epoch": 0.9072374621757031, "grad_norm": 10.7900972366333, "learning_rate": 2.1507703669993775e-07, "loss": 0.3859, "step": 18289 }, { "epoch": 0.9072870678109033, "grad_norm": 9.924457550048828, "learning_rate": 2.148487305666752e-07, "loss": 0.2567, "step": 18290 }, { "epoch": 0.9073366734461035, "grad_norm": 9.801581382751465, "learning_rate": 2.146205430116266e-07, "loss": 0.3572, "step": 18291 }, { "epoch": 0.9073862790813036, "grad_norm": 10.161170959472656, "learning_rate": 2.143924740404446e-07, "loss": 0.3607, "step": 18292 }, { "epoch": 0.9074358847165038, "grad_norm": 5.9422101974487305, "learning_rate": 2.1416452365878194e-07, "loss": 0.2236, "step": 18293 }, { "epoch": 0.907485490351704, "grad_norm": 3.7522614002227783, "learning_rate": 2.1393669187228738e-07, "loss": 0.2524, "step": 18294 }, { "epoch": 0.9075350959869041, "grad_norm": 5.2251105308532715, "learning_rate": 2.1370897868660644e-07, "loss": 0.3029, "step": 18295 }, { "epoch": 0.9075847016221043, "grad_norm": 8.332536697387695, "learning_rate": 2.1348138410738238e-07, "loss": 0.2393, "step": 18296 }, { "epoch": 0.9076343072573044, "grad_norm": 9.977242469787598, "learning_rate": 2.132539081402546e-07, "loss": 0.3435, "step": 18297 }, { "epoch": 0.9076839128925046, "grad_norm": 8.589815139770508, "learning_rate": 2.130265507908602e-07, "loss": 0.3175, "step": 18298 }, { "epoch": 0.9077335185277048, "grad_norm": 6.9801926612854, "learning_rate": 2.1279931206483361e-07, "loss": 0.3412, "step": 18299 }, { "epoch": 0.9077831241629049, "grad_norm": 10.17684555053711, "learning_rate": 2.1257219196780477e-07, "loss": 0.3025, "step": 18300 }, { "epoch": 0.907832729798105, "grad_norm": 5.985992908477783, "learning_rate": 2.1234519050540358e-07, "loss": 0.2801, "step": 18301 }, { "epoch": 0.9078823354333052, "grad_norm": 4.342414379119873, "learning_rate": 2.121183076832539e-07, "loss": 0.2023, "step": 18302 }, { "epoch": 0.9079319410685054, "grad_norm": 6.910731792449951, "learning_rate": 2.1189154350697792e-07, "loss": 0.2299, "step": 18303 }, { "epoch": 0.9079815467037056, "grad_norm": 10.668715476989746, "learning_rate": 2.116648979821956e-07, "loss": 0.2996, "step": 18304 }, { "epoch": 0.9080311523389057, "grad_norm": 6.656639575958252, "learning_rate": 2.1143837111452403e-07, "loss": 0.2644, "step": 18305 }, { "epoch": 0.9080807579741058, "grad_norm": 3.885563850402832, "learning_rate": 2.1121196290957436e-07, "loss": 0.2556, "step": 18306 }, { "epoch": 0.908130363609306, "grad_norm": 11.704107284545898, "learning_rate": 2.1098567337295929e-07, "loss": 0.2813, "step": 18307 }, { "epoch": 0.9081799692445062, "grad_norm": 8.314225196838379, "learning_rate": 2.1075950251028543e-07, "loss": 0.336, "step": 18308 }, { "epoch": 0.9082295748797063, "grad_norm": 7.624927043914795, "learning_rate": 2.1053345032715723e-07, "loss": 0.303, "step": 18309 }, { "epoch": 0.9082791805149065, "grad_norm": 3.8327291011810303, "learning_rate": 2.1030751682917683e-07, "loss": 0.2489, "step": 18310 }, { "epoch": 0.9083287861501067, "grad_norm": 47.805606842041016, "learning_rate": 2.1008170202194312e-07, "loss": 0.2832, "step": 18311 }, { "epoch": 0.9083783917853068, "grad_norm": 3.862266778945923, "learning_rate": 2.098560059110505e-07, "loss": 0.2574, "step": 18312 }, { "epoch": 0.908427997420507, "grad_norm": 7.34846830368042, "learning_rate": 2.0963042850209392e-07, "loss": 0.2232, "step": 18313 }, { "epoch": 0.9084776030557071, "grad_norm": 6.310834884643555, "learning_rate": 2.0940496980066116e-07, "loss": 0.1947, "step": 18314 }, { "epoch": 0.9085272086909073, "grad_norm": 7.4650559425354, "learning_rate": 2.0917962981234163e-07, "loss": 0.3611, "step": 18315 }, { "epoch": 0.9085768143261075, "grad_norm": 7.5983757972717285, "learning_rate": 2.08954408542717e-07, "loss": 0.3083, "step": 18316 }, { "epoch": 0.9086264199613076, "grad_norm": 12.089534759521484, "learning_rate": 2.087293059973694e-07, "loss": 0.3, "step": 18317 }, { "epoch": 0.9086760255965077, "grad_norm": 5.42537260055542, "learning_rate": 2.0850432218187722e-07, "loss": 0.2426, "step": 18318 }, { "epoch": 0.9087256312317079, "grad_norm": 13.842422485351562, "learning_rate": 2.0827945710181595e-07, "loss": 0.3861, "step": 18319 }, { "epoch": 0.9087752368669081, "grad_norm": 7.112640857696533, "learning_rate": 2.0805471076275619e-07, "loss": 0.2974, "step": 18320 }, { "epoch": 0.9088248425021083, "grad_norm": 5.651798248291016, "learning_rate": 2.07830083170269e-07, "loss": 0.249, "step": 18321 }, { "epoch": 0.9088744481373084, "grad_norm": 5.442843914031982, "learning_rate": 2.076055743299199e-07, "loss": 0.25, "step": 18322 }, { "epoch": 0.9089240537725085, "grad_norm": 7.59346342086792, "learning_rate": 2.0738118424727227e-07, "loss": 0.2926, "step": 18323 }, { "epoch": 0.9089736594077087, "grad_norm": 5.856934547424316, "learning_rate": 2.071569129278872e-07, "loss": 0.357, "step": 18324 }, { "epoch": 0.9090232650429089, "grad_norm": 4.684215068817139, "learning_rate": 2.0693276037732247e-07, "loss": 0.2057, "step": 18325 }, { "epoch": 0.909072870678109, "grad_norm": 6.667625427246094, "learning_rate": 2.0670872660113027e-07, "loss": 0.2528, "step": 18326 }, { "epoch": 0.9091224763133092, "grad_norm": 4.395689964294434, "learning_rate": 2.0648481160486566e-07, "loss": 0.2843, "step": 18327 }, { "epoch": 0.9091720819485094, "grad_norm": 8.131075859069824, "learning_rate": 2.0626101539407527e-07, "loss": 0.2322, "step": 18328 }, { "epoch": 0.9092216875837095, "grad_norm": 5.654765605926514, "learning_rate": 2.0603733797430524e-07, "loss": 0.3132, "step": 18329 }, { "epoch": 0.9092712932189096, "grad_norm": 10.142585754394531, "learning_rate": 2.0581377935109837e-07, "loss": 0.3725, "step": 18330 }, { "epoch": 0.9093208988541098, "grad_norm": 7.265697002410889, "learning_rate": 2.0559033952999517e-07, "loss": 0.213, "step": 18331 }, { "epoch": 0.90937050448931, "grad_norm": 5.5638580322265625, "learning_rate": 2.0536701851653128e-07, "loss": 0.2815, "step": 18332 }, { "epoch": 0.9094201101245102, "grad_norm": 7.442800998687744, "learning_rate": 2.0514381631624282e-07, "loss": 0.3066, "step": 18333 }, { "epoch": 0.9094697157597103, "grad_norm": 6.012727737426758, "learning_rate": 2.0492073293465808e-07, "loss": 0.2354, "step": 18334 }, { "epoch": 0.9095193213949104, "grad_norm": 6.907137870788574, "learning_rate": 2.0469776837730826e-07, "loss": 0.2952, "step": 18335 }, { "epoch": 0.9095689270301106, "grad_norm": 6.108758449554443, "learning_rate": 2.0447492264971558e-07, "loss": 0.2541, "step": 18336 }, { "epoch": 0.9096185326653108, "grad_norm": 6.220860004425049, "learning_rate": 2.0425219575740396e-07, "loss": 0.2247, "step": 18337 }, { "epoch": 0.909668138300511, "grad_norm": 4.928214073181152, "learning_rate": 2.0402958770589232e-07, "loss": 0.2677, "step": 18338 }, { "epoch": 0.9097177439357111, "grad_norm": 4.281429767608643, "learning_rate": 2.038070985006968e-07, "loss": 0.2096, "step": 18339 }, { "epoch": 0.9097673495709112, "grad_norm": 7.299476146697998, "learning_rate": 2.0358472814733077e-07, "loss": 0.2643, "step": 18340 }, { "epoch": 0.9098169552061114, "grad_norm": 5.159688472747803, "learning_rate": 2.0336247665130592e-07, "loss": 0.2892, "step": 18341 }, { "epoch": 0.9098665608413116, "grad_norm": 6.960238933563232, "learning_rate": 2.031403440181279e-07, "loss": 0.1489, "step": 18342 }, { "epoch": 0.9099161664765117, "grad_norm": 5.688459873199463, "learning_rate": 2.0291833025330165e-07, "loss": 0.2996, "step": 18343 }, { "epoch": 0.9099657721117119, "grad_norm": 5.320908069610596, "learning_rate": 2.0269643536232953e-07, "loss": 0.2075, "step": 18344 }, { "epoch": 0.9100153777469121, "grad_norm": 9.726155281066895, "learning_rate": 2.024746593507093e-07, "loss": 0.3638, "step": 18345 }, { "epoch": 0.9100649833821122, "grad_norm": 6.691892623901367, "learning_rate": 2.0225300222393774e-07, "loss": 0.3175, "step": 18346 }, { "epoch": 0.9101145890173123, "grad_norm": 14.149576187133789, "learning_rate": 2.020314639875065e-07, "loss": 0.4627, "step": 18347 }, { "epoch": 0.9101641946525125, "grad_norm": 3.9845337867736816, "learning_rate": 2.0181004464690678e-07, "loss": 0.2198, "step": 18348 }, { "epoch": 0.9102138002877127, "grad_norm": 10.828193664550781, "learning_rate": 2.0158874420762365e-07, "loss": 0.3601, "step": 18349 }, { "epoch": 0.9102634059229129, "grad_norm": 3.0008716583251953, "learning_rate": 2.0136756267514323e-07, "loss": 0.2568, "step": 18350 }, { "epoch": 0.910313011558113, "grad_norm": 7.285285472869873, "learning_rate": 2.0114650005494452e-07, "loss": 0.3227, "step": 18351 }, { "epoch": 0.9103626171933131, "grad_norm": 6.501132488250732, "learning_rate": 2.0092555635250643e-07, "loss": 0.2868, "step": 18352 }, { "epoch": 0.9104122228285133, "grad_norm": 7.433154106140137, "learning_rate": 2.007047315733035e-07, "loss": 0.2645, "step": 18353 }, { "epoch": 0.9104618284637135, "grad_norm": 7.114932060241699, "learning_rate": 2.0048402572280857e-07, "loss": 0.306, "step": 18354 }, { "epoch": 0.9105114340989137, "grad_norm": 12.471193313598633, "learning_rate": 2.0026343880649056e-07, "loss": 0.3655, "step": 18355 }, { "epoch": 0.9105610397341138, "grad_norm": 9.046372413635254, "learning_rate": 2.0004297082981627e-07, "loss": 0.2987, "step": 18356 }, { "epoch": 0.9106106453693139, "grad_norm": 5.506246089935303, "learning_rate": 1.9982262179824684e-07, "loss": 0.2235, "step": 18357 }, { "epoch": 0.9106602510045141, "grad_norm": 4.392031669616699, "learning_rate": 1.9960239171724572e-07, "loss": 0.2152, "step": 18358 }, { "epoch": 0.9107098566397143, "grad_norm": 17.339744567871094, "learning_rate": 1.9938228059226795e-07, "loss": 0.4882, "step": 18359 }, { "epoch": 0.9107594622749144, "grad_norm": 8.052059173583984, "learning_rate": 1.9916228842876917e-07, "loss": 0.2555, "step": 18360 }, { "epoch": 0.9108090679101146, "grad_norm": 9.083187103271484, "learning_rate": 1.989424152322006e-07, "loss": 0.2805, "step": 18361 }, { "epoch": 0.9108586735453148, "grad_norm": 4.043371677398682, "learning_rate": 1.987226610080112e-07, "loss": 0.2151, "step": 18362 }, { "epoch": 0.9109082791805149, "grad_norm": 4.269881248474121, "learning_rate": 1.9850302576164493e-07, "loss": 0.2871, "step": 18363 }, { "epoch": 0.910957884815715, "grad_norm": 5.32421875, "learning_rate": 1.982835094985469e-07, "loss": 0.3006, "step": 18364 }, { "epoch": 0.9110074904509152, "grad_norm": 7.271162509918213, "learning_rate": 1.9806411222415446e-07, "loss": 0.2054, "step": 18365 }, { "epoch": 0.9110570960861154, "grad_norm": 9.544024467468262, "learning_rate": 1.9784483394390597e-07, "loss": 0.3182, "step": 18366 }, { "epoch": 0.9111067017213156, "grad_norm": 5.3302507400512695, "learning_rate": 1.9762567466323435e-07, "loss": 0.2728, "step": 18367 }, { "epoch": 0.9111563073565156, "grad_norm": 5.227100372314453, "learning_rate": 1.9740663438757134e-07, "loss": 0.2929, "step": 18368 }, { "epoch": 0.9112059129917158, "grad_norm": 13.01585865020752, "learning_rate": 1.971877131223443e-07, "loss": 0.2501, "step": 18369 }, { "epoch": 0.911255518626916, "grad_norm": 8.525093078613281, "learning_rate": 1.9696891087297888e-07, "loss": 0.1861, "step": 18370 }, { "epoch": 0.9113051242621162, "grad_norm": 7.375311374664307, "learning_rate": 1.9675022764489517e-07, "loss": 0.3538, "step": 18371 }, { "epoch": 0.9113547298973164, "grad_norm": 6.1732282638549805, "learning_rate": 1.9653166344351436e-07, "loss": 0.2555, "step": 18372 }, { "epoch": 0.9114043355325165, "grad_norm": 5.8537468910217285, "learning_rate": 1.9631321827425166e-07, "loss": 0.2794, "step": 18373 }, { "epoch": 0.9114539411677166, "grad_norm": 4.805635452270508, "learning_rate": 1.9609489214252043e-07, "loss": 0.2124, "step": 18374 }, { "epoch": 0.9115035468029168, "grad_norm": 9.628944396972656, "learning_rate": 1.9587668505373082e-07, "loss": 0.2833, "step": 18375 }, { "epoch": 0.911553152438117, "grad_norm": 4.593740940093994, "learning_rate": 1.9565859701329014e-07, "loss": 0.2656, "step": 18376 }, { "epoch": 0.9116027580733171, "grad_norm": 5.038387298583984, "learning_rate": 1.954406280266019e-07, "loss": 0.2559, "step": 18377 }, { "epoch": 0.9116523637085173, "grad_norm": 3.3491547107696533, "learning_rate": 1.9522277809906955e-07, "loss": 0.1907, "step": 18378 }, { "epoch": 0.9117019693437175, "grad_norm": 4.0179033279418945, "learning_rate": 1.9500504723608926e-07, "loss": 0.2909, "step": 18379 }, { "epoch": 0.9117515749789176, "grad_norm": 15.96994686126709, "learning_rate": 1.947874354430579e-07, "loss": 0.281, "step": 18380 }, { "epoch": 0.9118011806141177, "grad_norm": 7.494857311248779, "learning_rate": 1.9456994272536723e-07, "loss": 0.2876, "step": 18381 }, { "epoch": 0.9118507862493179, "grad_norm": 6.758763790130615, "learning_rate": 1.943525690884074e-07, "loss": 0.3095, "step": 18382 }, { "epoch": 0.9119003918845181, "grad_norm": 7.51524019241333, "learning_rate": 1.9413531453756407e-07, "loss": 0.3028, "step": 18383 }, { "epoch": 0.9119499975197183, "grad_norm": 5.518219947814941, "learning_rate": 1.9391817907822242e-07, "loss": 0.2217, "step": 18384 }, { "epoch": 0.9119996031549183, "grad_norm": 8.411897659301758, "learning_rate": 1.9370116271576145e-07, "loss": 0.3057, "step": 18385 }, { "epoch": 0.9120492087901185, "grad_norm": 4.724970817565918, "learning_rate": 1.9348426545556076e-07, "loss": 0.2647, "step": 18386 }, { "epoch": 0.9120988144253187, "grad_norm": 12.138930320739746, "learning_rate": 1.9326748730299382e-07, "loss": 0.3568, "step": 18387 }, { "epoch": 0.9121484200605189, "grad_norm": 5.687455177307129, "learning_rate": 1.9305082826343247e-07, "loss": 0.2728, "step": 18388 }, { "epoch": 0.9121980256957191, "grad_norm": 4.350741386413574, "learning_rate": 1.9283428834224626e-07, "loss": 0.2409, "step": 18389 }, { "epoch": 0.9122476313309192, "grad_norm": 4.908034324645996, "learning_rate": 1.9261786754480149e-07, "loss": 0.2947, "step": 18390 }, { "epoch": 0.9122972369661193, "grad_norm": 6.67061185836792, "learning_rate": 1.9240156587645942e-07, "loss": 0.258, "step": 18391 }, { "epoch": 0.9123468426013195, "grad_norm": 6.012340068817139, "learning_rate": 1.921853833425824e-07, "loss": 0.2581, "step": 18392 }, { "epoch": 0.9123964482365197, "grad_norm": 4.63582706451416, "learning_rate": 1.9196931994852564e-07, "loss": 0.2059, "step": 18393 }, { "epoch": 0.9124460538717198, "grad_norm": 15.60690689086914, "learning_rate": 1.9175337569964426e-07, "loss": 0.3264, "step": 18394 }, { "epoch": 0.91249565950692, "grad_norm": 6.101459503173828, "learning_rate": 1.9153755060128954e-07, "loss": 0.2463, "step": 18395 }, { "epoch": 0.9125452651421202, "grad_norm": 4.751541614532471, "learning_rate": 1.9132184465880887e-07, "loss": 0.1886, "step": 18396 }, { "epoch": 0.9125948707773203, "grad_norm": 11.022281646728516, "learning_rate": 1.91106257877548e-07, "loss": 0.3209, "step": 18397 }, { "epoch": 0.9126444764125204, "grad_norm": 6.965586185455322, "learning_rate": 1.9089079026284985e-07, "loss": 0.2344, "step": 18398 }, { "epoch": 0.9126940820477206, "grad_norm": 4.9877495765686035, "learning_rate": 1.9067544182005348e-07, "loss": 0.229, "step": 18399 }, { "epoch": 0.9127436876829208, "grad_norm": 6.167849063873291, "learning_rate": 1.9046021255449522e-07, "loss": 0.2598, "step": 18400 }, { "epoch": 0.912793293318121, "grad_norm": 6.815780162811279, "learning_rate": 1.9024510247150795e-07, "loss": 0.2486, "step": 18401 }, { "epoch": 0.912842898953321, "grad_norm": 4.083813667297363, "learning_rate": 1.9003011157642248e-07, "loss": 0.2769, "step": 18402 }, { "epoch": 0.9128925045885212, "grad_norm": 4.699824333190918, "learning_rate": 1.898152398745673e-07, "loss": 0.1863, "step": 18403 }, { "epoch": 0.9129421102237214, "grad_norm": 18.715139389038086, "learning_rate": 1.8960048737126592e-07, "loss": 0.3539, "step": 18404 }, { "epoch": 0.9129917158589216, "grad_norm": 8.069690704345703, "learning_rate": 1.8938585407184073e-07, "loss": 0.2932, "step": 18405 }, { "epoch": 0.9130413214941218, "grad_norm": 11.487157821655273, "learning_rate": 1.8917133998160976e-07, "loss": 0.4359, "step": 18406 }, { "epoch": 0.9130909271293219, "grad_norm": 6.565241813659668, "learning_rate": 1.889569451058898e-07, "loss": 0.3808, "step": 18407 }, { "epoch": 0.913140532764522, "grad_norm": 4.6306891441345215, "learning_rate": 1.8874266944999277e-07, "loss": 0.3078, "step": 18408 }, { "epoch": 0.9131901383997222, "grad_norm": 10.355332374572754, "learning_rate": 1.885285130192288e-07, "loss": 0.3067, "step": 18409 }, { "epoch": 0.9132397440349224, "grad_norm": 3.7436470985412598, "learning_rate": 1.8831447581890428e-07, "loss": 0.167, "step": 18410 }, { "epoch": 0.9132893496701225, "grad_norm": 9.842680931091309, "learning_rate": 1.881005578543238e-07, "loss": 0.2909, "step": 18411 }, { "epoch": 0.9133389553053227, "grad_norm": 5.698759078979492, "learning_rate": 1.8788675913078813e-07, "loss": 0.2497, "step": 18412 }, { "epoch": 0.9133885609405229, "grad_norm": 5.448122978210449, "learning_rate": 1.876730796535964e-07, "loss": 0.2896, "step": 18413 }, { "epoch": 0.913438166575723, "grad_norm": 2.8766846656799316, "learning_rate": 1.8745951942804097e-07, "loss": 0.2221, "step": 18414 }, { "epoch": 0.9134877722109231, "grad_norm": 5.169493198394775, "learning_rate": 1.8724607845941656e-07, "loss": 0.3132, "step": 18415 }, { "epoch": 0.9135373778461233, "grad_norm": 5.33436918258667, "learning_rate": 1.8703275675301114e-07, "loss": 0.2338, "step": 18416 }, { "epoch": 0.9135869834813235, "grad_norm": 6.693160057067871, "learning_rate": 1.8681955431411102e-07, "loss": 0.2471, "step": 18417 }, { "epoch": 0.9136365891165237, "grad_norm": 9.620893478393555, "learning_rate": 1.866064711479998e-07, "loss": 0.2628, "step": 18418 }, { "epoch": 0.9136861947517237, "grad_norm": 12.347075462341309, "learning_rate": 1.863935072599571e-07, "loss": 0.4478, "step": 18419 }, { "epoch": 0.9137358003869239, "grad_norm": 7.67612886428833, "learning_rate": 1.8618066265526147e-07, "loss": 0.2654, "step": 18420 }, { "epoch": 0.9137854060221241, "grad_norm": 9.658245086669922, "learning_rate": 1.859679373391865e-07, "loss": 0.2421, "step": 18421 }, { "epoch": 0.9138350116573243, "grad_norm": 5.5264811515808105, "learning_rate": 1.8575533131700242e-07, "loss": 0.2327, "step": 18422 }, { "epoch": 0.9138846172925245, "grad_norm": 12.184396743774414, "learning_rate": 1.8554284459398053e-07, "loss": 0.3638, "step": 18423 }, { "epoch": 0.9139342229277246, "grad_norm": 4.7698469161987305, "learning_rate": 1.8533047717538388e-07, "loss": 0.1862, "step": 18424 }, { "epoch": 0.9139838285629247, "grad_norm": 8.693987846374512, "learning_rate": 1.8511822906647603e-07, "loss": 0.2609, "step": 18425 }, { "epoch": 0.9140334341981249, "grad_norm": 11.880291938781738, "learning_rate": 1.8490610027251664e-07, "loss": 0.2584, "step": 18426 }, { "epoch": 0.9140830398333251, "grad_norm": 5.917051792144775, "learning_rate": 1.8469409079876267e-07, "loss": 0.3011, "step": 18427 }, { "epoch": 0.9141326454685252, "grad_norm": 10.955424308776855, "learning_rate": 1.8448220065046596e-07, "loss": 0.3518, "step": 18428 }, { "epoch": 0.9141822511037254, "grad_norm": 6.871006965637207, "learning_rate": 1.8427042983288013e-07, "loss": 0.3496, "step": 18429 }, { "epoch": 0.9142318567389256, "grad_norm": 7.147096633911133, "learning_rate": 1.840587783512504e-07, "loss": 0.2867, "step": 18430 }, { "epoch": 0.9142814623741257, "grad_norm": 6.72556734085083, "learning_rate": 1.8384724621082318e-07, "loss": 0.2687, "step": 18431 }, { "epoch": 0.9143310680093258, "grad_norm": 3.6422104835510254, "learning_rate": 1.8363583341683923e-07, "loss": 0.154, "step": 18432 }, { "epoch": 0.914380673644526, "grad_norm": 10.219228744506836, "learning_rate": 1.8342453997453934e-07, "loss": 0.2641, "step": 18433 }, { "epoch": 0.9144302792797262, "grad_norm": 9.216035842895508, "learning_rate": 1.8321336588915607e-07, "loss": 0.3957, "step": 18434 }, { "epoch": 0.9144798849149264, "grad_norm": 12.21204662322998, "learning_rate": 1.8300231116592627e-07, "loss": 0.3894, "step": 18435 }, { "epoch": 0.9145294905501264, "grad_norm": 6.8465256690979, "learning_rate": 1.8279137581007745e-07, "loss": 0.2178, "step": 18436 }, { "epoch": 0.9145790961853266, "grad_norm": 10.502737045288086, "learning_rate": 1.825805598268371e-07, "loss": 0.352, "step": 18437 }, { "epoch": 0.9146287018205268, "grad_norm": 4.522125720977783, "learning_rate": 1.8236986322142991e-07, "loss": 0.2534, "step": 18438 }, { "epoch": 0.914678307455727, "grad_norm": 9.281279563903809, "learning_rate": 1.8215928599907672e-07, "loss": 0.3606, "step": 18439 }, { "epoch": 0.9147279130909272, "grad_norm": 7.811180114746094, "learning_rate": 1.819488281649956e-07, "loss": 0.2185, "step": 18440 }, { "epoch": 0.9147775187261273, "grad_norm": 6.678256511688232, "learning_rate": 1.817384897244029e-07, "loss": 0.2306, "step": 18441 }, { "epoch": 0.9148271243613274, "grad_norm": 9.364923477172852, "learning_rate": 1.8152827068250832e-07, "loss": 0.2927, "step": 18442 }, { "epoch": 0.9148767299965276, "grad_norm": 8.277361869812012, "learning_rate": 1.8131817104452444e-07, "loss": 0.256, "step": 18443 }, { "epoch": 0.9149263356317278, "grad_norm": 9.304494857788086, "learning_rate": 1.8110819081565534e-07, "loss": 0.3354, "step": 18444 }, { "epoch": 0.9149759412669279, "grad_norm": 7.9746317863464355, "learning_rate": 1.8089833000110468e-07, "loss": 0.2702, "step": 18445 }, { "epoch": 0.9150255469021281, "grad_norm": 7.9264607429504395, "learning_rate": 1.8068858860607386e-07, "loss": 0.1803, "step": 18446 }, { "epoch": 0.9150751525373283, "grad_norm": 10.568938255310059, "learning_rate": 1.8047896663575926e-07, "loss": 0.3661, "step": 18447 }, { "epoch": 0.9151247581725284, "grad_norm": 6.183512210845947, "learning_rate": 1.8026946409535672e-07, "loss": 0.2744, "step": 18448 }, { "epoch": 0.9151743638077285, "grad_norm": 4.2170939445495605, "learning_rate": 1.8006008099005712e-07, "loss": 0.2299, "step": 18449 }, { "epoch": 0.9152239694429287, "grad_norm": 5.59635066986084, "learning_rate": 1.798508173250485e-07, "loss": 0.2853, "step": 18450 }, { "epoch": 0.9152735750781289, "grad_norm": 8.01502513885498, "learning_rate": 1.7964167310551671e-07, "loss": 0.3488, "step": 18451 }, { "epoch": 0.9153231807133291, "grad_norm": 6.2587690353393555, "learning_rate": 1.7943264833664542e-07, "loss": 0.2369, "step": 18452 }, { "epoch": 0.9153727863485291, "grad_norm": 4.985372543334961, "learning_rate": 1.7922374302361323e-07, "loss": 0.2416, "step": 18453 }, { "epoch": 0.9154223919837293, "grad_norm": 5.912759304046631, "learning_rate": 1.7901495717159767e-07, "loss": 0.3005, "step": 18454 }, { "epoch": 0.9154719976189295, "grad_norm": 5.275184154510498, "learning_rate": 1.7880629078577183e-07, "loss": 0.2754, "step": 18455 }, { "epoch": 0.9155216032541297, "grad_norm": 4.37817907333374, "learning_rate": 1.7859774387130713e-07, "loss": 0.235, "step": 18456 }, { "epoch": 0.9155712088893299, "grad_norm": 6.949232578277588, "learning_rate": 1.783893164333722e-07, "loss": 0.3544, "step": 18457 }, { "epoch": 0.91562081452453, "grad_norm": 6.028357982635498, "learning_rate": 1.7818100847713017e-07, "loss": 0.303, "step": 18458 }, { "epoch": 0.9156704201597301, "grad_norm": 8.803135871887207, "learning_rate": 1.7797282000774406e-07, "loss": 0.2882, "step": 18459 }, { "epoch": 0.9157200257949303, "grad_norm": 6.739006042480469, "learning_rate": 1.7776475103037262e-07, "loss": 0.3775, "step": 18460 }, { "epoch": 0.9157696314301305, "grad_norm": 9.475200653076172, "learning_rate": 1.7755680155017164e-07, "loss": 0.3409, "step": 18461 }, { "epoch": 0.9158192370653306, "grad_norm": 10.18023681640625, "learning_rate": 1.773489715722948e-07, "loss": 0.3666, "step": 18462 }, { "epoch": 0.9158688427005308, "grad_norm": 8.808488845825195, "learning_rate": 1.771412611018919e-07, "loss": 0.3786, "step": 18463 }, { "epoch": 0.9159184483357309, "grad_norm": 8.096980094909668, "learning_rate": 1.76933670144111e-07, "loss": 0.3512, "step": 18464 }, { "epoch": 0.9159680539709311, "grad_norm": 2.639798164367676, "learning_rate": 1.7672619870409412e-07, "loss": 0.1176, "step": 18465 }, { "epoch": 0.9160176596061312, "grad_norm": 9.197601318359375, "learning_rate": 1.7651884678698495e-07, "loss": 0.3634, "step": 18466 }, { "epoch": 0.9160672652413314, "grad_norm": 5.870378494262695, "learning_rate": 1.7631161439792044e-07, "loss": 0.1969, "step": 18467 }, { "epoch": 0.9161168708765316, "grad_norm": 5.336447238922119, "learning_rate": 1.7610450154203596e-07, "loss": 0.3246, "step": 18468 }, { "epoch": 0.9161664765117318, "grad_norm": 4.6920857429504395, "learning_rate": 1.7589750822446404e-07, "loss": 0.302, "step": 18469 }, { "epoch": 0.9162160821469318, "grad_norm": 3.8462462425231934, "learning_rate": 1.7569063445033452e-07, "loss": 0.1921, "step": 18470 }, { "epoch": 0.916265687782132, "grad_norm": 4.675968647003174, "learning_rate": 1.7548388022477213e-07, "loss": 0.266, "step": 18471 }, { "epoch": 0.9163152934173322, "grad_norm": 5.5357866287231445, "learning_rate": 1.7527724555290282e-07, "loss": 0.1982, "step": 18472 }, { "epoch": 0.9163648990525324, "grad_norm": 12.708091735839844, "learning_rate": 1.7507073043984414e-07, "loss": 0.3023, "step": 18473 }, { "epoch": 0.9164145046877326, "grad_norm": 8.350553512573242, "learning_rate": 1.7486433489071697e-07, "loss": 0.3173, "step": 18474 }, { "epoch": 0.9164641103229327, "grad_norm": 9.370165824890137, "learning_rate": 1.7465805891063392e-07, "loss": 0.3415, "step": 18475 }, { "epoch": 0.9165137159581328, "grad_norm": 11.378523826599121, "learning_rate": 1.7445190250470646e-07, "loss": 0.2548, "step": 18476 }, { "epoch": 0.916563321593333, "grad_norm": 5.929887771606445, "learning_rate": 1.742458656780438e-07, "loss": 0.217, "step": 18477 }, { "epoch": 0.9166129272285332, "grad_norm": 6.396329402923584, "learning_rate": 1.740399484357519e-07, "loss": 0.3343, "step": 18478 }, { "epoch": 0.9166625328637333, "grad_norm": 7.376939296722412, "learning_rate": 1.738341507829322e-07, "loss": 0.2379, "step": 18479 }, { "epoch": 0.9167121384989335, "grad_norm": 6.8546671867370605, "learning_rate": 1.7362847272468676e-07, "loss": 0.1853, "step": 18480 }, { "epoch": 0.9167617441341336, "grad_norm": 10.588852882385254, "learning_rate": 1.734229142661098e-07, "loss": 0.2766, "step": 18481 }, { "epoch": 0.9168113497693338, "grad_norm": 9.157949447631836, "learning_rate": 1.732174754122967e-07, "loss": 0.3928, "step": 18482 }, { "epoch": 0.9168609554045339, "grad_norm": 6.8692708015441895, "learning_rate": 1.730121561683379e-07, "loss": 0.267, "step": 18483 }, { "epoch": 0.9169105610397341, "grad_norm": 4.722973346710205, "learning_rate": 1.72806956539322e-07, "loss": 0.1967, "step": 18484 }, { "epoch": 0.9169601666749343, "grad_norm": 11.871521949768066, "learning_rate": 1.7260187653033222e-07, "loss": 0.2032, "step": 18485 }, { "epoch": 0.9170097723101345, "grad_norm": 5.428323268890381, "learning_rate": 1.7239691614645225e-07, "loss": 0.2798, "step": 18486 }, { "epoch": 0.9170593779453345, "grad_norm": 14.173651695251465, "learning_rate": 1.7219207539276028e-07, "loss": 0.4167, "step": 18487 }, { "epoch": 0.9171089835805347, "grad_norm": 9.659730911254883, "learning_rate": 1.7198735427433277e-07, "loss": 0.351, "step": 18488 }, { "epoch": 0.9171585892157349, "grad_norm": 6.499260425567627, "learning_rate": 1.7178275279624233e-07, "loss": 0.2034, "step": 18489 }, { "epoch": 0.9172081948509351, "grad_norm": 6.928098678588867, "learning_rate": 1.7157827096355938e-07, "loss": 0.3487, "step": 18490 }, { "epoch": 0.9172578004861353, "grad_norm": 6.193599700927734, "learning_rate": 1.7137390878135096e-07, "loss": 0.2123, "step": 18491 }, { "epoch": 0.9173074061213354, "grad_norm": 6.771200656890869, "learning_rate": 1.7116966625468134e-07, "loss": 0.2752, "step": 18492 }, { "epoch": 0.9173570117565355, "grad_norm": 5.97829532623291, "learning_rate": 1.709655433886115e-07, "loss": 0.2206, "step": 18493 }, { "epoch": 0.9174066173917357, "grad_norm": 6.314580917358398, "learning_rate": 1.7076154018820014e-07, "loss": 0.255, "step": 18494 }, { "epoch": 0.9174562230269359, "grad_norm": 5.437861442565918, "learning_rate": 1.7055765665850266e-07, "loss": 0.3163, "step": 18495 }, { "epoch": 0.917505828662136, "grad_norm": 16.460132598876953, "learning_rate": 1.703538928045706e-07, "loss": 0.2437, "step": 18496 }, { "epoch": 0.9175554342973362, "grad_norm": 6.644107818603516, "learning_rate": 1.7015024863145324e-07, "loss": 0.2843, "step": 18497 }, { "epoch": 0.9176050399325363, "grad_norm": 5.40642786026001, "learning_rate": 1.6994672414419877e-07, "loss": 0.1939, "step": 18498 }, { "epoch": 0.9176546455677365, "grad_norm": 5.69719934463501, "learning_rate": 1.6974331934784817e-07, "loss": 0.264, "step": 18499 }, { "epoch": 0.9177042512029366, "grad_norm": 4.744537830352783, "learning_rate": 1.6954003424744404e-07, "loss": 0.2133, "step": 18500 }, { "epoch": 0.9177538568381368, "grad_norm": 7.851705551147461, "learning_rate": 1.6933686884802236e-07, "loss": 0.3123, "step": 18501 }, { "epoch": 0.917803462473337, "grad_norm": 10.597126960754395, "learning_rate": 1.69133823154618e-07, "loss": 0.4831, "step": 18502 }, { "epoch": 0.9178530681085372, "grad_norm": 7.711836814880371, "learning_rate": 1.6893089717226307e-07, "loss": 0.2959, "step": 18503 }, { "epoch": 0.9179026737437372, "grad_norm": 3.7166993618011475, "learning_rate": 1.6872809090598574e-07, "loss": 0.2123, "step": 18504 }, { "epoch": 0.9179522793789374, "grad_norm": 4.913505554199219, "learning_rate": 1.68525404360812e-07, "loss": 0.2001, "step": 18505 }, { "epoch": 0.9180018850141376, "grad_norm": 3.666753053665161, "learning_rate": 1.6832283754176448e-07, "loss": 0.2256, "step": 18506 }, { "epoch": 0.9180514906493378, "grad_norm": 9.536867141723633, "learning_rate": 1.6812039045386197e-07, "loss": 0.3706, "step": 18507 }, { "epoch": 0.918101096284538, "grad_norm": 5.027134418487549, "learning_rate": 1.679180631021221e-07, "loss": 0.2314, "step": 18508 }, { "epoch": 0.9181507019197381, "grad_norm": 5.628653049468994, "learning_rate": 1.6771585549155812e-07, "loss": 0.2563, "step": 18509 }, { "epoch": 0.9182003075549382, "grad_norm": 11.051082611083984, "learning_rate": 1.6751376762718152e-07, "loss": 0.2361, "step": 18510 }, { "epoch": 0.9182499131901384, "grad_norm": 2.916696310043335, "learning_rate": 1.6731179951399944e-07, "loss": 0.1051, "step": 18511 }, { "epoch": 0.9182995188253386, "grad_norm": 5.334299087524414, "learning_rate": 1.6710995115701733e-07, "loss": 0.2581, "step": 18512 }, { "epoch": 0.9183491244605387, "grad_norm": 5.595615386962891, "learning_rate": 1.6690822256123674e-07, "loss": 0.2391, "step": 18513 }, { "epoch": 0.9183987300957389, "grad_norm": 9.650410652160645, "learning_rate": 1.6670661373165643e-07, "loss": 0.4707, "step": 18514 }, { "epoch": 0.918448335730939, "grad_norm": 3.8561620712280273, "learning_rate": 1.665051246732735e-07, "loss": 0.197, "step": 18515 }, { "epoch": 0.9184979413661392, "grad_norm": 4.986390590667725, "learning_rate": 1.6630375539107902e-07, "loss": 0.3379, "step": 18516 }, { "epoch": 0.9185475470013393, "grad_norm": 6.809848785400391, "learning_rate": 1.6610250589006393e-07, "loss": 0.293, "step": 18517 }, { "epoch": 0.9185971526365395, "grad_norm": 12.053729057312012, "learning_rate": 1.6590137617521595e-07, "loss": 0.3423, "step": 18518 }, { "epoch": 0.9186467582717397, "grad_norm": 6.814153671264648, "learning_rate": 1.6570036625151832e-07, "loss": 0.2514, "step": 18519 }, { "epoch": 0.9186963639069399, "grad_norm": 7.437450408935547, "learning_rate": 1.6549947612395202e-07, "loss": 0.2599, "step": 18520 }, { "epoch": 0.9187459695421399, "grad_norm": 4.197768211364746, "learning_rate": 1.6529870579749641e-07, "loss": 0.2485, "step": 18521 }, { "epoch": 0.9187955751773401, "grad_norm": 5.010116100311279, "learning_rate": 1.6509805527712475e-07, "loss": 0.273, "step": 18522 }, { "epoch": 0.9188451808125403, "grad_norm": 4.129500865936279, "learning_rate": 1.648975245678114e-07, "loss": 0.2226, "step": 18523 }, { "epoch": 0.9188947864477405, "grad_norm": 7.663327217102051, "learning_rate": 1.6469711367452402e-07, "loss": 0.3916, "step": 18524 }, { "epoch": 0.9189443920829407, "grad_norm": 4.7976765632629395, "learning_rate": 1.6449682260222977e-07, "loss": 0.2619, "step": 18525 }, { "epoch": 0.9189939977181408, "grad_norm": 8.735367774963379, "learning_rate": 1.642966513558908e-07, "loss": 0.3313, "step": 18526 }, { "epoch": 0.9190436033533409, "grad_norm": 6.247663974761963, "learning_rate": 1.6409659994046867e-07, "loss": 0.3386, "step": 18527 }, { "epoch": 0.9190932089885411, "grad_norm": 8.130156517028809, "learning_rate": 1.6389666836092054e-07, "loss": 0.3074, "step": 18528 }, { "epoch": 0.9191428146237413, "grad_norm": 12.122902870178223, "learning_rate": 1.636968566222008e-07, "loss": 0.2927, "step": 18529 }, { "epoch": 0.9191924202589414, "grad_norm": 4.744195938110352, "learning_rate": 1.634971647292599e-07, "loss": 0.2675, "step": 18530 }, { "epoch": 0.9192420258941416, "grad_norm": 5.523486614227295, "learning_rate": 1.632975926870478e-07, "loss": 0.2281, "step": 18531 }, { "epoch": 0.9192916315293417, "grad_norm": 6.780769348144531, "learning_rate": 1.6309814050050888e-07, "loss": 0.2845, "step": 18532 }, { "epoch": 0.9193412371645419, "grad_norm": 13.294083595275879, "learning_rate": 1.628988081745858e-07, "loss": 0.4869, "step": 18533 }, { "epoch": 0.919390842799742, "grad_norm": 5.021249771118164, "learning_rate": 1.6269959571421856e-07, "loss": 0.2184, "step": 18534 }, { "epoch": 0.9194404484349422, "grad_norm": 5.853742599487305, "learning_rate": 1.6250050312434373e-07, "loss": 0.2161, "step": 18535 }, { "epoch": 0.9194900540701424, "grad_norm": 12.130571365356445, "learning_rate": 1.623015304098935e-07, "loss": 0.3314, "step": 18536 }, { "epoch": 0.9195396597053426, "grad_norm": 5.342949390411377, "learning_rate": 1.621026775758011e-07, "loss": 0.2143, "step": 18537 }, { "epoch": 0.9195892653405426, "grad_norm": 7.3470001220703125, "learning_rate": 1.619039446269921e-07, "loss": 0.1621, "step": 18538 }, { "epoch": 0.9196388709757428, "grad_norm": 4.1503190994262695, "learning_rate": 1.6170533156839197e-07, "loss": 0.1659, "step": 18539 }, { "epoch": 0.919688476610943, "grad_norm": 5.070368766784668, "learning_rate": 1.6150683840492231e-07, "loss": 0.1712, "step": 18540 }, { "epoch": 0.9197380822461432, "grad_norm": 7.9251179695129395, "learning_rate": 1.6130846514150257e-07, "loss": 0.3795, "step": 18541 }, { "epoch": 0.9197876878813434, "grad_norm": 4.876401901245117, "learning_rate": 1.6111021178304653e-07, "loss": 0.2353, "step": 18542 }, { "epoch": 0.9198372935165435, "grad_norm": 6.375889778137207, "learning_rate": 1.609120783344692e-07, "loss": 0.236, "step": 18543 }, { "epoch": 0.9198868991517436, "grad_norm": 8.997884750366211, "learning_rate": 1.6071406480067887e-07, "loss": 0.2682, "step": 18544 }, { "epoch": 0.9199365047869438, "grad_norm": 7.577597141265869, "learning_rate": 1.605161711865838e-07, "loss": 0.3276, "step": 18545 }, { "epoch": 0.919986110422144, "grad_norm": 10.71668815612793, "learning_rate": 1.6031839749708677e-07, "loss": 0.2521, "step": 18546 }, { "epoch": 0.9200357160573441, "grad_norm": 6.109577178955078, "learning_rate": 1.6012074373708887e-07, "loss": 0.2137, "step": 18547 }, { "epoch": 0.9200853216925443, "grad_norm": 8.990116119384766, "learning_rate": 1.599232099114889e-07, "loss": 0.3108, "step": 18548 }, { "epoch": 0.9201349273277444, "grad_norm": 14.681117057800293, "learning_rate": 1.597257960251808e-07, "loss": 0.3433, "step": 18549 }, { "epoch": 0.9201845329629446, "grad_norm": 5.99169397354126, "learning_rate": 1.595285020830567e-07, "loss": 0.2861, "step": 18550 }, { "epoch": 0.9202341385981447, "grad_norm": 6.543168544769287, "learning_rate": 1.5933132809000663e-07, "loss": 0.2204, "step": 18551 }, { "epoch": 0.9202837442333449, "grad_norm": 4.494722843170166, "learning_rate": 1.5913427405091554e-07, "loss": 0.2388, "step": 18552 }, { "epoch": 0.9203333498685451, "grad_norm": 4.030198097229004, "learning_rate": 1.5893733997066672e-07, "loss": 0.2251, "step": 18553 }, { "epoch": 0.9203829555037453, "grad_norm": 6.2255859375, "learning_rate": 1.5874052585414024e-07, "loss": 0.3342, "step": 18554 }, { "epoch": 0.9204325611389453, "grad_norm": 5.301052093505859, "learning_rate": 1.5854383170621434e-07, "loss": 0.286, "step": 18555 }, { "epoch": 0.9204821667741455, "grad_norm": 7.65757942199707, "learning_rate": 1.583472575317613e-07, "loss": 0.321, "step": 18556 }, { "epoch": 0.9205317724093457, "grad_norm": 4.686351299285889, "learning_rate": 1.5815080333565435e-07, "loss": 0.2404, "step": 18557 }, { "epoch": 0.9205813780445459, "grad_norm": 4.878328800201416, "learning_rate": 1.5795446912275968e-07, "loss": 0.257, "step": 18558 }, { "epoch": 0.920630983679746, "grad_norm": 6.597251892089844, "learning_rate": 1.5775825489794394e-07, "loss": 0.2893, "step": 18559 }, { "epoch": 0.9206805893149462, "grad_norm": 4.494462490081787, "learning_rate": 1.5756216066606932e-07, "loss": 0.2877, "step": 18560 }, { "epoch": 0.9207301949501463, "grad_norm": 17.443822860717773, "learning_rate": 1.5736618643199474e-07, "loss": 0.2476, "step": 18561 }, { "epoch": 0.9207798005853465, "grad_norm": 6.7262654304504395, "learning_rate": 1.5717033220057633e-07, "loss": 0.294, "step": 18562 }, { "epoch": 0.9208294062205467, "grad_norm": 15.028164863586426, "learning_rate": 1.569745979766679e-07, "loss": 0.4006, "step": 18563 }, { "epoch": 0.9208790118557468, "grad_norm": 7.103928565979004, "learning_rate": 1.5677898376511956e-07, "loss": 0.1239, "step": 18564 }, { "epoch": 0.920928617490947, "grad_norm": 5.343377113342285, "learning_rate": 1.5658348957077963e-07, "loss": 0.2663, "step": 18565 }, { "epoch": 0.9209782231261471, "grad_norm": 12.391002655029297, "learning_rate": 1.5638811539849086e-07, "loss": 0.4154, "step": 18566 }, { "epoch": 0.9210278287613473, "grad_norm": 5.290737628936768, "learning_rate": 1.561928612530955e-07, "loss": 0.2638, "step": 18567 }, { "epoch": 0.9210774343965474, "grad_norm": 4.865002632141113, "learning_rate": 1.559977271394325e-07, "loss": 0.2897, "step": 18568 }, { "epoch": 0.9211270400317476, "grad_norm": 5.835153579711914, "learning_rate": 1.5580271306233686e-07, "loss": 0.32, "step": 18569 }, { "epoch": 0.9211766456669478, "grad_norm": 7.217074394226074, "learning_rate": 1.5560781902664136e-07, "loss": 0.3431, "step": 18570 }, { "epoch": 0.921226251302148, "grad_norm": 6.253312110900879, "learning_rate": 1.5541304503717548e-07, "loss": 0.3092, "step": 18571 }, { "epoch": 0.921275856937348, "grad_norm": 5.773707389831543, "learning_rate": 1.5521839109876647e-07, "loss": 0.2485, "step": 18572 }, { "epoch": 0.9213254625725482, "grad_norm": 5.374644756317139, "learning_rate": 1.5502385721623548e-07, "loss": 0.285, "step": 18573 }, { "epoch": 0.9213750682077484, "grad_norm": 5.975480556488037, "learning_rate": 1.5482944339440642e-07, "loss": 0.3201, "step": 18574 }, { "epoch": 0.9214246738429486, "grad_norm": 5.559149742126465, "learning_rate": 1.5463514963809546e-07, "loss": 0.1981, "step": 18575 }, { "epoch": 0.9214742794781488, "grad_norm": 14.585868835449219, "learning_rate": 1.5444097595211648e-07, "loss": 0.3576, "step": 18576 }, { "epoch": 0.9215238851133489, "grad_norm": 9.197394371032715, "learning_rate": 1.5424692234128236e-07, "loss": 0.3257, "step": 18577 }, { "epoch": 0.921573490748549, "grad_norm": 6.247350215911865, "learning_rate": 1.54052988810402e-07, "loss": 0.2724, "step": 18578 }, { "epoch": 0.9216230963837492, "grad_norm": 4.228768348693848, "learning_rate": 1.5385917536427986e-07, "loss": 0.2154, "step": 18579 }, { "epoch": 0.9216727020189494, "grad_norm": 10.262880325317383, "learning_rate": 1.536654820077199e-07, "loss": 0.3158, "step": 18580 }, { "epoch": 0.9217223076541495, "grad_norm": 6.324732780456543, "learning_rate": 1.534719087455211e-07, "loss": 0.2316, "step": 18581 }, { "epoch": 0.9217719132893497, "grad_norm": 5.673435688018799, "learning_rate": 1.5327845558248123e-07, "loss": 0.2708, "step": 18582 }, { "epoch": 0.9218215189245498, "grad_norm": 13.315140724182129, "learning_rate": 1.530851225233937e-07, "loss": 0.3961, "step": 18583 }, { "epoch": 0.92187112455975, "grad_norm": 6.072700500488281, "learning_rate": 1.5289190957304855e-07, "loss": 0.3266, "step": 18584 }, { "epoch": 0.9219207301949501, "grad_norm": 6.287940502166748, "learning_rate": 1.5269881673623532e-07, "loss": 0.2606, "step": 18585 }, { "epoch": 0.9219703358301503, "grad_norm": 4.728734970092773, "learning_rate": 1.5250584401773795e-07, "loss": 0.3123, "step": 18586 }, { "epoch": 0.9220199414653505, "grad_norm": 8.010477066040039, "learning_rate": 1.5231299142233813e-07, "loss": 0.2066, "step": 18587 }, { "epoch": 0.9220695471005507, "grad_norm": 6.874679088592529, "learning_rate": 1.5212025895481598e-07, "loss": 0.3078, "step": 18588 }, { "epoch": 0.9221191527357507, "grad_norm": 4.807215213775635, "learning_rate": 1.5192764661994598e-07, "loss": 0.2278, "step": 18589 }, { "epoch": 0.9221687583709509, "grad_norm": 5.885919570922852, "learning_rate": 1.5173515442250208e-07, "loss": 0.2117, "step": 18590 }, { "epoch": 0.9222183640061511, "grad_norm": 9.599017143249512, "learning_rate": 1.5154278236725384e-07, "loss": 0.2611, "step": 18591 }, { "epoch": 0.9222679696413513, "grad_norm": 5.328461647033691, "learning_rate": 1.5135053045896963e-07, "loss": 0.227, "step": 18592 }, { "epoch": 0.9223175752765514, "grad_norm": 7.600118637084961, "learning_rate": 1.5115839870241178e-07, "loss": 0.241, "step": 18593 }, { "epoch": 0.9223671809117516, "grad_norm": 4.724938869476318, "learning_rate": 1.5096638710234257e-07, "loss": 0.3167, "step": 18594 }, { "epoch": 0.9224167865469517, "grad_norm": 9.273456573486328, "learning_rate": 1.507744956635193e-07, "loss": 0.3666, "step": 18595 }, { "epoch": 0.9224663921821519, "grad_norm": 4.988114356994629, "learning_rate": 1.505827243906982e-07, "loss": 0.2483, "step": 18596 }, { "epoch": 0.922515997817352, "grad_norm": 6.130337238311768, "learning_rate": 1.503910732886299e-07, "loss": 0.2437, "step": 18597 }, { "epoch": 0.9225656034525522, "grad_norm": 18.3856143951416, "learning_rate": 1.5019954236206502e-07, "loss": 0.465, "step": 18598 }, { "epoch": 0.9226152090877524, "grad_norm": 5.252509593963623, "learning_rate": 1.5000813161574923e-07, "loss": 0.2046, "step": 18599 }, { "epoch": 0.9226648147229525, "grad_norm": 9.145092964172363, "learning_rate": 1.498168410544265e-07, "loss": 0.2781, "step": 18600 }, { "epoch": 0.9227144203581527, "grad_norm": 5.447176456451416, "learning_rate": 1.4962567068283528e-07, "loss": 0.3535, "step": 18601 }, { "epoch": 0.9227640259933528, "grad_norm": 14.289443969726562, "learning_rate": 1.494346205057151e-07, "loss": 0.3737, "step": 18602 }, { "epoch": 0.922813631628553, "grad_norm": 4.8045973777771, "learning_rate": 1.4924369052779885e-07, "loss": 0.2164, "step": 18603 }, { "epoch": 0.9228632372637532, "grad_norm": 9.621800422668457, "learning_rate": 1.4905288075381773e-07, "loss": 0.2363, "step": 18604 }, { "epoch": 0.9229128428989534, "grad_norm": 5.628194808959961, "learning_rate": 1.4886219118850075e-07, "loss": 0.2435, "step": 18605 }, { "epoch": 0.9229624485341534, "grad_norm": 5.297124862670898, "learning_rate": 1.4867162183657357e-07, "loss": 0.247, "step": 18606 }, { "epoch": 0.9230120541693536, "grad_norm": 5.873312950134277, "learning_rate": 1.4848117270275685e-07, "loss": 0.2645, "step": 18607 }, { "epoch": 0.9230616598045538, "grad_norm": 11.715530395507812, "learning_rate": 1.4829084379177294e-07, "loss": 0.4161, "step": 18608 }, { "epoch": 0.923111265439754, "grad_norm": 6.118992328643799, "learning_rate": 1.4810063510833529e-07, "loss": 0.2113, "step": 18609 }, { "epoch": 0.9231608710749541, "grad_norm": 12.257213592529297, "learning_rate": 1.4791054665715897e-07, "loss": 0.3051, "step": 18610 }, { "epoch": 0.9232104767101543, "grad_norm": 7.430637836456299, "learning_rate": 1.4772057844295417e-07, "loss": 0.2862, "step": 18611 }, { "epoch": 0.9232600823453544, "grad_norm": 8.283063888549805, "learning_rate": 1.4753073047042876e-07, "loss": 0.2025, "step": 18612 }, { "epoch": 0.9233096879805546, "grad_norm": 5.901214122772217, "learning_rate": 1.4734100274428619e-07, "loss": 0.2511, "step": 18613 }, { "epoch": 0.9233592936157548, "grad_norm": 8.257307052612305, "learning_rate": 1.471513952692294e-07, "loss": 0.291, "step": 18614 }, { "epoch": 0.9234088992509549, "grad_norm": 9.27828311920166, "learning_rate": 1.4696190804995514e-07, "loss": 0.2603, "step": 18615 }, { "epoch": 0.9234585048861551, "grad_norm": 5.707587242126465, "learning_rate": 1.4677254109116135e-07, "loss": 0.2761, "step": 18616 }, { "epoch": 0.9235081105213552, "grad_norm": 9.910475730895996, "learning_rate": 1.465832943975387e-07, "loss": 0.3124, "step": 18617 }, { "epoch": 0.9235577161565554, "grad_norm": 4.209449291229248, "learning_rate": 1.4639416797377738e-07, "loss": 0.1724, "step": 18618 }, { "epoch": 0.9236073217917555, "grad_norm": 14.12757396697998, "learning_rate": 1.4620516182456413e-07, "loss": 0.64, "step": 18619 }, { "epoch": 0.9236569274269557, "grad_norm": 7.788273334503174, "learning_rate": 1.4601627595458302e-07, "loss": 0.3142, "step": 18620 }, { "epoch": 0.9237065330621559, "grad_norm": 8.430350303649902, "learning_rate": 1.4582751036851362e-07, "loss": 0.329, "step": 18621 }, { "epoch": 0.9237561386973561, "grad_norm": 9.172758102416992, "learning_rate": 1.4563886507103442e-07, "loss": 0.2815, "step": 18622 }, { "epoch": 0.9238057443325561, "grad_norm": 7.0148444175720215, "learning_rate": 1.4545034006682056e-07, "loss": 0.2834, "step": 18623 }, { "epoch": 0.9238553499677563, "grad_norm": 5.039119243621826, "learning_rate": 1.4526193536054277e-07, "loss": 0.288, "step": 18624 }, { "epoch": 0.9239049556029565, "grad_norm": 6.045688152313232, "learning_rate": 1.4507365095687064e-07, "loss": 0.3375, "step": 18625 }, { "epoch": 0.9239545612381567, "grad_norm": 18.02491569519043, "learning_rate": 1.4488548686046933e-07, "loss": 0.3194, "step": 18626 }, { "epoch": 0.9240041668733568, "grad_norm": 4.860077381134033, "learning_rate": 1.4469744307600176e-07, "loss": 0.3121, "step": 18627 }, { "epoch": 0.924053772508557, "grad_norm": 11.868600845336914, "learning_rate": 1.4450951960812753e-07, "loss": 0.3362, "step": 18628 }, { "epoch": 0.9241033781437571, "grad_norm": 14.541637420654297, "learning_rate": 1.4432171646150462e-07, "loss": 0.2886, "step": 18629 }, { "epoch": 0.9241529837789573, "grad_norm": 8.267288208007812, "learning_rate": 1.4413403364078482e-07, "loss": 0.2273, "step": 18630 }, { "epoch": 0.9242025894141574, "grad_norm": 5.658566951751709, "learning_rate": 1.4394647115062165e-07, "loss": 0.3183, "step": 18631 }, { "epoch": 0.9242521950493576, "grad_norm": 7.144067287445068, "learning_rate": 1.4375902899566085e-07, "loss": 0.1916, "step": 18632 }, { "epoch": 0.9243018006845578, "grad_norm": 7.7070631980896, "learning_rate": 1.4357170718054815e-07, "loss": 0.2392, "step": 18633 }, { "epoch": 0.9243514063197579, "grad_norm": 8.424321174621582, "learning_rate": 1.433845057099248e-07, "loss": 0.2089, "step": 18634 }, { "epoch": 0.924401011954958, "grad_norm": 5.0469651222229, "learning_rate": 1.43197424588431e-07, "loss": 0.2011, "step": 18635 }, { "epoch": 0.9244506175901582, "grad_norm": 4.713942527770996, "learning_rate": 1.4301046382070139e-07, "loss": 0.2037, "step": 18636 }, { "epoch": 0.9245002232253584, "grad_norm": 8.066873550415039, "learning_rate": 1.4282362341137057e-07, "loss": 0.316, "step": 18637 }, { "epoch": 0.9245498288605586, "grad_norm": 4.893163204193115, "learning_rate": 1.4263690336506596e-07, "loss": 0.1903, "step": 18638 }, { "epoch": 0.9245994344957588, "grad_norm": 8.257370948791504, "learning_rate": 1.4245030368641776e-07, "loss": 0.2702, "step": 18639 }, { "epoch": 0.9246490401309588, "grad_norm": 11.734848022460938, "learning_rate": 1.4226382438004728e-07, "loss": 0.3915, "step": 18640 }, { "epoch": 0.924698645766159, "grad_norm": 3.6815848350524902, "learning_rate": 1.4207746545057688e-07, "loss": 0.2438, "step": 18641 }, { "epoch": 0.9247482514013592, "grad_norm": 6.959133625030518, "learning_rate": 1.4189122690262402e-07, "loss": 0.2285, "step": 18642 }, { "epoch": 0.9247978570365594, "grad_norm": 5.109774589538574, "learning_rate": 1.4170510874080502e-07, "loss": 0.2467, "step": 18643 }, { "epoch": 0.9248474626717595, "grad_norm": 4.498076915740967, "learning_rate": 1.4151911096973002e-07, "loss": 0.2749, "step": 18644 }, { "epoch": 0.9248970683069597, "grad_norm": 7.983517169952393, "learning_rate": 1.413332335940104e-07, "loss": 0.2816, "step": 18645 }, { "epoch": 0.9249466739421598, "grad_norm": 5.263754844665527, "learning_rate": 1.4114747661825024e-07, "loss": 0.2378, "step": 18646 }, { "epoch": 0.92499627957736, "grad_norm": 5.482622146606445, "learning_rate": 1.409618400470536e-07, "loss": 0.315, "step": 18647 }, { "epoch": 0.9250458852125601, "grad_norm": 8.062029838562012, "learning_rate": 1.4077632388502015e-07, "loss": 0.3129, "step": 18648 }, { "epoch": 0.9250954908477603, "grad_norm": 5.714928150177002, "learning_rate": 1.4059092813674846e-07, "loss": 0.2117, "step": 18649 }, { "epoch": 0.9251450964829605, "grad_norm": 10.915440559387207, "learning_rate": 1.4040565280683038e-07, "loss": 0.3335, "step": 18650 }, { "epoch": 0.9251947021181606, "grad_norm": 7.322501182556152, "learning_rate": 1.4022049789986003e-07, "loss": 0.2585, "step": 18651 }, { "epoch": 0.9252443077533608, "grad_norm": 6.5533599853515625, "learning_rate": 1.4003546342042261e-07, "loss": 0.2847, "step": 18652 }, { "epoch": 0.9252939133885609, "grad_norm": 7.107389450073242, "learning_rate": 1.398505493731056e-07, "loss": 0.3397, "step": 18653 }, { "epoch": 0.9253435190237611, "grad_norm": 6.071531295776367, "learning_rate": 1.396657557624903e-07, "loss": 0.2905, "step": 18654 }, { "epoch": 0.9253931246589613, "grad_norm": 7.690374374389648, "learning_rate": 1.3948108259315585e-07, "loss": 0.2477, "step": 18655 }, { "epoch": 0.9254427302941615, "grad_norm": 4.789221286773682, "learning_rate": 1.3929652986967913e-07, "loss": 0.2453, "step": 18656 }, { "epoch": 0.9254923359293615, "grad_norm": 7.219784259796143, "learning_rate": 1.391120975966337e-07, "loss": 0.2285, "step": 18657 }, { "epoch": 0.9255419415645617, "grad_norm": 14.098156929016113, "learning_rate": 1.3892778577858813e-07, "loss": 0.3043, "step": 18658 }, { "epoch": 0.9255915471997619, "grad_norm": 8.435665130615234, "learning_rate": 1.387435944201121e-07, "loss": 0.2541, "step": 18659 }, { "epoch": 0.9256411528349621, "grad_norm": 14.45042610168457, "learning_rate": 1.3855952352576808e-07, "loss": 0.3343, "step": 18660 }, { "epoch": 0.9256907584701622, "grad_norm": 7.703645706176758, "learning_rate": 1.3837557310011795e-07, "loss": 0.1769, "step": 18661 }, { "epoch": 0.9257403641053624, "grad_norm": 6.571539878845215, "learning_rate": 1.3819174314772087e-07, "loss": 0.1933, "step": 18662 }, { "epoch": 0.9257899697405625, "grad_norm": 8.411962509155273, "learning_rate": 1.3800803367313154e-07, "loss": 0.3274, "step": 18663 }, { "epoch": 0.9258395753757627, "grad_norm": 5.405137062072754, "learning_rate": 1.3782444468090183e-07, "loss": 0.2656, "step": 18664 }, { "epoch": 0.9258891810109628, "grad_norm": 9.062736511230469, "learning_rate": 1.376409761755826e-07, "loss": 0.2474, "step": 18665 }, { "epoch": 0.925938786646163, "grad_norm": 7.144866943359375, "learning_rate": 1.3745762816171904e-07, "loss": 0.3161, "step": 18666 }, { "epoch": 0.9259883922813632, "grad_norm": 8.371280670166016, "learning_rate": 1.3727440064385477e-07, "loss": 0.3389, "step": 18667 }, { "epoch": 0.9260379979165633, "grad_norm": 6.787683963775635, "learning_rate": 1.3709129362653118e-07, "loss": 0.2046, "step": 18668 }, { "epoch": 0.9260876035517634, "grad_norm": 5.003224849700928, "learning_rate": 1.369083071142846e-07, "loss": 0.3503, "step": 18669 }, { "epoch": 0.9261372091869636, "grad_norm": 5.528216361999512, "learning_rate": 1.3672544111164976e-07, "loss": 0.2579, "step": 18670 }, { "epoch": 0.9261868148221638, "grad_norm": 5.8423871994018555, "learning_rate": 1.3654269562315858e-07, "loss": 0.2102, "step": 18671 }, { "epoch": 0.926236420457364, "grad_norm": 4.727110385894775, "learning_rate": 1.3636007065333967e-07, "loss": 0.2223, "step": 18672 }, { "epoch": 0.9262860260925642, "grad_norm": 12.333800315856934, "learning_rate": 1.361775662067183e-07, "loss": 0.3698, "step": 18673 }, { "epoch": 0.9263356317277642, "grad_norm": 6.545404434204102, "learning_rate": 1.3599518228781694e-07, "loss": 0.2061, "step": 18674 }, { "epoch": 0.9263852373629644, "grad_norm": 8.168961524963379, "learning_rate": 1.3581291890115477e-07, "loss": 0.2799, "step": 18675 }, { "epoch": 0.9264348429981646, "grad_norm": 6.636816501617432, "learning_rate": 1.356307760512493e-07, "loss": 0.1822, "step": 18676 }, { "epoch": 0.9264844486333648, "grad_norm": 7.182290554046631, "learning_rate": 1.3544875374261302e-07, "loss": 0.2563, "step": 18677 }, { "epoch": 0.9265340542685649, "grad_norm": 4.423266887664795, "learning_rate": 1.352668519797573e-07, "loss": 0.2219, "step": 18678 }, { "epoch": 0.9265836599037651, "grad_norm": 5.997987270355225, "learning_rate": 1.3508507076718967e-07, "loss": 0.2333, "step": 18679 }, { "epoch": 0.9266332655389652, "grad_norm": 11.118863105773926, "learning_rate": 1.3490341010941488e-07, "loss": 0.4425, "step": 18680 }, { "epoch": 0.9266828711741654, "grad_norm": 5.270001411437988, "learning_rate": 1.3472187001093372e-07, "loss": 0.1885, "step": 18681 }, { "epoch": 0.9267324768093655, "grad_norm": 4.689113140106201, "learning_rate": 1.3454045047624597e-07, "loss": 0.2088, "step": 18682 }, { "epoch": 0.9267820824445657, "grad_norm": 11.352783203125, "learning_rate": 1.343591515098469e-07, "loss": 0.3614, "step": 18683 }, { "epoch": 0.9268316880797659, "grad_norm": 8.356346130371094, "learning_rate": 1.341779731162285e-07, "loss": 0.275, "step": 18684 }, { "epoch": 0.926881293714966, "grad_norm": 14.023161888122559, "learning_rate": 1.3399691529988102e-07, "loss": 0.4005, "step": 18685 }, { "epoch": 0.9269308993501661, "grad_norm": 12.377615928649902, "learning_rate": 1.3381597806529146e-07, "loss": 0.3445, "step": 18686 }, { "epoch": 0.9269805049853663, "grad_norm": 5.151670932769775, "learning_rate": 1.3363516141694233e-07, "loss": 0.2497, "step": 18687 }, { "epoch": 0.9270301106205665, "grad_norm": 7.133171558380127, "learning_rate": 1.3345446535931616e-07, "loss": 0.2628, "step": 18688 }, { "epoch": 0.9270797162557667, "grad_norm": 10.219067573547363, "learning_rate": 1.3327388989688883e-07, "loss": 0.3461, "step": 18689 }, { "epoch": 0.9271293218909669, "grad_norm": 4.516903877258301, "learning_rate": 1.330934350341373e-07, "loss": 0.2002, "step": 18690 }, { "epoch": 0.9271789275261669, "grad_norm": 8.187594413757324, "learning_rate": 1.3291310077553078e-07, "loss": 0.2669, "step": 18691 }, { "epoch": 0.9272285331613671, "grad_norm": 4.6691508293151855, "learning_rate": 1.3273288712553956e-07, "loss": 0.2246, "step": 18692 }, { "epoch": 0.9272781387965673, "grad_norm": 6.309380531311035, "learning_rate": 1.3255279408862953e-07, "loss": 0.2303, "step": 18693 }, { "epoch": 0.9273277444317675, "grad_norm": 5.501605033874512, "learning_rate": 1.3237282166926323e-07, "loss": 0.248, "step": 18694 }, { "epoch": 0.9273773500669676, "grad_norm": 16.190813064575195, "learning_rate": 1.3219296987189878e-07, "loss": 0.4476, "step": 18695 }, { "epoch": 0.9274269557021678, "grad_norm": 7.348943710327148, "learning_rate": 1.320132387009959e-07, "loss": 0.3214, "step": 18696 }, { "epoch": 0.9274765613373679, "grad_norm": 6.61341667175293, "learning_rate": 1.3183362816100663e-07, "loss": 0.2577, "step": 18697 }, { "epoch": 0.9275261669725681, "grad_norm": 9.421147346496582, "learning_rate": 1.3165413825638241e-07, "loss": 0.3804, "step": 18698 }, { "epoch": 0.9275757726077682, "grad_norm": 8.858757972717285, "learning_rate": 1.3147476899157018e-07, "loss": 0.3401, "step": 18699 }, { "epoch": 0.9276253782429684, "grad_norm": 9.304561614990234, "learning_rate": 1.3129552037101644e-07, "loss": 0.3288, "step": 18700 }, { "epoch": 0.9276749838781686, "grad_norm": 8.916678428649902, "learning_rate": 1.3111639239916095e-07, "loss": 0.2808, "step": 18701 }, { "epoch": 0.9277245895133687, "grad_norm": 5.630863666534424, "learning_rate": 1.3093738508044462e-07, "loss": 0.3372, "step": 18702 }, { "epoch": 0.9277741951485688, "grad_norm": 3.6777217388153076, "learning_rate": 1.3075849841930166e-07, "loss": 0.2171, "step": 18703 }, { "epoch": 0.927823800783769, "grad_norm": 7.038613319396973, "learning_rate": 1.3057973242016575e-07, "loss": 0.3064, "step": 18704 }, { "epoch": 0.9278734064189692, "grad_norm": 8.072315216064453, "learning_rate": 1.3040108708746724e-07, "loss": 0.2386, "step": 18705 }, { "epoch": 0.9279230120541694, "grad_norm": 4.721263885498047, "learning_rate": 1.3022256242563203e-07, "loss": 0.2481, "step": 18706 }, { "epoch": 0.9279726176893696, "grad_norm": 8.568058013916016, "learning_rate": 1.300441584390849e-07, "loss": 0.2693, "step": 18707 }, { "epoch": 0.9280222233245696, "grad_norm": 10.638544082641602, "learning_rate": 1.298658751322468e-07, "loss": 0.3467, "step": 18708 }, { "epoch": 0.9280718289597698, "grad_norm": 10.59304141998291, "learning_rate": 1.2968771250953416e-07, "loss": 0.2017, "step": 18709 }, { "epoch": 0.92812143459497, "grad_norm": 7.737713813781738, "learning_rate": 1.2950967057536457e-07, "loss": 0.3222, "step": 18710 }, { "epoch": 0.9281710402301702, "grad_norm": 7.759925365447998, "learning_rate": 1.2933174933414727e-07, "loss": 0.3041, "step": 18711 }, { "epoch": 0.9282206458653703, "grad_norm": 11.408839225769043, "learning_rate": 1.291539487902932e-07, "loss": 0.3266, "step": 18712 }, { "epoch": 0.9282702515005705, "grad_norm": 8.92357349395752, "learning_rate": 1.2897626894820714e-07, "loss": 0.3063, "step": 18713 }, { "epoch": 0.9283198571357706, "grad_norm": 11.238760948181152, "learning_rate": 1.2879870981229336e-07, "loss": 0.389, "step": 18714 }, { "epoch": 0.9283694627709708, "grad_norm": 9.354105949401855, "learning_rate": 1.2862127138695e-07, "loss": 0.3397, "step": 18715 }, { "epoch": 0.9284190684061709, "grad_norm": 4.137567043304443, "learning_rate": 1.284439536765758e-07, "loss": 0.219, "step": 18716 }, { "epoch": 0.9284686740413711, "grad_norm": 11.748875617980957, "learning_rate": 1.2826675668556389e-07, "loss": 0.2796, "step": 18717 }, { "epoch": 0.9285182796765713, "grad_norm": 6.532464504241943, "learning_rate": 1.2808968041830516e-07, "loss": 0.238, "step": 18718 }, { "epoch": 0.9285678853117714, "grad_norm": 8.854634284973145, "learning_rate": 1.2791272487918782e-07, "loss": 0.2764, "step": 18719 }, { "epoch": 0.9286174909469715, "grad_norm": 10.638751983642578, "learning_rate": 1.277358900725978e-07, "loss": 0.3659, "step": 18720 }, { "epoch": 0.9286670965821717, "grad_norm": 6.2064409255981445, "learning_rate": 1.2755917600291545e-07, "loss": 0.2445, "step": 18721 }, { "epoch": 0.9287167022173719, "grad_norm": 7.210628986358643, "learning_rate": 1.2738258267452176e-07, "loss": 0.3612, "step": 18722 }, { "epoch": 0.9287663078525721, "grad_norm": 15.011055946350098, "learning_rate": 1.2720611009179096e-07, "loss": 0.2645, "step": 18723 }, { "epoch": 0.9288159134877723, "grad_norm": 5.164531707763672, "learning_rate": 1.2702975825909792e-07, "loss": 0.2821, "step": 18724 }, { "epoch": 0.9288655191229723, "grad_norm": 12.077468872070312, "learning_rate": 1.2685352718081079e-07, "loss": 0.4551, "step": 18725 }, { "epoch": 0.9289151247581725, "grad_norm": 14.190797805786133, "learning_rate": 1.266774168612983e-07, "loss": 0.3578, "step": 18726 }, { "epoch": 0.9289647303933727, "grad_norm": 5.473387718200684, "learning_rate": 1.2650142730492364e-07, "loss": 0.225, "step": 18727 }, { "epoch": 0.9290143360285729, "grad_norm": 5.7988128662109375, "learning_rate": 1.2632555851604778e-07, "loss": 0.2354, "step": 18728 }, { "epoch": 0.929063941663773, "grad_norm": 10.207640647888184, "learning_rate": 1.261498104990294e-07, "loss": 0.3579, "step": 18729 }, { "epoch": 0.9291135472989731, "grad_norm": 11.196846008300781, "learning_rate": 1.259741832582234e-07, "loss": 0.3614, "step": 18730 }, { "epoch": 0.9291631529341733, "grad_norm": 9.086179733276367, "learning_rate": 1.2579867679798242e-07, "loss": 0.3402, "step": 18731 }, { "epoch": 0.9292127585693735, "grad_norm": 3.7123286724090576, "learning_rate": 1.256232911226546e-07, "loss": 0.2461, "step": 18732 }, { "epoch": 0.9292623642045736, "grad_norm": 7.822927474975586, "learning_rate": 1.2544802623658648e-07, "loss": 0.3576, "step": 18733 }, { "epoch": 0.9293119698397738, "grad_norm": 6.3321919441223145, "learning_rate": 1.2527288214412126e-07, "loss": 0.2753, "step": 18734 }, { "epoch": 0.929361575474974, "grad_norm": 6.105440139770508, "learning_rate": 1.250978588495988e-07, "loss": 0.2478, "step": 18735 }, { "epoch": 0.9294111811101741, "grad_norm": 5.687012672424316, "learning_rate": 1.2492295635735728e-07, "loss": 0.2568, "step": 18736 }, { "epoch": 0.9294607867453742, "grad_norm": 7.66058874130249, "learning_rate": 1.2474817467172995e-07, "loss": 0.334, "step": 18737 }, { "epoch": 0.9295103923805744, "grad_norm": 3.9843950271606445, "learning_rate": 1.2457351379704773e-07, "loss": 0.1886, "step": 18738 }, { "epoch": 0.9295599980157746, "grad_norm": 8.518585205078125, "learning_rate": 1.2439897373763997e-07, "loss": 0.2938, "step": 18739 }, { "epoch": 0.9296096036509748, "grad_norm": 5.833609580993652, "learning_rate": 1.2422455449783045e-07, "loss": 0.2138, "step": 18740 }, { "epoch": 0.929659209286175, "grad_norm": 9.825175285339355, "learning_rate": 1.2405025608194232e-07, "loss": 0.4085, "step": 18741 }, { "epoch": 0.929708814921375, "grad_norm": 4.895529270172119, "learning_rate": 1.238760784942944e-07, "loss": 0.3096, "step": 18742 }, { "epoch": 0.9297584205565752, "grad_norm": 7.000894546508789, "learning_rate": 1.2370202173920264e-07, "loss": 0.3116, "step": 18743 }, { "epoch": 0.9298080261917754, "grad_norm": 11.957425117492676, "learning_rate": 1.2352808582098087e-07, "loss": 0.3449, "step": 18744 }, { "epoch": 0.9298576318269756, "grad_norm": 7.156240463256836, "learning_rate": 1.2335427074393946e-07, "loss": 0.3081, "step": 18745 }, { "epoch": 0.9299072374621757, "grad_norm": 5.639389514923096, "learning_rate": 1.2318057651238446e-07, "loss": 0.3247, "step": 18746 }, { "epoch": 0.9299568430973758, "grad_norm": 4.1991353034973145, "learning_rate": 1.2300700313062132e-07, "loss": 0.2624, "step": 18747 }, { "epoch": 0.930006448732576, "grad_norm": 5.032643795013428, "learning_rate": 1.2283355060295043e-07, "loss": 0.2774, "step": 18748 }, { "epoch": 0.9300560543677762, "grad_norm": 4.746809959411621, "learning_rate": 1.2266021893367008e-07, "loss": 0.2522, "step": 18749 }, { "epoch": 0.9301056600029763, "grad_norm": 8.108329772949219, "learning_rate": 1.2248700812707626e-07, "loss": 0.3566, "step": 18750 }, { "epoch": 0.9301552656381765, "grad_norm": 5.701767921447754, "learning_rate": 1.2231391818746108e-07, "loss": 0.284, "step": 18751 }, { "epoch": 0.9302048712733767, "grad_norm": 6.334697246551514, "learning_rate": 1.2214094911911222e-07, "loss": 0.263, "step": 18752 }, { "epoch": 0.9302544769085768, "grad_norm": 9.074960708618164, "learning_rate": 1.219681009263185e-07, "loss": 0.3161, "step": 18753 }, { "epoch": 0.930304082543777, "grad_norm": 5.347255229949951, "learning_rate": 1.2179537361336093e-07, "loss": 0.2787, "step": 18754 }, { "epoch": 0.9303536881789771, "grad_norm": 14.546757698059082, "learning_rate": 1.216227671845205e-07, "loss": 0.3289, "step": 18755 }, { "epoch": 0.9304032938141773, "grad_norm": 10.884872436523438, "learning_rate": 1.214502816440749e-07, "loss": 0.3135, "step": 18756 }, { "epoch": 0.9304528994493775, "grad_norm": 5.14539098739624, "learning_rate": 1.212779169962991e-07, "loss": 0.2376, "step": 18757 }, { "epoch": 0.9305025050845777, "grad_norm": 16.243343353271484, "learning_rate": 1.2110567324546184e-07, "loss": 0.3559, "step": 18758 }, { "epoch": 0.9305521107197777, "grad_norm": 6.435212135314941, "learning_rate": 1.2093355039583422e-07, "loss": 0.3022, "step": 18759 }, { "epoch": 0.9306017163549779, "grad_norm": 13.822184562683105, "learning_rate": 1.2076154845167887e-07, "loss": 0.4187, "step": 18760 }, { "epoch": 0.9306513219901781, "grad_norm": 4.149242877960205, "learning_rate": 1.2058966741726074e-07, "loss": 0.2141, "step": 18761 }, { "epoch": 0.9307009276253783, "grad_norm": 9.127786636352539, "learning_rate": 1.2041790729683755e-07, "loss": 0.2801, "step": 18762 }, { "epoch": 0.9307505332605784, "grad_norm": 7.570674419403076, "learning_rate": 1.2024626809466532e-07, "loss": 0.3071, "step": 18763 }, { "epoch": 0.9308001388957785, "grad_norm": 8.459579467773438, "learning_rate": 1.2007474981499844e-07, "loss": 0.3872, "step": 18764 }, { "epoch": 0.9308497445309787, "grad_norm": 12.944421768188477, "learning_rate": 1.1990335246208685e-07, "loss": 0.4535, "step": 18765 }, { "epoch": 0.9308993501661789, "grad_norm": 17.77693748474121, "learning_rate": 1.1973207604017712e-07, "loss": 0.3898, "step": 18766 }, { "epoch": 0.930948955801379, "grad_norm": 5.794972896575928, "learning_rate": 1.1956092055351477e-07, "loss": 0.2441, "step": 18767 }, { "epoch": 0.9309985614365792, "grad_norm": 6.362402439117432, "learning_rate": 1.1938988600634028e-07, "loss": 0.307, "step": 18768 }, { "epoch": 0.9310481670717794, "grad_norm": 6.0629072189331055, "learning_rate": 1.1921897240289192e-07, "loss": 0.2738, "step": 18769 }, { "epoch": 0.9310977727069795, "grad_norm": 4.580608367919922, "learning_rate": 1.1904817974740523e-07, "loss": 0.1774, "step": 18770 }, { "epoch": 0.9311473783421796, "grad_norm": 7.753298282623291, "learning_rate": 1.1887750804411291e-07, "loss": 0.3113, "step": 18771 }, { "epoch": 0.9311969839773798, "grad_norm": 8.037172317504883, "learning_rate": 1.1870695729724325e-07, "loss": 0.1889, "step": 18772 }, { "epoch": 0.93124658961258, "grad_norm": 5.446648120880127, "learning_rate": 1.1853652751102395e-07, "loss": 0.194, "step": 18773 }, { "epoch": 0.9312961952477802, "grad_norm": 7.4421234130859375, "learning_rate": 1.1836621868967724e-07, "loss": 0.2787, "step": 18774 }, { "epoch": 0.9313458008829804, "grad_norm": 6.739739894866943, "learning_rate": 1.181960308374236e-07, "loss": 0.2908, "step": 18775 }, { "epoch": 0.9313954065181804, "grad_norm": 10.036273956298828, "learning_rate": 1.1802596395848076e-07, "loss": 0.3608, "step": 18776 }, { "epoch": 0.9314450121533806, "grad_norm": 7.0636305809021, "learning_rate": 1.1785601805706315e-07, "loss": 0.3127, "step": 18777 }, { "epoch": 0.9314946177885808, "grad_norm": 6.134929656982422, "learning_rate": 1.1768619313738183e-07, "loss": 0.2721, "step": 18778 }, { "epoch": 0.931544223423781, "grad_norm": 4.725320339202881, "learning_rate": 1.1751648920364511e-07, "loss": 0.1869, "step": 18779 }, { "epoch": 0.9315938290589811, "grad_norm": 6.788449287414551, "learning_rate": 1.1734690626005796e-07, "loss": 0.261, "step": 18780 }, { "epoch": 0.9316434346941812, "grad_norm": 7.708705425262451, "learning_rate": 1.1717744431082423e-07, "loss": 0.3283, "step": 18781 }, { "epoch": 0.9316930403293814, "grad_norm": 9.756131172180176, "learning_rate": 1.1700810336014113e-07, "loss": 0.266, "step": 18782 }, { "epoch": 0.9317426459645816, "grad_norm": 6.83258056640625, "learning_rate": 1.1683888341220639e-07, "loss": 0.3571, "step": 18783 }, { "epoch": 0.9317922515997817, "grad_norm": 6.25071907043457, "learning_rate": 1.166697844712128e-07, "loss": 0.2559, "step": 18784 }, { "epoch": 0.9318418572349819, "grad_norm": 9.950784683227539, "learning_rate": 1.1650080654135087e-07, "loss": 0.3706, "step": 18785 }, { "epoch": 0.9318914628701821, "grad_norm": 6.25568151473999, "learning_rate": 1.1633194962680838e-07, "loss": 0.243, "step": 18786 }, { "epoch": 0.9319410685053822, "grad_norm": 7.3531389236450195, "learning_rate": 1.1616321373176919e-07, "loss": 0.3053, "step": 18787 }, { "epoch": 0.9319906741405823, "grad_norm": 12.661711692810059, "learning_rate": 1.1599459886041498e-07, "loss": 0.3063, "step": 18788 }, { "epoch": 0.9320402797757825, "grad_norm": 6.652085781097412, "learning_rate": 1.1582610501692348e-07, "loss": 0.2186, "step": 18789 }, { "epoch": 0.9320898854109827, "grad_norm": 8.196174621582031, "learning_rate": 1.1565773220547083e-07, "loss": 0.3559, "step": 18790 }, { "epoch": 0.9321394910461829, "grad_norm": 7.149220943450928, "learning_rate": 1.1548948043022867e-07, "loss": 0.2998, "step": 18791 }, { "epoch": 0.9321890966813831, "grad_norm": 5.933577537536621, "learning_rate": 1.1532134969536646e-07, "loss": 0.3528, "step": 18792 }, { "epoch": 0.9322387023165831, "grad_norm": 4.480226993560791, "learning_rate": 1.151533400050514e-07, "loss": 0.2132, "step": 18793 }, { "epoch": 0.9322883079517833, "grad_norm": 11.122827529907227, "learning_rate": 1.1498545136344575e-07, "loss": 0.3552, "step": 18794 }, { "epoch": 0.9323379135869835, "grad_norm": 5.6290788650512695, "learning_rate": 1.1481768377471059e-07, "loss": 0.2116, "step": 18795 }, { "epoch": 0.9323875192221837, "grad_norm": 4.064760208129883, "learning_rate": 1.1465003724300316e-07, "loss": 0.2171, "step": 18796 }, { "epoch": 0.9324371248573838, "grad_norm": 7.566169261932373, "learning_rate": 1.1448251177247682e-07, "loss": 0.2296, "step": 18797 }, { "epoch": 0.9324867304925839, "grad_norm": 7.713047027587891, "learning_rate": 1.143151073672849e-07, "loss": 0.3305, "step": 18798 }, { "epoch": 0.9325363361277841, "grad_norm": 11.674880981445312, "learning_rate": 1.1414782403157466e-07, "loss": 0.3149, "step": 18799 }, { "epoch": 0.9325859417629843, "grad_norm": 9.287993431091309, "learning_rate": 1.1398066176949052e-07, "loss": 0.3024, "step": 18800 }, { "epoch": 0.9326355473981844, "grad_norm": 9.355496406555176, "learning_rate": 1.1381362058517642e-07, "loss": 0.3292, "step": 18801 }, { "epoch": 0.9326851530333846, "grad_norm": 11.02487564086914, "learning_rate": 1.1364670048277127e-07, "loss": 0.3312, "step": 18802 }, { "epoch": 0.9327347586685848, "grad_norm": 6.252482891082764, "learning_rate": 1.1347990146641064e-07, "loss": 0.2853, "step": 18803 }, { "epoch": 0.9327843643037849, "grad_norm": 6.16845178604126, "learning_rate": 1.13313223540229e-07, "loss": 0.2554, "step": 18804 }, { "epoch": 0.932833969938985, "grad_norm": 3.8610939979553223, "learning_rate": 1.1314666670835583e-07, "loss": 0.2301, "step": 18805 }, { "epoch": 0.9328835755741852, "grad_norm": 11.75085735321045, "learning_rate": 1.1298023097491894e-07, "loss": 0.4391, "step": 18806 }, { "epoch": 0.9329331812093854, "grad_norm": 6.758932113647461, "learning_rate": 1.1281391634404226e-07, "loss": 0.2278, "step": 18807 }, { "epoch": 0.9329827868445856, "grad_norm": 5.5029401779174805, "learning_rate": 1.1264772281984803e-07, "loss": 0.1559, "step": 18808 }, { "epoch": 0.9330323924797858, "grad_norm": 10.788660049438477, "learning_rate": 1.1248165040645298e-07, "loss": 0.305, "step": 18809 }, { "epoch": 0.9330819981149858, "grad_norm": 8.310738563537598, "learning_rate": 1.1231569910797491e-07, "loss": 0.2375, "step": 18810 }, { "epoch": 0.933131603750186, "grad_norm": 6.576465129852295, "learning_rate": 1.1214986892852386e-07, "loss": 0.3545, "step": 18811 }, { "epoch": 0.9331812093853862, "grad_norm": 4.934376239776611, "learning_rate": 1.1198415987221045e-07, "loss": 0.3048, "step": 18812 }, { "epoch": 0.9332308150205864, "grad_norm": 6.897992134094238, "learning_rate": 1.1181857194314028e-07, "loss": 0.2577, "step": 18813 }, { "epoch": 0.9332804206557865, "grad_norm": 4.447527885437012, "learning_rate": 1.1165310514541728e-07, "loss": 0.1887, "step": 18814 }, { "epoch": 0.9333300262909866, "grad_norm": 6.088572025299072, "learning_rate": 1.114877594831415e-07, "loss": 0.2597, "step": 18815 }, { "epoch": 0.9333796319261868, "grad_norm": 3.953216075897217, "learning_rate": 1.1132253496041078e-07, "loss": 0.2385, "step": 18816 }, { "epoch": 0.933429237561387, "grad_norm": 6.616719722747803, "learning_rate": 1.1115743158131798e-07, "loss": 0.2481, "step": 18817 }, { "epoch": 0.9334788431965871, "grad_norm": 5.454404354095459, "learning_rate": 1.1099244934995646e-07, "loss": 0.2598, "step": 18818 }, { "epoch": 0.9335284488317873, "grad_norm": 5.31696891784668, "learning_rate": 1.1082758827041351e-07, "loss": 0.2299, "step": 18819 }, { "epoch": 0.9335780544669875, "grad_norm": 6.74648380279541, "learning_rate": 1.1066284834677421e-07, "loss": 0.2653, "step": 18820 }, { "epoch": 0.9336276601021876, "grad_norm": 7.503951072692871, "learning_rate": 1.1049822958312084e-07, "loss": 0.3178, "step": 18821 }, { "epoch": 0.9336772657373877, "grad_norm": 7.609936237335205, "learning_rate": 1.1033373198353403e-07, "loss": 0.2572, "step": 18822 }, { "epoch": 0.9337268713725879, "grad_norm": 4.2091851234436035, "learning_rate": 1.101693555520883e-07, "loss": 0.2716, "step": 18823 }, { "epoch": 0.9337764770077881, "grad_norm": 10.845766067504883, "learning_rate": 1.1000510029285871e-07, "loss": 0.2496, "step": 18824 }, { "epoch": 0.9338260826429883, "grad_norm": 4.172605991363525, "learning_rate": 1.0984096620991425e-07, "loss": 0.2649, "step": 18825 }, { "epoch": 0.9338756882781885, "grad_norm": 6.546325206756592, "learning_rate": 1.0967695330732276e-07, "loss": 0.2404, "step": 18826 }, { "epoch": 0.9339252939133885, "grad_norm": 7.662496089935303, "learning_rate": 1.095130615891482e-07, "loss": 0.3278, "step": 18827 }, { "epoch": 0.9339748995485887, "grad_norm": 10.913455963134766, "learning_rate": 1.0934929105945292e-07, "loss": 0.3494, "step": 18828 }, { "epoch": 0.9340245051837889, "grad_norm": 7.065659046173096, "learning_rate": 1.0918564172229362e-07, "loss": 0.2542, "step": 18829 }, { "epoch": 0.9340741108189891, "grad_norm": 3.965846538543701, "learning_rate": 1.090221135817271e-07, "loss": 0.1884, "step": 18830 }, { "epoch": 0.9341237164541892, "grad_norm": 6.805399417877197, "learning_rate": 1.0885870664180453e-07, "loss": 0.2501, "step": 18831 }, { "epoch": 0.9341733220893893, "grad_norm": 6.7843852043151855, "learning_rate": 1.086954209065766e-07, "loss": 0.3, "step": 18832 }, { "epoch": 0.9342229277245895, "grad_norm": 4.0344929695129395, "learning_rate": 1.085322563800878e-07, "loss": 0.2575, "step": 18833 }, { "epoch": 0.9342725333597897, "grad_norm": 4.288553714752197, "learning_rate": 1.083692130663827e-07, "loss": 0.2268, "step": 18834 }, { "epoch": 0.9343221389949898, "grad_norm": 7.408940315246582, "learning_rate": 1.0820629096950086e-07, "loss": 0.3078, "step": 18835 }, { "epoch": 0.93437174463019, "grad_norm": 11.366090774536133, "learning_rate": 1.0804349009348014e-07, "loss": 0.305, "step": 18836 }, { "epoch": 0.9344213502653902, "grad_norm": 9.593732833862305, "learning_rate": 1.078808104423551e-07, "loss": 0.366, "step": 18837 }, { "epoch": 0.9344709559005903, "grad_norm": 7.9203619956970215, "learning_rate": 1.077182520201564e-07, "loss": 0.3009, "step": 18838 }, { "epoch": 0.9345205615357904, "grad_norm": 6.3329925537109375, "learning_rate": 1.0755581483091193e-07, "loss": 0.2362, "step": 18839 }, { "epoch": 0.9345701671709906, "grad_norm": 4.970542907714844, "learning_rate": 1.0739349887864792e-07, "loss": 0.2096, "step": 18840 }, { "epoch": 0.9346197728061908, "grad_norm": 5.283435821533203, "learning_rate": 1.0723130416738559e-07, "loss": 0.2644, "step": 18841 }, { "epoch": 0.934669378441391, "grad_norm": 8.475232124328613, "learning_rate": 1.0706923070114505e-07, "loss": 0.2736, "step": 18842 }, { "epoch": 0.9347189840765912, "grad_norm": 18.59331703186035, "learning_rate": 1.0690727848394256e-07, "loss": 0.4703, "step": 18843 }, { "epoch": 0.9347685897117912, "grad_norm": 5.826147556304932, "learning_rate": 1.06745447519791e-07, "loss": 0.2928, "step": 18844 }, { "epoch": 0.9348181953469914, "grad_norm": 6.942152500152588, "learning_rate": 1.0658373781270104e-07, "loss": 0.2724, "step": 18845 }, { "epoch": 0.9348678009821916, "grad_norm": 10.512146949768066, "learning_rate": 1.0642214936667839e-07, "loss": 0.3632, "step": 18846 }, { "epoch": 0.9349174066173918, "grad_norm": 6.024167537689209, "learning_rate": 1.0626068218572983e-07, "loss": 0.2245, "step": 18847 }, { "epoch": 0.9349670122525919, "grad_norm": 5.850568771362305, "learning_rate": 1.0609933627385494e-07, "loss": 0.2797, "step": 18848 }, { "epoch": 0.935016617887792, "grad_norm": 17.369470596313477, "learning_rate": 1.0593811163505163e-07, "loss": 0.3159, "step": 18849 }, { "epoch": 0.9350662235229922, "grad_norm": 13.147327423095703, "learning_rate": 1.0577700827331617e-07, "loss": 0.3033, "step": 18850 }, { "epoch": 0.9351158291581924, "grad_norm": 5.187484264373779, "learning_rate": 1.0561602619264033e-07, "loss": 0.2009, "step": 18851 }, { "epoch": 0.9351654347933925, "grad_norm": 5.898248195648193, "learning_rate": 1.0545516539701317e-07, "loss": 0.1857, "step": 18852 }, { "epoch": 0.9352150404285927, "grad_norm": 9.131537437438965, "learning_rate": 1.0529442589042149e-07, "loss": 0.309, "step": 18853 }, { "epoch": 0.9352646460637929, "grad_norm": 9.308748245239258, "learning_rate": 1.0513380767684711e-07, "loss": 0.3037, "step": 18854 }, { "epoch": 0.935314251698993, "grad_norm": 10.155562400817871, "learning_rate": 1.0497331076027239e-07, "loss": 0.4614, "step": 18855 }, { "epoch": 0.9353638573341931, "grad_norm": 5.156620979309082, "learning_rate": 1.048129351446725e-07, "loss": 0.2986, "step": 18856 }, { "epoch": 0.9354134629693933, "grad_norm": 6.78565788269043, "learning_rate": 1.0465268083402258e-07, "loss": 0.2317, "step": 18857 }, { "epoch": 0.9354630686045935, "grad_norm": 7.9410929679870605, "learning_rate": 1.044925478322939e-07, "loss": 0.3018, "step": 18858 }, { "epoch": 0.9355126742397937, "grad_norm": 5.404781818389893, "learning_rate": 1.043325361434544e-07, "loss": 0.2711, "step": 18859 }, { "epoch": 0.9355622798749939, "grad_norm": 5.429087162017822, "learning_rate": 1.0417264577146813e-07, "loss": 0.2645, "step": 18860 }, { "epoch": 0.9356118855101939, "grad_norm": 5.505990982055664, "learning_rate": 1.040128767202997e-07, "loss": 0.293, "step": 18861 }, { "epoch": 0.9356614911453941, "grad_norm": 3.633247137069702, "learning_rate": 1.0385322899390649e-07, "loss": 0.2421, "step": 18862 }, { "epoch": 0.9357110967805943, "grad_norm": 10.219600677490234, "learning_rate": 1.0369370259624479e-07, "loss": 0.4031, "step": 18863 }, { "epoch": 0.9357607024157945, "grad_norm": 6.818748474121094, "learning_rate": 1.0353429753126753e-07, "loss": 0.2332, "step": 18864 }, { "epoch": 0.9358103080509946, "grad_norm": 10.174793243408203, "learning_rate": 1.0337501380292603e-07, "loss": 0.3479, "step": 18865 }, { "epoch": 0.9358599136861947, "grad_norm": 3.899838924407959, "learning_rate": 1.0321585141516655e-07, "loss": 0.2355, "step": 18866 }, { "epoch": 0.9359095193213949, "grad_norm": 10.22192668914795, "learning_rate": 1.0305681037193315e-07, "loss": 0.3934, "step": 18867 }, { "epoch": 0.9359591249565951, "grad_norm": 10.353561401367188, "learning_rate": 1.0289789067716716e-07, "loss": 0.3062, "step": 18868 }, { "epoch": 0.9360087305917952, "grad_norm": 4.921163082122803, "learning_rate": 1.0273909233480705e-07, "loss": 0.3048, "step": 18869 }, { "epoch": 0.9360583362269954, "grad_norm": 7.961733341217041, "learning_rate": 1.0258041534878693e-07, "loss": 0.1853, "step": 18870 }, { "epoch": 0.9361079418621956, "grad_norm": 3.1580183506011963, "learning_rate": 1.0242185972303974e-07, "loss": 0.1779, "step": 18871 }, { "epoch": 0.9361575474973957, "grad_norm": 5.215198516845703, "learning_rate": 1.0226342546149404e-07, "loss": 0.2357, "step": 18872 }, { "epoch": 0.9362071531325958, "grad_norm": 6.645596504211426, "learning_rate": 1.0210511256807664e-07, "loss": 0.2096, "step": 18873 }, { "epoch": 0.936256758767796, "grad_norm": 4.299086093902588, "learning_rate": 1.0194692104670889e-07, "loss": 0.208, "step": 18874 }, { "epoch": 0.9363063644029962, "grad_norm": 7.46790885925293, "learning_rate": 1.0178885090131374e-07, "loss": 0.3301, "step": 18875 }, { "epoch": 0.9363559700381964, "grad_norm": 8.787195205688477, "learning_rate": 1.0163090213580585e-07, "loss": 0.2757, "step": 18876 }, { "epoch": 0.9364055756733966, "grad_norm": 8.9862642288208, "learning_rate": 1.0147307475409984e-07, "loss": 0.3101, "step": 18877 }, { "epoch": 0.9364551813085966, "grad_norm": 3.9823050498962402, "learning_rate": 1.013153687601065e-07, "loss": 0.0694, "step": 18878 }, { "epoch": 0.9365047869437968, "grad_norm": 8.433734893798828, "learning_rate": 1.0115778415773492e-07, "loss": 0.3082, "step": 18879 }, { "epoch": 0.936554392578997, "grad_norm": 7.70374059677124, "learning_rate": 1.0100032095088918e-07, "loss": 0.2696, "step": 18880 }, { "epoch": 0.9366039982141972, "grad_norm": 3.832239866256714, "learning_rate": 1.0084297914347174e-07, "loss": 0.2862, "step": 18881 }, { "epoch": 0.9366536038493973, "grad_norm": 7.386139869689941, "learning_rate": 1.0068575873938114e-07, "loss": 0.3055, "step": 18882 }, { "epoch": 0.9367032094845974, "grad_norm": 7.185003280639648, "learning_rate": 1.005286597425137e-07, "loss": 0.3416, "step": 18883 }, { "epoch": 0.9367528151197976, "grad_norm": 11.849298477172852, "learning_rate": 1.003716821567624e-07, "loss": 0.4057, "step": 18884 }, { "epoch": 0.9368024207549978, "grad_norm": 3.623121738433838, "learning_rate": 1.0021482598601695e-07, "loss": 0.1551, "step": 18885 }, { "epoch": 0.9368520263901979, "grad_norm": 6.511542320251465, "learning_rate": 1.0005809123416477e-07, "loss": 0.2835, "step": 18886 }, { "epoch": 0.9369016320253981, "grad_norm": 6.897406578063965, "learning_rate": 9.990147790508997e-08, "loss": 0.3831, "step": 18887 }, { "epoch": 0.9369512376605983, "grad_norm": 8.874802589416504, "learning_rate": 9.974498600267224e-08, "loss": 0.3762, "step": 18888 }, { "epoch": 0.9370008432957984, "grad_norm": 14.175857543945312, "learning_rate": 9.95886155307907e-08, "loss": 0.4494, "step": 18889 }, { "epoch": 0.9370504489309985, "grad_norm": 9.842391014099121, "learning_rate": 9.943236649332e-08, "loss": 0.3205, "step": 18890 }, { "epoch": 0.9371000545661987, "grad_norm": 4.442887306213379, "learning_rate": 9.927623889413207e-08, "loss": 0.2279, "step": 18891 }, { "epoch": 0.9371496602013989, "grad_norm": 7.428386211395264, "learning_rate": 9.912023273709604e-08, "loss": 0.2498, "step": 18892 }, { "epoch": 0.9371992658365991, "grad_norm": 7.148815155029297, "learning_rate": 9.896434802607713e-08, "loss": 0.2476, "step": 18893 }, { "epoch": 0.9372488714717992, "grad_norm": 5.171117305755615, "learning_rate": 9.880858476493893e-08, "loss": 0.2796, "step": 18894 }, { "epoch": 0.9372984771069993, "grad_norm": 5.167929172515869, "learning_rate": 9.865294295754058e-08, "loss": 0.232, "step": 18895 }, { "epoch": 0.9373480827421995, "grad_norm": 10.804348945617676, "learning_rate": 9.849742260774009e-08, "loss": 0.2991, "step": 18896 }, { "epoch": 0.9373976883773997, "grad_norm": 9.739679336547852, "learning_rate": 9.83420237193905e-08, "loss": 0.3697, "step": 18897 }, { "epoch": 0.9374472940125999, "grad_norm": 4.396762371063232, "learning_rate": 9.818674629634261e-08, "loss": 0.222, "step": 18898 }, { "epoch": 0.9374968996478, "grad_norm": 4.804443836212158, "learning_rate": 9.803159034244447e-08, "loss": 0.2801, "step": 18899 }, { "epoch": 0.9375465052830001, "grad_norm": 10.248363494873047, "learning_rate": 9.787655586154132e-08, "loss": 0.3091, "step": 18900 }, { "epoch": 0.9375961109182003, "grad_norm": 8.313486099243164, "learning_rate": 9.772164285747398e-08, "loss": 0.3403, "step": 18901 }, { "epoch": 0.9376457165534005, "grad_norm": 6.847110748291016, "learning_rate": 9.756685133408217e-08, "loss": 0.2898, "step": 18902 }, { "epoch": 0.9376953221886006, "grad_norm": 7.856015682220459, "learning_rate": 9.741218129520114e-08, "loss": 0.2622, "step": 18903 }, { "epoch": 0.9377449278238008, "grad_norm": 9.752381324768066, "learning_rate": 9.725763274466449e-08, "loss": 0.3644, "step": 18904 }, { "epoch": 0.937794533459001, "grad_norm": 9.987922668457031, "learning_rate": 9.710320568630083e-08, "loss": 0.3564, "step": 18905 }, { "epoch": 0.9378441390942011, "grad_norm": 7.4306321144104, "learning_rate": 9.69489001239382e-08, "loss": 0.2857, "step": 18906 }, { "epoch": 0.9378937447294012, "grad_norm": 16.305238723754883, "learning_rate": 9.679471606139967e-08, "loss": 0.2855, "step": 18907 }, { "epoch": 0.9379433503646014, "grad_norm": 7.173310279846191, "learning_rate": 9.664065350250551e-08, "loss": 0.3536, "step": 18908 }, { "epoch": 0.9379929559998016, "grad_norm": 5.675821781158447, "learning_rate": 9.648671245107433e-08, "loss": 0.1978, "step": 18909 }, { "epoch": 0.9380425616350018, "grad_norm": 5.31739616394043, "learning_rate": 9.633289291092086e-08, "loss": 0.2793, "step": 18910 }, { "epoch": 0.938092167270202, "grad_norm": 4.895803451538086, "learning_rate": 9.617919488585536e-08, "loss": 0.2624, "step": 18911 }, { "epoch": 0.938141772905402, "grad_norm": 6.229432582855225, "learning_rate": 9.60256183796887e-08, "loss": 0.3297, "step": 18912 }, { "epoch": 0.9381913785406022, "grad_norm": 14.82739543914795, "learning_rate": 9.587216339622507e-08, "loss": 0.377, "step": 18913 }, { "epoch": 0.9382409841758024, "grad_norm": 4.6005659103393555, "learning_rate": 9.571882993926752e-08, "loss": 0.2689, "step": 18914 }, { "epoch": 0.9382905898110026, "grad_norm": 6.157167911529541, "learning_rate": 9.556561801261576e-08, "loss": 0.3076, "step": 18915 }, { "epoch": 0.9383401954462027, "grad_norm": 6.370330810546875, "learning_rate": 9.541252762006736e-08, "loss": 0.3099, "step": 18916 }, { "epoch": 0.9383898010814028, "grad_norm": 6.751593589782715, "learning_rate": 9.525955876541426e-08, "loss": 0.3255, "step": 18917 }, { "epoch": 0.938439406716603, "grad_norm": 5.384758949279785, "learning_rate": 9.510671145244844e-08, "loss": 0.301, "step": 18918 }, { "epoch": 0.9384890123518032, "grad_norm": 6.297911643981934, "learning_rate": 9.495398568495628e-08, "loss": 0.3088, "step": 18919 }, { "epoch": 0.9385386179870033, "grad_norm": 4.784641265869141, "learning_rate": 9.480138146672369e-08, "loss": 0.2966, "step": 18920 }, { "epoch": 0.9385882236222035, "grad_norm": 7.174399375915527, "learning_rate": 9.464889880153149e-08, "loss": 0.3016, "step": 18921 }, { "epoch": 0.9386378292574037, "grad_norm": 7.7914862632751465, "learning_rate": 9.449653769315892e-08, "loss": 0.2867, "step": 18922 }, { "epoch": 0.9386874348926038, "grad_norm": 5.150630950927734, "learning_rate": 9.434429814538071e-08, "loss": 0.2934, "step": 18923 }, { "epoch": 0.9387370405278039, "grad_norm": 6.208982944488525, "learning_rate": 9.419218016197052e-08, "loss": 0.2543, "step": 18924 }, { "epoch": 0.9387866461630041, "grad_norm": 8.693782806396484, "learning_rate": 9.404018374669643e-08, "loss": 0.3096, "step": 18925 }, { "epoch": 0.9388362517982043, "grad_norm": 6.1119256019592285, "learning_rate": 9.388830890332656e-08, "loss": 0.3235, "step": 18926 }, { "epoch": 0.9388858574334045, "grad_norm": 5.083118438720703, "learning_rate": 9.37365556356229e-08, "loss": 0.2664, "step": 18927 }, { "epoch": 0.9389354630686046, "grad_norm": 7.387030124664307, "learning_rate": 9.358492394734742e-08, "loss": 0.3674, "step": 18928 }, { "epoch": 0.9389850687038047, "grad_norm": 9.938764572143555, "learning_rate": 9.343341384225657e-08, "loss": 0.3035, "step": 18929 }, { "epoch": 0.9390346743390049, "grad_norm": 5.262739658355713, "learning_rate": 9.328202532410568e-08, "loss": 0.2248, "step": 18930 }, { "epoch": 0.9390842799742051, "grad_norm": 4.473442554473877, "learning_rate": 9.313075839664454e-08, "loss": 0.2248, "step": 18931 }, { "epoch": 0.9391338856094053, "grad_norm": 4.093027591705322, "learning_rate": 9.297961306362402e-08, "loss": 0.2442, "step": 18932 }, { "epoch": 0.9391834912446054, "grad_norm": 5.273280143737793, "learning_rate": 9.28285893287878e-08, "loss": 0.244, "step": 18933 }, { "epoch": 0.9392330968798055, "grad_norm": 10.987075805664062, "learning_rate": 9.2677687195879e-08, "loss": 0.2945, "step": 18934 }, { "epoch": 0.9392827025150057, "grad_norm": 6.538907527923584, "learning_rate": 9.25269066686374e-08, "loss": 0.1667, "step": 18935 }, { "epoch": 0.9393323081502059, "grad_norm": 14.729281425476074, "learning_rate": 9.23762477507989e-08, "loss": 0.3336, "step": 18936 }, { "epoch": 0.939381913785406, "grad_norm": 12.753551483154297, "learning_rate": 9.222571044609607e-08, "loss": 0.4527, "step": 18937 }, { "epoch": 0.9394315194206062, "grad_norm": 4.7777509689331055, "learning_rate": 9.207529475826093e-08, "loss": 0.2211, "step": 18938 }, { "epoch": 0.9394811250558064, "grad_norm": 12.203583717346191, "learning_rate": 9.192500069101939e-08, "loss": 0.3476, "step": 18939 }, { "epoch": 0.9395307306910065, "grad_norm": 6.201378345489502, "learning_rate": 9.177482824809736e-08, "loss": 0.2199, "step": 18940 }, { "epoch": 0.9395803363262066, "grad_norm": 8.755101203918457, "learning_rate": 9.162477743321463e-08, "loss": 0.2389, "step": 18941 }, { "epoch": 0.9396299419614068, "grad_norm": 9.14950942993164, "learning_rate": 9.147484825009046e-08, "loss": 0.3287, "step": 18942 }, { "epoch": 0.939679547596607, "grad_norm": 8.02890396118164, "learning_rate": 9.132504070243964e-08, "loss": 0.3392, "step": 18943 }, { "epoch": 0.9397291532318072, "grad_norm": 7.551154613494873, "learning_rate": 9.117535479397533e-08, "loss": 0.292, "step": 18944 }, { "epoch": 0.9397787588670073, "grad_norm": 5.517758369445801, "learning_rate": 9.102579052840565e-08, "loss": 0.2697, "step": 18945 }, { "epoch": 0.9398283645022074, "grad_norm": 6.9740166664123535, "learning_rate": 9.087634790943822e-08, "loss": 0.3069, "step": 18946 }, { "epoch": 0.9398779701374076, "grad_norm": 6.219710350036621, "learning_rate": 9.072702694077507e-08, "loss": 0.3014, "step": 18947 }, { "epoch": 0.9399275757726078, "grad_norm": 8.927661895751953, "learning_rate": 9.057782762611655e-08, "loss": 0.319, "step": 18948 }, { "epoch": 0.939977181407808, "grad_norm": 7.238189697265625, "learning_rate": 9.042874996916084e-08, "loss": 0.2715, "step": 18949 }, { "epoch": 0.9400267870430081, "grad_norm": 7.26317834854126, "learning_rate": 9.02797939736011e-08, "loss": 0.2455, "step": 18950 }, { "epoch": 0.9400763926782082, "grad_norm": 7.461441993713379, "learning_rate": 9.013095964312935e-08, "loss": 0.2801, "step": 18951 }, { "epoch": 0.9401259983134084, "grad_norm": 7.9193267822265625, "learning_rate": 8.998224698143265e-08, "loss": 0.299, "step": 18952 }, { "epoch": 0.9401756039486086, "grad_norm": 4.684781551361084, "learning_rate": 8.983365599219806e-08, "loss": 0.2823, "step": 18953 }, { "epoch": 0.9402252095838087, "grad_norm": 5.873636722564697, "learning_rate": 8.968518667910542e-08, "loss": 0.2071, "step": 18954 }, { "epoch": 0.9402748152190089, "grad_norm": 5.4300031661987305, "learning_rate": 8.953683904583621e-08, "loss": 0.3006, "step": 18955 }, { "epoch": 0.9403244208542091, "grad_norm": 6.685856819152832, "learning_rate": 8.938861309606473e-08, "loss": 0.1868, "step": 18956 }, { "epoch": 0.9403740264894092, "grad_norm": 7.111665725708008, "learning_rate": 8.924050883346468e-08, "loss": 0.2705, "step": 18957 }, { "epoch": 0.9404236321246093, "grad_norm": 14.542240142822266, "learning_rate": 8.909252626170595e-08, "loss": 0.3863, "step": 18958 }, { "epoch": 0.9404732377598095, "grad_norm": 13.484643936157227, "learning_rate": 8.894466538445611e-08, "loss": 0.3627, "step": 18959 }, { "epoch": 0.9405228433950097, "grad_norm": 9.40341567993164, "learning_rate": 8.879692620537895e-08, "loss": 0.3402, "step": 18960 }, { "epoch": 0.9405724490302099, "grad_norm": 6.770848751068115, "learning_rate": 8.864930872813593e-08, "loss": 0.4684, "step": 18961 }, { "epoch": 0.94062205466541, "grad_norm": 16.05809783935547, "learning_rate": 8.850181295638416e-08, "loss": 0.3774, "step": 18962 }, { "epoch": 0.9406716603006101, "grad_norm": 5.016775131225586, "learning_rate": 8.83544388937796e-08, "loss": 0.2485, "step": 18963 }, { "epoch": 0.9407212659358103, "grad_norm": 6.801857948303223, "learning_rate": 8.820718654397376e-08, "loss": 0.253, "step": 18964 }, { "epoch": 0.9407708715710105, "grad_norm": 5.016450881958008, "learning_rate": 8.806005591061539e-08, "loss": 0.2522, "step": 18965 }, { "epoch": 0.9408204772062106, "grad_norm": 6.687038898468018, "learning_rate": 8.791304699735104e-08, "loss": 0.3179, "step": 18966 }, { "epoch": 0.9408700828414108, "grad_norm": 4.615012168884277, "learning_rate": 8.776615980782333e-08, "loss": 0.1721, "step": 18967 }, { "epoch": 0.9409196884766109, "grad_norm": 6.310171604156494, "learning_rate": 8.761939434567213e-08, "loss": 0.2165, "step": 18968 }, { "epoch": 0.9409692941118111, "grad_norm": 5.5075883865356445, "learning_rate": 8.747275061453452e-08, "loss": 0.25, "step": 18969 }, { "epoch": 0.9410188997470113, "grad_norm": 8.875746726989746, "learning_rate": 8.732622861804429e-08, "loss": 0.3051, "step": 18970 }, { "epoch": 0.9410685053822114, "grad_norm": 4.942371845245361, "learning_rate": 8.717982835983241e-08, "loss": 0.3363, "step": 18971 }, { "epoch": 0.9411181110174116, "grad_norm": 5.364959239959717, "learning_rate": 8.703354984352652e-08, "loss": 0.2095, "step": 18972 }, { "epoch": 0.9411677166526118, "grad_norm": 9.057890892028809, "learning_rate": 8.688739307275151e-08, "loss": 0.3727, "step": 18973 }, { "epoch": 0.9412173222878119, "grad_norm": 5.8002495765686035, "learning_rate": 8.674135805112949e-08, "loss": 0.2178, "step": 18974 }, { "epoch": 0.941266927923012, "grad_norm": 6.773036956787109, "learning_rate": 8.659544478227976e-08, "loss": 0.2805, "step": 18975 }, { "epoch": 0.9413165335582122, "grad_norm": 16.647798538208008, "learning_rate": 8.644965326981613e-08, "loss": 0.4495, "step": 18976 }, { "epoch": 0.9413661391934124, "grad_norm": 4.077781677246094, "learning_rate": 8.630398351735403e-08, "loss": 0.1492, "step": 18977 }, { "epoch": 0.9414157448286126, "grad_norm": 5.1696977615356445, "learning_rate": 8.615843552850167e-08, "loss": 0.2138, "step": 18978 }, { "epoch": 0.9414653504638127, "grad_norm": 5.13774299621582, "learning_rate": 8.601300930686562e-08, "loss": 0.206, "step": 18979 }, { "epoch": 0.9415149560990128, "grad_norm": 9.2883939743042, "learning_rate": 8.586770485605023e-08, "loss": 0.3517, "step": 18980 }, { "epoch": 0.941564561734213, "grad_norm": 6.229229927062988, "learning_rate": 8.572252217965593e-08, "loss": 0.2523, "step": 18981 }, { "epoch": 0.9416141673694132, "grad_norm": 8.842266082763672, "learning_rate": 8.557746128128042e-08, "loss": 0.2513, "step": 18982 }, { "epoch": 0.9416637730046133, "grad_norm": 11.33237075805664, "learning_rate": 8.543252216451914e-08, "loss": 0.3527, "step": 18983 }, { "epoch": 0.9417133786398135, "grad_norm": 5.626935005187988, "learning_rate": 8.528770483296201e-08, "loss": 0.266, "step": 18984 }, { "epoch": 0.9417629842750136, "grad_norm": 10.065045356750488, "learning_rate": 8.514300929019947e-08, "loss": 0.3769, "step": 18985 }, { "epoch": 0.9418125899102138, "grad_norm": 9.977861404418945, "learning_rate": 8.49984355398159e-08, "loss": 0.3242, "step": 18986 }, { "epoch": 0.941862195545414, "grad_norm": 11.165189743041992, "learning_rate": 8.485398358539509e-08, "loss": 0.3702, "step": 18987 }, { "epoch": 0.9419118011806141, "grad_norm": 10.262898445129395, "learning_rate": 8.470965343051529e-08, "loss": 0.3049, "step": 18988 }, { "epoch": 0.9419614068158143, "grad_norm": 20.496604919433594, "learning_rate": 8.456544507875419e-08, "loss": 0.4857, "step": 18989 }, { "epoch": 0.9420110124510145, "grad_norm": 6.778773784637451, "learning_rate": 8.442135853368393e-08, "loss": 0.2176, "step": 18990 }, { "epoch": 0.9420606180862146, "grad_norm": 4.948479652404785, "learning_rate": 8.427739379887722e-08, "loss": 0.1686, "step": 18991 }, { "epoch": 0.9421102237214147, "grad_norm": 4.7400994300842285, "learning_rate": 8.413355087789954e-08, "loss": 0.2235, "step": 18992 }, { "epoch": 0.9421598293566149, "grad_norm": 7.90736198425293, "learning_rate": 8.398982977431636e-08, "loss": 0.3324, "step": 18993 }, { "epoch": 0.9422094349918151, "grad_norm": 6.506944179534912, "learning_rate": 8.384623049168871e-08, "loss": 0.2273, "step": 18994 }, { "epoch": 0.9422590406270153, "grad_norm": 7.906266212463379, "learning_rate": 8.3702753033576e-08, "loss": 0.3629, "step": 18995 }, { "epoch": 0.9423086462622153, "grad_norm": 4.0424957275390625, "learning_rate": 8.355939740353203e-08, "loss": 0.1607, "step": 18996 }, { "epoch": 0.9423582518974155, "grad_norm": 5.171513557434082, "learning_rate": 8.341616360511062e-08, "loss": 0.2622, "step": 18997 }, { "epoch": 0.9424078575326157, "grad_norm": 13.697964668273926, "learning_rate": 8.327305164186061e-08, "loss": 0.4913, "step": 18998 }, { "epoch": 0.9424574631678159, "grad_norm": 7.760273456573486, "learning_rate": 8.31300615173286e-08, "loss": 0.259, "step": 18999 }, { "epoch": 0.942507068803016, "grad_norm": 6.3715314865112305, "learning_rate": 8.298719323505789e-08, "loss": 0.2564, "step": 19000 }, { "epoch": 0.9425566744382162, "grad_norm": 6.748265743255615, "learning_rate": 8.284444679858838e-08, "loss": 0.2069, "step": 19001 }, { "epoch": 0.9426062800734163, "grad_norm": 10.752931594848633, "learning_rate": 8.270182221145784e-08, "loss": 0.4928, "step": 19002 }, { "epoch": 0.9426558857086165, "grad_norm": 12.368282318115234, "learning_rate": 8.255931947720008e-08, "loss": 0.442, "step": 19003 }, { "epoch": 0.9427054913438166, "grad_norm": 4.377119064331055, "learning_rate": 8.241693859934785e-08, "loss": 0.2379, "step": 19004 }, { "epoch": 0.9427550969790168, "grad_norm": 6.324795246124268, "learning_rate": 8.227467958142777e-08, "loss": 0.319, "step": 19005 }, { "epoch": 0.942804702614217, "grad_norm": 4.592115879058838, "learning_rate": 8.213254242696533e-08, "loss": 0.2098, "step": 19006 }, { "epoch": 0.9428543082494172, "grad_norm": 8.223610877990723, "learning_rate": 8.19905271394833e-08, "loss": 0.3086, "step": 19007 }, { "epoch": 0.9429039138846173, "grad_norm": 6.346108913421631, "learning_rate": 8.184863372250106e-08, "loss": 0.2983, "step": 19008 }, { "epoch": 0.9429535195198174, "grad_norm": 10.320975303649902, "learning_rate": 8.17068621795336e-08, "loss": 0.3271, "step": 19009 }, { "epoch": 0.9430031251550176, "grad_norm": 9.028883934020996, "learning_rate": 8.156521251409588e-08, "loss": 0.3225, "step": 19010 }, { "epoch": 0.9430527307902178, "grad_norm": 7.209604740142822, "learning_rate": 8.142368472969619e-08, "loss": 0.2862, "step": 19011 }, { "epoch": 0.943102336425418, "grad_norm": 5.714145660400391, "learning_rate": 8.128227882984341e-08, "loss": 0.2691, "step": 19012 }, { "epoch": 0.943151942060618, "grad_norm": 9.830124855041504, "learning_rate": 8.114099481803971e-08, "loss": 0.3931, "step": 19013 }, { "epoch": 0.9432015476958182, "grad_norm": 4.960953712463379, "learning_rate": 8.099983269778788e-08, "loss": 0.3106, "step": 19014 }, { "epoch": 0.9432511533310184, "grad_norm": 24.533300399780273, "learning_rate": 8.08587924725851e-08, "loss": 0.3955, "step": 19015 }, { "epoch": 0.9433007589662186, "grad_norm": 6.473985195159912, "learning_rate": 8.071787414592636e-08, "loss": 0.3191, "step": 19016 }, { "epoch": 0.9433503646014187, "grad_norm": 8.002817153930664, "learning_rate": 8.057707772130441e-08, "loss": 0.2799, "step": 19017 }, { "epoch": 0.9433999702366189, "grad_norm": 8.09632396697998, "learning_rate": 8.043640320220758e-08, "loss": 0.3695, "step": 19018 }, { "epoch": 0.943449575871819, "grad_norm": 14.674884796142578, "learning_rate": 8.029585059212142e-08, "loss": 0.3438, "step": 19019 }, { "epoch": 0.9434991815070192, "grad_norm": 7.396935939788818, "learning_rate": 8.01554198945309e-08, "loss": 0.2746, "step": 19020 }, { "epoch": 0.9435487871422193, "grad_norm": 5.948664665222168, "learning_rate": 8.001511111291327e-08, "loss": 0.2967, "step": 19021 }, { "epoch": 0.9435983927774195, "grad_norm": 5.830400466918945, "learning_rate": 7.987492425074738e-08, "loss": 0.2204, "step": 19022 }, { "epoch": 0.9436479984126197, "grad_norm": 6.77229118347168, "learning_rate": 7.973485931150604e-08, "loss": 0.3743, "step": 19023 }, { "epoch": 0.9436976040478199, "grad_norm": 5.81315803527832, "learning_rate": 7.959491629866145e-08, "loss": 0.2277, "step": 19024 }, { "epoch": 0.94374720968302, "grad_norm": 3.382697582244873, "learning_rate": 7.945509521567973e-08, "loss": 0.2515, "step": 19025 }, { "epoch": 0.9437968153182201, "grad_norm": 13.061014175415039, "learning_rate": 7.931539606602701e-08, "loss": 0.3459, "step": 19026 }, { "epoch": 0.9438464209534203, "grad_norm": 6.055792808532715, "learning_rate": 7.917581885316383e-08, "loss": 0.2401, "step": 19027 }, { "epoch": 0.9438960265886205, "grad_norm": 11.506163597106934, "learning_rate": 7.903636358055078e-08, "loss": 0.3808, "step": 19028 }, { "epoch": 0.9439456322238207, "grad_norm": 9.213884353637695, "learning_rate": 7.88970302516423e-08, "loss": 0.325, "step": 19029 }, { "epoch": 0.9439952378590207, "grad_norm": 11.28193473815918, "learning_rate": 7.87578188698912e-08, "loss": 0.249, "step": 19030 }, { "epoch": 0.9440448434942209, "grad_norm": 13.461954116821289, "learning_rate": 7.861872943874748e-08, "loss": 0.3974, "step": 19031 }, { "epoch": 0.9440944491294211, "grad_norm": 13.623696327209473, "learning_rate": 7.84797619616584e-08, "loss": 0.3792, "step": 19032 }, { "epoch": 0.9441440547646213, "grad_norm": 3.908940076828003, "learning_rate": 7.83409164420662e-08, "loss": 0.2631, "step": 19033 }, { "epoch": 0.9441936603998214, "grad_norm": 3.932523488998413, "learning_rate": 7.820219288341368e-08, "loss": 0.2028, "step": 19034 }, { "epoch": 0.9442432660350216, "grad_norm": 11.10851001739502, "learning_rate": 7.806359128913588e-08, "loss": 0.3275, "step": 19035 }, { "epoch": 0.9442928716702217, "grad_norm": 7.176726818084717, "learning_rate": 7.792511166266947e-08, "loss": 0.3607, "step": 19036 }, { "epoch": 0.9443424773054219, "grad_norm": 9.429859161376953, "learning_rate": 7.778675400744506e-08, "loss": 0.2447, "step": 19037 }, { "epoch": 0.944392082940622, "grad_norm": 7.967405796051025, "learning_rate": 7.764851832689158e-08, "loss": 0.1903, "step": 19038 }, { "epoch": 0.9444416885758222, "grad_norm": 7.06331729888916, "learning_rate": 7.751040462443405e-08, "loss": 0.2523, "step": 19039 }, { "epoch": 0.9444912942110224, "grad_norm": 14.31532096862793, "learning_rate": 7.737241290349584e-08, "loss": 0.2921, "step": 19040 }, { "epoch": 0.9445408998462226, "grad_norm": 12.642315864562988, "learning_rate": 7.723454316749591e-08, "loss": 0.4009, "step": 19041 }, { "epoch": 0.9445905054814226, "grad_norm": 17.35093116760254, "learning_rate": 7.709679541985037e-08, "loss": 0.2973, "step": 19042 }, { "epoch": 0.9446401111166228, "grad_norm": 13.470988273620605, "learning_rate": 7.69591696639732e-08, "loss": 0.4755, "step": 19043 }, { "epoch": 0.944689716751823, "grad_norm": 4.645732402801514, "learning_rate": 7.6821665903275e-08, "loss": 0.1964, "step": 19044 }, { "epoch": 0.9447393223870232, "grad_norm": 5.8090667724609375, "learning_rate": 7.668428414116303e-08, "loss": 0.2717, "step": 19045 }, { "epoch": 0.9447889280222234, "grad_norm": 6.236560821533203, "learning_rate": 7.654702438104178e-08, "loss": 0.2415, "step": 19046 }, { "epoch": 0.9448385336574234, "grad_norm": 7.403571605682373, "learning_rate": 7.640988662631132e-08, "loss": 0.2647, "step": 19047 }, { "epoch": 0.9448881392926236, "grad_norm": 8.348516464233398, "learning_rate": 7.627287088037227e-08, "loss": 0.3497, "step": 19048 }, { "epoch": 0.9449377449278238, "grad_norm": 11.455822944641113, "learning_rate": 7.613597714661803e-08, "loss": 0.3475, "step": 19049 }, { "epoch": 0.944987350563024, "grad_norm": 8.495636940002441, "learning_rate": 7.599920542844196e-08, "loss": 0.3047, "step": 19050 }, { "epoch": 0.9450369561982241, "grad_norm": 6.351198196411133, "learning_rate": 7.586255572923251e-08, "loss": 0.3101, "step": 19051 }, { "epoch": 0.9450865618334243, "grad_norm": 8.76816177368164, "learning_rate": 7.572602805237638e-08, "loss": 0.3508, "step": 19052 }, { "epoch": 0.9451361674686244, "grad_norm": 10.566190719604492, "learning_rate": 7.5589622401257e-08, "loss": 0.2681, "step": 19053 }, { "epoch": 0.9451857731038246, "grad_norm": 5.419559478759766, "learning_rate": 7.545333877925443e-08, "loss": 0.2388, "step": 19054 }, { "epoch": 0.9452353787390247, "grad_norm": 5.371800899505615, "learning_rate": 7.531717718974596e-08, "loss": 0.2857, "step": 19055 }, { "epoch": 0.9452849843742249, "grad_norm": 9.049001693725586, "learning_rate": 7.518113763610501e-08, "loss": 0.2354, "step": 19056 }, { "epoch": 0.9453345900094251, "grad_norm": 4.7615275382995605, "learning_rate": 7.504522012170335e-08, "loss": 0.2213, "step": 19057 }, { "epoch": 0.9453841956446253, "grad_norm": 9.607431411743164, "learning_rate": 7.490942464990881e-08, "loss": 0.314, "step": 19058 }, { "epoch": 0.9454338012798253, "grad_norm": 5.794699192047119, "learning_rate": 7.477375122408647e-08, "loss": 0.3173, "step": 19059 }, { "epoch": 0.9454834069150255, "grad_norm": 10.785768508911133, "learning_rate": 7.463819984759868e-08, "loss": 0.3972, "step": 19060 }, { "epoch": 0.9455330125502257, "grad_norm": 10.702303886413574, "learning_rate": 7.450277052380495e-08, "loss": 0.3665, "step": 19061 }, { "epoch": 0.9455826181854259, "grad_norm": 7.856738090515137, "learning_rate": 7.436746325605981e-08, "loss": 0.3364, "step": 19062 }, { "epoch": 0.9456322238206261, "grad_norm": 6.133291244506836, "learning_rate": 7.423227804771727e-08, "loss": 0.2499, "step": 19063 }, { "epoch": 0.9456818294558261, "grad_norm": 9.20652961730957, "learning_rate": 7.409721490212684e-08, "loss": 0.3158, "step": 19064 }, { "epoch": 0.9457314350910263, "grad_norm": 6.001690864562988, "learning_rate": 7.396227382263588e-08, "loss": 0.2818, "step": 19065 }, { "epoch": 0.9457810407262265, "grad_norm": 7.4402570724487305, "learning_rate": 7.382745481258779e-08, "loss": 0.2492, "step": 19066 }, { "epoch": 0.9458306463614267, "grad_norm": 4.0904035568237305, "learning_rate": 7.369275787532437e-08, "loss": 0.2725, "step": 19067 }, { "epoch": 0.9458802519966268, "grad_norm": 5.674670696258545, "learning_rate": 7.355818301418182e-08, "loss": 0.2844, "step": 19068 }, { "epoch": 0.945929857631827, "grad_norm": 10.890243530273438, "learning_rate": 7.342373023249693e-08, "loss": 0.3109, "step": 19069 }, { "epoch": 0.9459794632670271, "grad_norm": 3.5400500297546387, "learning_rate": 7.328939953359982e-08, "loss": 0.1998, "step": 19070 }, { "epoch": 0.9460290689022273, "grad_norm": 6.045274257659912, "learning_rate": 7.315519092082058e-08, "loss": 0.289, "step": 19071 }, { "epoch": 0.9460786745374274, "grad_norm": 3.9927523136138916, "learning_rate": 7.302110439748433e-08, "loss": 0.1987, "step": 19072 }, { "epoch": 0.9461282801726276, "grad_norm": 5.197790145874023, "learning_rate": 7.288713996691344e-08, "loss": 0.2837, "step": 19073 }, { "epoch": 0.9461778858078278, "grad_norm": 6.617076396942139, "learning_rate": 7.2753297632428e-08, "loss": 0.307, "step": 19074 }, { "epoch": 0.946227491443028, "grad_norm": 9.738044738769531, "learning_rate": 7.261957739734538e-08, "loss": 0.2871, "step": 19075 }, { "epoch": 0.946277097078228, "grad_norm": 10.553389549255371, "learning_rate": 7.248597926497735e-08, "loss": 0.3633, "step": 19076 }, { "epoch": 0.9463267027134282, "grad_norm": 4.620861053466797, "learning_rate": 7.235250323863685e-08, "loss": 0.2331, "step": 19077 }, { "epoch": 0.9463763083486284, "grad_norm": 11.695831298828125, "learning_rate": 7.221914932162954e-08, "loss": 0.3571, "step": 19078 }, { "epoch": 0.9464259139838286, "grad_norm": 5.486923694610596, "learning_rate": 7.208591751726112e-08, "loss": 0.2828, "step": 19079 }, { "epoch": 0.9464755196190288, "grad_norm": 4.06318998336792, "learning_rate": 7.195280782883285e-08, "loss": 0.2147, "step": 19080 }, { "epoch": 0.9465251252542288, "grad_norm": 5.14914608001709, "learning_rate": 7.181982025964318e-08, "loss": 0.1771, "step": 19081 }, { "epoch": 0.946574730889429, "grad_norm": 4.8047356605529785, "learning_rate": 7.168695481298727e-08, "loss": 0.2547, "step": 19082 }, { "epoch": 0.9466243365246292, "grad_norm": 9.921277046203613, "learning_rate": 7.155421149215857e-08, "loss": 0.3121, "step": 19083 }, { "epoch": 0.9466739421598294, "grad_norm": 6.810436248779297, "learning_rate": 7.142159030044505e-08, "loss": 0.2774, "step": 19084 }, { "epoch": 0.9467235477950295, "grad_norm": 6.448984146118164, "learning_rate": 7.128909124113459e-08, "loss": 0.2713, "step": 19085 }, { "epoch": 0.9467731534302297, "grad_norm": 5.251317024230957, "learning_rate": 7.115671431751014e-08, "loss": 0.26, "step": 19086 }, { "epoch": 0.9468227590654298, "grad_norm": 6.674148082733154, "learning_rate": 7.102445953285131e-08, "loss": 0.3078, "step": 19087 }, { "epoch": 0.94687236470063, "grad_norm": 4.80713415145874, "learning_rate": 7.089232689043602e-08, "loss": 0.2352, "step": 19088 }, { "epoch": 0.9469219703358301, "grad_norm": 5.021312713623047, "learning_rate": 7.076031639353942e-08, "loss": 0.2659, "step": 19089 }, { "epoch": 0.9469715759710303, "grad_norm": 18.318246841430664, "learning_rate": 7.062842804543058e-08, "loss": 0.3145, "step": 19090 }, { "epoch": 0.9470211816062305, "grad_norm": 6.446197986602783, "learning_rate": 7.04966618493802e-08, "loss": 0.3008, "step": 19091 }, { "epoch": 0.9470707872414307, "grad_norm": 7.27470588684082, "learning_rate": 7.03650178086518e-08, "loss": 0.3088, "step": 19092 }, { "epoch": 0.9471203928766307, "grad_norm": 7.281439304351807, "learning_rate": 7.02334959265083e-08, "loss": 0.3013, "step": 19093 }, { "epoch": 0.9471699985118309, "grad_norm": 13.041946411132812, "learning_rate": 7.010209620620879e-08, "loss": 0.2928, "step": 19094 }, { "epoch": 0.9472196041470311, "grad_norm": 8.043124198913574, "learning_rate": 6.997081865100953e-08, "loss": 0.2297, "step": 19095 }, { "epoch": 0.9472692097822313, "grad_norm": 6.495877265930176, "learning_rate": 6.983966326416291e-08, "loss": 0.257, "step": 19096 }, { "epoch": 0.9473188154174315, "grad_norm": 10.513018608093262, "learning_rate": 6.970863004892026e-08, "loss": 0.3904, "step": 19097 }, { "epoch": 0.9473684210526315, "grad_norm": 5.385434150695801, "learning_rate": 6.957771900852728e-08, "loss": 0.2569, "step": 19098 }, { "epoch": 0.9474180266878317, "grad_norm": 6.15402364730835, "learning_rate": 6.944693014622971e-08, "loss": 0.2564, "step": 19099 }, { "epoch": 0.9474676323230319, "grad_norm": 7.259485244750977, "learning_rate": 6.931626346526666e-08, "loss": 0.3341, "step": 19100 }, { "epoch": 0.9475172379582321, "grad_norm": 4.716643333435059, "learning_rate": 6.918571896887716e-08, "loss": 0.2426, "step": 19101 }, { "epoch": 0.9475668435934322, "grad_norm": 8.969625473022461, "learning_rate": 6.905529666029643e-08, "loss": 0.2029, "step": 19102 }, { "epoch": 0.9476164492286324, "grad_norm": 14.728294372558594, "learning_rate": 6.892499654275575e-08, "loss": 0.3508, "step": 19103 }, { "epoch": 0.9476660548638325, "grad_norm": 7.053300857543945, "learning_rate": 6.879481861948368e-08, "loss": 0.2529, "step": 19104 }, { "epoch": 0.9477156604990327, "grad_norm": 8.504460334777832, "learning_rate": 6.866476289370761e-08, "loss": 0.3018, "step": 19105 }, { "epoch": 0.9477652661342328, "grad_norm": 4.8622026443481445, "learning_rate": 6.85348293686483e-08, "loss": 0.2676, "step": 19106 }, { "epoch": 0.947814871769433, "grad_norm": 8.725312232971191, "learning_rate": 6.840501804752764e-08, "loss": 0.3201, "step": 19107 }, { "epoch": 0.9478644774046332, "grad_norm": 9.518095016479492, "learning_rate": 6.827532893356081e-08, "loss": 0.3278, "step": 19108 }, { "epoch": 0.9479140830398334, "grad_norm": 6.546563148498535, "learning_rate": 6.814576202996248e-08, "loss": 0.3328, "step": 19109 }, { "epoch": 0.9479636886750334, "grad_norm": 8.989126205444336, "learning_rate": 6.801631733994285e-08, "loss": 0.3072, "step": 19110 }, { "epoch": 0.9480132943102336, "grad_norm": 7.214967250823975, "learning_rate": 6.788699486670992e-08, "loss": 0.2892, "step": 19111 }, { "epoch": 0.9480628999454338, "grad_norm": 5.93687105178833, "learning_rate": 6.775779461346832e-08, "loss": 0.2897, "step": 19112 }, { "epoch": 0.948112505580634, "grad_norm": 4.707129001617432, "learning_rate": 6.762871658341997e-08, "loss": 0.2439, "step": 19113 }, { "epoch": 0.9481621112158342, "grad_norm": 4.655182361602783, "learning_rate": 6.749976077976284e-08, "loss": 0.2692, "step": 19114 }, { "epoch": 0.9482117168510342, "grad_norm": 10.186295509338379, "learning_rate": 6.737092720569271e-08, "loss": 0.3364, "step": 19115 }, { "epoch": 0.9482613224862344, "grad_norm": 5.52255916595459, "learning_rate": 6.72422158644026e-08, "loss": 0.2054, "step": 19116 }, { "epoch": 0.9483109281214346, "grad_norm": 4.9956536293029785, "learning_rate": 6.711362675908162e-08, "loss": 0.2365, "step": 19117 }, { "epoch": 0.9483605337566348, "grad_norm": 22.054384231567383, "learning_rate": 6.69851598929161e-08, "loss": 0.4883, "step": 19118 }, { "epoch": 0.9484101393918349, "grad_norm": 8.0869722366333, "learning_rate": 6.685681526909016e-08, "loss": 0.2667, "step": 19119 }, { "epoch": 0.9484597450270351, "grad_norm": 6.191868782043457, "learning_rate": 6.672859289078404e-08, "loss": 0.3005, "step": 19120 }, { "epoch": 0.9485093506622352, "grad_norm": 6.479495525360107, "learning_rate": 6.660049276117464e-08, "loss": 0.2405, "step": 19121 }, { "epoch": 0.9485589562974354, "grad_norm": 10.334979057312012, "learning_rate": 6.647251488343665e-08, "loss": 0.3861, "step": 19122 }, { "epoch": 0.9486085619326355, "grad_norm": 7.860386371612549, "learning_rate": 6.634465926074141e-08, "loss": 0.2441, "step": 19123 }, { "epoch": 0.9486581675678357, "grad_norm": 13.461048126220703, "learning_rate": 6.621692589625694e-08, "loss": 0.312, "step": 19124 }, { "epoch": 0.9487077732030359, "grad_norm": 6.0055832862854, "learning_rate": 6.608931479314906e-08, "loss": 0.3084, "step": 19125 }, { "epoch": 0.9487573788382361, "grad_norm": 7.041101932525635, "learning_rate": 6.596182595458022e-08, "loss": 0.2487, "step": 19126 }, { "epoch": 0.9488069844734361, "grad_norm": 5.5868000984191895, "learning_rate": 6.583445938370847e-08, "loss": 0.3209, "step": 19127 }, { "epoch": 0.9488565901086363, "grad_norm": 5.346471309661865, "learning_rate": 6.570721508369127e-08, "loss": 0.1845, "step": 19128 }, { "epoch": 0.9489061957438365, "grad_norm": 16.778945922851562, "learning_rate": 6.558009305768165e-08, "loss": 0.3715, "step": 19129 }, { "epoch": 0.9489558013790367, "grad_norm": 9.99305248260498, "learning_rate": 6.545309330882876e-08, "loss": 0.2894, "step": 19130 }, { "epoch": 0.9490054070142369, "grad_norm": 7.763658046722412, "learning_rate": 6.532621584028065e-08, "loss": 0.372, "step": 19131 }, { "epoch": 0.9490550126494369, "grad_norm": 7.569161891937256, "learning_rate": 6.519946065518146e-08, "loss": 0.26, "step": 19132 }, { "epoch": 0.9491046182846371, "grad_norm": 8.832688331604004, "learning_rate": 6.50728277566709e-08, "loss": 0.3333, "step": 19133 }, { "epoch": 0.9491542239198373, "grad_norm": 10.812568664550781, "learning_rate": 6.494631714788869e-08, "loss": 0.3304, "step": 19134 }, { "epoch": 0.9492038295550375, "grad_norm": 18.087329864501953, "learning_rate": 6.481992883196841e-08, "loss": 0.3476, "step": 19135 }, { "epoch": 0.9492534351902376, "grad_norm": 11.01569652557373, "learning_rate": 6.46936628120437e-08, "loss": 0.4343, "step": 19136 }, { "epoch": 0.9493030408254378, "grad_norm": 16.605440139770508, "learning_rate": 6.456751909124203e-08, "loss": 0.3545, "step": 19137 }, { "epoch": 0.9493526464606379, "grad_norm": 9.789993286132812, "learning_rate": 6.444149767268981e-08, "loss": 0.4288, "step": 19138 }, { "epoch": 0.9494022520958381, "grad_norm": 8.067736625671387, "learning_rate": 6.431559855951008e-08, "loss": 0.1527, "step": 19139 }, { "epoch": 0.9494518577310382, "grad_norm": 7.838983535766602, "learning_rate": 6.418982175482258e-08, "loss": 0.3305, "step": 19140 }, { "epoch": 0.9495014633662384, "grad_norm": 6.921988010406494, "learning_rate": 6.406416726174314e-08, "loss": 0.3206, "step": 19141 }, { "epoch": 0.9495510690014386, "grad_norm": 4.5811944007873535, "learning_rate": 6.39386350833876e-08, "loss": 0.3001, "step": 19142 }, { "epoch": 0.9496006746366388, "grad_norm": 4.923741817474365, "learning_rate": 6.38132252228646e-08, "loss": 0.2538, "step": 19143 }, { "epoch": 0.9496502802718388, "grad_norm": 6.006072044372559, "learning_rate": 6.368793768328274e-08, "loss": 0.1657, "step": 19144 }, { "epoch": 0.949699885907039, "grad_norm": 7.706988334655762, "learning_rate": 6.356277246774733e-08, "loss": 0.2395, "step": 19145 }, { "epoch": 0.9497494915422392, "grad_norm": 7.249891757965088, "learning_rate": 6.343772957935923e-08, "loss": 0.304, "step": 19146 }, { "epoch": 0.9497990971774394, "grad_norm": 11.896976470947266, "learning_rate": 6.331280902121706e-08, "loss": 0.3501, "step": 19147 }, { "epoch": 0.9498487028126396, "grad_norm": 6.410202503204346, "learning_rate": 6.318801079641667e-08, "loss": 0.287, "step": 19148 }, { "epoch": 0.9498983084478396, "grad_norm": 5.43602180480957, "learning_rate": 6.306333490805061e-08, "loss": 0.2377, "step": 19149 }, { "epoch": 0.9499479140830398, "grad_norm": 4.971682071685791, "learning_rate": 6.293878135920806e-08, "loss": 0.2681, "step": 19150 }, { "epoch": 0.94999751971824, "grad_norm": 4.928112506866455, "learning_rate": 6.281435015297598e-08, "loss": 0.3004, "step": 19151 }, { "epoch": 0.9500471253534402, "grad_norm": 5.286684989929199, "learning_rate": 6.26900412924375e-08, "loss": 0.2756, "step": 19152 }, { "epoch": 0.9500967309886403, "grad_norm": 10.68917465209961, "learning_rate": 6.256585478067345e-08, "loss": 0.3605, "step": 19153 }, { "epoch": 0.9501463366238405, "grad_norm": 5.883572578430176, "learning_rate": 6.244179062076083e-08, "loss": 0.2932, "step": 19154 }, { "epoch": 0.9501959422590406, "grad_norm": 10.803668975830078, "learning_rate": 6.231784881577385e-08, "loss": 0.2899, "step": 19155 }, { "epoch": 0.9502455478942408, "grad_norm": 5.731359004974365, "learning_rate": 6.21940293687845e-08, "loss": 0.2873, "step": 19156 }, { "epoch": 0.9502951535294409, "grad_norm": 12.008597373962402, "learning_rate": 6.207033228286086e-08, "loss": 0.3307, "step": 19157 }, { "epoch": 0.9503447591646411, "grad_norm": 9.673324584960938, "learning_rate": 6.194675756106771e-08, "loss": 0.3325, "step": 19158 }, { "epoch": 0.9503943647998413, "grad_norm": 4.463616847991943, "learning_rate": 6.182330520646762e-08, "loss": 0.2546, "step": 19159 }, { "epoch": 0.9504439704350415, "grad_norm": 4.300642967224121, "learning_rate": 6.169997522212034e-08, "loss": 0.1852, "step": 19160 }, { "epoch": 0.9504935760702415, "grad_norm": 6.242547988891602, "learning_rate": 6.157676761108067e-08, "loss": 0.2789, "step": 19161 }, { "epoch": 0.9505431817054417, "grad_norm": 11.720561981201172, "learning_rate": 6.145368237640336e-08, "loss": 0.2983, "step": 19162 }, { "epoch": 0.9505927873406419, "grad_norm": 9.00255298614502, "learning_rate": 6.133071952113712e-08, "loss": 0.3204, "step": 19163 }, { "epoch": 0.9506423929758421, "grad_norm": 4.857428550720215, "learning_rate": 6.120787904833003e-08, "loss": 0.2874, "step": 19164 }, { "epoch": 0.9506919986110423, "grad_norm": 6.662024974822998, "learning_rate": 6.108516096102579e-08, "loss": 0.3267, "step": 19165 }, { "epoch": 0.9507416042462423, "grad_norm": 7.981224060058594, "learning_rate": 6.096256526226475e-08, "loss": 0.1769, "step": 19166 }, { "epoch": 0.9507912098814425, "grad_norm": 10.569823265075684, "learning_rate": 6.084009195508611e-08, "loss": 0.3157, "step": 19167 }, { "epoch": 0.9508408155166427, "grad_norm": 5.164368629455566, "learning_rate": 6.071774104252415e-08, "loss": 0.2796, "step": 19168 }, { "epoch": 0.9508904211518429, "grad_norm": 4.322676658630371, "learning_rate": 6.059551252761087e-08, "loss": 0.231, "step": 19169 }, { "epoch": 0.950940026787043, "grad_norm": 3.4513604640960693, "learning_rate": 6.047340641337551e-08, "loss": 0.1854, "step": 19170 }, { "epoch": 0.9509896324222432, "grad_norm": 6.96325159072876, "learning_rate": 6.035142270284289e-08, "loss": 0.2663, "step": 19171 }, { "epoch": 0.9510392380574433, "grad_norm": 3.9182920455932617, "learning_rate": 6.022956139903668e-08, "loss": 0.2364, "step": 19172 }, { "epoch": 0.9510888436926435, "grad_norm": 7.684162139892578, "learning_rate": 6.010782250497615e-08, "loss": 0.3193, "step": 19173 }, { "epoch": 0.9511384493278436, "grad_norm": 8.653568267822266, "learning_rate": 5.998620602367888e-08, "loss": 0.3589, "step": 19174 }, { "epoch": 0.9511880549630438, "grad_norm": 22.07525634765625, "learning_rate": 5.986471195815746e-08, "loss": 0.307, "step": 19175 }, { "epoch": 0.951237660598244, "grad_norm": 8.378491401672363, "learning_rate": 5.974334031142337e-08, "loss": 0.2802, "step": 19176 }, { "epoch": 0.9512872662334442, "grad_norm": 19.446556091308594, "learning_rate": 5.96220910864842e-08, "loss": 0.3066, "step": 19177 }, { "epoch": 0.9513368718686442, "grad_norm": 5.710622310638428, "learning_rate": 5.950096428634422e-08, "loss": 0.2626, "step": 19178 }, { "epoch": 0.9513864775038444, "grad_norm": 6.6420207023620605, "learning_rate": 5.937995991400547e-08, "loss": 0.2348, "step": 19179 }, { "epoch": 0.9514360831390446, "grad_norm": 9.184794425964355, "learning_rate": 5.925907797246611e-08, "loss": 0.2603, "step": 19180 }, { "epoch": 0.9514856887742448, "grad_norm": 8.451811790466309, "learning_rate": 5.913831846472151e-08, "loss": 0.2565, "step": 19181 }, { "epoch": 0.951535294409445, "grad_norm": 6.412223815917969, "learning_rate": 5.901768139376429e-08, "loss": 0.324, "step": 19182 }, { "epoch": 0.951584900044645, "grad_norm": 7.215432643890381, "learning_rate": 5.889716676258428e-08, "loss": 0.2649, "step": 19183 }, { "epoch": 0.9516345056798452, "grad_norm": 6.9359917640686035, "learning_rate": 5.8776774574167416e-08, "loss": 0.288, "step": 19184 }, { "epoch": 0.9516841113150454, "grad_norm": 6.832175254821777, "learning_rate": 5.865650483149743e-08, "loss": 0.252, "step": 19185 }, { "epoch": 0.9517337169502456, "grad_norm": 14.896903991699219, "learning_rate": 5.8536357537554154e-08, "loss": 0.4404, "step": 19186 }, { "epoch": 0.9517833225854457, "grad_norm": 6.384645938873291, "learning_rate": 5.8416332695315217e-08, "loss": 0.3297, "step": 19187 }, { "epoch": 0.9518329282206459, "grad_norm": 7.789712429046631, "learning_rate": 5.8296430307755445e-08, "loss": 0.361, "step": 19188 }, { "epoch": 0.951882533855846, "grad_norm": 13.345263481140137, "learning_rate": 5.81766503778447e-08, "loss": 0.3639, "step": 19189 }, { "epoch": 0.9519321394910462, "grad_norm": 8.706932067871094, "learning_rate": 5.805699290855227e-08, "loss": 0.357, "step": 19190 }, { "epoch": 0.9519817451262463, "grad_norm": 4.844555854797363, "learning_rate": 5.793745790284355e-08, "loss": 0.2503, "step": 19191 }, { "epoch": 0.9520313507614465, "grad_norm": 6.926796913146973, "learning_rate": 5.781804536367952e-08, "loss": 0.2595, "step": 19192 }, { "epoch": 0.9520809563966467, "grad_norm": 11.889209747314453, "learning_rate": 5.769875529402058e-08, "loss": 0.4189, "step": 19193 }, { "epoch": 0.9521305620318469, "grad_norm": 10.53573226928711, "learning_rate": 5.7579587696821594e-08, "loss": 0.3383, "step": 19194 }, { "epoch": 0.9521801676670469, "grad_norm": 9.22829532623291, "learning_rate": 5.746054257503575e-08, "loss": 0.3054, "step": 19195 }, { "epoch": 0.9522297733022471, "grad_norm": 7.277060508728027, "learning_rate": 5.7341619931614024e-08, "loss": 0.2833, "step": 19196 }, { "epoch": 0.9522793789374473, "grad_norm": 6.862552642822266, "learning_rate": 5.7222819769502944e-08, "loss": 0.2968, "step": 19197 }, { "epoch": 0.9523289845726475, "grad_norm": 9.867066383361816, "learning_rate": 5.710414209164572e-08, "loss": 0.3808, "step": 19198 }, { "epoch": 0.9523785902078477, "grad_norm": 9.570579528808594, "learning_rate": 5.698558690098444e-08, "loss": 0.2602, "step": 19199 }, { "epoch": 0.9524281958430477, "grad_norm": 9.665644645690918, "learning_rate": 5.686715420045563e-08, "loss": 0.3726, "step": 19200 }, { "epoch": 0.9524778014782479, "grad_norm": 6.444928169250488, "learning_rate": 5.674884399299474e-08, "loss": 0.2302, "step": 19201 }, { "epoch": 0.9525274071134481, "grad_norm": 6.984148979187012, "learning_rate": 5.6630656281533855e-08, "loss": 0.2695, "step": 19202 }, { "epoch": 0.9525770127486483, "grad_norm": 5.004326343536377, "learning_rate": 5.6512591069001756e-08, "loss": 0.1998, "step": 19203 }, { "epoch": 0.9526266183838484, "grad_norm": 5.7856831550598145, "learning_rate": 5.639464835832331e-08, "loss": 0.1513, "step": 19204 }, { "epoch": 0.9526762240190486, "grad_norm": 5.739744663238525, "learning_rate": 5.6276828152421746e-08, "loss": 0.2253, "step": 19205 }, { "epoch": 0.9527258296542487, "grad_norm": 5.905810356140137, "learning_rate": 5.615913045421639e-08, "loss": 0.2998, "step": 19206 }, { "epoch": 0.9527754352894489, "grad_norm": 11.36275577545166, "learning_rate": 5.6041555266624915e-08, "loss": 0.2723, "step": 19207 }, { "epoch": 0.952825040924649, "grad_norm": 5.4332098960876465, "learning_rate": 5.592410259255998e-08, "loss": 0.3055, "step": 19208 }, { "epoch": 0.9528746465598492, "grad_norm": 9.051541328430176, "learning_rate": 5.580677243493149e-08, "loss": 0.3366, "step": 19209 }, { "epoch": 0.9529242521950494, "grad_norm": 5.218780517578125, "learning_rate": 5.568956479664822e-08, "loss": 0.1923, "step": 19210 }, { "epoch": 0.9529738578302496, "grad_norm": 5.8598737716674805, "learning_rate": 5.5572479680614525e-08, "loss": 0.2402, "step": 19211 }, { "epoch": 0.9530234634654496, "grad_norm": 6.787646293640137, "learning_rate": 5.5455517089730295e-08, "loss": 0.2839, "step": 19212 }, { "epoch": 0.9530730691006498, "grad_norm": 7.640780925750732, "learning_rate": 5.533867702689599e-08, "loss": 0.3401, "step": 19213 }, { "epoch": 0.95312267473585, "grad_norm": 8.60669231414795, "learning_rate": 5.522195949500542e-08, "loss": 0.3819, "step": 19214 }, { "epoch": 0.9531722803710502, "grad_norm": 4.499576091766357, "learning_rate": 5.510536449695181e-08, "loss": 0.2369, "step": 19215 }, { "epoch": 0.9532218860062504, "grad_norm": 4.587542533874512, "learning_rate": 5.4988892035623965e-08, "loss": 0.2291, "step": 19216 }, { "epoch": 0.9532714916414504, "grad_norm": 6.264342784881592, "learning_rate": 5.487254211390791e-08, "loss": 0.283, "step": 19217 }, { "epoch": 0.9533210972766506, "grad_norm": 14.787751197814941, "learning_rate": 5.475631473468745e-08, "loss": 0.3893, "step": 19218 }, { "epoch": 0.9533707029118508, "grad_norm": 10.961588859558105, "learning_rate": 5.4640209900843066e-08, "loss": 0.3568, "step": 19219 }, { "epoch": 0.953420308547051, "grad_norm": 14.816287994384766, "learning_rate": 5.4524227615250225e-08, "loss": 0.3645, "step": 19220 }, { "epoch": 0.9534699141822511, "grad_norm": 7.183239936828613, "learning_rate": 5.440836788078496e-08, "loss": 0.2771, "step": 19221 }, { "epoch": 0.9535195198174513, "grad_norm": 6.08168888092041, "learning_rate": 5.429263070031721e-08, "loss": 0.2295, "step": 19222 }, { "epoch": 0.9535691254526514, "grad_norm": 6.979286193847656, "learning_rate": 5.4177016076714663e-08, "loss": 0.2652, "step": 19223 }, { "epoch": 0.9536187310878516, "grad_norm": 7.225457191467285, "learning_rate": 5.406152401284337e-08, "loss": 0.2666, "step": 19224 }, { "epoch": 0.9536683367230517, "grad_norm": 13.537288665771484, "learning_rate": 5.394615451156493e-08, "loss": 0.4173, "step": 19225 }, { "epoch": 0.9537179423582519, "grad_norm": 7.1886444091796875, "learning_rate": 5.383090757573817e-08, "loss": 0.2377, "step": 19226 }, { "epoch": 0.9537675479934521, "grad_norm": 6.874292850494385, "learning_rate": 5.3715783208218575e-08, "loss": 0.223, "step": 19227 }, { "epoch": 0.9538171536286523, "grad_norm": 9.703091621398926, "learning_rate": 5.360078141185998e-08, "loss": 0.2798, "step": 19228 }, { "epoch": 0.9538667592638523, "grad_norm": 12.473671913146973, "learning_rate": 5.3485902189511216e-08, "loss": 0.3227, "step": 19229 }, { "epoch": 0.9539163648990525, "grad_norm": 6.6422295570373535, "learning_rate": 5.33711455440189e-08, "loss": 0.2436, "step": 19230 }, { "epoch": 0.9539659705342527, "grad_norm": 6.635080337524414, "learning_rate": 5.3256511478227415e-08, "loss": 0.3236, "step": 19231 }, { "epoch": 0.9540155761694529, "grad_norm": 8.704939842224121, "learning_rate": 5.314199999497727e-08, "loss": 0.2293, "step": 19232 }, { "epoch": 0.954065181804653, "grad_norm": 5.394457817077637, "learning_rate": 5.302761109710619e-08, "loss": 0.2034, "step": 19233 }, { "epoch": 0.9541147874398531, "grad_norm": 7.458678245544434, "learning_rate": 5.291334478744914e-08, "loss": 0.313, "step": 19234 }, { "epoch": 0.9541643930750533, "grad_norm": 6.467414379119873, "learning_rate": 5.279920106883607e-08, "loss": 0.2598, "step": 19235 }, { "epoch": 0.9542139987102535, "grad_norm": 7.4335713386535645, "learning_rate": 5.2685179944097496e-08, "loss": 0.1886, "step": 19236 }, { "epoch": 0.9542636043454537, "grad_norm": 4.0224480628967285, "learning_rate": 5.257128141605783e-08, "loss": 0.1758, "step": 19237 }, { "epoch": 0.9543132099806538, "grad_norm": 6.983367443084717, "learning_rate": 5.2457505487539806e-08, "loss": 0.2226, "step": 19238 }, { "epoch": 0.954362815615854, "grad_norm": 11.2274808883667, "learning_rate": 5.234385216136284e-08, "loss": 0.2678, "step": 19239 }, { "epoch": 0.9544124212510541, "grad_norm": 5.470916748046875, "learning_rate": 5.223032144034357e-08, "loss": 0.2694, "step": 19240 }, { "epoch": 0.9544620268862543, "grad_norm": 7.039944648742676, "learning_rate": 5.211691332729474e-08, "loss": 0.2552, "step": 19241 }, { "epoch": 0.9545116325214544, "grad_norm": 7.658082008361816, "learning_rate": 5.200362782502688e-08, "loss": 0.3152, "step": 19242 }, { "epoch": 0.9545612381566546, "grad_norm": 11.079790115356445, "learning_rate": 5.18904649363472e-08, "loss": 0.3923, "step": 19243 }, { "epoch": 0.9546108437918548, "grad_norm": 5.369144439697266, "learning_rate": 5.177742466406066e-08, "loss": 0.2026, "step": 19244 }, { "epoch": 0.9546604494270549, "grad_norm": 5.249011993408203, "learning_rate": 5.1664507010967255e-08, "loss": 0.2221, "step": 19245 }, { "epoch": 0.954710055062255, "grad_norm": 6.927638053894043, "learning_rate": 5.155171197986586e-08, "loss": 0.3335, "step": 19246 }, { "epoch": 0.9547596606974552, "grad_norm": 4.398911476135254, "learning_rate": 5.1439039573552006e-08, "loss": 0.2955, "step": 19247 }, { "epoch": 0.9548092663326554, "grad_norm": 12.398429870605469, "learning_rate": 5.13264897948168e-08, "loss": 0.3545, "step": 19248 }, { "epoch": 0.9548588719678556, "grad_norm": 3.460814952850342, "learning_rate": 5.121406264644968e-08, "loss": 0.1691, "step": 19249 }, { "epoch": 0.9549084776030557, "grad_norm": 9.620684623718262, "learning_rate": 5.110175813123674e-08, "loss": 0.3465, "step": 19250 }, { "epoch": 0.9549580832382558, "grad_norm": 6.064058303833008, "learning_rate": 5.098957625196077e-08, "loss": 0.281, "step": 19251 }, { "epoch": 0.955007688873456, "grad_norm": 7.032529354095459, "learning_rate": 5.0877517011401754e-08, "loss": 0.2945, "step": 19252 }, { "epoch": 0.9550572945086562, "grad_norm": 6.738504409790039, "learning_rate": 5.076558041233692e-08, "loss": 0.2086, "step": 19253 }, { "epoch": 0.9551069001438564, "grad_norm": 4.637014865875244, "learning_rate": 5.065376645753961e-08, "loss": 0.2353, "step": 19254 }, { "epoch": 0.9551565057790565, "grad_norm": 5.463951587677002, "learning_rate": 5.054207514978038e-08, "loss": 0.1634, "step": 19255 }, { "epoch": 0.9552061114142567, "grad_norm": 16.890399932861328, "learning_rate": 5.0430506491828124e-08, "loss": 0.5551, "step": 19256 }, { "epoch": 0.9552557170494568, "grad_norm": 4.1394243240356445, "learning_rate": 5.031906048644675e-08, "loss": 0.239, "step": 19257 }, { "epoch": 0.955305322684657, "grad_norm": 8.390093803405762, "learning_rate": 5.020773713639793e-08, "loss": 0.3846, "step": 19258 }, { "epoch": 0.9553549283198571, "grad_norm": 5.166927814483643, "learning_rate": 5.0096536444440013e-08, "loss": 0.2019, "step": 19259 }, { "epoch": 0.9554045339550573, "grad_norm": 7.127172470092773, "learning_rate": 4.9985458413329137e-08, "loss": 0.2556, "step": 19260 }, { "epoch": 0.9554541395902575, "grad_norm": 11.234954833984375, "learning_rate": 4.987450304581809e-08, "loss": 0.3293, "step": 19261 }, { "epoch": 0.9555037452254576, "grad_norm": 6.105810165405273, "learning_rate": 4.976367034465579e-08, "loss": 0.1848, "step": 19262 }, { "epoch": 0.9555533508606577, "grad_norm": 8.897588729858398, "learning_rate": 4.965296031258893e-08, "loss": 0.3294, "step": 19263 }, { "epoch": 0.9556029564958579, "grad_norm": 7.862977504730225, "learning_rate": 4.9542372952361415e-08, "loss": 0.3052, "step": 19264 }, { "epoch": 0.9556525621310581, "grad_norm": 18.327585220336914, "learning_rate": 4.9431908266712735e-08, "loss": 0.3372, "step": 19265 }, { "epoch": 0.9557021677662583, "grad_norm": 10.16336441040039, "learning_rate": 4.932156625838125e-08, "loss": 0.328, "step": 19266 }, { "epoch": 0.9557517734014584, "grad_norm": 4.761266708374023, "learning_rate": 4.9211346930100324e-08, "loss": 0.1835, "step": 19267 }, { "epoch": 0.9558013790366585, "grad_norm": 7.187533855438232, "learning_rate": 4.910125028460222e-08, "loss": 0.2998, "step": 19268 }, { "epoch": 0.9558509846718587, "grad_norm": 4.278548717498779, "learning_rate": 4.8991276324613645e-08, "loss": 0.2727, "step": 19269 }, { "epoch": 0.9559005903070589, "grad_norm": 4.085076808929443, "learning_rate": 4.888142505286186e-08, "loss": 0.2622, "step": 19270 }, { "epoch": 0.955950195942259, "grad_norm": 4.965263843536377, "learning_rate": 4.877169647206747e-08, "loss": 0.2427, "step": 19271 }, { "epoch": 0.9559998015774592, "grad_norm": 20.199432373046875, "learning_rate": 4.866209058495053e-08, "loss": 0.4564, "step": 19272 }, { "epoch": 0.9560494072126594, "grad_norm": 6.916726112365723, "learning_rate": 4.855260739422607e-08, "loss": 0.2927, "step": 19273 }, { "epoch": 0.9560990128478595, "grad_norm": 5.908644199371338, "learning_rate": 4.84432469026086e-08, "loss": 0.2785, "step": 19274 }, { "epoch": 0.9561486184830597, "grad_norm": 7.260901927947998, "learning_rate": 4.83340091128065e-08, "loss": 0.3141, "step": 19275 }, { "epoch": 0.9561982241182598, "grad_norm": 5.208851337432861, "learning_rate": 4.822489402752817e-08, "loss": 0.1879, "step": 19276 }, { "epoch": 0.95624782975346, "grad_norm": 10.096802711486816, "learning_rate": 4.811590164947644e-08, "loss": 0.2377, "step": 19277 }, { "epoch": 0.9562974353886602, "grad_norm": 5.331770420074463, "learning_rate": 4.800703198135304e-08, "loss": 0.2617, "step": 19278 }, { "epoch": 0.9563470410238603, "grad_norm": 4.975700855255127, "learning_rate": 4.789828502585525e-08, "loss": 0.2968, "step": 19279 }, { "epoch": 0.9563966466590604, "grad_norm": 6.4183831214904785, "learning_rate": 4.7789660785678146e-08, "loss": 0.2909, "step": 19280 }, { "epoch": 0.9564462522942606, "grad_norm": 4.623285293579102, "learning_rate": 4.768115926351291e-08, "loss": 0.2664, "step": 19281 }, { "epoch": 0.9564958579294608, "grad_norm": 10.181060791015625, "learning_rate": 4.757278046204905e-08, "loss": 0.2726, "step": 19282 }, { "epoch": 0.956545463564661, "grad_norm": 7.8183817863464355, "learning_rate": 4.74645243839722e-08, "loss": 0.3118, "step": 19283 }, { "epoch": 0.9565950691998611, "grad_norm": 4.992382526397705, "learning_rate": 4.73563910319641e-08, "loss": 0.2283, "step": 19284 }, { "epoch": 0.9566446748350612, "grad_norm": 4.597748756408691, "learning_rate": 4.724838040870594e-08, "loss": 0.1966, "step": 19285 }, { "epoch": 0.9566942804702614, "grad_norm": 5.759791374206543, "learning_rate": 4.714049251687225e-08, "loss": 0.2582, "step": 19286 }, { "epoch": 0.9567438861054616, "grad_norm": 6.073205471038818, "learning_rate": 4.703272735913811e-08, "loss": 0.3098, "step": 19287 }, { "epoch": 0.9567934917406618, "grad_norm": 5.621448993682861, "learning_rate": 4.692508493817305e-08, "loss": 0.2658, "step": 19288 }, { "epoch": 0.9568430973758619, "grad_norm": 18.61273956298828, "learning_rate": 4.681756525664549e-08, "loss": 0.3499, "step": 19289 }, { "epoch": 0.9568927030110621, "grad_norm": 7.225008964538574, "learning_rate": 4.671016831721886e-08, "loss": 0.28, "step": 19290 }, { "epoch": 0.9569423086462622, "grad_norm": 3.8583755493164062, "learning_rate": 4.660289412255492e-08, "loss": 0.2453, "step": 19291 }, { "epoch": 0.9569919142814624, "grad_norm": 16.213708877563477, "learning_rate": 4.64957426753121e-08, "loss": 0.3491, "step": 19292 }, { "epoch": 0.9570415199166625, "grad_norm": 5.843105792999268, "learning_rate": 4.6388713978145485e-08, "loss": 0.3012, "step": 19293 }, { "epoch": 0.9570911255518627, "grad_norm": 5.526360511779785, "learning_rate": 4.628180803370685e-08, "loss": 0.2825, "step": 19294 }, { "epoch": 0.9571407311870629, "grad_norm": 3.7871086597442627, "learning_rate": 4.617502484464631e-08, "loss": 0.2062, "step": 19295 }, { "epoch": 0.957190336822263, "grad_norm": 9.481456756591797, "learning_rate": 4.606836441360896e-08, "loss": 0.3489, "step": 19296 }, { "epoch": 0.9572399424574631, "grad_norm": 5.6367506980896, "learning_rate": 4.596182674323879e-08, "loss": 0.2441, "step": 19297 }, { "epoch": 0.9572895480926633, "grad_norm": 8.005295753479004, "learning_rate": 4.585541183617592e-08, "loss": 0.3763, "step": 19298 }, { "epoch": 0.9573391537278635, "grad_norm": 13.179770469665527, "learning_rate": 4.574911969505658e-08, "loss": 0.2153, "step": 19299 }, { "epoch": 0.9573887593630637, "grad_norm": 6.880493640899658, "learning_rate": 4.5642950322514754e-08, "loss": 0.2525, "step": 19300 }, { "epoch": 0.9574383649982638, "grad_norm": 3.58783221244812, "learning_rate": 4.5536903721182246e-08, "loss": 0.2473, "step": 19301 }, { "epoch": 0.9574879706334639, "grad_norm": 11.000690460205078, "learning_rate": 4.543097989368639e-08, "loss": 0.2887, "step": 19302 }, { "epoch": 0.9575375762686641, "grad_norm": 5.172020435333252, "learning_rate": 4.532517884265175e-08, "loss": 0.2464, "step": 19303 }, { "epoch": 0.9575871819038643, "grad_norm": 9.088732719421387, "learning_rate": 4.521950057070068e-08, "loss": 0.3589, "step": 19304 }, { "epoch": 0.9576367875390644, "grad_norm": 3.9603679180145264, "learning_rate": 4.5113945080451635e-08, "loss": 0.1747, "step": 19305 }, { "epoch": 0.9576863931742646, "grad_norm": 10.68432331085205, "learning_rate": 4.500851237451975e-08, "loss": 0.3533, "step": 19306 }, { "epoch": 0.9577359988094648, "grad_norm": 10.954657554626465, "learning_rate": 4.4903202455519046e-08, "loss": 0.3664, "step": 19307 }, { "epoch": 0.9577856044446649, "grad_norm": 5.198391914367676, "learning_rate": 4.479801532605854e-08, "loss": 0.212, "step": 19308 }, { "epoch": 0.957835210079865, "grad_norm": 4.562281608581543, "learning_rate": 4.469295098874449e-08, "loss": 0.1946, "step": 19309 }, { "epoch": 0.9578848157150652, "grad_norm": 5.235253810882568, "learning_rate": 4.458800944618036e-08, "loss": 0.2265, "step": 19310 }, { "epoch": 0.9579344213502654, "grad_norm": 4.404293537139893, "learning_rate": 4.4483190700967406e-08, "loss": 0.1883, "step": 19311 }, { "epoch": 0.9579840269854656, "grad_norm": 7.016033172607422, "learning_rate": 4.4378494755701885e-08, "loss": 0.2763, "step": 19312 }, { "epoch": 0.9580336326206657, "grad_norm": 4.733550548553467, "learning_rate": 4.42739216129795e-08, "loss": 0.2306, "step": 19313 }, { "epoch": 0.9580832382558658, "grad_norm": 5.640267372131348, "learning_rate": 4.4169471275390394e-08, "loss": 0.2913, "step": 19314 }, { "epoch": 0.958132843891066, "grad_norm": 11.934110641479492, "learning_rate": 4.406514374552473e-08, "loss": 0.2653, "step": 19315 }, { "epoch": 0.9581824495262662, "grad_norm": 7.659263610839844, "learning_rate": 4.396093902596599e-08, "loss": 0.3027, "step": 19316 }, { "epoch": 0.9582320551614664, "grad_norm": 5.618750095367432, "learning_rate": 4.385685711929655e-08, "loss": 0.2805, "step": 19317 }, { "epoch": 0.9582816607966665, "grad_norm": 5.5501275062561035, "learning_rate": 4.375289802809657e-08, "loss": 0.2304, "step": 19318 }, { "epoch": 0.9583312664318666, "grad_norm": 6.414637088775635, "learning_rate": 4.3649061754942326e-08, "loss": 0.3108, "step": 19319 }, { "epoch": 0.9583808720670668, "grad_norm": 11.286749839782715, "learning_rate": 4.354534830240509e-08, "loss": 0.2694, "step": 19320 }, { "epoch": 0.958430477702267, "grad_norm": 5.733665943145752, "learning_rate": 4.344175767305725e-08, "loss": 0.2407, "step": 19321 }, { "epoch": 0.9584800833374671, "grad_norm": 10.179603576660156, "learning_rate": 4.333828986946398e-08, "loss": 0.2941, "step": 19322 }, { "epoch": 0.9585296889726673, "grad_norm": 7.85198450088501, "learning_rate": 4.3234944894190446e-08, "loss": 0.2265, "step": 19323 }, { "epoch": 0.9585792946078675, "grad_norm": 12.870015144348145, "learning_rate": 4.313172274979738e-08, "loss": 0.2462, "step": 19324 }, { "epoch": 0.9586289002430676, "grad_norm": 6.034400939941406, "learning_rate": 4.302862343884162e-08, "loss": 0.3002, "step": 19325 }, { "epoch": 0.9586785058782678, "grad_norm": 8.107392311096191, "learning_rate": 4.2925646963879464e-08, "loss": 0.2669, "step": 19326 }, { "epoch": 0.9587281115134679, "grad_norm": 7.6684064865112305, "learning_rate": 4.28227933274622e-08, "loss": 0.2069, "step": 19327 }, { "epoch": 0.9587777171486681, "grad_norm": 11.946084976196289, "learning_rate": 4.272006253213834e-08, "loss": 0.3034, "step": 19328 }, { "epoch": 0.9588273227838683, "grad_norm": 11.638179779052734, "learning_rate": 4.261745458045363e-08, "loss": 0.401, "step": 19329 }, { "epoch": 0.9588769284190684, "grad_norm": 7.02569055557251, "learning_rate": 4.251496947495104e-08, "loss": 0.2012, "step": 19330 }, { "epoch": 0.9589265340542685, "grad_norm": 9.795884132385254, "learning_rate": 4.24126072181702e-08, "loss": 0.275, "step": 19331 }, { "epoch": 0.9589761396894687, "grad_norm": 4.224629878997803, "learning_rate": 4.231036781264686e-08, "loss": 0.2282, "step": 19332 }, { "epoch": 0.9590257453246689, "grad_norm": 8.43086051940918, "learning_rate": 4.2208251260915655e-08, "loss": 0.3854, "step": 19333 }, { "epoch": 0.9590753509598691, "grad_norm": 12.532917022705078, "learning_rate": 4.210625756550679e-08, "loss": 0.301, "step": 19334 }, { "epoch": 0.9591249565950692, "grad_norm": 10.808279037475586, "learning_rate": 4.200438672894769e-08, "loss": 0.3163, "step": 19335 }, { "epoch": 0.9591745622302693, "grad_norm": 14.843757629394531, "learning_rate": 4.1902638753762436e-08, "loss": 0.3526, "step": 19336 }, { "epoch": 0.9592241678654695, "grad_norm": 8.62985897064209, "learning_rate": 4.180101364247235e-08, "loss": 0.213, "step": 19337 }, { "epoch": 0.9592737735006697, "grad_norm": 7.121491432189941, "learning_rate": 4.1699511397595984e-08, "loss": 0.3498, "step": 19338 }, { "epoch": 0.9593233791358698, "grad_norm": 5.342090606689453, "learning_rate": 4.159813202164853e-08, "loss": 0.1466, "step": 19339 }, { "epoch": 0.95937298477107, "grad_norm": 5.127276420593262, "learning_rate": 4.1496875517142434e-08, "loss": 0.2646, "step": 19340 }, { "epoch": 0.9594225904062702, "grad_norm": 8.127408027648926, "learning_rate": 4.1395741886586794e-08, "loss": 0.4032, "step": 19341 }, { "epoch": 0.9594721960414703, "grad_norm": 6.627622604370117, "learning_rate": 4.129473113248794e-08, "loss": 0.2905, "step": 19342 }, { "epoch": 0.9595218016766704, "grad_norm": 4.447325229644775, "learning_rate": 4.119384325734832e-08, "loss": 0.2235, "step": 19343 }, { "epoch": 0.9595714073118706, "grad_norm": 8.033936500549316, "learning_rate": 4.1093078263668705e-08, "loss": 0.2745, "step": 19344 }, { "epoch": 0.9596210129470708, "grad_norm": 4.323497295379639, "learning_rate": 4.099243615394543e-08, "loss": 0.1914, "step": 19345 }, { "epoch": 0.959670618582271, "grad_norm": 3.7608025074005127, "learning_rate": 4.089191693067318e-08, "loss": 0.2669, "step": 19346 }, { "epoch": 0.959720224217471, "grad_norm": 7.821969985961914, "learning_rate": 4.079152059634217e-08, "loss": 0.3049, "step": 19347 }, { "epoch": 0.9597698298526712, "grad_norm": 7.9698662757873535, "learning_rate": 4.0691247153440415e-08, "loss": 0.252, "step": 19348 }, { "epoch": 0.9598194354878714, "grad_norm": 5.914795398712158, "learning_rate": 4.0591096604453154e-08, "loss": 0.2291, "step": 19349 }, { "epoch": 0.9598690411230716, "grad_norm": 6.24425745010376, "learning_rate": 4.049106895186228e-08, "loss": 0.2737, "step": 19350 }, { "epoch": 0.9599186467582718, "grad_norm": 10.559416770935059, "learning_rate": 4.039116419814526e-08, "loss": 0.4462, "step": 19351 }, { "epoch": 0.9599682523934719, "grad_norm": 6.434370040893555, "learning_rate": 4.0291382345779564e-08, "loss": 0.306, "step": 19352 }, { "epoch": 0.960017858028672, "grad_norm": 4.772884368896484, "learning_rate": 4.0191723397236536e-08, "loss": 0.233, "step": 19353 }, { "epoch": 0.9600674636638722, "grad_norm": 12.247215270996094, "learning_rate": 4.009218735498588e-08, "loss": 0.2709, "step": 19354 }, { "epoch": 0.9601170692990724, "grad_norm": 10.799654006958008, "learning_rate": 3.9992774221494504e-08, "loss": 0.3179, "step": 19355 }, { "epoch": 0.9601666749342725, "grad_norm": 11.893582344055176, "learning_rate": 3.989348399922655e-08, "loss": 0.2924, "step": 19356 }, { "epoch": 0.9602162805694727, "grad_norm": 4.691486358642578, "learning_rate": 3.97943166906406e-08, "loss": 0.2994, "step": 19357 }, { "epoch": 0.9602658862046729, "grad_norm": 6.348902702331543, "learning_rate": 3.969527229819636e-08, "loss": 0.2773, "step": 19358 }, { "epoch": 0.960315491839873, "grad_norm": 6.375115871429443, "learning_rate": 3.959635082434632e-08, "loss": 0.2958, "step": 19359 }, { "epoch": 0.9603650974750731, "grad_norm": 8.169628143310547, "learning_rate": 3.949755227154295e-08, "loss": 0.3356, "step": 19360 }, { "epoch": 0.9604147031102733, "grad_norm": 8.63396167755127, "learning_rate": 3.939887664223374e-08, "loss": 0.2962, "step": 19361 }, { "epoch": 0.9604643087454735, "grad_norm": 7.039306163787842, "learning_rate": 3.930032393886507e-08, "loss": 0.2853, "step": 19362 }, { "epoch": 0.9605139143806737, "grad_norm": 8.575034141540527, "learning_rate": 3.9201894163877207e-08, "loss": 0.2793, "step": 19363 }, { "epoch": 0.9605635200158738, "grad_norm": 8.364941596984863, "learning_rate": 3.9103587319711535e-08, "loss": 0.2425, "step": 19364 }, { "epoch": 0.9606131256510739, "grad_norm": 4.872196197509766, "learning_rate": 3.900540340880221e-08, "loss": 0.3473, "step": 19365 }, { "epoch": 0.9606627312862741, "grad_norm": 6.848993301391602, "learning_rate": 3.89073424335834e-08, "loss": 0.269, "step": 19366 }, { "epoch": 0.9607123369214743, "grad_norm": 9.269723892211914, "learning_rate": 3.880940439648484e-08, "loss": 0.2404, "step": 19367 }, { "epoch": 0.9607619425566745, "grad_norm": 8.715354919433594, "learning_rate": 3.871158929993346e-08, "loss": 0.3101, "step": 19368 }, { "epoch": 0.9608115481918746, "grad_norm": 11.861635208129883, "learning_rate": 3.861389714635289e-08, "loss": 0.384, "step": 19369 }, { "epoch": 0.9608611538270747, "grad_norm": 3.6755030155181885, "learning_rate": 3.851632793816451e-08, "loss": 0.2753, "step": 19370 }, { "epoch": 0.9609107594622749, "grad_norm": 7.507654190063477, "learning_rate": 3.841888167778529e-08, "loss": 0.3245, "step": 19371 }, { "epoch": 0.9609603650974751, "grad_norm": 5.249795913696289, "learning_rate": 3.832155836763163e-08, "loss": 0.287, "step": 19372 }, { "epoch": 0.9610099707326752, "grad_norm": 10.903688430786133, "learning_rate": 3.822435801011326e-08, "loss": 0.4006, "step": 19373 }, { "epoch": 0.9610595763678754, "grad_norm": 8.198380470275879, "learning_rate": 3.812728060763993e-08, "loss": 0.3306, "step": 19374 }, { "epoch": 0.9611091820030756, "grad_norm": 5.902804851531982, "learning_rate": 3.803032616261693e-08, "loss": 0.3004, "step": 19375 }, { "epoch": 0.9611587876382757, "grad_norm": 5.568999290466309, "learning_rate": 3.7933494677447334e-08, "loss": 0.2452, "step": 19376 }, { "epoch": 0.9612083932734758, "grad_norm": 5.0466179847717285, "learning_rate": 3.783678615453035e-08, "loss": 0.2701, "step": 19377 }, { "epoch": 0.961257998908676, "grad_norm": 7.957810878753662, "learning_rate": 3.7740200596262375e-08, "loss": 0.2469, "step": 19378 }, { "epoch": 0.9613076045438762, "grad_norm": 7.803343296051025, "learning_rate": 3.764373800503651e-08, "loss": 0.2333, "step": 19379 }, { "epoch": 0.9613572101790764, "grad_norm": 3.861056327819824, "learning_rate": 3.754739838324417e-08, "loss": 0.2047, "step": 19380 }, { "epoch": 0.9614068158142764, "grad_norm": 7.321211814880371, "learning_rate": 3.745118173327178e-08, "loss": 0.2227, "step": 19381 }, { "epoch": 0.9614564214494766, "grad_norm": 13.05591106414795, "learning_rate": 3.735508805750354e-08, "loss": 0.3046, "step": 19382 }, { "epoch": 0.9615060270846768, "grad_norm": 5.381778717041016, "learning_rate": 3.725911735832144e-08, "loss": 0.1991, "step": 19383 }, { "epoch": 0.961555632719877, "grad_norm": 14.832999229431152, "learning_rate": 3.716326963810357e-08, "loss": 0.268, "step": 19384 }, { "epoch": 0.9616052383550772, "grad_norm": 8.326794624328613, "learning_rate": 3.706754489922415e-08, "loss": 0.3417, "step": 19385 }, { "epoch": 0.9616548439902773, "grad_norm": 20.023151397705078, "learning_rate": 3.6971943144056277e-08, "loss": 0.4321, "step": 19386 }, { "epoch": 0.9617044496254774, "grad_norm": 10.119613647460938, "learning_rate": 3.687646437496861e-08, "loss": 0.3565, "step": 19387 }, { "epoch": 0.9617540552606776, "grad_norm": 7.116285800933838, "learning_rate": 3.6781108594327594e-08, "loss": 0.2693, "step": 19388 }, { "epoch": 0.9618036608958778, "grad_norm": 11.240854263305664, "learning_rate": 3.668587580449523e-08, "loss": 0.2505, "step": 19389 }, { "epoch": 0.9618532665310779, "grad_norm": 5.7317094802856445, "learning_rate": 3.65907660078324e-08, "loss": 0.2716, "step": 19390 }, { "epoch": 0.9619028721662781, "grad_norm": 8.668707847595215, "learning_rate": 3.649577920669556e-08, "loss": 0.3061, "step": 19391 }, { "epoch": 0.9619524778014783, "grad_norm": 6.787134170532227, "learning_rate": 3.640091540343838e-08, "loss": 0.343, "step": 19392 }, { "epoch": 0.9620020834366784, "grad_norm": 8.298761367797852, "learning_rate": 3.63061746004123e-08, "loss": 0.3016, "step": 19393 }, { "epoch": 0.9620516890718785, "grad_norm": 6.902164459228516, "learning_rate": 3.62115567999638e-08, "loss": 0.3892, "step": 19394 }, { "epoch": 0.9621012947070787, "grad_norm": 14.626225471496582, "learning_rate": 3.611706200443876e-08, "loss": 0.3178, "step": 19395 }, { "epoch": 0.9621509003422789, "grad_norm": 8.786059379577637, "learning_rate": 3.60226902161781e-08, "loss": 0.323, "step": 19396 }, { "epoch": 0.9622005059774791, "grad_norm": 10.518370628356934, "learning_rate": 3.59284414375205e-08, "loss": 0.3049, "step": 19397 }, { "epoch": 0.9622501116126791, "grad_norm": 4.742584228515625, "learning_rate": 3.5834315670801866e-08, "loss": 0.2402, "step": 19398 }, { "epoch": 0.9622997172478793, "grad_norm": 5.561801433563232, "learning_rate": 3.574031291835478e-08, "loss": 0.1536, "step": 19399 }, { "epoch": 0.9623493228830795, "grad_norm": 13.973397254943848, "learning_rate": 3.564643318250738e-08, "loss": 0.2905, "step": 19400 }, { "epoch": 0.9623989285182797, "grad_norm": 11.813895225524902, "learning_rate": 3.5552676465587796e-08, "loss": 0.3589, "step": 19401 }, { "epoch": 0.9624485341534799, "grad_norm": 4.509530067443848, "learning_rate": 3.5459042769918075e-08, "loss": 0.2312, "step": 19402 }, { "epoch": 0.96249813978868, "grad_norm": 6.407126426696777, "learning_rate": 3.5365532097819676e-08, "loss": 0.2608, "step": 19403 }, { "epoch": 0.9625477454238801, "grad_norm": 7.434360980987549, "learning_rate": 3.527214445160854e-08, "loss": 0.2533, "step": 19404 }, { "epoch": 0.9625973510590803, "grad_norm": 9.009198188781738, "learning_rate": 3.5178879833599466e-08, "loss": 0.2989, "step": 19405 }, { "epoch": 0.9626469566942805, "grad_norm": 5.504766941070557, "learning_rate": 3.508573824610395e-08, "loss": 0.2526, "step": 19406 }, { "epoch": 0.9626965623294806, "grad_norm": 6.606627464294434, "learning_rate": 3.499271969142959e-08, "loss": 0.2546, "step": 19407 }, { "epoch": 0.9627461679646808, "grad_norm": 8.683887481689453, "learning_rate": 3.48998241718812e-08, "loss": 0.3483, "step": 19408 }, { "epoch": 0.962795773599881, "grad_norm": 6.060787677764893, "learning_rate": 3.48070516897614e-08, "loss": 0.2865, "step": 19409 }, { "epoch": 0.9628453792350811, "grad_norm": 11.202096939086914, "learning_rate": 3.471440224736888e-08, "loss": 0.3211, "step": 19410 }, { "epoch": 0.9628949848702812, "grad_norm": 8.740740776062012, "learning_rate": 3.462187584699905e-08, "loss": 0.2667, "step": 19411 }, { "epoch": 0.9629445905054814, "grad_norm": 5.565578937530518, "learning_rate": 3.4529472490945626e-08, "loss": 0.2175, "step": 19412 }, { "epoch": 0.9629941961406816, "grad_norm": 5.149444580078125, "learning_rate": 3.4437192181498436e-08, "loss": 0.3343, "step": 19413 }, { "epoch": 0.9630438017758818, "grad_norm": 10.490564346313477, "learning_rate": 3.434503492094288e-08, "loss": 0.3765, "step": 19414 }, { "epoch": 0.9630934074110818, "grad_norm": 6.296832084655762, "learning_rate": 3.425300071156434e-08, "loss": 0.2893, "step": 19415 }, { "epoch": 0.963143013046282, "grad_norm": 8.662742614746094, "learning_rate": 3.416108955564268e-08, "loss": 0.2985, "step": 19416 }, { "epoch": 0.9631926186814822, "grad_norm": 5.04498815536499, "learning_rate": 3.406930145545495e-08, "loss": 0.247, "step": 19417 }, { "epoch": 0.9632422243166824, "grad_norm": 3.765256404876709, "learning_rate": 3.397763641327656e-08, "loss": 0.1915, "step": 19418 }, { "epoch": 0.9632918299518826, "grad_norm": 10.053552627563477, "learning_rate": 3.388609443137847e-08, "loss": 0.3498, "step": 19419 }, { "epoch": 0.9633414355870827, "grad_norm": 9.685288429260254, "learning_rate": 3.379467551202997e-08, "loss": 0.3931, "step": 19420 }, { "epoch": 0.9633910412222828, "grad_norm": 13.239988327026367, "learning_rate": 3.3703379657495375e-08, "loss": 0.3489, "step": 19421 }, { "epoch": 0.963440646857483, "grad_norm": 11.124909400939941, "learning_rate": 3.36122068700373e-08, "loss": 0.4479, "step": 19422 }, { "epoch": 0.9634902524926832, "grad_norm": 5.253438949584961, "learning_rate": 3.352115715191617e-08, "loss": 0.1369, "step": 19423 }, { "epoch": 0.9635398581278833, "grad_norm": 8.327645301818848, "learning_rate": 3.343023050538685e-08, "loss": 0.3027, "step": 19424 }, { "epoch": 0.9635894637630835, "grad_norm": 17.64602279663086, "learning_rate": 3.333942693270309e-08, "loss": 0.4387, "step": 19425 }, { "epoch": 0.9636390693982837, "grad_norm": 6.107213973999023, "learning_rate": 3.324874643611475e-08, "loss": 0.1793, "step": 19426 }, { "epoch": 0.9636886750334838, "grad_norm": 5.957132339477539, "learning_rate": 3.315818901786949e-08, "loss": 0.2307, "step": 19427 }, { "epoch": 0.9637382806686839, "grad_norm": 5.901559829711914, "learning_rate": 3.3067754680210506e-08, "loss": 0.3356, "step": 19428 }, { "epoch": 0.9637878863038841, "grad_norm": 7.351221084594727, "learning_rate": 3.29774434253799e-08, "loss": 0.3447, "step": 19429 }, { "epoch": 0.9638374919390843, "grad_norm": 7.0664873123168945, "learning_rate": 3.2887255255615314e-08, "loss": 0.268, "step": 19430 }, { "epoch": 0.9638870975742845, "grad_norm": 9.821272850036621, "learning_rate": 3.2797190173151084e-08, "loss": 0.3226, "step": 19431 }, { "epoch": 0.9639367032094845, "grad_norm": 9.0376615524292, "learning_rate": 3.270724818021931e-08, "loss": 0.2716, "step": 19432 }, { "epoch": 0.9639863088446847, "grad_norm": 8.132655143737793, "learning_rate": 3.2617429279048765e-08, "loss": 0.2995, "step": 19433 }, { "epoch": 0.9640359144798849, "grad_norm": 7.896340847015381, "learning_rate": 3.252773347186544e-08, "loss": 0.252, "step": 19434 }, { "epoch": 0.9640855201150851, "grad_norm": 9.79094123840332, "learning_rate": 3.243816076089201e-08, "loss": 0.3018, "step": 19435 }, { "epoch": 0.9641351257502853, "grad_norm": 6.066939830780029, "learning_rate": 3.234871114834781e-08, "loss": 0.2841, "step": 19436 }, { "epoch": 0.9641847313854854, "grad_norm": 9.136524200439453, "learning_rate": 3.225938463644995e-08, "loss": 0.3276, "step": 19437 }, { "epoch": 0.9642343370206855, "grad_norm": 9.546064376831055, "learning_rate": 3.217018122741167e-08, "loss": 0.3477, "step": 19438 }, { "epoch": 0.9642839426558857, "grad_norm": 7.996868133544922, "learning_rate": 3.2081100923442856e-08, "loss": 0.3662, "step": 19439 }, { "epoch": 0.9643335482910859, "grad_norm": 10.006555557250977, "learning_rate": 3.19921437267523e-08, "loss": 0.3427, "step": 19440 }, { "epoch": 0.964383153926286, "grad_norm": 6.786446571350098, "learning_rate": 3.190330963954325e-08, "loss": 0.325, "step": 19441 }, { "epoch": 0.9644327595614862, "grad_norm": 6.260916233062744, "learning_rate": 3.1814598664017816e-08, "loss": 0.2879, "step": 19442 }, { "epoch": 0.9644823651966864, "grad_norm": 5.6446638107299805, "learning_rate": 3.172601080237425e-08, "loss": 0.2738, "step": 19443 }, { "epoch": 0.9645319708318865, "grad_norm": 11.372706413269043, "learning_rate": 3.163754605680691e-08, "loss": 0.4295, "step": 19444 }, { "epoch": 0.9645815764670866, "grad_norm": 4.256937026977539, "learning_rate": 3.154920442950904e-08, "loss": 0.2083, "step": 19445 }, { "epoch": 0.9646311821022868, "grad_norm": 12.26024055480957, "learning_rate": 3.146098592266944e-08, "loss": 0.2725, "step": 19446 }, { "epoch": 0.964680787737487, "grad_norm": 18.47844696044922, "learning_rate": 3.1372890538473585e-08, "loss": 0.3861, "step": 19447 }, { "epoch": 0.9647303933726872, "grad_norm": 7.2914862632751465, "learning_rate": 3.128491827910529e-08, "loss": 0.2174, "step": 19448 }, { "epoch": 0.9647799990078872, "grad_norm": 9.51037883758545, "learning_rate": 3.119706914674447e-08, "loss": 0.371, "step": 19449 }, { "epoch": 0.9648296046430874, "grad_norm": 5.656634330749512, "learning_rate": 3.1109343143568284e-08, "loss": 0.2969, "step": 19450 }, { "epoch": 0.9648792102782876, "grad_norm": 9.057923316955566, "learning_rate": 3.1021740271749426e-08, "loss": 0.3126, "step": 19451 }, { "epoch": 0.9649288159134878, "grad_norm": 7.34610652923584, "learning_rate": 3.093426053346005e-08, "loss": 0.3288, "step": 19452 }, { "epoch": 0.964978421548688, "grad_norm": 6.20582389831543, "learning_rate": 3.084690393086731e-08, "loss": 0.2568, "step": 19453 }, { "epoch": 0.9650280271838881, "grad_norm": 7.818568229675293, "learning_rate": 3.075967046613615e-08, "loss": 0.3108, "step": 19454 }, { "epoch": 0.9650776328190882, "grad_norm": 11.709896087646484, "learning_rate": 3.067256014142816e-08, "loss": 0.3198, "step": 19455 }, { "epoch": 0.9651272384542884, "grad_norm": 4.604019641876221, "learning_rate": 3.0585572958902186e-08, "loss": 0.2662, "step": 19456 }, { "epoch": 0.9651768440894886, "grad_norm": 3.481398105621338, "learning_rate": 3.049870892071316e-08, "loss": 0.2101, "step": 19457 }, { "epoch": 0.9652264497246887, "grad_norm": 4.409202575683594, "learning_rate": 3.0411968029014364e-08, "loss": 0.2628, "step": 19458 }, { "epoch": 0.9652760553598889, "grad_norm": 5.171166896820068, "learning_rate": 3.0325350285954625e-08, "loss": 0.2757, "step": 19459 }, { "epoch": 0.9653256609950891, "grad_norm": 10.025068283081055, "learning_rate": 3.0238855693681676e-08, "loss": 0.343, "step": 19460 }, { "epoch": 0.9653752666302892, "grad_norm": 5.6903181076049805, "learning_rate": 3.015248425433715e-08, "loss": 0.2861, "step": 19461 }, { "epoch": 0.9654248722654893, "grad_norm": 9.764375686645508, "learning_rate": 3.006623597006264e-08, "loss": 0.3829, "step": 19462 }, { "epoch": 0.9654744779006895, "grad_norm": 7.21019983291626, "learning_rate": 2.998011084299424e-08, "loss": 0.3246, "step": 19463 }, { "epoch": 0.9655240835358897, "grad_norm": 6.426811695098877, "learning_rate": 2.9894108875267445e-08, "loss": 0.2939, "step": 19464 }, { "epoch": 0.9655736891710899, "grad_norm": 4.9307146072387695, "learning_rate": 2.980823006901279e-08, "loss": 0.2648, "step": 19465 }, { "epoch": 0.96562329480629, "grad_norm": 19.07172203063965, "learning_rate": 2.9722474426358562e-08, "loss": 0.3113, "step": 19466 }, { "epoch": 0.9656729004414901, "grad_norm": 4.099616050720215, "learning_rate": 2.963684194942973e-08, "loss": 0.2785, "step": 19467 }, { "epoch": 0.9657225060766903, "grad_norm": 4.5842061042785645, "learning_rate": 2.955133264034793e-08, "loss": 0.2322, "step": 19468 }, { "epoch": 0.9657721117118905, "grad_norm": 5.477068901062012, "learning_rate": 2.9465946501232577e-08, "loss": 0.2937, "step": 19469 }, { "epoch": 0.9658217173470907, "grad_norm": 4.624278545379639, "learning_rate": 2.938068353419976e-08, "loss": 0.2488, "step": 19470 }, { "epoch": 0.9658713229822908, "grad_norm": 5.209695816040039, "learning_rate": 2.9295543741361676e-08, "loss": 0.1996, "step": 19471 }, { "epoch": 0.9659209286174909, "grad_norm": 5.650912761688232, "learning_rate": 2.9210527124828858e-08, "loss": 0.2713, "step": 19472 }, { "epoch": 0.9659705342526911, "grad_norm": 5.228856086730957, "learning_rate": 2.91256336867074e-08, "loss": 0.2612, "step": 19473 }, { "epoch": 0.9660201398878913, "grad_norm": 5.4390692710876465, "learning_rate": 2.9040863429101727e-08, "loss": 0.2285, "step": 19474 }, { "epoch": 0.9660697455230914, "grad_norm": 5.659012794494629, "learning_rate": 2.8956216354111832e-08, "loss": 0.2345, "step": 19475 }, { "epoch": 0.9661193511582916, "grad_norm": 8.546870231628418, "learning_rate": 2.8871692463835477e-08, "loss": 0.3292, "step": 19476 }, { "epoch": 0.9661689567934918, "grad_norm": 4.6555495262146, "learning_rate": 2.87872917603671e-08, "loss": 0.257, "step": 19477 }, { "epoch": 0.9662185624286919, "grad_norm": 7.267120361328125, "learning_rate": 2.870301424579891e-08, "loss": 0.3635, "step": 19478 }, { "epoch": 0.966268168063892, "grad_norm": 13.406012535095215, "learning_rate": 2.8618859922218134e-08, "loss": 0.3517, "step": 19479 }, { "epoch": 0.9663177736990922, "grad_norm": 15.427496910095215, "learning_rate": 2.8534828791711434e-08, "loss": 0.3602, "step": 19480 }, { "epoch": 0.9663673793342924, "grad_norm": 5.6393351554870605, "learning_rate": 2.8450920856360476e-08, "loss": 0.2354, "step": 19481 }, { "epoch": 0.9664169849694926, "grad_norm": 10.181633949279785, "learning_rate": 2.8367136118244154e-08, "loss": 0.2808, "step": 19482 }, { "epoch": 0.9664665906046926, "grad_norm": 4.538392066955566, "learning_rate": 2.828347457943914e-08, "loss": 0.2041, "step": 19483 }, { "epoch": 0.9665161962398928, "grad_norm": 12.807490348815918, "learning_rate": 2.8199936242018777e-08, "loss": 0.3908, "step": 19484 }, { "epoch": 0.966565801875093, "grad_norm": 10.195605278015137, "learning_rate": 2.8116521108053074e-08, "loss": 0.4135, "step": 19485 }, { "epoch": 0.9666154075102932, "grad_norm": 7.1659255027771, "learning_rate": 2.803322917960871e-08, "loss": 0.3128, "step": 19486 }, { "epoch": 0.9666650131454934, "grad_norm": 6.06153678894043, "learning_rate": 2.795006045875015e-08, "loss": 0.2086, "step": 19487 }, { "epoch": 0.9667146187806935, "grad_norm": 5.208564758300781, "learning_rate": 2.7867014947538517e-08, "loss": 0.3163, "step": 19488 }, { "epoch": 0.9667642244158936, "grad_norm": 4.922384738922119, "learning_rate": 2.778409264803106e-08, "loss": 0.2364, "step": 19489 }, { "epoch": 0.9668138300510938, "grad_norm": 4.700551986694336, "learning_rate": 2.7701293562282793e-08, "loss": 0.1584, "step": 19490 }, { "epoch": 0.966863435686294, "grad_norm": 5.979387283325195, "learning_rate": 2.761861769234597e-08, "loss": 0.1962, "step": 19491 }, { "epoch": 0.9669130413214941, "grad_norm": 4.855733871459961, "learning_rate": 2.7536065040268956e-08, "loss": 0.3018, "step": 19492 }, { "epoch": 0.9669626469566943, "grad_norm": 6.224314212799072, "learning_rate": 2.745363560809733e-08, "loss": 0.2627, "step": 19493 }, { "epoch": 0.9670122525918945, "grad_norm": 4.472717761993408, "learning_rate": 2.7371329397874457e-08, "loss": 0.2363, "step": 19494 }, { "epoch": 0.9670618582270946, "grad_norm": 7.756635665893555, "learning_rate": 2.7289146411638713e-08, "loss": 0.3512, "step": 19495 }, { "epoch": 0.9671114638622947, "grad_norm": 4.053355693817139, "learning_rate": 2.7207086651427904e-08, "loss": 0.2618, "step": 19496 }, { "epoch": 0.9671610694974949, "grad_norm": 8.934731483459473, "learning_rate": 2.7125150119274856e-08, "loss": 0.3493, "step": 19497 }, { "epoch": 0.9672106751326951, "grad_norm": 7.661153316497803, "learning_rate": 2.704333681720961e-08, "loss": 0.2622, "step": 19498 }, { "epoch": 0.9672602807678953, "grad_norm": 8.549032211303711, "learning_rate": 2.696164674726054e-08, "loss": 0.2503, "step": 19499 }, { "epoch": 0.9673098864030953, "grad_norm": 13.547324180603027, "learning_rate": 2.688007991145103e-08, "loss": 0.4337, "step": 19500 }, { "epoch": 0.9673594920382955, "grad_norm": 7.883616924285889, "learning_rate": 2.6798636311802794e-08, "loss": 0.2768, "step": 19501 }, { "epoch": 0.9674090976734957, "grad_norm": 11.485236167907715, "learning_rate": 2.671731595033422e-08, "loss": 0.2981, "step": 19502 }, { "epoch": 0.9674587033086959, "grad_norm": 7.227161407470703, "learning_rate": 2.663611882905981e-08, "loss": 0.2627, "step": 19503 }, { "epoch": 0.9675083089438961, "grad_norm": 5.334074974060059, "learning_rate": 2.6555044949992392e-08, "loss": 0.2917, "step": 19504 }, { "epoch": 0.9675579145790962, "grad_norm": 8.166852951049805, "learning_rate": 2.647409431514092e-08, "loss": 0.2843, "step": 19505 }, { "epoch": 0.9676075202142963, "grad_norm": 5.656879901885986, "learning_rate": 2.639326692651045e-08, "loss": 0.277, "step": 19506 }, { "epoch": 0.9676571258494965, "grad_norm": 6.616357803344727, "learning_rate": 2.63125627861055e-08, "loss": 0.2346, "step": 19507 }, { "epoch": 0.9677067314846967, "grad_norm": 31.99894142150879, "learning_rate": 2.6231981895924463e-08, "loss": 0.3351, "step": 19508 }, { "epoch": 0.9677563371198968, "grad_norm": 8.171670913696289, "learning_rate": 2.615152425796519e-08, "loss": 0.3776, "step": 19509 }, { "epoch": 0.967805942755097, "grad_norm": 5.013514995574951, "learning_rate": 2.6071189874220527e-08, "loss": 0.2844, "step": 19510 }, { "epoch": 0.9678555483902971, "grad_norm": 9.278793334960938, "learning_rate": 2.5990978746682215e-08, "loss": 0.3531, "step": 19511 }, { "epoch": 0.9679051540254973, "grad_norm": 7.771509170532227, "learning_rate": 2.5910890877337004e-08, "loss": 0.3103, "step": 19512 }, { "epoch": 0.9679547596606974, "grad_norm": 5.527402400970459, "learning_rate": 2.583092626817052e-08, "loss": 0.2252, "step": 19513 }, { "epoch": 0.9680043652958976, "grad_norm": 10.581283569335938, "learning_rate": 2.5751084921163405e-08, "loss": 0.2912, "step": 19514 }, { "epoch": 0.9680539709310978, "grad_norm": 5.735589027404785, "learning_rate": 2.567136683829463e-08, "loss": 0.3197, "step": 19515 }, { "epoch": 0.968103576566298, "grad_norm": 6.465695381164551, "learning_rate": 2.5591772021539285e-08, "loss": 0.2514, "step": 19516 }, { "epoch": 0.968153182201498, "grad_norm": 5.621885299682617, "learning_rate": 2.551230047287023e-08, "loss": 0.2707, "step": 19517 }, { "epoch": 0.9682027878366982, "grad_norm": 7.255664348602295, "learning_rate": 2.5432952194256456e-08, "loss": 0.3344, "step": 19518 }, { "epoch": 0.9682523934718984, "grad_norm": 15.641105651855469, "learning_rate": 2.5353727187664155e-08, "loss": 0.337, "step": 19519 }, { "epoch": 0.9683019991070986, "grad_norm": 3.4933557510375977, "learning_rate": 2.5274625455057323e-08, "loss": 0.2312, "step": 19520 }, { "epoch": 0.9683516047422988, "grad_norm": 13.665709495544434, "learning_rate": 2.5195646998395494e-08, "loss": 0.2928, "step": 19521 }, { "epoch": 0.9684012103774989, "grad_norm": 5.099363803863525, "learning_rate": 2.5116791819635444e-08, "loss": 0.2537, "step": 19522 }, { "epoch": 0.968450816012699, "grad_norm": 4.568922996520996, "learning_rate": 2.5038059920731717e-08, "loss": 0.2554, "step": 19523 }, { "epoch": 0.9685004216478992, "grad_norm": 7.724748611450195, "learning_rate": 2.495945130363553e-08, "loss": 0.2989, "step": 19524 }, { "epoch": 0.9685500272830994, "grad_norm": 8.50871753692627, "learning_rate": 2.4880965970294768e-08, "loss": 0.3128, "step": 19525 }, { "epoch": 0.9685996329182995, "grad_norm": 4.860337257385254, "learning_rate": 2.480260392265399e-08, "loss": 0.2232, "step": 19526 }, { "epoch": 0.9686492385534997, "grad_norm": 7.302404403686523, "learning_rate": 2.4724365162654973e-08, "loss": 0.3436, "step": 19527 }, { "epoch": 0.9686988441886998, "grad_norm": 10.057880401611328, "learning_rate": 2.4646249692236724e-08, "loss": 0.4608, "step": 19528 }, { "epoch": 0.9687484498239, "grad_norm": 11.445586204528809, "learning_rate": 2.4568257513335468e-08, "loss": 0.3403, "step": 19529 }, { "epoch": 0.9687980554591001, "grad_norm": 7.301465034484863, "learning_rate": 2.4490388627883e-08, "loss": 0.3, "step": 19530 }, { "epoch": 0.9688476610943003, "grad_norm": 8.38611888885498, "learning_rate": 2.4412643037809437e-08, "loss": 0.2493, "step": 19531 }, { "epoch": 0.9688972667295005, "grad_norm": 4.713995933532715, "learning_rate": 2.433502074504157e-08, "loss": 0.1999, "step": 19532 }, { "epoch": 0.9689468723647007, "grad_norm": 3.066002607345581, "learning_rate": 2.4257521751502312e-08, "loss": 0.1524, "step": 19533 }, { "epoch": 0.9689964779999007, "grad_norm": 9.846894264221191, "learning_rate": 2.418014605911234e-08, "loss": 0.2796, "step": 19534 }, { "epoch": 0.9690460836351009, "grad_norm": 13.380441665649414, "learning_rate": 2.4102893669789572e-08, "loss": 0.3571, "step": 19535 }, { "epoch": 0.9690956892703011, "grad_norm": 4.493326663970947, "learning_rate": 2.4025764585447475e-08, "loss": 0.2777, "step": 19536 }, { "epoch": 0.9691452949055013, "grad_norm": 10.133124351501465, "learning_rate": 2.3948758807998407e-08, "loss": 0.4051, "step": 19537 }, { "epoch": 0.9691949005407015, "grad_norm": 5.607681751251221, "learning_rate": 2.3871876339349732e-08, "loss": 0.2656, "step": 19538 }, { "epoch": 0.9692445061759016, "grad_norm": 9.250421524047852, "learning_rate": 2.379511718140659e-08, "loss": 0.3407, "step": 19539 }, { "epoch": 0.9692941118111017, "grad_norm": 16.249267578125, "learning_rate": 2.3718481336071908e-08, "loss": 0.3649, "step": 19540 }, { "epoch": 0.9693437174463019, "grad_norm": 5.2055792808532715, "learning_rate": 2.3641968805244164e-08, "loss": 0.3248, "step": 19541 }, { "epoch": 0.9693933230815021, "grad_norm": 6.791497707366943, "learning_rate": 2.3565579590819065e-08, "loss": 0.3044, "step": 19542 }, { "epoch": 0.9694429287167022, "grad_norm": 6.583644390106201, "learning_rate": 2.348931369469065e-08, "loss": 0.3181, "step": 19543 }, { "epoch": 0.9694925343519024, "grad_norm": 11.977058410644531, "learning_rate": 2.3413171118747413e-08, "loss": 0.3246, "step": 19544 }, { "epoch": 0.9695421399871025, "grad_norm": 7.877555847167969, "learning_rate": 2.3337151864877838e-08, "loss": 0.3098, "step": 19545 }, { "epoch": 0.9695917456223027, "grad_norm": 6.423076629638672, "learning_rate": 2.3261255934964312e-08, "loss": 0.2021, "step": 19546 }, { "epoch": 0.9696413512575028, "grad_norm": 4.1795549392700195, "learning_rate": 2.3185483330888104e-08, "loss": 0.2856, "step": 19547 }, { "epoch": 0.969690956892703, "grad_norm": 5.060952186584473, "learning_rate": 2.3109834054526604e-08, "loss": 0.3359, "step": 19548 }, { "epoch": 0.9697405625279032, "grad_norm": 5.240940093994141, "learning_rate": 2.3034308107754977e-08, "loss": 0.1866, "step": 19549 }, { "epoch": 0.9697901681631034, "grad_norm": 6.789581298828125, "learning_rate": 2.2958905492444504e-08, "loss": 0.195, "step": 19550 }, { "epoch": 0.9698397737983034, "grad_norm": 7.525742530822754, "learning_rate": 2.288362621046425e-08, "loss": 0.2933, "step": 19551 }, { "epoch": 0.9698893794335036, "grad_norm": 6.752322196960449, "learning_rate": 2.2808470263678272e-08, "loss": 0.3073, "step": 19552 }, { "epoch": 0.9699389850687038, "grad_norm": 7.06156587600708, "learning_rate": 2.273343765395064e-08, "loss": 0.308, "step": 19553 }, { "epoch": 0.969988590703904, "grad_norm": 7.3552093505859375, "learning_rate": 2.265852838313931e-08, "loss": 0.2584, "step": 19554 }, { "epoch": 0.9700381963391042, "grad_norm": 6.95412015914917, "learning_rate": 2.258374245310113e-08, "loss": 0.3365, "step": 19555 }, { "epoch": 0.9700878019743043, "grad_norm": 6.1888813972473145, "learning_rate": 2.250907986568962e-08, "loss": 0.2674, "step": 19556 }, { "epoch": 0.9701374076095044, "grad_norm": 7.1697587966918945, "learning_rate": 2.243454062275441e-08, "loss": 0.2899, "step": 19557 }, { "epoch": 0.9701870132447046, "grad_norm": 7.555652141571045, "learning_rate": 2.2360124726142906e-08, "loss": 0.3103, "step": 19558 }, { "epoch": 0.9702366188799048, "grad_norm": 5.704811096191406, "learning_rate": 2.2285832177699195e-08, "loss": 0.2643, "step": 19559 }, { "epoch": 0.9702862245151049, "grad_norm": 10.546355247497559, "learning_rate": 2.221166297926458e-08, "loss": 0.2458, "step": 19560 }, { "epoch": 0.9703358301503051, "grad_norm": 7.927308559417725, "learning_rate": 2.213761713267648e-08, "loss": 0.3527, "step": 19561 }, { "epoch": 0.9703854357855052, "grad_norm": 4.314023017883301, "learning_rate": 2.206369463976954e-08, "loss": 0.1973, "step": 19562 }, { "epoch": 0.9704350414207054, "grad_norm": 6.919195652008057, "learning_rate": 2.198989550237618e-08, "loss": 0.2875, "step": 19563 }, { "epoch": 0.9704846470559055, "grad_norm": 5.60263729095459, "learning_rate": 2.1916219722325494e-08, "loss": 0.2129, "step": 19564 }, { "epoch": 0.9705342526911057, "grad_norm": 5.7593092918396, "learning_rate": 2.1842667301442134e-08, "loss": 0.3062, "step": 19565 }, { "epoch": 0.9705838583263059, "grad_norm": 23.767168045043945, "learning_rate": 2.176923824154964e-08, "loss": 0.1967, "step": 19566 }, { "epoch": 0.9706334639615061, "grad_norm": 7.235864639282227, "learning_rate": 2.1695932544467113e-08, "loss": 0.2714, "step": 19567 }, { "epoch": 0.9706830695967061, "grad_norm": 5.887807369232178, "learning_rate": 2.1622750212011434e-08, "loss": 0.2379, "step": 19568 }, { "epoch": 0.9707326752319063, "grad_norm": 10.568436622619629, "learning_rate": 2.154969124599615e-08, "loss": 0.3317, "step": 19569 }, { "epoch": 0.9707822808671065, "grad_norm": 9.512401580810547, "learning_rate": 2.1476755648231485e-08, "loss": 0.3123, "step": 19570 }, { "epoch": 0.9708318865023067, "grad_norm": 3.7152204513549805, "learning_rate": 2.140394342052432e-08, "loss": 0.231, "step": 19571 }, { "epoch": 0.9708814921375069, "grad_norm": 5.4635090827941895, "learning_rate": 2.1331254564680436e-08, "loss": 0.2794, "step": 19572 }, { "epoch": 0.970931097772707, "grad_norm": 11.189817428588867, "learning_rate": 2.1258689082499506e-08, "loss": 0.2703, "step": 19573 }, { "epoch": 0.9709807034079071, "grad_norm": 10.815755844116211, "learning_rate": 2.1186246975780646e-08, "loss": 0.4009, "step": 19574 }, { "epoch": 0.9710303090431073, "grad_norm": 4.977697372436523, "learning_rate": 2.1113928246318528e-08, "loss": 0.2367, "step": 19575 }, { "epoch": 0.9710799146783075, "grad_norm": 7.777148723602295, "learning_rate": 2.1041732895905608e-08, "loss": 0.3616, "step": 19576 }, { "epoch": 0.9711295203135076, "grad_norm": 18.911460876464844, "learning_rate": 2.096966092633046e-08, "loss": 0.4352, "step": 19577 }, { "epoch": 0.9711791259487078, "grad_norm": 6.413588523864746, "learning_rate": 2.0897712339379983e-08, "loss": 0.3011, "step": 19578 }, { "epoch": 0.9712287315839079, "grad_norm": 6.629293441772461, "learning_rate": 2.0825887136835533e-08, "loss": 0.2108, "step": 19579 }, { "epoch": 0.9712783372191081, "grad_norm": 11.838496208190918, "learning_rate": 2.0754185320479013e-08, "loss": 0.3514, "step": 19580 }, { "epoch": 0.9713279428543082, "grad_norm": 8.124545097351074, "learning_rate": 2.0682606892085122e-08, "loss": 0.2382, "step": 19581 }, { "epoch": 0.9713775484895084, "grad_norm": 16.432275772094727, "learning_rate": 2.0611151853429658e-08, "loss": 0.2909, "step": 19582 }, { "epoch": 0.9714271541247086, "grad_norm": 8.223026275634766, "learning_rate": 2.0539820206281758e-08, "loss": 0.3691, "step": 19583 }, { "epoch": 0.9714767597599088, "grad_norm": 6.90492582321167, "learning_rate": 2.0468611952409455e-08, "loss": 0.2281, "step": 19584 }, { "epoch": 0.9715263653951088, "grad_norm": 7.973618984222412, "learning_rate": 2.039752709357745e-08, "loss": 0.3126, "step": 19585 }, { "epoch": 0.971575971030309, "grad_norm": 4.866107940673828, "learning_rate": 2.0326565631547667e-08, "loss": 0.181, "step": 19586 }, { "epoch": 0.9716255766655092, "grad_norm": 8.629890441894531, "learning_rate": 2.0255727568077587e-08, "loss": 0.291, "step": 19587 }, { "epoch": 0.9716751823007094, "grad_norm": 6.895249366760254, "learning_rate": 2.0185012904923585e-08, "loss": 0.2862, "step": 19588 }, { "epoch": 0.9717247879359096, "grad_norm": 4.764701843261719, "learning_rate": 2.0114421643837035e-08, "loss": 0.3414, "step": 19589 }, { "epoch": 0.9717743935711097, "grad_norm": 6.766246795654297, "learning_rate": 2.0043953786568204e-08, "loss": 0.3023, "step": 19590 }, { "epoch": 0.9718239992063098, "grad_norm": 6.673396110534668, "learning_rate": 1.997360933486292e-08, "loss": 0.3273, "step": 19591 }, { "epoch": 0.97187360484151, "grad_norm": 14.290046691894531, "learning_rate": 1.990338829046423e-08, "loss": 0.3507, "step": 19592 }, { "epoch": 0.9719232104767102, "grad_norm": 5.807436943054199, "learning_rate": 1.9833290655111858e-08, "loss": 0.273, "step": 19593 }, { "epoch": 0.9719728161119103, "grad_norm": 8.639464378356934, "learning_rate": 1.9763316430543854e-08, "loss": 0.3192, "step": 19594 }, { "epoch": 0.9720224217471105, "grad_norm": 5.162454128265381, "learning_rate": 1.9693465618493835e-08, "loss": 0.2473, "step": 19595 }, { "epoch": 0.9720720273823106, "grad_norm": 7.5679612159729, "learning_rate": 1.962373822069208e-08, "loss": 0.197, "step": 19596 }, { "epoch": 0.9721216330175108, "grad_norm": 7.043304443359375, "learning_rate": 1.9554134238867206e-08, "loss": 0.3035, "step": 19597 }, { "epoch": 0.9721712386527109, "grad_norm": 5.1643967628479, "learning_rate": 1.948465367474395e-08, "loss": 0.3078, "step": 19598 }, { "epoch": 0.9722208442879111, "grad_norm": 6.794783115386963, "learning_rate": 1.9415296530043705e-08, "loss": 0.2986, "step": 19599 }, { "epoch": 0.9722704499231113, "grad_norm": 6.052535533905029, "learning_rate": 1.93460628064851e-08, "loss": 0.2332, "step": 19600 }, { "epoch": 0.9723200555583115, "grad_norm": 7.335382461547852, "learning_rate": 1.927695250578454e-08, "loss": 0.3229, "step": 19601 }, { "epoch": 0.9723696611935115, "grad_norm": 10.96066951751709, "learning_rate": 1.920796562965399e-08, "loss": 0.3806, "step": 19602 }, { "epoch": 0.9724192668287117, "grad_norm": 7.234040260314941, "learning_rate": 1.9139102179802638e-08, "loss": 0.2575, "step": 19603 }, { "epoch": 0.9724688724639119, "grad_norm": 8.02143383026123, "learning_rate": 1.9070362157938006e-08, "loss": 0.2596, "step": 19604 }, { "epoch": 0.9725184780991121, "grad_norm": 12.89395523071289, "learning_rate": 1.900174556576262e-08, "loss": 0.369, "step": 19605 }, { "epoch": 0.9725680837343122, "grad_norm": 8.367192268371582, "learning_rate": 1.8933252404977344e-08, "loss": 0.308, "step": 19606 }, { "epoch": 0.9726176893695124, "grad_norm": 4.803760528564453, "learning_rate": 1.886488267727915e-08, "loss": 0.292, "step": 19607 }, { "epoch": 0.9726672950047125, "grad_norm": 3.421889305114746, "learning_rate": 1.87966363843628e-08, "loss": 0.2126, "step": 19608 }, { "epoch": 0.9727169006399127, "grad_norm": 4.2533745765686035, "learning_rate": 1.87285135279186e-08, "loss": 0.2044, "step": 19609 }, { "epoch": 0.9727665062751129, "grad_norm": 7.237636089324951, "learning_rate": 1.8660514109634652e-08, "loss": 0.3434, "step": 19610 }, { "epoch": 0.972816111910313, "grad_norm": 5.544144153594971, "learning_rate": 1.8592638131197383e-08, "loss": 0.3163, "step": 19611 }, { "epoch": 0.9728657175455132, "grad_norm": 12.644244194030762, "learning_rate": 1.8524885594287112e-08, "loss": 0.399, "step": 19612 }, { "epoch": 0.9729153231807133, "grad_norm": 11.361942291259766, "learning_rate": 1.8457256500584163e-08, "loss": 0.2909, "step": 19613 }, { "epoch": 0.9729649288159135, "grad_norm": 7.068279266357422, "learning_rate": 1.838975085176331e-08, "loss": 0.3037, "step": 19614 }, { "epoch": 0.9730145344511136, "grad_norm": 4.115576267242432, "learning_rate": 1.8322368649497656e-08, "loss": 0.2617, "step": 19615 }, { "epoch": 0.9730641400863138, "grad_norm": 4.894372463226318, "learning_rate": 1.825510989545809e-08, "loss": 0.2077, "step": 19616 }, { "epoch": 0.973113745721514, "grad_norm": 6.122842788696289, "learning_rate": 1.8187974591309944e-08, "loss": 0.3221, "step": 19617 }, { "epoch": 0.9731633513567142, "grad_norm": 7.282337188720703, "learning_rate": 1.8120962738716884e-08, "loss": 0.2089, "step": 19618 }, { "epoch": 0.9732129569919142, "grad_norm": 25.22010612487793, "learning_rate": 1.8054074339340365e-08, "loss": 0.3479, "step": 19619 }, { "epoch": 0.9732625626271144, "grad_norm": 4.79408073425293, "learning_rate": 1.7987309394837394e-08, "loss": 0.286, "step": 19620 }, { "epoch": 0.9733121682623146, "grad_norm": 5.614243984222412, "learning_rate": 1.7920667906862198e-08, "loss": 0.3071, "step": 19621 }, { "epoch": 0.9733617738975148, "grad_norm": 11.581028938293457, "learning_rate": 1.7854149877066797e-08, "loss": 0.2941, "step": 19622 }, { "epoch": 0.973411379532715, "grad_norm": 6.650128364562988, "learning_rate": 1.7787755307099307e-08, "loss": 0.2719, "step": 19623 }, { "epoch": 0.9734609851679151, "grad_norm": 7.525557994842529, "learning_rate": 1.7721484198604532e-08, "loss": 0.3102, "step": 19624 }, { "epoch": 0.9735105908031152, "grad_norm": 5.562171459197998, "learning_rate": 1.76553365532256e-08, "loss": 0.2407, "step": 19625 }, { "epoch": 0.9735601964383154, "grad_norm": 6.773600101470947, "learning_rate": 1.7589312372601197e-08, "loss": 0.2425, "step": 19626 }, { "epoch": 0.9736098020735156, "grad_norm": 8.019652366638184, "learning_rate": 1.752341165836724e-08, "loss": 0.3183, "step": 19627 }, { "epoch": 0.9736594077087157, "grad_norm": 6.061839580535889, "learning_rate": 1.7457634412156864e-08, "loss": 0.2886, "step": 19628 }, { "epoch": 0.9737090133439159, "grad_norm": 7.04910945892334, "learning_rate": 1.7391980635600436e-08, "loss": 0.1589, "step": 19629 }, { "epoch": 0.973758618979116, "grad_norm": 11.386894226074219, "learning_rate": 1.732645033032443e-08, "loss": 0.3139, "step": 19630 }, { "epoch": 0.9738082246143162, "grad_norm": 4.947815895080566, "learning_rate": 1.7261043497952546e-08, "loss": 0.2728, "step": 19631 }, { "epoch": 0.9738578302495163, "grad_norm": 4.833279609680176, "learning_rate": 1.7195760140106264e-08, "loss": 0.1977, "step": 19632 }, { "epoch": 0.9739074358847165, "grad_norm": 8.27418327331543, "learning_rate": 1.7130600258403184e-08, "loss": 0.3322, "step": 19633 }, { "epoch": 0.9739570415199167, "grad_norm": 5.411259651184082, "learning_rate": 1.7065563854457567e-08, "loss": 0.1582, "step": 19634 }, { "epoch": 0.9740066471551169, "grad_norm": 11.678939819335938, "learning_rate": 1.7000650929881457e-08, "loss": 0.2863, "step": 19635 }, { "epoch": 0.9740562527903169, "grad_norm": 7.0607008934021, "learning_rate": 1.693586148628301e-08, "loss": 0.2897, "step": 19636 }, { "epoch": 0.9741058584255171, "grad_norm": 4.378319263458252, "learning_rate": 1.6871195525268724e-08, "loss": 0.2573, "step": 19637 }, { "epoch": 0.9741554640607173, "grad_norm": 5.581948280334473, "learning_rate": 1.6806653048439536e-08, "loss": 0.2279, "step": 19638 }, { "epoch": 0.9742050696959175, "grad_norm": 6.299326419830322, "learning_rate": 1.6742234057395833e-08, "loss": 0.2491, "step": 19639 }, { "epoch": 0.9742546753311176, "grad_norm": 3.6590754985809326, "learning_rate": 1.667793855373412e-08, "loss": 0.2424, "step": 19640 }, { "epoch": 0.9743042809663178, "grad_norm": 5.1327619552612305, "learning_rate": 1.661376653904645e-08, "loss": 0.2446, "step": 19641 }, { "epoch": 0.9743538866015179, "grad_norm": 5.648082733154297, "learning_rate": 1.6549718014924888e-08, "loss": 0.2261, "step": 19642 }, { "epoch": 0.9744034922367181, "grad_norm": 11.2343168258667, "learning_rate": 1.6485792982954828e-08, "loss": 0.3155, "step": 19643 }, { "epoch": 0.9744530978719183, "grad_norm": 8.352288246154785, "learning_rate": 1.642199144472112e-08, "loss": 0.321, "step": 19644 }, { "epoch": 0.9745027035071184, "grad_norm": 13.710844039916992, "learning_rate": 1.635831340180527e-08, "loss": 0.3772, "step": 19645 }, { "epoch": 0.9745523091423186, "grad_norm": 7.978039741516113, "learning_rate": 1.629475885578491e-08, "loss": 0.3339, "step": 19646 }, { "epoch": 0.9746019147775187, "grad_norm": 7.717109680175781, "learning_rate": 1.6231327808234332e-08, "loss": 0.2891, "step": 19647 }, { "epoch": 0.9746515204127189, "grad_norm": 8.865334510803223, "learning_rate": 1.6168020260725615e-08, "loss": 0.2592, "step": 19648 }, { "epoch": 0.974701126047919, "grad_norm": 18.988971710205078, "learning_rate": 1.610483621482861e-08, "loss": 0.3991, "step": 19649 }, { "epoch": 0.9747507316831192, "grad_norm": 6.5302534103393555, "learning_rate": 1.6041775672107075e-08, "loss": 0.2992, "step": 19650 }, { "epoch": 0.9748003373183194, "grad_norm": 7.242983818054199, "learning_rate": 1.597883863412586e-08, "loss": 0.2184, "step": 19651 }, { "epoch": 0.9748499429535196, "grad_norm": 8.795523643493652, "learning_rate": 1.5916025102443165e-08, "loss": 0.3045, "step": 19652 }, { "epoch": 0.9748995485887196, "grad_norm": 4.350183963775635, "learning_rate": 1.5853335078615527e-08, "loss": 0.1586, "step": 19653 }, { "epoch": 0.9749491542239198, "grad_norm": 7.146099090576172, "learning_rate": 1.5790768564197255e-08, "loss": 0.2582, "step": 19654 }, { "epoch": 0.97499875985912, "grad_norm": 7.869629383087158, "learning_rate": 1.572832556073822e-08, "loss": 0.32, "step": 19655 }, { "epoch": 0.9750483654943202, "grad_norm": 5.174501419067383, "learning_rate": 1.5666006069786076e-08, "loss": 0.2622, "step": 19656 }, { "epoch": 0.9750979711295203, "grad_norm": 7.621548175811768, "learning_rate": 1.560381009288514e-08, "loss": 0.3715, "step": 19657 }, { "epoch": 0.9751475767647205, "grad_norm": 15.930977821350098, "learning_rate": 1.554173763157585e-08, "loss": 0.2481, "step": 19658 }, { "epoch": 0.9751971823999206, "grad_norm": 11.289217948913574, "learning_rate": 1.5479788687397524e-08, "loss": 0.3456, "step": 19659 }, { "epoch": 0.9752467880351208, "grad_norm": 3.325226068496704, "learning_rate": 1.5417963261884493e-08, "loss": 0.2098, "step": 19660 }, { "epoch": 0.975296393670321, "grad_norm": 4.928473949432373, "learning_rate": 1.5356261356569424e-08, "loss": 0.2929, "step": 19661 }, { "epoch": 0.9753459993055211, "grad_norm": 5.6874284744262695, "learning_rate": 1.529468297298109e-08, "loss": 0.1982, "step": 19662 }, { "epoch": 0.9753956049407213, "grad_norm": 7.75847053527832, "learning_rate": 1.5233228112644938e-08, "loss": 0.3509, "step": 19663 }, { "epoch": 0.9754452105759214, "grad_norm": 4.4624762535095215, "learning_rate": 1.5171896777084194e-08, "loss": 0.2186, "step": 19664 }, { "epoch": 0.9754948162111216, "grad_norm": 10.168261528015137, "learning_rate": 1.5110688967818756e-08, "loss": 0.4026, "step": 19665 }, { "epoch": 0.9755444218463217, "grad_norm": 5.203671932220459, "learning_rate": 1.5049604686365738e-08, "loss": 0.3122, "step": 19666 }, { "epoch": 0.9755940274815219, "grad_norm": 4.256291389465332, "learning_rate": 1.498864393423838e-08, "loss": 0.2594, "step": 19667 }, { "epoch": 0.9756436331167221, "grad_norm": 8.373602867126465, "learning_rate": 1.4927806712947136e-08, "loss": 0.2747, "step": 19668 }, { "epoch": 0.9756932387519223, "grad_norm": 4.9219651222229, "learning_rate": 1.486709302399969e-08, "loss": 0.2192, "step": 19669 }, { "epoch": 0.9757428443871223, "grad_norm": 6.9345479011535645, "learning_rate": 1.4806502868900952e-08, "loss": 0.3261, "step": 19670 }, { "epoch": 0.9757924500223225, "grad_norm": 18.80384063720703, "learning_rate": 1.474603624915194e-08, "loss": 0.5177, "step": 19671 }, { "epoch": 0.9758420556575227, "grad_norm": 4.388735294342041, "learning_rate": 1.4685693166251458e-08, "loss": 0.2457, "step": 19672 }, { "epoch": 0.9758916612927229, "grad_norm": 5.430013179779053, "learning_rate": 1.4625473621694419e-08, "loss": 0.2739, "step": 19673 }, { "epoch": 0.975941266927923, "grad_norm": 13.35968017578125, "learning_rate": 1.4565377616974075e-08, "loss": 0.2548, "step": 19674 }, { "epoch": 0.9759908725631232, "grad_norm": 4.406520843505859, "learning_rate": 1.4505405153577567e-08, "loss": 0.2669, "step": 19675 }, { "epoch": 0.9760404781983233, "grad_norm": 7.06452751159668, "learning_rate": 1.4445556232993152e-08, "loss": 0.282, "step": 19676 }, { "epoch": 0.9760900838335235, "grad_norm": 6.3155083656311035, "learning_rate": 1.4385830856702976e-08, "loss": 0.2063, "step": 19677 }, { "epoch": 0.9761396894687236, "grad_norm": 7.43228006362915, "learning_rate": 1.4326229026186967e-08, "loss": 0.2517, "step": 19678 }, { "epoch": 0.9761892951039238, "grad_norm": 6.345419406890869, "learning_rate": 1.4266750742922276e-08, "loss": 0.3354, "step": 19679 }, { "epoch": 0.976238900739124, "grad_norm": 6.141888618469238, "learning_rate": 1.4207396008382723e-08, "loss": 0.2842, "step": 19680 }, { "epoch": 0.9762885063743241, "grad_norm": 7.733271598815918, "learning_rate": 1.4148164824039356e-08, "loss": 0.3285, "step": 19681 }, { "epoch": 0.9763381120095243, "grad_norm": 4.509158611297607, "learning_rate": 1.408905719135989e-08, "loss": 0.1901, "step": 19682 }, { "epoch": 0.9763877176447244, "grad_norm": 13.809399604797363, "learning_rate": 1.4030073111808707e-08, "loss": 0.3697, "step": 19683 }, { "epoch": 0.9764373232799246, "grad_norm": 6.296395778656006, "learning_rate": 1.3971212586847416e-08, "loss": 0.3082, "step": 19684 }, { "epoch": 0.9764869289151248, "grad_norm": 6.537017822265625, "learning_rate": 1.3912475617935407e-08, "loss": 0.2973, "step": 19685 }, { "epoch": 0.976536534550325, "grad_norm": 8.120450973510742, "learning_rate": 1.3853862206527623e-08, "loss": 0.2597, "step": 19686 }, { "epoch": 0.976586140185525, "grad_norm": 8.352367401123047, "learning_rate": 1.3795372354076797e-08, "loss": 0.2817, "step": 19687 }, { "epoch": 0.9766357458207252, "grad_norm": 8.063251495361328, "learning_rate": 1.3737006062031766e-08, "loss": 0.1845, "step": 19688 }, { "epoch": 0.9766853514559254, "grad_norm": 11.606021881103516, "learning_rate": 1.3678763331839151e-08, "loss": 0.3928, "step": 19689 }, { "epoch": 0.9767349570911256, "grad_norm": 8.632596969604492, "learning_rate": 1.3620644164942798e-08, "loss": 0.263, "step": 19690 }, { "epoch": 0.9767845627263257, "grad_norm": 7.852653980255127, "learning_rate": 1.3562648562782665e-08, "loss": 0.3343, "step": 19691 }, { "epoch": 0.9768341683615259, "grad_norm": 4.010926723480225, "learning_rate": 1.3504776526795382e-08, "loss": 0.2067, "step": 19692 }, { "epoch": 0.976883773996726, "grad_norm": 6.856979846954346, "learning_rate": 1.3447028058415356e-08, "loss": 0.2325, "step": 19693 }, { "epoch": 0.9769333796319262, "grad_norm": 7.87985372543335, "learning_rate": 1.3389403159073665e-08, "loss": 0.256, "step": 19694 }, { "epoch": 0.9769829852671263, "grad_norm": 5.416036605834961, "learning_rate": 1.3331901830198058e-08, "loss": 0.2353, "step": 19695 }, { "epoch": 0.9770325909023265, "grad_norm": 4.440287113189697, "learning_rate": 1.3274524073213501e-08, "loss": 0.2778, "step": 19696 }, { "epoch": 0.9770821965375267, "grad_norm": 4.682826519012451, "learning_rate": 1.3217269889542194e-08, "loss": 0.2611, "step": 19697 }, { "epoch": 0.9771318021727268, "grad_norm": 4.384463310241699, "learning_rate": 1.3160139280603002e-08, "loss": 0.1195, "step": 19698 }, { "epoch": 0.977181407807927, "grad_norm": 11.038570404052734, "learning_rate": 1.3103132247810901e-08, "loss": 0.3889, "step": 19699 }, { "epoch": 0.9772310134431271, "grad_norm": 5.891327381134033, "learning_rate": 1.3046248792579763e-08, "loss": 0.2813, "step": 19700 }, { "epoch": 0.9772806190783273, "grad_norm": 5.775862693786621, "learning_rate": 1.298948891631735e-08, "loss": 0.2739, "step": 19701 }, { "epoch": 0.9773302247135275, "grad_norm": 6.991873264312744, "learning_rate": 1.293285262043198e-08, "loss": 0.2631, "step": 19702 }, { "epoch": 0.9773798303487277, "grad_norm": 7.741660118103027, "learning_rate": 1.2876339906326418e-08, "loss": 0.3137, "step": 19703 }, { "epoch": 0.9774294359839277, "grad_norm": 5.080777168273926, "learning_rate": 1.2819950775400659e-08, "loss": 0.2729, "step": 19704 }, { "epoch": 0.9774790416191279, "grad_norm": 6.612114429473877, "learning_rate": 1.2763685229053025e-08, "loss": 0.3319, "step": 19705 }, { "epoch": 0.9775286472543281, "grad_norm": 7.000090599060059, "learning_rate": 1.2707543268676848e-08, "loss": 0.2445, "step": 19706 }, { "epoch": 0.9775782528895283, "grad_norm": 5.032207489013672, "learning_rate": 1.265152489566379e-08, "loss": 0.2836, "step": 19707 }, { "epoch": 0.9776278585247284, "grad_norm": 7.05936336517334, "learning_rate": 1.259563011140219e-08, "loss": 0.3202, "step": 19708 }, { "epoch": 0.9776774641599286, "grad_norm": 6.465931415557861, "learning_rate": 1.2539858917276492e-08, "loss": 0.1913, "step": 19709 }, { "epoch": 0.9777270697951287, "grad_norm": 6.159189701080322, "learning_rate": 1.2484211314669481e-08, "loss": 0.2115, "step": 19710 }, { "epoch": 0.9777766754303289, "grad_norm": 4.538697719573975, "learning_rate": 1.2428687304959497e-08, "loss": 0.2559, "step": 19711 }, { "epoch": 0.977826281065529, "grad_norm": 7.98756217956543, "learning_rate": 1.2373286889523218e-08, "loss": 0.2817, "step": 19712 }, { "epoch": 0.9778758867007292, "grad_norm": 11.603503227233887, "learning_rate": 1.2318010069732877e-08, "loss": 0.2863, "step": 19713 }, { "epoch": 0.9779254923359294, "grad_norm": 8.642760276794434, "learning_rate": 1.2262856846958494e-08, "loss": 0.2787, "step": 19714 }, { "epoch": 0.9779750979711295, "grad_norm": 6.549655914306641, "learning_rate": 1.2207827222566194e-08, "loss": 0.2001, "step": 19715 }, { "epoch": 0.9780247036063296, "grad_norm": 5.751637935638428, "learning_rate": 1.2152921197920997e-08, "loss": 0.3358, "step": 19716 }, { "epoch": 0.9780743092415298, "grad_norm": 8.524810791015625, "learning_rate": 1.2098138774382373e-08, "loss": 0.3875, "step": 19717 }, { "epoch": 0.97812391487673, "grad_norm": 8.622041702270508, "learning_rate": 1.2043479953307568e-08, "loss": 0.2189, "step": 19718 }, { "epoch": 0.9781735205119302, "grad_norm": 5.42014741897583, "learning_rate": 1.1988944736052165e-08, "loss": 0.2427, "step": 19719 }, { "epoch": 0.9782231261471304, "grad_norm": 10.078123092651367, "learning_rate": 1.1934533123966752e-08, "loss": 0.3672, "step": 19720 }, { "epoch": 0.9782727317823304, "grad_norm": 4.780080795288086, "learning_rate": 1.1880245118400247e-08, "loss": 0.2103, "step": 19721 }, { "epoch": 0.9783223374175306, "grad_norm": 7.200533390045166, "learning_rate": 1.1826080720697686e-08, "loss": 0.3265, "step": 19722 }, { "epoch": 0.9783719430527308, "grad_norm": 6.987323760986328, "learning_rate": 1.1772039932200774e-08, "loss": 0.2042, "step": 19723 }, { "epoch": 0.978421548687931, "grad_norm": 6.327686309814453, "learning_rate": 1.171812275424955e-08, "loss": 0.3099, "step": 19724 }, { "epoch": 0.9784711543231311, "grad_norm": 8.742945671081543, "learning_rate": 1.1664329188179612e-08, "loss": 0.3832, "step": 19725 }, { "epoch": 0.9785207599583313, "grad_norm": 9.02108097076416, "learning_rate": 1.1610659235324339e-08, "loss": 0.2406, "step": 19726 }, { "epoch": 0.9785703655935314, "grad_norm": 5.624812602996826, "learning_rate": 1.1557112897013223e-08, "loss": 0.1382, "step": 19727 }, { "epoch": 0.9786199712287316, "grad_norm": 4.291807651519775, "learning_rate": 1.150369017457298e-08, "loss": 0.208, "step": 19728 }, { "epoch": 0.9786695768639317, "grad_norm": 11.776795387268066, "learning_rate": 1.1450391069328104e-08, "loss": 0.426, "step": 19729 }, { "epoch": 0.9787191824991319, "grad_norm": 16.433561325073242, "learning_rate": 1.1397215582599209e-08, "loss": 0.4168, "step": 19730 }, { "epoch": 0.9787687881343321, "grad_norm": 3.3375210762023926, "learning_rate": 1.1344163715704126e-08, "loss": 0.1897, "step": 19731 }, { "epoch": 0.9788183937695322, "grad_norm": 10.596595764160156, "learning_rate": 1.1291235469956807e-08, "loss": 0.1965, "step": 19732 }, { "epoch": 0.9788679994047323, "grad_norm": 7.40064811706543, "learning_rate": 1.1238430846669535e-08, "loss": 0.2755, "step": 19733 }, { "epoch": 0.9789176050399325, "grad_norm": 10.318948745727539, "learning_rate": 1.1185749847150152e-08, "loss": 0.4026, "step": 19734 }, { "epoch": 0.9789672106751327, "grad_norm": 4.68938684463501, "learning_rate": 1.113319247270539e-08, "loss": 0.3151, "step": 19735 }, { "epoch": 0.9790168163103329, "grad_norm": 6.9854936599731445, "learning_rate": 1.108075872463643e-08, "loss": 0.3431, "step": 19736 }, { "epoch": 0.9790664219455331, "grad_norm": 7.275058746337891, "learning_rate": 1.1028448604242792e-08, "loss": 0.2759, "step": 19737 }, { "epoch": 0.9791160275807331, "grad_norm": 6.103118896484375, "learning_rate": 1.0976262112821213e-08, "loss": 0.299, "step": 19738 }, { "epoch": 0.9791656332159333, "grad_norm": 7.584556579589844, "learning_rate": 1.0924199251664546e-08, "loss": 0.2504, "step": 19739 }, { "epoch": 0.9792152388511335, "grad_norm": 10.28790283203125, "learning_rate": 1.0872260022062875e-08, "loss": 0.2638, "step": 19740 }, { "epoch": 0.9792648444863337, "grad_norm": 5.418117523193359, "learning_rate": 1.0820444425303501e-08, "loss": 0.2308, "step": 19741 }, { "epoch": 0.9793144501215338, "grad_norm": 6.096280574798584, "learning_rate": 1.0768752462670951e-08, "loss": 0.2382, "step": 19742 }, { "epoch": 0.979364055756734, "grad_norm": 4.601860046386719, "learning_rate": 1.0717184135444759e-08, "loss": 0.2348, "step": 19743 }, { "epoch": 0.9794136613919341, "grad_norm": 6.703986644744873, "learning_rate": 1.06657394449039e-08, "loss": 0.2212, "step": 19744 }, { "epoch": 0.9794632670271343, "grad_norm": 9.476422309875488, "learning_rate": 1.0614418392323467e-08, "loss": 0.3966, "step": 19745 }, { "epoch": 0.9795128726623344, "grad_norm": 5.671013832092285, "learning_rate": 1.0563220978974109e-08, "loss": 0.1628, "step": 19746 }, { "epoch": 0.9795624782975346, "grad_norm": 10.03283405303955, "learning_rate": 1.0512147206125367e-08, "loss": 0.2894, "step": 19747 }, { "epoch": 0.9796120839327348, "grad_norm": 13.071877479553223, "learning_rate": 1.046119707504234e-08, "loss": 0.3392, "step": 19748 }, { "epoch": 0.9796616895679349, "grad_norm": 19.381603240966797, "learning_rate": 1.0410370586987906e-08, "loss": 0.3858, "step": 19749 }, { "epoch": 0.979711295203135, "grad_norm": 8.201580047607422, "learning_rate": 1.0359667743221613e-08, "loss": 0.29, "step": 19750 }, { "epoch": 0.9797609008383352, "grad_norm": 7.7658562660217285, "learning_rate": 1.0309088545000235e-08, "loss": 0.2582, "step": 19751 }, { "epoch": 0.9798105064735354, "grad_norm": 27.04878044128418, "learning_rate": 1.02586329935761e-08, "loss": 0.4658, "step": 19752 }, { "epoch": 0.9798601121087356, "grad_norm": 7.631939888000488, "learning_rate": 1.0208301090200434e-08, "loss": 0.2274, "step": 19753 }, { "epoch": 0.9799097177439358, "grad_norm": 6.403567790985107, "learning_rate": 1.0158092836120015e-08, "loss": 0.0853, "step": 19754 }, { "epoch": 0.9799593233791358, "grad_norm": 5.391888618469238, "learning_rate": 1.0108008232579403e-08, "loss": 0.2323, "step": 19755 }, { "epoch": 0.980008929014336, "grad_norm": 10.261797904968262, "learning_rate": 1.0058047280819271e-08, "loss": 0.3239, "step": 19756 }, { "epoch": 0.9800585346495362, "grad_norm": 10.14657211303711, "learning_rate": 1.0008209982078631e-08, "loss": 0.2515, "step": 19757 }, { "epoch": 0.9801081402847364, "grad_norm": 16.72603416442871, "learning_rate": 9.95849633759094e-09, "loss": 0.3133, "step": 19758 }, { "epoch": 0.9801577459199365, "grad_norm": 7.134169101715088, "learning_rate": 9.908906348589098e-09, "loss": 0.2021, "step": 19759 }, { "epoch": 0.9802073515551367, "grad_norm": 7.515770435333252, "learning_rate": 9.859440016302124e-09, "loss": 0.3381, "step": 19760 }, { "epoch": 0.9802569571903368, "grad_norm": 7.670945167541504, "learning_rate": 9.81009734195515e-09, "loss": 0.3496, "step": 19761 }, { "epoch": 0.980306562825537, "grad_norm": 5.905206680297852, "learning_rate": 9.760878326771639e-09, "loss": 0.3308, "step": 19762 }, { "epoch": 0.9803561684607371, "grad_norm": 5.201662063598633, "learning_rate": 9.711782971970618e-09, "loss": 0.3356, "step": 19763 }, { "epoch": 0.9804057740959373, "grad_norm": 9.02849006652832, "learning_rate": 9.66281127876889e-09, "loss": 0.3412, "step": 19764 }, { "epoch": 0.9804553797311375, "grad_norm": 8.621391296386719, "learning_rate": 9.613963248380486e-09, "loss": 0.2833, "step": 19765 }, { "epoch": 0.9805049853663376, "grad_norm": 4.694313049316406, "learning_rate": 9.565238882014994e-09, "loss": 0.1873, "step": 19766 }, { "epoch": 0.9805545910015377, "grad_norm": 9.019084930419922, "learning_rate": 9.516638180880889e-09, "loss": 0.3868, "step": 19767 }, { "epoch": 0.9806041966367379, "grad_norm": 4.617687702178955, "learning_rate": 9.468161146181653e-09, "loss": 0.2455, "step": 19768 }, { "epoch": 0.9806538022719381, "grad_norm": 7.141571998596191, "learning_rate": 9.419807779118551e-09, "loss": 0.2739, "step": 19769 }, { "epoch": 0.9807034079071383, "grad_norm": 9.702753067016602, "learning_rate": 9.37157808089062e-09, "loss": 0.3837, "step": 19770 }, { "epoch": 0.9807530135423385, "grad_norm": 7.999522686004639, "learning_rate": 9.323472052691906e-09, "loss": 0.2992, "step": 19771 }, { "epoch": 0.9808026191775385, "grad_norm": 8.626145362854004, "learning_rate": 9.275489695715344e-09, "loss": 0.2108, "step": 19772 }, { "epoch": 0.9808522248127387, "grad_norm": 5.165338039398193, "learning_rate": 9.227631011149984e-09, "loss": 0.2216, "step": 19773 }, { "epoch": 0.9809018304479389, "grad_norm": 11.208455085754395, "learning_rate": 9.179896000181543e-09, "loss": 0.3645, "step": 19774 }, { "epoch": 0.9809514360831391, "grad_norm": 5.336730480194092, "learning_rate": 9.132284663992408e-09, "loss": 0.3096, "step": 19775 }, { "epoch": 0.9810010417183392, "grad_norm": 6.283509254455566, "learning_rate": 9.084797003762747e-09, "loss": 0.284, "step": 19776 }, { "epoch": 0.9810506473535393, "grad_norm": 6.410876274108887, "learning_rate": 9.03743302066995e-09, "loss": 0.3509, "step": 19777 }, { "epoch": 0.9811002529887395, "grad_norm": 8.076759338378906, "learning_rate": 8.99019271588697e-09, "loss": 0.2966, "step": 19778 }, { "epoch": 0.9811498586239397, "grad_norm": 5.727726459503174, "learning_rate": 8.94307609058509e-09, "loss": 0.2501, "step": 19779 }, { "epoch": 0.9811994642591398, "grad_norm": 9.887908935546875, "learning_rate": 8.896083145931156e-09, "loss": 0.3796, "step": 19780 }, { "epoch": 0.98124906989434, "grad_norm": 5.429755687713623, "learning_rate": 8.849213883089792e-09, "loss": 0.2336, "step": 19781 }, { "epoch": 0.9812986755295402, "grad_norm": 6.297818660736084, "learning_rate": 8.802468303222845e-09, "loss": 0.3268, "step": 19782 }, { "epoch": 0.9813482811647403, "grad_norm": 9.964591026306152, "learning_rate": 8.755846407488277e-09, "loss": 0.3547, "step": 19783 }, { "epoch": 0.9813978867999404, "grad_norm": 6.585378646850586, "learning_rate": 8.709348197041834e-09, "loss": 0.2248, "step": 19784 }, { "epoch": 0.9814474924351406, "grad_norm": 6.457697868347168, "learning_rate": 8.662973673035923e-09, "loss": 0.3428, "step": 19785 }, { "epoch": 0.9814970980703408, "grad_norm": 3.6319050788879395, "learning_rate": 8.616722836619074e-09, "loss": 0.1967, "step": 19786 }, { "epoch": 0.981546703705541, "grad_norm": 4.540735721588135, "learning_rate": 8.570595688937589e-09, "loss": 0.1919, "step": 19787 }, { "epoch": 0.9815963093407412, "grad_norm": 6.449329853057861, "learning_rate": 8.524592231134444e-09, "loss": 0.276, "step": 19788 }, { "epoch": 0.9816459149759412, "grad_norm": 8.284845352172852, "learning_rate": 8.478712464349836e-09, "loss": 0.2867, "step": 19789 }, { "epoch": 0.9816955206111414, "grad_norm": 3.8166933059692383, "learning_rate": 8.432956389721192e-09, "loss": 0.2083, "step": 19790 }, { "epoch": 0.9817451262463416, "grad_norm": 6.260567665100098, "learning_rate": 8.387324008380938e-09, "loss": 0.3113, "step": 19791 }, { "epoch": 0.9817947318815418, "grad_norm": 11.98906421661377, "learning_rate": 8.341815321461499e-09, "loss": 0.428, "step": 19792 }, { "epoch": 0.9818443375167419, "grad_norm": 6.170821189880371, "learning_rate": 8.296430330089755e-09, "loss": 0.2738, "step": 19793 }, { "epoch": 0.981893943151942, "grad_norm": 5.758434772491455, "learning_rate": 8.251169035389806e-09, "loss": 0.2375, "step": 19794 }, { "epoch": 0.9819435487871422, "grad_norm": 10.321352005004883, "learning_rate": 8.20603143848464e-09, "loss": 0.3098, "step": 19795 }, { "epoch": 0.9819931544223424, "grad_norm": 13.722657203674316, "learning_rate": 8.1610175404917e-09, "loss": 0.4321, "step": 19796 }, { "epoch": 0.9820427600575425, "grad_norm": 10.861416816711426, "learning_rate": 8.116127342526759e-09, "loss": 0.2752, "step": 19797 }, { "epoch": 0.9820923656927427, "grad_norm": 5.317153453826904, "learning_rate": 8.071360845702813e-09, "loss": 0.1745, "step": 19798 }, { "epoch": 0.9821419713279429, "grad_norm": 4.942934989929199, "learning_rate": 8.026718051127869e-09, "loss": 0.2575, "step": 19799 }, { "epoch": 0.982191576963143, "grad_norm": 5.303786754608154, "learning_rate": 7.98219895990937e-09, "loss": 0.152, "step": 19800 }, { "epoch": 0.9822411825983431, "grad_norm": 3.401780366897583, "learning_rate": 7.937803573150326e-09, "loss": 0.1821, "step": 19801 }, { "epoch": 0.9822907882335433, "grad_norm": 8.927698135375977, "learning_rate": 7.893531891950412e-09, "loss": 0.3695, "step": 19802 }, { "epoch": 0.9823403938687435, "grad_norm": 14.853782653808594, "learning_rate": 7.849383917406528e-09, "loss": 0.3029, "step": 19803 }, { "epoch": 0.9823899995039437, "grad_norm": 4.791178226470947, "learning_rate": 7.805359650613908e-09, "loss": 0.3284, "step": 19804 }, { "epoch": 0.9824396051391439, "grad_norm": 5.127820014953613, "learning_rate": 7.761459092662238e-09, "loss": 0.3094, "step": 19805 }, { "epoch": 0.9824892107743439, "grad_norm": 8.447021484375, "learning_rate": 7.717682244639534e-09, "loss": 0.303, "step": 19806 }, { "epoch": 0.9825388164095441, "grad_norm": 4.420248031616211, "learning_rate": 7.674029107631597e-09, "loss": 0.1452, "step": 19807 }, { "epoch": 0.9825884220447443, "grad_norm": 5.320058345794678, "learning_rate": 7.630499682719228e-09, "loss": 0.3139, "step": 19808 }, { "epoch": 0.9826380276799445, "grad_norm": 3.684295654296875, "learning_rate": 7.587093970981007e-09, "loss": 0.259, "step": 19809 }, { "epoch": 0.9826876333151446, "grad_norm": 8.231956481933594, "learning_rate": 7.543811973493298e-09, "loss": 0.2604, "step": 19810 }, { "epoch": 0.9827372389503447, "grad_norm": 8.661484718322754, "learning_rate": 7.50065369132802e-09, "loss": 0.283, "step": 19811 }, { "epoch": 0.9827868445855449, "grad_norm": 5.565945625305176, "learning_rate": 7.457619125555426e-09, "loss": 0.2751, "step": 19812 }, { "epoch": 0.9828364502207451, "grad_norm": 11.44849967956543, "learning_rate": 7.414708277240778e-09, "loss": 0.3341, "step": 19813 }, { "epoch": 0.9828860558559452, "grad_norm": 8.681607246398926, "learning_rate": 7.371921147448779e-09, "loss": 0.274, "step": 19814 }, { "epoch": 0.9829356614911454, "grad_norm": 6.271017551422119, "learning_rate": 7.329257737238582e-09, "loss": 0.2246, "step": 19815 }, { "epoch": 0.9829852671263456, "grad_norm": 10.665861129760742, "learning_rate": 7.286718047667674e-09, "loss": 0.3586, "step": 19816 }, { "epoch": 0.9830348727615457, "grad_norm": 13.355523109436035, "learning_rate": 7.2443020797902105e-09, "loss": 0.3454, "step": 19817 }, { "epoch": 0.9830844783967458, "grad_norm": 3.997582197189331, "learning_rate": 7.202009834657575e-09, "loss": 0.3255, "step": 19818 }, { "epoch": 0.983134084031946, "grad_norm": 8.096778869628906, "learning_rate": 7.159841313317817e-09, "loss": 0.233, "step": 19819 }, { "epoch": 0.9831836896671462, "grad_norm": 6.881201267242432, "learning_rate": 7.117796516815656e-09, "loss": 0.2988, "step": 19820 }, { "epoch": 0.9832332953023464, "grad_norm": 4.795331001281738, "learning_rate": 7.075875446193037e-09, "loss": 0.2126, "step": 19821 }, { "epoch": 0.9832829009375466, "grad_norm": 5.004374027252197, "learning_rate": 7.034078102488573e-09, "loss": 0.2159, "step": 19822 }, { "epoch": 0.9833325065727466, "grad_norm": 10.632170677185059, "learning_rate": 6.9924044867381025e-09, "loss": 0.3781, "step": 19823 }, { "epoch": 0.9833821122079468, "grad_norm": 3.7837159633636475, "learning_rate": 6.950854599974688e-09, "loss": 0.2673, "step": 19824 }, { "epoch": 0.983431717843147, "grad_norm": 6.892848014831543, "learning_rate": 6.909428443227506e-09, "loss": 0.3181, "step": 19825 }, { "epoch": 0.9834813234783472, "grad_norm": 6.089984893798828, "learning_rate": 6.868126017523513e-09, "loss": 0.3224, "step": 19826 }, { "epoch": 0.9835309291135473, "grad_norm": 10.637513160705566, "learning_rate": 6.82694732388578e-09, "loss": 0.231, "step": 19827 }, { "epoch": 0.9835805347487474, "grad_norm": 8.618212699890137, "learning_rate": 6.785892363335156e-09, "loss": 0.2055, "step": 19828 }, { "epoch": 0.9836301403839476, "grad_norm": 11.13172721862793, "learning_rate": 6.744961136889161e-09, "loss": 0.2653, "step": 19829 }, { "epoch": 0.9836797460191478, "grad_norm": 4.198953151702881, "learning_rate": 6.704153645560873e-09, "loss": 0.2427, "step": 19830 }, { "epoch": 0.9837293516543479, "grad_norm": 9.048197746276855, "learning_rate": 6.663469890362817e-09, "loss": 0.3316, "step": 19831 }, { "epoch": 0.9837789572895481, "grad_norm": 4.1466383934021, "learning_rate": 6.622909872303074e-09, "loss": 0.2516, "step": 19832 }, { "epoch": 0.9838285629247483, "grad_norm": 6.363491535186768, "learning_rate": 6.582473592385841e-09, "loss": 0.2818, "step": 19833 }, { "epoch": 0.9838781685599484, "grad_norm": 4.710648059844971, "learning_rate": 6.542161051613649e-09, "loss": 0.2154, "step": 19834 }, { "epoch": 0.9839277741951485, "grad_norm": 7.409512519836426, "learning_rate": 6.501972250985699e-09, "loss": 0.3164, "step": 19835 }, { "epoch": 0.9839773798303487, "grad_norm": 7.895401477813721, "learning_rate": 6.4619071914978625e-09, "loss": 0.3627, "step": 19836 }, { "epoch": 0.9840269854655489, "grad_norm": 8.975802421569824, "learning_rate": 6.421965874142677e-09, "loss": 0.3255, "step": 19837 }, { "epoch": 0.9840765911007491, "grad_norm": 5.12650203704834, "learning_rate": 6.3821482999099074e-09, "loss": 0.2129, "step": 19838 }, { "epoch": 0.9841261967359493, "grad_norm": 10.531866073608398, "learning_rate": 6.342454469786541e-09, "loss": 0.3546, "step": 19839 }, { "epoch": 0.9841758023711493, "grad_norm": 18.698955535888672, "learning_rate": 6.302884384755681e-09, "loss": 0.452, "step": 19840 }, { "epoch": 0.9842254080063495, "grad_norm": 3.3835620880126953, "learning_rate": 6.263438045798209e-09, "loss": 0.1497, "step": 19841 }, { "epoch": 0.9842750136415497, "grad_norm": 6.041391849517822, "learning_rate": 6.224115453892232e-09, "loss": 0.2647, "step": 19842 }, { "epoch": 0.9843246192767499, "grad_norm": 6.297718524932861, "learning_rate": 6.18491661001086e-09, "loss": 0.3104, "step": 19843 }, { "epoch": 0.98437422491195, "grad_norm": 4.168558120727539, "learning_rate": 6.145841515126649e-09, "loss": 0.2654, "step": 19844 }, { "epoch": 0.9844238305471501, "grad_norm": 8.48587703704834, "learning_rate": 6.106890170207713e-09, "loss": 0.3583, "step": 19845 }, { "epoch": 0.9844734361823503, "grad_norm": 4.392652988433838, "learning_rate": 6.068062576218281e-09, "loss": 0.2665, "step": 19846 }, { "epoch": 0.9845230418175505, "grad_norm": 5.633840560913086, "learning_rate": 6.029358734122026e-09, "loss": 0.2564, "step": 19847 }, { "epoch": 0.9845726474527506, "grad_norm": 10.843189239501953, "learning_rate": 5.990778644876516e-09, "loss": 0.3457, "step": 19848 }, { "epoch": 0.9846222530879508, "grad_norm": 5.518898010253906, "learning_rate": 5.952322309439318e-09, "loss": 0.1967, "step": 19849 }, { "epoch": 0.984671858723151, "grad_norm": 6.259346008300781, "learning_rate": 5.913989728762448e-09, "loss": 0.2418, "step": 19850 }, { "epoch": 0.9847214643583511, "grad_norm": 6.5706071853637695, "learning_rate": 5.875780903795702e-09, "loss": 0.2941, "step": 19851 }, { "epoch": 0.9847710699935512, "grad_norm": 9.444664001464844, "learning_rate": 5.837695835486101e-09, "loss": 0.3459, "step": 19852 }, { "epoch": 0.9848206756287514, "grad_norm": 11.881793975830078, "learning_rate": 5.799734524777889e-09, "loss": 0.2981, "step": 19853 }, { "epoch": 0.9848702812639516, "grad_norm": 5.619268417358398, "learning_rate": 5.7618969726108695e-09, "loss": 0.2648, "step": 19854 }, { "epoch": 0.9849198868991518, "grad_norm": 6.598169326782227, "learning_rate": 5.724183179923737e-09, "loss": 0.3229, "step": 19855 }, { "epoch": 0.984969492534352, "grad_norm": 9.204005241394043, "learning_rate": 5.6865931476501876e-09, "loss": 0.3493, "step": 19856 }, { "epoch": 0.985019098169552, "grad_norm": 10.587162017822266, "learning_rate": 5.6491268767222555e-09, "loss": 0.3785, "step": 19857 }, { "epoch": 0.9850687038047522, "grad_norm": 5.0520806312561035, "learning_rate": 5.611784368068085e-09, "loss": 0.2708, "step": 19858 }, { "epoch": 0.9851183094399524, "grad_norm": 8.888141632080078, "learning_rate": 5.574565622613048e-09, "loss": 0.2019, "step": 19859 }, { "epoch": 0.9851679150751526, "grad_norm": 9.250432968139648, "learning_rate": 5.537470641279186e-09, "loss": 0.3101, "step": 19860 }, { "epoch": 0.9852175207103527, "grad_norm": 9.657979011535645, "learning_rate": 5.500499424986316e-09, "loss": 0.3409, "step": 19861 }, { "epoch": 0.9852671263455528, "grad_norm": 6.068750858306885, "learning_rate": 5.463651974650374e-09, "loss": 0.2394, "step": 19862 }, { "epoch": 0.985316731980753, "grad_norm": 8.457306861877441, "learning_rate": 5.4269282911845185e-09, "loss": 0.3722, "step": 19863 }, { "epoch": 0.9853663376159532, "grad_norm": 10.737791061401367, "learning_rate": 5.390328375498577e-09, "loss": 0.2915, "step": 19864 }, { "epoch": 0.9854159432511533, "grad_norm": 7.6728196144104, "learning_rate": 5.353852228499601e-09, "loss": 0.343, "step": 19865 }, { "epoch": 0.9854655488863535, "grad_norm": 7.6815185546875, "learning_rate": 5.317499851091312e-09, "loss": 0.1969, "step": 19866 }, { "epoch": 0.9855151545215537, "grad_norm": 7.849350452423096, "learning_rate": 5.281271244175212e-09, "loss": 0.2809, "step": 19867 }, { "epoch": 0.9855647601567538, "grad_norm": 4.659329414367676, "learning_rate": 5.245166408647806e-09, "loss": 0.3027, "step": 19868 }, { "epoch": 0.9856143657919539, "grad_norm": 6.475406169891357, "learning_rate": 5.209185345405043e-09, "loss": 0.2639, "step": 19869 }, { "epoch": 0.9856639714271541, "grad_norm": 11.742096900939941, "learning_rate": 5.173328055338433e-09, "loss": 0.3173, "step": 19870 }, { "epoch": 0.9857135770623543, "grad_norm": 6.216599464416504, "learning_rate": 5.1375945393356e-09, "loss": 0.1695, "step": 19871 }, { "epoch": 0.9857631826975545, "grad_norm": 6.1006059646606445, "learning_rate": 5.1019847982825e-09, "loss": 0.2692, "step": 19872 }, { "epoch": 0.9858127883327547, "grad_norm": 6.232845306396484, "learning_rate": 5.066498833061761e-09, "loss": 0.1905, "step": 19873 }, { "epoch": 0.9858623939679547, "grad_norm": 4.593359470367432, "learning_rate": 5.031136644552681e-09, "loss": 0.2185, "step": 19874 }, { "epoch": 0.9859119996031549, "grad_norm": 6.710733890533447, "learning_rate": 4.995898233631779e-09, "loss": 0.2857, "step": 19875 }, { "epoch": 0.9859616052383551, "grad_norm": 10.606969833374023, "learning_rate": 4.9607836011716925e-09, "loss": 0.2944, "step": 19876 }, { "epoch": 0.9860112108735553, "grad_norm": 5.884095191955566, "learning_rate": 4.925792748042835e-09, "loss": 0.2222, "step": 19877 }, { "epoch": 0.9860608165087554, "grad_norm": 14.145352363586426, "learning_rate": 4.890925675112845e-09, "loss": 0.3633, "step": 19878 }, { "epoch": 0.9861104221439555, "grad_norm": 8.068133354187012, "learning_rate": 4.856182383244923e-09, "loss": 0.2866, "step": 19879 }, { "epoch": 0.9861600277791557, "grad_norm": 6.4847846031188965, "learning_rate": 4.8215628733000455e-09, "loss": 0.2583, "step": 19880 }, { "epoch": 0.9862096334143559, "grad_norm": 7.693328857421875, "learning_rate": 4.78706714613697e-09, "loss": 0.3303, "step": 19881 }, { "epoch": 0.986259239049556, "grad_norm": 6.577464580535889, "learning_rate": 4.752695202609459e-09, "loss": 0.313, "step": 19882 }, { "epoch": 0.9863088446847562, "grad_norm": 11.3624906539917, "learning_rate": 4.718447043570162e-09, "loss": 0.2827, "step": 19883 }, { "epoch": 0.9863584503199564, "grad_norm": 5.156741619110107, "learning_rate": 4.684322669867292e-09, "loss": 0.2429, "step": 19884 }, { "epoch": 0.9864080559551565, "grad_norm": 5.457193374633789, "learning_rate": 4.650322082346281e-09, "loss": 0.2275, "step": 19885 }, { "epoch": 0.9864576615903566, "grad_norm": 10.548737525939941, "learning_rate": 4.616445281849791e-09, "loss": 0.2376, "step": 19886 }, { "epoch": 0.9865072672255568, "grad_norm": 10.225488662719727, "learning_rate": 4.5826922692177035e-09, "loss": 0.2952, "step": 19887 }, { "epoch": 0.986556872860757, "grad_norm": 5.378666400909424, "learning_rate": 4.549063045286573e-09, "loss": 0.331, "step": 19888 }, { "epoch": 0.9866064784959572, "grad_norm": 10.104714393615723, "learning_rate": 4.51555761088851e-09, "loss": 0.3223, "step": 19889 }, { "epoch": 0.9866560841311574, "grad_norm": 9.368975639343262, "learning_rate": 4.482175966855629e-09, "loss": 0.3588, "step": 19890 }, { "epoch": 0.9867056897663574, "grad_norm": 8.197779655456543, "learning_rate": 4.448918114013379e-09, "loss": 0.3109, "step": 19891 }, { "epoch": 0.9867552954015576, "grad_norm": 7.740743637084961, "learning_rate": 4.415784053186656e-09, "loss": 0.1917, "step": 19892 }, { "epoch": 0.9868049010367578, "grad_norm": 11.894535064697266, "learning_rate": 4.382773785197026e-09, "loss": 0.2929, "step": 19893 }, { "epoch": 0.986854506671958, "grad_norm": 5.912233352661133, "learning_rate": 4.3498873108616116e-09, "loss": 0.2374, "step": 19894 }, { "epoch": 0.9869041123071581, "grad_norm": 5.298185348510742, "learning_rate": 4.317124630995873e-09, "loss": 0.2474, "step": 19895 }, { "epoch": 0.9869537179423582, "grad_norm": 5.802549839019775, "learning_rate": 4.284485746411937e-09, "loss": 0.2021, "step": 19896 }, { "epoch": 0.9870033235775584, "grad_norm": 5.394021511077881, "learning_rate": 4.251970657918048e-09, "loss": 0.1824, "step": 19897 }, { "epoch": 0.9870529292127586, "grad_norm": 8.011263847351074, "learning_rate": 4.219579366320225e-09, "loss": 0.174, "step": 19898 }, { "epoch": 0.9871025348479587, "grad_norm": 19.070106506347656, "learning_rate": 4.187311872420607e-09, "loss": 0.3812, "step": 19899 }, { "epoch": 0.9871521404831589, "grad_norm": 8.973453521728516, "learning_rate": 4.155168177019664e-09, "loss": 0.3386, "step": 19900 }, { "epoch": 0.9872017461183591, "grad_norm": 5.649111270904541, "learning_rate": 4.1231482809134245e-09, "loss": 0.3117, "step": 19901 }, { "epoch": 0.9872513517535592, "grad_norm": 5.646722316741943, "learning_rate": 4.0912521848951445e-09, "loss": 0.232, "step": 19902 }, { "epoch": 0.9873009573887593, "grad_norm": 4.780389785766602, "learning_rate": 4.059479889755857e-09, "loss": 0.228, "step": 19903 }, { "epoch": 0.9873505630239595, "grad_norm": 4.247158527374268, "learning_rate": 4.027831396282711e-09, "loss": 0.2041, "step": 19904 }, { "epoch": 0.9874001686591597, "grad_norm": 8.487401962280273, "learning_rate": 3.996306705259523e-09, "loss": 0.2214, "step": 19905 }, { "epoch": 0.9874497742943599, "grad_norm": 6.1676411628723145, "learning_rate": 3.96490581746789e-09, "loss": 0.2664, "step": 19906 }, { "epoch": 0.98749937992956, "grad_norm": 4.331714630126953, "learning_rate": 3.933628733686079e-09, "loss": 0.1924, "step": 19907 }, { "epoch": 0.9875489855647601, "grad_norm": 8.282315254211426, "learning_rate": 3.902475454688471e-09, "loss": 0.2969, "step": 19908 }, { "epoch": 0.9875985911999603, "grad_norm": 7.352614402770996, "learning_rate": 3.8714459812477786e-09, "loss": 0.2803, "step": 19909 }, { "epoch": 0.9876481968351605, "grad_norm": 6.030458927154541, "learning_rate": 3.840540314132834e-09, "loss": 0.2027, "step": 19910 }, { "epoch": 0.9876978024703607, "grad_norm": 4.098154067993164, "learning_rate": 3.809758454109136e-09, "loss": 0.2473, "step": 19911 }, { "epoch": 0.9877474081055608, "grad_norm": 3.989792823791504, "learning_rate": 3.779100401939406e-09, "loss": 0.1416, "step": 19912 }, { "epoch": 0.9877970137407609, "grad_norm": 6.225881576538086, "learning_rate": 3.748566158384148e-09, "loss": 0.2283, "step": 19913 }, { "epoch": 0.9878466193759611, "grad_norm": 12.139349937438965, "learning_rate": 3.7181557241988687e-09, "loss": 0.2556, "step": 19914 }, { "epoch": 0.9878962250111613, "grad_norm": 5.414527893066406, "learning_rate": 3.687869100137964e-09, "loss": 0.2044, "step": 19915 }, { "epoch": 0.9879458306463614, "grad_norm": 5.749218940734863, "learning_rate": 3.6577062869519453e-09, "loss": 0.2488, "step": 19916 }, { "epoch": 0.9879954362815616, "grad_norm": 8.070923805236816, "learning_rate": 3.6276672853879923e-09, "loss": 0.3644, "step": 19917 }, { "epoch": 0.9880450419167618, "grad_norm": 9.344330787658691, "learning_rate": 3.59775209619051e-09, "loss": 0.3028, "step": 19918 }, { "epoch": 0.9880946475519619, "grad_norm": 4.639369964599609, "learning_rate": 3.5679607201005716e-09, "loss": 0.2732, "step": 19919 }, { "epoch": 0.988144253187162, "grad_norm": 11.740653991699219, "learning_rate": 3.5382931578570313e-09, "loss": 0.4947, "step": 19920 }, { "epoch": 0.9881938588223622, "grad_norm": 5.145877361297607, "learning_rate": 3.508749410194301e-09, "loss": 0.242, "step": 19921 }, { "epoch": 0.9882434644575624, "grad_norm": 13.878171920776367, "learning_rate": 3.4793294778456833e-09, "loss": 0.3421, "step": 19922 }, { "epoch": 0.9882930700927626, "grad_norm": 5.1603922843933105, "learning_rate": 3.450033361538929e-09, "loss": 0.2774, "step": 19923 }, { "epoch": 0.9883426757279627, "grad_norm": 8.6272611618042, "learning_rate": 3.42086106200068e-09, "loss": 0.3081, "step": 19924 }, { "epoch": 0.9883922813631628, "grad_norm": 5.771631717681885, "learning_rate": 3.3918125799531353e-09, "loss": 0.2906, "step": 19925 }, { "epoch": 0.988441886998363, "grad_norm": 10.211871147155762, "learning_rate": 3.3628879161173854e-09, "loss": 0.2753, "step": 19926 }, { "epoch": 0.9884914926335632, "grad_norm": 6.302245616912842, "learning_rate": 3.334087071209524e-09, "loss": 0.2513, "step": 19927 }, { "epoch": 0.9885410982687634, "grad_norm": 8.24793815612793, "learning_rate": 3.3054100459428694e-09, "loss": 0.2975, "step": 19928 }, { "epoch": 0.9885907039039635, "grad_norm": 4.898728370666504, "learning_rate": 3.2768568410285195e-09, "loss": 0.211, "step": 19929 }, { "epoch": 0.9886403095391636, "grad_norm": 6.6313347816467285, "learning_rate": 3.2484274571736864e-09, "loss": 0.2133, "step": 19930 }, { "epoch": 0.9886899151743638, "grad_norm": 12.074565887451172, "learning_rate": 3.2201218950833613e-09, "loss": 0.2432, "step": 19931 }, { "epoch": 0.988739520809564, "grad_norm": 7.449402809143066, "learning_rate": 3.1919401554586506e-09, "loss": 0.3262, "step": 19932 }, { "epoch": 0.9887891264447641, "grad_norm": 7.538166046142578, "learning_rate": 3.163882238998439e-09, "loss": 0.1767, "step": 19933 }, { "epoch": 0.9888387320799643, "grad_norm": 10.371611595153809, "learning_rate": 3.1359481463966167e-09, "loss": 0.2778, "step": 19934 }, { "epoch": 0.9888883377151645, "grad_norm": 4.727957725524902, "learning_rate": 3.1081378783470727e-09, "loss": 0.2847, "step": 19935 }, { "epoch": 0.9889379433503646, "grad_norm": 5.9199628829956055, "learning_rate": 3.0804514355381454e-09, "loss": 0.2928, "step": 19936 }, { "epoch": 0.9889875489855647, "grad_norm": 6.610846996307373, "learning_rate": 3.052888818655397e-09, "loss": 0.345, "step": 19937 }, { "epoch": 0.9890371546207649, "grad_norm": 5.861455917358398, "learning_rate": 3.025450028382726e-09, "loss": 0.2217, "step": 19938 }, { "epoch": 0.9890867602559651, "grad_norm": 5.086665630340576, "learning_rate": 2.998135065400143e-09, "loss": 0.1977, "step": 19939 }, { "epoch": 0.9891363658911653, "grad_norm": 5.607058048248291, "learning_rate": 2.9709439303837738e-09, "loss": 0.2856, "step": 19940 }, { "epoch": 0.9891859715263654, "grad_norm": 8.054883003234863, "learning_rate": 2.9438766240080795e-09, "loss": 0.3195, "step": 19941 }, { "epoch": 0.9892355771615655, "grad_norm": 6.73266077041626, "learning_rate": 2.9169331469430795e-09, "loss": 0.2837, "step": 19942 }, { "epoch": 0.9892851827967657, "grad_norm": 4.860906600952148, "learning_rate": 2.890113499857683e-09, "loss": 0.2791, "step": 19943 }, { "epoch": 0.9893347884319659, "grad_norm": 7.15854549407959, "learning_rate": 2.8634176834152485e-09, "loss": 0.2695, "step": 19944 }, { "epoch": 0.989384394067166, "grad_norm": 6.124317169189453, "learning_rate": 2.8368456982774684e-09, "loss": 0.253, "step": 19945 }, { "epoch": 0.9894339997023662, "grad_norm": 9.821271896362305, "learning_rate": 2.8103975451032605e-09, "loss": 0.3159, "step": 19946 }, { "epoch": 0.9894836053375663, "grad_norm": 9.953910827636719, "learning_rate": 2.7840732245482114e-09, "loss": 0.3114, "step": 19947 }, { "epoch": 0.9895332109727665, "grad_norm": 13.556512832641602, "learning_rate": 2.7578727372640223e-09, "loss": 0.4951, "step": 19948 }, { "epoch": 0.9895828166079667, "grad_norm": 7.016551971435547, "learning_rate": 2.7317960839007285e-09, "loss": 0.3469, "step": 19949 }, { "epoch": 0.9896324222431668, "grad_norm": 10.186247825622559, "learning_rate": 2.7058432651033697e-09, "loss": 0.2941, "step": 19950 }, { "epoch": 0.989682027878367, "grad_norm": 6.329725742340088, "learning_rate": 2.680014281515875e-09, "loss": 0.272, "step": 19951 }, { "epoch": 0.9897316335135672, "grad_norm": 8.96352481842041, "learning_rate": 2.654309133778843e-09, "loss": 0.392, "step": 19952 }, { "epoch": 0.9897812391487673, "grad_norm": 3.953871965408325, "learning_rate": 2.628727822527877e-09, "loss": 0.2319, "step": 19953 }, { "epoch": 0.9898308447839674, "grad_norm": 6.403088092803955, "learning_rate": 2.6032703483974686e-09, "loss": 0.192, "step": 19954 }, { "epoch": 0.9898804504191676, "grad_norm": 7.762337684631348, "learning_rate": 2.5779367120193356e-09, "loss": 0.3257, "step": 19955 }, { "epoch": 0.9899300560543678, "grad_norm": 16.888521194458008, "learning_rate": 2.552726914019643e-09, "loss": 0.4759, "step": 19956 }, { "epoch": 0.989979661689568, "grad_norm": 6.488460063934326, "learning_rate": 2.5276409550245573e-09, "loss": 0.2656, "step": 19957 }, { "epoch": 0.9900292673247681, "grad_norm": 6.600928783416748, "learning_rate": 2.502678835655248e-09, "loss": 0.185, "step": 19958 }, { "epoch": 0.9900788729599682, "grad_norm": 5.684041500091553, "learning_rate": 2.4778405565295536e-09, "loss": 0.2401, "step": 19959 }, { "epoch": 0.9901284785951684, "grad_norm": 6.676444053649902, "learning_rate": 2.453126118264204e-09, "loss": 0.2815, "step": 19960 }, { "epoch": 0.9901780842303686, "grad_norm": 9.445612907409668, "learning_rate": 2.4285355214703767e-09, "loss": 0.2861, "step": 19961 }, { "epoch": 0.9902276898655687, "grad_norm": 4.481873989105225, "learning_rate": 2.4040687667586937e-09, "loss": 0.2072, "step": 19962 }, { "epoch": 0.9902772955007689, "grad_norm": 5.794229507446289, "learning_rate": 2.379725854734782e-09, "loss": 0.2899, "step": 19963 }, { "epoch": 0.990326901135969, "grad_norm": 11.666841506958008, "learning_rate": 2.3555067860014935e-09, "loss": 0.353, "step": 19964 }, { "epoch": 0.9903765067711692, "grad_norm": 21.701549530029297, "learning_rate": 2.331411561160013e-09, "loss": 0.4655, "step": 19965 }, { "epoch": 0.9904261124063694, "grad_norm": 15.318490028381348, "learning_rate": 2.3074401808065305e-09, "loss": 0.3801, "step": 19966 }, { "epoch": 0.9904757180415695, "grad_norm": 10.367531776428223, "learning_rate": 2.2835926455350155e-09, "loss": 0.371, "step": 19967 }, { "epoch": 0.9905253236767697, "grad_norm": 6.202901840209961, "learning_rate": 2.2598689559372167e-09, "loss": 0.2538, "step": 19968 }, { "epoch": 0.9905749293119699, "grad_norm": 7.4207587242126465, "learning_rate": 2.236269112600442e-09, "loss": 0.2852, "step": 19969 }, { "epoch": 0.99062453494717, "grad_norm": 5.815871238708496, "learning_rate": 2.2127931161097795e-09, "loss": 0.2246, "step": 19970 }, { "epoch": 0.9906741405823701, "grad_norm": 4.503551959991455, "learning_rate": 2.1894409670469853e-09, "loss": 0.2552, "step": 19971 }, { "epoch": 0.9907237462175703, "grad_norm": 4.734907150268555, "learning_rate": 2.166212665990486e-09, "loss": 0.285, "step": 19972 }, { "epoch": 0.9907733518527705, "grad_norm": 6.581222057342529, "learning_rate": 2.143108213515932e-09, "loss": 0.2286, "step": 19973 }, { "epoch": 0.9908229574879707, "grad_norm": 5.565414905548096, "learning_rate": 2.1201276101956436e-09, "loss": 0.2671, "step": 19974 }, { "epoch": 0.9908725631231708, "grad_norm": 12.619629859924316, "learning_rate": 2.09727085659972e-09, "loss": 0.3696, "step": 19975 }, { "epoch": 0.9909221687583709, "grad_norm": 4.231388568878174, "learning_rate": 2.0745379532943753e-09, "loss": 0.3048, "step": 19976 }, { "epoch": 0.9909717743935711, "grad_norm": 5.439425468444824, "learning_rate": 2.0519289008424926e-09, "loss": 0.1816, "step": 19977 }, { "epoch": 0.9910213800287713, "grad_norm": 4.766497611999512, "learning_rate": 2.029443699804734e-09, "loss": 0.3282, "step": 19978 }, { "epoch": 0.9910709856639714, "grad_norm": 6.867814540863037, "learning_rate": 2.007082350738432e-09, "loss": 0.315, "step": 19979 }, { "epoch": 0.9911205912991716, "grad_norm": 6.178144454956055, "learning_rate": 1.9848448541970324e-09, "loss": 0.2305, "step": 19980 }, { "epoch": 0.9911701969343717, "grad_norm": 6.187352180480957, "learning_rate": 1.962731210732316e-09, "loss": 0.2619, "step": 19981 }, { "epoch": 0.9912198025695719, "grad_norm": 10.238421440124512, "learning_rate": 1.940741420891623e-09, "loss": 0.3201, "step": 19982 }, { "epoch": 0.991269408204772, "grad_norm": 4.222777843475342, "learning_rate": 1.918875485220628e-09, "loss": 0.3068, "step": 19983 }, { "epoch": 0.9913190138399722, "grad_norm": 5.51200532913208, "learning_rate": 1.897133404260565e-09, "loss": 0.1953, "step": 19984 }, { "epoch": 0.9913686194751724, "grad_norm": 6.774050712585449, "learning_rate": 1.875515178550447e-09, "loss": 0.2887, "step": 19985 }, { "epoch": 0.9914182251103726, "grad_norm": 5.674448490142822, "learning_rate": 1.8540208086259559e-09, "loss": 0.2261, "step": 19986 }, { "epoch": 0.9914678307455727, "grad_norm": 6.997238636016846, "learning_rate": 1.8326502950199998e-09, "loss": 0.165, "step": 19987 }, { "epoch": 0.9915174363807728, "grad_norm": 6.109760761260986, "learning_rate": 1.8114036382615996e-09, "loss": 0.3373, "step": 19988 }, { "epoch": 0.991567042015973, "grad_norm": 8.448415756225586, "learning_rate": 1.7902808388775562e-09, "loss": 0.3349, "step": 19989 }, { "epoch": 0.9916166476511732, "grad_norm": 7.917844295501709, "learning_rate": 1.7692818973913394e-09, "loss": 0.3062, "step": 19990 }, { "epoch": 0.9916662532863734, "grad_norm": 3.9384987354278564, "learning_rate": 1.7484068143236443e-09, "loss": 0.1948, "step": 19991 }, { "epoch": 0.9917158589215735, "grad_norm": 6.773329257965088, "learning_rate": 1.7276555901907243e-09, "loss": 0.3303, "step": 19992 }, { "epoch": 0.9917654645567736, "grad_norm": 6.882015705108643, "learning_rate": 1.7070282255077232e-09, "loss": 0.2151, "step": 19993 }, { "epoch": 0.9918150701919738, "grad_norm": 4.2686238288879395, "learning_rate": 1.686524720785343e-09, "loss": 0.2346, "step": 19994 }, { "epoch": 0.991864675827174, "grad_norm": 8.893892288208008, "learning_rate": 1.6661450765326215e-09, "loss": 0.4351, "step": 19995 }, { "epoch": 0.9919142814623741, "grad_norm": 9.581582069396973, "learning_rate": 1.6458892932530446e-09, "loss": 0.3462, "step": 19996 }, { "epoch": 0.9919638870975743, "grad_norm": 5.8992414474487305, "learning_rate": 1.6257573714495433e-09, "loss": 0.2624, "step": 19997 }, { "epoch": 0.9920134927327744, "grad_norm": 7.693901062011719, "learning_rate": 1.605749311620608e-09, "loss": 0.3556, "step": 19998 }, { "epoch": 0.9920630983679746, "grad_norm": 8.741888999938965, "learning_rate": 1.5858651142625082e-09, "loss": 0.2607, "step": 19999 }, { "epoch": 0.9921127040031748, "grad_norm": 4.038976669311523, "learning_rate": 1.5661047798676277e-09, "loss": 0.2771, "step": 20000 }, { "epoch": 0.9921623096383749, "grad_norm": 5.3471479415893555, "learning_rate": 1.546468308925575e-09, "loss": 0.2344, "step": 20001 }, { "epoch": 0.9922119152735751, "grad_norm": 10.000184059143066, "learning_rate": 1.5269557019231829e-09, "loss": 0.2758, "step": 20002 }, { "epoch": 0.9922615209087753, "grad_norm": 4.43823766708374, "learning_rate": 1.5075669593439534e-09, "loss": 0.2445, "step": 20003 }, { "epoch": 0.9923111265439754, "grad_norm": 4.299681663513184, "learning_rate": 1.4883020816686134e-09, "loss": 0.2891, "step": 20004 }, { "epoch": 0.9923607321791755, "grad_norm": 9.949328422546387, "learning_rate": 1.4691610693734483e-09, "loss": 0.2624, "step": 20005 }, { "epoch": 0.9924103378143757, "grad_norm": 4.117279052734375, "learning_rate": 1.4501439229341885e-09, "loss": 0.2076, "step": 20006 }, { "epoch": 0.9924599434495759, "grad_norm": 7.392883777618408, "learning_rate": 1.4312506428210138e-09, "loss": 0.2223, "step": 20007 }, { "epoch": 0.9925095490847761, "grad_norm": 9.072877883911133, "learning_rate": 1.4124812295029932e-09, "loss": 0.3192, "step": 20008 }, { "epoch": 0.9925591547199762, "grad_norm": 9.932538032531738, "learning_rate": 1.3938356834447552e-09, "loss": 0.29, "step": 20009 }, { "epoch": 0.9926087603551763, "grad_norm": 6.339111804962158, "learning_rate": 1.3753140051081527e-09, "loss": 0.2621, "step": 20010 }, { "epoch": 0.9926583659903765, "grad_norm": 6.562730312347412, "learning_rate": 1.3569161949522624e-09, "loss": 0.3311, "step": 20011 }, { "epoch": 0.9927079716255767, "grad_norm": 7.126472473144531, "learning_rate": 1.3386422534333866e-09, "loss": 0.3436, "step": 20012 }, { "epoch": 0.9927575772607768, "grad_norm": 9.224822998046875, "learning_rate": 1.3204921810039407e-09, "loss": 0.1504, "step": 20013 }, { "epoch": 0.992807182895977, "grad_norm": 9.284130096435547, "learning_rate": 1.3024659781135652e-09, "loss": 0.3749, "step": 20014 }, { "epoch": 0.9928567885311771, "grad_norm": 6.68326473236084, "learning_rate": 1.28456364520968e-09, "loss": 0.2665, "step": 20015 }, { "epoch": 0.9929063941663773, "grad_norm": 6.205219268798828, "learning_rate": 1.266785182735264e-09, "loss": 0.2762, "step": 20016 }, { "epoch": 0.9929559998015774, "grad_norm": 7.777164459228516, "learning_rate": 1.2491305911310758e-09, "loss": 0.294, "step": 20017 }, { "epoch": 0.9930056054367776, "grad_norm": 4.095841884613037, "learning_rate": 1.2315998708350984e-09, "loss": 0.2572, "step": 20018 }, { "epoch": 0.9930552110719778, "grad_norm": 3.9015350341796875, "learning_rate": 1.214193022280874e-09, "loss": 0.2924, "step": 20019 }, { "epoch": 0.993104816707178, "grad_norm": 10.665104866027832, "learning_rate": 1.1969100459002792e-09, "loss": 0.3051, "step": 20020 }, { "epoch": 0.993154422342378, "grad_norm": 6.759461402893066, "learning_rate": 1.179750942121305e-09, "loss": 0.3265, "step": 20021 }, { "epoch": 0.9932040279775782, "grad_norm": 6.680950164794922, "learning_rate": 1.1627157113691667e-09, "loss": 0.259, "step": 20022 }, { "epoch": 0.9932536336127784, "grad_norm": 10.268800735473633, "learning_rate": 1.1458043540663043e-09, "loss": 0.2856, "step": 20023 }, { "epoch": 0.9933032392479786, "grad_norm": 6.233260154724121, "learning_rate": 1.1290168706318272e-09, "loss": 0.2584, "step": 20024 }, { "epoch": 0.9933528448831788, "grad_norm": 17.596332550048828, "learning_rate": 1.1123532614809584e-09, "loss": 0.3092, "step": 20025 }, { "epoch": 0.9934024505183789, "grad_norm": 9.167593002319336, "learning_rate": 1.0958135270278114e-09, "loss": 0.2869, "step": 20026 }, { "epoch": 0.993452056153579, "grad_norm": 5.456160545349121, "learning_rate": 1.0793976676815033e-09, "loss": 0.2587, "step": 20027 }, { "epoch": 0.9935016617887792, "grad_norm": 4.131052017211914, "learning_rate": 1.0631056838489306e-09, "loss": 0.2358, "step": 20028 }, { "epoch": 0.9935512674239794, "grad_norm": 5.922163486480713, "learning_rate": 1.0469375759342148e-09, "loss": 0.24, "step": 20029 }, { "epoch": 0.9936008730591795, "grad_norm": 8.269929885864258, "learning_rate": 1.030893344337036e-09, "loss": 0.3584, "step": 20030 }, { "epoch": 0.9936504786943797, "grad_norm": 18.2795467376709, "learning_rate": 1.0149729894559645e-09, "loss": 0.4173, "step": 20031 }, { "epoch": 0.9937000843295798, "grad_norm": 14.665590286254883, "learning_rate": 9.991765116851293e-10, "loss": 0.4543, "step": 20032 }, { "epoch": 0.99374968996478, "grad_norm": 8.489911079406738, "learning_rate": 9.83503911415884e-10, "loss": 0.3491, "step": 20033 }, { "epoch": 0.9937992955999801, "grad_norm": 12.904141426086426, "learning_rate": 9.679551890368066e-10, "loss": 0.3727, "step": 20034 }, { "epoch": 0.9938489012351803, "grad_norm": 5.097026348114014, "learning_rate": 9.525303449331446e-10, "loss": 0.2123, "step": 20035 }, { "epoch": 0.9938985068703805, "grad_norm": 5.752573013305664, "learning_rate": 9.372293794868149e-10, "loss": 0.3169, "step": 20036 }, { "epoch": 0.9939481125055807, "grad_norm": 5.160177707672119, "learning_rate": 9.220522930775133e-10, "loss": 0.2109, "step": 20037 }, { "epoch": 0.9939977181407808, "grad_norm": 5.494714736938477, "learning_rate": 9.069990860810506e-10, "loss": 0.2209, "step": 20038 }, { "epoch": 0.9940473237759809, "grad_norm": 4.977043151855469, "learning_rate": 8.920697588704619e-10, "loss": 0.2978, "step": 20039 }, { "epoch": 0.9940969294111811, "grad_norm": 8.3429594039917, "learning_rate": 8.772643118160063e-10, "loss": 0.3257, "step": 20040 }, { "epoch": 0.9941465350463813, "grad_norm": 6.667695999145508, "learning_rate": 8.625827452840574e-10, "loss": 0.3024, "step": 20041 }, { "epoch": 0.9941961406815815, "grad_norm": 9.307328224182129, "learning_rate": 8.480250596387685e-10, "loss": 0.3071, "step": 20042 }, { "epoch": 0.9942457463167815, "grad_norm": 9.43769359588623, "learning_rate": 8.335912552404068e-10, "loss": 0.3226, "step": 20043 }, { "epoch": 0.9942953519519817, "grad_norm": 9.718266487121582, "learning_rate": 8.192813324470195e-10, "loss": 0.3081, "step": 20044 }, { "epoch": 0.9943449575871819, "grad_norm": 7.635897159576416, "learning_rate": 8.050952916138777e-10, "loss": 0.3185, "step": 20045 }, { "epoch": 0.9943945632223821, "grad_norm": 7.239254951477051, "learning_rate": 7.910331330912569e-10, "loss": 0.338, "step": 20046 }, { "epoch": 0.9944441688575822, "grad_norm": 11.622856140136719, "learning_rate": 7.770948572283221e-10, "loss": 0.2665, "step": 20047 }, { "epoch": 0.9944937744927824, "grad_norm": 10.89788818359375, "learning_rate": 7.632804643709079e-10, "loss": 0.3352, "step": 20048 }, { "epoch": 0.9945433801279825, "grad_norm": 5.911253929138184, "learning_rate": 7.495899548598529e-10, "loss": 0.3049, "step": 20049 }, { "epoch": 0.9945929857631827, "grad_norm": 6.40521240234375, "learning_rate": 7.360233290359953e-10, "loss": 0.2577, "step": 20050 }, { "epoch": 0.9946425913983828, "grad_norm": 14.687324523925781, "learning_rate": 7.225805872346226e-10, "loss": 0.3512, "step": 20051 }, { "epoch": 0.994692197033583, "grad_norm": 6.0092082023620605, "learning_rate": 7.092617297893567e-10, "loss": 0.3623, "step": 20052 }, { "epoch": 0.9947418026687832, "grad_norm": 5.035456657409668, "learning_rate": 6.96066757029934e-10, "loss": 0.3147, "step": 20053 }, { "epoch": 0.9947914083039834, "grad_norm": 9.552082061767578, "learning_rate": 6.829956692833151e-10, "loss": 0.3813, "step": 20054 }, { "epoch": 0.9948410139391834, "grad_norm": 6.509822845458984, "learning_rate": 6.700484668736851e-10, "loss": 0.257, "step": 20055 }, { "epoch": 0.9948906195743836, "grad_norm": 11.184897422790527, "learning_rate": 6.572251501218984e-10, "loss": 0.3398, "step": 20056 }, { "epoch": 0.9949402252095838, "grad_norm": 5.419698715209961, "learning_rate": 6.445257193449239e-10, "loss": 0.286, "step": 20057 }, { "epoch": 0.994989830844784, "grad_norm": 12.198330879211426, "learning_rate": 6.319501748586199e-10, "loss": 0.3945, "step": 20058 }, { "epoch": 0.9950394364799842, "grad_norm": 15.317516326904297, "learning_rate": 6.194985169738488e-10, "loss": 0.3443, "step": 20059 }, { "epoch": 0.9950890421151842, "grad_norm": 6.944520473480225, "learning_rate": 6.071707459992527e-10, "loss": 0.2263, "step": 20060 }, { "epoch": 0.9951386477503844, "grad_norm": 6.343781471252441, "learning_rate": 5.949668622401428e-10, "loss": 0.289, "step": 20061 }, { "epoch": 0.9951882533855846, "grad_norm": 21.47368621826172, "learning_rate": 5.828868659996101e-10, "loss": 0.4618, "step": 20062 }, { "epoch": 0.9952378590207848, "grad_norm": 6.417712688446045, "learning_rate": 5.709307575768597e-10, "loss": 0.2739, "step": 20063 }, { "epoch": 0.9952874646559849, "grad_norm": 6.395596504211426, "learning_rate": 5.59098537267766e-10, "loss": 0.2375, "step": 20064 }, { "epoch": 0.9953370702911851, "grad_norm": 9.947408676147461, "learning_rate": 5.473902053654279e-10, "loss": 0.3737, "step": 20065 }, { "epoch": 0.9953866759263852, "grad_norm": 6.655721187591553, "learning_rate": 5.358057621601687e-10, "loss": 0.3115, "step": 20066 }, { "epoch": 0.9954362815615854, "grad_norm": 5.779903888702393, "learning_rate": 5.243452079395361e-10, "loss": 0.2737, "step": 20067 }, { "epoch": 0.9954858871967855, "grad_norm": 5.516944885253906, "learning_rate": 5.130085429871923e-10, "loss": 0.2973, "step": 20068 }, { "epoch": 0.9955354928319857, "grad_norm": 5.977870464324951, "learning_rate": 5.017957675840234e-10, "loss": 0.2597, "step": 20069 }, { "epoch": 0.9955850984671859, "grad_norm": 4.122594833374023, "learning_rate": 4.907068820075856e-10, "loss": 0.2118, "step": 20070 }, { "epoch": 0.9956347041023861, "grad_norm": 4.952587127685547, "learning_rate": 4.797418865332138e-10, "loss": 0.2809, "step": 20071 }, { "epoch": 0.9956843097375861, "grad_norm": 5.813620090484619, "learning_rate": 4.689007814318026e-10, "loss": 0.3005, "step": 20072 }, { "epoch": 0.9957339153727863, "grad_norm": 8.240900039672852, "learning_rate": 4.5818356697313603e-10, "loss": 0.258, "step": 20073 }, { "epoch": 0.9957835210079865, "grad_norm": 8.245244026184082, "learning_rate": 4.4759024342200255e-10, "loss": 0.3036, "step": 20074 }, { "epoch": 0.9958331266431867, "grad_norm": 5.047558784484863, "learning_rate": 4.371208110409697e-10, "loss": 0.348, "step": 20075 }, { "epoch": 0.9958827322783869, "grad_norm": 6.94770622253418, "learning_rate": 4.2677527008982977e-10, "loss": 0.3401, "step": 20076 }, { "epoch": 0.9959323379135869, "grad_norm": 6.372413158416748, "learning_rate": 4.1655362082504423e-10, "loss": 0.3001, "step": 20077 }, { "epoch": 0.9959819435487871, "grad_norm": 5.237733364105225, "learning_rate": 4.064558634991889e-10, "loss": 0.239, "step": 20078 }, { "epoch": 0.9960315491839873, "grad_norm": 5.094659805297852, "learning_rate": 3.9648199836317405e-10, "loss": 0.2677, "step": 20079 }, { "epoch": 0.9960811548191875, "grad_norm": 5.9093780517578125, "learning_rate": 3.8663202566402437e-10, "loss": 0.2505, "step": 20080 }, { "epoch": 0.9961307604543876, "grad_norm": 5.048299789428711, "learning_rate": 3.769059456454338e-10, "loss": 0.295, "step": 20081 }, { "epoch": 0.9961803660895878, "grad_norm": 6.168378829956055, "learning_rate": 3.673037585483208e-10, "loss": 0.2601, "step": 20082 }, { "epoch": 0.9962299717247879, "grad_norm": 5.748040676116943, "learning_rate": 3.578254646113832e-10, "loss": 0.2628, "step": 20083 }, { "epoch": 0.9962795773599881, "grad_norm": 8.512858390808105, "learning_rate": 3.484710640694333e-10, "loss": 0.2458, "step": 20084 }, { "epoch": 0.9963291829951882, "grad_norm": 6.846198558807373, "learning_rate": 3.3924055715339743e-10, "loss": 0.3104, "step": 20085 }, { "epoch": 0.9963787886303884, "grad_norm": 7.052303314208984, "learning_rate": 3.3013394409309176e-10, "loss": 0.1632, "step": 20086 }, { "epoch": 0.9964283942655886, "grad_norm": 5.294978618621826, "learning_rate": 3.211512251133364e-10, "loss": 0.3073, "step": 20087 }, { "epoch": 0.9964779999007888, "grad_norm": 5.783570289611816, "learning_rate": 3.1229240043728624e-10, "loss": 0.2741, "step": 20088 }, { "epoch": 0.9965276055359888, "grad_norm": 6.030980587005615, "learning_rate": 3.0355747028421035e-10, "loss": 0.252, "step": 20089 }, { "epoch": 0.996577211171189, "grad_norm": 9.406728744506836, "learning_rate": 2.94946434870047e-10, "loss": 0.3118, "step": 20090 }, { "epoch": 0.9966268168063892, "grad_norm": 4.4188127517700195, "learning_rate": 2.864592944090694e-10, "loss": 0.3167, "step": 20091 }, { "epoch": 0.9966764224415894, "grad_norm": 8.654065132141113, "learning_rate": 2.7809604911166467e-10, "loss": 0.2198, "step": 20092 }, { "epoch": 0.9967260280767896, "grad_norm": 10.166865348815918, "learning_rate": 2.6985669918433433e-10, "loss": 0.4212, "step": 20093 }, { "epoch": 0.9967756337119896, "grad_norm": 5.757081031799316, "learning_rate": 2.617412448313594e-10, "loss": 0.2663, "step": 20094 }, { "epoch": 0.9968252393471898, "grad_norm": 6.54683256149292, "learning_rate": 2.5374968625424545e-10, "loss": 0.3318, "step": 20095 }, { "epoch": 0.99687484498239, "grad_norm": 15.444694519042969, "learning_rate": 2.458820236511672e-10, "loss": 0.3634, "step": 20096 }, { "epoch": 0.9969244506175902, "grad_norm": 41.067222595214844, "learning_rate": 2.381382572164137e-10, "loss": 0.3921, "step": 20097 }, { "epoch": 0.9969740562527903, "grad_norm": 12.276830673217773, "learning_rate": 2.3051838714205354e-10, "loss": 0.404, "step": 20098 }, { "epoch": 0.9970236618879905, "grad_norm": 10.292216300964355, "learning_rate": 2.2302241361737976e-10, "loss": 0.4064, "step": 20099 }, { "epoch": 0.9970732675231906, "grad_norm": 8.609394073486328, "learning_rate": 2.1565033682779956e-10, "loss": 0.3849, "step": 20100 }, { "epoch": 0.9971228731583908, "grad_norm": 8.25710391998291, "learning_rate": 2.0840215695594467e-10, "loss": 0.4289, "step": 20101 }, { "epoch": 0.9971724787935909, "grad_norm": 4.759786605834961, "learning_rate": 2.0127787418167122e-10, "loss": 0.2879, "step": 20102 }, { "epoch": 0.9972220844287911, "grad_norm": 7.825258255004883, "learning_rate": 1.9427748868150464e-10, "loss": 0.2748, "step": 20103 }, { "epoch": 0.9972716900639913, "grad_norm": 9.156157493591309, "learning_rate": 1.8740100062863974e-10, "loss": 0.2421, "step": 20104 }, { "epoch": 0.9973212956991915, "grad_norm": 4.916425704956055, "learning_rate": 1.8064841019349576e-10, "loss": 0.1953, "step": 20105 }, { "epoch": 0.9973709013343915, "grad_norm": 4.6652445793151855, "learning_rate": 1.7401971754371638e-10, "loss": 0.1943, "step": 20106 }, { "epoch": 0.9974205069695917, "grad_norm": 7.44619083404541, "learning_rate": 1.6751492284361458e-10, "loss": 0.2637, "step": 20107 }, { "epoch": 0.9974701126047919, "grad_norm": 8.478923797607422, "learning_rate": 1.6113402625361764e-10, "loss": 0.2382, "step": 20108 }, { "epoch": 0.9975197182399921, "grad_norm": 7.104162216186523, "learning_rate": 1.548770279330425e-10, "loss": 0.3162, "step": 20109 }, { "epoch": 0.9975693238751923, "grad_norm": 8.32886028289795, "learning_rate": 1.4874392803565507e-10, "loss": 0.2955, "step": 20110 }, { "epoch": 0.9976189295103923, "grad_norm": 6.919798851013184, "learning_rate": 1.4273472671466614e-10, "loss": 0.2309, "step": 20111 }, { "epoch": 0.9976685351455925, "grad_norm": 4.428337097167969, "learning_rate": 1.3684942411773538e-10, "loss": 0.3276, "step": 20112 }, { "epoch": 0.9977181407807927, "grad_norm": 5.858794689178467, "learning_rate": 1.3108802039196732e-10, "loss": 0.1918, "step": 20113 }, { "epoch": 0.9977677464159929, "grad_norm": 13.036504745483398, "learning_rate": 1.2545051567891543e-10, "loss": 0.3208, "step": 20114 }, { "epoch": 0.997817352051193, "grad_norm": 4.435927391052246, "learning_rate": 1.1993691011957797e-10, "loss": 0.2119, "step": 20115 }, { "epoch": 0.9978669576863932, "grad_norm": 4.455946922302246, "learning_rate": 1.1454720384940221e-10, "loss": 0.2558, "step": 20116 }, { "epoch": 0.9979165633215933, "grad_norm": 6.9071431159973145, "learning_rate": 1.092813970027251e-10, "loss": 0.2804, "step": 20117 }, { "epoch": 0.9979661689567935, "grad_norm": 6.7077484130859375, "learning_rate": 1.0413948970944276e-10, "loss": 0.3222, "step": 20118 }, { "epoch": 0.9980157745919936, "grad_norm": 10.503074645996094, "learning_rate": 9.912148209778594e-11, "loss": 0.2717, "step": 20119 }, { "epoch": 0.9980653802271938, "grad_norm": 8.135251998901367, "learning_rate": 9.422737429098939e-11, "loss": 0.244, "step": 20120 }, { "epoch": 0.998114985862394, "grad_norm": 6.191436290740967, "learning_rate": 8.945716641117763e-11, "loss": 0.2432, "step": 20121 }, { "epoch": 0.9981645914975942, "grad_norm": 5.388368606567383, "learning_rate": 8.481085857658944e-11, "loss": 0.3049, "step": 20122 }, { "epoch": 0.9982141971327942, "grad_norm": 5.9478888511657715, "learning_rate": 8.028845090213288e-11, "loss": 0.2983, "step": 20123 }, { "epoch": 0.9982638027679944, "grad_norm": 4.7891011238098145, "learning_rate": 7.588994349938538e-11, "loss": 0.2838, "step": 20124 }, { "epoch": 0.9983134084031946, "grad_norm": 8.690753936767578, "learning_rate": 7.161533647770392e-11, "loss": 0.2452, "step": 20125 }, { "epoch": 0.9983630140383948, "grad_norm": 4.918007850646973, "learning_rate": 6.746462994366987e-11, "loss": 0.1869, "step": 20126 }, { "epoch": 0.998412619673595, "grad_norm": 6.902133941650391, "learning_rate": 6.343782399942378e-11, "loss": 0.3183, "step": 20127 }, { "epoch": 0.998462225308795, "grad_norm": 9.670369148254395, "learning_rate": 5.95349187448857e-11, "loss": 0.4507, "step": 20128 }, { "epoch": 0.9985118309439952, "grad_norm": 4.597684860229492, "learning_rate": 5.575591427664506e-11, "loss": 0.1916, "step": 20129 }, { "epoch": 0.9985614365791954, "grad_norm": 3.852457046508789, "learning_rate": 5.210081068851569e-11, "loss": 0.2413, "step": 20130 }, { "epoch": 0.9986110422143956, "grad_norm": 11.266117095947266, "learning_rate": 4.856960807098077e-11, "loss": 0.3127, "step": 20131 }, { "epoch": 0.9986606478495957, "grad_norm": 9.765954971313477, "learning_rate": 4.516230651174791e-11, "loss": 0.4024, "step": 20132 }, { "epoch": 0.9987102534847959, "grad_norm": 10.588056564331055, "learning_rate": 4.1878906095194074e-11, "loss": 0.3791, "step": 20133 }, { "epoch": 0.998759859119996, "grad_norm": 6.410992622375488, "learning_rate": 3.871940690236553e-11, "loss": 0.3601, "step": 20134 }, { "epoch": 0.9988094647551962, "grad_norm": 4.7970991134643555, "learning_rate": 3.568380901208812e-11, "loss": 0.2859, "step": 20135 }, { "epoch": 0.9988590703903963, "grad_norm": 6.471855163574219, "learning_rate": 3.27721124993019e-11, "loss": 0.268, "step": 20136 }, { "epoch": 0.9989086760255965, "grad_norm": 7.80462121963501, "learning_rate": 2.998431743561625e-11, "loss": 0.2668, "step": 20137 }, { "epoch": 0.9989582816607967, "grad_norm": 6.5778374671936035, "learning_rate": 2.7320423890975222e-11, "loss": 0.2141, "step": 20138 }, { "epoch": 0.9990078872959969, "grad_norm": 9.646644592285156, "learning_rate": 2.4780431931437088e-11, "loss": 0.2225, "step": 20139 }, { "epoch": 0.9990574929311969, "grad_norm": 5.885125637054443, "learning_rate": 2.2364341619174334e-11, "loss": 0.2849, "step": 20140 }, { "epoch": 0.9991070985663971, "grad_norm": 4.121371746063232, "learning_rate": 2.0072153014694118e-11, "loss": 0.174, "step": 20141 }, { "epoch": 0.9991567042015973, "grad_norm": 12.465739250183105, "learning_rate": 1.7903866174062702e-11, "loss": 0.3536, "step": 20142 }, { "epoch": 0.9992063098367975, "grad_norm": 10.895715713500977, "learning_rate": 1.585948115223612e-11, "loss": 0.3374, "step": 20143 }, { "epoch": 0.9992559154719977, "grad_norm": 10.047262191772461, "learning_rate": 1.3938997998619309e-11, "loss": 0.2676, "step": 20144 }, { "epoch": 0.9993055211071977, "grad_norm": 9.308844566345215, "learning_rate": 1.2142416761506958e-11, "loss": 0.2701, "step": 20145 }, { "epoch": 0.9993551267423979, "grad_norm": 6.19842529296875, "learning_rate": 1.0469737485307995e-11, "loss": 0.2374, "step": 20146 }, { "epoch": 0.9994047323775981, "grad_norm": 6.9364142417907715, "learning_rate": 8.92096021110067e-12, "loss": 0.1849, "step": 20147 }, { "epoch": 0.9994543380127983, "grad_norm": 9.26431941986084, "learning_rate": 7.496084978297902e-12, "loss": 0.3188, "step": 20148 }, { "epoch": 0.9995039436479984, "grad_norm": 3.8338563442230225, "learning_rate": 6.19511182076149e-12, "loss": 0.2458, "step": 20149 }, { "epoch": 0.9995535492831986, "grad_norm": 13.283394813537598, "learning_rate": 5.0180407717981276e-12, "loss": 0.3325, "step": 20150 }, { "epoch": 0.9996031549183987, "grad_norm": 7.21682071685791, "learning_rate": 3.964871860273611e-12, "loss": 0.2822, "step": 20151 }, { "epoch": 0.9996527605535989, "grad_norm": 11.419830322265625, "learning_rate": 3.0356051117230725e-12, "loss": 0.3658, "step": 20152 }, { "epoch": 0.999702366188799, "grad_norm": 9.86095905303955, "learning_rate": 2.230240550016305e-12, "loss": 0.2853, "step": 20153 }, { "epoch": 0.9997519718239992, "grad_norm": 4.8470306396484375, "learning_rate": 1.5487781951373238e-12, "loss": 0.3243, "step": 20154 }, { "epoch": 0.9998015774591994, "grad_norm": 6.245424270629883, "learning_rate": 9.912180631843627e-13, "loss": 0.2894, "step": 20155 }, { "epoch": 0.9998511830943996, "grad_norm": 4.971164226531982, "learning_rate": 5.575601685903209e-13, "loss": 0.3222, "step": 20156 }, { "epoch": 0.9999007887295996, "grad_norm": 9.445212364196777, "learning_rate": 2.478045219023173e-13, "loss": 0.2698, "step": 20157 }, { "epoch": 0.9999503943647998, "grad_norm": 7.357999324798584, "learning_rate": 6.195113089191296e-14, "loss": 0.3287, "step": 20158 }, { "epoch": 1.0, "grad_norm": 11.2273588180542, "learning_rate": 0.0, "loss": 0.3443, "step": 20159 }, { "epoch": 1.0, "step": 20159, "total_flos": 2.8370015511411425e+18, "train_loss": 0.31716680183272683, "train_runtime": 19770.7669, "train_samples_per_second": 8.157, "train_steps_per_second": 1.02 } ], "logging_steps": 1, "max_steps": 20159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20159, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8370015511411425e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }